/*************************************************************************
 *
 *  $RCSfile: lngconv.cxx,v $
 *
 *  $Revision: 1.14 $
 *
 *  last change: $Author: vg $ $Date: 2003/06/12 11:04:25 $
 *
 *  The Contents of this file are made available subject to the terms of
 *  either of the following licenses
 *
 *         - GNU Lesser General Public License Version 2.1
 *         - Sun Industry Standards Source License Version 1.1
 *
 *  Sun Microsystems Inc., October, 2000
 *
 *  GNU Lesser General Public License Version 2.1
 *  =============================================
 *  Copyright 2000 by Sun Microsystems, Inc.
 *  901 San Antonio Road, Palo Alto, CA 94303, USA
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License version 2.1, as published by the Free Software Foundation.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 *  MA  02111-1307  USA
 *
 *
 *  Sun Industry Standards Source License Version 1.1
 *  =================================================
 *  The contents of this file are subject to the Sun Industry Standards
 *  Source License Version 1.1 (the "License"); You may not use this file
 *  except in compliance with the License. You may obtain a copy of the
 *  License at http://www.openoffice.org/license.html.
 *
 *  Software provided under this License is provided on an "AS IS" basis,
 *  WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
 *  WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
 *  MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
 *  See the License for the specific provisions governing your rights and
 *  obligations concerning the Software.
 *
 *  The Initial Developer of the Original Code is: Sun Microsystems, Inc.
 *
 *  Copyright: 2000 by Sun Microsystems, Inc.
 *
 *  All Rights Reserved.
 *
 *  Contributor(s): _______________________________________
 *
 *
 ************************************************************************/

#include <stdlib.h>
#include <stdio.h>

#if defined(LINUX) || defined (SOLARIS) || defined (IRIX) || defined (FREEBSD)
#include <locale.h>
#include <langinfo.h>
#endif

#include <string.h>
#include <osl/profile.h>
#include <osl/file.h>
#include <osl/diagnose.h>
#include <rtl/tencinfo.h>

#ifdef UNX
#include <unistd.h>

/* map the case-insensitive compare names used below onto their POSIX spelling */
#define stricmp strcasecmp
#define strnicmp strncasecmp 

/*****************************************************************************
 * typedefs
 *****************************************************************************/

/*
 * One entry of a lookup table: a key string and the text encoding it maps to.
 * The tables below are searched with _pair_search().
 *
 * NOTE(review): this typedef and the two prototypes are only declared inside
 * the UNX branch, while the tables and functions further down use _pair
 * unconditionally - confirm whether the WNT branch is still expected to build.
 */
typedef struct {
	const char				*key;
	const rtl_TextEncoding	 value;
} _pair;

static int _pair_compare (const char *key, const _pair *pair);
static const _pair* _pair_search (const char *key, const _pair *base, unsigned int member );


/* path separator; main() uses it to strip the directory part of the .lng file
 * name and to join the output path with the output file name */
const char * delimiter = "/";

#elif defined WNT

/* map the case-insensitive compare names used below onto their MSVC spelling */
#define stricmp _stricmp
#define strnicmp _strnicmp

/* path separator, see the UNX branch above */
const char * delimiter = "\\";

#endif

/* Section names of the .lng file as filled in by osl_getProfileSections().
 * Both ReadSourceFile() and WriteDestFile() walk it as a sequence of
 * NUL-terminated strings that ends at the first empty string. */
static sal_Char section_buf[2048];
/* Unicode value of each section, stored in the same order in which the
 * section names appear in section_buf. */
static rtl_uString * value_arr[256];
/* Number of entries ReadSourceFile() has stored in value_arr so far. */
static sal_uInt32 n_items = 0;

/* Program name used as prefix of diagnostic messages. */
static const char * pAppName = "lngconv";

/*
 * The lists are taken from sal/osl/unx/nlsupport.h
 *
 * All lists are looked up with _pair_search(), which is a binary search using
 * a case-insensitive ASCII comparison, so the entries of every list have to
 * stay sorted by key in that order.
 */

/* Complete locale names that need special handling (EUC); main() checks this
 * list first, using the whole locale string as key. */
const _pair _full_locale_list[] = {
    { "ja_JP.EUC",    RTL_TEXTENCODING_EUC_JP      },
    { "ko_KR.EUC",    RTL_TEXTENCODING_EUC_KR      },
    { "zh_CN.EUC",    RTL_TEXTENCODING_EUC_CN      },
    { "zh_TW.EUC",    RTL_TEXTENCODING_EUC_TW      }
};
 
 
/* Charset qualifiers: main() searches this list with the part of the locale
 * string that follows the last '.' (e.g. "ISO8859-15" of "de.ISO8859-15").
 * Must stay sorted by key, because _pair_search() does a binary search. */
const _pair _locale_extension_list[] = {
    { "ansi-1251",    RTL_TEXTENCODING_MS_1251     },
    { "big5",         RTL_TEXTENCODING_BIG5        },
    { "big5hk",       RTL_TEXTENCODING_BIG5_HKSCS  },
    { "euc",          RTL_TEXTENCODING_EUC_JP      },
    { "eucjp",        RTL_TEXTENCODING_EUC_JP      },
    { "gb18030",      RTL_TEXTENCODING_GB_18030    },
    { "gb2312",       RTL_TEXTENCODING_GB_2312     },
    { "gbk",          RTL_TEXTENCODING_GBK         },
    { "iso8859-1",    RTL_TEXTENCODING_ISO_8859_1  },
    { "iso8859-10",   RTL_TEXTENCODING_ISO_8859_10 }, 
    { "iso8859-13",   RTL_TEXTENCODING_ISO_8859_13 }, 
    { "iso8859-14",   RTL_TEXTENCODING_ISO_8859_14 },
    { "iso8859-15",   RTL_TEXTENCODING_ISO_8859_15 },
    { "iso8859-2",    RTL_TEXTENCODING_ISO_8859_2  },
    { "iso8859-3",    RTL_TEXTENCODING_ISO_8859_3  },
    { "iso8859-4",    RTL_TEXTENCODING_ISO_8859_4  },
    { "iso8859-5",    RTL_TEXTENCODING_ISO_8859_5  },
    { "iso8859-6",    RTL_TEXTENCODING_ISO_8859_6  },
    { "iso8859-7",    RTL_TEXTENCODING_ISO_8859_7  },
    { "iso8859-8",    RTL_TEXTENCODING_ISO_8859_8  },
    { "iso8859-9",    RTL_TEXTENCODING_ISO_8859_9  },
    { "koi8-r",       RTL_TEXTENCODING_KOI8_R      },
    { "pck",          RTL_TEXTENCODING_MS_932      },
#if (0)
    { "sun_eu_greek", RTL_TEXTENCODING_DONTKNOW    },
#endif
    { "utf-16",       RTL_TEXTENCODING_UNICODE     },
    { "utf-7",        RTL_TEXTENCODING_UTF7        },
    { "utf-8",        RTL_TEXTENCODING_UTF8        }
};

/* Last resort: maps a two-letter language code to a text encoding. main()
 * cuts the locale string down to its first two characters before searching
 * this list. Some languages map to RTL_TEXTENCODING_DONTKNOW; main() passes
 * that value on to WriteDestFile() unchanged.
 * Must stay sorted by key, because _pair_search() does a binary search. */
const _pair _iso_language_list[] = {
    { "af",  RTL_TEXTENCODING_ISO_8859_1 }, 
    { "ar",  RTL_TEXTENCODING_ISO_8859_6 }, 
    { "az",  RTL_TEXTENCODING_ISO_8859_9 }, 
    { "be",  RTL_TEXTENCODING_ISO_8859_5 }, 
    { "bg",  RTL_TEXTENCODING_ISO_8859_5 }, 
    { "ca",  RTL_TEXTENCODING_ISO_8859_1 }, 
    { "cs",  RTL_TEXTENCODING_ISO_8859_2 }, 
    { "da",  RTL_TEXTENCODING_ISO_8859_1 }, 
    { "de",  RTL_TEXTENCODING_ISO_8859_1 }, 
    { "el",  RTL_TEXTENCODING_ISO_8859_7 }, 
    { "en",  RTL_TEXTENCODING_ISO_8859_1 }, 
    { "es",  RTL_TEXTENCODING_ISO_8859_1 }, 
    { "et",  RTL_TEXTENCODING_ISO_8859_4 }, 
    { "eu",  RTL_TEXTENCODING_ISO_8859_1 }, 
    { "fa",  RTL_TEXTENCODING_ISO_8859_6 }, 
    { "fi",  RTL_TEXTENCODING_ISO_8859_1 }, 
    { "fo",  RTL_TEXTENCODING_ISO_8859_1 }, 
    { "fr",  RTL_TEXTENCODING_ISO_8859_1 }, 
    { "gr",  RTL_TEXTENCODING_ISO_8859_7 }, 
    { "hi",  RTL_TEXTENCODING_DONTKNOW }, 
    { "hr",  RTL_TEXTENCODING_ISO_8859_2 }, 
    { "hu",  RTL_TEXTENCODING_ISO_8859_2 }, 
    { "hy",  RTL_TEXTENCODING_DONTKNOW }, 
    { "id",  RTL_TEXTENCODING_ISO_8859_1 }, 
    { "is",  RTL_TEXTENCODING_ISO_8859_1 }, 
    { "it",  RTL_TEXTENCODING_ISO_8859_1 }, 
    { "iw",  RTL_TEXTENCODING_ISO_8859_8 }, 
    { "ja",  RTL_TEXTENCODING_EUC_JP }, 
    { "ka",  RTL_TEXTENCODING_DONTKNOW }, 
    { "kk",  RTL_TEXTENCODING_ISO_8859_5 }, 
    { "ko",  RTL_TEXTENCODING_EUC_KR }, 
    { "lt",  RTL_TEXTENCODING_ISO_8859_4 }, 
    { "lv",  RTL_TEXTENCODING_ISO_8859_4 }, 
    { "mk",  RTL_TEXTENCODING_ISO_8859_5 }, 
    { "mr",  RTL_TEXTENCODING_DONTKNOW }, 
    { "ms",  RTL_TEXTENCODING_ISO_8859_1 }, 
    { "nl",  RTL_TEXTENCODING_ISO_8859_1 }, 
    { "no",  RTL_TEXTENCODING_ISO_8859_1 }, 
    { "pl",  RTL_TEXTENCODING_ISO_8859_2 }, 
    { "pt",  RTL_TEXTENCODING_ISO_8859_1 }, 
    { "ro",  RTL_TEXTENCODING_ISO_8859_2 }, 
    { "ru",  RTL_TEXTENCODING_ISO_8859_5 }, 
    { "sa",  RTL_TEXTENCODING_DONTKNOW }, 
    { "sk",  RTL_TEXTENCODING_ISO_8859_2 }, 
    { "sl",  RTL_TEXTENCODING_ISO_8859_2 }, 
    { "sq",  RTL_TEXTENCODING_ISO_8859_2 }, 
    { "sv",  RTL_TEXTENCODING_ISO_8859_1 }, 
    { "sw",  RTL_TEXTENCODING_ISO_8859_1 }, 
    { "ta",  RTL_TEXTENCODING_DONTKNOW }, 
    { "th",  RTL_TEXTENCODING_DONTKNOW }, 
    { "tr",  RTL_TEXTENCODING_ISO_8859_9 }, 
    { "tt",  RTL_TEXTENCODING_ISO_8859_5 }, 
    { "uk",  RTL_TEXTENCODING_ISO_8859_5 }, 
    { "ur",  RTL_TEXTENCODING_ISO_8859_6 }, 
    { "uz",  RTL_TEXTENCODING_ISO_8859_9 }, 
    { "vi",  RTL_TEXTENCODING_DONTKNOW }, 
    { "zh",  RTL_TEXTENCODING_BIG5 }
};


/************************************************************************
 * Usage
 *
 * Prints the command line synopsis to stdout and terminates the
 * program with exit code -1. Does not return.
 ************************************************************************/

void Usage() 
{
    static const char synopsis[] =
        "Usage: lngconv [-o <outpath>] <lng-file> <lang no> -CHARSET_<lng charset> <locale> [<locale2>] ...\n";

    fputs( synopsis, stdout );
    exit(-1);
}

/*****************************************************************************
 * compare function for binary search
 *
 * Compares key with the key of the given table entry, ignoring ASCII case.
 * Returns the result of rtl_str_compareIgnoreAsciiCase(): negative, zero or
 * positive.
 *****************************************************************************/

static int
_pair_compare (const char *key, const _pair *pair)
{
    return rtl_str_compareIgnoreAsciiCase( key, pair->key );
}

/*****************************************************************************
 * binary search on encoding tables
 *
 * key     string to look for (compared via _pair_compare)
 * base    first entry of a table sorted by key
 * member  number of entries in the table
 *
 * Returns the matching entry, or NULL if there is none or if any of the
 * arguments is NULL / zero.
 *****************************************************************************/

static const _pair*
_pair_search (const char *key, const _pair *base, unsigned int member )
{
    unsigned int lo;
    unsigned int hi;

    /* nothing to search for / nothing to search in */
    if ( !key || !base || !member )
        return NULL;

    /* narrow down the half-open range [lo, hi) until it is empty */
    for ( lo = 0, hi = member; lo < hi; )
    {
        const unsigned int mid = (lo + hi) / 2;
        const _pair *candidate = base + mid;
        const int order = _pair_compare( key, candidate );

        if ( order == 0 )
            return candidate;

        if ( order < 0 )
            hi = mid;
        else
            lo = mid + 1;
    }

    return NULL;
}

/************************************************************************
 * ReadSourceFile
 *
 * Opens the .lng file as an oslProfile, reads the names of all sections
 * into section_buf and stores, for every section, the string of the
 * requested language converted to unicode in value_arr (n_items is
 * advanced accordingly).
 *
 * sourceFile  path of the .lng file; a path not starting with '/' is
 *             taken relative to the current working directory
 * nLang       language number; its decimal representation is the key
 *             that is read from every section
 * encoding    text encoding of the strings stored for nLang
 *
 * If a section has no string for nLang, the string of the default
 * language "01" is used and treated as US-ASCII.
 *
 * Returns 1. Terminates the program with exit code 1 if the file can
 * not be located or opened, or if it has more sections than value_arr
 * can hold.
 ************************************************************************/

int ReadSourceFile( const char * sourceFile, long nLang, rtl_TextEncoding encoding )
{
	rtl_uString * pFile = NULL;
	oslProfile aProfile = NULL;

	const char * pcDefaultLang = "01";
	const char * pc;
	
	char  absFileName[2048];
	char  Lang[32];		/* large enough for any long, incl. sign */

	/* convert language no to string; nLang is a long, so "%ld" is required */
	snprintf( Lang, sizeof( Lang ), "%ld", nLang );

	/* make source file absolute if necessary */
	if( sourceFile[0] != '/' )
	{
		if( NULL == getcwd( absFileName, sizeof( absFileName ) ) )
		{
			fprintf( stderr, "%s: unable to determine the current working directory.\n\n", pAppName );
			exit( 1 );
		}

		/* room is needed for cwd + '/' + sourceFile + terminating NUL */
		if( strlen( absFileName ) + strlen( sourceFile ) + 2 > sizeof( absFileName ) )
		{
			fprintf( stderr, "%s: path of language file %s is too long.\n\n", pAppName, sourceFile );
			exit( 1 );
		}

		strcat( absFileName, "/" );
		strcat( absFileName, sourceFile );
		sourceFile = absFileName;
	}
	
	/* convert file name to unicode file url */
	rtl_uString_newFromAscii( &pFile, sourceFile );
	osl_getFileURLFromSystemPath( pFile, &pFile );

	/* open .lng file - use old oslProfile implementation */
	aProfile = osl_openProfile( pFile, 0 );
	rtl_uString_release( pFile );
	
	if( NULL == aProfile )
	{
		/* pFile is released (and not a char string anyway): report the system path */
		fprintf( stderr, "%s: unable to open language file %s.\n\n", pAppName, sourceFile );
		exit( 1 );
	}

	/* read section names into buffer */
	osl_getProfileSections( aProfile, section_buf, sizeof( section_buf ) );
	
	/* iterate over all sections */
	for( pc = section_buf; *pc != '\0'; pc += strlen( pc ) + 1 )
	{
		char entry[2048];
		
		rtl_TextEncoding entry_enc = encoding;

		/* WriteDestFile pairs the n-th section with value_arr[n], so a file
		 * with more sections than slots can not be handled at all */
		if( n_items >= sizeof( value_arr ) / sizeof( value_arr[0] ) )
		{
			fprintf( stderr, "%s: too many sections in language file %s.\n\n", pAppName, sourceFile );
			osl_closeProfile( aProfile );
			exit( 1 );
		}
		
		/* read string for selected language */
		entry[0] = '\0';
		osl_readProfileString( aProfile, pc, Lang, entry, sizeof(entry), "" );
		
		/* use default language if string is empty */
		if( '\0' == entry[0] )
		{
			osl_readProfileString( aProfile, pc, pcDefaultLang, entry, sizeof(entry), "" );
			entry_enc = RTL_TEXTENCODING_ASCII_US;
		}
		
		/* convert entry string to unicode */
		value_arr[n_items] = NULL;
		rtl_string2UString( value_arr + n_items, entry, strlen( entry ), entry_enc, OSTRING_TO_OUSTRING_CVTFLAGS );
		++n_items;
	}
	

	/* close source file */
	osl_closeProfile( aProfile );
	
	return 1;
}


/************************************************************************
 * WriteDestFile
 *
 * Writes one "<section> = <value>" line per section of section_buf to
 * destFile. The n-th section is paired with value_arr[n], which is
 * converted from unicode to the requested encoding; sections without a
 * value are skipped.
 *
 * destFile  path of the file to create / overwrite
 * encoding  text encoding of the values in the output file
 *
 * Returns 1 on success and 0 if the file could not be opened or
 * written; in the latter case a message is printed to stderr.
 ************************************************************************/

int WriteDestFile( const char * destFile, rtl_TextEncoding encoding )
{
	/* value_arr has static storage, so slots that were never filled are NULL;
	 * staying below its capacity is enough to keep every access valid */
	const unsigned long capacity = sizeof( value_arr ) / sizeof( value_arr[0] );
	unsigned long n = 0;
	const char * pc;
	int ok = 1;

	FILE * pFile = fopen( destFile, "w" );
	
	if( NULL == pFile )
	{
		fprintf( stderr, "unable to open output file %s.\n", destFile );
		return 0;
	}

	/* iterate over all sections */
	for( pc = section_buf; *pc != '\0' && n < capacity; pc += strlen( pc ) + 1 )
	{
		rtl_String  * value   = NULL;
		rtl_uString * u_value = value_arr[n++];
	
		if( NULL == u_value )
			continue;

		/* convert string to desired encoding */
		rtl_uString2String( &value, u_value->buffer, u_value->length, encoding, OUSTRING_TO_OSTRING_CVTFLAGS );
	
		if( NULL != value )
		{
			/* write key = value pairs to file */
			if( fprintf( pFile, "%s = %s\n", pc, value->buffer ) < 0 )
				ok = 0;
			rtl_string_release( value );
		}
	}

	/* fclose flushes buffered data, so a failure here means lost output */
	if( 0 != fclose( pFile ) )
		ok = 0;

	if( !ok )
		fprintf( stderr, "error writing output file %s.\n", destFile );

 	return ok;
}


/************************************************************************
 * main
 ************************************************************************/

int main( int argc, const char * argv[] )
{
    const char * pOutPath = NULL;
    const char * pLNGFile = NULL;
	
    rtl_TextEncoding nSourceCharSet = RTL_TEXTENCODING_DONTKNOW;
    
    long nLang = 0;
    int i = 0;

    /* default behavior without parameter */
    if( argc < 4 )
        Usage();

    /* iterate over command line arguments */
    for( i = 1; i < argc; i++ )
    {
        /* check if outpath is specified */
        if( 0 == strncmp(argv[i], "-o", 2) )
        {
            if(argc - i >= 2)
                pOutPath = argv[++i];
            else
                Usage();
        }

        /* the .lng file to read from name */
        else if( pLNGFile == NULL )
            pLNGFile = argv[i];

        /* the language no */
        else if( nLang == 0 )
            sscanf( argv[i], "%d", &nLang );

        /* the encoding used for the language in .lng file */            
        else if( nSourceCharSet == RTL_TEXTENCODING_DONTKNOW )
        {
            const char * pStr = argv[i];
            
            if( !strnicmp( pStr, "-CHARSET_", 9 ) )
			{
				if( !stricmp( pStr + 9, "ANSI" ) )
					 nSourceCharSet = RTL_TEXTENCODING_MS_1252;
				else if( !stricmp( pStr + 9, "MAC" ) )
					 nSourceCharSet = RTL_TEXTENCODING_APPLE_ROMAN;
				else if( !stricmp( pStr + 9, "IBMPC_437" ) )
					 nSourceCharSet = RTL_TEXTENCODING_IBM_437;
				else if( !stricmp( pStr + 9, "IBMPC_850" ) )
					 nSourceCharSet = RTL_TEXTENCODING_IBM_850;
				else if( !stricmp( pStr + 9, "IBMPC_860" ) )
					 nSourceCharSet = RTL_TEXTENCODING_IBM_860;
				else if( !stricmp( pStr + 9, "IBMPC_861" ) )
					 nSourceCharSet = RTL_TEXTENCODING_IBM_861;
				else if( !stricmp( pStr + 9, "IBMPC_863" ) )
					 nSourceCharSet = RTL_TEXTENCODING_IBM_863;
				else if( !stricmp( pStr + 9, "IBMPC_865" ) )
					 nSourceCharSet = RTL_TEXTENCODING_IBM_865;
				else if( !stricmp( pStr + 9, "IBMPC" ) )
					 nSourceCharSet = RTL_TEXTENCODING_IBM_850;
				else if( !stricmp( pStr + 9, "UTF8" ) )
					 nSourceCharSet = RTL_TEXTENCODING_UTF8;
                else if( !strnicmp( pStr + 9, "microsoft-", 10 ) )
                {
                     if( !stricmp( pStr + 19, "cp1250" ) )
						nSourceCharSet = RTL_TEXTENCODING_MS_1250;
                     else if( !stricmp( pStr + 19, "cp1251" ) )
						nSourceCharSet = RTL_TEXTENCODING_MS_1251;
                     else if( !stricmp( pStr + 19, "cp1252" ) )
						nSourceCharSet = RTL_TEXTENCODING_MS_1252;
                     else if( !stricmp( pStr + 19, "cp1253" ) )
						nSourceCharSet = RTL_TEXTENCODING_MS_1253;
                     else if( !stricmp( pStr + 19, "cp1254" ) )
						nSourceCharSet = RTL_TEXTENCODING_MS_1254;
                     else if( !stricmp( pStr + 19, "cp1255" ) )
						nSourceCharSet = RTL_TEXTENCODING_MS_1255;
                     else if( !stricmp( pStr + 19, "cp1256" ) )
						nSourceCharSet = RTL_TEXTENCODING_MS_1256;
                     else
                     {
                        fprintf( stderr, "unknown TextEncoding: %s\n" , pStr );
						nSourceCharSet = RTL_TEXTENCODING_MS_1252;
                    }
                }
				else if( !stricmp( pStr + 9, "DONTKNOW" ) )
					 nSourceCharSet = RTL_TEXTENCODING_DONTKNOW;
				else
				{
                    fprintf( stderr, "unknown TextEncoding: %s\n" , pStr );
					nSourceCharSet = rtl_getTextEncodingFromUnixCharset( pStr + 9 );
					if( nSourceCharSet == 0 )
						nSourceCharSet = RTL_TEXTENCODING_MS_1252;
				}
            }
        }
        
        /* all other arguments expected to be destination locals / charsets */
        else
            break;
    }
    
    if( pLNGFile == NULL )
    {
        fprintf( stderr, "%s: no language file specified.\n\n", pAppName );
        Usage();
    }
    
    if( nLang == 0 )
    {
        fprintf( stderr, "%s: no language specified.\n\n", pAppName );
        Usage();
    }
    
    if( nSourceCharSet == RTL_TEXTENCODING_DONTKNOW )
    {
        fprintf( stderr, "%s: no source encoding specified.\n\n", pAppName );
        Usage();
    }
    
    if( i >= argc )
    {
        fprintf( stderr, "%s: no destination locales/languages specified.\n\n", pAppName );
        Usage();
    }

	ReadSourceFile( pLNGFile, nLang, nSourceCharSet );

    /* iterate over languages / locales */
    while( i < argc )
    {
        const _pair *language = 0;

        char * pOutFile = NULL;
		char * locale_buf = NULL;
        char * pc;

        // ignore leading path
        if( pc = (char * ) strrchr( pLNGFile, *delimiter ) )
            pLNGFile = pc + 1;

        int nStrLen = strlen( pLNGFile ) + strlen( argv[i] ) + 1;
        
        if( pOutPath )
            nStrLen += strlen( pOutPath ) + 1;
            
        pOutFile = (char *) malloc( nStrLen + 1 );
        pOutFile[0] = '\0';

        /* construct output file name */
        if( pOutPath )
        {
            strcpy(pOutFile, pOutPath);
            strcat(pOutFile, delimiter);
        }
        
        /* copy file name and cut off .lng extension */
        strcat( pOutFile, pLNGFile );
        if( ( pc = strstr( pOutFile, ".lng") ) || ( pc = strstr( pOutFile, ".LNG") ) )
            *pc = '\0';
        
        /* append ".<locale>" to the file name */
        strcat( pOutFile, "." );
        strcat( pOutFile, argv[i] );

		/* scan and split locale string */
		locale_buf = strdup( argv[i++] );

        /* check special handling list (EUC) first */
        {		
            const unsigned int members = sizeof( _full_locale_list ) / sizeof( _pair );
            language = _pair_search( locale_buf, _full_locale_list, members );
        }

        if( NULL == language )
        {
            /*
             *  check if there is a charset qualifier at the end of the given locale string
             *  e.g. de.ISO8859-15 or de.ISO8859-15@euro which strongly indicates what
             *  charset to use
             */
		    pc = strrchr( locale_buf, '.' );

            if( NULL != pc )
            {
                const unsigned int members = sizeof( _locale_extension_list ) / sizeof( _pair );
                language = _pair_search( pc + 1, _locale_extension_list, members);
            }
        }

        /* use iso language code to determine the charset */
        if( NULL == language )
        {
            const unsigned int members = sizeof( _iso_language_list ) / sizeof( _pair );

            /* iso lang codes have 2 charaters */
            locale_buf[2] = '\0';

            language = _pair_search( locale_buf, _iso_language_list, members);
        }
        
		free( locale_buf );

        /* a matching item in our list provides a mapping from codeset to
         * rtl-codeset */

        if ( NULL == language )
        {
            fprintf( stderr, "unknown locale %s\n", argv[i-1] );
            continue;
        }

		printf( "Writing output file %s.\n", pOutFile );
		WriteDestFile( pOutFile, language->value );
    }
	
	/* FIXME: destroy value array list */
    
    return 0;
}
