1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/textandloc/charconvfw/charconv_fw/tools/convtool/convtool.cpp Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,376 @@
1.4 +/*
1.5 +* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
1.6 +* All rights reserved.
1.7 +* This component and the accompanying materials are made available
1.8 +* under the terms of "Eclipse Public License v1.0"
1.9 +* which accompanies this distribution, and is available
1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
1.11 +*
1.12 +* Initial Contributors:
1.13 +* Nokia Corporation - initial contribution.
1.14 +*
1.15 +* Contributors:
1.16 +*
1.17 +* Description:
1.18 +*
1.19 +*/
1.20 +
1.21 +
1.22 +#pragma warning (disable: 4514) // unreferenced inline/local function has been removed
1.23 +
1.24 +#include <stdio.h>
1.25 +#include <stdlib.h>
1.26 +#include <string.h>
1.27 +#include <locale.h>
1.28 +#include <wchar.h>
1.29 +#if defined(__VC32__)
1.30 +#include <FCNTL.H>
1.31 +#include <IO.H>
1.32 +#endif
1.33 +
1.34 +#undef BIG_ENDIAN
1.35 +
1.36 +#if (defined(__MSVCRT__) || defined(_MSC_VER))
1.37 +//#define _stricmp _stricmp
1.38 +//#define _strnicmp _strnicmp
1.39 +#else // linux
1.40 +#define _stricmp strcasecmp
1.41 +#define _strnicmp strncasecmp
1.42 +#endif
1.43 +
// Version of the tool, printed by PrintUsage() with "%03d" (i.e. shown as "025").
// It is deliberately written without a leading zero: "025" would be an octal
// literal with the decimal value 21.
const int KVersionNumber=25;
// Number of bytes read from the input file in one go; also passed to
// mbstowcs()/wcstombs() as the maximum number of elements to convert.
const int KLargeNumber=1000000;

// UTF-8 converters, defined outside this file.
extern int Utf8ToUnicode(wchar_t* aUnicode, const char* aUtf8);
extern int UnicodeToUtf8(char* aUtf8, const wchar_t* aUnicode);
1.49 +
// Byte order of unicode text. EByteOrderNative/EByteOrderForeign are aliases
// for the two concrete orders, selected by whether BIG_ENDIAN is defined.
enum TByteOrder
	{
	EByteOrderUnspecified=0,
	EByteOrderBigEndian=1,
	EByteOrderLittleEndian=2,
#if !defined(BIG_ENDIAN)
	// little-endian build: the native order is little endian
	EByteOrderNative=EByteOrderLittleEndian,
	EByteOrderForeign=EByteOrderBigEndian
#else
	// big-endian build: the native order is big endian
	EByteOrderNative=EByteOrderBigEndian,
	EByteOrderForeign=EByteOrderLittleEndian
#endif
	};
1.63 +
// A run of raw bytes together with its length.
struct SBuffer
	{
	// number of meaningful bytes starting at iData (terminating zeros are not counted)
	int iNumberOfBytes;
	// start of the bytes, or NULL if the buffer is not in use
	void* iData;
	};
1.69 +
1.70 +void PrintUsage(const char* aProgramName)
1.71 + {
1.72 + fprintf(stderr, "\nVersion %03d\n\nCharacter set conversion tool\nCopyright (c) 1999 Symbian Ltd\n\n", KVersionNumber);
1.73 + fprintf(stderr, "Usage:\n\n\t%s [<options>] <inputspec> <outputspec>\n\nwhere\n\n\t"
1.74 + "options := [-big|-little][-byteordermark]\n\t"
1.75 + "inputspec := -input=<format> [<input_file>]\n\t"
1.76 + "outputspec := -output=<format> [<output_file>]\n\t"
1.77 + "format := unicode|1252|utf8|...\n\n", aProgramName);
1.78 + const char* localeData=setlocale(LC_ALL, "");
1.79 + while (*localeData!='.')
1.80 + {
1.81 + ++localeData;
1.82 + }
1.83 + fprintf(stderr, "(The default encoding is currently \"%s\")\n\n", localeData+1);
1.84 + }
1.85 +
void Assert(int aCondition, const char* aErrorMessageFormat, const void* aExtraParameter1=NULL, const void* aExtraParameter2=NULL)
// Does nothing if aCondition is non-zero. Otherwise writes "Error: <message>" and a
// new-line to stderr and terminates the program with exit status 1.
// aErrorMessageFormat is a printf-style format consuming at most the two extra
// parameters; callers in this file pass either strings (for "%s") or integers cast
// to "const void*" (for "%d").
// NOTE(review): printing a pointer-typed argument with "%d" is not strictly portable;
// it is kept because the signature cannot change without touching every caller.
	{
	if (aCondition)
		{
		return;
		}
	// format straight to stderr: the message length is not bounded (it may embed
	// command-line text), so it must not be built in a fixed-size buffer
	fputs("Error: ", stderr);
	fprintf(stderr, aErrorMessageFormat, aExtraParameter1, aExtraParameter2);
	fputc('\n', stderr);
	exit(1);
	}
1.96 +
void PrintWarning(const char* aWarningMessage)
// Writes aWarningMessage to stderr, prefixed with "Warning: " and followed by a new-line.
// Unlike Assert(), it does not terminate the program.
	{
	static const char KWarningFormat[]="Warning: %s\n";
	fprintf(stderr, KWarningFormat, aWarningMessage);
	}
1.101 +
1.102 +int TryFileParameter(int aArgc, char* aArgv[], int& aArgIndex, const char* aInputOrOutput, const char*& aEncoding, FILE*& aFile, const char* aFileMode)
1.103 + {
1.104 + char prefix[100];
1.105 + strcpy(prefix, "-");
1.106 + strcat(prefix, aInputOrOutput);
1.107 + strcat(prefix, "=");
1.108 + int lengthOfPrefix=strlen(prefix);
1.109 + if (_strnicmp(aArgv[aArgIndex], prefix, lengthOfPrefix)==0)
1.110 + {
1.111 + Assert(aEncoding==NULL, "\"%s...\" is specified more than once", prefix);
1.112 + aEncoding=aArgv[aArgIndex]+lengthOfPrefix;
1.113 + ++aArgIndex;
1.114 + if ((aArgIndex>=aArgc) || (aArgv[aArgIndex][0]=='-'))
1.115 + {
1.116 + --aArgIndex;
1.117 + }
1.118 + else
1.119 + {
1.120 + aFile=fopen(aArgv[aArgIndex], aFileMode);
1.121 + Assert(aFile!=NULL, "opening %s-file failed", aInputOrOutput);
1.122 + }
1.123 + return 1;
1.124 + }
1.125 + return 0;
1.126 + }
1.127 +
1.128 +void ReadParameters(int aArgc, char* aArgv[], int& aOutputByteOrderMark, TByteOrder& aUnicodeByteOrder, const char*& aInputEncoding, const char*& aOutputEncoding, FILE*& aInputFile, FILE*& aOutputFile)
1.129 + {
1.130 + if ((aArgc<=1) || (_stricmp(aArgv[1], "?")==0) || (_stricmp(aArgv[1], "/?")==0))
1.131 + {
1.132 + PrintUsage(aArgv[0]);
1.133 + exit(0);
1.134 + }
1.135 + for (int i=1; i<aArgc; ++i) // start at index 1 to avoid the program name (which is the first parameter)
1.136 + {
1.137 + if (_stricmp(aArgv[i], "-byteordermark")==0)
1.138 + {
1.139 + Assert(!aOutputByteOrderMark, "\"-byteordermark\" is specified more than once");
1.140 + aOutputByteOrderMark=1;
1.141 + }
1.142 + else if (_stricmp(aArgv[i], "-big")==0)
1.143 + {
1.144 + Assert(aUnicodeByteOrder==EByteOrderUnspecified, "the byte order of unicode text (i.e. \"-big\"/\"-little\") is specified more than once");
1.145 + aUnicodeByteOrder=EByteOrderBigEndian;
1.146 + }
1.147 + else if (_stricmp(aArgv[i], "-little")==0)
1.148 + {
1.149 + Assert(aUnicodeByteOrder==EByteOrderUnspecified, "the byte order of unicode text (i.e. \"-big\"/\"-little\") is specified more than once");
1.150 + aUnicodeByteOrder=EByteOrderLittleEndian;
1.151 + }
1.152 + else
1.153 + {
1.154 + Assert(TryFileParameter(aArgc, aArgv, i, "input", aInputEncoding, aInputFile, "r") ||
1.155 + TryFileParameter(aArgc, aArgv, i, "output", aOutputEncoding, aOutputFile, "w"), "bad parameter \"%s\"", aArgv[i]);
1.156 + }
1.157 + }
1.158 + Assert(aInputEncoding!=NULL, "no input encoding is specified");
1.159 + Assert(aOutputEncoding!=NULL, "no output encoding is specified");
1.160 + }
1.161 +
1.162 +int ReadFromFileReturningNumberOfBytesRead(void* aBuffer, int aNumberOfBytesToRead, FILE* aInputFile)
1.163 + {
1.164 + int numberOfBytesRead=0;
1.165 + int numberOfBytesToReadThisTime=aNumberOfBytesToRead;
1.166 + for (;;)
1.167 + {
1.168 + for (;;)
1.169 + {
1.170 + const int remainingNumberOfBytesToRead=aNumberOfBytesToRead-numberOfBytesRead;
1.171 + if (numberOfBytesToReadThisTime>remainingNumberOfBytesToRead)
1.172 + {
1.173 + numberOfBytesToReadThisTime=remainingNumberOfBytesToRead;
1.174 + }
1.175 + const int numberOfBytesReadThisTime=fread(aBuffer, 1, numberOfBytesToReadThisTime, aInputFile);
1.176 + const int error=ferror(aInputFile);
1.177 + if (error==0)
1.178 + {
1.179 + aBuffer=((unsigned char*)aBuffer)+numberOfBytesReadThisTime;
1.180 + numberOfBytesRead+=numberOfBytesReadThisTime;
1.181 + Assert(numberOfBytesRead<=aNumberOfBytesToRead, "internal error (read too many bytes)");
1.182 + if ((numberOfBytesRead>=aNumberOfBytesToRead) || feof(aInputFile))
1.183 + {
1.184 + return numberOfBytesRead;
1.185 + }
1.186 + break;
1.187 + }
1.188 + numberOfBytesToReadThisTime/=2;
1.189 + Assert(numberOfBytesToReadThisTime>0, "reading from file failed with error number %d", (const void*)error);
1.190 + clearerr(aInputFile);
1.191 + }
1.192 + }
1.193 + }
1.194 +
1.195 +void WriteToFile(const void* aBuffer, int aNumberOfBytesToWrite, FILE* aOutputFile)
1.196 + {
1.197 + const int numberOfBytesWritten=fwrite(aBuffer, 1, aNumberOfBytesToWrite, aOutputFile);
1.198 + Assert(numberOfBytesWritten==aNumberOfBytesToWrite, "only %d out of %d bytes could be written to file", (const void*)numberOfBytesWritten, (const void*)aNumberOfBytesToWrite);
1.199 + const int error=ferror(aOutputFile);
1.200 + Assert(error==0, "writing to file failed with error number %d", (const void*)error);
1.201 + }
1.202 +
1.203 +void HandleByteOrderMarks(int aOutputByteOrderMark, TByteOrder& aUnicodeByteOrder, const char* aInputEncoding, const char* aOutputEncoding, FILE* aInputFile, FILE* aOutputFile)
1.204 + {
1.205 + if (_stricmp(aInputEncoding, "unicode")==0)
1.206 + {
1.207 + unsigned short firstUnicodeCharacter=0;
1.208 + const int numberOfBytesRead=ReadFromFileReturningNumberOfBytesRead((void*)&firstUnicodeCharacter, sizeof(unsigned short), aInputFile);
1.209 + TByteOrder byteOrderSpecifiedByByteOrderMark=EByteOrderUnspecified;
1.210 + if (numberOfBytesRead==sizeof(unsigned short))
1.211 + {
1.212 + switch (firstUnicodeCharacter)
1.213 + {
1.214 + case 0xfeff:
1.215 + byteOrderSpecifiedByByteOrderMark=EByteOrderNative;
1.216 + break;
1.217 + case 0xfffe:
1.218 + byteOrderSpecifiedByByteOrderMark=EByteOrderForeign;
1.219 + break;
1.220 + default:
1.221 + const int error=fseek(aInputFile, 0, SEEK_SET); // rewind to the start of the file
1.222 + Assert(error==0, "could not rewind to the start of the input file");
1.223 + break;
1.224 + }
1.225 + }
1.226 + if (byteOrderSpecifiedByByteOrderMark!=EByteOrderUnspecified)
1.227 + {
1.228 + if ((aUnicodeByteOrder!=EByteOrderUnspecified) && (byteOrderSpecifiedByByteOrderMark!=aUnicodeByteOrder))
1.229 + {
1.230 + PrintWarning("the byte order specified by the byte-order mark in the unicode input is different from the byte order specified by the parameter - taking the byte-order specified by the byte-order mark in the unicode input");
1.231 + }
1.232 + aUnicodeByteOrder=byteOrderSpecifiedByByteOrderMark;
1.233 + }
1.234 + }
1.235 + if (aOutputByteOrderMark)
1.236 + {
1.237 + if (_stricmp(aOutputEncoding, "unicode")!=0)
1.238 + {
1.239 + PrintWarning("\"-byteordermark\" is only relevant for unicode output");
1.240 + }
1.241 + else
1.242 + {
1.243 + Assert(aUnicodeByteOrder!=EByteOrderUnspecified, "the byte order must be specified if a byte-order mark is to be added to the unicode output");
1.244 + unsigned short firstUnicodeCharacter=(unsigned short)((aUnicodeByteOrder==EByteOrderNative)? 0xfeff: 0xfffe);
1.245 + WriteToFile((const void*)&firstUnicodeCharacter, sizeof(unsigned short), aOutputFile);
1.246 + }
1.247 + }
1.248 + }
1.249 +
1.250 +void ObeyRequiredByteOrderIfUnicode(TByteOrder& aUnicodeByteOrder, const char* aEncoding, SBuffer& aBuffer)
1.251 + {
1.252 + if (_stricmp(aEncoding, "unicode")==0)
1.253 + {
1.254 + Assert(aBuffer.iNumberOfBytes%sizeof(wchar_t)==0, "internal error (bad number of bytes in unicode buffer)");
1.255 + if (aUnicodeByteOrder==EByteOrderUnspecified)
1.256 + {
1.257 + PrintWarning("the byte order of unicode text is unspecified - defaulting to little endian");
1.258 + aUnicodeByteOrder=EByteOrderLittleEndian;
1.259 + }
1.260 + if (aUnicodeByteOrder==EByteOrderForeign)
1.261 + {
1.262 + for (unsigned char* bytePointer=((unsigned char*)aBuffer.iData)+(aBuffer.iNumberOfBytes-sizeof(wchar_t)); bytePointer>=aBuffer.iData; bytePointer-=sizeof(wchar_t))
1.263 + {
1.264 + unsigned char temp=*bytePointer;
1.265 + *bytePointer=*(bytePointer+1);
1.266 + *(bytePointer+1)=temp;
1.267 + }
1.268 + }
1.269 + }
1.270 + }
1.271 +
1.272 +int OtherToUnicode(const char* aInputEncoding, wchar_t* aUnicode, const char* aOther)
1.273 +// if the output parameter is NULL, it returns the precise size of the would-be output parameter (in terms of number of "wchar_t"s) excluding any trailing '\0', otherwise it returns 0
1.274 + {
1.275 + if (_stricmp(aInputEncoding, "utf8")==0)
1.276 + {
1.277 + return Utf8ToUnicode(aUnicode, aOther);
1.278 + }
1.279 + char localeData[100];
1.280 + strcpy(localeData, ".");
1.281 + strcat(localeData, aInputEncoding);
1.282 + Assert(setlocale(LC_ALL, localeData)!=NULL, "could not convert from encoding \"%s\"", aInputEncoding);
1.283 + return mbstowcs(aUnicode, aOther, KLargeNumber);
1.284 + }
1.285 +
1.286 +int UnicodeToOther(const char* aOutputEncoding, char* aOther, const wchar_t* aUnicode)
1.287 +// if the output parameter is NULL, it returns the precise size of the would-be output parameter (in terms of number of "char"s) excluding any trailing '\0', otherwise it returns 0
1.288 + {
1.289 + if (_stricmp(aOutputEncoding, "utf8")==0)
1.290 + {
1.291 + return UnicodeToUtf8(aOther, aUnicode);
1.292 + }
1.293 + char localeData[100];
1.294 + strcpy(localeData, ".");
1.295 + strcat(localeData, aOutputEncoding);
1.296 + Assert(setlocale(LC_ALL, localeData)!=NULL, "could not convert to encoding \"%s\"", aOutputEncoding);
1.297 + return wcstombs(aOther, aUnicode, KLargeNumber);
1.298 + }
1.299 +
1.300 +void DoConversion(TByteOrder& aUnicodeByteOrder, const char* aInputEncoding, const char* aOutputEncoding, FILE* aInputFile, FILE* aOutputFile)
1.301 + {
1.302 + SBuffer arrayOfBuffers[3];
1.303 + arrayOfBuffers[0].iNumberOfBytes=0;
1.304 + arrayOfBuffers[0].iData=malloc(KLargeNumber+2); // +2 for the 2 '\0' bytes appended to the data read from file
1.305 + Assert(arrayOfBuffers[0].iData!=NULL, "cannot allocate enough memory");
1.306 + arrayOfBuffers[1].iNumberOfBytes=0;
1.307 + arrayOfBuffers[1].iData=NULL;
1.308 + arrayOfBuffers[2].iNumberOfBytes=0;
1.309 + arrayOfBuffers[2].iData=NULL;
1.310 + SBuffer* currentBuffer=arrayOfBuffers;
1.311 + currentBuffer->iNumberOfBytes=ReadFromFileReturningNumberOfBytesRead(currentBuffer->iData, KLargeNumber, aInputFile);
1.312 + // append 2 '\0' bytes at the end of the buffer read from file (2 in case it is unicode)
1.313 + ((char*)currentBuffer->iData)[currentBuffer->iNumberOfBytes]='\0';
1.314 + ((char*)currentBuffer->iData)[currentBuffer->iNumberOfBytes+1]='\0';
1.315 + ObeyRequiredByteOrderIfUnicode(aUnicodeByteOrder, aInputEncoding, *currentBuffer);
1.316 + // if the input and output encodings are different, convert from one to the other (via unicode if neither is itself unicode)
1.317 + if (_stricmp(aInputEncoding, aOutputEncoding)!=0)
1.318 + {
1.319 + if (_stricmp(aInputEncoding, "unicode")!=0)
1.320 + {
1.321 + SBuffer* nextBuffer=currentBuffer+1;
1.322 + nextBuffer->iNumberOfBytes=sizeof(wchar_t)*OtherToUnicode(aInputEncoding, NULL, (const char*)currentBuffer->iData);
1.323 + Assert(nextBuffer->iNumberOfBytes>=0, "invalid multi-byte character encountered");
1.324 + nextBuffer->iData=malloc(nextBuffer->iNumberOfBytes+sizeof(wchar_t)); // "+sizeof(wchar_t)" for terminating '\0'
1.325 + Assert(nextBuffer->iData!=NULL, "cannot allocate enough memory");
1.326 + OtherToUnicode(aInputEncoding, (wchar_t*)nextBuffer->iData, (const char*)currentBuffer->iData);
1.327 + currentBuffer=nextBuffer;
1.328 + }
1.329 + if (_stricmp(aOutputEncoding, "unicode")!=0)
1.330 + {
1.331 + SBuffer* nextBuffer=currentBuffer+1;
1.332 + nextBuffer->iNumberOfBytes=sizeof(char)*UnicodeToOther(aOutputEncoding, NULL, (const wchar_t*)currentBuffer->iData);
1.333 + Assert(nextBuffer->iNumberOfBytes>=0, "unconvertible unicode character encountered");
1.334 + nextBuffer->iData=malloc(nextBuffer->iNumberOfBytes+sizeof(char)); // "+sizeof(char)" for terminating '\0'
1.335 + Assert(nextBuffer->iData!=NULL, "cannot allocate enough memory");
1.336 + UnicodeToOther(aOutputEncoding, (char*)nextBuffer->iData, (const wchar_t*)currentBuffer->iData);
1.337 + currentBuffer=nextBuffer;
1.338 + }
1.339 + }
1.340 + ObeyRequiredByteOrderIfUnicode(aUnicodeByteOrder, aOutputEncoding, *currentBuffer);
1.341 + WriteToFile((const void*)currentBuffer->iData, currentBuffer->iNumberOfBytes, aOutputFile);
1.342 + free(arrayOfBuffers[0].iData);
1.343 + free(arrayOfBuffers[1].iData);
1.344 + free(arrayOfBuffers[2].iData);
1.345 + }
1.346 +
1.347 +void FlushAndCloseFiles(FILE* aInputFile, FILE* aOutputFile)
1.348 + {
1.349 + Assert(fflush(aOutputFile)==0, "flushing output-file failed");
1.350 + if (aInputFile!=stdin)
1.351 + {
1.352 + Assert(fclose(aInputFile)==0, "closing input-file failed");
1.353 + }
1.354 + if (aOutputFile!=stdout)
1.355 + {
1.356 + Assert(fclose(aOutputFile)==0, "closing output-file failed");
1.357 + }
1.358 + }
1.359 +
1.360 +int main(int aArgc, char* aArgv[])
1.361 + {
1.362 + int outputByteOrderMark=0;
1.363 + TByteOrder unicodeByteOrder=EByteOrderUnspecified;
1.364 + const char* inputEncoding=NULL;
1.365 + const char* outputEncoding=NULL;
1.366 + FILE* inputFile=stdin;
1.367 + FILE* outputFile=stdout;
1.368 + ReadParameters(aArgc, aArgv, outputByteOrderMark, unicodeByteOrder, inputEncoding, outputEncoding, inputFile, outputFile);
1.369 +#if defined(__VC32__)
1.370 + _setmode(_fileno(inputFile), _O_BINARY);
1.371 + _setmode(_fileno(outputFile), _O_BINARY);
1.372 +#endif
1.373 + HandleByteOrderMarks(outputByteOrderMark, unicodeByteOrder, inputEncoding, outputEncoding, inputFile, outputFile);
1.374 + DoConversion(unicodeByteOrder, inputEncoding, outputEncoding, inputFile, outputFile);
1.375 + FlushAndCloseFiles(inputFile, outputFile);
1.376 + return 0;
1.377 + }
1.378 +
1.379 +