First public contribution.
2 * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
4 * This component and the accompanying materials are made available
5 * under the terms of "Eclipse Public License v1.0"
6 * which accompanies this distribution, and is available
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
9 * Initial Contributors:
10 * Nokia Corporation - initial contribution.
19 #pragma warning (disable: 4514) // unreferenced inline/local function has been removed
// Case-insensitive comparison shims. The rest of this file calls _stricmp and
// _strnicmp; the two active #define lines below map those names onto
// strcasecmp/strncasecmp.
// NOTE(review): no #else/#endif is visible in this listing, so which branch of
// the conditional the active #define lines belong to cannot be confirmed here -
// presumably they are the non-MSVC branch; verify against the full source.
33 #if (defined(__MSVCRT__) || defined(_MSC_VER))
// Self-mapping defines, left commented out.
34 //#define _stricmp _stricmp
35 //#define _strnicmp _strnicmp
37 #define _stricmp strcasecmp
38 #define _strnicmp strncasecmp
// Version of the tool, printed by PrintUsage() zero-padded to three digits.
// It is written in decimal on purpose: a literal with a leading zero (025) is
// octal in C/C++ and evaluates to 21, which made the banner read "021".
const int KVersionNumber=25;
// Upper bound used for the size of the input read buffer and as the
// element-count limit handed to the multibyte/wide conversion routines.
const int KLargeNumber=1000000;
// UTF-8 <-> wide-character converters defined outside this file.
// OtherToUnicode() and UnicodeToOther() forward to them, and return their
// result unchanged, when the encoding name is "utf8".
// NOTE(review): presumably a NULL output pointer requests a size-only query, as
// the callers in this file rely on - confirm against the implementation.
44 extern int Utf8ToUnicode(wchar_t* aUnicode, const char* aUtf8);
45 extern int UnicodeToUtf8(char* aUtf8, const wchar_t* aUnicode);
// Byte-order values for unicode text. EByteOrderNative and EByteOrderForeign
// are aliases for the big/little-endian values, selected at compile time.
// NOTE(review): the enum header, the EByteOrderBigEndian enumerator and the
// #else/#endif lines are not visible in this listing; the first pair of aliases
// below presumably belongs to the big-endian branch and the second pair to the
// #else branch - confirm against the full source.
49 EByteOrderUnspecified,
51 EByteOrderLittleEndian,
// NOTE(review): some C libraries define BIG_ENDIAN unconditionally as a numeric
// constant; if such a header is included this test would always be true -
// verify how BIG_ENDIAN is expected to be supplied.
52 #if defined(BIG_ENDIAN)
53 EByteOrderNative=EByteOrderBigEndian,
54 EByteOrderForeign=EByteOrderLittleEndian
56 EByteOrderNative=EByteOrderLittleEndian,
57 EByteOrderForeign=EByteOrderBigEndian
67 void PrintUsage(const char* aProgramName)
69 fprintf(stderr, "\nVersion %03d\n\nCharacter set conversion tool\nCopyright (c) 1999 Symbian Ltd\n\n", KVersionNumber);
70 fprintf(stderr, "Usage:\n\n\t%s [<options>] <inputspec> <outputspec>\n\nwhere\n\n\t"
71 "options := [-big|-little][-byteordermark]\n\t"
72 "inputspec := -input=<format> [<input_file>]\n\t"
73 "outputspec := -output=<format> [<output_file>]\n\t"
74 "format := unicode|1252|utf8|...\n\n", aProgramName);
75 const char* localeData=setlocale(LC_ALL, "");
76 while (*localeData!='.')
80 fprintf(stderr, "(The default encoding is currently \"%s\")\n\n", localeData+1);
// Error-reporting helper used throughout this file: formats
// aErrorMessageFormat with the two optional extra parameters into a local
// buffer and prints the result to stderr as "Error: <message>".
//
// aCondition:          callers pass the expression that must hold.
// aErrorMessageFormat: printf-style format for the message.
// aExtraParameter1/2:  values substituted into the format; callers in this file
//                      pass either strings or integers cast to const void*.
//
// NOTE(review): the test of aCondition and whatever follows the message
// (presumably the program terminates) are not visible in this listing - confirm
// against the full source.
83 void Assert(int aCondition, const char* aErrorMessageFormat, const void* aExtraParameter1=NULL, const void* aExtraParameter2=NULL)
// NOTE(review): the buffer is a fixed 100 bytes and sprintf() is unbounded, so a
// long "%s" argument (for example a user-supplied encoding name) can overflow it.
87 char errorMessage[100];
88 sprintf(errorMessage, aErrorMessageFormat, aExtraParameter1, aExtraParameter2);
89 fprintf(stderr, "Error: %s\n", errorMessage);
// Emits a non-fatal diagnostic on stderr: the text "Warning: ", the supplied
// message, then a newline.
//
// aWarningMessage: NUL-terminated text of the warning.
void PrintWarning(const char* aWarningMessage)
{
    FILE* const diagnostics=stderr;
    fprintf(diagnostics, "Warning: %s\n", aWarningMessage);
}
// Attempts to consume an input/output specification starting at
// aArgv[aArgIndex]. The argument is compared (case-insensitively, over the
// length of the prefix) with a prefix that incorporates aInputOrOutput. On a
// match the text after the prefix becomes aEncoding, and an argument that is in
// range and does not start with '-' is opened as the file using aFileMode.
//
// aArgc/aArgv:    the program's command line.
// aArgIndex:      in/out index of the argument being examined.
// aInputOrOutput: "input" or "output" in the calls made by ReadParameters().
// aEncoding:      out; must still be NULL on a match (a repeat is reported via Assert()).
// aFile:          out; receives the stream returned by fopen().
// aFileMode:      mode string handed to fopen().
//
// ReadParameters() treats a non-zero return as "parameter recognised".
// NOTE(review): the declaration/initialisation of `prefix`, any change to
// aArgIndex between storing the encoding and testing the following argument,
// and the return statements are not visible in this listing - confirm against
// the full source.
99 int TryFileParameter(int aArgc, char* aArgv[], int& aArgIndex, const char* aInputOrOutput, const char*& aEncoding, FILE*& aFile, const char* aFileMode)
103 strcat(prefix, aInputOrOutput);
105 int lengthOfPrefix=strlen(prefix);
106 if (_strnicmp(aArgv[aArgIndex], prefix, lengthOfPrefix)==0)
108 Assert(aEncoding==NULL, "\"%s...\" is specified more than once", prefix);
// The encoding name is the remainder of the argument; it points into aArgv.
109 aEncoding=aArgv[aArgIndex]+lengthOfPrefix;
// The bounds test comes first so aArgv is not indexed past the last argument.
111 if ((aArgIndex>=aArgc) || (aArgv[aArgIndex][0]=='-'))
117 aFile=fopen(aArgv[aArgIndex], aFileMode);
// The message names the role ("input"/"output"), not the file that failed to open.
118 Assert(aFile!=NULL, "opening %s-file failed", aInputOrOutput);
// Parses the command line into the conversion settings.
//
// With no arguments, or a first argument of "?" or "/?", PrintUsage() is
// called. Otherwise every argument after the program name is matched
// (case-insensitively) against "-byteordermark", "-big" and "-little". Any
// other argument must be accepted by TryFileParameter() as an input or output
// specification, or it is reported as a bad parameter via Assert().
// Repeating the byte-order-mark flag or the byte order, or omitting the input
// or output encoding, is also reported via Assert().
//
// aOutputByteOrderMark: in/out flag; set to 1 by "-byteordermark".
// aUnicodeByteOrder:    in/out; set by "-big"/"-little".
// aInputEncoding/aOutputEncoding, aInputFile/aOutputFile: filled in through
//                       TryFileParameter().
//
// NOTE(review): what follows the PrintUsage() call (presumably the program
// exits) is not visible in this listing - confirm against the full source.
125 void ReadParameters(int aArgc, char* aArgv[], int& aOutputByteOrderMark, TByteOrder& aUnicodeByteOrder, const char*& aInputEncoding, const char*& aOutputEncoding, FILE*& aInputFile, FILE*& aOutputFile)
127 if ((aArgc<=1) || (_stricmp(aArgv[1], "?")==0) || (_stricmp(aArgv[1], "/?")==0))
129 PrintUsage(aArgv[0]);
132 for (int i=1; i<aArgc; ++i) // start at index 1 to avoid the program name (which is the first parameter)
134 if (_stricmp(aArgv[i], "-byteordermark")==0)
136 Assert(!aOutputByteOrderMark, "\"-byteordermark\" is specified more than once");
137 aOutputByteOrderMark=1;
139 else if (_stricmp(aArgv[i], "-big")==0)
141 Assert(aUnicodeByteOrder==EByteOrderUnspecified, "the byte order of unicode text (i.e. \"-big\"/\"-little\") is specified more than once");
142 aUnicodeByteOrder=EByteOrderBigEndian;
144 else if (_stricmp(aArgv[i], "-little")==0)
146 Assert(aUnicodeByteOrder==EByteOrderUnspecified, "the byte order of unicode text (i.e. \"-big\"/\"-little\") is specified more than once");
147 aUnicodeByteOrder=EByteOrderLittleEndian;
// The loop index is passed by reference, so TryFileParameter() can modify it.
// The "output" attempt only runs when the "input" attempt returns zero.
151 Assert(TryFileParameter(aArgc, aArgv, i, "input", aInputEncoding, aInputFile, "r") ||
152 TryFileParameter(aArgc, aArgv, i, "output", aOutputEncoding, aOutputFile, "w"), "bad parameter \"%s\"", aArgv[i]);
155 Assert(aInputEncoding!=NULL, "no input encoding is specified");
156 Assert(aOutputEncoding!=NULL, "no output encoding is specified");
// Reads up to aNumberOfBytesToRead bytes from aInputFile into aBuffer and
// returns the number of bytes stored.
//
// Each fread() asks for at most the bytes still outstanding. The buffer pointer
// and the running total advance by what was read, and the function returns
// once the total reaches the request or end-of-file is reported.
//
// NOTE(review): the loop construct and the branch taken on `error` are not
// visible in this listing. It appears that after a read error the per-call
// request size is halved and the stream's error indicator cleared before
// retrying, with Assert() reporting failure once the request size reaches zero
// - confirm against the full source.
159 int ReadFromFileReturningNumberOfBytesRead(void* aBuffer, int aNumberOfBytesToRead, FILE* aInputFile)
161 int numberOfBytesRead=0;
162 int numberOfBytesToReadThisTime=aNumberOfBytesToRead;
167 const int remainingNumberOfBytesToRead=aNumberOfBytesToRead-numberOfBytesRead;
// Never ask for more than is still missing.
168 if (numberOfBytesToReadThisTime>remainingNumberOfBytesToRead)
170 numberOfBytesToReadThisTime=remainingNumberOfBytesToRead;
172 const int numberOfBytesReadThisTime=fread(aBuffer, 1, numberOfBytesToReadThisTime, aInputFile);
173 const int error=ferror(aInputFile);
// Advance the write position past the bytes just stored.
176 aBuffer=((unsigned char*)aBuffer)+numberOfBytesReadThisTime;
177 numberOfBytesRead+=numberOfBytesReadThisTime;
178 Assert(numberOfBytesRead<=aNumberOfBytesToRead, "internal error (read too many bytes)");
179 if ((numberOfBytesRead>=aNumberOfBytesToRead) || feof(aInputFile))
181 return numberOfBytesRead;
185 numberOfBytesToReadThisTime/=2;
// The int is cast to const void* only to pass it through Assert()'s
// pointer-typed extra parameter for the "%d" conversion.
186 Assert(numberOfBytesToReadThisTime>0, "reading from file failed with error number %d", (const void*)error);
187 clearerr(aInputFile);
192 void WriteToFile(const void* aBuffer, int aNumberOfBytesToWrite, FILE* aOutputFile)
194 const int numberOfBytesWritten=fwrite(aBuffer, 1, aNumberOfBytesToWrite, aOutputFile);
195 Assert(numberOfBytesWritten==aNumberOfBytesToWrite, "only %d out of %d bytes could be written to file", (const void*)numberOfBytesWritten, (const void*)aNumberOfBytesToWrite);
196 const int error=ferror(aOutputFile);
197 Assert(error==0, "writing to file failed with error number %d", (const void*)error);
// Deals with byte-order marks on both sides of the conversion.
//
// Input: when the input encoding is "unicode", the first unsigned short of the
// input is read and used to decide whether the text is in native or foreign
// byte order. If the mark determines an order, that order is stored in
// aUnicodeByteOrder; a conflicting order given on the command line is
// overridden with a warning.
//
// Output: when aOutputByteOrderMark is set, a warning is printed if the output
// encoding is not "unicode". The mark is written as a single unsigned short,
// 0xfeff for native order and 0xfffe otherwise, and the byte order must have
// been specified (checked via Assert()).
//
// NOTE(review): the case labels of the switch, the exact condition under which
// the input is rewound, and the else-structure of the output part are not
// visible in this listing. Presumably 0xfeff selects native and 0xfffe foreign,
// mirroring the values written for output - confirm against the full source.
200 void HandleByteOrderMarks(int aOutputByteOrderMark, TByteOrder& aUnicodeByteOrder, const char* aInputEncoding, const char* aOutputEncoding, FILE* aInputFile, FILE* aOutputFile)
202 if (_stricmp(aInputEncoding, "unicode")==0)
204 unsigned short firstUnicodeCharacter=0;
205 const int numberOfBytesRead=ReadFromFileReturningNumberOfBytesRead((void*)&firstUnicodeCharacter, sizeof(unsigned short), aInputFile);
206 TByteOrder byteOrderSpecifiedByByteOrderMark=EByteOrderUnspecified;
// Only inspect the value if a complete unsigned short was read.
207 if (numberOfBytesRead==sizeof(unsigned short))
209 switch (firstUnicodeCharacter)
212 byteOrderSpecifiedByByteOrderMark=EByteOrderNative;
215 byteOrderSpecifiedByByteOrderMark=EByteOrderForeign;
218 const int error=fseek(aInputFile, 0, SEEK_SET); // rewind to the start of the file
219 Assert(error==0, "could not rewind to the start of the input file");
223 if (byteOrderSpecifiedByByteOrderMark!=EByteOrderUnspecified)
225 if ((aUnicodeByteOrder!=EByteOrderUnspecified) && (byteOrderSpecifiedByByteOrderMark!=aUnicodeByteOrder))
227 PrintWarning("the byte order specified by the byte-order mark in the unicode input is different from the byte order specified by the parameter - taking the byte-order specified by the byte-order mark in the unicode input");
// The mark found in the data takes precedence over the command-line option.
229 aUnicodeByteOrder=byteOrderSpecifiedByByteOrderMark;
232 if (aOutputByteOrderMark)
234 if (_stricmp(aOutputEncoding, "unicode")!=0)
236 PrintWarning("\"-byteordermark\" is only relevant for unicode output");
240 Assert(aUnicodeByteOrder!=EByteOrderUnspecified, "the byte order must be specified if a byte-order mark is to be added to the unicode output");
// The mark is written exactly as stored in memory, so the byte-swapped value is
// used when the requested order is not the native one.
241 unsigned short firstUnicodeCharacter=(unsigned short)((aUnicodeByteOrder==EByteOrderNative)? 0xfeff: 0xfffe);
242 WriteToFile((const void*)&firstUnicodeCharacter, sizeof(unsigned short), aOutputFile);
247 void ObeyRequiredByteOrderIfUnicode(TByteOrder& aUnicodeByteOrder, const char* aEncoding, SBuffer& aBuffer)
249 if (_stricmp(aEncoding, "unicode")==0)
251 Assert(aBuffer.iNumberOfBytes%sizeof(wchar_t)==0, "internal error (bad number of bytes in unicode buffer)");
252 if (aUnicodeByteOrder==EByteOrderUnspecified)
254 PrintWarning("the byte order of unicode text is unspecified - defaulting to little endian");
255 aUnicodeByteOrder=EByteOrderLittleEndian;
257 if (aUnicodeByteOrder==EByteOrderForeign)
259 for (unsigned char* bytePointer=((unsigned char*)aBuffer.iData)+(aBuffer.iNumberOfBytes-sizeof(wchar_t)); bytePointer>=aBuffer.iData; bytePointer-=sizeof(wchar_t))
261 unsigned char temp=*bytePointer;
262 *bytePointer=*(bytePointer+1);
263 *(bytePointer+1)=temp;
269 int OtherToUnicode(const char* aInputEncoding, wchar_t* aUnicode, const char* aOther)
270 // if the output parameter is NULL, it returns the precise size of the would-be output parameter (in terms of number of "wchar_t"s) excluding any trailing '\0', otherwise it returns 0
272 if (_stricmp(aInputEncoding, "utf8")==0)
274 return Utf8ToUnicode(aUnicode, aOther);
276 char localeData[100];
277 strcpy(localeData, ".");
278 strcat(localeData, aInputEncoding);
279 Assert(setlocale(LC_ALL, localeData)!=NULL, "could not convert from encoding \"%s\"", aInputEncoding);
280 return mbstowcs(aUnicode, aOther, KLargeNumber);
283 int UnicodeToOther(const char* aOutputEncoding, char* aOther, const wchar_t* aUnicode)
284 // if the output parameter is NULL, it returns the precise size of the would-be output parameter (in terms of number of "char"s) excluding any trailing '\0', otherwise it returns 0
286 if (_stricmp(aOutputEncoding, "utf8")==0)
288 return UnicodeToUtf8(aOther, aUnicode);
290 char localeData[100];
291 strcpy(localeData, ".");
292 strcat(localeData, aOutputEncoding);
293 Assert(setlocale(LC_ALL, localeData)!=NULL, "could not convert to encoding \"%s\"", aOutputEncoding);
294 return wcstombs(aOther, aUnicode, KLargeNumber);
297 void DoConversion(TByteOrder& aUnicodeByteOrder, const char* aInputEncoding, const char* aOutputEncoding, FILE* aInputFile, FILE* aOutputFile)
299 SBuffer arrayOfBuffers[3];
300 arrayOfBuffers[0].iNumberOfBytes=0;
301 arrayOfBuffers[0].iData=malloc(KLargeNumber+2); // +2 for the 2 '\0' bytes appended to the data read from file
302 Assert(arrayOfBuffers[0].iData!=NULL, "cannot allocate enough memory");
303 arrayOfBuffers[1].iNumberOfBytes=0;
304 arrayOfBuffers[1].iData=NULL;
305 arrayOfBuffers[2].iNumberOfBytes=0;
306 arrayOfBuffers[2].iData=NULL;
307 SBuffer* currentBuffer=arrayOfBuffers;
308 currentBuffer->iNumberOfBytes=ReadFromFileReturningNumberOfBytesRead(currentBuffer->iData, KLargeNumber, aInputFile);
309 // append 2 '\0' bytes at the end of the buffer read from file (2 in case it is unicode)
310 ((char*)currentBuffer->iData)[currentBuffer->iNumberOfBytes]='\0';
311 ((char*)currentBuffer->iData)[currentBuffer->iNumberOfBytes+1]='\0';
312 ObeyRequiredByteOrderIfUnicode(aUnicodeByteOrder, aInputEncoding, *currentBuffer);
313 // if the input and output encodings are different, convert from one to the other (via unicode if neither is itself unicode)
314 if (_stricmp(aInputEncoding, aOutputEncoding)!=0)
316 if (_stricmp(aInputEncoding, "unicode")!=0)
318 SBuffer* nextBuffer=currentBuffer+1;
319 nextBuffer->iNumberOfBytes=sizeof(wchar_t)*OtherToUnicode(aInputEncoding, NULL, (const char*)currentBuffer->iData);
320 Assert(nextBuffer->iNumberOfBytes>=0, "invalid multi-byte character encountered");
321 nextBuffer->iData=malloc(nextBuffer->iNumberOfBytes+sizeof(wchar_t)); // "+sizeof(wchar_t)" for terminating '\0'
322 Assert(nextBuffer->iData!=NULL, "cannot allocate enough memory");
323 OtherToUnicode(aInputEncoding, (wchar_t*)nextBuffer->iData, (const char*)currentBuffer->iData);
324 currentBuffer=nextBuffer;
326 if (_stricmp(aOutputEncoding, "unicode")!=0)
328 SBuffer* nextBuffer=currentBuffer+1;
329 nextBuffer->iNumberOfBytes=sizeof(char)*UnicodeToOther(aOutputEncoding, NULL, (const wchar_t*)currentBuffer->iData);
330 Assert(nextBuffer->iNumberOfBytes>=0, "unconvertible unicode character encountered");
331 nextBuffer->iData=malloc(nextBuffer->iNumberOfBytes+sizeof(char)); // "+sizeof(char)" for terminating '\0'
332 Assert(nextBuffer->iData!=NULL, "cannot allocate enough memory");
333 UnicodeToOther(aOutputEncoding, (char*)nextBuffer->iData, (const wchar_t*)currentBuffer->iData);
334 currentBuffer=nextBuffer;
337 ObeyRequiredByteOrderIfUnicode(aUnicodeByteOrder, aOutputEncoding, *currentBuffer);
338 WriteToFile((const void*)currentBuffer->iData, currentBuffer->iNumberOfBytes, aOutputFile);
339 free(arrayOfBuffers[0].iData);
340 free(arrayOfBuffers[1].iData);
341 free(arrayOfBuffers[2].iData);
344 void FlushAndCloseFiles(FILE* aInputFile, FILE* aOutputFile)
346 Assert(fflush(aOutputFile)==0, "flushing output-file failed");
347 if (aInputFile!=stdin)
349 Assert(fclose(aInputFile)==0, "closing input-file failed");
351 if (aOutputFile!=stdout)
353 Assert(fclose(aOutputFile)==0, "closing output-file failed");
357 int main(int aArgc, char* aArgv[])
359 int outputByteOrderMark=0;
360 TByteOrder unicodeByteOrder=EByteOrderUnspecified;
361 const char* inputEncoding=NULL;
362 const char* outputEncoding=NULL;
363 FILE* inputFile=stdin;
364 FILE* outputFile=stdout;
365 ReadParameters(aArgc, aArgv, outputByteOrderMark, unicodeByteOrder, inputEncoding, outputEncoding, inputFile, outputFile);
366 #if defined(__VC32__)
367 _setmode(_fileno(inputFile), _O_BINARY);
368 _setmode(_fileno(outputFile), _O_BINARY);
370 HandleByteOrderMarks(outputByteOrderMark, unicodeByteOrder, inputEncoding, outputEncoding, inputFile, outputFile);
371 DoConversion(unicodeByteOrder, inputEncoding, outputEncoding, inputFile, outputFile);
372 FlushAndCloseFiles(inputFile, outputFile);