/*
* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description:
* Command-line character set conversion tool.
*
*/


#pragma warning (disable: 4514) // unreferenced inline/local function has been removed

// NOTE(review): the header names were missing from the copy this file was
// restored from; these are the headers that the code below actually needs.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>
#include <wchar.h>
#if defined(__VC32__)
#include <fcntl.h>
#include <io.h>
#endif

// Forces the little-endian branch of TByteOrder below.
#undef BIG_ENDIAN

#if (defined(__MSVCRT__) || defined(_MSC_VER))
//#define _stricmp _stricmp
//#define _strnicmp _strnicmp
#else // linux
#include <strings.h>
#define _stricmp strcasecmp
#define _strnicmp strncasecmp
#endif

// Was written as 025, which is an octal literal (decimal 21), so the usage
// text printed "Version 021".
const int KVersionNumber=25;
// Maximum number of input bytes read, and the character limit passed to
// mbstowcs/wcstombs.
const int KLargeNumber=1000000;

extern int Utf8ToUnicode(wchar_t* aUnicode, const char* aUtf8);
extern int UnicodeToUtf8(char* aUtf8, const wchar_t* aUnicode);

// Byte order of "unicode" text. Native/Foreign alias Big/Little depending on
// whether BIG_ENDIAN is defined (it is #undef'd above).
enum TByteOrder
{
    EByteOrderUnspecified,
    EByteOrderBigEndian,
    EByteOrderLittleEndian,
#if defined(BIG_ENDIAN)
    EByteOrderNative=EByteOrderBigEndian,
    EByteOrderForeign=EByteOrderLittleEndian
#else
    EByteOrderNative=EByteOrderLittleEndian,
    EByteOrderForeign=EByteOrderBigEndian
#endif
};

// A heap block (or NULL) together with the number of meaningful bytes in it.
struct SBuffer
{
    int iNumberOfBytes;
    void* iData;
};

// Prints the version, the command-line syntax and, when it can be determined,
// the default encoding of the current locale to stderr.
void PrintUsage(const char* aProgramName)
{
    fprintf(stderr, "\nVersion %03d\n\nCharacter set conversion tool\nCopyright (c) 1999 Symbian Ltd\n\n", KVersionNumber);
    // NOTE(review): the <...> placeholders were missing from the restored copy
    // and have been re-inserted to match the grammar listed underneath.
    fprintf(stderr, "Usage:\n\n\t%s [<options>] <inputspec> <outputspec>\n\nwhere\n\n\t"
                    "options := [-big|-little][-byteordermark]\n\t"
                    "inputspec := -input=<format> [<input_file>]\n\t"
                    "outputspec := -output=<format> [<output_file>]\n\t"
                    "format := unicode|1252|utf8|...\n\n", aProgramName);
    // The encoding is the part of the locale name after the '.'. setlocale can
    // return NULL and a locale name need not contain a '.', so check both
    // instead of scanning blindly for the dot.
    const char* localeData=setlocale(LC_ALL, "");
    const char* dot=(localeData!=NULL)? strchr(localeData, '.'): NULL;
    if (dot!=NULL)
    {
        fprintf(stderr, "(The default encoding is currently \"%s\")\n\n", dot+1);
    }
}

// If aCondition is zero, prints "Error: <message>" to stderr and exits with
// status 1. aErrorMessageFormat is a printf-style format taking up to two
// pointer-sized parameters; callers in this file pass strings for "%s".
void Assert(int aCondition, const char* aErrorMessageFormat, const void* aExtraParameter1=NULL, const void* aExtraParameter2=NULL)
{
    if (!aCondition)
    {
        // Written straight to stderr rather than through a fixed-size
        // intermediate buffer, which user-supplied text could overflow.
        fprintf(stderr, "Error: ");
        fprintf(stderr, aErrorMessageFormat, aExtraParameter1, aExtraParameter2);
        fprintf(stderr, "\n");
        exit(1);
    }
}

// Renders aValue as decimal text in aText (at least 16 bytes) and returns
// aText, so that integers can be reported through Assert's "%s" parameters
// instead of being cast to pointers and printed with "%d".
static const char* IntToText(int aValue, char* aText)
{
    sprintf(aText, "%d", aValue);
    return aText;
}

// Prints "Warning: <message>" to stderr.
void PrintWarning(const char* aWarningMessage)
{
    fprintf(stderr, "Warning: %s\n", aWarningMessage);
}

// Tests whether aArgv[aArgIndex] starts with "-<aInputOrOutput>=" (case
// insensitive). If so, stores the text after the '=' in aEncoding and, when
// the following argument exists and does not start with '-', consumes it as a
// file name (advancing aArgIndex) and opens it into aFile with aFileMode.
// Returns 1 if the argument was recognised, otherwise 0.
int TryFileParameter(int aArgc, char* aArgv[], int& aArgIndex, const char* aInputOrOutput, const char*& aEncoding, FILE*& aFile, const char* aFileMode)
{
    char prefix[100];
    strcpy(prefix, "-");
    strcat(prefix, aInputOrOutput);
    strcat(prefix, "=");
    const int lengthOfPrefix=(int)strlen(prefix);
    if (_strnicmp(aArgv[aArgIndex], prefix, lengthOfPrefix)!=0)
    {
        return 0;
    }
    Assert(aEncoding==NULL, "\"%s...\" is specified more than once", prefix);
    aEncoding=aArgv[aArgIndex]+lengthOfPrefix;
    const int indexOfPossibleFileName=aArgIndex+1;
    if ((indexOfPossibleFileName<aArgc) && (aArgv[indexOfPossibleFileName][0]!='-'))
    {
        aArgIndex=indexOfPossibleFileName;
        aFile=fopen(aArgv[aArgIndex], aFileMode);
        Assert(aFile!=NULL, "opening %s-file failed", aInputOrOutput);
    }
    return 1;
}

// Parses the command line into the output parameters. Prints the usage text
// and exits with status 0 if there are no arguments or the first is "?" or
// "/?".
//
// NOTE(review): everything in this function after the usage check was missing
// from the restored copy. It has been rebuilt from the option grammar printed
// by PrintUsage, from TryFileParameter's contract, and from the fact that main
// passes both encodings to string comparisons (so they must be non-NULL). The
// error-message texts and the fopen modes should be confirmed against the
// original source.
void ReadParameters(int aArgc, char* aArgv[], int& aOutputByteOrderMark, TByteOrder& aUnicodeByteOrder, const char*& aInputEncoding, const char*& aOutputEncoding, FILE*& aInputFile, FILE*& aOutputFile)
{
    if ((aArgc<=1) || (_stricmp(aArgv[1], "?")==0) || (_stricmp(aArgv[1], "/?")==0))
    {
        PrintUsage(aArgv[0]);
        exit(0);
    }
    for (int i=1; i<aArgc; ++i) // index 0 is the program name
    {
        if (_stricmp(aArgv[i], "-byteordermark")==0)
        {
            Assert(!aOutputByteOrderMark, "\"-byteordermark\" is specified more than once");
            aOutputByteOrderMark=1;
        }
        else if (_stricmp(aArgv[i], "-big")==0)
        {
            Assert(aUnicodeByteOrder==EByteOrderUnspecified, "the byte order of unicode text (i.e. \"-big\"/\"-little\") is specified more than once");
            aUnicodeByteOrder=EByteOrderBigEndian;
        }
        else if (_stricmp(aArgv[i], "-little")==0)
        {
            Assert(aUnicodeByteOrder==EByteOrderUnspecified, "the byte order of unicode text (i.e. \"-big\"/\"-little\") is specified more than once");
            aUnicodeByteOrder=EByteOrderLittleEndian;
        }
        else
        {
            // TryFileParameter may advance i past a file name it consumes.
            const int recognised=TryFileParameter(aArgc, aArgv, i, "input", aInputEncoding, aInputFile, "rb") ||
                                 TryFileParameter(aArgc, aArgv, i, "output", aOutputEncoding, aOutputFile, "wb");
            Assert(recognised, "bad parameter \"%s\"", aArgv[i]);
        }
    }
    Assert(aInputEncoding!=NULL, "no input encoding is specified");
    Assert(aOutputEncoding!=NULL, "no output encoding is specified");
}

// Reads up to aNumberOfBytesToRead bytes from aInputFile into aBuffer and
// returns the number actually read (fewer only at end of file). When a read
// reports an error, the request size is halved and the read retried; the
// program exits once the request size reaches zero.
//
// NOTE(review): the first lines of this function (signature and the
// declarations of the three counters) were missing from the restored copy and
// have been rebuilt from the surviving remainder and the two call sites.
int ReadFromFileReturningNumberOfBytesRead(void* aBuffer, int aNumberOfBytesToRead, FILE* aInputFile)
{
    int numberOfBytesRead=0;
    int numberOfBytesToReadThisTime=aNumberOfBytesToRead;
    for (;;)
    {
        for (;;)
        {
            const int remainingNumberOfBytesToRead=aNumberOfBytesToRead-numberOfBytesRead;
            if (numberOfBytesToReadThisTime>remainingNumberOfBytesToRead)
            {
                numberOfBytesToReadThisTime=remainingNumberOfBytesToRead;
            }
            const int numberOfBytesReadThisTime=(int)fread(aBuffer, 1, numberOfBytesToReadThisTime, aInputFile);
            const int error=ferror(aInputFile);
            if (error==0)
            {
                aBuffer=((unsigned char*)aBuffer)+numberOfBytesReadThisTime;
                numberOfBytesRead+=numberOfBytesReadThisTime;
                Assert(numberOfBytesRead<=aNumberOfBytesToRead, "internal error (read too many bytes)");
                if ((numberOfBytesRead>=aNumberOfBytesToRead) || feof(aInputFile))
                {
                    return numberOfBytesRead;
                }
                break;
            }
            // Bytes delivered by a failing fread are not counted; retry with a
            // smaller request.
            numberOfBytesToReadThisTime/=2;
            char errorText[16];
            Assert(numberOfBytesToReadThisTime>0, "reading from file failed with error number %s", IntToText(error, errorText));
            clearerr(aInputFile);
        }
    }
}

// Writes aNumberOfBytesToWrite bytes from aBuffer to aOutputFile; exits with
// an error message on a short write or a stream error.
void WriteToFile(const void* aBuffer, int aNumberOfBytesToWrite, FILE* aOutputFile)
{
    char text1[16];
    char text2[16];
    const int numberOfBytesWritten=(int)fwrite(aBuffer, 1, aNumberOfBytesToWrite, aOutputFile);
    Assert(numberOfBytesWritten==aNumberOfBytesToWrite, "only %s out of %s bytes could be written to file", IntToText(numberOfBytesWritten, text1), IntToText(aNumberOfBytesToWrite, text2));
    const int error=ferror(aOutputFile);
    Assert(error==0, "writing to file failed with error number %s", IntToText(error, text1));
}

// For unicode input: reads the first 16-bit unit; if it is a byte-order mark
// it is consumed and determines aUnicodeByteOrder (with a warning if that
// contradicts the command line), otherwise the file is rewound.
// For "-byteordermark": writes a byte-order mark to unicode output, which
// requires the byte order to be known by this point.
void HandleByteOrderMarks(int aOutputByteOrderMark, TByteOrder& aUnicodeByteOrder, const char* aInputEncoding, const char* aOutputEncoding, FILE* aInputFile, FILE* aOutputFile)
{
    if (_stricmp(aInputEncoding, "unicode")==0)
    {
        unsigned short firstUnicodeCharacter=0;
        const int numberOfBytesRead=ReadFromFileReturningNumberOfBytesRead((void*)&firstUnicodeCharacter, sizeof(unsigned short), aInputFile);
        TByteOrder byteOrderSpecifiedByByteOrderMark=EByteOrderUnspecified;
        if (numberOfBytesRead==sizeof(unsigned short))
        {
            switch (firstUnicodeCharacter)
            {
            case 0xfeff:
                byteOrderSpecifiedByByteOrderMark=EByteOrderNative;
                break;
            case 0xfffe: // the mark as seen with its two bytes swapped
                byteOrderSpecifiedByByteOrderMark=EByteOrderForeign;
                break;
            default:
                {
                const int error=fseek(aInputFile, 0, SEEK_SET); // rewind to the start of the file
                Assert(error==0, "could not rewind to the start of the input file");
                }
                break;
            }
        }
        if (byteOrderSpecifiedByByteOrderMark!=EByteOrderUnspecified)
        {
            if ((aUnicodeByteOrder!=EByteOrderUnspecified) && (byteOrderSpecifiedByByteOrderMark!=aUnicodeByteOrder))
            {
                PrintWarning("the byte order specified by the byte-order mark in the unicode input is different from the byte order specified by the parameter - taking the byte-order specified by the byte-order mark in the unicode input");
            }
            aUnicodeByteOrder=byteOrderSpecifiedByByteOrderMark;
        }
    }
    if (aOutputByteOrderMark)
    {
        if (_stricmp(aOutputEncoding, "unicode")!=0)
        {
            PrintWarning("\"-byteordermark\" is only relevant for unicode output");
        }
        else
        {
            Assert(aUnicodeByteOrder!=EByteOrderUnspecified, "the byte order must be specified if a byte-order mark is to be added to the unicode output");
            unsigned short firstUnicodeCharacter=(unsigned short)((aUnicodeByteOrder==EByteOrderNative)? 0xfeff: 0xfffe);
            WriteToFile((const void*)&firstUnicodeCharacter, sizeof(unsigned short), aOutputFile);
        }
    }
}

// If aEncoding is "unicode": defaults an unspecified byte order to little
// endian (with a warning) and, when the byte order is the foreign one, swaps
// the first two bytes of every wchar_t-sized unit of aBuffer in place.
// Does nothing for other encodings.
// NOTE(review): swapping two bytes per unit is only a full byte-order
// conversion where wchar_t is 2 bytes wide.
void ObeyRequiredByteOrderIfUnicode(TByteOrder& aUnicodeByteOrder, const char* aEncoding, SBuffer& aBuffer)
{
    if (_stricmp(aEncoding, "unicode")!=0)
    {
        return;
    }
    Assert(aBuffer.iNumberOfBytes%sizeof(wchar_t)==0, "internal error (bad number of bytes in unicode buffer)");
    if (aUnicodeByteOrder==EByteOrderUnspecified)
    {
        PrintWarning("the byte order of unicode text is unspecified - defaulting to little endian");
        aUnicodeByteOrder=EByteOrderLittleEndian;
    }
    if (aUnicodeByteOrder==EByteOrderForeign)
    {
        // Indexed forwards so that no pointer is ever formed outside the
        // buffer, including when the buffer is empty.
        unsigned char* const bytes=(unsigned char*)aBuffer.iData;
        const int unitSize=(int)sizeof(wchar_t);
        for (int offset=0; offset+unitSize<=aBuffer.iNumberOfBytes; offset+=unitSize)
        {
            const unsigned char temp=bytes[offset];
            bytes[offset]=bytes[offset+1];
            bytes[offset+1]=temp;
        }
    }
}

// Converts the '\0'-terminated text aOther from aInputEncoding to wide
// characters. "utf8" is delegated to Utf8ToUnicode; any other name is used as
// the code-page part of a locale (".<name>") and converted with mbstowcs,
// whose result is returned (number of wide characters excluding the
// terminator, or negative for an invalid sequence). Pass aUnicode as NULL to
// obtain the required size. Exits if the encoding is not accepted by setlocale.
int OtherToUnicode(const char* aInputEncoding, wchar_t* aUnicode, const char* aOther)
// if the output parameter is NULL, it returns the precise size of the would-be output parameter (in terms of number of "wchar_t"s) excluding any trailing '\0', otherwise it returns 0
{
    if (_stricmp(aInputEncoding, "utf8")==0)
    {
        return Utf8ToUnicode(aUnicode, aOther);
    }
    char localeData[100];
    // The encoding name comes from the command line; make sure "." + name fits.
    Assert(strlen(aInputEncoding)<sizeof(localeData)-1, "could not convert from encoding \"%s\"", aInputEncoding);
    strcpy(localeData, ".");
    strcat(localeData, aInputEncoding);
    Assert(setlocale(LC_ALL, localeData)!=NULL, "could not convert from encoding \"%s\"", aInputEncoding);
    return (int)mbstowcs(aUnicode, aOther, KLargeNumber);
}

// Converts the '\0'-terminated wide text aUnicode to aOutputEncoding.
// "utf8" is delegated to UnicodeToUtf8; any other name is used as the
// code-page part of a locale (".<name>") and converted with wcstombs, whose
// result is returned (number of bytes excluding the terminator, or negative
// for an unconvertible character). Pass aOther as NULL to obtain the required
// size. Exits if the encoding is not accepted by setlocale.
int UnicodeToOther(const char* aOutputEncoding, char* aOther, const wchar_t* aUnicode)
// if the output parameter is NULL, it returns the precise size of the would-be output parameter (in terms of number of "char"s) excluding any trailing '\0', otherwise it returns 0
{
    if (_stricmp(aOutputEncoding, "utf8")==0)
    {
        return UnicodeToUtf8(aOther, aUnicode);
    }
    char localeData[100];
    // The encoding name comes from the command line; make sure "." + name fits.
    Assert(strlen(aOutputEncoding)<sizeof(localeData)-1, "could not convert to encoding \"%s\"", aOutputEncoding);
    strcpy(localeData, ".");
    strcat(localeData, aOutputEncoding);
    Assert(setlocale(LC_ALL, localeData)!=NULL, "could not convert to encoding \"%s\"", aOutputEncoding);
    return (int)wcstombs(aOther, aUnicode, KLargeNumber);
}

// Reads up to KLargeNumber bytes from aInputFile, converts them from
// aInputEncoding to aOutputEncoding (going through unicode when neither side
// is unicode) and writes the result to aOutputFile. Input beyond KLargeNumber
// bytes is not read.
// NOTE(review): the second pass of each conversion is limited to KLargeNumber
// output units by OtherToUnicode/UnicodeToOther, which can be fewer than the
// size measured in the first pass for multi-byte output - confirm whether
// that matters for the inputs this tool is used on.
void DoConversion(TByteOrder& aUnicodeByteOrder, const char* aInputEncoding, const char* aOutputEncoding, FILE* aInputFile, FILE* aOutputFile)
{
    // [0] raw input, [1] and [2] results of up to two conversion steps.
    SBuffer arrayOfBuffers[3];
    for (int i=0; i<3; ++i)
    {
        arrayOfBuffers[i].iNumberOfBytes=0;
        arrayOfBuffers[i].iData=NULL;
    }
    // Room for one wide '\0' after the data, so the buffer is a terminated
    // string whether it holds chars or wchar_ts.
    arrayOfBuffers[0].iData=malloc(KLargeNumber+sizeof(wchar_t));
    Assert(arrayOfBuffers[0].iData!=NULL, "cannot allocate enough memory");
    SBuffer* currentBuffer=arrayOfBuffers;
    currentBuffer->iNumberOfBytes=ReadFromFileReturningNumberOfBytesRead(currentBuffer->iData, KLargeNumber, aInputFile);
    memset(((char*)currentBuffer->iData)+currentBuffer->iNumberOfBytes, 0, sizeof(wchar_t));
    ObeyRequiredByteOrderIfUnicode(aUnicodeByteOrder, aInputEncoding, *currentBuffer);
    // if the input and output encodings are different, convert from one to the other (via unicode if neither is itself unicode)
    if (_stricmp(aInputEncoding, aOutputEncoding)!=0)
    {
        if (_stricmp(aInputEncoding, "unicode")!=0)
        {
            SBuffer* nextBuffer=currentBuffer+1;
            // First pass measures, second pass converts.
            const int numberOfWideCharacters=OtherToUnicode(aInputEncoding, NULL, (const char*)currentBuffer->iData);
            Assert(numberOfWideCharacters>=0, "invalid multi-byte character encountered");
            nextBuffer->iNumberOfBytes=(int)(sizeof(wchar_t)*numberOfWideCharacters);
            nextBuffer->iData=malloc(nextBuffer->iNumberOfBytes+sizeof(wchar_t)); // "+sizeof(wchar_t)" for terminating '\0'
            Assert(nextBuffer->iData!=NULL, "cannot allocate enough memory");
            OtherToUnicode(aInputEncoding, (wchar_t*)nextBuffer->iData, (const char*)currentBuffer->iData);
            // Terminate explicitly: the next step reads this as a wide string.
            ((wchar_t*)nextBuffer->iData)[numberOfWideCharacters]=L'\0';
            currentBuffer=nextBuffer;
        }
        if (_stricmp(aOutputEncoding, "unicode")!=0)
        {
            SBuffer* nextBuffer=currentBuffer+1;
            const int numberOfCharacters=UnicodeToOther(aOutputEncoding, NULL, (const wchar_t*)currentBuffer->iData);
            Assert(numberOfCharacters>=0, "unconvertible unicode character encountered");
            nextBuffer->iNumberOfBytes=(int)(sizeof(char)*numberOfCharacters);
            nextBuffer->iData=malloc(nextBuffer->iNumberOfBytes+sizeof(char)); // "+sizeof(char)" for terminating '\0'
            Assert(nextBuffer->iData!=NULL, "cannot allocate enough memory");
            UnicodeToOther(aOutputEncoding, (char*)nextBuffer->iData, (const wchar_t*)currentBuffer->iData);
            ((char*)nextBuffer->iData)[numberOfCharacters]='\0';
            currentBuffer=nextBuffer;
        }
    }
    ObeyRequiredByteOrderIfUnicode(aUnicodeByteOrder, aOutputEncoding, *currentBuffer);
    WriteToFile((const void*)currentBuffer->iData, currentBuffer->iNumberOfBytes, aOutputFile);
    free(arrayOfBuffers[0].iData);
    free(arrayOfBuffers[1].iData);
    free(arrayOfBuffers[2].iData);
}

// Flushes the output and closes whichever of the two files is not a standard
// stream; exits with an error message if any of these steps fails.
void FlushAndCloseFiles(FILE* aInputFile, FILE* aOutputFile)
{
    Assert(fflush(aOutputFile)==0, "flushing output-file failed");
    if (aInputFile!=stdin)
    {
        Assert(fclose(aInputFile)==0, "closing input-file failed");
    }
    if (aOutputFile!=stdout)
    {
        Assert(fclose(aOutputFile)==0, "closing output-file failed");
    }
}

// Entry point: parse the command line, deal with byte-order marks, convert,
// then flush and close. Input and output default to stdin and stdout.
int main(int aArgc, char* aArgv[])
{
    int outputByteOrderMark=0;
    TByteOrder unicodeByteOrder=EByteOrderUnspecified;
    const char* inputEncoding=NULL;
    const char* outputEncoding=NULL;
    FILE* inputFile=stdin;
    FILE* outputFile=stdout;
    ReadParameters(aArgc, aArgv, outputByteOrderMark, unicodeByteOrder, inputEncoding, outputEncoding, inputFile, outputFile);
#if defined(__VC32__)
    // Stop the C runtime translating line endings in either stream.
    _setmode(_fileno(inputFile), _O_BINARY);
    _setmode(_fileno(outputFile), _O_BINARY);
#endif
    HandleByteOrderMarks(outputByteOrderMark, unicodeByteOrder, inputEncoding, outputEncoding, inputFile, outputFile);
    DoConversion(unicodeByteOrder, inputEncoding, outputEncoding, inputFile, outputFile);
    FlushAndCloseFiles(inputFile, outputFile);
    return 0;
}