sl@0: /* sl@0: * Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies). sl@0: * All rights reserved. sl@0: * This component and the accompanying materials are made available sl@0: * under the terms of "Eclipse Public License v1.0" sl@0: * which accompanies this distribution, and is available sl@0: * at the URL "http://www.eclipse.org/legal/epl-v10.html". sl@0: * sl@0: * Initial Contributors: sl@0: * Nokia Corporation - initial contribution. sl@0: * sl@0: * Contributors: sl@0: * sl@0: * Description: GB18030 converter implementation sl@0: * sl@0: */ sl@0: sl@0: sl@0: #include sl@0: #include sl@0: #include sl@0: #include sl@0: #include "gb2312.h" sl@0: #include "gbk.h" sl@0: #include "gb18030_4byte.h" sl@0: #include "gb18030_diff_gbk.h" sl@0: #include sl@0: #include sl@0: sl@0: class CGB18030ConverterImpl : public CCharacterSetConverterPluginInterface sl@0: { sl@0: sl@0: public: sl@0: virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters(); sl@0: sl@0: virtual TInt ConvertFromUnicode( sl@0: CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, sl@0: const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, sl@0: TDes8& aForeign, sl@0: const TDesC16& aUnicode, sl@0: CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters); sl@0: sl@0: virtual TInt ConvertToUnicode( sl@0: CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, sl@0: TDes16& aUnicode, sl@0: const TDesC8& aForeign, sl@0: TInt& aState, sl@0: TInt& aNumberOfUnconvertibleCharacters, sl@0: TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter); sl@0: sl@0: virtual TBool IsInThisCharacterSetL( sl@0: TBool& aSetToTrue, sl@0: TInt& aConfidenceLevel, sl@0: const TDesC8& aSample); sl@0: sl@0: static CGB18030ConverterImpl* NewL(); sl@0: virtual ~CGB18030ConverterImpl(); sl@0: sl@0: private: sl@0: CGB18030ConverterImpl(); sl@0: TInt ConstructL(); sl@0: SCnvConversionData * completeGb18030_2byteConversionData; // a merged conversion data including Gb18030-diff-g2312, GB2312, Gb18030-diff-gbk and Gbk sl@0: TUint8 * workingMemory; sl@0: }; sl@0: sl@0: // Implement gb18030 plug-in using cnvutils framework in which gb2312 and gbk conversion data is re-used for memory saving sl@0: // 1) foreign->unicode: sl@0: // 1.1) 1 byte->unicode bmp: use gb2312 mapping table; sl@0: // sl@0: // 1.2) 2 byte->unicode bmp: use gb18030-2byte mapping table (gb18030_diff_gb2312ConversionData + gb2312ConversionData + gb18030_diff_gbkConversionData + gbkConversionData); sl@0: // sl@0: // 1.3) 4 byte->unicode bmp: use gb18030-4byte mapping table; sl@0: // sl@0: // 1.4) 4 byte->unicode non-bmp: calculate with formula. sl@0: // sl@0: // 2) unicode->foreign: sl@0: // 2.1) firstly check gb18030-2byte mapping table (gb18030_diff_gb2312ConversionData + gb2312ConversionData + gb18030_diff_gbkConversionData + gbkConversionData); sl@0: // sl@0: // 2.2) if not found in 2.1), check gb18030-4byte mapping table; sl@0: // sl@0: // 2.3) if not found in 2.2), calculate with formula (gb18030-4byte non BMP); sl@0: // sl@0: sl@0: sl@0: // GB18030-diff-gb2312 defines 1 foreign-to-Unicode range and 2 unicode-to-Foreign range sl@0: // GB2312.CTL defines 21 foreign-to-Unicode ranges and 21 Unicode-to-foreign ranges sl@0: // GB18030-diff-gbk defines 1 foreign-to-Unicode ranges and 2 Unicode-to-foreign range sl@0: // GBK.CTL defines 2 foreign-to-Unicode ranges and 2 Unicode-to-foreign range sl@0: const TInt KNumberOfBytesOfWorkingMemory=(1+2+21+21+1+2+2+2)*sizeof(SCnvConversionData::SOneDirectionData::SRange); //totally 1040 bytes sl@0: sl@0: const TDesC8& CGB18030ConverterImpl::ReplacementForUnconvertibleUnicodeCharacters() sl@0: { sl@0: return CnvGb18030_diff_gbk::ReplacementForUnconvertibleUnicodeCharacters(); sl@0: } sl@0: sl@0: _LIT(KLitPanicText, "GB18030"); sl@0: enum TPanic sl@0: { sl@0: EPanicNothingToConvert1=1, sl@0: EPanicNothingToConvert2, sl@0: EPanicNothingToConvert3, sl@0: EPanicNothingToConvert4, sl@0: EPanicNothingToConvert5, sl@0: EPanicNothingToConvert6, sl@0: EPanicOddNumberOfBytes1, sl@0: EPanicOddNumberOfBytes2, sl@0: EPanicOddNumberOfBytes3, sl@0: EPanicOddNumberOfBytes4, sl@0: EPanicOddNumberOfBytes5, sl@0: EPanicOddNumberOfBytes6, sl@0: EPanicBadHighBit1, sl@0: EPanicBadHighBit2, sl@0: EPanicBadHighBit3, sl@0: EPanicBadHighBit4, sl@0: EPanicBadHighBit5, sl@0: EPanicBadHighBit6, sl@0: EPanicBadHighBit7, sl@0: EPanicBadPointers1, sl@0: EPanicBadPointers2, sl@0: EPanicBadPointers3, sl@0: EPanicBadPointers4, sl@0: EPanicBadPointers5, sl@0: EPanicBadPointers6, sl@0: EPanicBadPointers7, sl@0: EPanicBadPointers8, sl@0: EPanicBadPointers9, sl@0: EPanicBadPointers10, sl@0: EPanicBadPointers11, sl@0: EPanicBadPointers12, sl@0: EPanicBadPointers13, sl@0: EPanicBadPointers14, sl@0: EPanicBadPointers15, sl@0: EPanicBadPointers16, sl@0: EPanicBadPointers17, sl@0: EPanicBadPointers18, sl@0: EPanicBadPointers19, sl@0: EPanicBadPointers20, sl@0: EPanicBadPointers21, sl@0: EPanicBadPointers22, sl@0: EPanicBadPointers23, sl@0: EPanicBadPointers24, sl@0: EPanicBadPointers25, sl@0: EPanicBadPointers26, sl@0: EPanicBadPointers27, sl@0: EPanicBadPointers28, sl@0: EPanicBadPointers29, sl@0: EPanicBadPointers30, sl@0: EPanicBadPointers31, sl@0: EPanicBadPointers32, sl@0: EPanicBadPointers33, sl@0: EPanicBadPointers34, sl@0: EPanicBadPointers35, sl@0: EPanicBadPointers36, sl@0: EPanicBadCalculation1, sl@0: EPanicBadCalculation2, sl@0: EPanicNumberOfBytesIsNotMultipleOfThree1, sl@0: EPanicNumberOfBytesIsNotMultipleOfThree2, sl@0: EPanicSingleShift2Expected, sl@0: EPanicSingleShift3Expected, sl@0: EPanicTooManyBytesOfWorkingMemoryUsed1, sl@0: EPanicTooManyBytesOfWorkingMemoryUsed2 sl@0: }; sl@0: sl@0: LOCAL_C void Panic(TPanic aPanic) sl@0: { sl@0: User::Panic(KLitPanicText, aPanic); sl@0: } sl@0: sl@0: #define ARRAY_LENGTH(aArray) (sizeof(aArray)/sizeof((aArray)[0])) sl@0: sl@0: LOCAL_C void Step12DummyConvertFromIntermediateBufferInPlace(TInt, TDes8&, TInt& aNumberOfCharactersThatDroppedOut) sl@0: { sl@0: aNumberOfCharactersThatDroppedOut=0; sl@0: } sl@0: sl@0: // Perform the actual conversion (unicode -> gb18030 4byte non-BMP) using formula in this function sl@0: LOCAL_C void Step3ConvertFromIntermediateBufferInPlace(TInt aStartPositionInDescriptor, TDes8& aDescriptor, TInt& aNumberOfCharactersThatDroppedOut) sl@0: { sl@0: aNumberOfCharactersThatDroppedOut = 0; // no drop out, because all GB18030 outside BMP are exactly 4-bytes sl@0: sl@0: const TInt descriptorLength=aDescriptor.Length(); sl@0: TUint8* pVeryFrom = CONST_CAST(TUint8*, aDescriptor.Ptr()); sl@0: const TUint8* pEnd = pVeryFrom + descriptorLength; sl@0: TUint8* pFrom = pVeryFrom + aStartPositionInDescriptor; sl@0: FOREVER sl@0: { sl@0: if (pFrom + 4 > pEnd) sl@0: { sl@0: __ASSERT_DEBUG(pFrom==pEnd, Panic(EPanicBadPointers25)); sl@0: break; sl@0: } sl@0: TUint characterCode = 0; sl@0: for (TInt i=0; i<4; i++) sl@0: { sl@0: characterCode <<= 8; sl@0: characterCode += pFrom[i]; sl@0: } sl@0: sl@0: // to gb18030 sl@0: characterCode -= 0x10000; sl@0: TUint b4 = characterCode % 10 + 0x30; sl@0: characterCode /= 10; sl@0: TUint b3 = characterCode % 126 + 0x81; sl@0: characterCode /= 126; sl@0: TUint b2 = characterCode % 10 + 0x30; sl@0: TUint b1 = characterCode / 10 + 0x90; sl@0: sl@0: *pFrom++ = b1; sl@0: *pFrom++ = b2; sl@0: *pFrom++ = b3; sl@0: *pFrom++ = b4; sl@0: } sl@0: aDescriptor.SetLength(pFrom-pVeryFrom); sl@0: } sl@0: sl@0: // gb2312-1byte ->unicode (0x00 - 0x7F) sl@0: LOCAL_C TInt Step0NumberOfBytesAbleToConvertToUnicode(const TDesC8& aDescriptor) sl@0: { sl@0: const TInt descriptorLength=aDescriptor.Length(); sl@0: const TUint8* pointerToPreviousByte=aDescriptor.Ptr()-1; sl@0: const TUint8* const pointerToLastByte=pointerToPreviousByte+aDescriptor.Length(); sl@0: sl@0: TInt numOfBytes = 0; sl@0: FOREVER sl@0: { sl@0: if (pointerToPreviousByte>=pointerToLastByte) sl@0: { sl@0: break; sl@0: } sl@0: // byte 1 sl@0: TUint b1 = pointerToPreviousByte[1]; sl@0: if (b1 <= 0x7F) sl@0: { sl@0: pointerToPreviousByte++; sl@0: numOfBytes++; sl@0: } sl@0: else sl@0: break; sl@0: } sl@0: return numOfBytes; sl@0: } sl@0: sl@0: // gb18030-2byte --> unicode (0x8140 - 0xFE7E, 0x8180 - 0xFEFE) sl@0: LOCAL_C TInt Step1NumberOfBytesAbleToConvertToUnicode(const TDesC8& aDescriptor) sl@0: { sl@0: const TInt descriptorLength=aDescriptor.Length(); sl@0: const TUint8* pointerToPreviousByte=aDescriptor.Ptr()-1; sl@0: const TUint8* const pointerToLastByte=pointerToPreviousByte+aDescriptor.Length(); sl@0: sl@0: TInt numOfBytes = 0; sl@0: FOREVER sl@0: { sl@0: if (pointerToPreviousByte>=pointerToLastByte) sl@0: { sl@0: break; sl@0: } sl@0: // byte 1 sl@0: TUint b1 = pointerToPreviousByte[1]; sl@0: if (b1 <= 0x80 || b1 > 0xFE) sl@0: break; sl@0: sl@0: // byte 2 sl@0: if (pointerToPreviousByte+1 >= pointerToLastByte) sl@0: break; sl@0: TUint b2 = pointerToPreviousByte[2]; sl@0: if (b2 >= 0x40 && b2 <= 0xFE && b2 != 0x7F) // all gb18030 2-byte code sl@0: { sl@0: pointerToPreviousByte = pointerToPreviousByte + 2; sl@0: numOfBytes = numOfBytes + 2; sl@0: } sl@0: else if (b2 < 0x30 || b2 > 0x39) sl@0: { sl@0: if (numOfBytes <= 0) sl@0: return CCnvCharacterSetConverter::EErrorIllFormedInput; sl@0: else sl@0: break; sl@0: } sl@0: else sl@0: break; sl@0: } sl@0: return numOfBytes; sl@0: } sl@0: sl@0: sl@0: // gb18030 4-bytes bmp --> unicode (0x81308130 - 0x8439FE39) sl@0: LOCAL_C TInt Step2NumberOfBytesAbleToConvertToUnicode(const TDesC8& aDescriptor) sl@0: { sl@0: const TUint8* pointerToPreviousByte=aDescriptor.Ptr()-1; sl@0: const TUint8* const pointerToLastByte=pointerToPreviousByte+aDescriptor.Length(); sl@0: __ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastByte, Panic(EPanicBadPointers25)); sl@0: sl@0: TInt numOfBytes = 0; sl@0: FOREVER sl@0: { sl@0: if (pointerToPreviousByte>=pointerToLastByte) sl@0: { sl@0: break; sl@0: } sl@0: sl@0: // byte 1 sl@0: TUint b1 = pointerToPreviousByte[1]; sl@0: if ((b1 < 0x81) || (b1 > 0x84)){ sl@0: break; sl@0: } sl@0: sl@0: // byte 2 sl@0: if (pointerToPreviousByte+1 >= pointerToLastByte) sl@0: break; sl@0: TUint b2 = pointerToPreviousByte[2]; sl@0: if (b2 >= 0x40 && b2 <= 0xFE && b2 != 0x7F) // all gb18030 2-byte code sl@0: break; sl@0: else if (b2 < 0x30 || b2 > 0x39) sl@0: { sl@0: if (numOfBytes == 0) sl@0: return CCnvCharacterSetConverter::EErrorIllFormedInput; sl@0: else sl@0: break; sl@0: } sl@0: sl@0: sl@0: // byte 3 sl@0: if (pointerToPreviousByte+2 >= pointerToLastByte) sl@0: break; sl@0: TUint b3 = pointerToPreviousByte[3]; sl@0: if (b3 < 0x81 || b3 > 0xFE) sl@0: { sl@0: if (numOfBytes == 0) sl@0: return CCnvCharacterSetConverter::EErrorIllFormedInput; sl@0: else sl@0: break; sl@0: } sl@0: sl@0: // byte 4 sl@0: if (pointerToPreviousByte+3 >= pointerToLastByte) sl@0: break; sl@0: TUint b4 = pointerToPreviousByte[4]; sl@0: if (b4 < 0x30 || b4 > 0x39) sl@0: { sl@0: if (numOfBytes == 0) sl@0: return CCnvCharacterSetConverter::EErrorIllFormedInput; sl@0: else sl@0: break; sl@0: } sl@0: else sl@0: { sl@0: numOfBytes = numOfBytes + 4; sl@0: pointerToPreviousByte = pointerToPreviousByte+4; sl@0: } sl@0: } sl@0: sl@0: return numOfBytes; sl@0: } sl@0: sl@0: // gb18030 4-bytes non-bmp --> unicode (0x90308130~0xE339FE39) sl@0: LOCAL_C TInt Step3NumberOfBytesAbleToConvertToUnicode(const TDesC8& aDescriptor) sl@0: { sl@0: const TUint8* pointerToPreviousByte=aDescriptor.Ptr()-1; sl@0: const TUint8* const pointerToLastByte=pointerToPreviousByte+aDescriptor.Length(); sl@0: __ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastByte, Panic(EPanicBadPointers25)); sl@0: sl@0: TInt numOfBytes = 0; sl@0: FOREVER sl@0: { sl@0: if (pointerToPreviousByte>=pointerToLastByte) sl@0: { sl@0: break; sl@0: } sl@0: sl@0: // byte 1 sl@0: TUint b1 = pointerToPreviousByte[1]; sl@0: if (b1 < 0x90 || b1 > 0xE3) sl@0: break; sl@0: sl@0: // byte 2 sl@0: if (pointerToPreviousByte+1 >= pointerToLastByte) sl@0: break; sl@0: TUint b2 = pointerToPreviousByte[2]; sl@0: if (b2 >= 0x40 && b2 <= 0xFE && b2 != 0x7F) sl@0: break; sl@0: else if (b2 < 0x30 || b2 > 0x39) sl@0: { sl@0: if (numOfBytes == 0) sl@0: return CCnvCharacterSetConverter::EErrorIllFormedInput; sl@0: else sl@0: break; sl@0: } sl@0: sl@0: // byte 3 sl@0: if (pointerToPreviousByte+2 >= pointerToLastByte) sl@0: break; sl@0: TUint b3 = pointerToPreviousByte[3]; sl@0: if (b3 < 0x81 || b3 > 0xFE) sl@0: { sl@0: if (numOfBytes == 0) sl@0: return CCnvCharacterSetConverter::EErrorIllFormedInput; sl@0: else sl@0: break; sl@0: } sl@0: sl@0: // byte 4 sl@0: if (pointerToPreviousByte+3 >= pointerToLastByte) sl@0: break; sl@0: TUint b4 = pointerToPreviousByte[4]; sl@0: if (b4 < 0x30 || b4 > 0x39) sl@0: { sl@0: if (numOfBytes == 0) sl@0: return CCnvCharacterSetConverter::EErrorIllFormedInput; sl@0: else sl@0: break; sl@0: } sl@0: else sl@0: { sl@0: numOfBytes = numOfBytes + 4; sl@0: pointerToPreviousByte = pointerToPreviousByte + 4; sl@0: } sl@0: } sl@0: return numOfBytes; sl@0: } sl@0: sl@0: void Step012DummyConvertToIntermediateBufferInPlace(TDes8&) sl@0: { sl@0: } sl@0: sl@0: // Perform the actual conversion (gb18030 4byte non-BMP -> unicode) using formula in this function sl@0: LOCAL_C void Step3ConvertToIntermediateBufferInPlace(TDes8& aDescriptor) sl@0: { sl@0: const TInt descriptorLength=aDescriptor.Length(); sl@0: __ASSERT_DEBUG(descriptorLength%4 == 0, Panic(EPanicNothingToConvert5)); sl@0: TUint8* pointerToTargetByte=CONST_CAST(TUint8*, aDescriptor.Ptr()); sl@0: const TUint8* pointerToSourceByte=pointerToTargetByte; sl@0: const TUint8* const pointerToLastByte=pointerToSourceByte+descriptorLength; sl@0: sl@0: FOREVER sl@0: { sl@0: if (pointerToLastByte - pointerToSourceByte < 4) sl@0: break; sl@0: sl@0: // conversion sl@0: TUint8 b1 = pointerToSourceByte[0]; sl@0: TUint8 b2 = pointerToSourceByte[1]; sl@0: TUint8 b3 = pointerToSourceByte[2]; sl@0: TUint8 b4 = pointerToSourceByte[3]; sl@0: sl@0: TUint characterCode = 0x10000 + (b1 - 0x90) * 12600 + sl@0: (b2 - 0x30) * 1260 + sl@0: (b3 - 0x81) * 10 + sl@0: (b4 - 0x30); sl@0: sl@0: pointerToTargetByte[0] = ((characterCode >> 24) & 0xFF); sl@0: pointerToTargetByte[1] = ((characterCode >> 16) & 0xFF); sl@0: pointerToTargetByte[2] = ((characterCode >> 8) & 0xFF); sl@0: pointerToTargetByte[3] = (characterCode & 0xFF); sl@0: sl@0: pointerToSourceByte = pointerToSourceByte + 4; sl@0: pointerToTargetByte = pointerToTargetByte + 4; sl@0: } sl@0: sl@0: aDescriptor.SetLength(descriptorLength); sl@0: } sl@0: sl@0: sl@0: // A dummy "direct" mapping table for non-Bmp chars in step 3 sl@0: // Use 32-bit Unicode value as intermediate coding sl@0: LOCAL_D const SCnvConversionData::SVariableByteData::SRange step3ForeignVariableByteDataRanges[]= sl@0: { sl@0: { sl@0: 0x00, // from 0x10000 sl@0: 0x00, // to 0x10FFFF sl@0: 3, // total 4 bytes sl@0: 0 sl@0: }, sl@0: }; sl@0: LOCAL_D const SCnvConversionData::SOneDirectionData::SRange step3ForeignToUnicodeDataRanges[]= sl@0: { sl@0: { sl@0: 0x10000, // from 0x10000 sl@0: 0x10ffff, // to 0x10FFFF sl@0: SCnvConversionData::SOneDirectionData::SRange::EDirect, sl@0: 0, sl@0: 0, sl@0: { sl@0: 0 // map from intermediate to unicode with offset = 0 sl@0: } sl@0: }, sl@0: }; sl@0: LOCAL_D const SCnvConversionData::SOneDirectionData::SRange step3UnicodeToForeignDataRanges[]= sl@0: { sl@0: { sl@0: 0x10000, //from 0x10000 sl@0: 0x10FFFF, //to 0x10FFFF sl@0: SCnvConversionData::SOneDirectionData::SRange::EDirect, sl@0: 4, // output byte count = 4 sl@0: 0, sl@0: { sl@0: 0 // offset = 0 sl@0: } sl@0: }, sl@0: }; sl@0: GLDEF_D const SCnvConversionData step3ConversionData= sl@0: { sl@0: SCnvConversionData::EFixedBigEndian, sl@0: { sl@0: ARRAY_LENGTH(step3ForeignVariableByteDataRanges), sl@0: step3ForeignVariableByteDataRanges sl@0: }, sl@0: { sl@0: ARRAY_LENGTH(step3ForeignToUnicodeDataRanges), sl@0: step3ForeignToUnicodeDataRanges sl@0: }, sl@0: { sl@0: ARRAY_LENGTH(step3UnicodeToForeignDataRanges), sl@0: step3UnicodeToForeignDataRanges sl@0: }, sl@0: NULL, sl@0: NULL sl@0: }; sl@0: sl@0: sl@0: // An internal mapping table to reslove the conflict introduced in symbian GB2312-80 plug-in. sl@0: // It will be merged into the gb18030-2byte Conversion Data. sl@0: // It includes mapping: (0xA1A4 -> 0x00B7, 0xA1AA -> 0x2014, 0xA844 <- 0x2015, 0x8139A739 <- 0x30FB) sl@0: LOCAL_D const SCnvConversionData::SVariableByteData::SRange gb18030_diff_gb2312ForeignVariableByteDataRanges[]= sl@0: { sl@0: { sl@0: 0xA1, //from 0xA1A4 sl@0: 0xA1, //to 0xA1AA sl@0: 1, sl@0: 0 sl@0: }, sl@0: }; sl@0: LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable1616::SEntry keyedTable1616_foreignToUnicode_1[]= sl@0: { sl@0: { sl@0: 0xA1A4, sl@0: 0x00B7 sl@0: }, sl@0: { sl@0: 0xA1AA, sl@0: 0x2014 sl@0: } sl@0: }; sl@0: LOCAL_D const SCnvConversionData::SOneDirectionData::SRange gb18030_diff_gb2312ForeignToUnicodeDataRanges[]= sl@0: { sl@0: { sl@0: 0xA1A4, sl@0: 0xA1AA, sl@0: SCnvConversionData::SOneDirectionData::SRange::EKeyedTable1616, sl@0: 0, sl@0: 0, sl@0: { sl@0: UData_SKeyedTable1616(keyedTable1616_foreignToUnicode_1) sl@0: } sl@0: }, sl@0: }; sl@0: LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable1616::SEntry keyedTable1616_unicodeToForeign_1[]= sl@0: { sl@0: { sl@0: 0x2015, sl@0: 0xA844 sl@0: } sl@0: }; sl@0: LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable3232::SEntry keyedTable3232_unicodeToForeign_1[]= sl@0: { sl@0: { sl@0: 0x30FB, sl@0: 0x8139A739 sl@0: } sl@0: }; sl@0: sl@0: LOCAL_D const SCnvConversionData::SOneDirectionData::SRange gb18030_diff_gb2312UnicodeToForeignDataRanges[]= sl@0: { sl@0: { sl@0: 0x2015, sl@0: 0x2015, sl@0: SCnvConversionData::SOneDirectionData::SRange::EKeyedTable1616, sl@0: 2, // output byte count = 2 sl@0: 0, sl@0: { sl@0: UData_SKeyedTable1616(keyedTable1616_unicodeToForeign_1) sl@0: } sl@0: }, sl@0: { sl@0: 0x30FB, sl@0: 0x30FB, sl@0: SCnvConversionData::SOneDirectionData::SRange::EKeyedTable3232, sl@0: 4, // output byte count = 4 sl@0: 0, sl@0: { sl@0: UData_SKeyedTable3232(keyedTable3232_unicodeToForeign_1) sl@0: } sl@0: }, sl@0: }; sl@0: GLDEF_D const SCnvConversionData gb18030_diff_gb2312ConversionData= sl@0: { sl@0: SCnvConversionData::EFixedBigEndian, sl@0: { sl@0: ARRAY_LENGTH(gb18030_diff_gb2312ForeignVariableByteDataRanges), sl@0: gb18030_diff_gb2312ForeignVariableByteDataRanges sl@0: }, sl@0: { sl@0: ARRAY_LENGTH(gb18030_diff_gb2312ForeignToUnicodeDataRanges), sl@0: gb18030_diff_gb2312ForeignToUnicodeDataRanges sl@0: }, sl@0: { sl@0: ARRAY_LENGTH(gb18030_diff_gb2312UnicodeToForeignDataRanges), sl@0: gb18030_diff_gb2312UnicodeToForeignDataRanges sl@0: }, sl@0: NULL, sl@0: NULL sl@0: }; sl@0: sl@0: LOCAL_D const SCnvConversionData::SVariableByteData::SRange foreignVariableByteDataRanges[]= sl@0: { sl@0: { sl@0: 0x00, sl@0: 0x7f, sl@0: 0, sl@0: 0 sl@0: }, sl@0: { sl@0: 0x80, sl@0: 0xff, sl@0: 1, sl@0: 0 sl@0: } sl@0: }; sl@0: sl@0: LOCAL_C void SetUpCompleteGb18030_2byteConversionData(SCnvConversionData& aCompleteGb18030_2byteConversionData, TUint8* aWorkingMemory) sl@0: { sl@0: const SCnvConversionData& gb2312ConversionData=CnvGb2312::ConversionData(); sl@0: const SCnvConversionData& gb18030_diff_gbkConversionData=CnvGb18030_diff_gbk::ConversionData(); sl@0: const SCnvConversionData& gbkConversionData=CnvGbk::ConversionData(); sl@0: // create a SCnvConversionData that is the combination of gb18030_diff_gb2312ConversionData, gb2312ConversionData, gb18030_diff_gbkConversionData and gbkConversionData; sl@0: aCompleteGb18030_2byteConversionData.iEndiannessOfForeignCharacters=SCnvConversionData::EFixedBigEndian; sl@0: aCompleteGb18030_2byteConversionData.iForeignVariableByteData.iNumberOfRanges=ARRAY_LENGTH(foreignVariableByteDataRanges); sl@0: aCompleteGb18030_2byteConversionData.iForeignVariableByteData.iRangeArray=foreignVariableByteDataRanges; sl@0: TInt numberOfBytesOfWorkingMemoryUsed=0; sl@0: sl@0: // set up the foreign-to-Unicode data sl@0: const TInt numberOfForeignToUnicodeDataRanges=gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges + gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges + gb18030_diff_gbkConversionData.iForeignToUnicodeData.iNumberOfRanges + gbkConversionData.iForeignToUnicodeData.iNumberOfRanges; sl@0: aCompleteGb18030_2byteConversionData.iForeignToUnicodeData.iNumberOfRanges=numberOfForeignToUnicodeDataRanges; sl@0: SCnvConversionData::SOneDirectionData::SRange* foreignToUnicodeDataRangeArray=REINTERPRET_CAST(SCnvConversionData::SOneDirectionData::SRange*, aWorkingMemory+numberOfBytesOfWorkingMemoryUsed); sl@0: numberOfBytesOfWorkingMemoryUsed+=(numberOfForeignToUnicodeDataRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange)); sl@0: __ASSERT_ALWAYS(numberOfBytesOfWorkingMemoryUsed<=KNumberOfBytesOfWorkingMemory, Panic(EPanicTooManyBytesOfWorkingMemoryUsed1)); sl@0: aCompleteGb18030_2byteConversionData.iForeignToUnicodeData.iRangeArray=foreignToUnicodeDataRangeArray; sl@0: Mem::Copy(foreignToUnicodeDataRangeArray, gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iRangeArray, gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange)); sl@0: Mem::Copy(foreignToUnicodeDataRangeArray + gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges, gb2312ConversionData.iForeignToUnicodeData.iRangeArray, gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange)); sl@0: Mem::Copy(foreignToUnicodeDataRangeArray + gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges + gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges, gb18030_diff_gbkConversionData.iForeignToUnicodeData.iRangeArray, gb18030_diff_gbkConversionData.iForeignToUnicodeData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange)); sl@0: Mem::Copy(foreignToUnicodeDataRangeArray + gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges + gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges + gb18030_diff_gbkConversionData.iForeignToUnicodeData.iNumberOfRanges, gbkConversionData.iForeignToUnicodeData.iRangeArray, gbkConversionData.iForeignToUnicodeData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange)); sl@0: sl@0: // set up the Unicode-to-foreign data sl@0: const TInt numberOfUnicodeToForeignDataRanges=gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges + gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges + gb18030_diff_gbkConversionData.iUnicodeToForeignData.iNumberOfRanges + gbkConversionData.iUnicodeToForeignData.iNumberOfRanges; sl@0: aCompleteGb18030_2byteConversionData.iUnicodeToForeignData.iNumberOfRanges=numberOfUnicodeToForeignDataRanges; sl@0: SCnvConversionData::SOneDirectionData::SRange* unicodeToForeignDataRangeArray=REINTERPRET_CAST(SCnvConversionData::SOneDirectionData::SRange*, aWorkingMemory+numberOfBytesOfWorkingMemoryUsed); sl@0: numberOfBytesOfWorkingMemoryUsed+=(numberOfUnicodeToForeignDataRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange)); sl@0: __ASSERT_ALWAYS(numberOfBytesOfWorkingMemoryUsed<=KNumberOfBytesOfWorkingMemory, Panic(EPanicTooManyBytesOfWorkingMemoryUsed2)); sl@0: aCompleteGb18030_2byteConversionData.iUnicodeToForeignData.iRangeArray=unicodeToForeignDataRangeArray; sl@0: Mem::Copy(unicodeToForeignDataRangeArray, gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iRangeArray, gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange)); sl@0: Mem::Copy(unicodeToForeignDataRangeArray + gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges, gb2312ConversionData.iUnicodeToForeignData.iRangeArray, gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange)); sl@0: Mem::Copy(unicodeToForeignDataRangeArray + gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges + gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges, gb18030_diff_gbkConversionData.iUnicodeToForeignData.iRangeArray, gb18030_diff_gbkConversionData.iUnicodeToForeignData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange)); sl@0: Mem::Copy(unicodeToForeignDataRangeArray + gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges + gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges + gb18030_diff_gbkConversionData.iUnicodeToForeignData.iNumberOfRanges, gbkConversionData.iUnicodeToForeignData.iRangeArray, gbkConversionData.iUnicodeToForeignData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange)); sl@0: } sl@0: sl@0: sl@0: TInt CGB18030ConverterImpl::ConvertFromUnicode( sl@0: CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, sl@0: const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, sl@0: TDes8& aForeign, sl@0: const TDesC16& aUnicode, sl@0: CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters) sl@0: { sl@0: TFixedArray characterSets; sl@0: sl@0: // step 1) gb18030-2byte sl@0: characterSets[0].iConversionData = completeGb18030_2byteConversionData; sl@0: characterSets[0].iConvertFromIntermediateBufferInPlace = Step12DummyConvertFromIntermediateBufferInPlace; sl@0: characterSets[0].iEscapeSequence = &KNullDesC8; sl@0: sl@0: // step 2) gb18030-4byte BMP sl@0: characterSets[1].iConversionData = &CnvGb18030_4byte::ConversionData(); sl@0: characterSets[1].iConvertFromIntermediateBufferInPlace = Step12DummyConvertFromIntermediateBufferInPlace; sl@0: characterSets[1].iEscapeSequence = &KNullDesC8; sl@0: sl@0: // step 3) gb18030-4byte non-BMP sl@0: characterSets[2].iConversionData = &step3ConversionData; sl@0: characterSets[2].iConvertFromIntermediateBufferInPlace = Step3ConvertFromIntermediateBufferInPlace; sl@0: characterSets[2].iEscapeSequence = &KNullDesC8; sl@0: sl@0: return CnvUtilities::ConvertFromUnicode(aDefaultEndiannessOfForeignCharacters, aReplacementForUnconvertibleUnicodeCharacters, aForeign, aUnicode, aIndicesOfUnconvertibleCharacters, characterSets.Array()); sl@0: } sl@0: sl@0: sl@0: TInt CGB18030ConverterImpl::ConvertToUnicode( sl@0: CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, sl@0: TDes16& aUnicode, sl@0: const TDesC8& aForeign, sl@0: TInt& /*aState*/, sl@0: TInt& aNumberOfUnconvertibleCharacters, sl@0: TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter) sl@0: { sl@0: TFixedArray methods; sl@0: // step 0) gb2312-1byte sl@0: methods[0].iNumberOfBytesAbleToConvert = Step0NumberOfBytesAbleToConvertToUnicode; sl@0: methods[0].iConvertToIntermediateBufferInPlace = Step012DummyConvertToIntermediateBufferInPlace; sl@0: methods[0].iConversionData = &CnvGb2312::ConversionData(); //only use one byte part sl@0: methods[0].iNumberOfBytesPerCharacter = 1; sl@0: methods[0].iNumberOfCoreBytesPerCharacter = 1; sl@0: sl@0: // step 1) gb18030-2byte sl@0: methods[1].iNumberOfBytesAbleToConvert = Step1NumberOfBytesAbleToConvertToUnicode; sl@0: methods[1].iConvertToIntermediateBufferInPlace = Step012DummyConvertToIntermediateBufferInPlace; sl@0: methods[1].iConversionData = completeGb18030_2byteConversionData; sl@0: methods[1].iNumberOfBytesPerCharacter = 2; sl@0: methods[1].iNumberOfCoreBytesPerCharacter = 2; sl@0: sl@0: // step 2) gb18030 4-byte BMP sl@0: methods[2].iNumberOfBytesAbleToConvert = Step2NumberOfBytesAbleToConvertToUnicode; sl@0: methods[2].iConvertToIntermediateBufferInPlace = Step012DummyConvertToIntermediateBufferInPlace; sl@0: methods[2].iConversionData = &CnvGb18030_4byte::ConversionData(); sl@0: methods[2].iNumberOfBytesPerCharacter = 4; sl@0: methods[2].iNumberOfCoreBytesPerCharacter = 4; sl@0: sl@0: // step 3) gb18030 4-byte non-BMP sl@0: methods[3].iNumberOfBytesAbleToConvert = Step3NumberOfBytesAbleToConvertToUnicode; sl@0: methods[3].iConvertToIntermediateBufferInPlace = Step3ConvertToIntermediateBufferInPlace; sl@0: methods[3].iConversionData = &step3ConversionData; sl@0: methods[3].iNumberOfBytesPerCharacter = 4; sl@0: methods[3].iNumberOfCoreBytesPerCharacter = 4; sl@0: sl@0: return CnvUtilities::ConvertToUnicodeFromHeterogeneousForeign(aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign, aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter, methods.Array()); sl@0: } sl@0: sl@0: TBool CGB18030ConverterImpl::IsInThisCharacterSetL( sl@0: TBool& aSetToTrue, sl@0: TInt& aConfidenceLevel, sl@0: const TDesC8& aSample) sl@0: { sl@0: aSetToTrue = ETrue; sl@0: return CnvGb2312::IsCharGBBased(aConfidenceLevel, aSample); sl@0: } sl@0: sl@0: CGB18030ConverterImpl* CGB18030ConverterImpl::NewL() sl@0: { sl@0: CGB18030ConverterImpl* self = new(ELeave) CGB18030ConverterImpl(); sl@0: CleanupStack::PushL(self); sl@0: self->ConstructL(); sl@0: CleanupStack::Pop(); // self sl@0: return self; sl@0: } sl@0: sl@0: CGB18030ConverterImpl::~CGB18030ConverterImpl() sl@0: { sl@0: if (workingMemory) sl@0: delete[] workingMemory; sl@0: if (completeGb18030_2byteConversionData) sl@0: delete completeGb18030_2byteConversionData; sl@0: } sl@0: sl@0: CGB18030ConverterImpl::CGB18030ConverterImpl() sl@0: { sl@0: } sl@0: sl@0: TInt CGB18030ConverterImpl::ConstructL() sl@0: { sl@0: completeGb18030_2byteConversionData = new (ELeave)SCnvConversionData; sl@0: CleanupStack::PushL(completeGb18030_2byteConversionData); sl@0: workingMemory = new (ELeave) TUint8[KNumberOfBytesOfWorkingMemory]; //1040 bytes sl@0: CleanupStack::Pop(); // completeGb18030_2byteConversionData sl@0: SetUpCompleteGb18030_2byteConversionData(*completeGb18030_2byteConversionData, workingMemory); sl@0: return 1; sl@0: } sl@0: sl@0: const TImplementationProxy ImplementationTable[] = sl@0: { sl@0: IMPLEMENTATION_PROXY_ENTRY(0x10287038,CGB18030ConverterImpl::NewL) sl@0: }; sl@0: sl@0: EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount) sl@0: { sl@0: aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy); sl@0: sl@0: return ImplementationTable; sl@0: } sl@0: