1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/textandloc/charconvfw/charconvplugins/src/plugins/gb18030.cpp Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,784 @@
1.4 +/*
1.5 +* Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies).
1.6 +* All rights reserved.
1.7 +* This component and the accompanying materials are made available
1.8 +* under the terms of "Eclipse Public License v1.0"
1.9 +* which accompanies this distribution, and is available
1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
1.11 +*
1.12 +* Initial Contributors:
1.13 +* Nokia Corporation - initial contribution.
1.14 +*
1.15 +* Contributors:
1.16 +*
1.17 +* Description: GB18030 converter implementation
1.18 +*
1.19 +*/
1.20 +
1.21 +
1.22 +#include <e32std.h>
1.23 +#include <charconv.h>
1.24 +#include <convutils.h>
1.25 +#include <convdata.h>
1.26 +#include "gb2312.h"
1.27 +#include "gbk.h"
1.28 +#include "gb18030_4byte.h"
1.29 +#include "gb18030_diff_gbk.h"
1.30 +#include <ecom/implementationproxy.h>
1.31 +#include <charactersetconverter.h>
1.32 +
1.33 +class CGB18030ConverterImpl : public CCharacterSetConverterPluginInterface // GB18030 character-set converter plug-in; instances are created through NewL()
1.34 + {
1.35 +
1.36 +public:
1.37 + virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters(); // forwards to CnvGb18030_diff_gbk
1.38 +
1.39 + virtual TInt ConvertFromUnicode( // Unicode -> GB18030, tried as 2-byte, then 4-byte BMP, then 4-byte non-BMP
1.40 + CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
1.41 + const TDesC8& aReplacementForUnconvertibleUnicodeCharacters,
1.42 + TDes8& aForeign,
1.43 + const TDesC16& aUnicode,
1.44 + CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
1.45 +
1.46 + virtual TInt ConvertToUnicode( // GB18030 -> Unicode using four methods (1-byte, 2-byte, 4-byte BMP, 4-byte non-BMP)
1.47 + CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
1.48 + TDes16& aUnicode,
1.49 + const TDesC8& aForeign,
1.50 + TInt& aState,
1.51 + TInt& aNumberOfUnconvertibleCharacters,
1.52 + TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
1.53 +
1.54 + virtual TBool IsInThisCharacterSetL( // detection is delegated to CnvGb2312::IsCharGBBased()
1.55 + TBool& aSetToTrue,
1.56 + TInt& aConfidenceLevel,
1.57 + const TDesC8& aSample);
1.58 +
1.59 + static CGB18030ConverterImpl* NewL(); // two-phase construction: constructor followed by ConstructL()
1.60 + virtual ~CGB18030ConverterImpl(); // deletes both members below
1.61 +
1.62 +private:
1.63 + CGB18030ConverterImpl();
1.64 + TInt ConstructL(); // allocates the two members below and builds the merged 2-byte conversion data
1.65 + SCnvConversionData * completeGb18030_2byteConversionData; // owned; merged conversion data built from gb18030-diff-gb2312, GB2312, gb18030-diff-gbk and GBK
1.66 + TUint8 * workingMemory; // owned; storage for the range arrays that completeGb18030_2byteConversionData points into
1.67 + };
1.68 +
1.69 +// Implements the GB18030 plug-in on top of the cnvutils framework, re-using the GB2312 and GBK conversion data to save memory.
1.70 +// 1) foreign->unicode:
1.71 +// 1.1) 1 byte->unicode bmp: use gb2312 mapping table;
1.72 +//
1.73 +// 1.2) 2 byte->unicode bmp: use gb18030-2byte mapping table (gb18030_diff_gb2312ConversionData + gb2312ConversionData + gb18030_diff_gbkConversionData + gbkConversionData);
1.74 +//
1.75 +// 1.3) 4 byte->unicode bmp: use gb18030-4byte mapping table;
1.76 +//
1.77 +// 1.4) 4 byte->unicode non-bmp: calculate with formula.
1.78 +//
1.79 +// 2) unicode->foreign:
1.80 +// 2.1) firstly check gb18030-2byte mapping table (gb18030_diff_gb2312ConversionData + gb2312ConversionData + gb18030_diff_gbkConversionData + gbkConversionData);
1.81 +//
1.82 +// 2.2) if not found in 2.1), check gb18030-4byte mapping table;
1.83 +//
1.84 +// 2.3) if not found in 2.2), calculate with formula (gb18030-4byte non BMP);
1.85 +//
1.86 +
1.87 +
1.88 +// GB18030-diff-gb2312 defines 1 foreign-to-Unicode range and 2 Unicode-to-foreign ranges
1.89 +// GB2312.CTL defines 21 foreign-to-Unicode ranges and 21 Unicode-to-foreign ranges
1.90 +// GB18030-diff-gbk defines 1 foreign-to-Unicode range and 2 Unicode-to-foreign ranges
1.91 +// GBK.CTL defines 2 foreign-to-Unicode ranges and 2 Unicode-to-foreign ranges
1.92 +const TInt KNumberOfBytesOfWorkingMemory=(1+2+21+21+1+2+2+2)*sizeof(SCnvConversionData::SOneDirectionData::SRange); // room for all 52 ranges counted above; 1040 bytes in total if an SRange occupies 20 bytes
1.93 +
1.94 +const TDesC8& CGB18030ConverterImpl::ReplacementForUnconvertibleUnicodeCharacters() // returns the replacement byte sequence supplied by the gb18030-diff-gbk data
1.95 + {
1.96 + return CnvGb18030_diff_gbk::ReplacementForUnconvertibleUnicodeCharacters();
1.97 + }
1.98 +
1.99 +_LIT(KLitPanicText, "GB18030"); // panic category handed to User::Panic() by Panic() below
1.100 +enum TPanic // panic reason codes, numbered consecutively from 1; only a few of them are referenced by the code in this file
1.101 + {
1.102 + EPanicNothingToConvert1=1,
1.103 + EPanicNothingToConvert2,
1.104 + EPanicNothingToConvert3,
1.105 + EPanicNothingToConvert4,
1.106 + EPanicNothingToConvert5,
1.107 + EPanicNothingToConvert6,
1.108 + EPanicOddNumberOfBytes1,
1.109 + EPanicOddNumberOfBytes2,
1.110 + EPanicOddNumberOfBytes3,
1.111 + EPanicOddNumberOfBytes4,
1.112 + EPanicOddNumberOfBytes5,
1.113 + EPanicOddNumberOfBytes6,
1.114 + EPanicBadHighBit1,
1.115 + EPanicBadHighBit2,
1.116 + EPanicBadHighBit3,
1.117 + EPanicBadHighBit4,
1.118 + EPanicBadHighBit5,
1.119 + EPanicBadHighBit6,
1.120 + EPanicBadHighBit7,
1.121 + EPanicBadPointers1,
1.122 + EPanicBadPointers2,
1.123 + EPanicBadPointers3,
1.124 + EPanicBadPointers4,
1.125 + EPanicBadPointers5,
1.126 + EPanicBadPointers6,
1.127 + EPanicBadPointers7,
1.128 + EPanicBadPointers8,
1.129 + EPanicBadPointers9,
1.130 + EPanicBadPointers10,
1.131 + EPanicBadPointers11,
1.132 + EPanicBadPointers12,
1.133 + EPanicBadPointers13,
1.134 + EPanicBadPointers14,
1.135 + EPanicBadPointers15,
1.136 + EPanicBadPointers16,
1.137 + EPanicBadPointers17,
1.138 + EPanicBadPointers18,
1.139 + EPanicBadPointers19,
1.140 + EPanicBadPointers20,
1.141 + EPanicBadPointers21,
1.142 + EPanicBadPointers22,
1.143 + EPanicBadPointers23,
1.144 + EPanicBadPointers24,
1.145 + EPanicBadPointers25,
1.146 + EPanicBadPointers26,
1.147 + EPanicBadPointers27,
1.148 + EPanicBadPointers28,
1.149 + EPanicBadPointers29,
1.150 + EPanicBadPointers30,
1.151 + EPanicBadPointers31,
1.152 + EPanicBadPointers32,
1.153 + EPanicBadPointers33,
1.154 + EPanicBadPointers34,
1.155 + EPanicBadPointers35,
1.156 + EPanicBadPointers36,
1.157 + EPanicBadCalculation1,
1.158 + EPanicBadCalculation2,
1.159 + EPanicNumberOfBytesIsNotMultipleOfThree1,
1.160 + EPanicNumberOfBytesIsNotMultipleOfThree2,
1.161 + EPanicSingleShift2Expected,
1.162 + EPanicSingleShift3Expected,
1.163 + EPanicTooManyBytesOfWorkingMemoryUsed1, // raised when the foreign-to-Unicode ranges overflow the working memory
1.164 + EPanicTooManyBytesOfWorkingMemoryUsed2 // raised when the Unicode-to-foreign ranges overflow the working memory
1.165 + };
1.166 +
1.167 +LOCAL_C void Panic(TPanic aPanic) // raises a panic in category KLitPanicText with aPanic as the reason
1.168 + {
1.169 + User::Panic(KLitPanicText, aPanic);
1.170 + }
1.171 +
1.172 +#define ARRAY_LENGTH(aArray) (sizeof(aArray)/sizeof((aArray)[0])) // element count; only meaningful for a real array, not for a pointer
1.173 +
1.174 +LOCAL_C void Step12DummyConvertFromIntermediateBufferInPlace(TInt, TDes8&, TInt& aNumberOfCharactersThatDroppedOut) // no-op hook used for steps 1 and 2 of ConvertFromUnicode(); the buffer is left untouched
1.175 + {
1.176 + aNumberOfCharactersThatDroppedOut=0; // nothing is ever dropped
1.177 + }
1.178 +
1.179 +// Perform the actual conversion (unicode -> gb18030 4byte non-BMP) using formula in this function
1.180 +LOCAL_C void Step3ConvertFromIntermediateBufferInPlace(TInt aStartPositionInDescriptor, TDes8& aDescriptor, TInt& aNumberOfCharactersThatDroppedOut) // rewrites aDescriptor in place, from aStartPositionInDescriptor onwards
1.181 + {
1.182 + aNumberOfCharactersThatDroppedOut = 0; // no drop out, because all GB18030 outside BMP are exactly 4-bytes
1.183 +
1.184 + const TInt descriptorLength=aDescriptor.Length();
1.185 + TUint8* pVeryFrom = CONST_CAST(TUint8*, aDescriptor.Ptr()); // start of the whole descriptor
1.186 + const TUint8* pEnd = pVeryFrom + descriptorLength;
1.187 + TUint8* pFrom = pVeryFrom + aStartPositionInDescriptor; // start of the 4-byte unit being rewritten
1.188 + FOREVER
1.189 + {
1.190 + if (pFrom + 4 > pEnd) // fewer than 4 bytes left, so stop
1.191 + {
1.192 + __ASSERT_DEBUG(pFrom==pEnd, Panic(EPanicBadPointers25)); // debug builds only: the converted part must be a whole number of 4-byte units
1.193 + break;
1.194 + }
1.195 + TUint characterCode = 0; // the 4 bytes at pFrom read as one big-endian 32-bit value
1.196 + for (TInt i=0; i<4; i++)
1.197 + {
1.198 + characterCode <<= 8;
1.199 + characterCode += pFrom[i];
1.200 + }
1.201 +
1.202 + // to gb18030: (value - 0x10000) is split into digits of radix 10, 126, 10, least significant first
1.203 + characterCode -= 0x10000;
1.204 + TUint b4 = characterCode % 10 + 0x30;
1.205 + characterCode /= 10;
1.206 + TUint b3 = characterCode % 126 + 0x81;
1.207 + characterCode /= 126;
1.208 + TUint b2 = characterCode % 10 + 0x30;
1.209 + TUint b1 = characterCode / 10 + 0x90; // whatever remains becomes the lead byte, counted from 0x90
1.210 +
1.211 + *pFrom++ = b1; // overwrite the same 4 bytes that were just read
1.212 + *pFrom++ = b2;
1.213 + *pFrom++ = b3;
1.214 + *pFrom++ = b4;
1.215 + }
1.216 + aDescriptor.SetLength(pFrom-pVeryFrom); // length ends after the last unit written
1.217 + }
1.218 +
1.219 +// gb2312-1byte ->unicode (0x00 - 0x7F): returns how many leading bytes of aDescriptor are <= 0x7F
1.220 +LOCAL_C TInt Step0NumberOfBytesAbleToConvertToUnicode(const TDesC8& aDescriptor)
1.221 + {
1.222 + const TInt descriptorLength=aDescriptor.Length();
1.223 + const TUint8* pointerToPreviousByte=aDescriptor.Ptr()-1; // always one position before the next unread byte
1.224 + const TUint8* const pointerToLastByte=pointerToPreviousByte+descriptorLength; // last byte of the input
1.225 +
1.226 + TInt numOfBytes = 0;
1.227 + FOREVER
1.228 + {
1.229 + if (pointerToPreviousByte>=pointerToLastByte) // input exhausted
1.230 + {
1.231 + break;
1.232 + }
1.233 + // byte 1
1.234 + TUint b1 = pointerToPreviousByte[1];
1.235 + if (b1 <= 0x7F)
1.236 + {
1.237 + pointerToPreviousByte++;
1.238 + numOfBytes++;
1.239 + }
1.240 + else
1.241 + break; // first byte above 0x7F ends the single-byte run
1.242 + }
1.243 + return numOfBytes;
1.244 + }
1.245 +
1.246 +// gb18030-2byte --> unicode (0x8140 - 0xFE7E, 0x8180 - 0xFEFE): returns the byte count of the leading run of 2-byte codes, or EErrorIllFormedInput
1.247 +LOCAL_C TInt Step1NumberOfBytesAbleToConvertToUnicode(const TDesC8& aDescriptor)
1.248 + {
1.249 + const TInt descriptorLength=aDescriptor.Length();
1.250 + const TUint8* pointerToPreviousByte=aDescriptor.Ptr()-1; // always one position before the next unread byte
1.251 + const TUint8* const pointerToLastByte=pointerToPreviousByte+descriptorLength; // last byte of the input
1.252 +
1.253 + TInt numOfBytes = 0;
1.254 + FOREVER
1.255 + {
1.256 + if (pointerToPreviousByte>=pointerToLastByte) // input exhausted
1.257 + {
1.258 + break;
1.259 + }
1.260 + // byte 1
1.261 + TUint b1 = pointerToPreviousByte[1];
1.262 + if (b1 <= 0x80 || b1 > 0xFE) // lead byte must be 0x81..0xFE
1.263 + break;
1.264 +
1.265 + // byte 2
1.266 + if (pointerToPreviousByte+1 >= pointerToLastByte) // lead byte is the last byte of the input: truncated, stop here
1.267 + break;
1.268 + TUint b2 = pointerToPreviousByte[2];
1.269 + if (b2 >= 0x40 && b2 <= 0xFE && b2 != 0x7F) // all gb18030 2-byte code
1.270 + {
1.271 + pointerToPreviousByte = pointerToPreviousByte + 2;
1.272 + numOfBytes = numOfBytes + 2;
1.273 + }
1.274 + else if (b2 < 0x30 || b2 > 0x39) // neither a 2-byte trail byte nor the digit that opens a 4-byte code
1.275 + {
1.276 + if (numOfBytes <= 0) // the error is reported only when nothing has been accepted yet
1.277 + return CCnvCharacterSetConverter::EErrorIllFormedInput;
1.278 + else
1.279 + break;
1.280 + }
1.281 + else
1.282 + break; // second byte is 0x30..0x39: a 4-byte code, left for steps 2 and 3
1.283 + }
1.284 + return numOfBytes;
1.285 + }
1.286 +
1.287 +
1.288 +// gb18030 4-bytes bmp --> unicode (0x81308130 - 0x8439FE39): returns the byte count of the leading run of such codes, or EErrorIllFormedInput
1.289 +LOCAL_C TInt Step2NumberOfBytesAbleToConvertToUnicode(const TDesC8& aDescriptor)
1.290 + {
1.291 + const TUint8* pointerToPreviousByte=aDescriptor.Ptr()-1; // always one position before the next unread byte
1.292 + const TUint8* const pointerToLastByte=pointerToPreviousByte+aDescriptor.Length();
1.293 + __ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastByte, Panic(EPanicBadPointers25));
1.294 +
1.295 + TInt numOfBytes = 0;
1.296 + FOREVER
1.297 + {
1.298 + if (pointerToPreviousByte>=pointerToLastByte) // input exhausted
1.299 + {
1.300 + break;
1.301 + }
1.302 +
1.303 + // byte 1
1.304 + TUint b1 = pointerToPreviousByte[1];
1.305 + if ((b1 < 0x81) || (b1 > 0x84)){ // this step only accepts lead bytes 0x81..0x84
1.306 + break;
1.307 + }
1.308 +
1.309 + // byte 2
1.310 + if (pointerToPreviousByte+1 >= pointerToLastByte) // truncated sequence: stop without reporting an error
1.311 + break;
1.312 + TUint b2 = pointerToPreviousByte[2];
1.313 + if (b2 >= 0x40 && b2 <= 0xFE && b2 != 0x7F) // all gb18030 2-byte code
1.314 + break;
1.315 + else if (b2 < 0x30 || b2 > 0x39) // not a digit byte either
1.316 + {
1.317 + if (numOfBytes == 0) // the error is reported only when nothing has been accepted yet
1.318 + return CCnvCharacterSetConverter::EErrorIllFormedInput;
1.319 + else
1.320 + break;
1.321 + }
1.322 +
1.323 +
1.324 + // byte 3
1.325 + if (pointerToPreviousByte+2 >= pointerToLastByte)
1.326 + break;
1.327 + TUint b3 = pointerToPreviousByte[3];
1.328 + if (b3 < 0x81 || b3 > 0xFE) // third byte must be 0x81..0xFE
1.329 + {
1.330 + if (numOfBytes == 0)
1.331 + return CCnvCharacterSetConverter::EErrorIllFormedInput;
1.332 + else
1.333 + break;
1.334 + }
1.335 +
1.336 + // byte 4
1.337 + if (pointerToPreviousByte+3 >= pointerToLastByte)
1.338 + break;
1.339 + TUint b4 = pointerToPreviousByte[4];
1.340 + if (b4 < 0x30 || b4 > 0x39) // fourth byte must be 0x30..0x39
1.341 + {
1.342 + if (numOfBytes == 0)
1.343 + return CCnvCharacterSetConverter::EErrorIllFormedInput;
1.344 + else
1.345 + break;
1.346 + }
1.347 + else
1.348 + {
1.349 + numOfBytes = numOfBytes + 4; // one complete 4-byte code accepted
1.350 + pointerToPreviousByte = pointerToPreviousByte+4;
1.351 + }
1.352 + }
1.353 +
1.354 + return numOfBytes;
1.355 + }
1.356 +
1.357 +// gb18030 4-bytes non-bmp --> unicode (0x90308130~0xE339FE39): returns the byte count of the leading run of such codes, or EErrorIllFormedInput
1.358 +LOCAL_C TInt Step3NumberOfBytesAbleToConvertToUnicode(const TDesC8& aDescriptor)
1.359 + {
1.360 + const TUint8* pointerToPreviousByte=aDescriptor.Ptr()-1; // always one position before the next unread byte
1.361 + const TUint8* const pointerToLastByte=pointerToPreviousByte+aDescriptor.Length();
1.362 + __ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastByte, Panic(EPanicBadPointers25));
1.363 +
1.364 + TInt numOfBytes = 0;
1.365 + FOREVER
1.366 + {
1.367 + if (pointerToPreviousByte>=pointerToLastByte) // input exhausted
1.368 + {
1.369 + break;
1.370 + }
1.371 +
1.372 + // byte 1
1.373 + TUint b1 = pointerToPreviousByte[1];
1.374 + if (b1 < 0x90 || b1 > 0xE3) // this step only accepts lead bytes 0x90..0xE3
1.375 + break;
1.376 +
1.377 + // byte 2
1.378 + if (pointerToPreviousByte+1 >= pointerToLastByte) // truncated sequence: stop without reporting an error
1.379 + break;
1.380 + TUint b2 = pointerToPreviousByte[2];
1.381 + if (b2 >= 0x40 && b2 <= 0xFE && b2 != 0x7F) // same second-byte test that step 1 uses for 2-byte codes
1.382 + break;
1.383 + else if (b2 < 0x30 || b2 > 0x39) // not a digit byte either
1.384 + {
1.385 + if (numOfBytes == 0) // the error is reported only when nothing has been accepted yet
1.386 + return CCnvCharacterSetConverter::EErrorIllFormedInput;
1.387 + else
1.388 + break;
1.389 + }
1.390 +
1.391 + // byte 3
1.392 + if (pointerToPreviousByte+2 >= pointerToLastByte)
1.393 + break;
1.394 + TUint b3 = pointerToPreviousByte[3];
1.395 + if (b3 < 0x81 || b3 > 0xFE) // third byte must be 0x81..0xFE
1.396 + {
1.397 + if (numOfBytes == 0)
1.398 + return CCnvCharacterSetConverter::EErrorIllFormedInput;
1.399 + else
1.400 + break;
1.401 + }
1.402 +
1.403 + // byte 4
1.404 + if (pointerToPreviousByte+3 >= pointerToLastByte)
1.405 + break;
1.406 + TUint b4 = pointerToPreviousByte[4];
1.407 + if (b4 < 0x30 || b4 > 0x39) // fourth byte must be 0x30..0x39
1.408 + {
1.409 + if (numOfBytes == 0)
1.410 + return CCnvCharacterSetConverter::EErrorIllFormedInput;
1.411 + else
1.412 + break;
1.413 + }
1.414 + else
1.415 + {
1.416 + numOfBytes = numOfBytes + 4; // one complete 4-byte code accepted
1.417 + pointerToPreviousByte = pointerToPreviousByte + 4;
1.418 + }
1.419 + }
1.420 + return numOfBytes;
1.421 + }
1.422 +
1.423 +void Step012DummyConvertToIntermediateBufferInPlace(TDes8&) // no-op hook for methods 0, 1 and 2 of ConvertToUnicode(); NOTE(review): the sibling helpers are LOCAL_C, this one is not - confirm whether that is intended
1.424 + {
1.425 + }
1.426 +
1.427 +// Perform the actual conversion (gb18030 4byte non-BMP -> unicode) using formula in this function
1.428 +LOCAL_C void Step3ConvertToIntermediateBufferInPlace(TDes8& aDescriptor) // each 4-byte code in aDescriptor is overwritten with a big-endian 32-bit value
1.429 + {
1.430 + const TInt descriptorLength=aDescriptor.Length();
1.431 + __ASSERT_DEBUG(descriptorLength%4 == 0, Panic(EPanicNothingToConvert5)); // debug builds only: input must be whole 4-byte codes
1.432 + TUint8* pointerToTargetByte=CONST_CAST(TUint8*, aDescriptor.Ptr());
1.433 + const TUint8* pointerToSourceByte=pointerToTargetByte; // source and target advance together over the same buffer
1.434 + const TUint8* const pointerToLastByte=pointerToSourceByte+descriptorLength; // despite the name, this is one past the final byte
1.435 +
1.436 + FOREVER
1.437 + {
1.438 + if (pointerToLastByte - pointerToSourceByte < 4) // fewer than 4 bytes left, so stop
1.439 + break;
1.440 +
1.441 + // conversion
1.442 + TUint8 b1 = pointerToSourceByte[0];
1.443 + TUint8 b2 = pointerToSourceByte[1];
1.444 + TUint8 b3 = pointerToSourceByte[2];
1.445 + TUint8 b4 = pointerToSourceByte[3];
1.446 +
1.447 + TUint characterCode = 0x10000 + (b1 - 0x90) * 12600 + // weights 12600, 1260, 10, 1 undo the radix 10/126/10 split used by Step3ConvertFromIntermediateBufferInPlace
1.448 + (b2 - 0x30) * 1260 +
1.449 + (b3 - 0x81) * 10 +
1.450 + (b4 - 0x30);
1.451 +
1.452 + pointerToTargetByte[0] = ((characterCode >> 24) & 0xFF); // most significant byte first
1.453 + pointerToTargetByte[1] = ((characterCode >> 16) & 0xFF);
1.454 + pointerToTargetByte[2] = ((characterCode >> 8) & 0xFF);
1.455 + pointerToTargetByte[3] = (characterCode & 0xFF);
1.456 +
1.457 + pointerToSourceByte = pointerToSourceByte + 4;
1.458 + pointerToTargetByte = pointerToTargetByte + 4;
1.459 + }
1.460 +
1.461 + aDescriptor.SetLength(descriptorLength); // sets the length to the value read at entry
1.462 + }
1.463 +
1.464 +
1.465 +// A dummy "direct" mapping table for non-Bmp chars in step 3
1.466 +// Use 32-bit Unicode value as intermediate coding
1.467 +LOCAL_D const SCnvConversionData::SVariableByteData::SRange step3ForeignVariableByteDataRanges[]=
1.468 + {
1.469 + {
1.470 + 0x00, // first byte of the 32-bit intermediate value, which starts at 0x00010000
1.471 + 0x00, // first byte is still 0x00 at the top of the range, 0x0010FFFF
1.472 + 3, // total 4 bytes (presumably the count of bytes that follow the first one - confirm against convdata.h)
1.473 + 0
1.474 + },
1.475 + };
1.476 +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange step3ForeignToUnicodeDataRanges[]= // intermediate value -> Unicode, one direct range
1.477 + {
1.478 + {
1.479 + 0x10000, // from 0x10000
1.480 + 0x10ffff, // to 0x10FFFF
1.481 + SCnvConversionData::SOneDirectionData::SRange::EDirect,
1.482 + 0,
1.483 + 0,
1.484 + {
1.485 + 0 // map from intermediate to unicode with offset = 0
1.486 + }
1.487 + },
1.488 + };
1.489 +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange step3UnicodeToForeignDataRanges[]= // Unicode -> intermediate value, one direct range
1.490 + {
1.491 + {
1.492 + 0x10000, //from 0x10000
1.493 + 0x10FFFF, //to 0x10FFFF
1.494 + SCnvConversionData::SOneDirectionData::SRange::EDirect,
1.495 + 4, // output byte count = 4
1.496 + 0,
1.497 + {
1.498 + 0 // offset = 0
1.499 + }
1.500 + },
1.501 + };
1.502 +GLDEF_D const SCnvConversionData step3ConversionData= // conversion data used for the non-BMP step in both directions
1.503 + {
1.504 + SCnvConversionData::EFixedBigEndian,
1.505 + {
1.506 + ARRAY_LENGTH(step3ForeignVariableByteDataRanges),
1.507 + step3ForeignVariableByteDataRanges
1.508 + },
1.509 + {
1.510 + ARRAY_LENGTH(step3ForeignToUnicodeDataRanges),
1.511 + step3ForeignToUnicodeDataRanges
1.512 + },
1.513 + {
1.514 + ARRAY_LENGTH(step3UnicodeToForeignDataRanges),
1.515 + step3UnicodeToForeignDataRanges
1.516 + },
1.517 + NULL,
1.518 + NULL
1.519 + };
1.520 +
1.521 +
1.522 +// An internal mapping table to resolve the conflict introduced in the Symbian GB2312-80 plug-in.
1.523 +// It will be merged into the gb18030-2byte Conversion Data.
1.524 +// It includes mapping: (0xA1A4 -> 0x00B7, 0xA1AA -> 0x2014, 0xA844 <- 0x2015, 0x8139A739 <- 0x30FB)
1.525 +LOCAL_D const SCnvConversionData::SVariableByteData::SRange gb18030_diff_gb2312ForeignVariableByteDataRanges[]= // only lead byte 0xA1 is covered
1.526 + {
1.527 + {
1.528 + 0xA1, //from 0xA1A4
1.529 + 0xA1, //to 0xA1AA
1.530 + 1,
1.531 + 0
1.532 + },
1.533 + };
1.534 +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable1616::SEntry keyedTable1616_foreignToUnicode_1[]= // entries are {foreign code, Unicode value}
1.535 + {
1.536 + {
1.537 + 0xA1A4,
1.538 + 0x00B7
1.539 + },
1.540 + {
1.541 + 0xA1AA,
1.542 + 0x2014
1.543 + }
1.544 + };
1.545 +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange gb18030_diff_gb2312ForeignToUnicodeDataRanges[]= // the single foreign-to-Unicode range of the diff table
1.546 + {
1.547 + {
1.548 + 0xA1A4, // first foreign code of the range
1.549 + 0xA1AA, // last foreign code of the range
1.550 + SCnvConversionData::SOneDirectionData::SRange::EKeyedTable1616,
1.551 + 0,
1.552 + 0,
1.553 + {
1.554 + UData_SKeyedTable1616(keyedTable1616_foreignToUnicode_1)
1.555 + }
1.556 + },
1.557 + };
1.558 +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable1616::SEntry keyedTable1616_unicodeToForeign_1[]= // entries are {Unicode value, 2-byte foreign code}
1.559 + {
1.560 + {
1.561 + 0x2015,
1.562 + 0xA844
1.563 + }
1.564 + };
1.565 +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable3232::SEntry keyedTable3232_unicodeToForeign_1[]= // entries are {Unicode value, 4-byte foreign code}
1.566 + {
1.567 + {
1.568 + 0x30FB,
1.569 + 0x8139A739
1.570 + }
1.571 + };
1.572 +
1.573 +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange gb18030_diff_gb2312UnicodeToForeignDataRanges[]= // the two Unicode-to-foreign ranges of the diff table, one code point each
1.574 + {
1.575 + {
1.576 + 0x2015,
1.577 + 0x2015,
1.578 + SCnvConversionData::SOneDirectionData::SRange::EKeyedTable1616,
1.579 + 2, // output byte count = 2
1.580 + 0,
1.581 + {
1.582 + UData_SKeyedTable1616(keyedTable1616_unicodeToForeign_1)
1.583 + }
1.584 + },
1.585 + {
1.586 + 0x30FB,
1.587 + 0x30FB,
1.588 + SCnvConversionData::SOneDirectionData::SRange::EKeyedTable3232,
1.589 + 4, // output byte count = 4
1.590 + 0,
1.591 + {
1.592 + UData_SKeyedTable3232(keyedTable3232_unicodeToForeign_1)
1.593 + }
1.594 + },
1.595 + };
1.596 +GLDEF_D const SCnvConversionData gb18030_diff_gb2312ConversionData= // copied first into the merged 2-byte data by SetUpCompleteGb18030_2byteConversionData()
1.597 + {
1.598 + SCnvConversionData::EFixedBigEndian,
1.599 + {
1.600 + ARRAY_LENGTH(gb18030_diff_gb2312ForeignVariableByteDataRanges),
1.601 + gb18030_diff_gb2312ForeignVariableByteDataRanges
1.602 + },
1.603 + {
1.604 + ARRAY_LENGTH(gb18030_diff_gb2312ForeignToUnicodeDataRanges),
1.605 + gb18030_diff_gb2312ForeignToUnicodeDataRanges
1.606 + },
1.607 + {
1.608 + ARRAY_LENGTH(gb18030_diff_gb2312UnicodeToForeignDataRanges),
1.609 + gb18030_diff_gb2312UnicodeToForeignDataRanges
1.610 + },
1.611 + NULL,
1.612 + NULL
1.613 + };
1.614 +
1.615 +LOCAL_D const SCnvConversionData::SVariableByteData::SRange foreignVariableByteDataRanges[]= // variable-byte layout installed into the merged 2-byte conversion data
1.616 + {
1.617 + {
1.618 + 0x00, // initial bytes 0x00..0x7f
1.619 + 0x7f,
1.620 + 0, // presumably no further bytes follow (single-byte characters) - confirm against convdata.h
1.621 + 0
1.622 + },
1.623 + {
1.624 + 0x80, // initial bytes 0x80..0xff
1.625 + 0xff,
1.626 + 1, // presumably one further byte follows (2-byte characters) - confirm against convdata.h
1.627 + 0
1.628 + }
1.629 + };
1.630 +
1.631 +LOCAL_C void SetUpCompleteGb18030_2byteConversionData(SCnvConversionData& aCompleteGb18030_2byteConversionData, TUint8* aWorkingMemory) // fills the conversion data; its range arrays are laid out inside aWorkingMemory, which must hold KNumberOfBytesOfWorkingMemory bytes
1.632 + {
1.633 + const SCnvConversionData& gb2312ConversionData=CnvGb2312::ConversionData();
1.634 + const SCnvConversionData& gb18030_diff_gbkConversionData=CnvGb18030_diff_gbk::ConversionData();
1.635 + const SCnvConversionData& gbkConversionData=CnvGbk::ConversionData();
1.636 + // create a SCnvConversionData that is the combination of gb18030_diff_gb2312ConversionData, gb2312ConversionData, gb18030_diff_gbkConversionData and gbkConversionData;
1.637 + aCompleteGb18030_2byteConversionData.iEndiannessOfForeignCharacters=SCnvConversionData::EFixedBigEndian;
1.638 + aCompleteGb18030_2byteConversionData.iForeignVariableByteData.iNumberOfRanges=ARRAY_LENGTH(foreignVariableByteDataRanges);
1.639 + aCompleteGb18030_2byteConversionData.iForeignVariableByteData.iRangeArray=foreignVariableByteDataRanges;
1.640 + TInt numberOfBytesOfWorkingMemoryUsed=0; // running offset into aWorkingMemory
1.641 +
1.642 + // set up the foreign-to-Unicode data
1.643 + const TInt numberOfForeignToUnicodeDataRanges=gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges + gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges + gb18030_diff_gbkConversionData.iForeignToUnicodeData.iNumberOfRanges + gbkConversionData.iForeignToUnicodeData.iNumberOfRanges;
1.644 + aCompleteGb18030_2byteConversionData.iForeignToUnicodeData.iNumberOfRanges=numberOfForeignToUnicodeDataRanges;
1.645 + SCnvConversionData::SOneDirectionData::SRange* foreignToUnicodeDataRangeArray=REINTERPRET_CAST(SCnvConversionData::SOneDirectionData::SRange*, aWorkingMemory+numberOfBytesOfWorkingMemoryUsed);
1.646 + numberOfBytesOfWorkingMemoryUsed+=(numberOfForeignToUnicodeDataRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange));
1.647 + __ASSERT_ALWAYS(numberOfBytesOfWorkingMemoryUsed<=KNumberOfBytesOfWorkingMemory, Panic(EPanicTooManyBytesOfWorkingMemoryUsed1)); // checked in all builds, before anything is copied
1.648 + aCompleteGb18030_2byteConversionData.iForeignToUnicodeData.iRangeArray=foreignToUnicodeDataRangeArray;
1.649 + Mem::Copy(foreignToUnicodeDataRangeArray, gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iRangeArray, gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange)); // copy order: diff-gb2312, gb2312, diff-gbk, gbk
1.650 + Mem::Copy(foreignToUnicodeDataRangeArray + gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges, gb2312ConversionData.iForeignToUnicodeData.iRangeArray, gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange));
1.651 + Mem::Copy(foreignToUnicodeDataRangeArray + gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges + gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges, gb18030_diff_gbkConversionData.iForeignToUnicodeData.iRangeArray, gb18030_diff_gbkConversionData.iForeignToUnicodeData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange));
1.652 + Mem::Copy(foreignToUnicodeDataRangeArray + gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges + gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges + gb18030_diff_gbkConversionData.iForeignToUnicodeData.iNumberOfRanges, gbkConversionData.iForeignToUnicodeData.iRangeArray, gbkConversionData.iForeignToUnicodeData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange));
1.653 +
1.654 + // set up the Unicode-to-foreign data
1.655 + const TInt numberOfUnicodeToForeignDataRanges=gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges + gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges + gb18030_diff_gbkConversionData.iUnicodeToForeignData.iNumberOfRanges + gbkConversionData.iUnicodeToForeignData.iNumberOfRanges;
1.656 + aCompleteGb18030_2byteConversionData.iUnicodeToForeignData.iNumberOfRanges=numberOfUnicodeToForeignDataRanges;
1.657 + SCnvConversionData::SOneDirectionData::SRange* unicodeToForeignDataRangeArray=REINTERPRET_CAST(SCnvConversionData::SOneDirectionData::SRange*, aWorkingMemory+numberOfBytesOfWorkingMemoryUsed); // starts right after the foreign-to-Unicode ranges
1.658 + numberOfBytesOfWorkingMemoryUsed+=(numberOfUnicodeToForeignDataRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange));
1.659 + __ASSERT_ALWAYS(numberOfBytesOfWorkingMemoryUsed<=KNumberOfBytesOfWorkingMemory, Panic(EPanicTooManyBytesOfWorkingMemoryUsed2));
1.660 + aCompleteGb18030_2byteConversionData.iUnicodeToForeignData.iRangeArray=unicodeToForeignDataRangeArray;
1.661 + Mem::Copy(unicodeToForeignDataRangeArray, gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iRangeArray, gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange)); // same copy order as for the foreign-to-Unicode ranges
1.662 + Mem::Copy(unicodeToForeignDataRangeArray + gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges, gb2312ConversionData.iUnicodeToForeignData.iRangeArray, gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange));
1.663 + Mem::Copy(unicodeToForeignDataRangeArray + gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges + gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges, gb18030_diff_gbkConversionData.iUnicodeToForeignData.iRangeArray, gb18030_diff_gbkConversionData.iUnicodeToForeignData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange));
1.664 + Mem::Copy(unicodeToForeignDataRangeArray + gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges + gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges + gb18030_diff_gbkConversionData.iUnicodeToForeignData.iNumberOfRanges, gbkConversionData.iUnicodeToForeignData.iRangeArray, gbkConversionData.iUnicodeToForeignData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange));
1.665 + }
1.666 +
1.667 +
1.668 +TInt CGB18030ConverterImpl::ConvertFromUnicode( // describes three character sets and hands the conversion to CnvUtilities::ConvertFromUnicode(), whose result is returned
1.669 + CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
1.670 + const TDesC8& aReplacementForUnconvertibleUnicodeCharacters,
1.671 + TDes8& aForeign,
1.672 + const TDesC16& aUnicode,
1.673 + CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
1.674 + {
1.675 + TFixedArray<CnvUtilities::SCharacterSet, 3> characterSets;
1.676 +
1.677 + // step 1) gb18030-2byte
1.678 + characterSets[0].iConversionData = completeGb18030_2byteConversionData; // merged data built in ConstructL()
1.679 + characterSets[0].iConvertFromIntermediateBufferInPlace = Step12DummyConvertFromIntermediateBufferInPlace;
1.680 + characterSets[0].iEscapeSequence = &KNullDesC8; // no escape sequence is used for any of the three sets
1.681 +
1.682 + // step 2) gb18030-4byte BMP
1.683 + characterSets[1].iConversionData = &CnvGb18030_4byte::ConversionData();
1.684 + characterSets[1].iConvertFromIntermediateBufferInPlace = Step12DummyConvertFromIntermediateBufferInPlace;
1.685 + characterSets[1].iEscapeSequence = &KNullDesC8;
1.686 +
1.687 + // step 3) gb18030-4byte non-BMP
1.688 + characterSets[2].iConversionData = &step3ConversionData;
1.689 + characterSets[2].iConvertFromIntermediateBufferInPlace = Step3ConvertFromIntermediateBufferInPlace; // applies the non-BMP formula to the intermediate 32-bit values
1.690 + characterSets[2].iEscapeSequence = &KNullDesC8;
1.691 +
1.692 + return CnvUtilities::ConvertFromUnicode(aDefaultEndiannessOfForeignCharacters, aReplacementForUnconvertibleUnicodeCharacters, aForeign, aUnicode, aIndicesOfUnconvertibleCharacters, characterSets.Array());
1.693 + }
1.694 +
1.695 +
1.696 +TInt CGB18030ConverterImpl::ConvertToUnicode( // describes four conversion methods and hands the work to CnvUtilities::ConvertToUnicodeFromHeterogeneousForeign(), whose result is returned
1.697 + CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
1.698 + TDes16& aUnicode,
1.699 + const TDesC8& aForeign,
1.700 + TInt& /*aState*/, // not used by this converter
1.701 + TInt& aNumberOfUnconvertibleCharacters,
1.702 + TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
1.703 + {
1.704 + TFixedArray<CnvUtilities::SMethod, 4> methods;
1.705 + // step 0) gb2312-1byte
1.706 + methods[0].iNumberOfBytesAbleToConvert = Step0NumberOfBytesAbleToConvertToUnicode;
1.707 + methods[0].iConvertToIntermediateBufferInPlace = Step012DummyConvertToIntermediateBufferInPlace;
1.708 + methods[0].iConversionData = &CnvGb2312::ConversionData(); //only use one byte part
1.709 + methods[0].iNumberOfBytesPerCharacter = 1;
1.710 + methods[0].iNumberOfCoreBytesPerCharacter = 1;
1.711 +
1.712 + // step 1) gb18030-2byte
1.713 + methods[1].iNumberOfBytesAbleToConvert = Step1NumberOfBytesAbleToConvertToUnicode;
1.714 + methods[1].iConvertToIntermediateBufferInPlace = Step012DummyConvertToIntermediateBufferInPlace;
1.715 + methods[1].iConversionData = completeGb18030_2byteConversionData; // merged data built in ConstructL()
1.716 + methods[1].iNumberOfBytesPerCharacter = 2;
1.717 + methods[1].iNumberOfCoreBytesPerCharacter = 2;
1.718 +
1.719 + // step 2) gb18030 4-byte BMP
1.720 + methods[2].iNumberOfBytesAbleToConvert = Step2NumberOfBytesAbleToConvertToUnicode;
1.721 + methods[2].iConvertToIntermediateBufferInPlace = Step012DummyConvertToIntermediateBufferInPlace;
1.722 + methods[2].iConversionData = &CnvGb18030_4byte::ConversionData();
1.723 + methods[2].iNumberOfBytesPerCharacter = 4;
1.724 + methods[2].iNumberOfCoreBytesPerCharacter = 4;
1.725 +
1.726 + // step 3) gb18030 4-byte non-BMP
1.727 + methods[3].iNumberOfBytesAbleToConvert = Step3NumberOfBytesAbleToConvertToUnicode;
1.728 + methods[3].iConvertToIntermediateBufferInPlace = Step3ConvertToIntermediateBufferInPlace; // turns each 4-byte code into a 32-bit value before the table lookup
1.729 + methods[3].iConversionData = &step3ConversionData;
1.730 + methods[3].iNumberOfBytesPerCharacter = 4;
1.731 + methods[3].iNumberOfCoreBytesPerCharacter = 4;
1.732 +
1.733 + return CnvUtilities::ConvertToUnicodeFromHeterogeneousForeign(aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign, aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter, methods.Array());
1.734 + }
1.735 +
1.736 +TBool CGB18030ConverterImpl::IsInThisCharacterSetL( // character-set detection; returns whatever CnvGb2312::IsCharGBBased() returns for aSample
1.737 + TBool& aSetToTrue,
1.738 + TInt& aConfidenceLevel,
1.739 + const TDesC8& aSample)
1.740 + {
1.741 + aSetToTrue = ETrue; // always set, regardless of the sample
1.742 + return CnvGb2312::IsCharGBBased(aConfidenceLevel, aSample); // aConfidenceLevel is passed by reference for the helper to fill in
1.743 + }
1.744 +
1.745 +CGB18030ConverterImpl* CGB18030ConverterImpl::NewL() // factory registered in ImplementationTable; leaves if allocation or ConstructL() fails
1.746 + {
1.747 + CGB18030ConverterImpl* self = new(ELeave) CGB18030ConverterImpl();
1.748 + CleanupStack::PushL(self); // protects self while ConstructL() can leave
1.749 + self->ConstructL(); // the TInt it returns is ignored
1.750 + CleanupStack::Pop(); // self
1.751 + return self;
1.752 + }
1.753 +
1.754 +CGB18030ConverterImpl::~CGB18030ConverterImpl() // releases the two buffers allocated in ConstructL()
1.755 + {
1.756 + if (workingMemory) // the null checks are redundant (delete on a null pointer does nothing) but harmless
1.757 + delete[] workingMemory;
1.758 + if (completeGb18030_2byteConversionData)
1.759 + delete completeGb18030_2byteConversionData;
1.760 + }
1.761 +
1.762 +CGB18030ConverterImpl::CGB18030ConverterImpl() // does nothing itself; the members are set in ConstructL(). NOTE(review): presumably the base class zero-fills them first (CBase) - confirm
1.763 + {
1.764 + }
1.765 +
1.766 +TInt CGB18030ConverterImpl::ConstructL() // second-phase constructor: allocates both buffers and builds the merged 2-byte conversion data; leaves on allocation failure
1.767 + {
1.768 + // Both buffers are stored straight into members, so if an allocation leaves they are released by the destructor (NewL() has pushed this object; assumes the plug-in base derives from CBase).
1.769 + // They must not also be pushed on the cleanup stack: the conversion data would then be freed twice if the second allocation left.
1.770 + completeGb18030_2byteConversionData = new (ELeave)SCnvConversionData;
1.771 + workingMemory = new (ELeave) TUint8[KNumberOfBytesOfWorkingMemory]; //1040 bytes
1.772 + SetUpCompleteGb18030_2byteConversionData(*completeGb18030_2byteConversionData, workingMemory);
1.773 + return 1; // fixed value; NewL() ignores it
1.774 + }
1.775 +
1.776 +const TImplementationProxy ImplementationTable[] = // ECom proxy table returned by ImplementationGroupProxy()
1.777 + {
1.778 + IMPLEMENTATION_PROXY_ENTRY(0x10287038,CGB18030ConverterImpl::NewL) // pairs implementation UID 0x10287038 with the NewL() factory
1.779 + };
1.780 +
1.781 +EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount) // exported entry point: returns ImplementationTable and reports its entry count through aTableCount
1.782 + {
1.783 + aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy); // number of entries in the table
1.784 +
1.785 + return ImplementationTable;
1.786 + }
1.787 +