os/textandloc/charconvfw/charconvplugins/src/plugins/gb18030.cpp
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/textandloc/charconvfw/charconvplugins/src/plugins/gb18030.cpp	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,784 @@
     1.4 +/*
     1.5 +* Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies).
     1.6 +* All rights reserved.
     1.7 +* This component and the accompanying materials are made available
     1.8 +* under the terms of "Eclipse Public License v1.0"
     1.9 +* which accompanies this distribution, and is available
    1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
    1.11 +*
    1.12 +* Initial Contributors:
    1.13 +* Nokia Corporation - initial contribution.
    1.14 +*
    1.15 +* Contributors:
    1.16 +*
    1.17 +* Description:  GB18030 converter implementation
    1.18 +*
    1.19 +*/
    1.20 +
    1.21 +
    1.22 +#include <e32std.h>
    1.23 +#include <charconv.h>
    1.24 +#include <convutils.h>
    1.25 +#include <convdata.h>
    1.26 +#include "gb2312.h"
    1.27 +#include "gbk.h"
    1.28 +#include "gb18030_4byte.h"
    1.29 +#include "gb18030_diff_gbk.h"
    1.30 +#include <ecom/implementationproxy.h>
    1.31 +#include <charactersetconverter.h>
    1.32 +
    1.33 +class CGB18030ConverterImpl : public CCharacterSetConverterPluginInterface
    1.34 +	{
    1.35 +
    1.36 +public:
    1.37 +	virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters();
    1.38 +
    1.39 +	virtual TInt ConvertFromUnicode(
    1.40 +		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
    1.41 +		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
    1.42 +		TDes8& aForeign, 
    1.43 +		const TDesC16& aUnicode, 
    1.44 +		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
    1.45 +
    1.46 +	virtual TInt ConvertToUnicode(
    1.47 +		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
    1.48 +		TDes16& aUnicode, 
    1.49 +		const TDesC8& aForeign, 
    1.50 +		TInt& aState, 
    1.51 +		TInt& aNumberOfUnconvertibleCharacters, 
    1.52 +		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
    1.53 +
    1.54 +	virtual TBool IsInThisCharacterSetL(
    1.55 +		TBool& aSetToTrue, 
    1.56 +		TInt& aConfidenceLevel, 
    1.57 +		const TDesC8& aSample);
    1.58 +
    1.59 +	static CGB18030ConverterImpl* NewL();
    1.60 +	virtual ~CGB18030ConverterImpl();
    1.61 +
    1.62 +private:
    1.63 +	CGB18030ConverterImpl();
    1.64 +	TInt ConstructL();
    1.65 +	SCnvConversionData * completeGb18030_2byteConversionData; // a merged conversion data including Gb18030-diff-g2312, GB2312, Gb18030-diff-gbk and Gbk
    1.66 +	TUint8 * workingMemory;
    1.67 +	};
    1.68 +
    1.69 +// Implement gb18030 plug-in using cnvutils framework in which gb2312 and gbk conversion data is re-used for memory saving 
    1.70 +// 1) foreign->unicode:
    1.71 +//    1.1) 1 byte->unicode bmp: use gb2312 mapping table;
    1.72 +//
    1.73 +//    1.2) 2 byte->unicode bmp: use gb18030-2byte mapping table (gb18030_diff_gb2312ConversionData + gb2312ConversionData + gb18030_diff_gbkConversionData + gbkConversionData);
    1.74 +//
    1.75 +//    1.3) 4 byte->unicode bmp: use gb18030-4byte mapping table;
    1.76 +//
    1.77 +//    1.4) 4 byte->unicode non-bmp: calculate with formula.
    1.78 +//
    1.79 +// 2) unicode->foreign:
    1.80 +//    2.1) firstly check gb18030-2byte mapping table (gb18030_diff_gb2312ConversionData + gb2312ConversionData + gb18030_diff_gbkConversionData + gbkConversionData);
    1.81 +//
    1.82 +//    2.2) if not found in 2.1), check gb18030-4byte mapping table;
    1.83 +//
    1.84 +//    2.3) if not found in 2.2), calculate with formula (gb18030-4byte non BMP);
    1.85 +//
    1.86 +
    1.87 +
    1.88 +// GB18030-diff-gb2312 defines 1 foreign-to-Unicode range and 2 unicode-to-Foreign range
    1.89 +// GB2312.CTL defines 21 foreign-to-Unicode ranges and 21 Unicode-to-foreign ranges
    1.90 +// GB18030-diff-gbk defines 1 foreign-to-Unicode ranges and 2 Unicode-to-foreign range
    1.91 +// GBK.CTL defines 2 foreign-to-Unicode ranges and 2 Unicode-to-foreign range
    1.92 +const TInt KNumberOfBytesOfWorkingMemory=(1+2+21+21+1+2+2+2)*sizeof(SCnvConversionData::SOneDirectionData::SRange);  //totally 1040 bytes
    1.93 +
    1.94 +const TDesC8& CGB18030ConverterImpl::ReplacementForUnconvertibleUnicodeCharacters()
    1.95 +	{
    1.96 +	return CnvGb18030_diff_gbk::ReplacementForUnconvertibleUnicodeCharacters();
    1.97 +	}
    1.98 +
// Panic category string handed to User::Panic() by the file-local Panic() helper.
_LIT(KLitPanicText, "GB18030");
   1.100 +enum TPanic
   1.101 +	{
   1.102 +	EPanicNothingToConvert1=1,
   1.103 +	EPanicNothingToConvert2,
   1.104 +	EPanicNothingToConvert3,
   1.105 +	EPanicNothingToConvert4,
   1.106 +	EPanicNothingToConvert5,
   1.107 +	EPanicNothingToConvert6,
   1.108 +	EPanicOddNumberOfBytes1,
   1.109 +	EPanicOddNumberOfBytes2,
   1.110 +	EPanicOddNumberOfBytes3,
   1.111 +	EPanicOddNumberOfBytes4,
   1.112 +	EPanicOddNumberOfBytes5,
   1.113 +	EPanicOddNumberOfBytes6,
   1.114 +	EPanicBadHighBit1,
   1.115 +	EPanicBadHighBit2,
   1.116 +	EPanicBadHighBit3,
   1.117 +	EPanicBadHighBit4,
   1.118 +	EPanicBadHighBit5,
   1.119 +	EPanicBadHighBit6,
   1.120 +	EPanicBadHighBit7,
   1.121 +	EPanicBadPointers1,
   1.122 +	EPanicBadPointers2,
   1.123 +	EPanicBadPointers3,
   1.124 +	EPanicBadPointers4,
   1.125 +	EPanicBadPointers5,
   1.126 +	EPanicBadPointers6,
   1.127 +	EPanicBadPointers7,
   1.128 +	EPanicBadPointers8,
   1.129 +	EPanicBadPointers9,
   1.130 +	EPanicBadPointers10,
   1.131 +	EPanicBadPointers11,
   1.132 +	EPanicBadPointers12,
   1.133 +	EPanicBadPointers13,
   1.134 +	EPanicBadPointers14,
   1.135 +	EPanicBadPointers15,
   1.136 +	EPanicBadPointers16,
   1.137 +	EPanicBadPointers17,
   1.138 +	EPanicBadPointers18,
   1.139 +	EPanicBadPointers19,
   1.140 +	EPanicBadPointers20,
   1.141 +	EPanicBadPointers21,
   1.142 +	EPanicBadPointers22,
   1.143 +	EPanicBadPointers23,
   1.144 +	EPanicBadPointers24,
   1.145 +	EPanicBadPointers25,
   1.146 +	EPanicBadPointers26,
   1.147 +	EPanicBadPointers27,
   1.148 +	EPanicBadPointers28,
   1.149 +	EPanicBadPointers29,
   1.150 +	EPanicBadPointers30,
   1.151 +	EPanicBadPointers31,
   1.152 +	EPanicBadPointers32,
   1.153 +	EPanicBadPointers33,
   1.154 +	EPanicBadPointers34,
   1.155 +	EPanicBadPointers35,
   1.156 +	EPanicBadPointers36,
   1.157 +	EPanicBadCalculation1,
   1.158 +	EPanicBadCalculation2,
   1.159 +	EPanicNumberOfBytesIsNotMultipleOfThree1,
   1.160 +	EPanicNumberOfBytesIsNotMultipleOfThree2,
   1.161 +	EPanicSingleShift2Expected,
   1.162 +	EPanicSingleShift3Expected,
   1.163 +	EPanicTooManyBytesOfWorkingMemoryUsed1,
   1.164 +	EPanicTooManyBytesOfWorkingMemoryUsed2
   1.165 +	};
   1.166 +
   1.167 +LOCAL_C void Panic(TPanic aPanic)
   1.168 +	{
   1.169 +	User::Panic(KLitPanicText, aPanic);
   1.170 +	}
   1.171 +
   1.172 +#define ARRAY_LENGTH(aArray) (sizeof(aArray)/sizeof((aArray)[0]))
   1.173 +
   1.174 +LOCAL_C void Step12DummyConvertFromIntermediateBufferInPlace(TInt, TDes8&, TInt& aNumberOfCharactersThatDroppedOut)
   1.175 +	{
   1.176 +	aNumberOfCharactersThatDroppedOut=0;
   1.177 +	}
   1.178 +
   1.179 +// Perform the actual conversion (unicode -> gb18030 4byte non-BMP) using formula in this function
   1.180 +LOCAL_C void Step3ConvertFromIntermediateBufferInPlace(TInt aStartPositionInDescriptor, TDes8& aDescriptor, TInt& aNumberOfCharactersThatDroppedOut)
   1.181 +	{
   1.182 +	aNumberOfCharactersThatDroppedOut = 0;		// no drop out, because all GB18030 outside BMP are exactly 4-bytes
   1.183 +	
   1.184 +	const TInt descriptorLength=aDescriptor.Length();
   1.185 +	TUint8* pVeryFrom = CONST_CAST(TUint8*, aDescriptor.Ptr());
   1.186 +	const TUint8* pEnd = pVeryFrom + descriptorLength;
   1.187 +	TUint8* pFrom = pVeryFrom + aStartPositionInDescriptor;
   1.188 +	FOREVER
   1.189 +		{
   1.190 +		if (pFrom + 4 > pEnd)
   1.191 +			{
   1.192 +			__ASSERT_DEBUG(pFrom==pEnd, Panic(EPanicBadPointers25));
   1.193 +			break;
   1.194 +			}
   1.195 +		TUint characterCode = 0;
   1.196 +		for (TInt i=0; i<4; i++)
   1.197 +			{
   1.198 +			characterCode <<= 8;
   1.199 +			characterCode += pFrom[i];
   1.200 +			}
   1.201 +		
   1.202 +		// to gb18030
   1.203 +		characterCode -= 0x10000;
   1.204 +		TUint b4 = characterCode % 10 + 0x30;
   1.205 +		characterCode /= 10;
   1.206 +		TUint b3 = characterCode % 126 + 0x81;
   1.207 +		characterCode /= 126;
   1.208 +		TUint b2 = characterCode % 10 + 0x30;
   1.209 +		TUint b1 = characterCode / 10 + 0x90;
   1.210 +		
   1.211 +		*pFrom++ = b1;
   1.212 +		*pFrom++ = b2;
   1.213 +		*pFrom++ = b3;
   1.214 +		*pFrom++ = b4;
   1.215 +		}
   1.216 +	aDescriptor.SetLength(pFrom-pVeryFrom);
   1.217 +	}
   1.218 +
   1.219 +// gb2312-1byte ->unicode (0x00 - 0x7F)
   1.220 +LOCAL_C TInt Step0NumberOfBytesAbleToConvertToUnicode(const TDesC8& aDescriptor)
   1.221 +	{
   1.222 +	const TInt descriptorLength=aDescriptor.Length();
   1.223 +	const TUint8* pointerToPreviousByte=aDescriptor.Ptr()-1;
   1.224 +	const TUint8* const pointerToLastByte=pointerToPreviousByte+aDescriptor.Length();
   1.225 +	
   1.226 +	TInt numOfBytes = 0;
   1.227 +	FOREVER
   1.228 +		{
   1.229 +		if (pointerToPreviousByte>=pointerToLastByte)
   1.230 +			{
   1.231 +			break;
   1.232 +			}
   1.233 +		// byte 1
   1.234 +		TUint b1 = pointerToPreviousByte[1];
   1.235 +		if (b1 <= 0x7F)
   1.236 +			{
   1.237 +			pointerToPreviousByte++;
   1.238 +			numOfBytes++;
   1.239 +			}
   1.240 +		else
   1.241 +			break;
   1.242 +		}
   1.243 +		return numOfBytes;
   1.244 +	}
   1.245 +
   1.246 +// gb18030-2byte --> unicode (0x8140 - 0xFE7E, 0x8180 - 0xFEFE)
   1.247 +LOCAL_C TInt Step1NumberOfBytesAbleToConvertToUnicode(const TDesC8& aDescriptor)
   1.248 +	{
   1.249 +	const TInt descriptorLength=aDescriptor.Length();
   1.250 +	const TUint8* pointerToPreviousByte=aDescriptor.Ptr()-1;
   1.251 +	const TUint8* const pointerToLastByte=pointerToPreviousByte+aDescriptor.Length();
   1.252 +
   1.253 +	TInt numOfBytes = 0;
   1.254 +	FOREVER
   1.255 +		{
   1.256 +		if (pointerToPreviousByte>=pointerToLastByte)
   1.257 +			{
   1.258 +			break;
   1.259 +			}
   1.260 +		// byte 1
   1.261 +		TUint b1 = pointerToPreviousByte[1];
   1.262 +		if (b1 <= 0x80 || b1 > 0xFE)
   1.263 +			break;
   1.264 +
   1.265 +		// byte 2
   1.266 +		if (pointerToPreviousByte+1 >= pointerToLastByte)
   1.267 +			break;
   1.268 +		TUint b2 = pointerToPreviousByte[2];
   1.269 +		if (b2 >= 0x40 && b2 <= 0xFE && b2 != 0x7F)		// all gb18030 2-byte code
   1.270 +			{
   1.271 +			pointerToPreviousByte = pointerToPreviousByte + 2;
   1.272 +			numOfBytes = numOfBytes + 2;
   1.273 +			}
   1.274 +		else if (b2 < 0x30 || b2 > 0x39)
   1.275 +			{
   1.276 +			if (numOfBytes <= 0)
   1.277 +				return CCnvCharacterSetConverter::EErrorIllFormedInput;
   1.278 +			else
   1.279 +				break;
   1.280 +			}
   1.281 +		else
   1.282 +			break;
   1.283 +		}
   1.284 +		return numOfBytes;
   1.285 +	}
   1.286 +
   1.287 +
   1.288 +// gb18030 4-bytes bmp --> unicode (0x81308130 - 0x8439FE39)
   1.289 +LOCAL_C TInt Step2NumberOfBytesAbleToConvertToUnicode(const TDesC8& aDescriptor)
   1.290 +	{
   1.291 +	const TUint8* pointerToPreviousByte=aDescriptor.Ptr()-1;
   1.292 +	const TUint8* const pointerToLastByte=pointerToPreviousByte+aDescriptor.Length();
   1.293 +	__ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastByte, Panic(EPanicBadPointers25));
   1.294 +	
   1.295 +	TInt numOfBytes = 0;
   1.296 +	FOREVER
   1.297 +		{
   1.298 +		if (pointerToPreviousByte>=pointerToLastByte)
   1.299 +			{
   1.300 +			break;
   1.301 +			}
   1.302 +	
   1.303 +		// byte 1
   1.304 +		TUint b1 = pointerToPreviousByte[1];
   1.305 +		if ((b1 < 0x81) || (b1 > 0x84)){
   1.306 +			break;
   1.307 +		}
   1.308 +			
   1.309 +		// byte 2
   1.310 +		if (pointerToPreviousByte+1 >= pointerToLastByte)
   1.311 +			break;
   1.312 +		TUint b2 = pointerToPreviousByte[2];
   1.313 +		if (b2 >= 0x40 && b2 <= 0xFE && b2 != 0x7F)		// all gb18030 2-byte code
   1.314 +			break;
   1.315 +		else if (b2 < 0x30 || b2 > 0x39)
   1.316 +			{
   1.317 +			if (numOfBytes == 0)
   1.318 +				return CCnvCharacterSetConverter::EErrorIllFormedInput;
   1.319 +			else 
   1.320 +				break;
   1.321 +			}
   1.322 +
   1.323 +		
   1.324 +		// byte 3
   1.325 +		if (pointerToPreviousByte+2 >= pointerToLastByte)
   1.326 +			break;
   1.327 +		TUint b3 = pointerToPreviousByte[3];
   1.328 +		if (b3 < 0x81 || b3 > 0xFE)
   1.329 +			{
   1.330 +			if (numOfBytes == 0)
   1.331 +				return CCnvCharacterSetConverter::EErrorIllFormedInput;
   1.332 +			else 
   1.333 +				break;
   1.334 +			}
   1.335 +		
   1.336 +		// byte 4
   1.337 +		if (pointerToPreviousByte+3 >= pointerToLastByte)
   1.338 +			break;
   1.339 +		TUint b4 = pointerToPreviousByte[4];
   1.340 +		if (b4 < 0x30 || b4 > 0x39)
   1.341 +			{
   1.342 +			if (numOfBytes == 0)
   1.343 +				return CCnvCharacterSetConverter::EErrorIllFormedInput;
   1.344 +			else 
   1.345 +				break;
   1.346 +			}
   1.347 +		else
   1.348 +			{
   1.349 +				numOfBytes = numOfBytes + 4;
   1.350 +				pointerToPreviousByte = pointerToPreviousByte+4;
   1.351 +			}
   1.352 +		}
   1.353 +		
   1.354 +		return numOfBytes;
   1.355 +	}
   1.356 +
   1.357 +// gb18030 4-bytes non-bmp --> unicode (0x90308130~0xE339FE39)
   1.358 +LOCAL_C TInt Step3NumberOfBytesAbleToConvertToUnicode(const TDesC8& aDescriptor)
   1.359 +	{
   1.360 +	const TUint8* pointerToPreviousByte=aDescriptor.Ptr()-1;
   1.361 +	const TUint8* const pointerToLastByte=pointerToPreviousByte+aDescriptor.Length();
   1.362 +	__ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastByte, Panic(EPanicBadPointers25));
   1.363 +
   1.364 +	TInt numOfBytes = 0;
   1.365 +	FOREVER
   1.366 +		{
   1.367 +		if (pointerToPreviousByte>=pointerToLastByte)
   1.368 +			{
   1.369 +			break;
   1.370 +			}
   1.371 +		
   1.372 +		// byte 1
   1.373 +		TUint b1 = pointerToPreviousByte[1];
   1.374 +		if (b1 < 0x90 || b1 > 0xE3)
   1.375 +			break;
   1.376 +		
   1.377 +		// byte 2
   1.378 +		if (pointerToPreviousByte+1 >= pointerToLastByte)
   1.379 +			break;
   1.380 +		TUint b2 = pointerToPreviousByte[2];
   1.381 +		if (b2 >= 0x40 && b2 <= 0xFE && b2 != 0x7F)
   1.382 +			break;
   1.383 +		else if (b2 < 0x30 || b2 > 0x39)
   1.384 +			{
   1.385 +			if (numOfBytes == 0)
   1.386 +				return CCnvCharacterSetConverter::EErrorIllFormedInput;
   1.387 +			else 
   1.388 +				break;
   1.389 +			}
   1.390 +			
   1.391 +		// byte 3
   1.392 +		if (pointerToPreviousByte+2 >= pointerToLastByte)
   1.393 +			break;
   1.394 +		TUint b3 = pointerToPreviousByte[3];
   1.395 +		if (b3 < 0x81 || b3 > 0xFE)
   1.396 +			{
   1.397 +			if (numOfBytes == 0)
   1.398 +				return CCnvCharacterSetConverter::EErrorIllFormedInput;
   1.399 +			else 
   1.400 +				break;
   1.401 +			}
   1.402 +		
   1.403 +		// byte 4
   1.404 +		if (pointerToPreviousByte+3 >= pointerToLastByte)
   1.405 +			break;
   1.406 +		TUint b4 = pointerToPreviousByte[4];
   1.407 +		if (b4 < 0x30 || b4 > 0x39)
   1.408 +			{
   1.409 +			if (numOfBytes == 0)
   1.410 +				return CCnvCharacterSetConverter::EErrorIllFormedInput;
   1.411 +			else 
   1.412 +				break;
   1.413 +			}
   1.414 +		else 
   1.415 +			{
   1.416 +			numOfBytes = numOfBytes + 4;
   1.417 +			pointerToPreviousByte = pointerToPreviousByte + 4;
   1.418 +			}
   1.419 +		}
   1.420 +		return numOfBytes;
   1.421 +	}
   1.422 +	
   1.423 +void Step012DummyConvertToIntermediateBufferInPlace(TDes8&)
   1.424 +	{
   1.425 +	}
   1.426 +
   1.427 +// Perform the actual conversion (gb18030 4byte non-BMP -> unicode) using formula in this function
   1.428 +LOCAL_C void Step3ConvertToIntermediateBufferInPlace(TDes8& aDescriptor)
   1.429 +	{
   1.430 +	const TInt descriptorLength=aDescriptor.Length();
   1.431 +	__ASSERT_DEBUG(descriptorLength%4 == 0, Panic(EPanicNothingToConvert5));
   1.432 +	TUint8* pointerToTargetByte=CONST_CAST(TUint8*, aDescriptor.Ptr());
   1.433 +	const TUint8* pointerToSourceByte=pointerToTargetByte;
   1.434 +	const TUint8* const pointerToLastByte=pointerToSourceByte+descriptorLength;
   1.435 +	
   1.436 +	FOREVER
   1.437 +		{
   1.438 +		if (pointerToLastByte - pointerToSourceByte < 4)
   1.439 +			break;
   1.440 +		
   1.441 +		// conversion
   1.442 +		TUint8 b1 = pointerToSourceByte[0];
   1.443 +		TUint8 b2 = pointerToSourceByte[1];
   1.444 +		TUint8 b3 = pointerToSourceByte[2];
   1.445 +		TUint8 b4 = pointerToSourceByte[3];
   1.446 +		
   1.447 +		TUint characterCode = 0x10000 + (b1 - 0x90) * 12600 +
   1.448 +										(b2 - 0x30) * 1260 +
   1.449 +										(b3 - 0x81) * 10 +
   1.450 +										(b4 - 0x30);
   1.451 +		
   1.452 +		pointerToTargetByte[0] = ((characterCode >> 24) & 0xFF);
   1.453 +		pointerToTargetByte[1] = ((characterCode >> 16) & 0xFF);
   1.454 +		pointerToTargetByte[2] = ((characterCode >> 8) & 0xFF);
   1.455 +		pointerToTargetByte[3] = (characterCode & 0xFF);
   1.456 +		
   1.457 +		pointerToSourceByte = pointerToSourceByte + 4;
   1.458 +		pointerToTargetByte = pointerToTargetByte + 4;
   1.459 +		}
   1.460 +	
   1.461 +		aDescriptor.SetLength(descriptorLength);
   1.462 +	}
   1.463 +
   1.464 +
   1.465 +// A dummy "direct" mapping table for non-Bmp chars in step 3
   1.466 +// Use 32-bit Unicode value as intermediate coding
   1.467 +LOCAL_D const SCnvConversionData::SVariableByteData::SRange step3ForeignVariableByteDataRanges[]=
   1.468 +	{
   1.469 +		{
   1.470 +		0x00,		// from 0x10000
   1.471 +		0x00,		// to  0x10FFFF
   1.472 +		3,			// total 4 bytes
   1.473 +		0
   1.474 +		},		
   1.475 +	};
   1.476 +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange step3ForeignToUnicodeDataRanges[]=
   1.477 +	{
   1.478 +		{
   1.479 +		0x10000,	// from 0x10000
   1.480 +		0x10ffff,	// to  0x10FFFF
   1.481 +		SCnvConversionData::SOneDirectionData::SRange::EDirect,
   1.482 +		0,
   1.483 +		0,
   1.484 +			{
   1.485 +			0		// map from intermediate to unicode with offset = 0
   1.486 +			}
   1.487 +		},
   1.488 +	};
   1.489 +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange step3UnicodeToForeignDataRanges[]=
   1.490 +	{
   1.491 +		{
   1.492 +		0x10000,	//from 0x10000
   1.493 +		0x10FFFF, //to 0x10FFFF
   1.494 +		SCnvConversionData::SOneDirectionData::SRange::EDirect,
   1.495 +		4,			// output byte count = 4
   1.496 +		0,
   1.497 +			{
   1.498 +			0		// offset = 0
   1.499 +			}
   1.500 +		},
   1.501 +	};
   1.502 +GLDEF_D const SCnvConversionData step3ConversionData=
   1.503 +	{
   1.504 +	SCnvConversionData::EFixedBigEndian,
   1.505 +		{
   1.506 +		ARRAY_LENGTH(step3ForeignVariableByteDataRanges),
   1.507 +		step3ForeignVariableByteDataRanges
   1.508 +		},
   1.509 +		{
   1.510 +		ARRAY_LENGTH(step3ForeignToUnicodeDataRanges),
   1.511 +		step3ForeignToUnicodeDataRanges
   1.512 +		},
   1.513 +		{
   1.514 +		ARRAY_LENGTH(step3UnicodeToForeignDataRanges),
   1.515 +		step3UnicodeToForeignDataRanges
   1.516 +		},
   1.517 +	NULL,
   1.518 +	NULL
   1.519 +	};
   1.520 +
   1.521 +
   1.522 +// An internal mapping table to reslove the conflict introduced in symbian GB2312-80 plug-in.
   1.523 +// It will be merged into the gb18030-2byte Conversion Data.
   1.524 +// It includes mapping: (0xA1A4 -> 0x00B7, 0xA1AA -> 0x2014, 0xA844 <- 0x2015, 0x8139A739 <- 0x30FB)
   1.525 +LOCAL_D const SCnvConversionData::SVariableByteData::SRange gb18030_diff_gb2312ForeignVariableByteDataRanges[]=
   1.526 +	{
   1.527 +		{
   1.528 +		0xA1, //from 0xA1A4
   1.529 +		0xA1, //to 0xA1AA
   1.530 +		1,			
   1.531 +		0
   1.532 +		},		
   1.533 +	};
   1.534 +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable1616::SEntry keyedTable1616_foreignToUnicode_1[]=
   1.535 +	{
   1.536 +			{
   1.537 +			0xA1A4,
   1.538 +			0x00B7
   1.539 +			},
   1.540 +			{
   1.541 +			0xA1AA,
   1.542 +			0x2014
   1.543 +			}
   1.544 +	};
   1.545 +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange gb18030_diff_gb2312ForeignToUnicodeDataRanges[]=
   1.546 +	{
   1.547 +		{
   1.548 +		0xA1A4,
   1.549 +		0xA1AA,
   1.550 +		SCnvConversionData::SOneDirectionData::SRange::EKeyedTable1616,
   1.551 +		0,
   1.552 +		0,
   1.553 +			{
   1.554 +			UData_SKeyedTable1616(keyedTable1616_foreignToUnicode_1)
   1.555 +			}
   1.556 +		},
   1.557 +	};
   1.558 +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable1616::SEntry keyedTable1616_unicodeToForeign_1[]=
   1.559 +	{
   1.560 +			{
   1.561 +			0x2015,
   1.562 +			0xA844
   1.563 +			}
   1.564 +	};
   1.565 +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SKeyedTable3232::SEntry keyedTable3232_unicodeToForeign_1[]=
   1.566 +	{
   1.567 +			{
   1.568 +			0x30FB,
   1.569 +			0x8139A739
   1.570 +			}
   1.571 +	};
   1.572 +
   1.573 +LOCAL_D const SCnvConversionData::SOneDirectionData::SRange gb18030_diff_gb2312UnicodeToForeignDataRanges[]=
   1.574 +	{
   1.575 +		{
   1.576 +		0x2015,
   1.577 +		0x2015,
   1.578 +		SCnvConversionData::SOneDirectionData::SRange::EKeyedTable1616,
   1.579 +		2,			// output byte count = 2
   1.580 +		0,
   1.581 +			{
   1.582 +			UData_SKeyedTable1616(keyedTable1616_unicodeToForeign_1)
   1.583 +			}
   1.584 +		},
   1.585 +		{
   1.586 +		0x30FB,
   1.587 +		0x30FB,
   1.588 +		SCnvConversionData::SOneDirectionData::SRange::EKeyedTable3232,
   1.589 +		4,			// output byte count = 4
   1.590 +		0,
   1.591 +			{
   1.592 +			UData_SKeyedTable3232(keyedTable3232_unicodeToForeign_1)
   1.593 +			}
   1.594 +		},
   1.595 +	};
   1.596 +GLDEF_D const SCnvConversionData gb18030_diff_gb2312ConversionData=
   1.597 +	{
   1.598 +	SCnvConversionData::EFixedBigEndian,
   1.599 +		{
   1.600 +		ARRAY_LENGTH(gb18030_diff_gb2312ForeignVariableByteDataRanges),
   1.601 +		gb18030_diff_gb2312ForeignVariableByteDataRanges
   1.602 +		},
   1.603 +		{
   1.604 +		ARRAY_LENGTH(gb18030_diff_gb2312ForeignToUnicodeDataRanges),
   1.605 +		gb18030_diff_gb2312ForeignToUnicodeDataRanges
   1.606 +		},
   1.607 +		{
   1.608 +		ARRAY_LENGTH(gb18030_diff_gb2312UnicodeToForeignDataRanges),
   1.609 +		gb18030_diff_gb2312UnicodeToForeignDataRanges
   1.610 +		},
   1.611 +	NULL,
   1.612 +	NULL
   1.613 +	};
   1.614 +
   1.615 +LOCAL_D const SCnvConversionData::SVariableByteData::SRange foreignVariableByteDataRanges[]=
   1.616 +	{
   1.617 +		{
   1.618 +		0x00,
   1.619 +		0x7f,
   1.620 +		0,
   1.621 +		0
   1.622 +		},
   1.623 +		{
   1.624 +		0x80,
   1.625 +		0xff,
   1.626 +		1,
   1.627 +		0
   1.628 +		}
   1.629 +	};
   1.630 +
   1.631 +LOCAL_C void SetUpCompleteGb18030_2byteConversionData(SCnvConversionData& aCompleteGb18030_2byteConversionData, TUint8* aWorkingMemory)
   1.632 +	{
   1.633 +	const SCnvConversionData& gb2312ConversionData=CnvGb2312::ConversionData();
   1.634 +	const SCnvConversionData& gb18030_diff_gbkConversionData=CnvGb18030_diff_gbk::ConversionData();
   1.635 +	const SCnvConversionData& gbkConversionData=CnvGbk::ConversionData();
   1.636 +	// create a SCnvConversionData that is the combination of gb18030_diff_gb2312ConversionData, gb2312ConversionData, gb18030_diff_gbkConversionData and gbkConversionData;
   1.637 +	aCompleteGb18030_2byteConversionData.iEndiannessOfForeignCharacters=SCnvConversionData::EFixedBigEndian;
   1.638 +	aCompleteGb18030_2byteConversionData.iForeignVariableByteData.iNumberOfRanges=ARRAY_LENGTH(foreignVariableByteDataRanges);
   1.639 +	aCompleteGb18030_2byteConversionData.iForeignVariableByteData.iRangeArray=foreignVariableByteDataRanges;
   1.640 +	TInt numberOfBytesOfWorkingMemoryUsed=0;
   1.641 +	
   1.642 +	// set up the foreign-to-Unicode data
   1.643 +	const TInt numberOfForeignToUnicodeDataRanges=gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges + gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges + gb18030_diff_gbkConversionData.iForeignToUnicodeData.iNumberOfRanges + gbkConversionData.iForeignToUnicodeData.iNumberOfRanges;
   1.644 +	aCompleteGb18030_2byteConversionData.iForeignToUnicodeData.iNumberOfRanges=numberOfForeignToUnicodeDataRanges;
   1.645 +	SCnvConversionData::SOneDirectionData::SRange* foreignToUnicodeDataRangeArray=REINTERPRET_CAST(SCnvConversionData::SOneDirectionData::SRange*, aWorkingMemory+numberOfBytesOfWorkingMemoryUsed);
   1.646 +	numberOfBytesOfWorkingMemoryUsed+=(numberOfForeignToUnicodeDataRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange));
   1.647 +	__ASSERT_ALWAYS(numberOfBytesOfWorkingMemoryUsed<=KNumberOfBytesOfWorkingMemory, Panic(EPanicTooManyBytesOfWorkingMemoryUsed1));
   1.648 +	aCompleteGb18030_2byteConversionData.iForeignToUnicodeData.iRangeArray=foreignToUnicodeDataRangeArray;
   1.649 +	Mem::Copy(foreignToUnicodeDataRangeArray, gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iRangeArray, gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange));
   1.650 +	Mem::Copy(foreignToUnicodeDataRangeArray + gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges, gb2312ConversionData.iForeignToUnicodeData.iRangeArray, gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange));
   1.651 +	Mem::Copy(foreignToUnicodeDataRangeArray + gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges + gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges, gb18030_diff_gbkConversionData.iForeignToUnicodeData.iRangeArray, gb18030_diff_gbkConversionData.iForeignToUnicodeData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange));
   1.652 +	Mem::Copy(foreignToUnicodeDataRangeArray + gb18030_diff_gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges + gb2312ConversionData.iForeignToUnicodeData.iNumberOfRanges + gb18030_diff_gbkConversionData.iForeignToUnicodeData.iNumberOfRanges, gbkConversionData.iForeignToUnicodeData.iRangeArray, gbkConversionData.iForeignToUnicodeData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange));
   1.653 +
   1.654 +	// set up the Unicode-to-foreign data
   1.655 +	const TInt numberOfUnicodeToForeignDataRanges=gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges + gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges + gb18030_diff_gbkConversionData.iUnicodeToForeignData.iNumberOfRanges + gbkConversionData.iUnicodeToForeignData.iNumberOfRanges;
   1.656 +	aCompleteGb18030_2byteConversionData.iUnicodeToForeignData.iNumberOfRanges=numberOfUnicodeToForeignDataRanges;
   1.657 +	SCnvConversionData::SOneDirectionData::SRange* unicodeToForeignDataRangeArray=REINTERPRET_CAST(SCnvConversionData::SOneDirectionData::SRange*, aWorkingMemory+numberOfBytesOfWorkingMemoryUsed);
   1.658 +	numberOfBytesOfWorkingMemoryUsed+=(numberOfUnicodeToForeignDataRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange));
   1.659 +	__ASSERT_ALWAYS(numberOfBytesOfWorkingMemoryUsed<=KNumberOfBytesOfWorkingMemory, Panic(EPanicTooManyBytesOfWorkingMemoryUsed2));
   1.660 +	aCompleteGb18030_2byteConversionData.iUnicodeToForeignData.iRangeArray=unicodeToForeignDataRangeArray;
   1.661 +	Mem::Copy(unicodeToForeignDataRangeArray, gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iRangeArray, gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange));
   1.662 +	Mem::Copy(unicodeToForeignDataRangeArray + gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges, gb2312ConversionData.iUnicodeToForeignData.iRangeArray, gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange));
   1.663 +	Mem::Copy(unicodeToForeignDataRangeArray + gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges + gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges, gb18030_diff_gbkConversionData.iUnicodeToForeignData.iRangeArray, gb18030_diff_gbkConversionData.iUnicodeToForeignData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange));
   1.664 +	Mem::Copy(unicodeToForeignDataRangeArray + gb18030_diff_gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges + gb2312ConversionData.iUnicodeToForeignData.iNumberOfRanges + gb18030_diff_gbkConversionData.iUnicodeToForeignData.iNumberOfRanges, gbkConversionData.iUnicodeToForeignData.iRangeArray, gbkConversionData.iUnicodeToForeignData.iNumberOfRanges*sizeof(SCnvConversionData::SOneDirectionData::SRange));
   1.665 +	}
   1.666 +
   1.667 +
   1.668 +TInt CGB18030ConverterImpl::ConvertFromUnicode(
   1.669 +		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
   1.670 +		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
   1.671 +		TDes8& aForeign, 
   1.672 +		const TDesC16& aUnicode, 
   1.673 +		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
   1.674 +	{
   1.675 +	TFixedArray<CnvUtilities::SCharacterSet, 3> characterSets;
   1.676 +	
   1.677 +	// step 1) gb18030-2byte
   1.678 +	characterSets[0].iConversionData						= completeGb18030_2byteConversionData;
   1.679 +	characterSets[0].iConvertFromIntermediateBufferInPlace	= Step12DummyConvertFromIntermediateBufferInPlace;
   1.680 +	characterSets[0].iEscapeSequence						= &KNullDesC8;
   1.681 +	
   1.682 +	// step 2) gb18030-4byte BMP
   1.683 +	characterSets[1].iConversionData						= &CnvGb18030_4byte::ConversionData();
   1.684 +	characterSets[1].iConvertFromIntermediateBufferInPlace	= Step12DummyConvertFromIntermediateBufferInPlace;
   1.685 +	characterSets[1].iEscapeSequence						= &KNullDesC8;
   1.686 +	
   1.687 +	// step 3) gb18030-4byte non-BMP
   1.688 +	characterSets[2].iConversionData						= &step3ConversionData;
   1.689 +	characterSets[2].iConvertFromIntermediateBufferInPlace	= Step3ConvertFromIntermediateBufferInPlace;
   1.690 +	characterSets[2].iEscapeSequence						= &KNullDesC8;
   1.691 +	
   1.692 +	return CnvUtilities::ConvertFromUnicode(aDefaultEndiannessOfForeignCharacters, aReplacementForUnconvertibleUnicodeCharacters, aForeign, aUnicode, aIndicesOfUnconvertibleCharacters, characterSets.Array());
   1.693 +	}
   1.694 +
   1.695 +
   1.696 +TInt CGB18030ConverterImpl::ConvertToUnicode(
   1.697 +		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
   1.698 +		TDes16& aUnicode, 
   1.699 +		const TDesC8& aForeign, 
   1.700 +		TInt& /*aState*/, 
   1.701 +		TInt& aNumberOfUnconvertibleCharacters, 
   1.702 +		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
   1.703 +	{
   1.704 +	TFixedArray<CnvUtilities::SMethod, 4> methods;
   1.705 +	// step 0) gb2312-1byte
   1.706 +	methods[0].iNumberOfBytesAbleToConvert			= Step0NumberOfBytesAbleToConvertToUnicode;
   1.707 +	methods[0].iConvertToIntermediateBufferInPlace	= Step012DummyConvertToIntermediateBufferInPlace;
   1.708 +	methods[0].iConversionData						= &CnvGb2312::ConversionData(); //only use one byte part
   1.709 +	methods[0].iNumberOfBytesPerCharacter			= 1;
   1.710 +	methods[0].iNumberOfCoreBytesPerCharacter		= 1;
   1.711 +	
   1.712 +	// step 1) gb18030-2byte
   1.713 +	methods[1].iNumberOfBytesAbleToConvert			= Step1NumberOfBytesAbleToConvertToUnicode;
   1.714 +	methods[1].iConvertToIntermediateBufferInPlace	= Step012DummyConvertToIntermediateBufferInPlace;
   1.715 +	methods[1].iConversionData						= completeGb18030_2byteConversionData;
   1.716 +	methods[1].iNumberOfBytesPerCharacter			= 2;
   1.717 +	methods[1].iNumberOfCoreBytesPerCharacter		= 2;
   1.718 +	
   1.719 +	// step 2) gb18030 4-byte BMP
   1.720 +	methods[2].iNumberOfBytesAbleToConvert			= Step2NumberOfBytesAbleToConvertToUnicode;
   1.721 +	methods[2].iConvertToIntermediateBufferInPlace	= Step012DummyConvertToIntermediateBufferInPlace;
   1.722 +	methods[2].iConversionData						= &CnvGb18030_4byte::ConversionData();
   1.723 +	methods[2].iNumberOfBytesPerCharacter			= 4;
   1.724 +	methods[2].iNumberOfCoreBytesPerCharacter		= 4;
   1.725 +
   1.726 +	// step 3) gb18030 4-byte non-BMP
   1.727 +	methods[3].iNumberOfBytesAbleToConvert			= Step3NumberOfBytesAbleToConvertToUnicode;
   1.728 +	methods[3].iConvertToIntermediateBufferInPlace	= Step3ConvertToIntermediateBufferInPlace;
   1.729 +	methods[3].iConversionData						= &step3ConversionData;
   1.730 +	methods[3].iNumberOfBytesPerCharacter			= 4;
   1.731 +	methods[3].iNumberOfCoreBytesPerCharacter		= 4;
   1.732 +	
   1.733 +	return CnvUtilities::ConvertToUnicodeFromHeterogeneousForeign(aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign, aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter, methods.Array());
   1.734 +	}
   1.735 +
   1.736 +TBool CGB18030ConverterImpl::IsInThisCharacterSetL(
   1.737 +		TBool& aSetToTrue, 
   1.738 +		TInt& aConfidenceLevel, 
   1.739 +		const TDesC8& aSample)
   1.740 +	{
   1.741 +	aSetToTrue = ETrue;
   1.742 +	return CnvGb2312::IsCharGBBased(aConfidenceLevel, aSample);
   1.743 +	}
   1.744 +
   1.745 +CGB18030ConverterImpl* CGB18030ConverterImpl::NewL()
   1.746 +	{
   1.747 +	CGB18030ConverterImpl* self = new(ELeave) CGB18030ConverterImpl();
   1.748 +	CleanupStack::PushL(self);
   1.749 +	self->ConstructL();
   1.750 +	CleanupStack::Pop(); // self
   1.751 +	return self;
   1.752 +	}
   1.753 +
   1.754 +CGB18030ConverterImpl::~CGB18030ConverterImpl()
   1.755 +	{
   1.756 +	if (workingMemory)
   1.757 +		delete[] workingMemory;
   1.758 +	if (completeGb18030_2byteConversionData)
   1.759 +		delete completeGb18030_2byteConversionData;
   1.760 +	}
   1.761 +
   1.762 +CGB18030ConverterImpl::CGB18030ConverterImpl()
   1.763 +	{
   1.764 +	}
   1.765 +
   1.766 +TInt CGB18030ConverterImpl::ConstructL()
   1.767 +	{
   1.768 +	completeGb18030_2byteConversionData = new (ELeave)SCnvConversionData;
   1.769 +	CleanupStack::PushL(completeGb18030_2byteConversionData);
   1.770 +	workingMemory = new (ELeave) TUint8[KNumberOfBytesOfWorkingMemory]; //1040 bytes
   1.771 +	CleanupStack::Pop(); // completeGb18030_2byteConversionData
   1.772 +	SetUpCompleteGb18030_2byteConversionData(*completeGb18030_2byteConversionData, workingMemory);
   1.773 +	return 1;
   1.774 +	}
   1.775 +
   1.776 +const TImplementationProxy ImplementationTable[] = 
   1.777 +	{
   1.778 +		IMPLEMENTATION_PROXY_ENTRY(0x10287038,CGB18030ConverterImpl::NewL)
   1.779 +	};
   1.780 +
   1.781 +EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount)
   1.782 +	{
   1.783 +	aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy);
   1.784 +
   1.785 +	return ImplementationTable;
   1.786 +	}
   1.787 +