os/textandloc/charconvfw/charconvplugins/src/plugins/hz.cpp
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/textandloc/charconvfw/charconvplugins/src/plugins/hz.cpp	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,577 @@
     1.4 +/*
     1.5 +* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
     1.6 +* All rights reserved.
     1.7 +* This component and the accompanying materials are made available
     1.8 +* under the terms of "Eclipse Public License v1.0"
     1.9 +* which accompanies this distribution, and is available
    1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
    1.11 +*
    1.12 +* Initial Contributors:
    1.13 +* Nokia Corporation - initial contribution.
    1.14 +*
    1.15 +* Contributors:
    1.16 +*
    1.17 +* Description: 
    1.18 +* HZ is defined in RFC 1843
    1.19 +*
    1.20 +*/
    1.21 +
    1.22 +
    1.23 +#include <e32std.h>
    1.24 +#include <charconv.h>
    1.25 +#include "gb2312.h"
    1.26 +#include <ecom/implementationproxy.h>
    1.27 +#include <charactersetconverter.h>
    1.28 +
    1.29 +const TInt KIsInGbBlock=CCnvCharacterSetConverter::KStateDefault+1; // value of aState while decoding inside a "~{" ... "~}" GB block; outside one, aState is CCnvCharacterSetConverter::KStateDefault
    1.30 +#if defined(_DEBUG)
    1.31 +const TInt KLengthOfIntermediateBuffer=6; // capacity in bytes of the GB2312 staging buffer used by ConvertToUnicode; NOTE(review): presumably kept tiny in debug builds so that short test inputs exercise the chunk-splitting code - confirm
    1.32 +#else
    1.33 +const TInt KLengthOfIntermediateBuffer=150; // capacity in bytes of the GB2312 staging buffer used by ConvertToUnicode
    1.34 +#endif
    1.35 +
    1.36 +#if defined(_DEBUG)
    1.37 +
    1.38 +_LIT(KLitPanicText, "HZ"); // panic category handed to User::Panic by Panic()
    1.39 +
    1.40 +enum TPanic // reasons raised through Panic() by the __ASSERT_DEBUG checks in this file (debug builds only); numbering starts at 1 and follows declaration order
    1.41 +	{
    1.42 +	EPanicTooManyMatchingIndicesFound=1,
    1.43 +	EPanicBadNumberOfBytesRequiredToBeAvailable,
    1.44 +	EPanicBadNumberOfBytesAvailable,
    1.45 +	EPanicBadNumberOfBytesThatCanBeMadeAvailable,
    1.46 +	EPanicBadNumberOfBytesMadeAvailable1,
    1.47 +	EPanicBadNumberOfBytesMadeAvailable2,
    1.48 +	EPanicBadDescriptorSubDivision1,
    1.49 +	EPanicBadDescriptorSubDivision2,
    1.50 +	EPanicBadDescriptorSubDivision3,
    1.51 +	EPanicBadDescriptorSubDivision4,
    1.52 +	EPanicBadPointers1,
    1.53 +	EPanicBadPointers2,
    1.54 +	EPanicBadPointers3,
    1.55 +	EPanicBadPointers4,
    1.56 +	EPanicBadPointers5,
    1.57 +	EPanicBadPointers6,
    1.58 +	EPanicBadPointers7,
    1.59 +	EPanicBadPointers8,
    1.60 +	EPanicBadPointers9,
    1.61 +	EPanicBadPointers10,
    1.62 +	EPanicBadPointers11,
    1.63 +	EPanicBadPointers12,
    1.64 +	EPanicStillInGbBlock,
    1.65 +	EPanicBadState,
    1.66 +	EPanicSplitBoundaryIsNotAsLateAsPossible1,
    1.67 +	EPanicSplitBoundaryIsNotAsLateAsPossible2,
    1.68 +	EPanicBadGb2312Index,
    1.69 +	EPanicBadHzIndex,
    1.70 +	EPanicBadTildeSequence,
    1.71 +	EPanicBadReturnValue1,
    1.72 +	EPanicBadReturnValue2,
    1.73 +	EPanicRemainderOfHzHasGotLonger
    1.74 +	};
    1.75 +
    1.76 +LOCAL_C void Panic(TPanic aPanic) // raises a panic in category KLitPanicText ("HZ") with aPanic as the reason; this helper sits inside the surrounding #if defined(_DEBUG) section
    1.77 +	{
    1.78 +	User::Panic(KLitPanicText, aPanic);
    1.79 +	}
    1.80 +
    1.81 +#endif
    1.82 +
    1.83 +class CHZConverterImpl : public CCharacterSetConverterPluginInterface // character-set converter plug-in for the HZ encoding (RFC 1843), implemented on top of the GB2312 conversion data
    1.84 +	{
    1.85 +
    1.86 +public:
    1.87 +	virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters(); // returns the replacement sequence supplied by CnvGb2312
    1.88 +
    1.89 +	virtual TInt ConvertFromUnicode( // Unicode -> HZ; returns the number of Unicode characters not converted, or a negative error code
    1.90 +		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
    1.91 +		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
    1.92 +		TDes8& aForeign, 
    1.93 +		const TDesC16& aUnicode, 
    1.94 +		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
    1.95 +
    1.96 +	virtual TInt ConvertToUnicode( // HZ -> Unicode; returns the number of bytes of aForeign not consumed, or a negative error code; aState carries the "inside a GB block" state between calls
    1.97 +		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
    1.98 +		TDes16& aUnicode, 
    1.99 +		const TDesC8& aForeign, 
   1.100 +		TInt& aState, 
   1.101 +		TInt& aNumberOfUnconvertibleCharacters, 
   1.102 +		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
   1.103 +
   1.104 +	virtual TBool IsInThisCharacterSetL( // auto-detection heuristic: scores how likely aSample is to be HZ-encoded
   1.105 +		TBool& aSetToTrue, 
   1.106 +		TInt& aConfidenceLevel, 
   1.107 +		const TDesC8& aSample);
   1.108 +
   1.109 +	static CHZConverterImpl* NewL(); // factory referenced from ImplementationTable
   1.110 +	virtual ~CHZConverterImpl();
   1.111 +
   1.112 +private:
   1.113 +	CHZConverterImpl(); // private: instances are created through NewL()
   1.114 +
   1.115 +	};
   1.116 +
   1.117 +
   1.118 +
   1.119 +const TDesC8& CHZConverterImpl::ReplacementForUnconvertibleUnicodeCharacters() // HZ has no replacement sequence of its own here: the one defined by CnvGb2312 is returned unchanged
   1.120 +	{
   1.121 +	return CnvGb2312::ReplacementForUnconvertibleUnicodeCharacters();
   1.122 +	}
   1.123 +
   1.124 +LOCAL_C void IncrementNumberOfUnicodeCharactersNotConverted(TInt aLengthOfUnicode, TInt& aNumberOfUnicodeCharactersNotConverted, CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters) // counts one more Unicode character (taken from the end of the text) as not converted and removes any entry for that character from aIndicesOfUnconvertibleCharacters; the parameter order deliberately lines up the second and third parameters with the caller's
   1.125 +	{
   1.126 +	const TInt indexNowNotConverted=aLengthOfUnicode-(++aNumberOfUnicodeCharactersNotConverted); // index of the character that has just been given up on
   1.127 +#if defined(_DEBUG)
   1.128 +	TInt matchesFound=0;
   1.129 +#endif
   1.130 +	TInt slot=aIndicesOfUnconvertibleCharacters.NumberOfIndices(); // scan from the back, because entries may be removed along the way
   1.131 +	while (--slot>=0)
   1.132 +		{
   1.133 +		if (aIndicesOfUnconvertibleCharacters[slot]==indexNowNotConverted)
   1.134 +			{
   1.135 +			aIndicesOfUnconvertibleCharacters.Remove(slot);
   1.136 +#if defined(_DEBUG)
   1.137 +			++matchesFound;
   1.138 +#endif
   1.139 +			}
   1.140 +		}
   1.141 +	__ASSERT_DEBUG(matchesFound<=1, Panic(EPanicTooManyMatchingIndicesFound));
   1.142 +	}
   1.143 +
   1.144 +LOCAL_C void MakeAvailable(TInt aNumberOfBytesRequiredToBeAvailable, TInt& aNumberOfUnicodeCharactersNotConverted, CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, TInt aLengthOfUnicode, const TUint8*& aPointerToLastUsedByte, TInt& aNumberOfBytesAvailable, TInt aNumberOfBytesThatCanBeMadeAvailable) // frees space at the end of the text by giving up whole characters (two bytes when the last used byte has bit 7 set, otherwise one) until aNumberOfBytesAvailable reaches aNumberOfBytesRequiredToBeAvailable or aNumberOfBytesThatCanBeMadeAvailable bytes have been given up; the parameter order deliberately lines up the second to fourth parameters with the caller's
   1.145 +// as much as possible is freed even when the request cannot be met in full, i.e. aNumberOfBytesAvailable may still be less than aNumberOfBytesRequiredToBeAvailable on return
   1.146 +	{
   1.147 +	__ASSERT_DEBUG(aNumberOfBytesRequiredToBeAvailable>0, Panic(EPanicBadNumberOfBytesRequiredToBeAvailable));
   1.148 +	__ASSERT_DEBUG(aNumberOfBytesAvailable>=0, Panic(EPanicBadNumberOfBytesAvailable));
   1.149 +	__ASSERT_DEBUG(aNumberOfBytesThatCanBeMadeAvailable>=0, Panic(EPanicBadNumberOfBytesThatCanBeMadeAvailable));
   1.150 +	TInt bytesGivenUp=0;
   1.151 +	while (aNumberOfBytesAvailable<aNumberOfBytesRequiredToBeAvailable) // loop ends as soon as enough room exists
   1.152 +		{
   1.153 +		__ASSERT_DEBUG(bytesGivenUp<=aNumberOfBytesThatCanBeMadeAvailable, Panic(EPanicBadNumberOfBytesMadeAvailable1));
   1.154 +		if (bytesGivenUp>=aNumberOfBytesThatCanBeMadeAvailable)
   1.155 +			{
   1.156 +			break; // the budget is exhausted - nothing more may be given up
   1.157 +			}
   1.158 +		const TInt widthOfLastCharacter=((*aPointerToLastUsedByte)&0x80)? 2: 1;
   1.159 +		aPointerToLastUsedByte-=widthOfLastCharacter;
   1.160 +		aNumberOfBytesAvailable+=widthOfLastCharacter;
   1.161 +		bytesGivenUp+=widthOfLastCharacter;
   1.162 +		IncrementNumberOfUnicodeCharactersNotConverted(aLengthOfUnicode, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters);
   1.163 +		}
   1.164 +	__ASSERT_DEBUG(bytesGivenUp<=aNumberOfBytesThatCanBeMadeAvailable, Panic(EPanicBadNumberOfBytesMadeAvailable2));
   1.165 +	}
   1.170 +
   1.171 +LOCAL_C void ConvertFromGb2312ToHzInPlace(TDes8& aDescriptor, TInt& aNumberOfUnicodeCharactersNotConverted, CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, TInt aLengthOfUnicode) // rewrites the text in aDescriptor as HZ in place: a byte with bit 7 set starts a two-byte character, runs of which are wrapped in "~{" ... "~}" with bit 7 cleared on both bytes, and each literal "~" is doubled; when aDescriptor's MaxLength() leaves no room for the extra bytes, characters are given up from the end and aNumberOfUnicodeCharactersNotConverted is increased accordingly
   1.172 +	{
   1.173 +	// it is legal for aDescriptor to be of length 0
   1.174 +	const TInt originalLengthOfDescriptor=aDescriptor.Length();
   1.175 +	if (originalLengthOfDescriptor>0)
   1.176 +		{
   1.177 +		TInt numberOfBytesAvailable=aDescriptor.MaxLength()-originalLengthOfDescriptor; // unused capacity after the text
   1.178 +		TUint8* pointerToPreviousByte=CONST_CAST(TUint8*, aDescriptor.Ptr()-1); // always one byte before the next byte to examine, so it starts one byte before the buffer
   1.179 +		const TUint8* pointerToLastUsedByte=pointerToPreviousByte+originalLengthOfDescriptor; // last byte that currently holds text; moves as bytes are inserted or given up
   1.180 +		TBool isInGbBlock=EFalse;
   1.181 +		FOREVER
   1.182 +			{
   1.183 +			__ASSERT_DEBUG((pointerToLastUsedByte-(aDescriptor.Ptr()-1))+numberOfBytesAvailable==aDescriptor.MaxLength(), Panic(EPanicBadDescriptorSubDivision1));
   1.184 +			__ASSERT_DEBUG(pointerToPreviousByte<pointerToLastUsedByte, Panic(EPanicBadPointers1));
   1.185 +			const TUint currentByte=*(pointerToPreviousByte+1);
   1.186 +			if (currentByte&0x80) // first byte of a two-byte character
   1.187 +				{
   1.188 +				if (!isInGbBlock)
   1.189 +					{
   1.190 +					MakeAvailable(4, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters, aLengthOfUnicode, pointerToLastUsedByte, numberOfBytesAvailable, (pointerToLastUsedByte-pointerToPreviousByte)-2); // what's passed into the last parameter is not a typo - we do not want the two-byte character currently pointed to by (pointerToPreviousByte+1) to be made available
   1.191 +					if (numberOfBytesAvailable<4) // 4 bytes are required for the "~{" "~}" escape sequences (thus ensuring that at least a single double-byte character can be put into the GB-block)
   1.192 +						{
   1.193 +						break;
   1.194 +						}
   1.195 +					isInGbBlock=ETrue;
   1.196 +					Mem::Copy(pointerToPreviousByte+3, pointerToPreviousByte+1, pointerToLastUsedByte-pointerToPreviousByte); // shift the remaining text up by two bytes to open a gap for "~{"
   1.197 +					++pointerToPreviousByte;
   1.198 +					*pointerToPreviousByte='~';
   1.199 +					++pointerToPreviousByte;
   1.200 +					*pointerToPreviousByte='{';
   1.201 +					numberOfBytesAvailable-=2;
   1.202 +					pointerToLastUsedByte+=2;
   1.203 +					}
   1.204 +				++pointerToPreviousByte;
   1.205 +				*pointerToPreviousByte&=~0x80; // clear bit 7 of the first byte of the character
   1.206 +				__ASSERT_DEBUG(pointerToPreviousByte<pointerToLastUsedByte, Panic(EPanicBadPointers2));
   1.207 +				++pointerToPreviousByte;
   1.208 +				*pointerToPreviousByte&=~0x80; // and of its second byte
   1.209 +				}
   1.210 +			else
   1.211 +				{
   1.212 +				if (isInGbBlock)
   1.213 +					{
   1.214 +closeGbBlock: // also entered by goto from the end-of-text check at the bottom of the loop
   1.215 +					isInGbBlock=EFalse;
   1.216 +					MakeAvailable(2, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters, aLengthOfUnicode, pointerToLastUsedByte, numberOfBytesAvailable, pointerToLastUsedByte-pointerToPreviousByte);
   1.217 +					if (numberOfBytesAvailable<2) // 2 bytes are required for the "~}" escape sequence
   1.218 +						{
   1.219 +						IncrementNumberOfUnicodeCharactersNotConverted(aLengthOfUnicode, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters); // still no room: one more character is counted as not converted and the two most recently processed bytes are overwritten with "~}"
   1.220 +						*(pointerToPreviousByte-1)='~';
   1.221 +						*pointerToPreviousByte='}';
   1.222 +						break;
   1.223 +						}
   1.224 +					Mem::Copy(pointerToPreviousByte+3, pointerToPreviousByte+1, pointerToLastUsedByte-pointerToPreviousByte); // shift the remaining text up by two bytes to open a gap for "~}"
   1.225 +					++pointerToPreviousByte;
   1.226 +					*pointerToPreviousByte='~';
   1.227 +					++pointerToPreviousByte;
   1.228 +					*pointerToPreviousByte='}';
   1.229 +					numberOfBytesAvailable-=2;
   1.230 +					pointerToLastUsedByte+=2;
   1.231 +					__ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastUsedByte, Panic(EPanicBadPointers3));
   1.232 +					if (pointerToPreviousByte>=pointerToLastUsedByte) // the "~}" just written is the end of the text
   1.233 +						{
   1.234 +						break;
   1.235 +						}
   1.236 +					}
   1.237 +				if (currentByte=='~')
   1.238 +					{
   1.239 +					MakeAvailable(1, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters, aLengthOfUnicode, pointerToLastUsedByte, numberOfBytesAvailable, (pointerToLastUsedByte-pointerToPreviousByte)-1); // what's passed into the last parameter is not a typo - we do not want the "~" currently pointed to by (pointerToPreviousByte+1) to be made available
   1.240 +					if (numberOfBytesAvailable<1) // 1 byte is required for the extra "~" character
   1.241 +						{
   1.242 +						break;
   1.243 +						}
   1.244 +					Mem::Copy(pointerToPreviousByte+2, pointerToPreviousByte+1, pointerToLastUsedByte-pointerToPreviousByte); // shift the remaining text up by one byte to open a gap for the extra "~"
   1.245 +					++pointerToPreviousByte;
   1.246 +					*pointerToPreviousByte='~';
   1.247 +					numberOfBytesAvailable-=1;
   1.248 +					pointerToLastUsedByte+=1;
   1.249 +					}
   1.250 +				++pointerToPreviousByte; // step over the one-byte character itself
   1.251 +				}
   1.252 +			__ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastUsedByte, Panic(EPanicBadPointers4));
   1.253 +			if (pointerToPreviousByte>=pointerToLastUsedByte) // every byte of the text has been processed
   1.254 +				{
   1.255 +				if (isInGbBlock)
   1.256 +					{
   1.257 +					goto closeGbBlock; // this is to share the code for closing the GB-block
   1.258 +					}
   1.259 +				break;
   1.260 +				}
   1.261 +			}
   1.262 +		__ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastUsedByte, Panic(EPanicBadPointers5));
   1.263 +		if (pointerToPreviousByte<pointerToLastUsedByte) // the loop was left early: the one or two bytes still unprocessed are dropped and counted as one character not converted
   1.264 +			{
   1.265 +			__ASSERT_DEBUG((pointerToPreviousByte==pointerToLastUsedByte-1) || (pointerToPreviousByte==pointerToLastUsedByte-2), Panic(EPanicBadPointers6));
   1.266 +			numberOfBytesAvailable+=(pointerToLastUsedByte-pointerToPreviousByte);
   1.267 +			pointerToLastUsedByte=pointerToPreviousByte;
   1.268 +			IncrementNumberOfUnicodeCharactersNotConverted(aLengthOfUnicode, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters);
   1.269 +			}
   1.270 +		//if it gets out from FOREVER, isInGbBlock could not be ETrue ~~~ so wouldn't need the assert
   1.271 +		//__ASSERT_DEBUG(!isInGbBlock, Panic(EPanicStillInGbBlock));
   1.272 +		aDescriptor.SetLength(aDescriptor.MaxLength()-numberOfBytesAvailable); // the final length is whatever capacity is not still available
   1.273 +		__ASSERT_DEBUG(aDescriptor.Length()==pointerToLastUsedByte-(aDescriptor.Ptr()-1), Panic(EPanicBadDescriptorSubDivision2));
   1.274 +		}
   1.275 +	}
   1.276 +
   1.277 +TInt CHZConverterImpl::ConvertFromUnicode(
   1.278 +		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
   1.279 +		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
   1.280 +		TDes8& aForeign, 
   1.281 +		const TDesC16& aUnicode, 
   1.282 +		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
   1.283 +	{
   1.284 +	TInt returnValue=CCnvCharacterSetConverter::DoConvertFromUnicode(CnvGb2312::ConversionData(), aDefaultEndiannessOfForeignCharacters, aReplacementForUnconvertibleUnicodeCharacters, aForeign, aUnicode, aIndicesOfUnconvertibleCharacters);
   1.285 +	if (returnValue<0)
   1.286 +		{
   1.287 +		return returnValue; // this is an error-code
   1.288 +		}
   1.289 +	ConvertFromGb2312ToHzInPlace(aForeign, returnValue, aIndicesOfUnconvertibleCharacters, aUnicode.Length());
   1.290 +	return returnValue;
   1.291 +	}
   1.292 +
   1.293 +LOCAL_C TInt ConvertFromHzToHomogeneousGb2312(TBuf8<KLengthOfIntermediateBuffer>& aGb2312, TPtrC8& aHzBeingConsumed, TPtrC8& aRemainderOfHz, TInt& aState, TUint& aOutputConversionFlags) // decodes one chunk from the front of aRemainderOfHz into aGb2312 (GB-block bytes get bit 7 set, "~~" becomes "~", other escapes produce no output); on return aHzBeingConsumed is the HZ consumed, aRemainderOfHz is what is left and aState is the state at the split; returns 0, or EErrorIllFormedInput for a bad escape sequence
   1.294 +	{
   1.295 +	// this function panics if aRemainderOfHz is of length 0
   1.296 +	TUint8* pointerToPreviousGb2312Byte=CONST_CAST(TUint8*, aGb2312.Ptr()-1);
   1.297 +	const TUint8* pointerToCurrentHzByte=aRemainderOfHz.Ptr();
   1.298 +	const TUint8* const pointerToLastHzByte=pointerToCurrentHzByte+(aRemainderOfHz.Length()-1);
   1.299 +	const TUint8* const pointerToLastHzByteToConvertThisTime=Min(pointerToLastHzByte, pointerToCurrentHzByte+(KLengthOfIntermediateBuffer-1)); // ordinary characters are only taken from this window; escape sequences are still followed beyond it
   1.300 +	FOREVER
   1.301 +		{
   1.302 +		const TUint currentHzByte=*pointerToCurrentHzByte;
   1.303 +		if (currentHzByte=='~')
   1.304 +			{
   1.305 +			__ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers7));
   1.306 +			if (pointerToCurrentHzByte>=pointerToLastHzByte)
   1.307 +				{
   1.308 +				aOutputConversionFlags|=CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated; // a lone "~" is the last byte of the input
   1.309 +				--pointerToCurrentHzByte;
   1.310 +				break;
   1.311 +				}
   1.312 +			switch (*++pointerToCurrentHzByte) // the byte after the tilde selects the escape sequence
   1.313 +				{
   1.314 +			case '{':
   1.315 +				if (aState==KIsInGbBlock)
   1.316 +					{
   1.317 +					return CCnvCharacterSetConverter::EErrorIllFormedInput;
   1.318 +					}
   1.319 +				aState=KIsInGbBlock;
   1.320 +				break;
   1.321 +			case '}':
   1.322 +				if (aState==CCnvCharacterSetConverter::KStateDefault)
   1.323 +					{
   1.324 +					return CCnvCharacterSetConverter::EErrorIllFormedInput;
   1.325 +					}
   1.326 +				aState=CCnvCharacterSetConverter::KStateDefault;
   1.327 +				break;
   1.328 +			case '~':
   1.329 +				if ((pointerToPreviousGb2312Byte+1)-aGb2312.Ptr()>=aGb2312.MaxLength()) // aGb2312 is already full: "~~" found beyond the window must not write past the end of the buffer
   1.330 +					{
   1.331 +					pointerToCurrentHzByte-=2; // un-consume the "~~" so that it starts the next chunk
   1.332 +					goto endOfChunk;
   1.333 +					}
   1.334 +				*++pointerToPreviousGb2312Byte=STATIC_CAST(TUint8, currentHzByte);
   1.335 +				break;
   1.336 +			case 0x0a:
   1.337 +				break;
   1.338 +			default:
   1.339 +				return CCnvCharacterSetConverter::EErrorIllFormedInput;
   1.340 +				}
   1.341 +			}
   1.342 +		else
   1.343 +			{
   1.344 +			__ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers8));
   1.345 +			if (pointerToCurrentHzByte>pointerToLastHzByteToConvertThisTime) // past the window: leave this character for the next chunk
   1.346 +				{
   1.347 +				--pointerToCurrentHzByte;
   1.348 +				break;
   1.349 +				}
   1.350 +			if (aState==CCnvCharacterSetConverter::KStateDefault)
   1.351 +				{
   1.352 +				*++pointerToPreviousGb2312Byte=STATIC_CAST(TUint8, currentHzByte);
   1.353 +				}
   1.354 +			else
   1.355 +				{
   1.356 +				__ASSERT_DEBUG(aState==KIsInGbBlock, Panic(EPanicBadState));
   1.357 +				__ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByteToConvertThisTime, Panic(EPanicBadPointers9));
   1.358 +				if (pointerToCurrentHzByte>=pointerToLastHzByteToConvertThisTime) // the second byte of the two-byte character is not inside the window
   1.359 +					{
   1.360 +					aOutputConversionFlags|=CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated;
   1.361 +					--pointerToCurrentHzByte;
   1.362 +					break;
   1.363 +					}
   1.364 +				++pointerToCurrentHzByte;
   1.365 +				*++pointerToPreviousGb2312Byte=STATIC_CAST(TUint8, currentHzByte|0x80);
   1.366 +				*++pointerToPreviousGb2312Byte=STATIC_CAST(TUint8, *pointerToCurrentHzByte|0x80);
   1.367 +				}
   1.368 +			}
   1.369 +		__ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers10));
   1.370 +		if (pointerToCurrentHzByte>=pointerToLastHzByte)
   1.371 +			{
   1.372 +			break;
   1.373 +			}
   1.374 +		++pointerToCurrentHzByte;
   1.375 +		}
   1.376 +endOfChunk:
   1.377 +	aGb2312.SetLength((pointerToPreviousGb2312Byte+1)-aGb2312.Ptr());
   1.378 +	const TInt numberOfHzBytesBeingConsumed=(pointerToCurrentHzByte+1)-aRemainderOfHz.Ptr();
   1.379 +	aHzBeingConsumed.Set(aRemainderOfHz.Left(numberOfHzBytesBeingConsumed));
   1.380 +	aRemainderOfHz.Set(aRemainderOfHz.Mid(numberOfHzBytesBeingConsumed));
   1.381 +#if defined(_DEBUG)
   1.382 +	// AAA: check that if the split occurs on a boundary between some one-byte and some two-byte text, then aState corresponds to the state *after* the split (the code marked "BBB" relies on this)
   1.383 +	if (aRemainderOfHz.Length()>=2)
   1.384 +		{
   1.385 +		__ASSERT_DEBUG(aRemainderOfHz.Left(2)!=_L8("~{"), Panic(EPanicSplitBoundaryIsNotAsLateAsPossible1));
   1.386 +		__ASSERT_DEBUG(aRemainderOfHz.Left(2)!=_L8("~}"), Panic(EPanicSplitBoundaryIsNotAsLateAsPossible2));
   1.387 +		}
   1.388 +#endif
   1.389 +	return 0;
   1.390 +	}
   1.391 +
   1.392 +LOCAL_C TInt Gb2312IndexToHzIndex(const TDesC8& aHz, TInt aGb2312Index, TBool aReturnMaximalHzIndex) // maps a byte index in the decoded GB2312 text back to a byte index in the HZ text aHz it was decoded from, by adding the bytes taken up by escape sequences; where several HZ indices correspond (because of escapes at that position), aReturnMaximalHzIndex selects between the later and the earlier one
   1.393 +	{
   1.394 +	// this function panics if aHz is of length 0
   1.395 +	// aHz may begin in either the KIsInGbBlock or the CCnvCharacterSetConverter::KStateDefault state, but it must *not* begin or end with a truncated sequence (an incomplete "tilde <something>" sequence or half of a 2-byte character)
   1.396 +	__ASSERT_DEBUG(aGb2312Index>=0, Panic(EPanicBadGb2312Index));
   1.397 +	const TUint8* const firstByte=aHz.Ptr();
   1.398 +	const TUint8* const lastByte=firstByte+(aHz.Length()-1);
   1.399 +	const TUint8* cursor=firstByte;
   1.400 +	TInt result=0;
   1.401 +	TInt escapeOverhead=0; // HZ bytes seen so far that have no counterpart in the GB2312 text
   1.402 +	FOREVER
   1.403 +		{
   1.404 +		const TInt cursorIndex=cursor-firstByte;
   1.405 +		const TInt targetIndex=aGb2312Index+escapeOverhead;
   1.406 +		__ASSERT_DEBUG(result<=targetIndex, Panic(EPanicBadHzIndex));
   1.407 +		const TBool targetReached=aReturnMaximalHzIndex? (cursorIndex>targetIndex): (result>=targetIndex);
   1.408 +		if (targetReached)
   1.409 +			{
   1.410 +			break;
   1.411 +			}
   1.412 +		result=cursorIndex;
   1.413 +		if (*cursor=='~')
   1.414 +			{
   1.415 +			__ASSERT_DEBUG(cursor<=lastByte, Panic(EPanicBadPointers11));
   1.416 +			if (cursor>=lastByte)
   1.417 +				{
   1.418 +				break;
   1.419 +				}
   1.420 +			const TUint escapeByte=*++cursor;
   1.421 +			if (escapeByte!='~')
   1.422 +				{
   1.423 +				__ASSERT_DEBUG((escapeByte=='{') || (escapeByte=='}') || (escapeByte==0x0a), Panic(EPanicBadTildeSequence));
   1.424 +				escapeOverhead+=2; // these escapes decode to nothing
   1.425 +				}
   1.426 +			else
   1.427 +				{
   1.428 +				++escapeOverhead; // "~~" decodes to a single "~"
   1.429 +				}
   1.430 +			}
   1.431 +		__ASSERT_DEBUG(cursor<=lastByte, Panic(EPanicBadPointers12));
   1.432 +		if (cursor>=lastByte)
   1.433 +			{
   1.434 +			break;
   1.435 +			}
   1.436 +		++cursor;
   1.437 +		}
   1.438 +	return result;
   1.439 +	}
   1.440 +
   1.441 +TInt CHZConverterImpl::ConvertToUnicode(
   1.442 +		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
   1.443 +		TDes16& aUnicode, 
   1.444 +		const TDesC8& aForeign, 
   1.445 +		TInt& aState, 
   1.446 +		TInt& aNumberOfUnconvertibleCharacters, 
   1.447 +		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
   1.448 +	{
   1.449 +	aUnicode.SetLength(0);
   1.450 +	TPtrC8 remainderOfHz(aForeign);
   1.451 +	TInt numberOfHzBytesConsumed=0;
   1.452 +	TUint outputConversionFlags=0;
   1.453 +	TUint inputConversionFlags=CCnvCharacterSetConverter::EInputConversionFlagAppend;
   1.454 +	const SCnvConversionData& gb2312ConversionData=CnvGb2312::ConversionData();
   1.455 +	FOREVER
   1.456 +		{
   1.457 +		__ASSERT_DEBUG(numberOfHzBytesConsumed+remainderOfHz.Length()==aForeign.Length(), Panic(EPanicBadDescriptorSubDivision3));
   1.458 +#if defined(_DEBUG)
   1.459 +		const TInt oldLengthOfRemainderOfHz=remainderOfHz.Length();
   1.460 +#endif
   1.461 +		TBuf8<KLengthOfIntermediateBuffer> gb2312;
   1.462 +		TPtrC8 hzBeingConsumed;
   1.463 +		const TInt returnValue1=ConvertFromHzToHomogeneousGb2312(gb2312, hzBeingConsumed, remainderOfHz, aState, outputConversionFlags);
   1.464 +		if (returnValue1<0)
   1.465 +			{
   1.466 +			return returnValue1; // this is an error-code
   1.467 +			}
   1.468 +		__ASSERT_DEBUG(returnValue1==0, Panic(EPanicBadReturnValue1));
   1.469 +		__ASSERT_DEBUG(hzBeingConsumed.Length()+remainderOfHz.Length()==oldLengthOfRemainderOfHz, Panic(EPanicRemainderOfHzHasGotLonger));
   1.470 +		if (hzBeingConsumed.Length()==0)
   1.471 +			{
   1.472 +			break;
   1.473 +			}
   1.474 +		TInt numberOfUnconvertibleCharacters;
   1.475 +		TInt indexOfFirstByteOfFirstUnconvertibleCharacter;
   1.476 +		const TInt returnValue2=CCnvCharacterSetConverter::DoConvertToUnicode(gb2312ConversionData, aDefaultEndiannessOfForeignCharacters, aUnicode, gb2312, numberOfUnconvertibleCharacters, indexOfFirstByteOfFirstUnconvertibleCharacter, outputConversionFlags, inputConversionFlags);
   1.477 +		if (returnValue2<0)
   1.478 +			{
   1.479 +			return returnValue2; // this is an error-code
   1.480 +			}
   1.481 +		if (numberOfUnconvertibleCharacters>0)
   1.482 +			{
   1.483 +			if (aNumberOfUnconvertibleCharacters==0)
   1.484 +				{
   1.485 +				aIndexOfFirstByteOfFirstUnconvertibleCharacter=numberOfHzBytesConsumed+Gb2312IndexToHzIndex(hzBeingConsumed, indexOfFirstByteOfFirstUnconvertibleCharacter, EFalse);
   1.486 +				}
   1.487 +			aNumberOfUnconvertibleCharacters+=numberOfUnconvertibleCharacters;
   1.488 +			}
   1.489 +		if (returnValue2>0)
   1.490 +			{
   1.491 +			const TInt numberOfGb2312BytesConverted=gb2312.Length()-returnValue2;
   1.492 +			__ASSERT_DEBUG(numberOfGb2312BytesConverted>=0, Panic(EPanicBadReturnValue2));
   1.493 +			// don't call gb2312.SetLength(numberOfGb2312BytesConverted) as we want to access gb2312[numberOfGb2312BytesConverted] - in any case, gb2312's length is never going to be used again
   1.494 +			// don't bother re-setting remainderOfHz as it won't be used again
   1.495 +			numberOfHzBytesConsumed+=Gb2312IndexToHzIndex(hzBeingConsumed, numberOfGb2312BytesConverted, ETrue);
   1.496 +			aState=(gb2312[numberOfGb2312BytesConverted]&0x80)? KIsInGbBlock: CCnvCharacterSetConverter::KStateDefault; // BBB: if the split (between the text that was converted and the text that wasn't converted) occurs on a boundary between some one-byte and some two-byte text, then aState corresponds to the state *after* the split (the code marked "AAA" checks this) - this means that we set aState according to gb2312[numberOfGb2312BytesConverted] rather than gb2312[numberOfGb2312BytesConverted-1]
   1.497 +			break;
   1.498 +			}
   1.499 +		numberOfHzBytesConsumed+=hzBeingConsumed.Length();
   1.500 +		remainderOfHz.Set(aForeign.Mid(numberOfHzBytesConsumed));
   1.501 +		__ASSERT_DEBUG(numberOfHzBytesConsumed+remainderOfHz.Length()==aForeign.Length(), Panic(EPanicBadDescriptorSubDivision4));
   1.502 +		if (remainderOfHz.Length()==0)
   1.503 +			{
   1.504 +			break;
   1.505 +			}
   1.506 +		if (numberOfHzBytesConsumed>0)
   1.507 +			{
   1.508 +			inputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable;
   1.509 +			}
   1.510 +		}
   1.511 +	// N.B. remainderOfHz is in an undefined state by this point
   1.512 +	if ((numberOfHzBytesConsumed==0) && (outputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated))
   1.513 +		{
   1.514 +		return CCnvCharacterSetConverter::EErrorIllFormedInput;
   1.515 +		}
   1.516 +	return aForeign.Length()-numberOfHzBytesConsumed;
   1.517 +	}
   1.518 +
   1.519 +TBool CHZConverterImpl::IsInThisCharacterSetL( // heuristic for character-set auto-detection: starts at a confidence of 50, rewards "~{", "~}" and "~~" sequences, penalises bytes above 0x7e, and reports the result in aConfidenceLevel clamped to 0..100; always sets aSetToTrue and returns ETrue
   1.520 +		TBool& aSetToTrue, 
   1.521 +		TInt& aConfidenceLevel, 
   1.522 +		const TDesC8& aSample)
   1.523 +	{
   1.524 +	aSetToTrue=ETrue;
   1.525 +	aConfidenceLevel=50; // neutral starting score; left as it is for an empty sample
   1.526 +	const TInt sampleLength=aSample.Length();
   1.527 +	TInt numberOfTildePairs=0; // "~{", "~}" and "~~" sequences found
   1.528 +	TInt numberOfNonHzBytes=0; // bytes above 0x7e
   1.529 +	for (TInt i=0; i<sampleLength; ++i)
   1.530 +		{
   1.531 +		const TUint currentByte=aSample[i];
   1.532 +		if (currentByte>0x7e)
   1.533 +			++numberOfNonHzBytes;
   1.534 +		else if (currentByte==0x7e)
   1.535 +			{
   1.536 +			if (i+1>=sampleLength)
   1.537 +				break; // a lone tilde is the last byte of the sample
   1.538 +			const TUint nextByte=aSample[i+1];
   1.539 +			if ((nextByte==0x7b) || (nextByte==0x7d) || (nextByte==0x7e))
   1.540 +				{
   1.541 +				++numberOfTildePairs;
   1.542 +				++i; // the second byte of the pair has been dealt with
   1.543 +				}
   1.544 +			}
   1.545 +		}
   1.546 +	if (sampleLength>0)
   1.547 +		{
   1.548 +		const TInt percentageOfTildeBytes=(2*numberOfTildePairs*100)/sampleLength;
   1.549 +		aConfidenceLevel-=Max(0, 4-percentageOfTildeBytes); // small penalty when escape sequences make up less than 4% of the sample
   1.550 +		aConfidenceLevel+=percentageOfTildeBytes;
   1.551 +		aConfidenceLevel=Max(0, Min(100, aConfidenceLevel-(numberOfNonHzBytes*100)/sampleLength)); // without the clamp the score could range from -54 to 150
   1.552 +		}
   1.553 +	return ETrue;
   1.554 +	}
   1.555 +
   1.556 +CHZConverterImpl* CHZConverterImpl::NewL() // factory: allocates a converter with new(ELeave) and returns it to the caller
   1.557 +	{
   1.558 +	return new(ELeave) CHZConverterImpl();
   1.559 +	}
   1.561 +
   1.562 +CHZConverterImpl::~CHZConverterImpl() // empty: the class declares no data members of its own
   1.563 +	{
   1.564 +	}
   1.565 +
   1.566 +CHZConverterImpl::CHZConverterImpl() // empty: the class declares no data members of its own; declared private, so construction goes through NewL()
   1.567 +	{
   1.568 +	}
   1.569 +
   1.570 +const TImplementationProxy ImplementationTable[] = // table returned by ImplementationGroupProxy: pairs implementation UID 0x10006065 with the factory CHZConverterImpl::NewL
   1.571 +	{
   1.572 +		IMPLEMENTATION_PROXY_ENTRY(0x10006065,	CHZConverterImpl::NewL)
   1.573 +	};
   1.574 +
   1.575 +EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount) // exported entry point: returns ImplementationTable and stores its number of entries in aTableCount
   1.576 +	{
   1.577 +	aTableCount = sizeof(ImplementationTable) / sizeof(ImplementationTable[0]);
   1.578 +	return ImplementationTable;
   1.579 +	}