os/textandloc/charconvfw/charconvplugins/src/plugins/hz.cpp
author sl@SLION-WIN7.fritz.box
Fri, 15 Jun 2012 03:10:57 +0200
changeset 0 bde4ae8d615e
permissions -rw-r--r--
First public contribution.
     1 /*
     2 * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
     3 * All rights reserved.
     4 * This component and the accompanying materials are made available
     5 * under the terms of "Eclipse Public License v1.0"
     6 * which accompanies this distribution, and is available
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
     8 *
     9 * Initial Contributors:
    10 * Nokia Corporation - initial contribution.
    11 *
    12 * Contributors:
    13 *
    14 * Description: 
    15 * HZ is defined in RFC 1843
    16 *
    17 */
    18 
    19 
    20 #include <e32std.h>
    21 #include <charconv.h>
    22 #include "gb2312.h"
    23 #include <ecom/implementationproxy.h>
    24 #include <charactersetconverter.h>
    25 
// Conversion state (carried in aState between calls to ConvertToUnicode) meaning that the HZ input is currently inside a "~{" ... "~}" GB-block; any other text is in CCnvCharacterSetConverter::KStateDefault
const TInt KIsInGbBlock=CCnvCharacterSetConverter::KStateDefault+1;
// Capacity, in bytes, of the intermediate GB2312 buffer that HZ input is translated into, one chunk at a time, by ConvertToUnicode
// NOTE(review): the debug value is presumably kept tiny so that the chunk-splitting logic gets exercised by short inputs - confirm
#if defined(_DEBUG)
const TInt KLengthOfIntermediateBuffer=6;
#else
const TInt KLengthOfIntermediateBuffer=150;
#endif
    32 
// Everything in this section exists only in debug builds: it is referenced solely from __ASSERT_DEBUG checks
#if defined(_DEBUG)

// Panic category passed to User::Panic by the debug-only Panic() helper below
_LIT(KLitPanicText, "HZ");

// Panic reasons raised by the __ASSERT_DEBUG checks in this file; the numbering starts at 1
enum TPanic
	{
	EPanicTooManyMatchingIndicesFound=1,
	EPanicBadNumberOfBytesRequiredToBeAvailable,
	EPanicBadNumberOfBytesAvailable,
	EPanicBadNumberOfBytesThatCanBeMadeAvailable,
	EPanicBadNumberOfBytesMadeAvailable1,
	EPanicBadNumberOfBytesMadeAvailable2,
	EPanicBadDescriptorSubDivision1,
	EPanicBadDescriptorSubDivision2,
	EPanicBadDescriptorSubDivision3,
	EPanicBadDescriptorSubDivision4,
	EPanicBadPointers1,
	EPanicBadPointers2,
	EPanicBadPointers3,
	EPanicBadPointers4,
	EPanicBadPointers5,
	EPanicBadPointers6,
	EPanicBadPointers7,
	EPanicBadPointers8,
	EPanicBadPointers9,
	EPanicBadPointers10,
	EPanicBadPointers11,
	EPanicBadPointers12,
	EPanicStillInGbBlock,
	EPanicBadState,
	EPanicSplitBoundaryIsNotAsLateAsPossible1,
	EPanicSplitBoundaryIsNotAsLateAsPossible2,
	EPanicBadGb2312Index,
	EPanicBadHzIndex,
	EPanicBadTildeSequence,
	EPanicBadReturnValue1,
	EPanicBadReturnValue2,
	EPanicRemainderOfHzHasGotLonger
	};

// Raises a panic in category "HZ" with the given reason code
LOCAL_C void Panic(TPanic aPanic)
	{
	User::Panic(KLitPanicText, aPanic);
	}

#endif
    79 
// Character-set converter plug-in for the HZ encoding (RFC 1843), implemented on top of the GB2312 conversion data (CnvGb2312).
// Instances are created through NewL(), which is what the ECom implementation table in this file registers.
class CHZConverterImpl : public CCharacterSetConverterPluginInterface
	{

public:
	// Returns the byte sequence to substitute for Unicode characters that cannot be converted (forwards to CnvGb2312)
	virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters();

	// Converts aUnicode to HZ in aForeign; returns the number of Unicode characters left unconverted, or a negative error-code
	virtual TInt ConvertFromUnicode(
		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
		TDes8& aForeign, 
		const TDesC16& aUnicode, 
		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);

	// Converts HZ in aForeign to Unicode in aUnicode; returns the number of HZ bytes left unconverted, or a negative error-code.
	// aState carries the "inside a GB-block" / default state from one call to the next
	virtual TInt ConvertToUnicode(
		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
		TDes16& aUnicode, 
		const TDesC8& aForeign, 
		TInt& aState, 
		TInt& aNumberOfUnconvertibleCharacters, 
		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);

	// Estimates, via aConfidenceLevel, how likely it is that aSample is HZ-encoded text
	virtual TBool IsInThisCharacterSetL(
		TBool& aSetToTrue, 
		TInt& aConfidenceLevel, 
		const TDesC8& aSample);

	// Factory function; leaves if the allocation fails
	static CHZConverterImpl* NewL();
	virtual ~CHZConverterImpl();

private:
	// Private so that instances can only be created through NewL()
	CHZConverterImpl();

	};
   113 
   114 
   115 
// Returns the replacement byte sequence for unconvertible Unicode characters.
// HZ output is produced by first converting to GB2312, so the GB2312 replacement is used unchanged.
const TDesC8& CHZConverterImpl::ReplacementForUnconvertibleUnicodeCharacters()
	{
	return CnvGb2312::ReplacementForUnconvertibleUnicodeCharacters();
	}
   120 
   121 LOCAL_C void IncrementNumberOfUnicodeCharactersNotConverted(TInt aLengthOfUnicode, TInt& aNumberOfUnicodeCharactersNotConverted, CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters) // these seemingly haphazard order of these paramters is to match the position of the second and third parameters with the caller
   122 	{
   123 	++aNumberOfUnicodeCharactersNotConverted;
   124 	const TInt indexOfUnicodeCharacterNowNotConverted=aLengthOfUnicode-aNumberOfUnicodeCharactersNotConverted;
   125 #if defined(_DEBUG)
   126 	TInt numberOfMatchingIndicesFound=0;
   127 #endif
   128 	for (TInt i=aIndicesOfUnconvertibleCharacters.NumberOfIndices()-1; i>=0; --i) // must iterate backwards as items from aIndicesOfUnconvertibleCharacters may be deleted
   129 		{
   130 		if (aIndicesOfUnconvertibleCharacters[i]==indexOfUnicodeCharacterNowNotConverted)
   131 			{
   132 			aIndicesOfUnconvertibleCharacters.Remove(i);
   133 #if defined(_DEBUG)
   134 			++numberOfMatchingIndicesFound;
   135 #endif
   136 			}
   137 		}
   138 	__ASSERT_DEBUG(numberOfMatchingIndicesFound<=1, Panic(EPanicTooManyMatchingIndicesFound));
   139 	}
   140 
   141 LOCAL_C void MakeAvailable(TInt aNumberOfBytesRequiredToBeAvailable, TInt& aNumberOfUnicodeCharactersNotConverted, CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, TInt aLengthOfUnicode, const TUint8*& aPointerToLastUsedByte, TInt& aNumberOfBytesAvailable, TInt aNumberOfBytesThatCanBeMadeAvailable) // these seemingly haphazard order of these paramters is to match the position of the second to fourth parameters (inclusive) with the caller
   142 // makes available as much of aNumberOfBytesRequiredToBeAvailable as it can, even if the final value (i.e. value on returning) of aNumberOfBytesAvailable<aNumberOfBytesRequiredToBeAvailable (i.e. it doesn't initially give up straight away and do nothing if aNumberOfBytesRequiredToBeAvailable>aNumberOfBytesThatCanBeMadeAvailable+aNumberOfBytesAvailable)
   143 	{
   144 	__ASSERT_DEBUG(aNumberOfBytesRequiredToBeAvailable>0, Panic(EPanicBadNumberOfBytesRequiredToBeAvailable));
   145 	__ASSERT_DEBUG(aNumberOfBytesAvailable>=0, Panic(EPanicBadNumberOfBytesAvailable));
   146 	__ASSERT_DEBUG(aNumberOfBytesThatCanBeMadeAvailable>=0, Panic(EPanicBadNumberOfBytesThatCanBeMadeAvailable));
   147 	TInt numberOfBytesMadeAvailable=0;
   148 	FOREVER
   149 		{
   150 		if (aNumberOfBytesAvailable>=aNumberOfBytesRequiredToBeAvailable)
   151 			{
   152 			break; // no more needs to be done
   153 			}
   154 		__ASSERT_DEBUG(numberOfBytesMadeAvailable<=aNumberOfBytesThatCanBeMadeAvailable, Panic(EPanicBadNumberOfBytesMadeAvailable1));
   155 		if (numberOfBytesMadeAvailable>=aNumberOfBytesThatCanBeMadeAvailable)
   156 			{
   157 			break; // give up - no more can be done
   158 			}
   159 		const TInt numberOfBytesInCharacter=(*aPointerToLastUsedByte&0x80)? 2: 1;
   160 		aPointerToLastUsedByte-=numberOfBytesInCharacter;
   161 		aNumberOfBytesAvailable+=numberOfBytesInCharacter;
   162 		numberOfBytesMadeAvailable+=numberOfBytesInCharacter;
   163 		IncrementNumberOfUnicodeCharactersNotConverted(aLengthOfUnicode, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters);
   164 		}
   165 	__ASSERT_DEBUG(numberOfBytesMadeAvailable<=aNumberOfBytesThatCanBeMadeAvailable, Panic(EPanicBadNumberOfBytesMadeAvailable2));
   166 	}
   167 
// Rewrites aDescriptor, which holds GB2312 text (one-byte characters with the top bit clear, two-byte characters with the top bit set), as HZ, in place:
// - each run of two-byte characters is wrapped in "~{" ... "~}" and has the top bit of every byte cleared
// - each one-byte "~" is doubled to "~~"
// The HZ form is longer than the GB2312 form, so room is made by discarding whole characters from the end of the descriptor (see MakeAvailable).
// Every character discarded is added to aNumberOfUnicodeCharactersNotConverted and has its entry (if any) removed from aIndicesOfUnconvertibleCharacters.
// aLengthOfUnicode is the length of the Unicode text that aDescriptor was generated from; it is used to work out the Unicode index of each discarded character.
// Throughout, pointerToPreviousByte addresses the last byte already in HZ form and pointerToLastUsedByte the last byte still in use; both start one before/at the descriptor's data respectively.
LOCAL_C void ConvertFromGb2312ToHzInPlace(TDes8& aDescriptor, TInt& aNumberOfUnicodeCharactersNotConverted, CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, TInt aLengthOfUnicode)
	{
	// it is legal for aDescriptor to be of length 0
	const TInt originalLengthOfDescriptor=aDescriptor.Length();
	if (originalLengthOfDescriptor>0)
		{
		TInt numberOfBytesAvailable=aDescriptor.MaxLength()-originalLengthOfDescriptor;
		TUint8* pointerToPreviousByte=CONST_CAST(TUint8*, aDescriptor.Ptr()-1);
		const TUint8* pointerToLastUsedByte=pointerToPreviousByte+originalLengthOfDescriptor;
		TBool isInGbBlock=EFalse;
		FOREVER
			{
			__ASSERT_DEBUG((pointerToLastUsedByte-(aDescriptor.Ptr()-1))+numberOfBytesAvailable==aDescriptor.MaxLength(), Panic(EPanicBadDescriptorSubDivision1));
			__ASSERT_DEBUG(pointerToPreviousByte<pointerToLastUsedByte, Panic(EPanicBadPointers1));
			const TUint currentByte=*(pointerToPreviousByte+1);
			if (currentByte&0x80)
				{
				// first byte of a two-byte character
				if (!isInGbBlock)
					{
					MakeAvailable(4, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters, aLengthOfUnicode, pointerToLastUsedByte, numberOfBytesAvailable, (pointerToLastUsedByte-pointerToPreviousByte)-2); // what's passed into the last parameter is not a typo - we do not want the two-byte character currently pointed to by (pointerToPreviousByte+1) to be made available
					if (numberOfBytesAvailable<4) // 4 bytes are required for the "~{" "~}" escape sequences (thus ensuring that at least a single double-byte character can be put into the GB-block)
						{
						break;
						}
					isInGbBlock=ETrue;
					// shift the not-yet-processed tail up by two bytes and write "~{" into the gap
					Mem::Copy(pointerToPreviousByte+3, pointerToPreviousByte+1, pointerToLastUsedByte-pointerToPreviousByte);
					++pointerToPreviousByte;
					*pointerToPreviousByte='~';
					++pointerToPreviousByte;
					*pointerToPreviousByte='{';
					numberOfBytesAvailable-=2;
					pointerToLastUsedByte+=2;
					}
				// clear the top bit of both bytes of the character
				++pointerToPreviousByte;
				*pointerToPreviousByte&=~0x80;
				__ASSERT_DEBUG(pointerToPreviousByte<pointerToLastUsedByte, Panic(EPanicBadPointers2));
				++pointerToPreviousByte;
				*pointerToPreviousByte&=~0x80;
				}
			else
				{
				// one-byte character
				if (isInGbBlock)
					{
closeGbBlock:
					// also jumped to from the bottom of the loop when the text ends while still inside a GB-block
					isInGbBlock=EFalse;
					MakeAvailable(2, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters, aLengthOfUnicode, pointerToLastUsedByte, numberOfBytesAvailable, pointerToLastUsedByte-pointerToPreviousByte);
					if (numberOfBytesAvailable<2) // 2 bytes are required for the "~}" escape sequence
						{
						// no room for "~}" even after discarding everything that follows, so sacrifice the last two-byte character of the GB-block and overwrite it with "~}"
						IncrementNumberOfUnicodeCharactersNotConverted(aLengthOfUnicode, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters);
						*(pointerToPreviousByte-1)='~';
						*pointerToPreviousByte='}';
						break;
						}
					// shift the not-yet-processed tail up by two bytes and write "~}" into the gap
					Mem::Copy(pointerToPreviousByte+3, pointerToPreviousByte+1, pointerToLastUsedByte-pointerToPreviousByte);
					++pointerToPreviousByte;
					*pointerToPreviousByte='~';
					++pointerToPreviousByte;
					*pointerToPreviousByte='}';
					numberOfBytesAvailable-=2;
					pointerToLastUsedByte+=2;
					__ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastUsedByte, Panic(EPanicBadPointers3));
					if (pointerToPreviousByte>=pointerToLastUsedByte)
						{
						// nothing is left after the "~}" (either the text had ended, or everything after it was discarded to make room)
						break;
						}
					}
				if (currentByte=='~')
					{
					MakeAvailable(1, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters, aLengthOfUnicode, pointerToLastUsedByte, numberOfBytesAvailable, (pointerToLastUsedByte-pointerToPreviousByte)-1); // what's passed into the last parameter is not a typo - we do not want the "~" currently pointed to by (pointerToPreviousByte+1) to be made available
					if (numberOfBytesAvailable<1) // 1 byte is required for the extra "~" character
						{
						break;
						}
					// shift the not-yet-processed tail (starting with the "~" itself) up by one byte and write the extra "~" into the gap
					Mem::Copy(pointerToPreviousByte+2, pointerToPreviousByte+1, pointerToLastUsedByte-pointerToPreviousByte);
					++pointerToPreviousByte;
					*pointerToPreviousByte='~';
					numberOfBytesAvailable-=1;
					pointerToLastUsedByte+=1;
					}
				++pointerToPreviousByte;
				}
			__ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastUsedByte, Panic(EPanicBadPointers4));
			if (pointerToPreviousByte>=pointerToLastUsedByte)
				{
				if (isInGbBlock)
					{
					goto closeGbBlock; // this is to share the code for closing the GB-block
					}
				break;
				}
			}
		__ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastUsedByte, Panic(EPanicBadPointers5));
		if (pointerToPreviousByte<pointerToLastUsedByte)
			{
			// the loop gave up on the character at (pointerToPreviousByte+1) because its escape sequence did not fit, so drop that character too
			__ASSERT_DEBUG((pointerToPreviousByte==pointerToLastUsedByte-1) || (pointerToPreviousByte==pointerToLastUsedByte-2), Panic(EPanicBadPointers6));
			numberOfBytesAvailable+=(pointerToLastUsedByte-pointerToPreviousByte);
			pointerToLastUsedByte=pointerToPreviousByte;
			IncrementNumberOfUnicodeCharactersNotConverted(aLengthOfUnicode, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters);
			}
		//if it gets out from FOREVER, isInGbBlock could not be ETrue ~~~ so wouldn't need the assert
		//__ASSERT_DEBUG(!isInGbBlock, Panic(EPanicStillInGbBlock));
		aDescriptor.SetLength(aDescriptor.MaxLength()-numberOfBytesAvailable);
		__ASSERT_DEBUG(aDescriptor.Length()==pointerToLastUsedByte-(aDescriptor.Ptr()-1), Panic(EPanicBadDescriptorSubDivision2));
		}
	}
   273 
   274 TInt CHZConverterImpl::ConvertFromUnicode(
   275 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
   276 		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
   277 		TDes8& aForeign, 
   278 		const TDesC16& aUnicode, 
   279 		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
   280 	{
   281 	TInt returnValue=CCnvCharacterSetConverter::DoConvertFromUnicode(CnvGb2312::ConversionData(), aDefaultEndiannessOfForeignCharacters, aReplacementForUnconvertibleUnicodeCharacters, aForeign, aUnicode, aIndicesOfUnconvertibleCharacters);
   282 	if (returnValue<0)
   283 		{
   284 		return returnValue; // this is an error-code
   285 		}
   286 	ConvertFromGb2312ToHzInPlace(aForeign, returnValue, aIndicesOfUnconvertibleCharacters, aUnicode.Length());
   287 	return returnValue;
   288 	}
   289 
   290 LOCAL_C TInt ConvertFromHzToHomogeneousGb2312(TBuf8<KLengthOfIntermediateBuffer>& aGb2312, TPtrC8& aHzBeingConsumed, TPtrC8& aRemainderOfHz, TInt& aState, TUint& aOutputConversionFlags)
   291 	{
   292 	// this function panics if aRemainderOfHz is of length 0
   293 	TUint8* pointerToPreviousGb2312Byte=CONST_CAST(TUint8*, aGb2312.Ptr()-1);
   294 	const TUint8* pointerToCurrentHzByte=aRemainderOfHz.Ptr();
   295 	const TUint8* const pointerToLastHzByte=pointerToCurrentHzByte+(aRemainderOfHz.Length()-1);
   296 	const TUint8* const pointerToLastHzByteToConvertThisTime=Min(pointerToLastHzByte, pointerToCurrentHzByte+(KLengthOfIntermediateBuffer-1));
   297 	FOREVER
   298 		{
   299 		const TUint currentHzByte=*pointerToCurrentHzByte;
   300 		if (currentHzByte=='~')
   301 			{
   302 			__ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers7));
   303 			if (pointerToCurrentHzByte>=pointerToLastHzByte)
   304 				{
   305 				aOutputConversionFlags|=CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated;
   306 				--pointerToCurrentHzByte;
   307 				break;
   308 				}
   309 			++pointerToCurrentHzByte;
   310 			const TUint nextHzByte=*pointerToCurrentHzByte;
   311 			switch (nextHzByte)
   312 				{
   313 			case '{':
   314 				if (aState==KIsInGbBlock)
   315 					{
   316 					return CCnvCharacterSetConverter::EErrorIllFormedInput;
   317 					}
   318 				aState=KIsInGbBlock;
   319 				break;
   320 			case '}':
   321 				if (aState==CCnvCharacterSetConverter::KStateDefault)
   322 					{
   323 					return CCnvCharacterSetConverter::EErrorIllFormedInput;
   324 					}
   325 				aState=CCnvCharacterSetConverter::KStateDefault;
   326 				break;
   327 			case '~':
   328 				++pointerToPreviousGb2312Byte;
   329 				*pointerToPreviousGb2312Byte=STATIC_CAST(TUint8, currentHzByte);
   330 				break;
   331 			case 0x0a:
   332 				break;
   333 			default:
   334 				return CCnvCharacterSetConverter::EErrorIllFormedInput;
   335 				}
   336 			}
   337 		else
   338 			{
   339 			__ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers8));
   340 			if (pointerToCurrentHzByte>pointerToLastHzByteToConvertThisTime)
   341 				{
   342 				--pointerToCurrentHzByte;
   343 				break;
   344 				}
   345 			if (aState==CCnvCharacterSetConverter::KStateDefault)
   346 				{
   347 				++pointerToPreviousGb2312Byte;
   348 				*pointerToPreviousGb2312Byte=STATIC_CAST(TUint8, currentHzByte);
   349 				}
   350 			else
   351 				{
   352 				__ASSERT_DEBUG(aState==KIsInGbBlock, Panic(EPanicBadState));
   353 				__ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByteToConvertThisTime, Panic(EPanicBadPointers9));
   354 				if (pointerToCurrentHzByte>=pointerToLastHzByteToConvertThisTime)
   355 					{
   356 					aOutputConversionFlags|=CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated;
   357 					--pointerToCurrentHzByte;
   358 					break;
   359 					}
   360 				++pointerToCurrentHzByte;
   361 				++pointerToPreviousGb2312Byte;
   362 				*pointerToPreviousGb2312Byte=STATIC_CAST(TUint8, currentHzByte|0x80);
   363 				++pointerToPreviousGb2312Byte;
   364 				*pointerToPreviousGb2312Byte=STATIC_CAST(TUint8, *pointerToCurrentHzByte|0x80);
   365 				}
   366 			}
   367 		__ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers10));
   368 		if (pointerToCurrentHzByte>=pointerToLastHzByte)
   369 			{
   370 			break;
   371 			}
   372 		++pointerToCurrentHzByte;
   373 		}
   374 	aGb2312.SetLength((pointerToPreviousGb2312Byte+1)-aGb2312.Ptr());
   375 	const TInt numberOfHzBytesBeingConsumed=(pointerToCurrentHzByte+1)-aRemainderOfHz.Ptr();
   376 	aHzBeingConsumed.Set(aRemainderOfHz.Left(numberOfHzBytesBeingConsumed));
   377 	aRemainderOfHz.Set(aRemainderOfHz.Mid(numberOfHzBytesBeingConsumed));
   378 #if defined(_DEBUG)
   379 	// AAA: check that if the split occurs on a boundary between some one-byte and some two-byte text, then aState corresponds to the state *after* the split (the code marked "BBB" relies on this)
   380 	if (aRemainderOfHz.Length()>=2)
   381 		{
   382 		__ASSERT_DEBUG(aRemainderOfHz.Left(2)!=_L8("~{"), Panic(EPanicSplitBoundaryIsNotAsLateAsPossible1));
   383 		__ASSERT_DEBUG(aRemainderOfHz.Left(2)!=_L8("~}"), Panic(EPanicSplitBoundaryIsNotAsLateAsPossible2));
   384 		}
   385 #endif
   386 	return 0;
   387 	}
   388 
// Maps aGb2312Index, an index into the GB2312 text that was generated from aHz, back to an index into aHz itself.
// It walks aHz from the start, keeping a running count of how many more bytes the HZ form has used than the GB2312 form:
// one for each "~~" and two for each other tilde sequence. The value returned is the index of the last position visited before the walk stopped.
// aReturnMaximalHzIndex selects the stopping rule: if true, the walk stops once the position about to be visited is beyond the (adjusted) target;
// if false, it stops as soon as the last position visited has reached the (adjusted) target.
// The two rules differ when a tilde sequence sits at the target, e.g. for aHz starting with "~{" and aGb2312Index==0 the maximal rule returns 2 whereas the other returns 0.
LOCAL_C TInt Gb2312IndexToHzIndex(const TDesC8& aHz, TInt aGb2312Index, TBool aReturnMaximalHzIndex)
	{
	// this function panics if aHz is of length 0
	// aHz may start in either KIsInGbBlock or CCnvCharacterSetConverter::KStateDefault state, but it must *not* have any truncated sequences (i.e. "tilde <something>" sequence that is not complete, or part of a 2-byte character sequence) at either its start or its end
	__ASSERT_DEBUG(aGb2312Index>=0, Panic(EPanicBadGb2312Index));
	TInt hzIndex=0; // index of the last position visited - this is what gets returned
	TInt offsetFromGb2312IndexToHzIndex=0; // number of HZ bytes met so far that have no counterpart in the GB2312 text
	const TUint8* const pointerToFirstHzByte=aHz.Ptr();
	const TUint8* pointerToCurrentHzByte=pointerToFirstHzByte;
	const TUint8* const pointerToLastHzByte=pointerToFirstHzByte+(aHz.Length()-1);
	FOREVER
		{
		const TInt newHzIndex=pointerToCurrentHzByte-pointerToFirstHzByte;
		const TInt candidateHzIndex=aGb2312Index+offsetFromGb2312IndexToHzIndex;
		__ASSERT_DEBUG(hzIndex<=candidateHzIndex, Panic(EPanicBadHzIndex));
		if (aReturnMaximalHzIndex? (newHzIndex>candidateHzIndex): (hzIndex>=candidateHzIndex))
			{
			break;
			}
		hzIndex=newHzIndex;
		if (*pointerToCurrentHzByte=='~')
			{
			__ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers11));
			if (pointerToCurrentHzByte>=pointerToLastHzByte)
				{
				break;
				}
			++pointerToCurrentHzByte;
			const TUint currentHzByte=*pointerToCurrentHzByte;
			if (currentHzByte=='~')
				{
				// "~~" is two HZ bytes for one GB2312 byte
				++offsetFromGb2312IndexToHzIndex;
				}
			else
				{
				// any other tilde sequence is two HZ bytes with no GB2312 counterpart at all
				__ASSERT_DEBUG((currentHzByte=='{') || (currentHzByte=='}') || (currentHzByte==0x0a), Panic(EPanicBadTildeSequence));
				offsetFromGb2312IndexToHzIndex+=2;
				}
			}
		__ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers12));
		if (pointerToCurrentHzByte>=pointerToLastHzByte)
			{
			break;
			}
		++pointerToCurrentHzByte;
		}
	return hzIndex;
	}
   437 
   438 TInt CHZConverterImpl::ConvertToUnicode(
   439 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
   440 		TDes16& aUnicode, 
   441 		const TDesC8& aForeign, 
   442 		TInt& aState, 
   443 		TInt& aNumberOfUnconvertibleCharacters, 
   444 		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
   445 	{
   446 	aUnicode.SetLength(0);
   447 	TPtrC8 remainderOfHz(aForeign);
   448 	TInt numberOfHzBytesConsumed=0;
   449 	TUint outputConversionFlags=0;
   450 	TUint inputConversionFlags=CCnvCharacterSetConverter::EInputConversionFlagAppend;
   451 	const SCnvConversionData& gb2312ConversionData=CnvGb2312::ConversionData();
   452 	FOREVER
   453 		{
   454 		__ASSERT_DEBUG(numberOfHzBytesConsumed+remainderOfHz.Length()==aForeign.Length(), Panic(EPanicBadDescriptorSubDivision3));
   455 #if defined(_DEBUG)
   456 		const TInt oldLengthOfRemainderOfHz=remainderOfHz.Length();
   457 #endif
   458 		TBuf8<KLengthOfIntermediateBuffer> gb2312;
   459 		TPtrC8 hzBeingConsumed;
   460 		const TInt returnValue1=ConvertFromHzToHomogeneousGb2312(gb2312, hzBeingConsumed, remainderOfHz, aState, outputConversionFlags);
   461 		if (returnValue1<0)
   462 			{
   463 			return returnValue1; // this is an error-code
   464 			}
   465 		__ASSERT_DEBUG(returnValue1==0, Panic(EPanicBadReturnValue1));
   466 		__ASSERT_DEBUG(hzBeingConsumed.Length()+remainderOfHz.Length()==oldLengthOfRemainderOfHz, Panic(EPanicRemainderOfHzHasGotLonger));
   467 		if (hzBeingConsumed.Length()==0)
   468 			{
   469 			break;
   470 			}
   471 		TInt numberOfUnconvertibleCharacters;
   472 		TInt indexOfFirstByteOfFirstUnconvertibleCharacter;
   473 		const TInt returnValue2=CCnvCharacterSetConverter::DoConvertToUnicode(gb2312ConversionData, aDefaultEndiannessOfForeignCharacters, aUnicode, gb2312, numberOfUnconvertibleCharacters, indexOfFirstByteOfFirstUnconvertibleCharacter, outputConversionFlags, inputConversionFlags);
   474 		if (returnValue2<0)
   475 			{
   476 			return returnValue2; // this is an error-code
   477 			}
   478 		if (numberOfUnconvertibleCharacters>0)
   479 			{
   480 			if (aNumberOfUnconvertibleCharacters==0)
   481 				{
   482 				aIndexOfFirstByteOfFirstUnconvertibleCharacter=numberOfHzBytesConsumed+Gb2312IndexToHzIndex(hzBeingConsumed, indexOfFirstByteOfFirstUnconvertibleCharacter, EFalse);
   483 				}
   484 			aNumberOfUnconvertibleCharacters+=numberOfUnconvertibleCharacters;
   485 			}
   486 		if (returnValue2>0)
   487 			{
   488 			const TInt numberOfGb2312BytesConverted=gb2312.Length()-returnValue2;
   489 			__ASSERT_DEBUG(numberOfGb2312BytesConverted>=0, Panic(EPanicBadReturnValue2));
   490 			// don't call gb2312.SetLength(numberOfGb2312BytesConverted) as we want to access gb2312[numberOfGb2312BytesConverted] - in any case, gb2312's length is never going to be used again
   491 			// don't bother re-setting remainderOfHz as it won't be used again
   492 			numberOfHzBytesConsumed+=Gb2312IndexToHzIndex(hzBeingConsumed, numberOfGb2312BytesConverted, ETrue);
   493 			aState=(gb2312[numberOfGb2312BytesConverted]&0x80)? KIsInGbBlock: CCnvCharacterSetConverter::KStateDefault; // BBB: if the split (between the text that was converted and the text that wasn't converted) occurs on a boundary between some one-byte and some two-byte text, then aState corresponds to the state *after* the split (the code marked "AAA" checks this) - this means that we set aState according to gb2312[numberOfGb2312BytesConverted] rather than gb2312[numberOfGb2312BytesConverted-1]
   494 			break;
   495 			}
   496 		numberOfHzBytesConsumed+=hzBeingConsumed.Length();
   497 		remainderOfHz.Set(aForeign.Mid(numberOfHzBytesConsumed));
   498 		__ASSERT_DEBUG(numberOfHzBytesConsumed+remainderOfHz.Length()==aForeign.Length(), Panic(EPanicBadDescriptorSubDivision4));
   499 		if (remainderOfHz.Length()==0)
   500 			{
   501 			break;
   502 			}
   503 		if (numberOfHzBytesConsumed>0)
   504 			{
   505 			inputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable;
   506 			}
   507 		}
   508 	// N.B. remainderOfHz is in an undefined state by this point
   509 	if ((numberOfHzBytesConsumed==0) && (outputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated))
   510 		{
   511 		return CCnvCharacterSetConverter::EErrorIllFormedInput;
   512 		}
   513 	return aForeign.Length()-numberOfHzBytesConsumed;
   514 	}
   515 
   516 TBool CHZConverterImpl::IsInThisCharacterSetL(
   517 		TBool& aSetToTrue, 
   518 		TInt& aConfidenceLevel, 
   519 		const TDesC8& aSample)
   520 	{
   521 	aSetToTrue=ETrue;
   522 	TInt sampleLength = aSample.Length();
   523 	TInt pairOfTilde=0;
   524 	TInt occrenceOfNonHz=0;
   525 	aConfidenceLevel = 50;
   526 	// Hz encoding uses escape sequences...
   527 	for (TInt i = 0; i < sampleLength; ++i)
   528 		{
   529 		if (aSample[i]>0x7e)
   530 			occrenceOfNonHz++;
   531 		if (aSample[i]==0x7e)
   532 			{
   533 			TInt increment1 = i+1;
   534 			if (increment1 >= sampleLength)
   535 				break;
   536 			if ((aSample[increment1] == 0x7b)||(aSample[increment1] == 0x7d)||(aSample[increment1] == 0x7e))
   537 				{
   538 				pairOfTilde++;
   539 				i++;
   540 				}
   541 			}
   542 		}//for
   543 	if (sampleLength)
   544 		{
   545 		TInt occurrenceOftilde =2*pairOfTilde*100/sampleLength;
   546 		aConfidenceLevel=aConfidenceLevel-Max(0,(4-occurrenceOftilde));
   547 		aConfidenceLevel += occurrenceOftilde;
   548 		aConfidenceLevel -= ((occrenceOfNonHz*100)/sampleLength);
   549 		}
   550 	return ETrue;
   551 	}
   552 
   553 CHZConverterImpl* CHZConverterImpl::NewL()
   554 	{
   555 	CHZConverterImpl* self = new(ELeave) CHZConverterImpl();
   556 	return self;
   557 	}
   558 
// Nothing to release: the class declares no data members of its own
CHZConverterImpl::~CHZConverterImpl()
	{
	}
   562 
// Nothing to initialise: the class declares no data members of its own (private - use NewL())
CHZConverterImpl::CHZConverterImpl()
	{
	}
   566 
// ECom implementation table: associates implementation UID 0x10006065 with the factory function that creates the HZ converter
const TImplementationProxy ImplementationTable[] = 
	{
		IMPLEMENTATION_PROXY_ENTRY(0x10006065,	CHZConverterImpl::NewL)
	};
   571 
   572 EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount)
   573 	{
   574 	aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy);
   575 
   576 	return ImplementationTable;
   577 	}