os/textandloc/charconvfw/charconvplugins/src/plugins/hz.cpp
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
sl@0
     1
/*
sl@0
     2
* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
sl@0
     3
* All rights reserved.
sl@0
     4
* This component and the accompanying materials are made available
sl@0
     5
* under the terms of "Eclipse Public License v1.0"
sl@0
     6
* which accompanies this distribution, and is available
sl@0
     7
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
sl@0
     8
*
sl@0
     9
* Initial Contributors:
sl@0
    10
* Nokia Corporation - initial contribution.
sl@0
    11
*
sl@0
    12
* Contributors:
sl@0
    13
*
sl@0
    14
* Description: 
sl@0
    15
* HZ is defined in RFC 1843
sl@0
    16
*
sl@0
    17
*/
sl@0
    18
sl@0
    19
sl@0
    20
#include <e32std.h>
sl@0
    21
#include <charconv.h>
sl@0
    22
#include "gb2312.h"
sl@0
    23
#include <ecom/implementationproxy.h>
sl@0
    24
#include <charactersetconverter.h>
sl@0
    25
sl@0
    26
// Conversion state used while the HZ stream is inside a "~{" ... "~}" block (two-byte GB text);
// outside such a block the state is CCnvCharacterSetConverter::KStateDefault.
const TInt KIsInGbBlock=CCnvCharacterSetConverter::KStateDefault+1;

// Capacity (in bytes) of the intermediate GB2312 buffer used when converting HZ to Unicode.
// NOTE(review): the debug value is very small, presumably so that the chunk-splitting logic
// gets exercised by short test strings - confirm.
#if !defined(_DEBUG)
const TInt KLengthOfIntermediateBuffer=150;
#else
const TInt KLengthOfIntermediateBuffer=6;
#endif
sl@0
    32
sl@0
    33
#if defined(_DEBUG)

// Panic category raised by the debug-only assertions in this file.
_LIT(KLitPanicText, "HZ");

// Panic reasons; each one identifies a single __ASSERT_DEBUG site in this file.
// The numeric values are spelled out so that a panic number can be looked up directly.
enum TPanic
	{
	EPanicTooManyMatchingIndicesFound=1,
	EPanicBadNumberOfBytesRequiredToBeAvailable=2,
	EPanicBadNumberOfBytesAvailable=3,
	EPanicBadNumberOfBytesThatCanBeMadeAvailable=4,
	EPanicBadNumberOfBytesMadeAvailable1=5,
	EPanicBadNumberOfBytesMadeAvailable2=6,
	EPanicBadDescriptorSubDivision1=7,
	EPanicBadDescriptorSubDivision2=8,
	EPanicBadDescriptorSubDivision3=9,
	EPanicBadDescriptorSubDivision4=10,
	EPanicBadPointers1=11,
	EPanicBadPointers2=12,
	EPanicBadPointers3=13,
	EPanicBadPointers4=14,
	EPanicBadPointers5=15,
	EPanicBadPointers6=16,
	EPanicBadPointers7=17,
	EPanicBadPointers8=18,
	EPanicBadPointers9=19,
	EPanicBadPointers10=20,
	EPanicBadPointers11=21,
	EPanicBadPointers12=22,
	EPanicStillInGbBlock=23,
	EPanicBadState=24,
	EPanicSplitBoundaryIsNotAsLateAsPossible1=25,
	EPanicSplitBoundaryIsNotAsLateAsPossible2=26,
	EPanicBadGb2312Index=27,
	EPanicBadHzIndex=28,
	EPanicBadTildeSequence=29,
	EPanicBadReturnValue1=30,
	EPanicBadReturnValue2=31,
	EPanicRemainderOfHzHasGotLonger=32
	};

// Panics the current thread with category "HZ" and the given reason.
LOCAL_C void Panic(TPanic aPanic)
	{
	User::Panic(KLitPanicText, aPanic);
	}

#endif
sl@0
    79
sl@0
    80
class CHZConverterImpl : public CCharacterSetConverterPluginInterface
sl@0
    81
	{
sl@0
    82
sl@0
    83
public:
sl@0
    84
	virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters();
sl@0
    85
sl@0
    86
	virtual TInt ConvertFromUnicode(
sl@0
    87
		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
sl@0
    88
		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
sl@0
    89
		TDes8& aForeign, 
sl@0
    90
		const TDesC16& aUnicode, 
sl@0
    91
		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
sl@0
    92
sl@0
    93
	virtual TInt ConvertToUnicode(
sl@0
    94
		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
sl@0
    95
		TDes16& aUnicode, 
sl@0
    96
		const TDesC8& aForeign, 
sl@0
    97
		TInt& aState, 
sl@0
    98
		TInt& aNumberOfUnconvertibleCharacters, 
sl@0
    99
		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
sl@0
   100
sl@0
   101
	virtual TBool IsInThisCharacterSetL(
sl@0
   102
		TBool& aSetToTrue, 
sl@0
   103
		TInt& aConfidenceLevel, 
sl@0
   104
		const TDesC8& aSample);
sl@0
   105
sl@0
   106
	static CHZConverterImpl* NewL();
sl@0
   107
	virtual ~CHZConverterImpl();
sl@0
   108
sl@0
   109
private:
sl@0
   110
	CHZConverterImpl();
sl@0
   111
sl@0
   112
	};
sl@0
   113
sl@0
   114
sl@0
   115
sl@0
   116
// HZ shares the replacement sequence of the GB2312 converter, so simply forward to it.
const TDesC8& CHZConverterImpl::ReplacementForUnconvertibleUnicodeCharacters()
	{
	const TDesC8& replacement=CnvGb2312::ReplacementForUnconvertibleUnicodeCharacters();
	return replacement;
	}
sl@0
   120
sl@0
   121
// Records that one more Unicode character (counted from the end of the input) is being
// left unconverted. If that character had been reported as unconvertible, its index is
// removed from aIndicesOfUnconvertibleCharacters, since it is no longer part of the output.
// The seemingly haphazard order of the parameters matches the position of the second and
// third parameters with the caller.
LOCAL_C void IncrementNumberOfUnicodeCharactersNotConverted(TInt aLengthOfUnicode, TInt& aNumberOfUnicodeCharactersNotConverted, CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
	{
	++aNumberOfUnicodeCharactersNotConverted;
	const TInt droppedIndex=aLengthOfUnicode-aNumberOfUnicodeCharactersNotConverted;
#if defined(_DEBUG)
	TInt matchCount=0;
#endif
	// walk backwards, as entries may be removed from the array while iterating
	TInt i=aIndicesOfUnconvertibleCharacters.NumberOfIndices();
	while (--i>=0)
		{
		if (aIndicesOfUnconvertibleCharacters[i]!=droppedIndex)
			{
			continue;
			}
		aIndicesOfUnconvertibleCharacters.Remove(i);
#if defined(_DEBUG)
		++matchCount;
#endif
		}
	__ASSERT_DEBUG(matchCount<=1, Panic(EPanicTooManyMatchingIndicesFound));
	}
sl@0
   140
sl@0
   141
// Frees space at the end of the buffer by dropping whole characters from its tail until
// aNumberOfBytesAvailable reaches aNumberOfBytesRequiredToBeAvailable, or until
// aNumberOfBytesThatCanBeMadeAvailable bytes have been given up. It frees as much as it
// can even when the requirement cannot be met in full, so the caller must re-check
// aNumberOfBytesAvailable afterwards. Every dropped character is counted as "not converted".
// The seemingly haphazard order of the parameters matches the position of the second to
// fourth parameters (inclusive) with the caller.
LOCAL_C void MakeAvailable(TInt aNumberOfBytesRequiredToBeAvailable, TInt& aNumberOfUnicodeCharactersNotConverted, CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, TInt aLengthOfUnicode, const TUint8*& aPointerToLastUsedByte, TInt& aNumberOfBytesAvailable, TInt aNumberOfBytesThatCanBeMadeAvailable)
	{
	__ASSERT_DEBUG(aNumberOfBytesRequiredToBeAvailable>0, Panic(EPanicBadNumberOfBytesRequiredToBeAvailable));
	__ASSERT_DEBUG(aNumberOfBytesAvailable>=0, Panic(EPanicBadNumberOfBytesAvailable));
	__ASSERT_DEBUG(aNumberOfBytesThatCanBeMadeAvailable>=0, Panic(EPanicBadNumberOfBytesThatCanBeMadeAvailable));
	TInt bytesFreed=0;
	while (aNumberOfBytesAvailable<aNumberOfBytesRequiredToBeAvailable)
		{
		__ASSERT_DEBUG(bytesFreed<=aNumberOfBytesThatCanBeMadeAvailable, Panic(EPanicBadNumberOfBytesMadeAvailable1));
		if (bytesFreed>=aNumberOfBytesThatCanBeMadeAvailable)
			{
			break; // give up - no more can be done
			}
		// a byte with its top bit set is treated as the tail of a two-byte character
		TInt bytesInLastCharacter=1;
		if (*aPointerToLastUsedByte&0x80)
			{
			bytesInLastCharacter=2;
			}
		aPointerToLastUsedByte-=bytesInLastCharacter;
		aNumberOfBytesAvailable+=bytesInLastCharacter;
		bytesFreed+=bytesInLastCharacter;
		IncrementNumberOfUnicodeCharactersNotConverted(aLengthOfUnicode, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters);
		}
	__ASSERT_DEBUG(bytesFreed<=aNumberOfBytesThatCanBeMadeAvailable, Panic(EPanicBadNumberOfBytesMadeAvailable2));
	}
sl@0
   167
sl@0
   168
// Rewrites the GB2312 text held in aDescriptor as HZ, in place:
//  - each run of two-byte characters is wrapped in "~{" ... "~}" and has its top bits cleared,
//  - each literal '~' is doubled to "~~".
// The text grows as escape sequences are inserted; whenever there is not enough spare room
// in the descriptor, characters are dropped from the end of the text (see MakeAvailable) and
// accounted for in aNumberOfUnicodeCharactersNotConverted / aIndicesOfUnconvertibleCharacters.
LOCAL_C void ConvertFromGb2312ToHzInPlace(TDes8& aDescriptor, TInt& aNumberOfUnicodeCharactersNotConverted, CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, TInt aLengthOfUnicode)
	{
	const TInt originalLengthOfDescriptor=aDescriptor.Length();
	if (originalLengthOfDescriptor<=0)
		{
		return; // it is legal for aDescriptor to be of length 0 - there is nothing to do
		}
	TInt numberOfBytesAvailable=aDescriptor.MaxLength()-originalLengthOfDescriptor;
	// everything up to and including pointerToPreviousByte is already HZ; everything after it,
	// up to and including pointerToLastUsedByte, is still GB2312
	TUint8* pointerToPreviousByte=CONST_CAST(TUint8*, aDescriptor.Ptr()-1);
	const TUint8* pointerToLastUsedByte=pointerToPreviousByte+originalLengthOfDescriptor;
	TBool isInGbBlock=EFalse;
	FOREVER
		{
		__ASSERT_DEBUG((pointerToLastUsedByte-(aDescriptor.Ptr()-1))+numberOfBytesAvailable==aDescriptor.MaxLength(), Panic(EPanicBadDescriptorSubDivision1));
		__ASSERT_DEBUG(pointerToPreviousByte<pointerToLastUsedByte, Panic(EPanicBadPointers1));
		const TUint currentByte=*(pointerToPreviousByte+1);
		if (currentByte&0x80)
			{
			// first byte of a two-byte character
			if (!isInGbBlock)
				{
				// the last argument is not a typo - the two-byte character currently pointed to by
				// (pointerToPreviousByte+1) must not itself be given up to make room
				MakeAvailable(4, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters, aLengthOfUnicode, pointerToLastUsedByte, numberOfBytesAvailable, (pointerToLastUsedByte-pointerToPreviousByte)-2);
				if (numberOfBytesAvailable<4) // 4 bytes are required for the "~{" "~}" escape sequences (ensuring at least one double-byte character fits in the GB-block)
					{
					break;
					}
				isInGbBlock=ETrue;
				// shift the unprocessed tail up by two bytes and write "~{" into the gap
				Mem::Copy(pointerToPreviousByte+3, pointerToPreviousByte+1, pointerToLastUsedByte-pointerToPreviousByte);
				++pointerToPreviousByte;
				*pointerToPreviousByte='~';
				++pointerToPreviousByte;
				*pointerToPreviousByte='{';
				numberOfBytesAvailable-=2;
				pointerToLastUsedByte+=2;
				}
			// clear the top bit of both bytes of the character
			++pointerToPreviousByte;
			*pointerToPreviousByte&=~0x80;
			__ASSERT_DEBUG(pointerToPreviousByte<pointerToLastUsedByte, Panic(EPanicBadPointers2));
			++pointerToPreviousByte;
			*pointerToPreviousByte&=~0x80;
			}
		else
			{
			// one-byte character
			if (isInGbBlock)
				{
closeGbBlock: // also jumped to from the bottom of the loop when the text ends inside a GB-block
				isInGbBlock=EFalse;
				MakeAvailable(2, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters, aLengthOfUnicode, pointerToLastUsedByte, numberOfBytesAvailable, pointerToLastUsedByte-pointerToPreviousByte);
				if (numberOfBytesAvailable<2) // 2 bytes are required for the "~}" escape sequence
					{
					// no room even after giving up the tail: count one more character as not
					// converted and overwrite the last two bytes written with "~}"
					IncrementNumberOfUnicodeCharactersNotConverted(aLengthOfUnicode, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters);
					*(pointerToPreviousByte-1)='~';
					*pointerToPreviousByte='}';
					break;
					}
				// shift the unprocessed tail up by two bytes and write "~}" into the gap
				Mem::Copy(pointerToPreviousByte+3, pointerToPreviousByte+1, pointerToLastUsedByte-pointerToPreviousByte);
				++pointerToPreviousByte;
				*pointerToPreviousByte='~';
				++pointerToPreviousByte;
				*pointerToPreviousByte='}';
				numberOfBytesAvailable-=2;
				pointerToLastUsedByte+=2;
				__ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastUsedByte, Panic(EPanicBadPointers3));
				if (pointerToPreviousByte>=pointerToLastUsedByte)
					{
					break; // nothing is left after the closing escape sequence
					}
				}
			if (currentByte=='~')
				{
				// the last argument is not a typo - the "~" currently pointed to by
				// (pointerToPreviousByte+1) must not itself be given up to make room
				MakeAvailable(1, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters, aLengthOfUnicode, pointerToLastUsedByte, numberOfBytesAvailable, (pointerToLastUsedByte-pointerToPreviousByte)-1);
				if (numberOfBytesAvailable<1) // 1 byte is required for the extra "~" character
					{
					break;
					}
				// shift the unprocessed tail up by one byte and write the extra '~' into the gap
				Mem::Copy(pointerToPreviousByte+2, pointerToPreviousByte+1, pointerToLastUsedByte-pointerToPreviousByte);
				++pointerToPreviousByte;
				*pointerToPreviousByte='~';
				numberOfBytesAvailable-=1;
				pointerToLastUsedByte+=1;
				}
			++pointerToPreviousByte;
			}
		__ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastUsedByte, Panic(EPanicBadPointers4));
		if (pointerToPreviousByte>=pointerToLastUsedByte)
			{
			if (isInGbBlock)
				{
				goto closeGbBlock; // this is to share the code for closing the GB-block
				}
			break;
			}
		}
	__ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastUsedByte, Panic(EPanicBadPointers5));
	if (pointerToPreviousByte<pointerToLastUsedByte)
		{
		// the loop stopped in front of a character that could not be escaped: drop it
		__ASSERT_DEBUG((pointerToPreviousByte==pointerToLastUsedByte-1) || (pointerToPreviousByte==pointerToLastUsedByte-2), Panic(EPanicBadPointers6));
		numberOfBytesAvailable+=(pointerToLastUsedByte-pointerToPreviousByte);
		pointerToLastUsedByte=pointerToPreviousByte;
		IncrementNumberOfUnicodeCharactersNotConverted(aLengthOfUnicode, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters);
		}
	// if it gets out from FOREVER, isInGbBlock could not be ETrue - so the assert below is not needed
	//__ASSERT_DEBUG(!isInGbBlock, Panic(EPanicStillInGbBlock));
	aDescriptor.SetLength(aDescriptor.MaxLength()-numberOfBytesAvailable);
	__ASSERT_DEBUG(aDescriptor.Length()==pointerToLastUsedByte-(aDescriptor.Ptr()-1), Panic(EPanicBadDescriptorSubDivision2));
	}
sl@0
   273
sl@0
   274
// Converts Unicode to HZ in two steps: first to plain GB2312 using the GB2312 conversion
// data, then the GB2312 bytes are rewritten in place with the HZ escape sequences.
// Returns the number of Unicode characters not converted, or a negative error code.
TInt CHZConverterImpl::ConvertFromUnicode(
		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters,
		TDes8& aForeign,
		const TDesC16& aUnicode,
		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
	{
	TInt numberOfUnicodeCharactersNotConverted=CCnvCharacterSetConverter::DoConvertFromUnicode(CnvGb2312::ConversionData(), aDefaultEndiannessOfForeignCharacters, aReplacementForUnconvertibleUnicodeCharacters, aForeign, aUnicode, aIndicesOfUnconvertibleCharacters);
	if (numberOfUnicodeCharactersNotConverted>=0)
		{
		// not an error-code: add the HZ escapes (this may increase the "not converted" count)
		ConvertFromGb2312ToHzInPlace(aForeign, numberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters, aUnicode.Length());
		}
	return numberOfUnicodeCharactersNotConverted;
	}
sl@0
   289
sl@0
   290
// Strips the HZ escape sequences from the start of aRemainderOfHz and writes the resulting
// GB2312 bytes (two-byte characters get their top bits set) into aGb2312.
//
// aGb2312                - receives the GB2312 text for this pass (its length is set here)
// aHzBeingConsumed       - set to the leading part of aRemainderOfHz handled in this pass
// aRemainderOfHz         - in: HZ text still to convert (must not be empty - this function
//                          panics if it is of length 0); out: what is left after this pass
// aState                 - in/out: KIsInGbBlock or CCnvCharacterSetConverter::KStateDefault
// aOutputConversionFlags - EOutputConversionFlagInputIsTruncated is OR-ed in when the pass
//                          stops in the middle of a tilde sequence or a two-byte character
//
// Returns 0 on success or CCnvCharacterSetConverter::EErrorIllFormedInput.
//
// At most KLengthOfIntermediateBuffer input bytes of ordinary text are handled per pass, but
// tilde sequences that follow are still consumed so that aState reflects the state *after*
// the split. A "~~" sequence produces an output byte, so it is only consumed while aGb2312
// still has room; otherwise it is left at the start of aRemainderOfHz for the next pass
// (previously this case wrote one byte past the end of the intermediate buffer).
LOCAL_C TInt ConvertFromHzToHomogeneousGb2312(TBuf8<KLengthOfIntermediateBuffer>& aGb2312, TPtrC8& aHzBeingConsumed, TPtrC8& aRemainderOfHz, TInt& aState, TUint& aOutputConversionFlags)
	{
	TUint8* pointerToPreviousGb2312Byte=CONST_CAST(TUint8*, aGb2312.Ptr()-1);
	const TUint8* const pointerToLastGb2312ByteAvailable=aGb2312.Ptr()+(KLengthOfIntermediateBuffer-1);
	const TUint8* pointerToCurrentHzByte=aRemainderOfHz.Ptr();
	const TUint8* const pointerToLastHzByte=pointerToCurrentHzByte+(aRemainderOfHz.Length()-1);
	const TUint8* const pointerToLastHzByteToConvertThisTime=Min(pointerToLastHzByte, pointerToCurrentHzByte+(KLengthOfIntermediateBuffer-1));
	FOREVER
		{
		const TUint currentHzByte=*pointerToCurrentHzByte;
		if (currentHzByte=='~')
			{
			__ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers7));
			if (pointerToCurrentHzByte>=pointerToLastHzByte)
				{
				// the tilde is the very last byte, so its partner has not arrived yet
				aOutputConversionFlags|=CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated;
				--pointerToCurrentHzByte;
				break;
				}
			if ((*(pointerToCurrentHzByte+1)=='~') && (pointerToPreviousGb2312Byte>=pointerToLastGb2312ByteAvailable))
				{
				// "~~" would emit a byte but the intermediate buffer is already full:
				// stop in front of the sequence and leave it for the next pass
				--pointerToCurrentHzByte;
				break;
				}
			++pointerToCurrentHzByte;
			const TUint nextHzByte=*pointerToCurrentHzByte;
			switch (nextHzByte)
				{
			case '{': // start of a GB-block
				if (aState==KIsInGbBlock)
					{
					return CCnvCharacterSetConverter::EErrorIllFormedInput;
					}
				aState=KIsInGbBlock;
				break;
			case '}': // end of a GB-block
				if (aState==CCnvCharacterSetConverter::KStateDefault)
					{
					return CCnvCharacterSetConverter::EErrorIllFormedInput;
					}
				aState=CCnvCharacterSetConverter::KStateDefault;
				break;
			case '~': // escaped literal tilde
				++pointerToPreviousGb2312Byte;
				*pointerToPreviousGb2312Byte=STATIC_CAST(TUint8, currentHzByte);
				break;
			case 0x0a: // tilde followed by line-feed produces no output
				break;
			default:
				return CCnvCharacterSetConverter::EErrorIllFormedInput;
				}
			}
		else
			{
			__ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers8));
			if (pointerToCurrentHzByte>pointerToLastHzByteToConvertThisTime)
				{
				// ordinary text beyond this pass's quota: stop just before it
				--pointerToCurrentHzByte;
				break;
				}
			if (aState==CCnvCharacterSetConverter::KStateDefault)
				{
				// one-byte text is copied through unchanged
				++pointerToPreviousGb2312Byte;
				*pointerToPreviousGb2312Byte=STATIC_CAST(TUint8, currentHzByte);
				}
			else
				{
				__ASSERT_DEBUG(aState==KIsInGbBlock, Panic(EPanicBadState));
				__ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByteToConvertThisTime, Panic(EPanicBadPointers9));
				if (pointerToCurrentHzByte>=pointerToLastHzByteToConvertThisTime)
					{
					// only the first byte of a two-byte character is within this pass
					aOutputConversionFlags|=CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated;
					--pointerToCurrentHzByte;
					break;
					}
				// two-byte character: set the top bit of both bytes to get GB2312
				++pointerToCurrentHzByte;
				++pointerToPreviousGb2312Byte;
				*pointerToPreviousGb2312Byte=STATIC_CAST(TUint8, currentHzByte|0x80);
				++pointerToPreviousGb2312Byte;
				*pointerToPreviousGb2312Byte=STATIC_CAST(TUint8, *pointerToCurrentHzByte|0x80);
				}
			}
		__ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers10));
		if (pointerToCurrentHzByte>=pointerToLastHzByte)
			{
			break;
			}
		++pointerToCurrentHzByte;
		}
	aGb2312.SetLength((pointerToPreviousGb2312Byte+1)-aGb2312.Ptr());
	const TInt numberOfHzBytesBeingConsumed=(pointerToCurrentHzByte+1)-aRemainderOfHz.Ptr();
	aHzBeingConsumed.Set(aRemainderOfHz.Left(numberOfHzBytesBeingConsumed));
	aRemainderOfHz.Set(aRemainderOfHz.Mid(numberOfHzBytesBeingConsumed));
#if defined(_DEBUG)
	// AAA: check that if the split occurs on a boundary between some one-byte and some two-byte text, then aState corresponds to the state *after* the split (the code marked "BBB" relies on this)
	if (aRemainderOfHz.Length()>=2)
		{
		__ASSERT_DEBUG(aRemainderOfHz.Left(2)!=_L8("~{"), Panic(EPanicSplitBoundaryIsNotAsLateAsPossible1));
		__ASSERT_DEBUG(aRemainderOfHz.Left(2)!=_L8("~}"), Panic(EPanicSplitBoundaryIsNotAsLateAsPossible2));
		}
#endif
	return 0;
	}
sl@0
   388
sl@0
   389
// Maps a byte index within the escape-free GB2312 text back to the corresponding byte index
// within the HZ text (aHz) it was produced from, by walking aHz and accumulating the number
// of bytes that the tilde sequences contributed.
// When several HZ indices correspond to the same GB2312 index (i.e. around tilde sequences),
// aReturnMaximalHzIndex selects the largest of them, otherwise the smallest is returned.
//
// This function panics if aHz is of length 0.
// aHz may start in either KIsInGbBlock or CCnvCharacterSetConverter::KStateDefault state, but
// it must *not* have any truncated sequences (i.e. an incomplete "tilde <something>" sequence,
// or part of a two-byte character) at either its start or its end.
LOCAL_C TInt Gb2312IndexToHzIndex(const TDesC8& aHz, TInt aGb2312Index, TBool aReturnMaximalHzIndex)
	{
	__ASSERT_DEBUG(aGb2312Index>=0, Panic(EPanicBadGb2312Index));
	TInt hzIndex=0;
	TInt escapeBytesSeen=0; // offset from the GB2312 index to the HZ index
	const TUint8* const firstHzByte=aHz.Ptr();
	const TUint8* const lastHzByte=firstHzByte+(aHz.Length()-1);
	const TUint8* currentHzByte=firstHzByte;
	FOREVER
		{
		const TInt newHzIndex=currentHzByte-firstHzByte;
		const TInt candidateHzIndex=aGb2312Index+escapeBytesSeen;
		__ASSERT_DEBUG(hzIndex<=candidateHzIndex, Panic(EPanicBadHzIndex));
		TBool targetReached;
		if (aReturnMaximalHzIndex)
			{
			targetReached=(newHzIndex>candidateHzIndex);
			}
		else
			{
			targetReached=(hzIndex>=candidateHzIndex);
			}
		if (targetReached)
			{
			break;
			}
		hzIndex=newHzIndex;
		if (*currentHzByte=='~')
			{
			__ASSERT_DEBUG(currentHzByte<=lastHzByte, Panic(EPanicBadPointers11));
			if (currentHzByte>=lastHzByte)
				{
				break;
				}
			++currentHzByte;
			const TUint byteAfterTilde=*currentHzByte;
			if (byteAfterTilde=='~')
				{
				// "~~" yields one GB2312 byte, so only one extra byte was used in the HZ text
				++escapeBytesSeen;
				}
			else
				{
				// "~{", "~}" and "~\n" yield nothing, so both bytes are extra
				__ASSERT_DEBUG((byteAfterTilde=='{') || (byteAfterTilde=='}') || (byteAfterTilde==0x0a), Panic(EPanicBadTildeSequence));
				escapeBytesSeen+=2;
				}
			}
		__ASSERT_DEBUG(currentHzByte<=lastHzByte, Panic(EPanicBadPointers12));
		if (currentHzByte>=lastHzByte)
			{
			break;
			}
		++currentHzByte;
		}
	return hzIndex;
	}
sl@0
   437
sl@0
   438
// Converts HZ text to Unicode.
//
// The input is processed in passes: each pass strips the HZ escapes from a chunk of aForeign
// into a small intermediate GB2312 buffer, which is then appended to aUnicode using the
// GB2312 conversion data.
//
// aUnicode  - receives the converted text (emptied first)
// aForeign  - the HZ text to convert
// aState    - in/out: KIsInGbBlock or CCnvCharacterSetConverter::KStateDefault, so that a
//             GB-block may span several calls
// aNumberOfUnconvertibleCharacters / aIndexOfFirstByteOfFirstUnconvertibleCharacter
//           - updated when a chunk contains unconvertible characters; the index is expressed
//             in terms of aForeign (i.e. HZ bytes)
//
// Returns the number of bytes at the end of aForeign that were not converted, or a negative
// error code. Empty input returns 0 immediately, because the chunking helper must not be
// called with an empty descriptor.
TInt CHZConverterImpl::ConvertToUnicode(
		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
		TDes16& aUnicode,
		const TDesC8& aForeign,
		TInt& aState,
		TInt& aNumberOfUnconvertibleCharacters,
		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
	{
	aUnicode.SetLength(0);
	if (aForeign.Length()==0)
		{
		return 0; // nothing to convert, and nothing left unconverted
		}
	TPtrC8 remainderOfHz(aForeign);
	TInt numberOfHzBytesConsumed=0;
	TUint outputConversionFlags=0;
	TUint inputConversionFlags=CCnvCharacterSetConverter::EInputConversionFlagAppend;
	const SCnvConversionData& gb2312ConversionData=CnvGb2312::ConversionData();
	FOREVER
		{
		__ASSERT_DEBUG(numberOfHzBytesConsumed+remainderOfHz.Length()==aForeign.Length(), Panic(EPanicBadDescriptorSubDivision3));
#if defined(_DEBUG)
		const TInt oldLengthOfRemainderOfHz=remainderOfHz.Length();
#endif
		// step 1: HZ -> homogeneous GB2312 for the next chunk
		TBuf8<KLengthOfIntermediateBuffer> gb2312;
		TPtrC8 hzBeingConsumed;
		const TInt returnValue1=ConvertFromHzToHomogeneousGb2312(gb2312, hzBeingConsumed, remainderOfHz, aState, outputConversionFlags);
		if (returnValue1<0)
			{
			return returnValue1; // this is an error-code
			}
		__ASSERT_DEBUG(returnValue1==0, Panic(EPanicBadReturnValue1));
		__ASSERT_DEBUG(hzBeingConsumed.Length()+remainderOfHz.Length()==oldLengthOfRemainderOfHz, Panic(EPanicRemainderOfHzHasGotLonger));
		if (hzBeingConsumed.Length()==0)
			{
			break; // no progress is possible
			}
		// step 2: GB2312 -> Unicode, appended to what has been converted so far
		TInt numberOfUnconvertibleCharacters;
		TInt indexOfFirstByteOfFirstUnconvertibleCharacter;
		const TInt returnValue2=CCnvCharacterSetConverter::DoConvertToUnicode(gb2312ConversionData, aDefaultEndiannessOfForeignCharacters, aUnicode, gb2312, numberOfUnconvertibleCharacters, indexOfFirstByteOfFirstUnconvertibleCharacter, outputConversionFlags, inputConversionFlags);
		if (returnValue2<0)
			{
			return returnValue2; // this is an error-code
			}
		if (numberOfUnconvertibleCharacters>0)
			{
			if (aNumberOfUnconvertibleCharacters==0)
				{
				// first unconvertible character overall: translate its GB2312 index into an index into aForeign
				aIndexOfFirstByteOfFirstUnconvertibleCharacter=numberOfHzBytesConsumed+Gb2312IndexToHzIndex(hzBeingConsumed, indexOfFirstByteOfFirstUnconvertibleCharacter, EFalse);
				}
			aNumberOfUnconvertibleCharacters+=numberOfUnconvertibleCharacters;
			}
		if (returnValue2>0)
			{
			// only part of the chunk was converted: work out how much HZ that corresponds to, then stop
			const TInt numberOfGb2312BytesConverted=gb2312.Length()-returnValue2;
			__ASSERT_DEBUG(numberOfGb2312BytesConverted>=0, Panic(EPanicBadReturnValue2));
			// don't call gb2312.SetLength(numberOfGb2312BytesConverted) as we want to access gb2312[numberOfGb2312BytesConverted] - in any case, gb2312's length is never going to be used again
			// don't bother re-setting remainderOfHz as it won't be used again
			numberOfHzBytesConsumed+=Gb2312IndexToHzIndex(hzBeingConsumed, numberOfGb2312BytesConverted, ETrue);
			aState=(gb2312[numberOfGb2312BytesConverted]&0x80)? KIsInGbBlock: CCnvCharacterSetConverter::KStateDefault; // BBB: if the split (between the text that was converted and the text that wasn't converted) occurs on a boundary between some one-byte and some two-byte text, then aState corresponds to the state *after* the split (the code marked "AAA" checks this) - this means that we set aState according to gb2312[numberOfGb2312BytesConverted] rather than gb2312[numberOfGb2312BytesConverted-1]
			break;
			}
		numberOfHzBytesConsumed+=hzBeingConsumed.Length();
		remainderOfHz.Set(aForeign.Mid(numberOfHzBytesConsumed));
		__ASSERT_DEBUG(numberOfHzBytesConsumed+remainderOfHz.Length()==aForeign.Length(), Panic(EPanicBadDescriptorSubDivision4));
		if (remainderOfHz.Length()==0)
			{
			break; // everything has been converted
			}
		if (numberOfHzBytesConsumed>0)
			{
			inputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable;
			}
		}
	// N.B. remainderOfHz is in an undefined state by this point
	if ((numberOfHzBytesConsumed==0) && (outputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated))
		{
		return CCnvCharacterSetConverter::EErrorIllFormedInput;
		}
	return aForeign.Length()-numberOfHzBytesConsumed;
	}
sl@0
   515
sl@0
   516
// Estimates how likely it is that aSample is HZ-encoded.
//
// aSetToTrue       - always set to ETrue
// aConfidenceLevel - set to a confidence value, clamped to the range 0..100
// aSample          - the bytes to examine
//
// Starting from 50, the confidence is
//  - raised by the percentage of the sample made up of "~{", "~}" and "~~" sequences,
//  - lowered by up to 4 when that percentage is below 4,
//  - lowered by the percentage of bytes above 0x7e, which are counted as non-HZ.
// Always returns ETrue.
TBool CHZConverterImpl::IsInThisCharacterSetL(
		TBool& aSetToTrue,
		TInt& aConfidenceLevel,
		const TDesC8& aSample)
	{
	aSetToTrue=ETrue;
	aConfidenceLevel=50;
	const TInt sampleLength=aSample.Length();
	TInt tildePairCount=0;
	TInt nonHzByteCount=0;
	// Hz encoding uses escape sequences...
	for (TInt i=0; i<sampleLength; ++i)
		{
		const TUint currentByte=aSample[i];
		if (currentByte>0x7e)
			{
			++nonHzByteCount;
			}
		if (currentByte==0x7e)
			{
			const TInt nextIndex=i+1;
			if (nextIndex>=sampleLength)
				{
				break; // a trailing '~' has no partner to examine
				}
			const TUint nextByte=aSample[nextIndex];
			if ((nextByte==0x7b) || (nextByte==0x7d) || (nextByte==0x7e))
				{
				++tildePairCount;
				++i; // the second byte of the sequence has been dealt with
				}
			}
		}
	if (sampleLength)
		{
		const TInt percentageOfTildeBytes=2*tildePairCount*100/sampleLength;
		aConfidenceLevel=aConfidenceLevel-Max(0, (4-percentageOfTildeBytes));
		aConfidenceLevel+=percentageOfTildeBytes;
		aConfidenceLevel-=((nonHzByteCount*100)/sampleLength);
		}
	// keep the result a valid percentage: the adjustments above can otherwise push it
	// below 0 (mostly 8-bit data) or above 100 (mostly tilde sequences)
	if (aConfidenceLevel<0)
		{
		aConfidenceLevel=0;
		}
	else if (aConfidenceLevel>100)
		{
		aConfidenceLevel=100;
		}
	return ETrue;
	}
sl@0
   552
sl@0
   553
// Creates a new converter instance; leaves if the allocation fails.
CHZConverterImpl* CHZConverterImpl::NewL()
	{
	return new(ELeave) CHZConverterImpl();
	}
sl@0
   558
sl@0
   559
// Nothing to release: the class declares no data members of its own.
CHZConverterImpl::~CHZConverterImpl()
	{
	}
sl@0
   562
sl@0
   563
// Private constructor - instances are created through NewL().
CHZConverterImpl::CHZConverterImpl()
	{
	}
sl@0
   566
sl@0
   567
// ECom implementation table: maps the implementation UID to the factory function of the HZ converter.
const TImplementationProxy ImplementationTable[] =
	{
	IMPLEMENTATION_PROXY_ENTRY(0x10006065, CHZConverterImpl::NewL)
	};
sl@0
   571
sl@0
   572
// Exported entry point: reports the number of entries in the implementation table through
// aTableCount and returns the table itself.
EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount)
	{
	const TInt numberOfEntries = sizeof(ImplementationTable) / sizeof(ImplementationTable[0]);
	aTableCount = numberOfEntries;
	return ImplementationTable;
	}