sl@0: /* sl@0: * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies). sl@0: * All rights reserved. sl@0: * This component and the accompanying materials are made available sl@0: * under the terms of "Eclipse Public License v1.0" sl@0: * which accompanies this distribution, and is available sl@0: * at the URL "http://www.eclipse.org/legal/epl-v10.html". sl@0: * sl@0: * Initial Contributors: sl@0: * Nokia Corporation - initial contribution. sl@0: * sl@0: * Contributors: sl@0: * sl@0: * Description: sl@0: * HZ is defined in RFC 1843 sl@0: * sl@0: */ sl@0: sl@0: sl@0: #include sl@0: #include sl@0: #include "gb2312.h" sl@0: #include sl@0: #include sl@0: sl@0: const TInt KIsInGbBlock=CCnvCharacterSetConverter::KStateDefault+1; sl@0: #if defined(_DEBUG) sl@0: const TInt KLengthOfIntermediateBuffer=6; sl@0: #else sl@0: const TInt KLengthOfIntermediateBuffer=150; sl@0: #endif sl@0: sl@0: #if defined(_DEBUG) sl@0: sl@0: _LIT(KLitPanicText, "HZ"); sl@0: sl@0: enum TPanic sl@0: { sl@0: EPanicTooManyMatchingIndicesFound=1, sl@0: EPanicBadNumberOfBytesRequiredToBeAvailable, sl@0: EPanicBadNumberOfBytesAvailable, sl@0: EPanicBadNumberOfBytesThatCanBeMadeAvailable, sl@0: EPanicBadNumberOfBytesMadeAvailable1, sl@0: EPanicBadNumberOfBytesMadeAvailable2, sl@0: EPanicBadDescriptorSubDivision1, sl@0: EPanicBadDescriptorSubDivision2, sl@0: EPanicBadDescriptorSubDivision3, sl@0: EPanicBadDescriptorSubDivision4, sl@0: EPanicBadPointers1, sl@0: EPanicBadPointers2, sl@0: EPanicBadPointers3, sl@0: EPanicBadPointers4, sl@0: EPanicBadPointers5, sl@0: EPanicBadPointers6, sl@0: EPanicBadPointers7, sl@0: EPanicBadPointers8, sl@0: EPanicBadPointers9, sl@0: EPanicBadPointers10, sl@0: EPanicBadPointers11, sl@0: EPanicBadPointers12, sl@0: EPanicStillInGbBlock, sl@0: EPanicBadState, sl@0: EPanicSplitBoundaryIsNotAsLateAsPossible1, sl@0: EPanicSplitBoundaryIsNotAsLateAsPossible2, sl@0: EPanicBadGb2312Index, sl@0: EPanicBadHzIndex, sl@0: EPanicBadTildeSequence, sl@0: 
EPanicBadReturnValue1, sl@0: EPanicBadReturnValue2, sl@0: EPanicRemainderOfHzHasGotLonger sl@0: }; sl@0: sl@0: LOCAL_C void Panic(TPanic aPanic) sl@0: { sl@0: User::Panic(KLitPanicText, aPanic); sl@0: } sl@0: sl@0: #endif sl@0: sl@0: class CHZConverterImpl : public CCharacterSetConverterPluginInterface sl@0: { sl@0: sl@0: public: sl@0: virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters(); sl@0: sl@0: virtual TInt ConvertFromUnicode( sl@0: CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, sl@0: const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, sl@0: TDes8& aForeign, sl@0: const TDesC16& aUnicode, sl@0: CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters); sl@0: sl@0: virtual TInt ConvertToUnicode( sl@0: CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, sl@0: TDes16& aUnicode, sl@0: const TDesC8& aForeign, sl@0: TInt& aState, sl@0: TInt& aNumberOfUnconvertibleCharacters, sl@0: TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter); sl@0: sl@0: virtual TBool IsInThisCharacterSetL( sl@0: TBool& aSetToTrue, sl@0: TInt& aConfidenceLevel, sl@0: const TDesC8& aSample); sl@0: sl@0: static CHZConverterImpl* NewL(); sl@0: virtual ~CHZConverterImpl(); sl@0: sl@0: private: sl@0: CHZConverterImpl(); sl@0: sl@0: }; sl@0: sl@0: sl@0: sl@0: const TDesC8& CHZConverterImpl::ReplacementForUnconvertibleUnicodeCharacters() sl@0: { sl@0: return CnvGb2312::ReplacementForUnconvertibleUnicodeCharacters(); sl@0: } sl@0: sl@0: LOCAL_C void IncrementNumberOfUnicodeCharactersNotConverted(TInt aLengthOfUnicode, TInt& aNumberOfUnicodeCharactersNotConverted, CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters) // these seemingly haphazard order of these paramters is to match the position of the second and third parameters with the caller sl@0: { sl@0: ++aNumberOfUnicodeCharactersNotConverted; sl@0: const TInt 
indexOfUnicodeCharacterNowNotConverted=aLengthOfUnicode-aNumberOfUnicodeCharactersNotConverted; sl@0: #if defined(_DEBUG) sl@0: TInt numberOfMatchingIndicesFound=0; sl@0: #endif sl@0: for (TInt i=aIndicesOfUnconvertibleCharacters.NumberOfIndices()-1; i>=0; --i) // must iterate backwards as items from aIndicesOfUnconvertibleCharacters may be deleted sl@0: { sl@0: if (aIndicesOfUnconvertibleCharacters[i]==indexOfUnicodeCharacterNowNotConverted) sl@0: { sl@0: aIndicesOfUnconvertibleCharacters.Remove(i); sl@0: #if defined(_DEBUG) sl@0: ++numberOfMatchingIndicesFound; sl@0: #endif sl@0: } sl@0: } sl@0: __ASSERT_DEBUG(numberOfMatchingIndicesFound<=1, Panic(EPanicTooManyMatchingIndicesFound)); sl@0: } sl@0: sl@0: LOCAL_C void MakeAvailable(TInt aNumberOfBytesRequiredToBeAvailable, TInt& aNumberOfUnicodeCharactersNotConverted, CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, TInt aLengthOfUnicode, const TUint8*& aPointerToLastUsedByte, TInt& aNumberOfBytesAvailable, TInt aNumberOfBytesThatCanBeMadeAvailable) // these seemingly haphazard order of these paramters is to match the position of the second to fourth parameters (inclusive) with the caller sl@0: // makes available as much of aNumberOfBytesRequiredToBeAvailable as it can, even if the final value (i.e. 
value on returning) of aNumberOfBytesAvailableaNumberOfBytesThatCanBeMadeAvailable+aNumberOfBytesAvailable) sl@0: { sl@0: __ASSERT_DEBUG(aNumberOfBytesRequiredToBeAvailable>0, Panic(EPanicBadNumberOfBytesRequiredToBeAvailable)); sl@0: __ASSERT_DEBUG(aNumberOfBytesAvailable>=0, Panic(EPanicBadNumberOfBytesAvailable)); sl@0: __ASSERT_DEBUG(aNumberOfBytesThatCanBeMadeAvailable>=0, Panic(EPanicBadNumberOfBytesThatCanBeMadeAvailable)); sl@0: TInt numberOfBytesMadeAvailable=0; sl@0: FOREVER sl@0: { sl@0: if (aNumberOfBytesAvailable>=aNumberOfBytesRequiredToBeAvailable) sl@0: { sl@0: break; // no more needs to be done sl@0: } sl@0: __ASSERT_DEBUG(numberOfBytesMadeAvailable<=aNumberOfBytesThatCanBeMadeAvailable, Panic(EPanicBadNumberOfBytesMadeAvailable1)); sl@0: if (numberOfBytesMadeAvailable>=aNumberOfBytesThatCanBeMadeAvailable) sl@0: { sl@0: break; // give up - no more can be done sl@0: } sl@0: const TInt numberOfBytesInCharacter=(*aPointerToLastUsedByte&0x80)? 2: 1; sl@0: aPointerToLastUsedByte-=numberOfBytesInCharacter; sl@0: aNumberOfBytesAvailable+=numberOfBytesInCharacter; sl@0: numberOfBytesMadeAvailable+=numberOfBytesInCharacter; sl@0: IncrementNumberOfUnicodeCharactersNotConverted(aLengthOfUnicode, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters); sl@0: } sl@0: __ASSERT_DEBUG(numberOfBytesMadeAvailable<=aNumberOfBytesThatCanBeMadeAvailable, Panic(EPanicBadNumberOfBytesMadeAvailable2)); sl@0: } sl@0: sl@0: LOCAL_C void ConvertFromGb2312ToHzInPlace(TDes8& aDescriptor, TInt& aNumberOfUnicodeCharactersNotConverted, CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, TInt aLengthOfUnicode) sl@0: { sl@0: // it is legal for aDescriptor to be of length 0 sl@0: const TInt originalLengthOfDescriptor=aDescriptor.Length(); sl@0: if (originalLengthOfDescriptor>0) sl@0: { sl@0: TInt numberOfBytesAvailable=aDescriptor.MaxLength()-originalLengthOfDescriptor; sl@0: TUint8* pointerToPreviousByte=CONST_CAST(TUint8*, 
aDescriptor.Ptr()-1); sl@0: const TUint8* pointerToLastUsedByte=pointerToPreviousByte+originalLengthOfDescriptor; sl@0: TBool isInGbBlock=EFalse; sl@0: FOREVER sl@0: { sl@0: __ASSERT_DEBUG((pointerToLastUsedByte-(aDescriptor.Ptr()-1))+numberOfBytesAvailable==aDescriptor.MaxLength(), Panic(EPanicBadDescriptorSubDivision1)); sl@0: __ASSERT_DEBUG(pointerToPreviousByte=pointerToLastUsedByte) sl@0: { sl@0: break; sl@0: } sl@0: } sl@0: if (currentByte=='~') sl@0: { sl@0: MakeAvailable(1, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters, aLengthOfUnicode, pointerToLastUsedByte, numberOfBytesAvailable, (pointerToLastUsedByte-pointerToPreviousByte)-1); // what's passed into the last parameter is not a typo - we do not want the "~" currently pointed to by (pointerToPreviousByte+1) to be made available sl@0: if (numberOfBytesAvailable<1) // 1 byte is required for the extra "~" character sl@0: { sl@0: break; sl@0: } sl@0: Mem::Copy(pointerToPreviousByte+2, pointerToPreviousByte+1, pointerToLastUsedByte-pointerToPreviousByte); sl@0: ++pointerToPreviousByte; sl@0: *pointerToPreviousByte='~'; sl@0: numberOfBytesAvailable-=1; sl@0: pointerToLastUsedByte+=1; sl@0: } sl@0: ++pointerToPreviousByte; sl@0: } sl@0: __ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastUsedByte, Panic(EPanicBadPointers4)); sl@0: if (pointerToPreviousByte>=pointerToLastUsedByte) sl@0: { sl@0: if (isInGbBlock) sl@0: { sl@0: goto closeGbBlock; // this is to share the code for closing the GB-block sl@0: } sl@0: break; sl@0: } sl@0: } sl@0: __ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastUsedByte, Panic(EPanicBadPointers5)); sl@0: if (pointerToPreviousByte& aGb2312, TPtrC8& aHzBeingConsumed, TPtrC8& aRemainderOfHz, TInt& aState, TUint& aOutputConversionFlags) sl@0: { sl@0: // this function panics if aRemainderOfHz is of length 0 sl@0: TUint8* pointerToPreviousGb2312Byte=CONST_CAST(TUint8*, aGb2312.Ptr()-1); sl@0: const TUint8* pointerToCurrentHzByte=aRemainderOfHz.Ptr(); sl@0: const 
TUint8* const pointerToLastHzByte=pointerToCurrentHzByte+(aRemainderOfHz.Length()-1); sl@0: const TUint8* const pointerToLastHzByteToConvertThisTime=Min(pointerToLastHzByte, pointerToCurrentHzByte+(KLengthOfIntermediateBuffer-1)); sl@0: FOREVER sl@0: { sl@0: const TUint currentHzByte=*pointerToCurrentHzByte; sl@0: if (currentHzByte=='~') sl@0: { sl@0: __ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers7)); sl@0: if (pointerToCurrentHzByte>=pointerToLastHzByte) sl@0: { sl@0: aOutputConversionFlags|=CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated; sl@0: --pointerToCurrentHzByte; sl@0: break; sl@0: } sl@0: ++pointerToCurrentHzByte; sl@0: const TUint nextHzByte=*pointerToCurrentHzByte; sl@0: switch (nextHzByte) sl@0: { sl@0: case '{': sl@0: if (aState==KIsInGbBlock) sl@0: { sl@0: return CCnvCharacterSetConverter::EErrorIllFormedInput; sl@0: } sl@0: aState=KIsInGbBlock; sl@0: break; sl@0: case '}': sl@0: if (aState==CCnvCharacterSetConverter::KStateDefault) sl@0: { sl@0: return CCnvCharacterSetConverter::EErrorIllFormedInput; sl@0: } sl@0: aState=CCnvCharacterSetConverter::KStateDefault; sl@0: break; sl@0: case '~': sl@0: ++pointerToPreviousGb2312Byte; sl@0: *pointerToPreviousGb2312Byte=STATIC_CAST(TUint8, currentHzByte); sl@0: break; sl@0: case 0x0a: sl@0: break; sl@0: default: sl@0: return CCnvCharacterSetConverter::EErrorIllFormedInput; sl@0: } sl@0: } sl@0: else sl@0: { sl@0: __ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers8)); sl@0: if (pointerToCurrentHzByte>pointerToLastHzByteToConvertThisTime) sl@0: { sl@0: --pointerToCurrentHzByte; sl@0: break; sl@0: } sl@0: if (aState==CCnvCharacterSetConverter::KStateDefault) sl@0: { sl@0: ++pointerToPreviousGb2312Byte; sl@0: *pointerToPreviousGb2312Byte=STATIC_CAST(TUint8, currentHzByte); sl@0: } sl@0: else sl@0: { sl@0: __ASSERT_DEBUG(aState==KIsInGbBlock, Panic(EPanicBadState)); sl@0: 
__ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByteToConvertThisTime, Panic(EPanicBadPointers9)); sl@0: if (pointerToCurrentHzByte>=pointerToLastHzByteToConvertThisTime) sl@0: { sl@0: aOutputConversionFlags|=CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated; sl@0: --pointerToCurrentHzByte; sl@0: break; sl@0: } sl@0: ++pointerToCurrentHzByte; sl@0: ++pointerToPreviousGb2312Byte; sl@0: *pointerToPreviousGb2312Byte=STATIC_CAST(TUint8, currentHzByte|0x80); sl@0: ++pointerToPreviousGb2312Byte; sl@0: *pointerToPreviousGb2312Byte=STATIC_CAST(TUint8, *pointerToCurrentHzByte|0x80); sl@0: } sl@0: } sl@0: __ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers10)); sl@0: if (pointerToCurrentHzByte>=pointerToLastHzByte) sl@0: { sl@0: break; sl@0: } sl@0: ++pointerToCurrentHzByte; sl@0: } sl@0: aGb2312.SetLength((pointerToPreviousGb2312Byte+1)-aGb2312.Ptr()); sl@0: const TInt numberOfHzBytesBeingConsumed=(pointerToCurrentHzByte+1)-aRemainderOfHz.Ptr(); sl@0: aHzBeingConsumed.Set(aRemainderOfHz.Left(numberOfHzBytesBeingConsumed)); sl@0: aRemainderOfHz.Set(aRemainderOfHz.Mid(numberOfHzBytesBeingConsumed)); sl@0: #if defined(_DEBUG) sl@0: // AAA: check that if the split occurs on a boundary between some one-byte and some two-byte text, then aState corresponds to the state *after* the split (the code marked "BBB" relies on this) sl@0: if (aRemainderOfHz.Length()>=2) sl@0: { sl@0: __ASSERT_DEBUG(aRemainderOfHz.Left(2)!=_L8("~{"), Panic(EPanicSplitBoundaryIsNotAsLateAsPossible1)); sl@0: __ASSERT_DEBUG(aRemainderOfHz.Left(2)!=_L8("~}"), Panic(EPanicSplitBoundaryIsNotAsLateAsPossible2)); sl@0: } sl@0: #endif sl@0: return 0; sl@0: } sl@0:
// NOTE(review): the line above is the unmodified end of ConvertFromHzToHomogeneousGb2312, whose
// beginning lies before this section; it is kept byte-for-byte because the start of that
// function is incomplete in this text.

// Maps a byte index in the GB2312 text produced from aHz back to a byte index in aHz.
//
// aHz                   - the HZ text that was converted. It must not be of length 0 (the
//                         debug assertions panic in that case). It may start in either the
//                         KIsInGbBlock or the CCnvCharacterSetConverter::KStateDefault state,
//                         but it must not begin or end with a truncated sequence (an
//                         incomplete "~" escape sequence, or half of a two-byte character).
// aGb2312Index          - index into the GB2312 text; must be >=0.
// aReturnMaximalHzIndex - selects the loop's stop condition: when true, scanning continues
//                         until the next position would exceed the candidate index; when
//                         false, it stops as soon as the current index reaches it.
//
// Each "~~" pair shifts the mapping by one byte (two HZ bytes became one GB2312 byte);
// each other escape ("~{", "~}", "~" followed by 0x0a) shifts it by two (they produced
// no GB2312 bytes).
LOCAL_C TInt Gb2312IndexToHzIndex(const TDesC8& aHz, TInt aGb2312Index, TBool aReturnMaximalHzIndex)
    {
    __ASSERT_DEBUG(aGb2312Index>=0, Panic(EPanicBadGb2312Index));
    TInt hzIndex=0;
    TInt offsetFromGb2312IndexToHzIndex=0;
    const TUint8* const pointerToFirstHzByte=aHz.Ptr();
    const TUint8* pointerToCurrentHzByte=pointerToFirstHzByte;
    const TUint8* const pointerToLastHzByte=pointerToFirstHzByte+(aHz.Length()-1);
    FOREVER
        {
        const TInt newHzIndex=pointerToCurrentHzByte-pointerToFirstHzByte;
        const TInt candidateHzIndex=aGb2312Index+offsetFromGb2312IndexToHzIndex;
        __ASSERT_DEBUG(hzIndex<=candidateHzIndex, Panic(EPanicBadHzIndex));
        if (aReturnMaximalHzIndex? (newHzIndex>candidateHzIndex): (hzIndex>=candidateHzIndex))
            {
            break;
            }
        hzIndex=newHzIndex;
        if (*pointerToCurrentHzByte=='~')
            {
            __ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers11));
            if (pointerToCurrentHzByte>=pointerToLastHzByte)
                {
                break;
                }
            ++pointerToCurrentHzByte;
            const TUint currentHzByte=*pointerToCurrentHzByte;
            if (currentHzByte=='~')
                {
                ++offsetFromGb2312IndexToHzIndex;
                }
            else
                {
                __ASSERT_DEBUG((currentHzByte=='{') || (currentHzByte=='}') || (currentHzByte==0x0a), Panic(EPanicBadTildeSequence));
                offsetFromGb2312IndexToHzIndex+=2;
                }
            }
        __ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers12));
        if (pointerToCurrentHzByte>=pointerToLastHzByte)
            {
            break;
            }
        ++pointerToCurrentHzByte;
        }
    return hzIndex;
    }

// Converts HZ-encoded text to Unicode.
//
// aUnicode is reset to length 0 and then filled piece by piece: each pass converts a
// slice of the remaining HZ text to plain GB2312 bytes (ConvertFromHzToHomogeneousGb2312)
// and appends their Unicode equivalent (CCnvCharacterSetConverter::DoConvertToUnicode,
// using the GB2312 conversion data).
//
// aState is read and updated, so a GB block may span several calls.
// aNumberOfUnconvertibleCharacters is only tested against 0 and added to, and
// aIndexOfFirstByteOfFirstUnconvertibleCharacter is only written when that count is 0.
// NOTE(review): this presumably relies on the caller initialising both before the call -
// confirm against the framework.
//
// Returns a negative error code passed up from either helper, or
// CCnvCharacterSetConverter::EErrorIllFormedInput if nothing was consumed and the input
// was flagged as truncated; otherwise the number of bytes at the end of aForeign that
// were not consumed.
//
// NOTE(review): ConvertFromHzToHomogeneousGb2312 appears to apply the
// KLengthOfIntermediateBuffer limit only to non-"~" bytes, so a long enough run of "~~"
// pairs may write more than KLengthOfIntermediateBuffer bytes into gb2312 - verify.
TInt CHZConverterImpl::ConvertToUnicode(
        CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
        TDes16& aUnicode,
        const TDesC8& aForeign,
        TInt& aState,
        TInt& aNumberOfUnconvertibleCharacters,
        TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
    {
    aUnicode.SetLength(0);
    TPtrC8 remainderOfHz(aForeign);
    TInt numberOfHzBytesConsumed=0;
    TUint outputConversionFlags=0;
    TUint inputConversionFlags=CCnvCharacterSetConverter::EInputConversionFlagAppend;
    const SCnvConversionData& gb2312ConversionData=CnvGb2312::ConversionData();
    FOREVER
        {
        __ASSERT_DEBUG(numberOfHzBytesConsumed+remainderOfHz.Length()==aForeign.Length(), Panic(EPanicBadDescriptorSubDivision3));
#if defined(_DEBUG)
        const TInt oldLengthOfRemainderOfHz=remainderOfHz.Length();
#endif
        // NOTE(review): the template argument was missing from the text this was restored
        // from; KLengthOfIntermediateBuffer is this file's intermediate-buffer length - confirm.
        TBuf8<KLengthOfIntermediateBuffer> gb2312;
        TPtrC8 hzBeingConsumed;
        const TInt returnValue1=ConvertFromHzToHomogeneousGb2312(gb2312, hzBeingConsumed, remainderOfHz, aState, outputConversionFlags);
        if (returnValue1<0)
            {
            return returnValue1; // this is an error-code
            }
        __ASSERT_DEBUG(returnValue1==0, Panic(EPanicBadReturnValue1));
        __ASSERT_DEBUG(hzBeingConsumed.Length()+remainderOfHz.Length()==oldLengthOfRemainderOfHz, Panic(EPanicRemainderOfHzHasGotLonger));
        if (hzBeingConsumed.Length()==0)
            {
            break;
            }
        TInt numberOfUnconvertibleCharacters;
        TInt indexOfFirstByteOfFirstUnconvertibleCharacter;
        const TInt returnValue2=CCnvCharacterSetConverter::DoConvertToUnicode(gb2312ConversionData, aDefaultEndiannessOfForeignCharacters, aUnicode, gb2312, numberOfUnconvertibleCharacters, indexOfFirstByteOfFirstUnconvertibleCharacter, outputConversionFlags, inputConversionFlags);
        if (returnValue2<0)
            {
            return returnValue2; // this is an error-code
            }
        if (numberOfUnconvertibleCharacters>0)
            {
            if (aNumberOfUnconvertibleCharacters==0)
                {
                // first unconvertible character seen so far: translate its GB2312 index into an index in aForeign
                aIndexOfFirstByteOfFirstUnconvertibleCharacter=numberOfHzBytesConsumed+Gb2312IndexToHzIndex(hzBeingConsumed, indexOfFirstByteOfFirstUnconvertibleCharacter, EFalse);
                }
            aNumberOfUnconvertibleCharacters+=numberOfUnconvertibleCharacters;
            }
        if (returnValue2>0)
            {
            // some of the GB2312 bytes were left unconverted, so stop after accounting for the converted part
            const TInt numberOfGb2312BytesConverted=gb2312.Length()-returnValue2;
            __ASSERT_DEBUG(numberOfGb2312BytesConverted>=0, Panic(EPanicBadReturnValue2));
            // don't call gb2312.SetLength(numberOfGb2312BytesConverted) as we want to access gb2312[numberOfGb2312BytesConverted] - in any case, gb2312's length is never going to be used again
            // don't bother re-setting remainderOfHz as it won't be used again
            numberOfHzBytesConsumed+=Gb2312IndexToHzIndex(hzBeingConsumed, numberOfGb2312BytesConverted, ETrue);
            // BBB: if the split (between the text that was converted and the text that wasn't converted)
            // occurs on a boundary between some one-byte and some two-byte text, then aState corresponds
            // to the state *after* the split (the code marked "AAA" checks this) - this means that we set
            // aState according to gb2312[numberOfGb2312BytesConverted] rather than
            // gb2312[numberOfGb2312BytesConverted-1]
            aState=(gb2312[numberOfGb2312BytesConverted]&0x80)? KIsInGbBlock: CCnvCharacterSetConverter::KStateDefault;
            break;
            }
        numberOfHzBytesConsumed+=hzBeingConsumed.Length();
        remainderOfHz.Set(aForeign.Mid(numberOfHzBytesConsumed));
        __ASSERT_DEBUG(numberOfHzBytesConsumed+remainderOfHz.Length()==aForeign.Length(), Panic(EPanicBadDescriptorSubDivision4));
        if (remainderOfHz.Length()==0)
            {
            break;
            }
        if (numberOfHzBytesConsumed>0)
            {
            inputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable;
            }
        }
    // N.B. remainderOfHz is in an undefined state by this point
    if ((numberOfHzBytesConsumed==0) && (outputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated))
        {
        return CCnvCharacterSetConverter::EErrorIllFormedInput;
        }
    return aForeign.Length()-numberOfHzBytesConsumed;
    }

// Estimates how likely it is that aSample is HZ-encoded.
//
// aSetToTrue       - always set to ETrue.
// aConfidenceLevel - set to a score that starts at 50 and, for a non-empty sample, is
//                    adjusted as follows (p = percentage of sample bytes that belong to
//                    "~{", "~}" or "~~" pairs):
//                      - reduced by Max(0, 4-p), i.e. a small penalty when escapes are rare;
//                      - increased by p;
//                      - reduced by the percentage of bytes above 0x7e.
//                    The result is clamped to 0..100: without the clamp a sample made only
//                    of escape pairs scores 150 and a sample made only of 8-bit bytes
//                    scores below 0.
// aSample          - the bytes to examine.
//
// Always returns ETrue.
TBool CHZConverterImpl::IsInThisCharacterSetL(
        TBool& aSetToTrue,
        TInt& aConfidenceLevel,
        const TDesC8& aSample)
    {
    aSetToTrue=ETrue;
    const TInt sampleLength=aSample.Length();
    TInt pairOfTilde=0;
    TInt occurrenceOfNonHz=0;
    aConfidenceLevel=50;
    // Hz encoding uses escape sequences...
    for (TInt i=0; i<sampleLength; ++i)
        {
        const TUint currentByte=aSample[i];
        if (currentByte>0x7e)
            {
            ++occurrenceOfNonHz;
            }
        else if (currentByte==0x7e)
            {
            const TInt indexOfNextByte=i+1;
            if (indexOfNextByte>=sampleLength)
                {
                break; // a trailing lone "~" ends the scan
                }
            const TUint nextByte=aSample[indexOfNextByte];
            if ((nextByte==0x7b) || (nextByte==0x7d) || (nextByte==0x7e))
                {
                ++pairOfTilde;
                ++i; // the second byte of the pair has been accounted for
                }
            }
        }
    if (sampleLength)
        {
        const TInt occurrenceOfTilde=2*pairOfTilde*100/sampleLength;
        aConfidenceLevel=aConfidenceLevel-Max(0,(4-occurrenceOfTilde));
        aConfidenceLevel+=occurrenceOfTilde;
        aConfidenceLevel-=((occurrenceOfNonHz*100)/sampleLength);
        }
    // keep the score inside the 0..100 range
    aConfidenceLevel=Max(0, Min(100, aConfidenceLevel));
    return ETrue;
    }

// Allocates a new converter; leaves if the allocation fails (new(ELeave)).
CHZConverterImpl* CHZConverterImpl::NewL()
    {
    CHZConverterImpl* self = new(ELeave) CHZConverterImpl();
    return self;
    }

// Nothing to release.
CHZConverterImpl::~CHZConverterImpl()
    {
    }

// Nothing to initialise.
CHZConverterImpl::CHZConverterImpl()
    {
    }

// Maps the implementation UID of this plug-in to its factory function.
const TImplementationProxy ImplementationTable[] =
    {
        IMPLEMENTATION_PROXY_ENTRY(0x10006065, CHZConverterImpl::NewL)
    };

// Exported entry point: returns the implementation table and stores its number of
// entries in aTableCount.
EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount)
    {
    aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy);

    return ImplementationTable;
    }