1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/textandloc/charconvfw/charconvplugins/src/plugins/hz.cpp Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,577 @@
1.4 +/*
1.5 +* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
1.6 +* All rights reserved.
1.7 +* This component and the accompanying materials are made available
1.8 +* under the terms of "Eclipse Public License v1.0"
1.9 +* which accompanies this distribution, and is available
1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
1.11 +*
1.12 +* Initial Contributors:
1.13 +* Nokia Corporation - initial contribution.
1.14 +*
1.15 +* Contributors:
1.16 +*
1.17 +* Description:
1.18 +* HZ is defined in RFC 1843
1.19 +*
1.20 +*/
1.21 +
1.22 +
1.23 +#include <e32std.h>
1.24 +#include <charconv.h>
1.25 +#include "gb2312.h"
1.26 +#include <ecom/implementationproxy.h>
1.27 +#include <charactersetconverter.h>
1.28 +
1.29 +const TInt KIsInGbBlock=CCnvCharacterSetConverter::KStateDefault+1; // conversion-state value used while the HZ text is inside a "~{" ... "~}" GB block (the state outside a block is KStateDefault)
1.30 +#if defined(_DEBUG)
1.31 +const TInt KLengthOfIntermediateBuffer=6; // capacity in bytes of the intermediate GB2312 buffer in debug builds - NOTE(review): presumably kept tiny so that the chunk-splitting paths are exercised; confirm
1.32 +#else
1.33 +const TInt KLengthOfIntermediateBuffer=150; // capacity in bytes of the intermediate GB2312 buffer in release builds
1.34 +#endif
1.35 +
1.36 +#if defined(_DEBUG)
1.37 +
1.38 +_LIT(KLitPanicText, "HZ"); // panic category handed to User::Panic by the debug-only Panic() helper in this file
1.39 +
1.40 +enum TPanic // panic reason codes used by the __ASSERT_DEBUG checks in this file (only compiled when _DEBUG is defined)
1.41 + {
1.42 + EPanicTooManyMatchingIndicesFound=1,
1.43 + EPanicBadNumberOfBytesRequiredToBeAvailable,
1.44 + EPanicBadNumberOfBytesAvailable,
1.45 + EPanicBadNumberOfBytesThatCanBeMadeAvailable,
1.46 + EPanicBadNumberOfBytesMadeAvailable1,
1.47 + EPanicBadNumberOfBytesMadeAvailable2,
1.48 + EPanicBadDescriptorSubDivision1,
1.49 + EPanicBadDescriptorSubDivision2,
1.50 + EPanicBadDescriptorSubDivision3,
1.51 + EPanicBadDescriptorSubDivision4,
1.52 + EPanicBadPointers1,
1.53 + EPanicBadPointers2,
1.54 + EPanicBadPointers3,
1.55 + EPanicBadPointers4,
1.56 + EPanicBadPointers5,
1.57 + EPanicBadPointers6,
1.58 + EPanicBadPointers7,
1.59 + EPanicBadPointers8,
1.60 + EPanicBadPointers9,
1.61 + EPanicBadPointers10,
1.62 + EPanicBadPointers11,
1.63 + EPanicBadPointers12,
1.64 + EPanicStillInGbBlock, // no longer raised: the only assert that used it is commented out in ConvertFromGb2312ToHzInPlace
1.65 + EPanicBadState,
1.66 + EPanicSplitBoundaryIsNotAsLateAsPossible1,
1.67 + EPanicSplitBoundaryIsNotAsLateAsPossible2,
1.68 + EPanicBadGb2312Index,
1.69 + EPanicBadHzIndex,
1.70 + EPanicBadTildeSequence,
1.71 + EPanicBadReturnValue1,
1.72 + EPanicBadReturnValue2,
1.73 + EPanicRemainderOfHzHasGotLonger
1.74 + };
1.75 +
1.76 +LOCAL_C void Panic(TPanic aPanic) // debug-only helper: calls User::Panic with category KLitPanicText ("HZ") and reason aPanic
1.77 + {
1.78 + User::Panic(KLitPanicText, aPanic);
1.79 + }
1.80 +
1.81 +#endif
1.82 +
1.83 +class CHZConverterImpl : public CCharacterSetConverterPluginInterface // converter plug-in for the HZ encoding (RFC 1843), implemented on top of the GB2312 conversion data
1.84 + {
1.85 +
1.86 +public:
1.87 + virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters(); // forwards to CnvGb2312::ReplacementForUnconvertibleUnicodeCharacters()
1.88 +
1.89 + virtual TInt ConvertFromUnicode( // Unicode -> HZ: converts to GB2312 first, then rewrites aForeign as HZ in place; returns the number of Unicode characters not converted, or a negative error code
1.90 + CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
1.91 + const TDesC8& aReplacementForUnconvertibleUnicodeCharacters,
1.92 + TDes8& aForeign,
1.93 + const TDesC16& aUnicode,
1.94 + CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
1.95 +
1.96 + virtual TInt ConvertToUnicode( // HZ -> Unicode; aState carries KStateDefault / KIsInGbBlock between calls; returns the number of HZ bytes not consumed, or a negative error code
1.97 + CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
1.98 + TDes16& aUnicode,
1.99 + const TDesC8& aForeign,
1.100 + TInt& aState,
1.101 + TInt& aNumberOfUnconvertibleCharacters,
1.102 + TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
1.103 +
1.104 + virtual TBool IsInThisCharacterSetL( // heuristic: writes to aConfidenceLevel an estimate of how likely aSample is HZ text
1.105 + TBool& aSetToTrue,
1.106 + TInt& aConfidenceLevel,
1.107 + const TDesC8& aSample);
1.108 +
1.109 + static CHZConverterImpl* NewL(); // factory referenced from ImplementationTable
1.110 + virtual ~CHZConverterImpl();
1.111 +
1.112 +private:
1.113 + CHZConverterImpl(); // private: instances are created through NewL()
1.114 +
1.115 + };
1.116 +
1.117 +
1.118 +
1.119 +const TDesC8& CHZConverterImpl::ReplacementForUnconvertibleUnicodeCharacters() // returns the replacement byte sequence defined by the GB2312 converter, which this plug-in reuses unchanged
1.120 + {
1.121 + return CnvGb2312::ReplacementForUnconvertibleUnicodeCharacters();
1.122 + }
1.123 +
1.124 +LOCAL_C void IncrementNumberOfUnicodeCharactersNotConverted(TInt aLengthOfUnicode, TInt& aNumberOfUnicodeCharactersNotConverted, CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters) // counts one more Unicode character (taken from the end of the input) as not converted and removes that character's index from aIndicesOfUnconvertibleCharacters if it is listed there; the seemingly haphazard order of the parameters is to match the position of the second and third parameters with the caller
1.125 + {
1.126 + ++aNumberOfUnicodeCharactersNotConverted;
1.127 + const TInt indexOfUnicodeCharacterNowNotConverted=aLengthOfUnicode-aNumberOfUnicodeCharactersNotConverted; // index, within the Unicode input, of the character that has just been given up
1.128 +#if defined(_DEBUG)
1.129 + TInt numberOfMatchingIndicesFound=0;
1.130 +#endif
1.131 + for (TInt i=aIndicesOfUnconvertibleCharacters.NumberOfIndices()-1; i>=0; --i) // must iterate backwards as items from aIndicesOfUnconvertibleCharacters may be deleted
1.132 + {
1.133 + if (aIndicesOfUnconvertibleCharacters[i]==indexOfUnicodeCharacterNowNotConverted)
1.134 + {
1.135 + aIndicesOfUnconvertibleCharacters.Remove(i); // the character is no longer part of the output, so it must not be reported as unconvertible
1.136 +#if defined(_DEBUG)
1.137 + ++numberOfMatchingIndicesFound;
1.138 +#endif
1.139 + }
1.140 + }
1.141 + __ASSERT_DEBUG(numberOfMatchingIndicesFound<=1, Panic(EPanicTooManyMatchingIndicesFound)); // an index is expected to appear at most once in the array
1.142 + }
1.143 +
1.144 +LOCAL_C void MakeAvailable(TInt aNumberOfBytesRequiredToBeAvailable, TInt& aNumberOfUnicodeCharactersNotConverted, CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, TInt aLengthOfUnicode, const TUint8*& aPointerToLastUsedByte, TInt& aNumberOfBytesAvailable, TInt aNumberOfBytesThatCanBeMadeAvailable) // frees space at the end of the text by giving up whole characters from its tail (moving aPointerToLastUsedByte backwards) until aNumberOfBytesAvailable reaches aNumberOfBytesRequiredToBeAvailable or aNumberOfBytesThatCanBeMadeAvailable bytes have been freed; the seemingly haphazard order of the parameters is to match the position of the second to fourth parameters (inclusive) with the caller
1.145 +// makes available as much of aNumberOfBytesRequiredToBeAvailable as it can, even if the final value (i.e. value on returning) of aNumberOfBytesAvailable<aNumberOfBytesRequiredToBeAvailable (i.e. it doesn't initially give up straight away and do nothing if aNumberOfBytesRequiredToBeAvailable>aNumberOfBytesThatCanBeMadeAvailable+aNumberOfBytesAvailable)
1.146 + {
1.147 + __ASSERT_DEBUG(aNumberOfBytesRequiredToBeAvailable>0, Panic(EPanicBadNumberOfBytesRequiredToBeAvailable));
1.148 + __ASSERT_DEBUG(aNumberOfBytesAvailable>=0, Panic(EPanicBadNumberOfBytesAvailable));
1.149 + __ASSERT_DEBUG(aNumberOfBytesThatCanBeMadeAvailable>=0, Panic(EPanicBadNumberOfBytesThatCanBeMadeAvailable));
1.150 + TInt numberOfBytesMadeAvailable=0; // bytes freed so far by this call
1.151 + FOREVER
1.152 + {
1.153 + if (aNumberOfBytesAvailable>=aNumberOfBytesRequiredToBeAvailable)
1.154 + {
1.155 + break; // no more needs to be done
1.156 + }
1.157 + __ASSERT_DEBUG(numberOfBytesMadeAvailable<=aNumberOfBytesThatCanBeMadeAvailable, Panic(EPanicBadNumberOfBytesMadeAvailable1));
1.158 + if (numberOfBytesMadeAvailable>=aNumberOfBytesThatCanBeMadeAvailable)
1.159 + {
1.160 + break; // give up - no more can be done
1.161 + }
1.162 + const TInt numberOfBytesInCharacter=(*aPointerToLastUsedByte&0x80)? 2: 1; // a last byte with its top bit set is treated as the end of a two-byte character
1.163 + aPointerToLastUsedByte-=numberOfBytesInCharacter; // drop that character from the end of the used text...
1.164 + aNumberOfBytesAvailable+=numberOfBytesInCharacter; // ...and hand its bytes back as free space
1.165 + numberOfBytesMadeAvailable+=numberOfBytesInCharacter;
1.166 + IncrementNumberOfUnicodeCharactersNotConverted(aLengthOfUnicode, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters); // the dropped character now counts as not converted
1.167 + }
1.168 + __ASSERT_DEBUG(numberOfBytesMadeAvailable<=aNumberOfBytesThatCanBeMadeAvailable, Panic(EPanicBadNumberOfBytesMadeAvailable2));
1.169 + }
1.170 +
1.171 +LOCAL_C void ConvertFromGb2312ToHzInPlace(TDes8& aDescriptor, TInt& aNumberOfUnicodeCharactersNotConverted, CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, TInt aLengthOfUnicode) // rewrites the GB2312 text in aDescriptor as HZ in place: each run of two-byte characters is wrapped in "~{" ... "~}" with the top bit of every byte cleared, and each "~" is doubled; when aDescriptor has no room for the extra bytes, characters are given up from the end of the text and aNumberOfUnicodeCharactersNotConverted / aIndicesOfUnconvertibleCharacters are updated to match
1.172 + {
1.173 + // it is legal for aDescriptor to be of length 0
1.174 + const TInt originalLengthOfDescriptor=aDescriptor.Length();
1.175 + if (originalLengthOfDescriptor>0)
1.176 + {
1.177 + TInt numberOfBytesAvailable=aDescriptor.MaxLength()-originalLengthOfDescriptor; // free space after the current text
1.178 + TUint8* pointerToPreviousByte=CONST_CAST(TUint8*, aDescriptor.Ptr()-1); // last byte already processed; starts one before the first byte
1.179 + const TUint8* pointerToLastUsedByte=pointerToPreviousByte+originalLengthOfDescriptor; // last byte of the text still to be kept
1.180 + TBool isInGbBlock=EFalse;
1.181 + FOREVER
1.182 + {
1.183 + __ASSERT_DEBUG((pointerToLastUsedByte-(aDescriptor.Ptr()-1))+numberOfBytesAvailable==aDescriptor.MaxLength(), Panic(EPanicBadDescriptorSubDivision1));
1.184 + __ASSERT_DEBUG(pointerToPreviousByte<pointerToLastUsedByte, Panic(EPanicBadPointers1));
1.185 + const TUint currentByte=*(pointerToPreviousByte+1);
1.186 + if (currentByte&0x80) // top bit set: first byte of a two-byte character
1.187 + {
1.188 + if (!isInGbBlock)
1.189 + {
1.190 + MakeAvailable(4, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters, aLengthOfUnicode, pointerToLastUsedByte, numberOfBytesAvailable, (pointerToLastUsedByte-pointerToPreviousByte)-2); // what's passed into the last parameter is not a typo - we do not want the two-byte character currently pointed to by (pointerToPreviousByte+1) to be made available
1.191 + if (numberOfBytesAvailable<4) // 4 bytes are required for the "~{" "~}" escape sequences (thus ensuring that at least a single double-byte character can be put into the GB-block)
1.192 + {
1.193 + break;
1.194 + }
1.195 + isInGbBlock=ETrue;
1.196 + Mem::Copy(pointerToPreviousByte+3, pointerToPreviousByte+1, pointerToLastUsedByte-pointerToPreviousByte); // move the unprocessed text up by two bytes to make room for "~{"
1.197 + ++pointerToPreviousByte;
1.198 + *pointerToPreviousByte='~';
1.199 + ++pointerToPreviousByte;
1.200 + *pointerToPreviousByte='{';
1.201 + numberOfBytesAvailable-=2;
1.202 + pointerToLastUsedByte+=2;
1.203 + }
1.204 + ++pointerToPreviousByte;
1.205 + *pointerToPreviousByte&=~0x80; // inside a GB block each byte is written with its top bit cleared
1.206 + __ASSERT_DEBUG(pointerToPreviousByte<pointerToLastUsedByte, Panic(EPanicBadPointers2));
1.207 + ++pointerToPreviousByte;
1.208 + *pointerToPreviousByte&=~0x80;
1.209 + }
1.210 + else
1.211 + {
1.212 + if (isInGbBlock)
1.213 + {
1.214 +closeGbBlock: // also reached by the goto below when the text ends while a GB block is still open
1.215 + isInGbBlock=EFalse;
1.216 + MakeAvailable(2, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters, aLengthOfUnicode, pointerToLastUsedByte, numberOfBytesAvailable, pointerToLastUsedByte-pointerToPreviousByte);
1.217 + if (numberOfBytesAvailable<2) // 2 bytes are required for the "~}" escape sequence
1.218 + {
1.219 + IncrementNumberOfUnicodeCharactersNotConverted(aLengthOfUnicode, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters); // no room: give up the two-byte character just written...
1.220 + *(pointerToPreviousByte-1)='~'; // ...and overwrite its two bytes with the closing "~}"
1.221 + *pointerToPreviousByte='}';
1.222 + break;
1.223 + }
1.224 + Mem::Copy(pointerToPreviousByte+3, pointerToPreviousByte+1, pointerToLastUsedByte-pointerToPreviousByte); // move the unprocessed text up by two bytes to make room for "~}"
1.225 + ++pointerToPreviousByte;
1.226 + *pointerToPreviousByte='~';
1.227 + ++pointerToPreviousByte;
1.228 + *pointerToPreviousByte='}';
1.229 + numberOfBytesAvailable-=2;
1.230 + pointerToLastUsedByte+=2;
1.231 + __ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastUsedByte, Panic(EPanicBadPointers3));
1.232 + if (pointerToPreviousByte>=pointerToLastUsedByte) // nothing is left after the closing escape sequence
1.233 + {
1.234 + break;
1.235 + }
1.236 + }
1.237 + if (currentByte=='~') // a literal "~" is written as "~~"
1.238 + {
1.239 + MakeAvailable(1, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters, aLengthOfUnicode, pointerToLastUsedByte, numberOfBytesAvailable, (pointerToLastUsedByte-pointerToPreviousByte)-1); // what's passed into the last parameter is not a typo - we do not want the "~" currently pointed to by (pointerToPreviousByte+1) to be made available
1.240 + if (numberOfBytesAvailable<1) // 1 byte is required for the extra "~" character
1.241 + {
1.242 + break;
1.243 + }
1.244 + Mem::Copy(pointerToPreviousByte+2, pointerToPreviousByte+1, pointerToLastUsedByte-pointerToPreviousByte); // move the unprocessed text up by one byte to make room for the extra "~"
1.245 + ++pointerToPreviousByte;
1.246 + *pointerToPreviousByte='~';
1.247 + numberOfBytesAvailable-=1;
1.248 + pointerToLastUsedByte+=1;
1.249 + }
1.250 + ++pointerToPreviousByte; // step over the single-byte character itself
1.251 + }
1.252 + __ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastUsedByte, Panic(EPanicBadPointers4));
1.253 + if (pointerToPreviousByte>=pointerToLastUsedByte) // all of the remaining text has been processed
1.254 + {
1.255 + if (isInGbBlock)
1.256 + {
1.257 + goto closeGbBlock; // this is to share the code for closing the GB-block
1.258 + }
1.259 + break;
1.260 + }
1.261 + }
1.262 + __ASSERT_DEBUG(pointerToPreviousByte<=pointerToLastUsedByte, Panic(EPanicBadPointers5));
1.263 + if (pointerToPreviousByte<pointerToLastUsedByte) // the loop stopped early with one character (one or two bytes, per the assert below) still unprocessed
1.264 + {
1.265 + __ASSERT_DEBUG((pointerToPreviousByte==pointerToLastUsedByte-1) || (pointerToPreviousByte==pointerToLastUsedByte-2), Panic(EPanicBadPointers6));
1.266 + numberOfBytesAvailable+=(pointerToLastUsedByte-pointerToPreviousByte); // discard that character from the output...
1.267 + pointerToLastUsedByte=pointerToPreviousByte;
1.268 + IncrementNumberOfUnicodeCharactersNotConverted(aLengthOfUnicode, aNumberOfUnicodeCharactersNotConverted, aIndicesOfUnconvertibleCharacters); // ...and count it as not converted
1.269 + }
1.270 + //if it gets out from FOREVER, isInGbBlock could not be ETrue ~~~ so wouldn't need the assert
1.271 + //__ASSERT_DEBUG(!isInGbBlock, Panic(EPanicStillInGbBlock));
1.272 + aDescriptor.SetLength(aDescriptor.MaxLength()-numberOfBytesAvailable); // the final length is whatever is not free space
1.273 + __ASSERT_DEBUG(aDescriptor.Length()==pointerToLastUsedByte-(aDescriptor.Ptr()-1), Panic(EPanicBadDescriptorSubDivision2));
1.274 + }
1.275 + }
1.276 +
1.277 +TInt CHZConverterImpl::ConvertFromUnicode( // converts aUnicode to HZ in aForeign; returns the number of Unicode characters not converted, or a negative error code
1.278 + CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
1.279 + const TDesC8& aReplacementForUnconvertibleUnicodeCharacters,
1.280 + TDes8& aForeign,
1.281 + const TDesC16& aUnicode,
1.282 + CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
1.283 + {
1.284 + TInt returnValue=CCnvCharacterSetConverter::DoConvertFromUnicode(CnvGb2312::ConversionData(), aDefaultEndiannessOfForeignCharacters, aReplacementForUnconvertibleUnicodeCharacters, aForeign, aUnicode, aIndicesOfUnconvertibleCharacters); // step 1: plain GB2312 conversion into aForeign
1.285 + if (returnValue<0)
1.286 + {
1.287 + return returnValue; // this is an error-code
1.288 + }
1.289 + ConvertFromGb2312ToHzInPlace(aForeign, returnValue, aIndicesOfUnconvertibleCharacters, aUnicode.Length()); // step 2: add the HZ escapes in place; this may increase returnValue if characters have to be dropped to make room
1.290 + return returnValue;
1.291 + }
1.292 +
1.293 +LOCAL_C TInt ConvertFromHzToHomogeneousGb2312(TBuf8<KLengthOfIntermediateBuffer>& aGb2312, TPtrC8& aHzBeingConsumed, TPtrC8& aRemainderOfHz, TInt& aState, TUint& aOutputConversionFlags) // strips the HZ escapes from the front of aRemainderOfHz and writes the text as plain GB2312 (two-byte characters with their top bits set) into aGb2312, never writing more than aGb2312 can hold; on return aHzBeingConsumed is the part of the input that was translated, aRemainderOfHz is what is left, and aState is the default/GB-block mode at the split; returns 0, or EErrorIllFormedInput for a bad or mismatched tilde sequence
1.294 + {
1.295 + // this function panics if aRemainderOfHz is of length 0
1.296 + TUint8* pointerToPreviousGb2312Byte=CONST_CAST(TUint8*, aGb2312.Ptr()-1); // last output byte written; starts one before the buffer
1.297 + const TUint8* pointerToCurrentHzByte=aRemainderOfHz.Ptr();
1.298 + const TUint8* const pointerToLastHzByte=pointerToCurrentHzByte+(aRemainderOfHz.Length()-1);
1.299 + const TUint8* const pointerToLastHzByteToConvertThisTime=Min(pointerToLastHzByte, pointerToCurrentHzByte+(KLengthOfIntermediateBuffer-1)); // text bytes beyond this point are left for the next call
1.300 + FOREVER
1.301 + {
1.302 + const TUint currentHzByte=*pointerToCurrentHzByte;
1.303 + if (currentHzByte=='~')
1.304 + {
1.305 + __ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers7));
1.306 + if ((pointerToCurrentHzByte>=pointerToLastHzByte) || ((*(pointerToCurrentHzByte+1)=='~') && ((pointerToPreviousGb2312Byte+1)-aGb2312.Ptr()>=aGb2312.MaxLength()))) // stop if the "~" is the last input byte, or if it starts a "~~" pair whose output byte would not fit in aGb2312 (tilde sequences are not limited by pointerToLastHzByteToConvertThisTime, so without this check a run of "~~" pairs would be written past the end of the buffer)
1.307 + {
1.308 + aOutputConversionFlags|=(pointerToCurrentHzByte>=pointerToLastHzByte)? CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated: 0; // only a "~" with nothing after it is truncated input; a full buffer merely defers the "~~" to the next call
1.309 + --pointerToCurrentHzByte; // leave the "~" unconsumed
1.310 + break;
1.311 + }
1.312 + ++pointerToCurrentHzByte;
1.313 + const TUint nextHzByte=*pointerToCurrentHzByte;
1.314 + switch (nextHzByte)
1.315 + {
1.316 + case '{': // "~{" opens a GB block; it is an error if one is already open
1.317 + if (aState==KIsInGbBlock)
1.318 + {
1.319 + return CCnvCharacterSetConverter::EErrorIllFormedInput;
1.320 + }
1.321 + aState=KIsInGbBlock;
1.322 + break;
1.323 + case '}': // "~}" closes a GB block; it is an error if none is open
1.324 + if (aState==CCnvCharacterSetConverter::KStateDefault)
1.325 + {
1.326 + return CCnvCharacterSetConverter::EErrorIllFormedInput;
1.327 + }
1.328 + aState=CCnvCharacterSetConverter::KStateDefault;
1.329 + break;
1.330 + case '~': // "~~" stands for a single literal "~" (room for it was checked above)
1.331 + ++pointerToPreviousGb2312Byte;
1.332 + *pointerToPreviousGb2312Byte=STATIC_CAST(TUint8, currentHzByte);
1.333 + break;
1.334 + case 0x0a: // "~" followed by a line feed is consumed without producing any output
1.335 + break;
1.336 + default:
1.337 + return CCnvCharacterSetConverter::EErrorIllFormedInput;
1.338 + }
1.339 + }
1.340 + else
1.341 + {
1.342 + __ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers8));
1.343 + if (pointerToCurrentHzByte>pointerToLastHzByteToConvertThisTime) // past this call's quota of text: stop just before this byte
1.344 + {
1.345 + --pointerToCurrentHzByte;
1.346 + break;
1.347 + }
1.348 + if (aState==CCnvCharacterSetConverter::KStateDefault)
1.349 + {
1.350 + ++pointerToPreviousGb2312Byte; // outside a GB block bytes are copied through unchanged
1.351 + *pointerToPreviousGb2312Byte=STATIC_CAST(TUint8, currentHzByte);
1.352 + }
1.353 + else
1.354 + {
1.355 + __ASSERT_DEBUG(aState==KIsInGbBlock, Panic(EPanicBadState));
1.356 + __ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByteToConvertThisTime, Panic(EPanicBadPointers9));
1.357 + if (pointerToCurrentHzByte>=pointerToLastHzByteToConvertThisTime) // the second byte of this two-byte character is not within this call's quota
1.358 + {
1.359 + aOutputConversionFlags|=CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated;
1.360 + --pointerToCurrentHzByte;
1.361 + break;
1.362 + }
1.363 + ++pointerToCurrentHzByte;
1.364 + ++pointerToPreviousGb2312Byte;
1.365 + *pointerToPreviousGb2312Byte=STATIC_CAST(TUint8, currentHzByte|0x80); // inside a GB block both bytes get their top bit set to form the GB2312 character
1.366 + ++pointerToPreviousGb2312Byte;
1.367 + *pointerToPreviousGb2312Byte=STATIC_CAST(TUint8, *pointerToCurrentHzByte|0x80);
1.368 + }
1.369 + }
1.370 + __ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers10));
1.371 + if (pointerToCurrentHzByte>=pointerToLastHzByte) // the whole input has been consumed
1.372 + {
1.373 + break;
1.374 + }
1.375 + ++pointerToCurrentHzByte;
1.376 + }
1.377 + aGb2312.SetLength((pointerToPreviousGb2312Byte+1)-aGb2312.Ptr());
1.378 + const TInt numberOfHzBytesBeingConsumed=(pointerToCurrentHzByte+1)-aRemainderOfHz.Ptr(); // pointerToCurrentHzByte is left on the last byte consumed
1.379 + aHzBeingConsumed.Set(aRemainderOfHz.Left(numberOfHzBytesBeingConsumed));
1.380 + aRemainderOfHz.Set(aRemainderOfHz.Mid(numberOfHzBytesBeingConsumed));
1.381 +#if defined(_DEBUG)
1.382 + // AAA: check that if the split occurs on a boundary between some one-byte and some two-byte text, then aState corresponds to the state *after* the split (the code marked "BBB" relies on this)
1.383 + if (aRemainderOfHz.Length()>=2)
1.384 + {
1.385 + __ASSERT_DEBUG(aRemainderOfHz.Left(2)!=_L8("~{"), Panic(EPanicSplitBoundaryIsNotAsLateAsPossible1));
1.386 + __ASSERT_DEBUG(aRemainderOfHz.Left(2)!=_L8("~}"), Panic(EPanicSplitBoundaryIsNotAsLateAsPossible2));
1.387 + }
1.388 +#endif
1.389 + return 0;
1.390 + }
1.391 +
1.392 +LOCAL_C TInt Gb2312IndexToHzIndex(const TDesC8& aHz, TInt aGb2312Index, TBool aReturnMaximalHzIndex) // maps a byte index in the escape-free GB2312 text derived from aHz back to a byte index in aHz by walking aHz and counting the tilde-sequence bytes that have no counterpart in the GB2312 text; aReturnMaximalHzIndex selects which of the two stopping tests below is used - NOTE(review): presumably ETrue returns the latest HZ position that still maps to aGb2312Index (so that trailing escape sequences are included) and EFalse the earliest; confirm
1.393 + {
1.394 + // this function panics if aHz is of length 0
1.395 + // aHz may start in either KIsInGbBlock or CCnvCharacterSetConverter::KStateDefault state, but it must *not* have any truncated sequences (i.e. "tilde <something>" sequence that is not complete, or part of a 2-byte character sequence) at either its start or its end
1.396 + __ASSERT_DEBUG(aGb2312Index>=0, Panic(EPanicBadGb2312Index));
1.397 + TInt hzIndex=0; // result: the HZ index recorded at the start of the last completed iteration
1.398 + TInt offsetFromGb2312IndexToHzIndex=0; // HZ bytes seen so far that produced no GB2312 byte
1.399 + const TUint8* const pointerToFirstHzByte=aHz.Ptr();
1.400 + const TUint8* pointerToCurrentHzByte=pointerToFirstHzByte;
1.401 + const TUint8* const pointerToLastHzByte=pointerToFirstHzByte+(aHz.Length()-1);
1.402 + FOREVER
1.403 + {
1.404 + const TInt newHzIndex=pointerToCurrentHzByte-pointerToFirstHzByte;
1.405 + const TInt candidateHzIndex=aGb2312Index+offsetFromGb2312IndexToHzIndex; // the target index shifted by the escape bytes passed so far
1.406 + __ASSERT_DEBUG(hzIndex<=candidateHzIndex, Panic(EPanicBadHzIndex));
1.407 + if (aReturnMaximalHzIndex? (newHzIndex>candidateHzIndex): (hzIndex>=candidateHzIndex))
1.408 + {
1.409 + break;
1.410 + }
1.411 + hzIndex=newHzIndex;
1.412 + if (*pointerToCurrentHzByte=='~')
1.413 + {
1.414 + __ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers11));
1.415 + if (pointerToCurrentHzByte>=pointerToLastHzByte)
1.416 + {
1.417 + break;
1.418 + }
1.419 + ++pointerToCurrentHzByte;
1.420 + const TUint currentHzByte=*pointerToCurrentHzByte;
1.421 + if (currentHzByte=='~')
1.422 + {
1.423 + ++offsetFromGb2312IndexToHzIndex; // "~~" is two HZ bytes for one GB2312 byte
1.424 + }
1.425 + else
1.426 + {
1.427 + __ASSERT_DEBUG((currentHzByte=='{') || (currentHzByte=='}') || (currentHzByte==0x0a), Panic(EPanicBadTildeSequence));
1.428 + offsetFromGb2312IndexToHzIndex+=2; // the other tilde sequences are two HZ bytes for no GB2312 byte
1.429 + }
1.430 + }
1.431 + __ASSERT_DEBUG(pointerToCurrentHzByte<=pointerToLastHzByte, Panic(EPanicBadPointers12));
1.432 + if (pointerToCurrentHzByte>=pointerToLastHzByte)
1.433 + {
1.434 + break;
1.435 + }
1.436 + ++pointerToCurrentHzByte;
1.437 + }
1.438 + return hzIndex;
1.439 + }
1.440 +
1.441 +TInt CHZConverterImpl::ConvertToUnicode( // converts HZ text in aForeign to Unicode in aUnicode, one intermediate GB2312 buffer at a time; returns the number of bytes of aForeign not consumed, or a negative error code; aState is read and updated so that a GB block can span calls
1.442 + CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
1.443 + TDes16& aUnicode,
1.444 + const TDesC8& aForeign,
1.445 + TInt& aState,
1.446 + TInt& aNumberOfUnconvertibleCharacters,
1.447 + TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
1.448 + {
1.449 + aUnicode.SetLength(0);
1.450 + TPtrC8 remainderOfHz(aForeign); // NOTE(review): nothing here guards against an empty aForeign, although ConvertFromHzToHomogeneousGb2312 states that it must not be given empty input - confirm that the caller never passes one
1.451 + TInt numberOfHzBytesConsumed=0;
1.452 + TUint outputConversionFlags=0;
1.453 + TUint inputConversionFlags=CCnvCharacterSetConverter::EInputConversionFlagAppend; // each chunk is appended to what is already in aUnicode
1.454 + const SCnvConversionData& gb2312ConversionData=CnvGb2312::ConversionData();
1.455 + FOREVER
1.456 + {
1.457 + __ASSERT_DEBUG(numberOfHzBytesConsumed+remainderOfHz.Length()==aForeign.Length(), Panic(EPanicBadDescriptorSubDivision3));
1.458 +#if defined(_DEBUG)
1.459 + const TInt oldLengthOfRemainderOfHz=remainderOfHz.Length();
1.460 +#endif
1.461 + TBuf8<KLengthOfIntermediateBuffer> gb2312; // escape-free GB2312 form of the chunk being converted
1.462 + TPtrC8 hzBeingConsumed;
1.463 + const TInt returnValue1=ConvertFromHzToHomogeneousGb2312(gb2312, hzBeingConsumed, remainderOfHz, aState, outputConversionFlags);
1.464 + if (returnValue1<0)
1.465 + {
1.466 + return returnValue1; // this is an error-code
1.467 + }
1.468 + __ASSERT_DEBUG(returnValue1==0, Panic(EPanicBadReturnValue1));
1.469 + __ASSERT_DEBUG(hzBeingConsumed.Length()+remainderOfHz.Length()==oldLengthOfRemainderOfHz, Panic(EPanicRemainderOfHzHasGotLonger));
1.470 + if (hzBeingConsumed.Length()==0) // nothing more could be taken from the input
1.471 + {
1.472 + break;
1.473 + }
1.474 + TInt numberOfUnconvertibleCharacters;
1.475 + TInt indexOfFirstByteOfFirstUnconvertibleCharacter;
1.476 + const TInt returnValue2=CCnvCharacterSetConverter::DoConvertToUnicode(gb2312ConversionData, aDefaultEndiannessOfForeignCharacters, aUnicode, gb2312, numberOfUnconvertibleCharacters, indexOfFirstByteOfFirstUnconvertibleCharacter, outputConversionFlags, inputConversionFlags);
1.477 + if (returnValue2<0)
1.478 + {
1.479 + return returnValue2; // this is an error-code
1.480 + }
1.481 + if (numberOfUnconvertibleCharacters>0)
1.482 + {
1.483 + if (aNumberOfUnconvertibleCharacters==0) // NOTE(review): aNumberOfUnconvertibleCharacters is read without being set in this function - assumes the caller initialises it to 0; confirm
1.484 + {
1.485 + aIndexOfFirstByteOfFirstUnconvertibleCharacter=numberOfHzBytesConsumed+Gb2312IndexToHzIndex(hzBeingConsumed, indexOfFirstByteOfFirstUnconvertibleCharacter, EFalse); // translate the index within the GB2312 chunk into an index within aForeign
1.486 + }
1.487 + aNumberOfUnconvertibleCharacters+=numberOfUnconvertibleCharacters;
1.488 + }
1.489 + if (returnValue2>0) // part of the GB2312 chunk was left unconverted, so stop after this chunk
1.490 + {
1.491 + const TInt numberOfGb2312BytesConverted=gb2312.Length()-returnValue2;
1.492 + __ASSERT_DEBUG(numberOfGb2312BytesConverted>=0, Panic(EPanicBadReturnValue2));
1.493 + // don't call gb2312.SetLength(numberOfGb2312BytesConverted) as we want to access gb2312[numberOfGb2312BytesConverted] - in any case, gb2312's length is never going to be used again
1.494 + // don't bother re-setting remainderOfHz as it won't be used again
1.495 + numberOfHzBytesConsumed+=Gb2312IndexToHzIndex(hzBeingConsumed, numberOfGb2312BytesConverted, ETrue);
1.496 + aState=(gb2312[numberOfGb2312BytesConverted]&0x80)? KIsInGbBlock: CCnvCharacterSetConverter::KStateDefault; // BBB: if the split (between the text that was converted and the text that wasn't converted) occurs on a boundary between some one-byte and some two-byte text, then aState corresponds to the state *after* the split (the code marked "AAA" checks this) - this means that we set aState according to gb2312[numberOfGb2312BytesConverted] rather than gb2312[numberOfGb2312BytesConverted-1]
1.497 + break;
1.498 + }
1.499 + numberOfHzBytesConsumed+=hzBeingConsumed.Length(); // the whole chunk was converted
1.500 + remainderOfHz.Set(aForeign.Mid(numberOfHzBytesConsumed));
1.501 + __ASSERT_DEBUG(numberOfHzBytesConsumed+remainderOfHz.Length()==aForeign.Length(), Panic(EPanicBadDescriptorSubDivision4));
1.502 + if (remainderOfHz.Length()==0)
1.503 + {
1.504 + break;
1.505 + }
1.506 + if (numberOfHzBytesConsumed>0)
1.507 + {
1.508 + inputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable; // set for every chunk after the first
1.509 + }
1.510 + }
1.511 + // N.B. remainderOfHz is in an undefined state by this point
1.512 + if ((numberOfHzBytesConsumed==0) && (outputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated)) // nothing consumed and the input was flagged as truncated: report it as ill-formed
1.513 + {
1.514 + return CCnvCharacterSetConverter::EErrorIllFormedInput;
1.515 + }
1.516 + return aForeign.Length()-numberOfHzBytesConsumed;
1.517 + }
1.518 +
1.519 +TBool CHZConverterImpl::IsInThisCharacterSetL( // heuristic detector: starts from a confidence of 50, raises it by the percentage of sample bytes that belong to "~{", "~}" or "~~" pairs, and lowers it for bytes above 0x7e and for a pair percentage below 4; always sets aSetToTrue and always returns ETrue
1.520 + TBool& aSetToTrue,
1.521 + TInt& aConfidenceLevel,
1.522 + const TDesC8& aSample)
1.523 + {
1.524 + aSetToTrue=ETrue;
1.525 + TInt sampleLength = aSample.Length();
1.526 + TInt pairOfTilde=0; // number of "~{", "~}" and "~~" pairs found
1.527 + TInt occrenceOfNonHz=0; // number of bytes above 0x7e
1.528 + aConfidenceLevel = 50;
1.529 + // Hz encoding uses escape sequences...
1.530 + for (TInt i = 0; i < sampleLength; ++i)
1.531 + {
1.532 + if (aSample[i]>0x7e)
1.533 + occrenceOfNonHz++;
1.534 + if (aSample[i]==0x7e) // 0x7e is "~"
1.535 + {
1.536 + TInt increment1 = i+1;
1.537 + if (increment1 >= sampleLength) // a "~" at the very end of the sample has no partner to inspect
1.538 + break;
1.539 + if ((aSample[increment1] == 0x7b)||(aSample[increment1] == 0x7d)||(aSample[increment1] == 0x7e)) // 0x7b is "{", 0x7d is "}", 0x7e is "~"
1.540 + {
1.541 + pairOfTilde++;
1.542 + i++; // skip the second byte of the pair
1.543 + }
1.544 + }
1.545 + }//for
1.546 + if (sampleLength)
1.547 + {
1.548 + TInt occurrenceOftilde =2*pairOfTilde*100/sampleLength; // percentage of the sample's bytes that belong to recognised tilde pairs
1.549 + aConfidenceLevel=aConfidenceLevel-Max(0,(4-occurrenceOftilde)); // penalty of up to 4 when that percentage is below 4
1.550 + aConfidenceLevel += occurrenceOftilde;
1.551 + aConfidenceLevel -= ((occrenceOfNonHz*100)/sampleLength); // subtract the percentage of bytes above 0x7e
1.552 + }
1.553 + return ETrue;
1.554 + }
1.555 +
1.556 +CHZConverterImpl* CHZConverterImpl::NewL() // factory: allocates a converter with new(ELeave) and returns it; there is no second-phase construction
1.557 + {
1.558 + CHZConverterImpl* self = new(ELeave) CHZConverterImpl();
1.559 + return self;
1.560 + }
1.561 +
1.562 +CHZConverterImpl::~CHZConverterImpl() // empty: the class declares no data members of its own to release
1.563 + {
1.564 + }
1.565 +
1.566 +CHZConverterImpl::CHZConverterImpl() // empty: the class declares no data members of its own to initialise
1.567 + {
1.568 + }
1.569 +
1.570 +const TImplementationProxy ImplementationTable[] = // table returned by ImplementationGroupProxy(): pairs implementation UID 0x10006065 with the CHZConverterImpl::NewL factory
1.571 + {
1.572 + IMPLEMENTATION_PROXY_ENTRY(0x10006065, CHZConverterImpl::NewL)
1.573 + };
1.574 +
1.575 +EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount) // exported entry point: writes the number of entries in ImplementationTable to aTableCount and returns the table
1.576 + {
1.577 + aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy);
1.578 +
1.579 + return ImplementationTable;
1.580 + }