1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/textandloc/charconvfw/charconv_fw/src/convutils/convutils.cpp Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,900 @@
1.4 +/*
1.5 +* Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).
1.6 +* All rights reserved.
1.7 +* This component and the accompanying materials are made available
1.8 +* under the terms of "Eclipse Public License v1.0"
1.9 +* which accompanies this distribution, and is available
1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
1.11 +*
1.12 +* Initial Contributors:
1.13 +* Nokia Corporation - initial contribution.
1.14 +*
1.15 +* Contributors:
1.16 +*
1.17 +* Description:
1.18 +*
1.19 +*/
1.20 +
1.21 +
1.22 +#include <e32std.h>
1.23 +#include <e32base.h>
1.24 +#include <charconv.h>
1.25 +#include <convutils.h>
1.26 +
1.27 +const TInt KNoPreviousCharacterSet=-1; // sentinel for "no character set selected yet" while converting from Unicode
1.28 +const TInt KDefaultCharacterSet = 0; // index of the default character set in the caller-supplied array of character sets
1.29 +const TUint KControlCharacterEscape=0x1b; // ESC byte that is searched for to find the start of an escape sequence in foreign text
1.30 +
1.31 +#if defined(_DEBUG)
1.32 +// A small KMaximumLengthOfIntermediateBuffer causes performance problems (debug builds use 5).
1.33 +// Please use a release build to test performance cases.
1.34 +const TInt KMaximumLengthOfIntermediateBuffer=5;
1.35 +#else
1.36 +const TInt KMaximumLengthOfIntermediateBuffer=150;
1.37 +#endif
1.38 +
1.39 +struct SCnvConversionData; // forward declaration; this part of the file only handles it through pointers and references
1.40 +
1.41 +_LIT(KLitPanicText, "CONVUTILS"); // panic category passed to User::Panic() by Panic()
1.42 +
1.43 +enum TPanic // reason codes passed to Panic(); numbering starts at 1 and follows declaration order
1.44 + {
1.45 + EPanicBadInputConversionFlags1=1,
1.46 + EPanicBadInputConversionFlags2,
1.47 + EPanicBadInputConversionFlags3,
1.48 + EPanicBadNumberOfUnicodeElementsConsumed,
1.49 + EPanicAppendFlagViolated,
1.50 + EPanicBadNumberOfUnicodeCharactersConverted,
1.51 + EPanicBadNumberOfCharactersThatDroppedOut,
1.52 + EPanicLoopCounterOverRun1,
1.53 + EPanicLoopCounterOverRun2,
1.54 + EPanicDescriptorNotWholeNumberOfCharacters1,
1.55 + EPanicDescriptorNotWholeNumberOfCharacters2,
1.56 + EPanicDescriptorNotWholeNumberOfCharacters3,
1.57 + EPanicDescriptorNotWholeNumberOfCharacters4,
1.58 + EPanicBadStartOfNextEscapeSequence,
1.59 + EPanicInconsistentNumberOfForeignBytesRemaining,
1.60 + EPanicBadLengthOfRunToConvert1,
1.61 + EPanicBadLengthOfRunToConvert2,
1.62 + EPanicBadMethodPointer,
1.63 + EPanicBadMethodData1,
1.64 + EPanicBadMethodData2,
1.65 + EPanicBadMethodData3,
1.66 + EPanicBadMethodData4,
1.67 + EPanicBadNumberOfCharacterSets,
1.68 + EPanicBadConversionDataPointer1,
1.69 + EPanicBadConversionDataPointer2,
1.70 + EPanicBadConversionDataPointer3,
1.71 + EPanicBadFunctionPointer1,
1.72 + EPanicBadFunctionPointer2,
1.73 + EPanicBadFunctionPointer3,
1.74 + EPanicBadEscapeSequencePointer1,
1.75 + EPanicBadEscapeSequencePointer2,
1.76 + EPanicBadNumberOfStates,
1.77 + EPanicBadEscapeSequenceStart,
1.78 + EPanicBadNumberOfMethods,
1.79 + EPanicBadSurrogatePair1,
1.80 + EPanicBadSurrogatePair2,
1.81 + EPanicBadRemainderOfForeign,
1.82 + EPanicOutputDescriptorTooShortEvenToHoldEscapeSequenceToDefaultCharacterSet
1.83 + };
1.84 +
1.85 +LOCAL_C void Panic(TPanic aPanic) // file-local helper: raises a panic with category KLitPanicText and aPanic as the reason code
1.86 + {
1.87 + User::Panic(KLitPanicText, aPanic);
1.88 + }
1.89 +
1.90 +/** Converts Unicode text into a complex foreign character set encoding. This
1.91 +is an encoding which cannot be converted simply by calling
1.92 +CCnvCharacterSetConverter::DoConvertFromUnicode(). It may be modal (e.g. JIS)
1.93 +or non-modal (e.g. Shift-JIS).
1.94 +
1.95 +The Unicode text specified in aUnicode is converted using the array of
1.96 +conversion data objects (aArrayOfCharacterSets) provided by the plug-in for
1.97 +the complex character set encoding, and the converted text is returned in
1.98 +aForeign. Any existing contents in aForeign are overwritten.
1.99 +
1.100 +Unlike CCnvCharacterSetConverter::DoConvertFromUnicode(), multiple character
1.101 +sets can be specified. aUnicode is converted using the first character conversion
1.102 +data object in the array. When a character is found which cannot be converted
1.103 +using that data, each character set in the array is tried in turn. If it cannot
1.104 +be converted using any object in the array, the index of the character is
1.105 +appended to aIndicesOfUnconvertibleCharacters and the character is replaced
1.106 +by aReplacementForUnconvertibleUnicodeCharacters.
1.107 +
1.108 +If it can be converted using another object in the array, that object is used
1.109 +to convert all subsequent characters until another unconvertible character
1.110 +is found.
1.111 +
1.112 +@param aDefaultEndiannessOfForeignCharacters The default endian-ness to use
1.113 +when writing the characters in the foreign character set. If an endian-ness
1.114 +for foreign characters is specified in the current conversion data object,
1.115 +then that is used instead and the value of
1.116 +aDefaultEndiannessOfForeignCharacters is ignored.
1.117 +@param aReplacementForUnconvertibleUnicodeCharacters The single character (one
1.118 +or more byte values) which is used to replace unconvertible characters.
1.119 +@param aForeign On return, contains the converted text in the non-Unicode
1.120 +character set.
1.121 +@param aUnicode The source Unicode text to be converted.
1.122 +@param aIndicesOfUnconvertibleCharacters On return, holds an ascending array
1.123 +of the indices of each Unicode character in the source text which could not
1.124 +be converted (because none of the target character sets have an equivalent
1.125 +character).
1.126 +@param aArrayOfCharacterSets Array of character conversion data objects,
1.127 +representing the character sets which comprise a complex character set
1.128 +encoding. These are used in sequence to convert the Unicode text. There must
1.129 +be at least one character set in this array and no character set may have any
1.130 +NULL member data, or a panic occurs.
1.131 +@return The number of unconverted characters left at the end of the input
1.132 +descriptor (e.g. because aForeign was not long enough to hold all the text),
1.133 +or a negative error value, as defined in CCnvCharacterSetConverter::TError. */
1.134 +EXPORT_C TInt CnvUtilities::ConvertFromUnicode(
1.135 + CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
1.136 + const TDesC8& aReplacementForUnconvertibleUnicodeCharacters,
1.137 + TDes8& aForeign,
1.138 + const TDesC16& aUnicode,
1.139 + CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters,
1.140 + const TArray<SCharacterSet>& aArrayOfCharacterSets)
1.141 + {
1.142 + TUint notUsed; // receives the output conversion flags, which this overload does not expose to its caller
1.143 + return ConvertFromUnicode(aDefaultEndiannessOfForeignCharacters, // forwards to the overload that takes conversion flags
1.144 + aReplacementForUnconvertibleUnicodeCharacters,
1.145 + aForeign,
1.146 + aUnicode,
1.147 + aIndicesOfUnconvertibleCharacters,
1.148 + aArrayOfCharacterSets,
1.149 + notUsed,
1.150 + 0); // no input conversion flags, so aForeign is overwritten rather than appended to
1.151 + }
1.152 +
1.153 +/** Converts Unicode text into a complex foreign character set encoding. This is
1.154 +an encoding which cannot be converted simply by a call to
1.155 +CCnvCharacterSetConverter::DoConvertFromUnicode(). It may be modal (e.g. JIS)
1.156 +or non-modal (e.g. Shift-JIS).
1.157 +
1.158 +The Unicode text specified in aUnicode is converted using the array of conversion
1.159 +data objects (aArrayOfCharacterSets) provided by the plug-in for the complex
1.160 +character set encoding and the converted text is returned in aForeign. The
1.161 +function can either append to aForeign or overwrite its contents (if any).
1.162 +
1.163 +Unlike CCnvCharacterSetConverter::DoConvertFromUnicode(), multiple character
1.164 +sets can be specified. aUnicode is converted using the first character conversion
1.165 +data object in the array. When a character is found which cannot be converted
1.166 +using that data, each character set in the array is tried in turn. If it cannot
1.167 +be converted using any object in the array, the index of the character is
1.168 +appended to aIndicesOfUnconvertibleCharacters and the character is replaced
1.169 +by aReplacementForUnconvertibleUnicodeCharacters.
1.170 +
1.171 +If it can be converted using another object in the array, that object is used
1.172 +to convert all subsequent characters until another unconvertible character
1.173 +is found.
1.174 +
1.175 +@param aDefaultEndiannessOfForeignCharacters The default endian-ness to use
1.176 +when writing the characters in the foreign character set. If an endian-ness
1.177 +for foreign characters is specified in the current conversion data object,
1.178 +then that is used instead and the value of
1.179 +aDefaultEndiannessOfForeignCharacters is ignored.
1.180 +@param aReplacementForUnconvertibleUnicodeCharacters The single character (one
1.181 +or more byte values) which is used to replace unconvertible characters.
1.182 +@param aForeign On return, contains the converted text in the non-Unicode
1.183 +character set. This may already contain some text. If it does, and if
1.184 +aInputConversionFlags specifies EInputConversionFlagAppend, then the converted
1.185 +text is appended to this descriptor.
1.186 +@param aUnicode The source Unicode text to be converted.
1.187 +@param aIndicesOfUnconvertibleCharacters On return, holds an ascending array
1.188 +of the indices of each Unicode character in the source text which could not
1.189 +be converted (because none of the target character sets have an equivalent
1.190 +character).
1.191 +@param aArrayOfCharacterSets Array of character set data objects. These are
1.192 +used in sequence to convert the Unicode text. There must be at least one
1.193 +character set in this array and no character set may have any NULL member
1.194 +data, or a panic occurs.
1.195 +@param aOutputConversionFlags If the input descriptor ended in a truncated
1.196 +sequence, e.g. the first half only of a Unicode surrogate pair, this returns
1.197 +with the EOutputConversionFlagInputIsTruncated flag set.
1.198 +@param aInputConversionFlags Specify
1.199 +CCnvCharacterSetConverter::EInputConversionFlagAppend to append the text to
1.200 +aForeign. Specify CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable
1.201 +to prevent the function from returning the error-code EErrorIllFormedInput
1.202 +when the input descriptor consists of nothing but a truncated sequence. The
1.203 +CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter
1.204 +flag must not be set, otherwise a panic occurs.
1.205 +@return The number of unconverted characters left at the end of the input descriptor
1.206 +(e.g. because aForeign was not long enough to hold all the text), or a negative
1.207 +error value, as defined in CCnvCharacterSetConverter::TError. */
1.208 +EXPORT_C TInt CnvUtilities::ConvertFromUnicode(
1.209 + CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
1.210 + const TDesC8& aReplacementForUnconvertibleUnicodeCharacters,
1.211 + TDes8& aForeign,
1.212 + const TDesC16& aUnicode,
1.213 + CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters,
1.214 + const TArray<SCharacterSet>& aArrayOfCharacterSets,
1.215 + TUint& aOutputConversionFlags,
1.216 + TUint aInputConversionFlags)
1.217 + {
1.218 + __ASSERT_ALWAYS(~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter, Panic(EPanicBadInputConversionFlags1)); // that flag is reserved for the internal calls made below
1.219 + CheckArrayOfCharacterSets(aArrayOfCharacterSets);
1.220 + aOutputConversionFlags=0;
1.221 + TUint internalInputConversionFlags=aInputConversionFlags;
1.222 + if (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAppend)
1.223 + {
1.224 + aForeign.SetLength(0);
1.225 + internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAppend; // the internal calls always append, so that successive runs accumulate in the output
1.226 + }
1.227 + if (aUnicode.Length()==0)
1.228 + {
1.229 + return 0;
1.230 + }
1.231 + if (aForeign.MaxLength()==aForeign.Length()) // relies on the fact that aForeign's length has been set to zero if aInputConversionFlags does not have CCnvCharacterSetConverter::EInputConversionFlagAppend set
1.232 + {
1.233 + return aUnicode.Length();
1.234 + }
1.235 + TDes8* foreign=&aForeign; // the descriptor actually written to; set to NULL once aForeign has been finalised
1.236 + TPtr8 dummyForeign(NULL, 0, 0);
1.237 + if (aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagMustEndInDefaultCharacterSet)
1.238 + {
1.239 + TInt dummyMaximumLength =
1.240 + aForeign.MaxLength() - aArrayOfCharacterSets[KDefaultCharacterSet].iEscapeSequence->Length(); // hold back enough room for the escape sequence to the default character set
1.241 + __ASSERT_ALWAYS(dummyMaximumLength >= 0,
1.242 + Panic(EPanicOutputDescriptorTooShortEvenToHoldEscapeSequenceToDefaultCharacterSet));
1.243 + dummyForeign.Set(const_cast <TUint8*> (aForeign.Ptr()),
1.244 + aForeign.Length(),
1.245 + dummyMaximumLength); // NOTE(review): in append mode aForeign.Length() may exceed dummyMaximumLength here - confirm how TPtr8::Set behaves in that case
1.246 + foreign=&dummyForeign; // write through a view of aForeign's buffer that has the reduced maximum length
1.247 + }
1.248 + const TInt numberOfCharacterSets=aArrayOfCharacterSets.Count();
1.249 + TInt numberOfUnicodeElementsConsumed=0;
1.250 + internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter; // this is not just an optimization - it ensures that "foreign" doesn't get filled up too much each time CCnvCharacterSetConverter::DoConvertFromUnicode is called
1.251 + TInt previousCharacterSet = aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAssumeStartInDefaultCharacterSet?
1.252 + KDefaultCharacterSet : KNoPreviousCharacterSet; // index of the character set that the output is currently switched to
1.253 + FOREVER
1.254 + {
1.255 + for (TInt presentCharacterSet=KDefaultCharacterSet;;) // each run of text is first tried with the default character set
1.256 + {
1.257 + __ASSERT_DEBUG(numberOfUnicodeElementsConsumed<=aUnicode.Length(), Panic(EPanicBadNumberOfUnicodeElementsConsumed));
1.258 + if (numberOfUnicodeElementsConsumed>=aUnicode.Length())
1.259 + {
1.260 + goto end;
1.261 + }
1.262 + const SCharacterSet& characterSet=aArrayOfCharacterSets[presentCharacterSet];
1.263 + const TInt oldNumberOfBytesInForeign=foreign->Length();
1.264 + if (numberOfUnicodeElementsConsumed>0)
1.265 + {
1.266 + internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable;
1.267 + }
1.268 + CCnvCharacterSetConverter::TArrayOfAscendingIndices indicesOfUnconvertibleCharacters;
1.269 + const TInt returnValue=CCnvCharacterSetConverter::DoConvertFromUnicode(*characterSet.iConversionData, aDefaultEndiannessOfForeignCharacters, KNullDesC8, *foreign, aUnicode.Mid(numberOfUnicodeElementsConsumed), indicesOfUnconvertibleCharacters, aOutputConversionFlags, internalInputConversionFlags); // empty replacement: unconvertible characters are replaced further down, once every character set has been tried
1.270 + if (returnValue<0)
1.271 + {
1.272 + return returnValue; // this is an error-code
1.273 + }
1.274 + __ASSERT_DEBUG(foreign->Length()>=oldNumberOfBytesInForeign, Panic(EPanicAppendFlagViolated));
1.275 + TInt indexOfFirstUnconvertibleCharacter;
1.276 + if (indicesOfUnconvertibleCharacters.NumberOfIndices()==0)
1.277 + {
1.278 + indexOfFirstUnconvertibleCharacter=-1;
1.279 + numberOfUnicodeElementsConsumed=aUnicode.Length()-returnValue;
1.280 + }
1.281 + else
1.282 + {
1.283 + indexOfFirstUnconvertibleCharacter=indicesOfUnconvertibleCharacters[0];
1.284 + numberOfUnicodeElementsConsumed+=indexOfFirstUnconvertibleCharacter; // count only the characters in front of the unconvertible one
1.285 + __ASSERT_DEBUG(numberOfUnicodeElementsConsumed+LengthOfUnicodeCharacter(aUnicode, numberOfUnicodeElementsConsumed)==aUnicode.Length()-returnValue, Panic(EPanicBadNumberOfUnicodeCharactersConverted));
1.286 + }
1.287 + if (indexOfFirstUnconvertibleCharacter!=0) // if at least one Unicode character at the start of CCnvCharacterSetConverter::DoConvertFromUnicode's input descriptor was convertible...
1.288 + {
1.289 + TBool gotoEnd = EFalse;
1.290 + if (foreign->Length()>oldNumberOfBytesInForeign)
1.291 + {
1.292 + TInt numberOfCharactersThatDroppedOut=0;
1.293 + // Insert an escape sequence if this character set is different from the last one.
1.294 + if (presentCharacterSet != previousCharacterSet)
1.295 + {
1.296 + // Insert escape sequence (if required) in front of the last encoded run of text.
1.297 + // Note that this may cause some characters to drop out at the end.
1.298 + (*characterSet.iConvertFromIntermediateBufferInPlace)(oldNumberOfBytesInForeign, *foreign, numberOfCharactersThatDroppedOut);
1.299 + if (oldNumberOfBytesInForeign < foreign->Length()) // record the switch only if part of the run survived the insertion
1.300 + previousCharacterSet = presentCharacterSet;
1.301 + }
1.302 + numberOfUnicodeElementsConsumed-=numberOfCharactersThatDroppedOut; // characters that dropped out have not been consumed after all
1.303 + if (numberOfCharactersThatDroppedOut>0 )// if "foreign" has been filled to as much as it will hold...
1.304 + {
1.305 + gotoEnd = ETrue;
1.306 + }
1.307 + }
1.308 + if (indexOfFirstUnconvertibleCharacter<0) // if we've successfully converted up to the end of aUnicode (using *characterSet.iConversionData)...
1.309 + {
1.310 + gotoEnd = ETrue;
1.311 + }
1.312 + if (gotoEnd)
1.313 + {
1.314 + if ( aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagMustEndInDefaultCharacterSet
1.315 + && previousCharacterSet != KDefaultCharacterSet
1.316 + && previousCharacterSet != KNoPreviousCharacterSet)
1.317 + {
1.318 + aForeign.SetLength(foreign->Length());
1.319 + aForeign.Append(*aArrayOfCharacterSets[KDefaultCharacterSet].iEscapeSequence); // fits because "foreign" was given a reduced maximum length above
1.320 + foreign=NULL; // aForeign is now final, so the code at "end" must leave it alone
1.321 + }
1.322 + goto end;
1.323 + }
1.324 + break; // the next run starts again from the default character set
1.325 + }
1.326 + __ASSERT_DEBUG(presentCharacterSet<numberOfCharacterSets, Panic(EPanicLoopCounterOverRun1));
1.327 + ++presentCharacterSet; // the first remaining character is not in this character set, so try the next one
1.328 + if (presentCharacterSet>=numberOfCharacterSets)
1.329 + {
1.330 + if ((foreign->MaxLength()-foreign->Length()<aReplacementForUnconvertibleUnicodeCharacters.Length()) ||
1.331 + (aIndicesOfUnconvertibleCharacters.AppendIndex(numberOfUnicodeElementsConsumed)!=CCnvCharacterSetConverter::TArrayOfAscendingIndices::EAppendSuccessful)) // the tests must be done in this order as AppendIndex must only be called if there is room for aReplacementForUnconvertibleUnicodeCharacters
1.332 + {
1.333 + goto end;
1.334 + }
1.335 + numberOfUnicodeElementsConsumed+=LengthOfUnicodeCharacter(aUnicode, numberOfUnicodeElementsConsumed);
1.336 + foreign->Append(aReplacementForUnconvertibleUnicodeCharacters);
1.337 + break;
1.338 + }
1.339 + }
1.340 + }
1.341 +end:
1.342 + if (foreign!=NULL) // NULL means that the "gotoEnd" handling above has already finalised aForeign
1.343 + {
1.344 + aForeign.SetLength(foreign->Length());
1.345 + if ((aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagMustEndInDefaultCharacterSet) && (previousCharacterSet!=KDefaultCharacterSet) && (previousCharacterSet!=KNoPreviousCharacterSet)) aForeign.Append(*aArrayOfCharacterSets[KDefaultCharacterSet].iEscapeSequence); // the exits that bypass the "gotoEnd" handling (input ending in a replaced character, no room for the replacement, index array full) must also switch back to the default character set; the room for this was reserved above
1.346 + }
1.347 + if ((numberOfUnicodeElementsConsumed==0) && (aOutputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated) && (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable))
1.348 + {
1.349 + return CCnvCharacterSetConverter::EErrorIllFormedInput;
1.350 + }
1.351 + return aUnicode.Length()-numberOfUnicodeElementsConsumed;
1.352 + }
1.353 +
1.354 +
1.355 +/** Inserts an escape sequence into the descriptor.
1.356 +
1.357 +This function is provided to help in the implementation of
1.358 +ConvertFromUnicode() for modal character set encodings.
1.359 +Each SCharacterSet object in the array passed to
1.360 +ConvertFromUnicode() must have its
1.361 +iConvertFromIntermediateBufferInPlace member assigned. To
1.362 +do this for a modal character set encoding, implement a function whose
1.363 +signature matches that of FConvertFromIntermediateBufferInPlace
1.364 +and which calls this function, passing all arguments unchanged, and
1.365 +specifying the character set's escape sequence and the number of bytes per
1.366 +character.
1.367 +
1.368 +@param aStartPositionInDescriptor The byte position in aDescriptor at which
1.369 +the escape sequence is inserted. If the character set uses more than one byte
1.370 +per character, this position must be the start of a character, otherwise a
1.371 +panic occurs.
1.372 +@param aDescriptor The descriptor into which the escape sequence is inserted.
1.373 +@param aNumberOfCharactersThatDroppedOut The escape sequence is inserted at
1.374 +aStartPositionInDescriptor, and any characters that need to drop out to make
1.375 +room for it (because the descriptor's maximum length was not long enough)
1.376 +drop out from the end of the descriptor. On return, this parameter holds the
1.377 +number of characters that dropped out.
1.378 +@param aEscapeSequence The escape sequence for the character set.
1.379 +@param aNumberOfBytesPerCharacter The number of bytes per character. */
1.380 +EXPORT_C void CnvUtilities::ConvertFromIntermediateBufferInPlace(
1.381 + TInt aStartPositionInDescriptor,
1.382 + TDes8& aDescriptor,
1.383 + TInt& aNumberOfCharactersThatDroppedOut,
1.384 + const TDesC8& aEscapeSequence,
1.385 + TInt aNumberOfBytesPerCharacter)
1.386 + {
1.387 + const TInt lengthOfDescriptor=aDescriptor.Length();
1.388 + __ASSERT_ALWAYS((lengthOfDescriptor-aStartPositionInDescriptor)%aNumberOfBytesPerCharacter==0, Panic(EPanicDescriptorNotWholeNumberOfCharacters1)); // NOTE(review): aNumberOfBytesPerCharacter itself is not checked; zero would be a division by zero here
1.389 + aNumberOfCharactersThatDroppedOut=(Max(0, aEscapeSequence.Length()-(aDescriptor.MaxLength()-lengthOfDescriptor))+(aNumberOfBytesPerCharacter-1))/aNumberOfBytesPerCharacter; // bytes of the escape sequence that exceed the free space (never negative), rounded up to whole characters
1.390 + const TInt lengthOfRunInCharacters=(lengthOfDescriptor-aStartPositionInDescriptor)/aNumberOfBytesPerCharacter; // the run is everything from aStartPositionInDescriptor to the end of the descriptor
1.391 + if (aNumberOfCharactersThatDroppedOut>=lengthOfRunInCharacters) // ">=" is correct (rather than ">") as if there's only room for the escape sequence we don't want to have it in the descriptor
1.392 + {
1.393 + aNumberOfCharactersThatDroppedOut=lengthOfRunInCharacters; // the whole run is dropped and no escape sequence is inserted
1.394 + aDescriptor.SetLength(aStartPositionInDescriptor);
1.395 + }
1.396 + else
1.397 + {
1.398 + aDescriptor.SetLength(lengthOfDescriptor-(aNumberOfCharactersThatDroppedOut*aNumberOfBytesPerCharacter)); // cut the dropped characters off the end first, so that the insertion fits
1.399 + aDescriptor.Insert(aStartPositionInDescriptor, aEscapeSequence);
1.400 + }
1.401 + }
1.402 +
1.403 +
1.404 +/** Converts text from a modal foreign character set encoding into Unicode.
1.405 +
1.406 +The non-Unicode text specified in aForeign is converted using
1.407 +the array of character set conversion objects (aArrayOfStates)
1.408 +provided by the plug-in, and the converted text is returned in
1.409 +aUnicode. The function can either append to aUnicode
1.410 +or overwrite its contents (if any), depending on the input conversion flags
1.411 +specified. The first element in aArrayOfStates is taken to be
1.412 +the default mode (i.e. the mode to assume by default if there is no preceding
1.413 +escape sequence).
1.414 +
1.415 +@param aDefaultEndiannessOfForeignCharacters The default endian-ness of the
1.416 +foreign characters. If an endian-ness for foreign characters is specified
1.417 +in the conversion data, then that is used instead and the value of
1.418 +aDefaultEndiannessOfForeignCharacters is ignored.
1.419 +@param aUnicode On return, contains the text converted into Unicode.
1.420 +@param aForeign The non-Unicode source text to be converted.
1.421 +@param aState Used to store a modal character set encoding's current mode across
1.422 +multiple calls to ConvertToUnicode() on the same input descriptor. This argument
1.423 +should be passed the same object as passed to the plug-in's ConvertToUnicode()
1.424 +exported function.
1.425 +@param aNumberOfUnconvertibleCharacters On return, contains the number of
1.426 +characters in aForeign which were not converted. Characters which cannot be
1.427 +converted are output as Unicode replacement characters (0xfffd).
1.428 +@param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index
1.429 +of the first byte of the first unconvertible character. For instance if the
1.430 +first character in the input descriptor (aForeign) could not be converted,
1.431 +then this parameter is set to the first byte of that character, i.e. zero.
1.432 +A negative value is returned if all the characters were converted.
1.433 +@param aArrayOfStates Array of character set conversion data objects, and their
1.434 +escape sequences ("modes"). There must be one or more modes in this array,
1.435 +none of the modes can have any NULL member data, and each mode's escape sequence
1.436 +must begin with KControlCharacterEscape (0x1b) or a panic occurs.
1.437 +@return The number of unconverted bytes left at the end of the input descriptor,
1.438 +or a negative error value, as defined in CCnvCharacterSetConverter::TError. */
1.439 +EXPORT_C TInt CnvUtilities::ConvertToUnicodeFromModalForeign(
1.440 + CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
1.441 + TDes16& aUnicode,
1.442 + const TDesC8& aForeign,
1.443 + TInt& aState,
1.444 + TInt& aNumberOfUnconvertibleCharacters,
1.445 + TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter,
1.446 + const TArray<SState>& aArrayOfStates)
1.447 + {
1.448 + TUint notUsed; // receives the output conversion flags, which this overload does not expose to its caller
1.449 + return ConvertToUnicodeFromModalForeign(aDefaultEndiannessOfForeignCharacters, // forwards to the overload that takes conversion flags
1.450 + aUnicode,
1.451 + aForeign,
1.452 + aState,
1.453 + aNumberOfUnconvertibleCharacters,
1.454 + aIndexOfFirstByteOfFirstUnconvertibleCharacter,
1.455 + aArrayOfStates,
1.456 + notUsed,
1.457 + 0); // no input conversion flags, so aUnicode is overwritten rather than appended to
1.458 + }
1.459 +
1.460 +/** Converts text from a modal foreign character set encoding into Unicode, taking input conversion flags and returning output conversion flags.
1.461 +@param aDefaultEndiannessOfForeignCharacters The default endian-ness for the foreign characters. If an endian-ness
1.462 +for foreign characters is specified in the conversion data, then that is used instead and the value of
1.463 +aDefaultEndiannessOfForeignCharacters is ignored.
1.464 +@param aUnicode On return, contains the text converted into Unicode.
1.465 +@param aForeign The non-Unicode source text to be converted.
1.466 +@param aState Used to store a modal character set encoding's current mode
1.467 +across multiple calls to ConvertToUnicode() on the same input descriptor. This
1.468 +argument should be passed the same object as passed to the plug-in's
1.469 +ConvertToUnicode() exported function.
1.470 +@param aNumberOfUnconvertibleCharacters On return, contains the number of
1.471 +characters in aForeign which were not converted. Characters which cannot be
1.472 +converted are output as Unicode replacement characters (0xfffd).
1.473 +@param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index
1.474 +of the first byte of the first unconvertible character. For instance if the
1.475 +first character in the input descriptor (aForeign) could not be converted,
1.476 +then this parameter is set to the first byte of that character, i.e. zero.
1.477 +A negative value is returned if all the characters were converted.
1.478 +@param aArrayOfStates Array of character set conversion data objects, and their
1.479 +escape sequences. There must be one or more modes in this array, none of the
1.480 +modes can have any NULL member data, and each mode's escape sequence must
1.481 +begin with KControlCharacterEscape (0x1b) or a panic occurs.
1.482 +@param aOutputConversionFlags If the input descriptor ended in a truncated
1.483 +sequence, e.g. a part of a multi-byte character, aOutputConversionFlags
1.484 +returns with the EOutputConversionFlagInputIsTruncated flag set.
1.485 +@param aInputConversionFlags Specify
1.486 +CCnvCharacterSetConverter::EInputConversionFlagAppend to append the text to
1.487 +aUnicode. Specify EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable
1.488 +to prevent the function from returning the error-code EErrorIllFormedInput
1.489 +when the input descriptor consists of nothing but a truncated sequence. The
1.490 +CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter
1.491 +flag must not be set, otherwise a panic occurs.
1.492 +@return The number of unconverted bytes left at the end of the input descriptor,
1.493 +or a negative error value, as defined in CCnvCharacterSetConverter::TError. */
1.494 +EXPORT_C TInt CnvUtilities::ConvertToUnicodeFromModalForeign(
1.495 + CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
1.496 + TDes16& aUnicode,
1.497 + const TDesC8& aForeign,
1.498 + TInt& aState,
1.499 + TInt& aNumberOfUnconvertibleCharacters,
1.500 + TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter,
1.501 + const TArray<SState>& aArrayOfStates,
1.502 + TUint& aOutputConversionFlags,
1.503 + TUint aInputConversionFlags)
1.504 + {
1.505 + __ASSERT_ALWAYS(~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter, Panic(EPanicBadInputConversionFlags2)); // callers must not set this flag
1.506 + CheckArrayOfStates(aArrayOfStates);
1.507 + aNumberOfUnconvertibleCharacters=0;
1.508 + aIndexOfFirstByteOfFirstUnconvertibleCharacter=-1; // stays negative unless an unconvertible character is found
1.509 + aOutputConversionFlags=0;
1.510 + TUint internalInputConversionFlags=aInputConversionFlags;
1.511 + if (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAppend)
1.512 + {
1.513 + aUnicode.SetLength(0);
1.514 + internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAppend; // the internal calls always append, so that successive runs accumulate in aUnicode
1.515 + }
1.516 + if (aForeign.Length()==0)
1.517 + {
1.518 + return 0;
1.519 + }
1.520 + if (aUnicode.MaxLength()==aUnicode.Length()) // relies on the fact that aUnicode's length has been set to zero if aInputConversionFlags does not have CCnvCharacterSetConverter::EInputConversionFlagAppend set
1.521 + {
1.522 + return aForeign.Length();
1.523 + }
1.524 + TPtrC8 remainderOfForeign(aForeign); // the part of the input that has not yet been split into runs
1.525 + TPtrC8 homogeneousRun; // the run of bytes currently being converted with a single set of conversion data
1.526 + TInt numberOfForeignBytesConsumed=0;
1.527 + const SCnvConversionData* conversionData = NULL;
1.528 + const TInt startOfNextEscapeSequence=aForeign.Locate(KControlCharacterEscape);
1.529 + if (startOfNextEscapeSequence!=0) // if aForeign doesn't start with an escape sequence...
1.530 + {
1.531 + conversionData=(aState!=CCnvCharacterSetConverter::KStateDefault)? REINTERPRET_CAST(const SCnvConversionData*, aState): aArrayOfStates[0].iConversionData; // resume in the mode stored in aState (a conversion-data pointer, see the end of this function), otherwise use the first (default) mode
1.532 + if (startOfNextEscapeSequence==KErrNotFound)
1.533 + {
1.534 + homogeneousRun.Set(remainderOfForeign); // no escape byte at all: the whole input is one run
1.535 + remainderOfForeign.Set(NULL, 0);
1.536 + }
1.537 + else
1.538 + {
1.539 + __ASSERT_DEBUG(startOfNextEscapeSequence>0, Panic(EPanicBadStartOfNextEscapeSequence));
1.540 + homogeneousRun.Set(remainderOfForeign.Left(startOfNextEscapeSequence)); // the run is everything in front of the first escape byte
1.541 + remainderOfForeign.Set(remainderOfForeign.Mid(startOfNextEscapeSequence));
1.542 + }
1.543 + goto handleHomogeneousRun; // enter the loop body below, skipping the first call to NextHomogeneousForeignRun
1.544 + }
1.545 + FOREVER
1.546 + {
1.547 + if (!NextHomogeneousForeignRun(conversionData, numberOfForeignBytesConsumed, homogeneousRun, remainderOfForeign, aArrayOfStates, aOutputConversionFlags)) // defined outside this excerpt; presumably consumes the escape sequence at the start of remainderOfForeign and yields the run after it - confirm against its definition
1.548 + {
1.549 + goto end;
1.550 + }
1.551 +handleHomogeneousRun:
1.552 + if (conversionData==NULL)
1.553 + {
1.554 + return CCnvCharacterSetConverter::EErrorIllFormedInput;
1.555 + }
1.556 + TInt numberOfUnconvertibleCharacters;
1.557 + TInt indexOfFirstByteOfFirstUnconvertibleCharacter;
1.558 + const TInt returnValue=CCnvCharacterSetConverter::DoConvertToUnicode(*conversionData, aDefaultEndiannessOfForeignCharacters, aUnicode, homogeneousRun, numberOfUnconvertibleCharacters, indexOfFirstByteOfFirstUnconvertibleCharacter, aOutputConversionFlags, internalInputConversionFlags);
1.559 + if (returnValue<0)
1.560 + {
1.561 + return returnValue; // this is an error-code
1.562 + }
1.563 + if (numberOfUnconvertibleCharacters>0)
1.564 + {
1.565 + if (aNumberOfUnconvertibleCharacters==0) // only the first unconvertible character of the whole input is reported
1.566 + {
1.567 + aIndexOfFirstByteOfFirstUnconvertibleCharacter=numberOfForeignBytesConsumed+indexOfFirstByteOfFirstUnconvertibleCharacter; // the run-relative index is offset by the bytes consumed before this run
1.568 + }
1.569 + aNumberOfUnconvertibleCharacters+=numberOfUnconvertibleCharacters;
1.570 + }
1.571 + numberOfForeignBytesConsumed+=homogeneousRun.Length();
1.572 + if (returnValue>0) // a positive value is treated as the number of bytes at the end of the run that were left unconverted
1.573 + {
1.574 + numberOfForeignBytesConsumed-=returnValue;
1.575 + goto end;
1.576 + }
1.577 + if (numberOfForeignBytesConsumed>0)
1.578 + {
1.579 + internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable; // once some input has been consumed, later runs may consist of nothing but a truncated sequence
1.580 + }
1.581 + __ASSERT_DEBUG(remainderOfForeign==aForeign.Mid(numberOfForeignBytesConsumed), Panic(EPanicInconsistentNumberOfForeignBytesRemaining));
1.582 + }
1.583 +end:
1.584 + if ((numberOfForeignBytesConsumed==0) && (aOutputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated) && (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable))
1.585 + {
1.586 + return CCnvCharacterSetConverter::EErrorIllFormedInput; // aState is left unchanged on this path
1.587 + }
1.588 + aState=REINTERPRET_CAST(TInt, conversionData); // remember the current mode for the next call; NOTE(review): assumes that a pointer fits in a TInt
1.589 + return aForeign.Length()-numberOfForeignBytesConsumed;
1.590 + }
1.591 +
1.592 +
1.593 +/** Converts text from a non-modal complex character set encoding (e.g.
1.594 +Shift-JIS or EUC-JP) into Unicode. The non-Unicode text specified in
1.595 +aForeign is converted using the array of character set
1.596 +conversion methods (aArrayOfMethods) provided by the
1.597 +plug-in, and the converted text is returned in aUnicode.
1.598 +Overwrites the contents, if any, of aUnicode.
1.599 +
1.600 +@param aDefaultEndiannessOfForeignCharacters The default endian-ness of the
1.601 +foreign characters. If an endian-ness for foreign characters is specified
1.602 +in the conversion data, then that is used instead and the value of
1.603 +aDefaultEndiannessOfForeignCharacters is ignored.
1.604 +@param aUnicode On return, contains the text converted into Unicode.
1.605 +@param aForeign The non-Unicode source text to be converted.
1.606 +@param aNumberOfUnconvertibleCharacters On return, contains the number of
1.607 +characters in aForeign which were not converted. Characters which cannot be
1.608 +converted are output as Unicode replacement characters (0xfffd).
1.609 +@param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index
1.610 +of the first byte of the first unconvertible character. For instance if the
1.611 +first character in the input descriptor (aForeign) could not be converted,
1.612 +then this parameter is set to the first byte of that character, i.e. zero.
1.613 +A negative value is returned if all the characters were converted.
1.614 +@param aArrayOfMethods Array of conversion methods. There must be one or more
1.615 +methods in this array and none of the methods in the array can have any NULL
1.616 +member data or a panic occurs.
1.617 +@return The number of unconverted bytes left at the end of the input descriptor,
1.618 +or a negative error value, as defined in TError. */
1.619 +EXPORT_C TInt CnvUtilities::ConvertToUnicodeFromHeterogeneousForeign(
1.620 +    CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
1.621 +    TDes16& aUnicode,
1.622 +    const TDesC8& aForeign,
1.623 +    TInt& aNumberOfUnconvertibleCharacters,
1.624 +    TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter,
1.625 +    const TArray<SMethod>& aArrayOfMethods)
1.626 +    {
1.627 +    // Convenience overload: forwards to the eight-argument overload with no
1.628 +    // input conversion flags set, and throws away the output conversion flags.
1.629 +    const TUint noInputConversionFlags=0;
1.630 +    TUint discardedOutputConversionFlags;
1.631 +    const TInt unconvertedBytesOrError=ConvertToUnicodeFromHeterogeneousForeign(
1.632 +        aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign,
1.633 +        aNumberOfUnconvertibleCharacters,
1.634 +        aIndexOfFirstByteOfFirstUnconvertibleCharacter,
1.635 +        aArrayOfMethods, discardedOutputConversionFlags, noInputConversionFlags);
1.636 +    return unconvertedBytesOrError;
1.637 +    }
1.638 +
1.639 +/** Converts text from a non-modal complex character set encoding (e.g. Shift-JIS or
1.640 +EUC-JP) into Unicode, under the control of the supplied conversion flags.
1.641 +@param aDefaultEndiannessOfForeignCharacters The default endian-ness for the foreign
1.642 +characters. It is ignored if the conversion data specifies an endian-ness of its own.
1.643 +@param aUnicode On return, contains the text converted into Unicode.
1.644 +@param aForeign The non-Unicode source text to be converted.
1.645 +@param aNumberOfUnconvertibleCharacters On return, contains the number of
1.646 +characters in aForeign which were not converted. Characters which cannot be
1.647 +converted are output as Unicode replacement characters (0xfffd).
1.648 +@param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index
1.649 +of the first byte of the first unconvertible character. For instance if the
1.650 +first character in the input descriptor (aForeign) could not be converted,
1.651 +then this parameter is set to the first byte of that character, i.e. zero.
1.652 +A negative value is returned if all the characters were converted.
1.653 +@param aArrayOfMethods Array of conversion methods. There must be one or more
1.654 +methods in this array and none of the methods in the array can have any NULL
1.655 +member data or a panic occurs.
1.656 +@param aOutputConversionFlags If the input descriptor ended in a truncated
1.657 +sequence, e.g. a part of a multi-byte character, aOutputConversionFlags
1.658 +returns with the EOutputConversionFlagInputIsTruncated flag set.
1.659 +@param aInputConversionFlags Specify
1.660 +CCnvCharacterSetConverter::EInputConversionFlagAppend to append the text to
1.661 +aUnicode. Specify EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable
1.662 +to prevent the function from returning the error-code EErrorIllFormedInput
1.663 +when the input descriptor consists of nothing but a truncated sequence. The
1.664 +CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter
1.665 +flag must not be set, otherwise a panic occurs.
1.666 +@return The number of unconverted bytes left at the end of the input descriptor,
1.667 +or a negative error value, as defined in TError. */
1.668 +EXPORT_C TInt CnvUtilities::ConvertToUnicodeFromHeterogeneousForeign(
1.669 + CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
1.670 + TDes16& aUnicode,
1.671 + const TDesC8& aForeign,
1.672 + TInt& aNumberOfUnconvertibleCharacters,
1.673 + TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter,
1.674 + const TArray<SMethod>& aArrayOfMethods,
1.675 + TUint& aOutputConversionFlags,
1.676 + TUint aInputConversionFlags)
1.677 + {
1.678 + __ASSERT_ALWAYS(~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter, Panic(EPanicBadInputConversionFlags3));
1.679 + CheckArrayOfMethods(aArrayOfMethods);
1.680 + aNumberOfUnconvertibleCharacters=0;
1.681 + aIndexOfFirstByteOfFirstUnconvertibleCharacter=-1;
1.682 + aOutputConversionFlags=0;
1.683 + TUint internalInputConversionFlags=aInputConversionFlags; // working copy of the flags; bits are added to it as the conversion proceeds
1.684 + if (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAppend)
1.685 + {
1.686 + aUnicode.SetLength(0);
1.687 + internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAppend; // aUnicode is now empty, so each DoConvertToUnicode call below is asked to append to it
1.688 + }
1.689 + if (aForeign.Length()==0)
1.690 + {
1.691 + return 0;
1.692 + }
1.693 + if (aUnicode.MaxLength()==aUnicode.Length()) // relies on the fact that aUnicode's length has been set to zero if aInputConversionFlags does not have CCnvCharacterSetConverter::EInputConversionFlagAppend set
1.694 + {
1.695 + return aForeign.Length(); // no room left in aUnicode: every input byte is reported as unconverted
1.696 + }
1.697 + const TInt numberOfMethods=aArrayOfMethods.Count();
1.698 + TPtrC8 remainderOfForeign(aForeign);
1.699 + TInt numberOfForeignBytesConsumed=0;
1.700 + FOREVER
1.701 + {
1.702 + TInt lengthOfRunToConvert=0;
1.703 + const SMethod* method=NULL;
1.704 + for (TInt i=0;;) // pick the first method that reports a non-zero run at the start of remainderOfForeign
1.705 + {
1.706 + method=&aArrayOfMethods[i];
1.707 + __ASSERT_DEBUG(method!=NULL, Panic(EPanicBadMethodPointer));
1.708 + lengthOfRunToConvert=(*method->iNumberOfBytesAbleToConvert)(remainderOfForeign);
1.709 + if (lengthOfRunToConvert<0)
1.710 + {
1.711 + return lengthOfRunToConvert; // this is an error-code
1.712 + }
1.713 + if (lengthOfRunToConvert>0)
1.714 + {
1.715 + break;
1.716 + }
1.717 + __ASSERT_DEBUG(i<numberOfMethods, Panic(EPanicLoopCounterOverRun2));
1.718 + ++i;
1.719 + if (i>=numberOfMethods)
1.720 + {
1.721 + aOutputConversionFlags|=CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated; // no method reported any convertible bytes at the start of the remaining input
1.722 + goto end;
1.723 + }
1.724 + }
1.725 + TBuf8<KMaximumLengthOfIntermediateBuffer> intermediateBuffer;
1.726 + const TInt maximumUsableLengthOfIntermediateBuffer=ReduceToNearestMultipleOf(KMaximumLengthOfIntermediateBuffer, method->iNumberOfBytesPerCharacter);
1.727 + FOREVER
1.728 + {
1.729 + const TInt numberOfForeignBytesConsumedThisTime=Min(lengthOfRunToConvert, maximumUsableLengthOfIntermediateBuffer); // the run is converted in chunks no larger than the usable part of the intermediate buffer
1.730 + intermediateBuffer=remainderOfForeign.Left(numberOfForeignBytesConsumedThisTime);
1.731 + __ASSERT_DEBUG((numberOfForeignBytesConsumedThisTime%method->iNumberOfBytesPerCharacter)==0, Panic(EPanicDescriptorNotWholeNumberOfCharacters2));
1.732 + (*method->iConvertToIntermediateBufferInPlace)(intermediateBuffer);
1.733 + __ASSERT_DEBUG((intermediateBuffer.Length()%method->iNumberOfCoreBytesPerCharacter)==0, Panic(EPanicDescriptorNotWholeNumberOfCharacters3));
1.734 + __ASSERT_DEBUG((intermediateBuffer.Length()/method->iNumberOfCoreBytesPerCharacter)*method->iNumberOfBytesPerCharacter==numberOfForeignBytesConsumedThisTime, Panic(EPanicBadMethodData1));
1.735 + TInt numberOfUnconvertibleCharacters;
1.736 + TInt indexOfFirstByteOfFirstUnconvertibleCharacter;
1.737 + const TInt returnValue=CCnvCharacterSetConverter::DoConvertToUnicode(*method->iConversionData, aDefaultEndiannessOfForeignCharacters, aUnicode, intermediateBuffer, numberOfUnconvertibleCharacters, indexOfFirstByteOfFirstUnconvertibleCharacter, aOutputConversionFlags, internalInputConversionFlags);
1.738 + if (returnValue<0)
1.739 + {
1.740 + return returnValue; // this is an error-code
1.741 + }
1.742 + if (numberOfUnconvertibleCharacters>0)
1.743 + {
1.744 + if (aNumberOfUnconvertibleCharacters==0)
1.745 + {
1.746 + aIndexOfFirstByteOfFirstUnconvertibleCharacter=numberOfForeignBytesConsumed+(indexOfFirstByteOfFirstUnconvertibleCharacter/method->iNumberOfCoreBytesPerCharacter)*method->iNumberOfBytesPerCharacter; // the index refers to intermediateBuffer (core bytes per character), so it is scaled back to bytes of aForeign, as is done for returnValue below
1.747 + }
1.748 + aNumberOfUnconvertibleCharacters+=numberOfUnconvertibleCharacters;
1.749 + }
1.750 + numberOfForeignBytesConsumed+=numberOfForeignBytesConsumedThisTime;
1.751 + if (returnValue>0)
1.752 + {
1.753 + __ASSERT_DEBUG((returnValue%method->iNumberOfCoreBytesPerCharacter)==0, Panic(EPanicDescriptorNotWholeNumberOfCharacters4));
1.754 + numberOfForeignBytesConsumed-=(returnValue/method->iNumberOfCoreBytesPerCharacter)*method->iNumberOfBytesPerCharacter; // returnValue counts unconverted bytes of intermediateBuffer; convert it to bytes of aForeign before giving them back
1.755 + goto end;
1.756 + }
1.757 + if (numberOfForeignBytesConsumed>0)
1.758 + {
1.759 + internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable; // some input has been consumed, so later DoConvertToUnicode calls are allowed a wholly truncated chunk
1.760 + }
1.761 + remainderOfForeign.Set(aForeign.Mid(numberOfForeignBytesConsumed));
1.762 + lengthOfRunToConvert-=numberOfForeignBytesConsumedThisTime;
1.763 + __ASSERT_DEBUG(lengthOfRunToConvert>=0, Panic(EPanicBadLengthOfRunToConvert2));
1.764 + if (lengthOfRunToConvert<=0)
1.765 + {
1.766 + break;
1.767 + }
1.768 + }
1.769 + }
1.770 +end:
1.771 + if ((numberOfForeignBytesConsumed==0) && (aOutputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated) && (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable)) // nothing consumed and the input is truncated: an error unless the caller's own flags allow it
1.772 + {
1.773 + return CCnvCharacterSetConverter::EErrorIllFormedInput;
1.774 + }
1.775 + return aForeign.Length()-numberOfForeignBytesConsumed;
1.776 + }
1.777 +
1.778 +void CnvUtilities::CheckArrayOfCharacterSets(const TArray<SCharacterSet>& aArrayOfCharacterSets)
1.779 +    { // Panics unless the array is non-empty and every entry has its conversion data, conversion function and escape sequence set.
1.780 +    const TInt count=aArrayOfCharacterSets.Count();
1.781 +    if (count<=0) { Panic(EPanicBadNumberOfCharacterSets); }
1.782 +    for (TInt index=0; index<count; ++index)
1.783 +        {
1.784 +        const SCharacterSet& entry=aArrayOfCharacterSets[index];
1.785 +        if (entry.iConversionData==NULL) { Panic(EPanicBadConversionDataPointer1); }
1.786 +        if (entry.iConvertFromIntermediateBufferInPlace==NULL) { Panic(EPanicBadFunctionPointer1); }
1.787 +        if (entry.iEscapeSequence==NULL) { Panic(EPanicBadEscapeSequencePointer1); }
1.788 +        }
1.789 +    }
1.790 +
1.791 +void CnvUtilities::CheckArrayOfStates(const TArray<SState>& aArrayOfStates)
1.792 +    { // Panics unless the array is non-empty and every state has conversion data plus an escape sequence whose first byte is KControlCharacterEscape.
1.793 +    const TInt count=aArrayOfStates.Count();
1.794 +    if (count<=0) { Panic(EPanicBadNumberOfStates); }
1.795 +    for (TInt index=0; index<count; ++index)
1.796 +        {
1.797 +        const SState& entry=aArrayOfStates[index];
1.798 +        if (entry.iEscapeSequence==NULL) { Panic(EPanicBadEscapeSequencePointer2); }
1.799 +        if ((*entry.iEscapeSequence)[0]!=KControlCharacterEscape) { Panic(EPanicBadEscapeSequenceStart); }
1.800 +        if (entry.iConversionData==NULL) { Panic(EPanicBadConversionDataPointer2); }
1.801 +        }
1.802 +    }
1.803 +
1.804 +void CnvUtilities::CheckArrayOfMethods(const TArray<SMethod>& aArrayOfMethods)
1.805 +    { // Panics unless the array is non-empty and every method has all its pointers set and consistent, positive byte counts.
1.806 +    const TInt count=aArrayOfMethods.Count();
1.807 +    if (count<=0) { Panic(EPanicBadNumberOfMethods); }
1.808 +    for (TInt index=0; index<count; ++index)
1.809 +        {
1.810 +        const SMethod& entry=aArrayOfMethods[index];
1.811 +        if (entry.iNumberOfBytesAbleToConvert==NULL) { Panic(EPanicBadFunctionPointer2); }
1.812 +        if (entry.iConvertToIntermediateBufferInPlace==NULL) { Panic(EPanicBadFunctionPointer3); }
1.813 +        if (entry.iConversionData==NULL) { Panic(EPanicBadConversionDataPointer3); }
1.814 +        if (!(entry.iNumberOfBytesPerCharacter>0)) { Panic(EPanicBadMethodData2); }
1.815 +        if (!(entry.iNumberOfCoreBytesPerCharacter>0)) { Panic(EPanicBadMethodData3); }
1.816 +        if (entry.iNumberOfCoreBytesPerCharacter>entry.iNumberOfBytesPerCharacter) { Panic(EPanicBadMethodData4); } // the core bytes can never outnumber the bytes of the whole character
1.817 +        }
1.818 +    }
1.819 +
1.820 +TInt CnvUtilities::LengthOfUnicodeCharacter(const TDesC16& aUnicode, TInt aIndex)
1.821 +    { // Returns 2 if the element at aIndex is the first half of a surrogate pair, otherwise 1.
1.822 +    const TUint first=aUnicode[aIndex];
1.823 +    if ((first<0xd800) || (first>0xdbff))
1.824 +        {
1.825 +        return 1; // not the first half of a surrogate pair
1.826 +        }
1.827 +    __ASSERT_DEBUG(aIndex+1<aUnicode.Length(), Panic(EPanicBadSurrogatePair1));
1.828 +#if defined(_DEBUG)
1.829 +    const TUint second=aUnicode[aIndex+1];
1.830 +#endif
1.831 +    __ASSERT_DEBUG((second>=0xdc00) && (second<=0xdfff), Panic(EPanicBadSurrogatePair2)); // safe to assert: CCnvCharacterSetConverter::DoConvertFromUnicode should already have returned an error value for a bad surrogate pair
1.832 +    return 2;
1.833 +    }
1.834 +
1.835 +TBool CnvUtilities::NextHomogeneousForeignRun(const SCnvConversionData*& aConversionData, TInt& aNumberOfForeignBytesConsumed, TPtrC8& aHomogeneousRun, TPtrC8& aRemainderOfForeign, const TArray<SState>& aArrayOfStates, TUint& aOutputConversionFlags)
1.836 +    {
1.837 +    // Consumes escape sequences from the front of aRemainderOfForeign until one is followed by a
1.838 +    // non-empty run; returns ETrue for that run, or ETrue with aConversionData NULL if no escape
1.839 +    // sequence matches. Returns EFalse once the input is used up or ends in a partial escape sequence.
1.840 +    __ASSERT_DEBUG((aRemainderOfForeign.Length()==0) || (aRemainderOfForeign[0]==KControlCharacterEscape), Panic(EPanicBadRemainderOfForeign));
1.841 +    while (aRemainderOfForeign.Length()!=0)
1.842 +        {
1.843 +        const TInt stateCount=aArrayOfStates.Count();
1.844 +        TInt matchedState=KErrNotFound;
1.845 +        for (TInt s=0; (s<stateCount) && (matchedState==KErrNotFound); ++s)
1.846 +            {
1.847 +            if (MatchesEscapeSequence(aNumberOfForeignBytesConsumed, aHomogeneousRun, aRemainderOfForeign, *aArrayOfStates[s].iEscapeSequence))
1.848 +                {
1.849 +                matchedState=s;
1.850 +                }
1.851 +            }
1.852 +        if (matchedState==KErrNotFound)
1.853 +            {
1.854 +            for (TInt t=0; t<stateCount; ++t)
1.855 +                {
1.856 +                if (IsStartOf(aRemainderOfForeign, *aArrayOfStates[t].iEscapeSequence))
1.857 +                    {
1.858 +                    // the input stops part-way through an escape sequence, so ConvertToUnicode cannot convert any more
1.859 +                    aOutputConversionFlags|=CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated;
1.860 +                    return EFalse;
1.861 +                    }
1.862 +                }
1.863 +            aConversionData=NULL; // forces ConvertToUnicode to return CCnvCharacterSetConverter::EErrorIllFormedInput
1.864 +            return ETrue;
1.865 +            }
1.866 +        aConversionData=aArrayOfStates[matchedState].iConversionData;
1.867 +        if (aHomogeneousRun.Length()>0)
1.868 +            {
1.869 +            return ETrue;
1.870 +            }
1.871 +        }
1.872 +    return EFalse;
1.873 +    }
1.874 +
1.875 +TBool CnvUtilities::MatchesEscapeSequence(TInt& aNumberOfForeignBytesConsumed, TPtrC8& aHomogeneousRun, TPtrC8& aRemainderOfForeign, const TDesC8& aEscapeSequence)
1.876 +    {
1.877 +    // If aRemainderOfForeign begins with aEscapeSequence: strips it, splits what follows at the next
1.878 +    // escape character into aHomogeneousRun and the new aRemainderOfForeign, adds the length of the
1.879 +    // escape sequence to aNumberOfForeignBytesConsumed and returns ETrue. Otherwise returns EFalse.
1.880 +    if (!IsStartOf(aEscapeSequence, aRemainderOfForeign)) { return EFalse; } // no match: every argument is left untouched
1.881 +    const TInt escapeLength=aEscapeSequence.Length();
1.882 +    const TPtrC8 afterEscape(aRemainderOfForeign.Mid(escapeLength));
1.883 +    const TInt nextEscape=afterEscape.Locate(KControlCharacterEscape);
1.884 +    if (nextEscape!=KErrNotFound)
1.885 +        {
1.886 +        aHomogeneousRun.Set(afterEscape.Left(nextEscape));
1.887 +        aRemainderOfForeign.Set(afterEscape.Mid(nextEscape));
1.888 +        }
1.889 +    else
1.890 +        {
1.891 +        aHomogeneousRun.Set(afterEscape);
1.892 +        aRemainderOfForeign.Set(NULL, 0);
1.893 +        }
1.894 +    aNumberOfForeignBytesConsumed+=escapeLength;
1.895 +    return ETrue;
1.896 +    }
1.897 +
1.898 +TBool CnvUtilities::IsStartOf(const TDesC8& aStart, const TDesC8& aPotentiallyLongerDescriptor)
1.899 +    { // True when aPotentiallyLongerDescriptor is at least as long as aStart and its leading bytes equal aStart.
1.900 +    const TInt prefixLength=aStart.Length();
1.901 +    return !(aPotentiallyLongerDescriptor.Length()<prefixLength) && (aStart==aPotentiallyLongerDescriptor.Left(prefixLength));
1.902 +    }
1.903 +