os/textandloc/charconvfw/charconv_fw/src/convutils/convutils.cpp
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/textandloc/charconvfw/charconv_fw/src/convutils/convutils.cpp	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,900 @@
     1.4 +/*
     1.5 +* Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).
     1.6 +* All rights reserved.
     1.7 +* This component and the accompanying materials are made available
     1.8 +* under the terms of "Eclipse Public License v1.0"
     1.9 +* which accompanies this distribution, and is available
    1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
    1.11 +*
    1.12 +* Initial Contributors:
    1.13 +* Nokia Corporation - initial contribution.
    1.14 +*
    1.15 +* Contributors:
    1.16 +*
    1.17 +* Description: 
    1.18 +*
    1.19 +*/
    1.20 +
    1.21 +
    1.22 +#include <e32std.h>
    1.23 +#include <e32base.h>
    1.24 +#include <charconv.h>
    1.25 +#include <convutils.h>
    1.26 +                   
    1.27 +const TInt KNoPreviousCharacterSet=-1;
    1.28 +const TInt KDefaultCharacterSet = 0;
    1.29 +const TUint KControlCharacterEscape=0x1b;
    1.30 +
    1.31 +#if defined(_DEBUG)
    1.32 +//It will cause performance problem with small KMaximumLengthOfIntermediateBuffer.
    1.33 +//Please use release version to test performance cases.
    1.34 +const TInt KMaximumLengthOfIntermediateBuffer=5;
    1.35 +#else
    1.36 +const TInt KMaximumLengthOfIntermediateBuffer=150;
    1.37 +#endif
    1.38 +
    1.39 +struct SCnvConversionData;
    1.40 +
    1.41 +_LIT(KLitPanicText, "CONVUTILS");
    1.42 +
    1.43 +enum TPanic
    1.44 +	{
    1.45 +	EPanicBadInputConversionFlags1=1,
    1.46 +	EPanicBadInputConversionFlags2,
    1.47 +	EPanicBadInputConversionFlags3,
    1.48 +	EPanicBadNumberOfUnicodeElementsConsumed,
    1.49 +	EPanicAppendFlagViolated,
    1.50 +	EPanicBadNumberOfUnicodeCharactersConverted,
    1.51 +	EPanicBadNumberOfCharactersThatDroppedOut,
    1.52 +	EPanicLoopCounterOverRun1,
    1.53 +	EPanicLoopCounterOverRun2,
    1.54 +	EPanicDescriptorNotWholeNumberOfCharacters1,
    1.55 +	EPanicDescriptorNotWholeNumberOfCharacters2,
    1.56 +	EPanicDescriptorNotWholeNumberOfCharacters3,
    1.57 +	EPanicDescriptorNotWholeNumberOfCharacters4,
    1.58 +	EPanicBadStartOfNextEscapeSequence,
    1.59 +	EPanicInconsistentNumberOfForeignBytesRemaining,
    1.60 +	EPanicBadLengthOfRunToConvert1,
    1.61 +	EPanicBadLengthOfRunToConvert2,
    1.62 +	EPanicBadMethodPointer,
    1.63 +	EPanicBadMethodData1,
    1.64 +	EPanicBadMethodData2,
    1.65 +	EPanicBadMethodData3,
    1.66 +	EPanicBadMethodData4,
    1.67 +	EPanicBadNumberOfCharacterSets,
    1.68 +	EPanicBadConversionDataPointer1,
    1.69 +	EPanicBadConversionDataPointer2,
    1.70 +	EPanicBadConversionDataPointer3,
    1.71 +	EPanicBadFunctionPointer1,
    1.72 +	EPanicBadFunctionPointer2,
    1.73 +	EPanicBadFunctionPointer3,
    1.74 +	EPanicBadEscapeSequencePointer1,
    1.75 +	EPanicBadEscapeSequencePointer2,
    1.76 +	EPanicBadNumberOfStates,
    1.77 +	EPanicBadEscapeSequenceStart,
    1.78 +	EPanicBadNumberOfMethods,
    1.79 +	EPanicBadSurrogatePair1,
    1.80 +	EPanicBadSurrogatePair2,
    1.81 +	EPanicBadRemainderOfForeign,
    1.82 +	EPanicOutputDescriptorTooShortEvenToHoldEscapeSequenceToDefaultCharacterSet
    1.83 +	};
    1.84 +
    1.85 +LOCAL_C void Panic(TPanic aPanic)
    1.86 +	{
    1.87 +	User::Panic(KLitPanicText, aPanic);
    1.88 +	}
    1.89 +
    1.90 +/** Converts Unicode text into a complex foreign character set encoding. This 
    1.91 +is an encoding which cannot be converted simply by calling 
    1.92 +CCnvCharacterSetConverter::DoConvertFromUnicode(). It may be modal (e.g. JIS) 
    1.93 +or non-modal (e.g. Shift-JIS).
    1.94 +
    1.95 +The Unicode text specified in aUnicode is converted using the array of 
    1.96 +conversion data objects (aArrayOfCharacterSets) provided by the plug-in for 
    1.97 +the complex character set encoding, and the converted text is returned in 
    1.98 +aForeign. Any existing contents in aForeign are overwritten.
    1.99 +
   1.100 +Unlike CCnvCharacterSetConverter::DoConvertFromUnicode(), multiple character 
   1.101 +sets can be specified. aUnicode is converted using the first character conversion 
   1.102 +data object in the array. When a character is found which cannot be converted 
   1.103 +using that data, each character set in the array is tried in turn. If it cannot 
   1.104 +be converted using any object in the array, the index of the character is 
   1.105 +appended to aIndicesOfUnconvertibleCharacters and the character is replaced 
   1.106 +by aReplacementForUnconvertibleUnicodeCharacters.
   1.107 +
   1.108 +If it can be converted using another object in the array, that object is used 
   1.109 +to convert all subsequent characters until another unconvertible character 
   1.110 +is found.
   1.111 +
   1.112 +@param aDefaultEndiannessOfForeignCharacters The default endian-ness to use 
   1.113 +when writing the characters in the foreign character set. If an endian-ness 
   1.114 +for foreign characters is specified in the current conversion data object, 
   1.115 +then that is used instead and the value of 
   1.116 +aDefaultEndiannessOfForeignCharacters is ignored. 
   1.117 +@param aReplacementForUnconvertibleUnicodeCharacters The single character (one 
   1.118 +or more byte values) which is used to replace unconvertible characters. 
   1.119 +@param aForeign On return, contains the converted text in the non-Unicode 
   1.120 +character set. 
   1.121 +@param aUnicode The source Unicode text to be converted. 
   1.122 +@param aIndicesOfUnconvertibleCharacters On return, holds an ascending array 
   1.123 +of the indices of each Unicode character in the source text which could not 
   1.124 +be converted (because none of the target character sets have an equivalent 
   1.125 +character). 
   1.126 +@param aArrayOfCharacterSets Array of character conversion data objects, 
   1.127 +representing the character sets which comprise a complex character set 
   1.128 +encoding. These are used in sequence to convert the Unicode text. There must 
   1.129 +be at least one character set in this array and no character set may have any 
   1.130 +NULL member data, or a panic occurs. 
   1.131 +@return The number of unconverted characters left at the end of the input 
   1.132 +descriptor (e.g. because aForeign was not long enough to hold all the text), 
   1.133 +or a negative error value, as defined in CCnvCharacterSetConverter::TError. */
   1.134 +EXPORT_C TInt CnvUtilities::ConvertFromUnicode(
   1.135 +		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
   1.136 +		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
   1.137 +		TDes8& aForeign, 
   1.138 +		const TDesC16& aUnicode, 
   1.139 +		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, 
   1.140 +		const TArray<SCharacterSet>& aArrayOfCharacterSets)
   1.141 +	{
   1.142 +	TUint notUsed;
   1.143 +	return ConvertFromUnicode(aDefaultEndiannessOfForeignCharacters, 
   1.144 +								aReplacementForUnconvertibleUnicodeCharacters, 
   1.145 +								aForeign, 
   1.146 +								aUnicode, 
   1.147 +								aIndicesOfUnconvertibleCharacters, 
   1.148 +								aArrayOfCharacterSets, 
   1.149 +								notUsed, 
   1.150 +								0);
   1.151 +	}
   1.152 +
   1.153 +/** Converts Unicode text into a complex foreign character set encoding. This is 
   1.154 +an encoding which cannot be converted simply by a call to 
   1.155 +CCnvCharacterSetConverter::DoConvertFromUnicode(). It may be modal (e.g. JIS) 
   1.156 +or non-modal (e.g. Shift-JIS).
   1.157 +
   1.158 +The Unicode text specified in aUnicode is converted using the array of conversion 
   1.159 +data objects (aArrayOfCharacterSets) provided by the plug-in for the complex 
   1.160 +character set encoding and the converted text is returned in aForeign. The 
   1.161 +function can either append to aForeign or overwrite its contents (if any).
   1.162 +
   1.163 +Unlike CCnvCharacterSetConverter::DoConvertFromUnicode(), multiple character 
   1.164 +sets can be specified. aUnicode is converted using the first character conversion 
   1.165 +data object in the array. When a character is found which cannot be converted 
   1.166 +using that data, each character set in the array is tried in turn. If it cannot 
   1.167 +be converted using any object in the array, the index of the character is 
   1.168 +appended to aIndicesOfUnconvertibleCharacters and the character is replaced 
   1.169 +by aReplacementForUnconvertibleUnicodeCharacters.
   1.170 +
   1.171 +If it can be converted using another object in the array, that object is used 
   1.172 +to convert all subsequent characters until another unconvertible character 
   1.173 +is found.
   1.174 +
   1.175 +@param aDefaultEndiannessOfForeignCharacters The default endian-ness to use 
   1.176 +when writing the characters in the foreign character set. If an endian-ness 
   1.177 +for foreign characters is specified in the current conversion data object, 
   1.178 +then that is used instead and the value of 
   1.179 +aDefaultEndiannessOfForeignCharacters is ignored. 
   1.180 +@param aReplacementForUnconvertibleUnicodeCharacters The single character (one 
   1.181 +or more byte values) which is used to replace unconvertible characters. 
   1.182 +@param aForeign On return, contains the converted text in the non-Unicode 
   1.183 +character set. This may already contain some text. If it does, and if 
   1.184 +aInputConversionFlags specifies EInputConversionFlagAppend, then the converted 
   1.185 +text is appended to this descriptor. 
   1.186 +@param aUnicode The source Unicode text to be converted. 
   1.187 +@param aIndicesOfUnconvertibleCharacters On return, holds an ascending array 
   1.188 +of the indices of each Unicode character in the source text which could not 
   1.189 +be converted (because none of the target character sets have an equivalent 
   1.190 +character). 
   1.191 +@param aArrayOfCharacterSets Array of character set data objects. These are 
   1.192 +used in sequence to convert the Unicode text. There must be at least one 
   1.193 +character set in this array and no character set may have any NULL member 
   1.194 +data, or a panic occurs. 
   1.195 +@param aOutputConversionFlags If the input descriptor ended in a truncated 
   1.196 +sequence, e.g. the first half only of a Unicode surrogate pair, this returns 
   1.197 +with the EOutputConversionFlagInputIsTruncated flag set. 
   1.198 +@param aInputConversionFlags Specify 
   1.199 +CCnvCharacterSetConverter::EInputConversionFlagAppend to append the text to 
   1.200 +aForeign. Specify CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable 
   1.201 +to prevent the function from returning the error-code EErrorIllFormedInput 
   1.202 +when the input descriptor consists of nothing but a truncated sequence. The 
   1.203 +CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter 
   1.204 +flag must not be set, otherwise a panic occurs. 
   1.205 +@return The number of unconverted characters left at the end of the input descriptor 
   1.206 +(e.g. because aForeign was not long enough to hold all the text), or a negative 
   1.207 +error value, as defined in CCnvCharacterSetConverter::TError. */
   1.208 +EXPORT_C TInt CnvUtilities::ConvertFromUnicode(
   1.209 +				CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
   1.210 +				const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
   1.211 +				TDes8& aForeign, 
   1.212 +				const TDesC16& aUnicode, 
   1.213 +				CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, 
   1.214 +				const TArray<SCharacterSet>& aArrayOfCharacterSets, 
   1.215 +				TUint& aOutputConversionFlags, 
   1.216 +				TUint aInputConversionFlags)
   1.217 +	{
   1.218 +	__ASSERT_ALWAYS(~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter, Panic(EPanicBadInputConversionFlags1));
   1.219 +	CheckArrayOfCharacterSets(aArrayOfCharacterSets);
   1.220 +	aOutputConversionFlags=0;
   1.221 +	TUint internalInputConversionFlags=aInputConversionFlags;
   1.222 +	if (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAppend)
   1.223 +		{
   1.224 +		aForeign.SetLength(0);
   1.225 +		internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAppend;
   1.226 +		}
   1.227 +	if (aUnicode.Length()==0)
   1.228 +		{
   1.229 +		return 0;
   1.230 +		}
   1.231 +	if (aForeign.MaxLength()==aForeign.Length()) // relies on the fact that aForeign's length has been set to zero if aInputConversionFlags does not have CCnvCharacterSetConverter::EInputConversionFlagAppend set
   1.232 +		{
   1.233 +		return aUnicode.Length();
   1.234 +		}
   1.235 +	TDes8* foreign=&aForeign;
   1.236 +	TPtr8 dummyForeign(NULL, 0, 0);
   1.237 +	if (aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagMustEndInDefaultCharacterSet)
   1.238 +		{
   1.239 +		TInt dummyMaximumLength =
   1.240 +			aForeign.MaxLength() - aArrayOfCharacterSets[KDefaultCharacterSet].iEscapeSequence->Length();
   1.241 +		__ASSERT_ALWAYS(dummyMaximumLength >= 0, 
   1.242 +			Panic(EPanicOutputDescriptorTooShortEvenToHoldEscapeSequenceToDefaultCharacterSet));
   1.243 +		dummyForeign.Set(const_cast <TUint8*> (aForeign.Ptr()),
   1.244 +						 aForeign.Length(), 
   1.245 +						 dummyMaximumLength);
   1.246 +		foreign=&dummyForeign;
   1.247 +		}
   1.248 +	const TInt numberOfCharacterSets=aArrayOfCharacterSets.Count();
   1.249 +	TInt numberOfUnicodeElementsConsumed=0;
   1.250 +	internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter; // this is not just an optimization - it ensures that "foreign" doesn't get filled up too much each time CCnvCharacterSetConverter::DoConvertFromUnicode is called
   1.251 +    TInt previousCharacterSet = aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAssumeStartInDefaultCharacterSet?
   1.252 +        KDefaultCharacterSet : KNoPreviousCharacterSet;
   1.253 +	FOREVER
   1.254 +		{
   1.255 +		for (TInt presentCharacterSet=KDefaultCharacterSet;;)
   1.256 +			{
   1.257 +			__ASSERT_DEBUG(numberOfUnicodeElementsConsumed<=aUnicode.Length(), Panic(EPanicBadNumberOfUnicodeElementsConsumed));
   1.258 +			if (numberOfUnicodeElementsConsumed>=aUnicode.Length())
   1.259 +				{
   1.260 +				goto end;
   1.261 +				}
   1.262 +			const SCharacterSet& characterSet=aArrayOfCharacterSets[presentCharacterSet];
   1.263 +			const TInt oldNumberOfBytesInForeign=foreign->Length();
   1.264 +			if (numberOfUnicodeElementsConsumed>0)
   1.265 +				{
   1.266 +				internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable;
   1.267 +				}
   1.268 +			CCnvCharacterSetConverter::TArrayOfAscendingIndices indicesOfUnconvertibleCharacters;
   1.269 +			const TInt returnValue=CCnvCharacterSetConverter::DoConvertFromUnicode(*characterSet.iConversionData, aDefaultEndiannessOfForeignCharacters, KNullDesC8, *foreign, aUnicode.Mid(numberOfUnicodeElementsConsumed), indicesOfUnconvertibleCharacters, aOutputConversionFlags, internalInputConversionFlags);
   1.270 +			if (returnValue<0)
   1.271 +				{
   1.272 +				return returnValue; // this is an error-code
   1.273 +				}
   1.274 +			__ASSERT_DEBUG(foreign->Length()>=oldNumberOfBytesInForeign, Panic(EPanicAppendFlagViolated));
   1.275 +			TInt indexOfFirstUnconvertibleCharacter;
   1.276 +			if (indicesOfUnconvertibleCharacters.NumberOfIndices()==0)
   1.277 +				{
   1.278 +				indexOfFirstUnconvertibleCharacter=-1;
   1.279 +				numberOfUnicodeElementsConsumed=aUnicode.Length()-returnValue;
   1.280 +				}
   1.281 +			else
   1.282 +				{
   1.283 +				indexOfFirstUnconvertibleCharacter=indicesOfUnconvertibleCharacters[0];
   1.284 +				numberOfUnicodeElementsConsumed+=indexOfFirstUnconvertibleCharacter;
   1.285 +				__ASSERT_DEBUG(numberOfUnicodeElementsConsumed+LengthOfUnicodeCharacter(aUnicode, numberOfUnicodeElementsConsumed)==aUnicode.Length()-returnValue, Panic(EPanicBadNumberOfUnicodeCharactersConverted));
   1.286 +				}
   1.287 +			if (indexOfFirstUnconvertibleCharacter!=0) // if at least one Unicode character at the start of CCnvCharacterSetConverter::DoConvertFromUnicode's input descriptor was convertible...
   1.288 +				{
   1.289 +				TBool gotoEnd = EFalse;
   1.290 +				if (foreign->Length()>oldNumberOfBytesInForeign)
   1.291 +					{
   1.292 +					TInt numberOfCharactersThatDroppedOut=0;
   1.293 +                     // Insert an escape sequence if this character set is different from the last one.
   1.294 +                     if (presentCharacterSet  != previousCharacterSet)
   1.295 +                        {
   1.296 +                        // Insert escape sequence (if requred) in front of the last encoded run of text.
   1.297 +                        // Note that this may cause some characters to drop out at the end.
   1.298 +                        (*characterSet.iConvertFromIntermediateBufferInPlace)(oldNumberOfBytesInForeign, *foreign, numberOfCharactersThatDroppedOut);
   1.299 +                        if (oldNumberOfBytesInForeign < foreign->Length())
   1.300 +                			previousCharacterSet = presentCharacterSet;
   1.301 +                        }
   1.302 +					numberOfUnicodeElementsConsumed-=numberOfCharactersThatDroppedOut;
   1.303 +					if (numberOfCharactersThatDroppedOut>0 )// if "foreign" has been filled to as much as it will hold...
   1.304 +						{
   1.305 +						gotoEnd = ETrue;
   1.306 +						}
   1.307 +					}
   1.308 +				if (indexOfFirstUnconvertibleCharacter<0) // if we've successfully converted up to the end of aUnicode (using *characterSet.iConversionData)...
   1.309 +					{
   1.310 +					gotoEnd = ETrue;
   1.311 +					}
   1.312 +				if (gotoEnd)
   1.313 +					{
   1.314 +					if ( aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagMustEndInDefaultCharacterSet
   1.315 +					    && previousCharacterSet != KDefaultCharacterSet
   1.316 +					    && previousCharacterSet != KNoPreviousCharacterSet)
   1.317 +					    {
   1.318 +					    aForeign.SetLength(foreign->Length());
   1.319 +    				    aForeign.Append(*aArrayOfCharacterSets[KDefaultCharacterSet].iEscapeSequence);
   1.320 +    				    foreign=NULL;
   1.321 +					    }
   1.322 +					goto end;
   1.323 +					}
   1.324 +				break;
   1.325 +				}
   1.326 +			__ASSERT_DEBUG(presentCharacterSet<numberOfCharacterSets, Panic(EPanicLoopCounterOverRun1));
   1.327 +			++presentCharacterSet;
   1.328 +			if (presentCharacterSet>=numberOfCharacterSets)
   1.329 +				{
   1.330 +				if ((foreign->MaxLength()-foreign->Length()<aReplacementForUnconvertibleUnicodeCharacters.Length()) ||
   1.331 +					(aIndicesOfUnconvertibleCharacters.AppendIndex(numberOfUnicodeElementsConsumed)!=CCnvCharacterSetConverter::TArrayOfAscendingIndices::EAppendSuccessful)) // the tests must be done in this order as AppendIndex must only be called if there is room for aReplacementForUnconvertibleUnicodeCharacters
   1.332 +					{
   1.333 +					goto end;
   1.334 +					}
   1.335 +				numberOfUnicodeElementsConsumed+=LengthOfUnicodeCharacter(aUnicode, numberOfUnicodeElementsConsumed);
   1.336 +				foreign->Append(aReplacementForUnconvertibleUnicodeCharacters);
   1.337 +				break;
   1.338 +				}
   1.339 +			}
   1.340 +		}
   1.341 +end:
   1.342 +	if (foreign!=NULL)
   1.343 +		{
   1.344 +		aForeign.SetLength(foreign->Length());
   1.345 +		foreign=NULL;
   1.346 +		}
   1.347 +	if ((numberOfUnicodeElementsConsumed==0) && (aOutputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated) && (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable))
   1.348 +		{
   1.349 +		return CCnvCharacterSetConverter::EErrorIllFormedInput;
   1.350 +		}
   1.351 +	return aUnicode.Length()-numberOfUnicodeElementsConsumed;
   1.352 +	}
   1.353 +
   1.354 +
   1.355 +/** Inserts an escape sequence into the descriptor.
   1.356 +
   1.357 +This function is provided to help in the implementation of
   1.358 +ConvertFromUnicode() for modal character set encodings.
   1.359 +Each SCharacterSet object in the array passed to
   1.360 +ConvertFromUnicode() must have its
   1.361 +iConvertFromIntermediateBufferInPlace member assigned. To
   1.362 +do this for a modal character set encoding, implement a function whose
   1.363 +signature matches that of FConvertFromIntermediateBufferInPlace 
   1.364 +and which calls this function, passing all arguments unchanged, and 
   1.365 +specifying the character set's escape sequence and the number of bytes per 
   1.366 +character.
   1.367 +
   1.368 +@param aStartPositionInDescriptor The byte position in aDescriptor at which 
   1.369 +the escape sequence is inserted. If the character set uses more than one byte 
   1.370 +per character, this position must be the start of a character, otherwise a 
   1.371 +panic occurs. 
   1.372 +@param aDescriptor The descriptor into which the escape sequence is inserted. 
   1.373 +@param aNumberOfCharactersThatDroppedOut The escape sequence is inserted into 
   1.374 +the start of aDescriptor and any characters that need to drop out to make 
   1.375 +room for the escape sequence (because the descriptor's maximum length was 
   1.376 +not long enough) drop out from the end of the buffer. This parameter indicates 
   1.377 +the number of characters that needed to drop out.
   1.378 +@param aEscapeSequence The escape sequence for the character set. 
   1.379 +@param aNumberOfBytesPerCharacter The number of bytes per character. */
   1.380 +EXPORT_C void CnvUtilities::ConvertFromIntermediateBufferInPlace(
   1.381 +					TInt aStartPositionInDescriptor, 
   1.382 +					TDes8& aDescriptor, 
   1.383 +					TInt& aNumberOfCharactersThatDroppedOut, 
   1.384 +					const TDesC8& aEscapeSequence, 
   1.385 +					TInt aNumberOfBytesPerCharacter)
   1.386 +	{
   1.387 +	const TInt lengthOfDescriptor=aDescriptor.Length();
   1.388 +	__ASSERT_ALWAYS((lengthOfDescriptor-aStartPositionInDescriptor)%aNumberOfBytesPerCharacter==0, Panic(EPanicDescriptorNotWholeNumberOfCharacters1));
   1.389 +	aNumberOfCharactersThatDroppedOut=(Max(0, aEscapeSequence.Length()-(aDescriptor.MaxLength()-lengthOfDescriptor))+(aNumberOfBytesPerCharacter-1))/aNumberOfBytesPerCharacter;
   1.390 +	const TInt lengthOfRunInCharacters=(lengthOfDescriptor-aStartPositionInDescriptor)/aNumberOfBytesPerCharacter;
   1.391 +	if (aNumberOfCharactersThatDroppedOut>=lengthOfRunInCharacters) // ">=" is correct (rather than ">") as if there's only room for the escape sequence we don't want to have it in the descriptor
   1.392 +		{
   1.393 +		aNumberOfCharactersThatDroppedOut=lengthOfRunInCharacters;
   1.394 +		aDescriptor.SetLength(aStartPositionInDescriptor);
   1.395 +		}
   1.396 +	else
   1.397 +		{
   1.398 +		aDescriptor.SetLength(lengthOfDescriptor-(aNumberOfCharactersThatDroppedOut*aNumberOfBytesPerCharacter));
   1.399 +		aDescriptor.Insert(aStartPositionInDescriptor, aEscapeSequence);
   1.400 +		}
   1.401 +	}
   1.402 +
   1.403 +
   1.404 +/**  Converts text from a modal foreign character set encoding into Unicode.
   1.405 +
   1.406 +The non-Unicode text specified in aForeign is converted using 
   1.407 +the array of character set conversion objects (aArrayOfStates) 
   1.408 +provided by the plug-in, and the converted text is returned in 
   1.409 +aUnicode. The function can either append to aUnicode 
   1.410 +or overwrite its contents (if any), depending on the input conversion flags 
   1.411 +specified. The first element in aArrayOfStates is taken to be 
   1.412 +the default mode (i.e. the mode to assume by default if there is no preceding 
   1.413 +escape sequence).
   1.414 + 
   1.415 +@param aDefaultEndiannessOfForeignCharacters The default endian-ness of the 
   1.416 +foreign characters. If an endian-ness for foreign characters is specified 
   1.417 +in the conversion data, then that is used instead and the value of 
   1.418 +aDefaultEndiannessOfForeignCharacters is ignored. 
   1.419 +@param aUnicode On return, contains the text converted into Unicode. 
   1.420 +@param aForeign The non-Unicode source text to be converted. 
   1.421 +@param aState Used to store a modal character set encoding's current mode across 
   1.422 +multiple calls to ConvertToUnicode() on the same input descriptor. This argument 
   1.423 +should be passed the same object as passed to the plug-in's ConvertToUnicode() 
   1.424 +exported function.
   1.425 +@param aNumberOfUnconvertibleCharacters On return, contains the number of 
   1.426 +characters in aForeign which were not converted. Characters which cannot be 
   1.427 +converted are output as Unicode replacement characters (0xfffd). 
   1.428 +@param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index 
   1.429 +of the first byte of the first unconvertible character. For instance if the 
   1.430 +first character in the input descriptor (aForeign) could not be converted, 
   1.431 +then this parameter is set to the first byte of that character, i.e. zero. 
   1.432 +A negative value is returned if all the characters were converted. 
   1.433 +@param aArrayOfStates Array of character set conversion data objects, and their 
   1.434 +escape sequences ("modes"). There must be one or more modes in this array, 
   1.435 +none of the modes can have any NULL member data, and each mode's escape sequence 
   1.436 +must begin with KControlCharacterEscape (0x1b) or a panic occurs. 
   1.437 +@return The number of unconverted bytes left at the end of the input descriptor, 
   1.438 +or a negative error value, as defined in TError. */
   1.439 +EXPORT_C TInt CnvUtilities::ConvertToUnicodeFromModalForeign(
   1.440 +					CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
   1.441 +					TDes16& aUnicode, 
   1.442 +					const TDesC8& aForeign, 
   1.443 +					TInt& aState, 
   1.444 +					TInt& aNumberOfUnconvertibleCharacters, 
   1.445 +					TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, 
   1.446 +					const TArray<SState>& aArrayOfStates)
   1.447 +	{
   1.448 +	TUint notUsed;
   1.449 +	return ConvertToUnicodeFromModalForeign(aDefaultEndiannessOfForeignCharacters, 
   1.450 +											aUnicode, 
   1.451 +											aForeign, 
   1.452 +											aState, 
   1.453 +											aNumberOfUnconvertibleCharacters, 
   1.454 +											aIndexOfFirstByteOfFirstUnconvertibleCharacter, 
   1.455 +											aArrayOfStates, 
   1.456 +											notUsed, 
   1.457 +											0);
   1.458 +	}
   1.459 +
   1.460 +/** @param aDefaultEndiannessOfForeignCharacters The default endian-ness for 
   1.461 +the foreign characters. If an endian-ness for foreign characters is specified 
   1.462 +in the conversion data, then that is used instead and the value of 
   1.463 +aDefaultEndiannessOfForeignCharacters is ignored. 
   1.464 +@param aUnicode On return, contains the text converted into Unicode. 
   1.465 +@param aForeign The non-Unicode source text to be converted. 
   1.466 +@param aState Used to store a modal character set encoding's current mode 
   1.467 +across multiple calls to ConvertToUnicode() on the same input descriptor. This 
   1.468 +argument should be passed the same object as passed to the plug-in's 
   1.469 +ConvertToUnicode() exported function. 
   1.470 +@param aNumberOfUnconvertibleCharacters On return, contains the number of 
   1.471 +characters in aForeign which were not converted. Characters which cannot be 
   1.472 +converted are output as Unicode replacement characters (0xfffd). 
   1.473 +@param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index 
   1.474 +of the first byte of the first unconvertible character. For instance if the 
   1.475 +first character in the input descriptor (aForeign) could not be converted, 
   1.476 +then this parameter is set to the first byte of that character, i.e. zero. 
   1.477 +A negative value is returned if all the characters were converted. 
   1.478 +@param aArrayOfStates Array of character set conversion data objects, and their 
   1.479 +escape sequences. There must be one or more modes in this array, none of the 
   1.480 +modes can have any NULL member data, and each mode's escape sequence must 
   1.481 +begin with KControlCharacterEscape (0x1b) or a panic occurs. 
   1.482 +@param aOutputConversionFlags If the input descriptor ended in a truncated 
   1.483 +sequence, e.g. a part of a multi-byte character, aOutputConversionFlags 
   1.484 +returns with the EOutputConversionFlagInputIsTruncated flag set. 
   1.485 +@param aInputConversionFlags Specify 
   1.486 +CCnvCharacterSetConverter::EInputConversionFlagAppend to append the text to 
   1.487 +aUnicode. Specify EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable 
   1.488 +to prevent the function from returning the error-code EErrorIllFormedInput 
   1.489 +when the input descriptor consists of nothing but a truncated sequence. The 
   1.490 +CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter 
   1.491 +flag must not be set, otherwise a panic occurs. 
   1.492 +@return The number of unconverted bytes left at the end of the input descriptor, 
   1.493 +or a negative error value, as defined in TError. */
   1.494 +EXPORT_C TInt CnvUtilities::ConvertToUnicodeFromModalForeign(
   1.495 +								CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
   1.496 +								TDes16& aUnicode, 
   1.497 +								const TDesC8& aForeign, 
   1.498 +								TInt& aState, 
   1.499 +								TInt& aNumberOfUnconvertibleCharacters, 
   1.500 +								TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, 
   1.501 +								const TArray<SState>& aArrayOfStates, 
   1.502 +								TUint& aOutputConversionFlags, 
   1.503 +								TUint aInputConversionFlags)
   1.504 +	{
   1.505 +	__ASSERT_ALWAYS(~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter, Panic(EPanicBadInputConversionFlags2));
   1.506 +	CheckArrayOfStates(aArrayOfStates);
   1.507 +	aNumberOfUnconvertibleCharacters=0;
   1.508 +	aIndexOfFirstByteOfFirstUnconvertibleCharacter=-1;
   1.509 +	aOutputConversionFlags=0;
   1.510 +	TUint internalInputConversionFlags=aInputConversionFlags;
   1.511 +	if (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAppend)
   1.512 +		{
   1.513 +		aUnicode.SetLength(0);
   1.514 +		internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAppend;
   1.515 +		}
   1.516 +	if (aForeign.Length()==0)
   1.517 +		{
   1.518 +		return 0;
   1.519 +		}
   1.520 +	if (aUnicode.MaxLength()==aUnicode.Length()) // relies on the fact that aUnicode's length has been set to zero if aInputConversionFlags does not have CCnvCharacterSetConverter::EInputConversionFlagAppend set
   1.521 +		{
   1.522 +		return aForeign.Length();
   1.523 +		}
   1.524 +	TPtrC8 remainderOfForeign(aForeign);
   1.525 +	TPtrC8 homogeneousRun;
   1.526 +	TInt numberOfForeignBytesConsumed=0;
   1.527 +	const SCnvConversionData* conversionData = NULL;
   1.528 +	const TInt startOfNextEscapeSequence=aForeign.Locate(KControlCharacterEscape);
   1.529 +	if (startOfNextEscapeSequence!=0) // if aForeign doesn't start with an escape sequence...
   1.530 +		{
   1.531 +		conversionData=(aState!=CCnvCharacterSetConverter::KStateDefault)? REINTERPRET_CAST(const SCnvConversionData*, aState): aArrayOfStates[0].iConversionData;
   1.532 +		if (startOfNextEscapeSequence==KErrNotFound)
   1.533 +			{
   1.534 +			homogeneousRun.Set(remainderOfForeign);
   1.535 +			remainderOfForeign.Set(NULL, 0);
   1.536 +			}
   1.537 +		else
   1.538 +			{
   1.539 +			__ASSERT_DEBUG(startOfNextEscapeSequence>0, Panic(EPanicBadStartOfNextEscapeSequence));
   1.540 +			homogeneousRun.Set(remainderOfForeign.Left(startOfNextEscapeSequence));
   1.541 +			remainderOfForeign.Set(remainderOfForeign.Mid(startOfNextEscapeSequence));
   1.542 +			}
   1.543 +		goto handleHomogeneousRun;
   1.544 +		}
   1.545 +	FOREVER
   1.546 +		{
   1.547 +		if (!NextHomogeneousForeignRun(conversionData, numberOfForeignBytesConsumed, homogeneousRun, remainderOfForeign, aArrayOfStates, aOutputConversionFlags))
   1.548 +			{
   1.549 +			goto end;
   1.550 +			}
   1.551 +handleHomogeneousRun:
   1.552 +		if (conversionData==NULL)
   1.553 +			{
   1.554 +			return CCnvCharacterSetConverter::EErrorIllFormedInput;
   1.555 +			}
   1.556 +		TInt numberOfUnconvertibleCharacters;
   1.557 +		TInt indexOfFirstByteOfFirstUnconvertibleCharacter;
   1.558 +		const TInt returnValue=CCnvCharacterSetConverter::DoConvertToUnicode(*conversionData, aDefaultEndiannessOfForeignCharacters, aUnicode, homogeneousRun, numberOfUnconvertibleCharacters, indexOfFirstByteOfFirstUnconvertibleCharacter, aOutputConversionFlags, internalInputConversionFlags);
   1.559 +		if (returnValue<0)
   1.560 +			{
   1.561 +			return returnValue; // this is an error-code
   1.562 +			}
   1.563 +		if (numberOfUnconvertibleCharacters>0)
   1.564 +			{
   1.565 +			if (aNumberOfUnconvertibleCharacters==0)
   1.566 +				{
   1.567 +				aIndexOfFirstByteOfFirstUnconvertibleCharacter=numberOfForeignBytesConsumed+indexOfFirstByteOfFirstUnconvertibleCharacter;
   1.568 +				}
   1.569 +			aNumberOfUnconvertibleCharacters+=numberOfUnconvertibleCharacters;
   1.570 +			}
   1.571 +		numberOfForeignBytesConsumed+=homogeneousRun.Length();
   1.572 +		if (returnValue>0)
   1.573 +			{
   1.574 +			numberOfForeignBytesConsumed-=returnValue;
   1.575 +			goto end;
   1.576 +			}
   1.577 +		if (numberOfForeignBytesConsumed>0)
   1.578 +			{
   1.579 +			internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable;
   1.580 +			}
   1.581 +		__ASSERT_DEBUG(remainderOfForeign==aForeign.Mid(numberOfForeignBytesConsumed), Panic(EPanicInconsistentNumberOfForeignBytesRemaining));
   1.582 +		}
   1.583 +end:
   1.584 +	if ((numberOfForeignBytesConsumed==0) && (aOutputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated) && (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable))
   1.585 +		{
   1.586 +		return CCnvCharacterSetConverter::EErrorIllFormedInput;
   1.587 +		}
   1.588 +	aState=REINTERPRET_CAST(TInt, conversionData);
   1.589 +	return aForeign.Length()-numberOfForeignBytesConsumed;
   1.590 +	}
   1.591 +
   1.592 +
   1.593 +/** Converts text from a non-modal complex character set encoding (e.g. 
   1.594 +Shift-JIS or EUC-JP) into Unicode. The non-Unicode text specified in aForeign 
   1.595 +is converted using the array of character set conversion methods 
   1.596 +(aArrayOfMethods) provided by the plug-in, and the converted text is returned 
   1.597 +in aUnicode, overwriting any contents it already had. This overload supplies 
   1.598 +no input conversion flags and discards the output conversion flags; all the 
   1.599 +work is done by the overload that takes both. 
   1.600 +
   1.601 +@param aDefaultEndiannessOfForeignCharacters The endian-ness to assume for the 
   1.602 +foreign characters. It only applies when the conversion data does not itself 
   1.603 +specify an endian-ness; otherwise this value is ignored. 
   1.604 +@param aUnicode On return, holds the text converted into Unicode. 
   1.605 +@param aForeign The non-Unicode source text to be converted. 
   1.606 +@param aNumberOfUnconvertibleCharacters On return, the number of characters 
   1.607 +in aForeign which could not be converted. Each such character is output as 
   1.608 +the Unicode replacement character (0xfffd). 
   1.609 +@param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index of 
   1.610 +the first byte of the first unconvertible character. For example, if the 
   1.611 +very first character of aForeign could not be converted, this is set to zero. 
   1.612 +A negative value is returned if every character was converted. 
   1.613 +@param aArrayOfMethods The conversion methods. The array must contain at least 
   1.614 +one method, and no method may have NULL member data, otherwise a panic 
   1.615 +occurs. 
   1.616 +
   1.617 +@return The number of unconverted bytes left at the end of the input 
   1.618 +descriptor, or a negative error value, as defined in TError. */
   1.619 +EXPORT_C TInt CnvUtilities::ConvertToUnicodeFromHeterogeneousForeign(
   1.620 +					CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
   1.621 +					TDes16& aUnicode, 
   1.622 +					const TDesC8& aForeign, 
   1.623 +					TInt& aNumberOfUnconvertibleCharacters, 
   1.624 +					TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, 
   1.625 +					const TArray<SMethod>& aArrayOfMethods)
   1.626 +	{
   1.627 +	// No flags: the output is overwritten rather than appended to.
   1.628 +	const TUint noInputConversionFlags=0;
   1.629 +	// Assigned by the full overload; this simpler form has nowhere to report it.
   1.630 +	TUint discardedOutputConversionFlags;
   1.631 +	const TInt numberOfUnconvertedBytesOrError=ConvertToUnicodeFromHeterogeneousForeign(
   1.632 +				aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign, 
   1.633 +				aNumberOfUnconvertibleCharacters, 
   1.634 +				aIndexOfFirstByteOfFirstUnconvertibleCharacter, 
   1.635 +				aArrayOfMethods, discardedOutputConversionFlags, noInputConversionFlags);
   1.636 +	return numberOfUnconvertedBytesOrError;
   1.637 +	}
   1.638 +
   1.639 +/** Converts text from a non-modal complex character set encoding (e.g. Shift-JIS 
   1.640 +or EUC-JP) into Unicode, run by run, using the conversion methods supplied by 
   1.641 +the plug-in. This overload additionally takes and returns conversion flags. 
   1.642 +@param aDefaultEndiannessOfForeignCharacters The default endian-ness for the 
   1.643 +foreign characters. Ignored if the conversion data specifies an endian-ness. 
   1.644 +@param aUnicode On return, contains the text converted into Unicode. 
   1.645 +@param aForeign The non-Unicode source text to be converted. 
   1.646 +@param aNumberOfUnconvertibleCharacters On return, contains the number of 
   1.647 +characters in aForeign which were not converted. Characters which cannot be 
   1.648 +converted are output as Unicode replacement characters (0xfffd). 
   1.649 +@param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index, 
   1.650 +counted in bytes of aForeign, of the first byte of the first unconvertible 
   1.651 +character. For instance if the first character in aForeign could not be 
   1.652 +converted, then this parameter is set to zero. A negative value is returned 
   1.653 +if all the characters were converted. 
   1.654 +@param aArrayOfMethods Array of conversion methods. There must be one or more 
   1.655 +methods in this array and none of the methods in the array can have any NULL 
   1.656 +member data or a panic occurs. 
   1.657 +@param aOutputConversionFlags If the input descriptor ended in a truncated 
   1.658 +sequence, e.g. a part of a multi-byte character, aOutputConversionFlags 
   1.659 +returns with the EOutputConversionFlagInputIsTruncated flag set. 
   1.660 +@param aInputConversionFlags Specify EInputConversionFlagAppend to append the 
   1.661 +text to aUnicode rather than overwrite it. Specify 
   1.662 +EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable to prevent the 
   1.663 +function from returning EErrorIllFormedInput when the input descriptor 
   1.664 +consists of nothing but a truncated sequence. Setting the 
   1.665 +EInputConversionFlagStopAtFirstUnconvertibleCharacter flag causes a panic. 
   1.666 +@return The number of unconverted bytes left at the end of the input descriptor, 
   1.667 +or a negative error value, as defined in TError. */
   1.668 +EXPORT_C TInt CnvUtilities::ConvertToUnicodeFromHeterogeneousForeign(
   1.669 +						CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
   1.670 +						TDes16& aUnicode, 
   1.671 +						const TDesC8& aForeign, 
   1.672 +						TInt& aNumberOfUnconvertibleCharacters, 
   1.673 +						TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, 
   1.674 +						const TArray<SMethod>& aArrayOfMethods, 
   1.675 +						TUint& aOutputConversionFlags, 
   1.676 +						TUint aInputConversionFlags)
   1.677 +	{
   1.678 +	__ASSERT_ALWAYS(~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter, Panic(EPanicBadInputConversionFlags3));
   1.679 +	CheckArrayOfMethods(aArrayOfMethods);
   1.680 +	aNumberOfUnconvertibleCharacters=0;
   1.681 +	aIndexOfFirstByteOfFirstUnconvertibleCharacter=-1; // negative means "no unconvertible character seen"
   1.682 +	aOutputConversionFlags=0;
   1.683 +	TUint internalInputConversionFlags=aInputConversionFlags;
   1.684 +	if (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAppend)
   1.685 +		{
   1.686 +		aUnicode.SetLength(0);
   1.687 +		internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAppend; // from here on every chunk is appended to what has been output so far
   1.688 +		}
   1.689 +	if (aForeign.Length()==0)
   1.690 +		{
   1.691 +		return 0;
   1.692 +		}
   1.693 +	if (aUnicode.MaxLength()==aUnicode.Length()) // relies on the fact that aUnicode's length has been set to zero if aInputConversionFlags does not have CCnvCharacterSetConverter::EInputConversionFlagAppend set
   1.694 +		{
   1.695 +		return aForeign.Length();
   1.696 +		}
   1.697 +	const TInt numberOfMethods=aArrayOfMethods.Count();
   1.698 +	TPtrC8 remainderOfForeign(aForeign);
   1.699 +	TInt numberOfForeignBytesConsumed=0;
   1.700 +	FOREVER
   1.701 +		{
   1.702 +		TInt lengthOfRunToConvert=0;
   1.703 +		const SMethod* method=NULL;
   1.704 +		for (TInt i=0;;) // pick the first method that can convert the start of the remaining input
   1.705 +			{
   1.706 +			method=&aArrayOfMethods[i];
   1.707 +			__ASSERT_DEBUG(method!=NULL, Panic(EPanicBadMethodPointer));
   1.708 +			lengthOfRunToConvert=(*method->iNumberOfBytesAbleToConvert)(remainderOfForeign);
   1.709 +			if (lengthOfRunToConvert<0)
   1.710 +				{
   1.711 +				return lengthOfRunToConvert; // this is an error-code
   1.712 +				}
   1.713 +			if (lengthOfRunToConvert>0)
   1.714 +				{
   1.715 +				break;
   1.716 +				}
   1.717 +			__ASSERT_DEBUG(i<numberOfMethods, Panic(EPanicLoopCounterOverRun2));
   1.718 +			++i;
   1.719 +			if (i>=numberOfMethods) // every method reported zero convertible bytes
   1.720 +				{
   1.721 +				aOutputConversionFlags|=CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated;
   1.722 +				goto end;
   1.723 +				}
   1.724 +			}
   1.725 +		TBuf8<KMaximumLengthOfIntermediateBuffer> intermediateBuffer;
   1.726 +		const TInt maximumUsableLengthOfIntermediateBuffer=ReduceToNearestMultipleOf(KMaximumLengthOfIntermediateBuffer, method->iNumberOfBytesPerCharacter); // judging by the helper's name, the buffer size rounded down to a whole number of characters
   1.727 +		FOREVER
   1.728 +			{
   1.729 +			const TInt numberOfForeignBytesConsumedThisTime=Min(lengthOfRunToConvert, maximumUsableLengthOfIntermediateBuffer); // convert at most one buffer-full of the run per iteration
   1.730 +			intermediateBuffer=remainderOfForeign.Left(numberOfForeignBytesConsumedThisTime);
   1.731 +			__ASSERT_DEBUG((numberOfForeignBytesConsumedThisTime%method->iNumberOfBytesPerCharacter)==0, Panic(EPanicDescriptorNotWholeNumberOfCharacters2));
   1.732 +			(*method->iConvertToIntermediateBufferInPlace)(intermediateBuffer); // afterwards each character occupies iNumberOfCoreBytesPerCharacter bytes (asserted below in debug builds)
   1.733 +			__ASSERT_DEBUG((intermediateBuffer.Length()%method->iNumberOfCoreBytesPerCharacter)==0, Panic(EPanicDescriptorNotWholeNumberOfCharacters3));
   1.734 +			__ASSERT_DEBUG((intermediateBuffer.Length()/method->iNumberOfCoreBytesPerCharacter)*method->iNumberOfBytesPerCharacter==numberOfForeignBytesConsumedThisTime, Panic(EPanicBadMethodData1));
   1.735 +			TInt numberOfUnconvertibleCharacters;
   1.736 +			TInt indexOfFirstByteOfFirstUnconvertibleCharacter;
   1.737 +			const TInt returnValue=CCnvCharacterSetConverter::DoConvertToUnicode(*method->iConversionData, aDefaultEndiannessOfForeignCharacters, aUnicode, intermediateBuffer, numberOfUnconvertibleCharacters, indexOfFirstByteOfFirstUnconvertibleCharacter, aOutputConversionFlags, internalInputConversionFlags);
   1.738 +			if (returnValue<0)
   1.739 +				{
   1.740 +				return returnValue; // this is an error-code
   1.741 +				}
   1.742 +			if (numberOfUnconvertibleCharacters>0)
   1.743 +				{
   1.744 +				if (aNumberOfUnconvertibleCharacters==0)
   1.745 +					{
   1.746 +					aIndexOfFirstByteOfFirstUnconvertibleCharacter=numberOfForeignBytesConsumed+(indexOfFirstByteOfFirstUnconvertibleCharacter/method->iNumberOfCoreBytesPerCharacter)*method->iNumberOfBytesPerCharacter; // the index refers to the intermediate buffer (core bytes per character), so scale it to bytes of aForeign
   1.747 +					}
   1.748 +				aNumberOfUnconvertibleCharacters+=numberOfUnconvertibleCharacters;
   1.749 +				}
   1.750 +			numberOfForeignBytesConsumed+=numberOfForeignBytesConsumedThisTime;
   1.751 +			if (returnValue>0) // part of the chunk was left unconverted: give those bytes back and stop
   1.752 +				{
   1.753 +				__ASSERT_DEBUG((returnValue%method->iNumberOfCoreBytesPerCharacter)==0, Panic(EPanicDescriptorNotWholeNumberOfCharacters4));
   1.754 +				numberOfForeignBytesConsumed-=(returnValue/method->iNumberOfCoreBytesPerCharacter)*method->iNumberOfBytesPerCharacter; // returnValue counts intermediate-buffer bytes; convert it to bytes of aForeign
   1.755 +				goto end;
   1.756 +				}
   1.757 +			if (numberOfForeignBytesConsumed>0)
   1.758 +				{
   1.759 +				internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable; // something has been consumed, so a truncated tail is no longer an error for later chunks
   1.760 +				}
   1.761 +			remainderOfForeign.Set(aForeign.Mid(numberOfForeignBytesConsumed));
   1.762 +			lengthOfRunToConvert-=numberOfForeignBytesConsumedThisTime;
   1.763 +			__ASSERT_DEBUG(lengthOfRunToConvert>=0, Panic(EPanicBadLengthOfRunToConvert2));
   1.764 +			if (lengthOfRunToConvert<=0)
   1.765 +				{
   1.766 +				break;
   1.767 +				}
   1.768 +			}
   1.769 +		}
   1.770 +end:
   1.771 +	if ((numberOfForeignBytesConsumed==0) && (aOutputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated) && (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable))
   1.772 +		{
   1.773 +		return CCnvCharacterSetConverter::EErrorIllFormedInput;
   1.774 +		}
   1.775 +	return aForeign.Length()-numberOfForeignBytesConsumed;
   1.776 +	}
   1.777 +
   1.778 +/** Panics (in all builds) unless the array is non-empty and every character set has conversion data, a convert-from-intermediate-buffer function and an escape sequence. */
   1.779 +void CnvUtilities::CheckArrayOfCharacterSets(const TArray<SCharacterSet>& aArrayOfCharacterSets)
   1.780 +	{
   1.781 +	const TInt count=aArrayOfCharacterSets.Count();
   1.782 +	__ASSERT_ALWAYS(count>0, Panic(EPanicBadNumberOfCharacterSets));
   1.783 +	for (TInt index=0; index<count; ++index)
   1.784 +		{
   1.785 +		__ASSERT_ALWAYS(aArrayOfCharacterSets[index].iConversionData!=NULL, Panic(EPanicBadConversionDataPointer1));
   1.786 +		__ASSERT_ALWAYS(aArrayOfCharacterSets[index].iConvertFromIntermediateBufferInPlace!=NULL, Panic(EPanicBadFunctionPointer1));
   1.787 +		__ASSERT_ALWAYS(aArrayOfCharacterSets[index].iEscapeSequence!=NULL, Panic(EPanicBadEscapeSequencePointer1));
   1.788 +		}
   1.789 +	}
   1.790 +
   1.791 +/** Panics (in all builds) unless the array is non-empty and every state has conversion data and an escape sequence whose first byte is the escape control character. */
   1.792 +void CnvUtilities::CheckArrayOfStates(const TArray<SState>& aArrayOfStates)
   1.793 +	{
   1.794 +	const TInt count=aArrayOfStates.Count();
   1.795 +	__ASSERT_ALWAYS(count>0, Panic(EPanicBadNumberOfStates));
   1.796 +	for (TInt index=0; index<count; ++index)
   1.797 +		{
   1.798 +		__ASSERT_ALWAYS(aArrayOfStates[index].iEscapeSequence!=NULL, Panic(EPanicBadEscapeSequencePointer2));
   1.799 +		__ASSERT_ALWAYS((*aArrayOfStates[index].iEscapeSequence)[0]==KControlCharacterEscape, Panic(EPanicBadEscapeSequenceStart));
   1.800 +		__ASSERT_ALWAYS(aArrayOfStates[index].iConversionData!=NULL, Panic(EPanicBadConversionDataPointer2));
   1.801 +		}
   1.802 +	}
   1.803 +
   1.804 +/** Panics (in all builds) unless the array is non-empty and every method has both function pointers, conversion data, and positive byte counts with core bytes not exceeding total bytes per character. */
   1.805 +void CnvUtilities::CheckArrayOfMethods(const TArray<SMethod>& aArrayOfMethods)
   1.806 +	{
   1.807 +	__ASSERT_ALWAYS(aArrayOfMethods.Count()>0, Panic(EPanicBadNumberOfMethods));
   1.808 +	for (TInt index=0, count=aArrayOfMethods.Count(); index<count; ++index)
   1.809 +		{
   1.810 +		const SMethod& entry=aArrayOfMethods[index];
   1.811 +		__ASSERT_ALWAYS(entry.iNumberOfBytesAbleToConvert!=NULL, Panic(EPanicBadFunctionPointer2));
   1.812 +		__ASSERT_ALWAYS(entry.iConvertToIntermediateBufferInPlace!=NULL, Panic(EPanicBadFunctionPointer3));
   1.813 +		__ASSERT_ALWAYS(entry.iConversionData!=NULL, Panic(EPanicBadConversionDataPointer3));
   1.814 +		__ASSERT_ALWAYS(entry.iNumberOfBytesPerCharacter>0, Panic(EPanicBadMethodData2));
   1.815 +		__ASSERT_ALWAYS(entry.iNumberOfCoreBytesPerCharacter>0, Panic(EPanicBadMethodData3));
   1.816 +		__ASSERT_ALWAYS(entry.iNumberOfCoreBytesPerCharacter<=entry.iNumberOfBytesPerCharacter, Panic(EPanicBadMethodData4));
   1.817 +		}
   1.818 +	}
   1.819 +
   1.820 +/** Returns the number of 16-bit elements making up the Unicode character that starts at aIndex: 
   1.821 +2 if the element there is the first half of a surrogate pair (0xd800 to 0xdbff), otherwise 1. */
   1.822 +TInt CnvUtilities::LengthOfUnicodeCharacter(const TDesC16& aUnicode, TInt aIndex)
   1.823 +	{
   1.824 +	const TUint codeUnit=aUnicode[aIndex];
   1.825 +	if ((codeUnit<0xd800) || (codeUnit>0xdbff))
   1.826 +		{
   1.827 +		return 1; // not the first half of a surrogate pair
   1.828 +		}
   1.829 +	// debug builds only: the second half must exist and be in range, as CCnvCharacterSetConverter::DoConvertFromUnicode should have returned an error value if this was a bad surrogate pair
   1.830 +	__ASSERT_DEBUG(aIndex+1<aUnicode.Length(), Panic(EPanicBadSurrogatePair1));
   1.831 +	__ASSERT_DEBUG((aUnicode[aIndex+1]>=0xdc00) && (aUnicode[aIndex+1]<=0xdfff), Panic(EPanicBadSurrogatePair2));
   1.832 +	return 2;
   1.833 +	}
   1.834 +
   1.835 +/** Steps through the escape sequences at the start of aRemainderOfForeign (which must be empty or begin 
   1.836 +with the escape character) until one of them is followed by a non-empty run of characters. 
   1.837 +@return ETrue if aHomogeneousRun holds a run to be converted with aConversionData, or if an unrecognised escape sequence was met (aConversionData is then NULL); 
   1.838 +EFalse if the input ran out or ends part-way through an escape sequence (EOutputConversionFlagInputIsTruncated is then set). */
   1.839 +TBool CnvUtilities::NextHomogeneousForeignRun(const SCnvConversionData*& aConversionData, TInt& aNumberOfForeignBytesConsumed, TPtrC8& aHomogeneousRun, TPtrC8& aRemainderOfForeign, const TArray<SState>& aArrayOfStates, TUint& aOutputConversionFlags)
   1.840 +	{
   1.841 +	__ASSERT_DEBUG((aRemainderOfForeign.Length()==0) || (aRemainderOfForeign[0]==KControlCharacterEscape), Panic(EPanicBadRemainderOfForeign));
   1.842 +	while (aRemainderOfForeign.Length()!=0)
   1.843 +		{
   1.844 +		const TInt stateCount=aArrayOfStates.Count();
   1.845 +		TInt index=0;
   1.846 +		while ((index<stateCount) && !MatchesEscapeSequence(aNumberOfForeignBytesConsumed, aHomogeneousRun, aRemainderOfForeign, *aArrayOfStates[index].iEscapeSequence))
   1.847 +			{
   1.848 +			++index;
   1.849 +			}
   1.850 +		if (index>=stateCount) // no state's complete escape sequence starts the remaining input
   1.851 +			{
   1.852 +			for (index=0; index<stateCount; ++index)
   1.853 +				{
   1.854 +				if (IsStartOf(aRemainderOfForeign, *aArrayOfStates[index].iEscapeSequence))
   1.855 +					{
   1.856 +					// the input stops part-way through an escape sequence, so ConvertToUnicode cannot convert any more
   1.857 +					aOutputConversionFlags|=CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated;
   1.858 +					return EFalse;
   1.859 +					}
   1.860 +				}
   1.861 +			// unknown escape sequence: a NULL aConversionData forces ConvertToUnicode to return CCnvCharacterSetConverter::EErrorIllFormedInput
   1.862 +			aConversionData=NULL;
   1.863 +			return ETrue;
   1.864 +			}
   1.865 +		aConversionData=aArrayOfStates[index].iConversionData;
   1.866 +		if (aHomogeneousRun.Length()>0)
   1.867 +			{
   1.868 +			return ETrue;
   1.869 +			}
   1.870 +		// the escape sequence was followed directly by another one (or by the end of the input), so look again
   1.871 +		}
   1.872 +	return EFalse; // nothing left to examine
   1.873 +	}
   1.874 +
   1.875 +/** If aRemainderOfForeign begins with aEscapeSequence, consumes it, moves the bytes up to the next escape character 
   1.876 +(or the end of the input) into aHomogeneousRun and returns ETrue; otherwise changes nothing and returns EFalse. */
   1.877 +TBool CnvUtilities::MatchesEscapeSequence(TInt& aNumberOfForeignBytesConsumed, TPtrC8& aHomogeneousRun, TPtrC8& aRemainderOfForeign, const TDesC8& aEscapeSequence)
   1.878 +	{
   1.879 +	if (!IsStartOf(aEscapeSequence, aRemainderOfForeign))
   1.880 +		{
   1.881 +		return EFalse;
   1.882 +		}
   1.883 +	const TInt escapeLength=aEscapeSequence.Length();
   1.884 +	aRemainderOfForeign.Set(aRemainderOfForeign.Mid(escapeLength)); // step over the escape sequence itself
   1.885 +	aNumberOfForeignBytesConsumed+=escapeLength; // only the escape sequence is counted here, not the run that follows it
   1.886 +	const TInt nextEscape=aRemainderOfForeign.Locate(KControlCharacterEscape);
   1.887 +	if (nextEscape==KErrNotFound)
   1.888 +		{
   1.889 +		aHomogeneousRun.Set(aRemainderOfForeign); // everything that is left belongs to this run
   1.890 +		aRemainderOfForeign.Set(NULL, 0);
   1.891 +		return ETrue;
   1.892 +		}
   1.893 +	aHomogeneousRun.Set(aRemainderOfForeign.Left(nextEscape));
   1.894 +	aRemainderOfForeign.Set(aRemainderOfForeign.Mid(nextEscape));
   1.895 +	return ETrue;
   1.896 +	}
   1.897 +
   1.898 +TBool CnvUtilities::IsStartOf(const TDesC8& aStart, const TDesC8& aPotentiallyLongerDescriptor)
   1.899 +	{ // true exactly when aPotentiallyLongerDescriptor begins with the whole of aStart (an empty aStart always qualifies)
   1.900 +	const TInt prefixLength=aStart.Length();
   1.901 +	return (aPotentiallyLongerDescriptor.Length()<prefixLength)? EFalse: (aPotentiallyLongerDescriptor.Left(prefixLength)==aStart);
   1.902 +	}
   1.903 +