os/textandloc/charconvfw/charconv_fw/src/convutils/convutils.cpp
author sl@SLION-WIN7.fritz.box
Fri, 15 Jun 2012 03:10:57 +0200
changeset 0 bde4ae8d615e
permissions -rw-r--r--
First public contribution.
sl@0
     1
/*
sl@0
     2
* Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).
sl@0
     3
* All rights reserved.
sl@0
     4
* This component and the accompanying materials are made available
sl@0
     5
* under the terms of "Eclipse Public License v1.0"
sl@0
     6
* which accompanies this distribution, and is available
sl@0
     7
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
sl@0
     8
*
sl@0
     9
* Initial Contributors:
sl@0
    10
* Nokia Corporation - initial contribution.
sl@0
    11
*
sl@0
    12
* Contributors:
sl@0
    13
*
sl@0
    14
* Description: 
sl@0
    15
*
sl@0
    16
*/
sl@0
    17
sl@0
    18
sl@0
    19
#include <e32std.h>
sl@0
    20
#include <e32base.h>
sl@0
    21
#include <charconv.h>
sl@0
    22
#include <convutils.h>
sl@0
    23
                   
sl@0
    24
const TInt KNoPreviousCharacterSet=-1;
sl@0
    25
const TInt KDefaultCharacterSet = 0;
sl@0
    26
const TUint KControlCharacterEscape=0x1b;
sl@0
    27
sl@0
    28
#if defined(_DEBUG)
sl@0
    29
//It will cause performance problem with small KMaximumLengthOfIntermediateBuffer.
sl@0
    30
//Please use release version to test performance cases.
sl@0
    31
const TInt KMaximumLengthOfIntermediateBuffer=5;
sl@0
    32
#else
sl@0
    33
const TInt KMaximumLengthOfIntermediateBuffer=150;
sl@0
    34
#endif
sl@0
    35
sl@0
    36
struct SCnvConversionData;
sl@0
    37
sl@0
    38
_LIT(KLitPanicText, "CONVUTILS");
sl@0
    39
sl@0
    40
/** Panic codes raised by this file (see Panic()).

The numeric values are spelled out explicitly so that a panic number reported
by the system can be looked up directly. They must never be renumbered. */
enum TPanic
	{
	EPanicBadInputConversionFlags1                  = 1,
	EPanicBadInputConversionFlags2                  = 2,
	EPanicBadInputConversionFlags3                  = 3,
	EPanicBadNumberOfUnicodeElementsConsumed        = 4,
	EPanicAppendFlagViolated                        = 5,
	EPanicBadNumberOfUnicodeCharactersConverted     = 6,
	EPanicBadNumberOfCharactersThatDroppedOut       = 7,
	EPanicLoopCounterOverRun1                       = 8,
	EPanicLoopCounterOverRun2                       = 9,
	EPanicDescriptorNotWholeNumberOfCharacters1     = 10,
	EPanicDescriptorNotWholeNumberOfCharacters2     = 11,
	EPanicDescriptorNotWholeNumberOfCharacters3     = 12,
	EPanicDescriptorNotWholeNumberOfCharacters4     = 13,
	EPanicBadStartOfNextEscapeSequence              = 14,
	EPanicInconsistentNumberOfForeignBytesRemaining = 15,
	EPanicBadLengthOfRunToConvert1                  = 16,
	EPanicBadLengthOfRunToConvert2                  = 17,
	EPanicBadMethodPointer                          = 18,
	EPanicBadMethodData1                            = 19,
	EPanicBadMethodData2                            = 20,
	EPanicBadMethodData3                            = 21,
	EPanicBadMethodData4                            = 22,
	EPanicBadNumberOfCharacterSets                  = 23,
	EPanicBadConversionDataPointer1                 = 24,
	EPanicBadConversionDataPointer2                 = 25,
	EPanicBadConversionDataPointer3                 = 26,
	EPanicBadFunctionPointer1                       = 27,
	EPanicBadFunctionPointer2                       = 28,
	EPanicBadFunctionPointer3                       = 29,
	EPanicBadEscapeSequencePointer1                 = 30,
	EPanicBadEscapeSequencePointer2                 = 31,
	EPanicBadNumberOfStates                         = 32,
	EPanicBadEscapeSequenceStart                    = 33,
	EPanicBadNumberOfMethods                        = 34,
	EPanicBadSurrogatePair1                         = 35,
	EPanicBadSurrogatePair2                         = 36,
	EPanicBadRemainderOfForeign                     = 37,
	EPanicOutputDescriptorTooShortEvenToHoldEscapeSequenceToDefaultCharacterSet = 38
	};
sl@0
    81
sl@0
    82
/** Panics the current thread, using this file's panic category text
(KLitPanicText).

@param aPanic The panic code to report. */
LOCAL_C void Panic(TPanic aPanic)
	{
	const TInt panicCode=aPanic;
	User::Panic(KLitPanicText, panicCode);
	}
sl@0
    86
sl@0
    87
/** Converts Unicode text into a complex foreign character set encoding. This 
sl@0
    88
is an encoding which cannot be converted simply by calling 
sl@0
    89
CCnvCharacterSetConverter::DoConvertFromUnicode(). It may be modal (e.g. JIS) 
sl@0
    90
or non-modal (e.g. Shift-JIS).
sl@0
    91
sl@0
    92
The Unicode text specified in aUnicode is converted using the array of 
sl@0
    93
conversion data objects (aArrayOfCharacterSets) provided by the plug-in for 
sl@0
    94
the complex character set encoding, and the converted text is returned in 
sl@0
    95
aForeign. Any existing contents in aForeign are overwritten.
sl@0
    96
sl@0
    97
Unlike CCnvCharacterSetConverter::DoConvertFromUnicode(), multiple character 
sl@0
    98
sets can be specified. aUnicode is converted using the first character conversion 
sl@0
    99
data object in the array. When a character is found which cannot be converted 
sl@0
   100
using that data, each character set in the array is tried in turn. If it cannot 
sl@0
   101
be converted using any object in the array, the index of the character is 
sl@0
   102
appended to aIndicesOfUnconvertibleCharacters and the character is replaced 
sl@0
   103
by aReplacementForUnconvertibleUnicodeCharacters.
sl@0
   104
sl@0
   105
If it can be converted using another object in the array, that object is used 
sl@0
   106
to convert all subsequent characters until another unconvertible character 
sl@0
   107
is found.
sl@0
   108
sl@0
   109
@param aDefaultEndiannessOfForeignCharacters The default endian-ness to use 
sl@0
   110
when writing the characters in the foreign character set. If an endian-ness 
sl@0
   111
for foreign characters is specified in the current conversion data object, 
sl@0
   112
then that is used instead and the value of 
sl@0
   113
aDefaultEndiannessOfForeignCharacters is ignored. 
sl@0
   114
@param aReplacementForUnconvertibleUnicodeCharacters The single character (one 
sl@0
   115
or more byte values) which is used to replace unconvertible characters. 
sl@0
   116
@param aForeign On return, contains the converted text in the non-Unicode 
sl@0
   117
character set. 
sl@0
   118
@param aUnicode The source Unicode text to be converted. 
sl@0
   119
@param aIndicesOfUnconvertibleCharacters On return, holds an ascending array 
sl@0
   120
of the indices of each Unicode character in the source text which could not 
sl@0
   121
be converted (because none of the target character sets have an equivalent 
sl@0
   122
character). 
sl@0
   123
@param aArrayOfCharacterSets Array of character conversion data objects, 
sl@0
   124
representing the character sets which comprise a complex character set 
sl@0
   125
encoding. These are used in sequence to convert the Unicode text. There must 
sl@0
   126
be at least one character set in this array and no character set may have any 
sl@0
   127
NULL member data, or a panic occurs. 
sl@0
   128
@return The number of unconverted characters left at the end of the input 
sl@0
   129
descriptor (e.g. because aForeign was not long enough to hold all the text), 
sl@0
   130
or a negative error value, as defined in CCnvCharacterSetConverter::TError. */
sl@0
   131
EXPORT_C TInt CnvUtilities::ConvertFromUnicode(
sl@0
   132
		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
sl@0
   133
		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
sl@0
   134
		TDes8& aForeign, 
sl@0
   135
		const TDesC16& aUnicode, 
sl@0
   136
		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, 
sl@0
   137
		const TArray<SCharacterSet>& aArrayOfCharacterSets)
sl@0
   138
	{
sl@0
   139
	TUint notUsed;
sl@0
   140
	return ConvertFromUnicode(aDefaultEndiannessOfForeignCharacters, 
sl@0
   141
								aReplacementForUnconvertibleUnicodeCharacters, 
sl@0
   142
								aForeign, 
sl@0
   143
								aUnicode, 
sl@0
   144
								aIndicesOfUnconvertibleCharacters, 
sl@0
   145
								aArrayOfCharacterSets, 
sl@0
   146
								notUsed, 
sl@0
   147
								0);
sl@0
   148
	}
sl@0
   149
sl@0
   150
/** Converts Unicode text into a complex foreign character set encoding. This is 
sl@0
   151
an encoding which cannot be converted simply by a call to 
sl@0
   152
CCnvCharacterSetConverter::DoConvertFromUnicode(). It may be modal (e.g. JIS) 
sl@0
   153
or non-modal (e.g. Shift-JIS).
sl@0
   154
sl@0
   155
The Unicode text specified in aUnicode is converted using the array of conversion 
sl@0
   156
data objects (aArrayOfCharacterSets) provided by the plug-in for the complex 
sl@0
   157
character set encoding and the converted text is returned in aForeign. The 
sl@0
   158
function can either append to aForeign or overwrite its contents (if any).
sl@0
   159
sl@0
   160
Unlike CCnvCharacterSetConverter::DoConvertFromUnicode(), multiple character 
sl@0
   161
sets can be specified. aUnicode is converted using the first character conversion 
sl@0
   162
data object in the array. When a character is found which cannot be converted 
sl@0
   163
using that data, each character set in the array is tried in turn. If it cannot 
sl@0
   164
be converted using any object in the array, the index of the character is 
sl@0
   165
appended to aIndicesOfUnconvertibleCharacters and the character is replaced 
sl@0
   166
by aReplacementForUnconvertibleUnicodeCharacters.
sl@0
   167
sl@0
   168
If it can be converted using another object in the array, that object is used 
sl@0
   169
to convert all subsequent characters until another unconvertible character 
sl@0
   170
is found.
sl@0
   171
sl@0
   172
@param aDefaultEndiannessOfForeignCharacters The default endian-ness to use 
sl@0
   173
when writing the characters in the foreign character set. If an endian-ness 
sl@0
   174
for foreign characters is specified in the current conversion data object, 
sl@0
   175
then that is used instead and the value of 
sl@0
   176
aDefaultEndiannessOfForeignCharacters is ignored. 
sl@0
   177
@param aReplacementForUnconvertibleUnicodeCharacters The single character (one 
sl@0
   178
or more byte values) which is used to replace unconvertible characters. 
sl@0
   179
@param aForeign On return, contains the converted text in the non-Unicode 
sl@0
   180
character set. This may already contain some text. If it does, and if 
sl@0
   181
aInputConversionFlags specifies EInputConversionFlagAppend, then the converted 
sl@0
   182
text is appended to this descriptor. 
sl@0
   183
@param aUnicode The source Unicode text to be converted. 
sl@0
   184
@param aIndicesOfUnconvertibleCharacters On return, holds an ascending array 
sl@0
   185
of the indices of each Unicode character in the source text which could not 
sl@0
   186
be converted (because none of the target character sets have an equivalent 
sl@0
   187
character). 
sl@0
   188
@param aArrayOfCharacterSets Array of character set data objects. These are 
sl@0
   189
used in sequence to convert the Unicode text. There must be at least one 
sl@0
   190
character set in this array and no character set may have any NULL member 
sl@0
   191
data, or a panic occurs. 
sl@0
   192
@param aOutputConversionFlags If the input descriptor ended in a truncated 
sl@0
   193
sequence, e.g. the first half only of a Unicode surrogate pair, this returns 
sl@0
   194
with the EOutputConversionFlagInputIsTruncated flag set. 
sl@0
   195
@param aInputConversionFlags Specify 
sl@0
   196
CCnvCharacterSetConverter::EInputConversionFlagAppend to append the text to 
sl@0
   197
aForeign. Specify CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable 
sl@0
   198
to prevent the function from returning the error-code EErrorIllFormedInput 
sl@0
   199
when the input descriptor consists of nothing but a truncated sequence. The 
sl@0
   200
CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter 
sl@0
   201
flag must not be set, otherwise a panic occurs. 
sl@0
   202
@return The number of unconverted characters left at the end of the input descriptor 
sl@0
   203
(e.g. because aForeign was not long enough to hold all the text), or a negative 
sl@0
   204
error value, as defined in CCnvCharacterSetConverter::TError. */
sl@0
   205
EXPORT_C TInt CnvUtilities::ConvertFromUnicode(
sl@0
   206
				CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
sl@0
   207
				const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
sl@0
   208
				TDes8& aForeign, 
sl@0
   209
				const TDesC16& aUnicode, 
sl@0
   210
				CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, 
sl@0
   211
				const TArray<SCharacterSet>& aArrayOfCharacterSets, 
sl@0
   212
				TUint& aOutputConversionFlags, 
sl@0
   213
				TUint aInputConversionFlags)
sl@0
   214
	{
sl@0
   215
	__ASSERT_ALWAYS(~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter, Panic(EPanicBadInputConversionFlags1));
sl@0
   216
	CheckArrayOfCharacterSets(aArrayOfCharacterSets);
sl@0
   217
	aOutputConversionFlags=0;
sl@0
   218
	TUint internalInputConversionFlags=aInputConversionFlags;
sl@0
   219
	if (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAppend)
sl@0
   220
		{
sl@0
   221
		aForeign.SetLength(0);
sl@0
   222
		internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAppend;
sl@0
   223
		}
sl@0
   224
	if (aUnicode.Length()==0)
sl@0
   225
		{
sl@0
   226
		return 0;
sl@0
   227
		}
sl@0
   228
	if (aForeign.MaxLength()==aForeign.Length()) // relies on the fact that aForeign's length has been set to zero if aInputConversionFlags does not have CCnvCharacterSetConverter::EInputConversionFlagAppend set
sl@0
   229
		{
sl@0
   230
		return aUnicode.Length();
sl@0
   231
		}
sl@0
   232
	TDes8* foreign=&aForeign;
sl@0
   233
	TPtr8 dummyForeign(NULL, 0, 0);
sl@0
   234
	if (aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagMustEndInDefaultCharacterSet)
sl@0
   235
		{
sl@0
   236
		TInt dummyMaximumLength =
sl@0
   237
			aForeign.MaxLength() - aArrayOfCharacterSets[KDefaultCharacterSet].iEscapeSequence->Length();
sl@0
   238
		__ASSERT_ALWAYS(dummyMaximumLength >= 0, 
sl@0
   239
			Panic(EPanicOutputDescriptorTooShortEvenToHoldEscapeSequenceToDefaultCharacterSet));
sl@0
   240
		dummyForeign.Set(const_cast <TUint8*> (aForeign.Ptr()),
sl@0
   241
						 aForeign.Length(), 
sl@0
   242
						 dummyMaximumLength);
sl@0
   243
		foreign=&dummyForeign;
sl@0
   244
		}
sl@0
   245
	const TInt numberOfCharacterSets=aArrayOfCharacterSets.Count();
sl@0
   246
	TInt numberOfUnicodeElementsConsumed=0;
sl@0
   247
	internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter; // this is not just an optimization - it ensures that "foreign" doesn't get filled up too much each time CCnvCharacterSetConverter::DoConvertFromUnicode is called
sl@0
   248
    TInt previousCharacterSet = aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAssumeStartInDefaultCharacterSet?
sl@0
   249
        KDefaultCharacterSet : KNoPreviousCharacterSet;
sl@0
   250
	FOREVER
sl@0
   251
		{
sl@0
   252
		for (TInt presentCharacterSet=KDefaultCharacterSet;;)
sl@0
   253
			{
sl@0
   254
			__ASSERT_DEBUG(numberOfUnicodeElementsConsumed<=aUnicode.Length(), Panic(EPanicBadNumberOfUnicodeElementsConsumed));
sl@0
   255
			if (numberOfUnicodeElementsConsumed>=aUnicode.Length())
sl@0
   256
				{
sl@0
   257
				goto end;
sl@0
   258
				}
sl@0
   259
			const SCharacterSet& characterSet=aArrayOfCharacterSets[presentCharacterSet];
sl@0
   260
			const TInt oldNumberOfBytesInForeign=foreign->Length();
sl@0
   261
			if (numberOfUnicodeElementsConsumed>0)
sl@0
   262
				{
sl@0
   263
				internalInputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable;
sl@0
   264
				}
sl@0
   265
			CCnvCharacterSetConverter::TArrayOfAscendingIndices indicesOfUnconvertibleCharacters;
sl@0
   266
			const TInt returnValue=CCnvCharacterSetConverter::DoConvertFromUnicode(*characterSet.iConversionData, aDefaultEndiannessOfForeignCharacters, KNullDesC8, *foreign, aUnicode.Mid(numberOfUnicodeElementsConsumed), indicesOfUnconvertibleCharacters, aOutputConversionFlags, internalInputConversionFlags);
sl@0
   267
			if (returnValue<0)
sl@0
   268
				{
sl@0
   269
				return returnValue; // this is an error-code
sl@0
   270
				}
sl@0
   271
			__ASSERT_DEBUG(foreign->Length()>=oldNumberOfBytesInForeign, Panic(EPanicAppendFlagViolated));
sl@0
   272
			TInt indexOfFirstUnconvertibleCharacter;
sl@0
   273
			if (indicesOfUnconvertibleCharacters.NumberOfIndices()==0)
sl@0
   274
				{
sl@0
   275
				indexOfFirstUnconvertibleCharacter=-1;
sl@0
   276
				numberOfUnicodeElementsConsumed=aUnicode.Length()-returnValue;
sl@0
   277
				}
sl@0
   278
			else
sl@0
   279
				{
sl@0
   280
				indexOfFirstUnconvertibleCharacter=indicesOfUnconvertibleCharacters[0];
sl@0
   281
				numberOfUnicodeElementsConsumed+=indexOfFirstUnconvertibleCharacter;
sl@0
   282
				__ASSERT_DEBUG(numberOfUnicodeElementsConsumed+LengthOfUnicodeCharacter(aUnicode, numberOfUnicodeElementsConsumed)==aUnicode.Length()-returnValue, Panic(EPanicBadNumberOfUnicodeCharactersConverted));
sl@0
   283
				}
sl@0
   284
			if (indexOfFirstUnconvertibleCharacter!=0) // if at least one Unicode character at the start of CCnvCharacterSetConverter::DoConvertFromUnicode's input descriptor was convertible...
sl@0
   285
				{
sl@0
   286
				TBool gotoEnd = EFalse;
sl@0
   287
				if (foreign->Length()>oldNumberOfBytesInForeign)
sl@0
   288
					{
sl@0
   289
					TInt numberOfCharactersThatDroppedOut=0;
sl@0
   290
                     // Insert an escape sequence if this character set is different from the last one.
sl@0
   291
                     if (presentCharacterSet  != previousCharacterSet)
sl@0
   292
                        {
sl@0
   293
                        // Insert escape sequence (if requred) in front of the last encoded run of text.
sl@0
   294
                        // Note that this may cause some characters to drop out at the end.
sl@0
   295
                        (*characterSet.iConvertFromIntermediateBufferInPlace)(oldNumberOfBytesInForeign, *foreign, numberOfCharactersThatDroppedOut);
sl@0
   296
                        if (oldNumberOfBytesInForeign < foreign->Length())
sl@0
   297
                			previousCharacterSet = presentCharacterSet;
sl@0
   298
                        }
sl@0
   299
					numberOfUnicodeElementsConsumed-=numberOfCharactersThatDroppedOut;
sl@0
   300
					if (numberOfCharactersThatDroppedOut>0 )// if "foreign" has been filled to as much as it will hold...
sl@0
   301
						{
sl@0
   302
						gotoEnd = ETrue;
sl@0
   303
						}
sl@0
   304
					}
sl@0
   305
				if (indexOfFirstUnconvertibleCharacter<0) // if we've successfully converted up to the end of aUnicode (using *characterSet.iConversionData)...
sl@0
   306
					{
sl@0
   307
					gotoEnd = ETrue;
sl@0
   308
					}
sl@0
   309
				if (gotoEnd)
sl@0
   310
					{
sl@0
   311
					if ( aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagMustEndInDefaultCharacterSet
sl@0
   312
					    && previousCharacterSet != KDefaultCharacterSet
sl@0
   313
					    && previousCharacterSet != KNoPreviousCharacterSet)
sl@0
   314
					    {
sl@0
   315
					    aForeign.SetLength(foreign->Length());
sl@0
   316
    				    aForeign.Append(*aArrayOfCharacterSets[KDefaultCharacterSet].iEscapeSequence);
sl@0
   317
    				    foreign=NULL;
sl@0
   318
					    }
sl@0
   319
					goto end;
sl@0
   320
					}
sl@0
   321
				break;
sl@0
   322
				}
sl@0
   323
			__ASSERT_DEBUG(presentCharacterSet<numberOfCharacterSets, Panic(EPanicLoopCounterOverRun1));
sl@0
   324
			++presentCharacterSet;
sl@0
   325
			if (presentCharacterSet>=numberOfCharacterSets)
sl@0
   326
				{
sl@0
   327
				if ((foreign->MaxLength()-foreign->Length()<aReplacementForUnconvertibleUnicodeCharacters.Length()) ||
sl@0
   328
					(aIndicesOfUnconvertibleCharacters.AppendIndex(numberOfUnicodeElementsConsumed)!=CCnvCharacterSetConverter::TArrayOfAscendingIndices::EAppendSuccessful)) // the tests must be done in this order as AppendIndex must only be called if there is room for aReplacementForUnconvertibleUnicodeCharacters
sl@0
   329
					{
sl@0
   330
					goto end;
sl@0
   331
					}
sl@0
   332
				numberOfUnicodeElementsConsumed+=LengthOfUnicodeCharacter(aUnicode, numberOfUnicodeElementsConsumed);
sl@0
   333
				foreign->Append(aReplacementForUnconvertibleUnicodeCharacters);
sl@0
   334
				break;
sl@0
   335
				}
sl@0
   336
			}
sl@0
   337
		}
sl@0
   338
end:
sl@0
   339
	if (foreign!=NULL)
sl@0
   340
		{
sl@0
   341
		aForeign.SetLength(foreign->Length());
sl@0
   342
		foreign=NULL;
sl@0
   343
		}
sl@0
   344
	if ((numberOfUnicodeElementsConsumed==0) && (aOutputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated) && (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable))
sl@0
   345
		{
sl@0
   346
		return CCnvCharacterSetConverter::EErrorIllFormedInput;
sl@0
   347
		}
sl@0
   348
	return aUnicode.Length()-numberOfUnicodeElementsConsumed;
sl@0
   349
	}
sl@0
   350
sl@0
   351
sl@0
   352
/** Inserts an escape sequence into the descriptor.
sl@0
   353
sl@0
   354
This function is provided to help in the implementation of
sl@0
   355
ConvertFromUnicode() for modal character set encodings.
sl@0
   356
Each SCharacterSet object in the array passed to
sl@0
   357
ConvertFromUnicode() must have its
sl@0
   358
iConvertFromIntermediateBufferInPlace member assigned. To
sl@0
   359
do this for a modal character set encoding, implement a function whose
sl@0
   360
signature matches that of FConvertFromIntermediateBufferInPlace 
sl@0
   361
and which calls this function, passing all arguments unchanged, and 
sl@0
   362
specifying the character set's escape sequence and the number of bytes per 
sl@0
   363
character.
sl@0
   364
sl@0
   365
@param aStartPositionInDescriptor The byte position in aDescriptor at which 
sl@0
   366
the escape sequence is inserted. If the character set uses more than one byte 
sl@0
   367
per character, this position must be the start of a character, otherwise a 
sl@0
   368
panic occurs. 
sl@0
   369
@param aDescriptor The descriptor into which the escape sequence is inserted. 
sl@0
   370
@param aNumberOfCharactersThatDroppedOut The escape sequence is inserted into 
sl@0
   371
the start of aDescriptor and any characters that need to drop out to make 
sl@0
   372
room for the escape sequence (because the descriptor's maximum length was 
sl@0
   373
not long enough) drop out from the end of the buffer. This parameter indicates 
sl@0
   374
the number of characters that needed to drop out.
sl@0
   375
@param aEscapeSequence The escape sequence for the character set. 
sl@0
   376
@param aNumberOfBytesPerCharacter The number of bytes per character. */
sl@0
   377
EXPORT_C void CnvUtilities::ConvertFromIntermediateBufferInPlace(
sl@0
   378
					TInt aStartPositionInDescriptor, 
sl@0
   379
					TDes8& aDescriptor, 
sl@0
   380
					TInt& aNumberOfCharactersThatDroppedOut, 
sl@0
   381
					const TDesC8& aEscapeSequence, 
sl@0
   382
					TInt aNumberOfBytesPerCharacter)
sl@0
   383
	{
sl@0
   384
	const TInt lengthOfDescriptor=aDescriptor.Length();
sl@0
   385
	__ASSERT_ALWAYS((lengthOfDescriptor-aStartPositionInDescriptor)%aNumberOfBytesPerCharacter==0, Panic(EPanicDescriptorNotWholeNumberOfCharacters1));
sl@0
   386
	aNumberOfCharactersThatDroppedOut=(Max(0, aEscapeSequence.Length()-(aDescriptor.MaxLength()-lengthOfDescriptor))+(aNumberOfBytesPerCharacter-1))/aNumberOfBytesPerCharacter;
sl@0
   387
	const TInt lengthOfRunInCharacters=(lengthOfDescriptor-aStartPositionInDescriptor)/aNumberOfBytesPerCharacter;
sl@0
   388
	if (aNumberOfCharactersThatDroppedOut>=lengthOfRunInCharacters) // ">=" is correct (rather than ">") as if there's only room for the escape sequence we don't want to have it in the descriptor
sl@0
   389
		{
sl@0
   390
		aNumberOfCharactersThatDroppedOut=lengthOfRunInCharacters;
sl@0
   391
		aDescriptor.SetLength(aStartPositionInDescriptor);
sl@0
   392
		}
sl@0
   393
	else
sl@0
   394
		{
sl@0
   395
		aDescriptor.SetLength(lengthOfDescriptor-(aNumberOfCharactersThatDroppedOut*aNumberOfBytesPerCharacter));
sl@0
   396
		aDescriptor.Insert(aStartPositionInDescriptor, aEscapeSequence);
sl@0
   397
		}
sl@0
   398
	}
sl@0
   399
sl@0
   400
sl@0
   401
/**  Converts text from a modal foreign character set encoding into Unicode.
sl@0
   402
sl@0
   403
The non-Unicode text specified in aForeign is converted using 
sl@0
   404
the array of character set conversion objects (aArrayOfStates) 
sl@0
   405
provided by the plug-in, and the converted text is returned in 
sl@0
   406
aUnicode. The function can either append to aUnicode 
sl@0
   407
or overwrite its contents (if any), depending on the input conversion flags 
sl@0
   408
specified. The first element in aArrayOfStates is taken to be 
sl@0
   409
the default mode (i.e. the mode to assume by default if there is no preceding 
sl@0
   410
escape sequence).
sl@0
   411
 
sl@0
   412
@param aDefaultEndiannessOfForeignCharacters The default endian-ness of the 
sl@0
   413
foreign characters. If an endian-ness for foreign characters is specified 
sl@0
   414
in the conversion data, then that is used instead and the value of 
sl@0
   415
aDefaultEndiannessOfForeignCharacters is ignored. 
sl@0
   416
@param aUnicode On return, contains the text converted into Unicode. 
sl@0
   417
@param aForeign The non-Unicode source text to be converted. 
sl@0
   418
@param aState Used to store a modal character set encoding's current mode across 
sl@0
   419
multiple calls to ConvertToUnicode() on the same input descriptor. This argument 
sl@0
   420
should be passed the same object as passed to the plug-in's ConvertToUnicode() 
sl@0
   421
exported function.
sl@0
   422
@param aNumberOfUnconvertibleCharacters On return, contains the number of 
sl@0
   423
characters in aForeign which were not converted. Characters which cannot be 
sl@0
   424
converted are output as Unicode replacement characters (0xfffd). 
sl@0
   425
@param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index 
sl@0
   426
of the first byte of the first unconvertible character. For instance if the 
sl@0
   427
first character in the input descriptor (aForeign) could not be converted, 
sl@0
   428
then this parameter is set to the first byte of that character, i.e. zero. 
sl@0
   429
A negative value is returned if all the characters were converted. 
sl@0
   430
@param aArrayOfStates Array of character set conversion data objects, and their 
sl@0
   431
escape sequences ("modes"). There must be one or more modes in this array, 
sl@0
   432
none of the modes can have any NULL member data, and each mode's escape sequence 
sl@0
   433
must begin with KControlCharacterEscape (0x1b) or a panic occurs. 
sl@0
   434
@return The number of unconverted bytes left at the end of the input descriptor, 
sl@0
   435
or a negative error value, as defined in TError. */
sl@0
   436
EXPORT_C TInt CnvUtilities::ConvertToUnicodeFromModalForeign(
sl@0
   437
					CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
sl@0
   438
					TDes16& aUnicode, 
sl@0
   439
					const TDesC8& aForeign, 
sl@0
   440
					TInt& aState, 
sl@0
   441
					TInt& aNumberOfUnconvertibleCharacters, 
sl@0
   442
					TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, 
sl@0
   443
					const TArray<SState>& aArrayOfStates)
sl@0
   444
	{
sl@0
   445
	TUint notUsed;
sl@0
   446
	return ConvertToUnicodeFromModalForeign(aDefaultEndiannessOfForeignCharacters, 
sl@0
   447
											aUnicode, 
sl@0
   448
											aForeign, 
sl@0
   449
											aState, 
sl@0
   450
											aNumberOfUnconvertibleCharacters, 
sl@0
   451
											aIndexOfFirstByteOfFirstUnconvertibleCharacter, 
sl@0
   452
											aArrayOfStates, 
sl@0
   453
											notUsed, 
sl@0
   454
											0);
sl@0
   455
	}
sl@0
   456
sl@0
   457
/** @param aDefaultEndiannessOfForeignCharacters The default endian-ness for 
sl@0
   458
the foreign characters. If an endian-ness for foreign characters is specified 
sl@0
   459
in the conversion data, then that is used instead and the value of 
sl@0
   460
aDefaultEndiannessOfForeignCharacters is ignored. 
sl@0
   461
@param aUnicode On return, contains the text converted into Unicode. 
sl@0
   462
@param aForeign The non-Unicode source text to be converted. 
sl@0
   463
@param aState Used to store a modal character set encoding's current mode 
sl@0
   464
across multiple calls to ConvertToUnicode() on the same input descriptor. This 
sl@0
   465
argument should be passed the same object as passed to the plug-in's 
sl@0
   466
ConvertToUnicode() exported function. 
sl@0
   467
@param aNumberOfUnconvertibleCharacters On return, contains the number of 
sl@0
   468
characters in aForeign which were not converted. Characters which cannot be 
sl@0
   469
converted are output as Unicode replacement characters (0xfffd). 
sl@0
   470
@param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index 
sl@0
   471
of the first byte of the first unconvertible character. For instance if the 
sl@0
   472
first character in the input descriptor (aForeign) could not be converted, 
sl@0
   473
then this parameter is set to the first byte of that character, i.e. zero. 
sl@0
   474
A negative value is returned if all the characters were converted. 
sl@0
   475
@param aArrayOfStates Array of character set conversion data objects, and their 
sl@0
   476
escape sequences. There must be one or more modes in this array, none of the 
sl@0
   477
modes can have any NULL member data, and each mode's escape sequence must 
sl@0
   478
begin with KControlCharacterEscape (0x1b) or a panic occurs. 
sl@0
   479
@param aOutputConversionFlags If the input descriptor ended in a truncated 
sl@0
   480
sequence, e.g. a part of a multi-byte character, aOutputConversionFlags 
sl@0
   481
returns with the EOutputConversionFlagInputIsTruncated flag set. 
sl@0
   482
@param aInputConversionFlags Specify 
sl@0
   483
CCnvCharacterSetConverter::EInputConversionFlagAppend to append the text to 
sl@0
   484
aUnicode. Specify EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable 
sl@0
   485
to prevent the function from returning the error-code EErrorIllFormedInput 
sl@0
   486
when the input descriptor consists of nothing but a truncated sequence. The 
sl@0
   487
CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter 
sl@0
   488
flag must not be set, otherwise a panic occurs. 
sl@0
   489
@return The number of unconverted bytes left at the end of the input descriptor, 
sl@0
   490
or a negative error value, as defined in TError. */
sl@0
   491
EXPORT_C TInt CnvUtilities::ConvertToUnicodeFromModalForeign(
		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
		TDes16& aUnicode, 
		const TDesC8& aForeign, 
		TInt& aState, 
		TInt& aNumberOfUnconvertibleCharacters, 
		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, 
		const TArray<SState>& aArrayOfStates, 
		TUint& aOutputConversionFlags, 
		TUint aInputConversionFlags)
	{
	__ASSERT_ALWAYS(~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter, Panic(EPanicBadInputConversionFlags2));
	CheckArrayOfStates(aArrayOfStates);

	// Reset every output argument before any early return.
	aNumberOfUnconvertibleCharacters=0;
	aIndexOfFirstByteOfFirstUnconvertibleCharacter=-1;
	aOutputConversionFlags=0;

	// Each run is converted by appending to aUnicode, so the append flag is always
	// passed down; when the caller did not ask for append, aUnicode is emptied first.
	const TUint appendFlag=CCnvCharacterSetConverter::EInputConversionFlagAppend;
	if ((aInputConversionFlags&appendFlag)!=appendFlag)
		{
		aUnicode.SetLength(0);
		}
	TUint flagsForConverter=aInputConversionFlags|appendFlag;

	if (aForeign.Length()==0)
		{
		return 0;
		}
	if (aUnicode.Length()==aUnicode.MaxLength()) // no room left (aUnicode was emptied above unless appending)
		{
		return aForeign.Length();
		}

	TPtrC8 pendingForeign(aForeign);
	TPtrC8 currentRun;
	TInt bytesConsumed=0;
	const SCnvConversionData* activeConversionData=NULL;

	// If the input does not open with an escape sequence, the leading run is converted
	// with the conversion data remembered in aState (or the first state's data when
	// aState is the default state).
	const TInt firstEscape=aForeign.Locate(KControlCharacterEscape);
	TBool runAlreadySelected=(firstEscape!=0);
	if (runAlreadySelected)
		{
		if (aState!=CCnvCharacterSetConverter::KStateDefault)
			{
			activeConversionData=REINTERPRET_CAST(const SCnvConversionData*, aState);
			}
		else
			{
			activeConversionData=aArrayOfStates[0].iConversionData;
			}
		if (firstEscape==KErrNotFound)
			{
			// no escape anywhere: the whole input is a single run
			currentRun.Set(pendingForeign);
			pendingForeign.Set(NULL, 0);
			}
		else
			{
			__ASSERT_DEBUG(firstEscape>0, Panic(EPanicBadStartOfNextEscapeSequence));
			currentRun.Set(pendingForeign.Left(firstEscape));
			pendingForeign.Set(pendingForeign.Mid(firstEscape));
			}
		}

	FOREVER
		{
		if (runAlreadySelected)
			{
			// first pass only: the run was chosen above, so do not look for an escape sequence
			runAlreadySelected=EFalse;
			}
		else if (!NextHomogeneousForeignRun(activeConversionData, bytesConsumed, currentRun, pendingForeign, aArrayOfStates, aOutputConversionFlags))
			{
			break;
			}
		if (activeConversionData==NULL)
			{
			// NextHomogeneousForeignRun signals an unrecognised escape sequence this way
			return CCnvCharacterSetConverter::EErrorIllFormedInput;
			}
		TInt unconvertibleInRun;
		TInt firstUnconvertibleByteInRun;
		const TInt converterResult=CCnvCharacterSetConverter::DoConvertToUnicode(*activeConversionData, aDefaultEndiannessOfForeignCharacters, aUnicode, currentRun, unconvertibleInRun, firstUnconvertibleByteInRun, aOutputConversionFlags, flagsForConverter);
		if (converterResult<0)
			{
			return converterResult; // negative results are error codes
			}
		if (unconvertibleInRun>0)
			{
			if (aNumberOfUnconvertibleCharacters==0)
				{
				// translate the run-relative index into an index into aForeign
				aIndexOfFirstByteOfFirstUnconvertibleCharacter=bytesConsumed+firstUnconvertibleByteInRun;
				}
			aNumberOfUnconvertibleCharacters+=unconvertibleInRun;
			}
		bytesConsumed+=currentRun.Length();
		if (converterResult>0)
			{
			// a positive result is treated as the number of trailing bytes of the run left unconverted
			bytesConsumed-=converterResult;
			break;
			}
		if (bytesConsumed>0)
			{
			flagsForConverter|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable;
			}
		__ASSERT_DEBUG(pendingForeign==aForeign.Mid(bytesConsumed), Panic(EPanicInconsistentNumberOfForeignBytesRemaining));
		}

	if ((bytesConsumed==0) && (aOutputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated) && (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable))
		{
		return CCnvCharacterSetConverter::EErrorIllFormedInput;
		}
	// remember the active conversion data so that a following call can continue in the same mode
	aState=REINTERPRET_CAST(TInt, activeConversionData);
	return aForeign.Length()-bytesConsumed;
	}
sl@0
   588
sl@0
   589
sl@0
   590
/**  Converts text from a non-modal complex character set encoding (e.g. 
sl@0
   591
Shift-JIS or EUC-JP) into Unicode. The non-Unicode text specified in
sl@0
   592
aForeign is converted using the array of character set
sl@0
   593
conversion methods (aArrayOfMethods) provided by the
sl@0
   594
plug-in, and the converted text is returned in aUnicode.
sl@0
   595
Overwrites the contents, if any, of aUnicode.
sl@0
   596
sl@0
   597
@param aDefaultEndiannessOfForeignCharacters The default endian-ness of the 
sl@0
   598
foreign characters. If an endian-ness for foreign characters is specified 
sl@0
   599
in the conversion data, then that is used instead and the value of 
sl@0
   600
aDefaultEndiannessOfForeignCharacters is ignored. 
sl@0
   601
@param aUnicode On return, contains the text converted into Unicode. 
sl@0
   602
@param aForeign The non-Unicode source text to be converted. 
sl@0
   603
@param aNumberOfUnconvertibleCharacters On return, contains the number of 
sl@0
   604
characters in aForeign which were not converted. Characters which cannot be 
sl@0
   605
converted are output as Unicode replacement characters (0xfffd). 
sl@0
   606
@param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index 
sl@0
   607
of the first byte of the first unconvertible character. For instance if the 
sl@0
   608
first character in the input descriptor (aForeign) could not be converted, 
sl@0
   609
then this parameter is set to the first byte of that character, i.e. zero. 
sl@0
   610
A negative value is returned if all the characters were converted. 
sl@0
   611
@param aArrayOfMethods Array of conversion methods. There must be one or more 
sl@0
   612
methods in this array and none of the methods in the array can have any NULL 
sl@0
   613
member data or a panic occurs. 
sl@0
   614
@return The number of unconverted bytes left at the end of the input descriptor, 
sl@0
   615
or a negative error value, as defined in TError. */
sl@0
   616
EXPORT_C TInt CnvUtilities::ConvertToUnicodeFromHeterogeneousForeign(
		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
		TDes16& aUnicode, 
		const TDesC8& aForeign, 
		TInt& aNumberOfUnconvertibleCharacters, 
		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, 
		const TArray<SMethod>& aArrayOfMethods)
	{
	// Forward to the overload that takes conversion flags: no input flags are set,
	// and the output flags it reports are discarded because this signature cannot return them.
	const TUint noInputConversionFlags=0;
	TUint discardedOutputConversionFlags;
	return ConvertToUnicodeFromHeterogeneousForeign(
				aDefaultEndiannessOfForeignCharacters, 
				aUnicode, 
				aForeign, 
				aNumberOfUnconvertibleCharacters, 
				aIndexOfFirstByteOfFirstUnconvertibleCharacter, 
				aArrayOfMethods, 
				discardedOutputConversionFlags, 
				noInputConversionFlags);
	}
sl@0
   635
sl@0
   636
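/** Converts text from a non-modal complex character set encoding (e.g. 
Shift-JIS or EUC-JP) into Unicode, like the overload above, but additionally 
takes input conversion flags and reports output conversion flags.

@param aDefaultEndiannessOfForeignCharacters The default endian-ness for the 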
sl@0
   637
foreign characters. If an endian-ness for foreign characters is specified 
sl@0
   638
in the conversion data, then that is used instead and the value of 
sl@0
   639
aDefaultEndiannessOfForeignCharacters is ignored. 
sl@0
   640
@param aUnicode On return, contains the text converted into Unicode. 
sl@0
   641
@param aForeign The non-Unicode source text to be converted. 
sl@0
   642
@param aNumberOfUnconvertibleCharacters On return, contains the number of 
sl@0
   643
characters in aForeign which were not converted. Characters which cannot be 
sl@0
   644
converted are output as Unicode replacement characters (0xfffd). 
sl@0
   645
@param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the index 
sl@0
   646
of the first byte of the first unconvertible character. For instance if the 
sl@0
   647
first character in the input descriptor (aForeign) could not be converted, 
sl@0
   648
then this parameter is set to the first byte of that character, i.e. zero. 
sl@0
   649
A negative value is returned if all the characters were converted. 
sl@0
   650
@param aArrayOfMethods Array of conversion methods. There must be one or more 
sl@0
   651
methods in this array and none of the methods in the array can have any NULL 
sl@0
   652
member data or a panic occurs. 
sl@0
   653
@param aOutputConversionFlags If the input descriptor ended in a truncated 
sl@0
   654
sequence, e.g. a part of a multi-byte character, aOutputConversionFlags 
sl@0
   655
returns with the EOutputConversionFlagInputIsTruncated flag set. 
sl@0
   656
@param aInputConversionFlags Specify 
sl@0
   657
CCnvCharacterSetConverter::EInputConversionFlagAppend to append the text to 
sl@0
   658
aUnicode. Specify EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable 
sl@0
   659
to prevent the function from returning the error-code EErrorIllFormedInput 
sl@0
   660
when the input descriptor consists of nothing but a truncated sequence. The 
sl@0
   661
CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter 
sl@0
   662
flag must not be set, otherwise a panic occurs. 
sl@0
   663
@return The number of unconverted bytes left at the end of the input descriptor, 
sl@0
   664
or a negative error value, as defined in TError. */
sl@0
   665
EXPORT_C TInt CnvUtilities::ConvertToUnicodeFromHeterogeneousForeign(
		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
		TDes16& aUnicode, 
		const TDesC8& aForeign, 
		TInt& aNumberOfUnconvertibleCharacters, 
		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, 
		const TArray<SMethod>& aArrayOfMethods, 
		TUint& aOutputConversionFlags, 
		TUint aInputConversionFlags)
	{
	__ASSERT_ALWAYS(~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter, Panic(EPanicBadInputConversionFlags3));
	CheckArrayOfMethods(aArrayOfMethods);

	// Reset every output argument before any early return.
	aNumberOfUnconvertibleCharacters=0;
	aIndexOfFirstByteOfFirstUnconvertibleCharacter=-1;
	aOutputConversionFlags=0;

	// Each chunk is converted by appending to aUnicode, so the append flag is always
	// passed down; when the caller did not ask for append, aUnicode is emptied first.
	const TUint appendFlag=CCnvCharacterSetConverter::EInputConversionFlagAppend;
	if ((aInputConversionFlags&appendFlag)!=appendFlag)
		{
		aUnicode.SetLength(0);
		}
	TUint flagsForConverter=aInputConversionFlags|appendFlag;

	if (aForeign.Length()==0)
		{
		return 0;
		}
	if (aUnicode.Length()==aUnicode.MaxLength()) // no room left (aUnicode was emptied above unless appending)
		{
		return aForeign.Length();
		}

	const TInt methodCount=aArrayOfMethods.Count();
	TPtrC8 unconsumedForeign(aForeign);
	TInt bytesConsumed=0;
	FOREVER
		{
		// Ask each method in turn how many leading bytes of the remaining input it can
		// convert; the first one reporting a positive length handles the next run.
		// NOTE(review): once all input has been consumed this search runs again on an
		// empty remainder; if every method reports 0 for it, the truncated flag below
		// is set even though nothing is left over - confirm that this is intended.
		const SMethod* chosenMethod=NULL;
		TInt runLength=0;
		TInt methodIndex=0;
		do
			{
			chosenMethod=&aArrayOfMethods[methodIndex];
			__ASSERT_DEBUG(chosenMethod!=NULL, Panic(EPanicBadMethodPointer));
			runLength=(*chosenMethod->iNumberOfBytesAbleToConvert)(unconsumedForeign);
			if (runLength<0)
				{
				return runLength; // negative results are error codes
				}
			if (runLength>0)
				{
				break;
				}
			__ASSERT_DEBUG(methodIndex<methodCount, Panic(EPanicLoopCounterOverRun2));
			++methodIndex;
			}
		while (methodIndex<methodCount);
		if (runLength==0)
			{
			// no method could take anything from the remaining input
			aOutputConversionFlags|=CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated;
			goto end;
			}

		// Convert the run in chunks that fit the intermediate buffer; the usable size is
		// derived from the method's bytes-per-character via ReduceToNearestMultipleOf.
		TBuf8<KMaximumLengthOfIntermediateBuffer> scratch;
		const TInt usableScratchLength=ReduceToNearestMultipleOf(KMaximumLengthOfIntermediateBuffer, chosenMethod->iNumberOfBytesPerCharacter);
		do
			{
			const TInt chunkLength=Min(runLength, usableScratchLength);
			scratch=unconsumedForeign.Left(chunkLength);
			__ASSERT_DEBUG((chunkLength%chosenMethod->iNumberOfBytesPerCharacter)==0, Panic(EPanicDescriptorNotWholeNumberOfCharacters2));
			(*chosenMethod->iConvertToIntermediateBufferInPlace)(scratch);
			__ASSERT_DEBUG((scratch.Length()%chosenMethod->iNumberOfCoreBytesPerCharacter)==0, Panic(EPanicDescriptorNotWholeNumberOfCharacters3));
			__ASSERT_DEBUG((scratch.Length()/chosenMethod->iNumberOfCoreBytesPerCharacter)*chosenMethod->iNumberOfBytesPerCharacter==chunkLength, Panic(EPanicBadMethodData1));
			TInt unconvertibleInChunk;
			TInt firstUnconvertibleByteInChunk;
			const TInt converterResult=CCnvCharacterSetConverter::DoConvertToUnicode(*chosenMethod->iConversionData, aDefaultEndiannessOfForeignCharacters, aUnicode, scratch, unconvertibleInChunk, firstUnconvertibleByteInChunk, aOutputConversionFlags, flagsForConverter);
			if (converterResult<0)
				{
				return converterResult; // negative results are error codes
				}
			if (unconvertibleInChunk>0)
				{
				if (aNumberOfUnconvertibleCharacters==0)
					{
					aIndexOfFirstByteOfFirstUnconvertibleCharacter=bytesConsumed+firstUnconvertibleByteInChunk;
					}
				aNumberOfUnconvertibleCharacters+=unconvertibleInChunk;
				}
			bytesConsumed+=chunkLength;
			if (converterResult>0)
				{
				// The positive result counts intermediate ("core") bytes left unconverted;
				// scale it back to bytes of the original foreign text before subtracting.
				__ASSERT_DEBUG((converterResult%chosenMethod->iNumberOfCoreBytesPerCharacter)==0, Panic(EPanicDescriptorNotWholeNumberOfCharacters4));
				bytesConsumed-=(converterResult/chosenMethod->iNumberOfCoreBytesPerCharacter)*chosenMethod->iNumberOfBytesPerCharacter;
				goto end;
				}
			if (bytesConsumed>0)
				{
				flagsForConverter|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable;
				}
			unconsumedForeign.Set(aForeign.Mid(bytesConsumed));
			runLength-=chunkLength;
			__ASSERT_DEBUG(runLength>=0, Panic(EPanicBadLengthOfRunToConvert2));
			}
		while (runLength>0);
		}
end:
	if ((bytesConsumed==0) && (aOutputConversionFlags&CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated) && (~aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable))
		{
		return CCnvCharacterSetConverter::EErrorIllFormedInput;
		}
	return aForeign.Length()-bytesConsumed;
	}
sl@0
   774
sl@0
   775
void CnvUtilities::CheckArrayOfCharacterSets(const TArray<SCharacterSet>& aArrayOfCharacterSets)
sl@0
   776
	{
sl@0
   777
	const TInt numberOfCharacterSets=aArrayOfCharacterSets.Count();
sl@0
   778
	__ASSERT_ALWAYS(numberOfCharacterSets>0, Panic(EPanicBadNumberOfCharacterSets));
sl@0
   779
	for (TInt i=0; i<numberOfCharacterSets; ++i)
sl@0
   780
		{
sl@0
   781
		const SCharacterSet& characterSet=aArrayOfCharacterSets[i];
sl@0
   782
		__ASSERT_ALWAYS(characterSet.iConversionData!=NULL, Panic(EPanicBadConversionDataPointer1));
sl@0
   783
		__ASSERT_ALWAYS(characterSet.iConvertFromIntermediateBufferInPlace!=NULL, Panic(EPanicBadFunctionPointer1));
sl@0
   784
		__ASSERT_ALWAYS(characterSet.iEscapeSequence!=NULL, Panic(EPanicBadEscapeSequencePointer1));
sl@0
   785
		}
sl@0
   786
	}
sl@0
   787
sl@0
   788
void CnvUtilities::CheckArrayOfStates(const TArray<SState>& aArrayOfStates)
sl@0
   789
	{
sl@0
   790
	const TInt numberOfStates=aArrayOfStates.Count();
sl@0
   791
	__ASSERT_ALWAYS(numberOfStates>0, Panic(EPanicBadNumberOfStates));
sl@0
   792
	for (TInt i=0; i<numberOfStates; ++i)
sl@0
   793
		{
sl@0
   794
		const SState& state=aArrayOfStates[i];
sl@0
   795
		__ASSERT_ALWAYS(state.iEscapeSequence!=NULL, Panic(EPanicBadEscapeSequencePointer2));
sl@0
   796
		__ASSERT_ALWAYS((*state.iEscapeSequence)[0]==KControlCharacterEscape, Panic(EPanicBadEscapeSequenceStart));
sl@0
   797
		__ASSERT_ALWAYS(state.iConversionData!=NULL, Panic(EPanicBadConversionDataPointer2));
sl@0
   798
		}
sl@0
   799
	}
sl@0
   800
sl@0
   801
void CnvUtilities::CheckArrayOfMethods(const TArray<SMethod>& aArrayOfMethods)
sl@0
   802
	{
sl@0
   803
	const TInt numberOfMethods=aArrayOfMethods.Count();
sl@0
   804
	__ASSERT_ALWAYS(numberOfMethods>0, Panic(EPanicBadNumberOfMethods));
sl@0
   805
	for (TInt i=0; i<numberOfMethods; ++i)
sl@0
   806
		{
sl@0
   807
		const SMethod& method=aArrayOfMethods[i];
sl@0
   808
		__ASSERT_ALWAYS(method.iNumberOfBytesAbleToConvert!=NULL, Panic(EPanicBadFunctionPointer2));
sl@0
   809
		__ASSERT_ALWAYS(method.iConvertToIntermediateBufferInPlace!=NULL, Panic(EPanicBadFunctionPointer3));
sl@0
   810
		__ASSERT_ALWAYS(method.iConversionData!=NULL, Panic(EPanicBadConversionDataPointer3));
sl@0
   811
		__ASSERT_ALWAYS(method.iNumberOfBytesPerCharacter>0, Panic(EPanicBadMethodData2));
sl@0
   812
		__ASSERT_ALWAYS(method.iNumberOfCoreBytesPerCharacter>0, Panic(EPanicBadMethodData3));
sl@0
   813
		__ASSERT_ALWAYS(method.iNumberOfCoreBytesPerCharacter<=method.iNumberOfBytesPerCharacter, Panic(EPanicBadMethodData4));
sl@0
   814
		}
sl@0
   815
	}
sl@0
   816
sl@0
   817
/** Returns the number of 16-bit elements occupied by the character starting at
aIndex in aUnicode: 2 if that element is the first half of a surrogate pair
(0xd800-0xdbff), otherwise 1. The presence and validity of the second half are
only checked in debug builds. */
TInt CnvUtilities::LengthOfUnicodeCharacter(const TDesC16& aUnicode, TInt aIndex)
	{
	const TUint codeUnit=aUnicode[aIndex];
	const TBool isFirstHalfOfSurrogatePair=(codeUnit>=0xd800) && (codeUnit<=0xdbff);
	if (!isFirstHalfOfSurrogatePair)
		{
		return 1;
		}
	__ASSERT_DEBUG(aIndex+1<aUnicode.Length(), Panic(EPanicBadSurrogatePair1));
	// this can be asserted as CCnvCharacterSetConverter::DoConvertFromUnicode should have returned an error value if this was a bad surrogate pair
	__ASSERT_DEBUG((aUnicode[aIndex+1]>=0xdc00) && (aUnicode[aIndex+1]<=0xdfff), Panic(EPanicBadSurrogatePair2));
	return 2;
	}
sl@0
   831
sl@0
   832
/** Consumes escape sequences from the front of aRemainderOfForeign until one is
followed by a non-empty run of text.

Returns ETrue with aConversionData and aHomogeneousRun set when such a run is
found. Also returns ETrue, with aConversionData set to NULL, when the remainder
starts with something that is neither a known escape sequence nor the beginning
of one. Returns EFalse when the remainder is exhausted, or when it is only the
beginning of a known escape sequence, in which case
EOutputConversionFlagInputIsTruncated is set in aOutputConversionFlags. */
TBool CnvUtilities::NextHomogeneousForeignRun(const SCnvConversionData*& aConversionData, TInt& aNumberOfForeignBytesConsumed, TPtrC8& aHomogeneousRun, TPtrC8& aRemainderOfForeign, const TArray<SState>& aArrayOfStates, TUint& aOutputConversionFlags)
	{
	__ASSERT_DEBUG((aRemainderOfForeign.Length()==0) || (aRemainderOfForeign[0]==KControlCharacterEscape), Panic(EPanicBadRemainderOfForeign));
	while (aRemainderOfForeign.Length()!=0)
		{
		const TInt stateCount=aArrayOfStates.Count();

		// Look for a state whose complete escape sequence starts the remainder.
		TBool escapeRecognised=EFalse;
		for (TInt i=0; i<stateCount; ++i)
			{
			const SState& candidate=aArrayOfStates[i];
			if (MatchesEscapeSequence(aNumberOfForeignBytesConsumed, aHomogeneousRun, aRemainderOfForeign, *candidate.iEscapeSequence))
				{
				aConversionData=candidate.iConversionData;
				escapeRecognised=ETrue;
				break;
				}
			}

		if (!escapeRecognised)
			{
			for (TInt j=0; j<stateCount; ++j)
				{
				if (IsStartOf(aRemainderOfForeign, *aArrayOfStates[j].iEscapeSequence))
					{
					// aRemainderOfForeign ends with a truncated escape sequence, so ConvertToUnicode cannot convert any more
					aOutputConversionFlags|=CCnvCharacterSetConverter::EOutputConversionFlagInputIsTruncated;
					return EFalse;
					}
				}
			// force ConvertToUnicode to return CCnvCharacterSetConverter::EErrorIllFormedInput
			aConversionData=NULL;
			return ETrue;
			}

		if (aHomogeneousRun.Length()>0)
			{
			return ETrue;
			}
		// the escape sequence was immediately followed by another one (or by the end of the input): keep going
		}
	return EFalse;
	}
sl@0
   871
sl@0
   872
/** Tests whether aRemainderOfForeign begins with aEscapeSequence.

If it does, the escape sequence is counted in aNumberOfForeignBytesConsumed,
aHomogeneousRun is set to the text between it and the next
KControlCharacterEscape (or the end of the input), aRemainderOfForeign is set to
whatever follows that run, and ETrue is returned. Otherwise nothing is modified
and EFalse is returned. */
TBool CnvUtilities::MatchesEscapeSequence(TInt& aNumberOfForeignBytesConsumed, TPtrC8& aHomogeneousRun, TPtrC8& aRemainderOfForeign, const TDesC8& aEscapeSequence)
	{
	if (!IsStartOf(aEscapeSequence, aRemainderOfForeign))
		{
		return EFalse;
		}
	const TInt escapeLength=aEscapeSequence.Length();
	aNumberOfForeignBytesConsumed+=escapeLength;

	const TPtrC8 afterEscape(aRemainderOfForeign.Mid(escapeLength));
	const TInt nextEscape=afterEscape.Locate(KControlCharacterEscape);
	if (nextEscape==KErrNotFound)
		{
		// no further escape: everything after this one is a single run
		aHomogeneousRun.Set(afterEscape);
		aRemainderOfForeign.Set(NULL, 0);
		return ETrue;
		}
	aHomogeneousRun.Set(afterEscape.Left(nextEscape));
	aRemainderOfForeign.Set(afterEscape.Mid(nextEscape));
	return ETrue;
	}
sl@0
   894
sl@0
   895
/** Returns ETrue if aPotentiallyLongerDescriptor is at least as long as aStart
and its leading aStart.Length() bytes compare equal to aStart, otherwise EFalse. */
TBool CnvUtilities::IsStartOf(const TDesC8& aStart, const TDesC8& aPotentiallyLongerDescriptor)
	{
	const TInt prefixLength=aStart.Length();
	if (aPotentiallyLongerDescriptor.Length()<prefixLength)
		{
		return EFalse; // too short to contain aStart
		}
	return (aPotentiallyLongerDescriptor.Left(prefixLength)==aStart) ? ETrue : EFalse;
	}
sl@0
   900