epoc32/include/charconv.h
author William Roberts <williamr@symbian.org>
Tue, 16 Mar 2010 16:12:26 +0000
branchSymbian2
changeset 2 2fe1408b6811
parent 0 061f57f2323e
child 4 837f303aceeb
permissions -rw-r--r--
Final list of Symbian^2 public API header files
williamr@2
     1
// Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
williamr@2
     2
// All rights reserved.
williamr@2
     3
// This component and the accompanying materials are made available
williamr@2
     4
// under the terms of the License "Symbian Foundation License v1.0" to Symbian Foundation members and "Symbian Foundation End User License Agreement v1.0" to non-members
williamr@2
     5
// which accompanies this distribution, and is available
williamr@2
     6
// at the URL "http://www.symbianfoundation.org/legal/licencesv10.html".
williamr@2
     7
//
williamr@2
     8
// Initial Contributors:
williamr@2
     9
// Nokia Corporation - initial contribution.
williamr@2
    10
//
williamr@2
    11
// Contributors:
williamr@2
    12
//
williamr@2
    13
// Description:
williamr@2
    14
//
williamr@2
    15
williamr@2
    16
#if !defined(__CHARCONV_H__)
williamr@2
    17
#define __CHARCONV_H__
williamr@2
    18
williamr@2
    19
#if !defined(__E32STD_H__)
williamr@2
    20
#include <e32std.h>
williamr@2
    21
#endif
williamr@2
    22
williamr@2
    23
#if !defined(__E32BASE_H__)
williamr@2
    24
#include <e32base.h>
williamr@2
    25
#endif
williamr@2
    26
williamr@2
    27
/** 
williamr@2
    28
The maximum length in bytes of the replacement text for unconvertible Unicode 
williamr@2
    29
characters (=50) (see CCnvCharacterSetConverter::SetReplacementForUnconvertibleUnicodeCharactersL()). 
williamr@2
    30
@publishedAll
williamr@2
    31
@released
williamr@2
    32
*/
williamr@2
    33
const TInt KMaximumLengthOfReplacementForUnconvertibleUnicodeCharacters=50;
williamr@2
    34
williamr@2
    35
/** 
williamr@2
    36
UTF-7 
williamr@2
    37
@publishedAll
williamr@2
    38
@released
williamr@2
    39
*/
williamr@2
    40
const TUint KCharacterSetIdentifierUtf7=0x1000582c;
williamr@2
    41
/** 
williamr@2
    42
UTF-8 
williamr@2
    43
@publishedAll
williamr@2
    44
@released
williamr@2
    45
*/
williamr@2
    46
const TUint KCharacterSetIdentifierUtf8=0x1000582d;
williamr@2
    47
/** 
williamr@2
    48
IMAP UTF-7 
williamr@2
    49
@publishedAll
williamr@2
    50
@released
williamr@2
    51
*/
williamr@2
    52
const TUint KCharacterSetIdentifierImapUtf7=0x1000582e;
williamr@2
    53
/** 
williamr@2
    54
Java UTF-8 
williamr@2
    55
@publishedAll
williamr@2
    56
@released
williamr@2
    57
*/
williamr@2
    58
const TUint KCharacterSetIdentifierJavaConformantUtf8=0x1000582f;
williamr@2
    59
/** 
williamr@2
    60
Code Page 1252 
williamr@2
    61
@publishedAll
williamr@2
    62
@released
williamr@2
    63
*/
williamr@2
    64
const TUint KCharacterSetIdentifierCodePage1252=0x100012b6;
williamr@2
    65
/** 
williamr@2
    66
ISO 8859-1 
williamr@2
    67
@publishedAll
williamr@2
    68
@released
williamr@2
    69
*/
williamr@2
    70
const TUint KCharacterSetIdentifierIso88591=0x10003b10;
williamr@2
    71
/** 
williamr@2
    72
ISO 8859-2 
williamr@2
    73
@publishedAll
williamr@2
    74
@released
williamr@2
    75
*/
williamr@2
    76
const TUint KCharacterSetIdentifierIso88592=0x1000507e;
williamr@2
    77
/** 
williamr@2
    78
ISO 8859-3 
williamr@2
    79
@publishedAll
williamr@2
    80
@released
williamr@2
    81
*/
williamr@2
    82
const TUint KCharacterSetIdentifierIso88593=0x10008a28;
williamr@2
    83
/** 
williamr@2
    84
ISO 8859-4 
williamr@2
    85
@publishedAll
williamr@2
    86
@released
williamr@2
    87
*/
williamr@2
    88
const TUint KCharacterSetIdentifierIso88594=0x1000507f;
williamr@2
    89
/** 
williamr@2
    90
ISO 8859-5 
williamr@2
    91
@publishedAll
williamr@2
    92
@released
williamr@2
    93
*/
williamr@2
    94
const TUint KCharacterSetIdentifierIso88595=0x10005080;
williamr@2
    95
/** 
williamr@2
    96
ISO 8859-6 
williamr@2
    97
@publishedAll
williamr@2
    98
@released
williamr@2
    99
*/
williamr@2
   100
const TUint KCharacterSetIdentifierIso88596=0x10008a29;
williamr@2
   101
/** 
williamr@2
   102
ISO 8859-7 
williamr@2
   103
@publishedAll
williamr@2
   104
@released
williamr@2
   105
*/
williamr@2
   106
const TUint KCharacterSetIdentifierIso88597=0x10005081;
williamr@2
   107
/** 
williamr@2
   108
ISO 8859-8 
williamr@2
   109
@publishedAll
williamr@2
   110
@released
williamr@2
   111
*/
williamr@2
   112
const TUint KCharacterSetIdentifierIso88598=0x10008a2a;
williamr@2
   113
/** 
williamr@2
   114
ISO 8859-9 
williamr@2
   115
@publishedAll
williamr@2
   116
@released
williamr@2
   117
*/
williamr@2
   118
const TUint KCharacterSetIdentifierIso88599=0x10005082;
williamr@2
   119
/** 
williamr@2
   120
ISO 8859-10 
williamr@2
   121
@publishedAll
williamr@2
   122
@released
williamr@2
   123
*/
williamr@2
   124
const TUint KCharacterSetIdentifierIso885910=0x10008a2b;
williamr@2
   125
/** 
williamr@2
   126
ISO 8859-13 
williamr@2
   127
@publishedAll
williamr@2
   128
@released
williamr@2
   129
*/
williamr@2
   130
const TUint KCharacterSetIdentifierIso885913=0x10008a2c;
williamr@2
   131
/** 
williamr@2
   132
ISO 8859-14 
williamr@2
   133
@publishedAll
williamr@2
   134
@released
williamr@2
   135
*/
williamr@2
   136
const TUint KCharacterSetIdentifierIso885914=0x10008a2d;
williamr@2
   137
/** 
williamr@2
   138
ISO 8859-15 
williamr@2
   139
@publishedAll
williamr@2
   140
@released
williamr@2
   141
*/
williamr@2
   142
const TUint KCharacterSetIdentifierIso885915=0x10008a2e;
williamr@2
   143
/** 
williamr@2
   144
ASCII 
williamr@2
   145
@publishedAll
williamr@2
   146
@released
williamr@2
   147
*/
williamr@2
   148
const TUint KCharacterSetIdentifierAscii=0x10004cc6;
williamr@2
   149
/** 
williamr@2
   150
SMS 7-bit 
williamr@2
   151
@publishedAll
williamr@2
   152
@released
williamr@2
   153
*/
williamr@2
   154
const TUint KCharacterSetIdentifierSms7Bit=0x100053ab;
williamr@2
   155
/** 
williamr@2
   156
GB 2312 
williamr@2
   157
@publishedAll
williamr@2
   158
@released
williamr@2
   159
*/
williamr@2
   160
const TUint KCharacterSetIdentifierGb2312=0x10000fbe;
williamr@2
   161
/** 
williamr@2
   162
HZ-GB-2312 
williamr@2
   163
@publishedAll
williamr@2
   164
@released
williamr@2
   165
*/
williamr@2
   166
const TUint KCharacterSetIdentifierHz=0x10006065;
williamr@2
   167
/** 
williamr@2
   168
GB 12345 
williamr@2
   169
@publishedAll
williamr@2
   170
@released
williamr@2
   171
*/
williamr@2
   172
const TUint KCharacterSetIdentifierGb12345=0x1000401a;
williamr@2
   173
/** 
williamr@2
   174
GBK 
williamr@2
   175
@publishedAll
williamr@2
   176
@released
williamr@2
   177
*/
williamr@2
   178
const TUint KCharacterSetIdentifierGbk=0x10003ecb;
williamr@2
   179
/** 
williamr@2
   180
Big 5 
williamr@2
   181
@publishedAll
williamr@2
   182
@released
williamr@2
   183
*/
williamr@2
   184
const TUint KCharacterSetIdentifierBig5=0x10000fbf;
williamr@2
   185
/** 
williamr@2
   186
Shift-JIS 
williamr@2
   187
@publishedAll
williamr@2
   188
@released
williamr@2
   189
*/
williamr@2
   190
const TUint KCharacterSetIdentifierShiftJis=0x10000fbd;
williamr@2
   191
/** 
williamr@2
   192
ISO-2022-JP 
williamr@2
   193
@publishedAll
williamr@2
   194
@released
williamr@2
   195
*/
williamr@2
   196
const TUint KCharacterSetIdentifierIso2022Jp=0x100066a0;
williamr@2
   197
/** 
williamr@2
   198
ISO-2022-JP-1 
williamr@2
   199
@publishedAll
williamr@2
   200
@released
williamr@2
   201
*/
williamr@2
   202
const TUint KCharacterSetIdentifierIso2022Jp1=0x100066a3;
williamr@2
   203
/** 
williamr@2
   204
JIS Encoding 
williamr@2
   205
@publishedAll
williamr@2
   206
@released
williamr@2
   207
*/
williamr@2
   208
const TUint KCharacterSetIdentifierJis=0x10006066;
williamr@2
   209
/** 
williamr@2
   210
EUC-JP 
williamr@2
   211
@publishedAll
williamr@2
   212
@released
williamr@2
   213
*/
williamr@2
   214
const TUint KCharacterSetIdentifierEucJpPacked=0x10006067;
williamr@2
   215
williamr@2
   216
/** 
williamr@2
   217
J5 
williamr@2
   218
@publishedAll
williamr@2
   219
@released
williamr@2
   220
*/
williamr@2
   221
const TUint KCharacterSetIdentifierJ5=0x1020D408;
williamr@2
   222
/** 
williamr@2
   223
CP850 
williamr@2
   224
@publishedAll
williamr@2
   225
@released
williamr@2
   226
*/
williamr@2
   227
const TUint KCharacterSetIdentifierCP850=0x102825AD;
williamr@2
   228
williamr@2
   229
const TUint KCharacterSetIdentifierUnicodeLittle=0x101f3fae;  //Little Endian Unicode
williamr@2
   230
const TUint KCharacterSetIdentifierUnicodeBig=0x101f4052; // Big Endian Unicode 
williamr@2
   231
const TUint KCharacterSetIdentifierUcs2=0x101ff492;
williamr@2
   232
williamr@2
   233
/** 
williamr@2
   234
Extended SMS 7-bit (not supported before v9.5) 
williamr@2
   235
@publishedAll
williamr@2
   236
@released
williamr@2
   237
*/
williamr@2
   238
const TUint KCharacterSetIdentifierExtendedSms7Bit=0x102863FD;
williamr@2
   239
williamr@2
   240
/** 
williamr@2
   241
Turkish 
williamr@2
   242
@publishedAll
williamr@2
   243
@released
williamr@2
   244
*/
williamr@2
   245
const TUint KCharacterSetIdentifierTurkishSingleSms7Bit=0x102863FE;
williamr@2
   246
const TUint KCharacterSetIdentifierTurkishLockingSms7Bit=0x102863FF;
williamr@2
   247
const TUint KCharacterSetIdentifierTurkishLockingAndSingleSms7Bit=0x10286400;
williamr@2
   248
williamr@2
   249
/** 
williamr@2
   250
Portuguese 
williamr@2
   251
@publishedAll
williamr@2
   252
@released
williamr@2
   253
*/
williamr@2
   254
const TUint KCharacterSetIdentifierPortugueseSingleSms7Bit=0x10286407;
williamr@2
   255
const TUint KCharacterSetIdentifierPortugueseLockingSms7Bit=0x10286408;
williamr@2
   256
const TUint KCharacterSetIdentifierPortugueseLockingAndSingleSms7Bit=0x10286409;
williamr@2
   257
williamr@2
   258
/** 
williamr@2
   259
Spanish
williamr@2
   260
@publishedAll
williamr@2
   261
@released
williamr@2
   262
*/
williamr@2
   263
const TUint KCharacterSetIdentifierSpanishSingleSms7Bit=0x1028640A;
williamr@2
   264
 
williamr@2
   265
// note that other character sets than those listed above may be available at run-time, and also that none of the above are necessarily available at run-time
williamr@2
   266
williamr@2
   267
struct SCnvConversionData;
williamr@2
   268
class CDeepDestructingArrayOfCharactersSets;
williamr@2
   269
class CFileReader;
williamr@2
   270
class CStandardNamesAndMibEnums;
williamr@2
   271
class RFs;
williamr@2
   272
class CCharsetCnvCache;
williamr@2
   273
/** 
williamr@2
   274
Converts text between Unicode and other character sets. 
williamr@2
   275
williamr@2
   276
The first stage of the conversion is to specify the non-Unicode character 
williamr@2
   277
set being converted to or from. This is done by calling one of the overloads 
williamr@2
   278
of PrepareToConvertToOrFromL().
williamr@2
   279
williamr@2
   280
The second stage is to convert the text, using one of the overloads of 
williamr@2
   281
ConvertFromUnicode() or ConvertToUnicode().
williamr@2
   282
williamr@2
   283
Where possible the first documented overload of PrepareToConvertToOrFromL() 
williamr@2
   284
should be used because the second overload panics if the specified character 
williamr@2
   285
set is not available: the first overload simply returns whether the character 
williamr@2
   286
set is available or not available. However if the conversions are to be 
williamr@2
   287
performed often, or if the user must select the character set for the 
williamr@2
   288
conversion from a list, the second overload may be more appropriate.
williamr@2
   289
williamr@2
   290
The first overload is less efficient than the second, because it searches 
williamr@2
   291
through the file system for the selected character set every time it is invoked. 
williamr@2
   292
The second overload searches through an array of all available character sets. 
williamr@2
   293
In this method, the file system need only be searched once - when 
williamr@2
   294
CreateArrayOfCharacterSetsAvailableLC() or 
williamr@2
   295
CreateArrayOfCharacterSetsAvailableL() is used to create the array.
williamr@2
   296
williamr@2
   297
The conversion functions allow users of this class to perform partial 
williamr@2
   298
conversions on an input descriptor, handling the situation where the input 
williamr@2
   299
descriptor is truncated mid way through a multi-byte character. This means 
williamr@2
   300
that you do not have to guess how big to make the output descriptor for a 
williamr@2
   301
given input descriptor, you can simply do the conversion in a loop using a 
williamr@2
   302
small output descriptor. The ability to handle truncated descriptors also 
williamr@2
   303
allows users of the class to convert information received in chunks from an 
williamr@2
   304
external source.
williamr@2
   305
williamr@2
   306
The class also provides a number of utility functions. 
williamr@2
   307
@publishedAll
williamr@2
   308
@released
williamr@2
   309
*/
williamr@2
   310
class CCnvCharacterSetConverter : public CBase
williamr@2
   311
	{
williamr@2
   312
public:
williamr@2
   313
	/** Indicates whether a character set is available or unavailable 
williamr@2
   314
	for conversion. Used by the second overload of 
williamr@2
   315
	PrepareToConvertToOrFromL(). */
williamr@2
   316
	enum TAvailability
williamr@2
   317
		{
williamr@2
   318
		/** The requested character set can be converted. */
williamr@2
   319
		EAvailable,
williamr@2
   320
		/** The requested character set cannot be converted. */
williamr@2
   321
		ENotAvailable
williamr@2
   322
		};
williamr@2
   323
williamr@2
   324
	/** Conversion error flags. At this stage there is only one error 
williamr@2
   325
	flag; others may be added in the future. */
williamr@2
   326
	enum TError
williamr@2
   327
		{
williamr@2
   328
		/** The input descriptor contains a single corrupt character. This 
williamr@2
   329
		might occur when the input descriptor only contains some of the bytes 
williamr@2
   330
		of a single multi-byte character. */
williamr@2
   331
		EErrorIllFormedInput=KErrCorrupt
williamr@2
   332
		};
williamr@2
   333
williamr@2
   334
	/** Specifies the default endian-ness of the current character set. 
williamr@2
   335
	Used by SetDefaultEndiannessOfForeignCharacters(). */
williamr@2
   336
	enum TEndianness
williamr@2
   337
		{
williamr@2
   338
		/** The character set is little-endian. */
williamr@2
   339
		ELittleEndian,
williamr@2
   340
		/** The character set is big-endian. */
williamr@2
   341
		EBigEndian
williamr@2
   342
		};
williamr@2
   343
	
williamr@2
   344
	/** Downgrade for line and paragraph separators */
williamr@2
   345
	enum TDowngradeForExoticLineTerminatingCharacters
williamr@2
   346
		{
williamr@2
   347
		/** Paragraph/line separators should be downgraded (if necessary) 
williamr@2
   348
		into carriage return and line feed pairs. */
williamr@2
   349
		EDowngradeExoticLineTerminatingCharactersToCarriageReturnLineFeed,
williamr@2
   350
		/** Paragraph/line separators should be downgraded (if necessary) 
williamr@2
   351
		into a line feed only. */
williamr@2
   352
		EDowngradeExoticLineTerminatingCharactersToJustLineFeed
williamr@2
   353
		};
williamr@2
   354
williamr@2
   355
	/** Output flag used to indicate whether or not a character in the source
williamr@2
   356
	descriptor is the first half of a surrogate pair, but is the last
williamr@2
   357
	character in the descriptor to convert.
williamr@2
   358
	 
williamr@2
   359
	Note: This enumeration can be used in the DoConvertToUnicode() and
williamr@2
   360
	DoConvertFromUnicode() functions. These are part of the
williamr@2
   361
	Character Conversion Plug-in Provider API and are for use by plug-in
williamr@2
   362
	conversion libraries only.
williamr@2
   363
	@since 6.0 */
williamr@2
   364
	enum
williamr@2
   365
		{
williamr@2
   366
		/** Appends the converted text to the output descriptor.*/
williamr@2
   367
		EInputConversionFlagAppend	=0x00010000,
williamr@2
   368
		/** By default, when the input descriptor passed to DoConvertFromUnicode()
williamr@2
   369
		or DoConvertToUnicode() consists of nothing but a truncated sequence, 
williamr@2
   370
		the error-code EErrorIllFormedInput is returned. 
williamr@2
   371
		If this behaviour is undesirable, the input flag  
williamr@2
   372
		EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable
williamr@2
   373
		should be set. */
williamr@2
   374
		EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable	=0x00020000,
williamr@2
   375
		/** Stops converting when the first unconvertible character is reached. */
williamr@2
   376
		EInputConversionFlagStopAtFirstUnconvertibleCharacter			=0x00040000,
williamr@2
   377
		/** Appends the default character set Escape sequence at end of converted text */
williamr@2
   378
		EInputConversionFlagMustEndInDefaultCharacterSet				=0x00080000,
williamr@2
   379
		/*defect fix: INC053609; According to RFC1468 we can assume the line starts 
williamr@2
   380
		in ASCII so there is no need to always insert an escape sequence*/
williamr@2
   381
		EInputConversionFlagAssumeStartInDefaultCharacterSet			=0x00100000
williamr@2
   382
		};
williamr@2
   383
	enum
williamr@2
   384
		{
williamr@2
   385
		/** Indicates whether or not the source descriptor ends in a truncated
williamr@2
   386
		sequence, e.g. the first half only of a surrogate pair. */
williamr@2
   387
		EOutputConversionFlagInputIsTruncated							=0x01000000
williamr@2
   388
		};
williamr@2
   389
williamr@2
   390
		/** Initial value for the state argument in a set of related calls to
williamr@2
   391
		ConvertToUnicode(). */
williamr@2
   392
	enum {KStateDefault=0};
williamr@2
   393
	enum 
williamr@2
   394
		{
williamr@2
   395
		/** The lowest confidence value for a character set accepted by 
williamr@2
   396
		Autodetect*/
williamr@2
   397
		ELowestThreshold = 25
williamr@2
   398
		};
williamr@2
   399
		
williamr@2
   400
	/** Stores information about a non-Unicode character set. The information 
williamr@2
   401
	is used	to locate the conversion information required by 
williamr@2
   402
	ConvertFromUnicode() and ConvertToUnicode().
williamr@2
   403
williamr@2
   404
	An array of these structs that contain all available character sets 
williamr@2
   405
	can be generated by CreateArrayOfCharacterSetsAvailableLC() and 
williamr@2
   406
	CreateArrayOfCharacterSetsAvailableL(), and is used by one of the 
williamr@2
   407
	overloads of PrepareToConvertToOrFromL(). */
williamr@2
   408
	struct SCharacterSet
williamr@2
   409
		{
williamr@2
   410
		/** Gets the character set's UID.
williamr@2
   411
	
williamr@2
   412
		@return The UID of the character set. */
williamr@2
   413
		inline TUint Identifier() const {return iIdentifier;}
williamr@2
   414
williamr@2
   415
		/** Tests whether a filename given by the function SCharacterSet::Name() 
williamr@2
   416
		is a real file name (i.e. conversion is provided by a plug in DLL), or 
williamr@2
   417
		just the character set name (i.e. conversion is built into Symbian OS).
williamr@2
   418
		
williamr@2
   419
		Note: If the function returns ETrue then the path and filename can be 
williamr@2
   420
		parsed using TParse or TParsePtrC functions to obtain just the filename.
williamr@2
   421
		
williamr@2
   422
		@return ETrue if the name is a real filename. EFalse if it is just the 
williamr@2
   423
		character set name. */
williamr@2
   424
		inline TBool NameIsFileName() const {return iFlags&EFlagNameIsFileName;}
williamr@2
   425
williamr@2
   426
		/** Gets the full path and filename of the DLL which implements 
williamr@2
   427
		conversion for the character set. 
williamr@2
   428
		
williamr@2
   429
		If the character set is one for which conversion is built into Symbian 
williamr@2
   430
		OS rather than implemented by a plug in DLL, the function just returns 
williamr@2
   431
		the name of the character set. The NameIsFileName() function can be 
williamr@2
   432
		used to determine whether or not it is legal to create a TParsePtrC 
williamr@2
   433
		object over the descriptor 	returned by Name().
williamr@2
   434
		
williamr@2
   435
		Notes:
williamr@2
   436
		
williamr@2
   437
		The name returned cannot be treated as an Internet-standard name, it 
williamr@2
   438
		is locale-independent and should be mapped to the locale-dependent name 
williamr@2
   439
		by software at a higher level before being shown to the user. Conversion 
williamr@2
   440
		from Internet-standard names of character sets to the UID identifiers 
williamr@2
   441
		is provided by the member function 
williamr@2
   442
		ConvertStandardNameOfCharacterSetToIdentifierL().
williamr@2
   443
		
williamr@2
   444
		Typically, to find the user-displayable name (as opposed to the 
williamr@2
   445
		internet-standard name) of a character set, you would do something 
williamr@2
   446
		like this:
williamr@2
   447
		
williamr@2
   448
		@code
williamr@2
   449
		const CCnvCharacterSetConverter::SCharacterSet& characterSet=...;
williamr@2
   450
		const TPtrC userDisplayable(characterSet.NameIsFileName()? TParsePtrC(characterSet.Name()).Name(): 
williamr@2
   451
		characterSet.Name()); 
williamr@2
   452
		@endcode
williamr@2
   453
williamr@2
   454
		@return Full path and filename of the character set converter plug in 
williamr@2
   455
		DLL, or just the name of the character set. */
williamr@2
   456
		inline TPtrC Name() const {return *iName;}
williamr@2
   457
	private:
williamr@2
   458
		enum
williamr@2
   459
			{
williamr@2
   460
			EFlagNameIsFileName					=0x00000001,
williamr@2
   461
			EFlagFileIsConversionPlugInLibrary	=0x00000002
williamr@2
   462
			};
williamr@2
   463
	private:
williamr@2
   464
		inline TBool FileIsConversionPlugInLibrary() const {return iFlags&EFlagFileIsConversionPlugInLibrary;}
williamr@2
   465
	private:
williamr@2
   466
		TUint iIdentifier;
williamr@2
   467
		TUint iFlags;
williamr@2
   468
		HBufC* iName;
williamr@2
   469
	private:
williamr@2
   470
		friend class CCnvCharacterSetConverter;
williamr@2
   471
		friend class CDeepDestructingArrayOfCharactersSets;
williamr@2
   472
		}; //SCharacterSet
williamr@2
   473
	
williamr@2
   474
williamr@2
   475
	/** 
williamr@2
   476
	Holds an ascending array of the indices of the characters in the 
williamr@2
   477
	source Unicode text which could not be converted by 
williamr@2
   478
	CCnvCharacterSetConverter::ConvertFromUnicode() into the foreign 
williamr@2
   479
	character set 
williamr@2
   480
	@publishedAll
williamr@2
   481
	@released
williamr@2
   482
	*/
williamr@2
   483
	class TArrayOfAscendingIndices
williamr@2
   484
		{
williamr@2
   485
	public:
williamr@2
   486
		/** The return value of TArrayOfAscendingIndices::AppendIndex(). */
williamr@2
   487
		enum TAppendResult
williamr@2
   488
			{
williamr@2
   489
			/** The append failed. */
williamr@2
   490
			EAppendFailed,
williamr@2
   491
			/** The append succeeded. */
williamr@2
   492
			EAppendSuccessful
williamr@2
   493
			};
williamr@2
   494
	public:
williamr@2
   495
		/** C++ constructor. The array is initialised to be of length zero. */
williamr@2
   496
		inline TArrayOfAscendingIndices() :iArrayOfIndices(0) {}
williamr@2
   497
	
williamr@2
   498
		IMPORT_C TAppendResult AppendIndex(TInt aIndex);
williamr@2
   499
		
williamr@2
   500
		/** Deletes a single index from the array.
williamr@2
   501
		
williamr@2
   502
		@param aIndexOfIndex The index of the index to delete. Must not be 
williamr@2
   503
		negative and must not be greater than the length of the array, or a 
williamr@2
   504
		panic occurs. */
williamr@2
   505
		inline void Remove(TInt aIndexOfIndex) {iArrayOfIndices.Delete(aIndexOfIndex, 1);}
williamr@2
   506
		
williamr@2
   507
		/** Deletes all indices from the array. */
williamr@2
   508
		inline void RemoveAll() {iArrayOfIndices.SetLength(0);}
williamr@2
   509
williamr@2
   510
		/** Returns the number of indices in the array.
williamr@2
   511
	
williamr@2
   512
		@return The number of indices in the array. */
williamr@2
   513
		inline TInt NumberOfIndices() const {return iArrayOfIndices.Length();}
williamr@2
   514
williamr@2
   515
		/** Gets the value of the specified index.
williamr@2
   516
	
williamr@2
   517
		@param aIndexOfIndex Index into the array.
williamr@2
   518
		@return The value of the index. */
williamr@2
   519
		inline TInt operator[](TInt aIndexOfIndex) const {return iArrayOfIndices[aIndexOfIndex];}
williamr@2
   520
	private:
williamr@2
   521
		enum {KMaximumNumberOfIndices=25};
williamr@2
   522
	private:
williamr@2
   523
		TBuf16<KMaximumNumberOfIndices> iArrayOfIndices;
williamr@2
   524
		};
williamr@2
   525
public:
williamr@2
   526
	IMPORT_C static CCnvCharacterSetConverter* NewL();
williamr@2
   527
	IMPORT_C static CCnvCharacterSetConverter* NewLC();
williamr@2
   528
	IMPORT_C virtual ~CCnvCharacterSetConverter();
williamr@2
   529
	IMPORT_C static CArrayFix<SCharacterSet>* CreateArrayOfCharacterSetsAvailableL(RFs& aFileServerSession);
williamr@2
   530
	IMPORT_C static CArrayFix<SCharacterSet>* CreateArrayOfCharacterSetsAvailableLC(RFs& aFileServerSession);
williamr@2
   531
	IMPORT_C TUint ConvertStandardNameOfCharacterSetToIdentifierL(const TDesC8& aStandardNameOfCharacterSet, RFs& aFileServerSession);
williamr@2
   532
	IMPORT_C HBufC8* ConvertCharacterSetIdentifierToStandardNameL(TUint aCharacterSetIdentifier, RFs& aFileServerSession);
williamr@2
   533
	IMPORT_C TUint ConvertMibEnumOfCharacterSetToIdentifierL(TInt aMibEnumOfCharacterSet, RFs& aFileServerSession);
williamr@2
   534
	IMPORT_C TInt ConvertCharacterSetIdentifierToMibEnumL(TUint aCharacterSetIdentifier, RFs& aFileServerSession);
williamr@2
   535
	IMPORT_C void PrepareToConvertToOrFromL(TUint aCharacterSetIdentifier, const CArrayFix<SCharacterSet>& aArrayOfCharacterSetsAvailable, RFs& aFileServerSession);
williamr@2
   536
	IMPORT_C TAvailability PrepareToConvertToOrFromL(TUint aCharacterSetIdentifier, RFs& aFileServerSession);
williamr@2
   537
	// the following attribute-setting functions should be called (if at all) after calling PrepareToConvertToOrFromL and before calling ConvertFromUnicode and/or ConvertToUnicode
williamr@2
   538
	IMPORT_C void SetDefaultEndiannessOfForeignCharacters(TEndianness aEndianness);
williamr@2
   539
	IMPORT_C void SetDowngradeForExoticLineTerminatingCharacters(TDowngradeForExoticLineTerminatingCharacters aDowngradeForExoticLineTerminatingCharacters); // by default this attribute is set to EDowngradeExoticLineTerminatingCharactersToCarriageReturnLineFeed
williamr@2
   540
	IMPORT_C void SetReplacementForUnconvertibleUnicodeCharactersL(const TDesC8& aReplacementForUnconvertibleUnicodeCharacters); // must be a single character preceded by its escape sequence (if any), and must be little-endian if the endianness of the character-set is unspecified, otherwise in the same endianness as the character-set
williamr@2
   541
	
williamr@2
   542
	// the conversion functions return either one of the TError values above, or the number of unconverted elements left at the end of the input descriptor
williamr@2
   543
	IMPORT_C TInt ConvertFromUnicode(TDes8& aForeign, const TDesC16& aUnicode) const;
williamr@2
   544
	IMPORT_C TInt ConvertFromUnicode(TDes8& aForeign, const TDesC16& aUnicode, TInt& aNumberOfUnconvertibleCharacters) const;
williamr@2
   545
	IMPORT_C TInt ConvertFromUnicode(TDes8& aForeign, const TDesC16& aUnicode, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstUnconvertibleCharacter) const;
williamr@2
   546
	IMPORT_C TInt ConvertFromUnicode(TDes8& aForeign, const TDesC16& aUnicode, TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters) const;
williamr@2
   547
	IMPORT_C TInt ConvertToUnicode(TDes16& aUnicode, const TDesC8& aForeign, TInt& aState) const;
williamr@2
   548
	IMPORT_C TInt ConvertToUnicode(TDes16& aUnicode, const TDesC8& aForeign, TInt& aState, TInt& aNumberOfUnconvertibleCharacters) const;
williamr@2
   549
	IMPORT_C TInt ConvertToUnicode(TDes16& aUnicode, const TDesC8& aForeign, TInt& aState, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter) const;
williamr@2
   550
	IMPORT_C static void AutoDetectCharacterSetL(TInt& aConfidenceLevel, TUint& aCharacterSetIdentifier, const CArrayFix<SCharacterSet>& aArrayOfCharacterSetsAvailable, const TDesC8& aSample);
williamr@2
   551
	IMPORT_C void AutoDetectCharSetL(TInt& aConfidenceLevel, TUint& aCharacterSetIdentifier, const CArrayFix<SCharacterSet>& aArrayOfCharacterSetsAvailable, const TDesC8& aSample);
williamr@2
   552
	IMPORT_C static void ConvertibleToCharacterSetL(TInt& aConfidenceLevel, const TUint aCharacterSetIdentifier,const CArrayFix<SCharacterSet>& aArrayOfCharacterSetsAvailable, const TDesC8& aSample);
williamr@2
   553
    IMPORT_C void ConvertibleToCharSetL(TInt& aConfidenceLevel, const TUint aCharacterSetIdentifier,const CArrayFix<SCharacterSet>& aArrayOfCharacterSetsAvailable, const TDesC8& aSample);
williamr@2
   554
	IMPORT_C void SetMaxCacheSize(TInt aSize);
williamr@2
   555
	// the following functions are only to be called by conversion plug-in libraries
williamr@2
   556
	IMPORT_C static TInt DoConvertFromUnicode(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters, const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, TDes8& aForeign, const TDesC16& aUnicode, TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
williamr@2
   557
	IMPORT_C static TInt DoConvertFromUnicode(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters, const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, TDes8& aForeign, const TDesC16& aUnicode, TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, TUint& aOutputConversionFlags, TUint aInputConversionFlags);
williamr@2
   558
	IMPORT_C static TInt DoConvertToUnicode(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aForeign, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
williamr@2
   559
	IMPORT_C static TInt DoConvertToUnicode(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aForeign, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, TUint& aOutputConversionFlags, TUint aInputConversionFlags);
williamr@2
   560
	IMPORT_C static const SCnvConversionData& AsciiConversionData();
williamr@2
   561
	/** Gets the downgrade setting for exotic line-terminating characters 
	that is currently stored in this converter object.

	@return The value of the iDowngradeForExoticLineTerminatingCharacters 
	member. */
	inline TDowngradeForExoticLineTerminatingCharacters GetDowngradeForExoticLineTerminatingCharacters () 
williamr@2
   562
		{
williamr@2
   563
		return iDowngradeForExoticLineTerminatingCharacters ;
williamr@2
   564
		} ; 
williamr@2
   565
williamr@2
   566
private:
williamr@2
   567
	enum
williamr@2
   568
		{
williamr@2
   569
		EStoredFlagOwnsConversionData				=0x00000001,
williamr@2
   570
		EStoredFlagConversionPlugInLibraryIsLoaded	=0x00000002
williamr@2
   571
		};
williamr@2
   572
	enum TCharacterSetSearch
williamr@2
   573
		{
williamr@2
   574
		EStopCharacterSetSearch,
williamr@2
   575
		EContinueCharacterSetSearch
williamr@2
   576
		};
williamr@2
   577
	enum TConversionPlugInFunctionOrdinals
williamr@2
   578
		{
williamr@2
   579
		EReplacementForUnconvertibleUnicodeCharacters=1,
williamr@2
   580
		EConvertFromUnicode=2,
williamr@2
   581
		EConvertToUnicode=3,
williamr@2
   582
		EIsInThisCharacterSet=4
williamr@2
   583
		};
williamr@2
   584
		
williamr@2
   585
private:
williamr@2
   586
	CCnvCharacterSetConverter();
williamr@2
   587
	void ConstructL();
williamr@2
   588
	static CArrayFix<SCharacterSet>* DoCreateArrayOfCharacterSetsAvailableLC(RFs& aFileServerSession, TUint aIdentifierOfOnlyCharacterSetOfInterest);
williamr@2
   589
	static TCharacterSetSearch AppendHardCodedCharacterSetIfRequiredL(CArrayFix<SCharacterSet>& aArrayOfCharacterSets, TUint aIdentifierOfOnlyCharacterSetOfInterest, TUint aIdentifierOfHardCodedCharacterSet, const TDesC& aNameOfHardCodedCharacterSet);
williamr@2
   590
	void ScanForStandardNamesAndMibEnumsL(RFs& aFileServerSession);
williamr@2
   591
	void ScanForStandardNamesAndMibEnumsROMOnlyL(RFs& aFileServerSession);
williamr@2
   592
	TAvailability DoPrepareToConvertToOrFromL(TUint aCharacterSetIdentifier, const CArrayFix<SCharacterSet>* aArrayOfCharacterSetsAvailable, RFs& aFileServerSession);
williamr@2
   593
	static void DeleteConversionData(const SCnvConversionData* aConversionData);
williamr@2
   594
	static void DeleteConversionData(TAny* aConversionData);
williamr@2
   595
	static TEndianness EndiannessOfForeignCharacters(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters);
williamr@2
   596
williamr@2
   597
private:
williamr@2
   598
	TUint iStoredFlags;
williamr@2
   599
	TUint iCharacterSetIdentifierOfLoadedConversionData; // 0 or a UID of the loaded plugin
williamr@2
   600
	const SCnvConversionData* iConversionData;
williamr@2
   601
	TEndianness iDefaultEndiannessOfForeignCharacters;
williamr@2
   602
	TDowngradeForExoticLineTerminatingCharacters iDowngradeForExoticLineTerminatingCharacters;
williamr@2
   603
	TBuf8<KMaximumLengthOfReplacementForUnconvertibleUnicodeCharacters> iReplacementForUnconvertibleUnicodeCharacters;
williamr@2
   604
	CStandardNamesAndMibEnums* iStandardNamesAndMibEnums;
williamr@2
   605
	TBool iFullyConstructed;
williamr@2
   606
	CCharsetCnvCache* iCharsetCnvCache;
williamr@2
   607
	TBool iIsSystemStandardNamesAndMibEnumsScanned;
williamr@2
   608
	};
williamr@2
   609
williamr@2
   610
#endif