os/textandloc/charconvfw/charconv_fw/inc/charconv.h
author sl@SLION-WIN7.fritz.box
Fri, 15 Jun 2012 03:10:57 +0200
changeset 0 bde4ae8d615e
permissions -rw-r--r--
First public contribution.
sl@0
     1
/*
sl@0
     2
* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
sl@0
     3
* All rights reserved.
sl@0
     4
* This component and the accompanying materials are made available
sl@0
     5
* under the terms of "Eclipse Public License v1.0"
sl@0
     6
* which accompanies this distribution, and is available
sl@0
     7
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
sl@0
     8
*
sl@0
     9
* Initial Contributors:
sl@0
    10
* Nokia Corporation - initial contribution.
sl@0
    11
*
sl@0
    12
* Contributors:
sl@0
    13
*
sl@0
    14
* Description: 
sl@0
    15
*
sl@0
    16
*/
sl@0
    17
sl@0
    18
sl@0
    19
#if !defined(__CHARCONV_H__)
sl@0
    20
#define __CHARCONV_H__
sl@0
    21
sl@0
    22
#if !defined(__E32STD_H__)
sl@0
    23
#include <e32std.h>
sl@0
    24
#endif
sl@0
    25
sl@0
    26
#if !defined(__E32BASE_H__)
sl@0
    27
#include <e32base.h>
sl@0
    28
#endif
sl@0
    29
sl@0
    30
/** 
sl@0
    31
The maximum length in bytes of the replacement text for unconvertible Unicode 
sl@0
    32
characters (=50) (see CCnvCharacterSetConverter::SetReplacementForUnconvertibleUnicodeCharactersL()). 
sl@0
    33
@publishedAll
sl@0
    34
@released
sl@0
    35
*/
sl@0
    36
const TInt KMaximumLengthOfReplacementForUnconvertibleUnicodeCharacters=50;
sl@0
    37
sl@0
    38
/** 
sl@0
    39
UTF-7 
sl@0
    40
@publishedAll
sl@0
    41
@released
sl@0
    42
*/
sl@0
    43
const TUint KCharacterSetIdentifierUtf7=0x1000582c;
sl@0
    44
/** 
sl@0
    45
UTF-8 
sl@0
    46
@publishedAll
sl@0
    47
@released
sl@0
    48
*/
sl@0
    49
const TUint KCharacterSetIdentifierUtf8=0x1000582d;
sl@0
    50
/** 
sl@0
    51
IMAP UTF-7 
sl@0
    52
@publishedAll
sl@0
    53
@released
sl@0
    54
*/
sl@0
    55
const TUint KCharacterSetIdentifierImapUtf7=0x1000582e;
sl@0
    56
/** 
sl@0
    57
Java UTF-8 
sl@0
    58
@publishedAll
sl@0
    59
@released
sl@0
    60
*/
sl@0
    61
const TUint KCharacterSetIdentifierJavaConformantUtf8=0x1000582f;
sl@0
    62
/** 
sl@0
    63
Code Page 1252 
sl@0
    64
@publishedAll
sl@0
    65
@released
sl@0
    66
*/
sl@0
    67
const TUint KCharacterSetIdentifierCodePage1252=0x100012b6;
sl@0
    68
/** 
sl@0
    69
ISO 8859-1 
sl@0
    70
@publishedAll
sl@0
    71
@released
sl@0
    72
*/
sl@0
    73
const TUint KCharacterSetIdentifierIso88591=0x10003b10;
sl@0
    74
/** 
sl@0
    75
ISO 8859-2 
sl@0
    76
@publishedAll
sl@0
    77
@released
sl@0
    78
*/
sl@0
    79
const TUint KCharacterSetIdentifierIso88592=0x1000507e;
sl@0
    80
/** 
sl@0
    81
ISO 8859-3 
sl@0
    82
@publishedAll
sl@0
    83
@released
sl@0
    84
*/
sl@0
    85
const TUint KCharacterSetIdentifierIso88593=0x10008a28;
sl@0
    86
/** 
sl@0
    87
ISO 8859-4 
sl@0
    88
@publishedAll
sl@0
    89
@released
sl@0
    90
*/
sl@0
    91
const TUint KCharacterSetIdentifierIso88594=0x1000507f;
sl@0
    92
/** 
sl@0
    93
ISO 8859-5 
sl@0
    94
@publishedAll
sl@0
    95
@released
sl@0
    96
*/
sl@0
    97
const TUint KCharacterSetIdentifierIso88595=0x10005080;
sl@0
    98
/** 
sl@0
    99
ISO 8859-6 
sl@0
   100
@publishedAll
sl@0
   101
@released
sl@0
   102
*/
sl@0
   103
const TUint KCharacterSetIdentifierIso88596=0x10008a29;
sl@0
   104
/** 
sl@0
   105
ISO 8859-7 
sl@0
   106
@publishedAll
sl@0
   107
@released
sl@0
   108
*/
sl@0
   109
const TUint KCharacterSetIdentifierIso88597=0x10005081;
sl@0
   110
/** 
sl@0
   111
ISO 8859-8 
sl@0
   112
@publishedAll
sl@0
   113
@released
sl@0
   114
*/
sl@0
   115
const TUint KCharacterSetIdentifierIso88598=0x10008a2a;
sl@0
   116
/** 
sl@0
   117
ISO 8859-9 
sl@0
   118
@publishedAll
sl@0
   119
@released
sl@0
   120
*/
sl@0
   121
const TUint KCharacterSetIdentifierIso88599=0x10005082;
sl@0
   122
/** 
sl@0
   123
ISO 8859-10 
sl@0
   124
@publishedAll
sl@0
   125
@released
sl@0
   126
*/
sl@0
   127
const TUint KCharacterSetIdentifierIso885910=0x10008a2b;
sl@0
   128
/** 
sl@0
   129
ISO 8859-13 
sl@0
   130
@publishedAll
sl@0
   131
@released
sl@0
   132
*/
sl@0
   133
const TUint KCharacterSetIdentifierIso885913=0x10008a2c;
sl@0
   134
/** 
sl@0
   135
ISO 8859-14 
sl@0
   136
@publishedAll
sl@0
   137
@released
sl@0
   138
*/
sl@0
   139
const TUint KCharacterSetIdentifierIso885914=0x10008a2d;
sl@0
   140
/** 
sl@0
   141
ISO 8859-15 
sl@0
   142
@publishedAll
sl@0
   143
@released
sl@0
   144
*/
sl@0
   145
const TUint KCharacterSetIdentifierIso885915=0x10008a2e;
sl@0
   146
/** 
sl@0
   147
ASCII 
sl@0
   148
@publishedAll
sl@0
   149
@released
sl@0
   150
*/
sl@0
   151
const TUint KCharacterSetIdentifierAscii=0x10004cc6;
sl@0
   152
/** 
sl@0
   153
SMS 7-bit 
sl@0
   154
@publishedAll
sl@0
   155
@released
sl@0
   156
*/
sl@0
   157
const TUint KCharacterSetIdentifierSms7Bit=0x100053ab;
sl@0
   158
/** 
sl@0
   159
GB 2312 
sl@0
   160
@publishedAll
sl@0
   161
@released
sl@0
   162
*/
sl@0
   163
const TUint KCharacterSetIdentifierGb2312=0x10000fbe;
sl@0
   164
/** 
sl@0
   165
HZ-GB-2312 
sl@0
   166
@publishedAll
sl@0
   167
@released
sl@0
   168
*/
sl@0
   169
const TUint KCharacterSetIdentifierHz=0x10006065;
sl@0
   170
/** 
sl@0
   171
GB 12345 
sl@0
   172
@publishedAll
sl@0
   173
@released
sl@0
   174
*/
sl@0
   175
const TUint KCharacterSetIdentifierGb12345=0x1000401a;
sl@0
   176
/** 
sl@0
   177
GBK 
sl@0
   178
@publishedAll
sl@0
   179
@released
sl@0
   180
*/
sl@0
   181
const TUint KCharacterSetIdentifierGbk=0x10003ecb;
sl@0
   182
/** 
sl@0
   183
GB18030
sl@0
   184
@publishedAll
sl@0
   185
@released
sl@0
   186
*/
sl@0
   187
const TUint KCharacterSetIdentifierGb18030=0x10287038;
sl@0
   188
/** 
sl@0
   189
Big 5 
sl@0
   190
@publishedAll
sl@0
   191
@released
sl@0
   192
*/
sl@0
   193
const TUint KCharacterSetIdentifierBig5=0x10000fbf;
sl@0
   194
/** 
sl@0
   195
Shift-JIS 
sl@0
   196
@publishedAll
sl@0
   197
@released
sl@0
   198
*/
sl@0
   199
const TUint KCharacterSetIdentifierShiftJis=0x10000fbd;
sl@0
   200
/** 
sl@0
   201
ISO-2022-JP 
sl@0
   202
@publishedAll
sl@0
   203
@released
sl@0
   204
*/
sl@0
   205
const TUint KCharacterSetIdentifierIso2022Jp=0x100066a0;
sl@0
   206
/** 
sl@0
   207
ISO-2022-JP-1 
sl@0
   208
@publishedAll
sl@0
   209
@released
sl@0
   210
*/
sl@0
   211
const TUint KCharacterSetIdentifierIso2022Jp1=0x100066a3;
sl@0
   212
/** 
sl@0
   213
JIS Encoding 
sl@0
   214
@publishedAll
sl@0
   215
@released
sl@0
   216
*/
sl@0
   217
const TUint KCharacterSetIdentifierJis=0x10006066;
sl@0
   218
/** 
sl@0
   219
EUC-JP 
sl@0
   220
@publishedAll
sl@0
   221
@released
sl@0
   222
*/
sl@0
   223
const TUint KCharacterSetIdentifierEucJpPacked=0x10006067;
sl@0
   224
sl@0
   225
/** 
sl@0
   226
JP5 
sl@0
   227
@publishedAll
sl@0
   228
@released
sl@0
   229
*/
sl@0
   230
const TUint KCharacterSetIdentifierJ5=0x1020D408;
sl@0
   231
/** 
sl@0
   232
CP850 
sl@0
   233
@publishedAll
sl@0
   234
@released
sl@0
   235
*/
sl@0
   236
const TUint KCharacterSetIdentifierCP850=0x102825AD;
sl@0
   237
sl@0
   238
const TUint KCharacterSetIdentifierUnicodeLittle=0x101f3fae;  //Little Endian Unicode
sl@0
   239
const TUint KCharacterSetIdentifierUnicodeBig=0x101f4052; // Big Endian Unicode 
sl@0
   240
const TUint KCharacterSetIdentifierUcs2=0x101ff492; 
sl@0
   241
sl@0
   242
sl@0
   243
/** 
sl@0
   244
Extended SMS 7-bit 
sl@0
   245
@publishedAll
sl@0
   246
@released
sl@0
   247
*/
sl@0
   248
const TUint KCharacterSetIdentifierExtendedSms7Bit=0x102863FD;
sl@0
   249
sl@0
   250
/** 
sl@0
   251
Turkish 
sl@0
   252
@publishedAll
sl@0
   253
@released
sl@0
   254
*/
sl@0
   255
const TUint KCharacterSetIdentifierTurkishSingleSms7Bit=0x102863FE;
sl@0
   256
const TUint KCharacterSetIdentifierTurkishLockingSms7Bit=0x102863FF;
sl@0
   257
const TUint KCharacterSetIdentifierTurkishLockingAndSingleSms7Bit=0x10286400;
sl@0
   258
sl@0
   259
/** 
sl@0
   260
Portuguese 
sl@0
   261
@publishedAll
sl@0
   262
@released
sl@0
   263
*/
sl@0
   264
const TUint KCharacterSetIdentifierPortugueseSingleSms7Bit=0x10286407;
sl@0
   265
const TUint KCharacterSetIdentifierPortugueseLockingSms7Bit=0x10286408;
sl@0
   266
const TUint KCharacterSetIdentifierPortugueseLockingAndSingleSms7Bit=0x10286409;
sl@0
   267
sl@0
   268
/** 
sl@0
   269
Spanish
sl@0
   270
@publishedAll
sl@0
   271
@released
sl@0
   272
*/
sl@0
   273
const TUint KCharacterSetIdentifierSpanishSingleSms7Bit=0x1028640A;
sl@0
   274
sl@0
   275
sl@0
   276
/**
sl@0
   277
code page 949
sl@0
   278
@publishedAll
sl@0
   279
@released
sl@0
   280
*/
sl@0
   281
const TUint KCharacterSetIdentifierCP949=0x200100FF;
sl@0
   282
sl@0
   283
/**
sl@0
   284
Shift-JIS with Pictograph
sl@0
   285
@publishedAll
sl@0
   286
@released 
sl@0
   287
*/
sl@0
   288
const TUint KCharacterSetIdentifierShiftJisDirectmap=0x101F8691;
sl@0
   289
sl@0
   290
/**
sl@0
   291
EUC-JP with direct mapped pictograph
sl@0
   292
@publishedAll
sl@0
   293
@released 
sl@0
   294
*/
sl@0
   295
const TUint KCharacterSetIdentifierEucJpDirectmap=0x101F86A6;
sl@0
   296
sl@0
   297
/**
sl@0
   298
EUC-KR 
sl@0
   299
@publishedAll
sl@0
   300
@released
sl@0
   301
*/
sl@0
   302
const TUint KCharacterSetIdentifierEUCKR=0x2000E526;
sl@0
   303
sl@0
   304
/**
sl@0
   305
iscii 
sl@0
   306
@publishedAll
sl@0
   307
@released
sl@0
   308
*/
sl@0
   309
const TUint KCharacterSetIdentifierIscii=0x1027508E;
sl@0
   310
sl@0
   311
/**
sl@0
   312
ISO2022 Korean
sl@0
   313
@publishedAll
sl@0
   314
@released
sl@0
   315
*/
sl@0
   316
const TUint KCharacterSetIdentifierIso2022kr=0x20010101;
sl@0
   317
sl@0
   318
/**
sl@0
   319
KOI8-R Russian
sl@0
   320
@publishedAll
sl@0
   321
@released
sl@0
   322
*/
sl@0
   323
const TUint KCharacterSetIdentifierKOI8R=0x101F8778;
sl@0
   324
sl@0
   325
/**
sl@0
   326
KOI8-U Belorusian/Ukrainian Cyrillic
sl@0
   327
@publishedAll
sl@0
   328
@released 
sl@0
   329
*/
sl@0
   330
const TUint KCharacterSetIdentifierKOI8U=0x101F8761;
sl@0
   331
sl@0
   332
/**
sl@0
   333
KSC5601 Korean
sl@0
   334
@publishedAll
sl@0
   335
@released 
sl@0
   336
*/
sl@0
   337
const TUint KCharacterSetIdentifierKsc5601=0x200113CD;
sl@0
   338
sl@0
   339
/**
sl@0
   340
TIS_620 Thai
sl@0
   341
@publishedAll
sl@0
   342
@released 
sl@0
   343
*/
sl@0
   344
const TUint KCharacterSetIdentifierTIS_620=0x101F8549;
sl@0
   345
sl@0
   346
/**
sl@0
   347
Code page 874 Thai
sl@0
   348
@publishedAll
sl@0
   349
@released 
sl@0
   350
*/
sl@0
   351
const TUint KCharacterSetIdentifierWin874=0x101F854A;
sl@0
   352
sl@0
   353
/**
sl@0
   354
Code page 1250 Eastern European
sl@0
   355
@publishedAll
sl@0
   356
@released 
sl@0
   357
*/
sl@0
   358
const TUint KCharacterSetIdentifierWin1250=0x100059D6;
sl@0
   359
sl@0
   360
/**
sl@0
   361
Code page 1251 Cyrillic
sl@0
   362
@publishedAll
sl@0
   363
@released 
sl@0
   364
*/
sl@0
   365
const TUint KCharacterSetIdentifierWin1251=0x100059D7;
sl@0
   366
sl@0
   367
/**
sl@0
   368
Code page 1253 Greek
sl@0
   369
@publishedAll
sl@0
   370
@released 
sl@0
   371
*/
sl@0
   372
const TUint KCharacterSetIdentifierWin1253=0x100059D8;
sl@0
   373
sl@0
   374
/**
sl@0
   375
Code page 1254 Turkish
sl@0
   376
@publishedAll
sl@0
   377
@released 
sl@0
   378
*/
sl@0
   379
const TUint KCharacterSetIdentifierWin1254=0x100059D9;
sl@0
   380
sl@0
   381
/**
sl@0
   382
Code page 1255 Hebrew
sl@0
   383
@publishedAll
sl@0
   384
@released 
sl@0
   385
*/
sl@0
   386
const TUint KCharacterSetIdentifierWin1255=0x101F8547;
sl@0
   387
sl@0
   388
/**
sl@0
   389
Code page 1256 Arabic
sl@0
   390
@publishedAll
sl@0
   391
@released 
sl@0
   392
*/
sl@0
   393
const TUint KCharacterSetIdentifierWin1256=0x101F8548;
sl@0
   394
sl@0
   395
/**
sl@0
   396
Code page 1257 Baltic
sl@0
   397
@publishedAll
sl@0
   398
@released 
sl@0
   399
*/
sl@0
   400
const TUint KCharacterSetIdentifierWin1257=0x100059DA;
sl@0
   401
sl@0
   402
/**
sl@0
   403
Windows-1258
sl@0
   404
@publishedAll
sl@0
   405
@released
sl@0
   406
*/ 
sl@0
   407
const TUint KCharacterSetIdentifierWin1258=0x102073B8;
sl@0
   408
sl@0
   409
// note that other character sets than those listed above may be available at run-time, and also that none of the above are necessarily available at run-time
sl@0
   410
sl@0
   411
// Forward declarations of types that this header only needs to name.
struct SCnvConversionData;
class CDeepDestructingArrayOfCharactersSets;
class CFileReader;
class CStandardNamesAndMibEnums;
class RFs;
class CCharsetCnvCache;
sl@0
   417
/** 
sl@0
   418
Converts text between Unicode and other character sets. 
sl@0
   419
sl@0
   420
The first stage of the conversion is to specify the non-Unicode character 
sl@0
   421
set being converted to or from. This is done by calling one of the overloads 
sl@0
   422
of PrepareToConvertToOrFromL().
sl@0
   423
sl@0
   424
The second stage is to convert the text, using one of the overloads of 
sl@0
   425
ConvertFromUnicode() or ConvertToUnicode().
sl@0
   426
sl@0
   427
Where possible the first documented overload of PrepareToConvertToOrFromL() 
sl@0
   428
should be used because the second overload panics if the specified character 
sl@0
   429
set is not available: the first overload simply returns whether the character 
sl@0
   430
set is available or not available. However if the conversions are to be 
sl@0
   431
performed often, or if the user must select the character set for the 
sl@0
   432
conversion from a list, the second overload may be more appropriate.
sl@0
   433
sl@0
   434
The first overload is less efficient than the second, because it searches 
sl@0
   435
through the file system for the selected character set every time it is invoked. 
sl@0
   436
The second overload searches through an array of all available character sets. 
sl@0
   437
In this method, the file system need only be searched once - when 
sl@0
   438
CreateArrayOfCharacterSetsAvailableLC() or 
sl@0
   439
CreateArrayOfCharacterSetsAvailableL() is used to create the array.
sl@0
   440
sl@0
   441
The conversion functions allow users of this class to perform partial 
sl@0
   442
conversions on an input descriptor, handling the situation where the input 
sl@0
   443
descriptor is truncated mid way through a multi-byte character. This means 
sl@0
   444
that you do not have to guess how big to make the output descriptor for a 
sl@0
   445
given input descriptor, you can simply do the conversion in a loop using a 
sl@0
   446
small output descriptor. The ability to handle truncated descriptors also 
sl@0
   447
allows users of the class to convert information received in chunks from an 
sl@0
   448
external source.
sl@0
   449
sl@0
   450
The class also provides a number of utility functions. 
sl@0
   451
@publishedAll
sl@0
   452
@released
sl@0
   453
*/
sl@0
   454
class CCnvCharacterSetConverter : public CBase
sl@0
   455
	{
sl@0
   456
public:
sl@0
   457
	/** Indicates whether a character set is available or unavailable
sl@0
   458
	for conversion. Used by the second overload of 
sl@0
   459
	PrepareToConvertToOrFromL(). */
sl@0
   460
	enum TAvailability
sl@0
   461
		{
sl@0
   462
		/** The requested character set can be converted. */
sl@0
   463
		EAvailable,
sl@0
   464
		/** The requested character set cannot be converted. */
sl@0
   465
		ENotAvailable
sl@0
   466
		};
sl@0
   467
sl@0
   468
	/** Conversion error flags. At this stage there is only one error 
sl@0
   469
	flag- others may be added in the future. */
sl@0
   470
	enum TError
sl@0
   471
		{
sl@0
   472
		/** The input descriptor contains a single corrupt character. This 
sl@0
   473
		might occur when the input descriptor only contains some of the bytes 
sl@0
   474
		of a single multi-byte character. */
sl@0
   475
		EErrorIllFormedInput=KErrCorrupt
sl@0
   476
		};
sl@0
   477
sl@0
   478
	/** Specifies the default endian-ness of the current character set. 
sl@0
   479
	Used by SetDefaultEndiannessOfForeignCharacters(). */
sl@0
   480
	enum TEndianness
sl@0
   481
		{
sl@0
   482
		/** The character set is big-endian. */
sl@0
   483
		ELittleEndian,
sl@0
   484
		/** The character set is little-endian. */
sl@0
   485
		EBigEndian
sl@0
   486
		};
sl@0
   487
	
sl@0
   488
	/** Downgrade for line and paragraph separators */
sl@0
   489
	enum TDowngradeForExoticLineTerminatingCharacters
sl@0
   490
		{
sl@0
   491
		/** Paragraph/line separators should be downgraded (if necessary) 
sl@0
   492
		into carriage return and line feed pairs. */
sl@0
   493
		EDowngradeExoticLineTerminatingCharactersToCarriageReturnLineFeed,
sl@0
   494
		/** Paragraph/line separators should be downgraded (if necessary) 
sl@0
   495
		into a line feed only. */
sl@0
   496
		EDowngradeExoticLineTerminatingCharactersToJustLineFeed
sl@0
   497
		};
sl@0
   498
sl@0
   499
	/** Output flag used to indicate whether or not a character in the source
sl@0
   500
	descriptor is the first half of a surrogate pair, but is the last
sl@0
   501
	character in the descriptor to convert.
sl@0
   502
	 
sl@0
   503
	Note: This enumeration can be used in the DoConvertToUnicode() and
sl@0
   504
	DoConvertFromUnicode() functions. These are part of the
sl@0
   505
	Character Conversion Plug-in Provider API and are for use by plug-in
sl@0
   506
	conversion libraries only.
sl@0
   507
	@since 6.0 */
sl@0
   508
	enum
sl@0
   509
		{
sl@0
   510
		/** Appends the converted text to the output descriptor.*/
sl@0
   511
		EInputConversionFlagAppend	=0x00010000,
sl@0
   512
		/** By default, when the input descriptor passed to DoConvertFromUnicode()
sl@0
   513
		or DoConvertToUnicode() consists of nothing but a truncated sequence, 
sl@0
   514
		the error-code EErrorIllFormedInput is returned. 
sl@0
   515
		If this behaviour is undesirable, the input flag  
sl@0
   516
		EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable
sl@0
   517
		should be set. */
sl@0
   518
		EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable	=0x00020000,
sl@0
   519
		/** Stops converting when the first unconvertible character is reached. */
sl@0
   520
		EInputConversionFlagStopAtFirstUnconvertibleCharacter			=0x00040000,
sl@0
   521
		/** Appends the default character set Escape sequence at end of converted text */
sl@0
   522
		EInputConversionFlagMustEndInDefaultCharacterSet				=0x00080000,
sl@0
   523
		/*defect fix: INC053609; According to RFC1468 we can assume the line starts 
sl@0
   524
		in ASCII so there is no need to always insert an escape sequence*/
sl@0
   525
		EInputConversionFlagAssumeStartInDefaultCharacterSet			=0x00100000
sl@0
   526
		};
sl@0
   527
	enum
sl@0
   528
		{
sl@0
   529
		/** Indicates whether or not the source descriptor ends in a truncated
sl@0
   530
		sequence, e.g. the first half only of a surrogate pair. */
sl@0
   531
		EOutputConversionFlagInputIsTruncated							=0x01000000
sl@0
   532
		};
sl@0
   533
sl@0
   534
		/** Initial value for the state argument in a set of related calls to
sl@0
   535
		ConvertToUnicode(). */
sl@0
   536
	enum {KStateDefault=0};
sl@0
   537
	enum 
sl@0
   538
		{
sl@0
   539
		/** The lowest confidence value for a character set accepted by 
sl@0
   540
		Autodetect*/
sl@0
   541
		ELowestThreshold = 25
sl@0
   542
		};
sl@0
   543
		
sl@0
   544
	/** Stores information about a non-Unicode character set. The information 
sl@0
   545
	is used	to locate the conversion information required by 
sl@0
   546
	ConvertFromUnicode() and ConvertToUnicode().
sl@0
   547
sl@0
   548
	An array of these structs that contain all available character sets 
sl@0
   549
	can be generated by CreateArrayOfCharacterSetsAvailableLC() and 
sl@0
   550
	CreateArrayOfCharacterSetsAvailableL(), and is used by one of the 
sl@0
   551
	overloads of PrepareToConvertToOrFromL(). */
sl@0
   552
	struct SCharacterSet
sl@0
   553
		{
sl@0
   554
		/** Gets the character sets UID.
sl@0
   555
	
sl@0
   556
		@return The UID of the character set. */
sl@0
   557
		inline TUint Identifier() const {return iIdentifier;}
sl@0
   558
sl@0
   559
		/** Tests whether a filename given by the function SCharacterSet::Name() 
sl@0
   560
		is a real file name (i.e. conversion is provided by a plug in DLL), or 
sl@0
   561
		just the character set name (i.e. conversion is built into Symbian OS).
sl@0
   562
		
sl@0
   563
		Note: If the function returns ETrue then the path and filename can be 
sl@0
   564
		parsed using TParse or TParsePtrC functions to obtain just the filename.
sl@0
   565
		
sl@0
   566
		@return ETrue if the name is a real filename. EFalse if it is just the 
sl@0
   567
		character set name. */
sl@0
   568
		inline TBool NameIsFileName() const {return iFlags&EFlagNameIsFileName;}
sl@0
   569
sl@0
   570
		/** Gets the full path and filename of the DLL which implements 
sl@0
   571
		conversion for the character set. 
sl@0
   572
		
sl@0
   573
		If the character set is one for which conversion is built into Symbian 
sl@0
   574
		OS rather than implemented by a plug in DLL, the function just returns 
sl@0
   575
		the name of the character set. The NameIsFileName() function can be 
sl@0
   576
		used to determine whether or not it is legal to create a TParsePtrC 
sl@0
   577
		object over the descriptor 	returned by Name().
sl@0
   578
		
sl@0
   579
		Notes:
sl@0
   580
		
sl@0
   581
		The name returned cannot be treated as an Internet-standard name, it 
sl@0
   582
		is locale-independent and should be mapped to the locale-dependent name 
sl@0
   583
		by software at a higher level before being shown to the user. Conversion 
sl@0
   584
		from Internet-standard names of character sets to the UID identifiers 
sl@0
   585
		is provided by the member function 
sl@0
   586
		ConvertStandardNameOfCharacterSetToIdentifierL().
sl@0
   587
		
sl@0
   588
		Typically, to find the user-displayable name (as opposed to the 
sl@0
   589
		internet-standard name) of a character set, you would do something 
sl@0
   590
		like this:
sl@0
   591
		
sl@0
   592
		@code
sl@0
   593
		const CCnvCharacterSetConverter::SCharacterSet& characterSet=...;
sl@0
   594
		const TPtrC userDisplayable(characterSet.NameIsFileName()? TParsePtrC(characterSet.Name()).Name(): 
sl@0
   595
		characterSet.Name()); 
sl@0
   596
		@endcode
sl@0
   597
sl@0
   598
		@return Full path and filename of the character set converter plug in 
sl@0
   599
		DLL, or just the name of the character set. */
sl@0
   600
		inline TPtrC Name() const {return *iName;}
sl@0
   601
	private:
sl@0
   602
		enum
sl@0
   603
			{
sl@0
   604
			EFlagNameIsFileName					=0x00000001,
sl@0
   605
			EFlagFileIsConversionPlugInLibrary	=0x00000002
sl@0
   606
			};
sl@0
   607
	private:
sl@0
   608
		inline TBool FileIsConversionPlugInLibrary() const {return iFlags&EFlagFileIsConversionPlugInLibrary;}
sl@0
   609
	private:
sl@0
   610
		TUint iIdentifier;
sl@0
   611
		TUint iFlags;
sl@0
   612
		HBufC* iName;
sl@0
   613
	private:
sl@0
   614
		friend class CCnvCharacterSetConverter;
sl@0
   615
		friend class CDeepDestructingArrayOfCharactersSets;
sl@0
   616
		}; //SCharacterSet
sl@0
   617
	
sl@0
   618
sl@0
   619
	/** 
sl@0
   620
	Holds an ascending array of the indices of the characters in the 
sl@0
   621
	source Unicode text which could not be converted by 
sl@0
   622
	CCnvCharacterSetConverter::ConvertFromUnicode() into the foreign 
sl@0
   623
	character set 
sl@0
   624
	@publishedAll
sl@0
   625
	@released
sl@0
   626
	*/
sl@0
   627
	class TArrayOfAscendingIndices
sl@0
   628
		{
sl@0
   629
	public:
sl@0
   630
		/** The return value of CCnvCharacterSetConverter::AppendIndex(). */
sl@0
   631
		enum TAppendResult
sl@0
   632
			{
sl@0
   633
			/** The append failed. */
sl@0
   634
			EAppendFailed,
sl@0
   635
			/** The append succeeded. */
sl@0
   636
			EAppendSuccessful
sl@0
   637
			};
sl@0
   638
	public:
sl@0
   639
		/** C++ constructor. The array is initialised to be of length zero. */
sl@0
   640
		inline TArrayOfAscendingIndices() :iArrayOfIndices(0) {}
sl@0
   641
	
sl@0
   642
		IMPORT_C TAppendResult AppendIndex(TInt aIndex);
sl@0
   643
		
sl@0
   644
		/** Deletes a single index from the array.
sl@0
   645
		
sl@0
   646
		@param aIndexOfIndex The index of the index to delete. Must not be 
sl@0
   647
		negative and must not be greater than the length of the array, or a 
sl@0
   648
		panic occurs. */
sl@0
   649
		inline void Remove(TInt aIndexOfIndex) {iArrayOfIndices.Delete(aIndexOfIndex, 1);}
sl@0
   650
		
sl@0
   651
		/** Deletes all indices from the array. */
sl@0
   652
		inline void RemoveAll() {iArrayOfIndices.SetLength(0);}
sl@0
   653
sl@0
   654
		/** Returns the number of indices in the array.
sl@0
   655
	
sl@0
   656
		@return The number of indices in the array. */
sl@0
   657
		inline TInt NumberOfIndices() const {return iArrayOfIndices.Length();}
sl@0
   658
sl@0
   659
		/** Gets the value of the specified index.
sl@0
   660
	
sl@0
   661
		@param aIndexOfIndex Index into the array.
sl@0
   662
		@return The value of the index. */
sl@0
   663
		inline TInt operator[](TInt aIndexOfIndex) const {return iArrayOfIndices[aIndexOfIndex];}
sl@0
   664
	private:
sl@0
   665
		enum {KMaximumNumberOfIndices=25};
sl@0
   666
	private:
sl@0
   667
		TBuf16<KMaximumNumberOfIndices> iArrayOfIndices;
sl@0
   668
		};
sl@0
   669
public:
sl@0
   670
	IMPORT_C static CCnvCharacterSetConverter* NewL();
sl@0
   671
	IMPORT_C static CCnvCharacterSetConverter* NewLC();
sl@0
   672
	IMPORT_C virtual ~CCnvCharacterSetConverter();
sl@0
   673
	IMPORT_C static CArrayFix<SCharacterSet>* CreateArrayOfCharacterSetsAvailableL(RFs& aFileServerSession);
sl@0
   674
	IMPORT_C static CArrayFix<SCharacterSet>* CreateArrayOfCharacterSetsAvailableLC(RFs& aFileServerSession);
sl@0
   675
	IMPORT_C TUint ConvertStandardNameOfCharacterSetToIdentifierL(const TDesC8& aStandardNameOfCharacterSet, RFs& aFileServerSession);
sl@0
   676
	IMPORT_C HBufC8* ConvertCharacterSetIdentifierToStandardNameL(TUint aCharacterSetIdentifier, RFs& aFileServerSession);
sl@0
   677
	IMPORT_C TUint ConvertMibEnumOfCharacterSetToIdentifierL(TInt aMibEnumOfCharacterSet, RFs& aFileServerSession);
sl@0
   678
	IMPORT_C TInt ConvertCharacterSetIdentifierToMibEnumL(TUint aCharacterSetIdentifier, RFs& aFileServerSession);
sl@0
   679
	IMPORT_C void PrepareToConvertToOrFromL(TUint aCharacterSetIdentifier, const CArrayFix<SCharacterSet>& aArrayOfCharacterSetsAvailable, RFs& aFileServerSession);
sl@0
   680
	IMPORT_C TAvailability PrepareToConvertToOrFromL(TUint aCharacterSetIdentifier, RFs& aFileServerSession);
sl@0
   681
	// the following attribute-setting functions should be called (if at all) after calling PrepareToConvertToOrFromL and before calling ConvertFromUnicode and/or ConvertToUnicode
sl@0
   682
	IMPORT_C void SetDefaultEndiannessOfForeignCharacters(TEndianness aEndianness);
sl@0
   683
	IMPORT_C void SetDowngradeForExoticLineTerminatingCharacters(TDowngradeForExoticLineTerminatingCharacters aDowngradeForExoticLineTerminatingCharacters); // by default this attribute is set to EDowngradeExoticLineTerminatingCharactersToCarriageReturnLineFeed
sl@0
   684
	IMPORT_C void SetReplacementForUnconvertibleUnicodeCharactersL(const TDesC8& aReplacementForUnconvertibleUnicodeCharacters); // must be a single character preceded by its escape sequence (if any), and must be little-endian if the endianness of the character-set is unspecified, otherwise in the same endianness as the character-set
sl@0
   685
	
sl@0
   686
	// the conversion functions return either one of the TError values above, or the number of unconverted elements left at the end of the input descriptor
sl@0
   687
	IMPORT_C TInt ConvertFromUnicode(TDes8& aForeign, const TDesC16& aUnicode) const;
sl@0
   688
	IMPORT_C TInt ConvertFromUnicode(TDes8& aForeign, const TDesC16& aUnicode, TInt& aNumberOfUnconvertibleCharacters) const;
sl@0
   689
	IMPORT_C TInt ConvertFromUnicode(TDes8& aForeign, const TDesC16& aUnicode, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstUnconvertibleCharacter) const;
sl@0
   690
	IMPORT_C TInt ConvertFromUnicode(TDes8& aForeign, const TDesC16& aUnicode, TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters) const;
sl@0
   691
	IMPORT_C TInt ConvertToUnicode(TDes16& aUnicode, const TDesC8& aForeign, TInt& aState) const;
sl@0
   692
	IMPORT_C TInt ConvertToUnicode(TDes16& aUnicode, const TDesC8& aForeign, TInt& aState, TInt& aNumberOfUnconvertibleCharacters) const;
sl@0
   693
	IMPORT_C TInt ConvertToUnicode(TDes16& aUnicode, const TDesC8& aForeign, TInt& aState, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter) const;
sl@0
   694
	IMPORT_C static void AutoDetectCharacterSetL(TInt& aConfidenceLevel, TUint& aCharacterSetIdentifier, const CArrayFix<SCharacterSet>& aArrayOfCharacterSetsAvailable, const TDesC8& aSample);
sl@0
   695
	IMPORT_C void AutoDetectCharSetL(TInt& aConfidenceLevel, TUint& aCharacterSetIdentifier, const CArrayFix<SCharacterSet>& aArrayOfCharacterSetsAvailable, const TDesC8& aSample);
sl@0
   696
	IMPORT_C static void ConvertibleToCharacterSetL(TInt& aConfidenceLevel, const TUint aCharacterSetIdentifier,const CArrayFix<SCharacterSet>& aArrayOfCharacterSetsAvailable, const TDesC8& aSample);
sl@0
   697
    IMPORT_C void ConvertibleToCharSetL(TInt& aConfidenceLevel, const TUint aCharacterSetIdentifier,const CArrayFix<SCharacterSet>& aArrayOfCharacterSetsAvailable, const TDesC8& aSample);
sl@0
   698
	IMPORT_C void SetMaxCacheSize(TInt aSize);
sl@0
   699
	// the following functions are only to be called by conversion plug-in libraries
sl@0
   700
	IMPORT_C static TInt DoConvertFromUnicode(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters, const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, TDes8& aForeign, const TDesC16& aUnicode, TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
sl@0
   701
	IMPORT_C static TInt DoConvertFromUnicode(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters, const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, TDes8& aForeign, const TDesC16& aUnicode, TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, TUint& aOutputConversionFlags, TUint aInputConversionFlags);
sl@0
   702
	IMPORT_C static TInt DoConvertToUnicode(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aForeign, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
sl@0
   703
	IMPORT_C static TInt DoConvertToUnicode(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aForeign, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, TUint& aOutputConversionFlags, TUint aInputConversionFlags);
sl@0
   704
	IMPORT_C static const SCnvConversionData& AsciiConversionData();
sl@0
   705
	inline TDowngradeForExoticLineTerminatingCharacters GetDowngradeForExoticLineTerminatingCharacters () 
sl@0
   706
		{
sl@0
   707
		return iDowngradeForExoticLineTerminatingCharacters ;
sl@0
   708
		} ; 
sl@0
   709
sl@0
   710
private:
sl@0
   711
	enum
sl@0
   712
		{
sl@0
   713
		EStoredFlagOwnsConversionData				=0x00000001,
sl@0
   714
		EStoredFlagConversionPlugInLibraryIsLoaded	=0x00000002
sl@0
   715
		};
sl@0
   716
	enum TCharacterSetSearch
sl@0
   717
		{
sl@0
   718
		EStopCharacterSetSearch,
sl@0
   719
		EContinueCharacterSetSearch
sl@0
   720
		};
sl@0
   721
	enum TConversionPlugInFunctionOrdinals
sl@0
   722
		{
sl@0
   723
		EReplacementForUnconvertibleUnicodeCharacters=1,
sl@0
   724
		EConvertFromUnicode=2,
sl@0
   725
		EConvertToUnicode=3,
sl@0
   726
		EIsInThisCharacterSet=4
sl@0
   727
		};
sl@0
   728
		
sl@0
   729
private:
sl@0
   730
	CCnvCharacterSetConverter();
sl@0
   731
	void ConstructL();
sl@0
   732
	static CArrayFix<SCharacterSet>* DoCreateArrayOfCharacterSetsAvailableLC(RFs& aFileServerSession, TUint aIdentifierOfOnlyCharacterSetOfInterest);
sl@0
   733
	static TCharacterSetSearch AppendHardCodedCharacterSetIfRequiredL(CArrayFix<SCharacterSet>& aArrayOfCharacterSets, TUint aIdentifierOfOnlyCharacterSetOfInterest, TUint aIdentifierOfHardCodedCharacterSet, const TDesC& aNameOfHardCodedCharacterSet);
sl@0
   734
	void ScanForStandardNamesAndMibEnumsL(RFs& aFileServerSession);
sl@0
   735
	void ScanForStandardNamesAndMibEnumsROMOnlyL(RFs& aFileServerSession);
sl@0
   736
	TAvailability DoPrepareToConvertToOrFromL(TUint aCharacterSetIdentifier, const CArrayFix<SCharacterSet>* aArrayOfCharacterSetsAvailable, RFs& aFileServerSession);
sl@0
   737
	static void DeleteConversionData(const SCnvConversionData* aConversionData);
sl@0
   738
	static void DeleteConversionData(TAny* aConversionData);
sl@0
   739
	static TEndianness EndiannessOfForeignCharacters(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters);
sl@0
   740
sl@0
   741
private:
sl@0
   742
	TUint iStoredFlags;
sl@0
   743
	TUint iCharacterSetIdentifierOfLoadedConversionData; // 0 or a UID of the loaded plugin
sl@0
   744
	const SCnvConversionData* iConversionData;
sl@0
   745
	TEndianness iDefaultEndiannessOfForeignCharacters;
sl@0
   746
	TDowngradeForExoticLineTerminatingCharacters iDowngradeForExoticLineTerminatingCharacters;
sl@0
   747
	TBuf8<KMaximumLengthOfReplacementForUnconvertibleUnicodeCharacters> iReplacementForUnconvertibleUnicodeCharacters;
sl@0
   748
	CStandardNamesAndMibEnums* iStandardNamesAndMibEnums;
sl@0
   749
	TBool iTlsDataConstructed;
sl@0
   750
	CCharsetCnvCache* iCharsetCnvCache;
sl@0
   751
	TBool iIsSystemStandardNamesAndMibEnumsScanned;
sl@0
   752
	};
sl@0
   753
sl@0
   754
#endif
sl@0
   755