epoc32/include/charconv.h
author William Roberts <williamr@symbian.org>
Wed, 31 Mar 2010 12:33:34 +0100
branchSymbian3
changeset 4 837f303aceeb
parent 2 2fe1408b6811
permissions -rw-r--r--
Current Symbian^3 public API header files (from PDK 3.0.h)
This is the epoc32/include tree with the "platform" subtrees removed, and
all but a selected few mbg and rsg files removed.
williamr@4
     1
/*
williamr@4
     2
* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
williamr@4
     3
* All rights reserved.
williamr@4
     4
* This component and the accompanying materials are made available
williamr@4
     5
* under the terms of "Eclipse Public License v1.0"
williamr@4
     6
* which accompanies this distribution, and is available
williamr@4
     7
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
williamr@4
     8
*
williamr@4
     9
* Initial Contributors:
williamr@4
    10
* Nokia Corporation - initial contribution.
williamr@4
    11
*
williamr@4
    12
* Contributors:
williamr@4
    13
*
williamr@4
    14
* Description: 
williamr@4
    15
*
williamr@4
    16
*/
williamr@4
    17
williamr@2
    18
williamr@2
    19
#if !defined(__CHARCONV_H__)
williamr@2
    20
#define __CHARCONV_H__
williamr@2
    21
williamr@2
    22
#if !defined(__E32STD_H__)
williamr@2
    23
#include <e32std.h>
williamr@2
    24
#endif
williamr@2
    25
williamr@2
    26
#if !defined(__E32BASE_H__)
williamr@2
    27
#include <e32base.h>
williamr@2
    28
#endif
williamr@2
    29
williamr@2
    30
/** 
williamr@2
    31
The maximum length in bytes of the replacement text for unconvertible Unicode 
williamr@2
    32
characters (=50) (see CCnvCharacterSetConverter::SetReplacementForUnconvertibleUnicodeCharactersL()). 
williamr@2
    33
@publishedAll
williamr@2
    34
@released
williamr@2
    35
*/
williamr@2
    36
const TInt KMaximumLengthOfReplacementForUnconvertibleUnicodeCharacters=50;
williamr@2
    37
williamr@2
    38
/** 
williamr@2
    39
UTF-7 
williamr@2
    40
@publishedAll
williamr@2
    41
@released
williamr@2
    42
*/
williamr@2
    43
const TUint KCharacterSetIdentifierUtf7=0x1000582c;
williamr@2
    44
/** 
williamr@2
    45
UTF-8 
williamr@2
    46
@publishedAll
williamr@2
    47
@released
williamr@2
    48
*/
williamr@2
    49
const TUint KCharacterSetIdentifierUtf8=0x1000582d;
williamr@2
    50
/** 
williamr@2
    51
IMAP UTF-7 
williamr@2
    52
@publishedAll
williamr@2
    53
@released
williamr@2
    54
*/
williamr@2
    55
const TUint KCharacterSetIdentifierImapUtf7=0x1000582e;
williamr@2
    56
/** 
williamr@2
    57
Java UTF-8 
williamr@2
    58
@publishedAll
williamr@2
    59
@released
williamr@2
    60
*/
williamr@2
    61
const TUint KCharacterSetIdentifierJavaConformantUtf8=0x1000582f;
williamr@2
    62
/** 
williamr@2
    63
Code Page 1252 
williamr@2
    64
@publishedAll
williamr@2
    65
@released
williamr@2
    66
*/
williamr@2
    67
const TUint KCharacterSetIdentifierCodePage1252=0x100012b6;
williamr@2
    68
/** 
williamr@2
    69
ISO 8859-1 
williamr@2
    70
@publishedAll
williamr@2
    71
@released
williamr@2
    72
*/
williamr@2
    73
const TUint KCharacterSetIdentifierIso88591=0x10003b10;
williamr@2
    74
/** 
williamr@2
    75
ISO 8859-2 
williamr@2
    76
@publishedAll
williamr@2
    77
@released
williamr@2
    78
*/
williamr@2
    79
const TUint KCharacterSetIdentifierIso88592=0x1000507e;
williamr@2
    80
/** 
williamr@2
    81
ISO 8859-3 
williamr@2
    82
@publishedAll
williamr@2
    83
@released
williamr@2
    84
*/
williamr@2
    85
const TUint KCharacterSetIdentifierIso88593=0x10008a28;
williamr@2
    86
/** 
williamr@2
    87
ISO 8859-4 
williamr@2
    88
@publishedAll
williamr@2
    89
@released
williamr@2
    90
*/
williamr@2
    91
const TUint KCharacterSetIdentifierIso88594=0x1000507f;
williamr@2
    92
/** 
williamr@2
    93
ISO 8859-5 
williamr@2
    94
@publishedAll
williamr@2
    95
@released
williamr@2
    96
*/
williamr@2
    97
const TUint KCharacterSetIdentifierIso88595=0x10005080;
williamr@2
    98
/** 
williamr@2
    99
ISO 8859-6 
williamr@2
   100
@publishedAll
williamr@2
   101
@released
williamr@2
   102
*/
williamr@2
   103
const TUint KCharacterSetIdentifierIso88596=0x10008a29;
williamr@2
   104
/** 
williamr@2
   105
ISO 8859-7 
williamr@2
   106
@publishedAll
williamr@2
   107
@released
williamr@2
   108
*/
williamr@2
   109
const TUint KCharacterSetIdentifierIso88597=0x10005081;
williamr@2
   110
/** 
williamr@2
   111
ISO 8859-8 
williamr@2
   112
@publishedAll
williamr@2
   113
@released
williamr@2
   114
*/
williamr@2
   115
const TUint KCharacterSetIdentifierIso88598=0x10008a2a;
williamr@2
   116
/** 
williamr@2
   117
ISO 8859-9 
williamr@2
   118
@publishedAll
williamr@2
   119
@released
williamr@2
   120
*/
williamr@2
   121
const TUint KCharacterSetIdentifierIso88599=0x10005082;
williamr@2
   122
/** 
williamr@2
   123
ISO 8859-10 
williamr@2
   124
@publishedAll
williamr@2
   125
@released
williamr@2
   126
*/
williamr@2
   127
const TUint KCharacterSetIdentifierIso885910=0x10008a2b;
williamr@2
   128
/** 
williamr@2
   129
ISO 8859-13 
williamr@2
   130
@publishedAll
williamr@2
   131
@released
williamr@2
   132
*/
williamr@2
   133
const TUint KCharacterSetIdentifierIso885913=0x10008a2c;
williamr@2
   134
/** 
williamr@2
   135
ISO 8859-14 
williamr@2
   136
@publishedAll
williamr@2
   137
@released
williamr@2
   138
*/
williamr@2
   139
const TUint KCharacterSetIdentifierIso885914=0x10008a2d;
williamr@2
   140
/** 
williamr@2
   141
ISO 8859-15 
williamr@2
   142
@publishedAll
williamr@2
   143
@released
williamr@2
   144
*/
williamr@2
   145
const TUint KCharacterSetIdentifierIso885915=0x10008a2e;
williamr@2
   146
/** 
williamr@2
   147
ASCII 
williamr@2
   148
@publishedAll
williamr@2
   149
@released
williamr@2
   150
*/
williamr@2
   151
const TUint KCharacterSetIdentifierAscii=0x10004cc6;
williamr@2
   152
/** 
williamr@2
   153
SMS 7-bit 
williamr@2
   154
@publishedAll
williamr@2
   155
@released
williamr@2
   156
*/
williamr@2
   157
const TUint KCharacterSetIdentifierSms7Bit=0x100053ab;
williamr@2
   158
/** 
williamr@2
   159
GB 2312 
williamr@2
   160
@publishedAll
williamr@2
   161
@released
williamr@2
   162
*/
williamr@2
   163
const TUint KCharacterSetIdentifierGb2312=0x10000fbe;
williamr@2
   164
/** 
williamr@2
   165
HZ-GB-2312 
williamr@2
   166
@publishedAll
williamr@2
   167
@released
williamr@2
   168
*/
williamr@2
   169
const TUint KCharacterSetIdentifierHz=0x10006065;
williamr@2
   170
/** 
williamr@2
   171
GB 12345 
williamr@2
   172
@publishedAll
williamr@2
   173
@released
williamr@2
   174
*/
williamr@2
   175
const TUint KCharacterSetIdentifierGb12345=0x1000401a;
williamr@2
   176
/** 
williamr@2
   177
GBK 
williamr@2
   178
@publishedAll
williamr@2
   179
@released
williamr@2
   180
*/
williamr@2
   181
const TUint KCharacterSetIdentifierGbk=0x10003ecb;
williamr@2
   182
/** 
williamr@4
   183
GB18030
williamr@4
   184
@publishedAll
williamr@4
   185
@released
williamr@4
   186
*/
williamr@4
   187
const TUint KCharacterSetIdentifierGb18030=0x10287038;
williamr@4
   188
/** 
williamr@2
   189
Big 5 
williamr@2
   190
@publishedAll
williamr@2
   191
@released
williamr@2
   192
*/
williamr@2
   193
const TUint KCharacterSetIdentifierBig5=0x10000fbf;
williamr@2
   194
/** 
williamr@2
   195
Shift-JIS 
williamr@2
   196
@publishedAll
williamr@2
   197
@released
williamr@2
   198
*/
williamr@2
   199
const TUint KCharacterSetIdentifierShiftJis=0x10000fbd;
williamr@2
   200
/** 
williamr@2
   201
ISO-2022-JP 
williamr@2
   202
@publishedAll
williamr@2
   203
@released
williamr@2
   204
*/
williamr@2
   205
const TUint KCharacterSetIdentifierIso2022Jp=0x100066a0;
williamr@2
   206
/** 
williamr@2
   207
ISO-2022-JP-1 
williamr@2
   208
@publishedAll
williamr@2
   209
@released
williamr@2
   210
*/
williamr@2
   211
const TUint KCharacterSetIdentifierIso2022Jp1=0x100066a3;
williamr@2
   212
/** 
williamr@2
   213
JIS Encoding 
williamr@2
   214
@publishedAll
williamr@2
   215
@released
williamr@2
   216
*/
williamr@2
   217
const TUint KCharacterSetIdentifierJis=0x10006066;
williamr@2
   218
/** 
williamr@2
   219
EUC-JP 
williamr@2
   220
@publishedAll
williamr@2
   221
@released
williamr@2
   222
*/
williamr@2
   223
const TUint KCharacterSetIdentifierEucJpPacked=0x10006067;
williamr@2
   224
williamr@2
   225
/** 
williamr@2
   226
JP5 
williamr@2
   227
@publishedAll
williamr@2
   228
@released
williamr@2
   229
*/
williamr@2
   230
const TUint KCharacterSetIdentifierJ5=0x1020D408;
williamr@2
   231
/** 
williamr@2
   232
CP850 
williamr@2
   233
@publishedAll
williamr@2
   234
@released
williamr@2
   235
*/
williamr@2
   236
const TUint KCharacterSetIdentifierCP850=0x102825AD;
williamr@2
   237
williamr@2
   238
const TUint KCharacterSetIdentifierUnicodeLittle=0x101f3fae;  //Little Endian Unicode
williamr@2
   239
const TUint KCharacterSetIdentifierUnicodeBig=0x101f4052; // Big Endian Unicode 
williamr@4
   240
const TUint KCharacterSetIdentifierUcs2=0x101ff492; 
williamr@4
   241
williamr@2
   242
williamr@2
   243
/** 
williamr@4
   244
Extended SMS 7-bit 
williamr@2
   245
@publishedAll
williamr@2
   246
@released
williamr@2
   247
*/
williamr@2
   248
const TUint KCharacterSetIdentifierExtendedSms7Bit=0x102863FD;
williamr@2
   249
williamr@2
   250
/** 
williamr@2
   251
Turkish 
williamr@2
   252
@publishedAll
williamr@2
   253
@released
williamr@2
   254
*/
williamr@2
   255
const TUint KCharacterSetIdentifierTurkishSingleSms7Bit=0x102863FE;
williamr@2
   256
const TUint KCharacterSetIdentifierTurkishLockingSms7Bit=0x102863FF;
williamr@2
   257
const TUint KCharacterSetIdentifierTurkishLockingAndSingleSms7Bit=0x10286400;
williamr@2
   258
williamr@2
   259
/** 
williamr@2
   260
Portuguese 
williamr@2
   261
@publishedAll
williamr@2
   262
@released
williamr@2
   263
*/
williamr@2
   264
const TUint KCharacterSetIdentifierPortugueseSingleSms7Bit=0x10286407;
williamr@2
   265
const TUint KCharacterSetIdentifierPortugueseLockingSms7Bit=0x10286408;
williamr@2
   266
const TUint KCharacterSetIdentifierPortugueseLockingAndSingleSms7Bit=0x10286409;
williamr@2
   267
williamr@2
   268
/** 
williamr@2
   269
Spanish
williamr@2
   270
@publishedAll
williamr@2
   271
@released
williamr@2
   272
*/
williamr@2
   273
const TUint KCharacterSetIdentifierSpanishSingleSms7Bit=0x1028640A;
williamr@4
   274
williamr@4
   275
williamr@4
   276
/**
williamr@4
   277
code page 949
williamr@4
   278
@publishedAll
williamr@4
   279
@released
williamr@4
   280
*/
williamr@4
   281
const TUint KCharacterSetIdentifierCP949=0x200100FF;
williamr@4
   282
williamr@4
   283
/**
williamr@4
   284
Shift-JIS with Pictograph
williamr@4
   285
@publishedAll
williamr@4
   286
@released 
williamr@4
   287
*/
williamr@4
   288
const TUint KCharacterSetIdentifierShiftJisDirectmap=0x101F8691;
williamr@4
   289
williamr@4
   290
/**
williamr@4
   291
EUC-JP with direct mapped pictograph
williamr@4
   292
@publishedAll
williamr@4
   293
@released 
williamr@4
   294
*/
williamr@4
   295
const TUint KCharacterSetIdentifierEucJpDirectmap=0x101F86A6;
williamr@4
   296
williamr@4
   297
/**
williamr@4
   298
EUC-KR 
williamr@4
   299
@publishedAll
williamr@4
   300
@released
williamr@4
   301
*/
williamr@4
   302
const TUint KCharacterSetIdentifierEUCKR=0x2000E526;
williamr@4
   303
williamr@4
   304
/**
williamr@4
   305
ISCII 
williamr@4
   306
@publishedAll
williamr@4
   307
@released
williamr@4
   308
*/
williamr@4
   309
const TUint KCharacterSetIdentifierIscii=0x1027508E;
williamr@4
   310
williamr@4
   311
/**
williamr@4
   312
ISO2022 Korean
williamr@4
   313
@publishedAll
williamr@4
   314
@released
williamr@4
   315
*/
williamr@4
   316
const TUint KCharacterSetIdentifierIso2022kr=0x20010101;
williamr@4
   317
williamr@4
   318
/**
williamr@4
   319
KOI8-R Russian
williamr@4
   320
@publishedAll
williamr@4
   321
@released
williamr@4
   322
*/
williamr@4
   323
const TUint KCharacterSetIdentifierKOI8R=0x101F8778;
williamr@4
   324
williamr@4
   325
/**
williamr@4
   326
KOI8-U Belarusian/Ukrainian Cyrillic
williamr@4
   327
@publishedAll
williamr@4
   328
@released 
williamr@4
   329
*/
williamr@4
   330
const TUint KCharacterSetIdentifierKOI8U=0x101F8761;
williamr@4
   331
williamr@4
   332
/**
williamr@4
   333
KSC5601 Korean
williamr@4
   334
@publishedAll
williamr@4
   335
@released 
williamr@4
   336
*/
williamr@4
   337
const TUint KCharacterSetIdentifierKsc5601=0x200113CD;
williamr@4
   338
williamr@4
   339
/**
williamr@4
   340
TIS_620 Thai
williamr@4
   341
@publishedAll
williamr@4
   342
@released 
williamr@4
   343
*/
williamr@4
   344
const TUint KCharacterSetIdentifierTIS_620=0x101F8549;
williamr@4
   345
williamr@4
   346
/**
williamr@4
   347
Code page 874 Thai
williamr@4
   348
@publishedAll
williamr@4
   349
@released 
williamr@4
   350
*/
williamr@4
   351
const TUint KCharacterSetIdentifierWin874=0x101F854A;
williamr@4
   352
williamr@4
   353
/**
williamr@4
   354
Code page 1250 Eastern European
williamr@4
   355
@publishedAll
williamr@4
   356
@released 
williamr@4
   357
*/
williamr@4
   358
const TUint KCharacterSetIdentifierWin1250=0x100059D6;
williamr@4
   359
williamr@4
   360
/**
williamr@4
   361
Code page 1251 Cyrillic
williamr@4
   362
@publishedAll
williamr@4
   363
@released 
williamr@4
   364
*/
williamr@4
   365
const TUint KCharacterSetIdentifierWin1251=0x100059D7;
williamr@4
   366
williamr@4
   367
/**
williamr@4
   368
Code page 1253 Greek
williamr@4
   369
@publishedAll
williamr@4
   370
@released 
williamr@4
   371
*/
williamr@4
   372
const TUint KCharacterSetIdentifierWin1253=0x100059D8;
williamr@4
   373
williamr@4
   374
/**
williamr@4
   375
Code page 1254 Turkish
williamr@4
   376
@publishedAll
williamr@4
   377
@released 
williamr@4
   378
*/
williamr@4
   379
const TUint KCharacterSetIdentifierWin1254=0x100059D9;
williamr@4
   380
williamr@4
   381
/**
williamr@4
   382
Code page 1255 Hebrew
williamr@4
   383
@publishedAll
williamr@4
   384
@released 
williamr@4
   385
*/
williamr@4
   386
const TUint KCharacterSetIdentifierWin1255=0x101F8547;
williamr@4
   387
williamr@4
   388
/**
williamr@4
   389
Code page 1256 Arabic
williamr@4
   390
@publishedAll
williamr@4
   391
@released 
williamr@4
   392
*/
williamr@4
   393
const TUint KCharacterSetIdentifierWin1256=0x101F8548;
williamr@4
   394
williamr@4
   395
/**
williamr@4
   396
Code page 1257 Baltic
williamr@4
   397
@publishedAll
williamr@4
   398
@released 
williamr@4
   399
*/
williamr@4
   400
const TUint KCharacterSetIdentifierWin1257=0x100059DA;
williamr@4
   401
williamr@4
   402
/**
williamr@4
   403
Windows-1258
williamr@4
   404
@publishedAll
williamr@4
   405
@released
williamr@4
   406
*/ 
williamr@4
   407
const TUint KCharacterSetIdentifierWin1258=0x102073B8;
williamr@4
   408
williamr@2
   409
// note that other character sets than those listed above may be available at run-time, and also that none of the above are necessarily available at run-time
williamr@2
   410
williamr@2
   411
struct SCnvConversionData;
williamr@2
   412
class CDeepDestructingArrayOfCharactersSets;
williamr@2
   413
class CFileReader;
williamr@2
   414
class CStandardNamesAndMibEnums;
williamr@2
   415
class RFs;
williamr@2
   416
class CCharsetCnvCache;
williamr@2
   417
/** 
williamr@2
   418
Converts text between Unicode and other character sets. 
williamr@2
   419
williamr@2
   420
The first stage of the conversion is to specify the non-Unicode character 
williamr@2
   421
set being converted to or from. This is done by calling one of the overloads 
williamr@2
   422
of PrepareToConvertToOrFromL().
williamr@2
   423
williamr@2
   424
The second stage is to convert the text, using one of the overloads of 
williamr@2
   425
ConvertFromUnicode() or ConvertToUnicode().
williamr@2
   426
williamr@2
   427
Where possible the first documented overload of PrepareToConvertToOrFromL() 
williamr@2
   428
should be used because the second overload panics if the specified character 
williamr@2
   429
set is not available: the first overload simply returns whether the character 
williamr@2
   430
set is available or not available. However if the conversions are to be 
williamr@2
   431
performed often, or if the user must select the character set for the 
williamr@2
   432
conversion from a list, the second overload may be more appropriate.
williamr@2
   433
williamr@2
   434
The first overload is less efficient than the second, because it searches 
williamr@2
   435
through the file system for the selected character set every time it is invoked. 
williamr@2
   436
The second overload searches through an array of all available character sets. 
williamr@2
   437
In this method, the file system need only be searched once - when 
williamr@2
   438
CreateArrayOfCharacterSetsAvailableLC() or 
williamr@2
   439
CreateArrayOfCharacterSetsAvailableL() is used to create the array.
williamr@2
   440
williamr@2
   441
The conversion functions allow users of this class to perform partial 
williamr@2
   442
conversions on an input descriptor, handling the situation where the input 
williamr@2
   443
descriptor is truncated mid way through a multi-byte character. This means 
williamr@2
   444
that you do not have to guess how big to make the output descriptor for a 
williamr@2
   445
given input descriptor, you can simply do the conversion in a loop using a 
williamr@2
   446
small output descriptor. The ability to handle truncated descriptors also 
williamr@2
   447
allows users of the class to convert information received in chunks from an 
williamr@2
   448
external source.
williamr@2
   449
williamr@2
   450
The class also provides a number of utility functions. 
williamr@2
   451
@publishedAll
williamr@2
   452
@released
williamr@2
   453
*/
williamr@2
   454
class CCnvCharacterSetConverter : public CBase
williamr@2
   455
	{
williamr@2
   456
public:
williamr@4
   457
	/** Indicates whether a character set is available or unavailable
williamr@2
   458
	for conversion. Returned by the overload of PrepareToConvertToOrFromL()
williamr@2
   459
	that takes only a character set identifier and a file server session. */
williamr@2
   460
	enum TAvailability
williamr@2
   461
		{
williamr@2
   462
		/** The requested character set can be converted. */
williamr@2
   463
		EAvailable,
williamr@2
   464
		/** The requested character set cannot be converted. */
williamr@2
   465
		ENotAvailable
williamr@2
   466
		};
williamr@2
   467
williamr@2
   468
	/** Conversion error flags. At this stage there is only one error 
williamr@2
   469
	flag; others may be added in the future. */
williamr@2
   470
	enum TError
williamr@2
   471
		{
williamr@2
   472
		/** The input descriptor contains a single corrupt character. This 
williamr@2
   473
		might occur when the input descriptor only contains some of the bytes 
williamr@2
   474
		of a single multi-byte character. */
williamr@2
   475
		EErrorIllFormedInput=KErrCorrupt
williamr@2
   476
		};
williamr@2
   477
williamr@2
   478
	/** Specifies the default endian-ness of the current character set. 
williamr@2
   479
	Used by SetDefaultEndiannessOfForeignCharacters(). */
williamr@2
   480
	enum TEndianness
williamr@2
   481
		{
williamr@2
   482
		/** The character set is little-endian. */
williamr@2
   483
		ELittleEndian,
williamr@2
   484
		/** The character set is big-endian. */
williamr@2
   485
		EBigEndian
williamr@2
   486
		};
williamr@2
   487
	
williamr@2
   488
	/** Downgrade for line and paragraph separators */
williamr@2
   489
	enum TDowngradeForExoticLineTerminatingCharacters
williamr@2
   490
		{
williamr@2
   491
		/** Paragraph/line separators should be downgraded (if necessary) 
williamr@2
   492
		into carriage return and line feed pairs. */
williamr@2
   493
		EDowngradeExoticLineTerminatingCharactersToCarriageReturnLineFeed,
williamr@2
   494
		/** Paragraph/line separators should be downgraded (if necessary) 
williamr@2
   495
		into a line feed only. */
williamr@2
   496
		EDowngradeExoticLineTerminatingCharactersToJustLineFeed
williamr@2
   497
		};
williamr@2
   498
williamr@2
   499
	/** Input flags that modify how a conversion is carried out; each
williamr@2
   500
	enumerator is documented individually below. (The truncated-input
williamr@2
   501
	indication is the separate EOutputConversionFlagInputIsTruncated flag.)
williamr@2
   502
	 
williamr@2
   503
	Note: This enumeration can be used in the DoConvertToUnicode() and
williamr@2
   504
	DoConvertFromUnicode() functions. These are part of the
williamr@2
   505
	Character Conversion Plug-in Provider API and are for use by plug-in
williamr@2
   506
	conversion libraries only.
williamr@2
   507
	@since 6.0 */
williamr@2
   508
	enum
williamr@2
   509
		{
williamr@2
   510
		/** Appends the converted text to the output descriptor.*/
williamr@2
   511
		EInputConversionFlagAppend	=0x00010000,
williamr@2
   512
		/** By default, when the input descriptor passed to DoConvertFromUnicode()
williamr@2
   513
		or DoConvertToUnicode() consists of nothing but a truncated sequence, 
williamr@2
   514
		the error-code EErrorIllFormedInput is returned. 
williamr@2
   515
		If this behaviour is undesirable, the input flag  
williamr@2
   516
		EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable
williamr@2
   517
		should be set. */
williamr@2
   518
		EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable	=0x00020000,
williamr@2
   519
		/** Stops converting when the first unconvertible character is reached. */
williamr@2
   520
		EInputConversionFlagStopAtFirstUnconvertibleCharacter			=0x00040000,
williamr@2
   521
		/** Appends the default character set Escape sequence at end of converted text */
williamr@2
   522
		EInputConversionFlagMustEndInDefaultCharacterSet				=0x00080000,
williamr@2
   523
		/*defect fix: INC053609; According to RFC1468 we can assume the line starts 
williamr@2
   524
		in ASCII so there is no need to always insert an escape sequence*/
williamr@2
   525
		EInputConversionFlagAssumeStartInDefaultCharacterSet			=0x00100000
williamr@2
   526
		};
williamr@2
   527
	enum
williamr@2
   528
		{
williamr@2
   529
		/** Indicates whether or not the source descriptor ends in a truncated
williamr@2
   530
		sequence, e.g. the first half only of a surrogate pair. */
williamr@2
   531
		EOutputConversionFlagInputIsTruncated							=0x01000000
williamr@2
   532
		};
williamr@2
   533
williamr@2
   534
		/** Initial value for the state argument in a set of related calls to
williamr@2
   535
		ConvertToUnicode(). */
williamr@2
   536
	enum {KStateDefault=0};
williamr@2
   537
	enum 
williamr@2
   538
		{
williamr@2
   539
		/** The lowest confidence value for a character set accepted by 
williamr@2
   540
		Autodetect*/
williamr@2
   541
		ELowestThreshold = 25
williamr@2
   542
		};
williamr@2
   543
		
williamr@2
   544
	/** Stores information about a non-Unicode character set. The information 
williamr@2
   545
	is used	to locate the conversion information required by 
williamr@2
   546
	ConvertFromUnicode() and ConvertToUnicode().
williamr@2
   547
williamr@2
   548
	An array of these structs that contain all available character sets 
williamr@2
   549
	can be generated by CreateArrayOfCharacterSetsAvailableLC() and 
williamr@2
   550
	CreateArrayOfCharacterSetsAvailableL(), and is used by one of the 
williamr@2
   551
	overloads of PrepareToConvertToOrFromL(). */
williamr@2
   552
	struct SCharacterSet
williamr@2
   553
		{
williamr@2
   554
		/** Gets the character set's UID.
williamr@2
   555
	
williamr@2
   556
		@return The UID of the character set. */
williamr@2
   557
		inline TUint Identifier() const {return iIdentifier;}
williamr@2
   558
williamr@2
   559
		/** Tests whether a filename given by the function SCharacterSet::Name() 
williamr@2
   560
		is a real file name (i.e. conversion is provided by a plug in DLL), or 
williamr@2
   561
		just the character set name (i.e. conversion is built into Symbian OS).
williamr@2
   562
		
williamr@2
   563
		Note: If the function returns ETrue then the path and filename can be 
williamr@2
   564
		parsed using TParse or TParsePtrC functions to obtain just the filename.
williamr@2
   565
		
williamr@2
   566
		@return ETrue if the name is a real filename. EFalse if it is just the 
williamr@2
   567
		character set name. */
williamr@2
   568
		inline TBool NameIsFileName() const {return iFlags&EFlagNameIsFileName;}
williamr@2
   569
williamr@2
   570
		/** Gets the full path and filename of the DLL which implements 
williamr@2
   571
		conversion for the character set. 
williamr@2
   572
		
williamr@2
   573
		If the character set is one for which conversion is built into Symbian 
williamr@2
   574
		OS rather than implemented by a plug in DLL, the function just returns 
williamr@2
   575
		the name of the character set. The NameIsFileName() function can be 
williamr@2
   576
		used to determine whether or not it is legal to create a TParsePtrC 
williamr@2
   577
		object over the descriptor 	returned by Name().
williamr@2
   578
		
williamr@2
   579
		Notes:
williamr@2
   580
		
williamr@2
   581
		The name returned cannot be treated as an Internet-standard name, it 
williamr@2
   582
		is locale-independent and should be mapped to the locale-dependent name 
williamr@2
   583
		by software at a higher level before being shown to the user. Conversion 
williamr@2
   584
		from Internet-standard names of character sets to the UID identifiers 
williamr@2
   585
		is provided by the member function 
williamr@2
   586
		ConvertStandardNameOfCharacterSetToIdentifierL().
williamr@2
   587
		
williamr@2
   588
		Typically, to find the user-displayable name (as opposed to the 
williamr@2
   589
		internet-standard name) of a character set, you would do something 
williamr@2
   590
		like this:
williamr@2
   591
		
williamr@2
   592
		@code
williamr@2
   593
		const CCnvCharacterSetConverter::SCharacterSet& characterSet=...;
williamr@2
   594
		const TPtrC userDisplayable(characterSet.NameIsFileName()? TParsePtrC(characterSet.Name()).Name(): 
williamr@2
   595
		characterSet.Name()); 
williamr@2
   596
		@endcode
williamr@2
   597
williamr@2
   598
		@return Full path and filename of the character set converter plug in 
williamr@2
   599
		DLL, or just the name of the character set. */
williamr@2
   600
		inline TPtrC Name() const {return *iName;}
williamr@2
   601
	private:
williamr@2
   602
		enum
williamr@2
   603
			{
williamr@2
   604
			EFlagNameIsFileName					=0x00000001,
williamr@2
   605
			EFlagFileIsConversionPlugInLibrary	=0x00000002
williamr@2
   606
			};
williamr@2
   607
	private:
williamr@2
   608
		inline TBool FileIsConversionPlugInLibrary() const {return iFlags&EFlagFileIsConversionPlugInLibrary;}
williamr@2
   609
	private:
williamr@2
   610
		TUint iIdentifier;
williamr@2
   611
		TUint iFlags;
williamr@2
   612
		HBufC* iName;
williamr@2
   613
	private:
williamr@2
   614
		friend class CCnvCharacterSetConverter;
williamr@2
   615
		friend class CDeepDestructingArrayOfCharactersSets;
williamr@2
   616
		}; //SCharacterSet
williamr@2
   617
	
williamr@2
   618
williamr@2
   619
	/** 
williamr@2
   620
	Holds an ascending array of the indices of the characters in the 
williamr@2
   621
	source Unicode text which could not be converted by 
williamr@2
   622
	CCnvCharacterSetConverter::ConvertFromUnicode() into the foreign 
williamr@2
   623
	character set 
williamr@2
   624
	@publishedAll
williamr@2
   625
	@released
williamr@2
   626
	*/
williamr@2
   627
	class TArrayOfAscendingIndices
williamr@2
   628
		{
williamr@2
   629
	public:
williamr@2
   630
		/** The return value of TArrayOfAscendingIndices::AppendIndex(). */
williamr@2
   631
		enum TAppendResult
williamr@2
   632
			{
williamr@2
   633
			/** The append failed. */
williamr@2
   634
			EAppendFailed,
williamr@2
   635
			/** The append succeeded. */
williamr@2
   636
			EAppendSuccessful
williamr@2
   637
			};
williamr@2
   638
	public:
williamr@2
   639
		/** C++ constructor. The array is initialised to be of length zero. */
williamr@2
   640
		inline TArrayOfAscendingIndices() :iArrayOfIndices(0) {}
williamr@2
   641
	
williamr@2
   642
		IMPORT_C TAppendResult AppendIndex(TInt aIndex);
williamr@2
   643
		
williamr@2
   644
		/** Deletes a single index from the array.
williamr@2
   645
		
williamr@2
   646
		@param aIndexOfIndex The index of the index to delete. Must not be 
williamr@2
   647
		negative and must not be greater than the length of the array, or a 
williamr@2
   648
		panic occurs. */
williamr@2
   649
		inline void Remove(TInt aIndexOfIndex) {iArrayOfIndices.Delete(aIndexOfIndex, 1);}
williamr@2
   650
		
williamr@2
   651
		/** Deletes all indices from the array. */
williamr@2
   652
		inline void RemoveAll() {iArrayOfIndices.SetLength(0);}
williamr@2
   653
williamr@2
   654
		/** Returns the number of indices in the array.
williamr@2
   655
	
williamr@2
   656
		@return The number of indices in the array. */
williamr@2
   657
		inline TInt NumberOfIndices() const {return iArrayOfIndices.Length();}
williamr@2
   658
williamr@2
   659
		/** Gets the value of the specified index.
williamr@2
   660
	
williamr@2
   661
		@param aIndexOfIndex Index into the array.
williamr@2
   662
		@return The value of the index. */
williamr@2
   663
		inline TInt operator[](TInt aIndexOfIndex) const {return iArrayOfIndices[aIndexOfIndex];}
williamr@2
   664
	private:
williamr@2
   665
		enum {KMaximumNumberOfIndices=25};
williamr@2
   666
	private:
williamr@2
   667
		TBuf16<KMaximumNumberOfIndices> iArrayOfIndices;
williamr@2
   668
		};
williamr@2
   669
public:
williamr@2
   670
	IMPORT_C static CCnvCharacterSetConverter* NewL();
williamr@2
   671
	IMPORT_C static CCnvCharacterSetConverter* NewLC();
williamr@2
   672
	IMPORT_C virtual ~CCnvCharacterSetConverter();
williamr@2
   673
	IMPORT_C static CArrayFix<SCharacterSet>* CreateArrayOfCharacterSetsAvailableL(RFs& aFileServerSession);
williamr@2
   674
	IMPORT_C static CArrayFix<SCharacterSet>* CreateArrayOfCharacterSetsAvailableLC(RFs& aFileServerSession);
williamr@2
   675
	IMPORT_C TUint ConvertStandardNameOfCharacterSetToIdentifierL(const TDesC8& aStandardNameOfCharacterSet, RFs& aFileServerSession);
williamr@2
   676
	IMPORT_C HBufC8* ConvertCharacterSetIdentifierToStandardNameL(TUint aCharacterSetIdentifier, RFs& aFileServerSession);
williamr@2
   677
	IMPORT_C TUint ConvertMibEnumOfCharacterSetToIdentifierL(TInt aMibEnumOfCharacterSet, RFs& aFileServerSession);
williamr@2
   678
	IMPORT_C TInt ConvertCharacterSetIdentifierToMibEnumL(TUint aCharacterSetIdentifier, RFs& aFileServerSession);
williamr@2
   679
	IMPORT_C void PrepareToConvertToOrFromL(TUint aCharacterSetIdentifier, const CArrayFix<SCharacterSet>& aArrayOfCharacterSetsAvailable, RFs& aFileServerSession);
williamr@2
   680
	IMPORT_C TAvailability PrepareToConvertToOrFromL(TUint aCharacterSetIdentifier, RFs& aFileServerSession);
williamr@2
   681
	// the following attribute-setting functions should be called (if at all) after calling PrepareToConvertToOrFromL and before calling ConvertFromUnicode and/or ConvertToUnicode
williamr@2
   682
	IMPORT_C void SetDefaultEndiannessOfForeignCharacters(TEndianness aEndianness);
williamr@2
   683
	IMPORT_C void SetDowngradeForExoticLineTerminatingCharacters(TDowngradeForExoticLineTerminatingCharacters aDowngradeForExoticLineTerminatingCharacters); // by default this attribute is set to EDowngradeExoticLineTerminatingCharactersToCarriageReturnLineFeed
williamr@2
   684
	IMPORT_C void SetReplacementForUnconvertibleUnicodeCharactersL(const TDesC8& aReplacementForUnconvertibleUnicodeCharacters); // must be a single character preceded by its escape sequence (if any), and must be little-endian if the endianness of the character-set is unspecified, otherwise in the same endianness as the character-set
williamr@2
   685
	
williamr@2
   686
	// the conversion functions return either one of the TError values above, or the number of unconverted elements left at the end of the input descriptor
williamr@2
   687
	/**
	Overloads converting 16-bit Unicode text (aUnicode) into the 8-bit output descriptor aForeign.
	Each returns either one of the TError values or the number of unconverted elements left at the end of the input descriptor.
	The overloads differ only in how much information about unconvertible characters they report through extra reference parameters.
	NOTE(review): the exact meaning of the extra parameters is inferred from their names - confirm.
	*/
	IMPORT_C TInt ConvertFromUnicode(TDes8& aForeign, const TDesC16& aUnicode) const;
williamr@2
   688
	// Additionally takes aNumberOfUnconvertibleCharacters (presumably an output count).
	IMPORT_C TInt ConvertFromUnicode(TDes8& aForeign, const TDesC16& aUnicode, TInt& aNumberOfUnconvertibleCharacters) const;
williamr@2
   689
	// Additionally takes aIndexOfFirstUnconvertibleCharacter (presumably an output index into aUnicode).
	IMPORT_C TInt ConvertFromUnicode(TDes8& aForeign, const TDesC16& aUnicode, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstUnconvertibleCharacter) const;
williamr@2
   690
	// Takes a TArrayOfAscendingIndices (presumably filled with the indices of every unconvertible character).
	IMPORT_C TInt ConvertFromUnicode(TDes8& aForeign, const TDesC16& aUnicode, TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters) const;
williamr@2
   691
	/**
	Overloads converting 8-bit foreign text (aForeign) into the 16-bit output descriptor aUnicode.
	Each returns either one of the TError values or the number of unconverted elements left at the end of the input descriptor.
	aState is passed by non-const reference. NOTE(review): presumably it carries decoder state between successive calls and must be initialised by the caller before the first call - confirm.
	*/
	IMPORT_C TInt ConvertToUnicode(TDes16& aUnicode, const TDesC8& aForeign, TInt& aState) const;
williamr@2
   692
	// Additionally takes aNumberOfUnconvertibleCharacters (presumably an output count).
	IMPORT_C TInt ConvertToUnicode(TDes16& aUnicode, const TDesC8& aForeign, TInt& aState, TInt& aNumberOfUnconvertibleCharacters) const;
williamr@2
   693
	// Additionally takes aIndexOfFirstByteOfFirstUnconvertibleCharacter (presumably an output byte index into aForeign).
	IMPORT_C TInt ConvertToUnicode(TDes16& aUnicode, const TDesC8& aForeign, TInt& aState, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter) const;
williamr@2
   694
	/**
	Static and non-static variants with identical parameter lists.
	Both take an array of available character sets and an 8-bit sample, plus aConfidenceLevel and aCharacterSetIdentifier as non-const references.
	NOTE(review): presumably the detected character set and a confidence value are written to those references; the scale of the confidence level and the reason for the two variants are not visible here - confirm.
	*/
	IMPORT_C static void AutoDetectCharacterSetL(TInt& aConfidenceLevel, TUint& aCharacterSetIdentifier, const CArrayFix<SCharacterSet>& aArrayOfCharacterSetsAvailable, const TDesC8& aSample);
williamr@2
   695
	IMPORT_C void AutoDetectCharSetL(TInt& aConfidenceLevel, TUint& aCharacterSetIdentifier, const CArrayFix<SCharacterSet>& aArrayOfCharacterSetsAvailable, const TDesC8& aSample);
williamr@2
   696
	/**
	Static and non-static variants with identical parameter lists.
	Both take the identifier of one character set, an array of available character sets and an 8-bit sample, plus aConfidenceLevel as a non-const reference.
	NOTE(review): presumably aConfidenceLevel receives a measure of how well aSample fits the given character set; its scale is not visible here - confirm.
	*/
	IMPORT_C static void ConvertibleToCharacterSetL(TInt& aConfidenceLevel, const TUint aCharacterSetIdentifier,const CArrayFix<SCharacterSet>& aArrayOfCharacterSetsAvailable, const TDesC8& aSample);
williamr@2
   697
    IMPORT_C void ConvertibleToCharSetL(TInt& aConfidenceLevel, const TUint aCharacterSetIdentifier,const CArrayFix<SCharacterSet>& aArrayOfCharacterSetsAvailable, const TDesC8& aSample);
williamr@2
   698
	// Sets a maximum cache size from the TInt aSize.
	// NOTE(review): presumably this bounds the cache held in iCharsetCnvCache; the unit of aSize and the handling of out-of-range values are not visible here - confirm.
	IMPORT_C void SetMaxCacheSize(TInt aSize);
williamr@2
   699
	// the following functions are only to be called by conversion plug-in libraries
williamr@2
   700
	/**
	Static Unicode-to-foreign conversion routines, only to be called by conversion plug-in libraries.
	Unlike the member ConvertFromUnicode overloads, the conversion data, default endianness and replacement text are passed in explicitly.
	The second overload additionally takes aOutputConversionFlags (non-const reference) and aInputConversionFlags.
	NOTE(review): the flag values and the meaning of the TInt result are not visible here - confirm.
	*/
	IMPORT_C static TInt DoConvertFromUnicode(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters, const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, TDes8& aForeign, const TDesC16& aUnicode, TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
williamr@2
   701
	IMPORT_C static TInt DoConvertFromUnicode(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters, const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, TDes8& aForeign, const TDesC16& aUnicode, TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, TUint& aOutputConversionFlags, TUint aInputConversionFlags);
williamr@2
   702
	/**
	Static foreign-to-Unicode conversion routines, only to be called by conversion plug-in libraries.
	Unlike the member ConvertToUnicode overloads, the conversion data and default endianness are passed in explicitly and there is no aState parameter.
	The second overload additionally takes aOutputConversionFlags (non-const reference) and aInputConversionFlags.
	NOTE(review): the flag values and the meaning of the TInt result are not visible here - confirm.
	*/
	IMPORT_C static TInt DoConvertToUnicode(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aForeign, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
williamr@2
   703
	IMPORT_C static TInt DoConvertToUnicode(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aForeign, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, TUint& aOutputConversionFlags, TUint aInputConversionFlags);
williamr@2
   704
	// Static accessor returning a const reference to an SCnvConversionData; only to be called by conversion plug-in libraries.
	// NOTE(review): presumably the built-in conversion table for ASCII - confirm.
	IMPORT_C static const SCnvConversionData& AsciiConversionData();
williamr@2
   705
	/**
	Returns the current value of iDowngradeForExoticLineTerminatingCharacters.
	NOTE(review): the accessor is not const-qualified although it only reads a member, and its body is followed by a redundant ';'.
	Both are harmless and left untouched so the published header stays unchanged.
	*/
	inline TDowngradeForExoticLineTerminatingCharacters GetDowngradeForExoticLineTerminatingCharacters () 
williamr@2
   706
		{
williamr@2
   707
		return iDowngradeForExoticLineTerminatingCharacters ;
williamr@2
   708
		} ; 
williamr@2
   709
williamr@2
   710
private:
williamr@2
   711
	// Single-bit flag values (0x1 and 0x2), so they can be combined in one bit mask.
	// NOTE(review): presumably the bits kept in iStoredFlags - confirm.
	enum
williamr@2
   712
		{
williamr@2
   713
		EStoredFlagOwnsConversionData				=0x00000001,
williamr@2
   714
		EStoredFlagConversionPlugInLibraryIsLoaded	=0x00000002
williamr@2
   715
		};
williamr@2
   716
	// Result type of AppendHardCodedCharacterSetIfRequiredL.
	// NOTE(review): presumably tells the caller whether to stop or keep looking for further character sets - confirm.
	enum TCharacterSetSearch
williamr@2
   717
		{
williamr@2
   718
		EStopCharacterSetSearch,
williamr@2
   719
		EContinueCharacterSetSearch
williamr@2
   720
		};
williamr@2
   721
	// Explicitly numbered (1..4) identifiers for four conversion plug-in functions.
	// NOTE(review): presumably the export ordinals used to look these functions up in a loaded plug-in library, in which case the values must not be renumbered - confirm.
	enum TConversionPlugInFunctionOrdinals
williamr@2
   722
		{
williamr@2
   723
		EReplacementForUnconvertibleUnicodeCharacters=1,
williamr@2
   724
		EConvertFromUnicode=2,
williamr@2
   725
		EConvertToUnicode=3,
williamr@2
   726
		EIsInThisCharacterSet=4
williamr@2
   727
		};
williamr@2
   728
		
williamr@2
   729
private:
williamr@2
   730
	// Private default constructor: client code cannot construct the class directly.
	// NOTE(review): presumably instances are created through a public factory function declared elsewhere in this class - confirm.
	CCnvCharacterSetConverter();
williamr@2
   731
	// Private construction step. NOTE(review): presumably the leaving second phase run by the factory after the constructor - confirm.
	void ConstructL();
williamr@2
   732
	// Private static helper with the same return type as CreateArrayOfCharacterSetsAvailableLC plus aIdentifierOfOnlyCharacterSetOfInterest.
	// NOTE(review): presumably restricts the result to that one character set, with some sentinel value meaning "all" - confirm.
	static CArrayFix<SCharacterSet>* DoCreateArrayOfCharacterSetsAvailableLC(RFs& aFileServerSession, TUint aIdentifierOfOnlyCharacterSetOfInterest);
williamr@2
   733
	// Private static helper taking the array being built, the identifier of interest, and the identifier and name of one hard-coded character set; returns a TCharacterSetSearch.
	// NOTE(review): presumably appends the hard-coded set when it matches the identifier of interest - confirm.
	static TCharacterSetSearch AppendHardCodedCharacterSetIfRequiredL(CArrayFix<SCharacterSet>& aArrayOfCharacterSets, TUint aIdentifierOfOnlyCharacterSetOfInterest, TUint aIdentifierOfHardCodedCharacterSet, const TDesC& aNameOfHardCodedCharacterSet);
williamr@2
   734
	// Private scanning helpers taking the file-server session.
	// NOTE(review): presumably they populate iStandardNamesAndMibEnums, with the "ROMOnly" variant limited to ROM-resident data - confirm.
	void ScanForStandardNamesAndMibEnumsL(RFs& aFileServerSession);
williamr@2
   735
	void ScanForStandardNamesAndMibEnumsROMOnlyL(RFs& aFileServerSession);
williamr@2
   736
	// Private worker with the parameters of the public PrepareToConvertToOrFromL overloads, except that the array is passed as a pointer rather than a reference.
	// NOTE(review): presumably a null array pointer serves the public overload that takes no array - confirm.
	TAvailability DoPrepareToConvertToOrFromL(TUint aCharacterSetIdentifier, const CArrayFix<SCharacterSet>* aArrayOfCharacterSetsAvailable, RFs& aFileServerSession);
williamr@2
   737
	// Private static helpers for releasing conversion data: a typed overload and an untyped (TAny*) overload.
	// NOTE(review): the TAny* form presumably exists so it can be used as a cleanup-item callback - confirm.
	static void DeleteConversionData(const SCnvConversionData* aConversionData);
williamr@2
   738
	static void DeleteConversionData(TAny* aConversionData);
williamr@2
   739
	// Private static helper returning a TEndianness from the conversion data and a default endianness.
	// NOTE(review): presumably returns the default only when the conversion data leaves the endianness unspecified - confirm.
	static TEndianness EndiannessOfForeignCharacters(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters);
williamr@2
   740
williamr@2
   741
private:
williamr@2
   742
	// Bit mask. NOTE(review): presumably a combination of the EStoredFlag... values - confirm.
	TUint iStoredFlags;
williamr@2
   743
	TUint iCharacterSetIdentifierOfLoadedConversionData; // 0 or a UID of the loaded plugin
williamr@2
   744
	// Pointer to the current conversion data. NOTE(review): ownership is presumably tracked by EStoredFlagOwnsConversionData - confirm.
	const SCnvConversionData* iConversionData;
williamr@2
   745
	TEndianness iDefaultEndiannessOfForeignCharacters;
williamr@2
   746
	// Value returned by GetDowngradeForExoticLineTerminatingCharacters().
	TDowngradeForExoticLineTerminatingCharacters iDowngradeForExoticLineTerminatingCharacters;
williamr@2
   747
	// 8-bit buffer with a capacity of KMaximumLengthOfReplacementForUnconvertibleUnicodeCharacters.
	TBuf8<KMaximumLengthOfReplacementForUnconvertibleUnicodeCharacters> iReplacementForUnconvertibleUnicodeCharacters;
williamr@2
   748
	CStandardNamesAndMibEnums* iStandardNamesAndMibEnums;
williamr@4
   749
	TBool iTlsDataConstructed;
williamr@2
   750
	CCharsetCnvCache* iCharsetCnvCache;
williamr@2
   751
	TBool iIsSystemStandardNamesAndMibEnumsScanned;
williamr@2
   752
	};
williamr@2
   753
williamr@2
   754
#endif
williamr@4
   755