sl@0: /* sl@0: * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies). sl@0: * All rights reserved. sl@0: * This component and the accompanying materials are made available sl@0: * under the terms of "Eclipse Public License v1.0" sl@0: * which accompanies this distribution, and is available sl@0: * at the URL "http://www.eclipse.org/legal/epl-v10.html". sl@0: * sl@0: * Initial Contributors: sl@0: * Nokia Corporation - initial contribution. sl@0: * sl@0: * Contributors: sl@0: * sl@0: * Description: sl@0: * sl@0: */ sl@0: sl@0: sl@0: #if !defined(__CHARCONV_H__) sl@0: #define __CHARCONV_H__ sl@0: sl@0: #if !defined(__E32STD_H__) sl@0: #include sl@0: #endif sl@0: sl@0: #if !defined(__E32BASE_H__) sl@0: #include sl@0: #endif sl@0: sl@0: /** sl@0: The maximum length in bytes of the replacement text for unconvertible Unicode sl@0: characters (=50) (see CCnvCharacterSetConverter::SetReplacementForUnconvertibleUnicodeCharactersL()). sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TInt KMaximumLengthOfReplacementForUnconvertibleUnicodeCharacters=50; sl@0: sl@0: /** sl@0: UTF-7 sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierUtf7=0x1000582c; sl@0: /** sl@0: UTF-8 sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierUtf8=0x1000582d; sl@0: /** sl@0: IMAP UTF-7 sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierImapUtf7=0x1000582e; sl@0: /** sl@0: Java UTF-8 sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierJavaConformantUtf8=0x1000582f; sl@0: /** sl@0: Code Page 1252 sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierCodePage1252=0x100012b6; sl@0: /** sl@0: ISO 8859-1 sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierIso88591=0x10003b10; sl@0: /** sl@0: ISO 8859-2 sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierIso88592=0x1000507e; sl@0: /** sl@0: ISO 8859-3 sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierIso88593=0x10008a28; sl@0: /** sl@0: ISO 8859-4 sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierIso88594=0x1000507f; sl@0: /** sl@0: ISO 8859-5 sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierIso88595=0x10005080; sl@0: /** sl@0: ISO 8859-6 sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierIso88596=0x10008a29; sl@0: /** sl@0: ISO 8859-7 sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierIso88597=0x10005081; sl@0: /** sl@0: ISO 8859-8 sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierIso88598=0x10008a2a; sl@0: /** sl@0: ISO 8859-9 sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierIso88599=0x10005082; sl@0: /** sl@0: ISO 8859-10 sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierIso885910=0x10008a2b; sl@0: /** sl@0: ISO 8859-13 sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierIso885913=0x10008a2c; sl@0: /** sl@0: ISO 8859-14 sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierIso885914=0x10008a2d; sl@0: /** sl@0: ISO 8859-15 sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierIso885915=0x10008a2e; sl@0: /** sl@0: ASCII sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierAscii=0x10004cc6; sl@0: /** sl@0: SMS 7-bit sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierSms7Bit=0x100053ab; sl@0: /** sl@0: GB 2312 sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierGb2312=0x10000fbe; sl@0: /** sl@0: HZ-GB-2312 sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierHz=0x10006065; sl@0: /** sl@0: GB 12345 sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierGb12345=0x1000401a; sl@0: /** sl@0: GBK sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierGbk=0x10003ecb; sl@0: /** sl@0: GB18030 sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierGb18030=0x10287038; sl@0: /** sl@0: Big 5 sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierBig5=0x10000fbf; sl@0: /** sl@0: Shift-JIS sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierShiftJis=0x10000fbd; sl@0: /** sl@0: ISO-2022-JP sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierIso2022Jp=0x100066a0; sl@0: /** sl@0: ISO-2022-JP-1 sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierIso2022Jp1=0x100066a3; sl@0: /** sl@0: JIS Encoding sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierJis=0x10006066; sl@0: /** sl@0: EUC-JP sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierEucJpPacked=0x10006067; sl@0: sl@0: /** sl@0: JP5 sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierJ5=0x1020D408; sl@0: /** sl@0: CP850 sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierCP850=0x102825AD; sl@0: sl@0: const TUint KCharacterSetIdentifierUnicodeLittle=0x101f3fae; //Little Endian Unicode sl@0: const TUint KCharacterSetIdentifierUnicodeBig=0x101f4052; // Big Endian Unicode sl@0: const TUint KCharacterSetIdentifierUcs2=0x101ff492; sl@0: sl@0: sl@0: /** sl@0: Extended SMS 7-bit sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierExtendedSms7Bit=0x102863FD; sl@0: sl@0: /** sl@0: Turkish sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierTurkishSingleSms7Bit=0x102863FE; sl@0: const TUint KCharacterSetIdentifierTurkishLockingSms7Bit=0x102863FF; sl@0: const TUint KCharacterSetIdentifierTurkishLockingAndSingleSms7Bit=0x10286400; sl@0: sl@0: /** sl@0: Portuguese sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierPortugueseSingleSms7Bit=0x10286407; sl@0: const TUint KCharacterSetIdentifierPortugueseLockingSms7Bit=0x10286408; sl@0: const TUint KCharacterSetIdentifierPortugueseLockingAndSingleSms7Bit=0x10286409; sl@0: sl@0: /** sl@0: Spanish sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierSpanishSingleSms7Bit=0x1028640A; sl@0: sl@0: sl@0: /** sl@0: code page 949 sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierCP949=0x200100FF; sl@0: sl@0: /** sl@0: Shift-JIS with Pictograph sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierShiftJisDirectmap=0x101F8691; sl@0: sl@0: /** sl@0: EUC-JP with direct mapped pictograph sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierEucJpDirectmap=0x101F86A6; sl@0: sl@0: /** sl@0: EUC-KR sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierEUCKR=0x2000E526; sl@0: sl@0: /** sl@0: iscii sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierIscii=0x1027508E; sl@0: sl@0: /** sl@0: ISO2022 Korean sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierIso2022kr=0x20010101; sl@0: sl@0: /** sl@0: KOI8-R Russian sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierKOI8R=0x101F8778; sl@0: sl@0: /** sl@0: KOI8-U Belorusian/Ukrainian Cyrillic sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierKOI8U=0x101F8761; sl@0: sl@0: /** sl@0: KSC5601 Korean sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierKsc5601=0x200113CD; sl@0: sl@0: /** sl@0: TIS_620 Thai sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierTIS_620=0x101F8549; sl@0: sl@0: /** sl@0: Code page 874 Thai sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierWin874=0x101F854A; sl@0: sl@0: /** sl@0: Code page 1250 Eastern European sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierWin1250=0x100059D6; sl@0: sl@0: /** sl@0: Code page 1251 Cyrillic sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierWin1251=0x100059D7; sl@0: sl@0: /** sl@0: Code page 1253 Greek sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierWin1253=0x100059D8; sl@0: sl@0: /** sl@0: Code page 1254 Turkish sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierWin1254=0x100059D9; sl@0: sl@0: /** sl@0: Code page 1255 Hebrew sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierWin1255=0x101F8547; sl@0: sl@0: /** sl@0: Code page 1256 Arabic sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierWin1256=0x101F8548; sl@0: sl@0: /** sl@0: Code page 1257 Baltic sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierWin1257=0x100059DA; sl@0: sl@0: /** sl@0: Windows-1258 sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: const TUint KCharacterSetIdentifierWin1258=0x102073B8; sl@0: sl@0: // note that other character sets than those listed above may be available at run-time, and also that none of the above are necessarily available at run-time sl@0: sl@0: struct SCnvConversionData; sl@0: class CDeepDestructingArrayOfCharactersSets; sl@0: class CFileReader; sl@0: class CStandardNamesAndMibEnums; sl@0: class RFs; sl@0: class CCharsetCnvCache; sl@0: /** sl@0: Converts text between Unicode and other character sets. sl@0: sl@0: The first stage of the conversion is to specify the non-Unicode character sl@0: set being converted to or from. This is done by calling one of the overloads sl@0: of PrepareToConvertToOrFromL(). sl@0: sl@0: The second stage is to convert the text, using one of the overloads of sl@0: ConvertFromUnicode() or ConvertToUnicode(). sl@0: sl@0: Where possible the first documented overload of PrepareToConvertToOrFromL() sl@0: should be used because the second overload panics if the specified character sl@0: set is not available: the first overload simply returns whether the character sl@0: set is available or not available. However if the conversions are to be sl@0: performed often, or if the user must select the character set for the sl@0: conversion from a list, the second overload may be more appropriate. sl@0: sl@0: The first overload is less efficient than the second, because it searches sl@0: through the file system for the selected character set every time it is invoked. sl@0: The second overload searches through an array of all available character sets. sl@0: In this method, the file system need only be searched once - when sl@0: CreateArrayOfCharacterSetsAvailableLC() or sl@0: CreateArrayOfCharacterSetsAvailableL() is used to create the array. sl@0: sl@0: The conversion functions allow users of this class to perform partial sl@0: conversions on an input descriptor, handling the situation where the input sl@0: descriptor is truncated mid way through a multi-byte character. This means sl@0: that you do not have to guess how big to make the output descriptor for a sl@0: given input descriptor, you can simply do the conversion in a loop using a sl@0: small output descriptor. The ability to handle truncated descriptors also sl@0: allows users of the class to convert information received in chunks from an sl@0: external source. sl@0: sl@0: The class also provides a number of utility functions. sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: class CCnvCharacterSetConverter : public CBase sl@0: { sl@0: public: sl@0: /** Indicates whether a character set is available or unavailable sl@0: for conversion. Used by the second overload of sl@0: PrepareToConvertToOrFromL(). */ sl@0: enum TAvailability sl@0: { sl@0: /** The requested character set can be converted. */ sl@0: EAvailable, sl@0: /** The requested character set cannot be converted. */ sl@0: ENotAvailable sl@0: }; sl@0: sl@0: /** Conversion error flags. At this stage there is only one error sl@0: flag- others may be added in the future. */ sl@0: enum TError sl@0: { sl@0: /** The input descriptor contains a single corrupt character. This sl@0: might occur when the input descriptor only contains some of the bytes sl@0: of a single multi-byte character. */ sl@0: EErrorIllFormedInput=KErrCorrupt sl@0: }; sl@0: sl@0: /** Specifies the default endian-ness of the current character set. sl@0: Used by SetDefaultEndiannessOfForeignCharacters(). */ sl@0: enum TEndianness sl@0: { sl@0: /** The character set is big-endian. */ sl@0: ELittleEndian, sl@0: /** The character set is little-endian. */ sl@0: EBigEndian sl@0: }; sl@0: sl@0: /** Downgrade for line and paragraph separators */ sl@0: enum TDowngradeForExoticLineTerminatingCharacters sl@0: { sl@0: /** Paragraph/line separators should be downgraded (if necessary) sl@0: into carriage return and line feed pairs. */ sl@0: EDowngradeExoticLineTerminatingCharactersToCarriageReturnLineFeed, sl@0: /** Paragraph/line separators should be downgraded (if necessary) sl@0: into a line feed only. */ sl@0: EDowngradeExoticLineTerminatingCharactersToJustLineFeed sl@0: }; sl@0: sl@0: /** Output flag used to indicate whether or not a character in the source sl@0: descriptor is the first half of a surrogate pair, but is the last sl@0: character in the descriptor to convert. sl@0: sl@0: Note: This enumeration can be used in the DoConvertToUnicode() and sl@0: DoConvertFromUnicode() functions. These are part of the sl@0: Character Conversion Plug-in Provider API and are for use by plug-in sl@0: conversion libraries only. sl@0: @since 6.0 */ sl@0: enum sl@0: { sl@0: /** Appends the converted text to the output descriptor.*/ sl@0: EInputConversionFlagAppend =0x00010000, sl@0: /** By default, when the input descriptor passed to DoConvertFromUnicode() sl@0: or DoConvertToUnicode() consists of nothing but a truncated sequence, sl@0: the error-code EErrorIllFormedInput is returned. sl@0: If this behaviour is undesirable, the input flag sl@0: EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable sl@0: should be set. */ sl@0: EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable =0x00020000, sl@0: /** Stops converting when the first unconvertible character is reached. */ sl@0: EInputConversionFlagStopAtFirstUnconvertibleCharacter =0x00040000, sl@0: /** Appends the default character set Escape sequence at end of converted text */ sl@0: EInputConversionFlagMustEndInDefaultCharacterSet =0x00080000, sl@0: /*defect fix: INC053609; According to RFC1468 we can assume the line starts sl@0: in ASCII so there is no need to always insert an escape sequence*/ sl@0: EInputConversionFlagAssumeStartInDefaultCharacterSet =0x00100000 sl@0: }; sl@0: enum sl@0: { sl@0: /** Indicates whether or not the source descriptor ends in a truncated sl@0: sequence, e.g. the first half only of a surrogate pair. */ sl@0: EOutputConversionFlagInputIsTruncated =0x01000000 sl@0: }; sl@0: sl@0: /** Initial value for the state argument in a set of related calls to sl@0: ConvertToUnicode(). */ sl@0: enum {KStateDefault=0}; sl@0: enum sl@0: { sl@0: /** The lowest confidence value for a character set accepted by sl@0: Autodetect*/ sl@0: ELowestThreshold = 25 sl@0: }; sl@0: sl@0: /** Stores information about a non-Unicode character set. The information sl@0: is used to locate the conversion information required by sl@0: ConvertFromUnicode() and ConvertToUnicode(). sl@0: sl@0: An array of these structs that contain all available character sets sl@0: can be generated by CreateArrayOfCharacterSetsAvailableLC() and sl@0: CreateArrayOfCharacterSetsAvailableL(), and is used by one of the sl@0: overloads of PrepareToConvertToOrFromL(). */ sl@0: struct SCharacterSet sl@0: { sl@0: /** Gets the character sets UID. sl@0: sl@0: @return The UID of the character set. */ sl@0: inline TUint Identifier() const {return iIdentifier;} sl@0: sl@0: /** Tests whether a filename given by the function SCharacterSet::Name() sl@0: is a real file name (i.e. conversion is provided by a plug in DLL), or sl@0: just the character set name (i.e. conversion is built into Symbian OS). sl@0: sl@0: Note: If the function returns ETrue then the path and filename can be sl@0: parsed using TParse or TParsePtrC functions to obtain just the filename. sl@0: sl@0: @return ETrue if the name is a real filename. EFalse if it is just the sl@0: character set name. */ sl@0: inline TBool NameIsFileName() const {return iFlags&EFlagNameIsFileName;} sl@0: sl@0: /** Gets the full path and filename of the DLL which implements sl@0: conversion for the character set. sl@0: sl@0: If the character set is one for which conversion is built into Symbian sl@0: OS rather than implemented by a plug in DLL, the function just returns sl@0: the name of the character set. The NameIsFileName() function can be sl@0: used to determine whether or not it is legal to create a TParsePtrC sl@0: object over the descriptor returned by Name(). sl@0: sl@0: Notes: sl@0: sl@0: The name returned cannot be treated as an Internet-standard name, it sl@0: is locale-independent and should be mapped to the locale-dependent name sl@0: by software at a higher level before being shown to the user. Conversion sl@0: from Internet-standard names of character sets to the UID identifiers sl@0: is provided by the member function sl@0: ConvertStandardNameOfCharacterSetToIdentifierL(). sl@0: sl@0: Typically, to find the user-displayable name (as opposed to the sl@0: internet-standard name) of a character set, you would do something sl@0: like this: sl@0: sl@0: @code sl@0: const CCnvCharacterSetConverter::SCharacterSet& characterSet=...; sl@0: const TPtrC userDisplayable(characterSet.NameIsFileName()? TParsePtrC(characterSet.Name()).Name(): sl@0: characterSet.Name()); sl@0: @endcode sl@0: sl@0: @return Full path and filename of the character set converter plug in sl@0: DLL, or just the name of the character set. */ sl@0: inline TPtrC Name() const {return *iName;} sl@0: private: sl@0: enum sl@0: { sl@0: EFlagNameIsFileName =0x00000001, sl@0: EFlagFileIsConversionPlugInLibrary =0x00000002 sl@0: }; sl@0: private: sl@0: inline TBool FileIsConversionPlugInLibrary() const {return iFlags&EFlagFileIsConversionPlugInLibrary;} sl@0: private: sl@0: TUint iIdentifier; sl@0: TUint iFlags; sl@0: HBufC* iName; sl@0: private: sl@0: friend class CCnvCharacterSetConverter; sl@0: friend class CDeepDestructingArrayOfCharactersSets; sl@0: }; //SCharacterSet sl@0: sl@0: sl@0: /** sl@0: Holds an ascending array of the indices of the characters in the sl@0: source Unicode text which could not be converted by sl@0: CCnvCharacterSetConverter::ConvertFromUnicode() into the foreign sl@0: character set sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: class TArrayOfAscendingIndices sl@0: { sl@0: public: sl@0: /** The return value of CCnvCharacterSetConverter::AppendIndex(). */ sl@0: enum TAppendResult sl@0: { sl@0: /** The append failed. */ sl@0: EAppendFailed, sl@0: /** The append succeeded. */ sl@0: EAppendSuccessful sl@0: }; sl@0: public: sl@0: /** C++ constructor. The array is initialised to be of length zero. */ sl@0: inline TArrayOfAscendingIndices() :iArrayOfIndices(0) {} sl@0: sl@0: IMPORT_C TAppendResult AppendIndex(TInt aIndex); sl@0: sl@0: /** Deletes a single index from the array. sl@0: sl@0: @param aIndexOfIndex The index of the index to delete. Must not be sl@0: negative and must not be greater than the length of the array, or a sl@0: panic occurs. */ sl@0: inline void Remove(TInt aIndexOfIndex) {iArrayOfIndices.Delete(aIndexOfIndex, 1);} sl@0: sl@0: /** Deletes all indices from the array. */ sl@0: inline void RemoveAll() {iArrayOfIndices.SetLength(0);} sl@0: sl@0: /** Returns the number of indices in the array. sl@0: sl@0: @return The number of indices in the array. */ sl@0: inline TInt NumberOfIndices() const {return iArrayOfIndices.Length();} sl@0: sl@0: /** Gets the value of the specified index. sl@0: sl@0: @param aIndexOfIndex Index into the array. sl@0: @return The value of the index. */ sl@0: inline TInt operator[](TInt aIndexOfIndex) const {return iArrayOfIndices[aIndexOfIndex];} sl@0: private: sl@0: enum {KMaximumNumberOfIndices=25}; sl@0: private: sl@0: TBuf16 iArrayOfIndices; sl@0: }; sl@0: public: sl@0: IMPORT_C static CCnvCharacterSetConverter* NewL(); sl@0: IMPORT_C static CCnvCharacterSetConverter* NewLC(); sl@0: IMPORT_C virtual ~CCnvCharacterSetConverter(); sl@0: IMPORT_C static CArrayFix* CreateArrayOfCharacterSetsAvailableL(RFs& aFileServerSession); sl@0: IMPORT_C static CArrayFix* CreateArrayOfCharacterSetsAvailableLC(RFs& aFileServerSession); sl@0: IMPORT_C TUint ConvertStandardNameOfCharacterSetToIdentifierL(const TDesC8& aStandardNameOfCharacterSet, RFs& aFileServerSession); sl@0: IMPORT_C HBufC8* ConvertCharacterSetIdentifierToStandardNameL(TUint aCharacterSetIdentifier, RFs& aFileServerSession); sl@0: IMPORT_C TUint ConvertMibEnumOfCharacterSetToIdentifierL(TInt aMibEnumOfCharacterSet, RFs& aFileServerSession); sl@0: IMPORT_C TInt ConvertCharacterSetIdentifierToMibEnumL(TUint aCharacterSetIdentifier, RFs& aFileServerSession); sl@0: IMPORT_C void PrepareToConvertToOrFromL(TUint aCharacterSetIdentifier, const CArrayFix& aArrayOfCharacterSetsAvailable, RFs& aFileServerSession); sl@0: IMPORT_C TAvailability PrepareToConvertToOrFromL(TUint aCharacterSetIdentifier, RFs& aFileServerSession); sl@0: // the following attribute-setting functions should be called (if at all) after calling PrepareToConvertToOrFromL and before calling ConvertFromUnicode and/or ConvertToUnicode sl@0: IMPORT_C void SetDefaultEndiannessOfForeignCharacters(TEndianness aEndianness); sl@0: IMPORT_C void SetDowngradeForExoticLineTerminatingCharacters(TDowngradeForExoticLineTerminatingCharacters aDowngradeForExoticLineTerminatingCharacters); // by default this attribute is set to EDowngradeExoticLineTerminatingCharactersToCarriageReturnLineFeed sl@0: IMPORT_C void SetReplacementForUnconvertibleUnicodeCharactersL(const TDesC8& aReplacementForUnconvertibleUnicodeCharacters); // must be a single character preceded by its escape sequence (if any), and must be little-endian if the endianness of the character-set is unspecified, otherwise in the same endianness as the character-set sl@0: sl@0: // the conversion functions return either one of the TError values above, or the number of unconverted elements left at the end of the input descriptor sl@0: IMPORT_C TInt ConvertFromUnicode(TDes8& aForeign, const TDesC16& aUnicode) const; sl@0: IMPORT_C TInt ConvertFromUnicode(TDes8& aForeign, const TDesC16& aUnicode, TInt& aNumberOfUnconvertibleCharacters) const; sl@0: IMPORT_C TInt ConvertFromUnicode(TDes8& aForeign, const TDesC16& aUnicode, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstUnconvertibleCharacter) const; sl@0: IMPORT_C TInt ConvertFromUnicode(TDes8& aForeign, const TDesC16& aUnicode, TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters) const; sl@0: IMPORT_C TInt ConvertToUnicode(TDes16& aUnicode, const TDesC8& aForeign, TInt& aState) const; sl@0: IMPORT_C TInt ConvertToUnicode(TDes16& aUnicode, const TDesC8& aForeign, TInt& aState, TInt& aNumberOfUnconvertibleCharacters) const; sl@0: IMPORT_C TInt ConvertToUnicode(TDes16& aUnicode, const TDesC8& aForeign, TInt& aState, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter) const; sl@0: IMPORT_C static void AutoDetectCharacterSetL(TInt& aConfidenceLevel, TUint& aCharacterSetIdentifier, const CArrayFix& aArrayOfCharacterSetsAvailable, const TDesC8& aSample); sl@0: IMPORT_C void AutoDetectCharSetL(TInt& aConfidenceLevel, TUint& aCharacterSetIdentifier, const CArrayFix& aArrayOfCharacterSetsAvailable, const TDesC8& aSample); sl@0: IMPORT_C static void ConvertibleToCharacterSetL(TInt& aConfidenceLevel, const TUint aCharacterSetIdentifier,const CArrayFix& aArrayOfCharacterSetsAvailable, const TDesC8& aSample); sl@0: IMPORT_C void ConvertibleToCharSetL(TInt& aConfidenceLevel, const TUint aCharacterSetIdentifier,const CArrayFix& aArrayOfCharacterSetsAvailable, const TDesC8& aSample); sl@0: IMPORT_C void SetMaxCacheSize(TInt aSize); sl@0: // the following functions are only to be called by conversion plug-in libraries sl@0: IMPORT_C static TInt DoConvertFromUnicode(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters, const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, TDes8& aForeign, const TDesC16& aUnicode, TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters); sl@0: IMPORT_C static TInt DoConvertFromUnicode(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters, const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, TDes8& aForeign, const TDesC16& aUnicode, TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, TUint& aOutputConversionFlags, TUint aInputConversionFlags); sl@0: IMPORT_C static TInt DoConvertToUnicode(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aForeign, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter); sl@0: IMPORT_C static TInt DoConvertToUnicode(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aForeign, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, TUint& aOutputConversionFlags, TUint aInputConversionFlags); sl@0: IMPORT_C static const SCnvConversionData& AsciiConversionData(); sl@0: inline TDowngradeForExoticLineTerminatingCharacters GetDowngradeForExoticLineTerminatingCharacters () sl@0: { sl@0: return iDowngradeForExoticLineTerminatingCharacters ; sl@0: } ; sl@0: sl@0: private: sl@0: enum sl@0: { sl@0: EStoredFlagOwnsConversionData =0x00000001, sl@0: EStoredFlagConversionPlugInLibraryIsLoaded =0x00000002 sl@0: }; sl@0: enum TCharacterSetSearch sl@0: { sl@0: EStopCharacterSetSearch, sl@0: EContinueCharacterSetSearch sl@0: }; sl@0: enum TConversionPlugInFunctionOrdinals sl@0: { sl@0: EReplacementForUnconvertibleUnicodeCharacters=1, sl@0: EConvertFromUnicode=2, sl@0: EConvertToUnicode=3, sl@0: EIsInThisCharacterSet=4 sl@0: }; sl@0: sl@0: private: sl@0: CCnvCharacterSetConverter(); sl@0: void ConstructL(); sl@0: static CArrayFix* DoCreateArrayOfCharacterSetsAvailableLC(RFs& aFileServerSession, TUint aIdentifierOfOnlyCharacterSetOfInterest); sl@0: static TCharacterSetSearch AppendHardCodedCharacterSetIfRequiredL(CArrayFix& aArrayOfCharacterSets, TUint aIdentifierOfOnlyCharacterSetOfInterest, TUint aIdentifierOfHardCodedCharacterSet, const TDesC& aNameOfHardCodedCharacterSet); sl@0: void ScanForStandardNamesAndMibEnumsL(RFs& aFileServerSession); sl@0: void ScanForStandardNamesAndMibEnumsROMOnlyL(RFs& aFileServerSession); sl@0: TAvailability DoPrepareToConvertToOrFromL(TUint aCharacterSetIdentifier, const CArrayFix* aArrayOfCharacterSetsAvailable, RFs& aFileServerSession); sl@0: static void DeleteConversionData(const SCnvConversionData* aConversionData); sl@0: static void DeleteConversionData(TAny* aConversionData); sl@0: static TEndianness EndiannessOfForeignCharacters(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters); sl@0: sl@0: private: sl@0: TUint iStoredFlags; sl@0: TUint iCharacterSetIdentifierOfLoadedConversionData; // 0 or a UID of the loaded plugin sl@0: const SCnvConversionData* iConversionData; sl@0: TEndianness iDefaultEndiannessOfForeignCharacters; sl@0: TDowngradeForExoticLineTerminatingCharacters iDowngradeForExoticLineTerminatingCharacters; sl@0: TBuf8 iReplacementForUnconvertibleUnicodeCharacters; sl@0: CStandardNamesAndMibEnums* iStandardNamesAndMibEnums; sl@0: TBool iTlsDataConstructed; sl@0: CCharsetCnvCache* iCharsetCnvCache; sl@0: TBool iIsSystemStandardNamesAndMibEnumsScanned; sl@0: }; sl@0: sl@0: #endif sl@0: