williamr@4: /* williamr@4: * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies). williamr@4: * All rights reserved. williamr@4: * This component and the accompanying materials are made available williamr@4: * under the terms of "Eclipse Public License v1.0" williamr@4: * which accompanies this distribution, and is available williamr@4: * at the URL "http://www.eclipse.org/legal/epl-v10.html". williamr@4: * williamr@4: * Initial Contributors: williamr@4: * Nokia Corporation - initial contribution. williamr@4: * williamr@4: * Contributors: williamr@4: * williamr@4: * Description: williamr@4: * williamr@4: */ williamr@4: williamr@2: williamr@2: #if !defined(__CHARCONV_H__) williamr@2: #define __CHARCONV_H__ williamr@2: williamr@2: #if !defined(__E32STD_H__) williamr@2: #include williamr@2: #endif williamr@2: williamr@2: #if !defined(__E32BASE_H__) williamr@2: #include williamr@2: #endif williamr@2: williamr@2: /** williamr@2: The maximum length in bytes of the replacement text for unconvertible Unicode williamr@2: characters (=50) (see CCnvCharacterSetConverter::SetReplacementForUnconvertibleUnicodeCharactersL()). williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TInt KMaximumLengthOfReplacementForUnconvertibleUnicodeCharacters=50; williamr@2: williamr@2: /** williamr@2: UTF-7 williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierUtf7=0x1000582c; williamr@2: /** williamr@2: UTF-8 williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierUtf8=0x1000582d; williamr@2: /** williamr@2: IMAP UTF-7 williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierImapUtf7=0x1000582e; williamr@2: /** williamr@2: Java UTF-8 williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierJavaConformantUtf8=0x1000582f; williamr@2: /** williamr@2: Code Page 1252 williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierCodePage1252=0x100012b6; williamr@2: /** williamr@2: ISO 8859-1 williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierIso88591=0x10003b10; williamr@2: /** williamr@2: ISO 8859-2 williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierIso88592=0x1000507e; williamr@2: /** williamr@2: ISO 8859-3 williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierIso88593=0x10008a28; williamr@2: /** williamr@2: ISO 8859-4 williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierIso88594=0x1000507f; williamr@2: /** williamr@2: ISO 8859-5 williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierIso88595=0x10005080; williamr@2: /** williamr@2: ISO 8859-6 williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierIso88596=0x10008a29; williamr@2: /** williamr@2: ISO 8859-7 williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierIso88597=0x10005081; williamr@2: /** williamr@2: ISO 8859-8 williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierIso88598=0x10008a2a; williamr@2: /** williamr@2: ISO 8859-9 williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierIso88599=0x10005082; williamr@2: /** williamr@2: ISO 8859-10 williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierIso885910=0x10008a2b; williamr@2: /** williamr@2: ISO 8859-13 williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierIso885913=0x10008a2c; williamr@2: /** williamr@2: ISO 8859-14 williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierIso885914=0x10008a2d; williamr@2: /** williamr@2: ISO 8859-15 williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierIso885915=0x10008a2e; williamr@2: /** williamr@2: ASCII williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierAscii=0x10004cc6; williamr@2: /** williamr@2: SMS 7-bit williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierSms7Bit=0x100053ab; williamr@2: /** williamr@2: GB 2312 williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierGb2312=0x10000fbe; williamr@2: /** williamr@2: HZ-GB-2312 williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierHz=0x10006065; williamr@2: /** williamr@2: GB 12345 williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierGb12345=0x1000401a; williamr@2: /** williamr@2: GBK williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierGbk=0x10003ecb; williamr@2: /** williamr@4: GB18030 williamr@4: @publishedAll williamr@4: @released williamr@4: */ williamr@4: const TUint KCharacterSetIdentifierGb18030=0x10287038; williamr@4: /** williamr@2: Big 5 williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierBig5=0x10000fbf; williamr@2: /** williamr@2: Shift-JIS williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierShiftJis=0x10000fbd; williamr@2: /** williamr@2: ISO-2022-JP williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierIso2022Jp=0x100066a0; williamr@2: /** williamr@2: ISO-2022-JP-1 williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierIso2022Jp1=0x100066a3; williamr@2: /** williamr@2: JIS Encoding williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierJis=0x10006066; williamr@2: /** williamr@2: EUC-JP williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierEucJpPacked=0x10006067; williamr@2: williamr@2: /** williamr@2: JP5 williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierJ5=0x1020D408; williamr@2: /** williamr@2: CP850 williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierCP850=0x102825AD; williamr@2: williamr@2: const TUint KCharacterSetIdentifierUnicodeLittle=0x101f3fae; //Little Endian Unicode williamr@2: const TUint KCharacterSetIdentifierUnicodeBig=0x101f4052; // Big Endian Unicode williamr@4: const TUint KCharacterSetIdentifierUcs2=0x101ff492; williamr@4: williamr@2: williamr@2: /** williamr@4: Extended SMS 7-bit williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierExtendedSms7Bit=0x102863FD; williamr@2: williamr@2: /** williamr@2: Turkish williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierTurkishSingleSms7Bit=0x102863FE; williamr@2: const TUint KCharacterSetIdentifierTurkishLockingSms7Bit=0x102863FF; williamr@2: const TUint KCharacterSetIdentifierTurkishLockingAndSingleSms7Bit=0x10286400; williamr@2: williamr@2: /** williamr@2: Portuguese williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierPortugueseSingleSms7Bit=0x10286407; williamr@2: const TUint KCharacterSetIdentifierPortugueseLockingSms7Bit=0x10286408; williamr@2: const TUint KCharacterSetIdentifierPortugueseLockingAndSingleSms7Bit=0x10286409; williamr@2: williamr@2: /** williamr@2: Spanish williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: const TUint KCharacterSetIdentifierSpanishSingleSms7Bit=0x1028640A; williamr@4: williamr@4: williamr@4: /** williamr@4: code page 949 williamr@4: @publishedAll williamr@4: @released williamr@4: */ williamr@4: const TUint KCharacterSetIdentifierCP949=0x200100FF; williamr@4: williamr@4: /** williamr@4: Shift-JIS with Pictograph williamr@4: @publishedAll williamr@4: @released williamr@4: */ williamr@4: const TUint KCharacterSetIdentifierShiftJisDirectmap=0x101F8691; williamr@4: williamr@4: /** williamr@4: EUC-JP with direct mapped pictograph williamr@4: @publishedAll williamr@4: @released williamr@4: */ williamr@4: const TUint KCharacterSetIdentifierEucJpDirectmap=0x101F86A6; williamr@4: williamr@4: /** williamr@4: EUC-KR williamr@4: @publishedAll williamr@4: @released williamr@4: */ williamr@4: const TUint KCharacterSetIdentifierEUCKR=0x2000E526; williamr@4: williamr@4: /** williamr@4: iscii williamr@4: @publishedAll williamr@4: @released williamr@4: */ williamr@4: const TUint KCharacterSetIdentifierIscii=0x1027508E; williamr@4: williamr@4: /** williamr@4: ISO2022 Korean williamr@4: @publishedAll williamr@4: @released williamr@4: */ williamr@4: const TUint KCharacterSetIdentifierIso2022kr=0x20010101; williamr@4: williamr@4: /** williamr@4: KOI8-R Russian williamr@4: @publishedAll williamr@4: @released williamr@4: */ williamr@4: const TUint KCharacterSetIdentifierKOI8R=0x101F8778; williamr@4: williamr@4: /** williamr@4: KOI8-U Belorusian/Ukrainian Cyrillic williamr@4: @publishedAll williamr@4: @released williamr@4: */ williamr@4: const TUint KCharacterSetIdentifierKOI8U=0x101F8761; williamr@4: williamr@4: /** williamr@4: KSC5601 Korean williamr@4: @publishedAll williamr@4: @released williamr@4: */ williamr@4: const TUint KCharacterSetIdentifierKsc5601=0x200113CD; williamr@4: williamr@4: /** williamr@4: TIS_620 Thai williamr@4: @publishedAll williamr@4: @released williamr@4: */ williamr@4: const TUint KCharacterSetIdentifierTIS_620=0x101F8549; williamr@4: williamr@4: /** williamr@4: Code page 874 Thai williamr@4: @publishedAll williamr@4: @released williamr@4: */ williamr@4: const TUint KCharacterSetIdentifierWin874=0x101F854A; williamr@4: williamr@4: /** williamr@4: Code page 1250 Eastern European williamr@4: @publishedAll williamr@4: @released williamr@4: */ williamr@4: const TUint KCharacterSetIdentifierWin1250=0x100059D6; williamr@4: williamr@4: /** williamr@4: Code page 1251 Cyrillic williamr@4: @publishedAll williamr@4: @released williamr@4: */ williamr@4: const TUint KCharacterSetIdentifierWin1251=0x100059D7; williamr@4: williamr@4: /** williamr@4: Code page 1253 Greek williamr@4: @publishedAll williamr@4: @released williamr@4: */ williamr@4: const TUint KCharacterSetIdentifierWin1253=0x100059D8; williamr@4: williamr@4: /** williamr@4: Code page 1254 Turkish williamr@4: @publishedAll williamr@4: @released williamr@4: */ williamr@4: const TUint KCharacterSetIdentifierWin1254=0x100059D9; williamr@4: williamr@4: /** williamr@4: Code page 1255 Hebrew williamr@4: @publishedAll williamr@4: @released williamr@4: */ williamr@4: const TUint KCharacterSetIdentifierWin1255=0x101F8547; williamr@4: williamr@4: /** williamr@4: Code page 1256 Arabic williamr@4: @publishedAll williamr@4: @released williamr@4: */ williamr@4: const TUint KCharacterSetIdentifierWin1256=0x101F8548; williamr@4: williamr@4: /** williamr@4: Code page 1257 Baltic williamr@4: @publishedAll williamr@4: @released williamr@4: */ williamr@4: const TUint KCharacterSetIdentifierWin1257=0x100059DA; williamr@4: williamr@4: /** williamr@4: Windows-1258 williamr@4: @publishedAll williamr@4: @released williamr@4: */ williamr@4: const TUint KCharacterSetIdentifierWin1258=0x102073B8; williamr@4: williamr@2: // note that other character sets than those listed above may be available at run-time, and also that none of the above are necessarily available at run-time williamr@2: williamr@2: struct SCnvConversionData; williamr@2: class CDeepDestructingArrayOfCharactersSets; williamr@2: class CFileReader; williamr@2: class CStandardNamesAndMibEnums; williamr@2: class RFs; williamr@2: class CCharsetCnvCache; williamr@2: /** williamr@2: Converts text between Unicode and other character sets. williamr@2: williamr@2: The first stage of the conversion is to specify the non-Unicode character williamr@2: set being converted to or from. This is done by calling one of the overloads williamr@2: of PrepareToConvertToOrFromL(). williamr@2: williamr@2: The second stage is to convert the text, using one of the overloads of williamr@2: ConvertFromUnicode() or ConvertToUnicode(). williamr@2: williamr@2: Where possible the first documented overload of PrepareToConvertToOrFromL() williamr@2: should be used because the second overload panics if the specified character williamr@2: set is not available: the first overload simply returns whether the character williamr@2: set is available or not available. However if the conversions are to be williamr@2: performed often, or if the user must select the character set for the williamr@2: conversion from a list, the second overload may be more appropriate. williamr@2: williamr@2: The first overload is less efficient than the second, because it searches williamr@2: through the file system for the selected character set every time it is invoked. williamr@2: The second overload searches through an array of all available character sets. williamr@2: In this method, the file system need only be searched once - when williamr@2: CreateArrayOfCharacterSetsAvailableLC() or williamr@2: CreateArrayOfCharacterSetsAvailableL() is used to create the array. williamr@2: williamr@2: The conversion functions allow users of this class to perform partial williamr@2: conversions on an input descriptor, handling the situation where the input williamr@2: descriptor is truncated mid way through a multi-byte character. This means williamr@2: that you do not have to guess how big to make the output descriptor for a williamr@2: given input descriptor, you can simply do the conversion in a loop using a williamr@2: small output descriptor. The ability to handle truncated descriptors also williamr@2: allows users of the class to convert information received in chunks from an williamr@2: external source. williamr@2: williamr@2: The class also provides a number of utility functions. williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: class CCnvCharacterSetConverter : public CBase williamr@2: { williamr@2: public: williamr@4: /** Indicates whether a character set is available or unavailable williamr@2: for conversion. Used by the second overload of williamr@2: PrepareToConvertToOrFromL(). */ williamr@2: enum TAvailability williamr@2: { williamr@2: /** The requested character set can be converted. */ williamr@2: EAvailable, williamr@2: /** The requested character set cannot be converted. */ williamr@2: ENotAvailable williamr@2: }; williamr@2: williamr@2: /** Conversion error flags. At this stage there is only one error williamr@2: flag- others may be added in the future. */ williamr@2: enum TError williamr@2: { williamr@2: /** The input descriptor contains a single corrupt character. This williamr@2: might occur when the input descriptor only contains some of the bytes williamr@2: of a single multi-byte character. */ williamr@2: EErrorIllFormedInput=KErrCorrupt williamr@2: }; williamr@2: williamr@2: /** Specifies the default endian-ness of the current character set. williamr@2: Used by SetDefaultEndiannessOfForeignCharacters(). */ williamr@2: enum TEndianness williamr@2: { williamr@2: /** The character set is big-endian. */ williamr@2: ELittleEndian, williamr@2: /** The character set is little-endian. */ williamr@2: EBigEndian williamr@2: }; williamr@2: williamr@2: /** Downgrade for line and paragraph separators */ williamr@2: enum TDowngradeForExoticLineTerminatingCharacters williamr@2: { williamr@2: /** Paragraph/line separators should be downgraded (if necessary) williamr@2: into carriage return and line feed pairs. */ williamr@2: EDowngradeExoticLineTerminatingCharactersToCarriageReturnLineFeed, williamr@2: /** Paragraph/line separators should be downgraded (if necessary) williamr@2: into a line feed only. */ williamr@2: EDowngradeExoticLineTerminatingCharactersToJustLineFeed williamr@2: }; williamr@2: williamr@2: /** Output flag used to indicate whether or not a character in the source williamr@2: descriptor is the first half of a surrogate pair, but is the last williamr@2: character in the descriptor to convert. williamr@2: williamr@2: Note: This enumeration can be used in the DoConvertToUnicode() and williamr@2: DoConvertFromUnicode() functions. These are part of the williamr@2: Character Conversion Plug-in Provider API and are for use by plug-in williamr@2: conversion libraries only. williamr@2: @since 6.0 */ williamr@2: enum williamr@2: { williamr@2: /** Appends the converted text to the output descriptor.*/ williamr@2: EInputConversionFlagAppend =0x00010000, williamr@2: /** By default, when the input descriptor passed to DoConvertFromUnicode() williamr@2: or DoConvertToUnicode() consists of nothing but a truncated sequence, williamr@2: the error-code EErrorIllFormedInput is returned. williamr@2: If this behaviour is undesirable, the input flag williamr@2: EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable williamr@2: should be set. */ williamr@2: EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable =0x00020000, williamr@2: /** Stops converting when the first unconvertible character is reached. */ williamr@2: EInputConversionFlagStopAtFirstUnconvertibleCharacter =0x00040000, williamr@2: /** Appends the default character set Escape sequence at end of converted text */ williamr@2: EInputConversionFlagMustEndInDefaultCharacterSet =0x00080000, williamr@2: /*defect fix: INC053609; According to RFC1468 we can assume the line starts williamr@2: in ASCII so there is no need to always insert an escape sequence*/ williamr@2: EInputConversionFlagAssumeStartInDefaultCharacterSet =0x00100000 williamr@2: }; williamr@2: enum williamr@2: { williamr@2: /** Indicates whether or not the source descriptor ends in a truncated williamr@2: sequence, e.g. the first half only of a surrogate pair. */ williamr@2: EOutputConversionFlagInputIsTruncated =0x01000000 williamr@2: }; williamr@2: williamr@2: /** Initial value for the state argument in a set of related calls to williamr@2: ConvertToUnicode(). */ williamr@2: enum {KStateDefault=0}; williamr@2: enum williamr@2: { williamr@2: /** The lowest confidence value for a character set accepted by williamr@2: Autodetect*/ williamr@2: ELowestThreshold = 25 williamr@2: }; williamr@2: williamr@2: /** Stores information about a non-Unicode character set. The information williamr@2: is used to locate the conversion information required by williamr@2: ConvertFromUnicode() and ConvertToUnicode(). williamr@2: williamr@2: An array of these structs that contain all available character sets williamr@2: can be generated by CreateArrayOfCharacterSetsAvailableLC() and williamr@2: CreateArrayOfCharacterSetsAvailableL(), and is used by one of the williamr@2: overloads of PrepareToConvertToOrFromL(). */ williamr@2: struct SCharacterSet williamr@2: { williamr@2: /** Gets the character sets UID. williamr@2: williamr@2: @return The UID of the character set. */ williamr@2: inline TUint Identifier() const {return iIdentifier;} williamr@2: williamr@2: /** Tests whether a filename given by the function SCharacterSet::Name() williamr@2: is a real file name (i.e. conversion is provided by a plug in DLL), or williamr@2: just the character set name (i.e. conversion is built into Symbian OS). williamr@2: williamr@2: Note: If the function returns ETrue then the path and filename can be williamr@2: parsed using TParse or TParsePtrC functions to obtain just the filename. williamr@2: williamr@2: @return ETrue if the name is a real filename. EFalse if it is just the williamr@2: character set name. */ williamr@2: inline TBool NameIsFileName() const {return iFlags&EFlagNameIsFileName;} williamr@2: williamr@2: /** Gets the full path and filename of the DLL which implements williamr@2: conversion for the character set. williamr@2: williamr@2: If the character set is one for which conversion is built into Symbian williamr@2: OS rather than implemented by a plug in DLL, the function just returns williamr@2: the name of the character set. The NameIsFileName() function can be williamr@2: used to determine whether or not it is legal to create a TParsePtrC williamr@2: object over the descriptor returned by Name(). williamr@2: williamr@2: Notes: williamr@2: williamr@2: The name returned cannot be treated as an Internet-standard name, it williamr@2: is locale-independent and should be mapped to the locale-dependent name williamr@2: by software at a higher level before being shown to the user. Conversion williamr@2: from Internet-standard names of character sets to the UID identifiers williamr@2: is provided by the member function williamr@2: ConvertStandardNameOfCharacterSetToIdentifierL(). williamr@2: williamr@2: Typically, to find the user-displayable name (as opposed to the williamr@2: internet-standard name) of a character set, you would do something williamr@2: like this: williamr@2: williamr@2: @code williamr@2: const CCnvCharacterSetConverter::SCharacterSet& characterSet=...; williamr@2: const TPtrC userDisplayable(characterSet.NameIsFileName()? TParsePtrC(characterSet.Name()).Name(): williamr@2: characterSet.Name()); williamr@2: @endcode williamr@2: williamr@2: @return Full path and filename of the character set converter plug in williamr@2: DLL, or just the name of the character set. */ williamr@2: inline TPtrC Name() const {return *iName;} williamr@2: private: williamr@2: enum williamr@2: { williamr@2: EFlagNameIsFileName =0x00000001, williamr@2: EFlagFileIsConversionPlugInLibrary =0x00000002 williamr@2: }; williamr@2: private: williamr@2: inline TBool FileIsConversionPlugInLibrary() const {return iFlags&EFlagFileIsConversionPlugInLibrary;} williamr@2: private: williamr@2: TUint iIdentifier; williamr@2: TUint iFlags; williamr@2: HBufC* iName; williamr@2: private: williamr@2: friend class CCnvCharacterSetConverter; williamr@2: friend class CDeepDestructingArrayOfCharactersSets; williamr@2: }; //SCharacterSet williamr@2: williamr@2: williamr@2: /** williamr@2: Holds an ascending array of the indices of the characters in the williamr@2: source Unicode text which could not be converted by williamr@2: CCnvCharacterSetConverter::ConvertFromUnicode() into the foreign williamr@2: character set williamr@2: @publishedAll williamr@2: @released williamr@2: */ williamr@2: class TArrayOfAscendingIndices williamr@2: { williamr@2: public: williamr@2: /** The return value of CCnvCharacterSetConverter::AppendIndex(). */ williamr@2: enum TAppendResult williamr@2: { williamr@2: /** The append failed. */ williamr@2: EAppendFailed, williamr@2: /** The append succeeded. */ williamr@2: EAppendSuccessful williamr@2: }; williamr@2: public: williamr@2: /** C++ constructor. The array is initialised to be of length zero. */ williamr@2: inline TArrayOfAscendingIndices() :iArrayOfIndices(0) {} williamr@2: williamr@2: IMPORT_C TAppendResult AppendIndex(TInt aIndex); williamr@2: williamr@2: /** Deletes a single index from the array. williamr@2: williamr@2: @param aIndexOfIndex The index of the index to delete. Must not be williamr@2: negative and must not be greater than the length of the array, or a williamr@2: panic occurs. */ williamr@2: inline void Remove(TInt aIndexOfIndex) {iArrayOfIndices.Delete(aIndexOfIndex, 1);} williamr@2: williamr@2: /** Deletes all indices from the array. */ williamr@2: inline void RemoveAll() {iArrayOfIndices.SetLength(0);} williamr@2: williamr@2: /** Returns the number of indices in the array. williamr@2: williamr@2: @return The number of indices in the array. */ williamr@2: inline TInt NumberOfIndices() const {return iArrayOfIndices.Length();} williamr@2: williamr@2: /** Gets the value of the specified index. williamr@2: williamr@2: @param aIndexOfIndex Index into the array. williamr@2: @return The value of the index. */ williamr@2: inline TInt operator[](TInt aIndexOfIndex) const {return iArrayOfIndices[aIndexOfIndex];} williamr@2: private: williamr@2: enum {KMaximumNumberOfIndices=25}; williamr@2: private: williamr@2: TBuf16 iArrayOfIndices; williamr@2: }; williamr@2: public: williamr@2: IMPORT_C static CCnvCharacterSetConverter* NewL(); williamr@2: IMPORT_C static CCnvCharacterSetConverter* NewLC(); williamr@2: IMPORT_C virtual ~CCnvCharacterSetConverter(); williamr@2: IMPORT_C static CArrayFix* CreateArrayOfCharacterSetsAvailableL(RFs& aFileServerSession); williamr@2: IMPORT_C static CArrayFix* CreateArrayOfCharacterSetsAvailableLC(RFs& aFileServerSession); williamr@2: IMPORT_C TUint ConvertStandardNameOfCharacterSetToIdentifierL(const TDesC8& aStandardNameOfCharacterSet, RFs& aFileServerSession); williamr@2: IMPORT_C HBufC8* ConvertCharacterSetIdentifierToStandardNameL(TUint aCharacterSetIdentifier, RFs& aFileServerSession); williamr@2: IMPORT_C TUint ConvertMibEnumOfCharacterSetToIdentifierL(TInt aMibEnumOfCharacterSet, RFs& aFileServerSession); williamr@2: IMPORT_C TInt ConvertCharacterSetIdentifierToMibEnumL(TUint aCharacterSetIdentifier, RFs& aFileServerSession); williamr@2: IMPORT_C void PrepareToConvertToOrFromL(TUint aCharacterSetIdentifier, const CArrayFix& aArrayOfCharacterSetsAvailable, RFs& aFileServerSession); williamr@2: IMPORT_C TAvailability PrepareToConvertToOrFromL(TUint aCharacterSetIdentifier, RFs& aFileServerSession); williamr@2: // the following attribute-setting functions should be called (if at all) after calling PrepareToConvertToOrFromL and before calling ConvertFromUnicode and/or ConvertToUnicode williamr@2: IMPORT_C void SetDefaultEndiannessOfForeignCharacters(TEndianness aEndianness); williamr@2: IMPORT_C void SetDowngradeForExoticLineTerminatingCharacters(TDowngradeForExoticLineTerminatingCharacters aDowngradeForExoticLineTerminatingCharacters); // by default this attribute is set to EDowngradeExoticLineTerminatingCharactersToCarriageReturnLineFeed williamr@2: IMPORT_C void SetReplacementForUnconvertibleUnicodeCharactersL(const TDesC8& aReplacementForUnconvertibleUnicodeCharacters); // must be a single character preceded by its escape sequence (if any), and must be little-endian if the endianness of the character-set is unspecified, otherwise in the same endianness as the character-set williamr@2: williamr@2: // the conversion functions return either one of the TError values above, or the number of unconverted elements left at the end of the input descriptor williamr@2: IMPORT_C TInt ConvertFromUnicode(TDes8& aForeign, const TDesC16& aUnicode) const; williamr@2: IMPORT_C TInt ConvertFromUnicode(TDes8& aForeign, const TDesC16& aUnicode, TInt& aNumberOfUnconvertibleCharacters) const; williamr@2: IMPORT_C TInt ConvertFromUnicode(TDes8& aForeign, const TDesC16& aUnicode, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstUnconvertibleCharacter) const; williamr@2: IMPORT_C TInt ConvertFromUnicode(TDes8& aForeign, const TDesC16& aUnicode, TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters) const; williamr@2: IMPORT_C TInt ConvertToUnicode(TDes16& aUnicode, const TDesC8& aForeign, TInt& aState) const; williamr@2: IMPORT_C TInt ConvertToUnicode(TDes16& aUnicode, const TDesC8& aForeign, TInt& aState, TInt& aNumberOfUnconvertibleCharacters) const; williamr@2: IMPORT_C TInt ConvertToUnicode(TDes16& aUnicode, const TDesC8& aForeign, TInt& aState, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter) const; williamr@2: IMPORT_C static void AutoDetectCharacterSetL(TInt& aConfidenceLevel, TUint& aCharacterSetIdentifier, const CArrayFix& aArrayOfCharacterSetsAvailable, const TDesC8& aSample); williamr@2: IMPORT_C void AutoDetectCharSetL(TInt& aConfidenceLevel, TUint& aCharacterSetIdentifier, const CArrayFix& aArrayOfCharacterSetsAvailable, const TDesC8& aSample); williamr@2: IMPORT_C static void ConvertibleToCharacterSetL(TInt& aConfidenceLevel, const TUint aCharacterSetIdentifier,const CArrayFix& aArrayOfCharacterSetsAvailable, const TDesC8& aSample); williamr@2: IMPORT_C void ConvertibleToCharSetL(TInt& aConfidenceLevel, const TUint aCharacterSetIdentifier,const CArrayFix& aArrayOfCharacterSetsAvailable, const TDesC8& aSample); williamr@2: IMPORT_C void SetMaxCacheSize(TInt aSize); williamr@2: // the following functions are only to be called by conversion plug-in libraries williamr@2: IMPORT_C static TInt DoConvertFromUnicode(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters, const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, TDes8& aForeign, const TDesC16& aUnicode, TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters); williamr@2: IMPORT_C static TInt DoConvertFromUnicode(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters, const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, TDes8& aForeign, const TDesC16& aUnicode, TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, TUint& aOutputConversionFlags, TUint aInputConversionFlags); williamr@2: IMPORT_C static TInt DoConvertToUnicode(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aForeign, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter); williamr@2: IMPORT_C static TInt DoConvertToUnicode(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aForeign, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, TUint& aOutputConversionFlags, TUint aInputConversionFlags); williamr@2: IMPORT_C static const SCnvConversionData& AsciiConversionData(); williamr@2: inline TDowngradeForExoticLineTerminatingCharacters GetDowngradeForExoticLineTerminatingCharacters () williamr@2: { williamr@2: return iDowngradeForExoticLineTerminatingCharacters ; williamr@2: } ; williamr@2: williamr@2: private: williamr@2: enum williamr@2: { williamr@2: EStoredFlagOwnsConversionData =0x00000001, williamr@2: EStoredFlagConversionPlugInLibraryIsLoaded =0x00000002 williamr@2: }; williamr@2: enum TCharacterSetSearch williamr@2: { williamr@2: EStopCharacterSetSearch, williamr@2: EContinueCharacterSetSearch williamr@2: }; williamr@2: enum TConversionPlugInFunctionOrdinals williamr@2: { williamr@2: EReplacementForUnconvertibleUnicodeCharacters=1, williamr@2: EConvertFromUnicode=2, williamr@2: EConvertToUnicode=3, williamr@2: EIsInThisCharacterSet=4 williamr@2: }; williamr@2: williamr@2: private: williamr@2: CCnvCharacterSetConverter(); williamr@2: void ConstructL(); williamr@2: static CArrayFix* DoCreateArrayOfCharacterSetsAvailableLC(RFs& aFileServerSession, TUint aIdentifierOfOnlyCharacterSetOfInterest); williamr@2: static TCharacterSetSearch AppendHardCodedCharacterSetIfRequiredL(CArrayFix& aArrayOfCharacterSets, TUint aIdentifierOfOnlyCharacterSetOfInterest, TUint aIdentifierOfHardCodedCharacterSet, const TDesC& aNameOfHardCodedCharacterSet); williamr@2: void ScanForStandardNamesAndMibEnumsL(RFs& aFileServerSession); williamr@2: void ScanForStandardNamesAndMibEnumsROMOnlyL(RFs& aFileServerSession); williamr@2: TAvailability DoPrepareToConvertToOrFromL(TUint aCharacterSetIdentifier, const CArrayFix* aArrayOfCharacterSetsAvailable, RFs& aFileServerSession); williamr@2: static void DeleteConversionData(const SCnvConversionData* aConversionData); williamr@2: static void DeleteConversionData(TAny* aConversionData); williamr@2: static TEndianness EndiannessOfForeignCharacters(const SCnvConversionData& aConversionData, TEndianness aDefaultEndiannessOfForeignCharacters); williamr@2: williamr@2: private: williamr@2: TUint iStoredFlags; williamr@2: TUint iCharacterSetIdentifierOfLoadedConversionData; // 0 or a UID of the loaded plugin williamr@2: const SCnvConversionData* iConversionData; williamr@2: TEndianness iDefaultEndiannessOfForeignCharacters; williamr@2: TDowngradeForExoticLineTerminatingCharacters iDowngradeForExoticLineTerminatingCharacters; williamr@2: TBuf8 iReplacementForUnconvertibleUnicodeCharacters; williamr@2: CStandardNamesAndMibEnums* iStandardNamesAndMibEnums; williamr@4: TBool iTlsDataConstructed; williamr@2: CCharsetCnvCache* iCharsetCnvCache; williamr@2: TBool iIsSystemStandardNamesAndMibEnumsScanned; williamr@2: }; williamr@2: williamr@2: #endif williamr@4: