os/textandloc/charconvfw/charconv_fw/inc/convutils.h
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/textandloc/charconvfw/charconv_fw/inc/convutils.h	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,171 @@
     1.4 +/*
     1.5 +* Copyright (c) 2000-2009 Nokia Corporation and/or its subsidiary(-ies).
     1.6 +* All rights reserved.
     1.7 +* This component and the accompanying materials are made available
     1.8 +* under the terms of "Eclipse Public License v1.0"
     1.9 +* which accompanies this distribution, and is available
    1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
    1.11 +*
    1.12 +* Initial Contributors:
    1.13 +* Nokia Corporation - initial contribution.
    1.14 +*
    1.15 +* Contributors:
    1.16 +*
    1.17 +* Description: 
    1.18 +*
    1.19 +*/
    1.20 +
    1.21 +
    1.22 +#if !defined(__CONVUTILS_H__)
    1.23 +#define __CONVUTILS_H__
    1.24 +
    1.25 +#if !defined(__E32STD_H__)
    1.26 +#include <e32std.h>
    1.27 +#endif
    1.28 +
    1.29 +#if !defined(__E32BASE_H__)
    1.30 +#include <e32base.h>
    1.31 +#endif
    1.32 +
    1.33 +#if !defined(__CHARCONV_H__)
    1.34 +#include <charconv.h>
    1.35 +#endif
    1.36 +
    1.37 +struct SCnvConversionData; // forward declaration: this header only refers to it through pointers
    1.38 +
    1.39 + 
    1.40 +class CnvUtilities
    1.41 +/** 
    1.42 +Provides static character conversion utilities for complex encodings. Its functions 
    1.43 +may be called from a plug-in DLL's implementation of ConvertFromUnicode() 
    1.44 +and ConvertToUnicode().
    1.45 +
    1.46 +These utility functions are provided for use when converting to/from complex 
    1.47 +character set encodings, including modal encodings. Modal encodings are those 
    1.48 +where the interpretation of a given byte of data is dependent on the current 
    1.49 +mode; mode changing is performed by escape sequences which occur in the byte 
    1.50 +stream. A non-modal complex encoding is one in which characters are encoded 
    1.51 +using variable numbers of bytes. The number of bytes used to encode a character 
    1.52 +depends on the value of the initial byte.
    1.53 +@publishedAll 
    1.54 +@released
    1.55 +*/
    1.56 +	{
    1.57 +public:
    1.58 +	// type definitions for converting from Unicode
    1.59 +	
    1.60 +	/**  A pointer to a function which "mangles" text when converting from
    1.61 +	Unicode into a complex modal or non-modal foreign character set
    1.62 +	encoding.
    1.63 +
    1.64 +	It might insert a shifting character, escape sequence, or other
    1.65 +	special characters. If the target character set encoding is modal, the
    1.66 +	implementation of this function may call the
    1.67 +	CnvUtilities::ConvertFromIntermediateBufferInPlace()
    1.68 +	utility function which is provided because many modal character sets
    1.69 +	require an identical implementation of this function.
    1.70 +
    1.71 +	" convutils.lib " */
    1.72 +    typedef void (*FConvertFromIntermediateBufferInPlace)(TInt aStartPositionInDescriptor, TDes8& aDescriptor, TInt& aNumberOfCharactersThatDroppedOut);
    1.73 +	struct SCharacterSet
    1.74 +	/** Stores information about a non-Unicode character set. The information 
    1.75 +	is used to locate the conversion information required by 
    1.76 +	ConvertFromUnicode(), which takes an array of these structs as its 
    1.77 +	aArrayOfCharacterSets argument.
    1.78 +	NOTE(review): earlier wording here cited ConvertToUnicode(), 
    1.79 +	CreateArrayOfCharacterSetsAvailableLC()/CreateArrayOfCharacterSetsAvailableL() 
    1.80 +	and PrepareToConvertToOrFromL(); none of these uses this struct in this class - 
    1.81 +	the text looks copied from CCnvCharacterSetConverter::SCharacterSet (confirm). */
    1.82 +		{
    1.83 +		/** The conversion data. */
    1.84 +		const SCnvConversionData* iConversionData; // must *not* be set to NULL
    1.85 +		/** A pointer to a function which "mangles" the text in a way 
    1.86 +		appropriate to the target complex character set. For instance it 
    1.87 +		might insert a shifting character, escape sequence, or other special 
    1.88 +		characters. */
    1.89 +		FConvertFromIntermediateBufferInPlace iConvertFromIntermediateBufferInPlace; // must *not* be set to NULL
    1.90 +		/** The escape sequence which introduces the character set, i.e. it 
    1.91 +		identifies this character set as the next one to use. Must not be NULL.
    1.92 +		If the character set is non-modal, this should be set to an empty 
    1.93 +		descriptor. */
    1.94 +		const TDesC8* iEscapeSequence; // must *not* be set to NULL
    1.95 +		};
    1.96 +	// type definitions for converting to Unicode
    1.97 +
    1.98 +	/** A pointer to a function which calculates the number of consecutive
    1.99 +	bytes in the remainder of the foreign descriptor which can be
   1.100 +	converted using the current character set's conversion data.
   1.101 +
   1.102 +	Called when converting from a non-modal complex character set encoding
   1.103 +	into Unicode. It may return a negative
   1.104 +	CCnvCharacterSetConverter::TError value to indicate an
   1.105 +	error in the encoding.
   1.106 +
   1.107 +	" convutils.lib " */
   1.108 +	typedef TInt (*FNumberOfBytesAbleToConvert)(const TDesC8& aDescriptor); // may return negative CCnvCharacterSetConverter::TError values
   1.109 +	
   1.110 +	/** A pointer to a function which prepares the text for conversion into
   1.111 +	Unicode.
   1.112 +
   1.113 +	For instance it might remove any shifting or other special characters.
   1.114 +	Called when converting from a non-modal complex character set encoding
   1.115 +	into Unicode.
   1.116 +
   1.117 +	" convutils.lib " */
   1.118 + 	typedef void (*FConvertToIntermediateBufferInPlace)(TDes8& aDescriptor);
   1.119 +
   1.120 +	struct SState
   1.121 +	/** Character conversion data for one of the character sets which is 
   1.122 +	specified in a modal character set encoding. An array of these structs 
   1.123 +	is used when converting from a modal character set into Unicode, using 
   1.124 +	CnvUtilities::ConvertToUnicodeFromModalForeign(). Neither of the members 
   1.125 +	may be NULL. */
   1.126 +		{
   1.127 +		/** The escape sequence which introduces the character set, i.e. it 
   1.128 +		identifies this character set as the next one to use. This must begin 
   1.129 +		with KControlCharacterEscape. */
   1.130 +		const TDesC8* iEscapeSequence; // must *not* be set to NULL and must begin with 0x1b
   1.131 +		/** The conversion data. */
   1.132 +		const SCnvConversionData* iConversionData; // must *not* be set to NULL
   1.133 +		};
   1.134 +	struct SMethod /** Conversion data and callbacks for one of the methods used when converting from a non-modal complex character set encoding into Unicode. An array of these structs is passed as aArrayOfMethods to CnvUtilities::ConvertToUnicodeFromHeterogeneousForeign(). */
   1.135 +		{
   1.136 +		/** A pointer to a function which calculates the number of consecutive 
   1.137 +		bytes in the remainder of the foreign descriptor which can be converted 
   1.138 +		using the current character set's conversion data. It may return a 
   1.139 +		negative CCnvCharacterSetConverter::TError value to indicate an error 
   1.140 +		in the encoding. */
   1.141 +		FNumberOfBytesAbleToConvert iNumberOfBytesAbleToConvert; // must *not* be set to NULL
   1.142 +		/** A pointer to a function which prepares the text for conversion 
   1.143 +		into Unicode. For instance it might remove any shifting or other 
   1.144 +		special characters. */
   1.145 +		FConvertToIntermediateBufferInPlace iConvertToIntermediateBufferInPlace; // must *not* be set to NULL
   1.146 +		/** The conversion data. */
   1.147 +		const SCnvConversionData* iConversionData; // must *not* be set to NULL
   1.148 +		/** The number of bytes per character. */
   1.149 +		TInt16 iNumberOfBytesPerCharacter;
   1.150 +		/** The number of core bytes per character. */
   1.151 +		TInt16 iNumberOfCoreBytesPerCharacter;
   1.152 +		};
   1.153 +public:
   1.154 +	// these functions may *not* have CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter set in aInputConversionFlags
   1.155 +	IMPORT_C static TInt ConvertFromUnicode(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, TDes8& aForeign, const TDesC16& aUnicode, CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, const TArray<SCharacterSet>& aArrayOfCharacterSets);
   1.156 +	IMPORT_C static TInt ConvertFromUnicode(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, TDes8& aForeign, const TDesC16& aUnicode, CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, const TArray<SCharacterSet>& aArrayOfCharacterSets, TUint& aOutputConversionFlags, TUint aInputConversionFlags);
   1.157 +	IMPORT_C static void ConvertFromIntermediateBufferInPlace(TInt aStartPositionInDescriptor, TDes8& aDescriptor, TInt& aNumberOfCharactersThatDroppedOut, const TDesC8& aEscapeSequence, TInt aNumberOfBytesPerCharacter); // utility which an FConvertFromIntermediateBufferInPlace implementation for a modal character set may call
   1.158 +	IMPORT_C static TInt ConvertToUnicodeFromModalForeign(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aForeign, TInt& aState, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, const TArray<SState>& aArrayOfStates); // the first element of aArrayOfStates is taken to be the default state
   1.159 +	IMPORT_C static TInt ConvertToUnicodeFromModalForeign(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aForeign, TInt& aState, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, const TArray<SState>& aArrayOfStates, TUint& aOutputConversionFlags, TUint aInputConversionFlags); // the first element of aArrayOfStates is taken to be the default state
   1.160 +	IMPORT_C static TInt ConvertToUnicodeFromHeterogeneousForeign(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aForeign, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, const TArray<SMethod>& aArrayOfMethods);
   1.161 +	IMPORT_C static TInt ConvertToUnicodeFromHeterogeneousForeign(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aForeign, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, const TArray<SMethod>& aArrayOfMethods, TUint& aOutputConversionFlags, TUint aInputConversionFlags);
   1.162 +private: // internal helpers; all except ReduceToNearestMultipleOf are only declared here and defined outside this header
   1.163 +	static void CheckArrayOfCharacterSets(const TArray<SCharacterSet>& aArrayOfCharacterSets);
   1.164 +	static void CheckArrayOfStates(const TArray<SState>& aArrayOfStates);
   1.165 +	static void CheckArrayOfMethods(const TArray<SMethod>& aArrayOfMethods);
   1.166 +	static TInt LengthOfUnicodeCharacter(const TDesC16& aUnicode, TInt aIndex);
   1.167 +	static TBool NextHomogeneousForeignRun(const SCnvConversionData*& aConversionData, TInt& aNumberOfForeignBytesConsumed, TPtrC8& aHomogeneousRun, TPtrC8& aRemainderOfForeign, const TArray<SState>& aArrayOfStates, TUint& aOutputConversionFlags);
   1.168 +	static TBool MatchesEscapeSequence(TInt& aNumberOfForeignBytesConsumed, TPtrC8& aHomogeneousRun, TPtrC8& aRemainderOfForeign, const TDesC8& aEscapeSequence);
   1.169 +	static TBool IsStartOf(const TDesC8& aStart, const TDesC8& aPotentiallyLongerDescriptor);
   1.170 +	inline static TInt ReduceToNearestMultipleOf(TInt aNumber1, TInt aNumber2) {return (aNumber1/aNumber2)*aNumber2;} // discards the remainder of the integer division (truncates rather than rounding to nearest, despite the name); aNumber2 must not be zero
   1.171 +	};
   1.172 +
   1.173 +#endif
   1.174 +