os/textandloc/charconvfw/charconv_fw/inc/convutils.h
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
sl@0
     1
/*
sl@0
     2
* Copyright (c) 2000-2009 Nokia Corporation and/or its subsidiary(-ies).
sl@0
     3
* All rights reserved.
sl@0
     4
* This component and the accompanying materials are made available
sl@0
     5
* under the terms of "Eclipse Public License v1.0"
sl@0
     6
* which accompanies this distribution, and is available
sl@0
     7
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
sl@0
     8
*
sl@0
     9
* Initial Contributors:
sl@0
    10
* Nokia Corporation - initial contribution.
sl@0
    11
*
sl@0
    12
* Contributors:
sl@0
    13
*
sl@0
    14
* Description: 
sl@0
    15
*
sl@0
    16
*/
sl@0
    17
sl@0
    18
sl@0
    19
#if !defined(__CONVUTILS_H__)
sl@0
    20
#define __CONVUTILS_H__
sl@0
    21
sl@0
    22
#if !defined(__E32STD_H__)
sl@0
    23
#include <e32std.h>
sl@0
    24
#endif
sl@0
    25
sl@0
    26
#if !defined(__E32BASE_H__)
sl@0
    27
#include <e32base.h>
sl@0
    28
#endif
sl@0
    29
sl@0
    30
#if !defined(__CHARCONV_H__)
sl@0
    31
#include <charconv.h>
sl@0
    32
#endif
sl@0
    33
sl@0
    34
struct SCnvConversionData;
sl@0
    35
sl@0
    36
 
sl@0
    37
class CnvUtilities
sl@0
    38
/** 
sl@0
    39
Provides static character conversion utilities for complex encodings. Its functions 
sl@0
    40
may be called from a plug-in DLL's implementation of ConvertFromUnicode() 
sl@0
    41
and ConvertToUnicode().
sl@0
    42
sl@0
    43
These utility functions are provided for use when converting to/from complex 
sl@0
    44
character set encodings, including modal encodings. Modal encodings are those 
sl@0
    45
where the interpretation of a given byte of data is dependent on the current 
sl@0
    46
mode; mode changing is performed by escape sequences which occur in the byte 
sl@0
    47
stream. A non-modal complex encoding is one in which characters are encoded 
sl@0
    48
using variable numbers of bytes. The number of bytes used to encode a character 
sl@0
    49
depends on the value of the initial byte.
sl@0
    50
@publishedAll 
sl@0
    51
@released
sl@0
    52
*/
sl@0
    53
	{
sl@0
    54
public:
sl@0
    55
	// type definitions for converting from Unicode
sl@0
    56
	
sl@0
    57
	/**  A pointer to a function which "mangles" text when converting from
sl@0
    58
	Unicode into a complex modal or non-modal foreign character set
sl@0
    59
	encoding.
sl@0
    60
sl@0
    61
	It might insert a shifting character, escape sequence, or other
sl@0
    62
	special characters. If the target character set encoding is modal, the
sl@0
    63
	implementation of this function may call the
sl@0
    64
	CnvUtilities::ConvertFromIntermediateBufferInPlace()
sl@0
    65
	utility function which is provided because many modal character sets
sl@0
    66
	require an identical implementation of this function.
sl@0
    67
sl@0
    68
	" convutils.lib " */
sl@0
    69
    typedef void (*FConvertFromIntermediateBufferInPlace)(TInt aStartPositionInDescriptor, TDes8& aDescriptor, TInt& aNumberOfCharactersThatDroppedOut);
sl@0
    70
	struct SCharacterSet
sl@0
    71
	/** Stores information about a non-Unicode character set. The information
sl@0
    72
	is used to locate the conversion information required by
sl@0
    73
	ConvertFromUnicode() and ConvertToUnicode().
sl@0
    74
sl@0
    75
	An array of these structs that contains all available character sets
sl@0
    76
	can be generated by CreateArrayOfCharacterSetsAvailableLC() and
sl@0
    77
	CreateArrayOfCharacterSetsAvailableL(), and is used by one of the
sl@0
    78
	overloads of PrepareToConvertToOrFromL().

	Within CnvUtilities, an array of these structs is passed as the
	aArrayOfCharacterSets argument of both ConvertFromUnicode() overloads.
	The trailing comments on the members state that none of the three
	pointers may be NULL.

	NOTE(review): CreateArrayOfCharacterSetsAvailableLC(),
	CreateArrayOfCharacterSetsAvailableL() and PrepareToConvertToOrFromL()
	are not declared by this class - confirm that the paragraph above really
	applies to this struct. */
sl@0
    79
		{
sl@0
    80
		/** The conversion data. */
sl@0
    81
		const SCnvConversionData* iConversionData; // must *not* be set to NULL
sl@0
    82
		/** A pointer to a function which "mangles" the text in a way
sl@0
    83
		appropriate to the target complex character set. For instance it
sl@0
    84
		might insert a shifting character, escape sequence, or other special
sl@0
    85
		characters. */
sl@0
    86
		FConvertFromIntermediateBufferInPlace iConvertFromIntermediateBufferInPlace; // must *not* be set to NULL
sl@0
    87
		/** The escape sequence which introduces the character set, i.e. it
sl@0
    88
		identifies this character set as the next one to use. Must not be NULL.
sl@0
    89
		If the character set is non-modal, this should be set to an empty
sl@0
    90
		descriptor (an empty descriptor, not a NULL pointer). */
sl@0
    91
		const TDesC8* iEscapeSequence; // must *not* be set to NULL
sl@0
    92
		};
sl@0
    93
	// type definitions for converting to Unicode
sl@0
    94
sl@0
    95
	/** A pointer to a function which calculates the number of consecutive
sl@0
    96
	bytes in the remainder of the foreign descriptor which can be
sl@0
    97
	converted using the current character set's conversion data.
sl@0
    98
sl@0
    99
	Called when converting from a non-modal complex character set encoding
sl@0
   100
	into Unicode. It may return a negative
sl@0
   101
	CCnvCharacterSetConverter::TError value to indicate an
sl@0
   102
	error in the encoding.
sl@0
   103
sl@0
   104
	" convutils.lib " */
sl@0
   105
	typedef TInt (*FNumberOfBytesAbleToConvert)(const TDesC8& aDescriptor); // may return negative CCnvCharacterSetConverter::TError values
sl@0
   106
	
sl@0
   107
	/** A pointer to a function which prepares the text for conversion into
sl@0
   108
	Unicode.
sl@0
   109
sl@0
   110
	For instance it might remove any shifting or other special characters.
sl@0
   111
	Called when converting from a non-modal complex character set encoding
sl@0
   112
	into Unicode.
sl@0
   113
sl@0
   114
	" convutils.lib " */
sl@0
   115
 	typedef void (*FConvertToIntermediateBufferInPlace)(TDes8& aDescriptor);
sl@0
   116
sl@0
   117
	struct SState
sl@0
   118
	/** Character conversion data for one of the character sets which is
sl@0
   119
	specified in a modal character set encoding. An array of these structs
sl@0
   120
	is used when converting from a modal character set into Unicode, using
sl@0
   121
	CnvUtilities::ConvertToUnicodeFromModalForeign(). The first element of
sl@0
   122
	that array is taken to be the default state. Neither of the members
	may be NULL. */
sl@0
   123
		{
sl@0
   124
		/** The escape sequence which introduces the character set, i.e. it
sl@0
   125
		identifies this character set as the next one to use. This must begin
sl@0
   126
		with KControlCharacterEscape (the byte 0x1b, per the trailing
		comment on the member). */
sl@0
   127
		const TDesC8* iEscapeSequence; // must *not* be set to NULL and must begin with 0x1b
sl@0
   128
		/** The conversion data used while this state is the current one. */
sl@0
   129
		const SCnvConversionData* iConversionData; // must *not* be set to NULL
sl@0
   130
		};
sl@0
   131
	struct SMethod
sl@0
   132
	/** Bundles the callbacks and conversion data for one of the ways in
	which bytes of a non-modal complex foreign encoding can be converted
	into Unicode. An array of these structs is passed as the aArrayOfMethods
	argument of both ConvertToUnicodeFromHeterogeneousForeign() overloads.
	The trailing comments on the members state that none of the three
	pointers may be NULL. */
		{
sl@0
   133
		/** A pointer to a function which calculates the number of consecutive
sl@0
   134
		bytes in the remainder of the foreign descriptor which can be converted
sl@0
   135
		using the current character set's conversion data. It may return a
sl@0
   136
		negative CCnvCharacterSetConverter::TError value to indicate an error
sl@0
   137
		in the encoding. */
sl@0
   138
		FNumberOfBytesAbleToConvert iNumberOfBytesAbleToConvert; // must *not* be set to NULL
sl@0
   139
		/** A pointer to a function which prepares the text for conversion
sl@0
   140
		into Unicode. For instance it might remove any shifting or other
sl@0
   141
		special characters. */
sl@0
   142
		FConvertToIntermediateBufferInPlace iConvertToIntermediateBufferInPlace; // must *not* be set to NULL
sl@0
   143
		/** The conversion data. */
sl@0
   144
		const SCnvConversionData* iConversionData; // must *not* be set to NULL
sl@0
   145
		/** The number of bytes per character. */
sl@0
   146
		TInt16 iNumberOfBytesPerCharacter;
sl@0
   147
		/** The number of core bytes per character.
		NOTE(review): presumably the bytes left once shifting/special bytes
		have been removed by iConvertToIntermediateBufferInPlace - confirm
		against the implementation. */
sl@0
   148
		TInt16 iNumberOfCoreBytesPerCharacter;
sl@0
   149
		};
sl@0
   150
public:
sl@0
   151
	// Restriction shared by the functions below: aInputConversionFlags must
	// *not* have CCnvCharacterSetConverter::EInputConversionFlagStopAtFirstUnconvertibleCharacter set.
	// NOTE(review): the meaning of the TInt return values is not visible in this
	// header - presumably a count of unconverted elements or a negative
	// CCnvCharacterSetConverter::TError; confirm against the implementation.
sl@0
   152
	/** Converts from Unicode (aUnicode, read-only) into a complex foreign
	encoding (aForeign, writable), choosing among the character sets
	described by aArrayOfCharacterSets (see SCharacterSet). This overload
	takes no conversion flags. */
	IMPORT_C static TInt ConvertFromUnicode(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, TDes8& aForeign, const TDesC16& aUnicode, CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, const TArray<SCharacterSet>& aArrayOfCharacterSets);
sl@0
   153
	/** As the overload above, with an additional output-flags reference
	(aOutputConversionFlags) and caller-supplied aInputConversionFlags. */
	IMPORT_C static TInt ConvertFromUnicode(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, TDes8& aForeign, const TDesC16& aUnicode, CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, const TArray<SCharacterSet>& aArrayOfCharacterSets, TUint& aOutputConversionFlags, TUint aInputConversionFlags);
sl@0
   154
	/** Shared helper for FConvertFromIntermediateBufferInPlace
	implementations of modal character sets. It takes the same three leading
	arguments as that function type plus the escape sequence and the number
	of bytes per character of the character set concerned. */
	IMPORT_C static void ConvertFromIntermediateBufferInPlace(TInt aStartPositionInDescriptor, TDes8& aDescriptor, TInt& aNumberOfCharactersThatDroppedOut, const TDesC8& aEscapeSequence, TInt aNumberOfBytesPerCharacter);
sl@0
   155
	/** Converts from a modal foreign encoding (aForeign, read-only) into
	Unicode (aUnicode, writable) using the states in aArrayOfStates (see
	SState). aState is passed by reference, so it can be both read and
	updated by the call. This overload takes no conversion flags. */
	IMPORT_C static TInt ConvertToUnicodeFromModalForeign(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aForeign, TInt& aState, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, const TArray<SState>& aArrayOfStates); // the first element of aArrayOfStates is taken to be the default state
sl@0
   156
	/** As the overload above, with an additional output-flags reference
	(aOutputConversionFlags) and caller-supplied aInputConversionFlags. */
	IMPORT_C static TInt ConvertToUnicodeFromModalForeign(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aForeign, TInt& aState, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, const TArray<SState>& aArrayOfStates, TUint& aOutputConversionFlags, TUint aInputConversionFlags); // the first element of aArrayOfStates is taken to be the default state
sl@0
   157
	/** Converts from a foreign encoding (aForeign, read-only) into Unicode
	(aUnicode, writable) using the methods in aArrayOfMethods (see SMethod).
	This overload takes no conversion flags. */
	IMPORT_C static TInt ConvertToUnicodeFromHeterogeneousForeign(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aForeign, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, const TArray<SMethod>& aArrayOfMethods);
sl@0
   158
	/** As the overload above, with an additional output-flags reference
	(aOutputConversionFlags) and caller-supplied aInputConversionFlags. */
	IMPORT_C static TInt ConvertToUnicodeFromHeterogeneousForeign(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aForeign, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, const TArray<SMethod>& aArrayOfMethods, TUint& aOutputConversionFlags, TUint aInputConversionFlags);
sl@0
   159
private:
sl@0
   160
	// Internal helpers; their implementations are not part of this header, so
	// the notes below are derived from the signatures only.
	// One checker per array type accepted by the public functions.
	// NOTE(review): presumably these enforce the "must *not* be set to NULL"
	// rules stated on the struct members - confirm in the implementation.
	static void CheckArrayOfCharacterSets(const TArray<SCharacterSet>& aArrayOfCharacterSets);
sl@0
   161
	static void CheckArrayOfStates(const TArray<SState>& aArrayOfStates);
sl@0
   162
	static void CheckArrayOfMethods(const TArray<SMethod>& aArrayOfMethods);
sl@0
   163
	// NOTE(review): presumably the number of 16-bit units occupied by the
	// character starting at aIndex in aUnicode - confirm.
	static TInt LengthOfUnicodeCharacter(const TDesC16& aUnicode, TInt aIndex);
sl@0
   164
	// Takes the modal state array and reports through reference parameters:
	// the conversion data, the bytes consumed, the run found and the remainder.
	// NOTE(review): the meaning of the TBool result is not visible here - confirm.
	static TBool NextHomogeneousForeignRun(const SCnvConversionData*& aConversionData, TInt& aNumberOfForeignBytesConsumed, TPtrC8& aHomogeneousRun, TPtrC8& aRemainderOfForeign, const TArray<SState>& aArrayOfStates, TUint& aOutputConversionFlags);
sl@0
   165
	// Compares against a single escape sequence; shares its three leading
	// reference parameters with NextHomogeneousForeignRun().
	static TBool MatchesEscapeSequence(TInt& aNumberOfForeignBytesConsumed, TPtrC8& aHomogeneousRun, TPtrC8& aRemainderOfForeign, const TDesC8& aEscapeSequence);
sl@0
   166
	// NOTE(review): presumably true when aStart is a prefix of
	// aPotentiallyLongerDescriptor - confirm.
	static TBool IsStartOf(const TDesC8& aStart, const TDesC8& aPotentiallyLongerDescriptor);
sl@0
   167
	/** Reduces aNumber1 to a whole multiple of aNumber2 by dividing and
	multiplying back; integer division discards the remainder, so the result
	is rounded towards zero. aNumber2 is used as a divisor and therefore
	must not be zero. */
	inline static TInt ReduceToNearestMultipleOf(TInt aNumber1, TInt aNumber2)
		{
		const TInt wholeMultiples = aNumber1/aNumber2;
		return wholeMultiples*aNumber2;
		}
sl@0
   168
	};
sl@0
   169
sl@0
   170
#endif
sl@0
   171