os/textandloc/charconvfw/charconvplugins/src/plugins/ucs2.cpp
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 /*
     2 * Copyright (c) 2005-2009 Nokia Corporation and/or its subsidiary(-ies).
     3 * All rights reserved.
     4 * This component and the accompanying materials are made available
     5 * under the terms of "Eclipse Public License v1.0"
     6 * which accompanies this distribution, and is available
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
     8 *
     9 * Initial Contributors:
    10 * Nokia Corporation - initial contribution.
    11 *
    12 * Contributors:
    13 *
    14 * Description: 
    15 *
    16 */
    17 
    18 
    19 #include <e32std.h>
    20 #include <charconv.h>
    21 #include <convgeneratedcpp.h>
    22 #include "ucs2.h"
    23 #include <ecom/implementationproxy.h>
    24 #include <charactersetconverter.h>
    25 
    26 class CUCS2ConverterImpl : public CCharacterSetConverterPluginInterface
    27 	{
    28 
    29 public:
    30 	virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters();
    31 
    32 	virtual TInt ConvertFromUnicode(
    33 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
    34 		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
    35 		TDes8& aForeign, 
    36 		const TDesC16& aUnicode, 
    37 		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
    38 
    39 	virtual TInt ConvertToUnicode(
    40 		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
    41 		TDes16& aUnicode, 
    42 		const TDesC8& aForeign, 
    43 		TInt& aState, 
    44 		TInt& aNumberOfUnconvertibleCharacters, 
    45 		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
    46 
    47 	virtual TBool IsInThisCharacterSetL(
    48 		TBool& aSetToTrue, 
    49 		TInt& aConfidenceLevel, 
    50 		const TDesC8& aSample);
    51 
    52 	static CUCS2ConverterImpl* NewL();
    53 	virtual ~CUCS2ConverterImpl();
    54 
    55 private:
    56 	CUCS2ConverterImpl();
    57 
    58 	};
    59 
    60 
    61 
    62 // The following code has been copied and modified from the plugin computer generated code
    63 // that is generated from the charconv/data/???.cpl && /???.txt files
    64 // *** code begins ***
    65 #define ARRAY_LENGTH(aArray) (sizeof(aArray)/sizeof((aArray)[0]))
    66 
    67 #pragma warning (disable: 4049) // compiler limit : terminating line number emission
    68 
    69 _LIT8(KLit8ReplacementForUnconvertibleUnicodeCharacters, "\xff\xfd");
    70 
    71 GLDEF_C const TDesC8& ReplacementForUnconvertibleUnicodeCharacters_internal()
    72 	{
    73 	return KLit8ReplacementForUnconvertibleUnicodeCharacters;
    74 	}
    75 
    76 GLDEF_D const SCnvConversionData conversionData=
    77 	{
    78 	SCnvConversionData::EUnspecified,
    79 	{NULL,NULL},
    80 	{NULL,NULL},
    81 	{NULL,NULL},
    82 	NULL,
    83 	NULL
    84 	};
    85 
    86 
    87 const TInt    KByteOrderMark = 0xfeff;
    88 const TInt 	  KStateOffset = 0x1000;
    89 
    90 
    91 const TDesC8& CUCS2ConverterImpl::ReplacementForUnconvertibleUnicodeCharacters()
    92 	{
    93 	return ReplacementForUnconvertibleUnicodeCharacters_internal();
    94 	}
    95 
    96 
    97 /**
    98  * Takes a 16 bit UCS2 descriptor with or without BOM and translates it to an
    99  * eight bit descriptor in Big Endian format.
   100  *
   101  * Note aDefaultEndiannessOfForeignCharacters is not used by this converter
   102  *
   103  * @param CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters
   104  * @param const TDesC8&
   105  * @param TDes8& aForeign
   106  * @param const TDesC16& aUnicode
   107  * @param CCnvCharacterSetConverter::TArrayOfAscendingIndices&
   108  *
   109  * returns number of converted characters
   110 */
   111 TInt CUCS2ConverterImpl::ConvertFromUnicode(CCnvCharacterSetConverter::TEndianness /*aDefaultEndiannessOfForeignCharacters*/, 
   112 								 const TDesC8& /*aReplacementForUnconvertibleUnicodeCharacters*/, 
   113 								 TDes8& aForeign, 
   114 								 const TDesC16& aUnicode, 
   115 								 CCnvCharacterSetConverter::TArrayOfAscendingIndices& /*aIndicesOfUnconvertibleCharacters*/)
   116 	{
   117 	TInt numberOfUnicodeCharacters =0;
   118 	TInt nextChar;
   119 	
   120 	// start at the begining of the output buffer
   121 	aForeign.Zero();
   122 	
   123 	// while there is unicode data to convert and space in the output buffer
   124 	while ( (aForeign.Length() + 1 < aForeign.MaxLength()) && (numberOfUnicodeCharacters < aUnicode.Length()) )
   125 		{
   126 		nextChar = aUnicode[numberOfUnicodeCharacters];
   127 
   128 		// Note - this always converts to EBigEndian 
   129 		aForeign.Append((nextChar & 0xff00) >> 8);
   130 		aForeign.Append(nextChar & 0xff );
   131 			
   132 		numberOfUnicodeCharacters++;
   133 		}
   134 		
   135 	// returns the number of unconverted characters left at the end of the input descriptor  
   136 	return aUnicode.Length() - numberOfUnicodeCharacters;
   137 	}
   138 
   139 /**
   140  * Takes an 8 bit descriptor with or without a BOM and translates it to unicode 
   141  * Input endiness is determined by Byte Order Markers (BOM) in the source text.
   142  * If no BOM is present aDefaultEndiannessOfForeignCharacters is used.
   143  *
   144  * When the data is too large to fit in the output buffer, the endiness is saved in the state
   145  * variable between conversions
   146  *
   147  * @param aDefaultEndiannessOfForeignCharacters Default endiness if no BOMs present in the source
   148  * @param aUnicode Contains the converted text in the Unicode character set
   149  * @param aForeign The non-Unicode source text to be converted
   150  * @param aState Not used by this converter
   151  * @param aNumberOfUnconvertibleCharacters Contains the number of bytes which could not be converted to unicode
   152  * @param aIndexOfFirstByteOfFirstUnconvertibleCharacter The index of the first unconvertable byte or -1 if all converted.
   153  *
   154  * @return aNumberOfUnconvertibleCharacters The number of unconverted bytes left at the end of the input 
   155  * descriptor (e.g. because the output descriptor is not long enough to hold all the text), or one of the 
   156  * error values defined in TError. 
   157  * @internalTechnology 
   158  */
   159 TInt CUCS2ConverterImpl::ConvertToUnicode(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
   160 						   TDes16& aUnicode,	
   161 						   const TDesC8& aForeign,
   162 						   TInt& aState,
   163 						   TInt& aNumberOfUnconvertibleCharacters, 
   164 						   TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter) 
   165 	{
   166 	TInt numberOfBytesConverted = 0;
   167 	TInt numberOfUnicodeCharacters =0;
   168 	TChar nextChar;
   169 	
   170 	// work out what byte order to use
   171 	CCnvCharacterSetConverter::TEndianness byteOrderMark;
   172 	if ( aState==CCnvCharacterSetConverter::KStateDefault )
   173 		{
   174 		// this is the first call so use the default or BOM for byte order
   175 		byteOrderMark = aDefaultEndiannessOfForeignCharacters;
   176 		}
   177 	else
   178 		{
   179 		// this is not the first call so use the saved byte order
   180 		byteOrderMark = STATIC_CAST( CCnvCharacterSetConverter::TEndianness, aState - KStateOffset );
   181 		}
   182 		
   183 	if ( aForeign.Length() < 2)
   184 		{ // too small to do anything with		
   185 		return -1;
   186 		}
   187 	// If the state is KStateDefault (this is the first call) check for BOM markers
   188 	else if (aState==CCnvCharacterSetConverter::KStateDefault)
   189 		{
   190 		// is there a Little Endian BOM
   191 		if (aForeign[0]==0xff && aForeign[1]==0xfe )
   192 			{ 
   193 			byteOrderMark = CCnvCharacterSetConverter::ELittleEndian; 
   194 			}
   195 		else if (aForeign[0]==0xfe && aForeign[1]==0xff )
   196 			{
   197 			byteOrderMark = CCnvCharacterSetConverter::EBigEndian; 
   198 			}
   199 		// remember the detected state
   200 		aState = byteOrderMark + KStateOffset;
   201 		}
   202 
   203 	// start at begining of the output buffer provided
   204 	aUnicode.Zero();
   205 	
   206 	// while there is at least 2 bytes of data to convert and space in the output buffer
   207 	while ( (numberOfBytesConverted+1 < aForeign.Size()) && (numberOfUnicodeCharacters < aUnicode.MaxLength()) )
   208 		{
   209 		if (byteOrderMark == CCnvCharacterSetConverter::ELittleEndian )
   210 			{
   211 			// ELittleEndian 0x??00
   212 			nextChar = aForeign[numberOfBytesConverted] + ( aForeign[numberOfBytesConverted+1] << 8);
   213 			}
   214 		else
   215 			{
   216 			// EBigEndian 0x00??
   217 			nextChar = ( aForeign[numberOfBytesConverted] <<8 ) + aForeign[numberOfBytesConverted+1];
   218 			}
   219 			
   220 		// save the unicode character extracted	unless it's a BOM
   221 		if ( nextChar != KByteOrderMark )
   222 			{
   223 			aUnicode.Append( nextChar );
   224 			numberOfUnicodeCharacters++;	
   225 			}
   226 			
   227 		numberOfBytesConverted+=2;
   228 		}
   229 	
   230 	// there are no uncovertable characters with UCS2,
   231 	aNumberOfUnconvertibleCharacters = 0;
   232 	// a negative value indicates that all characters converted
   233 	aIndexOfFirstByteOfFirstUnconvertibleCharacter = -1;
   234 			
   235 	// returns the number of unconverted bytes left at the end of the input descriptor 
   236 	// Note there could be 1 byte left over if an odd number of bytes provided for conversion
   237 	return aForeign.Size() - numberOfBytesConverted;
   238 	}
   239 
   240 
   241 /**
   242  * This converter does not support autodetect so always returns a confidence value of 0.
   243  * @internalTechnology 
   244  */
   245 TBool CUCS2ConverterImpl::IsInThisCharacterSetL(TBool& aSetToTrue, TInt& aConfidenceLevel, const TDesC8&)
   246 	{
   247 	aSetToTrue=ETrue;
   248 	aConfidenceLevel=0;
   249 	return EFalse;
   250 	}
   251 
   252 CUCS2ConverterImpl* CUCS2ConverterImpl::NewL()
   253 	{
   254 	CUCS2ConverterImpl* self = new(ELeave) CUCS2ConverterImpl();
   255 	return self;
   256 	}
   257 
   258 CUCS2ConverterImpl::~CUCS2ConverterImpl()
   259 	{
   260 	}
   261 
   262 CUCS2ConverterImpl::CUCS2ConverterImpl()
   263 	{
   264 	}
   265 
   266 const TImplementationProxy ImplementationTable[] = 
   267 	{
   268 		IMPLEMENTATION_PROXY_ENTRY(0x101FF492,	CUCS2ConverterImpl::NewL)
   269 	};
   270 
   271 EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount)
   272 	{
   273 	aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy);
   274 
   275 	return ImplementationTable;
   276 	}
   277 
   278