os/textandloc/charconvfw/charconvplugins/src/plugins/ucs2.cpp
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/textandloc/charconvfw/charconvplugins/src/plugins/ucs2.cpp	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,278 @@
     1.4 +/*
     1.5 +* Copyright (c) 2005-2009 Nokia Corporation and/or its subsidiary(-ies).
     1.6 +* All rights reserved.
     1.7 +* This component and the accompanying materials are made available
     1.8 +* under the terms of "Eclipse Public License v1.0"
     1.9 +* which accompanies this distribution, and is available
    1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
    1.11 +*
    1.12 +* Initial Contributors:
    1.13 +* Nokia Corporation - initial contribution.
    1.14 +*
    1.15 +* Contributors:
    1.16 +*
    1.17 +* Description: 
    1.18 +*
    1.19 +*/
    1.20 +
    1.21 +
    1.22 +#include <e32std.h>
    1.23 +#include <charconv.h>
    1.24 +#include <convgeneratedcpp.h>
    1.25 +#include "ucs2.h"
    1.26 +#include <ecom/implementationproxy.h>
    1.27 +#include <charactersetconverter.h>
    1.28 +
    1.29 +class CUCS2ConverterImpl : public CCharacterSetConverterPluginInterface
    1.30 +	{
    1.31 +
    1.32 +public:
    1.33 +	virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters();
    1.34 +
    1.35 +	virtual TInt ConvertFromUnicode(
    1.36 +		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
    1.37 +		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
    1.38 +		TDes8& aForeign, 
    1.39 +		const TDesC16& aUnicode, 
    1.40 +		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
    1.41 +
    1.42 +	virtual TInt ConvertToUnicode(
    1.43 +		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
    1.44 +		TDes16& aUnicode, 
    1.45 +		const TDesC8& aForeign, 
    1.46 +		TInt& aState, 
    1.47 +		TInt& aNumberOfUnconvertibleCharacters, 
    1.48 +		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
    1.49 +
    1.50 +	virtual TBool IsInThisCharacterSetL(
    1.51 +		TBool& aSetToTrue, 
    1.52 +		TInt& aConfidenceLevel, 
    1.53 +		const TDesC8& aSample);
    1.54 +
    1.55 +	static CUCS2ConverterImpl* NewL();
    1.56 +	virtual ~CUCS2ConverterImpl();
    1.57 +
    1.58 +private:
    1.59 +	CUCS2ConverterImpl();
    1.60 +
    1.61 +	};
    1.62 +
    1.63 +
    1.64 +
    1.65 +// The following code has been copied and modified from the plugin computer generated code
    1.66 +// that is generated from the charconv/data/???.cpl && /???.txt files
    1.67 +// *** code begins ***
    1.68 +#define ARRAY_LENGTH(aArray) (sizeof(aArray)/sizeof((aArray)[0]))
    1.69 +
    1.70 +#pragma warning (disable: 4049) // compiler limit : terminating line number emission
    1.71 +
    1.72 +_LIT8(KLit8ReplacementForUnconvertibleUnicodeCharacters, "\xff\xfd");
    1.73 +
    1.74 +GLDEF_C const TDesC8& ReplacementForUnconvertibleUnicodeCharacters_internal()
    1.75 +	{
    1.76 +	return KLit8ReplacementForUnconvertibleUnicodeCharacters;
    1.77 +	}
    1.78 +
    1.79 +GLDEF_D const SCnvConversionData conversionData=
    1.80 +	{
    1.81 +	SCnvConversionData::EUnspecified,
    1.82 +	{NULL,NULL},
    1.83 +	{NULL,NULL},
    1.84 +	{NULL,NULL},
    1.85 +	NULL,
    1.86 +	NULL
    1.87 +	};
    1.88 +
    1.89 +
    1.90 +const TInt    KByteOrderMark = 0xfeff;
    1.91 +const TInt 	  KStateOffset = 0x1000;
    1.92 +
    1.93 +
    1.94 +const TDesC8& CUCS2ConverterImpl::ReplacementForUnconvertibleUnicodeCharacters()
    1.95 +	{
    1.96 +	return ReplacementForUnconvertibleUnicodeCharacters_internal();
    1.97 +	}
    1.98 +
    1.99 +
   1.100 +/**
   1.101 + * Takes a 16 bit UCS2 descriptor with or without BOM and translates it to an
   1.102 + * eight bit descriptor in Big Endian format.
   1.103 + *
   1.104 + * Note aDefaultEndiannessOfForeignCharacters is not used by this converter
   1.105 + *
   1.106 + * @param CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters
   1.107 + * @param const TDesC8&
   1.108 + * @param TDes8& aForeign
   1.109 + * @param const TDesC16& aUnicode
   1.110 + * @param CCnvCharacterSetConverter::TArrayOfAscendingIndices&
   1.111 + *
   1.112 + * returns number of converted characters
   1.113 +*/
   1.114 +TInt CUCS2ConverterImpl::ConvertFromUnicode(CCnvCharacterSetConverter::TEndianness /*aDefaultEndiannessOfForeignCharacters*/, 
   1.115 +								 const TDesC8& /*aReplacementForUnconvertibleUnicodeCharacters*/, 
   1.116 +								 TDes8& aForeign, 
   1.117 +								 const TDesC16& aUnicode, 
   1.118 +								 CCnvCharacterSetConverter::TArrayOfAscendingIndices& /*aIndicesOfUnconvertibleCharacters*/)
   1.119 +	{
   1.120 +	TInt numberOfUnicodeCharacters =0;
   1.121 +	TInt nextChar;
   1.122 +	
   1.123 +	// start at the begining of the output buffer
   1.124 +	aForeign.Zero();
   1.125 +	
   1.126 +	// while there is unicode data to convert and space in the output buffer
   1.127 +	while ( (aForeign.Length() + 1 < aForeign.MaxLength()) && (numberOfUnicodeCharacters < aUnicode.Length()) )
   1.128 +		{
   1.129 +		nextChar = aUnicode[numberOfUnicodeCharacters];
   1.130 +
   1.131 +		// Note - this always converts to EBigEndian 
   1.132 +		aForeign.Append((nextChar & 0xff00) >> 8);
   1.133 +		aForeign.Append(nextChar & 0xff );
   1.134 +			
   1.135 +		numberOfUnicodeCharacters++;
   1.136 +		}
   1.137 +		
   1.138 +	// returns the number of unconverted characters left at the end of the input descriptor  
   1.139 +	return aUnicode.Length() - numberOfUnicodeCharacters;
   1.140 +	}
   1.141 +
   1.142 +/**
   1.143 + * Takes an 8 bit descriptor with or without a BOM and translates it to unicode 
   1.144 + * Input endiness is determined by Byte Order Markers (BOM) in the source text.
   1.145 + * If no BOM is present aDefaultEndiannessOfForeignCharacters is used.
   1.146 + *
   1.147 + * When the data is too large to fit in the output buffer, the endiness is saved in the state
   1.148 + * variable between conversions
   1.149 + *
   1.150 + * @param aDefaultEndiannessOfForeignCharacters Default endiness if no BOMs present in the source
   1.151 + * @param aUnicode Contains the converted text in the Unicode character set
   1.152 + * @param aForeign The non-Unicode source text to be converted
   1.153 + * @param aState Not used by this converter
   1.154 + * @param aNumberOfUnconvertibleCharacters Contains the number of bytes which could not be converted to unicode
   1.155 + * @param aIndexOfFirstByteOfFirstUnconvertibleCharacter The index of the first unconvertable byte or -1 if all converted.
   1.156 + *
   1.157 + * @return aNumberOfUnconvertibleCharacters The number of unconverted bytes left at the end of the input 
   1.158 + * descriptor (e.g. because the output descriptor is not long enough to hold all the text), or one of the 
   1.159 + * error values defined in TError. 
   1.160 + * @internalTechnology 
   1.161 + */
   1.162 +TInt CUCS2ConverterImpl::ConvertToUnicode(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
   1.163 +						   TDes16& aUnicode,	
   1.164 +						   const TDesC8& aForeign,
   1.165 +						   TInt& aState,
   1.166 +						   TInt& aNumberOfUnconvertibleCharacters, 
   1.167 +						   TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter) 
   1.168 +	{
   1.169 +	TInt numberOfBytesConverted = 0;
   1.170 +	TInt numberOfUnicodeCharacters =0;
   1.171 +	TChar nextChar;
   1.172 +	
   1.173 +	// work out what byte order to use
   1.174 +	CCnvCharacterSetConverter::TEndianness byteOrderMark;
   1.175 +	if ( aState==CCnvCharacterSetConverter::KStateDefault )
   1.176 +		{
   1.177 +		// this is the first call so use the default or BOM for byte order
   1.178 +		byteOrderMark = aDefaultEndiannessOfForeignCharacters;
   1.179 +		}
   1.180 +	else
   1.181 +		{
   1.182 +		// this is not the first call so use the saved byte order
   1.183 +		byteOrderMark = STATIC_CAST( CCnvCharacterSetConverter::TEndianness, aState - KStateOffset );
   1.184 +		}
   1.185 +		
   1.186 +	if ( aForeign.Length() < 2)
   1.187 +		{ // too small to do anything with		
   1.188 +		return -1;
   1.189 +		}
   1.190 +	// If the state is KStateDefault (this is the first call) check for BOM markers
   1.191 +	else if (aState==CCnvCharacterSetConverter::KStateDefault)
   1.192 +		{
   1.193 +		// is there a Little Endian BOM
   1.194 +		if (aForeign[0]==0xff && aForeign[1]==0xfe )
   1.195 +			{ 
   1.196 +			byteOrderMark = CCnvCharacterSetConverter::ELittleEndian; 
   1.197 +			}
   1.198 +		else if (aForeign[0]==0xfe && aForeign[1]==0xff )
   1.199 +			{
   1.200 +			byteOrderMark = CCnvCharacterSetConverter::EBigEndian; 
   1.201 +			}
   1.202 +		// remember the detected state
   1.203 +		aState = byteOrderMark + KStateOffset;
   1.204 +		}
   1.205 +
   1.206 +	// start at begining of the output buffer provided
   1.207 +	aUnicode.Zero();
   1.208 +	
   1.209 +	// while there is at least 2 bytes of data to convert and space in the output buffer
   1.210 +	while ( (numberOfBytesConverted+1 < aForeign.Size()) && (numberOfUnicodeCharacters < aUnicode.MaxLength()) )
   1.211 +		{
   1.212 +		if (byteOrderMark == CCnvCharacterSetConverter::ELittleEndian )
   1.213 +			{
   1.214 +			// ELittleEndian 0x??00
   1.215 +			nextChar = aForeign[numberOfBytesConverted] + ( aForeign[numberOfBytesConverted+1] << 8);
   1.216 +			}
   1.217 +		else
   1.218 +			{
   1.219 +			// EBigEndian 0x00??
   1.220 +			nextChar = ( aForeign[numberOfBytesConverted] <<8 ) + aForeign[numberOfBytesConverted+1];
   1.221 +			}
   1.222 +			
   1.223 +		// save the unicode character extracted	unless it's a BOM
   1.224 +		if ( nextChar != KByteOrderMark )
   1.225 +			{
   1.226 +			aUnicode.Append( nextChar );
   1.227 +			numberOfUnicodeCharacters++;	
   1.228 +			}
   1.229 +			
   1.230 +		numberOfBytesConverted+=2;
   1.231 +		}
   1.232 +	
   1.233 +	// there are no uncovertable characters with UCS2,
   1.234 +	aNumberOfUnconvertibleCharacters = 0;
   1.235 +	// a negative value indicates that all characters converted
   1.236 +	aIndexOfFirstByteOfFirstUnconvertibleCharacter = -1;
   1.237 +			
   1.238 +	// returns the number of unconverted bytes left at the end of the input descriptor 
   1.239 +	// Note there could be 1 byte left over if an odd number of bytes provided for conversion
   1.240 +	return aForeign.Size() - numberOfBytesConverted;
   1.241 +	}
   1.242 +
   1.243 +
   1.244 +/**
   1.245 + * This converter does not support autodetect so always returns a confidence value of 0.
   1.246 + * @internalTechnology 
   1.247 + */
   1.248 +TBool CUCS2ConverterImpl::IsInThisCharacterSetL(TBool& aSetToTrue, TInt& aConfidenceLevel, const TDesC8&)
   1.249 +	{
   1.250 +	aSetToTrue=ETrue;
   1.251 +	aConfidenceLevel=0;
   1.252 +	return EFalse;
   1.253 +	}
   1.254 +
   1.255 +CUCS2ConverterImpl* CUCS2ConverterImpl::NewL()
   1.256 +	{
   1.257 +	CUCS2ConverterImpl* self = new(ELeave) CUCS2ConverterImpl();
   1.258 +	return self;
   1.259 +	}
   1.260 +
   1.261 +CUCS2ConverterImpl::~CUCS2ConverterImpl()
   1.262 +	{
   1.263 +	}
   1.264 +
   1.265 +CUCS2ConverterImpl::CUCS2ConverterImpl()
   1.266 +	{
   1.267 +	}
   1.268 +
   1.269 +const TImplementationProxy ImplementationTable[] = 
   1.270 +	{
   1.271 +		IMPLEMENTATION_PROXY_ENTRY(0x101FF492,	CUCS2ConverterImpl::NewL)
   1.272 +	};
   1.273 +
   1.274 +EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount)
   1.275 +	{
   1.276 +	aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy);
   1.277 +
   1.278 +	return ImplementationTable;
   1.279 +	}
   1.280 +
   1.281 +