1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/textandloc/charconvfw/charconvplugins/src/plugins/ucs2.cpp Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,278 @@
1.4 +/*
1.5 +* Copyright (c) 2005-2009 Nokia Corporation and/or its subsidiary(-ies).
1.6 +* All rights reserved.
1.7 +* This component and the accompanying materials are made available
1.8 +* under the terms of "Eclipse Public License v1.0"
1.9 +* which accompanies this distribution, and is available
1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
1.11 +*
1.12 +* Initial Contributors:
1.13 +* Nokia Corporation - initial contribution.
1.14 +*
1.15 +* Contributors:
1.16 +*
1.17 +* Description:
1.18 +*
1.19 +*/
1.20 +
1.21 +
1.22 +#include <e32std.h>
1.23 +#include <charconv.h>
1.24 +#include <convgeneratedcpp.h>
1.25 +#include "ucs2.h"
1.26 +#include <ecom/implementationproxy.h>
1.27 +#include <charactersetconverter.h>
1.28 +
1.29 +class CUCS2ConverterImpl : public CCharacterSetConverterPluginInterface
1.30 + {
1.31 +
1.32 +public:
1.33 + virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters();
1.34 +
1.35 + virtual TInt ConvertFromUnicode(
1.36 + CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
1.37 + const TDesC8& aReplacementForUnconvertibleUnicodeCharacters,
1.38 + TDes8& aForeign,
1.39 + const TDesC16& aUnicode,
1.40 + CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
1.41 +
1.42 + virtual TInt ConvertToUnicode(
1.43 + CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
1.44 + TDes16& aUnicode,
1.45 + const TDesC8& aForeign,
1.46 + TInt& aState,
1.47 + TInt& aNumberOfUnconvertibleCharacters,
1.48 + TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
1.49 +
1.50 + virtual TBool IsInThisCharacterSetL(
1.51 + TBool& aSetToTrue,
1.52 + TInt& aConfidenceLevel,
1.53 + const TDesC8& aSample);
1.54 +
1.55 + static CUCS2ConverterImpl* NewL();
1.56 + virtual ~CUCS2ConverterImpl();
1.57 +
1.58 +private:
1.59 + CUCS2ConverterImpl();
1.60 +
1.61 + };
1.62 +
1.63 +
1.64 +
1.65 +// The following code has been copied and modified from the plugin computer generated code
1.66 +// that is generated from the charconv/data/???.cpl && /???.txt files
1.67 +// *** code begins ***
1.68 +#define ARRAY_LENGTH(aArray) (sizeof(aArray)/sizeof((aArray)[0]))
1.69 +
1.70 +#pragma warning (disable: 4049) // compiler limit : terminating line number emission
1.71 +
1.72 +_LIT8(KLit8ReplacementForUnconvertibleUnicodeCharacters, "\xff\xfd");
1.73 +
1.74 +GLDEF_C const TDesC8& ReplacementForUnconvertibleUnicodeCharacters_internal()
1.75 + {
1.76 + return KLit8ReplacementForUnconvertibleUnicodeCharacters;
1.77 + }
1.78 +
1.79 +GLDEF_D const SCnvConversionData conversionData=
1.80 + {
1.81 + SCnvConversionData::EUnspecified,
1.82 + {NULL,NULL},
1.83 + {NULL,NULL},
1.84 + {NULL,NULL},
1.85 + NULL,
1.86 + NULL
1.87 + };
1.88 +
1.89 +
1.90 +const TInt KByteOrderMark = 0xfeff;
1.91 +const TInt KStateOffset = 0x1000;
1.92 +
1.93 +
1.94 +const TDesC8& CUCS2ConverterImpl::ReplacementForUnconvertibleUnicodeCharacters()
1.95 + {
1.96 + return ReplacementForUnconvertibleUnicodeCharacters_internal();
1.97 + }
1.98 +
1.99 +
1.100 +/**
1.101 + * Takes a 16 bit UCS2 descriptor with or without BOM and translates it to an
1.102 + * eight bit descriptor in Big Endian format.
1.103 + *
1.104 + * Note aDefaultEndiannessOfForeignCharacters is not used by this converter
1.105 + *
1.106 + * @param CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters
1.107 + * @param const TDesC8&
1.108 + * @param TDes8& aForeign
1.109 + * @param const TDesC16& aUnicode
1.110 + * @param CCnvCharacterSetConverter::TArrayOfAscendingIndices&
1.111 + *
1.112 + * returns number of converted characters
1.113 +*/
1.114 +TInt CUCS2ConverterImpl::ConvertFromUnicode(CCnvCharacterSetConverter::TEndianness /*aDefaultEndiannessOfForeignCharacters*/,
1.115 + const TDesC8& /*aReplacementForUnconvertibleUnicodeCharacters*/,
1.116 + TDes8& aForeign,
1.117 + const TDesC16& aUnicode,
1.118 + CCnvCharacterSetConverter::TArrayOfAscendingIndices& /*aIndicesOfUnconvertibleCharacters*/)
1.119 + {
1.120 + TInt numberOfUnicodeCharacters =0;
1.121 + TInt nextChar;
1.122 +
1.123 + // start at the begining of the output buffer
1.124 + aForeign.Zero();
1.125 +
1.126 + // while there is unicode data to convert and space in the output buffer
1.127 + while ( (aForeign.Length() + 1 < aForeign.MaxLength()) && (numberOfUnicodeCharacters < aUnicode.Length()) )
1.128 + {
1.129 + nextChar = aUnicode[numberOfUnicodeCharacters];
1.130 +
1.131 + // Note - this always converts to EBigEndian
1.132 + aForeign.Append((nextChar & 0xff00) >> 8);
1.133 + aForeign.Append(nextChar & 0xff );
1.134 +
1.135 + numberOfUnicodeCharacters++;
1.136 + }
1.137 +
1.138 + // returns the number of unconverted characters left at the end of the input descriptor
1.139 + return aUnicode.Length() - numberOfUnicodeCharacters;
1.140 + }
1.141 +
1.142 +/**
1.143 + * Takes an 8 bit descriptor with or without a BOM and translates it to unicode
1.144 + * Input endiness is determined by Byte Order Markers (BOM) in the source text.
1.145 + * If no BOM is present aDefaultEndiannessOfForeignCharacters is used.
1.146 + *
1.147 + * When the data is too large to fit in the output buffer, the endiness is saved in the state
1.148 + * variable between conversions
1.149 + *
1.150 + * @param aDefaultEndiannessOfForeignCharacters Default endiness if no BOMs present in the source
1.151 + * @param aUnicode Contains the converted text in the Unicode character set
1.152 + * @param aForeign The non-Unicode source text to be converted
1.153 + * @param aState Not used by this converter
1.154 + * @param aNumberOfUnconvertibleCharacters Contains the number of bytes which could not be converted to unicode
1.155 + * @param aIndexOfFirstByteOfFirstUnconvertibleCharacter The index of the first unconvertable byte or -1 if all converted.
1.156 + *
1.157 + * @return aNumberOfUnconvertibleCharacters The number of unconverted bytes left at the end of the input
1.158 + * descriptor (e.g. because the output descriptor is not long enough to hold all the text), or one of the
1.159 + * error values defined in TError.
1.160 + * @internalTechnology
1.161 + */
1.162 +TInt CUCS2ConverterImpl::ConvertToUnicode(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
1.163 + TDes16& aUnicode,
1.164 + const TDesC8& aForeign,
1.165 + TInt& aState,
1.166 + TInt& aNumberOfUnconvertibleCharacters,
1.167 + TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
1.168 + {
1.169 + TInt numberOfBytesConverted = 0;
1.170 + TInt numberOfUnicodeCharacters =0;
1.171 + TChar nextChar;
1.172 +
1.173 + // work out what byte order to use
1.174 + CCnvCharacterSetConverter::TEndianness byteOrderMark;
1.175 + if ( aState==CCnvCharacterSetConverter::KStateDefault )
1.176 + {
1.177 + // this is the first call so use the default or BOM for byte order
1.178 + byteOrderMark = aDefaultEndiannessOfForeignCharacters;
1.179 + }
1.180 + else
1.181 + {
1.182 + // this is not the first call so use the saved byte order
1.183 + byteOrderMark = STATIC_CAST( CCnvCharacterSetConverter::TEndianness, aState - KStateOffset );
1.184 + }
1.185 +
1.186 + if ( aForeign.Length() < 2)
1.187 + { // too small to do anything with
1.188 + return -1;
1.189 + }
1.190 + // If the state is KStateDefault (this is the first call) check for BOM markers
1.191 + else if (aState==CCnvCharacterSetConverter::KStateDefault)
1.192 + {
1.193 + // is there a Little Endian BOM
1.194 + if (aForeign[0]==0xff && aForeign[1]==0xfe )
1.195 + {
1.196 + byteOrderMark = CCnvCharacterSetConverter::ELittleEndian;
1.197 + }
1.198 + else if (aForeign[0]==0xfe && aForeign[1]==0xff )
1.199 + {
1.200 + byteOrderMark = CCnvCharacterSetConverter::EBigEndian;
1.201 + }
1.202 + // remember the detected state
1.203 + aState = byteOrderMark + KStateOffset;
1.204 + }
1.205 +
1.206 + // start at begining of the output buffer provided
1.207 + aUnicode.Zero();
1.208 +
1.209 + // while there is at least 2 bytes of data to convert and space in the output buffer
1.210 + while ( (numberOfBytesConverted+1 < aForeign.Size()) && (numberOfUnicodeCharacters < aUnicode.MaxLength()) )
1.211 + {
1.212 + if (byteOrderMark == CCnvCharacterSetConverter::ELittleEndian )
1.213 + {
1.214 + // ELittleEndian 0x??00
1.215 + nextChar = aForeign[numberOfBytesConverted] + ( aForeign[numberOfBytesConverted+1] << 8);
1.216 + }
1.217 + else
1.218 + {
1.219 + // EBigEndian 0x00??
1.220 + nextChar = ( aForeign[numberOfBytesConverted] <<8 ) + aForeign[numberOfBytesConverted+1];
1.221 + }
1.222 +
1.223 + // save the unicode character extracted unless it's a BOM
1.224 + if ( nextChar != KByteOrderMark )
1.225 + {
1.226 + aUnicode.Append( nextChar );
1.227 + numberOfUnicodeCharacters++;
1.228 + }
1.229 +
1.230 + numberOfBytesConverted+=2;
1.231 + }
1.232 +
1.233 + // there are no uncovertable characters with UCS2,
1.234 + aNumberOfUnconvertibleCharacters = 0;
1.235 + // a negative value indicates that all characters converted
1.236 + aIndexOfFirstByteOfFirstUnconvertibleCharacter = -1;
1.237 +
1.238 + // returns the number of unconverted bytes left at the end of the input descriptor
1.239 + // Note there could be 1 byte left over if an odd number of bytes provided for conversion
1.240 + return aForeign.Size() - numberOfBytesConverted;
1.241 + }
1.242 +
1.243 +
1.244 +/**
1.245 + * This converter does not support autodetect so always returns a confidence value of 0.
1.246 + * @internalTechnology
1.247 + */
1.248 +TBool CUCS2ConverterImpl::IsInThisCharacterSetL(TBool& aSetToTrue, TInt& aConfidenceLevel, const TDesC8&)
1.249 + {
1.250 + aSetToTrue=ETrue;
1.251 + aConfidenceLevel=0;
1.252 + return EFalse;
1.253 + }
1.254 +
1.255 +CUCS2ConverterImpl* CUCS2ConverterImpl::NewL()
1.256 + {
1.257 + CUCS2ConverterImpl* self = new(ELeave) CUCS2ConverterImpl();
1.258 + return self;
1.259 + }
1.260 +
1.261 +CUCS2ConverterImpl::~CUCS2ConverterImpl()
1.262 + {
1.263 + }
1.264 +
1.265 +CUCS2ConverterImpl::CUCS2ConverterImpl()
1.266 + {
1.267 + }
1.268 +
1.269 +const TImplementationProxy ImplementationTable[] =
1.270 + {
1.271 + IMPLEMENTATION_PROXY_ENTRY(0x101FF492, CUCS2ConverterImpl::NewL)
1.272 + };
1.273 +
1.274 +EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount)
1.275 + {
1.276 + aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy);
1.277 +
1.278 + return ImplementationTable;
1.279 + }
1.280 +
1.281 +