sl@0: /* sl@0: * Copyright (c) 2005-2009 Nokia Corporation and/or its subsidiary(-ies). sl@0: * All rights reserved. sl@0: * This component and the accompanying materials are made available sl@0: * under the terms of "Eclipse Public License v1.0" sl@0: * which accompanies this distribution, and is available sl@0: * at the URL "http://www.eclipse.org/legal/epl-v10.html". sl@0: * sl@0: * Initial Contributors: sl@0: * Nokia Corporation - initial contribution. sl@0: * sl@0: * Contributors: sl@0: * sl@0: * Description: sl@0: * sl@0: */ sl@0: sl@0: sl@0: #include sl@0: #include sl@0: #include sl@0: #include "ucs2.h" sl@0: #include sl@0: #include sl@0: sl@0: class CUCS2ConverterImpl : public CCharacterSetConverterPluginInterface sl@0: { sl@0: sl@0: public: sl@0: virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters(); sl@0: sl@0: virtual TInt ConvertFromUnicode( sl@0: CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, sl@0: const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, sl@0: TDes8& aForeign, sl@0: const TDesC16& aUnicode, sl@0: CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters); sl@0: sl@0: virtual TInt ConvertToUnicode( sl@0: CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, sl@0: TDes16& aUnicode, sl@0: const TDesC8& aForeign, sl@0: TInt& aState, sl@0: TInt& aNumberOfUnconvertibleCharacters, sl@0: TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter); sl@0: sl@0: virtual TBool IsInThisCharacterSetL( sl@0: TBool& aSetToTrue, sl@0: TInt& aConfidenceLevel, sl@0: const TDesC8& aSample); sl@0: sl@0: static CUCS2ConverterImpl* NewL(); sl@0: virtual ~CUCS2ConverterImpl(); sl@0: sl@0: private: sl@0: CUCS2ConverterImpl(); sl@0: sl@0: }; sl@0: sl@0: sl@0: sl@0: // The following code has been copied and modified from the plugin computer generated code sl@0: // that is generated from the charconv/data/???.cpl && /???.txt files sl@0: // *** code begins *** sl@0: #define ARRAY_LENGTH(aArray) (sizeof(aArray)/sizeof((aArray)[0])) sl@0: sl@0: #pragma warning (disable: 4049) // compiler limit : terminating line number emission sl@0: sl@0: _LIT8(KLit8ReplacementForUnconvertibleUnicodeCharacters, "\xff\xfd"); sl@0: sl@0: GLDEF_C const TDesC8& ReplacementForUnconvertibleUnicodeCharacters_internal() sl@0: { sl@0: return KLit8ReplacementForUnconvertibleUnicodeCharacters; sl@0: } sl@0: sl@0: GLDEF_D const SCnvConversionData conversionData= sl@0: { sl@0: SCnvConversionData::EUnspecified, sl@0: {NULL,NULL}, sl@0: {NULL,NULL}, sl@0: {NULL,NULL}, sl@0: NULL, sl@0: NULL sl@0: }; sl@0: sl@0: sl@0: const TInt KByteOrderMark = 0xfeff; sl@0: const TInt KStateOffset = 0x1000; sl@0: sl@0: sl@0: const TDesC8& CUCS2ConverterImpl::ReplacementForUnconvertibleUnicodeCharacters() sl@0: { sl@0: return ReplacementForUnconvertibleUnicodeCharacters_internal(); sl@0: } sl@0: sl@0: sl@0: /** sl@0: * Takes a 16 bit UCS2 descriptor with or without BOM and translates it to an sl@0: * eight bit descriptor in Big Endian format. sl@0: * sl@0: * Note aDefaultEndiannessOfForeignCharacters is not used by this converter sl@0: * sl@0: * @param CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters sl@0: * @param const TDesC8& sl@0: * @param TDes8& aForeign sl@0: * @param const TDesC16& aUnicode sl@0: * @param CCnvCharacterSetConverter::TArrayOfAscendingIndices& sl@0: * sl@0: * returns number of converted characters sl@0: */ sl@0: TInt CUCS2ConverterImpl::ConvertFromUnicode(CCnvCharacterSetConverter::TEndianness /*aDefaultEndiannessOfForeignCharacters*/, sl@0: const TDesC8& /*aReplacementForUnconvertibleUnicodeCharacters*/, sl@0: TDes8& aForeign, sl@0: const TDesC16& aUnicode, sl@0: CCnvCharacterSetConverter::TArrayOfAscendingIndices& /*aIndicesOfUnconvertibleCharacters*/) sl@0: { sl@0: TInt numberOfUnicodeCharacters =0; sl@0: TInt nextChar; sl@0: sl@0: // start at the begining of the output buffer sl@0: aForeign.Zero(); sl@0: sl@0: // while there is unicode data to convert and space in the output buffer sl@0: while ( (aForeign.Length() + 1 < aForeign.MaxLength()) && (numberOfUnicodeCharacters < aUnicode.Length()) ) sl@0: { sl@0: nextChar = aUnicode[numberOfUnicodeCharacters]; sl@0: sl@0: // Note - this always converts to EBigEndian sl@0: aForeign.Append((nextChar & 0xff00) >> 8); sl@0: aForeign.Append(nextChar & 0xff ); sl@0: sl@0: numberOfUnicodeCharacters++; sl@0: } sl@0: sl@0: // returns the number of unconverted characters left at the end of the input descriptor sl@0: return aUnicode.Length() - numberOfUnicodeCharacters; sl@0: } sl@0: sl@0: /** sl@0: * Takes an 8 bit descriptor with or without a BOM and translates it to unicode sl@0: * Input endiness is determined by Byte Order Markers (BOM) in the source text. sl@0: * If no BOM is present aDefaultEndiannessOfForeignCharacters is used. sl@0: * sl@0: * When the data is too large to fit in the output buffer, the endiness is saved in the state sl@0: * variable between conversions sl@0: * sl@0: * @param aDefaultEndiannessOfForeignCharacters Default endiness if no BOMs present in the source sl@0: * @param aUnicode Contains the converted text in the Unicode character set sl@0: * @param aForeign The non-Unicode source text to be converted sl@0: * @param aState Not used by this converter sl@0: * @param aNumberOfUnconvertibleCharacters Contains the number of bytes which could not be converted to unicode sl@0: * @param aIndexOfFirstByteOfFirstUnconvertibleCharacter The index of the first unconvertable byte or -1 if all converted. sl@0: * sl@0: * @return aNumberOfUnconvertibleCharacters The number of unconverted bytes left at the end of the input sl@0: * descriptor (e.g. because the output descriptor is not long enough to hold all the text), or one of the sl@0: * error values defined in TError. sl@0: * @internalTechnology sl@0: */ sl@0: TInt CUCS2ConverterImpl::ConvertToUnicode(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, sl@0: TDes16& aUnicode, sl@0: const TDesC8& aForeign, sl@0: TInt& aState, sl@0: TInt& aNumberOfUnconvertibleCharacters, sl@0: TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter) sl@0: { sl@0: TInt numberOfBytesConverted = 0; sl@0: TInt numberOfUnicodeCharacters =0; sl@0: TChar nextChar; sl@0: sl@0: // work out what byte order to use sl@0: CCnvCharacterSetConverter::TEndianness byteOrderMark; sl@0: if ( aState==CCnvCharacterSetConverter::KStateDefault ) sl@0: { sl@0: // this is the first call so use the default or BOM for byte order sl@0: byteOrderMark = aDefaultEndiannessOfForeignCharacters; sl@0: } sl@0: else sl@0: { sl@0: // this is not the first call so use the saved byte order sl@0: byteOrderMark = STATIC_CAST( CCnvCharacterSetConverter::TEndianness, aState - KStateOffset ); sl@0: } sl@0: sl@0: if ( aForeign.Length() < 2) sl@0: { // too small to do anything with sl@0: return -1; sl@0: } sl@0: // If the state is KStateDefault (this is the first call) check for BOM markers sl@0: else if (aState==CCnvCharacterSetConverter::KStateDefault) sl@0: { sl@0: // is there a Little Endian BOM sl@0: if (aForeign[0]==0xff && aForeign[1]==0xfe ) sl@0: { sl@0: byteOrderMark = CCnvCharacterSetConverter::ELittleEndian; sl@0: } sl@0: else if (aForeign[0]==0xfe && aForeign[1]==0xff ) sl@0: { sl@0: byteOrderMark = CCnvCharacterSetConverter::EBigEndian; sl@0: } sl@0: // remember the detected state sl@0: aState = byteOrderMark + KStateOffset; sl@0: } sl@0: sl@0: // start at begining of the output buffer provided sl@0: aUnicode.Zero(); sl@0: sl@0: // while there is at least 2 bytes of data to convert and space in the output buffer sl@0: while ( (numberOfBytesConverted+1 < aForeign.Size()) && (numberOfUnicodeCharacters < aUnicode.MaxLength()) ) sl@0: { sl@0: if (byteOrderMark == CCnvCharacterSetConverter::ELittleEndian ) sl@0: { sl@0: // ELittleEndian 0x??00 sl@0: nextChar = aForeign[numberOfBytesConverted] + ( aForeign[numberOfBytesConverted+1] << 8); sl@0: } sl@0: else sl@0: { sl@0: // EBigEndian 0x00?? sl@0: nextChar = ( aForeign[numberOfBytesConverted] <<8 ) + aForeign[numberOfBytesConverted+1]; sl@0: } sl@0: sl@0: // save the unicode character extracted unless it's a BOM sl@0: if ( nextChar != KByteOrderMark ) sl@0: { sl@0: aUnicode.Append( nextChar ); sl@0: numberOfUnicodeCharacters++; sl@0: } sl@0: sl@0: numberOfBytesConverted+=2; sl@0: } sl@0: sl@0: // there are no uncovertable characters with UCS2, sl@0: aNumberOfUnconvertibleCharacters = 0; sl@0: // a negative value indicates that all characters converted sl@0: aIndexOfFirstByteOfFirstUnconvertibleCharacter = -1; sl@0: sl@0: // returns the number of unconverted bytes left at the end of the input descriptor sl@0: // Note there could be 1 byte left over if an odd number of bytes provided for conversion sl@0: return aForeign.Size() - numberOfBytesConverted; sl@0: } sl@0: sl@0: sl@0: /** sl@0: * This converter does not support autodetect so always returns a confidence value of 0. sl@0: * @internalTechnology sl@0: */ sl@0: TBool CUCS2ConverterImpl::IsInThisCharacterSetL(TBool& aSetToTrue, TInt& aConfidenceLevel, const TDesC8&) sl@0: { sl@0: aSetToTrue=ETrue; sl@0: aConfidenceLevel=0; sl@0: return EFalse; sl@0: } sl@0: sl@0: CUCS2ConverterImpl* CUCS2ConverterImpl::NewL() sl@0: { sl@0: CUCS2ConverterImpl* self = new(ELeave) CUCS2ConverterImpl(); sl@0: return self; sl@0: } sl@0: sl@0: CUCS2ConverterImpl::~CUCS2ConverterImpl() sl@0: { sl@0: } sl@0: sl@0: CUCS2ConverterImpl::CUCS2ConverterImpl() sl@0: { sl@0: } sl@0: sl@0: const TImplementationProxy ImplementationTable[] = sl@0: { sl@0: IMPLEMENTATION_PROXY_ENTRY(0x101FF492, CUCS2ConverterImpl::NewL) sl@0: }; sl@0: sl@0: EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount) sl@0: { sl@0: aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy); sl@0: sl@0: return ImplementationTable; sl@0: } sl@0: sl@0: