os/textandloc/charconvfw/charconvplugins/src/plugins/ucs2.cpp
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
sl@0
     1
/*
sl@0
     2
* Copyright (c) 2005-2009 Nokia Corporation and/or its subsidiary(-ies).
sl@0
     3
* All rights reserved.
sl@0
     4
* This component and the accompanying materials are made available
sl@0
     5
* under the terms of "Eclipse Public License v1.0"
sl@0
     6
* which accompanies this distribution, and is available
sl@0
     7
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
sl@0
     8
*
sl@0
     9
* Initial Contributors:
sl@0
    10
* Nokia Corporation - initial contribution.
sl@0
    11
*
sl@0
    12
* Contributors:
sl@0
    13
*
sl@0
    14
* Description: 
sl@0
    15
*
sl@0
    16
*/
sl@0
    17
sl@0
    18
sl@0
    19
#include <e32std.h>
sl@0
    20
#include <charconv.h>
sl@0
    21
#include <convgeneratedcpp.h>
sl@0
    22
#include "ucs2.h"
sl@0
    23
#include <ecom/implementationproxy.h>
sl@0
    24
#include <charactersetconverter.h>
sl@0
    25
sl@0
    26
class CUCS2ConverterImpl : public CCharacterSetConverterPluginInterface
sl@0
    27
	{
sl@0
    28
sl@0
    29
public:
sl@0
    30
	virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters();
sl@0
    31
sl@0
    32
	virtual TInt ConvertFromUnicode(
sl@0
    33
		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
sl@0
    34
		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
sl@0
    35
		TDes8& aForeign, 
sl@0
    36
		const TDesC16& aUnicode, 
sl@0
    37
		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
sl@0
    38
sl@0
    39
	virtual TInt ConvertToUnicode(
sl@0
    40
		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
sl@0
    41
		TDes16& aUnicode, 
sl@0
    42
		const TDesC8& aForeign, 
sl@0
    43
		TInt& aState, 
sl@0
    44
		TInt& aNumberOfUnconvertibleCharacters, 
sl@0
    45
		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
sl@0
    46
sl@0
    47
	virtual TBool IsInThisCharacterSetL(
sl@0
    48
		TBool& aSetToTrue, 
sl@0
    49
		TInt& aConfidenceLevel, 
sl@0
    50
		const TDesC8& aSample);
sl@0
    51
sl@0
    52
	static CUCS2ConverterImpl* NewL();
sl@0
    53
	virtual ~CUCS2ConverterImpl();
sl@0
    54
sl@0
    55
private:
sl@0
    56
	CUCS2ConverterImpl();
sl@0
    57
sl@0
    58
	};
sl@0
    59
sl@0
    60
sl@0
    61
sl@0
    62
// The following code has been copied and modified from the plugin computer generated code
sl@0
    63
// that is generated from the charconv/data/???.cpl && /???.txt files
sl@0
    64
// *** code begins ***
sl@0
    65
// Number of elements in a statically sized array (total size / size of one element).
#define ARRAY_LENGTH(aArray) (sizeof(aArray) / sizeof((aArray)[0]))

// Silence warning 4049 (compiler limit : terminating line number emission).
#pragma warning (disable: 4049)
sl@0
    68
sl@0
    69
_LIT8(KLit8ReplacementForUnconvertibleUnicodeCharacters, "\xff\xfd");
sl@0
    70
sl@0
    71
GLDEF_C const TDesC8& ReplacementForUnconvertibleUnicodeCharacters_internal()
sl@0
    72
	{
sl@0
    73
	return KLit8ReplacementForUnconvertibleUnicodeCharacters;
sl@0
    74
	}
sl@0
    75
sl@0
    76
// Empty conversion data: the endianness is left unspecified and every
// pointer member is NULL. The conversion functions visible in this file
// do their work by hand and do not read this structure.
GLDEF_D const SCnvConversionData conversionData=
	{
	SCnvConversionData::EUnspecified,
	{ NULL, NULL },
	{ NULL, NULL },
	{ NULL, NULL },
	NULL,
	NULL
	};
sl@0
    85
sl@0
    86
sl@0
    87
// Value of a decoded Byte Order Mark (BOM).
const TInt KByteOrderMark = 0xfeff;

// Added to a TEndianness value before it is stored in a conversion state
// variable, and subtracted again when the byte order is read back.
const TInt KStateOffset = 0x1000;
sl@0
    89
sl@0
    90
sl@0
    91
/**
 * Forwards to ReplacementForUnconvertibleUnicodeCharacters_internal().
 *
 * @return The descriptor returned by the internal helper.
 */
const TDesC8& CUCS2ConverterImpl::ReplacementForUnconvertibleUnicodeCharacters()
	{
	const TDesC8& replacement = ReplacementForUnconvertibleUnicodeCharacters_internal();
	return replacement;
	}
sl@0
    95
sl@0
    96
sl@0
    97
/**
sl@0
    98
 * Takes a 16 bit UCS2 descriptor with or without BOM and translates it to an
sl@0
    99
 * eight bit descriptor in Big Endian format.
sl@0
   100
 *
sl@0
   101
 * Note aDefaultEndiannessOfForeignCharacters is not used by this converter
sl@0
   102
 *
sl@0
   103
 * @param CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters
sl@0
   104
 * @param const TDesC8&
sl@0
   105
 * @param TDes8& aForeign
sl@0
   106
 * @param const TDesC16& aUnicode
sl@0
   107
 * @param CCnvCharacterSetConverter::TArrayOfAscendingIndices&
sl@0
   108
 *
sl@0
   109
 * returns number of converted characters
sl@0
   110
*/
sl@0
   111
TInt CUCS2ConverterImpl::ConvertFromUnicode(CCnvCharacterSetConverter::TEndianness /*aDefaultEndiannessOfForeignCharacters*/, 
sl@0
   112
								 const TDesC8& /*aReplacementForUnconvertibleUnicodeCharacters*/, 
sl@0
   113
								 TDes8& aForeign, 
sl@0
   114
								 const TDesC16& aUnicode, 
sl@0
   115
								 CCnvCharacterSetConverter::TArrayOfAscendingIndices& /*aIndicesOfUnconvertibleCharacters*/)
sl@0
   116
	{
sl@0
   117
	TInt numberOfUnicodeCharacters =0;
sl@0
   118
	TInt nextChar;
sl@0
   119
	
sl@0
   120
	// start at the begining of the output buffer
sl@0
   121
	aForeign.Zero();
sl@0
   122
	
sl@0
   123
	// while there is unicode data to convert and space in the output buffer
sl@0
   124
	while ( (aForeign.Length() + 1 < aForeign.MaxLength()) && (numberOfUnicodeCharacters < aUnicode.Length()) )
sl@0
   125
		{
sl@0
   126
		nextChar = aUnicode[numberOfUnicodeCharacters];
sl@0
   127
sl@0
   128
		// Note - this always converts to EBigEndian 
sl@0
   129
		aForeign.Append((nextChar & 0xff00) >> 8);
sl@0
   130
		aForeign.Append(nextChar & 0xff );
sl@0
   131
			
sl@0
   132
		numberOfUnicodeCharacters++;
sl@0
   133
		}
sl@0
   134
		
sl@0
   135
	// returns the number of unconverted characters left at the end of the input descriptor  
sl@0
   136
	return aUnicode.Length() - numberOfUnicodeCharacters;
sl@0
   137
	}
sl@0
   138
sl@0
   139
/**
sl@0
   140
 * Takes an 8 bit descriptor with or without a BOM and translates it to unicode 
sl@0
   141
 * Input endiness is determined by Byte Order Markers (BOM) in the source text.
sl@0
   142
 * If no BOM is present aDefaultEndiannessOfForeignCharacters is used.
sl@0
   143
 *
sl@0
   144
 * When the data is too large to fit in the output buffer, the endiness is saved in the state
sl@0
   145
 * variable between conversions
sl@0
   146
 *
sl@0
   147
 * @param aDefaultEndiannessOfForeignCharacters Default endiness if no BOMs present in the source
sl@0
   148
 * @param aUnicode Contains the converted text in the Unicode character set
sl@0
   149
 * @param aForeign The non-Unicode source text to be converted
sl@0
   150
 * @param aState Not used by this converter
sl@0
   151
 * @param aNumberOfUnconvertibleCharacters Contains the number of bytes which could not be converted to unicode
sl@0
   152
 * @param aIndexOfFirstByteOfFirstUnconvertibleCharacter The index of the first unconvertable byte or -1 if all converted.
sl@0
   153
 *
sl@0
   154
 * @return aNumberOfUnconvertibleCharacters The number of unconverted bytes left at the end of the input 
sl@0
   155
 * descriptor (e.g. because the output descriptor is not long enough to hold all the text), or one of the 
sl@0
   156
 * error values defined in TError. 
sl@0
   157
 * @internalTechnology 
sl@0
   158
 */
sl@0
   159
TInt CUCS2ConverterImpl::ConvertToUnicode(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
sl@0
   160
						   TDes16& aUnicode,	
sl@0
   161
						   const TDesC8& aForeign,
sl@0
   162
						   TInt& aState,
sl@0
   163
						   TInt& aNumberOfUnconvertibleCharacters, 
sl@0
   164
						   TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter) 
sl@0
   165
	{
sl@0
   166
	TInt numberOfBytesConverted = 0;
sl@0
   167
	TInt numberOfUnicodeCharacters =0;
sl@0
   168
	TChar nextChar;
sl@0
   169
	
sl@0
   170
	// work out what byte order to use
sl@0
   171
	CCnvCharacterSetConverter::TEndianness byteOrderMark;
sl@0
   172
	if ( aState==CCnvCharacterSetConverter::KStateDefault )
sl@0
   173
		{
sl@0
   174
		// this is the first call so use the default or BOM for byte order
sl@0
   175
		byteOrderMark = aDefaultEndiannessOfForeignCharacters;
sl@0
   176
		}
sl@0
   177
	else
sl@0
   178
		{
sl@0
   179
		// this is not the first call so use the saved byte order
sl@0
   180
		byteOrderMark = STATIC_CAST( CCnvCharacterSetConverter::TEndianness, aState - KStateOffset );
sl@0
   181
		}
sl@0
   182
		
sl@0
   183
	if ( aForeign.Length() < 2)
sl@0
   184
		{ // too small to do anything with		
sl@0
   185
		return -1;
sl@0
   186
		}
sl@0
   187
	// If the state is KStateDefault (this is the first call) check for BOM markers
sl@0
   188
	else if (aState==CCnvCharacterSetConverter::KStateDefault)
sl@0
   189
		{
sl@0
   190
		// is there a Little Endian BOM
sl@0
   191
		if (aForeign[0]==0xff && aForeign[1]==0xfe )
sl@0
   192
			{ 
sl@0
   193
			byteOrderMark = CCnvCharacterSetConverter::ELittleEndian; 
sl@0
   194
			}
sl@0
   195
		else if (aForeign[0]==0xfe && aForeign[1]==0xff )
sl@0
   196
			{
sl@0
   197
			byteOrderMark = CCnvCharacterSetConverter::EBigEndian; 
sl@0
   198
			}
sl@0
   199
		// remember the detected state
sl@0
   200
		aState = byteOrderMark + KStateOffset;
sl@0
   201
		}
sl@0
   202
sl@0
   203
	// start at begining of the output buffer provided
sl@0
   204
	aUnicode.Zero();
sl@0
   205
	
sl@0
   206
	// while there is at least 2 bytes of data to convert and space in the output buffer
sl@0
   207
	while ( (numberOfBytesConverted+1 < aForeign.Size()) && (numberOfUnicodeCharacters < aUnicode.MaxLength()) )
sl@0
   208
		{
sl@0
   209
		if (byteOrderMark == CCnvCharacterSetConverter::ELittleEndian )
sl@0
   210
			{
sl@0
   211
			// ELittleEndian 0x??00
sl@0
   212
			nextChar = aForeign[numberOfBytesConverted] + ( aForeign[numberOfBytesConverted+1] << 8);
sl@0
   213
			}
sl@0
   214
		else
sl@0
   215
			{
sl@0
   216
			// EBigEndian 0x00??
sl@0
   217
			nextChar = ( aForeign[numberOfBytesConverted] <<8 ) + aForeign[numberOfBytesConverted+1];
sl@0
   218
			}
sl@0
   219
			
sl@0
   220
		// save the unicode character extracted	unless it's a BOM
sl@0
   221
		if ( nextChar != KByteOrderMark )
sl@0
   222
			{
sl@0
   223
			aUnicode.Append( nextChar );
sl@0
   224
			numberOfUnicodeCharacters++;	
sl@0
   225
			}
sl@0
   226
			
sl@0
   227
		numberOfBytesConverted+=2;
sl@0
   228
		}
sl@0
   229
	
sl@0
   230
	// there are no uncovertable characters with UCS2,
sl@0
   231
	aNumberOfUnconvertibleCharacters = 0;
sl@0
   232
	// a negative value indicates that all characters converted
sl@0
   233
	aIndexOfFirstByteOfFirstUnconvertibleCharacter = -1;
sl@0
   234
			
sl@0
   235
	// returns the number of unconverted bytes left at the end of the input descriptor 
sl@0
   236
	// Note there could be 1 byte left over if an odd number of bytes provided for conversion
sl@0
   237
	return aForeign.Size() - numberOfBytesConverted;
sl@0
   238
	}
sl@0
   239
sl@0
   240
sl@0
   241
/**
sl@0
   242
 * This converter does not support autodetect so always returns a confidence value of 0.
sl@0
   243
 * @internalTechnology 
sl@0
   244
 */
sl@0
   245
TBool CUCS2ConverterImpl::IsInThisCharacterSetL(TBool& aSetToTrue, TInt& aConfidenceLevel, const TDesC8&)
sl@0
   246
	{
sl@0
   247
	aSetToTrue=ETrue;
sl@0
   248
	aConfidenceLevel=0;
sl@0
   249
	return EFalse;
sl@0
   250
	}
sl@0
   251
sl@0
   252
/**
 * Factory function: allocates a converter with new(ELeave).
 *
 * @return The newly created object
 */
CUCS2ConverterImpl* CUCS2ConverterImpl::NewL()
	{
	return new(ELeave) CUCS2ConverterImpl();
	}
sl@0
   257
sl@0
   258
// Destructor - intentionally empty.
CUCS2ConverterImpl::~CUCS2ConverterImpl()
	{
	}
sl@0
   261
sl@0
   262
// Constructor - intentionally empty.
CUCS2ConverterImpl::CUCS2ConverterImpl()
	{
	}
sl@0
   265
sl@0
   266
// Pairs implementation UID 0x101FF492 with the factory function CUCS2ConverterImpl::NewL.
const TImplementationProxy ImplementationTable[] = 
	{
	IMPLEMENTATION_PROXY_ENTRY(0x101FF492, CUCS2ConverterImpl::NewL)
	};
sl@0
   270
sl@0
   271
/**
 * Exported entry point that hands out ImplementationTable.
 *
 * @param aTableCount Set to the number of entries in ImplementationTable
 * @return Pointer to the first entry of ImplementationTable
 */
EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount)
	{
	const TInt entryCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy);
	aTableCount = entryCount;

	return ImplementationTable;
	}
sl@0
   277
sl@0
   278