os/textandloc/charconvfw/charconvplugins/src/plugins/big5.cpp
author sl@SLION-WIN7.fritz.box
Fri, 15 Jun 2012 03:10:57 +0200
changeset 0 bde4ae8d615e
permissions -rw-r--r--
First public contribution.
sl@0
     1
/*
sl@0
     2
* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
sl@0
     3
* All rights reserved.
sl@0
     4
* This component and the accompanying materials are made available
sl@0
     5
* under the terms of "Eclipse Public License v1.0"
sl@0
     6
* which accompanies this distribution, and is available
sl@0
     7
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
sl@0
     8
*
sl@0
     9
* Initial Contributors:
sl@0
    10
* Nokia Corporation - initial contribution.
sl@0
    11
*
sl@0
    12
* Contributors:
sl@0
    13
*
sl@0
    14
* Description: 
sl@0
    15
*
sl@0
    16
*/
sl@0
    17
sl@0
    18
sl@0
    19
#include <e32std.h>
sl@0
    20
#include <charconv.h>
sl@0
    21
#include "big5.h"
sl@0
    22
#include <ecom/implementationproxy.h>
sl@0
    23
#include <charactersetconverter.h>
sl@0
    24
sl@0
    25
class CBIG5ConverterImpl : public CCharacterSetConverterPluginInterface
sl@0
    26
	{
sl@0
    27
sl@0
    28
public:
sl@0
    29
	virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters();
sl@0
    30
sl@0
    31
	virtual TInt ConvertFromUnicode(
sl@0
    32
		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
sl@0
    33
		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
sl@0
    34
		TDes8& aForeign, 
sl@0
    35
		const TDesC16& aUnicode, 
sl@0
    36
		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
sl@0
    37
sl@0
    38
	virtual TInt ConvertToUnicode(
sl@0
    39
		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
sl@0
    40
		TDes16& aUnicode, 
sl@0
    41
		const TDesC8& aForeign, 
sl@0
    42
		TInt& aState, 
sl@0
    43
		TInt& aNumberOfUnconvertibleCharacters, 
sl@0
    44
		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
sl@0
    45
sl@0
    46
	virtual TBool IsInThisCharacterSetL(
sl@0
    47
		TBool& aSetToTrue, 
sl@0
    48
		TInt& aConfidenceLevel, 
sl@0
    49
		const TDesC8& aSample);
sl@0
    50
sl@0
    51
	static CBIG5ConverterImpl* NewL();
sl@0
    52
	virtual ~CBIG5ConverterImpl();
sl@0
    53
sl@0
    54
private:
sl@0
    55
	CBIG5ConverterImpl();
sl@0
    56
sl@0
    57
	};
sl@0
    58
sl@0
    59
sl@0
    60
/**
Returns the replacement byte sequence supplied by CnvBig5 for Unicode
characters that have no Big5 equivalent.
*/
const TDesC8& CBIG5ConverterImpl::ReplacementForUnconvertibleUnicodeCharacters()
	{
	const TDesC8& replacement = CnvBig5::ReplacementForUnconvertibleUnicodeCharacters();
	return replacement;
	}
sl@0
    64
sl@0
    65
/**
Converts Unicode text to Big5.

All arguments are passed unchanged to
CCnvCharacterSetConverter::DoConvertFromUnicode() together with the Big5
conversion data from CnvBig5::ConversionData().

@return The value returned by DoConvertFromUnicode().
*/
TInt CBIG5ConverterImpl::ConvertFromUnicode(
		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters,
		TDes8& aForeign,
		const TDesC16& aUnicode,
		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
	{
	const TInt result = CCnvCharacterSetConverter::DoConvertFromUnicode(
		CnvBig5::ConversionData(),
		aDefaultEndiannessOfForeignCharacters,
		aReplacementForUnconvertibleUnicodeCharacters,
		aForeign,
		aUnicode,
		aIndicesOfUnconvertibleCharacters);
	return result;
	}
sl@0
    74
sl@0
    75
/**
Converts Big5 text to Unicode.

The state argument is unused by this implementation. The remaining
arguments are passed unchanged to
CCnvCharacterSetConverter::DoConvertToUnicode() together with the Big5
conversion data from CnvBig5::ConversionData().

@return The value returned by DoConvertToUnicode().
*/
TInt CBIG5ConverterImpl::ConvertToUnicode(
		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
		TDes16& aUnicode,
		const TDesC8& aForeign,
		TInt& /*aState*/,
		TInt& aNumberOfUnconvertibleCharacters,
		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
	{
	const TInt result = CCnvCharacterSetConverter::DoConvertToUnicode(
		CnvBig5::ConversionData(),
		aDefaultEndiannessOfForeignCharacters,
		aUnicode,
		aForeign,
		aNumberOfUnconvertibleCharacters,
		aIndexOfFirstByteOfFirstUnconvertibleCharacter);
	return result;
	}
sl@0
    85
sl@0
    86
/**
Scores how likely it is that aSample is Big5-encoded text.

The sample is scanned byte by byte. A byte in 0xA1-0xFE is treated as a
lead byte; if the following byte is in 0x40-0x7E or 0xA1-0xFE the pair
counts as a well-formed double-byte character. The base score is the
percentage of well-formed pairs among well-formed pairs, pairs with a bad
trail byte and bad single bytes. Two penalties are then subtracted: one
for the absence of frequently used characters (to help distinguish Big5
from GBK) and one for characters at or above 0xC6A1.

@param aSetToTrue        Always set to ETrue.
@param aConfidenceLevel  Receives the resulting score; never negative, and
                         0 when the sample contains no well-formed pair.
@param aSample           The bytes to examine.
@return Always ETrue.
*/
TBool CBIG5ConverterImpl::IsInThisCharacterSetL(
		TBool& aSetToTrue,
		TInt& aConfidenceLevel,
		const TDesC8& aSample)
	{
	// A frequently used Big5 character and its weight
	// (occurrences per 1000 characters).
	struct TFrequentChar
		{
		TUint iCode;
		TInt iWeight;
		};

	// NOTE(review): 0xa457 is listed twice. The second entry can never be
	// matched because the lookup stops at the first hit, yet its weight is
	// still part of the total. One of the two is presumably meant to be a
	// different character - confirm against the frequency data.
	static const TFrequentChar KFrequentChars[] =
		{
		{0xa141, 30}, {0xaaba, 20}, {0xa446, 20}, {0xadd3, 10}, {0xa4a3, 10},
		{0xa7e2, 10}, {0xa440, 10}, {0xac4f, 10}, {0xad6e, 5},  {0xa45d, 5},
		{0xa4d1, 5},  {0xa457, 5},  {0xa457, 5},  {0xa94d, 5},  {0xa4a4, 5},
		{0xa569, 5},  {0xa662, 5},  {0xa470, 5},  {0xa448, 5},  {0xa455, 5}
		};
	const TInt frequentCount = sizeof(KFrequentChars) / sizeof(KFrequentChars[0]);

	aSetToTrue = ETrue;
	aConfidenceLevel = 0;

	// Sum of the weights of all frequent characters.
	TInt weightOfAllFrequent = 0;
	for (TInt n = 0; n < frequentCount; ++n)
		{
		weightOfAllFrequent += KFrequentChars[n].iWeight;
		}

	TInt goodPairs = 0;			// lead byte and trail byte both in range
	TInt weightSeen = 0;		// summed weights of frequent characters found
	TInt uncommonPairs = 0;		// well-formed pairs at or above 0xc6a1
	TInt badTrailBytes = 0;		// lead byte in range, trail byte not
	TInt badSingleBytes = 0;	// single bytes outside the accepted range

	const TInt length = aSample.Length();
	TInt pos = 0;
	while (pos < length)
		{
		const TUint lead = aSample[pos];

		// Not a Big5 lead byte (lead range is 0xA1-0xFE): judge it as a
		// single byte and move on.
		if (lead < 0xa1 || lead > 0xfe)
			{
			const TBool inAsciiRange = (lead >= 0x20 && lead <= 0x7F);
			const TBool tabOrNewline = (lead == 0x09 || lead == 0x0A || lead == 0x0D);
			if (!inAsciiRange && !tabOrNewline)
				{
				++badSingleBytes;
				}
			++pos;
			continue;
			}

		// A lead byte at the very end of the sample has no trail byte to
		// inspect; stop scanning without counting it either way.
		if (pos + 1 >= length)
			{
			break;
			}

		// Trail byte range is 0x40-0x7E or 0xA1-0xFE.
		const TUint trail = aSample[pos + 1];
		const TBool trailInRange =
			(trail >= 0x40 && trail <= 0x7e) || (trail >= 0xa1 && trail <= 0xfe);
		if (!trailInRange)
			{
			// Only the lead byte is consumed; the offending byte is
			// examined again on the next iteration.
			++badTrailBytes;
			++pos;
			continue;
			}

		const TUint code = (lead << 8) | trail;
		if (code >= 0xc6a1)	// Kanas start here and rare characters follow
			{
			++uncommonPairs;
			}

		// Credit the weight of the first matching frequent character.
		for (TInt n = 0; n < frequentCount; ++n)
			{
			if (KFrequentChars[n].iCode == code)
				{
				weightSeen += KFrequentChars[n].iWeight;
				break;
				}
			}

		++goodPairs;
		pos += 2;
		}

	if (goodPairs == 0)
		{
		// Nothing looked like a Big5 character; confidence stays at 0.
		return ETrue;
		}

	// Base score: share of well-formed pairs, as a percentage.
	TInt confidence = goodPairs * 100 / (badTrailBytes + goodPairs + badSingleBytes);

	// Penalty for missing frequent characters; never applied as a bonus.
	const TInt frequencyPenalty = (weightOfAllFrequent - weightSeen) * goodPairs / 1000;
	if (frequencyPenalty > 0)
		{
		confidence -= frequencyPenalty;
		}

	// Penalty for characters from the uncommon region.
	confidence -= uncommonPairs * 100 / goodPairs;

	if (confidence < 0)
		{
		confidence = 0;
		}
	aConfidenceLevel = confidence;
	return ETrue;
	}
sl@0
   179
sl@0
   180
/**
Factory function: allocates a new converter with new(ELeave) and returns
it to the caller.
*/
CBIG5ConverterImpl* CBIG5ConverterImpl::NewL()
	{
	return new(ELeave) CBIG5ConverterImpl();
	}
sl@0
   185
sl@0
   186
/**
Destructor. The class declares no members of its own, so there is nothing
to release here.
*/
CBIG5ConverterImpl::~CBIG5ConverterImpl()
	{
	// Intentionally empty.
	}
sl@0
   189
sl@0
   190
/**
Default constructor. Private; instances are obtained through NewL().
*/
CBIG5ConverterImpl::CBIG5ConverterImpl()
	{
	// Intentionally empty.
	}
sl@0
   193
sl@0
   194
// Proxy table for this plug-in: a single entry pairing the identifier
// 0x10000FBF with the CBIG5ConverterImpl factory function.
const TImplementationProxy ImplementationTable[] =
	{
	IMPLEMENTATION_PROXY_ENTRY(0x10000FBF, CBIG5ConverterImpl::NewL)
	};
sl@0
   198
sl@0
   199
/**
Exported entry point that hands out the plug-in's proxy table.

@param aTableCount  Receives the number of entries in ImplementationTable.
@return Pointer to the first entry of ImplementationTable.
*/
EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount)
	{
	const TInt entryCount = sizeof(ImplementationTable) / sizeof(ImplementationTable[0]);
	aTableCount = entryCount;
	return ImplementationTable;
	}
sl@0
   205