os/textandloc/charconvfw/charconvplugins/src/plugins/big5.cpp
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/textandloc/charconvfw/charconvplugins/src/plugins/big5.cpp	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,205 @@
     1.4 +/*
     1.5 +* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
     1.6 +* All rights reserved.
     1.7 +* This component and the accompanying materials are made available
     1.8 +* under the terms of "Eclipse Public License v1.0"
     1.9 +* which accompanies this distribution, and is available
    1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
    1.11 +*
    1.12 +* Initial Contributors:
    1.13 +* Nokia Corporation - initial contribution.
    1.14 +*
    1.15 +* Contributors:
    1.16 +*
    1.17 +* Description: 
    1.18 +*
    1.19 +*/
    1.20 +
    1.21 +
    1.22 +#include <e32std.h>
    1.23 +#include <charconv.h>
    1.24 +#include "big5.h"
    1.25 +#include <ecom/implementationproxy.h>
    1.26 +#include <charactersetconverter.h>
    1.27 +
    1.28 +class CBIG5ConverterImpl : public CCharacterSetConverterPluginInterface
    1.29 +	{
    1.30 +
    1.31 +public:
    1.32 +	virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters();
    1.33 +
    1.34 +	virtual TInt ConvertFromUnicode(
    1.35 +		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
    1.36 +		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
    1.37 +		TDes8& aForeign, 
    1.38 +		const TDesC16& aUnicode, 
    1.39 +		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
    1.40 +
    1.41 +	virtual TInt ConvertToUnicode(
    1.42 +		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
    1.43 +		TDes16& aUnicode, 
    1.44 +		const TDesC8& aForeign, 
    1.45 +		TInt& aState, 
    1.46 +		TInt& aNumberOfUnconvertibleCharacters, 
    1.47 +		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
    1.48 +
    1.49 +	virtual TBool IsInThisCharacterSetL(
    1.50 +		TBool& aSetToTrue, 
    1.51 +		TInt& aConfidenceLevel, 
    1.52 +		const TDesC8& aSample);
    1.53 +
    1.54 +	static CBIG5ConverterImpl* NewL();
    1.55 +	virtual ~CBIG5ConverterImpl();
    1.56 +
    1.57 +private:
    1.58 +	CBIG5ConverterImpl();
    1.59 +
    1.60 +	};
    1.61 +
    1.62 +
    1.63 +const TDesC8& CBIG5ConverterImpl::ReplacementForUnconvertibleUnicodeCharacters()
    1.64 +	{
    1.65 +	return CnvBig5::ReplacementForUnconvertibleUnicodeCharacters();
    1.66 +	}
    1.67 +
    1.68 +TInt CBIG5ConverterImpl::ConvertFromUnicode(
    1.69 +		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
    1.70 +		const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
    1.71 +		TDes8& aForeign, 
    1.72 +		const TDesC16& aUnicode, 
    1.73 +		CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
    1.74 +	{
    1.75 +	return CCnvCharacterSetConverter::DoConvertFromUnicode(CnvBig5::ConversionData(), aDefaultEndiannessOfForeignCharacters, aReplacementForUnconvertibleUnicodeCharacters, aForeign, aUnicode, aIndicesOfUnconvertibleCharacters);
    1.76 +	}
    1.77 +
    1.78 +TInt CBIG5ConverterImpl::ConvertToUnicode(
    1.79 +		CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
    1.80 +		TDes16& aUnicode, 
    1.81 +		const TDesC8& aForeign, 
    1.82 +		TInt& /*aState*/, 
    1.83 +		TInt& aNumberOfUnconvertibleCharacters, 
    1.84 +		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
    1.85 +	{
    1.86 +	return CCnvCharacterSetConverter::DoConvertToUnicode(CnvBig5::ConversionData(), aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign, aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter);
    1.87 +	}
    1.88 +
    1.89 +TBool CBIG5ConverterImpl::IsInThisCharacterSetL(
    1.90 +		TBool& aSetToTrue, 
    1.91 +		TInt& aConfidenceLevel, 
    1.92 +		const TDesC8& aSample)
    1.93 +	{
    1.94 +	aSetToTrue=ETrue;
    1.95 +	TInt sampleLength = aSample.Length();
    1.96 +	aConfidenceLevel = 0;
    1.97 +	//WBB the following is for distiguish between big5 and GBK
    1.98 +	TInt totalWeight=0;		//sum of the weights of 20 most frequent chars
    1.99 +	TInt sumOfGoodChar=0;		//the number of chars whose first byte and second are both in the range
   1.100 +	TInt sumOfWeight=0;		//sum of the weights of the chars which are included in the sample
   1.101 +	TInt sumOutChar=0;		//the number of chars which are not common
   1.102 +	TInt sumOfBadSecondByte=0;//the number of chars whose first byte is in the range but not the second
   1.103 +	TInt sumOfBadSingleByte=0;	//the number of bad single byte, which is not in valid range
   1.104 +	struct referenceChar
   1.105 +		{
   1.106 +		TUint charBig5;
   1.107 +		TInt weight;
   1.108 +		};
   1.109 +
   1.110 +	referenceChar refBig5[20];
   1.111 +	static const TInt iniWeight[20]=
   1.112 +		{
   1.113 +		//occurence per 1000 chars
   1.114 +		30,20,20,10,10,10,10,10,5,5,
   1.115 +		5,5,5,5,5,5,5,5,5,5
   1.116 +		};
   1.117 +
   1.118 +	static const TUint iniChar[20]=
   1.119 +		{
   1.120 +		0xa141,0xaaba,0xa446,0xadd3,0xa4a3,0xa7e2,0xa440,0xac4f,0xad6e,0xa45d,
   1.121 +		0xa4d1,0xa457,0xa457,0xa94d,0xa4a4,0xa569,0xa662,0xa470,0xa448,0xa455
   1.122 +		};
   1.123 +
   1.124 +	for (TInt k=0; k<20; k++)
   1.125 +		{
   1.126 +		refBig5[k].charBig5=iniChar[k];
   1.127 +		refBig5[k].weight=iniWeight[k];
   1.128 +		totalWeight=totalWeight+iniWeight[k];
   1.129 +		}
   1.130 +	//WBB
   1.131 +	for (TInt i = 0; i < sampleLength; ++i)
   1.132 +		{
   1.133 +		// Big 5 encoding first byte range 0xA1-0xFE 
   1.134 +		//                second byte range 0x40-0x7E  0xA1-0xFE
   1.135 +		if((aSample[i] >= 0xa1) && (aSample[i] <= 0xfe))
   1.136 +			{
   1.137 +			TInt increment1 = i+1;
   1.138 +			if (increment1 >= sampleLength)
   1.139 +				break;
   1.140 +			if(((aSample[increment1] >= 0x40) && (aSample[increment1] <= 0x7e)) ||
   1.141 +				((aSample[increment1] >= 0xa1) && (aSample[increment1] <= 0xfe)))
   1.142 +				{
   1.143 +				TUint charBig5=(aSample[i]<<8)|(aSample[increment1]);
   1.144 +				if (charBig5>=0xc6a1)//Kanas start and rare chars follow after 
   1.145 +					sumOutChar++;
   1.146 +				TInt j;
   1.147 +				for (j=0; j<20; j++)
   1.148 +					{
   1.149 +					if (charBig5==refBig5[j].charBig5)
   1.150 +						{
   1.151 +						sumOfWeight=sumOfWeight+refBig5[j].weight;
   1.152 +						break;
   1.153 +						}
   1.154 +					}
   1.155 +				sumOfGoodChar++;
   1.156 +				i++;
   1.157 +				}
   1.158 +			else
   1.159 +				{
   1.160 +				sumOfBadSecondByte++;
   1.161 +				}
   1.162 +			}
   1.163 +		// if seldom used characters
   1.164 +		else if (aSample[i] < 0x20 || aSample[i] > 0x7F ) 
   1.165 +			{
   1.166 +			if (aSample[i]!=0x09 && aSample[i]!=0x0A && aSample[i]!=0x0D)
   1.167 +				sumOfBadSingleByte++;
   1.168 +			}
   1.169 +		} // for 
   1.170 +
   1.171 +	if (sumOfGoodChar)
   1.172 +		{
   1.173 +		aConfidenceLevel=sumOfGoodChar*100/(sumOfBadSecondByte+sumOfGoodChar+sumOfBadSingleByte);
   1.174 +		aConfidenceLevel=aConfidenceLevel-Max(0,((totalWeight-sumOfWeight)*sumOfGoodChar/1000));//against frequent chars 
   1.175 +		aConfidenceLevel=aConfidenceLevel-sumOutChar*100/sumOfGoodChar;//against gap
   1.176 +		aConfidenceLevel=(aConfidenceLevel < 0)?0:aConfidenceLevel;
   1.177 +		}
   1.178 +	else
   1.179 +		aConfidenceLevel=0;
   1.180 +	return ETrue;
   1.181 +	}
   1.182 +
   1.183 +CBIG5ConverterImpl* CBIG5ConverterImpl::NewL()
   1.184 +	{
   1.185 +	CBIG5ConverterImpl* self = new(ELeave) CBIG5ConverterImpl();
   1.186 +	return self;
   1.187 +	}
   1.188 +
   1.189 +CBIG5ConverterImpl::~CBIG5ConverterImpl()
   1.190 +	{
   1.191 +	}
   1.192 +
   1.193 +CBIG5ConverterImpl::CBIG5ConverterImpl()
   1.194 +	{
   1.195 +	}
   1.196 +
   1.197 +const TImplementationProxy ImplementationTable[] = 
   1.198 +	{
   1.199 +		IMPLEMENTATION_PROXY_ENTRY(0x10000FBF,CBIG5ConverterImpl::NewL)
   1.200 +	};
   1.201 +
   1.202 +EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount)
   1.203 +	{
   1.204 +	aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy);
   1.205 +
   1.206 +	return ImplementationTable;
   1.207 +	}
   1.208 +