Update contrib.
2 * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
4 * This component and the accompanying materials are made available
5 * under the terms of "Eclipse Public License v1.0"
6 * which accompanies this distribution, and is available
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
9 * Initial Contributors:
10 * Nokia Corporation - initial contribution.
22 #include <ecom/implementationproxy.h>
23 #include <charactersetconverter.h>
// Big5 character-set converter plug-in class, derived from
// CCharacterSetConverterPluginInterface. It declares the replacement-sequence
// accessor, the two conversion entry points, the character-set detection hook,
// the NewL() factory and a virtual destructor.
// NOTE(review): the listing numbers fused into each line skip values inside
// this declaration (34, 40, 42, 47, ...), so braces, access specifiers and
// some parameter lines are not visible here -- consult the complete file
// before changing any signature.
25 class CBIG5ConverterImpl : public CCharacterSetConverterPluginInterface
// Accessor for the byte sequence used in place of unconvertible Unicode characters.
29 virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters();
// Unicode -> foreign conversion entry point; returns a TInt.
31 virtual TInt ConvertFromUnicode(
32 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
33 const TDesC8& aReplacementForUnconvertibleUnicodeCharacters,
35 const TDesC16& aUnicode,
36 CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
// Foreign -> Unicode conversion entry point; the two TInt& parameters are
// non-const references, presumably used to report results back to the caller.
38 virtual TInt ConvertToUnicode(
39 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
41 const TDesC8& aForeign,
43 TInt& aNumberOfUnconvertibleCharacters,
44 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
// Detection hook: writes a confidence value for aSample into aConfidenceLevel.
46 virtual TBool IsInThisCharacterSetL(
48 TInt& aConfidenceLevel,
49 const TDesC8& aSample);
// Static factory; this is the function registered in the ECom implementation table.
51 static CBIG5ConverterImpl* NewL();
52 virtual ~CBIG5ConverterImpl();
// Returns the replacement byte sequence for unconvertible Unicode characters
// by forwarding to CnvBig5::ReplacementForUnconvertibleUnicodeCharacters().
// NOTE(review): the function's braces are on lines not present in this listing.
60 const TDesC8& CBIG5ConverterImpl::ReplacementForUnconvertibleUnicodeCharacters()
62 return CnvBig5::ReplacementForUnconvertibleUnicodeCharacters();
// Converts aUnicode to Big5 by delegating to
// CCnvCharacterSetConverter::DoConvertFromUnicode() with the Big5 conversion
// data from CnvBig5::ConversionData(); every argument is passed through
// unchanged and the callee's TInt result is returned as-is.
// NOTE(review): aForeign is passed to the callee below, but the line declaring
// that parameter (and the braces) is missing from this listing -- confirm its
// type against the complete file.
65 TInt CBIG5ConverterImpl::ConvertFromUnicode(
66 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
67 const TDesC8& aReplacementForUnconvertibleUnicodeCharacters,
69 const TDesC16& aUnicode,
70 CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
72 return CCnvCharacterSetConverter::DoConvertFromUnicode(CnvBig5::ConversionData(), aDefaultEndiannessOfForeignCharacters, aReplacementForUnconvertibleUnicodeCharacters, aForeign, aUnicode, aIndicesOfUnconvertibleCharacters);
// Converts Big5 bytes in aForeign to Unicode by delegating to
// CCnvCharacterSetConverter::DoConvertToUnicode() with the Big5 conversion
// data from CnvBig5::ConversionData(); every argument is passed through
// unchanged and the callee's TInt result is returned as-is.
// NOTE(review): aUnicode is passed to the callee below, but the line declaring
// that parameter (and the braces) is missing from this listing -- confirm its
// type against the complete file.
75 TInt CBIG5ConverterImpl::ConvertToUnicode(
76 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
78 const TDesC8& aForeign,
80 TInt& aNumberOfUnconvertibleCharacters,
81 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
83 return CCnvCharacterSetConverter::DoConvertToUnicode(CnvBig5::ConversionData(), aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign, aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter);
// Scores how likely aSample is Big5-encoded text and writes the score to
// aConfidenceLevel (the final visible statement clamps it to be non-negative).
// The sample is scanned byte by byte: a lead byte in 0xA1-0xFE followed by a
// trail byte in 0x40-0x7E or 0xA1-0xFE is combined into a 16-bit code, and
// codes that match a table of 20 weighted reference characters add to the score.
// NOTE(review): the listing numbers fused into each line skip many values in
// this function (braces, the first parameter, several statements and the
// return), so the control flow shown here is incomplete -- confirm against the
// complete file before changing any logic.
86 TBool CBIG5ConverterImpl::IsInThisCharacterSetL(
88 TInt& aConfidenceLevel,
89 const TDesC8& aSample)
92 TInt sampleLength = aSample.Length();
94 //WBB the following is to distinguish between Big5 and GBK
95 TInt totalWeight=0; //sum of the weights of 20 most frequent chars
96 TInt sumOfGoodChar=0; //the number of chars whose first byte and second are both in the range
97 TInt sumOfWeight=0; //sum of the weights of the chars which are included in the sample
98 TInt sumOutChar=0; //the number of chars which are not common
99 TInt sumOfBadSecondByte=0;//the number of chars whose first byte is in the range but not the second
100 TInt sumOfBadSingleByte=0; //the number of bad single byte, which is not in valid range
// referenceChar is declared on lines not present in this listing; the code
// below uses its charBig5 and weight members.
107 referenceChar refBig5[20];
108 static const TInt iniWeight[20]=
110 //occurrence per 1000 chars
111 30,20,20,10,10,10,10,10,5,5,
// Big5 codes of the reference characters, index-aligned with iniWeight.
// NOTE(review): 0xa457 appears twice in the visible entries -- confirm whether
// the duplicate is intentional or a different code was meant.
115 static const TUint iniChar[20]=
117 0xa141,0xaaba,0xa446,0xadd3,0xa4a3,0xa7e2,0xa440,0xac4f,0xad6e,0xa45d,
118 0xa4d1,0xa457,0xa457,0xa94d,0xa4a4,0xa569,0xa662,0xa470,0xa448,0xa455
// Copy both tables into refBig5 and accumulate the total of all 20 weights.
121 for (TInt k=0; k<20; k++)
123 refBig5[k].charBig5=iniChar[k];
124 refBig5[k].weight=iniWeight[k];
125 totalWeight=totalWeight+iniWeight[k];
// Main scan over the sample bytes.
128 for (TInt i = 0; i < sampleLength; ++i)
130 // Big 5 encoding first byte range 0xA1-0xFE
131 // second byte range 0x40-0x7E 0xA1-0xFE
132 if((aSample[i] >= 0xa1) && (aSample[i] <= 0xfe))
// increment1 is the index of the candidate trail byte.
134 TInt increment1 = i+1;
// NOTE(review): the statement governed by this end-of-sample check is on a
// line missing from this listing; do not assume it falls through to the
// aSample[increment1] reads below.
135 if (increment1 >= sampleLength)
137 if(((aSample[increment1] >= 0x40) && (aSample[increment1] <= 0x7e)) ||
138 ((aSample[increment1] >= 0xa1) && (aSample[increment1] <= 0xfe)))
// Lead byte goes in the high 8 bits, trail byte in the low 8 bits.
140 TUint charBig5=(aSample[i]<<8)|(aSample[increment1]);
// NOTE(review): the statement controlled by this threshold test is not visible
// here; presumably it updates sumOutChar -- verify.
141 if (charBig5>=0xc6a1)//Kanas start and rare chars follow after
// A code equal to a reference character adds that character's weight.
// (The loop that declares and advances j is on lines missing from this listing.)
146 if (charBig5==refBig5[j].charBig5)
148 sumOfWeight=sumOfWeight+refBig5[j].weight;
// Presumably the else-branch of the trail-byte range test above -- verify.
157 sumOfBadSecondByte++;
160 // if seldom used characters
161 else if (aSample[i] < 0x20 || aSample[i] > 0x7F )
// Tab (0x09), LF (0x0A) and CR (0x0D) are not counted as bad single bytes.
163 if (aSample[i]!=0x09 && aSample[i]!=0x0A && aSample[i]!=0x0D)
164 sumOfBadSingleByte++;
// Base score: percentage of good two-byte characters among good characters,
// bad second bytes and bad single bytes.
// NOTE(review): this line and the "against gap" line divide by values that are
// zero when no good character was counted; any guard is on lines missing from
// this listing -- confirm one exists.
170 aConfidenceLevel=sumOfGoodChar*100/(sumOfBadSecondByte+sumOfGoodChar+sumOfBadSingleByte);
// Penalty scaled by the reference weight that was not matched; Max(0, ...)
// keeps it from turning into a bonus when sumOfWeight exceeds totalWeight.
171 aConfidenceLevel=aConfidenceLevel-Max(0,((totalWeight-sumOfWeight)*sumOfGoodChar/1000));//against frequent chars
// Penalty: sumOutChar expressed as a percentage of the good characters.
172 aConfidenceLevel=aConfidenceLevel-sumOutChar*100/sumOfGoodChar;//against gap
// Clamp so the reported confidence is never negative.
173 aConfidenceLevel=(aConfidenceLevel < 0)?0:aConfidenceLevel;
// Factory: allocates a CBIG5ConverterImpl with the new(ELeave) overload.
// NOTE(review): the braces and the return statement are on lines missing from
// this listing.
180 CBIG5ConverterImpl* CBIG5ConverterImpl::NewL()
182 CBIG5ConverterImpl* self = new(ELeave) CBIG5ConverterImpl();
// Destructor definition; its body is on lines missing from this listing.
186 CBIG5ConverterImpl::~CBIG5ConverterImpl()
// Default constructor definition (invoked by NewL()); its body is on lines
// missing from this listing.
190 CBIG5ConverterImpl::CBIG5ConverterImpl()
// ECom implementation table: the visible entry pairs implementation UID
// 0x10000FBF with the CBIG5ConverterImpl::NewL factory.
// NOTE(review): the array's braces are on lines missing from this listing.
194 const TImplementationProxy ImplementationTable[] =
196 IMPLEMENTATION_PROXY_ENTRY(0x10000FBF,CBIG5ConverterImpl::NewL)
// Exported entry point that hands out the implementation table.
// Sets aTableCount to the number of TImplementationProxy entries in
// ImplementationTable and returns a pointer to the table.
199 EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount)
// Entry count = total array size divided by the size of one entry.
201 aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy);
203 return ImplementationTable;