Update contrib.
2 * Copyright (c) 2000-2009 Nokia Corporation and/or its subsidiary(-ies).
4 * This component and the accompanying materials are made available
5 * under the terms of "Eclipse Public License v1.0"
6 * which accompanies this distribution, and is available
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
9 * Initial Contributors:
10 * Nokia Corporation - initial contribution.
20 #include <convgeneratedcpp.h>
23 struct SCnvConversionData;
25 EXPORT_C const TDesC8& CnvGb2312::ReplacementForUnconvertibleUnicodeCharacters()
27 return ReplacementForUnconvertibleUnicodeCharacters_internal();
30 EXPORT_C const SCnvConversionData& CnvGb2312::ConversionData()
32 return conversionData;
35 EXPORT_C TBool CnvGb2312::IsCharGBBased(TInt& aConfidenceLevel, const TDesC8& aSample)
37 TInt sampleLength = aSample.Length();
39 //WBB the following is for distinguishing between Big5 and GBK
40 TInt totalWeight=0; //sum of the weights of 20 most frequent chars
41 TInt sumOfGoodChar=0; //the number of chars whose first byte and second are both in the range
42 TInt sumOfWeight=0; //sum of the weights of the chars which are included in the sample
43 TInt sumOutChar=0; //the number of chars which are not common
44 TInt sumOfBadSecondByte=0;//the number of chars whose first byte is in the range but not the second
45 TInt sumOfBadSingleByte=0; //the number of bad single byte, which is not in valid range
52 referenceChar refGbk[20];
53 static const TInt iniWeight[20]=
55 //occurrences per 1000 chars
56 30,20,20,10,10,10,10,10,5,5,
60 static const TUint iniChar[20]=
62 0xa3ac,0xb5c4,0xc1cb,0xb8f6,0xb2bb,0xb0d1,0xd2bb,0xcac7,0xd2aa,0xbecd,
63 0xd2b2,0xccec,0xc9cf,0xbacd,0xd6d0,0xd4da,0xd0a1,0xc8cb,0xcfc2,0xd6d0,
66 for (TInt k=0; k<20; k++)
68 refGbk[k].charGBK=iniChar[k];
69 refGbk[k].weight=iniWeight[k];
70 totalWeight=totalWeight+iniWeight[k];
75 for (TInt i = 0; i < sampleLength; ++i)
77 //GBK encoding first byte range 0x81-0xfe
78 // second byte range 0x40-0x7e, 0x80-0xfe
79 if((aSample[i] >= 0x81) && (aSample[i] <= 0xfe))
81 TInt increment1 = i+1;
82 if (increment1 >= sampleLength)
84 if (((aSample[increment1] >=0x40) && (aSample[increment1] <= 0x7e)) ||
85 ((aSample[increment1] >=0x80) && (aSample[increment1] <= 0xfe)))
88 TUint charGbk=(aSample[i]<<8)|(aSample[increment1]);
92 if (charGbk==refGbk[j].charGBK)
94 sumOfWeight=sumOfWeight+refGbk[j].weight;
98 if ((aSample[i]>=0xa4)&&(aSample[i]<=0xaf))
106 sumOfBadSecondByte++;
109 // if seldom used characters
110 else if (aSample[i] < 0x20 || aSample[i] > 0x7F )
112 if (aSample[i]!=0x09 && aSample[i]!=0x0A && aSample[i]!=0x0D)
113 sumOfBadSingleByte++;
118 limit = (10*sampleLength)/100;
119 if (sumOfGoodChar > limit)
121 aConfidenceLevel=sumOfGoodChar*100/(sumOfBadSecondByte+sumOfGoodChar+sumOfBadSingleByte);
122 aConfidenceLevel=aConfidenceLevel-Max(0,((totalWeight-sumOfWeight)*sumOfGoodChar/1000));//against frequent chars
123 aConfidenceLevel=aConfidenceLevel-(sumOutChar*100/sumOfGoodChar);//against gap
124 aConfidenceLevel=(aConfidenceLevel < 0)?0:aConfidenceLevel;