sl@0
|
1 |
/*
* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description:
*
*/
|
sl@0
|
17 |
|
sl@0
|
18 |
|
sl@0
|
19 |
#include <e32std.h>
|
sl@0
|
20 |
#include <charconv.h>
|
sl@0
|
21 |
#include "big5.h"
|
sl@0
|
22 |
#include <ecom/implementationproxy.h>
|
sl@0
|
23 |
#include <charactersetconverter.h>
|
sl@0
|
24 |
|
sl@0
|
25 |
class CBIG5ConverterImpl : public CCharacterSetConverterPluginInterface
|
sl@0
|
26 |
{
|
sl@0
|
27 |
|
sl@0
|
28 |
public:
|
sl@0
|
29 |
virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters();
|
sl@0
|
30 |
|
sl@0
|
31 |
virtual TInt ConvertFromUnicode(
|
sl@0
|
32 |
CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
|
sl@0
|
33 |
const TDesC8& aReplacementForUnconvertibleUnicodeCharacters,
|
sl@0
|
34 |
TDes8& aForeign,
|
sl@0
|
35 |
const TDesC16& aUnicode,
|
sl@0
|
36 |
CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
|
sl@0
|
37 |
|
sl@0
|
38 |
virtual TInt ConvertToUnicode(
|
sl@0
|
39 |
CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
|
sl@0
|
40 |
TDes16& aUnicode,
|
sl@0
|
41 |
const TDesC8& aForeign,
|
sl@0
|
42 |
TInt& aState,
|
sl@0
|
43 |
TInt& aNumberOfUnconvertibleCharacters,
|
sl@0
|
44 |
TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
|
sl@0
|
45 |
|
sl@0
|
46 |
virtual TBool IsInThisCharacterSetL(
|
sl@0
|
47 |
TBool& aSetToTrue,
|
sl@0
|
48 |
TInt& aConfidenceLevel,
|
sl@0
|
49 |
const TDesC8& aSample);
|
sl@0
|
50 |
|
sl@0
|
51 |
static CBIG5ConverterImpl* NewL();
|
sl@0
|
52 |
virtual ~CBIG5ConverterImpl();
|
sl@0
|
53 |
|
sl@0
|
54 |
private:
|
sl@0
|
55 |
CBIG5ConverterImpl();
|
sl@0
|
56 |
|
sl@0
|
57 |
};
|
sl@0
|
58 |
|
sl@0
|
59 |
|
sl@0
|
60 |
/**
 * Returns the replacement byte sequence for unconvertible Unicode characters.
 *
 * Simply forwards to CnvBig5::ReplacementForUnconvertibleUnicodeCharacters().
 */
const TDesC8& CBIG5ConverterImpl::ReplacementForUnconvertibleUnicodeCharacters()
    {
    const TDesC8& replacement = CnvBig5::ReplacementForUnconvertibleUnicodeCharacters();
    return replacement;
    }
|
sl@0
|
64 |
|
sl@0
|
65 |
/**
 * Converts Unicode text to Big5.
 *
 * Forwards every argument, together with CnvBig5::ConversionData(), to
 * CCnvCharacterSetConverter::DoConvertFromUnicode() and returns its result
 * unchanged.
 */
TInt CBIG5ConverterImpl::ConvertFromUnicode(
    CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
    const TDesC8& aReplacementForUnconvertibleUnicodeCharacters,
    TDes8& aForeign,
    const TDesC16& aUnicode,
    CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
    {
    const TInt result = CCnvCharacterSetConverter::DoConvertFromUnicode(
        CnvBig5::ConversionData(),
        aDefaultEndiannessOfForeignCharacters,
        aReplacementForUnconvertibleUnicodeCharacters,
        aForeign,
        aUnicode,
        aIndicesOfUnconvertibleCharacters);
    return result;
    }
|
sl@0
|
74 |
|
sl@0
|
75 |
/**
 * Converts Big5 text to Unicode.
 *
 * Forwards the arguments, together with CnvBig5::ConversionData(), to
 * CCnvCharacterSetConverter::DoConvertToUnicode() and returns its result
 * unchanged. The state argument is neither read nor written here.
 */
TInt CBIG5ConverterImpl::ConvertToUnicode(
    CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
    TDes16& aUnicode,
    const TDesC8& aForeign,
    TInt& /*aState*/,
    TInt& aNumberOfUnconvertibleCharacters,
    TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
    {
    const TInt result = CCnvCharacterSetConverter::DoConvertToUnicode(
        CnvBig5::ConversionData(),
        aDefaultEndiannessOfForeignCharacters,
        aUnicode,
        aForeign,
        aNumberOfUnconvertibleCharacters,
        aIndexOfFirstByteOfFirstUnconvertibleCharacter);
    return result;
    }
|
sl@0
|
85 |
|
sl@0
|
86 |
/**
 * Estimates how likely aSample is Big5-encoded text.
 *
 * The sample is scanned byte by byte. A byte in 0xA1-0xFE is treated as a
 * lead byte; if the next byte is in 0x40-0x7E or 0xA1-0xFE the pair counts
 * as a good double-byte character, otherwise as a bad second byte. Other
 * bytes below 0x20 or above 0x7F (except TAB, LF and CR) count as bad single
 * bytes.
 *
 * The confidence starts as the percentage of good characters among all
 * counted items and is then reduced
 *  - when the frequent reference characters are under-represented, and
 *  - in proportion to the share of characters at or above 0xC6A1.
 * The result is clamped so that it is never negative.
 *
 * @param aSetToTrue        Always set to ETrue.
 * @param aConfidenceLevel  Receives the computed confidence; 0 when no good
 *                          double-byte character was found.
 * @param aSample           Bytes to examine.
 * @return Always ETrue.
 */
TBool CBIG5ConverterImpl::IsInThisCharacterSetL(
    TBool& aSetToTrue,
    TInt& aConfidenceLevel,
    const TDesC8& aSample)
    {
    // Frequent Big5 characters with their weights (occurrence per 1000
    // chars); used to distinguish Big5 from GBK.
    // NOTE(review): 0xa457 is listed twice. The lookup stops at the first
    // match, so the second entry can never contribute to the matched weight,
    // yet its weight is still part of the total. Confirm whether a different
    // character was intended.
    struct SFrequentChar
        {
        TUint iCode;
        TInt iWeight;
        };
    static const SFrequentChar KFrequentChars[] =
        {
        {0xa141, 30}, {0xaaba, 20}, {0xa446, 20}, {0xadd3, 10}, {0xa4a3, 10},
        {0xa7e2, 10}, {0xa440, 10}, {0xac4f, 10}, {0xad6e, 5},  {0xa45d, 5},
        {0xa4d1, 5},  {0xa457, 5},  {0xa457, 5},  {0xa94d, 5},  {0xa4a4, 5},
        {0xa569, 5},  {0xa662, 5},  {0xa470, 5},  {0xa448, 5},  {0xa455, 5}
        };
    const TInt frequentCount = sizeof(KFrequentChars) / sizeof(KFrequentChars[0]);

    aSetToTrue = ETrue;
    aConfidenceLevel = 0;

    // Sum of the weights of all reference characters.
    TInt weightTotal = 0;
    for (TInt w = 0; w < frequentCount; ++w)
        {
        weightTotal += KFrequentChars[w].iWeight;
        }

    TInt goodPairs = 0;       // lead byte and second byte both in range
    TInt matchedWeight = 0;   // weights of the reference characters seen in the sample
    TInt rareChars = 0;       // good pairs at or above 0xc6a1 (kana and rare characters)
    TInt badSecondBytes = 0;  // lead byte in range, second byte not
    TInt badSingleBytes = 0;  // single bytes outside the valid range

    const TInt length = aSample.Length();
    TInt pos = 0;
    while (pos < length)
        {
        const TUint lead = aSample[pos];

        // Big5 first byte range is 0xA1-0xFE.
        if (lead >= 0xa1 && lead <= 0xfe)
            {
            if (pos + 1 >= length)
                {
                // Lead byte with nothing after it: stop scanning.
                break;
                }

            // Big5 second byte range is 0x40-0x7E or 0xA1-0xFE.
            const TUint trail = aSample[pos + 1];
            const TBool trailInRange =
                (trail >= 0x40 && trail <= 0x7e) || (trail >= 0xa1 && trail <= 0xfe);
            if (!trailInRange)
                {
                // Only the lead byte is consumed; the following byte is
                // examined again on the next iteration.
                ++badSecondBytes;
                ++pos;
                continue;
                }

            const TUint code = (lead << 8) | trail;
            if (code >= 0xc6a1)
                {
                ++rareChars;
                }
            for (TInt ref = 0; ref < frequentCount; ++ref)
                {
                if (KFrequentChars[ref].iCode == code)
                    {
                    matchedWeight += KFrequentChars[ref].iWeight;
                    break;
                    }
                }
            ++goodPairs;
            pos += 2;
            continue;
            }

        // Not a lead byte: control or high bytes other than TAB/LF/CR are bad.
        if ((lead < 0x20 || lead > 0x7F) &&
            lead != 0x09 && lead != 0x0A && lead != 0x0D)
            {
            ++badSingleBytes;
            }
        ++pos;
        }

    if (goodPairs == 0)
        {
        aConfidenceLevel = 0;
        return ETrue;
        }

    TInt confidence = goodPairs * 100 / (badSecondBytes + goodPairs + badSingleBytes);
    // Penalty for missing frequent characters.
    confidence -= Max(0, ((weightTotal - matchedWeight) * goodPairs / 1000));
    // Penalty for the share of rare characters.
    confidence -= rareChars * 100 / goodPairs;
    aConfidenceLevel = (confidence < 0) ? 0 : confidence;
    return ETrue;
    }
|
sl@0
|
179 |
|
sl@0
|
180 |
/**
 * Factory function: allocates a converter with new(ELeave) and returns it.
 * No further construction step is performed.
 */
CBIG5ConverterImpl* CBIG5ConverterImpl::NewL()
    {
    return new(ELeave) CBIG5ConverterImpl();
    }
|
sl@0
|
185 |
|
sl@0
|
186 |
/** Destructor. The body is empty; nothing is released here. */
CBIG5ConverterImpl::~CBIG5ConverterImpl()
    {
    // Intentionally empty.
    }
|
sl@0
|
189 |
|
sl@0
|
190 |
/** Default constructor. The body is empty; no members are initialised here. */
CBIG5ConverterImpl::CBIG5ConverterImpl()
    {
    // Intentionally empty.
    }
|
sl@0
|
193 |
|
sl@0
|
194 |
// Proxy table pairing 0x10000FBF with the CBIG5ConverterImpl::NewL factory
// function.
const TImplementationProxy ImplementationTable[] =
    {
    IMPLEMENTATION_PROXY_ENTRY(0x10000FBF, CBIG5ConverterImpl::NewL)
    };
|
sl@0
|
198 |
|
sl@0
|
199 |
/**
 * Exported entry point that returns the implementation proxy table.
 *
 * @param aTableCount  Receives the number of entries in ImplementationTable.
 * @return Pointer to the first entry of ImplementationTable.
 */
EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount)
    {
    const TInt entryCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy);
    aTableCount = entryCount;
    return ImplementationTable;
    }
|
sl@0
|
205 |
|