Update contrib.
2 * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
4 * This component and the accompanying materials are made available
5 * under the terms of "Eclipse Public License v1.0"
6 * which accompanies this distribution, and is available
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
9 * Initial Contributors:
10 * Nokia Corporation - initial contribution.
21 #include "SHIFTJIS_2.H"
22 #include <ecom/implementationproxy.h>
23 #include "charactersetconverter.h"
24 #include "featmgr/featmgr.h"
27 Shift-JIS character converter wrapper
// Plugin class for the Shift-JIS converter. It derives from
// CCharacterSetConverterPluginInterface and declares the virtual conversion
// and auto-detection entry points, a static NewL() factory, a virtual
// destructor and a default constructor.
// NOTE(review): this listing omits some original lines (braces, access
// specifiers and several parameter lines are not visible here).
32 class CShiftJisConverterImpl : public CCharacterSetConverterPluginInterface
// Returns the Shift-JIS byte sequence used in place of Unicode characters
// that cannot be converted.
36 virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters();
// Unicode -> foreign conversion entry point; the definition in this file
// forwards to CnvShiftJis::ConvertFromUnicode.
38 virtual TInt ConvertFromUnicode(
39 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
40 const TDesC8& aReplacementForUnconvertibleUnicodeCharacters,
42 const TDesC16& aUnicode,
43 CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters);
// Foreign -> Unicode conversion entry point; the definition in this file
// forwards to CnvShiftJis::ConvertToUnicode.
45 virtual TInt ConvertToUnicode(
46 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
48 const TDesC8& aForeign,
50 TInt& aNumberOfUnconvertibleCharacters,
51 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter);
// Auto-detection entry point: writes a confidence score for aSample into
// aConfidenceLevel.
53 virtual TBool IsInThisCharacterSetL(
55 TInt& aConfidenceLevel,
56 const TDesC8& aSample);
// Static factory that creates a CShiftJisConverterImpl instance.
58 static CShiftJisConverterImpl* NewL();
59 virtual ~CShiftJisConverterImpl();
// Default constructor; instances are created through NewL() in this file.
62 CShiftJisConverterImpl();
68 Gets the Shift-JIS byte sequence which will replace any Unicode characters which can't be converted.
70 @return The Shift-JIS byte sequence which will replace any Unicode characters which can't be converted.
// Thin forwarding wrapper: returns the descriptor produced by
// CnvShiftJis::ReplacementForUnconvertibleUnicodeCharacters() unchanged.
73 const TDesC8& CShiftJisConverterImpl::ReplacementForUnconvertibleUnicodeCharacters()
75 return CnvShiftJis::ReplacementForUnconvertibleUnicodeCharacters();
// Thin forwarding wrapper: passes the default endianness, the replacement
// bytes, aForeign, aUnicode and the array of unconvertible-character indices
// straight to CnvShiftJis::ConvertFromUnicode and returns its TInt result.
// NOTE(review): aForeign is used in the call below, but the line declaring
// that parameter is not visible in this listing.
78 TInt CShiftJisConverterImpl::ConvertFromUnicode(
79 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
80 const TDesC8& aReplacementForUnconvertibleUnicodeCharacters,
82 const TDesC16& aUnicode,
83 CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
85 return CnvShiftJis::ConvertFromUnicode(aDefaultEndiannessOfForeignCharacters, aReplacementForUnconvertibleUnicodeCharacters, aForeign, aUnicode, aIndicesOfUnconvertibleCharacters);
90 Converts Shift-JIS encoded input text to Unicode
92 NOTE: For debugging the selected character set is returned in the state.
95 @param aDefaultEndiannessOfForeignCharacters The default endian-ness to use when reading characters
96 in the foreign character set.
97 @param aUnicode On return, contains the text converted into Unicode.
98 @param aForeign The non-Unicode source text to be converted.
99 @param aState Used to save state information across multiple calls
100 to <code>ConvertToUnicode()</code>.
101 @param aNumberOfUnconvertibleCharacters On return, contains the number of bytes which were not
103 @param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, contains the index of the first byte in the
104 input text that could not be converted. A negative
105 value indicates that all the characters were
107 @return The number of unconverted bytes left at the end of the input descriptor
108 (e.g. because the output descriptor is not long enough to hold all the text),
109 or one of the error values defined in TError.
// Thin forwarding wrapper: passes the default endianness, aUnicode, aForeign
// and the two unconvertible-character out-parameters straight to
// CnvShiftJis::ConvertToUnicode and returns its TInt result.
// NOTE(review): aUnicode is used in the call below, but the line declaring
// that parameter is not visible in this listing. The visible call also does
// not pass any state argument on to CnvShiftJis::ConvertToUnicode.
112 TInt CShiftJisConverterImpl::ConvertToUnicode(
113 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
115 const TDesC8& aForeign,
117 TInt& aNumberOfUnconvertibleCharacters,
118 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
120 return CnvShiftJis::ConvertToUnicode(aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign, aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter);
125 This API is used by CCnvCharacterSetConverter::AutoDetectCharacterSetL().
126 This method returns a value between 0 and 100, indicating how likely it
127 is that this is the correct converter, for the text supplied.
// Scans aSample byte by byte and writes a score into aConfidenceLevel that
// expresses how likely the sample is Shift-JIS encoded text.
// NOTE(review): several original lines of this function are not visible in
// this listing (for example the declaration of `occurrence`, the statements
// that follow most of the range checks, and the return statement). The
// comments below describe only the statements that are visible.
130 TBool CShiftJisConverterImpl::IsInThisCharacterSetL(
132 TInt& aConfidenceLevel,
133 const TDesC8& aSample)
136 TInt sampleLength = aSample.Length();
// Start from zero confidence; the loop below raises it on matching bytes.
137 aConfidenceLevel = 0;
// Used as the divisor in the final adjustment after the loop.
// NOTE(review): presumably counts the double-byte Shift-JIS characters seen;
// the increment is not visible in this listing.
138 TInt numberOfShiftJis=0;
140 for (TInt i = 0; i < sampleLength; ++i)
142 // Check for JISX 0208:1997 Charset
143 // First Byte in range 0x81-0x9f, 0xe0-0xef
144 if (((aSample[i] >= 0x81) && (aSample[i] <= 0x9f)) ||
145 ((aSample[i] >= 0xe0) && (aSample[i] <= 0xef)))
147 // check that the second byte is in range as well
148 TInt increment1 = i+1;
// Bounds check: true when no byte follows position i (the action taken is
// not visible in this listing).
149 if(increment1 >= sampleLength)
151 if (((aSample[increment1] >= 0x40) && (aSample[increment1] <= 0x7e)) ||
152 ((aSample[increment1] >= 0x80) && (aSample[increment1] <= 0xfc)))
154 // increase the confidence of this sample as ShiftJis
// The first match sets the confidence to 60; every later match adds 5.
155 aConfidenceLevel=(aConfidenceLevel >0)?aConfidenceLevel+5:60;
// Combine lead byte (high 8 bits) and trail byte (low 8 bits) into one code.
157 TUint charShiftJis=(aSample[i]<<8)|(aSample[increment1]);
// && binds tighter than ||, so this tests
// (0x829f..0x82f1) || (0x8340..0x8396).
158 if ((charShiftJis>=0x829f)&&(charShiftJis<=0x82f1)||
159 (charShiftJis>=0x8340)&&(charShiftJis<=0x8396))//those are kanas range
165 // Check That no other Japanese escape sequence occur... if they do, cancel this and return 0
166 // eg EUC-JP's SS(Single shift) characters followed by the
169 TInt increment1 = i+1;
// Bounds check before reading the byte after position i.
170 if(increment1 >= sampleLength)
172 if ((aSample[increment1] >= 0xa1) && (aSample[increment1] <= 0xdf))
174 // This could be EUC-JP format..
181 TInt increment1 = i+1;
182 TInt increment2 = i+2;
// Bounds check before reading the two bytes after position i.
183 if((increment1 >= sampleLength) || (increment2 >= sampleLength))
// Both following bytes must lie in 0xa1..0xfe for this branch.
185 if (((aSample[increment1] >= 0xa1) && (aSample[increment1] <= 0xfe)) &&
186 ((aSample[increment2] >= 0xa1) && (aSample[increment2] <= 0xfe)))
188 // This is definitely EUC-JP format.
193 // Check the half width Katakana
194 if (aSample[i]>=0xa1 && aSample[i]<=0xdf)
196 // increase the confidence of this sample as ShiftJis
// The first match sets the confidence to 75; every later match adds 5.
197 aConfidenceLevel=(aConfidenceLevel > 0) ? aConfidenceLevel+5 : 75;
// Bytes at or above 0xf0 get their own branch (its body is not visible here).
201 else if (aSample[i]>=0xf0)
// Clamp the accumulated confidence into the range 0..100.
209 aConfidenceLevel=(aConfidenceLevel >100)?100:((aConfidenceLevel <0)?0:aConfidenceLevel);
// Subtract a penalty of max(0, 30 - occurrence*100/numberOfShiftJis), using
// integer division.
// NOTE(review): numberOfShiftJis must be non-zero here; any guard would be on
// a line that is not visible in this listing -- confirm.
210 aConfidenceLevel=aConfidenceLevel-Max(0,(30-occurrence*100/numberOfShiftJis));
// The penalty above can push the value below zero, so floor it at 0.
212 aConfidenceLevel=(aConfidenceLevel < 0)?0:aConfidenceLevel;
// Static factory: allocates a CShiftJisConverterImpl with new(ELeave), pushes
// it onto the cleanup stack and later pops it off again.
// NOTE(review): at least one original line between PushL and Pop is not
// visible in this listing (presumably the second-phase ConstructL() call),
// and neither is the return statement -- confirm against the full file.
217 CShiftJisConverterImpl* CShiftJisConverterImpl::NewL()
219 CShiftJisConverterImpl* self = new(ELeave) CShiftJisConverterImpl();
220 CleanupStack::PushL(self);
222 CleanupStack::Pop(self);
// Destructor: calls FeatureManager::UnInitializeLib(), the counterpart of the
// FeatureManager::InitializeLibL() call made in ConstructL().
227 CShiftJisConverterImpl::~CShiftJisConverterImpl()
229 FeatureManager::UnInitializeLib();
// Default constructor, invoked from NewL().
// NOTE(review): the constructor body is not visible in this listing.
232 CShiftJisConverterImpl::CShiftJisConverterImpl()
// Calls FeatureManager::InitializeLibL(); the destructor makes the matching
// FeatureManager::UnInitializeLib() call.
237 void CShiftJisConverterImpl::ConstructL()
239 FeatureManager::InitializeLibL();
// Proxy table returned by ImplementationGroupProxy(). The visible entry pairs
// the value 0x10000FBD with the CShiftJisConverterImpl::NewL factory function.
242 const TImplementationProxy ImplementationTable[] =
244 IMPLEMENTATION_PROXY_ENTRY(0x10000FBD, CShiftJisConverterImpl::NewL)
// Exported entry point: stores the number of entries in ImplementationTable
// into aTableCount and returns a pointer to the table.
248 EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount)
// Element count = total array size divided by the size of one entry.
250 aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy);
252 return ImplementationTable;