Update contrib.
2 * Copyright (c) 2000-2009 Nokia Corporation and/or its subsidiary(-ies).
4 * This component and the accompanying materials are made available
5 * under the terms of "Eclipse Public License v1.0"
6 * which accompanies this distribution, and is available
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
9 * Initial Contributors:
10 * Nokia Corporation - initial contribution.
19 #include "PictographObserver.h"
23 #include <convutils.h>
28 #include "featmgr/featmgr.h"
30 const TUint KControlCharacterEscape=0x1b;
31 const TUint KControlCharacterShiftOut=0x0e;
32 const TUint KControlCharacterShiftIn=0x0f;
33 const TUint KBitsForNonStandardStates=0x03;
35 _LIT8(KLit8EscapeSequenceForJisRoman, "\x1b\x28\x4a");
36 _LIT8(KLit8EscapeSequenceForJisRomanIncorrect, "\x1b\x28\x48");
37 _LIT8(KLit8EscapeSequenceForAscii, "\x1b\x28\x42");
38 _LIT8(KLit8EscapeSequenceForHalfWidthKatakana, "\x1b\x28\x49");
39 _LIT8(KLit8EscapeSequenceForJisC6226_1978, "\x1b\x24\x40");
40 _LIT8(KLit8EscapeSequenceForJisX0208_1983, "\x1b\x24\x42");
41 _LIT8(KLit8EscapeSequenceForJisX0208_199x, "\x1b\x26\x40\x1b\x24\x42");
42 _LIT8(KLit8EscapeSequenceForJisX0212_1990, "\x1b\x24\x28\x44");
44 typedef TInt (*FChangeState)(TInt aState);
45 typedef TInt (*FAppendConvertToUnicode)(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aForeign, TInt& aState, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, const TArray<CnvUtilities::SState>& aArrayOfStates, TUint& aOutputConversionFlags, TUint aInputConversionFlags);
// Values stored in the low KBitsForNonStandardStates bits of the conversion state to mark
// that the current run of bytes is not ordinary escape-sequence-driven JIS text.
// NOTE(review): only the first enumerator is visible in this excerpt. ENonStandardStateJis8 is
// referenced elsewhere in the file, so its declaration and the enum's braces presumably sit
// on lines missing from this view - confirm against the full file.
47 enum TNonStandardState // each of these values must fit into KBitsForNonStandardStates and each must also be non-zero
// State OR-ed in by ChangeToNonStandardStateJis7; runs in this state are converted with halfWidthKatakana7ConversionData.
49 ENonStandardStateJis7=1,
// Conversion tables for 7-bit half-width katakana. The assembled halfWidthKatakana7ConversionData
// is used for the ESC ( I state, by AppendConvertToUnicodeFromJis7, and is exposed through
// CnvJisBase::HalfWidthKatakana7ConversionData().
// NOTE(review): the initializer braces and most numeric fields of these aggregates are not
// visible in this excerpt; the comments below cover only the fields that are.
54 LOCAL_D const SCnvConversionData::SVariableByteData::SRange halfWidthKatakana7VariableByteDataRange=
// Foreign -> Unicode range, converted with the EOffset algorithm.
62 LOCAL_D const SCnvConversionData::SOneDirectionData::SRange halfWidthKatakana7ToUnicodeDataRange=
66 SCnvConversionData::SOneDirectionData::SRange::EOffset,
// 65344 == 0xFF40; presumably the offset added to the foreign byte value to obtain the Unicode code point - confirm.
70 STATIC_CAST(TUint, 65344),
// Unicode -> foreign range: the same algorithm with the offset negated.
75 LOCAL_D const SCnvConversionData::SOneDirectionData::SRange unicodeToHalfWidthKatakana7DataRange=
79 SCnvConversionData::SOneDirectionData::SRange::EOffset,
83 STATIC_CAST(TUint, -65344),
// Bundles the three ranges above into a single SCnvConversionData.
88 LOCAL_D const SCnvConversionData halfWidthKatakana7ConversionData=
90 SCnvConversionData::EUnspecified,
93 &halfWidthKatakana7VariableByteDataRange
97 &halfWidthKatakana7ToUnicodeDataRange
101 &unicodeToHalfWidthKatakana7DataRange
// Panic category passed to User::Panic by the Panic() helper in this file.
107 _LIT(KLitPanicText, "JISBASE_SHARED");
// Panic reason codes.
// NOTE(review): the "enum TPanic" header, its braces and several enumerators are not visible in
// this excerpt. EPanicNotAppending2/3 and EPanicBadPointers1-4 are used elsewhere in the file
// and are presumably declared on the missing lines - confirm against the full file.
// Raised when a helper is called without EInputConversionFlagAppend set.
111 EPanicNotAppending1=1,
// Raised when the low state bits hold a value ConvertToUnicode does not handle.
114 EPanicBadNonStandardState,
// Raised when no converter has been selected for a run that is about to be converted.
119 EPanicBadFunctionPointer
122 LOCAL_C void Panic(TPanic aPanic)
124 User::Panic(KLitPanicText, aPanic);
129 TInt CnvJisBase::ChangeToNonStandardStateJis7(TInt aState)
131 return (aState&~KBitsForNonStandardStates)|ENonStandardStateJis7;
134 TInt CnvJisBase::ChangeToNonStandardStateJis8(TInt aState)
136 return (aState&~KBitsForNonStandardStates)|ENonStandardStateJis8;
139 TInt CnvJisBase::ChangeToStandardState(TInt)
141 return CCnvCharacterSetConverter::KStateDefault; // I actually thought that the correct behaviour for this would be to return "aState&~KBitsForNonStandardStates", but I asked Ken Lunde about it in an email and he said that after a run of JIS7 or JIS8, the bytes should always be interpreted as JIS-Roman
144 TInt CnvJisBase::AppendConvertToUnicodeFromModalForeign(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aModalForeign, TInt& aState, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, const TArray<CnvUtilities::SState>& aArrayOfStates, TUint& aOutputConversionFlags, TUint aInputConversionFlags)
146 __ASSERT_DEBUG(aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAppend, Panic(EPanicNotAppending1));
147 return CnvUtilities::ConvertToUnicodeFromModalForeign(aDefaultEndiannessOfForeignCharacters, aUnicode, aModalForeign, aState, aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter, aArrayOfStates, aOutputConversionFlags, aInputConversionFlags);
150 TInt CnvJisBase::AppendConvertToUnicodeFromJis7(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aJis7, TInt&, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, const TArray<CnvUtilities::SState>&, TUint& aOutputConversionFlags, TUint aInputConversionFlags)
152 __ASSERT_DEBUG(aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAppend, Panic(EPanicNotAppending2));
153 return CCnvCharacterSetConverter::DoConvertToUnicode(halfWidthKatakana7ConversionData, aDefaultEndiannessOfForeignCharacters, aUnicode, aJis7, aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter, aOutputConversionFlags, aInputConversionFlags);
156 TInt CnvJisBase::AppendConvertToUnicodeFromJis8(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aJis8, TInt&, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, const TArray<CnvUtilities::SState>&, TUint& aOutputConversionFlags, TUint aInputConversionFlags)
158 __ASSERT_DEBUG(aInputConversionFlags&CCnvCharacterSetConverter::EInputConversionFlagAppend, Panic(EPanicNotAppending3));
159 return CCnvCharacterSetConverter::DoConvertToUnicode(CnvHalfWidthKatakana8::ConversionData(), aDefaultEndiannessOfForeignCharacters, aUnicode, aJis8, aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter, aOutputConversionFlags, aInputConversionFlags);
// Converts the JIS byte stream aForeign to Unicode in aUnicode (which is first truncated to length 0).
// The input is scanned byte by byte and cut into runs. Each run is handed to one of three helpers,
// chosen by the low KBitsForNonStandardStates bits of aState:
//   - standard state       -> AppendConvertToUnicodeFromModalForeign
//   - ENonStandardStateJis7 -> AppendConvertToUnicodeFromJis7
//   - ENonStandardStateJis8 -> AppendConvertToUnicodeFromJis8
// aState is updated in place as runs end. aNumberOfUnconvertibleCharacters is incremented by each
// run's count, and aIndexOfFirstByteOfFirstUnconvertibleCharacter is set (relative to the start of
// aForeign) by the first run that reports an unconvertible character.
// Returns the helper's return value when it is an error code, otherwise the number of bytes from
// the final pointerToCurrentByte through pointerToLastByte.
// NOTE(review): this excerpt omits short lines (braces, the loop header, case/default labels,
// break statements, the declaration of `ret`, and others). The comments below describe only the
// statements that are visible.
// NOTE(review): aNumberOfUnconvertibleCharacters is only ever incremented here; no initialisation
// is visible, so the caller presumably zeroes it - confirm.
162 EXPORT_C TInt CnvJisBase::ConvertToUnicode(CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, TDes16& aUnicode, const TDesC8& aForeign, TInt& aState, TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
165 TBool pictographsSupported = FeatureManager::FeatureSupported(KFeatureIdJapanesePicto);
// Table of (escape sequence, conversion data) pairs passed to the modal converter.
// NOTE(review): no states.Close() is visible in this excerpt - confirm the array is released on every return path.
166 RArray<CnvUtilities::SState> states;
// Pictograph build: the same table as the other branch, plus whatever SetStatesForPictograph adds
// after the ASCII entry.
// NOTE(review): Append results are OR-ed into `ret`, which loses the individual error code, and the
// only visible check is a debug-only assert - confirm release-build handling.
167 if ( pictographsSupported )
170 CnvUtilities::SState state;
171 state.iEscapeSequence=&KLit8EscapeSequenceForJisRoman; // Jis-Roman is the default state, so it must come first in the array
172 state.iConversionData=&CnvJisRoman::ConversionData();
173 ret |= states.Append(state);
174 state.iEscapeSequence=&KLit8EscapeSequenceForJisRomanIncorrect;
175 state.iConversionData=&CnvJisRoman::ConversionData();
176 ret |= states.Append(state);
177 state.iEscapeSequence=&KLit8EscapeSequenceForAscii;
178 state.iConversionData=&CCnvCharacterSetConverter::AsciiConversionData();
179 ret |= states.Append(state);
// Defined outside this excerpt; presumably appends the pictograph escape-sequence states - confirm.
181 SetStatesForPictograph(states);
183 state.iEscapeSequence=&KLit8EscapeSequenceForHalfWidthKatakana;
184 state.iConversionData=&halfWidthKatakana7ConversionData;
185 ret |= states.Append(state);
186 state.iEscapeSequence=&KLit8EscapeSequenceForJisC6226_1978;
187 state.iConversionData=&CnvJisX0208::ConversionData();
188 ret |= states.Append(state);
189 state.iEscapeSequence=&KLit8EscapeSequenceForJisX0208_1983;
190 state.iConversionData=&CnvJisX0208::ConversionData();
191 ret |= states.Append(state);
192 state.iEscapeSequence=&KLit8EscapeSequenceForJisX0208_199x;
193 state.iConversionData=&CnvJisX0208::ConversionData();
194 ret |= states.Append(state);
195 state.iEscapeSequence=&KLit8EscapeSequenceForJisX0212_1990;
196 state.iConversionData=&CnvJisX0212::ConversionData();
197 ret |= states.Append(state);
// Non-pictograph build (the `else` line itself is not visible): the same eight entries without the
// SetStatesForPictograph call.
201 CnvUtilities::SState state;
202 state.iEscapeSequence=&KLit8EscapeSequenceForJisRoman; // Jis-Roman is the default state, so it must come first in the array
203 state.iConversionData=&CnvJisRoman::ConversionData();
204 ret |= states.Append(state);
205 state.iEscapeSequence=&KLit8EscapeSequenceForJisRomanIncorrect;
206 state.iConversionData=&CnvJisRoman::ConversionData();
207 ret |= states.Append(state);
208 state.iEscapeSequence=&KLit8EscapeSequenceForAscii;
209 state.iConversionData=&CCnvCharacterSetConverter::AsciiConversionData();
210 ret |= states.Append(state);
211 state.iEscapeSequence=&KLit8EscapeSequenceForHalfWidthKatakana;
212 state.iConversionData=&halfWidthKatakana7ConversionData;
213 ret |= states.Append(state);
214 state.iEscapeSequence=&KLit8EscapeSequenceForJisC6226_1978;
215 state.iConversionData=&CnvJisX0208::ConversionData();
216 ret |= states.Append(state);
217 state.iEscapeSequence=&KLit8EscapeSequenceForJisX0208_1983;
218 state.iConversionData=&CnvJisX0208::ConversionData();
219 ret |= states.Append(state);
220 state.iEscapeSequence=&KLit8EscapeSequenceForJisX0208_199x;
221 state.iConversionData=&CnvJisX0208::ConversionData();
222 ret |= states.Append(state);
223 state.iEscapeSequence=&KLit8EscapeSequenceForJisX0212_1990;
224 state.iConversionData=&CnvJisX0212::ConversionData();
225 ret |= states.Append(state);
227 __ASSERT_DEBUG(!ret, User::Panic(_L("RArray append failure"), ret));
229 const TArray<CnvUtilities::SState> arrayOfStates(states.Array());
230 aUnicode.SetLength(0);
// Scanning pointers: pointerToCurrentByte is the byte being examined,
// pointerToStartOfNextRunToConvert is the start of the run not yet converted.
231 const TUint8* const pointerToFirstByte=aForeign.Ptr();
232 const TUint8* pointerToCurrentByte=pointerToFirstByte;
233 const TUint8* pointerToStartOfNextRunToConvert=pointerToFirstByte;
// NOTE(review): for an empty aForeign this points one byte before the buffer, and the first byte
// is dereferenced below. No empty-input guard is visible in this excerpt - confirm the caller
// never passes an empty descriptor.
234 const TUint8* const pointerToLastByte=pointerToFirstByte+(aForeign.Length()-1);
235 TUint outputConversionFlags=0;
// Every helper asserts that the append flag is set, so it is set from the start.
236 TUint inputConversionFlags=CCnvCharacterSetConverter::EInputConversionFlagAppend;
// Per-byte work (the loop header is not visible): decide whether the current byte ends the
// current run (changeState!=NULL) and which helper converts that run.
239 FChangeState changeState=NULL;
240 FAppendConvertToUnicode appendConvertToUnicode=NULL;
// NOTE(review): the statements that set skipThisByte to ETrue are not visible in this excerpt.
241 TBool skipThisByte=EFalse;
242 const TUint currentByte=*pointerToCurrentByte;
243 switch (aState&KBitsForNonStandardStates)
// Standard state (case label not visible): shift-out starts a JIS7 run; with pictograph support,
// shift-in returns to the standard state; a byte with the top bit set starts a JIS8 run.
246 if (currentByte==KControlCharacterShiftOut)
248 changeState=ChangeToNonStandardStateJis7;
251 else if (pictographsSupported && (currentByte==KControlCharacterShiftIn))
253 changeState=ChangeToStandardState;
256 else if (currentByte&0x80)
258 changeState=ChangeToNonStandardStateJis8;
260 appendConvertToUnicode=AppendConvertToUnicodeFromModalForeign;
// JIS7 run: ended by ESC or shift-in (back to standard), or by a byte with the top bit set (JIS8).
262 case ENonStandardStateJis7:
263 if (currentByte==KControlCharacterEscape)
265 changeState=ChangeToStandardState; // it doesn't matter what function changeState is set to (as its return value won't actually be used), as long as changeState!=NULL so that the test below (after the end of this switch) passes
267 else if (currentByte==KControlCharacterShiftIn)
269 changeState=ChangeToStandardState;
272 else if (currentByte&0x80)
274 changeState=ChangeToNonStandardStateJis8;
276 appendConvertToUnicode=AppendConvertToUnicodeFromJis7;
// JIS8 run: ended by ESC (back to standard), by shift-out (JIS7), or by any byte with the top bit
// clear (back to standard).
278 case ENonStandardStateJis8:
279 if (currentByte==KControlCharacterEscape)
281 changeState=ChangeToStandardState; // it doesn't matter what function changeState is set to (as its return value won't actually be used), as long as changeState!=NULL so that the test below (after the end of this switch) passes
283 else if (currentByte==KControlCharacterShiftOut)
285 changeState=ChangeToNonStandardStateJis7;
288 else if ((currentByte&0x80)==0)
290 changeState=ChangeToStandardState;
292 appendConvertToUnicode=AppendConvertToUnicodeFromJis8;
// Any other value of the state bits is a programming error (presumably the default label - not visible).
296 Panic(EPanicBadNonStandardState);
300 __ASSERT_DEBUG(pointerToCurrentByte<=pointerToLastByte, Panic(EPanicBadPointers1));
// Flush the pending run when the input is exhausted or the current byte ends the run.
301 if ((pointerToCurrentByte>=pointerToLastByte) || (changeState!=NULL))
303 TBool lastIteration=EFalse;
304 __ASSERT_DEBUG(pointerToCurrentByte>=pointerToStartOfNextRunToConvert, Panic(EPanicBadPointers2));
// No state change means we got here because this is the last byte, so it belongs to the run.
305 if (changeState==NULL)
307 ++pointerToCurrentByte; // this may make pointerToCurrentByte greater than pointerToLastByte
// Convert the run only if it is non-empty.
310 if (pointerToCurrentByte>pointerToStartOfNextRunToConvert)
312 TPtrC8 runToConvert(pointerToStartOfNextRunToConvert, pointerToCurrentByte-pointerToStartOfNextRunToConvert);
313 TInt numberOfUnconvertibleCharacters;
314 TInt indexOfFirstByteOfFirstUnconvertibleCharacter;
315 __ASSERT_DEBUG(appendConvertToUnicode!=NULL, Panic(EPanicBadFunctionPointer));
316 const TInt returnValue=(*appendConvertToUnicode)(aDefaultEndiannessOfForeignCharacters, aUnicode, runToConvert, aState, numberOfUnconvertibleCharacters, indexOfFirstByteOfFirstUnconvertibleCharacter, arrayOfStates, outputConversionFlags, inputConversionFlags);
// NOTE(review): the condition guarding this return is not visible in this excerpt.
320 return returnValue; // this is an error-code
// Fold this run's unconvertible-character results into the caller's totals; the index is
// rebased from run-relative to aForeign-relative and recorded only for the first occurrence.
322 if (numberOfUnconvertibleCharacters>0)
324 if (aNumberOfUnconvertibleCharacters==0)
326 aIndexOfFirstByteOfFirstUnconvertibleCharacter=(pointerToStartOfNextRunToConvert-pointerToFirstByte)+indexOfFirstByteOfFirstUnconvertibleCharacter;
328 aNumberOfUnconvertibleCharacters+=numberOfUnconvertibleCharacters;
// A non-error returnValue is subtracted here, i.e. the scan position is moved back by that many
// bytes (presumably the bytes the helper left unconverted - confirm against the helper's contract).
332 pointerToCurrentByte-=returnValue; // pointerToStartOfNextRunToConvert (which also needs adjusting in the same way) gets set below
337 __ASSERT_DEBUG(pointerToCurrentByte>=pointerToFirstByte, Panic(EPanicBadPointers3));
// Once the scan position has moved past the first byte, this extra flag is added for all later runs.
338 if (pointerToCurrentByte>pointerToFirstByte)
340 inputConversionFlags|=CCnvCharacterSetConverter::EInputConversionFlagAllowTruncatedInputNotEvenPartlyConsumable;
// Apply the state transition selected by the switch above.
343 if (changeState!=NULL)
345 aState=(*changeState)(aState);
// NOTE(review): the statements surrounding the next two lines (including where lastIteration is
// set and how skipThisByte is tested) are not visible in this excerpt.
349 if (pointerToCurrentByte==pointerToLastByte) // pointerToCurrentByte may already be greater than pointerToLastByte, in which case lastIteration will already be ETrue
353 ++pointerToCurrentByte;
// The next run starts at the current scan position.
355 pointerToStartOfNextRunToConvert=pointerToCurrentByte;
356 if (lastIteration) // check this first as pointerToCurrentByte may be greater than pointerToLastByte (but it will only be if lastIteration is EFalse)
360 __ASSERT_DEBUG(pointerToCurrentByte<=pointerToLastByte, Panic(EPanicBadPointers4));
361 if (pointerToCurrentByte>=pointerToLastByte)
// Advance to the next byte.
366 ++pointerToCurrentByte;
370 // no checking with outputConversionFlags need to be done here
// Number of bytes from pointerToCurrentByte through pointerToLastByte inclusive.
371 return pointerToLastByte-(pointerToCurrentByte-1);
374 EXPORT_C const SCnvConversionData& CnvJisBase::HalfWidthKatakana7ConversionData()
376 return halfWidthKatakana7ConversionData;
// Estimates how likely aSample is JIS-encoded and writes a score to aConfidenceLevel.
// The score starts at 55. The sample is scanned for bytes with the top bit set and for six of the
// JIS escape sequences. The final score is 55 + (EscSequences*100)/sampleLength capped at 100,
// or 90 when the sample is empty.
// NOTE(review): the bodies of the `if` statements below (what happens on a top-bit byte, and how
// EscSequences is updated when a sequence is found) are not visible in this excerpt.
// NOTE(review): the existing comment says the confidence starts at 0, but the code starts it at 55.
379 EXPORT_C void CnvJisBase::IsCharacterJISBased(TInt& aConfidenceLevel, const TDesC8& aSample)
381 // JIS is modal... so start off with a confidence of 0 and to begin with look
382 // for JIS escape sequences....Escape sequences defined above in the KLITs
383 // For each escape sequence, increase the confidenceLevel .....
384 aConfidenceLevel = 55;
// Result of the most recent Find() for each escape sequence; also used to decide when to search again.
385 TInt jisRomanResult = 0;
386 TInt asciiResult = 0;
387 TInt jisX0208Result = 0;
388 TInt jisC6226Result = 0;
389 TInt jixX0212Result = 0;
390 TInt hwKanaResult = 0;
// Accumulated escape-sequence score, used in the final calculation.
392 TInt EscSequences = 0;
394 TInt sampleLength = aSample.Length();
395 for (TInt i = 0; i < sampleLength; ++i)
398 // JIS is 7 bit encoding
399 if((aSample[i]&0x80)!=0x00)
404 // JIS supports the following character sets
// Each block below searches the remainder of the sample (from index i) for one escape sequence.
// NOTE(review): TDesC8::Find returns an offset relative to the descriptor searched (here the tail
// starting at i), or KErrNotFound, yet the result is compared with the absolute index i. After a
// KErrNotFound result the guard `i > result` is true on every later iteration, so the tail is
// searched again each time - confirm this is intended.
405 if (i > jisC6226Result)
407 jisC6226Result=(aSample.Right(sampleLength-i)).Find(KLit8EscapeSequenceForJisC6226_1978);
408 if (jisC6226Result!=KErrNotFound)
412 if (i > jisRomanResult)
414 jisRomanResult=(aSample.Right(sampleLength-i)).Find(KLit8EscapeSequenceForJisRoman);
415 if (jisRomanResult!=KErrNotFound)
// NOTE(review): the guard for the ASCII search is not visible in this excerpt.
421 asciiResult=(aSample.Right(sampleLength-i)).Find(KLit8EscapeSequenceForAscii);
422 if (asciiResult!=KErrNotFound)
426 if (i > jisX0208Result)
428 jisX0208Result=(aSample.Right(sampleLength-i)).Find(KLit8EscapeSequenceForJisX0208_1983);
429 if (jisX0208Result!=KErrNotFound)
433 if (i > jixX0212Result)
435 jixX0212Result=(aSample.Right(sampleLength-i)).Find(KLit8EscapeSequenceForJisX0212_1990);
436 if (jixX0212Result!=KErrNotFound)
440 if (i > hwKanaResult)
442 hwKanaResult=(aSample.Right(sampleLength-i)).Find(KLit8EscapeSequenceForHalfWidthKatakana);
443 if (hwKanaResult!=KErrNotFound)
// Scale the escape-sequence score by the sample length (avoiding division by zero for an empty
// sample, which scores 90), then cap the result at 100.
448 aConfidenceLevel = 0 < sampleLength?
449 aConfidenceLevel + ((EscSequences*100)/sampleLength) : 90;
450 aConfidenceLevel=(aConfidenceLevel >100)?100:aConfidenceLevel;