Update contrib.
2 * Copyright (c) 2005-2009 Nokia Corporation and/or its subsidiary(-ies).
4 * This component and the accompanying materials are made available
5 * under the terms of "Eclipse Public License v1.0"
6 * which accompanies this distribution, and is available
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
9 * Initial Contributors:
10 * Nokia Corporation - initial contribution.
15 * J5 charconv character converter
22 #include <ecom/implementationproxy.h>
24 #include <charactersetconverter.h>
25 #include <convutils.h>
34 #include "featmgr/featmgr.h"
37 J5 will use up to KMaxSizeAutoDetectSample to try to determine the format of data.
39 const TInt KMaxSizeAutoDetectSample = 1000;
// The ESC byte; DetectIso2022() tests for it at the start of each escape sequence.
41 const TUint8 KEscape = 0x1b;
// Byte order mark value; ConvertUcs2ToUnicode() does not copy it to the output.
42 const TInt KByteOrderMark = 0xfeff;
// Returns the replacement bytes for unconvertible Unicode characters by
// delegating to the Shift-JIS converter (CnvShiftJis).
44 const TDesC8& CJ5Converter::ReplacementForUnconvertibleUnicodeCharacters()
46 return CnvShiftJis::ReplacementForUnconvertibleUnicodeCharacters();
50 This API should not be used as it is ambiguous as to what encoding is required.
51 The user should instead call the specific plug-in for the appropriate conversion.
52 J5 ConvertFromUnicode() will convert to UTF8 as default.
// Converts aUnicode to UTF-8 through CnvUtfConverter and returns that call's result.
// The endianness, replacement-text and unconvertible-index arguments are not used
// (their names are commented out in the signature).
55 TInt CJ5Converter::ConvertFromUnicode(
56 CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */,
57 const TDesC8& /* aReplacementForUnconvertibleUnicodeCharacters */,
59 const TDesC16& aUnicode,
60 CCnvCharacterSetConverter::TArrayOfAscendingIndices& /* aIndicesOfUnconvertibleCharacters */)
62 return CnvUtfConverter::ConvertFromUnicodeToUtf8(aForeign, aUnicode);
66 This will automatically determine one of the five supported encodings
67 to use and convert accordingly. This plugin method is available to the
68 user through the CCnvCharacterSetConverter::ConvertToUnicode() method.
69 There is no way for the caller to determine which encoding has been used.
71 NOTE: For debugging the selected character set is returned in the state.
74 @param aDefaultEndiannessOfForeignCharacters The default endian-ness to use when reading characters
75 in the foreign character set.
76 @param aUnicode On return, contains the text converted into Unicode.
77 @param aForeign The non-Unicode source text to be converted.
78 @param aState Used to save state information across multiple calls
79 to <code>ConvertToUnicode()</code>.
80 @param aNumberOfUnconvertibleCharacters On return, contains the number of bytes which were not converted.
82 @param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, contains the index of the first byte in the
83 input text that could not be converted. A negative
84 value indicates that all the characters were converted.
86 @return The number of unconverted bytes left at the end of the input descriptor
87 (e.g. because the output descriptor is not long enough to hold all the text),
88 or one of the error values defined in TError.
// Detects the encoding of aForeign with DetectEncoding() and hands the data to
// the matching decoder, returning that decoder's result.
91 TInt CJ5Converter::ConvertToUnicode(
92 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
94 const TDesC8& aForeign,
96 TInt& aNumberOfUnconvertibleCharacters,
97 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
99 // As the aState parameter is used to pass back the detected value
100 // use a "hidden" internal state variable.
101 TInt internalState = CCnvCharacterSetConverter::KStateDefault;
103 // determine the encoding type and then decode appropriately
104 switch ( DetectEncoding(aDefaultEndiannessOfForeignCharacters, aForeign))
108 return CnvShiftJis::ConvertToUnicode(aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign,
109 aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter);
112 aState = EIso2022jp1;
113 return CnvJisBase::ConvertToUnicode(aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign, internalState,
114 aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter);
118 return ConvertEEucjpToUnicode(
119 aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign, internalState,
120 aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter);
// DetectEncoding() may have updated the endianness argument when it found UCS2.
124 return ConvertUcs2ToUnicode( aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign,
125 aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter);
129 return CnvUtfConverter::ConvertToUnicodeFromUtf8(aUnicode, aForeign);
132 // fall through to the default, which is decode as UTF8
138 return CnvUtfConverter::ConvertToUnicodeFromUtf8(aUnicode, aForeign);
142 This API is used by CCnvCharacterSetConverter::AutoDetectCharacterSetL().
143 This method returns a value between 0 and 100, indicating how likely it
144 is that this is the correct converter, for the text supplied. As J5 is
145 NOT intended to be used with the existing auto-detect mechanism, it will always return 0.
// Auto-detect hook. Per the notes below, the sample is ignored (the aSample name
// is commented out) and 0 is always reported, because J5 does not support auto-detection.
149 TBool CJ5Converter::IsInThisCharacterSetL(
151 TInt& aConfidenceLevel,
152 const TDesC8& /* aSample */)
155 aSetToTrue - This value should be set to ETrue. It is used to indicate to
156 CCnvCharacterSetConverter::AutoDetectCharacterSetL() that the plug-in DLL
157 is implementing a function of this signature and is therefore not the empty
161 /* no need to look at the sample as this always returns 0
162 as the autodetect feature is not supported by the J5 plug-in
// Factory: allocates a CJ5Converter with new(ELeave) and keeps it on the cleanup
// stack until construction is finished.
// NOTE(review): presumably ConstructL() is called between PushL and Pop and the
// object is then returned; those lines are not visible in this listing.
168 CJ5Converter* CJ5Converter::NewL()
170 CJ5Converter* self = new(ELeave) CJ5Converter();
171 CleanupStack::PushL(self);
173 CleanupStack::Pop(self);
// Destructor: calls FeatureManager::UnInitializeLib(), the counterpart of the
// FeatureManager::InitializeLibL() call made in ConstructL().
177 CJ5Converter::~CJ5Converter()
179 FeatureManager::UnInitializeLib();
// Default constructor; invoked from NewL().
182 CJ5Converter::CJ5Converter()
// Second-phase initialisation: initialises the FeatureManager library.
// The L suffix follows the Symbian convention for functions that may leave.
186 void CJ5Converter::ConstructL()
188 FeatureManager::InitializeLibL();
// ECOM implementation table: pairs an implementation UID with the
// CJ5Converter::NewL factory.
// NOTE(review): the two entries are presumably selected by a preprocessor
// conditional that is not visible in this listing.
191 const TImplementationProxy ImplementationTable[] =
194 // for the test build use a special test UID
195 IMPLEMENTATION_PROXY_ENTRY(0x01000002, CJ5Converter::NewL)
197 IMPLEMENTATION_PROXY_ENTRY(KCharacterSetIdentifierJ5, CJ5Converter::NewL)
// Exported entry point that returns ImplementationTable and writes its number
// of entries to aTableCount.
201 EXPORT_C const TImplementationProxy* ImplementationGroupProxy(TInt& aTableCount)
// entry count = total table size / size of one entry
203 aTableCount = sizeof(ImplementationTable) / sizeof(TImplementationProxy);
205 return ImplementationTable;
209 DetectEncoding determines the character set encoding.
210 The logic for this detection is based on the information in CJKV by Ken Lunde.
211 A detailed diagram of this logic is in the J5 how to document section 2.4
212 @return The detected character set as a CJ5Converter::TJ5Encoding enum value.
// Runs the individual detectors in a fixed order (UCS2, EUC JP, ISO 2022 JP,
// UTF8, Shift-JIS). If none is conclusive, the EUC JP and Shift-JIS
// valid-byte counts are compared to pick the better match.
215 enum CJ5Converter::TJ5Encoding CJ5Converter::DetectEncoding(
216 CCnvCharacterSetConverter::TEndianness& aDefaultEndiannessOfForeignCharacters ,
217 const TDesC8& aForeign)
220 // first check for UCS2
221 CCnvCharacterSetConverter::TEndianness ucs2Endianness = CCnvCharacterSetConverter::ELittleEndian;
222 if ( DetectUcs2(aForeign, ucs2Endianness ))
224 // if ucs2 is detected pass back the detected endianness
225 aDefaultEndiannessOfForeignCharacters = ucs2Endianness;
// next try EUC JP; the valid-byte count is kept for the tie-break further down
230 TInt eucJpValidBytes = 0;
231 CJ5Converter::TDectectCharacterSet result = DetectEucJp( aForeign, eucJpValidBytes );
232 if ( result == EIsCharacterSet )
237 // next try ISO 2022 JP
238 if ( DetectIso2022( aForeign ) == EIsCharacterSet )
244 if ( DetectUtf8( aForeign ) == EIsCharacterSet )
250 TInt shiftjisValidBytes = 0;
251 result = DetectShiftJis( aForeign, shiftjisValidBytes );
252 if ( result == EIsCharacterSet )
257 // no clear winner so go for the best
258 TInt sampleLength = Min(aForeign.Length(), KMaxSizeAutoDetectSample);
260 // if more than half is shiftjis and more shiftjis than EUC_JP,
261 if ((shiftjisValidBytes > eucJpValidBytes ) && (shiftjisValidBytes * 2> sampleLength))
264 // if more than half is EUC_JP and more EUC_JP than shiftjis,
265 if ((eucJpValidBytes > shiftjisValidBytes ) && (eucJpValidBytes * 2> sampleLength))
268 // return the default
275 If the first two bytes are the Unicode Endian Specifiers (0xfffe or 0xfeff)
276 then this must be UCS2. Otherwise try looking for 0x**00 or 0x00**
277 @param aForeign A sample of data to be checked
278 @param aTEndianness The endianness if UCS2 is detected
279 @return ETrue if UCS2 else EFalse
// Decides whether the sample is UCS2: first by a byte order mark in the first
// two bytes, otherwise by counting 16-bit units that have one zero byte.
282 TBool CJ5Converter::DetectUcs2( const TDesC8& aForeign,
283 CCnvCharacterSetConverter::TEndianness& aTEndianness )
285 // if the sample is not big enough
286 if (aForeign.Length() < 2)
290 else if (aForeign[0]==0xff && aForeign[1]==0xfe )
292 // we have found a Little Endian Byte order mark
293 aTEndianness = CCnvCharacterSetConverter::ELittleEndian;
296 else if (aForeign[0]==0xfe && aForeign[1]==0xff )
298 // we have found a Big Endian Byte order mark
299 aTEndianness = CCnvCharacterSetConverter::EBigEndian;
303 // Next check for sequences of 0x**00 or 0x00** as UCS-2 is the only charset that
304 // specifies 0x**00 or 0x00** (according to endianness) for the ASCII range of characters.
305 // NB: This will fail if there are no ASCII characters in the text.
// NOTE(review): the first assignment is immediately overwritten by the capped
// length on the next line, so it is redundant.
306 TInt sampleLength = aForeign.Length();
307 sampleLength = Min(aForeign.Length(), KMaxSizeAutoDetectSample);;
309 // check the sample for sequences of 0x**00 or 0x00**
310 TInt bigEndianConfidence = 0;
311 TInt littleEndianConfidence = 0;
// step through the sample two bytes at a time; each matching pair adds 2 (bytes)
313 for(;i< (sampleLength-1); i+=2)
315 if( aForeign[i] == 0x00)
317 bigEndianConfidence +=2;
319 else if ( aForeign[i+1] == 0x00)
321 littleEndianConfidence +=2;
325 // which occurs most BE or LE
326 TInt confidenceLevel = 0;
327 if (bigEndianConfidence > littleEndianConfidence)
329 aTEndianness = CCnvCharacterSetConverter::EBigEndian;
330 confidenceLevel = bigEndianConfidence;
334 aTEndianness = CCnvCharacterSetConverter::ELittleEndian;
335 confidenceLevel = littleEndianConfidence;
338 // if more than 97% count as UCS2
339 if ( confidenceLevel * 100/sampleLength > 97)
346 Check if ShiftJis (reference CJKV by Ken Lunde page 175)
347 @param aForeign A sample of data to be checked
348 @param aNumberOfBytesConverted The number of input bytes that can be converted
349 @return The result of the check as either EIsCharacterSet, EIsNotCharacterSet or EMaybeCharacterSet
// Scans up to KMaxSizeAutoDetectSample bytes. Returns EIsCharacterSet as soon
// as a byte pair that can only be Shift-JIS is seen, EIsNotCharacterSet on a
// byte that cannot be Shift-JIS, and otherwise grades the result by how many
// bytes were counted as convertible.
352 enum CJ5Converter::TDectectCharacterSet CJ5Converter::DetectShiftJis( const TDesC8& aForeign,TInt &aNumberOfBytesConverted )
354 // Get the sample length
355 TInt sampleLength = Min(aForeign.Length(), KMaxSizeAutoDetectSample);;
358 aNumberOfBytesConverted = 0;
361 TText8 characterPlus1;
362 TText8 characterPlus2;
364 // scan the sample text looking for valid shiftjis data
365 while ( i < sampleLength )
367 // get the next few characters, use 0 if there is no more sample
368 // as this will not match any test.
369 character = aForeign[i];
370 characterPlus1 = ( i < (sampleLength-1) ? aForeign[i+1]:0);
371 characterPlus2 = ( i < (sampleLength-2) ? aForeign[i+2]:0);
373 // SHIFTJIS - 0x81 to 0x9f followed by 0x40 to 0xfc
374 if ((character >= 0x81) && (character <= 0x9f) &&
375 (characterPlus1 >= 0x40) && (characterPlus1 <= 0xfc) )
377 // this is SHIFTJIS unless it is EUC JP code set 2 or 3
378 if ((character == 0x8E) && (characterPlus1 >= 0xA1) && (characterPlus1 <= 0xDF))
380 // this could be EUC JP code set 2 (or shiftjis)
381 aNumberOfBytesConverted+=2;
384 else if ((character == 0x8F) &&
385 (characterPlus1 >= 0xA1) && (characterPlus1 <= 0xDF) &&
386 (characterPlus2 >= 0xA1) && (characterPlus2 <= 0xDF))
388 // this could be EUC JP code set 3 (or shiftjis)
389 aNumberOfBytesConverted+=3;
394 // this can only be shift jis
395 return EIsCharacterSet;
399 // SHIFTJIS - 0xE0 to 0xEF followed by .....
400 else if ((character >= 0xE0) && (character <= 0xEF))
402 // 0x40 to 0xFC which overlaps UTF8 between 0x80 and 0xBF
403 // including Mopera extension to shiftjis from 0xEF80 to 0xEFFC
405 if ( (characterPlus1 >= 0x40) && (characterPlus1 <= 0x7E) )
407 // this can only be shift jis
408 return EIsCharacterSet;
410 else if ( (characterPlus1 >= 0xC0) && (characterPlus1 <= 0xFC) )
412 // this could be EUC JP code set 1
413 aNumberOfBytesConverted+=2;
417 // problem here is the overlap between the UTF8 and shiftjis
418 else if ( (characterPlus1 >= 0x80) && (characterPlus1 <= 0xBF) )
420 // this could be shiftjis or utf8
421 aNumberOfBytesConverted+=2;
425 // half width katakana A1-DF
426 else if ((character >= 0xA1) && (character <= 0xDF))
428 aNumberOfBytesConverted+=1;
430 // ASCII or JIS-Roman 20-7e
431 else if ( ((character >= 0x20) && (character <= 0x7E)) || (character == 0x0A) || (character == 0x0D))
433 aNumberOfBytesConverted+=1;
437 // This is not decoding as shiftjis, so reject
438 aNumberOfBytesConverted =0;
439 return EIsNotCharacterSet;
444 // if all the characters could be converted
445 if (aNumberOfBytesConverted == sampleLength)
447 return EIsCharacterSet;
449 else if (aNumberOfBytesConverted == 0)
451 return EIsNotCharacterSet;
// some, but not all, of the sample was counted as convertible
455 return EMaybeCharacterSet;
460 Check if UTF8 (reference CJKV by Ken Lunde page 189)
461 @param aForeign A sample of data to be checked
462 @note Unlike DetectShiftJis and DetectEucJp, no count of convertible input bytes is passed back.
463 @return The result of the check as either EIsCharacterSet, EIsNotCharacterSet or EMaybeCharacterSet
// Scans up to KMaxSizeAutoDetectSample bytes and rejects the sample at the
// first byte that does not begin a well-formed 1- to 4-byte UTF8 sequence.
466 enum CJ5Converter::TDectectCharacterSet CJ5Converter::DetectUtf8( const TDesC8& aForeign )
468 // Get the sample length
469 TInt sampleLength = Min(aForeign.Length(), KMaxSizeAutoDetectSample);;
473 TText8 characterPlus1;
474 TText8 characterPlus2;
475 TText8 characterPlus3;
477 // scan the sample text looking for valid UTF8
478 while ( i < sampleLength )
480 // get the next few characters, use 0 if there is no more sample
481 // as this will not match any test.
482 character = aForeign[i];
483 characterPlus1 = ( i < (sampleLength-1) ? aForeign[i+1]:0);
484 characterPlus2 = ( i < (sampleLength-2) ? aForeign[i+2]:0);
485 characterPlus3 = ( i < (sampleLength-3) ? aForeign[i+3]:0);
487 // UTF8 range 110xxxxx followed by one valid UTF8 byte
488 if(((character & 0xe0)==0xc0) && (( characterPlus1 & 0xc0)==0x80) )
490 // two bytes of valid UTF8 found
493 // UTF8 range 1110xxxx followed by two valid UTF8 bytes
494 else if(((character & 0xf0)==0xe0) && (( characterPlus1 & 0xc0)==0x80) && (( characterPlus2 & 0xc0)==0x80))
496 // three bytes of valid UTF8 found
499 // UTF8 range 11110xxx followed by three valid UTF8 bytes
500 else if(((character & 0xf8)==0xf0) && (( characterPlus1 & 0xc0)==0x80)
501 && (( characterPlus2 & 0xc0)==0x80) && (( characterPlus3 & 0xc0)==0x80) )
503 // four bytes of valid UTF8 found
507 // ascii range 0 to 0x7F
508 else if((character & 0x80)==0x00)
510 // The value of character is in the range 0x00-0x7f
511 // UTF8 maintains ASCII transparency. So it's a valid UTF8.
514 // if the sample data is longer than KMaxSizeAutoDetectSample then accept anything
515 // for the last two bytes as they may not appear valid without more data
516 else if( i >= (KMaxSizeAutoDetectSample -2) )
522 // This is not decoding as UTF8 so reject
523 return EIsNotCharacterSet;
527 // All the characters could be converted
528 return EIsCharacterSet;
534 Check if ISO2022JP by looking for the escape sequences.
535 @param aForeign A sample of data to be checked
536 @note Unlike DetectShiftJis and DetectEucJp, no count of convertible input bytes is passed back.
537 @return The result of the check as either EIsCharacterSet, EIsNotCharacterSet or EMaybeCharacterSet
// Scans up to KMaxSizeAutoDetectSample bytes for an ESC byte followed by one
// of the known JIS escape sequences; the first match means ISO 2022 JP.
540 enum CJ5Converter::TDectectCharacterSet CJ5Converter::DetectIso2022( const TDesC8& aForeign )
542 // Get the sample length
543 TInt sampleLength = Min(aForeign.Length(), KMaxSizeAutoDetectSample);;
547 TText8 characterPlus1;
548 TText8 characterPlus2;
549 TText8 characterPlus3;
550 TText8 characterPlus4;
551 TText8 characterPlus5;
553 // scan the sample text looking for ISO 2022 JP escape sequences
554 while ( i < sampleLength )
556 // get the next few characters, use 0 if there is no more sample
557 // as this will not match any test.
558 character = aForeign[i];
559 characterPlus1 = ( i < (sampleLength-1) ? aForeign[i+1]:0);
560 characterPlus2 = ( i < (sampleLength-2) ? aForeign[i+2]:0);
561 characterPlus3 = ( i < (sampleLength-3) ? aForeign[i+3]:0);
564 // check for the JIS escape sequences of ISO 2022Jp
565 // These values have been taken from JISBASE_SHARED
566 if (character == KEscape)
568 // Escape Sequence For Jis C6226_1978 \x1b\x24\x40
569 if ((characterPlus1 == 0x24) && (characterPlus2 == 0x40))
571 return EIsCharacterSet;
574 // Escape Sequence For Jis X0208_1983 \x1b\x24\x42
575 else if ((characterPlus1 == 0x24) && (characterPlus2 == 0x42))
577 return EIsCharacterSet;
580 // Escape Sequence For Jis Roman \x1b\x28\x4a
581 else if ((characterPlus1 == 0x28) && (characterPlus2 == 0x4A))
583 return EIsCharacterSet;
586 // Escape Sequence For Jis RomanIncorrect \x1b\x28\x48
587 else if ((characterPlus1 == 0x28) && (characterPlus2 == 0x48))
589 return EIsCharacterSet;
592 // Escape Sequence For Ascii \x1b\x28\x42
593 else if ((characterPlus1 == 0x28) && (characterPlus2 == 0x42))
595 return EIsCharacterSet;
598 // Escape Sequence For EscapeSequenceForHalfWidthKatakana \x1b\x28\x49
599 else if ((characterPlus1 == 0x28) && (characterPlus2 == 0x49))
601 return EIsCharacterSet;
604 // Escape Sequence For Jis X0208_199x \x1b\x26\x40\x1b\x24\x42
605 else if ((characterPlus1 == 0x26) && (characterPlus2 == 0x40))
// this six-byte sequence needs two more look-ahead bytes (0 past the end of the sample)
607 characterPlus4 = ( i < (sampleLength-4) ? aForeign[i+4]:0);
608 characterPlus5 = ( i < (sampleLength-5) ? aForeign[i+5]:0);
610 if ((characterPlus3 == 0x1b) && (characterPlus4 == 0x24) && (characterPlus5 == 0x42))
612 return EIsCharacterSet;
615 // Escape Sequence For Jis X0212_1990 \x1b\x24\x28\x44
616 else if ((characterPlus1 == 0x24) && (characterPlus2 == 0x28))
618 if (characterPlus3 == 0x44)
620 return EIsCharacterSet;
624 // check for the JIS escape sequences of ISO 2022Jp "B@" x42 x40
625 else if ((characterPlus1 == 'B') || (characterPlus1 == '@'))
627 return EIsCharacterSet;
630 } // end of if ( character == KEscape )
635 // if no escape sequences have been found then this is not ISO2022
636 return EIsNotCharacterSet;
642 Check if EUC JP (reference CJKV by Ken Lunde page 164)
643 @param aForeign A sample of data to be checked
644 @param aNumberOfBytesConverted The number of input bytes that can be converted
645 @return The result of the check as either EIsCharacterSet, EIsNotCharacterSet or EMaybeCharacterSet
// Scans up to KMaxSizeAutoDetectSample bytes, counting the bytes that fit one
// of the EUC JP code sets. Rejects on the first byte that fits none, otherwise
// grades the result by how many bytes were counted.
648 CJ5Converter::TDectectCharacterSet CJ5Converter::DetectEucJp( const TDesC8& aForeign,TInt &aNumberOfBytesConverted )
650 // Get the sample length
651 TInt sampleLength = Min(aForeign.Length(), KMaxSizeAutoDetectSample);;
654 aNumberOfBytesConverted = 0;
657 TText8 characterPlus1;
658 TText8 characterPlus2;
660 // scan the sample text looking for valid EUC JP data
661 while ( i < sampleLength )
663 // get the next few characters, use 0 if there is no more sample
664 // as this will not match any test.
665 character = aForeign[i];
666 characterPlus1 = ( i < (sampleLength-1) ? aForeign[i+1]:0);
667 characterPlus2 = ( i < (sampleLength-2) ? aForeign[i+2]:0);
669 // EUCJP code set 0 0x21-0x7e
670 if ( (character >= 0x21) && (character <= 0x7e))
672 aNumberOfBytesConverted++;
// line feed and carriage return are also accepted
674 else if ( (character == 0x0a) || (character == 0x0d))
676 aNumberOfBytesConverted++;
// two-byte sequence with both bytes in 0xa1-0xff (presumably EUC JP code set 1)
679 else if ( (character >= 0xa1) && (character <= 0xff)
680 && (characterPlus1 >= 0xa1) && (characterPlus1 <= 0xff) )
682 aNumberOfBytesConverted+=2;
686 // EUC JP code set 2, starts with the EUC JP SS2 character (0x8E)
687 // and is followed by character in range 0xA1- 0xDF
688 else if ((character == 0x8E) && (characterPlus1 >= 0xA1) && (characterPlus1 <= 0xDF) )
690 // this could be 2 bytes of EUC JP code set 2
691 aNumberOfBytesConverted += 2;
694 // EUC JP code set 3, starts with the EUC JP SS3 character (0x8F)
695 // and is followed by two characters in range A1- DF A1 -FE
// NOTE(review): the test below checks BOTH trailing bytes against 0xA1-0xDF,
// which does not match the ranges stated in the comment above - confirm which is intended.
696 else if ((character == 0x8F) && (characterPlus1 >= 0xA1) && (characterPlus1 <= 0xDF)
697 && (characterPlus2 >= 0xA1) && (characterPlus2 <= 0xDF))
699 // this could be 3 bytes of EUC JP code set 3
700 aNumberOfBytesConverted += 3;
705 // This is not a valid decoding as EUC JP so reject
706 return EIsNotCharacterSet;
712 // if all the characters could be converted
713 if (aNumberOfBytesConverted == sampleLength)
715 return EIsCharacterSet;
717 else if (aNumberOfBytesConverted == 0)
719 return EIsNotCharacterSet;
// some, but not all, of the sample was counted as convertible
723 return EMaybeCharacterSet;
729 Convert from UCS2 (Universal Character Set containing two bytes) to unicode
730 Remove any byte order marks in the UCS2 data.
731 @param aUnicode Contains the converted text in the Unicode character set.
732 @param aForeign The non-Unicode source text to be converted
733 @param aNumberOfUnconvertibleCharacters Contains the number of bytes which were not converted.
734 @param aIndexOfFirstByteOfFirstUnconvertibleCharacter The index of the first byte of the first unconvertible character.
735 @return the number of unconverted bytes left at the end of the input descriptor
// Combines each pair of input bytes into one 16-bit character using the given
// endianness and appends it to aUnicode, leaving out byte order marks.
738 TInt CJ5Converter::ConvertUcs2ToUnicode(CCnvCharacterSetConverter::TEndianness& aDefaultEndiannessOfForeignCharacters,
740 const TDesC8& aForeign,
741 TInt& aNumberOfUnconvertibleCharacters,
742 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
745 TInt numberOfBytesConverted = 0;
746 TInt numberOfUnicodeCharacters =0;
749 // start at beginning of the output buffer provided
752 // while there is at least 2 bytes of data to convert and space in the output buffer
753 while ( (numberOfBytesConverted+1 < aForeign.Size()) && (numberOfUnicodeCharacters < aUnicode.MaxLength()) )
755 if (aDefaultEndiannessOfForeignCharacters == CCnvCharacterSetConverter::ELittleEndian )
757 // ELittleEndian 0x??00
758 nextChar = aForeign[numberOfBytesConverted] + ( aForeign[numberOfBytesConverted+1] << 8);
// otherwise the first byte of the pair is the high byte
763 nextChar = ( aForeign[numberOfBytesConverted] <<8 ) + aForeign[numberOfBytesConverted+1];
766 // save the unicode character extracted unless it's a BOM
767 if ( nextChar != KByteOrderMark )
769 aUnicode.Append( nextChar );
770 numberOfUnicodeCharacters++;
// a skipped BOM still counts as two consumed input bytes
773 numberOfBytesConverted+=2;
776 // there are no unconvertible characters with UCS2, but there could be
777 aNumberOfUnconvertibleCharacters = 0;
778 // a negative value indicates that all characters converted
779 aIndexOfFirstByteOfFirstUnconvertibleCharacter = -1;
781 // returns the number of unconverted bytes left at the end of the input descriptor
782 // Note there could be 1 byte left over if an odd number of bytes provided for conversion
783 return aForeign.Size() - numberOfBytesConverted;
787 Convert from EUC_JP (Extended Unix Code encoding for Japanese)
788 Using the standard Charconv method of an array of methods
789 @return the number of bytes converted
// Fills a four-entry method table (one entry per character set handled here)
// and passes it to CnvUtilities::ConvertToUnicodeFromHeterogeneousForeign,
// returning that call's result.
792 TInt CJ5Converter::ConvertEEucjpToUnicode(
793 CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
795 const TDesC8& aForeign,
797 TInt& aNumberOfUnconvertibleCharacters,
798 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
800 TFixedArray<CnvUtilities::SMethod, 4> methods;
// methods[0]: JIS Roman, 1 byte per character
801 methods[0].iNumberOfBytesAbleToConvert=NumberOfBytesAbleToConvertToJisRoman;
802 methods[0].iConvertToIntermediateBufferInPlace=DummyConvertToIntermediateBufferInPlace;
803 methods[0].iConversionData=&CnvJisRoman::ConversionData();
804 methods[0].iNumberOfBytesPerCharacter=1;
805 methods[0].iNumberOfCoreBytesPerCharacter=1;
// methods[1]: JIS X 0208, 2 bytes per character
806 methods[1].iNumberOfBytesAbleToConvert=NumberOfBytesAbleToConvertToJisX0208;
807 methods[1].iConvertToIntermediateBufferInPlace=ConvertToJisX0208FromEucJpPackedInPlace;
808 methods[1].iConversionData=&CnvJisX0208::ConversionData();
809 methods[1].iNumberOfBytesPerCharacter=2;
810 methods[1].iNumberOfCoreBytesPerCharacter=2;
// methods[2]: half-width katakana, 2 bytes per character of which 1 is a core byte
811 methods[2].iNumberOfBytesAbleToConvert=NumberOfBytesAbleToConvertToHalfWidthKatakana8;
812 methods[2].iConvertToIntermediateBufferInPlace=ConvertToHalfWidthKatakana8FromEucJpPackedInPlace;
813 methods[2].iConversionData=&CnvHalfWidthKatakana8::ConversionData();
814 methods[2].iNumberOfBytesPerCharacter=2;
815 methods[2].iNumberOfCoreBytesPerCharacter=1;
// methods[3]: JIS X 0212, 3 bytes per character of which 2 are core bytes
816 methods[3].iNumberOfBytesAbleToConvert=NumberOfBytesAbleToConvertToJisX0212;
817 methods[3].iConvertToIntermediateBufferInPlace=ConvertToJisX0212FromEucJpPackedInPlace;
818 methods[3].iConversionData=&CnvJisX0212::ConversionData();
819 methods[3].iNumberOfBytesPerCharacter=3;
820 methods[3].iNumberOfCoreBytesPerCharacter=2;
821 return CnvUtilities::ConvertToUnicodeFromHeterogeneousForeign(aDefaultEndiannessOfForeignCharacters, aUnicode, aForeign, aNumberOfUnconvertibleCharacters, aIndexOfFirstByteOfFirstUnconvertibleCharacter, methods.Array());