sl@0: /* sl@0: * Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies). sl@0: * All rights reserved. sl@0: * This component and the accompanying materials are made available sl@0: * under the terms of "Eclipse Public License v1.0" sl@0: * which accompanies this distribution, and is available sl@0: * at the URL "http://www.eclipse.org/legal/epl-v10.html". sl@0: * sl@0: * Initial Contributors: sl@0: * Nokia Corporation - initial contribution. sl@0: * sl@0: * Contributors: sl@0: * sl@0: * Description: ISO2022kr conversion plugin sl@0: * sl@0: */ sl@0: sl@0: sl@0: // INCLUDES sl@0: #include sl@0: #include sl@0: #include sl@0: #include sl@0: #include "cp949table.h" sl@0: #include "charactersetconverter.h" sl@0: sl@0: static const TUint KBitsForNonStandardStates = 0x03; sl@0: static const TUint KShiftedToKSCState = 0x01; sl@0: sl@0: static const TUint KMaxSizeOfTmpBuffer = 1024; sl@0: sl@0: static const TUint8 KMaxAscii = 0x9f; sl@0: sl@0: _LIT8(KLit8EscapeSequence, "\x1b\x24\x43"); sl@0: sl@0: #define SHIFT_IN_BYTE 0x0F sl@0: #define SHIFT_OUT_BYTE 0x0E sl@0: sl@0: typedef enum sl@0: { sl@0: EISO2022Initialize, sl@0: EISO2022Ascii, sl@0: EISO2022KSC sl@0: } TISO2022FromUniState; sl@0: sl@0: // New Interface class sl@0: class CISO2022KRImplementation : public CCharacterSetConverterPluginInterface sl@0: { sl@0: public: sl@0: virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters(); sl@0: sl@0: virtual TInt ConvertFromUnicode( sl@0: CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, sl@0: const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, sl@0: TDes8& aForeign, sl@0: const TDesC16& aUnicode, sl@0: CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters ); sl@0: sl@0: virtual TInt ConvertToUnicode( sl@0: CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, sl@0: TDes16& aUnicode, sl@0: const TDesC8& aForeign, sl@0: TInt& aState, sl@0: TInt& aNumberOfUnconvertibleCharacters, sl@0: TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter ); sl@0: sl@0: virtual TBool IsInThisCharacterSetL( sl@0: TBool& aSetToTrue, sl@0: TInt& aConfidenceLevel, sl@0: const TDesC8& ); sl@0: sl@0: static CISO2022KRImplementation* NewL(); sl@0: sl@0: virtual ~CISO2022KRImplementation(); sl@0: private: sl@0: CISO2022KRImplementation(); sl@0: }; sl@0: sl@0: // FUNCTION DEFINITIONS sl@0: const TDesC8& CISO2022KRImplementation::ReplacementForUnconvertibleUnicodeCharacters() sl@0: { sl@0: return CnvCp949Table::ReplacementForUnconvertibleUnicodeCharacters(); sl@0: } sl@0: sl@0: TInt CISO2022KRImplementation::ConvertFromUnicode( sl@0: CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, sl@0: const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, sl@0: TDes8& aForeign, sl@0: const TDesC16& aUnicode, sl@0: CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters) sl@0: { sl@0: TInt ret; sl@0: TInt currPos = 3; sl@0: TUint outputConversionFlags = 0; sl@0: TUint inputConversionFlags = CCnvCharacterSetConverter::EInputConversionFlagAppend; sl@0: TISO2022FromUniState currState = EISO2022Initialize; sl@0: TUint8 shiftByte = 0; sl@0: TPtr8 shiftBytePtr(NULL, 0); sl@0: sl@0: aForeign.SetLength(0); sl@0: sl@0: /* Start with escape sequence */ sl@0: aForeign.Append( KLit8EscapeSequence ); sl@0: sl@0: ret = CCnvCharacterSetConverter::DoConvertFromUnicode( CnvCp949Table::ConversionData(), sl@0: aDefaultEndiannessOfForeignCharacters, sl@0: aReplacementForUnconvertibleUnicodeCharacters, sl@0: aForeign, sl@0: aUnicode, sl@0: aIndicesOfUnconvertibleCharacters, sl@0: outputConversionFlags, sl@0: inputConversionFlags ); sl@0: /* Append shift in and out bytes as needed */ sl@0: while( currPos < aForeign.Length() ) sl@0: { sl@0: TUint8 *currChar = (TUint8 *)aForeign.Mid(currPos).Ptr(); sl@0: if( *currChar > KMaxAscii ) sl@0: { /* KSC character */ sl@0: if( currState != EISO2022KSC ) sl@0: { /* Insert shift out byte */ sl@0: shiftByte = SHIFT_OUT_BYTE; sl@0: currState = EISO2022KSC; sl@0: } sl@0: sl@0: /* Clear the 8th bit */ sl@0: *currChar = (*currChar & ~(0x80)); sl@0: } sl@0: else sl@0: { /* ASCII character */ sl@0: if( currState != EISO2022Ascii ) sl@0: { /* Insert shift in byte */ sl@0: shiftByte = SHIFT_IN_BYTE; sl@0: currState = EISO2022Ascii; sl@0: } sl@0: } sl@0: sl@0: if( shiftByte ) sl@0: { sl@0: if( (aForeign.Length() + 1) > aForeign.MaxLength() ) sl@0: { /* Make room for shift byte */ sl@0: if( aForeign[ (aForeign.Length() - 1) ] > KMaxAscii ) sl@0: { /* Drop a dual byte KSC character */ sl@0: aForeign.SetLength( aForeign.Length() - 2 ); sl@0: } sl@0: else sl@0: { /* Drop a single byte ASCII character */ sl@0: aForeign.SetLength( aForeign.Length() - 1 ); sl@0: } sl@0: /* Increase unconverted amount */ sl@0: ret++; sl@0: /* TBD, propably should try to fix aIndicesOfUnconvertibleCharacters sl@0: if possible */ sl@0: } sl@0: shiftBytePtr.Set( &shiftByte, 1, 1 ); sl@0: aForeign.Insert( currPos, shiftBytePtr ); sl@0: currPos++; sl@0: shiftByte = 0; sl@0: } sl@0: sl@0: /* Skip current character */ sl@0: currPos++; sl@0: } sl@0: sl@0: return ret; sl@0: } sl@0: sl@0: TInt CISO2022KRImplementation::ConvertToUnicode( sl@0: CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, sl@0: TDes16& aUnicode, sl@0: const TDesC8& aForeign, sl@0: TInt& aState, sl@0: TInt& aNumberOfUnconvertibleCharacters, sl@0: TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter) sl@0: { sl@0: TInt err; sl@0: TInt ret = 0; sl@0: TInt currPos = 0; sl@0: TInt convPos = 0; sl@0: TInt shiftInPos = KErrNotFound; sl@0: TInt shiftOutPos = KErrNotFound; sl@0: TInt shiftPos = KErrNotFound; sl@0: TInt escPos = KErrNotFound; sl@0: TPtrC8 currSegment; sl@0: TPtrC8 convSegment; sl@0: TBool changeState = EFalse; sl@0: sl@0: TUint outputConversionFlags = 0; sl@0: TUint inputConversionFlags = CCnvCharacterSetConverter::EInputConversionFlagAppend; sl@0: TInt numberOfUnconvertibleCharacters = 0; sl@0: TInt indexOfFirstByteOfFirstUnconvertibleCharacter = 0; sl@0: aNumberOfUnconvertibleCharacters = 0; sl@0: sl@0: while( currPos < aForeign.Length() ) sl@0: { sl@0: sl@0: currSegment.Set( aForeign.Mid( currPos ) ); sl@0: sl@0: /* First change state if needed */ sl@0: if( changeState ) sl@0: { sl@0: changeState = EFalse; sl@0: if( (aState & KBitsForNonStandardStates) == KShiftedToKSCState ) sl@0: { /* Switch back to default ASCII */ sl@0: aState &= ~(KShiftedToKSCState); sl@0: } sl@0: else sl@0: { /* Switch to KSC */ sl@0: aState |= KShiftedToKSCState; sl@0: } sl@0: } sl@0: sl@0: /* Search for escape which should be skipped */ sl@0: escPos = currSegment.Find( KLit8EscapeSequence ); sl@0: sl@0: /* Search for shift in byte */ sl@0: shiftInPos = currSegment.Locate( SHIFT_IN_BYTE ); sl@0: sl@0: /* Search for shift out byte */ sl@0: shiftOutPos = currSegment.Locate( SHIFT_OUT_BYTE ); sl@0: sl@0: /* Set shift pos according to found shift bytes */ sl@0: if( shiftInPos == KErrNotFound && sl@0: shiftOutPos == KErrNotFound ) sl@0: { /* Neither found */ sl@0: shiftPos = KErrNotFound; sl@0: } sl@0: else sl@0: { sl@0: if( (shiftInPos != KErrNotFound) && sl@0: ((shiftInPos < shiftOutPos) || (shiftOutPos == KErrNotFound)) ) sl@0: { /* shift in is nearer or shift out not found */ sl@0: shiftPos = shiftInPos; sl@0: /* Set state change if needed */ sl@0: if( (aState & KBitsForNonStandardStates) == KShiftedToKSCState ) sl@0: { sl@0: changeState = ETrue; sl@0: } sl@0: } sl@0: else sl@0: { /* shift out must be nearer or shift in not fouind */ sl@0: shiftPos = shiftOutPos; sl@0: /* Set state change if needed */ sl@0: if( (aState & KBitsForNonStandardStates) != KShiftedToKSCState ) sl@0: { sl@0: changeState = ETrue; sl@0: } sl@0: } sl@0: } sl@0: sl@0: if( shiftPos == KErrNotFound ) sl@0: { /* Shift byte not found, same coding for the rest of the data */ sl@0: if( escPos == KErrNotFound ) sl@0: { /* No escape sequence either, just convert the rest */ sl@0: convSegment.Set( currSegment ); sl@0: } sl@0: } sl@0: else if( ((escPos != KErrNotFound) && (shiftPos < escPos)) || sl@0: (escPos == KErrNotFound) ) sl@0: { /* Shift byte found and it comes before escape sequence or no escape sl@0: sequence was found, convert data preceeding the shift byte if shift sl@0: byte isn't the first character */ sl@0: if( shiftPos == 0 ) sl@0: { /* No data to convert preceeds the shift byte, just skip it and continue */ sl@0: currPos += 1; sl@0: continue; sl@0: } sl@0: convSegment.Set( currSegment.Left( shiftPos ) ); sl@0: /* Clear to prevent convert to escape sequence */ sl@0: escPos = KErrNotFound; sl@0: } sl@0: sl@0: if( escPos != KErrNotFound ) sl@0: { /* Escape sequence found before any shift bytes, sl@0: clear possible state change and convert data sl@0: preceeding the escape sequence if sl@0: escape sequence is not at the beginning */ sl@0: changeState = EFalse; sl@0: if( escPos == 0 ) sl@0: { /* No data to convert preceeds the escape sequence, just skip it continue */ sl@0: currPos += KLit8EscapeSequence().Length(); sl@0: continue; sl@0: } sl@0: convSegment.Set( currSegment.Left( escPos ) ); sl@0: } sl@0: sl@0: if( (aState & KBitsForNonStandardStates) == KShiftedToKSCState ) sl@0: { /* Convert KSC encoded */ sl@0: HBufC8 *tmpForeign = NULL; sl@0: sl@0: if( (convSegment.Length() & 0x1) ) sl@0: { /* KSC should have even amount of bytes */ sl@0: ret = CCnvCharacterSetConverter::EErrorIllFormedInput; sl@0: } sl@0: else sl@0: { sl@0: convPos = 0; sl@0: while( convPos < convSegment.Length() ) sl@0: { sl@0: TRAP( err, tmpForeign = HBufC8::NewL( KMaxSizeOfTmpBuffer ) ); sl@0: if( err != KErrNone ) sl@0: { sl@0: User::Panic( _L("ISO-2022-KR"), err ); sl@0: } sl@0: sl@0: if( convSegment.Length() < KMaxSizeOfTmpBuffer ) sl@0: { /* Convert whole segment */ sl@0: tmpForeign->Des().Copy( convSegment ); sl@0: } sl@0: else sl@0: { /* Convert in chunks */ sl@0: if( (convPos + KMaxSizeOfTmpBuffer) >= convSegment.Length() ) sl@0: { /* Last chunk */ sl@0: tmpForeign->Des().Copy( convSegment.Mid( convPos ) ); sl@0: } sl@0: else sl@0: { sl@0: tmpForeign->Des().Copy( convSegment.Mid( convPos, KMaxSizeOfTmpBuffer ) ); sl@0: } sl@0: } sl@0: sl@0: TUint8 *chars = (TUint8 *)tmpForeign->Des().Ptr(); sl@0: for( TInt i = 0 ; i < tmpForeign->Length() ; i++ ) sl@0: { /* Set highest bit in characters */ sl@0: chars[i] |= 0x80; sl@0: } sl@0: sl@0: numberOfUnconvertibleCharacters = 0; sl@0: ret = CCnvCharacterSetConverter::DoConvertToUnicode( CnvCp949Table::ConversionData(), sl@0: aDefaultEndiannessOfForeignCharacters, sl@0: aUnicode, *tmpForeign, sl@0: numberOfUnconvertibleCharacters, sl@0: indexOfFirstByteOfFirstUnconvertibleCharacter, sl@0: outputConversionFlags, sl@0: inputConversionFlags ); sl@0: if( numberOfUnconvertibleCharacters != 0 && sl@0: aNumberOfUnconvertibleCharacters == 0 ) sl@0: { /* First uncovertible found, set index relative to actual input buffer*/ sl@0: aIndexOfFirstByteOfFirstUnconvertibleCharacter = (currPos + convPos + indexOfFirstByteOfFirstUnconvertibleCharacter); sl@0: } sl@0: sl@0: aNumberOfUnconvertibleCharacters += numberOfUnconvertibleCharacters; sl@0: sl@0: if( ret < 0 ) sl@0: { /* Some error, break the loop, sl@0: errors are handled later */ sl@0: delete tmpForeign; sl@0: break; sl@0: } sl@0: sl@0: if( ret > 0 ) sl@0: { /* Not all were converted, fix return value sl@0: to be relative to convSegment and break the loop */ sl@0: ret = (convSegment.Length() - convPos - tmpForeign->Length() + ret); sl@0: delete tmpForeign; sl@0: break; sl@0: } sl@0: sl@0: convPos += tmpForeign->Length(); sl@0: delete tmpForeign; sl@0: } sl@0: } sl@0: } sl@0: else sl@0: { /* Convert ASCII encoded by default, KSC can be used without setting highest bit */ sl@0: numberOfUnconvertibleCharacters = 0; sl@0: ret = CCnvCharacterSetConverter::DoConvertToUnicode( CnvCp949Table::ConversionData(), sl@0: aDefaultEndiannessOfForeignCharacters, sl@0: aUnicode, convSegment, sl@0: numberOfUnconvertibleCharacters, sl@0: indexOfFirstByteOfFirstUnconvertibleCharacter, sl@0: outputConversionFlags, sl@0: inputConversionFlags ); sl@0: if( numberOfUnconvertibleCharacters != 0 && sl@0: aNumberOfUnconvertibleCharacters == 0 ) sl@0: { /* First uncovertible found, set index relative to actual input buffer*/ sl@0: aIndexOfFirstByteOfFirstUnconvertibleCharacter = currPos + indexOfFirstByteOfFirstUnconvertibleCharacter; sl@0: } sl@0: aNumberOfUnconvertibleCharacters += numberOfUnconvertibleCharacters; sl@0: } sl@0: sl@0: if( ret < 0 ) sl@0: { /* Error during conversion */ sl@0: return ret; sl@0: } sl@0: else if( ret > 0 ) sl@0: { /* Not all characters where converted, return sl@0: value indicating how many bytes in total are left unconverted */ sl@0: return (aForeign.Length() - currPos - convSegment.Length() + ret); sl@0: } sl@0: sl@0: /* Increase to skip converted data */ sl@0: currPos += convSegment.Length(); sl@0: if( escPos != KErrNotFound ) sl@0: { /* Increase to skip escape sequence */ sl@0: currPos += KLit8EscapeSequence().Length(); sl@0: } sl@0: else if( shiftPos != KErrNotFound ) sl@0: { /* Increase to skip shift byte */ sl@0: currPos += 1; sl@0: } sl@0: sl@0: } sl@0: sl@0: return 0; sl@0: } sl@0: sl@0: sl@0: TBool CISO2022KRImplementation::IsInThisCharacterSetL( sl@0: TBool& aSetToTrue, sl@0: TInt& aConfidenceLevel, sl@0: const TDesC8& /*aBuf*/) sl@0: { sl@0: /* sl@0: aSetToTrue=ETrue; sl@0: aConfidenceLevel=50; sl@0: sl@0: TUint8 ch(0); sl@0: for (TInt i=0;i