os/textandloc/charconvfw/charconvplugins/src/plugins/iso2022kr.cpp
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/textandloc/charconvfw/charconvplugins/src/plugins/iso2022kr.cpp	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,473 @@
     1.4 +/*
     1.5 +* Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies).
     1.6 +* All rights reserved.
     1.7 +* This component and the accompanying materials are made available
     1.8 +* under the terms of "Eclipse Public License v1.0"
     1.9 +* which accompanies this distribution, and is available
    1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
    1.11 +*
    1.12 +* Initial Contributors:
    1.13 +* Nokia Corporation - initial contribution.
    1.14 +*
    1.15 +* Contributors:
    1.16 +*
    1.17 +* Description:   ISO2022kr conversion plugin
    1.18 +*
    1.19 +*/
    1.20 +
    1.21 +
    1.22 +// INCLUDES
    1.23 +#include <e32std.h>
    1.24 +#include <charconv.h>
    1.25 +#include <convgeneratedcpp.h>
    1.26 +#include <ecom/implementationproxy.h>
    1.27 +#include "cp949table.h"
    1.28 +#include "charactersetconverter.h"
    1.29 +
    1.30 +static const TUint KBitsForNonStandardStates = 0x03;
    1.31 +static const TUint KShiftedToKSCState = 0x01;
    1.32 +
    1.33 +static const TUint KMaxSizeOfTmpBuffer = 1024;
    1.34 +
    1.35 +static const TUint8 KMaxAscii = 0x9f;
    1.36 +
    1.37 +_LIT8(KLit8EscapeSequence, "\x1b\x24\x43");
    1.38 +
    1.39 +#define SHIFT_IN_BYTE  0x0F
    1.40 +#define SHIFT_OUT_BYTE 0x0E
    1.41 +
    1.42 +typedef enum
    1.43 +{
    1.44 +    EISO2022Initialize,
    1.45 +    EISO2022Ascii,
    1.46 +    EISO2022KSC
    1.47 +} TISO2022FromUniState;
    1.48 +
    1.49 +// New Interface class
    1.50 +class CISO2022KRImplementation : public CCharacterSetConverterPluginInterface
    1.51 +{
    1.52 +    public:
    1.53 +        virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters();
    1.54 +
    1.55 +        virtual TInt ConvertFromUnicode(
    1.56 +            CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
    1.57 +            const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
    1.58 +            TDes8& aForeign, 
    1.59 +            const TDesC16& aUnicode, 
    1.60 +            CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters );
    1.61 +
    1.62 +        virtual TInt ConvertToUnicode(
    1.63 +            CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
    1.64 +            TDes16& aUnicode, 
    1.65 +            const TDesC8& aForeign, 
    1.66 +            TInt& aState, 
    1.67 +            TInt& aNumberOfUnconvertibleCharacters, 
    1.68 +            TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter );
    1.69 +
    1.70 +        virtual TBool IsInThisCharacterSetL(
    1.71 +            TBool& aSetToTrue, 
    1.72 +            TInt& aConfidenceLevel, 
    1.73 +            const TDesC8& );
    1.74 +
    1.75 +        static CISO2022KRImplementation* NewL();
    1.76 +
    1.77 +        virtual ~CISO2022KRImplementation();
    1.78 +    private:
    1.79 +        CISO2022KRImplementation();
    1.80 +};
    1.81 +
    1.82 +// FUNCTION DEFINITIONS
    1.83 +const TDesC8& CISO2022KRImplementation::ReplacementForUnconvertibleUnicodeCharacters()
    1.84 +	{
    1.85 +	return CnvCp949Table::ReplacementForUnconvertibleUnicodeCharacters();
    1.86 +	}
    1.87 +
    1.88 +TInt CISO2022KRImplementation::ConvertFromUnicode(
    1.89 +    CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
    1.90 +    const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
    1.91 +    TDes8& aForeign, 
    1.92 +    const TDesC16& aUnicode, 
    1.93 +    CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
    1.94 +	{
    1.95 +    TInt ret;
    1.96 +    TInt currPos = 3;
    1.97 +    TUint outputConversionFlags = 0;
    1.98 +    TUint inputConversionFlags = CCnvCharacterSetConverter::EInputConversionFlagAppend;
    1.99 +    TISO2022FromUniState currState = EISO2022Initialize;
   1.100 +    TUint8 shiftByte = 0;
   1.101 +    TPtr8 shiftBytePtr(NULL, 0);
   1.102 +
   1.103 +    aForeign.SetLength(0);
   1.104 +
   1.105 +    /* Start with escape sequence */
   1.106 +    aForeign.Append( KLit8EscapeSequence );
   1.107 +
   1.108 +    ret = CCnvCharacterSetConverter::DoConvertFromUnicode( CnvCp949Table::ConversionData(),
   1.109 +                                                           aDefaultEndiannessOfForeignCharacters,
   1.110 +                                                           aReplacementForUnconvertibleUnicodeCharacters,
   1.111 +                                                           aForeign,
   1.112 +                                                           aUnicode,
   1.113 +                                                           aIndicesOfUnconvertibleCharacters,
   1.114 +                                                           outputConversionFlags, 
   1.115 +                                                           inputConversionFlags );
   1.116 +    /* Append shift in and out bytes as needed */
   1.117 +    while( currPos < aForeign.Length() )
   1.118 +        {
   1.119 +        TUint8 *currChar = (TUint8 *)aForeign.Mid(currPos).Ptr();
   1.120 +        if( *currChar > KMaxAscii )
   1.121 +            { /* KSC character */
   1.122 +            if( currState != EISO2022KSC )
   1.123 +                { /* Insert shift out byte */
   1.124 +                shiftByte = SHIFT_OUT_BYTE;
   1.125 +                currState = EISO2022KSC;
   1.126 +                }
   1.127 +
   1.128 +            /* Clear the 8th bit */
   1.129 +            *currChar = (*currChar & ~(0x80));
   1.130 +            }
   1.131 +        else
   1.132 +            { /* ASCII character */
   1.133 +            if( currState != EISO2022Ascii )
   1.134 +                { /* Insert shift in byte */
   1.135 +                shiftByte = SHIFT_IN_BYTE;
   1.136 +                currState = EISO2022Ascii;
   1.137 +                }
   1.138 +            }
   1.139 +
   1.140 +        if( shiftByte )
   1.141 +            {
   1.142 +            if( (aForeign.Length() + 1) > aForeign.MaxLength() )
   1.143 +                { /* Make room for shift byte */
   1.144 +                if( aForeign[ (aForeign.Length() - 1) ] > KMaxAscii )
   1.145 +                    { /* Drop a dual byte KSC character */
   1.146 +                    aForeign.SetLength( aForeign.Length() - 2 );
   1.147 +                    }
   1.148 +                else
   1.149 +                    { /* Drop a single byte ASCII character */
   1.150 +                    aForeign.SetLength( aForeign.Length() - 1 );
   1.151 +                    }
   1.152 +                    /* Increase unconverted amount */
   1.153 +                    ret++;
   1.154 +                /* TBD, propably should try to fix aIndicesOfUnconvertibleCharacters
   1.155 +                        if possible */
   1.156 +                }
   1.157 +                shiftBytePtr.Set( &shiftByte, 1, 1 );
   1.158 +                aForeign.Insert( currPos, shiftBytePtr );
   1.159 +                currPos++;
   1.160 +                shiftByte = 0;
   1.161 +            }
   1.162 +
   1.163 +        /* Skip current character */
   1.164 +        currPos++;
   1.165 +        }
   1.166 +
   1.167 +    return ret;
   1.168 +    }
   1.169 +
   1.170 +TInt CISO2022KRImplementation::ConvertToUnicode(
   1.171 +    CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
   1.172 +    TDes16& aUnicode, 
   1.173 +    const TDesC8& aForeign, 
   1.174 +    TInt& aState, 
   1.175 +    TInt& aNumberOfUnconvertibleCharacters, 
   1.176 +    TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
   1.177 +	{
   1.178 +    TInt err;
   1.179 +    TInt ret = 0;
   1.180 +    TInt currPos = 0;
   1.181 +    TInt convPos = 0;
   1.182 +    TInt shiftInPos = KErrNotFound;
   1.183 +    TInt shiftOutPos = KErrNotFound;
   1.184 +    TInt shiftPos = KErrNotFound;
   1.185 +    TInt escPos = KErrNotFound;
   1.186 +    TPtrC8 currSegment;
   1.187 +    TPtrC8 convSegment;
   1.188 +    TBool changeState = EFalse;
   1.189 +
   1.190 +    TUint outputConversionFlags = 0;
   1.191 +    TUint inputConversionFlags = CCnvCharacterSetConverter::EInputConversionFlagAppend;
   1.192 +    TInt numberOfUnconvertibleCharacters = 0;
   1.193 +    TInt indexOfFirstByteOfFirstUnconvertibleCharacter = 0;
   1.194 +    aNumberOfUnconvertibleCharacters = 0;
   1.195 +
   1.196 +    while( currPos < aForeign.Length() )
   1.197 +        {
   1.198 +
   1.199 +        currSegment.Set( aForeign.Mid( currPos ) );
   1.200 +
   1.201 +        /* First change state if needed */
   1.202 +        if( changeState )
   1.203 +            {
   1.204 +            changeState = EFalse;
   1.205 +            if( (aState & KBitsForNonStandardStates) == KShiftedToKSCState )
   1.206 +                { /* Switch back to default ASCII */
   1.207 +                aState &= ~(KShiftedToKSCState);
   1.208 +                }
   1.209 +            else
   1.210 +                { /* Switch to KSC */
   1.211 +                aState |= KShiftedToKSCState; 
   1.212 +                }
   1.213 +            }
   1.214 +
   1.215 +        /* Search for escape which should be skipped */
   1.216 +        escPos = currSegment.Find( KLit8EscapeSequence );
   1.217 +        
   1.218 +        /* Search for shift in byte */
   1.219 +        shiftInPos = currSegment.Locate( SHIFT_IN_BYTE );
   1.220 +
   1.221 +        /* Search for shift out byte */
   1.222 +        shiftOutPos = currSegment.Locate( SHIFT_OUT_BYTE );
   1.223 +
   1.224 +        /* Set shift pos according to found shift bytes */
   1.225 +        if( shiftInPos == KErrNotFound &&
   1.226 +            shiftOutPos == KErrNotFound )
   1.227 +            { /* Neither found */
   1.228 +            shiftPos = KErrNotFound;
   1.229 +            }
   1.230 +        else
   1.231 +            {
   1.232 +            if( (shiftInPos != KErrNotFound) &&
   1.233 +                ((shiftInPos < shiftOutPos) || (shiftOutPos == KErrNotFound)) )
   1.234 +                { /* shift in is nearer or shift out not found */
   1.235 +                shiftPos = shiftInPos;
   1.236 +                /* Set state change if needed */
   1.237 +                if( (aState & KBitsForNonStandardStates) == KShiftedToKSCState )
   1.238 +                    {
   1.239 +                    changeState = ETrue;
   1.240 +                    }
   1.241 +                }
   1.242 +            else
   1.243 +                { /* shift out must be nearer or shift in not fouind */
   1.244 +                shiftPos = shiftOutPos;
   1.245 +                /* Set state change if needed */
   1.246 +                if( (aState & KBitsForNonStandardStates) != KShiftedToKSCState )
   1.247 +                    {
   1.248 +                    changeState = ETrue;
   1.249 +                    }
   1.250 +                }
   1.251 +            }
   1.252 +
   1.253 +        if( shiftPos == KErrNotFound )
   1.254 +            { /* Shift byte not found, same coding for the rest of the data */
   1.255 +            if( escPos == KErrNotFound )
   1.256 +                { /* No escape sequence either, just convert the rest */
   1.257 +                convSegment.Set( currSegment );
   1.258 +                }
   1.259 +            }
   1.260 +        else if( ((escPos != KErrNotFound) && (shiftPos < escPos)) ||
   1.261 +                 (escPos == KErrNotFound) )
   1.262 +            { /* Shift byte found and it comes before escape sequence or no escape
   1.263 +                 sequence was found, convert data preceeding the shift byte if shift
   1.264 +                 byte isn't the first character */
   1.265 +                if( shiftPos == 0 )
   1.266 +                { /* No data to convert preceeds the shift byte, just skip it and continue */
   1.267 +                    currPos += 1;
   1.268 +                    continue;
   1.269 +                }
   1.270 +                convSegment.Set( currSegment.Left( shiftPos ) );
   1.271 +                /* Clear to prevent convert to escape sequence */
   1.272 +                escPos = KErrNotFound;
   1.273 +            }
   1.274 +
   1.275 +        if( escPos != KErrNotFound )
   1.276 +            { /* Escape sequence found before any shift bytes,
   1.277 +                 clear possible state change and convert data
   1.278 +                 preceeding the escape sequence if
   1.279 +                 escape sequence is not at the beginning */
   1.280 +            changeState = EFalse;
   1.281 +            if( escPos == 0 )
   1.282 +                { /* No data to convert preceeds the escape sequence, just skip it continue */
   1.283 +                currPos += KLit8EscapeSequence().Length();
   1.284 +                continue;
   1.285 +                }
   1.286 +            convSegment.Set( currSegment.Left( escPos ) );
   1.287 +            }
   1.288 +
   1.289 +        if( (aState & KBitsForNonStandardStates) == KShiftedToKSCState )
   1.290 +            { /* Convert KSC encoded */
   1.291 +            HBufC8 *tmpForeign = NULL;
   1.292 +
   1.293 +            if( (convSegment.Length() & 0x1) )
   1.294 +                { /* KSC should have even amount of bytes */
   1.295 +                ret = CCnvCharacterSetConverter::EErrorIllFormedInput;
   1.296 +                }
   1.297 +            else
   1.298 +                {
   1.299 +                convPos = 0;
   1.300 +                while( convPos < convSegment.Length() )
   1.301 +                    {
   1.302 +                    TRAP( err, tmpForeign = HBufC8::NewL( KMaxSizeOfTmpBuffer ) );
   1.303 +                    if( err != KErrNone )
   1.304 +                        {
   1.305 +                        User::Panic( _L("ISO-2022-KR"), err );
   1.306 +                        }
   1.307 +
   1.308 +                    if( convSegment.Length() < KMaxSizeOfTmpBuffer )
   1.309 +                        { /* Convert whole segment */
   1.310 +                        tmpForeign->Des().Copy( convSegment );
   1.311 +                        }
   1.312 +                    else
   1.313 +                        { /* Convert in chunks */
   1.314 +                        if( (convPos + KMaxSizeOfTmpBuffer) >= convSegment.Length() )
   1.315 +                            { /* Last chunk */
   1.316 +                            tmpForeign->Des().Copy( convSegment.Mid( convPos ) );
   1.317 +                            }
   1.318 +                        else
   1.319 +                            {
   1.320 +                            tmpForeign->Des().Copy( convSegment.Mid( convPos, KMaxSizeOfTmpBuffer ) );
   1.321 +                            }
   1.322 +                        }
   1.323 +
   1.324 +                    TUint8 *chars = (TUint8 *)tmpForeign->Des().Ptr();
   1.325 +                    for( TInt i = 0 ; i < tmpForeign->Length() ; i++ )
   1.326 +                        { /* Set highest bit in characters */
   1.327 +                        chars[i] |= 0x80;
   1.328 +                        }
   1.329 +
   1.330 +                    numberOfUnconvertibleCharacters = 0;
   1.331 +                    ret = CCnvCharacterSetConverter::DoConvertToUnicode( CnvCp949Table::ConversionData(),
   1.332 +                                                                         aDefaultEndiannessOfForeignCharacters,
   1.333 +                                                                         aUnicode, *tmpForeign,
   1.334 +                                                                         numberOfUnconvertibleCharacters,
   1.335 +                                                                         indexOfFirstByteOfFirstUnconvertibleCharacter,
   1.336 +                                                                         outputConversionFlags,
   1.337 +                                                                         inputConversionFlags );
   1.338 +                    if( numberOfUnconvertibleCharacters != 0 &&
   1.339 +                        aNumberOfUnconvertibleCharacters == 0 )
   1.340 +                        { /* First uncovertible found, set index relative to actual input buffer*/
   1.341 +                        aIndexOfFirstByteOfFirstUnconvertibleCharacter = (currPos + convPos + indexOfFirstByteOfFirstUnconvertibleCharacter);
   1.342 +                        }
   1.343 +
   1.344 +                    aNumberOfUnconvertibleCharacters += numberOfUnconvertibleCharacters;
   1.345 +
   1.346 +                    if( ret < 0 )
   1.347 +                        { /* Some error, break the loop,
   1.348 +                             errors are handled later */
   1.349 +                        delete tmpForeign;
   1.350 +                        break;
   1.351 +                        }
   1.352 +
   1.353 +                    if( ret > 0 )
   1.354 +                        { /* Not all were converted, fix return value
   1.355 +                             to be relative to convSegment and break the loop */
   1.356 +                        ret = (convSegment.Length() - convPos - tmpForeign->Length() + ret);
   1.357 +                        delete tmpForeign;
   1.358 +                        break;
   1.359 +                        }
   1.360 +
   1.361 +                    convPos += tmpForeign->Length();
   1.362 +                    delete tmpForeign;
   1.363 +                    }
   1.364 +                }
   1.365 +            }
   1.366 +        else
   1.367 +            { /* Convert ASCII encoded by default, KSC can be used without setting highest bit */
   1.368 +                numberOfUnconvertibleCharacters = 0;
   1.369 +                ret = CCnvCharacterSetConverter::DoConvertToUnicode( CnvCp949Table::ConversionData(),
   1.370 +                                                                     aDefaultEndiannessOfForeignCharacters,
   1.371 +                                                                     aUnicode, convSegment,
   1.372 +                                                                     numberOfUnconvertibleCharacters,
   1.373 +                                                                     indexOfFirstByteOfFirstUnconvertibleCharacter,
   1.374 +                                                                     outputConversionFlags,
   1.375 +                                                                     inputConversionFlags );
   1.376 +                if( numberOfUnconvertibleCharacters != 0 &&
   1.377 +                    aNumberOfUnconvertibleCharacters == 0 )
   1.378 +                    { /* First uncovertible found, set index relative to actual input buffer*/
   1.379 +                    aIndexOfFirstByteOfFirstUnconvertibleCharacter = currPos + indexOfFirstByteOfFirstUnconvertibleCharacter;
   1.380 +                    }
   1.381 +                aNumberOfUnconvertibleCharacters += numberOfUnconvertibleCharacters;
   1.382 +            }
   1.383 +
   1.384 +        if( ret < 0 )
   1.385 +            { /* Error during conversion */
   1.386 +            return ret;
   1.387 +            }
   1.388 +        else if( ret > 0 )
   1.389 +            { /* Not all characters where converted, return
   1.390 +                 value indicating how many bytes in total are left unconverted */
   1.391 +            return (aForeign.Length() - currPos - convSegment.Length() + ret);
   1.392 +            }
   1.393 +
   1.394 +        /* Increase to skip converted data */
   1.395 +        currPos += convSegment.Length();
   1.396 +        if( escPos != KErrNotFound )
   1.397 +            { /* Increase to skip escape sequence */
   1.398 +            currPos += KLit8EscapeSequence().Length();
   1.399 +            }
   1.400 +        else if( shiftPos != KErrNotFound )
   1.401 +            { /* Increase to skip shift byte */
   1.402 +            currPos += 1;
   1.403 +            }
   1.404 +
   1.405 +        }
   1.406 +
   1.407 +    return 0;
   1.408 +	}
   1.409 +
   1.410 +
   1.411 +TBool CISO2022KRImplementation::IsInThisCharacterSetL(
   1.412 +    TBool& aSetToTrue, 
   1.413 +    TInt& aConfidenceLevel, 
   1.414 +    const TDesC8& /*aBuf*/)
   1.415 +	{
   1.416 +/*	
   1.417 +    aSetToTrue=ETrue;
   1.418 +    aConfidenceLevel=50;
   1.419 +    
   1.420 +    TUint8 ch(0);
   1.421 +    for (TInt i=0;i<aBuf.Length();i++)
   1.422 +        {
   1.423 +        ch=aBuf[i];
   1.424 +        if (ch<0x7F)
   1.425 +            {
   1.426 +            continue;
   1.427 +            }
   1.428 +        else if (0xa1<=ch&&ch<=0xfe)
   1.429 +            {
   1.430 +            i++;
   1.431 +            __ASSERT_DEBUG(i<aBuf.Length(),User::Panic(_L("IS2022KR"),__LINE__));
   1.432 +            }
   1.433 +        else
   1.434 +            {
   1.435 +            aConfidenceLevel=0;
   1.436 +            aSetToTrue=EFalse;
   1.437 +            break;
   1.438 +            }
   1.439 +        }    
   1.440 +	return aSetToTrue;
   1.441 +*/
   1.442 +	aSetToTrue=ETrue;
   1.443 +	aConfidenceLevel=0;
   1.444 +	return EFalse;
   1.445 +	}
   1.446 +
   1.447 +CISO2022KRImplementation* CISO2022KRImplementation::NewL()
   1.448 +    {
   1.449 +    CISO2022KRImplementation* self = new(ELeave) CISO2022KRImplementation;
   1.450 +    return self;
   1.451 +    }
   1.452 +
   1.453 +CISO2022KRImplementation::CISO2022KRImplementation()
   1.454 +    {
   1.455 +    //default constructor.. do nothing
   1.456 +    }
   1.457 +
   1.458 +CISO2022KRImplementation::~CISO2022KRImplementation()
   1.459 +    {
   1.460 +    //default destructor .. do nothing
   1.461 +    }
   1.462 +
   1.463 +// ECOM CREATION FUNCTION
   1.464 +const TImplementationProxy ImplementationTable[] = 
   1.465 +    {
   1.466 +    // Note: This is the same UID as defined in old mmp-file
   1.467 +    // Used also in 12221212.rss ( implementation_uid )
   1.468 +    IMPLEMENTATION_PROXY_ENTRY( 0x20010101, CISO2022KRImplementation::NewL )
   1.469 +    };
   1.470 +
   1.471 +EXPORT_C const TImplementationProxy* ImplementationGroupProxy( TInt& aTableCount )
   1.472 +    {
   1.473 +    aTableCount = sizeof( ImplementationTable ) / sizeof(TImplementationProxy);
   1.474 +    return ImplementationTable;
   1.475 +    }
   1.476 +