1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/textandloc/charconvfw/charconvplugins/src/plugins/iso2022kr.cpp Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,473 @@
1.4 +/*
1.5 +* Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies).
1.6 +* All rights reserved.
1.7 +* This component and the accompanying materials are made available
1.8 +* under the terms of "Eclipse Public License v1.0"
1.9 +* which accompanies this distribution, and is available
1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
1.11 +*
1.12 +* Initial Contributors:
1.13 +* Nokia Corporation - initial contribution.
1.14 +*
1.15 +* Contributors:
1.16 +*
1.17 +* Description: ISO2022kr conversion plugin
1.18 +*
1.19 +*/
1.20 +
1.21 +
1.22 +// INCLUDES
1.23 +#include <e32std.h>
1.24 +#include <charconv.h>
1.25 +#include <convgeneratedcpp.h>
1.26 +#include <ecom/implementationproxy.h>
1.27 +#include "cp949table.h"
1.28 +#include "charactersetconverter.h"
1.29 +
1.30 +static const TUint KBitsForNonStandardStates = 0x03; // mask applied to aState before it is compared with the state value below
1.31 +static const TUint KShiftedToKSCState = 0x01; // masked aState value meaning "shifted out to KSC"; any other value is treated as ASCII
1.32 +
1.33 +static const TUint KMaxSizeOfTmpBuffer = 1024; // size of the temporary buffer ConvertToUnicode allocates for KSC data
1.34 +
1.35 +static const TUint8 KMaxAscii = 0x9f; // ConvertFromUnicode treats bytes above this as KSC bytes (note: higher than 7-bit ASCII's 0x7f)
1.36 +// Designator written first by ConvertFromUnicode and skipped by ConvertToUnicode. NOTE(review): RFC 1557 gives it as ESC $ ) C (1B 24 29 43); this literal is ESC $ C - confirm.
1.37 +_LIT8(KLit8EscapeSequence, "\x1b\x24\x43");
1.38 +// ConvertFromUnicode emits SHIFT_OUT_BYTE before KSC bytes and SHIFT_IN_BYTE before ASCII bytes
1.39 +#define SHIFT_IN_BYTE 0x0F
1.40 +#define SHIFT_OUT_BYTE 0x0E
1.41 +// Tracks which shift byte ConvertFromUnicode emitted last
1.42 +typedef enum
1.43 +{
1.44 + EISO2022Initialize, // nothing emitted yet, so the first byte always gets a shift byte
1.45 + EISO2022Ascii, // SHIFT_IN_BYTE was emitted last
1.46 + EISO2022KSC // SHIFT_OUT_BYTE was emitted last
1.47 +} TISO2022FromUniState;
1.48 +
1.49 +// ISO-2022-KR converter plugin built on the CCharacterSetConverterPluginInterface interface
1.50 +class CISO2022KRImplementation : public CCharacterSetConverterPluginInterface
1.51 +{
1.52 + public:
1.53 + virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters(); // forwards to the CP949 table
1.54 + // Unicode -> ISO-2022-KR: escape sequence first, then CP949 conversion with shift bytes inserted
1.55 + virtual TInt ConvertFromUnicode(
1.56 + CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
1.57 + const TDesC8& aReplacementForUnconvertibleUnicodeCharacters,
1.58 + TDes8& aForeign,
1.59 + const TDesC16& aUnicode,
1.60 + CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters );
1.61 + // ISO-2022-KR -> Unicode: escape sequences and shift bytes are skipped, shift state is kept in aState
1.62 + virtual TInt ConvertToUnicode(
1.63 + CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
1.64 + TDes16& aUnicode,
1.65 + const TDesC8& aForeign,
1.66 + TInt& aState,
1.67 + TInt& aNumberOfUnconvertibleCharacters,
1.68 + TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter );
1.69 + // Detection hook: the current implementation reports confidence 0 and returns EFalse
1.70 + virtual TBool IsInThisCharacterSetL(
1.71 + TBool& aSetToTrue,
1.72 + TInt& aConfidenceLevel,
1.73 + const TDesC8& );
1.74 + // Factory referenced from the ECOM implementation table
1.75 + static CISO2022KRImplementation* NewL();
1.76 +
1.77 + virtual ~CISO2022KRImplementation();
1.78 + private: // the constructor is private, so instances are created through NewL()
1.79 + CISO2022KRImplementation();
1.80 +};
1.81 +
1.82 +// FUNCTION DEFINITIONS
1.83 +const TDesC8& CISO2022KRImplementation::ReplacementForUnconvertibleUnicodeCharacters()
1.84 + { // Returns whatever replacement sequence the CP949 table defines; this plugin adds nothing of its own
1.85 + return CnvCp949Table::ReplacementForUnconvertibleUnicodeCharacters();
1.86 + }
1.87 +
1.88 +/**
1.89 + * Converts Unicode text to ISO-2022-KR and returns the number of unconverted characters.
1.90 + *
1.91 + * Writes the escape sequence, converts with the CP949 table, then walks the output:
1.92 + * bytes above KMaxAscii get their top bit cleared and a shift-out/shift-in byte is
1.93 + * inserted wherever the output switches between KSC and ASCII.
1.94 + *
1.95 + * @return the negative error from DoConvertFromUnicode unchanged, otherwise the count of
1.96 + *         trailing Unicode characters left unconverted (all of them when aForeign is too
1.97 + *         small to hold even the escape sequence).
1.98 + */
1.99 +TInt CISO2022KRImplementation::ConvertFromUnicode(
1.100 +    CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
1.101 +    const TDesC8& aReplacementForUnconvertibleUnicodeCharacters,
1.102 +    TDes8& aForeign,
1.103 +    const TDesC16& aUnicode,
1.104 +    CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
1.105 +    {
1.106 +    TUint outputConversionFlags = 0;
1.107 +    TUint inputConversionFlags = CCnvCharacterSetConverter::EInputConversionFlagAppend;
1.108 +    TISO2022FromUniState currState = EISO2022Initialize;
1.109 +    aForeign.SetLength(0);
1.110 +    if( aForeign.MaxLength() < KLit8EscapeSequence().Length() )
1.111 +        { /* Append() below would panic; nothing can be converted into this buffer */
1.112 +        return aUnicode.Length();
1.113 +        }
1.114 +    /* Start with escape sequence */
1.115 +    aForeign.Append( KLit8EscapeSequence );
1.116 +
1.117 +    TInt ret = CCnvCharacterSetConverter::DoConvertFromUnicode( CnvCp949Table::ConversionData(),
1.118 +        aDefaultEndiannessOfForeignCharacters,
1.119 +        aReplacementForUnconvertibleUnicodeCharacters,
1.120 +        aForeign,
1.121 +        aUnicode,
1.122 +        aIndicesOfUnconvertibleCharacters,
1.123 +        outputConversionFlags,
1.124 +        inputConversionFlags );
1.125 +    if( ret < 0 )
1.126 +        { /* Conversion failed: report the error as it is instead of post-processing partial output */
1.127 +        return ret;
1.128 +        }
1.129 +
1.130 +    /* Insert shift in and out bytes as needed; the escape sequence itself is skipped */
1.131 +    TInt currPos = KLit8EscapeSequence().Length();
1.132 +    while( currPos < aForeign.Length() )
1.133 +        {
1.134 +        TUint8 shiftByte = 0;
1.135 +        if( aForeign[currPos] > KMaxAscii )
1.136 +            { /* KSC byte */
1.137 +            if( currState != EISO2022KSC )
1.138 +                { /* Insert shift out byte */
1.139 +                shiftByte = SHIFT_OUT_BYTE;
1.140 +                currState = EISO2022KSC;
1.141 +                }
1.142 +            /* Clear the 8th bit */
1.143 +            aForeign[currPos] &= 0x7f;
1.144 +            }
1.145 +        else if( currState != EISO2022Ascii )
1.146 +            { /* ASCII byte while not in ASCII state: insert shift in byte */
1.147 +            shiftByte = SHIFT_IN_BYTE;
1.148 +            currState = EISO2022Ascii;
1.149 +            }
1.150 +
1.151 +        if( shiftByte )
1.152 +            {
1.153 +            if( (aForeign.Length() + 1) > aForeign.MaxLength() )
1.154 +                { /* Buffer is full: drop the last character (two bytes for KSC) to make room */
1.155 +                const TInt dropped = ( aForeign[ aForeign.Length() - 1 ] > KMaxAscii ) ? 2 : 1;
1.156 +                aForeign.SetLength( aForeign.Length() - dropped );
1.157 +                /* One more Unicode character is left unconverted.
1.158 +                   TBD: aIndicesOfUnconvertibleCharacters is not adjusted for the dropped character */
1.159 +                ret++;
1.160 +                }
1.161 +            const TPtrC8 shiftDes( &shiftByte, 1 );
1.162 +            aForeign.Insert( currPos, shiftDes );
1.163 +            currPos++; /* step over the inserted shift byte */
1.164 +            }
1.165 +        currPos++; /* step over the current byte */
1.166 +        }
1.167 +    return ret;
1.168 +    }
1.169 +
1.170 +/**
1.171 + * Flips aState between the default ASCII state and the shifted-to-KSC state,
1.172 + * looking only at the bits selected by KBitsForNonStandardStates.
1.173 + */
1.174 +static void ToggleKscShiftState( TInt& aState )
1.175 +    {
1.176 +    if( (aState & KBitsForNonStandardStates) == KShiftedToKSCState )
1.177 +        { /* Switch back to default ASCII */
1.178 +        aState &= ~(KShiftedToKSCState);
1.179 +        }
1.180 +    else
1.181 +        { /* Switch to KSC */
1.182 +        aState |= KShiftedToKSCState;
1.183 +        }
1.184 +    }
1.185 +
1.186 +/**
1.187 + * Converts ISO-2022-KR text to Unicode.
1.188 + *
1.189 + * The input is processed in segments delimited by escape sequences and shift bytes, which
1.190 + * are skipped. Segments read while shifted to KSC get the top bit set on every byte before
1.191 + * they go through the CP949 table; other segments are passed to the table unchanged.
1.192 + * KSC segments are converted through a temporary heap buffer, in chunks of at most
1.193 + * KMaxSizeOfTmpBuffer bytes.
1.194 + *
1.195 + * @param aDefaultEndiannessOfForeignCharacters passed through to DoConvertToUnicode.
1.196 + * @param aUnicode receives the converted text (converted with the append flag set).
1.197 + * @param aForeign the ISO-2022-KR bytes to convert.
1.198 + * @param aState shift state carried between calls; a shift byte that ends aForeign is
1.199 + *        applied before returning so that the next call starts in the right state.
1.200 + * @param aNumberOfUnconvertibleCharacters reset to 0, then accumulated over all segments.
1.201 + * @param aIndexOfFirstByteOfFirstUnconvertibleCharacter set relative to aForeign when the
1.202 + *        first unconvertible character is seen; not written otherwise.
1.203 + *
1.204 + * @return 0 when all input was consumed, the number of trailing bytes of aForeign left
1.205 + *         unconverted, or a negative error: EErrorIllFormedInput for an odd-length KSC
1.206 + *         segment, KErrNoMemory when the temporary buffer cannot be allocated, or whatever
1.207 + *         DoConvertToUnicode reported.
1.208 + */
1.209 +TInt CISO2022KRImplementation::ConvertToUnicode(
1.210 +    CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters,
1.211 +    TDes16& aUnicode,
1.212 +    const TDesC8& aForeign,
1.213 +    TInt& aState,
1.214 +    TInt& aNumberOfUnconvertibleCharacters,
1.215 +    TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
1.216 +    {
1.217 +    TInt ret = 0;
1.218 +    TInt currPos = 0;
1.219 +    TPtrC8 convSegment;
1.220 +    TBool changeState = EFalse;
1.221 +
1.222 +    TUint outputConversionFlags = 0;
1.223 +    TUint inputConversionFlags = CCnvCharacterSetConverter::EInputConversionFlagAppend;
1.224 +    TInt numberOfUnconvertibleCharacters = 0;
1.225 +    TInt indexOfFirstByteOfFirstUnconvertibleCharacter = 0;
1.226 +    aNumberOfUnconvertibleCharacters = 0;
1.227 +
1.228 +    while( currPos < aForeign.Length() )
1.229 +        {
1.230 +        const TPtrC8 currSegment( aForeign.Mid( currPos ) );
1.231 +
1.232 +        /* First apply the state change requested by the shift byte skipped in the last pass */
1.233 +        if( changeState )
1.234 +            {
1.235 +            changeState = EFalse;
1.236 +            ToggleKscShiftState( aState );
1.237 +            }
1.238 +        const TBool inKsc = ( (aState & KBitsForNonStandardStates) == KShiftedToKSCState );
1.239 +
1.240 +        /* Search for escape which should be skipped */
1.241 +        TInt escPos = currSegment.Find( KLit8EscapeSequence );
1.242 +
1.243 +        /* Search for shift in byte */
1.244 +        const TInt shiftInPos = currSegment.Locate( SHIFT_IN_BYTE );
1.245 +
1.246 +        /* Search for shift out byte */
1.247 +        const TInt shiftOutPos = currSegment.Locate( SHIFT_OUT_BYTE );
1.248 +
1.249 +        /* Pick the nearest shift byte; it requests a state change only when it names
1.250 +           the state we are not already in */
1.251 +        TInt shiftPos = KErrNotFound;
1.252 +        if( (shiftInPos != KErrNotFound) &&
1.253 +            ((shiftInPos < shiftOutPos) || (shiftOutPos == KErrNotFound)) )
1.254 +            { /* shift in is nearer or shift out not found */
1.255 +            shiftPos = shiftInPos;
1.256 +            changeState = inKsc;
1.257 +            }
1.258 +        else if( shiftOutPos != KErrNotFound )
1.259 +            { /* shift out is nearer or shift in not found */
1.260 +            shiftPos = shiftOutPos;
1.261 +            changeState = !inKsc;
1.262 +            }
1.263 +
1.264 +        if( shiftPos == KErrNotFound )
1.265 +            { /* Shift byte not found, same coding for the rest of the data */
1.266 +            if( escPos == KErrNotFound )
1.267 +                { /* No escape sequence either, just convert the rest */
1.268 +                convSegment.Set( currSegment );
1.269 +                }
1.270 +            }
1.271 +        else if( (escPos == KErrNotFound) || (shiftPos < escPos) )
1.272 +            { /* Shift byte comes first (or there is no escape sequence): convert the data
1.273 +                 preceding the shift byte, unless the shift byte is the first byte */
1.274 +            if( shiftPos == 0 )
1.275 +                { /* No data precedes the shift byte, just skip it and continue */
1.276 +                currPos += 1;
1.277 +                continue;
1.278 +                }
1.279 +            convSegment.Set( currSegment.Left( shiftPos ) );
1.280 +            /* Clear so the escape sequence is not handled in this pass */
1.281 +            escPos = KErrNotFound;
1.282 +            }
1.283 +
1.284 +        if( escPos != KErrNotFound )
1.285 +            { /* Escape sequence found before any shift bytes: clear the possible state
1.286 +                 change and convert the data preceding the escape sequence, unless the
1.287 +                 escape sequence is at the beginning */
1.288 +            changeState = EFalse;
1.289 +            if( escPos == 0 )
1.290 +                { /* No data precedes the escape sequence, just skip it and continue */
1.291 +                currPos += KLit8EscapeSequence().Length();
1.292 +                continue;
1.293 +                }
1.294 +            convSegment.Set( currSegment.Left( escPos ) );
1.295 +            }
1.296 +
1.297 +        if( inKsc )
1.298 +            { /* Convert KSC encoded */
1.299 +            if( (convSegment.Length() & 0x1) )
1.300 +                { /* KSC should have even amount of bytes */
1.301 +                ret = CCnvCharacterSetConverter::EErrorIllFormedInput;
1.302 +                }
1.303 +            else
1.304 +                {
1.305 +                /* One scratch buffer per segment; New() returns NULL instead of leaving */
1.306 +                HBufC8* tmpForeign = HBufC8::New( KMaxSizeOfTmpBuffer );
1.307 +                if( tmpForeign == NULL )
1.308 +                    { /* Report out-of-memory to the caller instead of panicking the thread */
1.309 +                    return KErrNoMemory;
1.310 +                    }
1.311 +
1.312 +                TPtr8 chunk( tmpForeign->Des() );
1.313 +                /* KMaxSizeOfTmpBuffer is even, so chunking never splits a two-byte character */
1.314 +                const TInt maxChunkLength = KMaxSizeOfTmpBuffer;
1.315 +
1.316 +                TInt convPos = 0;
1.317 +                while( convPos < convSegment.Length() )
1.318 +                    {
1.319 +                    /* Convert in chunks of at most one scratch buffer */
1.320 +                    TInt chunkLength = convSegment.Length() - convPos;
1.321 +                    if( chunkLength > maxChunkLength )
1.322 +                        {
1.323 +                        chunkLength = maxChunkLength;
1.324 +                        }
1.325 +                    chunk.Copy( convSegment.Mid( convPos, chunkLength ) );
1.326 +                    for( TInt i = 0 ; i < chunkLength ; i++ )
1.327 +                        { /* Set highest bit in characters */
1.328 +                        chunk[i] |= 0x80;
1.329 +                        }
1.330 +
1.331 +                    numberOfUnconvertibleCharacters = 0;
1.332 +                    ret = CCnvCharacterSetConverter::DoConvertToUnicode( CnvCp949Table::ConversionData(),
1.333 +                        aDefaultEndiannessOfForeignCharacters,
1.334 +                        aUnicode, chunk,
1.335 +                        numberOfUnconvertibleCharacters,
1.336 +                        indexOfFirstByteOfFirstUnconvertibleCharacter,
1.337 +                        outputConversionFlags,
1.338 +                        inputConversionFlags );
1.339 +                    if( numberOfUnconvertibleCharacters != 0 &&
1.340 +                        aNumberOfUnconvertibleCharacters == 0 )
1.341 +                        { /* First unconvertible found, set index relative to actual input buffer */
1.342 +                        aIndexOfFirstByteOfFirstUnconvertibleCharacter = (currPos + convPos + indexOfFirstByteOfFirstUnconvertibleCharacter);
1.343 +                        }
1.344 +                    aNumberOfUnconvertibleCharacters += numberOfUnconvertibleCharacters;
1.345 +
1.346 +                    if( ret > 0 )
1.347 +                        { /* Not all were converted, make the count relative to convSegment */
1.348 +                        ret = (convSegment.Length() - convPos - chunkLength + ret);
1.349 +                        }
1.350 +                    if( ret != 0 )
1.351 +                        { /* Error or partial conversion: stop, it is handled after this loop */
1.352 +                        break;
1.353 +                        }
1.354 +
1.355 +                    convPos += chunkLength;
1.356 +                    }
1.357 +                /* Reached on success and after a break alike, so the buffer is always freed */
1.358 +                delete tmpForeign;
1.359 +                }
1.360 +            }
1.361 +        else
1.362 +            { /* Default state: the segment goes through the CP949 table as it is */
1.363 +            numberOfUnconvertibleCharacters = 0;
1.364 +            ret = CCnvCharacterSetConverter::DoConvertToUnicode( CnvCp949Table::ConversionData(),
1.365 +                aDefaultEndiannessOfForeignCharacters,
1.366 +                aUnicode, convSegment,
1.367 +                numberOfUnconvertibleCharacters,
1.368 +                indexOfFirstByteOfFirstUnconvertibleCharacter,
1.369 +                outputConversionFlags,
1.370 +                inputConversionFlags );
1.371 +            if( numberOfUnconvertibleCharacters != 0 &&
1.372 +                aNumberOfUnconvertibleCharacters == 0 )
1.373 +                { /* First unconvertible found, set index relative to actual input buffer */
1.374 +                aIndexOfFirstByteOfFirstUnconvertibleCharacter = currPos + indexOfFirstByteOfFirstUnconvertibleCharacter;
1.375 +                }
1.376 +            aNumberOfUnconvertibleCharacters += numberOfUnconvertibleCharacters;
1.377 +            }
1.378 +
1.379 +        if( ret < 0 )
1.380 +            { /* Error during conversion */
1.381 +            return ret;
1.382 +            }
1.383 +        else if( ret > 0 )
1.384 +            { /* Not all characters were converted, return value indicating
1.385 +                 how many bytes in total are left unconverted */
1.386 +            return (aForeign.Length() - currPos - convSegment.Length() + ret);
1.387 +            }
1.388 +
1.389 +        /* Increase to skip converted data */
1.390 +        currPos += convSegment.Length();
1.391 +        if( escPos != KErrNotFound )
1.392 +            { /* Increase to skip escape sequence */
1.393 +            currPos += KLit8EscapeSequence().Length();
1.394 +            }
1.395 +        else if( shiftPos != KErrNotFound )
1.396 +            { /* Increase to skip shift byte */
1.397 +            currPos += 1;
1.398 +            }
1.399 +        }
1.400 +
1.401 +    /* aForeign may end on a shift byte, in which case the loop exits before the pending
1.402 +       state change is applied; apply it now so it is not lost for the next call */
1.403 +    if( changeState )
1.404 +        {
1.405 +        ToggleKscShiftState( aState );
1.406 +        }
1.407 +    return 0;
1.408 +    }
1.409 +
1.410 +
1.411 +TBool CISO2022KRImplementation::IsInThisCharacterSetL(
1.412 + TBool& aSetToTrue,
1.413 + TInt& aConfidenceLevel,
1.414 + const TDesC8& /*aBuf*/)
1.415 + { // Detection is effectively switched off: the sample is ignored, confidence is 0 and EFalse is returned
1.416 +/* Earlier detection heuristic, disabled and kept for reference only:
1.417 + aSetToTrue=ETrue;
1.418 + aConfidenceLevel=50;
1.419 +
1.420 + TUint8 ch(0);
1.421 + for (TInt i=0;i<aBuf.Length();i++)
1.422 + {
1.423 + ch=aBuf[i];
1.424 + if (ch<0x7F)
1.425 + {
1.426 + continue;
1.427 + }
1.428 + else if (0xa1<=ch&&ch<=0xfe)
1.429 + {
1.430 + i++;
1.431 + __ASSERT_DEBUG(i<aBuf.Length(),User::Panic(_L("IS2022KR"),__LINE__));
1.432 + }
1.433 + else
1.434 + {
1.435 + aConfidenceLevel=0;
1.436 + aSetToTrue=EFalse;
1.437 + break;
1.438 + }
1.439 + }
1.440 + return aSetToTrue;
1.441 +*/
1.442 + aSetToTrue=ETrue; // the flag is set even though no detection is actually performed
1.443 + aConfidenceLevel=0; // always report zero confidence
1.444 + return EFalse;
1.445 + }
1.446 +
1.447 +CISO2022KRImplementation* CISO2022KRImplementation::NewL()
1.448 +    {
1.449 +    // Single-phase factory: the constructor does nothing, so the new(ELeave) result is returned directly
1.450 +    return new(ELeave) CISO2022KRImplementation;
1.451 +    }
1.452 +
1.453 +CISO2022KRImplementation::CISO2022KRImplementation()
1.454 + {
1.455 + // Nothing to initialise: the class declares no data members of its own
1.456 + }
1.457 +
1.458 +CISO2022KRImplementation::~CISO2022KRImplementation()
1.459 + {
1.460 + // Nothing to release: the class declares no data members of its own
1.461 + }
1.462 +
1.463 +// ECOM implementation table: pairs the implementation UID with the factory function to call
1.464 +const TImplementationProxy ImplementationTable[] =
1.465 + {
1.466 + // Note: This is the same UID as was defined in the old mmp file;
1.467 + // it is also used in 12221212.rss ( implementation_uid )
1.468 + IMPLEMENTATION_PROXY_ENTRY( 0x20010101, CISO2022KRImplementation::NewL )
1.469 + };
1.470 +
1.471 +EXPORT_C const TImplementationProxy* ImplementationGroupProxy( TInt& aTableCount )
1.472 +    { // Exported lookup: reports the number of entries in ImplementationTable and returns the table
1.473 +    aTableCount = sizeof( ImplementationTable ) / sizeof( ImplementationTable[0] );
1.474 +    return ImplementationTable;
1.475 +    }
1.476 +