os/textandloc/charconvfw/charconvplugins/src/plugins/iso2022kr.cpp
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 /*
     2 * Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies).
     3 * All rights reserved.
     4 * This component and the accompanying materials are made available
     5 * under the terms of "Eclipse Public License v1.0"
     6 * which accompanies this distribution, and is available
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
     8 *
     9 * Initial Contributors:
    10 * Nokia Corporation - initial contribution.
    11 *
    12 * Contributors:
    13 *
    14 * Description:   ISO2022kr conversion plugin
    15 *
    16 */
    17 
    18 
    19 // INCLUDES
    20 #include <e32std.h>
    21 #include <charconv.h>
    22 #include <convgeneratedcpp.h>
    23 #include <ecom/implementationproxy.h>
    24 #include "cp949table.h"
    25 #include "charactersetconverter.h"
    26 
    27 static const TUint KBitsForNonStandardStates = 0x03;
    28 static const TUint KShiftedToKSCState = 0x01;
    29 
    30 static const TUint KMaxSizeOfTmpBuffer = 1024;
    31 
    32 static const TUint8 KMaxAscii = 0x9f;
    33 
    34 _LIT8(KLit8EscapeSequence, "\x1b\x24\x43");
    35 
    36 #define SHIFT_IN_BYTE  0x0F
    37 #define SHIFT_OUT_BYTE 0x0E
    38 
    39 typedef enum
    40 {
    41     EISO2022Initialize,
    42     EISO2022Ascii,
    43     EISO2022KSC
    44 } TISO2022FromUniState;
    45 
    46 // New Interface class
    47 class CISO2022KRImplementation : public CCharacterSetConverterPluginInterface
    48 {
    49     public:
    50         virtual const TDesC8& ReplacementForUnconvertibleUnicodeCharacters();
    51 
    52         virtual TInt ConvertFromUnicode(
    53             CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
    54             const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
    55             TDes8& aForeign, 
    56             const TDesC16& aUnicode, 
    57             CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters );
    58 
    59         virtual TInt ConvertToUnicode(
    60             CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
    61             TDes16& aUnicode, 
    62             const TDesC8& aForeign, 
    63             TInt& aState, 
    64             TInt& aNumberOfUnconvertibleCharacters, 
    65             TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter );
    66 
    67         virtual TBool IsInThisCharacterSetL(
    68             TBool& aSetToTrue, 
    69             TInt& aConfidenceLevel, 
    70             const TDesC8& );
    71 
    72         static CISO2022KRImplementation* NewL();
    73 
    74         virtual ~CISO2022KRImplementation();
    75     private:
    76         CISO2022KRImplementation();
    77 };
    78 
    79 // FUNCTION DEFINITIONS
    80 const TDesC8& CISO2022KRImplementation::ReplacementForUnconvertibleUnicodeCharacters()
    81 	{
    82 	return CnvCp949Table::ReplacementForUnconvertibleUnicodeCharacters();
    83 	}
    84 
    85 TInt CISO2022KRImplementation::ConvertFromUnicode(
    86     CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
    87     const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
    88     TDes8& aForeign, 
    89     const TDesC16& aUnicode, 
    90     CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
    91 	{
    92     TInt ret;
    93     TInt currPos = 3;
    94     TUint outputConversionFlags = 0;
    95     TUint inputConversionFlags = CCnvCharacterSetConverter::EInputConversionFlagAppend;
    96     TISO2022FromUniState currState = EISO2022Initialize;
    97     TUint8 shiftByte = 0;
    98     TPtr8 shiftBytePtr(NULL, 0);
    99 
   100     aForeign.SetLength(0);
   101 
   102     /* Start with escape sequence */
   103     aForeign.Append( KLit8EscapeSequence );
   104 
   105     ret = CCnvCharacterSetConverter::DoConvertFromUnicode( CnvCp949Table::ConversionData(),
   106                                                            aDefaultEndiannessOfForeignCharacters,
   107                                                            aReplacementForUnconvertibleUnicodeCharacters,
   108                                                            aForeign,
   109                                                            aUnicode,
   110                                                            aIndicesOfUnconvertibleCharacters,
   111                                                            outputConversionFlags, 
   112                                                            inputConversionFlags );
   113     /* Append shift in and out bytes as needed */
   114     while( currPos < aForeign.Length() )
   115         {
   116         TUint8 *currChar = (TUint8 *)aForeign.Mid(currPos).Ptr();
   117         if( *currChar > KMaxAscii )
   118             { /* KSC character */
   119             if( currState != EISO2022KSC )
   120                 { /* Insert shift out byte */
   121                 shiftByte = SHIFT_OUT_BYTE;
   122                 currState = EISO2022KSC;
   123                 }
   124 
   125             /* Clear the 8th bit */
   126             *currChar = (*currChar & ~(0x80));
   127             }
   128         else
   129             { /* ASCII character */
   130             if( currState != EISO2022Ascii )
   131                 { /* Insert shift in byte */
   132                 shiftByte = SHIFT_IN_BYTE;
   133                 currState = EISO2022Ascii;
   134                 }
   135             }
   136 
   137         if( shiftByte )
   138             {
   139             if( (aForeign.Length() + 1) > aForeign.MaxLength() )
   140                 { /* Make room for shift byte */
   141                 if( aForeign[ (aForeign.Length() - 1) ] > KMaxAscii )
   142                     { /* Drop a dual byte KSC character */
   143                     aForeign.SetLength( aForeign.Length() - 2 );
   144                     }
   145                 else
   146                     { /* Drop a single byte ASCII character */
   147                     aForeign.SetLength( aForeign.Length() - 1 );
   148                     }
   149                     /* Increase unconverted amount */
   150                     ret++;
   151                 /* TBD, propably should try to fix aIndicesOfUnconvertibleCharacters
   152                         if possible */
   153                 }
   154                 shiftBytePtr.Set( &shiftByte, 1, 1 );
   155                 aForeign.Insert( currPos, shiftBytePtr );
   156                 currPos++;
   157                 shiftByte = 0;
   158             }
   159 
   160         /* Skip current character */
   161         currPos++;
   162         }
   163 
   164     return ret;
   165     }
   166 
   167 TInt CISO2022KRImplementation::ConvertToUnicode(
   168     CCnvCharacterSetConverter::TEndianness aDefaultEndiannessOfForeignCharacters, 
   169     TDes16& aUnicode, 
   170     const TDesC8& aForeign, 
   171     TInt& aState, 
   172     TInt& aNumberOfUnconvertibleCharacters, 
   173     TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
   174 	{
   175     TInt err;
   176     TInt ret = 0;
   177     TInt currPos = 0;
   178     TInt convPos = 0;
   179     TInt shiftInPos = KErrNotFound;
   180     TInt shiftOutPos = KErrNotFound;
   181     TInt shiftPos = KErrNotFound;
   182     TInt escPos = KErrNotFound;
   183     TPtrC8 currSegment;
   184     TPtrC8 convSegment;
   185     TBool changeState = EFalse;
   186 
   187     TUint outputConversionFlags = 0;
   188     TUint inputConversionFlags = CCnvCharacterSetConverter::EInputConversionFlagAppend;
   189     TInt numberOfUnconvertibleCharacters = 0;
   190     TInt indexOfFirstByteOfFirstUnconvertibleCharacter = 0;
   191     aNumberOfUnconvertibleCharacters = 0;
   192 
   193     while( currPos < aForeign.Length() )
   194         {
   195 
   196         currSegment.Set( aForeign.Mid( currPos ) );
   197 
   198         /* First change state if needed */
   199         if( changeState )
   200             {
   201             changeState = EFalse;
   202             if( (aState & KBitsForNonStandardStates) == KShiftedToKSCState )
   203                 { /* Switch back to default ASCII */
   204                 aState &= ~(KShiftedToKSCState);
   205                 }
   206             else
   207                 { /* Switch to KSC */
   208                 aState |= KShiftedToKSCState; 
   209                 }
   210             }
   211 
   212         /* Search for escape which should be skipped */
   213         escPos = currSegment.Find( KLit8EscapeSequence );
   214         
   215         /* Search for shift in byte */
   216         shiftInPos = currSegment.Locate( SHIFT_IN_BYTE );
   217 
   218         /* Search for shift out byte */
   219         shiftOutPos = currSegment.Locate( SHIFT_OUT_BYTE );
   220 
   221         /* Set shift pos according to found shift bytes */
   222         if( shiftInPos == KErrNotFound &&
   223             shiftOutPos == KErrNotFound )
   224             { /* Neither found */
   225             shiftPos = KErrNotFound;
   226             }
   227         else
   228             {
   229             if( (shiftInPos != KErrNotFound) &&
   230                 ((shiftInPos < shiftOutPos) || (shiftOutPos == KErrNotFound)) )
   231                 { /* shift in is nearer or shift out not found */
   232                 shiftPos = shiftInPos;
   233                 /* Set state change if needed */
   234                 if( (aState & KBitsForNonStandardStates) == KShiftedToKSCState )
   235                     {
   236                     changeState = ETrue;
   237                     }
   238                 }
   239             else
   240                 { /* shift out must be nearer or shift in not fouind */
   241                 shiftPos = shiftOutPos;
   242                 /* Set state change if needed */
   243                 if( (aState & KBitsForNonStandardStates) != KShiftedToKSCState )
   244                     {
   245                     changeState = ETrue;
   246                     }
   247                 }
   248             }
   249 
   250         if( shiftPos == KErrNotFound )
   251             { /* Shift byte not found, same coding for the rest of the data */
   252             if( escPos == KErrNotFound )
   253                 { /* No escape sequence either, just convert the rest */
   254                 convSegment.Set( currSegment );
   255                 }
   256             }
   257         else if( ((escPos != KErrNotFound) && (shiftPos < escPos)) ||
   258                  (escPos == KErrNotFound) )
   259             { /* Shift byte found and it comes before escape sequence or no escape
   260                  sequence was found, convert data preceeding the shift byte if shift
   261                  byte isn't the first character */
   262                 if( shiftPos == 0 )
   263                 { /* No data to convert preceeds the shift byte, just skip it and continue */
   264                     currPos += 1;
   265                     continue;
   266                 }
   267                 convSegment.Set( currSegment.Left( shiftPos ) );
   268                 /* Clear to prevent convert to escape sequence */
   269                 escPos = KErrNotFound;
   270             }
   271 
   272         if( escPos != KErrNotFound )
   273             { /* Escape sequence found before any shift bytes,
   274                  clear possible state change and convert data
   275                  preceeding the escape sequence if
   276                  escape sequence is not at the beginning */
   277             changeState = EFalse;
   278             if( escPos == 0 )
   279                 { /* No data to convert preceeds the escape sequence, just skip it continue */
   280                 currPos += KLit8EscapeSequence().Length();
   281                 continue;
   282                 }
   283             convSegment.Set( currSegment.Left( escPos ) );
   284             }
   285 
   286         if( (aState & KBitsForNonStandardStates) == KShiftedToKSCState )
   287             { /* Convert KSC encoded */
   288             HBufC8 *tmpForeign = NULL;
   289 
   290             if( (convSegment.Length() & 0x1) )
   291                 { /* KSC should have even amount of bytes */
   292                 ret = CCnvCharacterSetConverter::EErrorIllFormedInput;
   293                 }
   294             else
   295                 {
   296                 convPos = 0;
   297                 while( convPos < convSegment.Length() )
   298                     {
   299                     TRAP( err, tmpForeign = HBufC8::NewL( KMaxSizeOfTmpBuffer ) );
   300                     if( err != KErrNone )
   301                         {
   302                         User::Panic( _L("ISO-2022-KR"), err );
   303                         }
   304 
   305                     if( convSegment.Length() < KMaxSizeOfTmpBuffer )
   306                         { /* Convert whole segment */
   307                         tmpForeign->Des().Copy( convSegment );
   308                         }
   309                     else
   310                         { /* Convert in chunks */
   311                         if( (convPos + KMaxSizeOfTmpBuffer) >= convSegment.Length() )
   312                             { /* Last chunk */
   313                             tmpForeign->Des().Copy( convSegment.Mid( convPos ) );
   314                             }
   315                         else
   316                             {
   317                             tmpForeign->Des().Copy( convSegment.Mid( convPos, KMaxSizeOfTmpBuffer ) );
   318                             }
   319                         }
   320 
   321                     TUint8 *chars = (TUint8 *)tmpForeign->Des().Ptr();
   322                     for( TInt i = 0 ; i < tmpForeign->Length() ; i++ )
   323                         { /* Set highest bit in characters */
   324                         chars[i] |= 0x80;
   325                         }
   326 
   327                     numberOfUnconvertibleCharacters = 0;
   328                     ret = CCnvCharacterSetConverter::DoConvertToUnicode( CnvCp949Table::ConversionData(),
   329                                                                          aDefaultEndiannessOfForeignCharacters,
   330                                                                          aUnicode, *tmpForeign,
   331                                                                          numberOfUnconvertibleCharacters,
   332                                                                          indexOfFirstByteOfFirstUnconvertibleCharacter,
   333                                                                          outputConversionFlags,
   334                                                                          inputConversionFlags );
   335                     if( numberOfUnconvertibleCharacters != 0 &&
   336                         aNumberOfUnconvertibleCharacters == 0 )
   337                         { /* First uncovertible found, set index relative to actual input buffer*/
   338                         aIndexOfFirstByteOfFirstUnconvertibleCharacter = (currPos + convPos + indexOfFirstByteOfFirstUnconvertibleCharacter);
   339                         }
   340 
   341                     aNumberOfUnconvertibleCharacters += numberOfUnconvertibleCharacters;
   342 
   343                     if( ret < 0 )
   344                         { /* Some error, break the loop,
   345                              errors are handled later */
   346                         delete tmpForeign;
   347                         break;
   348                         }
   349 
   350                     if( ret > 0 )
   351                         { /* Not all were converted, fix return value
   352                              to be relative to convSegment and break the loop */
   353                         ret = (convSegment.Length() - convPos - tmpForeign->Length() + ret);
   354                         delete tmpForeign;
   355                         break;
   356                         }
   357 
   358                     convPos += tmpForeign->Length();
   359                     delete tmpForeign;
   360                     }
   361                 }
   362             }
   363         else
   364             { /* Convert ASCII encoded by default, KSC can be used without setting highest bit */
   365                 numberOfUnconvertibleCharacters = 0;
   366                 ret = CCnvCharacterSetConverter::DoConvertToUnicode( CnvCp949Table::ConversionData(),
   367                                                                      aDefaultEndiannessOfForeignCharacters,
   368                                                                      aUnicode, convSegment,
   369                                                                      numberOfUnconvertibleCharacters,
   370                                                                      indexOfFirstByteOfFirstUnconvertibleCharacter,
   371                                                                      outputConversionFlags,
   372                                                                      inputConversionFlags );
   373                 if( numberOfUnconvertibleCharacters != 0 &&
   374                     aNumberOfUnconvertibleCharacters == 0 )
   375                     { /* First uncovertible found, set index relative to actual input buffer*/
   376                     aIndexOfFirstByteOfFirstUnconvertibleCharacter = currPos + indexOfFirstByteOfFirstUnconvertibleCharacter;
   377                     }
   378                 aNumberOfUnconvertibleCharacters += numberOfUnconvertibleCharacters;
   379             }
   380 
   381         if( ret < 0 )
   382             { /* Error during conversion */
   383             return ret;
   384             }
   385         else if( ret > 0 )
   386             { /* Not all characters where converted, return
   387                  value indicating how many bytes in total are left unconverted */
   388             return (aForeign.Length() - currPos - convSegment.Length() + ret);
   389             }
   390 
   391         /* Increase to skip converted data */
   392         currPos += convSegment.Length();
   393         if( escPos != KErrNotFound )
   394             { /* Increase to skip escape sequence */
   395             currPos += KLit8EscapeSequence().Length();
   396             }
   397         else if( shiftPos != KErrNotFound )
   398             { /* Increase to skip shift byte */
   399             currPos += 1;
   400             }
   401 
   402         }
   403 
   404     return 0;
   405 	}
   406 
   407 
   408 TBool CISO2022KRImplementation::IsInThisCharacterSetL(
   409     TBool& aSetToTrue, 
   410     TInt& aConfidenceLevel, 
   411     const TDesC8& /*aBuf*/)
   412 	{
   413 /*	
   414     aSetToTrue=ETrue;
   415     aConfidenceLevel=50;
   416     
   417     TUint8 ch(0);
   418     for (TInt i=0;i<aBuf.Length();i++)
   419         {
   420         ch=aBuf[i];
   421         if (ch<0x7F)
   422             {
   423             continue;
   424             }
   425         else if (0xa1<=ch&&ch<=0xfe)
   426             {
   427             i++;
   428             __ASSERT_DEBUG(i<aBuf.Length(),User::Panic(_L("IS2022KR"),__LINE__));
   429             }
   430         else
   431             {
   432             aConfidenceLevel=0;
   433             aSetToTrue=EFalse;
   434             break;
   435             }
   436         }    
   437 	return aSetToTrue;
   438 */
   439 	aSetToTrue=ETrue;
   440 	aConfidenceLevel=0;
   441 	return EFalse;
   442 	}
   443 
   444 CISO2022KRImplementation* CISO2022KRImplementation::NewL()
   445     {
   446     CISO2022KRImplementation* self = new(ELeave) CISO2022KRImplementation;
   447     return self;
   448     }
   449 
   450 CISO2022KRImplementation::CISO2022KRImplementation()
   451     {
   452     //default constructor.. do nothing
   453     }
   454 
   455 CISO2022KRImplementation::~CISO2022KRImplementation()
   456     {
   457     //default destructor .. do nothing
   458     }
   459 
   460 // ECOM CREATION FUNCTION
   461 const TImplementationProxy ImplementationTable[] = 
   462     {
   463     // Note: This is the same UID as defined in old mmp-file
   464     // Used also in 12221212.rss ( implementation_uid )
   465     IMPLEMENTATION_PROXY_ENTRY( 0x20010101, CISO2022KRImplementation::NewL )
   466     };
   467 
   468 EXPORT_C const TImplementationProxy* ImplementationGroupProxy( TInt& aTableCount )
   469     {
   470     aTableCount = sizeof( ImplementationTable ) / sizeof(TImplementationProxy);
   471     return ImplementationTable;
   472     }
   473