1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/textandloc/charconvfw/charconvplugins/src/shared/shiftjis_shared.cpp Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,400 @@
1.4 +/*
1.5 +* Copyright (c) 2005-2009 Nokia Corporation and/or its subsidiary(-ies).
1.6 +* All rights reserved.
1.7 +* This component and the accompanying materials are made available
1.8 +* under the terms of "Eclipse Public License v1.0"
1.9 +* which accompanies this distribution, and is available
1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
1.11 +*
1.12 +* Initial Contributors:
1.13 +* Nokia Corporation - initial contribution.
1.14 +*
1.15 +* Contributors:
1.16 +*
1.17 +* Description:
1.18 +*
1.19 +*/
1.20 +
1.21 +
1.22 +#include <e32std.h>
1.23 +#include <charconv.h>
1.24 +#include <convutils.h>
1.25 +#include "jisx0201.h"
1.26 +#include "jisx0208.h"
1.27 +#include "shiftjis.h"
1.28 +#include <convdata.h>
1.29 +#include "charconv_tls.h"
1.30 +#include "charconv_table_utilities.h"
1.31 +
1.32 +// Lead byte ranges for 2-Byte Shift-JIS sequences
1.33 +const TUint KSingleByteRangeFirstBlockEnd=0x80;
1.34 +const TUint KSingleByteRangeSecondBlockStart=0xa0;
1.35 +const TUint KSingleByteRangeSecondBlockEnd=0xdf;
1.36 +const TUint KLeadByteMax = 0xfc;
1.37 +
1.38 +//Trail byte ranges for 2-Byte Shift-JIS sequences
1.39 +const TUint KTrailByteMin = 0x40 ;
1.40 +const TUint KTrailByteMax = 0xFC ;
1.41 +const TUint KTrailByteIllegal = 0x7F ;
1.42 +
1.43 +const TUint8 KSJISLineFeed = 0x0a ; // Shift-Jis value for Line Feed
1.44 +const TUint8 KSJISCarriageReturn = 0x0d; // Shift-Jis value for Carriage Return
1.45 +const TUint16 KUnicodeLineFeed = 0x2028; // Unicode Line Feed
1.46 +const TUint16 KUnicodeParagraphSeperator = 0x2029; // Unicode Paragraph seperator
1.47 +const TUint16 KUnicodeCarriageReturn = KSJISCarriageReturn ;
1.48 +
1.49 +
1.50 +/**
1.51 + * Get the Shift-JIS replacement for Unicode characters which cannot be converted
1.52 + *
1.53 + * @return const TDesC8& 8-bit descriptor containing the Shift-JIS data which will replace any untranslatable Unicode characters.
1.54 + * @since Internationalization_6.2
1.55 + * @internalTechnology
1.56 + */
1.57 +EXPORT_C const TDesC8& CnvShiftJis::ReplacementForUnconvertibleUnicodeCharacters()
1.58 + {
1.59 + return ReplacementForUnconvertibleUnicodeCharacters_internal();
1.60 + }
1.61 +
1.62 +
1.63 +/**
1.64 + * Converts text from Unicode to Shift-JIS
1.65 + *
1.66 + * @since Internationalization_6.2
1.67 + * @internalTechnology
1.68 + */
1.69 +EXPORT_C TInt CnvShiftJis::ConvertFromUnicode(CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */,
1.70 + const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, TDes8& aForeign,
1.71 + const TDesC16& aUnicode,
1.72 + CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
1.73 + {
1.74 + return DoConvertFromUnicode(aReplacementForUnconvertibleUnicodeCharacters,
1.75 + aForeign, aUnicode, aIndicesOfUnconvertibleCharacters) ;
1.76 + }
1.77 +
1.78 +/**
1.79 + * Converts text from Unicode to Shift-JIS
1.80 + * Note that CnvShiftJis does NOT support extensions through use of additional character sets,
1.81 + * instead multiple versions are built for the different (operator defined) implementations
1.82 + * of Shift-JIS which need to be supported and the correct one installed at ROM build time,
1.83 + * and this method is only supplied for purposes of interface compatibility.
1.84 + * @since Internationalization_6.2
1.85 + * @internalTechnology
1.86 + */
1.87 +EXPORT_C TInt CnvShiftJis::ConvertFromUnicode(CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */,
1.88 + const TDesC8& aReplacementForUnconvertibleUnicodeCharacters,
1.89 + TDes8& aForeign, const TDesC16& aUnicode,
1.90 + CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters,
1.91 + const TArray<CnvUtilities::SCharacterSet>& /* aArrayOfAdditionalCharacterSets */)
1.92 + {
1.93 + return DoConvertFromUnicode(aReplacementForUnconvertibleUnicodeCharacters,
1.94 + aForeign, aUnicode, aIndicesOfUnconvertibleCharacters) ;
1.95 + }
1.96 +
1.97 +EXPORT_C TInt CnvShiftJis::ConvertToUnicode(CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */,
1.98 + TDes16& aUnicode, const TDesC8& aForeign, TInt& aNumberOfUnconvertibleCharacters,
1.99 + TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
1.100 + {
1.101 + return DoConvertToUnicode(aUnicode, aForeign, aNumberOfUnconvertibleCharacters,
1.102 + aIndexOfFirstByteOfFirstUnconvertibleCharacter) ;
1.103 + }
1.104 +
1.105 +
1.106 +/**
1.107 + * Converts text from Shift-JIS to Unicode
1.108 + *
1.109 + * @since Internationalization_6.2
1.110 + * @internalTechnology
1.111 + */
1.112 +EXPORT_C TInt CnvShiftJis::ConvertToUnicode(CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */,
1.113 + TDes16& aUnicode, const TDesC8& aForeign,
1.114 + TInt& aNumberOfUnconvertibleCharacters,
1.115 + TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter,
1.116 + const TArray<CnvUtilities::SMethod>& /* aArrayOfAdditionalMethods */)
1.117 + {
1.118 + return DoConvertToUnicode(aUnicode, aForeign, aNumberOfUnconvertibleCharacters,
1.119 + aIndexOfFirstByteOfFirstUnconvertibleCharacter) ;
1.120 +
1.121 + }
1.122 +
1.123 +
1.124 +
1.125 +/**
1.126 +This function actually does the work of converting Shift-JIS input to unicode output.
1.127 +
1.128 +@param TDes16& aUnicode The output buffer
1.129 +@param const TDesC8& aForeign The input buffer
1.130 +@param TInt& aNumberOfUnconvertibleCharacters Number of input characters which were processed but couldn't be converted.
1.131 +@param TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter Locates first untranslatable character in input buffer
1.132 +@return CCnvCharacterSetConverter::EErrorIllFormedInput if the input was invalid, otherwise the number of bytes in the input buffer which weren't be processed (e.g. due to output buffer overflow).
1.133 +@internalTechnology
1.134 +*/
1.135 +
1.136 +TInt CnvShiftJis::DoConvertToUnicode(TDes16& aUnicode, const TDesC8& aForeign,
1.137 + TInt& aNumberOfUnconvertibleCharacters,
1.138 + TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
1.139 + {
1.140 + TUint foreignBytePointer = 0 ;
1.141 + TUint row ;
1.142 + TUint column ;
1.143 + TUint8 foreignCharByte ;
1.144 + TUint8 foreignCharStart ;
1.145 + TChar unicodeChar ;
1.146 + TBool finished = EFalse ;
1.147 + TInt charsConverted = 0 ;
1.148 + TUint16 unicodeUnmappedCharacter = getUnicodeUnmappedCharacter() ;
1.149 + TInt unicodeBufferLength = aUnicode.MaxLength() ;
1.150 + TInt foreignDataLength = aForeign.Length() ;
1.151 +
1.152 + aIndexOfFirstByteOfFirstUnconvertibleCharacter = -1 ;
1.153 + aNumberOfUnconvertibleCharacters = 0 ;
1.154 +
1.155 + // Check for valid input and output buffers
1.156 + if (( unicodeBufferLength == 0) || foreignDataLength == 0)
1.157 + {
1.158 + finished = ETrue ;
1.159 + }
1.160 +
1.161 + // Reset output buffer
1.162 + aUnicode.Zero() ;
1.163 +
1.164 + // Perform conversion
1.165 + while (!finished)
1.166 + {
1.167 + foreignCharStart = foreignBytePointer ;
1.168 + foreignCharByte = aForeign[foreignBytePointer++] ;
1.169 +
1.170 + // Look for (and handle) CR/LF pairs in ShiftJis input stream.
1.171 + // It is a specific requirement from Symbian KK that CR/LF pairs
1.172 + // in the input stream be converted to Unicode LF characters
1.173 + if((KSJISCarriageReturn == foreignCharByte) && (foreignBytePointer < foreignDataLength))
1.174 + {
1.175 + // check next byte
1.176 + if(KSJISLineFeed == aForeign[foreignBytePointer])
1.177 + {
1.178 + // CR code
1.179 + unicodeChar = KUnicodeLineFeed ;
1.180 + foreignBytePointer++ ;
1.181 + }
1.182 + else
1.183 + {
1.184 + unicodeChar = KUnicodeCarriageReturn ;
1.185 + }
1.186 + }
1.187 + else
1.188 + {
1.189 + if (((foreignCharByte > KSingleByteRangeFirstBlockEnd) &&
1.190 + (foreignCharByte < KSingleByteRangeSecondBlockStart)) ||
1.191 + ((foreignCharByte > KSingleByteRangeSecondBlockEnd) &&
1.192 + (foreignCharByte <= KLeadByteMax)))
1.193 + {
1.194 + if (foreignBytePointer < foreignDataLength)
1.195 + {
1.196 + // Potential 2 byte shiftJis character
1.197 + row = foreignCharByte ;
1.198 + column = aForeign[foreignBytePointer] ;
1.199 + if (((column <= KTrailByteMax) && (column >= KTrailByteMin)) && column != KTrailByteIllegal)
1.200 + {
1.201 + foreignBytePointer++ ;
1.202 + unicodeChar = lookupUnicodeChar(row,column) ;
1.203 + }
1.204 + else
1.205 + {
1.206 + unicodeChar = unicodeUnmappedCharacter ;
1.207 + }
1.208 + }
1.209 + else
1.210 + {
1.211 + // Only got the first byte of a 2 byte character
1.212 + // reset "read" pointer to beginning of character
1.213 + // and bail out!
1.214 + finished = ETrue ;
1.215 + foreignBytePointer-- ;
1.216 + continue ;
1.217 + }
1.218 + }
1.219 + else
1.220 + {
1.221 + //Probably a single byte shiftJis chracter
1.222 + row = 0 ;
1.223 + column = foreignCharByte ;
1.224 + unicodeChar = lookupUnicodeChar(row,column) ;
1.225 + }
1.226 + }
1.227 +
1.228 +
1.229 + // Check for unconvertible characters.
1.230 + if (unicodeChar == unicodeUnmappedCharacter)
1.231 + {
1.232 + if (aIndexOfFirstByteOfFirstUnconvertibleCharacter == -1)
1.233 + {
1.234 + aIndexOfFirstByteOfFirstUnconvertibleCharacter = foreignCharStart ;
1.235 + }
1.236 + aNumberOfUnconvertibleCharacters++ ;
1.237 + }
1.238 +
1.239 + // Append the converted (or not!) character to the output buffer
1.240 + aUnicode.Append(unicodeChar);
1.241 + charsConverted++ ;
1.242 +
1.243 + // Check for end of input buffer or output buffer full
1.244 + if ((charsConverted >= unicodeBufferLength) || (foreignBytePointer >= foreignDataLength))
1.245 + finished = ETrue ;
1.246 + }
1.247 +
1.248 + // Evaluate success of the operation and either return error code (currently just
1.249 + // invalid input) or return number of un-processed characters in input buffer in
1.250 + // case of output buffer being filled before input fully consumed (0 means all
1.251 + // characters consumed)
1.252 + TInt returnValue ;
1.253 + if (foreignDataLength && !charsConverted)
1.254 + {
1.255 + // Input must contain at least one complete character to be considered valid Shift-JIS.
1.256 + returnValue = CCnvCharacterSetConverter::EErrorIllFormedInput ;
1.257 + }
1.258 + else
1.259 + {
1.260 + returnValue = foreignDataLength - foreignBytePointer;
1.261 + }
1.262 + return returnValue ;
1.263 + }
1.264 +
1.265 +
1.266 +
1.267 +
1.268 +/**
1.269 +This function actually does the work of converting converting unicode input to Shift-JIS output.
1.270 +
1.271 +@param const TDesC8& aReplacementForUnconvertibleUnicodeCharacters byte sequence to be be used as output for unicode characters which have no mapping defined.
1.272 +@param TDes16& aUnicode The input buffer
1.273 +@param const TDesC8& aForeign The output buffer
1.274 +@return The number of unicode characters in the input buffer which weren't be processed (e.g. due to output buffer overflow).
1.275 +@internalTechnology
1.276 +*/
1.277 +TInt CnvShiftJis::DoConvertFromUnicode(const TDesC8& aReplacementForUnconvertibleUnicodeCharacters,
1.278 + TDes8& aForeign, const TDesC16& aUnicode,
1.279 + CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
1.280 + {
1.281 + TUint unicodePointer = 0 ;
1.282 + TUint row ;
1.283 + TUint column ;
1.284 + TUint8 foreignCharByte ;
1.285 + TUint16 foreignChar ;
1.286 + TUint16 unicodeChar ;
1.287 + TBool finished = EFalse ;
1.288 + TInt charsConverted = 0 ;
1.289 + TInt unicodeLength = aUnicode.Length() ;
1.290 + TInt foreignMaxLength = aForeign.MaxLength() ;
1.291 + TUint16 foreignUnmappedCharacter = getForeignUnmappedCharacter() ;
1.292 + CCnvCharacterSetConverter::TDowngradeForExoticLineTerminatingCharacters downgradeForExoticLineTerminatingCharacters = CCnvCharacterSetConverter::EDowngradeExoticLineTerminatingCharactersToCarriageReturnLineFeed;
1.293 + TBool downgradeExoticLineTerminatingCharacters = EFalse ;
1.294 +
1.295 +
1.296 + // Check for valid input and output buffers
1.297 + if ((unicodeLength == 0) || foreignMaxLength == 0)
1.298 + {
1.299 + finished = ETrue ;
1.300 + }
1.301 +
1.302 + // If we've been called from an instance of CCnvCharacterSetConverter we can retrieve its state
1.303 + // from Thread Local Storage. This really isn't very nice but there's no other way we can get
1.304 + // hold of settings like downgrade for unicode line termination characters without breaking
1.305 + // compatibility with the existing plug-in interface!
1.306 + CCnvCharacterSetConverter* currentCharacterSetConverter = (CCnvCharacterSetConverter*)TTlsData::CurrentCharacterSetConverter();
1.307 + if (currentCharacterSetConverter)
1.308 + {
1.309 + downgradeForExoticLineTerminatingCharacters = currentCharacterSetConverter->GetDowngradeForExoticLineTerminatingCharacters() ;
1.310 + downgradeExoticLineTerminatingCharacters = ETrue ;
1.311 + }
1.312 +
1.313 + // Reset output buffer
1.314 + aForeign.Zero();
1.315 +
1.316 + // Process input buffer
1.317 + while (!finished)
1.318 + {
1.319 +
1.320 + // Look up foreign Char
1.321 + unicodeChar = aUnicode[unicodePointer] ;
1.322 +
1.323 + // Check for any downgrade of Unicode line endings characters required if we've got
1.324 + // a Unicode Line-Feed or Paragraph-Seperator character to deal with.
1.325 + if (downgradeExoticLineTerminatingCharacters &&
1.326 + ((unicodeChar==KUnicodeLineFeed) || (unicodeChar==KUnicodeParagraphSeperator)))
1.327 + {
1.328 + if (downgradeForExoticLineTerminatingCharacters == CCnvCharacterSetConverter::EDowngradeExoticLineTerminatingCharactersToCarriageReturnLineFeed)
1.329 + {
1.330 + if (aForeign.Length() < (foreignMaxLength - 1))
1.331 + {
1.332 + aForeign.Append(KSJISCarriageReturn) ;
1.333 + aForeign.Append(KSJISLineFeed) ;
1.334 + charsConverted++ ;
1.335 + }
1.336 + else
1.337 + {
1.338 + // Foreign buffer full!
1.339 + finished = ETrue;
1.340 + }
1.341 + }
1.342 + else if (downgradeForExoticLineTerminatingCharacters == CCnvCharacterSetConverter::EDowngradeExoticLineTerminatingCharactersToJustLineFeed)
1.343 + {
1.344 + if (aForeign.Length() < foreignMaxLength)
1.345 + {
1.346 + aForeign.Append(KSJISLineFeed) ;
1.347 + charsConverted++ ;
1.348 + }
1.349 + else
1.350 + {
1.351 + // Foreign buffer full!
1.352 + finished = ETrue;
1.353 + }
1.354 + }
1.355 + }
1.356 + else
1.357 + {
1.358 + row = unicodeChar / 256 ;
1.359 + column = unicodeChar % 256 ;
1.360 + foreignChar = lookupForeignChar(row, column) ;
1.361 +
1.362 + // Check for untranslatable character
1.363 + if ((foreignChar == foreignUnmappedCharacter) &&
1.364 + (aForeign.Length() < (foreignMaxLength - 1)))
1.365 + {
1.366 + aIndicesOfUnconvertibleCharacters.AppendIndex(unicodePointer) ;
1.367 + aForeign.Append(aReplacementForUnconvertibleUnicodeCharacters) ;
1.368 + }
1.369 + else if ((foreignChar <= 0xFF) && (aForeign.Length() < foreignMaxLength))
1.370 + {
1.371 + // Single byte character
1.372 + foreignCharByte = (TUint8) foreignChar ;
1.373 + aForeign.Append(foreignCharByte) ;
1.374 + charsConverted++ ;
1.375 + }
1.376 + else if (aForeign.Length() < (foreignMaxLength - 1))
1.377 + {
1.378 + // Two byte character
1.379 + foreignCharByte = (TUint8) (foreignChar >> 8 ) ;
1.380 + aForeign.Append(foreignCharByte) ;
1.381 + foreignCharByte = (TUint8) (foreignChar & 0xFF) ;
1.382 + aForeign.Append(foreignCharByte) ;
1.383 + charsConverted++ ;
1.384 + }
1.385 + else
1.386 + {
1.387 + // Foreign buffer full!
1.388 + finished = ETrue;
1.389 + }
1.390 + }
1.391 +
1.392 + // Check for terminating condition (input buffer consumed or output buffer full)
1.393 + if (!finished && (++unicodePointer >= unicodeLength))
1.394 + {
1.395 + finished = ETrue ;
1.396 + }
1.397 + }
1.398 +
1.399 + // Return number of input characters *not* processsed (will be zero unless output
1.400 + // buffer has been filled before all input consumed)
1.401 + return unicodeLength - unicodePointer;
1.402 + }
1.403 +