os/textandloc/charconvfw/charconvplugins/src/shared/shiftjis_shared.cpp
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/textandloc/charconvfw/charconvplugins/src/shared/shiftjis_shared.cpp	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,400 @@
     1.4 +/*
     1.5 +* Copyright (c) 2005-2009 Nokia Corporation and/or its subsidiary(-ies).
     1.6 +* All rights reserved.
     1.7 +* This component and the accompanying materials are made available
     1.8 +* under the terms of "Eclipse Public License v1.0"
     1.9 +* which accompanies this distribution, and is available
    1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
    1.11 +*
    1.12 +* Initial Contributors:
    1.13 +* Nokia Corporation - initial contribution.
    1.14 +*
    1.15 +* Contributors:
    1.16 +*
    1.17 +* Description: 
    1.18 +*
    1.19 +*/
    1.20 +
    1.21 +
    1.22 +#include <e32std.h>
    1.23 +#include <charconv.h>
    1.24 +#include <convutils.h>
    1.25 +#include "jisx0201.h"
    1.26 +#include "jisx0208.h"
    1.27 +#include "shiftjis.h"
    1.28 +#include <convdata.h>
    1.29 +#include "charconv_tls.h"
    1.30 +#include "charconv_table_utilities.h"
    1.31 +
// Boundaries of the single-byte ranges of Shift-JIS. A byte strictly above
// KSingleByteRangeFirstBlockEnd and strictly below KSingleByteRangeSecondBlockStart, or
// strictly above KSingleByteRangeSecondBlockEnd and no greater than KLeadByteMax, is
// treated as the lead byte of a 2-byte Shift-JIS sequence.
const TUint KSingleByteRangeFirstBlockEnd=0x80;
const TUint KSingleByteRangeSecondBlockStart=0xa0;
const TUint KSingleByteRangeSecondBlockEnd=0xdf;
const TUint KLeadByteMax = 0xfc;

// Trail byte range for 2-byte Shift-JIS sequences: KTrailByteMin..KTrailByteMax inclusive,
// with KTrailByteIllegal excluded.
const TUint KTrailByteMin = 0x40 ;
const TUint KTrailByteMax = 0xFC ;
const TUint KTrailByteIllegal = 0x7F ;

const TUint8 KSJISLineFeed = 0x0a ;	// Shift-JIS value for Line Feed
const TUint8 KSJISCarriageReturn = 0x0d;	// Shift-JIS value for Carriage Return
const TUint16 KUnicodeLineFeed = 0x2028; // U+2028 LINE SEPARATOR, used here as the Unicode line feed
const TUint16 KUnicodeParagraphSeperator = 0x2029; // U+2029 PARAGRAPH SEPARATOR
const TUint16 KUnicodeCarriageReturn = KSJISCarriageReturn ; // same value (0x0d) as the Shift-JIS carriage return
    1.48 +
    1.49 +
    1.50 +/** 
    1.51 + * Get the Shift-JIS replacement for Unicode characters which cannot be converted
    1.52 + * 
    1.53 + * @return const TDesC8& 8-bit descriptor containing the Shift-JIS data which will replace any untranslatable Unicode characters.
    1.54 + * @since Internationalization_6.2
    1.55 + * @internalTechnology
    1.56 + */
    1.57 +EXPORT_C const TDesC8& CnvShiftJis::ReplacementForUnconvertibleUnicodeCharacters()
    1.58 +	{
    1.59 +	return ReplacementForUnconvertibleUnicodeCharacters_internal();
    1.60 +	}
    1.61 +
    1.62 +
    1.63 +/**
    1.64 + * Converts text from Unicode to Shift-JIS 
    1.65 + * 
    1.66 + * @since Internationalization_6.2
    1.67 + * @internalTechnology 
    1.68 + */
    1.69 +EXPORT_C TInt CnvShiftJis::ConvertFromUnicode(CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */,
    1.70 +                                              const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, TDes8& aForeign,
    1.71 +                                              const TDesC16& aUnicode,
    1.72 +                                              CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
    1.73 +	{
    1.74 +	return DoConvertFromUnicode(aReplacementForUnconvertibleUnicodeCharacters, 
    1.75 +	                            aForeign, aUnicode, aIndicesOfUnconvertibleCharacters) ;
    1.76 +	}
    1.77 +
    1.78 +/**
    1.79 + * Converts text from Unicode to Shift-JIS 
    1.80 + * Note that CnvShiftJis does NOT support extensions through use of additional character sets,
    1.81 + * instead multiple versions are built for the different (operator defined) implementations
    1.82 + * of Shift-JIS which need to be supported and the correct one installed at ROM build time,
    1.83 + * and this method is only supplied for purposes of interface compatibility. 
    1.84 + * @since Internationalization_6.2
    1.85 + * @internalTechnology
    1.86 + */
    1.87 +EXPORT_C TInt CnvShiftJis::ConvertFromUnicode(CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */, 
    1.88 +                                              const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
    1.89 +                                              TDes8& aForeign, const TDesC16& aUnicode, 
    1.90 +                                              CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, 
    1.91 +                                              const TArray<CnvUtilities::SCharacterSet>& /* aArrayOfAdditionalCharacterSets */)
    1.92 +	{
    1.93 +	return DoConvertFromUnicode(aReplacementForUnconvertibleUnicodeCharacters, 
    1.94 +	                            aForeign, aUnicode, aIndicesOfUnconvertibleCharacters) ;
    1.95 +	}
    1.96 +
    1.97 +EXPORT_C TInt CnvShiftJis::ConvertToUnicode(CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */,
    1.98 +                                            TDes16& aUnicode, const TDesC8& aForeign, TInt& aNumberOfUnconvertibleCharacters,
    1.99 +                                            TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
   1.100 +	{
   1.101 +	return DoConvertToUnicode(aUnicode, aForeign, aNumberOfUnconvertibleCharacters, 
   1.102 +                              aIndexOfFirstByteOfFirstUnconvertibleCharacter) ;
   1.103 +	}
   1.104 +
   1.105 +
   1.106 +/**
   1.107 + * Converts text from Shift-JIS to Unicode 
   1.108 + * 
   1.109 + * @since Internationalization_6.2
   1.110 + * @internalTechnology
   1.111 + */
   1.112 +EXPORT_C TInt CnvShiftJis::ConvertToUnicode(CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */, 
   1.113 +                                            TDes16& aUnicode, const TDesC8& aForeign, 
   1.114 +                                            TInt& aNumberOfUnconvertibleCharacters, 
   1.115 +                                            TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, 
   1.116 +                                            const TArray<CnvUtilities::SMethod>& /* aArrayOfAdditionalMethods */)
   1.117 +	{
   1.118 +	return DoConvertToUnicode(aUnicode, aForeign, aNumberOfUnconvertibleCharacters, 
   1.119 +                              aIndexOfFirstByteOfFirstUnconvertibleCharacter) ;
   1.120 +
   1.121 +	}
   1.122 +	
   1.123 +	
   1.124 +
   1.125 +/**
   1.126 +This function actually does the work of converting Shift-JIS input to unicode output.
   1.127 +
   1.128 +@param TDes16& aUnicode The output buffer
   1.129 +@param const TDesC8& aForeign The input buffer
   1.130 +@param TInt& aNumberOfUnconvertibleCharacters Number of input characters which were processed but couldn't be converted. 
   1.131 +@param TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter Locates first untranslatable character in input buffer
   1.132 +@return CCnvCharacterSetConverter::EErrorIllFormedInput if the input was invalid, otherwise the number of bytes in the input buffer which weren't be processed (e.g. due to output buffer overflow). 
   1.133 +@internalTechnology
   1.134 +*/
   1.135 +	
   1.136 +TInt CnvShiftJis::DoConvertToUnicode(TDes16& aUnicode, const TDesC8& aForeign, 
   1.137 +                                     TInt& aNumberOfUnconvertibleCharacters, 
   1.138 +                                     TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
   1.139 +	{
   1.140 +	TUint foreignBytePointer = 0 ;
   1.141 +	TUint row ;
   1.142 +	TUint column ;
   1.143 +	TUint8 foreignCharByte ;
   1.144 +	TUint8 foreignCharStart ;
   1.145 +	TChar unicodeChar ;
   1.146 +	TBool finished = EFalse ;
   1.147 +	TInt charsConverted = 0 ;
   1.148 +	TUint16 unicodeUnmappedCharacter = getUnicodeUnmappedCharacter() ;
   1.149 +	TInt unicodeBufferLength = aUnicode.MaxLength() ;
   1.150 +	TInt foreignDataLength = aForeign.Length() ;
   1.151 +		
   1.152 +	aIndexOfFirstByteOfFirstUnconvertibleCharacter = -1 ;
   1.153 +	aNumberOfUnconvertibleCharacters = 0 ;
   1.154 +
   1.155 +	// Check for valid input and output buffers
   1.156 +	if (( unicodeBufferLength == 0) || foreignDataLength == 0)
   1.157 +		{
   1.158 +		finished = ETrue ;		
   1.159 +		}
   1.160 +		
   1.161 +	// Reset output buffer	
   1.162 +	aUnicode.Zero() ;
   1.163 +
   1.164 +	// Perform conversion		
   1.165 +	while (!finished)
   1.166 +		{
   1.167 +		foreignCharStart = foreignBytePointer ;
   1.168 +		foreignCharByte = aForeign[foreignBytePointer++] ;
   1.169 +
   1.170 +		// Look for (and handle) CR/LF pairs in ShiftJis input stream.
   1.171 +		// It is a specific requirement from Symbian KK that CR/LF pairs
   1.172 +		// in the input stream be converted to Unicode LF characters 
   1.173 +		if((KSJISCarriageReturn == foreignCharByte) && (foreignBytePointer < foreignDataLength))
   1.174 +			{
   1.175 +			// check next byte
   1.176 +			if(KSJISLineFeed == aForeign[foreignBytePointer]) 
   1.177 +				{
   1.178 +				// CR code
   1.179 +				unicodeChar = KUnicodeLineFeed ;
   1.180 +				foreignBytePointer++ ;
   1.181 +				}
   1.182 +			else
   1.183 +				{					
   1.184 +				unicodeChar = KUnicodeCarriageReturn ;
   1.185 +				}
   1.186 +			}
   1.187 +		else
   1.188 +			{
   1.189 +			if (((foreignCharByte > KSingleByteRangeFirstBlockEnd) && 
   1.190 +			     (foreignCharByte < KSingleByteRangeSecondBlockStart)) ||
   1.191 +			    ((foreignCharByte > KSingleByteRangeSecondBlockEnd) &&
   1.192 +			     (foreignCharByte <= KLeadByteMax)))
   1.193 +				{
   1.194 +				if (foreignBytePointer < foreignDataLength)
   1.195 +					{
   1.196 +					// Potential 2 byte shiftJis character
   1.197 +					row = foreignCharByte ;
   1.198 +					column = aForeign[foreignBytePointer] ;
   1.199 +					if (((column <= KTrailByteMax) && (column >= KTrailByteMin)) && column != KTrailByteIllegal)
   1.200 +						{
   1.201 +						foreignBytePointer++ ;	
   1.202 +						unicodeChar = lookupUnicodeChar(row,column) ;	
   1.203 +						}
   1.204 +					else
   1.205 +						{
   1.206 +						unicodeChar = unicodeUnmappedCharacter ;
   1.207 +						}
   1.208 +					}
   1.209 +				else
   1.210 +					{
   1.211 +					// Only got the first byte of a 2 byte character
   1.212 +					// reset "read" pointer to beginning of character
   1.213 +					// and bail out!
   1.214 +					finished = ETrue ;
   1.215 +					foreignBytePointer-- ;
   1.216 +					continue ;
   1.217 +					}
   1.218 +				}
   1.219 +			else
   1.220 +				{
   1.221 +				//Probably a single byte shiftJis chracter
   1.222 +				row = 0 ;
   1.223 +				column = foreignCharByte ;
   1.224 +				unicodeChar = lookupUnicodeChar(row,column) ;
   1.225 +				}
   1.226 +			}
   1.227 +			
   1.228 +
   1.229 +		// Check for unconvertible characters.
   1.230 +		if (unicodeChar == unicodeUnmappedCharacter)
   1.231 +			{
   1.232 +			if (aIndexOfFirstByteOfFirstUnconvertibleCharacter == -1)
   1.233 +				{
   1.234 +				aIndexOfFirstByteOfFirstUnconvertibleCharacter = foreignCharStart ;
   1.235 +				}
   1.236 +			aNumberOfUnconvertibleCharacters++ ;
   1.237 +			}
   1.238 +			
   1.239 +		// Append the converted (or not!) character to the output buffer
   1.240 +		aUnicode.Append(unicodeChar);
   1.241 +		charsConverted++ ;
   1.242 +		
   1.243 +		// Check for end of input buffer or output buffer full
   1.244 +		if ((charsConverted >= unicodeBufferLength) || (foreignBytePointer >= foreignDataLength))
   1.245 +			finished = ETrue ;
   1.246 +		}
   1.247 +	
   1.248 +	// Evaluate success of the operation and either return error code (currently just 
   1.249 +	// invalid input) or return number of un-processed characters in input buffer in
   1.250 +	// case of output buffer being filled before input fully consumed (0 means all
   1.251 +	// characters consumed)
   1.252 +	TInt returnValue ;
   1.253 +	if (foreignDataLength && !charsConverted)
   1.254 +		{
   1.255 +		// Input must contain at least one complete character to be considered valid Shift-JIS.
   1.256 +		returnValue = CCnvCharacterSetConverter::EErrorIllFormedInput ;
   1.257 +		}
   1.258 +	else
   1.259 +		{
   1.260 +		returnValue = foreignDataLength - foreignBytePointer;
   1.261 +		}
   1.262 +	return returnValue ;
   1.263 +	}
   1.264 +	
   1.265 +
   1.266 +
   1.267 +	
   1.268 +/**
   1.269 +This function actually does the work of converting converting unicode input to Shift-JIS  output.
   1.270 +
   1.271 +@param const TDesC8& aReplacementForUnconvertibleUnicodeCharacters byte sequence to be be used as output for unicode characters which have no mapping defined.
   1.272 +@param TDes16& aUnicode The input buffer
   1.273 +@param const TDesC8& aForeign The output buffer
   1.274 +@return The number of unicode characters in the input buffer which weren't be processed (e.g. due to output buffer overflow). 
   1.275 +@internalTechnology
   1.276 +*/
   1.277 +TInt CnvShiftJis::DoConvertFromUnicode(const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
   1.278 +	                                   TDes8& aForeign, const TDesC16& aUnicode, 
   1.279 +	                                   CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
   1.280 +	{
   1.281 +	TUint unicodePointer = 0 ;
   1.282 +	TUint row ;
   1.283 +	TUint column ;
   1.284 +	TUint8 foreignCharByte ;
   1.285 +	TUint16 foreignChar ;
   1.286 +	TUint16 unicodeChar ;
   1.287 +	TBool finished = EFalse ;
   1.288 +	TInt charsConverted = 0 ;
   1.289 +	TInt unicodeLength = aUnicode.Length() ;
   1.290 +	TInt foreignMaxLength = aForeign.MaxLength() ;
   1.291 +	TUint16 foreignUnmappedCharacter = getForeignUnmappedCharacter() ;
   1.292 +	CCnvCharacterSetConverter::TDowngradeForExoticLineTerminatingCharacters downgradeForExoticLineTerminatingCharacters = CCnvCharacterSetConverter::EDowngradeExoticLineTerminatingCharactersToCarriageReturnLineFeed;
   1.293 +	TBool downgradeExoticLineTerminatingCharacters	= EFalse ;
   1.294 +	
   1.295 +	
   1.296 +	// Check for valid input and output buffers
   1.297 +	if ((unicodeLength == 0) ||  foreignMaxLength == 0)
   1.298 +		{
   1.299 +		finished = ETrue ;		
   1.300 +		}
   1.301 +	
   1.302 +	// If we've been called from an instance of CCnvCharacterSetConverter we can retrieve its state
   1.303 +	// from Thread Local Storage. This really isn't very nice but there's no other way we can get
   1.304 +	// hold of settings like downgrade for unicode line termination characters without breaking
   1.305 +	// compatibility with the existing plug-in interface!
   1.306 +	CCnvCharacterSetConverter* currentCharacterSetConverter = (CCnvCharacterSetConverter*)TTlsData::CurrentCharacterSetConverter();
   1.307 +	if (currentCharacterSetConverter)
   1.308 +		{
   1.309 +		downgradeForExoticLineTerminatingCharacters = currentCharacterSetConverter->GetDowngradeForExoticLineTerminatingCharacters() ;
   1.310 +		downgradeExoticLineTerminatingCharacters = ETrue ;
   1.311 +		}
   1.312 +		
   1.313 +	// Reset output buffer
   1.314 +	aForeign.Zero();
   1.315 +		
   1.316 +	// Process input buffer
   1.317 +	while (!finished)
   1.318 +		{
   1.319 +		
   1.320 +		// Look up foreign Char
   1.321 +		unicodeChar = aUnicode[unicodePointer] ;
   1.322 +		
   1.323 +		// Check for any downgrade of Unicode line endings characters required if we've got
   1.324 +		// a Unicode Line-Feed or Paragraph-Seperator character to deal with. 
   1.325 +		if (downgradeExoticLineTerminatingCharacters && 
   1.326 +		    ((unicodeChar==KUnicodeLineFeed) || (unicodeChar==KUnicodeParagraphSeperator)))
   1.327 +			{
   1.328 +			if (downgradeForExoticLineTerminatingCharacters == CCnvCharacterSetConverter::EDowngradeExoticLineTerminatingCharactersToCarriageReturnLineFeed)
   1.329 +				{
   1.330 +				if (aForeign.Length() < (foreignMaxLength - 1))
   1.331 +					{
   1.332 +					aForeign.Append(KSJISCarriageReturn) ;
   1.333 +					aForeign.Append(KSJISLineFeed) ;
   1.334 +					charsConverted++ ;
   1.335 +					}
   1.336 +					else
   1.337 +					{
   1.338 +					// Foreign buffer full!
   1.339 +					finished = ETrue;
   1.340 +					}
   1.341 +				}
   1.342 +			else if (downgradeForExoticLineTerminatingCharacters == CCnvCharacterSetConverter::EDowngradeExoticLineTerminatingCharactersToJustLineFeed)
   1.343 +				{
   1.344 +				if (aForeign.Length() < foreignMaxLength)
   1.345 +					{
   1.346 +					aForeign.Append(KSJISLineFeed) ;
   1.347 +					charsConverted++ ;
   1.348 +					}
   1.349 +					else
   1.350 +					{
   1.351 +					// Foreign buffer full!
   1.352 +					finished = ETrue;
   1.353 +					}
   1.354 +				}		
   1.355 +			}
   1.356 +		else
   1.357 +			{	
   1.358 +			row = unicodeChar / 256 ;
   1.359 +			column = unicodeChar % 256 ;		
   1.360 +			foreignChar = lookupForeignChar(row, column) ;		
   1.361 +
   1.362 +			// Check for untranslatable character 
   1.363 +			if ((foreignChar == foreignUnmappedCharacter) &&
   1.364 +			    (aForeign.Length() < (foreignMaxLength - 1)))
   1.365 +				{
   1.366 +				aIndicesOfUnconvertibleCharacters.AppendIndex(unicodePointer) ;
   1.367 +				aForeign.Append(aReplacementForUnconvertibleUnicodeCharacters) ;
   1.368 +				}	
   1.369 +			else if ((foreignChar <= 0xFF) && (aForeign.Length() < foreignMaxLength))
   1.370 +				{
   1.371 +				// Single byte character
   1.372 +				foreignCharByte = (TUint8) foreignChar ;
   1.373 +				aForeign.Append(foreignCharByte) ;
   1.374 +				charsConverted++ ;
   1.375 +				}
   1.376 +			else if (aForeign.Length() < (foreignMaxLength - 1))
   1.377 +				{
   1.378 +				// Two byte character
   1.379 +				foreignCharByte = (TUint8) (foreignChar >> 8 ) ;
   1.380 +				aForeign.Append(foreignCharByte) ;
   1.381 +				foreignCharByte = (TUint8) (foreignChar & 0xFF) ;
   1.382 +				aForeign.Append(foreignCharByte) ;
   1.383 +				charsConverted++ ;
   1.384 +				}
   1.385 +			else
   1.386 +				{
   1.387 +				// Foreign buffer full!
   1.388 +				finished = ETrue;
   1.389 +				}
   1.390 +			}
   1.391 +		
   1.392 +		// Check for terminating condition (input buffer consumed or output buffer full)	
   1.393 +		if (!finished && (++unicodePointer >= unicodeLength))
   1.394 +			{
   1.395 +			finished = ETrue ;
   1.396 +			}
   1.397 +		}
   1.398 +	
   1.399 +	// Return number of input characters *not* processsed (will be zero unless output
   1.400 +	// buffer has been filled before all input consumed)	
   1.401 +	return unicodeLength - unicodePointer;
   1.402 +	}
   1.403 +