os/textandloc/charconvfw/charconvplugins/src/shared/shiftjis_shared.cpp
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 /*
     2 * Copyright (c) 2005-2009 Nokia Corporation and/or its subsidiary(-ies).
     3 * All rights reserved.
     4 * This component and the accompanying materials are made available
     5 * under the terms of "Eclipse Public License v1.0"
     6 * which accompanies this distribution, and is available
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
     8 *
     9 * Initial Contributors:
    10 * Nokia Corporation - initial contribution.
    11 *
    12 * Contributors:
    13 *
    14 * Description: 
    15 *
    16 */
    17 
    18 
    19 #include <e32std.h>
    20 #include <charconv.h>
    21 #include <convutils.h>
    22 #include "jisx0201.h"
    23 #include "jisx0208.h"
    24 #include "shiftjis.h"
    25 #include <convdata.h>
    26 #include "charconv_tls.h"
    27 #include "charconv_table_utilities.h"
    28 
    29 // Lead byte ranges for 2-Byte Shift-JIS sequences
    30 const TUint KSingleByteRangeFirstBlockEnd=0x80;
    31 const TUint KSingleByteRangeSecondBlockStart=0xa0;
    32 const TUint KSingleByteRangeSecondBlockEnd=0xdf;
    33 const TUint KLeadByteMax = 0xfc;
    34 
    35 //Trail byte ranges for 2-Byte Shift-JIS sequences
    36 const TUint KTrailByteMin = 0x40 ;
    37 const TUint KTrailByteMax = 0xFC ;
    38 const TUint KTrailByteIllegal = 0x7F ;
    39 
    40 const TUint8 KSJISLineFeed = 0x0a ;	// Shift-Jis value for Line Feed
    41 const TUint8 KSJISCarriageReturn = 0x0d;	// Shift-Jis value for Carriage Return
    42 const TUint16 KUnicodeLineFeed = 0x2028; // Unicode Line Feed
    43 const TUint16 KUnicodeParagraphSeperator = 0x2029; // Unicode Paragraph seperator
    44 const TUint16 KUnicodeCarriageReturn = KSJISCarriageReturn ;
    45 
    46 
    47 /** 
    48  * Get the Shift-JIS replacement for Unicode characters which cannot be converted
    49  * 
    50  * @return const TDesC8& 8-bit descriptor containing the Shift-JIS data which will replace any untranslatable Unicode characters.
    51  * @since Internationalization_6.2
    52  * @internalTechnology
    53  */
    54 EXPORT_C const TDesC8& CnvShiftJis::ReplacementForUnconvertibleUnicodeCharacters()
    55 	{
    56 	return ReplacementForUnconvertibleUnicodeCharacters_internal();
    57 	}
    58 
    59 
    60 /**
    61  * Converts text from Unicode to Shift-JIS 
    62  * 
    63  * @since Internationalization_6.2
    64  * @internalTechnology 
    65  */
    66 EXPORT_C TInt CnvShiftJis::ConvertFromUnicode(CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */,
    67                                               const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, TDes8& aForeign,
    68                                               const TDesC16& aUnicode,
    69                                               CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
    70 	{
    71 	return DoConvertFromUnicode(aReplacementForUnconvertibleUnicodeCharacters, 
    72 	                            aForeign, aUnicode, aIndicesOfUnconvertibleCharacters) ;
    73 	}
    74 
    75 /**
    76  * Converts text from Unicode to Shift-JIS 
    77  * Note that CnvShiftJis does NOT support extensions through use of additional character sets,
    78  * instead multiple versions are built for the different (operator defined) implementations
    79  * of Shift-JIS which need to be supported and the correct one installed at ROM build time,
    80  * and this method is only supplied for purposes of interface compatibility. 
    81  * @since Internationalization_6.2
    82  * @internalTechnology
    83  */
    84 EXPORT_C TInt CnvShiftJis::ConvertFromUnicode(CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */, 
    85                                               const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
    86                                               TDes8& aForeign, const TDesC16& aUnicode, 
    87                                               CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, 
    88                                               const TArray<CnvUtilities::SCharacterSet>& /* aArrayOfAdditionalCharacterSets */)
    89 	{
    90 	return DoConvertFromUnicode(aReplacementForUnconvertibleUnicodeCharacters, 
    91 	                            aForeign, aUnicode, aIndicesOfUnconvertibleCharacters) ;
    92 	}
    93 
    94 EXPORT_C TInt CnvShiftJis::ConvertToUnicode(CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */,
    95                                             TDes16& aUnicode, const TDesC8& aForeign, TInt& aNumberOfUnconvertibleCharacters,
    96                                             TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
    97 	{
    98 	return DoConvertToUnicode(aUnicode, aForeign, aNumberOfUnconvertibleCharacters, 
    99                               aIndexOfFirstByteOfFirstUnconvertibleCharacter) ;
   100 	}
   101 
   102 
   103 /**
   104  * Converts text from Shift-JIS to Unicode 
   105  * 
   106  * @since Internationalization_6.2
   107  * @internalTechnology
   108  */
   109 EXPORT_C TInt CnvShiftJis::ConvertToUnicode(CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */, 
   110                                             TDes16& aUnicode, const TDesC8& aForeign, 
   111                                             TInt& aNumberOfUnconvertibleCharacters, 
   112                                             TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, 
   113                                             const TArray<CnvUtilities::SMethod>& /* aArrayOfAdditionalMethods */)
   114 	{
   115 	return DoConvertToUnicode(aUnicode, aForeign, aNumberOfUnconvertibleCharacters, 
   116                               aIndexOfFirstByteOfFirstUnconvertibleCharacter) ;
   117 
   118 	}
   119 	
   120 	
   121 
   122 /**
   123 This function actually does the work of converting Shift-JIS input to unicode output.
   124 
   125 @param TDes16& aUnicode The output buffer
   126 @param const TDesC8& aForeign The input buffer
   127 @param TInt& aNumberOfUnconvertibleCharacters Number of input characters which were processed but couldn't be converted. 
   128 @param TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter Locates first untranslatable character in input buffer
   129 @return CCnvCharacterSetConverter::EErrorIllFormedInput if the input was invalid, otherwise the number of bytes in the input buffer which weren't be processed (e.g. due to output buffer overflow). 
   130 @internalTechnology
   131 */
   132 	
   133 TInt CnvShiftJis::DoConvertToUnicode(TDes16& aUnicode, const TDesC8& aForeign, 
   134                                      TInt& aNumberOfUnconvertibleCharacters, 
   135                                      TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
   136 	{
   137 	TUint foreignBytePointer = 0 ;
   138 	TUint row ;
   139 	TUint column ;
   140 	TUint8 foreignCharByte ;
   141 	TUint8 foreignCharStart ;
   142 	TChar unicodeChar ;
   143 	TBool finished = EFalse ;
   144 	TInt charsConverted = 0 ;
   145 	TUint16 unicodeUnmappedCharacter = getUnicodeUnmappedCharacter() ;
   146 	TInt unicodeBufferLength = aUnicode.MaxLength() ;
   147 	TInt foreignDataLength = aForeign.Length() ;
   148 		
   149 	aIndexOfFirstByteOfFirstUnconvertibleCharacter = -1 ;
   150 	aNumberOfUnconvertibleCharacters = 0 ;
   151 
   152 	// Check for valid input and output buffers
   153 	if (( unicodeBufferLength == 0) || foreignDataLength == 0)
   154 		{
   155 		finished = ETrue ;		
   156 		}
   157 		
   158 	// Reset output buffer	
   159 	aUnicode.Zero() ;
   160 
   161 	// Perform conversion		
   162 	while (!finished)
   163 		{
   164 		foreignCharStart = foreignBytePointer ;
   165 		foreignCharByte = aForeign[foreignBytePointer++] ;
   166 
   167 		// Look for (and handle) CR/LF pairs in ShiftJis input stream.
   168 		// It is a specific requirement from Symbian KK that CR/LF pairs
   169 		// in the input stream be converted to Unicode LF characters 
   170 		if((KSJISCarriageReturn == foreignCharByte) && (foreignBytePointer < foreignDataLength))
   171 			{
   172 			// check next byte
   173 			if(KSJISLineFeed == aForeign[foreignBytePointer]) 
   174 				{
   175 				// CR code
   176 				unicodeChar = KUnicodeLineFeed ;
   177 				foreignBytePointer++ ;
   178 				}
   179 			else
   180 				{					
   181 				unicodeChar = KUnicodeCarriageReturn ;
   182 				}
   183 			}
   184 		else
   185 			{
   186 			if (((foreignCharByte > KSingleByteRangeFirstBlockEnd) && 
   187 			     (foreignCharByte < KSingleByteRangeSecondBlockStart)) ||
   188 			    ((foreignCharByte > KSingleByteRangeSecondBlockEnd) &&
   189 			     (foreignCharByte <= KLeadByteMax)))
   190 				{
   191 				if (foreignBytePointer < foreignDataLength)
   192 					{
   193 					// Potential 2 byte shiftJis character
   194 					row = foreignCharByte ;
   195 					column = aForeign[foreignBytePointer] ;
   196 					if (((column <= KTrailByteMax) && (column >= KTrailByteMin)) && column != KTrailByteIllegal)
   197 						{
   198 						foreignBytePointer++ ;	
   199 						unicodeChar = lookupUnicodeChar(row,column) ;	
   200 						}
   201 					else
   202 						{
   203 						unicodeChar = unicodeUnmappedCharacter ;
   204 						}
   205 					}
   206 				else
   207 					{
   208 					// Only got the first byte of a 2 byte character
   209 					// reset "read" pointer to beginning of character
   210 					// and bail out!
   211 					finished = ETrue ;
   212 					foreignBytePointer-- ;
   213 					continue ;
   214 					}
   215 				}
   216 			else
   217 				{
   218 				//Probably a single byte shiftJis chracter
   219 				row = 0 ;
   220 				column = foreignCharByte ;
   221 				unicodeChar = lookupUnicodeChar(row,column) ;
   222 				}
   223 			}
   224 			
   225 
   226 		// Check for unconvertible characters.
   227 		if (unicodeChar == unicodeUnmappedCharacter)
   228 			{
   229 			if (aIndexOfFirstByteOfFirstUnconvertibleCharacter == -1)
   230 				{
   231 				aIndexOfFirstByteOfFirstUnconvertibleCharacter = foreignCharStart ;
   232 				}
   233 			aNumberOfUnconvertibleCharacters++ ;
   234 			}
   235 			
   236 		// Append the converted (or not!) character to the output buffer
   237 		aUnicode.Append(unicodeChar);
   238 		charsConverted++ ;
   239 		
   240 		// Check for end of input buffer or output buffer full
   241 		if ((charsConverted >= unicodeBufferLength) || (foreignBytePointer >= foreignDataLength))
   242 			finished = ETrue ;
   243 		}
   244 	
   245 	// Evaluate success of the operation and either return error code (currently just 
   246 	// invalid input) or return number of un-processed characters in input buffer in
   247 	// case of output buffer being filled before input fully consumed (0 means all
   248 	// characters consumed)
   249 	TInt returnValue ;
   250 	if (foreignDataLength && !charsConverted)
   251 		{
   252 		// Input must contain at least one complete character to be considered valid Shift-JIS.
   253 		returnValue = CCnvCharacterSetConverter::EErrorIllFormedInput ;
   254 		}
   255 	else
   256 		{
   257 		returnValue = foreignDataLength - foreignBytePointer;
   258 		}
   259 	return returnValue ;
   260 	}
   261 	
   262 
   263 
   264 	
   265 /**
   266 This function actually does the work of converting converting unicode input to Shift-JIS  output.
   267 
   268 @param const TDesC8& aReplacementForUnconvertibleUnicodeCharacters byte sequence to be be used as output for unicode characters which have no mapping defined.
   269 @param TDes16& aUnicode The input buffer
   270 @param const TDesC8& aForeign The output buffer
   271 @return The number of unicode characters in the input buffer which weren't be processed (e.g. due to output buffer overflow). 
   272 @internalTechnology
   273 */
   274 TInt CnvShiftJis::DoConvertFromUnicode(const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
   275 	                                   TDes8& aForeign, const TDesC16& aUnicode, 
   276 	                                   CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
   277 	{
   278 	TUint unicodePointer = 0 ;
   279 	TUint row ;
   280 	TUint column ;
   281 	TUint8 foreignCharByte ;
   282 	TUint16 foreignChar ;
   283 	TUint16 unicodeChar ;
   284 	TBool finished = EFalse ;
   285 	TInt charsConverted = 0 ;
   286 	TInt unicodeLength = aUnicode.Length() ;
   287 	TInt foreignMaxLength = aForeign.MaxLength() ;
   288 	TUint16 foreignUnmappedCharacter = getForeignUnmappedCharacter() ;
   289 	CCnvCharacterSetConverter::TDowngradeForExoticLineTerminatingCharacters downgradeForExoticLineTerminatingCharacters = CCnvCharacterSetConverter::EDowngradeExoticLineTerminatingCharactersToCarriageReturnLineFeed;
   290 	TBool downgradeExoticLineTerminatingCharacters	= EFalse ;
   291 	
   292 	
   293 	// Check for valid input and output buffers
   294 	if ((unicodeLength == 0) ||  foreignMaxLength == 0)
   295 		{
   296 		finished = ETrue ;		
   297 		}
   298 	
   299 	// If we've been called from an instance of CCnvCharacterSetConverter we can retrieve its state
   300 	// from Thread Local Storage. This really isn't very nice but there's no other way we can get
   301 	// hold of settings like downgrade for unicode line termination characters without breaking
   302 	// compatibility with the existing plug-in interface!
   303 	CCnvCharacterSetConverter* currentCharacterSetConverter = (CCnvCharacterSetConverter*)TTlsData::CurrentCharacterSetConverter();
   304 	if (currentCharacterSetConverter)
   305 		{
   306 		downgradeForExoticLineTerminatingCharacters = currentCharacterSetConverter->GetDowngradeForExoticLineTerminatingCharacters() ;
   307 		downgradeExoticLineTerminatingCharacters = ETrue ;
   308 		}
   309 		
   310 	// Reset output buffer
   311 	aForeign.Zero();
   312 		
   313 	// Process input buffer
   314 	while (!finished)
   315 		{
   316 		
   317 		// Look up foreign Char
   318 		unicodeChar = aUnicode[unicodePointer] ;
   319 		
   320 		// Check for any downgrade of Unicode line endings characters required if we've got
   321 		// a Unicode Line-Feed or Paragraph-Seperator character to deal with. 
   322 		if (downgradeExoticLineTerminatingCharacters && 
   323 		    ((unicodeChar==KUnicodeLineFeed) || (unicodeChar==KUnicodeParagraphSeperator)))
   324 			{
   325 			if (downgradeForExoticLineTerminatingCharacters == CCnvCharacterSetConverter::EDowngradeExoticLineTerminatingCharactersToCarriageReturnLineFeed)
   326 				{
   327 				if (aForeign.Length() < (foreignMaxLength - 1))
   328 					{
   329 					aForeign.Append(KSJISCarriageReturn) ;
   330 					aForeign.Append(KSJISLineFeed) ;
   331 					charsConverted++ ;
   332 					}
   333 					else
   334 					{
   335 					// Foreign buffer full!
   336 					finished = ETrue;
   337 					}
   338 				}
   339 			else if (downgradeForExoticLineTerminatingCharacters == CCnvCharacterSetConverter::EDowngradeExoticLineTerminatingCharactersToJustLineFeed)
   340 				{
   341 				if (aForeign.Length() < foreignMaxLength)
   342 					{
   343 					aForeign.Append(KSJISLineFeed) ;
   344 					charsConverted++ ;
   345 					}
   346 					else
   347 					{
   348 					// Foreign buffer full!
   349 					finished = ETrue;
   350 					}
   351 				}		
   352 			}
   353 		else
   354 			{	
   355 			row = unicodeChar / 256 ;
   356 			column = unicodeChar % 256 ;		
   357 			foreignChar = lookupForeignChar(row, column) ;		
   358 
   359 			// Check for untranslatable character 
   360 			if ((foreignChar == foreignUnmappedCharacter) &&
   361 			    (aForeign.Length() < (foreignMaxLength - 1)))
   362 				{
   363 				aIndicesOfUnconvertibleCharacters.AppendIndex(unicodePointer) ;
   364 				aForeign.Append(aReplacementForUnconvertibleUnicodeCharacters) ;
   365 				}	
   366 			else if ((foreignChar <= 0xFF) && (aForeign.Length() < foreignMaxLength))
   367 				{
   368 				// Single byte character
   369 				foreignCharByte = (TUint8) foreignChar ;
   370 				aForeign.Append(foreignCharByte) ;
   371 				charsConverted++ ;
   372 				}
   373 			else if (aForeign.Length() < (foreignMaxLength - 1))
   374 				{
   375 				// Two byte character
   376 				foreignCharByte = (TUint8) (foreignChar >> 8 ) ;
   377 				aForeign.Append(foreignCharByte) ;
   378 				foreignCharByte = (TUint8) (foreignChar & 0xFF) ;
   379 				aForeign.Append(foreignCharByte) ;
   380 				charsConverted++ ;
   381 				}
   382 			else
   383 				{
   384 				// Foreign buffer full!
   385 				finished = ETrue;
   386 				}
   387 			}
   388 		
   389 		// Check for terminating condition (input buffer consumed or output buffer full)	
   390 		if (!finished && (++unicodePointer >= unicodeLength))
   391 			{
   392 			finished = ETrue ;
   393 			}
   394 		}
   395 	
   396 	// Return number of input characters *not* processsed (will be zero unless output
   397 	// buffer has been filled before all input consumed)	
   398 	return unicodeLength - unicodePointer;
   399 	}
   400