sl@0: /*
sl@0: * Copyright (c) 2005-2009 Nokia Corporation and/or its subsidiary(-ies).
sl@0: * All rights reserved.
sl@0: * This component and the accompanying materials are made available
sl@0: * under the terms of "Eclipse Public License v1.0"
sl@0: * which accompanies this distribution, and is available
sl@0: * at the URL "http://www.eclipse.org/legal/epl-v10.html".
sl@0: *
sl@0: * Initial Contributors:
sl@0: * Nokia Corporation - initial contribution.
sl@0: *
sl@0: * Contributors:
sl@0: *
sl@0: * Description: 
sl@0: *
sl@0: */
sl@0: 
sl@0: 
sl@0: #include <e32std.h>
sl@0: #include <charconv.h>
sl@0: #include <convutils.h>
sl@0: #include "jisx0201.h"
sl@0: #include "jisx0208.h"
sl@0: #include "shiftjis.h"
sl@0: #include <convdata.h>
sl@0: #include "charconv_tls.h"
sl@0: #include "charconv_table_utilities.h"
sl@0: 
sl@0: // First-byte classification limits. A byte is treated as the lead byte of a
sl@0: // 2-byte Shift-JIS sequence when it lies strictly between
sl@0: // KSingleByteRangeFirstBlockEnd and KSingleByteRangeSecondBlockStart, or above
sl@0: // KSingleByteRangeSecondBlockEnd up to and including KLeadByteMax.
sl@0: const TUint KSingleByteRangeFirstBlockEnd = 0x80;
sl@0: const TUint KSingleByteRangeSecondBlockStart = 0xa0;
sl@0: const TUint KSingleByteRangeSecondBlockEnd = 0xdf;
sl@0: const TUint KLeadByteMax = 0xfc;
sl@0: 
sl@0: // Trail byte limits for 2-byte Shift-JIS sequences: a trail byte must lie in
sl@0: // KTrailByteMin..KTrailByteMax inclusive and must not equal KTrailByteIllegal.
sl@0: const TUint KTrailByteIllegal = 0x7f;
sl@0: const TUint KTrailByteMin = 0x40;
sl@0: const TUint KTrailByteMax = 0xfc;
sl@0: 
sl@0: // Line-ending bytes on the Shift-JIS side.
sl@0: const TUint8 KSJISCarriageReturn = 0x0d;	// Carriage Return
sl@0: const TUint8 KSJISLineFeed = 0x0a;	// Line Feed
sl@0: 
sl@0: // Line-ending characters on the Unicode side. Despite its name,
sl@0: // KUnicodeLineFeed holds U+2028 (LINE SEPARATOR), not U+000A.
sl@0: const TUint16 KUnicodeLineFeed = 0x2028;	// U+2028 LINE SEPARATOR
sl@0: const TUint16 KUnicodeParagraphSeperator = 0x2029;	// U+2029 PARAGRAPH SEPARATOR
sl@0: const TUint16 KUnicodeCarriageReturn = KSJISCarriageReturn;	// same code point as the Shift-JIS CR
sl@0: 
sl@0: 
sl@0: /**
sl@0:  * Returns the Shift-JIS data used in place of Unicode characters that cannot
sl@0:  * be converted.
sl@0:  *
sl@0:  * Simply forwards to ReplacementForUnconvertibleUnicodeCharacters_internal().
sl@0:  *
sl@0:  * @return Reference to the 8-bit descriptor holding the replacement bytes.
sl@0:  * @since Internationalization_6.2
sl@0:  * @internalTechnology
sl@0:  */
sl@0: EXPORT_C const TDesC8& CnvShiftJis::ReplacementForUnconvertibleUnicodeCharacters()
sl@0: 	{
sl@0: 	const TDesC8& replacement = ReplacementForUnconvertibleUnicodeCharacters_internal();
sl@0: 	return replacement;
sl@0: 	}
sl@0: 
sl@0: 
sl@0: /**
sl@0:  * Converts text from Unicode to Shift-JIS.
sl@0:  *
sl@0:  * The endianness argument is unused. Every other argument is handed unchanged
sl@0:  * to DoConvertFromUnicode(), and that function's result is returned.
sl@0:  *
sl@0:  * @since Internationalization_6.2
sl@0:  * @internalTechnology
sl@0:  */
sl@0: EXPORT_C TInt CnvShiftJis::ConvertFromUnicode(
sl@0: 	CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */,
sl@0: 	const TDesC8& aReplacementForUnconvertibleUnicodeCharacters,
sl@0: 	TDes8& aForeign,
sl@0: 	const TDesC16& aUnicode,
sl@0: 	CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
sl@0: 	{
sl@0: 	const TInt result = DoConvertFromUnicode(aReplacementForUnconvertibleUnicodeCharacters,
sl@0: 	                                         aForeign,
sl@0: 	                                         aUnicode,
sl@0: 	                                         aIndicesOfUnconvertibleCharacters);
sl@0: 	return result;
sl@0: 	}
sl@0: 
sl@0: /**
sl@0:  * Converts text from Unicode to Shift-JIS (overload taking additional character sets).
sl@0:  *
sl@0:  * CnvShiftJis does NOT support extension through additional character sets.
sl@0:  * Different (operator defined) flavours of Shift-JIS are built as separate
sl@0:  * versions and the right one is installed at ROM build time. This overload
sl@0:  * exists purely for interface compatibility: both the endianness argument and
sl@0:  * the array of additional character sets are ignored, and the call is
sl@0:  * forwarded to DoConvertFromUnicode().
sl@0:  *
sl@0:  * @since Internationalization_6.2
sl@0:  * @internalTechnology
sl@0:  */
sl@0: EXPORT_C TInt CnvShiftJis::ConvertFromUnicode(
sl@0: 	CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */,
sl@0: 	const TDesC8& aReplacementForUnconvertibleUnicodeCharacters,
sl@0: 	TDes8& aForeign,
sl@0: 	const TDesC16& aUnicode,
sl@0: 	CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters,
sl@0: 	const TArray<CnvUtilities::SCharacterSet>& /* aArrayOfAdditionalCharacterSets */)
sl@0: 	{
sl@0: 	const TInt result = DoConvertFromUnicode(aReplacementForUnconvertibleUnicodeCharacters,
sl@0: 	                                         aForeign,
sl@0: 	                                         aUnicode,
sl@0: 	                                         aIndicesOfUnconvertibleCharacters);
sl@0: 	return result;
sl@0: 	}
sl@0: 
sl@0: // Converts text from Shift-JIS to Unicode. The endianness argument is unused;
sl@0: // the remaining arguments are forwarded unchanged to DoConvertToUnicode() and
sl@0: // its result is returned.
sl@0: EXPORT_C TInt CnvShiftJis::ConvertToUnicode(
sl@0: 	CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */,
sl@0: 	TDes16& aUnicode,
sl@0: 	const TDesC8& aForeign,
sl@0: 	TInt& aNumberOfUnconvertibleCharacters,
sl@0: 	TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
sl@0: 	{
sl@0: 	const TInt result = DoConvertToUnicode(aUnicode,
sl@0: 	                                       aForeign,
sl@0: 	                                       aNumberOfUnconvertibleCharacters,
sl@0: 	                                       aIndexOfFirstByteOfFirstUnconvertibleCharacter);
sl@0: 	return result;
sl@0: 	}
sl@0: 
sl@0: 
sl@0: /**
sl@0:  * Converts text from Shift-JIS to Unicode (overload taking additional methods).
sl@0:  *
sl@0:  * Both the endianness argument and the array of additional methods are
sl@0:  * ignored. The remaining arguments are forwarded unchanged to
sl@0:  * DoConvertToUnicode() and its result is returned.
sl@0:  *
sl@0:  * @since Internationalization_6.2
sl@0:  * @internalTechnology
sl@0:  */
sl@0: EXPORT_C TInt CnvShiftJis::ConvertToUnicode(
sl@0: 	CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */,
sl@0: 	TDes16& aUnicode,
sl@0: 	const TDesC8& aForeign,
sl@0: 	TInt& aNumberOfUnconvertibleCharacters,
sl@0: 	TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter,
sl@0: 	const TArray<CnvUtilities::SMethod>& /* aArrayOfAdditionalMethods */)
sl@0: 	{
sl@0: 	const TInt result = DoConvertToUnicode(aUnicode,
sl@0: 	                                       aForeign,
sl@0: 	                                       aNumberOfUnconvertibleCharacters,
sl@0: 	                                       aIndexOfFirstByteOfFirstUnconvertibleCharacter);
sl@0: 	return result;
sl@0: 	}
sl@0: 	
sl@0: 	
sl@0: 
sl@0: /**
sl@0: This function actually does the work of converting Shift-JIS input to Unicode output.
sl@0: 
sl@0: The output buffer is emptied first. Input is then consumed one character at a
sl@0: time until the input is exhausted, the output buffer holds MaxLength()
sl@0: characters, or only the lead byte of a 2-byte character is left in the input.
sl@0: A CR/LF byte pair is emitted as the single character KUnicodeLineFeed (0x2028).
sl@0: A lead byte followed by an out-of-range trail byte is emitted as the unmapped
sl@0: character and only the lead byte is consumed.
sl@0: 
sl@0: @param aUnicode The output buffer. Any existing content is discarded.
sl@0: @param aForeign The input buffer.
sl@0: @param aNumberOfUnconvertibleCharacters On return, the number of input characters which were processed but came out as the unmapped character.
sl@0: @param aIndexOfFirstByteOfFirstUnconvertibleCharacter On return, the offset in the input buffer of the first such character, or -1 if there was none.
sl@0: @return CCnvCharacterSetConverter::EErrorIllFormedInput if the input is non-empty but not a single character was converted (this includes the case where aUnicode.MaxLength() is 0), otherwise the number of bytes in the input buffer which weren't processed (e.g. due to output buffer overflow).
sl@0: @internalTechnology
sl@0: */
sl@0: 	
sl@0: TInt CnvShiftJis::DoConvertToUnicode(TDes16& aUnicode, const TDesC8& aForeign, 
sl@0:                                      TInt& aNumberOfUnconvertibleCharacters, 
sl@0:                                      TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
sl@0: 	{
sl@0: 	TInt foreignBytePointer = 0 ;
sl@0: 	TUint row ;
sl@0: 	TUint column ;
sl@0: 	TUint8 foreignCharByte ;
sl@0: 	// Must be as wide as the read pointer: an 8-bit variable here would wrap
sl@0: 	// for inputs longer than 255 bytes and report a wrong index to the caller.
sl@0: 	TInt foreignCharStart ;
sl@0: 	TChar unicodeChar ;
sl@0: 	TBool finished = EFalse ;
sl@0: 	TInt charsConverted = 0 ;
sl@0: 	TUint16 unicodeUnmappedCharacter = getUnicodeUnmappedCharacter() ;
sl@0: 	TInt unicodeBufferLength = aUnicode.MaxLength() ;
sl@0: 	TInt foreignDataLength = aForeign.Length() ;
sl@0: 		
sl@0: 	aIndexOfFirstByteOfFirstUnconvertibleCharacter = -1 ;
sl@0: 	aNumberOfUnconvertibleCharacters = 0 ;
sl@0: 
sl@0: 	// Nothing to do if there is no input or no room for output
sl@0: 	if (( unicodeBufferLength == 0) || foreignDataLength == 0)
sl@0: 		{
sl@0: 		finished = ETrue ;		
sl@0: 		}
sl@0: 		
sl@0: 	// Reset output buffer	
sl@0: 	aUnicode.Zero() ;
sl@0: 
sl@0: 	// Perform conversion		
sl@0: 	while (!finished)
sl@0: 		{
sl@0: 		foreignCharStart = foreignBytePointer ;
sl@0: 		foreignCharByte = aForeign[foreignBytePointer++] ;
sl@0: 
sl@0: 		// Look for (and handle) CR/LF pairs in ShiftJis input stream.
sl@0: 		// It is a specific requirement from Symbian KK that CR/LF pairs
sl@0: 		// in the input stream be converted to a single KUnicodeLineFeed character.
sl@0: 		// A CR that is the very last input byte takes the ordinary single-byte path below.
sl@0: 		if((KSJISCarriageReturn == foreignCharByte) && (foreignBytePointer < foreignDataLength))
sl@0: 			{
sl@0: 			// check next byte
sl@0: 			if(KSJISLineFeed == aForeign[foreignBytePointer]) 
sl@0: 				{
sl@0: 				// CR/LF pair: emit one character and consume both bytes
sl@0: 				unicodeChar = KUnicodeLineFeed ;
sl@0: 				foreignBytePointer++ ;
sl@0: 				}
sl@0: 			else
sl@0: 				{					
sl@0: 				// Lone CR: passed through unchanged
sl@0: 				unicodeChar = KUnicodeCarriageReturn ;
sl@0: 				}
sl@0: 			}
sl@0: 		else
sl@0: 			{
sl@0: 			if (((foreignCharByte > KSingleByteRangeFirstBlockEnd) && 
sl@0: 			     (foreignCharByte < KSingleByteRangeSecondBlockStart)) ||
sl@0: 			    ((foreignCharByte > KSingleByteRangeSecondBlockEnd) &&
sl@0: 			     (foreignCharByte <= KLeadByteMax)))
sl@0: 				{
sl@0: 				if (foreignBytePointer < foreignDataLength)
sl@0: 					{
sl@0: 					// Potential 2 byte shiftJis character
sl@0: 					row = foreignCharByte ;
sl@0: 					column = aForeign[foreignBytePointer] ;
sl@0: 					if (((column <= KTrailByteMax) && (column >= KTrailByteMin)) && column != KTrailByteIllegal)
sl@0: 						{
sl@0: 						foreignBytePointer++ ;	
sl@0: 						unicodeChar = lookupUnicodeChar(row,column) ;	
sl@0: 						}
sl@0: 					else
sl@0: 						{
sl@0: 						// Invalid trail byte: report the lead byte as unconvertible and
sl@0: 						// leave the trail byte to be re-read as the start of the next character
sl@0: 						unicodeChar = unicodeUnmappedCharacter ;
sl@0: 						}
sl@0: 					}
sl@0: 				else
sl@0: 					{
sl@0: 					// Only got the first byte of a 2 byte character
sl@0: 					// reset "read" pointer to beginning of character
sl@0: 					// and bail out!
sl@0: 					finished = ETrue ;
sl@0: 					foreignBytePointer-- ;
sl@0: 					continue ;
sl@0: 					}
sl@0: 				}
sl@0: 			else
sl@0: 				{
sl@0: 				// Probably a single byte shiftJis character
sl@0: 				row = 0 ;
sl@0: 				column = foreignCharByte ;
sl@0: 				unicodeChar = lookupUnicodeChar(row,column) ;
sl@0: 				}
sl@0: 			}
sl@0: 			
sl@0: 
sl@0: 		// Check for unconvertible characters.
sl@0: 		if (unicodeChar == unicodeUnmappedCharacter)
sl@0: 			{
sl@0: 			if (aIndexOfFirstByteOfFirstUnconvertibleCharacter == -1)
sl@0: 				{
sl@0: 				aIndexOfFirstByteOfFirstUnconvertibleCharacter = foreignCharStart ;
sl@0: 				}
sl@0: 			aNumberOfUnconvertibleCharacters++ ;
sl@0: 			}
sl@0: 			
sl@0: 		// Append the converted (or not!) character to the output buffer
sl@0: 		aUnicode.Append(unicodeChar);
sl@0: 		charsConverted++ ;
sl@0: 		
sl@0: 		// Check for end of input buffer or output buffer full
sl@0: 		if ((charsConverted >= unicodeBufferLength) || (foreignBytePointer >= foreignDataLength))
sl@0: 			finished = ETrue ;
sl@0: 		}
sl@0: 	
sl@0: 	// Evaluate success of the operation and either return error code (currently just 
sl@0: 	// invalid input) or return number of un-processed bytes in input buffer in
sl@0: 	// case of output buffer being filled before input fully consumed (0 means all
sl@0: 	// bytes consumed)
sl@0: 	// NOTE(review): a zero-capacity output buffer with non-empty input also ends up
sl@0: 	// in the error branch below - confirm whether callers rely on that.
sl@0: 	TInt returnValue ;
sl@0: 	if (foreignDataLength && !charsConverted)
sl@0: 		{
sl@0: 		// Input must contain at least one complete character to be considered valid Shift-JIS.
sl@0: 		returnValue = CCnvCharacterSetConverter::EErrorIllFormedInput ;
sl@0: 		}
sl@0: 	else
sl@0: 		{
sl@0: 		returnValue = foreignDataLength - foreignBytePointer;
sl@0: 		}
sl@0: 	return returnValue ;
sl@0: 	}
sl@0: 	
sl@0: 
sl@0: 
sl@0: 	
sl@0: /**
sl@0: This function actually does the work of converting converting unicode input to Shift-JIS  output.
sl@0: 
sl@0: @param const TDesC8& aReplacementForUnconvertibleUnicodeCharacters byte sequence to be be used as output for unicode characters which have no mapping defined.
sl@0: @param TDes16& aUnicode The input buffer
sl@0: @param const TDesC8& aForeign The output buffer
sl@0: @return The number of unicode characters in the input buffer which weren't be processed (e.g. due to output buffer overflow). 
sl@0: @internalTechnology
sl@0: */
sl@0: TInt CnvShiftJis::DoConvertFromUnicode(const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, 
sl@0: 	                                   TDes8& aForeign, const TDesC16& aUnicode, 
sl@0: 	                                   CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
sl@0: 	{
sl@0: 	TUint unicodePointer = 0 ;
sl@0: 	TUint row ;
sl@0: 	TUint column ;
sl@0: 	TUint8 foreignCharByte ;
sl@0: 	TUint16 foreignChar ;
sl@0: 	TUint16 unicodeChar ;
sl@0: 	TBool finished = EFalse ;
sl@0: 	TInt charsConverted = 0 ;
sl@0: 	TInt unicodeLength = aUnicode.Length() ;
sl@0: 	TInt foreignMaxLength = aForeign.MaxLength() ;
sl@0: 	TUint16 foreignUnmappedCharacter = getForeignUnmappedCharacter() ;
sl@0: 	CCnvCharacterSetConverter::TDowngradeForExoticLineTerminatingCharacters downgradeForExoticLineTerminatingCharacters = CCnvCharacterSetConverter::EDowngradeExoticLineTerminatingCharactersToCarriageReturnLineFeed;
sl@0: 	TBool downgradeExoticLineTerminatingCharacters	= EFalse ;
sl@0: 	
sl@0: 	
sl@0: 	// Check for valid input and output buffers
sl@0: 	if ((unicodeLength == 0) ||  foreignMaxLength == 0)
sl@0: 		{
sl@0: 		finished = ETrue ;		
sl@0: 		}
sl@0: 	
sl@0: 	// If we've been called from an instance of CCnvCharacterSetConverter we can retrieve its state
sl@0: 	// from Thread Local Storage. This really isn't very nice but there's no other way we can get
sl@0: 	// hold of settings like downgrade for unicode line termination characters without breaking
sl@0: 	// compatibility with the existing plug-in interface!
sl@0: 	CCnvCharacterSetConverter* currentCharacterSetConverter = (CCnvCharacterSetConverter*)TTlsData::CurrentCharacterSetConverter();
sl@0: 	if (currentCharacterSetConverter)
sl@0: 		{
sl@0: 		downgradeForExoticLineTerminatingCharacters = currentCharacterSetConverter->GetDowngradeForExoticLineTerminatingCharacters() ;
sl@0: 		downgradeExoticLineTerminatingCharacters = ETrue ;
sl@0: 		}
sl@0: 		
sl@0: 	// Reset output buffer
sl@0: 	aForeign.Zero();
sl@0: 		
sl@0: 	// Process input buffer
sl@0: 	while (!finished)
sl@0: 		{
sl@0: 		
sl@0: 		// Look up foreign Char
sl@0: 		unicodeChar = aUnicode[unicodePointer] ;
sl@0: 		
sl@0: 		// Check for any downgrade of Unicode line endings characters required if we've got
sl@0: 		// a Unicode Line-Feed or Paragraph-Seperator character to deal with. 
sl@0: 		if (downgradeExoticLineTerminatingCharacters && 
sl@0: 		    ((unicodeChar==KUnicodeLineFeed) || (unicodeChar==KUnicodeParagraphSeperator)))
sl@0: 			{
sl@0: 			if (downgradeForExoticLineTerminatingCharacters == CCnvCharacterSetConverter::EDowngradeExoticLineTerminatingCharactersToCarriageReturnLineFeed)
sl@0: 				{
sl@0: 				if (aForeign.Length() < (foreignMaxLength - 1))
sl@0: 					{
sl@0: 					aForeign.Append(KSJISCarriageReturn) ;
sl@0: 					aForeign.Append(KSJISLineFeed) ;
sl@0: 					charsConverted++ ;
sl@0: 					}
sl@0: 					else
sl@0: 					{
sl@0: 					// Foreign buffer full!
sl@0: 					finished = ETrue;
sl@0: 					}
sl@0: 				}
sl@0: 			else if (downgradeForExoticLineTerminatingCharacters == CCnvCharacterSetConverter::EDowngradeExoticLineTerminatingCharactersToJustLineFeed)
sl@0: 				{
sl@0: 				if (aForeign.Length() < foreignMaxLength)
sl@0: 					{
sl@0: 					aForeign.Append(KSJISLineFeed) ;
sl@0: 					charsConverted++ ;
sl@0: 					}
sl@0: 					else
sl@0: 					{
sl@0: 					// Foreign buffer full!
sl@0: 					finished = ETrue;
sl@0: 					}
sl@0: 				}		
sl@0: 			}
sl@0: 		else
sl@0: 			{	
sl@0: 			row = unicodeChar / 256 ;
sl@0: 			column = unicodeChar % 256 ;		
sl@0: 			foreignChar = lookupForeignChar(row, column) ;		
sl@0: 
sl@0: 			// Check for untranslatable character 
sl@0: 			if ((foreignChar == foreignUnmappedCharacter) &&
sl@0: 			    (aForeign.Length() < (foreignMaxLength - 1)))
sl@0: 				{
sl@0: 				aIndicesOfUnconvertibleCharacters.AppendIndex(unicodePointer) ;
sl@0: 				aForeign.Append(aReplacementForUnconvertibleUnicodeCharacters) ;
sl@0: 				}	
sl@0: 			else if ((foreignChar <= 0xFF) && (aForeign.Length() < foreignMaxLength))
sl@0: 				{
sl@0: 				// Single byte character
sl@0: 				foreignCharByte = (TUint8) foreignChar ;
sl@0: 				aForeign.Append(foreignCharByte) ;
sl@0: 				charsConverted++ ;
sl@0: 				}
sl@0: 			else if (aForeign.Length() < (foreignMaxLength - 1))
sl@0: 				{
sl@0: 				// Two byte character
sl@0: 				foreignCharByte = (TUint8) (foreignChar >> 8 ) ;
sl@0: 				aForeign.Append(foreignCharByte) ;
sl@0: 				foreignCharByte = (TUint8) (foreignChar & 0xFF) ;
sl@0: 				aForeign.Append(foreignCharByte) ;
sl@0: 				charsConverted++ ;
sl@0: 				}
sl@0: 			else
sl@0: 				{
sl@0: 				// Foreign buffer full!
sl@0: 				finished = ETrue;
sl@0: 				}
sl@0: 			}
sl@0: 		
sl@0: 		// Check for terminating condition (input buffer consumed or output buffer full)	
sl@0: 		if (!finished && (++unicodePointer >= unicodeLength))
sl@0: 			{
sl@0: 			finished = ETrue ;
sl@0: 			}
sl@0: 		}
sl@0: 	
sl@0: 	// Return number of input characters *not* processsed (will be zero unless output
sl@0: 	// buffer has been filled before all input consumed)	
sl@0: 	return unicodeLength - unicodePointer;
sl@0: 	}
sl@0: