sl@0: /* sl@0: * Copyright (c) 2005-2009 Nokia Corporation and/or its subsidiary(-ies). sl@0: * All rights reserved. sl@0: * This component and the accompanying materials are made available sl@0: * under the terms of "Eclipse Public License v1.0" sl@0: * which accompanies this distribution, and is available sl@0: * at the URL "http://www.eclipse.org/legal/epl-v10.html". sl@0: * sl@0: * Initial Contributors: sl@0: * Nokia Corporation - initial contribution. sl@0: * sl@0: * Contributors: sl@0: * sl@0: * Description: sl@0: * sl@0: */ sl@0: sl@0: sl@0: #include sl@0: #include sl@0: #include sl@0: #include "jisx0201.h" sl@0: #include "jisx0208.h" sl@0: #include "shiftjis.h" sl@0: #include sl@0: #include "charconv_tls.h" sl@0: #include "charconv_table_utilities.h" sl@0: sl@0: // Boundaries of the single-byte ranges. A byte strictly above KSingleByteRangeFirstBlockEnd and strictly below KSingleByteRangeSecondBlockStart, or strictly above KSingleByteRangeSecondBlockEnd and no greater than KLeadByteMax, is treated as the lead byte of a 2-byte Shift-JIS sequence sl@0: const TUint KSingleByteRangeFirstBlockEnd=0x80; sl@0: const TUint KSingleByteRangeSecondBlockStart=0xa0; sl@0: const TUint KSingleByteRangeSecondBlockEnd=0xdf; sl@0: const TUint KLeadByteMax = 0xfc; sl@0: sl@0: // Trail byte range for 2-byte Shift-JIS sequences (both bounds inclusive; KTrailByteIllegal is excluded from the range) sl@0: const TUint KTrailByteMin = 0x40 ; sl@0: const TUint KTrailByteMax = 0xFC ; sl@0: const TUint KTrailByteIllegal = 0x7F ; sl@0: sl@0: const TUint8 KSJISLineFeed = 0x0a ; // Shift-JIS value for Line Feed sl@0: const TUint8 KSJISCarriageReturn = 0x0d; // Shift-JIS value for Carriage Return sl@0: const TUint16 KUnicodeLineFeed = 0x2028; // U+2028 LINE SEPARATOR (despite the constant's name this is not U+000A LINE FEED) sl@0: const TUint16 KUnicodeParagraphSeperator = 0x2029; // U+2029 PARAGRAPH SEPARATOR sl@0: const TUint16 KUnicodeCarriageReturn = KSJISCarriageReturn ; // Unicode Carriage Return has the same value as the Shift-JIS one sl@0: sl@0: sl@0: /** sl@0: * Get the Shift-JIS replacement for Unicode characters which cannot be converted sl@0: * sl@0: * @return const TDesC8& 8-bit descriptor containing the Shift-JIS data which will replace any untranslatable Unicode characters.
sl@0: * @since Internationalization_6.2 sl@0: * @internalTechnology sl@0: */ sl@0: EXPORT_C const TDesC8& CnvShiftJis::ReplacementForUnconvertibleUnicodeCharacters() sl@0: { sl@0: return ReplacementForUnconvertibleUnicodeCharacters_internal(); sl@0: } sl@0: sl@0: sl@0: /** sl@0: * Converts text from Unicode to Shift-JIS sl@0: * sl@0: * @since Internationalization_6.2 sl@0: * @internalTechnology sl@0: */ sl@0: EXPORT_C TInt CnvShiftJis::ConvertFromUnicode(CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */, sl@0: const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, TDes8& aForeign, sl@0: const TDesC16& aUnicode, sl@0: CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters) sl@0: { sl@0: return DoConvertFromUnicode(aReplacementForUnconvertibleUnicodeCharacters, sl@0: aForeign, aUnicode, aIndicesOfUnconvertibleCharacters) ; sl@0: } sl@0: sl@0: /** sl@0: * Converts text from Unicode to Shift-JIS sl@0: * Note that CnvShiftJis does NOT support extensions through use of additional character sets, sl@0: * instead multiple versions are built for the different (operator defined) implementations sl@0: * of Shift-JIS which need to be supported and the correct one installed at ROM build time, sl@0: * and this method is only supplied for purposes of interface compatibility. 
sl@0: * @since Internationalization_6.2 sl@0: * @internalTechnology sl@0: */ sl@0: EXPORT_C TInt CnvShiftJis::ConvertFromUnicode(CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */, sl@0: const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, sl@0: TDes8& aForeign, const TDesC16& aUnicode, sl@0: CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters, sl@0: const TArray& /* aArrayOfAdditionalCharacterSets */) sl@0: { sl@0: return DoConvertFromUnicode(aReplacementForUnconvertibleUnicodeCharacters, sl@0: aForeign, aUnicode, aIndicesOfUnconvertibleCharacters) ; sl@0: } sl@0: sl@0: EXPORT_C TInt CnvShiftJis::ConvertToUnicode(CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */, sl@0: TDes16& aUnicode, const TDesC8& aForeign, TInt& aNumberOfUnconvertibleCharacters, sl@0: TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter) sl@0: { sl@0: return DoConvertToUnicode(aUnicode, aForeign, aNumberOfUnconvertibleCharacters, sl@0: aIndexOfFirstByteOfFirstUnconvertibleCharacter) ; sl@0: } sl@0: sl@0: sl@0: /** sl@0: * Converts text from Shift-JIS to Unicode sl@0: * sl@0: * @since Internationalization_6.2 sl@0: * @internalTechnology sl@0: */ sl@0: EXPORT_C TInt CnvShiftJis::ConvertToUnicode(CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */, sl@0: TDes16& aUnicode, const TDesC8& aForeign, sl@0: TInt& aNumberOfUnconvertibleCharacters, sl@0: TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, sl@0: const TArray& /* aArrayOfAdditionalMethods */) sl@0: { sl@0: return DoConvertToUnicode(aUnicode, aForeign, aNumberOfUnconvertibleCharacters, sl@0: aIndexOfFirstByteOfFirstUnconvertibleCharacter) ; sl@0: sl@0: } sl@0: sl@0: sl@0: sl@0: /** sl@0: This function actually does the work of converting Shift-JIS input to unicode output. 
sl@0: sl@0: @param TDes16& aUnicode The output buffer sl@0: @param const TDesC8& aForeign The input buffer sl@0: @param TInt& aNumberOfUnconvertibleCharacters Number of input characters which were processed but couldn't be converted. sl@0: @param TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter Locates first untranslatable character in input buffer sl@0: @return CCnvCharacterSetConverter::EErrorIllFormedInput if the input was invalid, otherwise the number of bytes in the input buffer which weren't be processed (e.g. due to output buffer overflow). sl@0: @internalTechnology sl@0: */ sl@0: sl@0: TInt CnvShiftJis::DoConvertToUnicode(TDes16& aUnicode, const TDesC8& aForeign, sl@0: TInt& aNumberOfUnconvertibleCharacters, sl@0: TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter) sl@0: { sl@0: TUint foreignBytePointer = 0 ; sl@0: TUint row ; sl@0: TUint column ; sl@0: TUint8 foreignCharByte ; sl@0: TUint8 foreignCharStart ; sl@0: TChar unicodeChar ; sl@0: TBool finished = EFalse ; sl@0: TInt charsConverted = 0 ; sl@0: TUint16 unicodeUnmappedCharacter = getUnicodeUnmappedCharacter() ; sl@0: TInt unicodeBufferLength = aUnicode.MaxLength() ; sl@0: TInt foreignDataLength = aForeign.Length() ; sl@0: sl@0: aIndexOfFirstByteOfFirstUnconvertibleCharacter = -1 ; sl@0: aNumberOfUnconvertibleCharacters = 0 ; sl@0: sl@0: // Check for valid input and output buffers sl@0: if (( unicodeBufferLength == 0) || foreignDataLength == 0) sl@0: { sl@0: finished = ETrue ; sl@0: } sl@0: sl@0: // Reset output buffer sl@0: aUnicode.Zero() ; sl@0: sl@0: // Perform conversion sl@0: while (!finished) sl@0: { sl@0: foreignCharStart = foreignBytePointer ; sl@0: foreignCharByte = aForeign[foreignBytePointer++] ; sl@0: sl@0: // Look for (and handle) CR/LF pairs in ShiftJis input stream. 
sl@0: // It is a specific requirement from Symbian KK that CR/LF pairs sl@0: // in the input stream be converted to Unicode LF characters sl@0: if((KSJISCarriageReturn == foreignCharByte) && (foreignBytePointer < foreignDataLength)) sl@0: { sl@0: // check next byte sl@0: if(KSJISLineFeed == aForeign[foreignBytePointer]) sl@0: { sl@0: // CR code sl@0: unicodeChar = KUnicodeLineFeed ; sl@0: foreignBytePointer++ ; sl@0: } sl@0: else sl@0: { sl@0: unicodeChar = KUnicodeCarriageReturn ; sl@0: } sl@0: } sl@0: else sl@0: { sl@0: if (((foreignCharByte > KSingleByteRangeFirstBlockEnd) && sl@0: (foreignCharByte < KSingleByteRangeSecondBlockStart)) || sl@0: ((foreignCharByte > KSingleByteRangeSecondBlockEnd) && sl@0: (foreignCharByte <= KLeadByteMax))) sl@0: { sl@0: if (foreignBytePointer < foreignDataLength) sl@0: { sl@0: // Potential 2 byte shiftJis character sl@0: row = foreignCharByte ; sl@0: column = aForeign[foreignBytePointer] ; sl@0: if (((column <= KTrailByteMax) && (column >= KTrailByteMin)) && column != KTrailByteIllegal) sl@0: { sl@0: foreignBytePointer++ ; sl@0: unicodeChar = lookupUnicodeChar(row,column) ; sl@0: } sl@0: else sl@0: { sl@0: unicodeChar = unicodeUnmappedCharacter ; sl@0: } sl@0: } sl@0: else sl@0: { sl@0: // Only got the first byte of a 2 byte character sl@0: // reset "read" pointer to beginning of character sl@0: // and bail out! sl@0: finished = ETrue ; sl@0: foreignBytePointer-- ; sl@0: continue ; sl@0: } sl@0: } sl@0: else sl@0: { sl@0: //Probably a single byte shiftJis chracter sl@0: row = 0 ; sl@0: column = foreignCharByte ; sl@0: unicodeChar = lookupUnicodeChar(row,column) ; sl@0: } sl@0: } sl@0: sl@0: sl@0: // Check for unconvertible characters. 
sl@0: if (unicodeChar == unicodeUnmappedCharacter) sl@0: { sl@0: if (aIndexOfFirstByteOfFirstUnconvertibleCharacter == -1) sl@0: { sl@0: aIndexOfFirstByteOfFirstUnconvertibleCharacter = foreignCharStart ; sl@0: } sl@0: aNumberOfUnconvertibleCharacters++ ; sl@0: } sl@0: sl@0: // Append the converted (or not!) character to the output buffer sl@0: aUnicode.Append(unicodeChar); sl@0: charsConverted++ ; sl@0: sl@0: // Check for end of input buffer or output buffer full sl@0: if ((charsConverted >= unicodeBufferLength) || (foreignBytePointer >= foreignDataLength)) sl@0: finished = ETrue ; sl@0: } sl@0: sl@0: // Evaluate success of the operation and either return error code (currently just sl@0: // invalid input) or return number of un-processed characters in input buffer in sl@0: // case of output buffer being filled before input fully consumed (0 means all sl@0: // characters consumed) sl@0: TInt returnValue ; sl@0: if (foreignDataLength && !charsConverted) sl@0: { sl@0: // Input must contain at least one complete character to be considered valid Shift-JIS. sl@0: returnValue = CCnvCharacterSetConverter::EErrorIllFormedInput ; sl@0: } sl@0: else sl@0: { sl@0: returnValue = foreignDataLength - foreignBytePointer; sl@0: } sl@0: return returnValue ; sl@0: } sl@0: sl@0: sl@0: sl@0: sl@0: /** sl@0: This function actually does the work of converting converting unicode input to Shift-JIS output. sl@0: sl@0: @param const TDesC8& aReplacementForUnconvertibleUnicodeCharacters byte sequence to be be used as output for unicode characters which have no mapping defined. sl@0: @param TDes16& aUnicode The input buffer sl@0: @param const TDesC8& aForeign The output buffer sl@0: @return The number of unicode characters in the input buffer which weren't be processed (e.g. due to output buffer overflow). 
sl@0: @internalTechnology sl@0: */ sl@0: TInt CnvShiftJis::DoConvertFromUnicode(const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, sl@0: TDes8& aForeign, const TDesC16& aUnicode, sl@0: CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters) sl@0: { sl@0: TUint unicodePointer = 0 ; sl@0: TUint row ; sl@0: TUint column ; sl@0: TUint8 foreignCharByte ; sl@0: TUint16 foreignChar ; sl@0: TUint16 unicodeChar ; sl@0: TBool finished = EFalse ; sl@0: TInt charsConverted = 0 ; sl@0: TInt unicodeLength = aUnicode.Length() ; sl@0: TInt foreignMaxLength = aForeign.MaxLength() ; sl@0: TUint16 foreignUnmappedCharacter = getForeignUnmappedCharacter() ; sl@0: CCnvCharacterSetConverter::TDowngradeForExoticLineTerminatingCharacters downgradeForExoticLineTerminatingCharacters = CCnvCharacterSetConverter::EDowngradeExoticLineTerminatingCharactersToCarriageReturnLineFeed; sl@0: TBool downgradeExoticLineTerminatingCharacters = EFalse ; sl@0: sl@0: sl@0: // Check for valid input and output buffers sl@0: if ((unicodeLength == 0) || foreignMaxLength == 0) sl@0: { sl@0: finished = ETrue ; sl@0: } sl@0: sl@0: // If we've been called from an instance of CCnvCharacterSetConverter we can retrieve its state sl@0: // from Thread Local Storage. This really isn't very nice but there's no other way we can get sl@0: // hold of settings like downgrade for unicode line termination characters without breaking sl@0: // compatibility with the existing plug-in interface! 
sl@0: CCnvCharacterSetConverter* currentCharacterSetConverter = (CCnvCharacterSetConverter*)TTlsData::CurrentCharacterSetConverter(); sl@0: if (currentCharacterSetConverter) sl@0: { sl@0: downgradeForExoticLineTerminatingCharacters = currentCharacterSetConverter->GetDowngradeForExoticLineTerminatingCharacters() ; sl@0: downgradeExoticLineTerminatingCharacters = ETrue ; sl@0: } sl@0: sl@0: // Reset output buffer sl@0: aForeign.Zero(); sl@0: sl@0: // Process input buffer sl@0: while (!finished) sl@0: { sl@0: sl@0: // Look up foreign Char sl@0: unicodeChar = aUnicode[unicodePointer] ; sl@0: sl@0: // Check for any downgrade of Unicode line endings characters required if we've got sl@0: // a Unicode Line-Feed or Paragraph-Seperator character to deal with. sl@0: if (downgradeExoticLineTerminatingCharacters && sl@0: ((unicodeChar==KUnicodeLineFeed) || (unicodeChar==KUnicodeParagraphSeperator))) sl@0: { sl@0: if (downgradeForExoticLineTerminatingCharacters == CCnvCharacterSetConverter::EDowngradeExoticLineTerminatingCharactersToCarriageReturnLineFeed) sl@0: { sl@0: if (aForeign.Length() < (foreignMaxLength - 1)) sl@0: { sl@0: aForeign.Append(KSJISCarriageReturn) ; sl@0: aForeign.Append(KSJISLineFeed) ; sl@0: charsConverted++ ; sl@0: } sl@0: else sl@0: { sl@0: // Foreign buffer full! sl@0: finished = ETrue; sl@0: } sl@0: } sl@0: else if (downgradeForExoticLineTerminatingCharacters == CCnvCharacterSetConverter::EDowngradeExoticLineTerminatingCharactersToJustLineFeed) sl@0: { sl@0: if (aForeign.Length() < foreignMaxLength) sl@0: { sl@0: aForeign.Append(KSJISLineFeed) ; sl@0: charsConverted++ ; sl@0: } sl@0: else sl@0: { sl@0: // Foreign buffer full! 
sl@0: finished = ETrue; sl@0: } sl@0: } sl@0: } sl@0: else sl@0: { sl@0: row = unicodeChar / 256 ; sl@0: column = unicodeChar % 256 ; sl@0: foreignChar = lookupForeignChar(row, column) ; sl@0: sl@0: // Check for untranslatable character sl@0: if ((foreignChar == foreignUnmappedCharacter) && sl@0: (aForeign.Length() < (foreignMaxLength - 1))) sl@0: { sl@0: aIndicesOfUnconvertibleCharacters.AppendIndex(unicodePointer) ; sl@0: aForeign.Append(aReplacementForUnconvertibleUnicodeCharacters) ; sl@0: } sl@0: else if ((foreignChar <= 0xFF) && (aForeign.Length() < foreignMaxLength)) sl@0: { sl@0: // Single byte character sl@0: foreignCharByte = (TUint8) foreignChar ; sl@0: aForeign.Append(foreignCharByte) ; sl@0: charsConverted++ ; sl@0: } sl@0: else if (aForeign.Length() < (foreignMaxLength - 1)) sl@0: { sl@0: // Two byte character sl@0: foreignCharByte = (TUint8) (foreignChar >> 8 ) ; sl@0: aForeign.Append(foreignCharByte) ; sl@0: foreignCharByte = (TUint8) (foreignChar & 0xFF) ; sl@0: aForeign.Append(foreignCharByte) ; sl@0: charsConverted++ ; sl@0: } sl@0: else sl@0: { sl@0: // Foreign buffer full! sl@0: finished = ETrue; sl@0: } sl@0: } sl@0: sl@0: // Check for terminating condition (input buffer consumed or output buffer full) sl@0: if (!finished && (++unicodePointer >= unicodeLength)) sl@0: { sl@0: finished = ETrue ; sl@0: } sl@0: } sl@0: sl@0: // Return number of input characters *not* processsed (will be zero unless output sl@0: // buffer has been filled before all input consumed) sl@0: return unicodeLength - unicodePointer; sl@0: } sl@0: