First public contribution.
2 * Copyright (c) 2005-2009 Nokia Corporation and/or its subsidiary(-ies).
4 * This component and the accompanying materials are made available
5 * under the terms of "Eclipse Public License v1.0"
6 * which accompanies this distribution, and is available
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
9 * Initial Contributors:
10 * Nokia Corporation - initial contribution.
21 #include <convutils.h>
26 #include "charconv_tls.h"
27 #include "charconv_table_utilities.h"
// Byte-range constants used by the Shift-JIS -> Unicode decoder in this file.
// A byte b is treated as the lead byte of a 2-byte sequence when
//   KSingleByteRangeFirstBlockEnd < b < KSingleByteRangeSecondBlockStart, or
//   KSingleByteRangeSecondBlockEnd < b <= KLeadByteMax.
// Any other byte is looked up as a single-byte character.
29 // Lead byte ranges for 2-Byte Shift-JIS sequences
30 const TUint KSingleByteRangeFirstBlockEnd=0x80;
31 const TUint KSingleByteRangeSecondBlockStart=0xa0;
32 const TUint KSingleByteRangeSecondBlockEnd=0xdf;
33 const TUint KLeadByteMax = 0xfc;
// A trail byte t is accepted when KTrailByteMin <= t <= KTrailByteMax and
// t != KTrailByteIllegal.
35 // Trail byte ranges for 2-Byte Shift-JIS sequences
36 const TUint KTrailByteMin = 0x40 ;
37 const TUint KTrailByteMax = 0xFC ;
38 const TUint KTrailByteIllegal = 0x7F ;
// Line-terminator values. The decoder turns a Shift-JIS CR/LF pair into
// KUnicodeLineFeed; the encoder downgrades KUnicodeLineFeed and
// KUnicodeParagraphSeperator to CR/LF or LF when asked to.
40 const TUint8 KSJISLineFeed = 0x0a ; // Shift-JIS value for Line Feed
41 const TUint8 KSJISCarriageReturn = 0x0d; // Shift-JIS value for Carriage Return
42 const TUint16 KUnicodeLineFeed = 0x2028; // U+2028 (Unicode LINE SEPARATOR), used here as the Unicode line-feed equivalent
43 const TUint16 KUnicodeParagraphSeperator = 0x2029; // U+2029 (Unicode PARAGRAPH SEPARATOR)
44 const TUint16 KUnicodeCarriageReturn = KSJISCarriageReturn ; // carriage return has the same value (0x0d) on both sides
48 * Get the Shift-JIS replacement for Unicode characters which cannot be converted
50 * @return const TDesC8& 8-bit descriptor containing the Shift-JIS data which will replace any untranslatable Unicode characters.
51 * @since Internationalization_6.2
// Exported accessor: forwards to ReplacementForUnconvertibleUnicodeCharacters_internal()
// and returns the descriptor it yields unchanged.
54 EXPORT_C const TDesC8& CnvShiftJis::ReplacementForUnconvertibleUnicodeCharacters()
56 return ReplacementForUnconvertibleUnicodeCharacters_internal();
61 * Converts text from Unicode to Shift-JIS
63 * @since Internationalization_6.2
// Public Unicode -> Shift-JIS entry point.
// The endianness argument is unnamed and unused. The remaining arguments are
// forwarded to DoConvertFromUnicode() and its return value is returned as-is.
66 EXPORT_C TInt CnvShiftJis::ConvertFromUnicode(CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */,
67 const TDesC8& aReplacementForUnconvertibleUnicodeCharacters, TDes8& aForeign,
68 const TDesC16& aUnicode,
69 CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
71 return DoConvertFromUnicode(aReplacementForUnconvertibleUnicodeCharacters,
72 aForeign, aUnicode, aIndicesOfUnconvertibleCharacters) ;
76 * Converts text from Unicode to Shift-JIS
77 * Note that CnvShiftJis does NOT support extensions through use of additional character sets,
78 * instead multiple versions are built for the different (operator defined) implementations
79 * of Shift-JIS which need to be supported and the correct one installed at ROM build time,
80 * and this method is only supplied for purposes of interface compatibility.
81 * @since Internationalization_6.2
// Interface-compatibility overload of the Unicode -> Shift-JIS entry point.
// Both the endianness argument and the array of additional character sets are
// unnamed and ignored; the call is forwarded to DoConvertFromUnicode() exactly
// like the overload without the extra array.
84 EXPORT_C TInt CnvShiftJis::ConvertFromUnicode(CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */,
85 const TDesC8& aReplacementForUnconvertibleUnicodeCharacters,
86 TDes8& aForeign, const TDesC16& aUnicode,
87 CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters,
88 const TArray<CnvUtilities::SCharacterSet>& /* aArrayOfAdditionalCharacterSets */)
90 return DoConvertFromUnicode(aReplacementForUnconvertibleUnicodeCharacters,
91 aForeign, aUnicode, aIndicesOfUnconvertibleCharacters) ;
// Public Shift-JIS -> Unicode entry point.
// The endianness argument is unnamed and unused. The remaining arguments are
// forwarded to DoConvertToUnicode() and its return value is returned as-is.
94 EXPORT_C TInt CnvShiftJis::ConvertToUnicode(CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */,
95 TDes16& aUnicode, const TDesC8& aForeign, TInt& aNumberOfUnconvertibleCharacters,
96 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
98 return DoConvertToUnicode(aUnicode, aForeign, aNumberOfUnconvertibleCharacters,
99 aIndexOfFirstByteOfFirstUnconvertibleCharacter) ;
104 * Converts text from Shift-JIS to Unicode
106 * @since Internationalization_6.2
107 * @internalTechnology
// Overload of the Shift-JIS -> Unicode entry point that also accepts an array of
// additional conversion methods. That array and the endianness argument are
// unnamed and ignored; the call is forwarded to DoConvertToUnicode() exactly
// like the overload without the extra array.
109 EXPORT_C TInt CnvShiftJis::ConvertToUnicode(CCnvCharacterSetConverter::TEndianness /* aDefaultEndiannessOfForeignCharacters */,
110 TDes16& aUnicode, const TDesC8& aForeign,
111 TInt& aNumberOfUnconvertibleCharacters,
112 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter,
113 const TArray<CnvUtilities::SMethod>& /* aArrayOfAdditionalMethods */)
115 return DoConvertToUnicode(aUnicode, aForeign, aNumberOfUnconvertibleCharacters,
116 aIndexOfFirstByteOfFirstUnconvertibleCharacter) ;
123 This function actually does the work of converting Shift-JIS input to unicode output.
125 @param TDes16& aUnicode The output buffer
126 @param const TDesC8& aForeign The input buffer
127 @param TInt& aNumberOfUnconvertibleCharacters Number of input characters which were processed but couldn't be converted.
128 @param TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter Locates first untranslatable character in input buffer
129 @return CCnvCharacterSetConverter::EErrorIllFormedInput if the input was invalid, otherwise the number of bytes in the input buffer which were not processed (e.g. due to output buffer overflow).
// Decodes Shift-JIS bytes from aForeign and appends one 16-bit value per decoded
// character to aUnicode. Characters with no mapping are still appended (as the
// "unmapped" marker value) and are counted in aNumberOfUnconvertibleCharacters;
// the start index of the first one is reported through
// aIndexOfFirstByteOfFirstUnconvertibleCharacter (-1 when there is none).
// Returns EErrorIllFormedInput when there was input but nothing was converted,
// otherwise the number of input bytes left unconsumed.
133 TInt CnvShiftJis::DoConvertToUnicode(TDes16& aUnicode, const TDesC8& aForeign,
134 TInt& aNumberOfUnconvertibleCharacters,
135 TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
// Index of the next unread byte in aForeign.
137 TUint foreignBytePointer = 0 ;
140 TUint8 foreignCharByte ;
// NOTE(review): foreignCharStart is only 8 bits wide, yet it is assigned from
// foreignBytePointer (a TUint) and later copied into
// aIndexOfFirstByteOfFirstUnconvertibleCharacter. For inputs longer than 255
// bytes the reported index looks like it would be truncated -- confirm and
// consider widening to TUint.
141 TUint8 foreignCharStart ;
143 TBool finished = EFalse ;
144 TInt charsConverted = 0 ;
// Marker value that every decoded character is compared against to detect
// input with no Unicode mapping.
145 TUint16 unicodeUnmappedCharacter = getUnicodeUnmappedCharacter() ;
146 TInt unicodeBufferLength = aUnicode.MaxLength() ;
147 TInt foreignDataLength = aForeign.Length() ;
// Start from "no unconvertible characters seen".
149 aIndexOfFirstByteOfFirstUnconvertibleCharacter = -1 ;
150 aNumberOfUnconvertibleCharacters = 0 ;
152 // Check for valid input and output buffers
// Guard for a zero-capacity output buffer or an empty input buffer.
153 if (( unicodeBufferLength == 0) || foreignDataLength == 0)
158 // Reset output buffer
161 // Perform conversion
// Remember where this character starts, then consume its first byte.
164 foreignCharStart = foreignBytePointer ;
165 foreignCharByte = aForeign[foreignBytePointer++] ;
167 // Look for (and handle) CR/LF pairs in ShiftJis input stream.
168 // It is a specific requirement from Symbian KK that CR/LF pairs
169 // in the input stream be converted to Unicode LF characters
// Only peek at the following byte when one exists; a CR that is the very last
// input byte does not take this branch.
170 if((KSJISCarriageReturn == foreignCharByte) && (foreignBytePointer < foreignDataLength))
173 if(KSJISLineFeed == aForeign[foreignBytePointer])
// CR immediately followed by LF: emit a single KUnicodeLineFeed and consume the LF too.
176 unicodeChar = KUnicodeLineFeed ;
177 foreignBytePointer++ ;
// CR followed by something else: emit a plain carriage return.
181 unicodeChar = KUnicodeCarriageReturn ;
// Lead-byte test: the byte lies strictly between the two single-byte blocks,
// or above the second single-byte block up to and including KLeadByteMax.
186 if (((foreignCharByte > KSingleByteRangeFirstBlockEnd) &&
187 (foreignCharByte < KSingleByteRangeSecondBlockStart)) ||
188 ((foreignCharByte > KSingleByteRangeSecondBlockEnd) &&
189 (foreignCharByte <= KLeadByteMax)))
191 if (foreignBytePointer < foreignDataLength)
193 // Potential 2 byte shiftJis character
194 row = foreignCharByte ;
195 column = aForeign[foreignBytePointer] ;
// The trail byte is consumed only when it is inside the legal range and is not
// the one excluded value.
196 if (((column <= KTrailByteMax) && (column >= KTrailByteMin)) && column != KTrailByteIllegal)
198 foreignBytePointer++ ;
199 unicodeChar = lookupUnicodeChar(row,column) ;
// Invalid trail byte: the lead byte alone is reported as unmapped and the
// offending byte is left in place to be examined as the start of the next character.
203 unicodeChar = unicodeUnmappedCharacter ;
208 // Only got the first byte of a 2 byte character
209 // reset "read" pointer to beginning of character
// Stepping back leaves the dangling lead byte unconsumed, so it is included
// in the "bytes not processed" count returned below.
212 foreignBytePointer-- ;
218 // Probably a single byte shiftJis character
220 column = foreignCharByte ;
221 unicodeChar = lookupUnicodeChar(row,column) ;
226 // Check for unconvertible characters.
227 if (unicodeChar == unicodeUnmappedCharacter)
// Record the position of the first unconvertible character only.
229 if (aIndexOfFirstByteOfFirstUnconvertibleCharacter == -1)
231 aIndexOfFirstByteOfFirstUnconvertibleCharacter = foreignCharStart ;
233 aNumberOfUnconvertibleCharacters++ ;
236 // Append the converted (or not!) character to the output buffer
237 aUnicode.Append(unicodeChar);
240 // Check for end of input buffer or output buffer full
241 if ((charsConverted >= unicodeBufferLength) || (foreignBytePointer >= foreignDataLength))
245 // Evaluate success of the operation and either return error code (currently just
246 // invalid input) or return number of un-processed characters in input buffer in
247 // case of output buffer being filled before input fully consumed (0 means all
248 // characters consumed)
250 if (foreignDataLength && !charsConverted)
252 // Input must contain at least one complete character to be considered valid Shift-JIS.
253 returnValue = CCnvCharacterSetConverter::EErrorIllFormedInput ;
// Otherwise report how many input bytes were left unread.
257 returnValue = foreignDataLength - foreignBytePointer;
266 This function actually does the work of converting Unicode input to Shift-JIS output.
268 @param const TDesC8& aReplacementForUnconvertibleUnicodeCharacters byte sequence to be used as output for Unicode characters which have no mapping defined.
269 @param const TDesC16& aUnicode The input buffer
270 @param TDes8& aForeign The output buffer
@param CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters Receives the input index of each Unicode character that was replaced because it has no mapping.
271 @return The number of Unicode characters in the input buffer which were not processed (e.g. due to output buffer overflow).
// Encodes the 16-bit characters of aUnicode as Shift-JIS bytes appended to
// aForeign. Characters with no mapping are replaced by
// aReplacementForUnconvertibleUnicodeCharacters and their input index is added
// to aIndicesOfUnconvertibleCharacters. Returns the number of input characters
// that were not processed.
274 TInt CnvShiftJis::DoConvertFromUnicode(const TDesC8& aReplacementForUnconvertibleUnicodeCharacters,
275 TDes8& aForeign, const TDesC16& aUnicode,
276 CCnvCharacterSetConverter::TArrayOfAscendingIndices& aIndicesOfUnconvertibleCharacters)
// Index of the input character currently being processed.
278 TUint unicodePointer = 0 ;
281 TUint8 foreignCharByte ;
282 TUint16 foreignChar ;
283 TUint16 unicodeChar ;
284 TBool finished = EFalse ;
285 TInt charsConverted = 0 ;
286 TInt unicodeLength = aUnicode.Length() ;
287 TInt foreignMaxLength = aForeign.MaxLength() ;
// Marker value that each lookup result is compared against to detect
// characters with no Shift-JIS mapping.
288 TUint16 foreignUnmappedCharacter = getForeignUnmappedCharacter() ;
// Line-terminator downgrading stays switched off (second flag below) unless a
// current converter is found in thread-local storage; the CR/LF mode named
// here is only the initial value.
289 CCnvCharacterSetConverter::TDowngradeForExoticLineTerminatingCharacters downgradeForExoticLineTerminatingCharacters = CCnvCharacterSetConverter::EDowngradeExoticLineTerminatingCharactersToCarriageReturnLineFeed;
290 TBool downgradeExoticLineTerminatingCharacters = EFalse ;
293 // Check for valid input and output buffers
// Guard for an empty input buffer or a zero-capacity output buffer.
294 if ((unicodeLength == 0) || foreignMaxLength == 0)
299 // If we've been called from an instance of CCnvCharacterSetConverter we can retrieve its state
300 // from Thread Local Storage. This really isn't very nice but there's no other way we can get
301 // hold of settings like downgrade for unicode line termination characters without breaking
302 // compatibility with the existing plug-in interface!
303 CCnvCharacterSetConverter* currentCharacterSetConverter = (CCnvCharacterSetConverter*)TTlsData::CurrentCharacterSetConverter();
304 if (currentCharacterSetConverter)
// Adopt the converter's downgrade mode and enable downgrading.
306 downgradeForExoticLineTerminatingCharacters = currentCharacterSetConverter->GetDowngradeForExoticLineTerminatingCharacters() ;
307 downgradeExoticLineTerminatingCharacters = ETrue ;
310 // Reset output buffer
313 // Process input buffer
317 // Look up foreign Char
318 unicodeChar = aUnicode[unicodePointer] ;
320 // Check for any downgrade of Unicode line endings characters required if we've got
321 // a Unicode Line-Feed or Paragraph-Separator character to deal with.
322 if (downgradeExoticLineTerminatingCharacters &&
323 ((unicodeChar==KUnicodeLineFeed) || (unicodeChar==KUnicodeParagraphSeperator)))
325 if (downgradeForExoticLineTerminatingCharacters == CCnvCharacterSetConverter::EDowngradeExoticLineTerminatingCharactersToCarriageReturnLineFeed)
// CR/LF needs two free bytes in the output buffer.
327 if (aForeign.Length() < (foreignMaxLength - 1))
329 aForeign.Append(KSJISCarriageReturn) ;
330 aForeign.Append(KSJISLineFeed) ;
335 // Foreign buffer full!
339 else if (downgradeForExoticLineTerminatingCharacters == CCnvCharacterSetConverter::EDowngradeExoticLineTerminatingCharactersToJustLineFeed)
// A lone LF needs one free byte.
341 if (aForeign.Length() < foreignMaxLength)
343 aForeign.Append(KSJISLineFeed) ;
348 // Foreign buffer full!
// Split the 16-bit character into its high byte (row) and low byte (column)
// for the table lookup.
355 row = unicodeChar / 256 ;
356 column = unicodeChar % 256 ;
357 foreignChar = lookupForeignChar(row, column) ;
359 // Check for untranslatable character
// NOTE(review): the free-space test reserves two bytes, but the replacement
// descriptor's own length is not checked here -- presumably it is assumed to be
// at most two bytes; confirm against callers.
// NOTE(review): when the character is unmapped but fewer than two bytes are
// free, control falls through to the branches below, which test the unmapped
// marker value as if it were an ordinary character -- confirm this is intended.
360 if ((foreignChar == foreignUnmappedCharacter) &&
361 (aForeign.Length() < (foreignMaxLength - 1)))
363 aIndicesOfUnconvertibleCharacters.AppendIndex(unicodePointer) ;
364 aForeign.Append(aReplacementForUnconvertibleUnicodeCharacters) ;
366 else if ((foreignChar <= 0xFF) && (aForeign.Length() < foreignMaxLength))
368 // Single byte character
369 foreignCharByte = (TUint8) foreignChar ;
370 aForeign.Append(foreignCharByte) ;
373 else if (aForeign.Length() < (foreignMaxLength - 1))
375 // Two byte character
// High byte first, then low byte.
376 foreignCharByte = (TUint8) (foreignChar >> 8 ) ;
377 aForeign.Append(foreignCharByte) ;
378 foreignCharByte = (TUint8) (foreignChar & 0xFF) ;
379 aForeign.Append(foreignCharByte) ;
384 // Foreign buffer full!
389 // Check for terminating condition (input buffer consumed or output buffer full)
// The increment is short-circuited once finished is set, so a character that
// did not fit is not counted as consumed.
// NOTE(review): unicodePointer is a TUint compared against the TInt
// unicodeLength (mixed signedness).
390 if (!finished && (++unicodePointer >= unicodeLength))
396 // Return number of input characters *not* processed (will be zero unless output
397 // buffer has been filled before all input consumed)
398 return unicodeLength - unicodePointer;