Update contrib.
1 // Copyright (c) 2005-2009 Nokia Corporation and/or its subsidiary(-ies).
2 // All rights reserved.
3 // This component and the accompanying materials are made available
4 // under the terms of "Eclipse Public License v1.0"
5 // which accompanies this distribution, and is available
6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
8 // Initial Contributors:
9 // Nokia Corporation - initial contribution.
14 // Name : MRT_WCHARCNVT.CPP
16 // Contains the source for the helper functions used by the wchar
17 // restartable conversion APIs in libc
23 // Copyright (c) 1997-2003 Symbian Ltd. All rights reserved.
// Constant term of the UTF-16 surrogate-pair decoding formula
//   codepoint = (lead << 10) + trail + KSURROGATE_OFFSET
// The replacement list is parenthesised so the macro always expands as a
// single value, whatever operators surround it at the point of use.
#define KSURROGATE_OFFSET (0x10000 - (0xD800 << 10) - 0xDC00)
38 //-----------------------------------------------------------------------------
39 //Function Name : TInt ConvertToUnicodeFromUtf8(TDes16& aUnicode, const
40 // TDesC8& aUtf8, mbstate_t *state)
41 //Description : Converts UTF-8 encoded bytes to Unicode (UTF-16)
42 //Return Value : The number of unconverted bytes left at the end of the input
43 //descriptor, or one of the error values defined in TError.
44 //-----------------------------------------------------------------------------
45 TInt ConvertToUnicodeFromUtf8(TDes16& aUnicode, const TDesC8& aUtf8, mbstate_t *state)
47 aUnicode.SetLength(0);
48 if (aUtf8.Length()==0)
52 if (aUnicode.MaxLength()==0)
54 return aUtf8.Length();
58 if ( state->__count > 0)
60 // the state holds bytes left over from an earlier call; decode them together with the new input.
61 utf8 = HBufC8::NewLC ( state->__count + aUtf8.Length() );
62 TPtr8 tempBuf = utf8->Des();
63 TPtr8 temp ((TUint8*)state->__value.__wchb, state->__count);
65 tempBuf.Append(aUtf8);
68 TUint16* pointerToCurrentUnicodeCharacter=CONST_CAST(TUint16*, aUnicode.Ptr());
69 const TUint16* pointerToLastUnicodeCharacter=pointerToCurrentUnicodeCharacter+(aUnicode.MaxLength()-1);
70 const TUint8* pointerToCurrentUtf8Byte= utf8 ? utf8->Des().Ptr() : aUtf8.Ptr();
71 const TUint8* pointerToPendingUtf8Byte=utf8 ? utf8->Des().Ptr() : aUtf8.Ptr();
72 TInt length = utf8 ? utf8->Des().Length() : aUtf8.Length();
73 const TUint8* pointerToLastUtf8Byte=pointerToCurrentUtf8Byte+(length-1);
74 TUint16 replacementcharacter = 0xFFFD;
75 TUint8 currentUtf8Byte;
76 TUint currentUnicodeCharacter;
82 currentUtf8Byte=*pointerToCurrentUtf8Byte;
83 pointerToPendingUtf8Byte = pointerToCurrentUtf8Byte;
88 if ((currentUtf8Byte&(0xf8<<i))==(static_cast<TUint8>(0xF0<<i)))
95 if ((sequenceLength<2 || sequenceLength>6) && sequenceLength!=0)
97 currentUnicodeCharacter=replacementcharacter;
101 if ((pointerToLastUtf8Byte-pointerToCurrentUtf8Byte+1)<sequenceLength)
103 // we don't have enough UTF-8 bytes to complete the multi-byte character.
104 // store the partial sequence in the state.
106 while (pointerToCurrentUtf8Byte <= pointerToLastUtf8Byte)
108 state->__value.__wchb[state->__count++] = *(pointerToCurrentUtf8Byte++);
110 // reset the current pointer
111 pointerToCurrentUtf8Byte -= state->__count;
112 if((pointerToCurrentUnicodeCharacter-aUnicode.Ptr())==0)
114 // still nothing is decoded.
117 CleanupStack::PopAndDestroy(); // utf8
122 // something is already decoded, so return the no of bytes that use for
129 currentUnicodeCharacter = currentUtf8Byte&(0x7F>>sequenceLength);
131 for(TInt i=sequenceLength;i>1; i--)
133 currentUtf8Byte = *(++pointerToCurrentUtf8Byte);
134 if ((currentUtf8Byte&0xc0)==0x80)
136 currentUnicodeCharacter = (currentUnicodeCharacter<<6)|(currentUtf8Byte&0x3F);
140 // Encoding error occurred.
141 // store the contained within the state and return -1.
142 // set the error EILSEQ to errno
145 CleanupStack::PopAndDestroy(); // utf8
149 //currentUnicodeCharacter=replacementcharacter;
150 //--pointerToCurrentUtf8Byte;
155 if (currentUnicodeCharacter > 0xFFFF)
157 if(pointerToCurrentUnicodeCharacter>=pointerToLastUnicodeCharacter)
159 // the Unicode descriptor doesn't have room for the two 16-bit units of a surrogate pair.
160 pointerToCurrentUtf8Byte=pointerToPendingUtf8Byte;
164 TUint surrogate = (currentUnicodeCharacter>>10) + 0xD7C0;
165 *pointerToCurrentUnicodeCharacter=static_cast<TUint16>(surrogate);
166 ++pointerToCurrentUnicodeCharacter;
168 surrogate = (currentUnicodeCharacter&0x3FF)+0xDC00;
169 *pointerToCurrentUnicodeCharacter=static_cast<TUint16>(surrogate);
170 ++pointerToCurrentUnicodeCharacter;
171 ++pointerToCurrentUtf8Byte;
175 *pointerToCurrentUnicodeCharacter=static_cast<TUint16>(currentUnicodeCharacter);
176 ++pointerToCurrentUnicodeCharacter;
177 ++pointerToCurrentUtf8Byte;
180 if ((pointerToCurrentUtf8Byte>pointerToLastUtf8Byte) || (pointerToCurrentUnicodeCharacter>pointerToLastUnicodeCharacter))
182 // Boundary check:
183 // either the UTF-8 input or the Unicode output descriptor has reached its end.
187 // decoding finished.
188 aUnicode.SetLength(pointerToCurrentUnicodeCharacter-aUnicode.Ptr());
191 CleanupStack::PopAndDestroy(); // utf8
193 //return pointerToLastUtf8Byte-pointerToCurrentUtf8Byte+1;
194 // returns the number of bytes used to complete a valid multibyte character.
195 return pointerToCurrentUtf8Byte - aUtf8.Ptr();
198 //-----------------------------------------------------------------------------
199 //Function Name : TInt _Utf16ToUtf8(char* dst, wchar_t aSrc, mbstate_t* ps, int aLen )
200 //Description : Converts a wide char in UTF-16 format to its UTF-8 equivalent
201 //Return Value : The number of bytes converted, 0 if L'\0' was translated, -1 on
202 //generic error and errno set appropriately, -2 if aLen is not sufficient to store aSrc wide char
203 //-----------------------------------------------------------------------------
204 TInt _Utf16ToUtf8(char* dst, wchar_t aSrc, mbstate_t* ps, int aLen)
208 if(ps->__count !=_EUTF16InitialState && ps->__count != _EUTF16_21BitExtensionState)
214 //following characters are illegal
215 //see http://www.unicode.org/faq/utf_bom.html#40
216 if(aSrc == 0xFFFE || aSrc == 0xFFFF || (aSrc >= 0xFDD0 && aSrc <= 0xFDEF) )
223 if(ps->__count == _EUTF16InitialState)
226 //following characters in addition are illegal in initial state
227 //see http://www.unicode.org/faq/utf_bom.html#40
228 if((aSrc >= 0xDC00 && aSrc <= 0xDFFF) )
235 if ((aSrc & 0xff80)==0x0000)
239 *dst++ = static_cast<TUint8>(aSrc);
248 else if ((aSrc & 0xf800)==0x0000)
252 *dst++ = static_cast<TUint8>(0xc0|(aSrc>>6));
253 *dst++ = static_cast<TUint8> (0x80|(aSrc&0x3f));
261 else if ((aSrc & 0xfc00)==0xd800)
263 ps->__value.lead = aSrc;
264 ps->__count = _EUTF16_21BitExtensionState;
265 retval = 0; //nothing written out just yet
271 *dst++ = static_cast<TUint8>(0xe0|(aSrc>>12));
272 *dst++ = static_cast<TUint8>(0x80|((aSrc>>6)&0x3f));
273 *dst++ = static_cast<TUint8>(0x80|(aSrc&0x3f));
284 else //ps->__count == _EUTF16_21BitExtensionState
286 //characters outside this range are illegal in this state
287 //see http://www.unicode.org/faq/utf_bom.html#40
288 if((aSrc < 0xDC00 || aSrc > 0xDFFF) )
294 if ((aSrc & 0xfc00)!=0xdc00)
301 //snippet taken from unicode faq
302 //http://www.unicode.org/faq/utf_bom.html#39
304 unsigned long codepoint = (ps->__value.lead << 10) + aSrc + KSURROGATE_OFFSET;
306 *dst++ = static_cast<TUint8>( 0xf0|(codepoint>>18));
307 *dst++ = static_cast<TUint8>(0x80|((codepoint>>12)&0x3f));
308 *dst++ = static_cast<TUint8>(0x80|((codepoint>>6)&0x3f));
309 *dst++ = static_cast<TUint8>(0x80|(codepoint&0x3f));
316 ps->__count = _EUTF16InitialState;