1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/ossrv/genericopenlibs/openenvcore/libc/src/charcnv.cpp Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,322 @@
1.4 +// Copyright (c) 2005-2009 Nokia Corporation and/or its subsidiary(-ies).
1.5 +// All rights reserved.
1.6 +// This component and the accompanying materials are made available
1.7 +// under the terms of "Eclipse Public License v1.0"
1.8 +// which accompanies this distribution, and is available
1.9 +// at the URL "http://www.eclipse.org/legal/epl-v10.html".
1.10 +//
1.11 +// Initial Contributors:
1.12 +// Nokia Corporation - initial contribution.
1.13 +//
1.14 +// Contributors:
1.15 +//
1.16 +// Description:
1.17 +// Name : MRT_WCHARCNVT.CPP
1.18 +// Part of : MRT LIBC
1.19 +// Contains the source for the helper functions used by wchar
1.20 +// restartable conversion API's in libc
1.21 +// Version : 1.0
1.22 +//
1.23 +
1.24 +
1.25 +
1.26 +// Copyright (c) 1997-2003 Symbian Ltd. All rights reserved.
1.27 +
1.28 +// system includes
1.29 +#include <e32std.h>
1.30 +#include <e32base.h>
1.31 +#include <utf.h>
1.32 +#include <stdlib.h>
1.33 +#include <string.h>
1.34 +#include <errno.h>
1.35 +#include <wchar.h>
1.36 +
1.37 +#include "wcharcnv.h"
1.38 +
// Constant added to (lead << 10) + trail to turn a UTF-16 surrogate pair into
// its Unicode code point (see http://www.unicode.org/faq/utf_bom.html#39).
// Parenthesised so the macro expands to a single value in any expression,
// not only when it is the last term of a sum.
#define KSURROGATE_OFFSET (0x10000 - (0xD800 << 10) - 0xDC00)
1.40 +
1.41 +//-----------------------------------------------------------------------------
1.42 +//Function Name : TInt ConvertToUnicodeFromUtf8(TDes16& aUnicode, const
1.43 +// TDesC8& aUtf8, mbstate_t *state)
1.44 +//Description : Converts the unicode to UTF8
1.45 +//Return Value : The number of unconverted bytes left at the end of the input
1.46 +//descriptor, or one of the error values defined in TError.
1.47 +//-----------------------------------------------------------------------------
1.48 +TInt ConvertToUnicodeFromUtf8(TDes16& aUnicode, const TDesC8& aUtf8, mbstate_t *state)
1.49 +{
1.50 + aUnicode.SetLength(0);
1.51 + if (aUtf8.Length()==0)
1.52 + {
1.53 + return 0;
1.54 + }
1.55 + if (aUnicode.MaxLength()==0)
1.56 + {
1.57 + return aUtf8.Length();
1.58 + }
1.59 +
1.60 + HBufC8* utf8 = NULL;
1.61 + if ( state->__count > 0)
1.62 + {
1.63 + // state have some information, use that.
1.64 + utf8 = HBufC8::NewLC ( state->__count + aUtf8.Length() );
1.65 + TPtr8 tempBuf = utf8->Des();
1.66 + TPtr8 temp ((TUint8*)state->__value.__wchb, state->__count);
1.67 + tempBuf.Copy(temp);
1.68 + tempBuf.Append(aUtf8);
1.69 + }
1.70 +
1.71 + TUint16* pointerToCurrentUnicodeCharacter=CONST_CAST(TUint16*, aUnicode.Ptr());
1.72 + const TUint16* pointerToLastUnicodeCharacter=pointerToCurrentUnicodeCharacter+(aUnicode.MaxLength()-1);
1.73 + const TUint8* pointerToCurrentUtf8Byte= utf8 ? utf8->Des().Ptr() : aUtf8.Ptr();
1.74 + const TUint8* pointerToPendingUtf8Byte=utf8 ? utf8->Des().Ptr() : aUtf8.Ptr();
1.75 + TInt length = utf8 ? utf8->Des().Length() : aUtf8.Length();
1.76 + const TUint8* pointerToLastUtf8Byte=pointerToCurrentUtf8Byte+(length-1);
1.77 + TUint16 replacementcharacter = 0xFFFD;
1.78 + TUint8 currentUtf8Byte;
1.79 + TUint currentUnicodeCharacter;
1.80 + TInt sequenceLength;
1.81 +
1.82 +
1.83 + FOREVER
1.84 + {
1.85 + currentUtf8Byte=*pointerToCurrentUtf8Byte;
1.86 + pointerToPendingUtf8Byte = pointerToCurrentUtf8Byte;
1.87 + sequenceLength=100;
1.88 +
1.89 + for(TInt i=0;i<7;i++)
1.90 + {
1.91 + if ((currentUtf8Byte&(0xf8<<i))==(static_cast<TUint8>(0xF0<<i)))
1.92 + {
1.93 + sequenceLength = 4-i;
1.94 + break;
1.95 + }
1.96 + }
1.97 +
1.98 + if ((sequenceLength<2 || sequenceLength>6) && sequenceLength!=0)
1.99 + {
1.100 + currentUnicodeCharacter=replacementcharacter;
1.101 + }
1.102 + else
1.103 + {
1.104 + if ((pointerToLastUtf8Byte-pointerToCurrentUtf8Byte+1)<sequenceLength)
1.105 + {
1.106 + // we dnt have enough UTF-8 bytes to complete the Muti-Byte character.
1.107 + // store the character within the state.
1.108 + state->__count = 0;
1.109 + while (pointerToCurrentUtf8Byte <= pointerToLastUtf8Byte)
1.110 + {
1.111 + state->__value.__wchb[state->__count++] = *(pointerToCurrentUtf8Byte++);
1.112 + }
1.113 + // reset the current pointer
1.114 + pointerToCurrentUtf8Byte -= state->__count;
1.115 + if((pointerToCurrentUnicodeCharacter-aUnicode.Ptr())==0)
1.116 + {
1.117 + // still nothing is decoded.
1.118 + if ( utf8 )
1.119 + {
1.120 + CleanupStack::PopAndDestroy(); // utf8
1.121 + }
1.122 + return -2;
1.123 + //return -1;
1.124 + }
1.125 + // something is already decoded, so return the no of bytes that use for
1.126 + // decoding.
1.127 + break;
1.128 + }
1.129 +
1.130 + // reset the state
1.131 + state->__count = 0;
1.132 + currentUnicodeCharacter = currentUtf8Byte&(0x7F>>sequenceLength);
1.133 +
1.134 + for(TInt i=sequenceLength;i>1; i--)
1.135 + {
1.136 + currentUtf8Byte = *(++pointerToCurrentUtf8Byte);
1.137 + if ((currentUtf8Byte&0xc0)==0x80)
1.138 + {
1.139 + currentUnicodeCharacter = (currentUnicodeCharacter<<6)|(currentUtf8Byte&0x3F);
1.140 + }
1.141 + else
1.142 + {
1.143 + // Encoding error occured.
1.144 + // store the contained within the state and return -1.
1.145 + // set the error EILSEQ to errno
1.146 + if ( utf8 )
1.147 + {
1.148 + CleanupStack::PopAndDestroy(); // utf8
1.149 + }
1.150 + errno = EILSEQ;
1.151 + return -1;
1.152 + //currentUnicodeCharacter=replacementcharacter;
1.153 + //--pointerToCurrentUtf8Byte;
1.154 + }
1.155 + }
1.156 + }
1.157 +
1.158 + if (currentUnicodeCharacter > 0xFFFF)
1.159 + {
1.160 + if(pointerToCurrentUnicodeCharacter>=pointerToLastUnicodeCharacter)
1.161 + {
1.162 + // unicode descriptor dnt have 2 wchar bytes to hold the data.
1.163 + pointerToCurrentUtf8Byte=pointerToPendingUtf8Byte;
1.164 + break;
1.165 + }
1.166 +
1.167 + TUint surrogate = (currentUnicodeCharacter>>10) + 0xD7C0;
1.168 + *pointerToCurrentUnicodeCharacter=static_cast<TUint16>(surrogate);
1.169 + ++pointerToCurrentUnicodeCharacter;
1.170 +
1.171 + surrogate = (currentUnicodeCharacter&0x3FF)+0xDC00;
1.172 + *pointerToCurrentUnicodeCharacter=static_cast<TUint16>(surrogate);
1.173 + ++pointerToCurrentUnicodeCharacter;
1.174 + ++pointerToCurrentUtf8Byte;
1.175 + }
1.176 + else
1.177 + {
1.178 + *pointerToCurrentUnicodeCharacter=static_cast<TUint16>(currentUnicodeCharacter);
1.179 + ++pointerToCurrentUnicodeCharacter;
1.180 + ++pointerToCurrentUtf8Byte;
1.181 + }
1.182 +
1.183 + if ((pointerToCurrentUtf8Byte>pointerToLastUtf8Byte) || (pointerToCurrentUnicodeCharacter>pointerToLastUnicodeCharacter))
1.184 + {
1.185 + // checking the boundary condition.
1.186 + // Here either the UTF-8 or Unicode descriptor reached to the end.
1.187 + break;
1.188 + }
1.189 + } // forever
1.190 + // decoding finished.
1.191 + aUnicode.SetLength(pointerToCurrentUnicodeCharacter-aUnicode.Ptr());
1.192 + if ( utf8 )
1.193 + {
1.194 + CleanupStack::PopAndDestroy(); // utf8
1.195 + }
1.196 + //return pointerToLastUtf8Byte-pointerToCurrentUtf8Byte+1;
1.197 + // returns the number of bytes used to complete a valid multibyte character.
1.198 + return pointerToCurrentUtf8Byte - aUtf8.Ptr();
1.199 +} //end of function
1.200 +
1.201 +//-----------------------------------------------------------------------------
1.202 +//Function Name : TInt _Utf16ToUtf8(char* aDst, wchar_t aSrc, mbstate_t* ps, int aLen )
1.203 +//Description : Converts wide char in UCS2 format to UTF8 equivalent
1.204 +//Return Value : The number of bytes converted, 0 if L'\0\' was translated, -1 on
1.205 +//generic error and errno set appropriately, -2 if len is not sufficient to store aSrc wide char
1.206 +//-----------------------------------------------------------------------------
1.207 +TInt _Utf16ToUtf8(char* dst, wchar_t aSrc, mbstate_t* ps, int aLen)
1.208 +{
1.209 + int retval = 0;
1.210 + // check the state
1.211 + if(ps->__count !=_EUTF16InitialState && ps->__count != _EUTF16_21BitExtensionState)
1.212 + {
1.213 + errno = EINVAL;
1.214 + return -1;
1.215 + }
1.216 +
1.217 + //following characters are illegal
1.218 + //see http://www.unicode.org/faq/utf_bom.html#40
1.219 + if(aSrc == 0xFFFE || aSrc == 0xFFFF || (aSrc >= 0xFDD0 && aSrc <= 0xFDEF) )
1.220 + {
1.221 + errno = EILSEQ;
1.222 + return -1;
1.223 + }
1.224 +
1.225 +
1.226 + if(ps->__count == _EUTF16InitialState)
1.227 + {
1.228 +
1.229 + //following characters in addition are illegal in initial state
1.230 + //see http://www.unicode.org/faq/utf_bom.html#40
1.231 + if((aSrc >= 0xDC00 && aSrc <= 0xDFFF) )
1.232 + {
1.233 + errno = EILSEQ;
1.234 + return -1;
1.235 + }
1.236 +
1.237 +
1.238 + if ((aSrc & 0xff80)==0x0000)
1.239 + {
1.240 + if(aLen >= 1)
1.241 + {
1.242 + *dst++ = static_cast<TUint8>(aSrc);
1.243 + retval = 1;
1.244 + }
1.245 + else
1.246 + {
1.247 + return -2;
1.248 + }
1.249 +
1.250 + }
1.251 + else if ((aSrc & 0xf800)==0x0000)
1.252 + {
1.253 + if (aLen >= 2)
1.254 + {
1.255 + *dst++ = static_cast<TUint8>(0xc0|(aSrc>>6));
1.256 + *dst++ = static_cast<TUint8> (0x80|(aSrc&0x3f));
1.257 + retval = 2;
1.258 + }
1.259 + else
1.260 + {
1.261 + return -2;
1.262 + }
1.263 + }
1.264 + else if ((aSrc & 0xfc00)==0xd800)
1.265 + {
1.266 + ps->__value.lead = aSrc;
1.267 + ps->__count = _EUTF16_21BitExtensionState;
1.268 + retval = 0; //nothing written out just yet
1.269 + }
1.270 + else
1.271 + {
1.272 + if ( aLen >= 3)
1.273 + {
1.274 + *dst++ = static_cast<TUint8>(0xe0|(aSrc>>12));
1.275 + *dst++ = static_cast<TUint8>(0x80|((aSrc>>6)&0x3f));
1.276 + *dst++ = static_cast<TUint8>(0x80|(aSrc&0x3f));
1.277 + retval = 3;
1.278 + }
1.279 + else
1.280 + {
1.281 + return -2;
1.282 + }
1.283 + }
1.284 +
1.285 +
1.286 + }
1.287 + else //ps->__count == _EUCS2_21BitExtensionState)
1.288 + {
1.289 + //characters outside this range are illegal in this state
1.290 + //see http://www.unicode.org/faq/utf_bom.html#40
1.291 + if((aSrc < 0xDC00 || aSrc > 0xDFFF) )
1.292 + {
1.293 + errno = EILSEQ;
1.294 + return -1;
1.295 + }
1.296 +
1.297 + if ((aSrc & 0xfc00)!=0xdc00)
1.298 + {
1.299 + errno = EILSEQ;
1.300 + return -1;
1.301 + }
1.302 + if ( aLen >= 4)
1.303 + {
1.304 + //snippet taken from unicode faq
1.305 + //http://www.unicode.org/faq/utf_bom.html#39
1.306 +
1.307 + unsigned long codepoint = (ps->__value.lead << 10) + aSrc + KSURROGATE_OFFSET;
1.308 +
1.309 + *dst++ = static_cast<TUint8>( 0xf0|(codepoint>>18));
1.310 + *dst++ = static_cast<TUint8>(0x80|((codepoint>>12)&0x3f));
1.311 + *dst++ = static_cast<TUint8>(0x80|((codepoint>>6)&0x3f));
1.312 + *dst++ = static_cast<TUint8>(0x80|(codepoint&0x3f));
1.313 + retval = 4;
1.314 + }
1.315 + else
1.316 + {
1.317 + return -2;
1.318 + }
1.319 + ps->__count = _EUTF16InitialState;
1.320 + }
1.321 + return retval;
1.322 +
1.323 +
1.324 +}//end of function
1.325 +