Symaptic: os/ossrv/genericopenlibs/openenvcore/libc/src/charcnv.cpp@260cb5ec6c19

     1 // Copyright (c) 2005-2009 Nokia Corporation and/or its subsidiary(-ies).

     2 // All rights reserved.

     3 // This component and the accompanying materials are made available

     4 // under the terms of "Eclipse Public License v1.0"

     5 // which accompanies this distribution, and is available

     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".

     7 //

     8 // Initial Contributors:

     9 // Nokia Corporation - initial contribution.

    10 //

    11 // Contributors:

    12 //

    13 // Description:

    14 // Name        : MRT_WCHARCNVT.CPP

    15 // Part of     : MRT LIBC

    16 // Contains the source for the helper functions used by wchar

    17 // restartable conversion API's in libc

    18 // Version     : 1.0

    19 //

    23 // Copyright (c) 1997-2003 Symbian Ltd.  All rights reserved.

    25 // system includes

    26 #include <e32std.h>

    27 #include <e32base.h>

    28 #include <utf.h>

    29 #include <stdlib.h>

    30 #include <string.h>

    31 #include <errno.h>

    32 #include <wchar.h>

    34 #include "wcharcnv.h"

    36 #define  KSURROGATE_OFFSET 0x10000 - (0xD800 << 10) - 0xDC00

    38 //-----------------------------------------------------------------------------

    39 //Function Name : TInt ConvertToUnicodeFromUtf8(TDes16& aUnicode, const

    40 //                           TDesC8& aUtf8, mbstate_t *state)

    41 //Description   : Converts the unicode to UTF8

    42 //Return Value  : The number of unconverted bytes left at the end of the input

    43 //descriptor, or one of the error values defined in TError.

    44 //-----------------------------------------------------------------------------

    45 TInt ConvertToUnicodeFromUtf8(TDes16& aUnicode, const TDesC8& aUtf8, mbstate_t *state)

    46 {

    47 	aUnicode.SetLength(0);

    48 	if (aUtf8.Length()==0)

    49 	{

    50 		return 0;

    51 	}

    52 	if (aUnicode.MaxLength()==0)

    53 	{

    54 		return aUtf8.Length();

    55 	}

    57 	HBufC8* utf8 = NULL;

    58 	if ( state->__count > 0)

    59         {

    60 	        // state have some information, use that.

    61 	        utf8 = HBufC8::NewLC ( state->__count + aUtf8.Length() );

    62 	        TPtr8 tempBuf = utf8->Des();

    63 	        TPtr8 temp ((TUint8*)state->__value.__wchb, state->__count);

    64 	        tempBuf.Copy(temp);

    65 	        tempBuf.Append(aUtf8);

    66         }

    68 	TUint16* pointerToCurrentUnicodeCharacter=CONST_CAST(TUint16*, aUnicode.Ptr());

    69 	const TUint16* pointerToLastUnicodeCharacter=pointerToCurrentUnicodeCharacter+(aUnicode.MaxLength()-1);

    70 	const TUint8* pointerToCurrentUtf8Byte= utf8 ? utf8->Des().Ptr() : aUtf8.Ptr();

    71 	const TUint8* pointerToPendingUtf8Byte=utf8 ? utf8->Des().Ptr() : aUtf8.Ptr();

    72 	TInt length = utf8 ? utf8->Des().Length() : aUtf8.Length();

    73 	const TUint8* pointerToLastUtf8Byte=pointerToCurrentUtf8Byte+(length-1);

    74 	TUint16 replacementcharacter = 0xFFFD;

    75 	TUint8 currentUtf8Byte;

    76 	TUint currentUnicodeCharacter;

    77 	TInt sequenceLength;

    80 	FOREVER

    81 	{

    82 		currentUtf8Byte=*pointerToCurrentUtf8Byte;

    83 		pointerToPendingUtf8Byte = pointerToCurrentUtf8Byte;

    84 		sequenceLength=100;

    86 		for(TInt i=0;i<7;i++)

    87 		{

    88 			if ((currentUtf8Byte&(0xf8<<i))==(static_cast<TUint8>(0xF0<<i)))

    89 			{

    90 				sequenceLength = 4-i;

    91 				break;

    92 			}

    93 		}

    95 		if ((sequenceLength<2 || sequenceLength>6) && sequenceLength!=0)

    96 		{

    97 			currentUnicodeCharacter=replacementcharacter;

    98 		}

    99 		else

   100 		{

   101 			if ((pointerToLastUtf8Byte-pointerToCurrentUtf8Byte+1)<sequenceLength)

   102 			{

   103 				// we dnt have enough UTF-8 bytes to complete the Muti-Byte character.

   104 				// store the character within the state.

   105 				state->__count = 0;

   106             			while (pointerToCurrentUtf8Byte <= pointerToLastUtf8Byte)

   107         		        {

   108             			        state->__value.__wchb[state->__count++] = *(pointerToCurrentUtf8Byte++);

   109 		                }

   110             			// reset the current pointer

   111     	        		pointerToCurrentUtf8Byte -= state->__count;

   112 				if((pointerToCurrentUnicodeCharacter-aUnicode.Ptr())==0)

   113 			        {

   114 				        // still nothing is decoded.

   115 				        if ( utf8 )

   116 			                {

   117 	        			        CleanupStack::PopAndDestroy(); // utf8

   118 			                }

   119 				        return -2;

   120 				        //return -1;

   121 				}

   122         			// something is already decoded, so return the no of bytes that use for

   123 	        		// decoding.

   124 		        	break;

   125 		        }

   127                         // reset the state

   128                     	state->__count = 0;

   129 			currentUnicodeCharacter = currentUtf8Byte&(0x7F>>sequenceLength);

   131         		for(TInt i=sequenceLength;i>1; i--)

   132 	                {

   133         			currentUtf8Byte = *(++pointerToCurrentUtf8Byte);

   134         			if ((currentUtf8Byte&0xc0)==0x80)

   135         			{

   136 	       				currentUnicodeCharacter = (currentUnicodeCharacter<<6)|(currentUtf8Byte&0x3F);

   137         			}

   138         			else

   139         			{

   140         				// Encoding error occured.

   141         				// store the contained within the state and return -1.

   142         				// set the error EILSEQ to errno

   143                 		        if ( utf8 )

   144                			        {

   145                 			        CleanupStack::PopAndDestroy(); // utf8

   146                			        }

   147         				errno = EILSEQ;

   148                				return -1;

   149         				//currentUnicodeCharacter=replacementcharacter;

   150         				//--pointerToCurrentUtf8Byte;

   151         			}

   152 	       		}

   153 	        }

   155        		if (currentUnicodeCharacter > 0xFFFF)

   156        		{

   157 	        	if(pointerToCurrentUnicodeCharacter>=pointerToLastUnicodeCharacter)

   158 	        	{

   159         			// unicode descriptor dnt have 2 wchar bytes to hold the data.

   160 	       			pointerToCurrentUtf8Byte=pointerToPendingUtf8Byte;

   161 	        		break;

   162 	        	}

   164         		TUint surrogate = (currentUnicodeCharacter>>10) + 0xD7C0;

   165 	       		*pointerToCurrentUnicodeCharacter=static_cast<TUint16>(surrogate);

   166 	        	++pointerToCurrentUnicodeCharacter;

   168 		        surrogate = (currentUnicodeCharacter&0x3FF)+0xDC00;

   169         		*pointerToCurrentUnicodeCharacter=static_cast<TUint16>(surrogate);

   170 	       		++pointerToCurrentUnicodeCharacter;

   171 	        	++pointerToCurrentUtf8Byte;

   172 	        }

   173         	else

   174 		{

   175 	        	*pointerToCurrentUnicodeCharacter=static_cast<TUint16>(currentUnicodeCharacter);

   176 		        ++pointerToCurrentUnicodeCharacter;

   177         		++pointerToCurrentUtf8Byte;

   178 		}

   180 	        if ((pointerToCurrentUtf8Byte>pointerToLastUtf8Byte) || (pointerToCurrentUnicodeCharacter>pointerToLastUnicodeCharacter))

   181 	        {

   182         		// checking the boundary condition.

   183         		// Here either the UTF-8 or Unicode descriptor reached to the end.

   184         		break;

   185         	}

   186 	} // forever

   187         // decoding finished.

   188 	aUnicode.SetLength(pointerToCurrentUnicodeCharacter-aUnicode.Ptr());

   189         if ( utf8 )

   190         {

   191                 CleanupStack::PopAndDestroy(); // utf8

   192         }

   193        	//return pointerToLastUtf8Byte-pointerToCurrentUtf8Byte+1;

   194 	// returns the number of bytes used to complete a valid multibyte character.

   195 	return pointerToCurrentUtf8Byte - aUtf8.Ptr();

   196 } //end of function

   198 //-----------------------------------------------------------------------------

   199 //Function Name : TInt _Utf16ToUtf8(char* aDst, wchar_t aSrc, mbstate_t* ps, int aLen )

   200 //Description   : Converts wide char in UCS2 format to UTF8 equivalent

   201 //Return Value  : The number of bytes converted, 0 if L'\0\' was translated, -1 on

   202 //generic error and errno set appropriately, -2 if len is not sufficient to store aSrc wide char

   203 //-----------------------------------------------------------------------------

   204 TInt _Utf16ToUtf8(char* dst, wchar_t aSrc, mbstate_t* ps, int aLen)

   205 {

   206 	int retval = 0;

   207 	// check the state

   208 	if(ps->__count !=_EUTF16InitialState && ps->__count != _EUTF16_21BitExtensionState)

   209 	{

   210 		errno = EINVAL;

   211 		return -1;

   212 	}

   214 	//following characters are illegal

   215 	//see http://www.unicode.org/faq/utf_bom.html#40

   216 	if(aSrc == 0xFFFE || aSrc == 0xFFFF || (aSrc >= 0xFDD0 && aSrc <= 0xFDEF) )

   217 	{

   218 		errno = EILSEQ;

   219 		return -1;

   220 	}

   223 	if(ps->__count == _EUTF16InitialState)

   224 	{

   226 		//following characters in addition are illegal in initial state

   227 		//see http://www.unicode.org/faq/utf_bom.html#40

   228 		if((aSrc >= 0xDC00 && aSrc <= 0xDFFF) )

   229 		{

   230 			errno = EILSEQ;

   231 			return -1;

   232 		}

   235 		if ((aSrc & 0xff80)==0x0000)

   236 		{

   237 			if(aLen >= 1)

   238 			{

   239 				*dst++ = static_cast<TUint8>(aSrc);

   240 				retval = 1;

   241 			}

   242 			else

   243 			{

   244 				return -2;

   245 			}

   247 		}

   248 		else if ((aSrc & 0xf800)==0x0000)

   249 		{

   250 			if (aLen >= 2)

   251 			{

   252 				*dst++ = static_cast<TUint8>(0xc0|(aSrc>>6));

   253 				*dst++ = static_cast<TUint8> (0x80|(aSrc&0x3f));

   254 				retval = 2;

   255 			}

   256 			else

   257 			{

   258 				return -2;

   259 			}

   260 		}

   261 		else if ((aSrc & 0xfc00)==0xd800)

   262 		{

   263 			 ps->__value.lead = aSrc;

   264 		 	 ps->__count = _EUTF16_21BitExtensionState;

   265 		 	retval = 0; //nothing written out just yet

   266 		}

   267 		else

   268 		{

   269 			if ( aLen >= 3)

   270 			{

   271 				*dst++ = static_cast<TUint8>(0xe0|(aSrc>>12));

   272 				*dst++ = static_cast<TUint8>(0x80|((aSrc>>6)&0x3f));

   273 				*dst++ = static_cast<TUint8>(0x80|(aSrc&0x3f));

   274 				retval = 3;

   275 			}

   276 			else

   277 			{

   278 				return -2;

   279 			}

   280 		}

   283 	}

   284 	else //ps->__count == _EUCS2_21BitExtensionState)

   285 	{

   286 		//characters outside this range are illegal in this state

   287 		//see http://www.unicode.org/faq/utf_bom.html#40

   288 		if((aSrc < 0xDC00 || aSrc > 0xDFFF) )

   289 		{

   290 			errno = EILSEQ;

   291 			return -1;

   292 		}

   294 		if ((aSrc & 0xfc00)!=0xdc00)

   295 		{

   296 			errno = EILSEQ;

   297 			return -1;

   298 		}

   299 		if ( aLen >= 4)

   300 		{

   301 			//snippet taken from unicode faq

   302 			//http://www.unicode.org/faq/utf_bom.html#39

   304 			unsigned long  codepoint = (ps->__value.lead << 10) + aSrc + KSURROGATE_OFFSET;

   306 			*dst++ = static_cast<TUint8>( 0xf0|(codepoint>>18));

   307 			*dst++ = static_cast<TUint8>(0x80|((codepoint>>12)&0x3f));

   308 			*dst++ = static_cast<TUint8>(0x80|((codepoint>>6)&0x3f));

   309 			*dst++ = static_cast<TUint8>(0x80|(codepoint&0x3f));

   310 			retval = 4;

   311 		}

   312 		else

   313 		{

   314 			return -2;

   315 		}

   316 		ps->__count = _EUTF16InitialState;

   317 	}

   318 	return retval;

   321 }//end of function

author	sl
	Tue, 10 Jun 2014 14:32:02 +0200
changeset 1	260cb5ec6c19
permissions	-rw-r--r--