diff -r 000000000000 -r bde4ae8d615e os/ossrv/genericopenlibs/openenvcore/libc/src/charcnv.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/os/ossrv/genericopenlibs/openenvcore/libc/src/charcnv.cpp Fri Jun 15 03:10:57 2012 +0200 @@ -0,0 +1,322 @@ +// Copyright (c) 2005-2009 Nokia Corporation and/or its subsidiary(-ies). +// All rights reserved. +// This component and the accompanying materials are made available +// under the terms of "Eclipse Public License v1.0" +// which accompanies this distribution, and is available +// at the URL "http://www.eclipse.org/legal/epl-v10.html". +// +// Initial Contributors: +// Nokia Corporation - initial contribution. +// +// Contributors: +// +// Description: +// Name : MRT_WCHARCNVT.CPP +// Part of : MRT LIBC +// Contains the source for the helper functions used by wchar +// restartable conversion API's in libc +// Version : 1.0 +// + + + +// Copyright (c) 1997-2003 Symbian Ltd. All rights reserved. + +// system includes +#include +#include +#include +#include +#include +#include +#include + +#include "wcharcnv.h" + +#define KSURROGATE_OFFSET 0x10000 - (0xD800 << 10) - 0xDC00 + +//----------------------------------------------------------------------------- +//Function Name : TInt ConvertToUnicodeFromUtf8(TDes16& aUnicode, const +// TDesC8& aUtf8, mbstate_t *state) +//Description : Converts the unicode to UTF8 +//Return Value : The number of unconverted bytes left at the end of the input +//descriptor, or one of the error values defined in TError. +//----------------------------------------------------------------------------- +TInt ConvertToUnicodeFromUtf8(TDes16& aUnicode, const TDesC8& aUtf8, mbstate_t *state) +{ + aUnicode.SetLength(0); + if (aUtf8.Length()==0) + { + return 0; + } + if (aUnicode.MaxLength()==0) + { + return aUtf8.Length(); + } + + HBufC8* utf8 = NULL; + if ( state->__count > 0) + { + // state have some information, use that. + utf8 = HBufC8::NewLC ( state->__count + aUtf8.Length() ); + TPtr8 tempBuf = utf8->Des(); + TPtr8 temp ((TUint8*)state->__value.__wchb, state->__count); + tempBuf.Copy(temp); + tempBuf.Append(aUtf8); + } + + TUint16* pointerToCurrentUnicodeCharacter=CONST_CAST(TUint16*, aUnicode.Ptr()); + const TUint16* pointerToLastUnicodeCharacter=pointerToCurrentUnicodeCharacter+(aUnicode.MaxLength()-1); + const TUint8* pointerToCurrentUtf8Byte= utf8 ? utf8->Des().Ptr() : aUtf8.Ptr(); + const TUint8* pointerToPendingUtf8Byte=utf8 ? utf8->Des().Ptr() : aUtf8.Ptr(); + TInt length = utf8 ? utf8->Des().Length() : aUtf8.Length(); + const TUint8* pointerToLastUtf8Byte=pointerToCurrentUtf8Byte+(length-1); + TUint16 replacementcharacter = 0xFFFD; + TUint8 currentUtf8Byte; + TUint currentUnicodeCharacter; + TInt sequenceLength; + + + FOREVER + { + currentUtf8Byte=*pointerToCurrentUtf8Byte; + pointerToPendingUtf8Byte = pointerToCurrentUtf8Byte; + sequenceLength=100; + + for(TInt i=0;i<7;i++) + { + if ((currentUtf8Byte&(0xf8<(0xF0<6) && sequenceLength!=0) + { + currentUnicodeCharacter=replacementcharacter; + } + else + { + if ((pointerToLastUtf8Byte-pointerToCurrentUtf8Byte+1)__count = 0; + while (pointerToCurrentUtf8Byte <= pointerToLastUtf8Byte) + { + state->__value.__wchb[state->__count++] = *(pointerToCurrentUtf8Byte++); + } + // reset the current pointer + pointerToCurrentUtf8Byte -= state->__count; + if((pointerToCurrentUnicodeCharacter-aUnicode.Ptr())==0) + { + // still nothing is decoded. + if ( utf8 ) + { + CleanupStack::PopAndDestroy(); // utf8 + } + return -2; + //return -1; + } + // something is already decoded, so return the no of bytes that use for + // decoding. + break; + } + + // reset the state + state->__count = 0; + currentUnicodeCharacter = currentUtf8Byte&(0x7F>>sequenceLength); + + for(TInt i=sequenceLength;i>1; i--) + { + currentUtf8Byte = *(++pointerToCurrentUtf8Byte); + if ((currentUtf8Byte&0xc0)==0x80) + { + currentUnicodeCharacter = (currentUnicodeCharacter<<6)|(currentUtf8Byte&0x3F); + } + else + { + // Encoding error occured. + // store the contained within the state and return -1. + // set the error EILSEQ to errno + if ( utf8 ) + { + CleanupStack::PopAndDestroy(); // utf8 + } + errno = EILSEQ; + return -1; + //currentUnicodeCharacter=replacementcharacter; + //--pointerToCurrentUtf8Byte; + } + } + } + + if (currentUnicodeCharacter > 0xFFFF) + { + if(pointerToCurrentUnicodeCharacter>=pointerToLastUnicodeCharacter) + { + // unicode descriptor dnt have 2 wchar bytes to hold the data. + pointerToCurrentUtf8Byte=pointerToPendingUtf8Byte; + break; + } + + TUint surrogate = (currentUnicodeCharacter>>10) + 0xD7C0; + *pointerToCurrentUnicodeCharacter=static_cast(surrogate); + ++pointerToCurrentUnicodeCharacter; + + surrogate = (currentUnicodeCharacter&0x3FF)+0xDC00; + *pointerToCurrentUnicodeCharacter=static_cast(surrogate); + ++pointerToCurrentUnicodeCharacter; + ++pointerToCurrentUtf8Byte; + } + else + { + *pointerToCurrentUnicodeCharacter=static_cast(currentUnicodeCharacter); + ++pointerToCurrentUnicodeCharacter; + ++pointerToCurrentUtf8Byte; + } + + if ((pointerToCurrentUtf8Byte>pointerToLastUtf8Byte) || (pointerToCurrentUnicodeCharacter>pointerToLastUnicodeCharacter)) + { + // checking the boundary condition. + // Here either the UTF-8 or Unicode descriptor reached to the end. + break; + } + } // forever + // decoding finished. + aUnicode.SetLength(pointerToCurrentUnicodeCharacter-aUnicode.Ptr()); + if ( utf8 ) + { + CleanupStack::PopAndDestroy(); // utf8 + } + //return pointerToLastUtf8Byte-pointerToCurrentUtf8Byte+1; + // returns the number of bytes used to complete a valid multibyte character. + return pointerToCurrentUtf8Byte - aUtf8.Ptr(); +} //end of function + +//----------------------------------------------------------------------------- +//Function Name : TInt _Utf16ToUtf8(char* aDst, wchar_t aSrc, mbstate_t* ps, int aLen ) +//Description : Converts wide char in UCS2 format to UTF8 equivalent +//Return Value : The number of bytes converted, 0 if L'\0\' was translated, -1 on +//generic error and errno set appropriately, -2 if len is not sufficient to store aSrc wide char +//----------------------------------------------------------------------------- +TInt _Utf16ToUtf8(char* dst, wchar_t aSrc, mbstate_t* ps, int aLen) +{ + int retval = 0; + // check the state + if(ps->__count !=_EUTF16InitialState && ps->__count != _EUTF16_21BitExtensionState) + { + errno = EINVAL; + return -1; + } + + //following characters are illegal + //see http://www.unicode.org/faq/utf_bom.html#40 + if(aSrc == 0xFFFE || aSrc == 0xFFFF || (aSrc >= 0xFDD0 && aSrc <= 0xFDEF) ) + { + errno = EILSEQ; + return -1; + } + + + if(ps->__count == _EUTF16InitialState) + { + + //following characters in addition are illegal in initial state + //see http://www.unicode.org/faq/utf_bom.html#40 + if((aSrc >= 0xDC00 && aSrc <= 0xDFFF) ) + { + errno = EILSEQ; + return -1; + } + + + if ((aSrc & 0xff80)==0x0000) + { + if(aLen >= 1) + { + *dst++ = static_cast(aSrc); + retval = 1; + } + else + { + return -2; + } + + } + else if ((aSrc & 0xf800)==0x0000) + { + if (aLen >= 2) + { + *dst++ = static_cast(0xc0|(aSrc>>6)); + *dst++ = static_cast (0x80|(aSrc&0x3f)); + retval = 2; + } + else + { + return -2; + } + } + else if ((aSrc & 0xfc00)==0xd800) + { + ps->__value.lead = aSrc; + ps->__count = _EUTF16_21BitExtensionState; + retval = 0; //nothing written out just yet + } + else + { + if ( aLen >= 3) + { + *dst++ = static_cast(0xe0|(aSrc>>12)); + *dst++ = static_cast(0x80|((aSrc>>6)&0x3f)); + *dst++ = static_cast(0x80|(aSrc&0x3f)); + retval = 3; + } + else + { + return -2; + } + } + + + } + else //ps->__count == _EUCS2_21BitExtensionState) + { + //characters outside this range are illegal in this state + //see http://www.unicode.org/faq/utf_bom.html#40 + if((aSrc < 0xDC00 || aSrc > 0xDFFF) ) + { + errno = EILSEQ; + return -1; + } + + if ((aSrc & 0xfc00)!=0xdc00) + { + errno = EILSEQ; + return -1; + } + if ( aLen >= 4) + { + //snippet taken from unicode faq + //http://www.unicode.org/faq/utf_bom.html#39 + + unsigned long codepoint = (ps->__value.lead << 10) + aSrc + KSURROGATE_OFFSET; + + *dst++ = static_cast( 0xf0|(codepoint>>18)); + *dst++ = static_cast(0x80|((codepoint>>12)&0x3f)); + *dst++ = static_cast(0x80|((codepoint>>6)&0x3f)); + *dst++ = static_cast(0x80|(codepoint&0x3f)); + retval = 4; + } + else + { + return -2; + } + ps->__count = _EUTF16InitialState; + } + return retval; + + +}//end of function +