/*
* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description:
* Conversion between '\0'-terminated UTF-8 byte strings and '\0'-terminated
* wchar_t strings holding UTF-16 code units.
*
*/


#include <stddef.h> /* wchar_t, NULL, size_t */

/* Value returned by both converters when the input cannot be decoded. */
const int KErrorIllFormedInput=-1;

/* Returns non-zero when aByte has the UTF-8 continuation form 10xxxxxx. */
static int IsUtf8ContinuationByte(unsigned int aByte)
{
    return (aByte & 0xc0) == 0x80;
}

/*
 * Converts the '\0'-terminated UTF-8 string aUtf8 into UTF-16 code units.
 *
 * aUnicode  Output buffer, or NULL to only count the code units needed.
 *           When non-NULL it must have room for the returned count plus one
 *           terminating '\0'; the function has no way to check this.
 * aUtf8     '\0'-terminated input.
 *
 * Returns the number of code units produced (terminator not counted), or
 * KErrorIllFormedInput when a lead byte is not a valid 1..4 byte lead, a
 * continuation byte is missing, or a four-byte sequence encodes a value
 * outside 0x10000..0x10FFFF. On error the buffer contents are unspecified.
 *
 * Four-byte sequences are always written as a surrogate pair, whatever the
 * width of wchar_t. Overlong two- and three-byte forms and three-byte
 * encoded surrogates are not rejected; they decode to their raw value.
 */
int Utf8ToUnicode(wchar_t* aUnicode, const char* aUtf8)
// must '\0'-terminate the output
{
    /* Read through unsigned char so bytes >= 0x80 never sign-extend. */
    const unsigned char* in = (const unsigned char*)aUtf8;
    /* Count with an index: doing arithmetic on a NULL aUnicode is undefined. */
    size_t count = 0;

    while (*in != 0)
    {
        unsigned int lead = *in++;
        unsigned long codePoint;
        int trailing;
        int i;

        if (lead < 0x80)
        {
            codePoint = lead;
            trailing = 0;
        }
        else if ((lead & 0xe0) == 0xc0)
        {
            codePoint = lead & 0x1f;
            trailing = 1;
        }
        else if ((lead & 0xf0) == 0xe0)
        {
            codePoint = lead & 0x0f;
            trailing = 2;
        }
        else if ((lead & 0xf8) == 0xf0)
        {
            codePoint = lead & 0x07;
            trailing = 3;
        }
        else
        {
            /* Stray continuation byte or a lead byte of 0xf8 and above. */
            return KErrorIllFormedInput;
        }

        for (i = 0; i < trailing; ++i)
        {
            /* Checked before advancing, so a truncated sequence stops at the
               terminator instead of reading past it. */
            unsigned int next = *in;
            if (!IsUtf8ContinuationByte(next))
            {
                return KErrorIllFormedInput;
            }
            codePoint = (codePoint << 6) | (next & 0x3f);
            ++in;
        }

        if (trailing == 3)
        {
            if (codePoint < 0x10000UL || codePoint > 0x10ffffUL)
            {
                return KErrorIllFormedInput;
            }
            codePoint -= 0x10000UL;
            if (aUnicode != NULL)
            {
                aUnicode[count] = (wchar_t)(0xd800 | (codePoint >> 10));
                aUnicode[count + 1] = (wchar_t)(0xdc00 | (codePoint & 0x3ff));
            }
            count += 2;
        }
        else
        {
            if (aUnicode != NULL)
            {
                aUnicode[count] = (wchar_t)codePoint;
            }
            ++count;
        }
    }

    if (aUnicode != NULL)
    {
        aUnicode[count] = '\0';
    }
    return (int)count;
}

/*
 * Converts the '\0'-terminated wchar_t string aUnicode into UTF-8.
 *
 * aUtf8     Output buffer, or NULL to only count the bytes needed. When
 *           non-NULL it must have room for the returned count plus one
 *           terminating '\0'; the function has no way to check this.
 * aUnicode  '\0'-terminated input of UTF-16 code units. Where wchar_t is
 *           wider than 16 bits, values 0x10000..0x10FFFF are also accepted
 *           and encoded directly as four bytes.
 *
 * Returns the number of bytes produced (terminator not counted), or
 * KErrorIllFormedInput when a high surrogate (0xD800..0xDBFF) is not
 * followed by a low surrogate (0xDC00..0xDFFF) or a value exceeds 0x10FFFF.
 * On error the buffer contents are unspecified.
 *
 * A low surrogate that does not follow a high surrogate is not rejected; it
 * is written as a three-byte sequence.
 */
int UnicodeToUtf8(char* aUtf8, const wchar_t* aUnicode)
// must '\0'-terminate the output
{
    /* Count with an index: doing arithmetic on a NULL aUtf8 is undefined. */
    size_t count = 0;

    for (;;)
    {
        unsigned long codePoint = (unsigned long)*aUnicode;
        unsigned char bytes[4];
        int length;
        int i;

        if (codePoint == 0)
        {
            break;
        }
        ++aUnicode;

        if (codePoint >= 0xd800UL && codePoint <= 0xdbffUL)
        {
            /* High surrogate: the next unit must complete the pair. A
               terminator here fails the range test, so nothing is read
               beyond the end of the input. */
            unsigned long low = (unsigned long)*aUnicode;
            if (low < 0xdc00UL || low > 0xdfffUL)
            {
                return KErrorIllFormedInput;
            }
            ++aUnicode;
            codePoint = 0x10000UL + ((codePoint - 0xd800UL) << 10) + (low - 0xdc00UL);
        }
        else if (codePoint > 0x10ffffUL)
        {
            return KErrorIllFormedInput;
        }

        if (codePoint < 0x80UL)
        {
            bytes[0] = (unsigned char)codePoint;
            length = 1;
        }
        else if (codePoint < 0x800UL)
        {
            bytes[0] = (unsigned char)(0xc0 | (codePoint >> 6));
            bytes[1] = (unsigned char)(0x80 | (codePoint & 0x3f));
            length = 2;
        }
        else if (codePoint < 0x10000UL)
        {
            bytes[0] = (unsigned char)(0xe0 | (codePoint >> 12));
            bytes[1] = (unsigned char)(0x80 | ((codePoint >> 6) & 0x3f));
            bytes[2] = (unsigned char)(0x80 | (codePoint & 0x3f));
            length = 3;
        }
        else
        {
            bytes[0] = (unsigned char)(0xf0 | (codePoint >> 18));
            bytes[1] = (unsigned char)(0x80 | ((codePoint >> 12) & 0x3f));
            bytes[2] = (unsigned char)(0x80 | ((codePoint >> 6) & 0x3f));
            bytes[3] = (unsigned char)(0x80 | (codePoint & 0x3f));
            length = 4;
        }

        if (aUtf8 != NULL)
        {
            for (i = 0; i < length; ++i)
            {
                aUtf8[count + (size_t)i] = (char)bytes[i];
            }
        }
        count += (size_t)length;
    }

    if (aUtf8 != NULL)
    {
        aUtf8[count] = '\0';
    }
    return (int)count;
}