os/textandloc/charconvfw/charconv_fw/tools/convtool/utf.cpp
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/textandloc/charconvfw/charconv_fw/tools/convtool/utf.cpp	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,226 @@
     1.4 +/*
     1.5 +* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
     1.6 +* All rights reserved.
     1.7 +* This component and the accompanying materials are made available
     1.8 +* under the terms of "Eclipse Public License v1.0"
     1.9 +* which accompanies this distribution, and is available
    1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
    1.11 +*
    1.12 +* Initial Contributors:
    1.13 +* Nokia Corporation - initial contribution.
    1.14 +*
    1.15 +* Contributors:
    1.16 +*
    1.17 +* Description: 
    1.18 +*
    1.19 +*/
    1.20 +
    1.21 +
    1.22 +#include <stdlib.h>
    1.23 +
    1.24 +const int KErrorIllFormedInput=-1;
    1.25 +
    1.26 +int Utf8ToUnicode(wchar_t* aUnicode, const char* aUtf8)
    1.27 +// must '\0'-terminate the output
    1.28 +	{
    1.29 +	wchar_t* startOfUnicode=aUnicode;
    1.30 +	for (;;)
    1.31 +		{
    1.32 +		unsigned int currentUtf8Byte=*aUtf8;
    1.33 +		if (currentUtf8Byte=='\0')
    1.34 +			{
    1.35 +			break;
    1.36 +			}
    1.37 +		if ((currentUtf8Byte&0x80)==0x00)
    1.38 +			{
    1.39 +			if (startOfUnicode!=NULL)
    1.40 +				{
    1.41 +				*aUnicode=(wchar_t)currentUtf8Byte;
    1.42 +				}
    1.43 +			}
    1.44 +		else if ((currentUtf8Byte&0xe0)==0xc0)
    1.45 +			{
    1.46 +			unsigned int currentUnicodeCharacter=((currentUtf8Byte&0x1f)<<6);
    1.47 +			++aUtf8;
    1.48 +			currentUtf8Byte=*aUtf8;
    1.49 +			if ((currentUtf8Byte&0xc0)!=0x80)
    1.50 +				{
    1.51 +				return KErrorIllFormedInput;
    1.52 +				}
    1.53 +			currentUnicodeCharacter|=(currentUtf8Byte&0x3f);
    1.54 +			if (startOfUnicode!=NULL)
    1.55 +				{
    1.56 +				*aUnicode=(wchar_t)currentUnicodeCharacter;
    1.57 +				}
    1.58 +			}
    1.59 +		else if ((currentUtf8Byte&0xf0)==0xe0)
    1.60 +			{
    1.61 +			unsigned int currentUnicodeCharacter=((currentUtf8Byte&0x0f)<<12);
    1.62 +			++aUtf8;
    1.63 +			currentUtf8Byte=*aUtf8;
    1.64 +			if ((currentUtf8Byte&0xc0)!=0x80)
    1.65 +				{
    1.66 +				return KErrorIllFormedInput;
    1.67 +				}
    1.68 +			currentUnicodeCharacter|=((currentUtf8Byte&0x3f)<<6);
    1.69 +			++aUtf8;
    1.70 +			currentUtf8Byte=*aUtf8;
    1.71 +			if ((currentUtf8Byte&0xc0)!=0x80)
    1.72 +				{
    1.73 +				return KErrorIllFormedInput;
    1.74 +				}
    1.75 +			currentUnicodeCharacter|=(currentUtf8Byte&0x3f);
    1.76 +			if (startOfUnicode!=NULL)
    1.77 +				{
    1.78 +				*aUnicode=(wchar_t)currentUnicodeCharacter;
    1.79 +				}
    1.80 +			}
    1.81 +		else if ((currentUtf8Byte&0xf8)==0xf0)
    1.82 +			{
    1.83 +			unsigned int currentUnicodeCharacter=((currentUtf8Byte&0x07)<<8);
    1.84 +			++aUtf8;
    1.85 +			currentUtf8Byte=*aUtf8;
    1.86 +			if ((currentUtf8Byte&0xc0)!=0x80)
    1.87 +				{
    1.88 +				return KErrorIllFormedInput;
    1.89 +				}
    1.90 +			currentUnicodeCharacter|=((currentUtf8Byte&0x3f)<<2);
    1.91 +			if (currentUnicodeCharacter<0x0040)
    1.92 +				{
    1.93 +				return KErrorIllFormedInput;
    1.94 +				}
    1.95 +			currentUnicodeCharacter-=0x0040;
    1.96 +			if (currentUnicodeCharacter>=0x0400)
    1.97 +				{
    1.98 +				return KErrorIllFormedInput;
    1.99 +				}
   1.100 +			++aUtf8;
   1.101 +			currentUtf8Byte=*aUtf8;
   1.102 +			if ((currentUtf8Byte&0xc0)!=0x80)
   1.103 +				{
   1.104 +				return KErrorIllFormedInput;
   1.105 +				}
   1.106 +			currentUnicodeCharacter|=((currentUtf8Byte&0x30)>>4);
   1.107 +			if (startOfUnicode!=NULL)
   1.108 +				{
   1.109 +				*aUnicode=(wchar_t)(0xd800|currentUnicodeCharacter);
   1.110 +				}
   1.111 +			currentUnicodeCharacter=((currentUtf8Byte&0x0f)<<6);
   1.112 +			++aUtf8;
   1.113 +			currentUtf8Byte=*aUtf8;
   1.114 +			if ((currentUtf8Byte&0xc0)!=0x80)
   1.115 +				{
   1.116 +				return KErrorIllFormedInput;
   1.117 +				}
   1.118 +			currentUnicodeCharacter|=(currentUtf8Byte&0x3f);
   1.119 +			++aUnicode;
   1.120 +			if (startOfUnicode!=NULL)
   1.121 +				{
   1.122 +				*aUnicode=(wchar_t)(0xdc00|currentUnicodeCharacter);
   1.123 +				}
   1.124 +			}
   1.125 +		else
   1.126 +			{
   1.127 +			return KErrorIllFormedInput;
   1.128 +			}
   1.129 +		++aUnicode;
   1.130 +		++aUtf8;
   1.131 +		}
   1.132 +	if (startOfUnicode!=NULL)
   1.133 +		{
   1.134 +		*aUnicode='\0';
   1.135 +		}
   1.136 +	return aUnicode-startOfUnicode;
   1.137 +	}
   1.138 +#include <stdio.h>
   1.139 +int UnicodeToUtf8(char* aUtf8, const wchar_t* aUnicode)
   1.140 +// must '\0'-terminate the output
   1.141 +	{
   1.142 +	char* startOfUtf8=aUtf8;
   1.143 +	for (;;)
   1.144 +		{
   1.145 +		unsigned int currentUnicodeCharacter=*aUnicode;
   1.146 +		if (currentUnicodeCharacter=='\0')
   1.147 +			{
   1.148 +			break;
   1.149 +			}
   1.150 +		if ((currentUnicodeCharacter&0xff80)==0x0000)
   1.151 +			{
   1.152 +			if (startOfUtf8!=NULL)
   1.153 +				{
   1.154 +				*aUtf8=(char)currentUnicodeCharacter;
   1.155 +				}
   1.156 +			}
   1.157 +		else if ((currentUnicodeCharacter&0xf800)==0x0000)
   1.158 +			{
   1.159 +			if (startOfUtf8!=NULL)
   1.160 +				{
   1.161 +				*aUtf8=(char)(0xc0|(currentUnicodeCharacter>>6));
   1.162 +				}
   1.163 +			++aUtf8;
   1.164 +			if (startOfUtf8!=NULL)
   1.165 +				{
   1.166 +				*aUtf8=(char)(0x80|(currentUnicodeCharacter&0x3f));
   1.167 +				}
   1.168 +			}
   1.169 +		else if ((currentUnicodeCharacter&0xfc00)==0xd800)
   1.170 +			{
   1.171 +			currentUnicodeCharacter+=0x0040;
   1.172 +			if (startOfUtf8!=NULL)
   1.173 +				{
   1.174 +				*aUtf8=(char)(0xf0|((currentUnicodeCharacter>>8)&0x07));
   1.175 +				}
   1.176 +			++aUtf8;
   1.177 +			if (startOfUtf8!=NULL)
   1.178 +				{
   1.179 +				*aUtf8=(char)(0x80|((currentUnicodeCharacter>>2)&0x3f));
   1.180 +				}
   1.181 +			{
   1.182 +			unsigned int currentUtf8Byte=(0x80|((currentUnicodeCharacter&0x03)<<4));
   1.183 +			++aUnicode;
   1.184 +			currentUnicodeCharacter=*aUnicode;
   1.185 +			if ((currentUnicodeCharacter&0xfc00)!=0xdc00)
   1.186 +				{
   1.187 +				return KErrorIllFormedInput;
   1.188 +				}
   1.189 +			currentUtf8Byte|=((currentUnicodeCharacter>>6)&0x0f);
   1.190 +			++aUtf8;
   1.191 +			if (startOfUtf8!=NULL)
   1.192 +				{
   1.193 +				*aUtf8=(char)currentUtf8Byte;
   1.194 +				}
   1.195 +			}
   1.196 +			++aUtf8;
   1.197 +			if (startOfUtf8!=NULL)
   1.198 +				{
   1.199 +				*aUtf8=(char)(0x80|(currentUnicodeCharacter&0x3f));
   1.200 +				}
   1.201 +			}
   1.202 +		else
   1.203 +			{
   1.204 +			if (startOfUtf8!=NULL)
   1.205 +				{
   1.206 +				*aUtf8=(char)(0xe0|(currentUnicodeCharacter>>12));
   1.207 +				}
   1.208 +			++aUtf8;
   1.209 +			if (startOfUtf8!=NULL)
   1.210 +				{
   1.211 +				*aUtf8=(char)(0x80|((currentUnicodeCharacter>>6)&0x3f));
   1.212 +				}
   1.213 +			++aUtf8;
   1.214 +			if (startOfUtf8!=NULL)
   1.215 +				{
   1.216 +				*aUtf8=(char)(0x80|(currentUnicodeCharacter&0x3f));
   1.217 +				}
   1.218 +			}
   1.219 +		++aUtf8;
   1.220 +		++aUnicode;
   1.221 +		}
   1.222 +	if (startOfUtf8!=NULL)
   1.223 +		{
   1.224 +		*aUtf8='\0';
   1.225 +		}
   1.226 +	return aUtf8-startOfUtf8;
   1.227 +	}
   1.228 +
   1.229 +