1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/textandloc/charconvfw/charconv_fw/tools/convtool/utf.cpp Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,226 @@
1.4 +/*
1.5 +* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
1.6 +* All rights reserved.
1.7 +* This component and the accompanying materials are made available
1.8 +* under the terms of "Eclipse Public License v1.0"
1.9 +* which accompanies this distribution, and is available
1.10 +* at the URL "http://www.eclipse.org/legal/epl-v10.html".
1.11 +*
1.12 +* Initial Contributors:
1.13 +* Nokia Corporation - initial contribution.
1.14 +*
1.15 +* Contributors:
1.16 +*
1.17 +* Description:
1.18 +*
1.19 +*/
1.20 +
1.21 +
1.22 +#include <stdlib.h>
1.23 +
// Error code returned by the converters in this file when the input text is
// not well formed.
const int KErrorIllFormedInput=-1;

// Converts the '\0'-terminated UTF-8 string aUtf8 into UTF-16 code units, one
// code unit per wchar_t element, and '\0'-terminates the output.
//
// aUnicode  Destination buffer, or NULL to only count the output. No bounds
//           checking is performed: the caller must supply room for the
//           returned length plus one terminator element.
// aUtf8     '\0'-terminated UTF-8 input.
//
// Returns the number of wchar_t elements produced (excluding the terminator),
// or KErrorIllFormedInput if the input contains a stray or missing
// continuation byte, an unsupported lead byte (0xf8 and above), an overlong
// encoding, or a value above U+10FFFF. On error the contents of aUnicode are
// unspecified.
//
// Code points above U+FFFF are always written as a surrogate pair, whatever
// the width of wchar_t. Three-byte sequences that encode surrogate values are
// passed through unchanged.
int Utf8ToUnicode(wchar_t* aUnicode, const char* aUtf8)
{
    // Output position kept as an index so that counting mode never performs
    // arithmetic on a null pointer.
    int length=0;
    for (;;)
    {
        const unsigned int lead=(unsigned char)*aUtf8;
        unsigned int codePoint;
        unsigned int minimum;   // smallest value this sequence length may encode
        int trailCount;         // number of continuation bytes still expected
        if (lead=='\0')
        {
            break;
        }
        if ((lead&0x80)==0x00)
        {
            codePoint=lead;
            minimum=0x0000;
            trailCount=0;
        }
        else if ((lead&0xe0)==0xc0)
        {
            codePoint=(lead&0x1f);
            minimum=0x0080;
            trailCount=1;
        }
        else if ((lead&0xf0)==0xe0)
        {
            codePoint=(lead&0x0f);
            minimum=0x0800;
            trailCount=2;
        }
        else if ((lead&0xf8)==0xf0)
        {
            codePoint=(lead&0x07);
            minimum=0x10000;
            trailCount=3;
        }
        else
        {
            return KErrorIllFormedInput;
        }
        ++aUtf8;
        // Each continuation byte is validated before the next one is read; the
        // '\0' terminator is not a continuation byte, so this never reads past
        // the end of the input.
        for (; trailCount>0; --trailCount)
        {
            const unsigned int trail=(unsigned char)*aUtf8;
            if ((trail&0xc0)!=0x80)
            {
                return KErrorIllFormedInput;
            }
            codePoint=(codePoint<<6)|(trail&0x3f);
            ++aUtf8;
        }
        // Reject overlong forms (e.g. 0xc0 0x80, which would otherwise embed a
        // terminator in the output) and values beyond the Unicode range.
        if ((codePoint<minimum) || (codePoint>0x10ffff))
        {
            return KErrorIllFormedInput;
        }
        if (codePoint>=0x10000)
        {
            const unsigned int offset=codePoint-0x10000;
            if (aUnicode!=NULL)
            {
                aUnicode[length]=(wchar_t)(0xd800|(offset>>10));
                aUnicode[length+1]=(wchar_t)(0xdc00|(offset&0x03ff));
            }
            length+=2;
        }
        else
        {
            if (aUnicode!=NULL)
            {
                aUnicode[length]=(wchar_t)codePoint;
            }
            ++length;
        }
    }
    if (aUnicode!=NULL)
    {
        aUnicode[length]='\0';
    }
    return length;
}
1.138 +#include <stdio.h>
1.139 +int UnicodeToUtf8(char* aUtf8, const wchar_t* aUnicode)
1.140 +// must '\0'-terminate the output
1.141 + {
1.142 + char* startOfUtf8=aUtf8;
1.143 + for (;;)
1.144 + {
1.145 + unsigned int currentUnicodeCharacter=*aUnicode;
1.146 + if (currentUnicodeCharacter=='\0')
1.147 + {
1.148 + break;
1.149 + }
1.150 + if ((currentUnicodeCharacter&0xff80)==0x0000)
1.151 + {
1.152 + if (startOfUtf8!=NULL)
1.153 + {
1.154 + *aUtf8=(char)currentUnicodeCharacter;
1.155 + }
1.156 + }
1.157 + else if ((currentUnicodeCharacter&0xf800)==0x0000)
1.158 + {
1.159 + if (startOfUtf8!=NULL)
1.160 + {
1.161 + *aUtf8=(char)(0xc0|(currentUnicodeCharacter>>6));
1.162 + }
1.163 + ++aUtf8;
1.164 + if (startOfUtf8!=NULL)
1.165 + {
1.166 + *aUtf8=(char)(0x80|(currentUnicodeCharacter&0x3f));
1.167 + }
1.168 + }
1.169 + else if ((currentUnicodeCharacter&0xfc00)==0xd800)
1.170 + {
1.171 + currentUnicodeCharacter+=0x0040;
1.172 + if (startOfUtf8!=NULL)
1.173 + {
1.174 + *aUtf8=(char)(0xf0|((currentUnicodeCharacter>>8)&0x07));
1.175 + }
1.176 + ++aUtf8;
1.177 + if (startOfUtf8!=NULL)
1.178 + {
1.179 + *aUtf8=(char)(0x80|((currentUnicodeCharacter>>2)&0x3f));
1.180 + }
1.181 + {
1.182 + unsigned int currentUtf8Byte=(0x80|((currentUnicodeCharacter&0x03)<<4));
1.183 + ++aUnicode;
1.184 + currentUnicodeCharacter=*aUnicode;
1.185 + if ((currentUnicodeCharacter&0xfc00)!=0xdc00)
1.186 + {
1.187 + return KErrorIllFormedInput;
1.188 + }
1.189 + currentUtf8Byte|=((currentUnicodeCharacter>>6)&0x0f);
1.190 + ++aUtf8;
1.191 + if (startOfUtf8!=NULL)
1.192 + {
1.193 + *aUtf8=(char)currentUtf8Byte;
1.194 + }
1.195 + }
1.196 + ++aUtf8;
1.197 + if (startOfUtf8!=NULL)
1.198 + {
1.199 + *aUtf8=(char)(0x80|(currentUnicodeCharacter&0x3f));
1.200 + }
1.201 + }
1.202 + else
1.203 + {
1.204 + if (startOfUtf8!=NULL)
1.205 + {
1.206 + *aUtf8=(char)(0xe0|(currentUnicodeCharacter>>12));
1.207 + }
1.208 + ++aUtf8;
1.209 + if (startOfUtf8!=NULL)
1.210 + {
1.211 + *aUtf8=(char)(0x80|((currentUnicodeCharacter>>6)&0x3f));
1.212 + }
1.213 + ++aUtf8;
1.214 + if (startOfUtf8!=NULL)
1.215 + {
1.216 + *aUtf8=(char)(0x80|(currentUnicodeCharacter&0x3f));
1.217 + }
1.218 + }
1.219 + ++aUtf8;
1.220 + ++aUnicode;
1.221 + }
1.222 + if (startOfUtf8!=NULL)
1.223 + {
1.224 + *aUtf8='\0';
1.225 + }
1.226 + return aUtf8-startOfUtf8;
1.227 + }
1.228 +
1.229 +