/*
 * First public contribution.
 *
 * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
 * This component and the accompanying materials are made available
 * under the terms of "Eclipse Public License v1.0"
 * which accompanies this distribution, and is available
 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
 *
 * Initial Contributors:
 * Nokia Corporation - initial contribution.
 */
/* Error code returned by Utf8ToUnicode and UnicodeToUtf8 when the input is ill-formed. */
const int KErrorIllFormedInput=-1;
23 int Utf8ToUnicode(wchar_t* aUnicode, const char* aUtf8)
24 // must '\0'-terminate the output
26 wchar_t* startOfUnicode=aUnicode;
29 unsigned int currentUtf8Byte=*aUtf8;
30 if (currentUtf8Byte=='\0')
34 if ((currentUtf8Byte&0x80)==0x00)
36 if (startOfUnicode!=NULL)
38 *aUnicode=(wchar_t)currentUtf8Byte;
41 else if ((currentUtf8Byte&0xe0)==0xc0)
43 unsigned int currentUnicodeCharacter=((currentUtf8Byte&0x1f)<<6);
45 currentUtf8Byte=*aUtf8;
46 if ((currentUtf8Byte&0xc0)!=0x80)
48 return KErrorIllFormedInput;
50 currentUnicodeCharacter|=(currentUtf8Byte&0x3f);
51 if (startOfUnicode!=NULL)
53 *aUnicode=(wchar_t)currentUnicodeCharacter;
56 else if ((currentUtf8Byte&0xf0)==0xe0)
58 unsigned int currentUnicodeCharacter=((currentUtf8Byte&0x0f)<<12);
60 currentUtf8Byte=*aUtf8;
61 if ((currentUtf8Byte&0xc0)!=0x80)
63 return KErrorIllFormedInput;
65 currentUnicodeCharacter|=((currentUtf8Byte&0x3f)<<6);
67 currentUtf8Byte=*aUtf8;
68 if ((currentUtf8Byte&0xc0)!=0x80)
70 return KErrorIllFormedInput;
72 currentUnicodeCharacter|=(currentUtf8Byte&0x3f);
73 if (startOfUnicode!=NULL)
75 *aUnicode=(wchar_t)currentUnicodeCharacter;
78 else if ((currentUtf8Byte&0xf8)==0xf0)
80 unsigned int currentUnicodeCharacter=((currentUtf8Byte&0x07)<<8);
82 currentUtf8Byte=*aUtf8;
83 if ((currentUtf8Byte&0xc0)!=0x80)
85 return KErrorIllFormedInput;
87 currentUnicodeCharacter|=((currentUtf8Byte&0x3f)<<2);
88 if (currentUnicodeCharacter<0x0040)
90 return KErrorIllFormedInput;
92 currentUnicodeCharacter-=0x0040;
93 if (currentUnicodeCharacter>=0x0400)
95 return KErrorIllFormedInput;
98 currentUtf8Byte=*aUtf8;
99 if ((currentUtf8Byte&0xc0)!=0x80)
101 return KErrorIllFormedInput;
103 currentUnicodeCharacter|=((currentUtf8Byte&0x30)>>4);
104 if (startOfUnicode!=NULL)
106 *aUnicode=(wchar_t)(0xd800|currentUnicodeCharacter);
108 currentUnicodeCharacter=((currentUtf8Byte&0x0f)<<6);
110 currentUtf8Byte=*aUtf8;
111 if ((currentUtf8Byte&0xc0)!=0x80)
113 return KErrorIllFormedInput;
115 currentUnicodeCharacter|=(currentUtf8Byte&0x3f);
117 if (startOfUnicode!=NULL)
119 *aUnicode=(wchar_t)(0xdc00|currentUnicodeCharacter);
124 return KErrorIllFormedInput;
129 if (startOfUnicode!=NULL)
133 return aUnicode-startOfUnicode;
136 int UnicodeToUtf8(char* aUtf8, const wchar_t* aUnicode)
137 // must '\0'-terminate the output
139 char* startOfUtf8=aUtf8;
142 unsigned int currentUnicodeCharacter=*aUnicode;
143 if (currentUnicodeCharacter=='\0')
147 if ((currentUnicodeCharacter&0xff80)==0x0000)
149 if (startOfUtf8!=NULL)
151 *aUtf8=(char)currentUnicodeCharacter;
154 else if ((currentUnicodeCharacter&0xf800)==0x0000)
156 if (startOfUtf8!=NULL)
158 *aUtf8=(char)(0xc0|(currentUnicodeCharacter>>6));
161 if (startOfUtf8!=NULL)
163 *aUtf8=(char)(0x80|(currentUnicodeCharacter&0x3f));
166 else if ((currentUnicodeCharacter&0xfc00)==0xd800)
168 currentUnicodeCharacter+=0x0040;
169 if (startOfUtf8!=NULL)
171 *aUtf8=(char)(0xf0|((currentUnicodeCharacter>>8)&0x07));
174 if (startOfUtf8!=NULL)
176 *aUtf8=(char)(0x80|((currentUnicodeCharacter>>2)&0x3f));
179 unsigned int currentUtf8Byte=(0x80|((currentUnicodeCharacter&0x03)<<4));
181 currentUnicodeCharacter=*aUnicode;
182 if ((currentUnicodeCharacter&0xfc00)!=0xdc00)
184 return KErrorIllFormedInput;
186 currentUtf8Byte|=((currentUnicodeCharacter>>6)&0x0f);
188 if (startOfUtf8!=NULL)
190 *aUtf8=(char)currentUtf8Byte;
194 if (startOfUtf8!=NULL)
196 *aUtf8=(char)(0x80|(currentUnicodeCharacter&0x3f));
201 if (startOfUtf8!=NULL)
203 *aUtf8=(char)(0xe0|(currentUnicodeCharacter>>12));
206 if (startOfUtf8!=NULL)
208 *aUtf8=(char)(0x80|((currentUnicodeCharacter>>6)&0x3f));
211 if (startOfUtf8!=NULL)
213 *aUtf8=(char)(0x80|(currentUnicodeCharacter&0x3f));
219 if (startOfUtf8!=NULL)
223 return aUtf8-startOfUtf8;