author | sl |
Tue, 10 Jun 2014 14:32:02 +0200 | |
changeset 1 | 260cb5ec6c19 |
permissions | -rw-r--r-- |
sl@0 | 1 |
/*
* Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description:
*
*/
sl@0 | 17 |
|
sl@0 | 18 |
|
sl@0 | 19 |
#include <stdlib.h> |
sl@0 | 20 |
|
sl@0 | 21 |
/* Value returned by the converters in this file when the input is ill-formed. */
const int KErrorIllFormedInput=-1;

/*
 * Converts the '\0'-terminated UTF-8 string aUtf8 into UTF-16 code units,
 * one code unit per wchar_t element, and '\0'-terminates the output.
 *
 * aUnicode  Destination buffer, or NULL to only count the code units that
 *           would be produced. The caller must provide room for the result
 *           plus the terminator; no bounds checking is done here.
 * aUtf8     '\0'-terminated UTF-8 input.
 *
 * Returns the number of code units produced (terminator not counted), or
 * KErrorIllFormedInput if a lead byte is invalid, a continuation byte is
 * missing, or a 4-byte sequence encodes a value outside 0x10000..0x10FFFF.
 * On error the contents already written to aUnicode are left as they are.
 *
 * 4-byte sequences are emitted as a surrogate pair (two code units).
 * Note: overlong 2- and 3-byte forms and 3-byte encoded surrogates are
 * passed through without being rejected.
 */
int Utf8ToUnicode(wchar_t* aUnicode, const char* aUtf8)
{
    /* An index is used instead of advancing aUnicode so that the counting
       mode (aUnicode==NULL) never performs arithmetic on a null pointer. */
    size_t outputLength=0;
    for (;;)
    {
        /* Read through unsigned char so the value is 0..255 regardless of
           whether plain char is signed. */
        unsigned int currentUtf8Byte=(unsigned char)*aUtf8;
        if (currentUtf8Byte=='\0')
        {
            break;
        }
        if ((currentUtf8Byte&0x80)==0x00)
        {
            /* 0xxxxxxx: single-byte (ASCII) */
            if (aUnicode!=NULL)
            {
                aUnicode[outputLength]=(wchar_t)currentUtf8Byte;
            }
        }
        else if ((currentUtf8Byte&0xe0)==0xc0)
        {
            /* 110xxxxx 10xxxxxx */
            unsigned int currentUnicodeCharacter=((currentUtf8Byte&0x1f)<<6);
            ++aUtf8;
            currentUtf8Byte=(unsigned char)*aUtf8;
            if ((currentUtf8Byte&0xc0)!=0x80)
            {
                return KErrorIllFormedInput;
            }
            currentUnicodeCharacter|=(currentUtf8Byte&0x3f);
            if (aUnicode!=NULL)
            {
                aUnicode[outputLength]=(wchar_t)currentUnicodeCharacter;
            }
        }
        else if ((currentUtf8Byte&0xf0)==0xe0)
        {
            /* 1110xxxx 10xxxxxx 10xxxxxx */
            unsigned int currentUnicodeCharacter=((currentUtf8Byte&0x0f)<<12);
            ++aUtf8;
            currentUtf8Byte=(unsigned char)*aUtf8;
            if ((currentUtf8Byte&0xc0)!=0x80)
            {
                return KErrorIllFormedInput;
            }
            currentUnicodeCharacter|=((currentUtf8Byte&0x3f)<<6);
            ++aUtf8;
            currentUtf8Byte=(unsigned char)*aUtf8;
            if ((currentUtf8Byte&0xc0)!=0x80)
            {
                return KErrorIllFormedInput;
            }
            currentUnicodeCharacter|=(currentUtf8Byte&0x3f);
            if (aUnicode!=NULL)
            {
                aUnicode[outputLength]=(wchar_t)currentUnicodeCharacter;
            }
        }
        else if ((currentUtf8Byte&0xf8)==0xf0)
        {
            /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx -> surrogate pair.
               The first two bytes give the code point shifted right by 10,
               with its two lowest bits still zero. */
            unsigned int currentUnicodeCharacter=((currentUtf8Byte&0x07)<<8);
            ++aUtf8;
            currentUtf8Byte=(unsigned char)*aUtf8;
            if ((currentUtf8Byte&0xc0)!=0x80)
            {
                return KErrorIllFormedInput;
            }
            currentUnicodeCharacter|=((currentUtf8Byte&0x3f)<<2);
            /* 0x0040 is 0x10000>>10: anything below encodes a value that
               does not need four bytes. */
            if (currentUnicodeCharacter<0x0040)
            {
                return KErrorIllFormedInput;
            }
            currentUnicodeCharacter-=0x0040;
            /* Only 10 bits fit in the high surrogate, i.e. input <= 0x10FFFF. */
            if (currentUnicodeCharacter>=0x0400)
            {
                return KErrorIllFormedInput;
            }
            ++aUtf8;
            currentUtf8Byte=(unsigned char)*aUtf8;
            if ((currentUtf8Byte&0xc0)!=0x80)
            {
                return KErrorIllFormedInput;
            }
            /* The top two payload bits of the third byte complete the high surrogate. */
            currentUnicodeCharacter|=((currentUtf8Byte&0x30)>>4);
            if (aUnicode!=NULL)
            {
                aUnicode[outputLength]=(wchar_t)(0xd800|currentUnicodeCharacter);
            }
            /* The remaining four bits start the low surrogate. */
            currentUnicodeCharacter=((currentUtf8Byte&0x0f)<<6);
            ++aUtf8;
            currentUtf8Byte=(unsigned char)*aUtf8;
            if ((currentUtf8Byte&0xc0)!=0x80)
            {
                return KErrorIllFormedInput;
            }
            currentUnicodeCharacter|=(currentUtf8Byte&0x3f);
            ++outputLength;
            if (aUnicode!=NULL)
            {
                aUnicode[outputLength]=(wchar_t)(0xdc00|currentUnicodeCharacter);
            }
        }
        else
        {
            /* Stray continuation byte or a lead byte of the form 11111xxx. */
            return KErrorIllFormedInput;
        }
        ++outputLength;
        ++aUtf8;
    }
    if (aUnicode!=NULL)
    {
        aUnicode[outputLength]='\0';
    }
    return (int)outputLength;
}
sl@0 | 135 |
#include <stdio.h> |
sl@0 | 136 |
int UnicodeToUtf8(char* aUtf8, const wchar_t* aUnicode) |
sl@0 | 137 |
// must '\0'-terminate the output |
sl@0 | 138 |
{ |
sl@0 | 139 |
char* startOfUtf8=aUtf8; |
sl@0 | 140 |
for (;;) |
sl@0 | 141 |
{ |
sl@0 | 142 |
unsigned int currentUnicodeCharacter=*aUnicode; |
sl@0 | 143 |
if (currentUnicodeCharacter=='\0') |
sl@0 | 144 |
{ |
sl@0 | 145 |
break; |
sl@0 | 146 |
} |
sl@0 | 147 |
if ((currentUnicodeCharacter&0xff80)==0x0000) |
sl@0 | 148 |
{ |
sl@0 | 149 |
if (startOfUtf8!=NULL) |
sl@0 | 150 |
{ |
sl@0 | 151 |
*aUtf8=(char)currentUnicodeCharacter; |
sl@0 | 152 |
} |
sl@0 | 153 |
} |
sl@0 | 154 |
else if ((currentUnicodeCharacter&0xf800)==0x0000) |
sl@0 | 155 |
{ |
sl@0 | 156 |
if (startOfUtf8!=NULL) |
sl@0 | 157 |
{ |
sl@0 | 158 |
*aUtf8=(char)(0xc0|(currentUnicodeCharacter>>6)); |
sl@0 | 159 |
} |
sl@0 | 160 |
++aUtf8; |
sl@0 | 161 |
if (startOfUtf8!=NULL) |
sl@0 | 162 |
{ |
sl@0 | 163 |
*aUtf8=(char)(0x80|(currentUnicodeCharacter&0x3f)); |
sl@0 | 164 |
} |
sl@0 | 165 |
} |
sl@0 | 166 |
else if ((currentUnicodeCharacter&0xfc00)==0xd800) |
sl@0 | 167 |
{ |
sl@0 | 168 |
currentUnicodeCharacter+=0x0040; |
sl@0 | 169 |
if (startOfUtf8!=NULL) |
sl@0 | 170 |
{ |
sl@0 | 171 |
*aUtf8=(char)(0xf0|((currentUnicodeCharacter>>8)&0x07)); |
sl@0 | 172 |
} |
sl@0 | 173 |
++aUtf8; |
sl@0 | 174 |
if (startOfUtf8!=NULL) |
sl@0 | 175 |
{ |
sl@0 | 176 |
*aUtf8=(char)(0x80|((currentUnicodeCharacter>>2)&0x3f)); |
sl@0 | 177 |
} |
sl@0 | 178 |
{ |
sl@0 | 179 |
unsigned int currentUtf8Byte=(0x80|((currentUnicodeCharacter&0x03)<<4)); |
sl@0 | 180 |
++aUnicode; |
sl@0 | 181 |
currentUnicodeCharacter=*aUnicode; |
sl@0 | 182 |
if ((currentUnicodeCharacter&0xfc00)!=0xdc00) |
sl@0 | 183 |
{ |
sl@0 | 184 |
return KErrorIllFormedInput; |
sl@0 | 185 |
} |
sl@0 | 186 |
currentUtf8Byte|=((currentUnicodeCharacter>>6)&0x0f); |
sl@0 | 187 |
++aUtf8; |
sl@0 | 188 |
if (startOfUtf8!=NULL) |
sl@0 | 189 |
{ |
sl@0 | 190 |
*aUtf8=(char)currentUtf8Byte; |
sl@0 | 191 |
} |
sl@0 | 192 |
} |
sl@0 | 193 |
++aUtf8; |
sl@0 | 194 |
if (startOfUtf8!=NULL) |
sl@0 | 195 |
{ |
sl@0 | 196 |
*aUtf8=(char)(0x80|(currentUnicodeCharacter&0x3f)); |
sl@0 | 197 |
} |
sl@0 | 198 |
} |
sl@0 | 199 |
else |
sl@0 | 200 |
{ |
sl@0 | 201 |
if (startOfUtf8!=NULL) |
sl@0 | 202 |
{ |
sl@0 | 203 |
*aUtf8=(char)(0xe0|(currentUnicodeCharacter>>12)); |
sl@0 | 204 |
} |
sl@0 | 205 |
++aUtf8; |
sl@0 | 206 |
if (startOfUtf8!=NULL) |
sl@0 | 207 |
{ |
sl@0 | 208 |
*aUtf8=(char)(0x80|((currentUnicodeCharacter>>6)&0x3f)); |
sl@0 | 209 |
} |
sl@0 | 210 |
++aUtf8; |
sl@0 | 211 |
if (startOfUtf8!=NULL) |
sl@0 | 212 |
{ |
sl@0 | 213 |
*aUtf8=(char)(0x80|(currentUnicodeCharacter&0x3f)); |
sl@0 | 214 |
} |
sl@0 | 215 |
} |
sl@0 | 216 |
++aUtf8; |
sl@0 | 217 |
++aUnicode; |
sl@0 | 218 |
} |
sl@0 | 219 |
if (startOfUtf8!=NULL) |
sl@0 | 220 |
{ |
sl@0 | 221 |
*aUtf8='\0'; |
sl@0 | 222 |
} |
sl@0 | 223 |
return aUtf8-startOfUtf8; |
sl@0 | 224 |
} |
sl@0 | 225 |
|
sl@0 | 226 |