Update contrib.
2 # Copyright (c) 2000-2009 Nokia Corporation and/or its subsidiary(-ies).
4 # This component and the accompanying materials are made available
5 # under the terms of "Eclipse Public License v1.0"
6 # which accompanies this distribution, and is available
7 # at the URL "http://www.eclipse.org/legal/epl-v10.html".
9 # Initial Contributors:
10 # Nokia Corporation - initial contribution.
# Package UTF inherits from Exporter and exports both conversion routines
# by default (names listed in @EXPORT are imported without being requested).
22 @UTF::ISA=qw(Exporter);
23 @UTF::EXPORT=qw(Utf8ToUnicode UnicodeToUtf8);
# Error code returned by the conversion code in this file when it meets
# ill-formed input (bad continuation byte, out-of-range value, bad surrogate).
25 my $KErrorIllFormedInput=-1;
# NOTE(review): this listing has gaps (the leading line numbers skip), so the
# enclosing sub header, braces, loop construct, index increments and the
# declarations of $Unicode, $Utf8, $Utf8Index and $UnicodeIndex are not shown.
# Presumably this is the body of Utf8ToUnicode -- confirm against the full
# file. The comments below describe only the statements that are visible.
#
# Purpose: decode the UTF-8 byte string $Utf8 into UTF-16 code units
# (characters above U+FFFF become a surrogate pair), pack them with the
# caller-supplied template and store the result through the reference $Unicode.
# Returns: $UnicodeIndex on success (presumably the number of code units
# written -- TODO confirm), or $KErrorIllFormedInput on malformed input.
#
# Pack template for the output code units; "*" is appended when packing.
31 my $UnicodeTemplate = shift;
# NOTE(review): $numOfBytes is not referenced by any visible line.
34 my $numOfBytes = length($Utf8);
# Split the input into a list of unsigned byte values.
35 my @Utf8Unpacked = unpack "C*",$Utf8;
36 my @UnicodeUnpacked = ();
# True once every input byte has been consumed (the action taken is on a
# line not shown; presumably it ends the decoding loop).
40 if ($Utf8Index > $#Utf8Unpacked)
45 my $currentUtf8Byte = $Utf8Unpacked[$Utf8Index];
# 0xxxxxxx: single-byte character, copied through unchanged.
47 if (($currentUtf8Byte&0x80)==0x00)
49 $UnicodeUnpacked[$UnicodeIndex] = $currentUtf8Byte;
# 110xxxxx: lead byte of a two-byte sequence; its low 5 bits become bits 6-10.
# NOTE(review): no visible line rejects overlong forms (result < 0x80).
52 elsif (($currentUtf8Byte&0xe0)==0xc0)
54 my $currentUnicodeCharacter=(($currentUtf8Byte&0x1f)<<6);
# Fetch the next byte (the index advance is presumably on an omitted line).
56 $currentUtf8Byte=$Utf8Unpacked[$Utf8Index];
# Every continuation byte must have the form 10xxxxxx.
57 if (($currentUtf8Byte&0xc0)!=0x80)
59 return $KErrorIllFormedInput;
61 $currentUnicodeCharacter|=($currentUtf8Byte&0x3f);
62 $UnicodeUnpacked[$UnicodeIndex] = $currentUnicodeCharacter;
# 1110xxxx: lead byte of a three-byte sequence; its low 4 bits become
# bits 12-15, followed by two 6-bit continuation payloads.
# NOTE(review): no visible line rejects overlong forms (result < 0x800) or
# results in the surrogate range 0xD800-0xDFFF.
65 elsif (($currentUtf8Byte&0xf0)==0xe0)
67 my $currentUnicodeCharacter=(($currentUtf8Byte&0x0f)<<12);
69 $currentUtf8Byte=$Utf8Unpacked[$Utf8Index];
70 if (($currentUtf8Byte&0xc0)!=0x80)
72 return $KErrorIllFormedInput;
74 $currentUnicodeCharacter|=(($currentUtf8Byte&0x3f)<<6);
76 $currentUtf8Byte=$Utf8Unpacked[$Utf8Index];
77 if (($currentUtf8Byte&0xc0)!=0x80)
79 return $KErrorIllFormedInput;
81 $currentUnicodeCharacter|=($currentUtf8Byte&0x3f);
82 $UnicodeUnpacked[$UnicodeIndex] = $currentUnicodeCharacter;
# 11110xxx: lead byte of a four-byte sequence, emitted as a surrogate pair.
85 elsif (($currentUtf8Byte&0xf8)==0xf0)
# Build (code point >> 10) in place: 3 bits from the lead byte, 6 bits from
# the second byte, leaving the low 2 bits free for the third byte.
87 my $currentUnicodeCharacter=(($currentUtf8Byte&0x07)<<8);
89 $currentUtf8Byte=$Utf8Unpacked[$Utf8Index];
90 if (($currentUtf8Byte&0xc0)!=0x80)
92 return $KErrorIllFormedInput;
94 $currentUnicodeCharacter|=(($currentUtf8Byte&0x3f)<<2);
# Below 0x40 here means the code point is under 0x10000, i.e. it should not
# have been written as four bytes.
95 if ($currentUnicodeCharacter<0x0040)
97 return $KErrorIllFormedInput;
# Remove the 0x10000 offset (0x10000 >> 10 == 0x40) as UTF-16 requires.
99 $currentUnicodeCharacter-=0x0040;
# 0x400 or more here means the code point is beyond 0x10FFFF.
100 if ($currentUnicodeCharacter>=0x0400)
102 return $KErrorIllFormedInput;
105 $currentUtf8Byte=$Utf8Unpacked[$Utf8Index];
106 if (($currentUtf8Byte&0xc0)!=0x80)
108 return $KErrorIllFormedInput;
# The top 2 payload bits of the third byte complete the 10-bit high-surrogate
# value.
110 $currentUnicodeCharacter|=(($currentUtf8Byte&0x30)>>4);
111 $UnicodeUnpacked[$UnicodeIndex] = (0xd800|$currentUnicodeCharacter);
# The low 4 payload bits of the third byte start the low-surrogate value.
112 $currentUnicodeCharacter=(($currentUtf8Byte&0x0f)<<6);
114 $currentUtf8Byte=$Utf8Unpacked[$Utf8Index];
115 if (($currentUtf8Byte&0xc0)!=0x80)
117 return $KErrorIllFormedInput;
119 $currentUnicodeCharacter|=($currentUtf8Byte&0x3f);
# Stored at $UnicodeIndex like the high surrogate above; the index advance
# between the two stores is presumably on an omitted line.
121 $UnicodeUnpacked[$UnicodeIndex] = (0xdc00|$currentUnicodeCharacter);
# Presumably the fall-through branch: any other lead byte (a stray
# continuation byte, or 0xf8 and above) is rejected -- confirm.
125 return $KErrorIllFormedInput;
# Pack the code units with the caller's template and hand the string back
# through the $Unicode reference.
131 $$Unicode = pack "$UnicodeTemplate*", @UnicodeUnpacked;
132 return $UnicodeIndex;
# NOTE(review): this listing has gaps (the leading line numbers skip), so the
# enclosing sub header, braces, loop construct, index increments, the
# declarations of $Utf8, $Unicode and $Utf8Index, and the final return are not
# shown. Presumably this is the body of UnicodeToUtf8 -- confirm against the
# full file. The comments below describe only the statements that are visible.
#
# Purpose: unpack $Unicode into UTF-16 code units using the caller-supplied
# template, encode them as UTF-8 bytes (a surrogate pair becomes one four-byte
# sequence) and store the packed byte string through the reference $Utf8.
# Returns: $KErrorIllFormedInput when a high surrogate is not followed by a
# low surrogate; the success return value is not visible here.
#
# Pack template describing the input code units; "*" is appended when unpacking.
139 my $UnicodeTemplate = shift;
141 my $UnicodeIndex = 0;
# NOTE(review): $numOfBytes is not referenced by any visible line.
142 my $numOfBytes = length($Unicode);
143 my @UnicodeUnpacked = unpack "$UnicodeTemplate*", $Unicode;
144 my @Utf8Unpacked = ();
148 # exit the loop if no more in the UnicodeUnpacked
149 if ($UnicodeIndex > $#UnicodeUnpacked)
154 my $currentUnicodeCharacter=$UnicodeUnpacked[$UnicodeIndex];
# No bits set in 0xff80: the value fits in 7 bits and is emitted as one byte.
155 if (($currentUnicodeCharacter&0xff80)==0x0000)
157 $Utf8Unpacked[$Utf8Index]= $currentUnicodeCharacter;
# No bits set in 0xf800: the value fits in 11 bits and is emitted as two
# bytes, 110xxxxx 10xxxxxx.
159 elsif (($currentUnicodeCharacter&0xf800)==0x0000)
162 $Utf8Unpacked[$Utf8Index]= (0xc0 | $currentUnicodeCharacter >> 6);
164 $Utf8Unpacked[$Utf8Index]= (0x80 | $currentUnicodeCharacter&0x3f);
# 0xD800-0xDBFF: high surrogate; together with the following low surrogate
# it is emitted as one four-byte sequence.
166 elsif (($currentUnicodeCharacter&0xfc00)==0xd800)
# Add back the 0x10000 offset that UTF-16 removes (0x10000 >> 10 == 0x40).
168 $currentUnicodeCharacter+=0x0040;
# Lead byte 11110xxx carries the top 3 bits.
169 $Utf8Unpacked[$Utf8Index]= (0xf0|(($currentUnicodeCharacter>>8)&0x07));
# Second byte carries the next 6 bits.
171 $Utf8Unpacked[$Utf8Index]= (0x80|(($currentUnicodeCharacter>>2)&0x3f));
# Third byte: bits 4-5 come from the high surrogate; bits 0-3 are filled in
# from the low surrogate below.
172 my $currentUtf8Byte=(0x80|(($currentUnicodeCharacter&0x03)<<4));
# Fetch the next code unit (the index advance is presumably on an omitted
# line).
174 $currentUnicodeCharacter=$UnicodeUnpacked[$UnicodeIndex];
# It must be a low surrogate (0xDC00-0xDFFF), otherwise the pair is broken.
175 if (($currentUnicodeCharacter&0xfc00)!=0xdc00)
177 return $KErrorIllFormedInput;
179 $currentUtf8Byte|=(($currentUnicodeCharacter>>6)&0x0f);
181 $Utf8Unpacked[$Utf8Index]= $currentUtf8Byte;
# Fourth byte carries the low 6 bits of the low surrogate.
183 $Utf8Unpacked[$Utf8Index]= (0x80| ($currentUnicodeCharacter&0x3f));
# Presumably the final else branch: every remaining value is emitted as three
# bytes, 1110xxxx 10xxxxxx 10xxxxxx.
# NOTE(review): if so, an unpaired low surrogate (0xDC00-0xDFFF) is encoded
# here rather than rejected -- confirm whether that is intended.
187 $Utf8Unpacked[$Utf8Index]= (0xe0|($currentUnicodeCharacter>>12));
189 $Utf8Unpacked[$Utf8Index]= (0x80|(($currentUnicodeCharacter>>6)&0x3f));
191 $Utf8Unpacked[$Utf8Index]= (0x80| ($currentUnicodeCharacter&0x3f));
# Pack the byte values into a string and hand it back through the $Utf8
# reference.
197 $$Utf8 = pack "C*", @Utf8Unpacked;