os/textandloc/charconvfw/charconvplugins/tools/UTF.PM
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/textandloc/charconvfw/charconvplugins/tools/UTF.PM	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,200 @@
     1.4 +#
     1.5 +# Copyright (c) 2000-2009 Nokia Corporation and/or its subsidiary(-ies).
     1.6 +# All rights reserved.
     1.7 +# This component and the accompanying materials are made available
     1.8 +# under the terms of "Eclipse Public License v1.0"
     1.9 +# which accompanies this distribution, and is available
    1.10 +# at the URL "http://www.eclipse.org/legal/epl-v10.html".
    1.11 +#
    1.12 +# Initial Contributors:
    1.13 +# Nokia Corporation - initial contribution.
    1.14 +#
    1.15 +# Contributors:
    1.16 +#
    1.17 +# Description: 
    1.18 +#
    1.19 +
    1.20 +use strict;
    1.21 +use integer;
    1.22 +
    1.23 +package UTF;
    1.24 +require Exporter;
    1.25 +@UTF::ISA=qw(Exporter);
    1.26 +@UTF::EXPORT=qw(Utf8ToUnicode UnicodeToUtf8);
    1.27 +
    1.28 +my $KErrorIllFormedInput=-1;
    1.29 +
    1.30 +sub Utf8ToUnicode
    1.31 +	{
    1.32 +	my $Unicode = shift;  
    1.33 +	my $Utf8 = shift;
    1.34 +	my $UnicodeTemplate = shift;
    1.35 +	my $Utf8Index = 0;
    1.36 +	my $UnicodeIndex = 0;
    1.37 +	my $numOfBytes = length($Utf8);
    1.38 +	my @Utf8Unpacked = unpack "C*",$Utf8;
    1.39 +	my @UnicodeUnpacked = (); 
    1.40 +
    1.41 +	for (;;)
    1.42 +		{
    1.43 +		if ($Utf8Index > $#Utf8Unpacked)
    1.44 +			{
    1.45 +			last;
    1.46 +			}
    1.47 +
    1.48 +		my $currentUtf8Byte = $Utf8Unpacked[$Utf8Index];
    1.49 +		
    1.50 +		if (($currentUtf8Byte&0x80)==0x00)
    1.51 +			{
    1.52 +			$UnicodeUnpacked[$UnicodeIndex] = $currentUtf8Byte;
    1.53 +			}
    1.54 +		
    1.55 +		elsif (($currentUtf8Byte&0xe0)==0xc0)
    1.56 +			{
    1.57 +			my $currentUnicodeCharacter=(($currentUtf8Byte&0x1f)<<6);
    1.58 +			++$Utf8Index;
    1.59 +			$currentUtf8Byte=$Utf8Unpacked[$Utf8Index];
    1.60 +			if (($currentUtf8Byte&0xc0)!=0x80)
    1.61 +				{
    1.62 +				return $KErrorIllFormedInput;
    1.63 +				}
    1.64 +			$currentUnicodeCharacter|=($currentUtf8Byte&0x3f);
    1.65 +			$UnicodeUnpacked[$UnicodeIndex] = $currentUnicodeCharacter;
    1.66 +			}
    1.67 +
    1.68 +		elsif (($currentUtf8Byte&0xf0)==0xe0)
    1.69 +			{
    1.70 +			my $currentUnicodeCharacter=(($currentUtf8Byte&0x0f)<<12);
    1.71 +			++$Utf8Index;
    1.72 +			$currentUtf8Byte=$Utf8Unpacked[$Utf8Index];
    1.73 +			if (($currentUtf8Byte&0xc0)!=0x80)
    1.74 +				{
    1.75 +				return $KErrorIllFormedInput;
    1.76 +				}
    1.77 +			$currentUnicodeCharacter|=(($currentUtf8Byte&0x3f)<<6);
    1.78 +			++$Utf8Index;
    1.79 +			$currentUtf8Byte=$Utf8Unpacked[$Utf8Index];
    1.80 +			if (($currentUtf8Byte&0xc0)!=0x80)
    1.81 +				{
    1.82 +				return $KErrorIllFormedInput;
    1.83 +				}
    1.84 +			$currentUnicodeCharacter|=($currentUtf8Byte&0x3f);
    1.85 +			$UnicodeUnpacked[$UnicodeIndex] = $currentUnicodeCharacter;
    1.86 +			}
    1.87 +
    1.88 +		elsif (($currentUtf8Byte&0xf8)==0xf0)
    1.89 +			{                                         
    1.90 +			my $currentUnicodeCharacter=(($currentUtf8Byte&0x07)<<8);
    1.91 +			++$Utf8Index;
    1.92 +			$currentUtf8Byte=$Utf8Unpacked[$Utf8Index];
    1.93 +			if (($currentUtf8Byte&0xc0)!=0x80)
    1.94 +				{
    1.95 +				return $KErrorIllFormedInput;
    1.96 +				}
    1.97 +			$currentUnicodeCharacter|=(($currentUtf8Byte&0x3f)<<2);
    1.98 +			if ($currentUnicodeCharacter<0x0040)
    1.99 +				{
   1.100 +				return $KErrorIllFormedInput;
   1.101 +				}
   1.102 +			$currentUnicodeCharacter-=0x0040;
   1.103 +			if ($currentUnicodeCharacter>=0x0400)
   1.104 +				{
   1.105 +				return $KErrorIllFormedInput;
   1.106 +				}
   1.107 +			++$Utf8Index;
   1.108 +			$currentUtf8Byte=$Utf8Unpacked[$Utf8Index];
   1.109 +			if (($currentUtf8Byte&0xc0)!=0x80)
   1.110 +				{
   1.111 +				return $KErrorIllFormedInput;
   1.112 +				}
   1.113 +			$currentUnicodeCharacter|=(($currentUtf8Byte&0x30)>>4);
   1.114 +			$UnicodeUnpacked[$UnicodeIndex] = (0xd800|$currentUnicodeCharacter);
   1.115 +			$currentUnicodeCharacter=(($currentUtf8Byte&0x0f)<<6);
   1.116 +			++$Utf8Index;
   1.117 +			$currentUtf8Byte=$Utf8Unpacked[$Utf8Index];
   1.118 +			if (($currentUtf8Byte&0xc0)!=0x80)
   1.119 +				{
   1.120 +				return $KErrorIllFormedInput;
   1.121 +				}
   1.122 +			$currentUnicodeCharacter|=($currentUtf8Byte&0x3f);
   1.123 +			++$UnicodeIndex;
   1.124 +			$UnicodeUnpacked[$UnicodeIndex] = (0xdc00|$currentUnicodeCharacter);
   1.125 +			}
   1.126 +		else
   1.127 +			{
   1.128 +			return $KErrorIllFormedInput;
   1.129 +			}
   1.130 +		++$UnicodeIndex;
   1.131 +		++$Utf8Index;
   1.132 +		}
   1.133 +	$$Unicode = (); 
   1.134 +	$$Unicode = pack "$UnicodeTemplate*", @UnicodeUnpacked;
   1.135 +	return $UnicodeIndex;  
   1.136 +	}
   1.137 +
   1.138 +sub UnicodeToUtf8
   1.139 +	{
   1.140 +	my $Utf8 = shift; 
   1.141 +	my $Unicode = shift;
   1.142 +	my $UnicodeTemplate = shift;
   1.143 +	my $Utf8Index = 0;
   1.144 +	my $UnicodeIndex = 0;
   1.145 +	my $numOfBytes = length($Unicode);
   1.146 +	my @UnicodeUnpacked = unpack "$UnicodeTemplate*", $Unicode;
   1.147 +	my @Utf8Unpacked = ();
   1.148 +	
   1.149 +	for (;;)
   1.150 +		{
   1.151 +		# exit the loop if no more in the UnicodeUnpacked
   1.152 +		if ($UnicodeIndex > $#UnicodeUnpacked)
   1.153 +			{
   1.154 +			last;
   1.155 +			}
   1.156 +
   1.157 +		my $currentUnicodeCharacter=$UnicodeUnpacked[$UnicodeIndex];
   1.158 +		if (($currentUnicodeCharacter&0xff80)==0x0000)
   1.159 +			{	
   1.160 +			$Utf8Unpacked[$Utf8Index]= $currentUnicodeCharacter;
   1.161 +			}
   1.162 +		elsif (($currentUnicodeCharacter&0xf800)==0x0000)
   1.163 +			{
   1.164 +
   1.165 +			$Utf8Unpacked[$Utf8Index]= (0xc0 | $currentUnicodeCharacter >> 6);
   1.166 +			++$Utf8Index;
   1.167 +			$Utf8Unpacked[$Utf8Index]= (0x80 | $currentUnicodeCharacter&0x3f);
   1.168 +			}
   1.169 +		elsif (($currentUnicodeCharacter&0xfc00)==0xd800)
   1.170 +			{
   1.171 +			$currentUnicodeCharacter+=0x0040;
   1.172 +			$Utf8Unpacked[$Utf8Index]= (0xf0|(($currentUnicodeCharacter>>8)&0x07));
   1.173 +			++$Utf8Index;
   1.174 +			$Utf8Unpacked[$Utf8Index]= (0x80|(($currentUnicodeCharacter>>2)&0x3f));
   1.175 +			my $currentUtf8Byte=(0x80|(($currentUnicodeCharacter&0x03)<<4));
   1.176 +			++$UnicodeIndex;
   1.177 +			$currentUnicodeCharacter=$UnicodeUnpacked[$UnicodeIndex];
   1.178 +			if (($currentUnicodeCharacter&0xfc00)!=0xdc00)
   1.179 +				{
   1.180 +				return $KErrorIllFormedInput;
   1.181 +				}
   1.182 +			$currentUtf8Byte|=(($currentUnicodeCharacter>>6)&0x0f);
   1.183 +			++$Utf8Index;
   1.184 +			$Utf8Unpacked[$Utf8Index]= $currentUtf8Byte;
   1.185 +			++$Utf8Index;
   1.186 +			$Utf8Unpacked[$Utf8Index]= (0x80| ($currentUnicodeCharacter&0x3f));
   1.187 +			}
   1.188 +		else
   1.189 +			{
   1.190 +			$Utf8Unpacked[$Utf8Index]= (0xe0|($currentUnicodeCharacter>>12));
   1.191 +			++$Utf8Index;
   1.192 +			$Utf8Unpacked[$Utf8Index]= (0x80|(($currentUnicodeCharacter>>6)&0x3f));
   1.193 +			++$Utf8Index;
   1.194 +			$Utf8Unpacked[$Utf8Index]= (0x80| ($currentUnicodeCharacter&0x3f));
   1.195 +			}
   1.196 +		++$Utf8Index;
   1.197 +		++$UnicodeIndex;
   1.198 +		}
   1.199 +	$$Utf8 = ();	
   1.200 +	$$Utf8 = pack "C*", @Utf8Unpacked;
   1.201 +	return $Utf8Index; 
   1.202 +
   1.203 +	}