os/textandloc/charconvfw/charconvplugins/tools/charconv.pl
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
sl@0
     1
#
sl@0
     2
# Copyright (c) 2000-2009 Nokia Corporation and/or its subsidiary(-ies).
sl@0
     3
# All rights reserved.
sl@0
     4
# This component and the accompanying materials are made available
sl@0
     5
# under the terms of "Eclipse Public License v1.0"
sl@0
     6
# which accompanies this distribution, and is available
sl@0
     7
# at the URL "http://www.eclipse.org/legal/epl-v10.html".
sl@0
     8
#
sl@0
     9
# Initial Contributors:
sl@0
    10
# Nokia Corporation - initial contribution.
sl@0
    11
#
sl@0
    12
# Contributors:
sl@0
    13
#
sl@0
    14
# Description: 
sl@0
    15
#
sl@0
    16
sl@0
    17
use strict;
sl@0
    18
use integer;
sl@0
    19
sl@0
    20
sub PerlScriptPath
sl@0
    21
	{
sl@0
    22
	my $perlScriptPath=$0;
sl@0
    23
	my $os = $^O; #get the OS type
sl@0
    24
	#check OS type
sl@0
    25
  if($os=~/MSWin32/) #Windows OS
sl@0
    26
    {
sl@0
    27
    $perlScriptPath=~s/\//\\/g; # replace any forward-slashes with back-slashes
sl@0
    28
    $perlScriptPath=~s/(\\?)[^\\]+$/$1/; # get rid of this Perl-script's file-name
sl@0
    29
    }
sl@0
    30
  else #Unix OS
sl@0
    31
    {
sl@0
    32
    $perlScriptPath=~s/\\/\//g; # replace any back-slashes with forward-slashes
sl@0
    33
    $perlScriptPath=~s/(\/?)[^\/]+$/$1/; # get rid of this Perl-script's file-name
sl@0
    34
    }
sl@0
    35
	return $perlScriptPath;
sl@0
    36
	}
sl@0
    37
BEGIN
sl@0
    38
	{
sl@0
    39
	unshift(@INC, &PerlScriptPath()); # can't do "use lib &PerlScriptPath()" here as "use lib" only seems to work with *hard-coded* directory names
sl@0
    40
	}
sl@0
    41
use PARSER;
sl@0
    42
use UTF;
sl@0
    43
sl@0
    44
# The following numbers are used for byte-orders:
sl@0
    45
#     0 means unspecified
sl@0
    46
#     1 means big-endian
sl@0
    47
#     2 means little-endian
sl@0
    48
sl@0
    49
FixParametersToWorkWithWindows98(\@ARGV);
sl@0
    50
my $versionNumber = 3;
sl@0
    51
my $outputByteOrderMark = 0;
sl@0
    52
my $unicodeByteOrder = 0;
sl@0
    53
my $inputEncoding = ""; 
sl@0
    54
my $outputEncoding = ""; 
sl@0
    55
my %foreignCharacters = (); # Hash with the foreign Character code as the value, unicode as key 
sl@0
    56
my %unicodeCharacters = (); # Hash with the Unicode Character code as the value, foreign as key 
sl@0
    57
sl@0
    58
sl@0
    59
my $inputFile=\*STDIN;
sl@0
    60
my $outputFile=\*STDOUT;
sl@0
    61
ReadParameters(\@ARGV,\$outputByteOrderMark,\$unicodeByteOrder,\$inputEncoding,\$outputEncoding,\$inputFile,\$outputFile);
sl@0
    62
HandleByteOrderMarks($outputByteOrderMark,\$unicodeByteOrder, \$inputEncoding,\$outputEncoding, $inputFile, $outputFile);
sl@0
    63
DoConversion(\$unicodeByteOrder, \$inputEncoding, \$outputEncoding, $inputFile, $outputFile, \%foreignCharacters, \%unicodeCharacters);
sl@0
    64
if ($inputFile!=\*STDIN)
sl@0
    65
	{
sl@0
    66
	close($inputFile) or die;
sl@0
    67
	}
sl@0
    68
if ($outputFile!=\*STDOUT)
sl@0
    69
	{
sl@0
    70
	close($outputFile) or die;
sl@0
    71
	}
sl@0
    72
sl@0
    73
sub FixParametersToWorkWithWindows98
sl@0
    74
	{
sl@0
    75
	my $parameters=shift;
sl@0
    76
	my $i;
sl@0
    77
	for ($i=@$parameters-2; $i>=0; --$i) # iterate backwards as some parameters may be deleted from @$parameters
sl@0
    78
		{
sl@0
    79
		if (($parameters->[$i]=~/^(-input)$/i) ||
sl@0
    80
			($parameters->[$i]=~/^(-output)$/i))
sl@0
    81
			{
sl@0
    82
			$parameters->[$i].='='.$parameters->[$i+1];
sl@0
    83
			splice(@$parameters, $i+1, 1);
sl@0
    84
			}
sl@0
    85
		}
sl@0
    86
	}
sl@0
    87
sl@0
    88
sub PrintUsage 
sl@0
    89
	{
sl@0
    90
	print "\nVersion $versionNumber\n\nCharacter set conversion tool\nCopyright (c) 1999 Symbian Ltd\n\n";
sl@0
    91
	print "Usage:\n\n\t charconv [<options>] <inputspec> <outputspec>\n\nwhere\n\n\t";
sl@0
    92
	print "options    :=  [-big|-little][-byteordermark]\n\t";
sl@0
    93
	print "inputspec  :=  -input=<format> [<input_file>]\n\t";
sl@0
    94
	print "outputspec :=  -output=<format> [<output_file>]\n\t";
sl@0
    95
	print "format     :=  unicode|utf8|big5|gb2312...\n\n";
sl@0
    96
	}
sl@0
    97
sl@0
    98
sub Assert
sl@0
    99
	{
sl@0
   100
	my $condition = shift;
sl@0
   101
	my $errorMessage = shift;
sl@0
   102
	if (!($condition)) # find out where this is used and work this out 
sl@0
   103
		{
sl@0
   104
		die("Error: $errorMessage");
sl@0
   105
		}
sl@0
   106
	}
sl@0
   107
sl@0
   108
sub PrintWarning 
sl@0
   109
	{
sl@0
   110
	my $warningMessage = shift;
sl@0
   111
	print STDERR "Warning: $warningMessage\n";
sl@0
   112
	}
sl@0
   113
	
sl@0
   114
sl@0
   115
sub TryFileParameter 
sl@0
   116
	{
sl@0
   117
	my $args = shift;
sl@0
   118
	my $argindex = shift;
sl@0
   119
	my $inputoroutput = shift;
sl@0
   120
	my $encoding = shift;
sl@0
   121
	my $filehandle = shift;
sl@0
   122
	my $prefix = "-$inputoroutput=";
sl@0
   123
sl@0
   124
	if ($args->[$$argindex] =~ /^$prefix(.*)/)
sl@0
   125
		{
sl@0
   126
		Assert($$encoding eq "", "\"$prefix...\" is specified more than once");
sl@0
   127
		$$encoding = $1;
sl@0
   128
		++$$argindex;
sl@0
   129
		if (($$argindex >= @$args) || ($args->[$$argindex] =~ /^-/))
sl@0
   130
			{
sl@0
   131
			--$$argindex;
sl@0
   132
			}
sl@0
   133
		else
sl@0
   134
			{
sl@0
   135
			if ($inputoroutput =~ /input/i)
sl@0
   136
				{
sl@0
   137
				open(INPUT_FILE,"<$args->[$$argindex]") or die "opening $inputoroutput-file failed $!";
sl@0
   138
				$$filehandle=\*INPUT_FILE;
sl@0
   139
				}
sl@0
   140
			else
sl@0
   141
				{
sl@0
   142
				open(OUTPUT_FILE,">$args->[$$argindex]") or die "opening $inputoroutput-file failed $!";
sl@0
   143
				$$filehandle=\*OUTPUT_FILE;
sl@0
   144
				}
sl@0
   145
			}
sl@0
   146
		binmode $$filehandle;
sl@0
   147
		return  1;
sl@0
   148
		}
sl@0
   149
	return 0; 
sl@0
   150
	}
sl@0
   151
sl@0
   152
sub ReadParameters 
sl@0
   153
	{
sl@0
   154
	my $args = shift; 
sl@0
   155
	my $outputbyteordermark = shift;
sl@0
   156
	my $unicodebyteorder = shift;
sl@0
   157
	my $inputencoding = shift;
sl@0
   158
	my $outputencoding = shift;
sl@0
   159
	my $inputhandle = shift;
sl@0
   160
	my $outputhandle = shift;
sl@0
   161
	my $i;
sl@0
   162
	my $range;
sl@0
   163
	if ((@$args <= 0) || ($args->[0] eq "?") || ($args->[0] eq "/?"))
sl@0
   164
		{
sl@0
   165
		PrintUsage();
sl@0
   166
		exit;
sl@0
   167
		}
sl@0
   168
sl@0
   169
	for ($i = 0; $i < @$args ; ++$i)
sl@0
   170
		{
sl@0
   171
		if ( $args->[$i]=~ /-byteordermark/i)
sl@0
   172
			{
sl@0
   173
			Assert(!$$outputbyteordermark, "\"-byteordermark\" is specified more than once");
sl@0
   174
			$$outputbyteordermark = 1; 
sl@0
   175
			}
sl@0
   176
		elsif ($args->[$i]=~ /-big/i)
sl@0
   177
			{
sl@0
   178
			Assert(($$unicodebyteorder==0),"the byte order of unicode text (i.e. \"-big\"/\"-little\") is specified more than once");
sl@0
   179
			$$unicodebyteorder = 1;
sl@0
   180
			}
sl@0
   181
		elsif ($args->[$i]=~ /-little/i)
sl@0
   182
			{
sl@0
   183
			Assert(($$unicodebyteorder==0),"the byte order of unicode text (i.e. \"-big\"/\"-little\") is specified more than once");
sl@0
   184
			$$unicodebyteorder = 2;
sl@0
   185
			}
sl@0
   186
		else
sl@0
   187
			{ 
sl@0
   188
			Assert(TryFileParameter($args, \$i, "input",$inputencoding,$inputhandle) ||
sl@0
   189
				   TryFileParameter($args, \$i, "output",$outputencoding, $outputhandle), "bad parameter \"$args->[$i]\"");
sl@0
   190
			}
sl@0
   191
		}
sl@0
   192
	Assert($$inputencoding ne "", "no input encoding is specified");
sl@0
   193
	Assert($$outputencoding ne "", "no output encoding is specified");
sl@0
   194
	}
sl@0
   195
sl@0
   196
sub ReadFromFile
sl@0
   197
	{  
sl@0
   198
	my $buffer = shift;
sl@0
   199
	my $numOfBytesToRead = shift;
sl@0
   200
	my $inputhandle = shift;
sl@0
   201
	my $numOfBytesRead = 0;									
sl@0
   202
	my $numOfBytesToReadThisTime = $numOfBytesToRead;		
sl@0
   203
sl@0
   204
	for(;;)
sl@0
   205
		{
sl@0
   206
		for(;;)
sl@0
   207
			{
sl@0
   208
			my $remainingNumOfBytesToRead = $numOfBytesToRead - $numOfBytesRead;  
sl@0
   209
			if ($numOfBytesToReadThisTime > $remainingNumOfBytesToRead)
sl@0
   210
				{
sl@0
   211
				$numOfBytesToReadThisTime = $remainingNumOfBytesToRead;
sl@0
   212
				}  		
sl@0
   213
			my $numOfBytesReadThisTime = read $inputhandle, $$buffer, $numOfBytesToReadThisTime;  
sl@0
   214
			if (defined $numOfBytesReadThisTime)
sl@0
   215
				{
sl@0
   216
				$numOfBytesRead += $numOfBytesReadThisTime;
sl@0
   217
				Assert($numOfBytesRead <= $numOfBytesReadThisTime, "internal error (read too many bytes)");
sl@0
   218
				if (($numOfBytesRead >= $numOfBytesReadThisTime) || $numOfBytesReadThisTime == 0)
sl@0
   219
					{
sl@0
   220
					return;
sl@0
   221
					}
sl@0
   222
				last;
sl@0
   223
				}
sl@0
   224
			$numOfBytesToReadThisTime /= 2;
sl@0
   225
			Assert($numOfBytesToReadThisTime >0, "reading from file failed");
sl@0
   226
			}
sl@0
   227
		}
sl@0
   228
	}
sl@0
   229
sl@0
   230
sub HandleByteOrderMarks 
sl@0
   231
	{
sl@0
   232
	my $outputbyteordermark = shift;
sl@0
   233
	my $unicodebyteorder = shift;
sl@0
   234
	my $inputencoding = shift;
sl@0
   235
	my $outputencoding = shift;
sl@0
   236
	my $inputhandle = shift;
sl@0
   237
	my $outputhandle = shift;
sl@0
   238
sl@0
   239
	if ($$inputencoding =~ /unicode/i) 
sl@0
   240
		{
sl@0
   241
		my $firstUnicodeCharacter = 0; 
sl@0
   242
		ReadFromFile(\$firstUnicodeCharacter, 2, $inputhandle);
sl@0
   243
		my $byteOrderSpecifiedByByteOrderMark = 0;
sl@0
   244
		if (length($firstUnicodeCharacter) == 2)
sl@0
   245
			{
sl@0
   246
			my @firstUnicodeCharacter = unpack "C*", $firstUnicodeCharacter;
sl@0
   247
			if (($firstUnicodeCharacter[0]==0xff) && ($firstUnicodeCharacter[1]==0xfe))
sl@0
   248
				{
sl@0
   249
				$byteOrderSpecifiedByByteOrderMark = 2; 
sl@0
   250
				}
sl@0
   251
			elsif (($firstUnicodeCharacter[0]==0xfe) && ($firstUnicodeCharacter[1]==0xff))
sl@0
   252
				{
sl@0
   253
				$byteOrderSpecifiedByByteOrderMark = 1; 
sl@0
   254
				}
sl@0
   255
			else
sl@0
   256
				{
sl@0
   257
				my $error = seek $inputhandle, 0, 0; # rewind to start of file
sl@0
   258
				Assert ($error == 1, "could not rewind to the start of input file");
sl@0
   259
				}
sl@0
   260
			}
sl@0
   261
		if ($byteOrderSpecifiedByByteOrderMark!=0)
sl@0
   262
			{
sl@0
   263
			if (($$unicodebyteorder!=0) && ($byteOrderSpecifiedByByteOrderMark!=$$unicodebyteorder))
sl@0
   264
				{
sl@0
   265
				PrintWarning ("the byte order specified by the byte-order mark in the unicode input is different from the byte order specified by the parameter - taking the byte-order specified by the byte-order mark in the unicode input");
sl@0
   266
				}
sl@0
   267
			$$unicodebyteorder = $byteOrderSpecifiedByByteOrderMark;
sl@0
   268
			}
sl@0
   269
		}
sl@0
   270
	if ($outputbyteordermark)
sl@0
   271
		{
sl@0
   272
		if ($$outputencoding ne "unicode") 
sl@0
   273
			{
sl@0
   274
			PrintWarning("\"-byteordermark\" is only relevant for unicode output");
sl@0
   275
			}
sl@0
   276
		else
sl@0
   277
			{
sl@0
   278
			Assert($$unicodebyteorder!=0, "the byte order must be specified if a byte-order mark is to be added to the unicode output");
sl@0
   279
			my $firstUnicodeCharacter=($$unicodebyteorder==1)? "\xfe\xff": "\xff\xfe";
sl@0
   280
			WriteToFile(\$firstUnicodeCharacter, $outputhandle);
sl@0
   281
			}
sl@0
   282
		}
sl@0
   283
	}
sl@0
   284
sl@0
   285
sub WriteToFile 
sl@0
   286
	{
sl@0
   287
	my $buffer = shift;
sl@0
   288
	my $outputhandle = shift;
sl@0
   289
sl@0
   290
	print $outputhandle $$buffer;
sl@0
   291
	}
sl@0
   292
sl@0
   293
sub DoConversion
sl@0
   294
	{
sl@0
   295
	my $unicodebyteorder = shift;
sl@0
   296
	my $inputencoding = shift;
sl@0
   297
	my $outputencoding = shift;
sl@0
   298
	my $inputhandle = shift;
sl@0
   299
	my $outputhandle = shift; 
sl@0
   300
	my $foreignCharacters = shift;
sl@0
   301
	my $unicodeCharacters = shift;
sl@0
   302
	
sl@0
   303
	my $currentBuffer = 0;
sl@0
   304
	my @arrayOfBuffers = ('', '', '');
sl@0
   305
	my $largeNumber=1000000;
sl@0
   306
	ReadFromFile(\($arrayOfBuffers[$currentBuffer]), $largeNumber, $inputhandle);
sl@0
   307
	ReverseByteOrderIfUnicodeAndBigEndian($unicodebyteorder, $inputencoding, \($arrayOfBuffers[$currentBuffer]));
sl@0
   308
	if ($$inputencoding ne $$outputencoding) 
sl@0
   309
		{
sl@0
   310
		if ($$inputencoding !~ /^unicode$/i)
sl@0
   311
			{
sl@0
   312
			my $nextBuffer = $currentBuffer + 1;
sl@0
   313
			OtherToUnicode ($inputencoding, \($arrayOfBuffers[$nextBuffer]), ($arrayOfBuffers[$currentBuffer]), $foreignCharacters, $unicodeCharacters, 'v');
sl@0
   314
			$currentBuffer = $nextBuffer;
sl@0
   315
			}
sl@0
   316
		if ($$outputencoding !~ /^unicode$/i)
sl@0
   317
			{
sl@0
   318
			my $nextBuffer = $currentBuffer + 1;
sl@0
   319
			UnicodeToOther($outputencoding, \($arrayOfBuffers[$nextBuffer]), ($arrayOfBuffers[$currentBuffer]), $foreignCharacters, $unicodeCharacters, 'v');
sl@0
   320
			$currentBuffer = $nextBuffer;
sl@0
   321
			}
sl@0
   322
		}
sl@0
   323
	ReverseByteOrderIfUnicodeAndBigEndian($unicodebyteorder, $outputencoding, \($arrayOfBuffers[$currentBuffer]));
sl@0
   324
	WriteToFile(\($arrayOfBuffers[$currentBuffer]), $outputhandle);
sl@0
   325
	}
sl@0
   326
sl@0
   327
sub ReverseByteOrderIfUnicodeAndBigEndian
sl@0
   328
	{
sl@0
   329
	my $unicodebyteorder = shift;
sl@0
   330
	my $encoding = shift;
sl@0
   331
	my $buffer = shift;
sl@0
   332
	my $i;
sl@0
   333
sl@0
   334
	if ($$encoding =~ /^unicode$/i)
sl@0
   335
		{
sl@0
   336
		Assert(length($$buffer)%2==0, "internal error (bad number of bytes in unicode buffer)");
sl@0
   337
		if ($$unicodebyteorder==0)
sl@0
   338
			{
sl@0
   339
			PrintWarning("the byte order of unicode text is unspecified - defaulting to little-endian");
sl@0
   340
			$$unicodebyteorder = 2;
sl@0
   341
			}
sl@0
   342
		if ($$unicodebyteorder==1)
sl@0
   343
			{
sl@0
   344
			$$buffer=pack('v*', unpack('n*', $$buffer));
sl@0
   345
			}
sl@0
   346
		}
sl@0
   347
	}
sl@0
   348
sl@0
   349
sub FillInHashes
sl@0
   350
	{
sl@0
   351
	my $foreignCharacters = shift;
sl@0
   352
	my $unicodeCharacters = shift;
sl@0
   353
	my $encoding = shift; 
sl@0
   354
	my $replacementCharacter = shift;
sl@0
   355
	my $ranges = shift;
sl@0
   356
	my $bigEndian = shift;
sl@0
   357
sl@0
   358
	my $endianness = 0;
sl@0
   359
	my $replacenum = 0;
sl@0
   360
	my $rangenum = 0;
sl@0
   361
	my $fileread = 0;
sl@0
   362
	my $largenumber = 1000000;
sl@0
   363
sl@0
   364
	my $dataFile=&PerlScriptPath()."charconv\\".$$encoding.'.dat';
sl@0
   365
sl@0
   366
	my $line;
sl@0
   367
sl@0
   368
	if (-e $dataFile)
sl@0
   369
		{
sl@0
   370
		open (HASH_INPUT, "< $dataFile") or die ("Could not open file for reading");
sl@0
   371
		
sl@0
   372
		binmode HASH_INPUT;
sl@0
   373
		# reading the endianness
sl@0
   374
		$fileread = read HASH_INPUT, $endianness, 1;
sl@0
   375
		$endianness = unpack "C",$endianness;
sl@0
   376
		if ($endianness == 0)
sl@0
   377
			{
sl@0
   378
			# set the template to a default-> n for the eman time
sl@0
   379
			$$bigEndian = 0;
sl@0
   380
			}
sl@0
   381
		elsif ($endianness == 1)
sl@0
   382
			{
sl@0
   383
			$$bigEndian = 0;
sl@0
   384
			}
sl@0
   385
		elsif ($endianness == 2)
sl@0
   386
			{
sl@0
   387
			$$bigEndian = 1;
sl@0
   388
			}
sl@0
   389
		else
sl@0
   390
			{
sl@0
   391
			print "Illegal Endianness specified in the control files";
sl@0
   392
			}
sl@0
   393
		#reading the replacement characters
sl@0
   394
		$fileread = read HASH_INPUT, $replacenum,1;
sl@0
   395
		$replacenum= unpack "C",$replacenum;
sl@0
   396
		$fileread = read HASH_INPUT, $$replacementCharacter,$replacenum;
sl@0
   397
		# reading the ranges
sl@0
   398
		$fileread = read HASH_INPUT, $rangenum, 1;
sl@0
   399
		$rangenum = unpack "C",$rangenum;
sl@0
   400
		my $i; # loop variable 
sl@0
   401
		for ($i=0; $i < $rangenum; ++$i)
sl@0
   402
			{
sl@0
   403
			my $lowerrange = 0;
sl@0
   404
			my $upperrange = 0;
sl@0
   405
			my $followchar = 0;
sl@0
   406
sl@0
   407
			$fileread = read HASH_INPUT,$lowerrange,1;
sl@0
   408
			$lowerrange = unpack "C",$lowerrange;
sl@0
   409
			$fileread = read HASH_INPUT,$upperrange,1;
sl@0
   410
			$upperrange = unpack "C",$upperrange;
sl@0
   411
			$fileread = read HASH_INPUT,$followchar,1;
sl@0
   412
			$followchar = unpack "C",$followchar;
sl@0
   413
sl@0
   414
			push @$ranges,[$lowerrange,$upperrange,$followchar];
sl@0
   415
			}
sl@0
   416
		my $data = 0;
sl@0
   417
		my @unpackeddata = 0;
sl@0
   418
		$fileread = read HASH_INPUT, $data, $largenumber;
sl@0
   419
		@unpackeddata = unpack "v*",$data;
sl@0
   420
		for($i = 0; $i <= $#unpackeddata; $i= $i+2)
sl@0
   421
			{
sl@0
   422
			$unicodeCharacters->{$unpackeddata[$i]}=$unpackeddata[$i+1];
sl@0
   423
			$foreignCharacters->{$unpackeddata[$i+1]}=$unpackeddata[$i];
sl@0
   424
			}
sl@0
   425
		}
sl@0
   426
	else
sl@0
   427
		{
sl@0
   428
		die ("Encoding Format \"$$encoding\" not recognised");
sl@0
   429
		}
sl@0
   430
	}
sl@0
   431
sl@0
   432
sub OtherToUnicode
sl@0
   433
	{
sl@0
   434
	my $inputencoding = shift;
sl@0
   435
	my $unicode = shift;  
sl@0
   436
	my $other = shift; 
sl@0
   437
	my $foreignCharacters = shift;
sl@0
   438
	my $unicodeCharacters = shift;
sl@0
   439
	my $unicodetemplate = shift;
sl@0
   440
	my $replacementCharacter = 0;
sl@0
   441
	my $unicodeReplacementCharacter = pack($unicodetemplate, 0xfffd);
sl@0
   442
	my @ranges=(); 
sl@0
   443
 
sl@0
   444
	my $otherIndex= 0;
sl@0
   445
	my $numOfBytes = length($other);
sl@0
   446
	my $key = 0;
sl@0
   447
	my $inRange = 0;
sl@0
   448
	my $followByte = -1;
sl@0
   449
sl@0
   450
	if ($$inputencoding=~/^utf8$/i)
sl@0
   451
		{
sl@0
   452
		return &Utf8ToUnicode($unicode, $other, $unicodetemplate);
sl@0
   453
		}
sl@0
   454
	my $bigEndian;
sl@0
   455
	FillInHashes($foreignCharacters,$unicodeCharacters, $inputencoding, \$replacementCharacter,\@ranges,\$bigEndian);
sl@0
   456
	for (;;)
sl@0
   457
		{
sl@0
   458
		if ($otherIndex > $numOfBytes -1) 
sl@0
   459
			{
sl@0
   460
			last;
sl@0
   461
			}
sl@0
   462
		my $frontByte = (unpack("x$otherIndex".'C', $other))[0];
sl@0
   463
		# @ranges is an array of references. Each reference is a reference to an array
sl@0
   464
		for ($key = 0; $key <= $#ranges; ++$key)
sl@0
   465
			{
sl@0
   466
			my $arrayref = $ranges[$key];
sl@0
   467
			if (($frontByte >= $arrayref->[0]) && ($frontByte <= $arrayref->[1]))
sl@0
   468
				{
sl@0
   469
				$followByte = $arrayref->[2];
sl@0
   470
				$inRange = 1;
sl@0
   471
				}
sl@0
   472
			}
sl@0
   473
		Assert ($inRange != 0, "cannot figure out the Byte size of the character");
sl@0
   474
		my $tempByte = 0;
sl@0
   475
		for ($key = 0; $key<= $followByte; ++$key)
sl@0
   476
			{
sl@0
   477
			if ($bigEndian)
sl@0
   478
				{
sl@0
   479
				$tempByte = ($tempByte << 8) | (unpack("x$otherIndex".'C', $other))[0];
sl@0
   480
				}
sl@0
   481
			else
sl@0
   482
				{
sl@0
   483
				$tempByte = $tempByte | ((unpack("x$otherIndex".'C', $other))[0] << (8*$key));
sl@0
   484
				}
sl@0
   485
			$otherIndex++;	
sl@0
   486
			}
sl@0
   487
		if (exists $unicodeCharacters->{$tempByte})
sl@0
   488
			{
sl@0
   489
			$$unicode .= pack $unicodetemplate , $unicodeCharacters->{$tempByte};
sl@0
   490
			}
sl@0
   491
		else
sl@0
   492
			{
sl@0
   493
			$$unicode .= $unicodeReplacementCharacter;
sl@0
   494
			}
sl@0
   495
		}
sl@0
   496
	}
sl@0
   497
sl@0
   498
sub UnicodeToOther
sl@0
   499
	{
sl@0
   500
	my $outputencoding = shift;
sl@0
   501
	my $other = shift;
sl@0
   502
	my $unicode = shift;
sl@0
   503
	my $foreignCharacters = shift;
sl@0
   504
	my $unicodeCharacters = shift;
sl@0
   505
	my $unicodetemplate = shift;
sl@0
   506
	my $replacementCharacter = 0;
sl@0
   507
	my @ranges=(); 
sl@0
   508
sl@0
   509
	my $unicodeIndex= 0;
sl@0
   510
	my $numOfBytes = length($unicode);
sl@0
   511
	my @UnicodeUnpacked = ();
sl@0
   512
	my $key = 0;
sl@0
   513
sl@0
   514
	if ($$outputencoding=~/^utf8$/i)
sl@0
   515
		{
sl@0
   516
		return &UnicodeToUtf8($other, $unicode, $unicodetemplate);
sl@0
   517
		}
sl@0
   518
	my $bigEndian;
sl@0
   519
	FillInHashes($foreignCharacters,$unicodeCharacters, $outputencoding, \$replacementCharacter,\@ranges,\$bigEndian);
sl@0
   520
	my $foreignTemplate=$bigEndian? 'n': 'v';
sl@0
   521
	@UnicodeUnpacked = unpack "$unicodetemplate*", $unicode;
sl@0
   522
	foreach $key (@UnicodeUnpacked)
sl@0
   523
		{
sl@0
   524
		if (!exists($foreignCharacters->{$key}))
sl@0
   525
			{
sl@0
   526
			$$other .= $replacementCharacter;
sl@0
   527
			}
sl@0
   528
		else
sl@0
   529
			{
sl@0
   530
			# This is the WRONG but it will work for the mean time
sl@0
   531
			# This will fail if the foreignCharacter has characters that are more than
sl@0
   532
			# two bytes long ..... But this should work for foreign characters of 1 or 2 Bytes
sl@0
   533
sl@0
   534
			my $foreignValue = $foreignCharacters->{$key};
sl@0
   535
			if ( $foreignValue <= 255)
sl@0
   536
				{
sl@0
   537
				$$other .= pack "C" , $foreignValue;
sl@0
   538
				}
sl@0
   539
			else
sl@0
   540
				{
sl@0
   541
				$$other .= pack $foreignTemplate, $foreignValue;
sl@0
   542
				}
sl@0
   543
			}
sl@0
   544
		}
sl@0
   545
	}
sl@0
   546