os/textandloc/charconvfw/fatfilenameconversionplugins/group/cp54936_4byte_tounicode.pl
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
sl@0
     1
#
sl@0
     2
# Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies).
sl@0
     3
# All rights reserved.
sl@0
     4
# This component and the accompanying materials are made available
sl@0
     5
# under the terms of "Eclipse Public License v1.0"
sl@0
     6
# which accompanies this distribution, and is available
sl@0
     7
# at the URL "http://www.eclipse.org/legal/epl-v10.html".
sl@0
     8
#
sl@0
     9
# Initial Contributors:
sl@0
    10
# Nokia Corporation - initial contribution.
sl@0
    11
#
sl@0
    12
# Contributors:
sl@0
    13
#
sl@0
    14
# Description: 
sl@0
    15
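# Generates the C++ lookup table that maps four-byte CP54936 codes to Unicode
# (see the header text this script writes into the generated .cpp file).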
sl@0
    16
#
sl@0
    17
sl@0
    18
# The script takes the mapping text file and, optionally, the name of the
# output file (its extension is replaced by ".cpp" further down).
if (@ARGV != 1 && @ARGV != 2)
	{
	# The usage text names this script and shows the optional second
	# argument, matching the "Generated with" line written to the output.
	print <<EOD;
Usage: perl -w cp54936_4byte_tounicode.pl cp54936_4byte.txt [cp54936_4byte_tounicode.cpp]
EOD
	exit(1);
	}
sl@0
    25
sl@0
    26
# Returns the given file name with its extension (everything from the last
# '.' onwards) removed. A name that contains no '.' is returned unchanged, so
# callers never pick up an undefined or stale regex capture.
sub RemoveExtension
	{
	my ($filename) = @_;
	if (defined $filename && $filename =~ m/(.*)\..*/)
		{
		return $1;
		}
	return $filename;
	}

# Default output root: the input file name without its extension.
my $root = RemoveExtension($ARGV[0]);

# Base name of the root (the text after the last '\' or '/'); the whole root
# when it has no directory part.
my $header_to_include = (defined $root && $root =~ m/.*[\\\/]([^\\\/]*)$/) ? $1 : $root;

# An explicit second argument overrides where the output is written.
if (@ARGV == 2)
	{
	$root = RemoveExtension($ARGV[1]);
	}
sl@0
    37
sl@0
    38
# Load the mapping file named by the first argument into %lines.
# Three-argument open with a lexical handle: the file name is never
# interpreted as an open mode.
open (my $in, '<', $ARGV[0]) or die ("Error: $ARGV[0] $!");

my $lineNumber = 0;
my $acceptLineNumber = 0;
my %lines;		# hash table of all characters in format with key=foreign(string) and value=unicode(string)
while (my $line = <$in>)
	{
	$lineNumber++;
	# Accept lines like "0x81318133	0x060D"; everything else is skipped silently.
	if ($line =~ /^(0[xX]8[1-4]3\d[\da-fA-F]{2}3\d)\s*(0[xX][\da-fA-F]{4}).*/)
		{
		my ($foreign, $unicode) = ($1, $2);
		$acceptLineNumber++;
		# The table writer looks codes up as "0x" followed by upper-case hex
		# digits, so store the key in that form regardless of the input's case.
		$foreign = "0x" . uc(substr($foreign, 2));
		$lines{$foreign} = $unicode;
		}
	}
close $in;
print "Read $ARGV[0] done.\n";
print "$acceptLineNumber of $lineNumber lines accepted.\n";
sl@0
    63
sl@0
    64
sl@0
    65
# Returns the four-byte CP54936 code that follows the given one.
#
# Parameter: a string like "0x81308439" ("0x" or "0X" followed by eight hex digits).
# Returns:   a string like "0x81308530" ("0x" followed by eight upper-case hex digits).
# Dies if the parameter does not contain such a code.
#
# The bytes are advanced like an odometer: byte 4 wraps from 0x39 to 0x30,
# byte 3 wraps from 0xFE to 0x81, byte 2 wraps from 0x39 to 0x30, and byte 1
# is simply incremented (no upper limit is enforced here).
sub IncreaseCP54936Code
	{
	my ($increaseme) = @_;
	$increaseme = '' unless defined $increaseme;
	# A list-context match returns the captures, or an empty list on failure,
	# so malformed input can never reuse captures left by an earlier match.
	my @bytes = ($increaseme =~ /0[xX]([\da-fA-F]{2})([\da-fA-F]{2})([\da-fA-F]{2})([\da-fA-F]{2})/);
	die ("Error: '$increaseme' is not a 4-byte CP54936 code") unless @bytes == 4;
	# Lexical variables: nothing leaks into (or is read from) package globals.
	my ($b1, $b2, $b3, $b4) = map { hex($_) } @bytes;
	$b4++;
	if ($b4 == 0x3A)
		{
		$b4 = 0x30;
		$b3++;
		if ($b3 == 0xFF)
			{
			$b3 = 0x81;
			$b2++;
			if ($b2 == 0x3A)
				{
				$b2 = 0x30;
				$b1++;
				}
			}
		}
	return sprintf("0x%02X%02X%02X%02X", $b1, $b2, $b3, $b4);
	}
sl@0
    91
sl@0
    92
# Returns the distance, counted in codes, from 0x81308130 to the given
# four-byte CP54936 code.
#
# Parameter: a string like "0x8234A235".
# Returns:   (b1-0x81)*12600 + (b2-0x30)*1260 + (b3-0x81)*10 + (b4-0x30),
#            where b1..b4 are the four bytes of the code; 0 for "0x81308130".
# Dies if the parameter does not contain "0x" followed by eight hex digits.
sub OffsetOfCP54936Code
	{
	my ($code) = @_;
	$code = '' unless defined $code;
	# A list-context match returns the captures, or an empty list on failure,
	# so malformed input can never reuse captures left by an earlier match.
	my @bytes = ($code =~ /0[xX]([\da-fA-F]{2})([\da-fA-F]{2})([\da-fA-F]{2})([\da-fA-F]{2})/);
	die ("Error: '$code' is not a 4-byte CP54936 code") unless @bytes == 4;
	# Lexical variables: nothing leaks into (or is read from) package globals.
	my ($b1, $b2, $b3, $b4) = map { hex($_) } @bytes;
	return ($b1-0x81)*12600 + ($b2-0x30)*1260 + ($b3-0x81)*10 + ($b4-0x30);
	}
sl@0
    99
sl@0
   100
# Returns the numeric value of the fourth (last) byte of a four-byte
# CP54936 code.
#
# Parameter: a string like "0x8234A235".
# Returns:   the last byte as a number (0x35 for the example above).
# Dies if the parameter does not contain "0x" followed by eight hex digits.
sub Byte4OfCP54936Code
	{
	my ($code) = @_;
	$code = '' unless defined $code;
	# A list-context match returns the captures, or an empty list on failure,
	# so malformed input can never reuse captures left by an earlier match.
	my @bytes = ($code =~ /0[xX]([\da-fA-F]{2})([\da-fA-F]{2})([\da-fA-F]{2})([\da-fA-F]{2})/);
	die ("Error: '$code' is not a 4-byte CP54936 code") unless @bytes == 4;
	return hex($bytes[3]);
	}
sl@0
   106
sl@0
   107
sl@0
   108
# Create the output file "$root.cpp" and write its fixed header: licence
# text, includes, and a comment explaining how the table is indexed.
print "Write to $root.cpp...\n";
# Three-argument open; on failure report the file that could not be opened
# together with the system's reason.
open (CPP, '>', "$root.cpp") or die ("Error: $root.cpp $!");

# The index formula below subtracts 129 (0x81) from byte 1, which is what
# OffsetOfCP54936Code computes and what the worked example (16178) requires.
print CPP <<EOD;
// Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of the License "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
//
// Auto-generated by the cp54936_4byte_tounicode.pl tool - Do not edit!!!
//

#include <e32std.h>
#include <e32def.h>
#include "cp54936.h"


// mapping table of: CP54936 4-byte in-BMP ---> Unicode
// To calculate index: index=(b1-129)*12600+(b2-48)*1260+(b3-129)*10+(b4-48), in which,
// b1,b2,b3,b4 is byte1,2,3,4 of CP54936 code.
// For example, CP54936 code 0x8232EA38, the index=(0x82-129)*12600+(0x32-48)*1260+(0xEA-129)*10+(0x38-48)=16178
// So we get the Unicode 0x42AB.
// Generated with: \"perl -w ..\\group\\cp54936_4byte_tounicode.pl cp54936_4byte.txt cp54936_4byte_tounicode.cpp\".

EOD
sl@0
   142
sl@0
   143
# Write the lookup table: one TUint16 entry for every four-byte code from
# $expect up to and including $last, in code order, ten entries per row.
my $bytecount = 0;
my $expect = "0x81308130";
my $last = "0x8431A439";
# Offset of the final code; computed once because it never changes in the loop.
my $lastOffset = OffsetOfCP54936Code($last);
my $totalCount = $lastOffset + 1;

print CPP "const TUint16 KMappingTable4ByteBmp2Unicode[$totalCount] =\n\t{\n\t";

my $outIndex = 0;	# to wrap every 10 items
while (OffsetOfCP54936Code($expect) <= $lastOffset)
	{
	# Codes that were not present in the input file are written as 0xFFFD.
	my $unicode = exists($lines{$expect}) ? $lines{$expect} : "0xFFFD";
	print CPP "$unicode, ";
	$bytecount += 2;
	$outIndex++;
	if ($outIndex % 10 == 0)
		{
		# End the row with a comment naming the last code written on it.
		print CPP "\t// $expect\n\t";
		}
	# to next foreign
	$expect = IncreaseCP54936Code($expect);
	}

print CPP "};\n";
print CPP "// total byte count = $bytecount\n";
print "\nTotal byte count: $bytecount.\n";
# Buffered write errors only surface when the handle is closed, so check it.
close (CPP) or die ("Error: $root.cpp $!");
print "Done.\n";