os/textandloc/charconvfw/fatfilenameconversionplugins/group/cp54936_4byte_tounicode.pl
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/textandloc/charconvfw/fatfilenameconversionplugins/group/cp54936_4byte_tounicode.pl Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,176 @@
1.4 +#
1.5 +# Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies).
1.6 +# All rights reserved.
1.7 +# This component and the accompanying materials are made available
1.8 +# under the terms of "Eclipse Public License v1.0"
1.9 +# which accompanies this distribution, and is available
1.10 +# at the URL "http://www.eclipse.org/legal/epl-v10.html".
1.11 +#
1.12 +# Initial Contributors:
1.13 +# Nokia Corporation - initial contribution.
1.14 +#
1.15 +# Contributors:
1.16 +#
1.17 +# Description:
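1.18 +# Reads a CP54936 (GB18030) 4-byte mapping text file and generates a C++ source file containing the 4-byte in-BMP to Unicode lookup table.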
1.19 +#
1.20 +
# Command line: the input mapping file is mandatory, the name of the
# generated file is optional.
if (@ARGV != 1 && @ARGV != 2)
    {
    print <<EOD;
Usage: perl -w cp54936_4byte_tounicode.pl cp54936_4byte.txt [cp54936_4byte_tounicode.cpp]
EOD
    exit(1);
    }

# Removes the extension from the filename.  Only a dot in the last path
# component counts as an extension separator, and a name without any
# extension is kept unchanged (an unchecked match would otherwise leave
# $1 undefined or stale).
my $root = $ARGV[0];
$root =~ s/\.[^.\\\/]*$//;

# Base name of the input file: $root without any leading directories.
my ($header_to_include) = $root =~ m/([^\\\/]*)$/;

# An explicit output file name overrides the root derived from the input.
# Its extension is dropped as well because ".cpp" is appended when the
# output file is opened.
if (@ARGV == 2)
    {
    $root = $ARGV[1];
    $root =~ s/\.[^.\\\/]*$//;
    }
1.40 +
# Load the mapping file named by the first command-line argument.
open (my $in, '<', $ARGV[0]) or die ("Error: $ARGV[0] $!");

my $lineNumber = 0;
my $acceptLineNumber = 0;
my %lines; # hash table of all characters in format with key=foreign(string) and value=unicode(string)
while (my $line = <$in>)
    {
    $lineNumber++;
    # Accept a line like "0x81318133 0x060D"; every other line is ignored.
    # The look-ahead rejects Unicode values longer than four hex digits
    # instead of silently truncating them.
    if ($line =~ /^0[xX](8[1-4]3\d[\da-fA-F]{2}3\d)\s*(0[xX][\da-fA-F]{4})(?![\da-fA-F])/)
        {
        $acceptLineNumber++;
        # Store the key in canonical form ("0x" + upper-case hex digits).
        # The table is later looked up with strings built by
        # sprintf("0x%02X..."), so a key kept as "0X..." or with lower-case
        # digits would never be found.
        my $foreign = "0x" . uc($1);
        my $unicode = $2;
        $lines{$foreign} = $unicode;
        }
    }
close $in;
print "Read $ARGV[0] done.\n";
print "$acceptLineNumber of $lineNumber lines accepted.\n";
1.66 +
1.67 +
# Increase the input cp54936 code by 1.
#
# Parameter: a string like "0x81308439" ("0x" or "0X" followed by eight
#            hex digits, i.e. four bytes).
# Returns:   the next code as "0x" followed by eight upper-case hex digits,
#            e.g. "0x81308530".
# Dies if the argument does not contain a code in that format.
#
# The four bytes are stepped like a mixed-radix counter: bytes 2 and 4 run
# over 0x30..0x39 and byte 3 over 0x81..0xFE.  A byte that passes its upper
# limit wraps to its lower limit and carries into the byte on its left.
# Byte 1 is incremented by a carry but is not range-checked.
sub IncreaseCP54936Code
    {
    my ($increaseme) = @_;
    my @bytes = $increaseme =~ /0[xX]([\da-fA-F]{2})([\da-fA-F]{2})([\da-fA-F]{2})([\da-fA-F]{2})/
        or die "Error: malformed CP54936 code '$increaseme'\n";
    # Lexical variables: the byte values must not leak into package globals.
    my ($b1, $b2, $b3, $b4) = map { hex($_) } @bytes;
    $b4++;
    if ($b4 == 0x3A)
        {
        $b4 = 0x30;
        $b3++;
        if ($b3 == 0xFF)
            {
            $b3 = 0x81;
            $b2++;
            if ($b2 == 0x3A)
                {
                $b2 = 0x30;
                $b1++;
                }
            }
        }
    return sprintf("0x%02X%02X%02X%02X", $b1, $b2, $b3, $b4);
    }
1.94 +
# Return the offset from 0x81308130 to the input code.
#
# Parameter: a string like "0x8234A235" ("0x" or "0X" followed by eight
#            hex digits, i.e. four bytes).
# Returns:   (b1-0x81)*12600 + (b2-0x30)*1260 + (b3-0x81)*10 + (b4-0x30),
#            so "0x81308130" gives 0 and each step produced by
#            IncreaseCP54936Code raises the result by 1.
# Dies if the argument does not contain a code in that format.
sub OffsetOfCP54936Code
    {
    my ($code) = @_;
    my @bytes = $code =~ /0[xX]([\da-fA-F]{2})([\da-fA-F]{2})([\da-fA-F]{2})([\da-fA-F]{2})/
        or die "Error: malformed CP54936 code '$code'\n";
    # Lexical variables: the byte values must not leak into package globals.
    my ($b1, $b2, $b3, $b4) = map { hex($_) } @bytes;
    return ($b1-0x81)*12600 + ($b2-0x30)*1260 + ($b3-0x81)*10 + ($b4-0x30);
    }
1.102 +
# Return the last byte of the input code as a number.
#
# Parameter: a string like "0x8234A235" ("0x" or "0X" followed by eight
#            hex digits, i.e. four bytes).
# Returns:   the numeric value of the fourth byte (0x35 for the example).
# Dies if the argument does not contain a code in that format, rather than
# returning whatever a previous successful match left in $4.
sub Byte4OfCP54936Code
    {
    my ($code) = @_;
    my @bytes = $code =~ /0[xX]([\da-fA-F]{2})([\da-fA-F]{2})([\da-fA-F]{2})([\da-fA-F]{2})/
        or die "Error: malformed CP54936 code '$code'\n";
    return hex($bytes[3]);
    }
1.109 +
1.110 +
# Create the output file "$root.cpp" and write its fixed preamble: the
# licence header, the includes and a comment explaining how the table is
# indexed.  CPP stays a bareword filehandle because the table-writing code
# that follows prints to it by that name.
print "Write to $root.cpp...\n";
open (CPP, '>', "$root.cpp") or die ("Error: $root.cpp $!");

# The index formula below must agree with OffsetOfCP54936Code: the table
# starts at byte 1 = 0x81 (129), so 129 is subtracted from byte 1.
print CPP <<EOD;
// Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of the License "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
//
// Auto-generated by the cp54936_4byte_tounicode.pl tool - Do not edit!!!
//

#include <e32std.h>
#include <e32def.h>
#include "cp54936.h"


// mapping table of: CP54936 4-byte in-BMP ---> Unicode
// To calculate index: index=(b1-129)*12600+(b2-48)*1260+(b3-129)*10+(b4-48), in which,
// b1,b2,b3,b4 is byte1,2,3,4 of CP54936 code.
// For example, CP54936 code 0x8232EA38, the index=(0x82-129)*12600+(0x32-48)*1260+(0xEA-129)*10+(0x38-48)=16178
// So we get the Unicode 0x42AB.
// Generated with: \"perl -w ..\\group\\cp54936_4byte_tounicode.pl cp54936_4byte.txt cp54936_4byte_tounicode.cpp\".

EOD
1.145 +
# Emit the mapping table: one TUint16 entry for every four-byte code from
# $expect (first code) up to and including $last, in code order.
my $bytecount = 0;
my $expect = "0x81308130";
my $last = "0x8431A439";
# The offset of the last code is loop-invariant, so compute it only once.
my $lastOffset = OffsetOfCP54936Code($last);
my $totalCount = $lastOffset + 1;


print CPP "const TUint16 KMappingTable4ByteBmp2Unicode[$totalCount] =\n\t{\n\t";

my $outIndex = 0; # to wrap every 10 items
while (OffsetOfCP54936Code($expect) <= $lastOffset)
    {
    # Codes missing from the input file map to U+FFFD.
    my $unicode = exists($lines{$expect}) ? $lines{$expect} : "0xFFFD";
    print CPP "$unicode, ";
    $bytecount += 2;
    $outIndex++;
    if ($outIndex % 10 == 0)
        {
        # End the row with a comment naming the last code written on it.
        print CPP "\t// $expect\n\t";
        }
    # to next foreign
    $expect = IncreaseCP54936Code($expect);
    }

print CPP "};\n";
print CPP "// total byte count = $bytecount\n";
print "\nTotal byte count: $bytecount.\n";
# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so a failed close must not be reported as success.
close CPP or die ("Error: $root.cpp $!");
print "Done.\n";