os/textandloc/charconvfw/fatfilenameconversionplugins/group/cp54936_4byte_tounicode.pl
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 #
     2 # Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies).
     3 # All rights reserved.
     4 # This component and the accompanying materials are made available
     5 # under the terms of "Eclipse Public License v1.0"
     6 # which accompanies this distribution, and is available
     7 # at the URL "http://www.eclipse.org/legal/epl-v10.html".
     8 #
     9 # Initial Contributors:
    10 # Nokia Corporation - initial contribution.
    11 #
    12 # Contributors:
    13 #
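# Description: 
# Generates a C++ source file containing the CP54936 4-byte (in-BMP) to
# Unicode mapping table from a text file of lines like "0x81318133	0x060D".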
    16 #
    17 
    18 if (@ARGV != 1 && @ARGV != 2)
    19 	{
    20 	print <<EOD;
    21 Usage: perl -w cp54936_4byte.pl cp54936_4byte.txt
    22 EOD
    23 	exit(1);
    24 	}
    25 
    26 # Removes the extenstion from the filename
    27 $ARGV[0] =~ m/(.*)\..*/;
    28 my $root = $1;
    29 $root =~ m/.*[\\\/]([^\\\/]*)$/;
    30 my $header_to_include = $1;
    31 
    32 if (@ARGV == 2)
    33 	{
    34 	$ARGV[1] =~ m/(.*)\..*/;
    35 	$root = $1;
    36 	}
    37 
    38 open (IN, "<$ARGV[0]") or die ("Error: $ARGV[0] $!");
    39 
    40 my $lineNumber = 0;
    41 my $acceptLineNumber = 0;
    42 my %lines;		# hash table of all characters in format with key=foreign(string) and value=unicode(string)
    43 while (!eof(IN))
    44 	{
    45 	my $line = <IN>;
    46 	$lineNumber++;
    47 	if ($line =~ /^(0[xX]8[1-4]3\d[\da-fA-F]{2}3\d)\s*(0[xX][\da-fA-F]{4}).*/)
    48 		{
    49 		# read a line like "0x81318133	0x060D"
    50 		$acceptLineNumber++;
    51 		my $foreign = $1;
    52 		my $unicode = $2;
    53 		$lines{$foreign} = $unicode;
    54 		}
    55 	else
    56 		{
    57 		#print "Ignore line: $line";
    58 		}
    59 	}
    60 close IN;
    61 print "Read $ARGV[0] done.\n";
    62 print "$acceptLineNumber of $lineNumber lines accepted.\n";
    63 
    64 
    65 # increase input cp54936 code by 1
    66 # param is a string like "0x81308439"
    67 # return a string like "0x81308530"
    68 sub IncreaseCP54936Code
    69 	{
    70 	my ($increaseme) = @_;
    71 	$increaseme =~ /0[xX]([\da-fA-F]{2})([\da-fA-F]{2})([\da-fA-F]{2})([\da-fA-F]{2})/;
    72 	($b1, $b2, $b3, $b4) = (hex($1), hex($2), hex($3), hex($4));
    73 	$b4++;
    74 	if ($b4 == 0x3A)
    75 		{
    76 		$b4 = 0x30;
    77 		$b3++;
    78 		if ($b3 == 0xFF)
    79 			{
    80 			$b3 = 0x81;
    81 			$b2++;
    82 			if ($b2 == 0x3A)
    83 				{
    84 				$b2 = 0x30;
    85 				$b1++;
    86 				}
    87 			}
    88 		}
    89 	return sprintf("0x%02X%02X%02X%02X", $b1, $b2, $b3, $b4);
    90 	}
    91 
    92 # return the offset from 0x81308130 to input "0x8234A235"
    93 sub OffsetOfCP54936Code
    94 	{
    95 	$_[0] =~ /0[xX]([\da-fA-F]{2})([\da-fA-F]{2})([\da-fA-F]{2})([\da-fA-F]{2})/;
    96 	($b1, $b2, $b3, $b4) = (hex($1), hex($2), hex($3), hex($4));
    97 	return ($b1-0x81)*12600 + ($b2-0x30)*1260 + ($b3-0x81)*10 + ($b4-0x30);
    98 	}
    99 
   100 # return the last byte of input "0x8234A235"
   101 sub Byte4OfCP54936Code
   102 	{
   103 	$_[0] =~ /0[xX]([\da-fA-F]{2})([\da-fA-F]{2})([\da-fA-F]{2})([\da-fA-F]{2})/;
   104 	return hex($4);
   105 	}
   106 
   107 
   108 print "Write to $root.cpp...\n";
   109 open (CPP, ">$root.cpp") or die ("Error: $ARGV[0] Can't open cpp file");
   110 
   111 print CPP <<EOD;
   112 // Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies).
   113 // All rights reserved.
   114 // This component and the accompanying materials are made available
   115 // under the terms of the License "Eclipse Public License v1.0"
   116 // which accompanies this distribution, and is available
   117 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
   118 //
   119 // Initial Contributors:
   120 // Nokia Corporation - initial contribution.
   121 //
   122 // Contributors:
   123 //
   124 // Description:
   125 //
   126 // Auto-generated by the cp54936_4byte_tounicode.pl tool - Do not edit!!!
   127 //
   128 
   129 #include <e32std.h>
   130 #include <e32def.h>
   131 #include "cp54936.h"
   132 
   133 
   134 // mapping table of: CP54936 4-byte in-BMP ---> Unicode
   135 // To calculate index: index=(b1-144)*12600+(b2-48)*1260+(b3-129)*10+(b4-48), in which,
   136 // b1,b2,b3,b4 is byte1,2,3,4 of CP54936 code.
   137 // For example, CP54936 code 0x8232EA38, the index=(0x82-144)*12600+(0x32-48)*1260+(0xEA-129)*10+(0x38-48)=16178
   138 // So we get the Unicode 0x42AB.
   139 // Generated with: \"perl -w ..\\group\\cp54936_4byte_tounicode.pl cp54936_4byte.txt cp54936_4byte_tounicode.cpp\".
   140 
   141 EOD
   142 
   143 my $bytecount = 0;
   144 my $expect = "0x81308130";
   145 my $last = "0x8431A439";
   146 my $totalCount = OffsetOfCP54936Code($last) + 1;
   147 
   148 
   149 print CPP "const TUint16 KMappingTable4ByteBmp2Unicode[$totalCount] =\n\t{\n\t";
   150 
   151 my $outIndex = 0;	# to wrap every 10 items
   152 while (OffsetOfCP54936Code($expect) <= OffsetOfCP54936Code($last))
   153 	{
   154 	if (!exists($lines{$expect}))
   155 		{
   156 		print CPP "0xFFFD, ";
   157 		}
   158 	else
   159 		{
   160 		print CPP "$lines{$expect}, ";
   161 		}
   162 	$bytecount += 2;
   163 	$outIndex++;
   164 	if ($outIndex % 10 == 0)
   165 		{
   166 		print CPP "\t// $expect\n\t";
   167 		}
   168 	# to next foreign
   169 	$expect = IncreaseCP54936Code($expect);
   170 	}
   171 
   172 print CPP "};\n";
   173 print CPP "// total byte count = $bytecount\n";
   174 print "\nTotal byte count: $bytecount.\n";
   175 close CPP;
   176 print "Done.\n";