1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/kernelhwsrv/kernel/eka/euser/unicode/perl/UnicodeCompositionEx.pl Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,165 @@
1.4 +#
1.5 +# Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies).
1.6 +# All rights reserved.
1.7 +# This component and the accompanying materials are made available
1.8 +# under the terms of the License "Eclipse Public License v1.0"
1.9 +# which accompanies this distribution, and is available
1.10 +# at the URL "http://www.eclipse.org/legal/epl-v10.html".
1.11 +#
1.12 +# Initial Contributors:
1.13 +# Nokia Corporation - initial contribution.
1.14 +#
1.15 +# Contributors:
1.16 +#
1.17 +# Description:
1.18 +#
1.19 +# UnicodeCompositionEx
1.20 +# adds composition exclusion information to unicode data
1.21 +#
1.22 +# Added as a new field:
1.23 +# Symbian:<excluded-from-composition>
1.24 +# where <excluded-from-composition> is E or null.
1.25 +#
1.26 +# Usage:
1.27 +# perl -w UnicodeCompositionEx.pl CompositionExclusions.txt < <Unicode-data-file>
1.28 +
1.29 +use strict;
1.30 +
1.31 +if (scalar(@ARGV) != 1) # exactly one argument is required: the exclusions file
1.32 + {
1.33 + print (STDERR "Usage:\nperl -w UnicodeCompositionEx.pl CompositionExclusions.txt < <Unicode-data-file>\n");
1.34 + exit 1; # non-zero exit status signals the usage error
1.35 + }
1.36 +
1.37 +open(my $exclusions, '<', $ARGV[0]) or die("Could not open file $ARGV[0]: $!\n");
1.38 +
1.39 +my $lineNo = 0;
1.40 +my %Excluded = (); # code point (as a number) => 1 for every character excluded from composition
1.41 +while (<$exclusions>)
1.42 + {
1.43 + $lineNo++;
1.44 + # skip blank and comment-only lines; \s also absorbs the CR of a CRLF line ending
1.45 + if (!/^\s*([#].*)?$/)
1.46 + {
1.47 + /^\s*([0-9A-Fa-f]{4,6})\s*([#].*)?$/ or die("Did not understand line $lineNo of $ARGV[0]");
1.48 + my $code = hex($1);
1.49 + die ("Value $code outside Unicode range at line $lineNo of $ARGV[0]")
1.50 + unless ($code < 0x110000);
1.51 + $Excluded{$code} = 1;
1.52 + #printf("Excluding %X because it is in the exclusion list\n", $code);
1.53 + }
1.54 + }
1.55 +
1.56 +close $exclusions;
1.57 +# This is a two-pass operation, so we must store the lines ready for output later.
1.58 +my @DataFileLines = ();
1.59 +my %DataFileLineCodes = (); # index into @DataFileLines => code point parsed from that line
1.60 +# The first pass will collect all the relevant data:
1.61 +# The first character of the decomposition if there is more than one
1.62 +my %FirstOfDecompositionString = ();
1.63 +# The singleton decomposition if it is a singleton
1.64 +my %SingletonDecomposition = ();
1.65 +# The decomposition's tag (a leading <...> token), if any
1.66 +my %DecompTag = ();
1.67 +# The combining class
1.68 +my %CombiningClass = ();
1.69 +# Singleton decompositions are only recorded here; whether to exclude them is decided after this pass
1.70 +$lineNo = 0;
1.71 +while (my $line = <STDIN>)
1.72 + {
1.73 + $line =~ s/[\r\n]+$//; # like chomp, but also drops the CR of a CRLF line ending
1.74 + $DataFileLines[$lineNo] = $line;
1.75 + $lineNo++;
1.76 + # Split into fields: make sure trailing null strings are not
1.77 + # deleted by adding a dummy final field
1.78 + my @attribute = split(/;/, $line.';dummy');
1.79 + # Delete the dummy field
1.80 + pop @attribute;
1.81 +
1.82 + if (scalar(@attribute) == 15)
1.83 + {
1.84 + my $code = $attribute[0];
1.85 + die("First attribute '$code' not a valid Unicode codepoint at line $lineNo")
1.86 + unless $code =~ /^(?:10|[0-9a-fA-F])?[0-9a-fA-F]{4}$/; # 4 to 6 hex digits, at most 10FFFF
1.87 + $code = hex($code);
1.88 + my $combiningClass = $attribute[3];
1.89 + die("Fourth attribute '$combiningClass' is not a valid Unicode combining class at line $lineNo")
1.90 + unless ($combiningClass =~ /^[0-9]+$/ && $combiningClass < 256); # must be a plain decimal number
1.91 + my $decompositionString = $attribute[5];
1.92 + die ("Sixth attribute '$decompositionString' is not a valid decomposition string at line $lineNo")
1.93 + unless ($decompositionString =~ /^(<.*>)?[0-9a-fA-F \t]*$/);
1.94 + my @decomposition = split(/[ \t]+/, $decompositionString);
1.95 + if (@decomposition && $decomposition[0] =~ /^<.*>$/)
1.96 + {
1.97 + $DecompTag{$code} = shift @decomposition;
1.98 + }
1.99 + if (scalar(@decomposition) == 1)
1.100 + {
1.101 + # We want to exclude codes such as these, with a singleton
1.102 + # decomposition mapping, but at the moment we don't know if the
1.103 + # character mapped to has a decomposition mapping, so we will
1.104 + # defer this to another stage.
1.105 + die("Decomposition $decomposition[0] not understood at line $lineNo")
1.106 + unless ($decomposition[0] =~ /^[0-9A-Fa-f]+$/);
1.107 + $SingletonDecomposition{$code} = hex($decomposition[0]);
1.108 + }
1.109 + elsif (1 < scalar(@decomposition))
1.110 + {
1.111 + die("Decomposition $decomposition[0] not understood at line $lineNo")
1.112 + unless ($decomposition[0] =~ /^[0-9A-Fa-f]+$/);
1.113 + $FirstOfDecompositionString{$code} = hex($decomposition[0]);
1.114 + }
1.115 + $CombiningClass{$code} = $combiningClass;
1.116 + $DataFileLineCodes{$lineNo-1} = $code; # $lineNo was already advanced past this line
1.117 + }
1.118 + elsif ($line !~ /^[ \t]*$/)
1.119 + {
1.120 + die 'Do not understand line '.$lineNo;
1.121 + }
1.122 + }
1.123 +
1.124 +# Exclude every character whose multi-character decomposition starts with
1.125 +# a non-starter, i.e. a first character of non-zero combining class.
1.126 +for my $composite (keys %FirstOfDecompositionString)
1.127 + {
1.128 + my $leading = $FirstOfDecompositionString{$composite};
1.129 + # A leading character that had no record of its own in the data file
1.130 + # has no known combining class, so it cannot trigger an exclusion.
1.131 + next unless exists($CombiningClass{$leading});
1.132 + # Combining class zero means the leading character is a starter.
1.133 + next if $CombiningClass{$leading} == 0;
1.134 +
1.135 + $Excluded{$composite} = 1;
1.136 + #printf("Excluding %X because its decomposition starts with a non-starter(%X)\n", $composite, $leading);
1.137 + }
1.138 +
1.139 +# A code with a singleton decomposition is excluded unless the end of its singleton chain has a multi-character decomposition.
1.140 +foreach my $code (sort { $a <=> $b } keys %SingletonDecomposition)
1.141 + {
1.142 + my ($mapsTo, %visited) = ($code); # the tag test below depends only on $code: a tagged mapping is never followed
1.143 + while (!exists $DecompTag{$code} && exists $SingletonDecomposition{$mapsTo})
1.144 + {
1.145 + die(sprintf("Singleton decompositions starting at %X form a cycle", $code)) if $visited{$mapsTo}++; # malformed data would otherwise loop forever
1.146 + $mapsTo = $SingletonDecomposition{$mapsTo};
1.147 + }
1.148 + if (!exists $FirstOfDecompositionString{$mapsTo})
1.149 + {
1.150 + #printf("Excluding %X because its decomposition is a singleton(%X)\n", $code, $mapsTo);
1.151 + $Excluded{$code} = 1;
1.152 + }
1.153 + }
1.154 +
1.155 +# Second pass: write every stored line back out, appending the extra
1.156 +# Symbian field to each line that was parsed as a character record.
1.157 +foreach my $index (0 .. $#DataFileLines)
1.158 + {
1.159 + my $suffix = '';
1.160 + if (exists($DataFileLineCodes{$index}))
1.161 + {
1.162 + # The field holds 'E' for an excluded character and is empty otherwise.
1.163 + my $flag = exists($Excluded{ $DataFileLineCodes{$index} }) ? 'E' : '';
1.164 + $suffix = ';Symbian:' . $flag;
1.165 + }
1.166 + # Lines without a recorded code point are written without the extra field.
1.167 + print $DataFileLines[$index], $suffix, "\n";
1.168 + }