os/kernelhwsrv/kernel/eka/euser/unicode/perl/UnicodeCompositionEx.pl
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/kernelhwsrv/kernel/eka/euser/unicode/perl/UnicodeCompositionEx.pl	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,165 @@
     1.4 +#
     1.5 +# Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies).
     1.6 +# All rights reserved.
     1.7 +# This component and the accompanying materials are made available
     1.8 +# under the terms of the License "Eclipse Public License v1.0"
     1.9 +# which accompanies this distribution, and is available
    1.10 +# at the URL "http://www.eclipse.org/legal/epl-v10.html".
    1.11 +#
    1.12 +# Initial Contributors:
    1.13 +# Nokia Corporation - initial contribution.
    1.14 +#
    1.15 +# Contributors:
    1.16 +#
    1.17 +# Description:
    1.18 +#
    1.19 +# UnicodeCompositionEx
    1.20 +# adds composition exclusion information to unicode data
    1.21 +#
    1.22 +# Added as a new field:
    1.23 +# Symbian:<excluded-from-composition>
    1.24 +# where <excluded-from-composition> is E or null.
    1.25 +#
    1.26 +# Usage:
    1.27 +# perl -w UnicodeCompositionEx.pl CompositionExclusions.txt < <Unicode-data-file>
    1.28 +
    1.29 +use strict;
    1.30 +
    1.31 +if (scalar(@ARGV) != 1)	# exactly one argument is required: the composition exclusions file
    1.32 +	{
    1.33 +	print (STDERR "Usage:\nperl -w UnicodeCompositionEx.pl CompositionExclusions.txt < <Unicode-data-file>\n");
    1.34 +	exit 1;	# non-zero status so that calling scripts can detect the misuse
    1.35 +	}
    1.36 +# Explicit read mode: the file name is never interpreted as an open mode or a pipe.
    1.37 +open(my $exclusions, '<', $ARGV[0]) or die("Could not open file $ARGV[0]: $!\n");
    1.38 +# Collect every code point listed in the exclusions file as a key of %Excluded.
    1.39 +my $lineNo = 0;
    1.40 +my %Excluded = ();	# numeric code point => 1 for each code excluded from composition
    1.41 +while (<$exclusions>)
    1.42 +	{
    1.43 +	$lineNo++;
    1.44 +	# Blank and comment-only lines are skipped; anything else must parse.
    1.45 +	if (!/^[ \t]*([#].*)?$/)
    1.46 +		{
    1.47 +		/^[ \t]*([0-9A-Fa-f]{4,6})[ \t]*([#].*)?$/ or die("Did not understand line $lineNo of $ARGV[0]");
    1.48 +		my $code = hex($1);	# the single 4-6 digit hexadecimal code point on the line
    1.49 +		die ("Value $code outside Unicode range at line $lineNo of $ARGV[0]")
    1.50 +			unless ($code < 0x110000);
    1.51 +		$Excluded{$code} = 1;
    1.52 +		#printf("Excluding %X because it is in the exclusion list\n", $code);
    1.53 +		}
    1.54 +	}
    1.55 +
    1.56 +close $exclusions;
    1.57 +# This is a two-pass operation, so every input line is stored ready for output later.
    1.58 +my @DataFileLines = ();	# the chomped input lines, in input order
    1.59 +my %DataFileLineCodes = ();	# zero-based index into @DataFileLines => code point found on that line
    1.60 +# The first pass collects the following, each keyed by numeric code point:
    1.61 +# The first character of the decomposition if there is more than one
    1.62 +my %FirstOfDecompositionString = ();
    1.63 +# The singleton decomposition if it is a singleton
    1.64 +my %SingletonDecomposition = ();
    1.65 +# The decomposition tag (a leading <...> token), if any
    1.66 +my %DecompTag = ();
    1.67 +# The combining class
    1.68 +my %CombiningClass = ();
    1.69 +# Singleton decompositions are only recorded here; their exclusion is decided after this pass
    1.70 +$lineNo = 0;
    1.71 +while (my $line = <STDIN>)
    1.72 +	{
    1.73 +	chomp $line;
    1.74 +	$DataFileLines[$lineNo] = $line;
    1.75 +	$lineNo++;	# from here on $lineNo is the one-based number of the current line
    1.76 +	# Split into fields: make sure trailing null strings are not
    1.77 +	# deleted by adding a dummy final field
    1.78 +	my @attribute = split(/;/, $line.';dummy');
    1.79 +	# Delete the dummy field
    1.80 +	pop @attribute;
    1.81 +	# Only lines with exactly 15 fields are treated as character records
    1.82 +	if (scalar(@attribute) == 15)
    1.83 +		{
    1.84 +		my $code = $attribute[0];
    1.85 +		die("First attribute '$code' not a valid Unicode codepoint at line $lineNo")
    1.86 +			unless ($code =~ /^1?[0-9a-fA-F]{4,5}$/ && hex($code) < 0x110000);	# the pattern alone admits up to 1FFFFF
    1.87 +		$code = hex($code);
    1.88 +		my $combiningClass = $attribute[3];
    1.89 +		die("Fourth attribute '$combiningClass' is not a valid Unicode combining class at line $lineNo")
    1.90 +			unless ($combiningClass =~ /^[0-9]+$/ && $combiningClass < 256);	# empty or non-numeric text must not pass as 0
    1.91 +		my $decompositionString = $attribute[5];
    1.92 +		die ("Sixth attribute '$decompositionString' is not a valid decomposition string at line $lineNo")
    1.93 +			unless ($decompositionString =~ /^(<.*>)?[0-9a-fA-F \t]*$/);
    1.94 +		my @decomposition = split(/[ \t]+/, $decompositionString);
    1.95 +		if (@decomposition && $decomposition[0] =~ /^<.*>$/)
    1.96 +			{
    1.97 +			$DecompTag{$code} = shift @decomposition;	# what remains is the list of code points only
    1.98 +			}
    1.99 +		if (scalar(@decomposition) == 1)
   1.100 +			{
   1.101 +			# We want to exclude codes such as these, with a singleton
   1.102 +			# decomposition mapping, but at the moment we don't know if the
   1.103 +			# character mapped to has a decomposition mapping, so we will
   1.104 +			# defer this to another stage.
   1.105 +			die("Decomposition $decomposition[0] not understood at line $lineNo")
   1.106 +				unless ($decomposition[0] =~ /^[0-9A-Fa-f]+$/);
   1.107 +			$SingletonDecomposition{$code} = hex($decomposition[0]);
   1.108 +			}
   1.109 +		elsif (1 < scalar(@decomposition))
   1.110 +			{
   1.111 +			die("Decomposition $decomposition[0] not understood at line $lineNo")
   1.112 +				unless ($decomposition[0] =~ /^[0-9A-Fa-f]+$/);
   1.113 +			$FirstOfDecompositionString{$code} = hex($decomposition[0]);
   1.114 +			}
   1.115 +		$CombiningClass{$code} = $combiningClass;
   1.116 +		$DataFileLineCodes{$lineNo-1} = $code;	# $lineNo was already incremented, hence the -1
   1.117 +		}
   1.118 +	elsif ($line !~ /^[ \t]*$/)	# blank lines are kept for output but carry no record
   1.119 +		{
   1.120 +		die 'Do not understand line '.$lineNo;
   1.121 +		}
   1.122 +	}
   1.123 +
   1.124 +# Exclude every code whose decomposition is longer than one character and
   1.125 +# begins with a character of non-zero combining class, i.e. whose
   1.126 +# decomposition starts with a non-starter.
   1.127 +for my $composite (keys %FirstOfDecompositionString)
   1.128 +	{
   1.129 +	my $leader = $FirstOfDecompositionString{$composite};
   1.130 +	# Leading characters with no recorded combining class cannot
   1.131 +	# trigger an exclusion.
   1.132 +	next unless exists($CombiningClass{$leader});
   1.133 +	# A combining class of zero means there is nothing to exclude.
   1.134 +	next if $CombiningClass{$leader} == 0;
   1.135 +	$Excluded{$composite} = 1;
   1.136 +	#printf("Excluding %X because its decomposition starts with a non-starter(%X)\n", $composite, $leader);
   1.137 +	}
   1.138 +
   1.139 +# Each code with a singleton decomposition is followed along the chain of
   1.140 +# singleton mappings (not at all if its own decomposition is tagged) and is
   1.141 +# excluded unless the chain ends at a code with a longer decomposition.
   1.142 +foreach my $code (sort { $a <=> $b } (keys %SingletonDecomposition))
   1.143 +	{
   1.144 +	my ($mapsTo, %visited) = ($code);	# %visited starts out empty for every code
   1.145 +	while (exists $SingletonDecomposition{$mapsTo} && !exists $DecompTag{$code})
   1.146 +		{
   1.147 +		# A mapping cycle in malformed data would otherwise loop forever.
   1.148 +		die(sprintf("Singleton decomposition of %X loops back to %X", $code, $mapsTo)) if $visited{$mapsTo}++;
   1.149 +		$mapsTo = $SingletonDecomposition{$mapsTo};
   1.150 +		}
   1.151 +	#printf("Excluding %X because its decomposition is a singleton(%X)\n", $code, $mapsTo);
   1.152 +	$Excluded{$code} = 1 unless exists $FirstOfDecompositionString{$mapsTo};
   1.153 +	}
   1.154 +
   1.155 +# Second pass: write every stored line back out in its original order,
   1.156 +# appending the extra field to each line that held a character record.
   1.157 +foreach my $index (0 .. $#DataFileLines)
   1.158 +	{
   1.159 +	my $extraField = '';	# lines without a recorded code are echoed unchanged
   1.160 +	if (exists($DataFileLineCodes{$index}))
   1.161 +		{
   1.162 +		# The field is "Symbian:" followed by E when the code is excluded.
   1.163 +		my $flag = exists($Excluded{ $DataFileLineCodes{$index} }) ? 'E' : '';
   1.164 +		$extraField = ';Symbian:' . $flag;
   1.165 +		}
   1.166 +	# Emit the line, its optional extra field and the newline removed by chomp.
   1.167 +	print $DataFileLines[$index] . $extraField . "\n";
   1.168 +	}