Update contrib.
2 # Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies).
4 # This component and the accompanying materials are made available
5 # under the terms of the License "Eclipse Public License v1.0"
6 # which accompanies this distribution, and is available
7 # at the URL "http://www.eclipse.org/legal/epl-v10.html".
9 # Initial Contributors:
10 # Nokia Corporation - initial contribution.
16 # UnicodeMaxDecompose.pl
18 # Adds maximal decompositions of the character and maximal decompositions of
19 # its folded variant to the Unicode data.
21 # Added as the fourth and fifth fields after the 'Symbian:' marker in the following format:
23 # Symbian:<grapheme-role>;<excluded>;<folded>;<max-decomposition>;<folded-decomposition>
24 # where each of <max-decomposition> and <folded-decomposition> are strings
25 # of hex numbers separated by spaces, representing the complete decomposition
26 # of the character and its folded equivalent respectively.
29 # perl -w UnicodeMaxDecompose.pl < <output-of-UnicodeAddFolded>
33 if (scalar(@ARGV) != 0)
35 print (STDERR "Usage:\nperl -w UnicodeMaxDecompose.pl < <output-of-UnicodeAddFolded>\n");
39 my %StatedDecomposition = ();
40 my %CompleteDecomposition = ();
45 return unless exists $StatedDecomposition{$code};
46 my $stated = $StatedDecomposition{$code};
47 delete $StatedDecomposition{$code};
49 foreach my $hexelt ( split(' ', $stated) )
54 if (exists $CompleteDecomposition{$hexelt})
56 push @complete, $CompleteDecomposition{$hexelt};
60 push @complete, $hexelt;
64 $CompleteDecomposition{$code} = join(' ', @complete);
72 while (my $line = <STDIN>)
76 # Split into fields: make sure trailing null strings are not
77 # deleted by adding a dummy final field
78 my @attribute = split(/;/, $line.';dummy');
79 # Delete the dummy field
81 die ("Line $lineNo is missing 'Symbian:' entries. Has UnicodeAddFolded been run?")
82 if (scalar(@attribute) == 16);
83 if (scalar(@attribute) == 17)
85 die ("Line $lineNo is missing 'Symbian:' entries. Has UnicodeAddFolded been run?")
86 if ($attribute[15] !~ /^[ \t]*symbian:/i);
87 my $code = $attribute[0];
88 die("First attribute '$code' not a valid Unicode codepoint at line $lineNo")
89 unless ($code =~ /^1?[0-9a-fA-F]{4,5}$/ && hex($code) < 0x110000);
90 my $decomposition = $attribute[5];
91 die("Decomposition '$decomposition' at line $lineNo is not a valid Unicode decomposition.")
92 unless $decomposition =~ /^[ \t]*(<.*>[ \t]*[0-9a-fA-F])?[0-9a-fA-F \t]*$/;
93 my $folded = $attribute[16];
# Validate the whole field: the pattern is anchored at both ends so that any
# character other than a hex digit or whitespace is rejected. Left unanchored,
# the '*' quantifier can match the empty string anywhere, so the check could
# never fail. \s (rather than only space/tab) also accepts a trailing line
# terminator, should one still be attached to the field.
94 die ("'$folded' not a valid string of hex values at line $lineNo.")
95 unless $folded =~ /^[0-9a-fA-F\s]*$/;
96 # Store all decompositions that have no tag and at least one value
97 if ($decomposition =~ /^[ \t]*[0-9a-fA-F]/)
99 $StatedDecomposition{$code} = $decomposition;
101 if ($folded =~ /[0-9a-fA-F]/)
103 $Folded{$code} = $folded;
105 $LineToCode{$lineNo-1} = $code;
107 elsif ($line !~ /^[ \t]*$/)
109 die 'Do not understand line '.$lineNo;
111 $RawLine[$lineNo-1] = $line;
114 # Completely decompose all strings in the %StatedDecomposition
115 foreach my $code (keys %StatedDecomposition)
120 # Now decompose all the folded versions
121 foreach my $code (keys %Folded)
124 foreach my $hexelt (split(' ', $Folded{$code}))
126 if (exists $CompleteDecomposition{$hexelt})
128 push @result, split(' ', $CompleteDecomposition{$hexelt});
132 push @result, $hexelt;
135 $Folded{$code} = join(' ', @result);
138 # Now output all the results
139 for (my $i = 0; $i != scalar(@RawLine); $i++)
142 if (exists $LineToCode{$i})
144 my $code = $LineToCode{$i};
147 $decomp = $CompleteDecomposition{$code}
148 if exists $CompleteDecomposition{$code};
150 if (exists $Folded{$code})
156 # If there is no folded value, but there is a decomposition
157 # sequence, the character must fold to the decomposition