#
# Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies).
# All rights reserved.
# This component and the accompanying materials are made available
# under the terms of the License "Eclipse Public License v1.0"
# which accompanies this distribution, and is available
# at the URL "http://www.eclipse.org/legal/epl-v10.html".
#
# Initial Contributors:
# Nokia Corporation - initial contribution.
#
# Contributors:
#
# Description:
#
# UnicodeMaxDecompose.pl
#
# Adds maximal decompositions of the character and maximal decompositions of
# its folded variant to the Unicode data.
#
# Added as the fourth field after the 'Symbian:' marker in the following format:
#
# Symbian:<field-1>;<field-2>;<field-3>;<decomposition>;<folded-decomposition>
# where each of <decomposition> and <folded-decomposition> is a string
# of hex numbers separated by spaces, representing the complete decomposition
# of the character and its folded equivalent respectively.
#
# Usage:
# perl -w UnicodeMaxDecompose.pl < <output-of-UnicodeAddFolded>

use strict;
use warnings;

# Untagged decompositions exactly as read from the input, keyed by the
# code point string from the first field. Entries are removed as they are
# expanded by Decompose().
my %StatedDecomposition = ();
# Fully expanded decompositions (space-separated hex strings), keyed by
# the same code point strings.
my %CompleteDecomposition = ();

# Decompose($code)
# Recursively expands the stated decomposition of $code and stores the
# result in %CompleteDecomposition{$code}. Does nothing when $code has no
# pending entry in %StatedDecomposition (never stated, or already done).
sub Decompose {
    my ($code) = @_;
    return unless exists $StatedDecomposition{$code};
    # Remove the entry before recursing so that each code is expanded only
    # once and a cyclic definition cannot recurse forever.
    my $stated = delete $StatedDecomposition{$code};
    my @complete = ();
    foreach my $hexelt (split(' ', $stated)) {
        next unless $hexelt;
        Decompose($hexelt);
        push @complete,
            (exists $CompleteDecomposition{$hexelt}
                ? $CompleteDecomposition{$hexelt}
                : $hexelt);
    }
    $CompleteDecomposition{$code} = join(' ', @complete);
    return;
}

# ParseLine($line, $lineNo)
# Validates one chomped input line.
# Returns ($code, $decomposition, $folded) for a 17-field data line and an
# empty list for a line containing only spaces/tabs.
# Dies with a message naming $lineNo for anything else.
sub ParseLine {
    my ($line, $lineNo) = @_;
    # Split into fields: make sure trailing null strings are not
    # deleted by adding a dummy final field, then drop the dummy.
    my @attribute = split(/;/, $line . ';dummy');
    pop @attribute;
    die("Line $lineNo is missing 'Symbian:' entries. Has UnicodeAddFolded been run?")
        if scalar(@attribute) == 16;
    if (scalar(@attribute) != 17) {
        die 'Do not understand line ' . $lineNo
            unless $line =~ /^[ \t]*$/;
        return;
    }
    die("Line $lineNo is missing 'Symbian:' entries. Has UnicodeAddFolded been run?")
        if $attribute[15] !~ /^[ \t]*symbian:/i;
    my $code = $attribute[0];
    die("First attribute '$code' not a valid Unicode codepoint at line $lineNo")
        unless ($code =~ /^1?[0-9a-fA-F]{4,5}$/ && hex($code) < 0x110000);
    my $decomposition = $attribute[5];
    die("Decomposition '$decomposition' at line $lineNo is not a valid Unicode decomposition.")
        unless $decomposition =~ /^[ \t]*(<.*>[ \t]*[0-9a-fA-F])?[0-9a-fA-F \t]*$/;
    my $folded = $attribute[16];
    # Anchored at both ends: an unanchored pattern of this form matches the
    # empty string inside any input, so it could never reject anything.
    die("'$folded' not a valid string of hex values at line $lineNo.")
        unless $folded =~ /^[0-9a-fA-F \t]*$/;
    return ($code, $decomposition, $folded);
}

# AddMaxDecompositions(@lines)
# Takes the input lines (with or without trailing newlines) and returns
# the output lines without newlines. Every data line gains
# ';<decomposition>;<folded-decomposition>'; blank lines are returned
# unchanged. Dies on the first malformed line, before producing anything.
sub AddMaxDecompositions {
    my @rawLine = @_;
    chomp @rawLine;

    # Start from a clean slate so repeated calls do not see old data.
    %StatedDecomposition = ();
    %CompleteDecomposition = ();
    my %folded = ();
    my %lineToCode = ();

    foreach my $index (0 .. $#rawLine) {
        my @parsed = ParseLine($rawLine[$index], $index + 1);
        next unless @parsed;
        my ($code, $decomposition, $foldedTo) = @parsed;
        # Store all decompositions that have no tag and at least one value
        $StatedDecomposition{$code} = $decomposition
            if $decomposition =~ /^[ \t]*[0-9a-fA-F]/;
        $folded{$code} = $foldedTo
            if $foldedTo =~ /[0-9a-fA-F]/;
        $lineToCode{$index} = $code;
    }

    # Completely decompose all strings in %StatedDecomposition. keys()
    # returns a snapshot, so Decompose() may delete entries as it goes.
    foreach my $code (keys %StatedDecomposition) {
        Decompose($code);
    }

    # Now decompose all the folded versions
    foreach my $code (keys %folded) {
        my @result = ();
        foreach my $hexelt (split(' ', $folded{$code})) {
            if (exists $CompleteDecomposition{$hexelt}) {
                push @result, split(' ', $CompleteDecomposition{$hexelt});
            }
            else {
                push @result, $hexelt;
            }
        }
        $folded{$code} = join(' ', @result);
    }

    # Build the output lines
    my @output = ();
    foreach my $index (0 .. $#rawLine) {
        my $text = $rawLine[$index];
        if (exists $lineToCode{$index}) {
            my $code = $lineToCode{$index};
            my $decomp = exists $CompleteDecomposition{$code}
                ? $CompleteDecomposition{$code}
                : '';
            # If there is no folded value, but there is a decomposition
            # sequence, the character must fold to the decomposition
            # sequence too.
            my $foldedDecomp = exists $folded{$code} ? $folded{$code} : $decomp;
            $text .= ';' . $decomp . ';' . $foldedDecomp;
        }
        push @output, $text;
    }
    return @output;
}

# Main()
# Command-line entry point: rejects any arguments, then filters STDIN to
# STDOUT.
sub Main {
    if (scalar(@ARGV) != 0) {
        print STDERR "Usage:\nperl -w UnicodeMaxDecompose.pl < <output-of-UnicodeAddFolded>\n";
        exit 1;
    }
    foreach my $text (AddMaxDecompositions(<STDIN>)) {
        print $text, "\n";
    }
    return;
}

# Run only when executed as a script, so the subs above can be loaded
# and called without consuming STDIN.
Main() unless caller;

1;