Update contrib.
2 # Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
4 # This component and the accompanying materials are made available
5 # under the terms of "Eclipse Public License v1.0"
6 # which accompanies this distribution, and is available
7 # at the URL "http://www.eclipse.org/legal/epl-v10.html".
9 # Initial Contributors:
10 # Nokia Corporation - initial contribution.
# Work out a "group" directory relative to this script's own location and put
# it at the front of @INC so that modules kept there can be loaded.
# NOTE(review): this listing appears to omit some original lines (brace-only
# lines, an "else", possibly "use" statements), so the branch structure below
# is inferred from the surviving statements only - confirm against the full file.
22 my $perlScriptPath=$0;
23 $perlScriptPath=~s/\//\\/g; # replace any forward-slashes with back-slashes
24 $perlScriptPath=~s/\\?[^\\]+$//; # get rid of this Perl-script's file-name
# An empty path means the script was invoked without any directory component.
25 if ($perlScriptPath eq '')
27 $perlScriptPath='..\group';
# Presumably the "else" branch: swap the last path component for "group",
# keeping the back-slash in front of it (captured as $1) if there is one.
31 $perlScriptPath=~s/(\\?)[^\\]+$/$1group/;
33 unshift(@INC, $perlScriptPath); # can't do "use lib $perlScriptPath" here as "use lib" only seems to work with *hard-coded* directory names
# Print the version/usage text (via die) when there are no arguments or the
# first argument looks like a request for help.
# NOTE(review): these patterns are unanchored, so any first argument that merely
# contains "?", "-h", "/h" or "help" (for example a file name) also triggers
# the usage text.
37 if ((@ARGV==0) || ($ARGV[0]=~/\?/i) || ($ARGV[0]=~/-h/i) || ($ARGV[0]=~/\/h/i) || ($ARGV[0]=~/help/i))
# NOTE(review): "reservered" in the message below looks like a typo for
# "reserved"; it is runtime output, so it is left untouched here.
39 die("\nVersion 021\n\nCharacter-set conversion-table generating tool\nCopyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reservered.\n\nUsage:\n\n\tperl analyse.pl <source-file> <output-file> foreign|Unicode [options]\n\nwhere the following options are available (each has a short form and a long form which are shown below separated by a '|'):\n\n\t-c | -columns(<a>: <b>, <c>)\n\t-p | -cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed\n\t-u | -sourceFilesToSubtract(<a>, <b>, ...)\n\n");
# Option defaults. @columns holds the three numbers of the "-columns(<a>: <b>, <c>)"
# option; readSourceFile uses the first as the number of hex columns per line
# and the other two as the positions of the foreign and Unicode codes.
41 my @columns=(2, 1, 2);
42 my $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=0;
43 my @sourceFilesToSubtract=();
# extractCommandLineFlags splices every option it recognises out of @ARGV, so
# the three shifts below see only the positional arguments
# (<source-file> <output-file> foreign|Unicode).
44 &extractCommandLineFlags(\@columns, \$cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed, \@sourceFilesToSubtract);
45 my $sourceFileName=shift;
46 my $outputFileName=shift;
47 my $columnToSortOn=shift;
# Maps each code of the column being sorted on to the code in the other column.
48 my %characterCodesOfOtherColumn=();
# NOTE(review): %linesSorted is used below but its declaration is not visible
# in this listing - presumably it sits on an omitted line; confirm.
# NOTE(review): the open calls in this section use the two-argument form, which
# interpolates the file name into the mode string; three-argument open would
# avoid surprises with unusual file names.
50 open(SOURCE_FILE, "< $sourceFileName") or die("Error: could not open \"$sourceFileName\" for reading");
# Final argument 0: presumably marks this as the main source file (as opposed
# to a file to subtract, which passes 1 below) - the matching parameter line
# of readSourceFile is not visible, so confirm.
51 &readSourceFile(\*SOURCE_FILE, $sourceFileName, \%characterCodesOfOtherColumn, \%linesSorted, $columnToSortOn, \@columns, $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed, 0);
52 close(SOURCE_FILE) or die("Error: could not close \"$sourceFileName\"\n");
# Run every file named by -sourceFilesToSubtract through the same reader,
# sharing the two hashes filled from the main source file.
53 my $sourceFileToSubtract;
54 foreach $sourceFileToSubtract (@sourceFilesToSubtract)
56 open(SOURCE_FILE_TO_SUBTRACT, "< $sourceFileToSubtract") or die("Error: could not open \"$sourceFileToSubtract\" for reading\n");
57 &readSourceFile(\*SOURCE_FILE_TO_SUBTRACT, $sourceFileToSubtract, \%characterCodesOfOtherColumn, \%linesSorted, $columnToSortOn, \@columns, $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed, 1);
58 close(SOURCE_FILE_TO_SUBTRACT) or die("Error: could not close \"$sourceFileToSubtract\"\n");
# The output file is opened only after all inputs have been read.
60 open(OUTPUT_FILE, "> $outputFileName") or die("Error: could not open \"$outputFileName\" for writing");
# Write the collected lines to OUTPUT_FILE in ascending numeric key order,
# marking the end of each run of consecutive keys ("contiguous block") with a
# comment that describes how the two columns relate within that run.
# NOTE(review): the declarations of $key, $previousKey, $offset and
# $numberOfBreaks, and the statements that update $previousKey and
# $numberOfBreaks, are not visible in this listing - presumably on omitted lines.
62 my $numberOfMissingSpaces=0;
63 my $numberOfLinesSorted=0;
67 foreach $key (sort {$a<=>$b} (keys(%linesSorted)))
# An empty $previousKey means no key has been processed yet.
69 if ($previousKey ne "")
# Sanity check: the sorted keys must be strictly increasing.
71 $previousKey<$key or die("Error: there appears to be a mix up with the keys \"$previousKey\" and \"$key\"");
# A gap between consecutive keys closes the current contiguous block.
72 if ($previousKey!=$key-1)
# NOTE(review): $key-$previousKey is one more than the number of unused codes
# strictly between the two keys - confirm whether that is the intended count.
75 $numberOfMissingSpaces+=$key-$previousKey;
# Undefined $offset is reported as RANDOM, zero as DIRECT, anything else as OFFSET.
76 print(OUTPUT_FILE "# End of contiguous block - relationship between the columns in this block: ".((!defined $offset)? "RANDOM": ($offset==0)? "DIRECT": "OFFSET ($offset)")."\n\n");
# At the start of a block, record the difference between the other column's
# code and the key.
79 if (($previousKey eq "") || ($previousKey!=$key-1))
81 $offset=$characterCodesOfOtherColumn{$key}-$key;
# Inside a block: this key's difference disagrees with the recorded offset.
# The statement guarded by this condition is not visible here - presumably it
# clears $offset so that the block is reported as RANDOM; confirm.
83 elsif ((defined $offset) && ($offset!=$characterCodesOfOtherColumn{$key}-$key))
87 print(OUTPUT_FILE "$linesSorted{$key}");
88 ++$numberOfLinesSorted;
# Close the final block (presumably after the loop has ended).
91 print(OUTPUT_FILE "# End of contiguous block - relationship between the columns in this block: ".((!defined $offset)? "RANDOM": ($offset==0)? "DIRECT": "OFFSET ($offset)")."\n\n");
# Count how many right-shifts (minimum 1) reduce $numberOfLinesSorted to zero;
# this is reported below as the maximum number of binary-search iterations.
93 my $maximumNumberOfIterationsWhenBinarySearching=1;
94 while (($numberOfLinesSorted>>$maximumNumberOfIterationsWhenBinarySearching)>0)
96 ++$maximumNumberOfIterationsWhenBinarySearching;
# Summary statistics go to standard output (no trailing newline is printed).
98 print("The number of breaks was $numberOfBreaks\nThe number of missing spaces was $numberOfMissingSpaces\nThe number of lines sorted was $numberOfLinesSorted\nThe maximum number of iterations when binary searching would be $maximumNumberOfIterationsWhenBinarySearching");
# extractCommandLineFlags - pull the recognised options out of @ARGV.
#
# Arguments (all references, filled in place):
#   $columns                - array ref receiving the three -columns numbers
#                             (its "shift" line is not visible in this listing,
#                             but @$columns is assigned below)
#   $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed - scalar ref, set to 1 by -p
#   $sourceFilesToSubtract  - array ref receiving the -sourceFilesToSubtract names
#
# Every recognised option is spliced out of @ARGV. Dies on a malformed
# -columns or -sourceFilesToSubtract option.
# NOTE(review): the sub is declared with an empty prototype "()" yet takes
# arguments; the visible caller uses the &name(...) form, which bypasses
# prototypes.
# NOTE(review): some scaffolding (braces, the inner accumulation loops and
# their exits, the handling of unrecognised arguments / increment of $i) is
# not visible in this listing.
100 sub extractCommandLineFlags()
103 my $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=shift;
104 my $sourceFilesToSubtract=shift;
106 for ($i=0; $i<=$#ARGV;) # (i) not cache-ing $#ARGV into a variable as @ARGV may change length in this loop (ii) iterate forwards as some parameters may occupy more than one element in @ARGV
# -c / -columns: whatever follows the option name in the same argument is
# captured as the start of the option's data.
108 if (($ARGV[$i]=~/^-c\b(.*)$/i) || ($ARGV[$i]=~/^-columns\b(.*)$/i))
111 splice(@ARGV, $i, 1);
# Accept the data once it has the complete form "(<a>: <b>, <c>)" ...
114 if ($columnsData=~/^\s*\(\s*(\d+)\s*:\s*(\d+)\s*,\s*(\d+)\s*\)\s*$/)
116 @$columns=($1, $2, $3);
# ... otherwise append the next command-line argument to the data (the shell
# may have split the option at spaces); running out of arguments is an error.
119 ($#ARGV>=$i) or die("Error: bad \"-columns\" format\n");
120 $columnsData.=(splice(@ARGV, $i, 1))[0];
# -p / -cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed: a plain switch.
123 elsif (($ARGV[$i]=~/^-p$/i) || ($ARGV[$i]=~/^-cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed$/i))
125 splice(@ARGV, $i, 1);
126 $$cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=1;
# -u / -sourceFilesToSubtract: gathered the same way as -columns.
128 elsif (($ARGV[$i]=~/^-u\b(.*)$/i) || ($ARGV[$i]=~/^-sourceFilesToSubtract\b(.*)$/i))
130 my $sourceFilesData=$1;
131 splice(@ARGV, $i, 1);
# Complete once the data is a parenthesised, non-empty list.
134 if ($sourceFilesData=~/^\s*\(\s*(.+)\)\s*$/)
# This inner "my" shadows the outer $sourceFilesData with the text captured
# from between the parentheses.
136 my $sourceFilesData=$1;
# Limit -1 keeps trailing empty fields so that they are caught by the check below.
137 @$sourceFilesToSubtract=split(/,/, $sourceFilesData, -1);
# Trim surrounding whitespace from each name and reject empty names.
139 for ($j=$#$sourceFilesToSubtract; $j>=0; --$j)
141 $sourceFilesToSubtract->[$j]=~s/^\s+//;
142 $sourceFilesToSubtract->[$j]=~s/\s+$//;
143 ($sourceFilesToSubtract->[$j] ne '') or die("Error: bad \"-sourceFilesToSubtract\" format (1)\n");
# Not complete yet: append the next command-line argument, or fail if none is left.
147 ($#ARGV>=$i) or die("Error: bad \"-sourceFilesToSubtract\" format (2)\n");
148 $sourceFilesData.=(splice(@ARGV, $i, 1))[0];
# Body of what is presumably "sub readSourceFile" (the "sub" line itself and a
# few parameter lines are not visible in this listing; the visible callers pass
# file handle, file name, two hash refs, the column to sort on, the columns
# array ref, the private-use flag and a final 0/1 value, in that order).
#
# Reads mapping lines from $fileHandle and records them in %$linesSorted and
# %$characterCodesOfOtherColumn, keyed by the character code of the column
# selected by $columnToSortOn. Dies on bad -columns data, on an unknown
# $columnToSortOn value and on an unrecognised input line.
160 my $fileHandle=shift;
162 my $characterCodesOfOtherColumn=shift;
163 my $linesSorted=shift;
164 my $columnToSortOn=shift;
166 my $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=shift;
168 my $foreignCharacterCodeProcessingCode='';
# Validate -columns: all three numbers positive, the two column positions no
# greater than the column count, and different from each other.
169 if (!(($columns->[0]>0) && ($columns->[1]>0) && ($columns->[2]>0) && ($columns->[1]<=$columns->[0]) && ($columns->[2]<=$columns->[0]) && ($columns->[1]!=$columns->[2])))
172 die("Error: bad \"-columns\" data\n");
# Build a pattern of $columns->[0] whitespace-separated "0x<hex>" fields, each
# captured in its own group.
174 my $patternOfLineContainingCharacterCodes=join('\s+', ('0x([0-9a-f]+)') x $columns->[0]);
176 my $strippedDownLine;
# nextNonEmptyStrippedDownLine is defined elsewhere; it returns the raw line
# and a stripped-down form of it.
179 ($line, $strippedDownLine)=&nextNonEmptyStrippedDownLine($fileHandle);
# Presumably an empty stripped-down line signals the end of the input and ends
# the read loop - the guarded statement is not visible here; confirm.
180 if ($strippedDownLine eq '')
# Directive line: remember a snippet of Perl that is later applied to every
# foreign character code.
184 if ($strippedDownLine=~/^SET_FOREIGN_CHARACTER_CODE_PROCESSING_CODE\s+(.*)$/i)
186 $foreignCharacterCodeProcessingCode=$1;
188 elsif ($strippedDownLine=~/^$patternOfLineContainingCharacterCodes$/i)
190 no strict 'refs'; # so that we can use symbolic references for $1, $2, etc
# ${N} with N taken from -columns reads capture group N of the match above.
191 my $foreignCharacterCode=hex(${$columns->[1]});
192 my $unicodeCharacterCode=hex(${$columns->[2]});
194 if ($foreignCharacterCodeProcessingCode ne '')
# NOTE(review): string eval of text taken from the source file - only safe
# when the source files are trusted.
196 $foreignCharacterCode=eval($foreignCharacterCodeProcessingCode);
# Decide which code is the key and which is the mapped-to value.
198 my $characterCodeOfColumnToSortOn;
199 my $characterCodeOfOtherColumn;
200 if ($columnToSortOn=~/^foreign$/i)
202 $characterCodeOfColumnToSortOn=$foreignCharacterCode;
203 $characterCodeOfOtherColumn=$unicodeCharacterCode;
205 elsif ($columnToSortOn=~/^Unicode$/i)
207 $characterCodeOfColumnToSortOn=$unicodeCharacterCode;
208 $characterCodeOfOtherColumn=$foreignCharacterCode;
212 die("Error: bad parameter \"$columnToSortOn\"");
# Process the entry unless the flag is set and the Unicode code lies in
# 0xE000-0xF8FF or 0xF0000-0x10FFFF.
214 if ((!$cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed) || !((($unicodeCharacterCode>=0xe000) && ($unicodeCharacterCode<=0xf8ff)) || (($unicodeCharacterCode>=0xf0000) && ($unicodeCharacterCode<=0x10ffff))))
# Presumably the "file to subtract" case (the condition selecting it is not
# visible): prefix the already-stored line with "### " and warn when the
# stored mapping differs from the one in this file.
218 $linesSorted->{$characterCodeOfColumnToSortOn}='### '.$linesSorted->{$characterCodeOfColumnToSortOn};
219 if ($characterCodesOfOtherColumn->{$characterCodeOfColumnToSortOn}!=$characterCodeOfOtherColumn)
221 printf(STDERR "Warning: 0x%x maps to 0x%x in the main source file, but to 0x%x in a source file to be extracted\n", $characterCodeOfColumnToSortOn, $characterCodesOfOtherColumn->{$characterCodeOfColumnToSortOn}, $characterCodeOfOtherColumn);
# Presumably the main-source case: store the raw line and the other column's code.
226 $linesSorted->{$characterCodeOfColumnToSortOn}=$line;
227 $characterCodesOfOtherColumn->{$characterCodeOfColumnToSortOn}=$characterCodeOfOtherColumn;
# The only other line form tolerated is "0x<hex> # undefined..."; anything
# else aborts the run.
231 elsif ($line!~/^\s*0x([0-9a-f]+)\s*#\s*undefined.*$/i)
234 die("Error: unexpected line in \"$fileName\":\n $line\n");