sl@0
|
1 |
#
|
sl@0
|
2 |
# Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
|
sl@0
|
3 |
# All rights reserved.
|
sl@0
|
4 |
# This component and the accompanying materials are made available
|
sl@0
|
5 |
# under the terms of "Eclipse Public License v1.0"
|
sl@0
|
6 |
# which accompanies this distribution, and is available
|
sl@0
|
7 |
# at the URL "http://www.eclipse.org/legal/epl-v10.html".
|
sl@0
|
8 |
#
|
sl@0
|
9 |
# Initial Contributors:
|
sl@0
|
10 |
# Nokia Corporation - initial contribution.
|
sl@0
|
11 |
#
|
sl@0
|
12 |
# Contributors:
|
sl@0
|
13 |
#
|
sl@0
|
14 |
# Description:
|
sl@0
|
15 |
#
|
sl@0
|
16 |
|
sl@0
|
17 |
use strict;
|
sl@0
|
18 |
use integer;
|
sl@0
|
19 |
|
sl@0
|
20 |
BEGIN
|
sl@0
|
21 |
{
|
sl@0
|
22 |
my $perlScriptPath=$0;
|
sl@0
|
23 |
$perlScriptPath=~s/\//\\/g; # replace any forward-slashes with back-slashes
|
sl@0
|
24 |
$perlScriptPath=~s/\\?[^\\]+$//; # get rid of this Perl-script's file-name
|
sl@0
|
25 |
if ($perlScriptPath eq '')
|
sl@0
|
26 |
{
|
sl@0
|
27 |
$perlScriptPath='..\group';
|
sl@0
|
28 |
}
|
sl@0
|
29 |
else
|
sl@0
|
30 |
{
|
sl@0
|
31 |
$perlScriptPath=~s/(\\?)[^\\]+$/$1group/;
|
sl@0
|
32 |
}
|
sl@0
|
33 |
unshift(@INC, $perlScriptPath); # can't do "use lib $perlScriptPath" here as "use lib" only seems to work with *hard-coded* directory names
|
sl@0
|
34 |
}
|
sl@0
|
35 |
use PARSER;
|
sl@0
|
36 |
|
sl@0
|
37 |
if ((@ARGV==0) || ($ARGV[0]=~/\?/i) || ($ARGV[0]=~/-h/i) || ($ARGV[0]=~/\/h/i) || ($ARGV[0]=~/help/i))
|
sl@0
|
38 |
{
|
sl@0
|
39 |
die("\nVersion 021\n\nCharacter-set conversion-table generating tool\nCopyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reservered.\n\nUsage:\n\n\tperl analyse.pl <source-file> <output-file> foreign|Unicode [options]\n\nwhere the following options are available (each has a short form and a long form which are shown below separated by a '|'):\n\n\t-c | -columns(<a>: <b>, <c>)\n\t-p | -cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed\n\t-u | -sourceFilesToSubtract(<a>, <b>, ...)\n\n");
|
sl@0
|
40 |
}
|
sl@0
|
41 |
my @columns=(2, 1, 2);
|
sl@0
|
42 |
my $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=0;
|
sl@0
|
43 |
my @sourceFilesToSubtract=();
|
sl@0
|
44 |
&extractCommandLineFlags(\@columns, \$cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed, \@sourceFilesToSubtract);
|
sl@0
|
45 |
my $sourceFileName=shift;
|
sl@0
|
46 |
my $outputFileName=shift;
|
sl@0
|
47 |
my $columnToSortOn=shift;
|
sl@0
|
48 |
my %characterCodesOfOtherColumn=();
|
sl@0
|
49 |
my %linesSorted=();
|
sl@0
|
50 |
open(SOURCE_FILE, "< $sourceFileName") or die("Error: could not open \"$sourceFileName\" for reading");
|
sl@0
|
51 |
&readSourceFile(\*SOURCE_FILE, $sourceFileName, \%characterCodesOfOtherColumn, \%linesSorted, $columnToSortOn, \@columns, $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed, 0);
|
sl@0
|
52 |
close(SOURCE_FILE) or die("Error: could not close \"$sourceFileName\"\n");
|
sl@0
|
53 |
my $sourceFileToSubtract;
|
sl@0
|
54 |
foreach $sourceFileToSubtract (@sourceFilesToSubtract)
|
sl@0
|
55 |
{
|
sl@0
|
56 |
open(SOURCE_FILE_TO_SUBTRACT, "< $sourceFileToSubtract") or die("Error: could not open \"$sourceFileToSubtract\" for reading\n");
|
sl@0
|
57 |
&readSourceFile(\*SOURCE_FILE_TO_SUBTRACT, $sourceFileToSubtract, \%characterCodesOfOtherColumn, \%linesSorted, $columnToSortOn, \@columns, $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed, 1);
|
sl@0
|
58 |
close(SOURCE_FILE_TO_SUBTRACT) or die("Error: could not close \"$sourceFileToSubtract\"\n");
|
sl@0
|
59 |
}
|
sl@0
|
60 |
open(OUTPUT_FILE, "> $outputFileName") or die("Error: could not open \"$outputFileName\" for writing");
|
sl@0
|
61 |
my $numberOfBreaks=0;
|
sl@0
|
62 |
my $numberOfMissingSpaces=0;
|
sl@0
|
63 |
my $numberOfLinesSorted=0;
|
sl@0
|
64 |
my $previousKey="";
|
sl@0
|
65 |
my $offset=0;
|
sl@0
|
66 |
my $key;
|
sl@0
|
67 |
foreach $key (sort {$a<=>$b} (keys(%linesSorted)))
|
sl@0
|
68 |
{
|
sl@0
|
69 |
if ($previousKey ne "")
|
sl@0
|
70 |
{
|
sl@0
|
71 |
$previousKey<$key or die("Error: there appears to be a mix up with the keys \"$previousKey\" and \"$key\"");
|
sl@0
|
72 |
if ($previousKey!=$key-1)
|
sl@0
|
73 |
{
|
sl@0
|
74 |
++$numberOfBreaks;
|
sl@0
|
75 |
$numberOfMissingSpaces+=$key-$previousKey;
|
sl@0
|
76 |
print(OUTPUT_FILE "# End of contiguous block - relationship between the columns in this block: ".((!defined $offset)? "RANDOM": ($offset==0)? "DIRECT": "OFFSET ($offset)")."\n\n");
|
sl@0
|
77 |
}
|
sl@0
|
78 |
}
|
sl@0
|
79 |
if (($previousKey eq "") || ($previousKey!=$key-1))
|
sl@0
|
80 |
{
|
sl@0
|
81 |
$offset=$characterCodesOfOtherColumn{$key}-$key;
|
sl@0
|
82 |
}
|
sl@0
|
83 |
elsif ((defined $offset) && ($offset!=$characterCodesOfOtherColumn{$key}-$key))
|
sl@0
|
84 |
{
|
sl@0
|
85 |
undef $offset;
|
sl@0
|
86 |
}
|
sl@0
|
87 |
print(OUTPUT_FILE "$linesSorted{$key}");
|
sl@0
|
88 |
++$numberOfLinesSorted;
|
sl@0
|
89 |
$previousKey=$key;
|
sl@0
|
90 |
}
|
sl@0
|
91 |
print(OUTPUT_FILE "# End of contiguous block - relationship between the columns in this block: ".((!defined $offset)? "RANDOM": ($offset==0)? "DIRECT": "OFFSET ($offset)")."\n\n");
|
sl@0
|
92 |
close(OUTPUT_FILE);
|
sl@0
|
93 |
my $maximumNumberOfIterationsWhenBinarySearching=1;
|
sl@0
|
94 |
while (($numberOfLinesSorted>>$maximumNumberOfIterationsWhenBinarySearching)>0)
|
sl@0
|
95 |
{
|
sl@0
|
96 |
++$maximumNumberOfIterationsWhenBinarySearching;
|
sl@0
|
97 |
}
|
sl@0
|
98 |
print("The number of breaks was $numberOfBreaks\nThe number of missing spaces was $numberOfMissingSpaces\nThe number of lines sorted was $numberOfLinesSorted\nThe maximum number of iterations when binary searching would be $maximumNumberOfIterationsWhenBinarySearching");
|
sl@0
|
99 |
|
sl@0
|
100 |
sub extractCommandLineFlags()
|
sl@0
|
101 |
{
|
sl@0
|
102 |
my $columns=shift;
|
sl@0
|
103 |
my $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=shift;
|
sl@0
|
104 |
my $sourceFilesToSubtract=shift;
|
sl@0
|
105 |
my $i;
|
sl@0
|
106 |
for ($i=0; $i<=$#ARGV;) # (i) not cache-ing $#ARGV into a variable as @ARGV may change length in this loop (ii) iterate forwards as some parameters may occupy more than one element in @ARGV
|
sl@0
|
107 |
{
|
sl@0
|
108 |
if (($ARGV[$i]=~/^-c\b(.*)$/i) || ($ARGV[$i]=~/^-columns\b(.*)$/i))
|
sl@0
|
109 |
{
|
sl@0
|
110 |
my $columnsData=$1;
|
sl@0
|
111 |
splice(@ARGV, $i, 1);
|
sl@0
|
112 |
for (;;)
|
sl@0
|
113 |
{
|
sl@0
|
114 |
if ($columnsData=~/^\s*\(\s*(\d+)\s*:\s*(\d+)\s*,\s*(\d+)\s*\)\s*$/)
|
sl@0
|
115 |
{
|
sl@0
|
116 |
@$columns=($1, $2, $3);
|
sl@0
|
117 |
last;
|
sl@0
|
118 |
}
|
sl@0
|
119 |
($#ARGV>=$i) or die("Error: bad \"-columns\" format\n");
|
sl@0
|
120 |
$columnsData.=(splice(@ARGV, $i, 1))[0];
|
sl@0
|
121 |
}
|
sl@0
|
122 |
}
|
sl@0
|
123 |
elsif (($ARGV[$i]=~/^-p$/i) || ($ARGV[$i]=~/^-cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed$/i))
|
sl@0
|
124 |
{
|
sl@0
|
125 |
splice(@ARGV, $i, 1);
|
sl@0
|
126 |
$$cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=1;
|
sl@0
|
127 |
}
|
sl@0
|
128 |
elsif (($ARGV[$i]=~/^-u\b(.*)$/i) || ($ARGV[$i]=~/^-sourceFilesToSubtract\b(.*)$/i))
|
sl@0
|
129 |
{
|
sl@0
|
130 |
my $sourceFilesData=$1;
|
sl@0
|
131 |
splice(@ARGV, $i, 1);
|
sl@0
|
132 |
for (;;)
|
sl@0
|
133 |
{
|
sl@0
|
134 |
if ($sourceFilesData=~/^\s*\(\s*(.+)\)\s*$/)
|
sl@0
|
135 |
{
|
sl@0
|
136 |
my $sourceFilesData=$1;
|
sl@0
|
137 |
@$sourceFilesToSubtract=split(/,/, $sourceFilesData, -1);
|
sl@0
|
138 |
my $j;
|
sl@0
|
139 |
for ($j=$#$sourceFilesToSubtract; $j>=0; --$j)
|
sl@0
|
140 |
{
|
sl@0
|
141 |
$sourceFilesToSubtract->[$j]=~s/^\s+//;
|
sl@0
|
142 |
$sourceFilesToSubtract->[$j]=~s/\s+$//;
|
sl@0
|
143 |
($sourceFilesToSubtract->[$j] ne '') or die("Error: bad \"-sourceFilesToSubtract\" format (1)\n");
|
sl@0
|
144 |
}
|
sl@0
|
145 |
last;
|
sl@0
|
146 |
}
|
sl@0
|
147 |
($#ARGV>=$i) or die("Error: bad \"-sourceFilesToSubtract\" format (2)\n");
|
sl@0
|
148 |
$sourceFilesData.=(splice(@ARGV, $i, 1))[0];
|
sl@0
|
149 |
}
|
sl@0
|
150 |
}
|
sl@0
|
151 |
else
|
sl@0
|
152 |
{
|
sl@0
|
153 |
++$i;
|
sl@0
|
154 |
}
|
sl@0
|
155 |
}
|
sl@0
|
156 |
}
|
sl@0
|
157 |
|
sl@0
|
158 |
sub readSourceFile
|
sl@0
|
159 |
{
|
sl@0
|
160 |
my $fileHandle=shift;
|
sl@0
|
161 |
my $fileName=shift;
|
sl@0
|
162 |
my $characterCodesOfOtherColumn=shift;
|
sl@0
|
163 |
my $linesSorted=shift;
|
sl@0
|
164 |
my $columnToSortOn=shift;
|
sl@0
|
165 |
my $columns=shift;
|
sl@0
|
166 |
my $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=shift;
|
sl@0
|
167 |
my $subtract=shift;
|
sl@0
|
168 |
my $foreignCharacterCodeProcessingCode='';
|
sl@0
|
169 |
if (!(($columns->[0]>0) && ($columns->[1]>0) && ($columns->[2]>0) && ($columns->[1]<=$columns->[0]) && ($columns->[2]<=$columns->[0]) && ($columns->[1]!=$columns->[2])))
|
sl@0
|
170 |
{
|
sl@0
|
171 |
close($fileHandle);
|
sl@0
|
172 |
die("Error: bad \"-columns\" data\n");
|
sl@0
|
173 |
}
|
sl@0
|
174 |
my $patternOfLineContainingCharacterCodes=join('\s+', ('0x([0-9a-f]+)') x $columns->[0]);
|
sl@0
|
175 |
my $line;
|
sl@0
|
176 |
my $strippedDownLine;
|
sl@0
|
177 |
for (;;)
|
sl@0
|
178 |
{
|
sl@0
|
179 |
($line, $strippedDownLine)=&nextNonEmptyStrippedDownLine($fileHandle);
|
sl@0
|
180 |
if ($strippedDownLine eq '')
|
sl@0
|
181 |
{
|
sl@0
|
182 |
last;
|
sl@0
|
183 |
}
|
sl@0
|
184 |
if ($strippedDownLine=~/^SET_FOREIGN_CHARACTER_CODE_PROCESSING_CODE\s+(.*)$/i)
|
sl@0
|
185 |
{
|
sl@0
|
186 |
$foreignCharacterCodeProcessingCode=$1;
|
sl@0
|
187 |
}
|
sl@0
|
188 |
elsif ($strippedDownLine=~/^$patternOfLineContainingCharacterCodes$/i)
|
sl@0
|
189 |
{
|
sl@0
|
190 |
no strict 'refs'; # so that we can use symbolic references for $1, $2, etc
|
sl@0
|
191 |
my $foreignCharacterCode=hex(${$columns->[1]});
|
sl@0
|
192 |
my $unicodeCharacterCode=hex(${$columns->[2]});
|
sl@0
|
193 |
use strict 'refs';
|
sl@0
|
194 |
if ($foreignCharacterCodeProcessingCode ne '')
|
sl@0
|
195 |
{
|
sl@0
|
196 |
$foreignCharacterCode=eval($foreignCharacterCodeProcessingCode);
|
sl@0
|
197 |
}
|
sl@0
|
198 |
my $characterCodeOfColumnToSortOn;
|
sl@0
|
199 |
my $characterCodeOfOtherColumn;
|
sl@0
|
200 |
if ($columnToSortOn=~/^foreign$/i)
|
sl@0
|
201 |
{
|
sl@0
|
202 |
$characterCodeOfColumnToSortOn=$foreignCharacterCode;
|
sl@0
|
203 |
$characterCodeOfOtherColumn=$unicodeCharacterCode;
|
sl@0
|
204 |
}
|
sl@0
|
205 |
elsif ($columnToSortOn=~/^Unicode$/i)
|
sl@0
|
206 |
{
|
sl@0
|
207 |
$characterCodeOfColumnToSortOn=$unicodeCharacterCode;
|
sl@0
|
208 |
$characterCodeOfOtherColumn=$foreignCharacterCode;
|
sl@0
|
209 |
}
|
sl@0
|
210 |
else
|
sl@0
|
211 |
{
|
sl@0
|
212 |
die("Error: bad parameter \"$columnToSortOn\"");
|
sl@0
|
213 |
}
|
sl@0
|
214 |
if ((!$cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed) || !((($unicodeCharacterCode>=0xe000) && ($unicodeCharacterCode<=0xf8ff)) || (($unicodeCharacterCode>=0xf0000) && ($unicodeCharacterCode<=0x10ffff))))
|
sl@0
|
215 |
{
|
sl@0
|
216 |
if ($subtract)
|
sl@0
|
217 |
{
|
sl@0
|
218 |
$linesSorted->{$characterCodeOfColumnToSortOn}='### '.$linesSorted->{$characterCodeOfColumnToSortOn};
|
sl@0
|
219 |
if ($characterCodesOfOtherColumn->{$characterCodeOfColumnToSortOn}!=$characterCodeOfOtherColumn)
|
sl@0
|
220 |
{
|
sl@0
|
221 |
printf(STDERR "Warning: 0x%x maps to 0x%x in the main source file, but to 0x%x in a source file to be extracted\n", $characterCodeOfColumnToSortOn, $characterCodesOfOtherColumn->{$characterCodeOfColumnToSortOn}, $characterCodeOfOtherColumn);
|
sl@0
|
222 |
}
|
sl@0
|
223 |
}
|
sl@0
|
224 |
else
|
sl@0
|
225 |
{
|
sl@0
|
226 |
$linesSorted->{$characterCodeOfColumnToSortOn}=$line;
|
sl@0
|
227 |
$characterCodesOfOtherColumn->{$characterCodeOfColumnToSortOn}=$characterCodeOfOtherColumn;
|
sl@0
|
228 |
}
|
sl@0
|
229 |
}
|
sl@0
|
230 |
}
|
sl@0
|
231 |
elsif ($line!~/^\s*0x([0-9a-f]+)\s*#\s*undefined.*$/i)
|
sl@0
|
232 |
{
|
sl@0
|
233 |
close($fileHandle);
|
sl@0
|
234 |
die("Error: unexpected line in \"$fileName\":\n $line\n");
|
sl@0
|
235 |
}
|
sl@0
|
236 |
}
|
sl@0
|
237 |
}
|
sl@0
|
238 |
|