1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/textandloc/charconvfw/charconvplugins/tools/analyse.pl Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,238 @@
1.4 +#
1.5 +# Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
1.6 +# All rights reserved.
1.7 +# This component and the accompanying materials are made available
1.8 +# under the terms of "Eclipse Public License v1.0"
1.9 +# which accompanies this distribution, and is available
1.10 +# at the URL "http://www.eclipse.org/legal/epl-v10.html".
1.11 +#
1.12 +# Initial Contributors:
1.13 +# Nokia Corporation - initial contribution.
1.14 +#
1.15 +# Contributors:
1.16 +#
1.17 +# Description:
1.18 +#
1.19 +
1.20 +use strict;
1.21 +use integer;
1.22 +
1.23 +BEGIN
1.24 + {
1.25 + my $perlScriptPath=$0;
1.26 + $perlScriptPath=~s/\//\\/g; # replace any forward-slashes with back-slashes
1.27 + $perlScriptPath=~s/\\?[^\\]+$//; # get rid of this Perl-script's file-name
1.28 + if ($perlScriptPath eq '')
1.29 + {
1.30 + $perlScriptPath='..\group';
1.31 + }
1.32 + else
1.33 + {
1.34 + $perlScriptPath=~s/(\\?)[^\\]+$/$1group/;
1.35 + }
1.36 + unshift(@INC, $perlScriptPath); # can't do "use lib $perlScriptPath" here as "use lib" only seems to work with *hard-coded* directory names
1.37 + }
1.38 +use PARSER;
1.39 +
1.40 +if ((@ARGV==0) || ($ARGV[0]=~/\?/i) || ($ARGV[0]=~/-h/i) || ($ARGV[0]=~/\/h/i) || ($ARGV[0]=~/help/i))
1.41 + {
1.42 + die("\nVersion 021\n\nCharacter-set conversion-table generating tool\nCopyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reservered.\n\nUsage:\n\n\tperl analyse.pl <source-file> <output-file> foreign|Unicode [options]\n\nwhere the following options are available (each has a short form and a long form which are shown below separated by a '|'):\n\n\t-c | -columns(<a>: <b>, <c>)\n\t-p | -cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed\n\t-u | -sourceFilesToSubtract(<a>, <b>, ...)\n\n");
1.43 + }
1.44 +my @columns=(2, 1, 2);
1.45 +my $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=0;
1.46 +my @sourceFilesToSubtract=();
1.47 +&extractCommandLineFlags(\@columns, \$cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed, \@sourceFilesToSubtract);
1.48 +my $sourceFileName=shift;
1.49 +my $outputFileName=shift;
1.50 +my $columnToSortOn=shift;
1.51 +my %characterCodesOfOtherColumn=();
1.52 +my %linesSorted=();
1.53 +open(SOURCE_FILE, "< $sourceFileName") or die("Error: could not open \"$sourceFileName\" for reading");
1.54 +&readSourceFile(\*SOURCE_FILE, $sourceFileName, \%characterCodesOfOtherColumn, \%linesSorted, $columnToSortOn, \@columns, $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed, 0);
1.55 +close(SOURCE_FILE) or die("Error: could not close \"$sourceFileName\"\n");
1.56 +my $sourceFileToSubtract;
1.57 +foreach $sourceFileToSubtract (@sourceFilesToSubtract)
1.58 + {
1.59 + open(SOURCE_FILE_TO_SUBTRACT, "< $sourceFileToSubtract") or die("Error: could not open \"$sourceFileToSubtract\" for reading\n");
1.60 + &readSourceFile(\*SOURCE_FILE_TO_SUBTRACT, $sourceFileToSubtract, \%characterCodesOfOtherColumn, \%linesSorted, $columnToSortOn, \@columns, $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed, 1);
1.61 + close(SOURCE_FILE_TO_SUBTRACT) or die("Error: could not close \"$sourceFileToSubtract\"\n");
1.62 + }
1.63 +open(OUTPUT_FILE, "> $outputFileName") or die("Error: could not open \"$outputFileName\" for writing");
1.64 +my $numberOfBreaks=0;
1.65 +my $numberOfMissingSpaces=0;
1.66 +my $numberOfLinesSorted=0;
1.67 +my $previousKey="";
1.68 +my $offset=0;
1.69 +my $key;
1.70 +foreach $key (sort {$a<=>$b} (keys(%linesSorted)))
1.71 + {
1.72 + if ($previousKey ne "")
1.73 + {
1.74 + $previousKey<$key or die("Error: there appears to be a mix up with the keys \"$previousKey\" and \"$key\"");
1.75 + if ($previousKey!=$key-1)
1.76 + {
1.77 + ++$numberOfBreaks;
1.78 + $numberOfMissingSpaces+=$key-$previousKey;
1.79 + print(OUTPUT_FILE "# End of contiguous block - relationship between the columns in this block: ".((!defined $offset)? "RANDOM": ($offset==0)? "DIRECT": "OFFSET ($offset)")."\n\n");
1.80 + }
1.81 + }
1.82 + if (($previousKey eq "") || ($previousKey!=$key-1))
1.83 + {
1.84 + $offset=$characterCodesOfOtherColumn{$key}-$key;
1.85 + }
1.86 + elsif ((defined $offset) && ($offset!=$characterCodesOfOtherColumn{$key}-$key))
1.87 + {
1.88 + undef $offset;
1.89 + }
1.90 + print(OUTPUT_FILE "$linesSorted{$key}");
1.91 + ++$numberOfLinesSorted;
1.92 + $previousKey=$key;
1.93 + }
1.94 +print(OUTPUT_FILE "# End of contiguous block - relationship between the columns in this block: ".((!defined $offset)? "RANDOM": ($offset==0)? "DIRECT": "OFFSET ($offset)")."\n\n");
1.95 +close(OUTPUT_FILE);
1.96 +my $maximumNumberOfIterationsWhenBinarySearching=1;
1.97 +while (($numberOfLinesSorted>>$maximumNumberOfIterationsWhenBinarySearching)>0)
1.98 + {
1.99 + ++$maximumNumberOfIterationsWhenBinarySearching;
1.100 + }
1.101 +print("The number of breaks was $numberOfBreaks\nThe number of missing spaces was $numberOfMissingSpaces\nThe number of lines sorted was $numberOfLinesSorted\nThe maximum number of iterations when binary searching would be $maximumNumberOfIterationsWhenBinarySearching");
1.102 +
1.103 +sub extractCommandLineFlags()
1.104 + {
1.105 + my $columns=shift;
1.106 + my $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=shift;
1.107 + my $sourceFilesToSubtract=shift;
1.108 + my $i;
1.109 + for ($i=0; $i<=$#ARGV;) # (i) not cache-ing $#ARGV into a variable as @ARGV may change length in this loop (ii) iterate forwards as some parameters may occupy more than one element in @ARGV
1.110 + {
1.111 + if (($ARGV[$i]=~/^-c\b(.*)$/i) || ($ARGV[$i]=~/^-columns\b(.*)$/i))
1.112 + {
1.113 + my $columnsData=$1;
1.114 + splice(@ARGV, $i, 1);
1.115 + for (;;)
1.116 + {
1.117 + if ($columnsData=~/^\s*\(\s*(\d+)\s*:\s*(\d+)\s*,\s*(\d+)\s*\)\s*$/)
1.118 + {
1.119 + @$columns=($1, $2, $3);
1.120 + last;
1.121 + }
1.122 + ($#ARGV>=$i) or die("Error: bad \"-columns\" format\n");
1.123 + $columnsData.=(splice(@ARGV, $i, 1))[0];
1.124 + }
1.125 + }
1.126 + elsif (($ARGV[$i]=~/^-p$/i) || ($ARGV[$i]=~/^-cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed$/i))
1.127 + {
1.128 + splice(@ARGV, $i, 1);
1.129 + $$cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=1;
1.130 + }
1.131 + elsif (($ARGV[$i]=~/^-u\b(.*)$/i) || ($ARGV[$i]=~/^-sourceFilesToSubtract\b(.*)$/i))
1.132 + {
1.133 + my $sourceFilesData=$1;
1.134 + splice(@ARGV, $i, 1);
1.135 + for (;;)
1.136 + {
1.137 + if ($sourceFilesData=~/^\s*\(\s*(.+)\)\s*$/)
1.138 + {
1.139 + my $sourceFilesData=$1;
1.140 + @$sourceFilesToSubtract=split(/,/, $sourceFilesData, -1);
1.141 + my $j;
1.142 + for ($j=$#$sourceFilesToSubtract; $j>=0; --$j)
1.143 + {
1.144 + $sourceFilesToSubtract->[$j]=~s/^\s+//;
1.145 + $sourceFilesToSubtract->[$j]=~s/\s+$//;
1.146 + ($sourceFilesToSubtract->[$j] ne '') or die("Error: bad \"-sourceFilesToSubtract\" format (1)\n");
1.147 + }
1.148 + last;
1.149 + }
1.150 + ($#ARGV>=$i) or die("Error: bad \"-sourceFilesToSubtract\" format (2)\n");
1.151 + $sourceFilesData.=(splice(@ARGV, $i, 1))[0];
1.152 + }
1.153 + }
1.154 + else
1.155 + {
1.156 + ++$i;
1.157 + }
1.158 + }
1.159 + }
1.160 +
1.161 +sub readSourceFile
1.162 + {
1.163 + my $fileHandle=shift;
1.164 + my $fileName=shift;
1.165 + my $characterCodesOfOtherColumn=shift;
1.166 + my $linesSorted=shift;
1.167 + my $columnToSortOn=shift;
1.168 + my $columns=shift;
1.169 + my $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=shift;
1.170 + my $subtract=shift;
1.171 + my $foreignCharacterCodeProcessingCode='';
1.172 + if (!(($columns->[0]>0) && ($columns->[1]>0) && ($columns->[2]>0) && ($columns->[1]<=$columns->[0]) && ($columns->[2]<=$columns->[0]) && ($columns->[1]!=$columns->[2])))
1.173 + {
1.174 + close($fileHandle);
1.175 + die("Error: bad \"-columns\" data\n");
1.176 + }
1.177 + my $patternOfLineContainingCharacterCodes=join('\s+', ('0x([0-9a-f]+)') x $columns->[0]);
1.178 + my $line;
1.179 + my $strippedDownLine;
1.180 + for (;;)
1.181 + {
1.182 + ($line, $strippedDownLine)=&nextNonEmptyStrippedDownLine($fileHandle);
1.183 + if ($strippedDownLine eq '')
1.184 + {
1.185 + last;
1.186 + }
1.187 + if ($strippedDownLine=~/^SET_FOREIGN_CHARACTER_CODE_PROCESSING_CODE\s+(.*)$/i)
1.188 + {
1.189 + $foreignCharacterCodeProcessingCode=$1;
1.190 + }
1.191 + elsif ($strippedDownLine=~/^$patternOfLineContainingCharacterCodes$/i)
1.192 + {
1.193 + no strict 'refs'; # so that we can use symbolic references for $1, $2, etc
1.194 + my $foreignCharacterCode=hex(${$columns->[1]});
1.195 + my $unicodeCharacterCode=hex(${$columns->[2]});
1.196 + use strict 'refs';
1.197 + if ($foreignCharacterCodeProcessingCode ne '')
1.198 + {
1.199 + $foreignCharacterCode=eval($foreignCharacterCodeProcessingCode);
1.200 + }
1.201 + my $characterCodeOfColumnToSortOn;
1.202 + my $characterCodeOfOtherColumn;
1.203 + if ($columnToSortOn=~/^foreign$/i)
1.204 + {
1.205 + $characterCodeOfColumnToSortOn=$foreignCharacterCode;
1.206 + $characterCodeOfOtherColumn=$unicodeCharacterCode;
1.207 + }
1.208 + elsif ($columnToSortOn=~/^Unicode$/i)
1.209 + {
1.210 + $characterCodeOfColumnToSortOn=$unicodeCharacterCode;
1.211 + $characterCodeOfOtherColumn=$foreignCharacterCode;
1.212 + }
1.213 + else
1.214 + {
1.215 + die("Error: bad parameter \"$columnToSortOn\"");
1.216 + }
1.217 + if ((!$cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed) || !((($unicodeCharacterCode>=0xe000) && ($unicodeCharacterCode<=0xf8ff)) || (($unicodeCharacterCode>=0xf0000) && ($unicodeCharacterCode<=0x10ffff))))
1.218 + {
1.219 + if ($subtract)
1.220 + {
1.221 + $linesSorted->{$characterCodeOfColumnToSortOn}='### '.$linesSorted->{$characterCodeOfColumnToSortOn};
1.222 + if ($characterCodesOfOtherColumn->{$characterCodeOfColumnToSortOn}!=$characterCodeOfOtherColumn)
1.223 + {
1.224 + printf(STDERR "Warning: 0x%x maps to 0x%x in the main source file, but to 0x%x in a source file to be extracted\n", $characterCodeOfColumnToSortOn, $characterCodesOfOtherColumn->{$characterCodeOfColumnToSortOn}, $characterCodeOfOtherColumn);
1.225 + }
1.226 + }
1.227 + else
1.228 + {
1.229 + $linesSorted->{$characterCodeOfColumnToSortOn}=$line;
1.230 + $characterCodesOfOtherColumn->{$characterCodeOfColumnToSortOn}=$characterCodeOfOtherColumn;
1.231 + }
1.232 + }
1.233 + }
1.234 + elsif ($line!~/^\s*0x([0-9a-f]+)\s*#\s*undefined.*$/i)
1.235 + {
1.236 + close($fileHandle);
1.237 + die("Error: unexpected line in \"$fileName\":\n $line\n");
1.238 + }
1.239 + }
1.240 + }
1.241 +