sl@0: # sl@0: # Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies). sl@0: # All rights reserved. sl@0: # This component and the accompanying materials are made available sl@0: # under the terms of "Eclipse Public License v1.0" sl@0: # which accompanies this distribution, and is available sl@0: # at the URL "http://www.eclipse.org/legal/epl-v10.html". sl@0: # sl@0: # Initial Contributors: sl@0: # Nokia Corporation - initial contribution. sl@0: # sl@0: # Contributors: sl@0: # sl@0: # Description: sl@0: # sl@0: sl@0: use strict; sl@0: use integer; sl@0: sl@0: BEGIN sl@0: { sl@0: my $perlScriptPath=$0; sl@0: $perlScriptPath=~s/\//\\/g; # replace any forward-slashes with back-slashes sl@0: $perlScriptPath=~s/\\?[^\\]+$//; # get rid of this Perl-script's file-name sl@0: if ($perlScriptPath eq '') sl@0: { sl@0: $perlScriptPath='..\group'; sl@0: } sl@0: else sl@0: { sl@0: $perlScriptPath=~s/(\\?)[^\\]+$/$1group/; sl@0: } sl@0: unshift(@INC, $perlScriptPath); # can't do "use lib $perlScriptPath" here as "use lib" only seems to work with *hard-coded* directory names sl@0: } sl@0: use PARSER; sl@0: sl@0: if ((@ARGV==0) || ($ARGV[0]=~/\?/i) || ($ARGV[0]=~/-h/i) || ($ARGV[0]=~/\/h/i) || ($ARGV[0]=~/help/i)) sl@0: { sl@0: die("\nVersion 021\n\nCharacter-set conversion-table generating tool\nCopyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reservered.\n\nUsage:\n\n\tperl analyse.pl foreign|Unicode [options]\n\nwhere the following options are available (each has a short form and a long form which are shown below separated by a '|'):\n\n\t-c | -columns(: , )\n\t-p | -cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed\n\t-u | -sourceFilesToSubtract(, , ...)\n\n"); sl@0: } sl@0: my @columns=(2, 1, 2); sl@0: my $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=0; sl@0: my @sourceFilesToSubtract=(); sl@0: &extractCommandLineFlags(\@columns, \$cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed, \@sourceFilesToSubtract); sl@0: my $sourceFileName=shift; sl@0: my $outputFileName=shift; sl@0: my $columnToSortOn=shift; sl@0: my %characterCodesOfOtherColumn=(); sl@0: my %linesSorted=(); sl@0: open(SOURCE_FILE, "< $sourceFileName") or die("Error: could not open \"$sourceFileName\" for reading"); sl@0: &readSourceFile(\*SOURCE_FILE, $sourceFileName, \%characterCodesOfOtherColumn, \%linesSorted, $columnToSortOn, \@columns, $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed, 0); sl@0: close(SOURCE_FILE) or die("Error: could not close \"$sourceFileName\"\n"); sl@0: my $sourceFileToSubtract; sl@0: foreach $sourceFileToSubtract (@sourceFilesToSubtract) sl@0: { sl@0: open(SOURCE_FILE_TO_SUBTRACT, "< $sourceFileToSubtract") or die("Error: could not open \"$sourceFileToSubtract\" for reading\n"); sl@0: &readSourceFile(\*SOURCE_FILE_TO_SUBTRACT, $sourceFileToSubtract, \%characterCodesOfOtherColumn, \%linesSorted, $columnToSortOn, \@columns, $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed, 1); sl@0: close(SOURCE_FILE_TO_SUBTRACT) or die("Error: could not close \"$sourceFileToSubtract\"\n"); sl@0: } sl@0: open(OUTPUT_FILE, "> $outputFileName") or die("Error: could not open \"$outputFileName\" for writing"); sl@0: my $numberOfBreaks=0; sl@0: my $numberOfMissingSpaces=0; sl@0: my $numberOfLinesSorted=0; sl@0: my $previousKey=""; sl@0: my $offset=0; sl@0: my $key; sl@0: foreach $key (sort {$a<=>$b} (keys(%linesSorted))) sl@0: { sl@0: if ($previousKey ne "") sl@0: { sl@0: $previousKey<$key or die("Error: there appears to be a mix up with the keys \"$previousKey\" and \"$key\""); sl@0: if ($previousKey!=$key-1) sl@0: { sl@0: ++$numberOfBreaks; sl@0: $numberOfMissingSpaces+=$key-$previousKey; sl@0: print(OUTPUT_FILE "# End of contiguous block - relationship between the columns in this block: ".((!defined $offset)? "RANDOM": ($offset==0)? "DIRECT": "OFFSET ($offset)")."\n\n"); sl@0: } sl@0: } sl@0: if (($previousKey eq "") || ($previousKey!=$key-1)) sl@0: { sl@0: $offset=$characterCodesOfOtherColumn{$key}-$key; sl@0: } sl@0: elsif ((defined $offset) && ($offset!=$characterCodesOfOtherColumn{$key}-$key)) sl@0: { sl@0: undef $offset; sl@0: } sl@0: print(OUTPUT_FILE "$linesSorted{$key}"); sl@0: ++$numberOfLinesSorted; sl@0: $previousKey=$key; sl@0: } sl@0: print(OUTPUT_FILE "# End of contiguous block - relationship between the columns in this block: ".((!defined $offset)? "RANDOM": ($offset==0)? "DIRECT": "OFFSET ($offset)")."\n\n"); sl@0: close(OUTPUT_FILE); sl@0: my $maximumNumberOfIterationsWhenBinarySearching=1; sl@0: while (($numberOfLinesSorted>>$maximumNumberOfIterationsWhenBinarySearching)>0) sl@0: { sl@0: ++$maximumNumberOfIterationsWhenBinarySearching; sl@0: } sl@0: print("The number of breaks was $numberOfBreaks\nThe number of missing spaces was $numberOfMissingSpaces\nThe number of lines sorted was $numberOfLinesSorted\nThe maximum number of iterations when binary searching would be $maximumNumberOfIterationsWhenBinarySearching"); sl@0: sl@0: sub extractCommandLineFlags() sl@0: { sl@0: my $columns=shift; sl@0: my $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=shift; sl@0: my $sourceFilesToSubtract=shift; sl@0: my $i; sl@0: for ($i=0; $i<=$#ARGV;) # (i) not cache-ing $#ARGV into a variable as @ARGV may change length in this loop (ii) iterate forwards as some parameters may occupy more than one element in @ARGV sl@0: { sl@0: if (($ARGV[$i]=~/^-c\b(.*)$/i) || ($ARGV[$i]=~/^-columns\b(.*)$/i)) sl@0: { sl@0: my $columnsData=$1; sl@0: splice(@ARGV, $i, 1); sl@0: for (;;) sl@0: { sl@0: if ($columnsData=~/^\s*\(\s*(\d+)\s*:\s*(\d+)\s*,\s*(\d+)\s*\)\s*$/) sl@0: { sl@0: @$columns=($1, $2, $3); sl@0: last; sl@0: } sl@0: ($#ARGV>=$i) or die("Error: bad \"-columns\" format\n"); sl@0: $columnsData.=(splice(@ARGV, $i, 1))[0]; sl@0: } sl@0: } sl@0: elsif (($ARGV[$i]=~/^-p$/i) || ($ARGV[$i]=~/^-cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed$/i)) sl@0: { sl@0: splice(@ARGV, $i, 1); sl@0: $$cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=1; sl@0: } sl@0: elsif (($ARGV[$i]=~/^-u\b(.*)$/i) || ($ARGV[$i]=~/^-sourceFilesToSubtract\b(.*)$/i)) sl@0: { sl@0: my $sourceFilesData=$1; sl@0: splice(@ARGV, $i, 1); sl@0: for (;;) sl@0: { sl@0: if ($sourceFilesData=~/^\s*\(\s*(.+)\)\s*$/) sl@0: { sl@0: my $sourceFilesData=$1; sl@0: @$sourceFilesToSubtract=split(/,/, $sourceFilesData, -1); sl@0: my $j; sl@0: for ($j=$#$sourceFilesToSubtract; $j>=0; --$j) sl@0: { sl@0: $sourceFilesToSubtract->[$j]=~s/^\s+//; sl@0: $sourceFilesToSubtract->[$j]=~s/\s+$//; sl@0: ($sourceFilesToSubtract->[$j] ne '') or die("Error: bad \"-sourceFilesToSubtract\" format (1)\n"); sl@0: } sl@0: last; sl@0: } sl@0: ($#ARGV>=$i) or die("Error: bad \"-sourceFilesToSubtract\" format (2)\n"); sl@0: $sourceFilesData.=(splice(@ARGV, $i, 1))[0]; sl@0: } sl@0: } sl@0: else sl@0: { sl@0: ++$i; sl@0: } sl@0: } sl@0: } sl@0: sl@0: sub readSourceFile sl@0: { sl@0: my $fileHandle=shift; sl@0: my $fileName=shift; sl@0: my $characterCodesOfOtherColumn=shift; sl@0: my $linesSorted=shift; sl@0: my $columnToSortOn=shift; sl@0: my $columns=shift; sl@0: my $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=shift; sl@0: my $subtract=shift; sl@0: my $foreignCharacterCodeProcessingCode=''; sl@0: if (!(($columns->[0]>0) && ($columns->[1]>0) && ($columns->[2]>0) && ($columns->[1]<=$columns->[0]) && ($columns->[2]<=$columns->[0]) && ($columns->[1]!=$columns->[2]))) sl@0: { sl@0: close($fileHandle); sl@0: die("Error: bad \"-columns\" data\n"); sl@0: } sl@0: my $patternOfLineContainingCharacterCodes=join('\s+', ('0x([0-9a-f]+)') x $columns->[0]); sl@0: my $line; sl@0: my $strippedDownLine; sl@0: for (;;) sl@0: { sl@0: ($line, $strippedDownLine)=&nextNonEmptyStrippedDownLine($fileHandle); sl@0: if ($strippedDownLine eq '') sl@0: { sl@0: last; sl@0: } sl@0: if ($strippedDownLine=~/^SET_FOREIGN_CHARACTER_CODE_PROCESSING_CODE\s+(.*)$/i) sl@0: { sl@0: $foreignCharacterCodeProcessingCode=$1; sl@0: } sl@0: elsif ($strippedDownLine=~/^$patternOfLineContainingCharacterCodes$/i) sl@0: { sl@0: no strict 'refs'; # so that we can use symbolic references for $1, $2, etc sl@0: my $foreignCharacterCode=hex(${$columns->[1]}); sl@0: my $unicodeCharacterCode=hex(${$columns->[2]}); sl@0: use strict 'refs'; sl@0: if ($foreignCharacterCodeProcessingCode ne '') sl@0: { sl@0: $foreignCharacterCode=eval($foreignCharacterCodeProcessingCode); sl@0: } sl@0: my $characterCodeOfColumnToSortOn; sl@0: my $characterCodeOfOtherColumn; sl@0: if ($columnToSortOn=~/^foreign$/i) sl@0: { sl@0: $characterCodeOfColumnToSortOn=$foreignCharacterCode; sl@0: $characterCodeOfOtherColumn=$unicodeCharacterCode; sl@0: } sl@0: elsif ($columnToSortOn=~/^Unicode$/i) sl@0: { sl@0: $characterCodeOfColumnToSortOn=$unicodeCharacterCode; sl@0: $characterCodeOfOtherColumn=$foreignCharacterCode; sl@0: } sl@0: else sl@0: { sl@0: die("Error: bad parameter \"$columnToSortOn\""); sl@0: } sl@0: if ((!$cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed) || !((($unicodeCharacterCode>=0xe000) && ($unicodeCharacterCode<=0xf8ff)) || (($unicodeCharacterCode>=0xf0000) && ($unicodeCharacterCode<=0x10ffff)))) sl@0: { sl@0: if ($subtract) sl@0: { sl@0: $linesSorted->{$characterCodeOfColumnToSortOn}='### '.$linesSorted->{$characterCodeOfColumnToSortOn}; sl@0: if ($characterCodesOfOtherColumn->{$characterCodeOfColumnToSortOn}!=$characterCodeOfOtherColumn) sl@0: { sl@0: printf(STDERR "Warning: 0x%x maps to 0x%x in the main source file, but to 0x%x in a source file to be extracted\n", $characterCodeOfColumnToSortOn, $characterCodesOfOtherColumn->{$characterCodeOfColumnToSortOn}, $characterCodeOfOtherColumn); sl@0: } sl@0: } sl@0: else sl@0: { sl@0: $linesSorted->{$characterCodeOfColumnToSortOn}=$line; sl@0: $characterCodesOfOtherColumn->{$characterCodeOfColumnToSortOn}=$characterCodeOfOtherColumn; sl@0: } sl@0: } sl@0: } sl@0: elsif ($line!~/^\s*0x([0-9a-f]+)\s*#\s*undefined.*$/i) sl@0: { sl@0: close($fileHandle); sl@0: die("Error: unexpected line in \"$fileName\":\n $line\n"); sl@0: } sl@0: } sl@0: } sl@0: