os/textandloc/charconvfw/charconvplugins/tools/analyse.pl
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
sl@0
     1
#
sl@0
     2
# Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
sl@0
     3
# All rights reserved.
sl@0
     4
# This component and the accompanying materials are made available
sl@0
     5
# under the terms of "Eclipse Public License v1.0"
sl@0
     6
# which accompanies this distribution, and is available
sl@0
     7
# at the URL "http://www.eclipse.org/legal/epl-v10.html".
sl@0
     8
#
sl@0
     9
# Initial Contributors:
sl@0
    10
# Nokia Corporation - initial contribution.
sl@0
    11
#
sl@0
    12
# Contributors:
sl@0
    13
#
sl@0
    14
# Description: 
sl@0
    15
#
sl@0
    16
sl@0
    17
use strict;
sl@0
    18
use integer;
sl@0
    19
sl@0
    20
BEGIN
sl@0
    21
	{
sl@0
    22
	my $perlScriptPath=$0;
sl@0
    23
	$perlScriptPath=~s/\//\\/g; # replace any forward-slashes with back-slashes
sl@0
    24
	$perlScriptPath=~s/\\?[^\\]+$//; # get rid of this Perl-script's file-name
sl@0
    25
	if ($perlScriptPath eq '')
sl@0
    26
		{
sl@0
    27
		$perlScriptPath='..\group';
sl@0
    28
		}
sl@0
    29
	else
sl@0
    30
		{
sl@0
    31
		$perlScriptPath=~s/(\\?)[^\\]+$/$1group/;
sl@0
    32
		}
sl@0
    33
	unshift(@INC, $perlScriptPath); # can't do "use lib $perlScriptPath" here as "use lib" only seems to work with *hard-coded* directory names
sl@0
    34
	}
sl@0
    35
use PARSER;
sl@0
    36
sl@0
    37
if ((@ARGV==0) || ($ARGV[0]=~/\?/i) || ($ARGV[0]=~/-h/i) || ($ARGV[0]=~/\/h/i) || ($ARGV[0]=~/help/i))
sl@0
    38
	{
sl@0
    39
	die("\nVersion 021\n\nCharacter-set conversion-table generating tool\nCopyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reservered.\n\nUsage:\n\n\tperl analyse.pl <source-file> <output-file> foreign|Unicode [options]\n\nwhere the following options are available (each has a short form and a long form which are shown below separated by a '|'):\n\n\t-c | -columns(<a>: <b>, <c>)\n\t-p | -cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed\n\t-u | -sourceFilesToSubtract(<a>, <b>, ...)\n\n");
sl@0
    40
	}
sl@0
    41
my @columns=(2, 1, 2);
sl@0
    42
my $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=0;
sl@0
    43
my @sourceFilesToSubtract=();
sl@0
    44
&extractCommandLineFlags(\@columns, \$cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed, \@sourceFilesToSubtract);
sl@0
    45
my $sourceFileName=shift;
sl@0
    46
my $outputFileName=shift;
sl@0
    47
my $columnToSortOn=shift;
sl@0
    48
my %characterCodesOfOtherColumn=();
sl@0
    49
my %linesSorted=();
sl@0
    50
open(SOURCE_FILE, "< $sourceFileName") or die("Error: could not open \"$sourceFileName\" for reading");
sl@0
    51
&readSourceFile(\*SOURCE_FILE, $sourceFileName, \%characterCodesOfOtherColumn, \%linesSorted, $columnToSortOn, \@columns, $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed, 0);
sl@0
    52
close(SOURCE_FILE) or die("Error: could not close \"$sourceFileName\"\n");
sl@0
    53
my $sourceFileToSubtract;
sl@0
    54
foreach $sourceFileToSubtract (@sourceFilesToSubtract)
sl@0
    55
	{
sl@0
    56
	open(SOURCE_FILE_TO_SUBTRACT, "< $sourceFileToSubtract") or die("Error: could not open \"$sourceFileToSubtract\" for reading\n");
sl@0
    57
	&readSourceFile(\*SOURCE_FILE_TO_SUBTRACT, $sourceFileToSubtract, \%characterCodesOfOtherColumn, \%linesSorted, $columnToSortOn, \@columns, $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed, 1);
sl@0
    58
	close(SOURCE_FILE_TO_SUBTRACT) or die("Error: could not close \"$sourceFileToSubtract\"\n");
sl@0
    59
	}
sl@0
    60
open(OUTPUT_FILE, "> $outputFileName") or die("Error: could not open \"$outputFileName\" for writing");
sl@0
    61
my $numberOfBreaks=0;
sl@0
    62
my $numberOfMissingSpaces=0;
sl@0
    63
my $numberOfLinesSorted=0;
sl@0
    64
my $previousKey="";
sl@0
    65
my $offset=0;
sl@0
    66
my $key;
sl@0
    67
foreach $key (sort {$a<=>$b} (keys(%linesSorted)))
sl@0
    68
	{
sl@0
    69
	if ($previousKey ne "")
sl@0
    70
		{
sl@0
    71
		$previousKey<$key or die("Error: there appears to be a mix up with the keys \"$previousKey\" and \"$key\"");
sl@0
    72
		if ($previousKey!=$key-1)
sl@0
    73
			{
sl@0
    74
			++$numberOfBreaks;
sl@0
    75
			$numberOfMissingSpaces+=$key-$previousKey;
sl@0
    76
			print(OUTPUT_FILE "# End of contiguous block - relationship between the columns in this block: ".((!defined $offset)? "RANDOM": ($offset==0)? "DIRECT": "OFFSET ($offset)")."\n\n");
sl@0
    77
			}
sl@0
    78
		}
sl@0
    79
	if (($previousKey eq "") || ($previousKey!=$key-1))
sl@0
    80
		{
sl@0
    81
		$offset=$characterCodesOfOtherColumn{$key}-$key;
sl@0
    82
		}
sl@0
    83
	elsif ((defined $offset) && ($offset!=$characterCodesOfOtherColumn{$key}-$key))
sl@0
    84
		{
sl@0
    85
		undef $offset;
sl@0
    86
		}
sl@0
    87
	print(OUTPUT_FILE "$linesSorted{$key}");
sl@0
    88
	++$numberOfLinesSorted;
sl@0
    89
	$previousKey=$key;
sl@0
    90
	}
sl@0
    91
print(OUTPUT_FILE "# End of contiguous block - relationship between the columns in this block: ".((!defined $offset)? "RANDOM": ($offset==0)? "DIRECT": "OFFSET ($offset)")."\n\n");
sl@0
    92
close(OUTPUT_FILE);
sl@0
    93
my $maximumNumberOfIterationsWhenBinarySearching=1;
sl@0
    94
while (($numberOfLinesSorted>>$maximumNumberOfIterationsWhenBinarySearching)>0)
sl@0
    95
	{
sl@0
    96
	++$maximumNumberOfIterationsWhenBinarySearching;
sl@0
    97
	}
sl@0
    98
print("The number of breaks was $numberOfBreaks\nThe number of missing spaces was $numberOfMissingSpaces\nThe number of lines sorted was $numberOfLinesSorted\nThe maximum number of iterations when binary searching would be $maximumNumberOfIterationsWhenBinarySearching");
sl@0
    99
sl@0
   100
sub extractCommandLineFlags()
sl@0
   101
	{
sl@0
   102
	my $columns=shift;
sl@0
   103
	my $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=shift;
sl@0
   104
	my $sourceFilesToSubtract=shift;
sl@0
   105
	my $i;
sl@0
   106
	for ($i=0; $i<=$#ARGV;) # (i) not cache-ing $#ARGV into a variable as @ARGV may change length in this loop (ii) iterate forwards as some parameters may occupy more than one element in @ARGV
sl@0
   107
		{
sl@0
   108
		if (($ARGV[$i]=~/^-c\b(.*)$/i) || ($ARGV[$i]=~/^-columns\b(.*)$/i))
sl@0
   109
			{
sl@0
   110
			my $columnsData=$1;
sl@0
   111
			splice(@ARGV, $i, 1);
sl@0
   112
			for (;;)
sl@0
   113
				{
sl@0
   114
				if ($columnsData=~/^\s*\(\s*(\d+)\s*:\s*(\d+)\s*,\s*(\d+)\s*\)\s*$/)
sl@0
   115
					{
sl@0
   116
					@$columns=($1, $2, $3);
sl@0
   117
					last;
sl@0
   118
					}
sl@0
   119
				($#ARGV>=$i) or die("Error: bad \"-columns\" format\n");
sl@0
   120
				$columnsData.=(splice(@ARGV, $i, 1))[0];
sl@0
   121
				}
sl@0
   122
			}
sl@0
   123
		elsif (($ARGV[$i]=~/^-p$/i) || ($ARGV[$i]=~/^-cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed$/i))
sl@0
   124
			{
sl@0
   125
			splice(@ARGV, $i, 1);
sl@0
   126
			$$cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=1;
sl@0
   127
			}
sl@0
   128
		elsif (($ARGV[$i]=~/^-u\b(.*)$/i) || ($ARGV[$i]=~/^-sourceFilesToSubtract\b(.*)$/i))
sl@0
   129
			{
sl@0
   130
			my $sourceFilesData=$1;
sl@0
   131
			splice(@ARGV, $i, 1);
sl@0
   132
			for (;;)
sl@0
   133
				{
sl@0
   134
				if ($sourceFilesData=~/^\s*\(\s*(.+)\)\s*$/)
sl@0
   135
					{
sl@0
   136
					my $sourceFilesData=$1;
sl@0
   137
					@$sourceFilesToSubtract=split(/,/, $sourceFilesData, -1);
sl@0
   138
					my $j;
sl@0
   139
					for ($j=$#$sourceFilesToSubtract; $j>=0; --$j)
sl@0
   140
						{
sl@0
   141
						$sourceFilesToSubtract->[$j]=~s/^\s+//;
sl@0
   142
						$sourceFilesToSubtract->[$j]=~s/\s+$//;
sl@0
   143
						($sourceFilesToSubtract->[$j] ne '') or die("Error: bad \"-sourceFilesToSubtract\" format (1)\n");
sl@0
   144
						}
sl@0
   145
					last;
sl@0
   146
					}
sl@0
   147
				($#ARGV>=$i) or die("Error: bad \"-sourceFilesToSubtract\" format (2)\n");
sl@0
   148
				$sourceFilesData.=(splice(@ARGV, $i, 1))[0];
sl@0
   149
				}
sl@0
   150
			}
sl@0
   151
		else
sl@0
   152
			{
sl@0
   153
			++$i;
sl@0
   154
			}
sl@0
   155
		}
sl@0
   156
	}
sl@0
   157
sl@0
   158
sub readSourceFile
sl@0
   159
	{
sl@0
   160
	my $fileHandle=shift;
sl@0
   161
	my $fileName=shift;
sl@0
   162
	my $characterCodesOfOtherColumn=shift;
sl@0
   163
	my $linesSorted=shift;
sl@0
   164
	my $columnToSortOn=shift;
sl@0
   165
	my $columns=shift;
sl@0
   166
	my $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=shift;
sl@0
   167
	my $subtract=shift;
sl@0
   168
	my $foreignCharacterCodeProcessingCode='';
sl@0
   169
	if (!(($columns->[0]>0) && ($columns->[1]>0) && ($columns->[2]>0) && ($columns->[1]<=$columns->[0]) && ($columns->[2]<=$columns->[0]) && ($columns->[1]!=$columns->[2])))
sl@0
   170
		{
sl@0
   171
		close($fileHandle);
sl@0
   172
		die("Error: bad \"-columns\" data\n");
sl@0
   173
		}
sl@0
   174
	my $patternOfLineContainingCharacterCodes=join('\s+', ('0x([0-9a-f]+)') x $columns->[0]);
sl@0
   175
	my $line;
sl@0
   176
	my $strippedDownLine;
sl@0
   177
	for (;;)
sl@0
   178
		{
sl@0
   179
		($line, $strippedDownLine)=&nextNonEmptyStrippedDownLine($fileHandle);
sl@0
   180
		if ($strippedDownLine eq '')
sl@0
   181
			{
sl@0
   182
			last;
sl@0
   183
			}
sl@0
   184
		if ($strippedDownLine=~/^SET_FOREIGN_CHARACTER_CODE_PROCESSING_CODE\s+(.*)$/i)
sl@0
   185
			{
sl@0
   186
			$foreignCharacterCodeProcessingCode=$1;
sl@0
   187
			}
sl@0
   188
		elsif ($strippedDownLine=~/^$patternOfLineContainingCharacterCodes$/i)
sl@0
   189
			{
sl@0
   190
			no strict 'refs'; # so that we can use symbolic references for $1, $2, etc
sl@0
   191
			my $foreignCharacterCode=hex(${$columns->[1]});
sl@0
   192
			my $unicodeCharacterCode=hex(${$columns->[2]});
sl@0
   193
			use strict 'refs';
sl@0
   194
			if ($foreignCharacterCodeProcessingCode ne '')
sl@0
   195
				{
sl@0
   196
				$foreignCharacterCode=eval($foreignCharacterCodeProcessingCode);
sl@0
   197
				}
sl@0
   198
			my $characterCodeOfColumnToSortOn;
sl@0
   199
			my $characterCodeOfOtherColumn;
sl@0
   200
			if ($columnToSortOn=~/^foreign$/i)
sl@0
   201
				{
sl@0
   202
				$characterCodeOfColumnToSortOn=$foreignCharacterCode;
sl@0
   203
				$characterCodeOfOtherColumn=$unicodeCharacterCode;
sl@0
   204
				}
sl@0
   205
			elsif ($columnToSortOn=~/^Unicode$/i)
sl@0
   206
				{
sl@0
   207
				$characterCodeOfColumnToSortOn=$unicodeCharacterCode;
sl@0
   208
				$characterCodeOfOtherColumn=$foreignCharacterCode;
sl@0
   209
				}
sl@0
   210
			else
sl@0
   211
				{
sl@0
   212
				die("Error: bad parameter \"$columnToSortOn\"");
sl@0
   213
				}
sl@0
   214
			if ((!$cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed) || !((($unicodeCharacterCode>=0xe000) && ($unicodeCharacterCode<=0xf8ff)) || (($unicodeCharacterCode>=0xf0000) && ($unicodeCharacterCode<=0x10ffff))))
sl@0
   215
				{
sl@0
   216
				if ($subtract)
sl@0
   217
					{
sl@0
   218
					$linesSorted->{$characterCodeOfColumnToSortOn}='### '.$linesSorted->{$characterCodeOfColumnToSortOn};
sl@0
   219
					if ($characterCodesOfOtherColumn->{$characterCodeOfColumnToSortOn}!=$characterCodeOfOtherColumn)
sl@0
   220
						{
sl@0
   221
						printf(STDERR "Warning: 0x%x maps to 0x%x in the main source file, but to 0x%x in a source file to be extracted\n", $characterCodeOfColumnToSortOn, $characterCodesOfOtherColumn->{$characterCodeOfColumnToSortOn}, $characterCodeOfOtherColumn);
sl@0
   222
						}
sl@0
   223
					}
sl@0
   224
				else
sl@0
   225
					{
sl@0
   226
					$linesSorted->{$characterCodeOfColumnToSortOn}=$line;
sl@0
   227
					$characterCodesOfOtherColumn->{$characterCodeOfColumnToSortOn}=$characterCodeOfOtherColumn;
sl@0
   228
					}
sl@0
   229
				}
sl@0
   230
			}
sl@0
   231
		elsif ($line!~/^\s*0x([0-9a-f]+)\s*#\s*undefined.*$/i)
sl@0
   232
			{
sl@0
   233
			close($fileHandle);
sl@0
   234
			die("Error: unexpected line in \"$fileName\":\n    $line\n");
sl@0
   235
			}
sl@0
   236
		}
sl@0
   237
	}
sl@0
   238