diff -r 000000000000 -r bde4ae8d615e os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/tests/encoding.test --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/tests/encoding.test Fri Jun 15 03:10:57 2012 +0200 @@ -0,0 +1,568 @@ +# This file contains a collection of tests for tclEncoding.c +# Sourcing this file into Tcl runs the tests and generates output for +# errors. No output means no errors were found. +# +# Copyright (c) 1997 Sun Microsystems, Inc. +# Copyright (c) 1998-1999 by Scriptics Corporation. +# +# See the file "license.terms" for information on usage and redistribution +# of this file, and for a DISCLAIMER OF ALL WARRANTIES. +# +# RCS: @(#) $Id: encoding.test,v 1.16.2.3 2006/10/05 21:24:56 hobbs Exp $ + +package require tcltest 2 +namespace import -force ::tcltest::* + +proc toutf {args} { + global x + lappend x "toutf $args" +} +proc fromutf {args} { + global x + lappend x "fromutf $args" +} + +# Some tests require the testencoding command +testConstraint testencoding [llength [info commands testencoding]] +testConstraint exec [llength [info commands exec]] + +# TclInitEncodingSubsystem is tested by the rest of this file +# TclFinalizeEncodingSubsystem is not currently tested + +test encoding-1.1 {Tcl_GetEncoding: system encoding} {testencoding} { + testencoding create foo toutf fromutf + set old [encoding system] + encoding system foo + set x {} + encoding convertto abcd + encoding system $old + testencoding delete foo + set x +} {{fromutf }} +test encoding-1.2 {Tcl_GetEncoding: existing encoding} {testencoding} { + testencoding create foo toutf fromutf + set x {} + encoding convertto foo abcd + testencoding delete foo + set x +} {{fromutf }} +test encoding-1.3 {Tcl_GetEncoding: load encoding} { + list [encoding convertto jis0208 \u4e4e] \ + [encoding convertfrom jis0208 8C] +} "8C \u4e4e" + +test encoding-2.1 {Tcl_FreeEncoding: refcount == 0} { + encoding convertto jis0208 \u4e4e +} {8C} +test encoding-2.2 {Tcl_FreeEncoding: refcount != 0} {testencoding} { + set system [encoding system] + set path [testencoding path] + encoding system shiftjis ;# incr ref count + testencoding path [list [pwd]] + set x [encoding convertto shiftjis \u4e4e] ;# old one found + encoding system identity + lappend x [catch {encoding convertto shiftjis \u4e4e} msg] $msg + encoding system identity + testencoding path $path + encoding system $system + set x +} "\u008c\u00c1 1 {unknown encoding \"shiftjis\"}" + +test encoding-3.1 {Tcl_GetEncodingName, NULL} { + set old [encoding system] + encoding system shiftjis + set x [encoding system] + encoding system $old + set x +} {shiftjis} +test encoding-3.2 {Tcl_GetEncodingName, non-null} { + set old [fconfigure stdout -encoding] + fconfigure stdout -encoding jis0208 + set x [fconfigure stdout -encoding] + fconfigure stdout -encoding $old + set x +} {jis0208} + +test encoding-4.1 {Tcl_GetEncodingNames} {testencoding} { + cd [makeDirectory tmp] + makeDirectory [file join tmp encoding] + makeFile {} [file join tmp encoding junk.enc] + makeFile {} [file join tmp encoding junk2.enc] + set path [testencoding path] + testencoding path {} + catch {unset encodings} + catch {unset x} + foreach encoding [encoding names] { + set encodings($encoding) 1 + } + testencoding path [list [pwd]] + foreach encoding [encoding names] { + if {![info exists encodings($encoding)]} { + lappend x $encoding + } + } + testencoding path $path + cd [workingDirectory] + removeFile [file join tmp encoding junk2.enc] + removeFile [file join tmp encoding junk.enc] + removeDirectory [file join tmp encoding] + removeDirectory tmp + lsort $x +} {junk junk2} + +test encoding-5.1 {Tcl_SetSystemEncoding} { + set old [encoding system] + encoding system jis0208 + set x [encoding convertto \u4e4e] + encoding system identity + encoding system $old + set x +} {8C} +test encoding-5.2 {Tcl_SetSystemEncoding: test ref count} { + set old [encoding system] + encoding system $old + string compare $old [encoding system] +} {0} + +test encoding-6.1 {Tcl_CreateEncoding: new} {testencoding} { + testencoding create foo {toutf 1} {fromutf 2} + set x {} + encoding convertfrom foo abcd + encoding convertto foo abcd + testencoding delete foo + set x +} {{toutf 1} {fromutf 2}} +test encoding-6.2 {Tcl_CreateEncoding: replace encoding} {testencoding} { + testencoding create foo {toutf a} {fromutf b} + set x {} + encoding convertfrom foo abcd + encoding convertto foo abcd + testencoding delete foo + set x +} {{toutf a} {fromutf b}} + +test encoding-7.1 {Tcl_ExternalToUtfDString: small buffer} { + encoding convertfrom jis0208 8c8c8c8c +} "\u543e\u543e\u543e\u543e" +test encoding-7.2 {Tcl_UtfToExternalDString: big buffer} { + set a 8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C + append a $a + append a $a + append a $a + append a $a + set x [encoding convertfrom jis0208 $a] + list [string length $x] [string index $x 0] +} "512 \u4e4e" + +test encoding-8.1 {Tcl_ExternalToUtf} { + set f [open [file join [temporaryDirectory] dummy] w] + fconfigure $f -translation binary -encoding iso8859-1 + puts -nonewline $f "ab\x8c\xc1g" + close $f + set f [open [file join [temporaryDirectory] dummy] r] + fconfigure $f -translation binary -encoding shiftjis + set x [read $f] + close $f + file delete [file join [temporaryDirectory] dummy] + set x +} "ab\u4e4eg" + +test encoding-9.1 {Tcl_UtfToExternalDString: small buffer} { + encoding convertto jis0208 "\u543e\u543e\u543e\u543e" +} {8c8c8c8c} +test encoding-9.2 {Tcl_UtfToExternalDString: big buffer} { + set a \u4e4e\u4e4e\u4e4e\u4e4e\u4e4e\u4e4e\u4e4e\u4e4e + append a $a + append a $a + append a $a + append a $a + append a $a + append a $a + set x [encoding convertto jis0208 $a] + list [string length $x] [string range $x 0 1] +} "1024 8C" + +test encoding-10.1 {Tcl_UtfToExternal} { + set f [open [file join [temporaryDirectory] dummy] w] + fconfigure $f -translation binary -encoding shiftjis + puts -nonewline $f "ab\u4e4eg" + close $f + set f [open [file join [temporaryDirectory] dummy] r] + fconfigure $f -translation binary -encoding iso8859-1 + set x [read $f] + close $f + file delete [file join [temporaryDirectory] dummy] + set x +} "ab\x8c\xc1g" + +proc viewable {str} { + set res "" + foreach c [split $str {}] { + if {[string is print $c] && [string is ascii $c]} { + append res $c + } else { + append res "\\u[format %4.4x [scan $c %c]]" + } + } + return "$str ($res)" +} + +test encoding-11.1 {LoadEncodingFile: unknown encoding} {testencoding} { + set system [encoding system] + set path [testencoding path] + encoding system iso8859-1 + testencoding path {} + set x [list [catch {encoding convertto jis0208 \u4e4e} msg] $msg] + testencoding path $path + encoding system $system + lappend x [encoding convertto jis0208 \u4e4e] +} {1 {unknown encoding "jis0208"} 8C} +test encoding-11.2 {LoadEncodingFile: single-byte} { + encoding convertfrom jis0201 \xa1 +} "\uff61" +test encoding-11.3 {LoadEncodingFile: double-byte} { + encoding convertfrom jis0208 8C +} "\u4e4e" +test encoding-11.4 {LoadEncodingFile: multi-byte} { + encoding convertfrom shiftjis \x8c\xc1 +} "\u4e4e" +test encoding-11.5 {LoadEncodingFile: escape file} { + viewable [encoding convertto iso2022 \u4e4e] +} [viewable "\x1b\$B8C\x1b(B"] +test encoding-11.5.1 {LoadEncodingFile: escape file} { + viewable [encoding convertto iso2022-jp \u4e4e] +} [viewable "\x1b\$B8C\x1b(B"] +test encoding-11.6 {LoadEncodingFile: invalid file} {testencoding} { + set system [encoding system] + set path [testencoding path] + encoding system identity + cd [temporaryDirectory] + testencoding path tmp + makeDirectory tmp + makeDirectory [file join tmp encoding] + set f [open [file join tmp encoding splat.enc] w] + fconfigure $f -translation binary + puts $f "abcdefghijklmnop" + close $f + set x [list [catch {encoding convertto splat \u4e4e} msg] $msg] + file delete [file join [temporaryDirectory] tmp encoding splat.enc] + removeDirectory [file join tmp encoding] + removeDirectory tmp + cd [workingDirectory] + testencoding path $path + encoding system $system + set x +} {1 {invalid encoding file "splat"}} + +# OpenEncodingFile is fully tested by the rest of the tests in this file. + +test encoding-12.1 {LoadTableEncoding: normal encoding} { + set x [encoding convertto iso8859-3 \u120] + append x [encoding convertto iso8859-3 \ud5] + append x [encoding convertfrom iso8859-3 \xd5] +} "\xd5?\u120" +test encoding-12.2 {LoadTableEncoding: single-byte encoding} { + set x [encoding convertto iso8859-3 ab\u0120g] + append x [encoding convertfrom iso8859-3 ab\xd5g] +} "ab\xd5gab\u120g" +test encoding-12.3 {LoadTableEncoding: multi-byte encoding} { + set x [encoding convertto shiftjis ab\u4e4eg] + append x [encoding convertfrom shiftjis ab\x8c\xc1g] +} "ab\x8c\xc1gab\u4e4eg" +test encoding-12.4 {LoadTableEncoding: double-byte encoding} { + set x [encoding convertto jis0208 \u4e4e\u3b1] + append x [encoding convertfrom jis0208 8C&A] +} "8C&A\u4e4e\u3b1" +test encoding-12.5 {LoadTableEncoding: symbol encoding} { + set x [encoding convertto symbol \u3b3] + append x [encoding convertto symbol \u67] + append x [encoding convertfrom symbol \x67] +} "\x67\x67\u3b3" + +test encoding-13.1 {LoadEscapeTable} { + viewable [set x [encoding convertto iso2022 ab\u4e4e\u68d9g]] +} [viewable "ab\x1b\$B8C\x1b\$\(DD%\x1b(Bg"] + +test encoding-14.1 {BinaryProc} { + encoding convertto identity \x12\x34\x56\xff\x69 +} "\x12\x34\x56\xc3\xbf\x69" + +test encoding-15.1 {UtfToUtfProc} { + encoding convertto utf-8 \xa3 +} "\xc2\xa3" + +test encoding-15.2 {UtfToUtfProc null character output} { + set x \u0000 + set y [encoding convertto utf-8 \u0000] + set y [encoding convertfrom identity $y] + binary scan $y H* z + list [string bytelength $x] [string bytelength $y] $z +} {2 1 00} + +test encoding-15.3 {UtfToUtfProc null character input} { + set x [encoding convertfrom identity \x00] + set y [encoding convertfrom utf-8 $x] + binary scan [encoding convertto identity $y] H* z + list [string bytelength $x] [string bytelength $y] $z +} {1 2 c080} + +test encoding-16.1 {UnicodeToUtfProc} { + set val [encoding convertfrom unicode NN] + list $val [format %x [scan $val %c]] +} "\u4e4e 4e4e" + +test encoding-17.1 {UtfToUnicodeProc} { +} {} + +test encoding-18.1 {TableToUtfProc} { +} {} + +test encoding-19.1 {TableFromUtfProc} { +} {} + +test encoding-20.1 {TableFreefProc} { +} {} + +test encoding-21.1 {EscapeToUtfProc} { +} {} + +test encoding-22.1 {EscapeFromUtfProc} { +} {} + +set ::iso2022encData "\u001b\$B;d\$I\$b\$G\$O!\"%A%C%W\$49XF~;~\$K\$4EPO?\$\$\$?\$@\$\$\$?\$4=;=j\$r%-%c%C%7%e%\"%&%H\$N:]\$N\u001b(B +\u001b\$B>.@Z= 0} { + if {$count} { + incr count 1 ; # account for newline + append out \n + } + append out $line + incr count $num + } + close $fid + if {[string compare $::iso2022uniData $out]} { + return -code error "iso2022-jp read in doesn't match original" + } + list $count $out +} [list [string length $::iso2022uniData] $::iso2022uniData] +test encoding-23.3 {iso2022-jp escape encoding test} { + # read $fis reads size in chars, not raw bytes. + set fid [open iso2022.txt r] + fconfigure $fid -encoding iso2022-jp + set data [read $fid 50] + close $fid + set data +} [string range $::iso2022uniData 0 49] ; # 0 .. 49 inclusive == 50 +cd [workingDirectory] + +test encoding-24.1 {EscapeFreeProc on open channels} -constraints { + exec +} -setup { + # Bug #524674 input + set file [makeFile { + set f [open [file join [file dirname [info script]] iso2022.txt]] + fconfigure $f -encoding iso2022-jp + gets $f + } iso2022.tcl] +} -body { + exec [interpreter] $file +} -cleanup { + removeFile iso2022.tcl +} -result {} + +test encoding-24.2 {EscapeFreeProc on open channels} -constraints { + exec +} -setup { + # Bug #524674 output + set file [makeFile { + fconfigure stdout -encoding iso2022-jp + puts ab\u4e4e\u68d9g + exit + } iso2022.tcl] +} -body { + viewable [exec [interpreter] $file] +} -cleanup { + removeFile iso2022.tcl +} -result "ab\x1b\$B8C\x1b\$(DD%\x1b(Bg (ab\\u001b\$B8C\\u001b\$(DD%\\u001b(Bg)" + +test encoding-24.3 {EscapeFreeProc on open channels} {stdio} { + # Bug #219314 - if we don't free escape encodings correctly on + # channel closure, we go boom + set file [makeFile { + encoding system iso2022-jp + set a "\u4e4e\u4e5e\u4e5f"; # 3 Japanese Kanji letters + puts $a + } iso2022.tcl] + set f [open "|[list [interpreter] $file]"] + fconfigure $f -encoding iso2022-jp + set count [gets $f line] + close $f + removeFile iso2022.tcl + list $count [viewable $line] +} [list 3 "\u4e4e\u4e5e\u4e5f (\\u4e4e\\u4e5e\\u4e5f)"] + +file delete [file join [temporaryDirectory] iso2022.txt] + +# +# Begin jajp encoding round-trip conformity tests +# +proc foreach-jisx0208 {varName command} { + upvar 1 $varName code + foreach range { + {2121 217E} + {2221 222E} + {223A 2241} + {224A 2250} + {225C 226A} + {2272 2279} + {227E 227E} + {2330 2339} + {2421 2473} + {2521 2576} + {2821 2821} + {282C 282C} + {2837 2837} + + {30 21 4E 7E} + {4F21 4F53} + + {50 21 73 7E} + {7421 7426} + } { + if {[llength $range] == 2} { + # for adhoc range. simple {first last}. inclusive. + set first [scan [lindex $range 0] %x] + set last [scan [lindex $range 1] %x] + for {set i $first} {$i <= $last} {incr i} { + set code $i + uplevel 1 $command + } + } elseif {[llength $range] == 4} { + # for uniform range. + set h0 [scan [lindex $range 0] %x] + set l0 [scan [lindex $range 1] %x] + set hend [scan [lindex $range 2] %x] + set lend [scan [lindex $range 3] %x] + for {set hi $h0} {$hi <= $hend} {incr hi} { + for {set lo $l0} {$lo <= $lend} {incr lo} { + set code [expr {$hi << 8 | ($lo & 0xff)}] + uplevel 1 $command + } + } + } else { + error "really?" + } + } +} +proc gen-jisx0208-euc-jp {code} { + binary format cc \ + [expr {($code >> 8) | 0x80}] [expr {($code & 0xff) | 0x80}] +} +proc gen-jisx0208-iso2022-jp {code} { + binary format a3cca3 \ + "\x1b\$B" [expr {$code >> 8}] [expr {$code & 0xff}] "\x1b(B" +} +proc gen-jisx0208-cp932 {code} { + set c1 [expr {($code >> 8) | 0x80}] + set c2 [expr {($code & 0xff)| 0x80}] + if {$c1 % 2} { + set c1 [expr {($c1 >> 1) + ($c1 < 0xdf ? 0x31 : 0x71)}] + incr c2 [expr {- (0x60 + ($c2 < 0xe0))}] + } else { + set c1 [expr {($c1 >> 1) + ($c1 < 0xdf ? 0x30 : 0x70)}] + incr c2 -2 + } + binary format cc $c1 $c2 +} +proc channel-diff {fa fb} { + set diff {} + while {[gets $fa la] >= 0 && [gets $fb lb] >= 0} { + if {[string compare $la $lb] == 0} continue + # lappend diff $la $lb + + # For more readable (easy to analyze) output. + set code [lindex $la 0] + binary scan [lindex $la 1] H* expected + binary scan [lindex $lb 1] H* got + lappend diff [list $code $expected $got] + } + set diff +} + +# Create char tables. +cd [temporaryDirectory] +foreach enc {cp932 euc-jp iso2022-jp} { + set f [open $enc.chars w] + fconfigure $f -encoding binary + foreach-jisx0208 code { + puts $f [format "%04X %s" $code [gen-jisx0208-$enc $code]] + } + close $f +} +# shiftjis == cp932 for jisx0208. +file copy -force cp932.chars shiftjis.chars + +set NUM 0 +foreach from {cp932 shiftjis euc-jp iso2022-jp} { + foreach to {cp932 shiftjis euc-jp iso2022-jp} { + test encoding-25.[incr NUM] "jisx0208 $from => $to" { + cd [temporaryDirectory] + set f [open $from.chars] + fconfigure $f -encoding $from + set out [open $from.$to.out w] + fconfigure $out -encoding $to + puts -nonewline $out [read $f] + close $out + close $f + + # then compare $to.chars <=> $from.to.out as binary. + set fa [open $to.chars] + fconfigure $fa -encoding binary + set fb [open $from.$to.out] + fconfigure $fb -encoding binary + set diff [channel-diff $fa $fb] + close $fa + close $fb + + # Difference should be empty. + set diff + } {} + } +} + +eval [list file delete] [glob -directory [temporaryDirectory] *.chars *.out] +# ===> Cut here <=== + +# EscapeFreeProc, GetTableEncoding, unilen +# are fully tested by the rest of this file + +# cleanup +::tcltest::cleanupTests +return