sl@0: # This file contains a collection of tests for tclEncoding.c sl@0: # Sourcing this file into Tcl runs the tests and generates output for sl@0: # errors. No output means no errors were found. sl@0: # sl@0: # Copyright (c) 1997 Sun Microsystems, Inc. sl@0: # Copyright (c) 1998-1999 by Scriptics Corporation. sl@0: # sl@0: # See the file "license.terms" for information on usage and redistribution sl@0: # of this file, and for a DISCLAIMER OF ALL WARRANTIES. sl@0: # sl@0: # RCS: @(#) $Id: encoding.test,v 1.16.2.3 2006/10/05 21:24:56 hobbs Exp $ sl@0: sl@0: package require tcltest 2 sl@0: namespace import -force ::tcltest::* sl@0: sl@0: proc toutf {args} { sl@0: global x sl@0: lappend x "toutf $args" sl@0: } sl@0: proc fromutf {args} { sl@0: global x sl@0: lappend x "fromutf $args" sl@0: } sl@0: sl@0: # Some tests require the testencoding command sl@0: testConstraint testencoding [llength [info commands testencoding]] sl@0: testConstraint exec [llength [info commands exec]] sl@0: sl@0: # TclInitEncodingSubsystem is tested by the rest of this file sl@0: # TclFinalizeEncodingSubsystem is not currently tested sl@0: sl@0: test encoding-1.1 {Tcl_GetEncoding: system encoding} {testencoding} { sl@0: testencoding create foo toutf fromutf sl@0: set old [encoding system] sl@0: encoding system foo sl@0: set x {} sl@0: encoding convertto abcd sl@0: encoding system $old sl@0: testencoding delete foo sl@0: set x sl@0: } {{fromutf }} sl@0: test encoding-1.2 {Tcl_GetEncoding: existing encoding} {testencoding} { sl@0: testencoding create foo toutf fromutf sl@0: set x {} sl@0: encoding convertto foo abcd sl@0: testencoding delete foo sl@0: set x sl@0: } {{fromutf }} sl@0: test encoding-1.3 {Tcl_GetEncoding: load encoding} { sl@0: list [encoding convertto jis0208 \u4e4e] \ sl@0: [encoding convertfrom jis0208 8C] sl@0: } "8C \u4e4e" sl@0: sl@0: test encoding-2.1 {Tcl_FreeEncoding: refcount == 0} { sl@0: encoding convertto jis0208 \u4e4e sl@0: } {8C} sl@0: test encoding-2.2 {Tcl_FreeEncoding: refcount != 0} {testencoding} { sl@0: set system [encoding system] sl@0: set path [testencoding path] sl@0: encoding system shiftjis ;# incr ref count sl@0: testencoding path [list [pwd]] sl@0: set x [encoding convertto shiftjis \u4e4e] ;# old one found sl@0: encoding system identity sl@0: lappend x [catch {encoding convertto shiftjis \u4e4e} msg] $msg sl@0: encoding system identity sl@0: testencoding path $path sl@0: encoding system $system sl@0: set x sl@0: } "\u008c\u00c1 1 {unknown encoding \"shiftjis\"}" sl@0: sl@0: test encoding-3.1 {Tcl_GetEncodingName, NULL} { sl@0: set old [encoding system] sl@0: encoding system shiftjis sl@0: set x [encoding system] sl@0: encoding system $old sl@0: set x sl@0: } {shiftjis} sl@0: test encoding-3.2 {Tcl_GetEncodingName, non-null} { sl@0: set old [fconfigure stdout -encoding] sl@0: fconfigure stdout -encoding jis0208 sl@0: set x [fconfigure stdout -encoding] sl@0: fconfigure stdout -encoding $old sl@0: set x sl@0: } {jis0208} sl@0: sl@0: test encoding-4.1 {Tcl_GetEncodingNames} {testencoding} { sl@0: cd [makeDirectory tmp] sl@0: makeDirectory [file join tmp encoding] sl@0: makeFile {} [file join tmp encoding junk.enc] sl@0: makeFile {} [file join tmp encoding junk2.enc] sl@0: set path [testencoding path] sl@0: testencoding path {} sl@0: catch {unset encodings} sl@0: catch {unset x} sl@0: foreach encoding [encoding names] { sl@0: set encodings($encoding) 1 sl@0: } sl@0: testencoding path [list [pwd]] sl@0: foreach encoding [encoding names] { sl@0: if {![info exists encodings($encoding)]} { sl@0: lappend x $encoding sl@0: } sl@0: } sl@0: testencoding path $path sl@0: cd [workingDirectory] sl@0: removeFile [file join tmp encoding junk2.enc] sl@0: removeFile [file join tmp encoding junk.enc] sl@0: removeDirectory [file join tmp encoding] sl@0: removeDirectory tmp sl@0: lsort $x sl@0: } {junk junk2} sl@0: sl@0: test encoding-5.1 {Tcl_SetSystemEncoding} { sl@0: set old [encoding system] sl@0: encoding system jis0208 sl@0: set x [encoding convertto \u4e4e] sl@0: encoding system identity sl@0: encoding system $old sl@0: set x sl@0: } {8C} sl@0: test encoding-5.2 {Tcl_SetSystemEncoding: test ref count} { sl@0: set old [encoding system] sl@0: encoding system $old sl@0: string compare $old [encoding system] sl@0: } {0} sl@0: sl@0: test encoding-6.1 {Tcl_CreateEncoding: new} {testencoding} { sl@0: testencoding create foo {toutf 1} {fromutf 2} sl@0: set x {} sl@0: encoding convertfrom foo abcd sl@0: encoding convertto foo abcd sl@0: testencoding delete foo sl@0: set x sl@0: } {{toutf 1} {fromutf 2}} sl@0: test encoding-6.2 {Tcl_CreateEncoding: replace encoding} {testencoding} { sl@0: testencoding create foo {toutf a} {fromutf b} sl@0: set x {} sl@0: encoding convertfrom foo abcd sl@0: encoding convertto foo abcd sl@0: testencoding delete foo sl@0: set x sl@0: } {{toutf a} {fromutf b}} sl@0: sl@0: test encoding-7.1 {Tcl_ExternalToUtfDString: small buffer} { sl@0: encoding convertfrom jis0208 8c8c8c8c sl@0: } "\u543e\u543e\u543e\u543e" sl@0: test encoding-7.2 {Tcl_UtfToExternalDString: big buffer} { sl@0: set a 8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C sl@0: append a $a sl@0: append a $a sl@0: append a $a sl@0: append a $a sl@0: set x [encoding convertfrom jis0208 $a] sl@0: list [string length $x] [string index $x 0] sl@0: } "512 \u4e4e" sl@0: sl@0: test encoding-8.1 {Tcl_ExternalToUtf} { sl@0: set f [open [file join [temporaryDirectory] dummy] w] sl@0: fconfigure $f -translation binary -encoding iso8859-1 sl@0: puts -nonewline $f "ab\x8c\xc1g" sl@0: close $f sl@0: set f [open [file join [temporaryDirectory] dummy] r] sl@0: fconfigure $f -translation binary -encoding shiftjis sl@0: set x [read $f] sl@0: close $f sl@0: file delete [file join [temporaryDirectory] dummy] sl@0: set x sl@0: } "ab\u4e4eg" sl@0: sl@0: test encoding-9.1 {Tcl_UtfToExternalDString: small buffer} { sl@0: encoding convertto jis0208 "\u543e\u543e\u543e\u543e" sl@0: } {8c8c8c8c} sl@0: test encoding-9.2 {Tcl_UtfToExternalDString: big buffer} { sl@0: set a \u4e4e\u4e4e\u4e4e\u4e4e\u4e4e\u4e4e\u4e4e\u4e4e sl@0: append a $a sl@0: append a $a sl@0: append a $a sl@0: append a $a sl@0: append a $a sl@0: append a $a sl@0: set x [encoding convertto jis0208 $a] sl@0: list [string length $x] [string range $x 0 1] sl@0: } "1024 8C" sl@0: sl@0: test encoding-10.1 {Tcl_UtfToExternal} { sl@0: set f [open [file join [temporaryDirectory] dummy] w] sl@0: fconfigure $f -translation binary -encoding shiftjis sl@0: puts -nonewline $f "ab\u4e4eg" sl@0: close $f sl@0: set f [open [file join [temporaryDirectory] dummy] r] sl@0: fconfigure $f -translation binary -encoding iso8859-1 sl@0: set x [read $f] sl@0: close $f sl@0: file delete [file join [temporaryDirectory] dummy] sl@0: set x sl@0: } "ab\x8c\xc1g" sl@0: sl@0: proc viewable {str} { sl@0: set res "" sl@0: foreach c [split $str {}] { sl@0: if {[string is print $c] && [string is ascii $c]} { sl@0: append res $c sl@0: } else { sl@0: append res "\\u[format %4.4x [scan $c %c]]" sl@0: } sl@0: } sl@0: return "$str ($res)" sl@0: } sl@0: sl@0: test encoding-11.1 {LoadEncodingFile: unknown encoding} {testencoding} { sl@0: set system [encoding system] sl@0: set path [testencoding path] sl@0: encoding system iso8859-1 sl@0: testencoding path {} sl@0: set x [list [catch {encoding convertto jis0208 \u4e4e} msg] $msg] sl@0: testencoding path $path sl@0: encoding system $system sl@0: lappend x [encoding convertto jis0208 \u4e4e] sl@0: } {1 {unknown encoding "jis0208"} 8C} sl@0: test encoding-11.2 {LoadEncodingFile: single-byte} { sl@0: encoding convertfrom jis0201 \xa1 sl@0: } "\uff61" sl@0: test encoding-11.3 {LoadEncodingFile: double-byte} { sl@0: encoding convertfrom jis0208 8C sl@0: } "\u4e4e" sl@0: test encoding-11.4 {LoadEncodingFile: multi-byte} { sl@0: encoding convertfrom shiftjis \x8c\xc1 sl@0: } "\u4e4e" sl@0: test encoding-11.5 {LoadEncodingFile: escape file} { sl@0: viewable [encoding convertto iso2022 \u4e4e] sl@0: } [viewable "\x1b\$B8C\x1b(B"] sl@0: test encoding-11.5.1 {LoadEncodingFile: escape file} { sl@0: viewable [encoding convertto iso2022-jp \u4e4e] sl@0: } [viewable "\x1b\$B8C\x1b(B"] sl@0: test encoding-11.6 {LoadEncodingFile: invalid file} {testencoding} { sl@0: set system [encoding system] sl@0: set path [testencoding path] sl@0: encoding system identity sl@0: cd [temporaryDirectory] sl@0: testencoding path tmp sl@0: makeDirectory tmp sl@0: makeDirectory [file join tmp encoding] sl@0: set f [open [file join tmp encoding splat.enc] w] sl@0: fconfigure $f -translation binary sl@0: puts $f "abcdefghijklmnop" sl@0: close $f sl@0: set x [list [catch {encoding convertto splat \u4e4e} msg] $msg] sl@0: file delete [file join [temporaryDirectory] tmp encoding splat.enc] sl@0: removeDirectory [file join tmp encoding] sl@0: removeDirectory tmp sl@0: cd [workingDirectory] sl@0: testencoding path $path sl@0: encoding system $system sl@0: set x sl@0: } {1 {invalid encoding file "splat"}} sl@0: sl@0: # OpenEncodingFile is fully tested by the rest of the tests in this file. sl@0: sl@0: test encoding-12.1 {LoadTableEncoding: normal encoding} { sl@0: set x [encoding convertto iso8859-3 \u120] sl@0: append x [encoding convertto iso8859-3 \ud5] sl@0: append x [encoding convertfrom iso8859-3 \xd5] sl@0: } "\xd5?\u120" sl@0: test encoding-12.2 {LoadTableEncoding: single-byte encoding} { sl@0: set x [encoding convertto iso8859-3 ab\u0120g] sl@0: append x [encoding convertfrom iso8859-3 ab\xd5g] sl@0: } "ab\xd5gab\u120g" sl@0: test encoding-12.3 {LoadTableEncoding: multi-byte encoding} { sl@0: set x [encoding convertto shiftjis ab\u4e4eg] sl@0: append x [encoding convertfrom shiftjis ab\x8c\xc1g] sl@0: } "ab\x8c\xc1gab\u4e4eg" sl@0: test encoding-12.4 {LoadTableEncoding: double-byte encoding} { sl@0: set x [encoding convertto jis0208 \u4e4e\u3b1] sl@0: append x [encoding convertfrom jis0208 8C&A] sl@0: } "8C&A\u4e4e\u3b1" sl@0: test encoding-12.5 {LoadTableEncoding: symbol encoding} { sl@0: set x [encoding convertto symbol \u3b3] sl@0: append x [encoding convertto symbol \u67] sl@0: append x [encoding convertfrom symbol \x67] sl@0: } "\x67\x67\u3b3" sl@0: sl@0: test encoding-13.1 {LoadEscapeTable} { sl@0: viewable [set x [encoding convertto iso2022 ab\u4e4e\u68d9g]] sl@0: } [viewable "ab\x1b\$B8C\x1b\$\(DD%\x1b(Bg"] sl@0: sl@0: test encoding-14.1 {BinaryProc} { sl@0: encoding convertto identity \x12\x34\x56\xff\x69 sl@0: } "\x12\x34\x56\xc3\xbf\x69" sl@0: sl@0: test encoding-15.1 {UtfToUtfProc} { sl@0: encoding convertto utf-8 \xa3 sl@0: } "\xc2\xa3" sl@0: sl@0: test encoding-15.2 {UtfToUtfProc null character output} { sl@0: set x \u0000 sl@0: set y [encoding convertto utf-8 \u0000] sl@0: set y [encoding convertfrom identity $y] sl@0: binary scan $y H* z sl@0: list [string bytelength $x] [string bytelength $y] $z sl@0: } {2 1 00} sl@0: sl@0: test encoding-15.3 {UtfToUtfProc null character input} { sl@0: set x [encoding convertfrom identity \x00] sl@0: set y [encoding convertfrom utf-8 $x] sl@0: binary scan [encoding convertto identity $y] H* z sl@0: list [string bytelength $x] [string bytelength $y] $z sl@0: } {1 2 c080} sl@0: sl@0: test encoding-16.1 {UnicodeToUtfProc} { sl@0: set val [encoding convertfrom unicode NN] sl@0: list $val [format %x [scan $val %c]] sl@0: } "\u4e4e 4e4e" sl@0: sl@0: test encoding-17.1 {UtfToUnicodeProc} { sl@0: } {} sl@0: sl@0: test encoding-18.1 {TableToUtfProc} { sl@0: } {} sl@0: sl@0: test encoding-19.1 {TableFromUtfProc} { sl@0: } {} sl@0: sl@0: test encoding-20.1 {TableFreefProc} { sl@0: } {} sl@0: sl@0: test encoding-21.1 {EscapeToUtfProc} { sl@0: } {} sl@0: sl@0: test encoding-22.1 {EscapeFromUtfProc} { sl@0: } {} sl@0: sl@0: set ::iso2022encData "\u001b\$B;d\$I\$b\$G\$O!\"%A%C%W\$49XF~;~\$K\$4EPO?\$\$\$?\$@\$\$\$?\$4=;=j\$r%-%c%C%7%e%\"%&%H\$N:]\$N\u001b(B sl@0: \u001b\$B>.@Z= 0} { sl@0: if {$count} { sl@0: incr count 1 ; # account for newline sl@0: append out \n sl@0: } sl@0: append out $line sl@0: incr count $num sl@0: } sl@0: close $fid sl@0: if {[string compare $::iso2022uniData $out]} { sl@0: return -code error "iso2022-jp read in doesn't match original" sl@0: } sl@0: list $count $out sl@0: } [list [string length $::iso2022uniData] $::iso2022uniData] sl@0: test encoding-23.3 {iso2022-jp escape encoding test} { sl@0: # read $fis reads size in chars, not raw bytes. sl@0: set fid [open iso2022.txt r] sl@0: fconfigure $fid -encoding iso2022-jp sl@0: set data [read $fid 50] sl@0: close $fid sl@0: set data sl@0: } [string range $::iso2022uniData 0 49] ; # 0 .. 49 inclusive == 50 sl@0: cd [workingDirectory] sl@0: sl@0: test encoding-24.1 {EscapeFreeProc on open channels} -constraints { sl@0: exec sl@0: } -setup { sl@0: # Bug #524674 input sl@0: set file [makeFile { sl@0: set f [open [file join [file dirname [info script]] iso2022.txt]] sl@0: fconfigure $f -encoding iso2022-jp sl@0: gets $f sl@0: } iso2022.tcl] sl@0: } -body { sl@0: exec [interpreter] $file sl@0: } -cleanup { sl@0: removeFile iso2022.tcl sl@0: } -result {} sl@0: sl@0: test encoding-24.2 {EscapeFreeProc on open channels} -constraints { sl@0: exec sl@0: } -setup { sl@0: # Bug #524674 output sl@0: set file [makeFile { sl@0: fconfigure stdout -encoding iso2022-jp sl@0: puts ab\u4e4e\u68d9g sl@0: exit sl@0: } iso2022.tcl] sl@0: } -body { sl@0: viewable [exec [interpreter] $file] sl@0: } -cleanup { sl@0: removeFile iso2022.tcl sl@0: } -result "ab\x1b\$B8C\x1b\$(DD%\x1b(Bg (ab\\u001b\$B8C\\u001b\$(DD%\\u001b(Bg)" sl@0: sl@0: test encoding-24.3 {EscapeFreeProc on open channels} {stdio} { sl@0: # Bug #219314 - if we don't free escape encodings correctly on sl@0: # channel closure, we go boom sl@0: set file [makeFile { sl@0: encoding system iso2022-jp sl@0: set a "\u4e4e\u4e5e\u4e5f"; # 3 Japanese Kanji letters sl@0: puts $a sl@0: } iso2022.tcl] sl@0: set f [open "|[list [interpreter] $file]"] sl@0: fconfigure $f -encoding iso2022-jp sl@0: set count [gets $f line] sl@0: close $f sl@0: removeFile iso2022.tcl sl@0: list $count [viewable $line] sl@0: } [list 3 "\u4e4e\u4e5e\u4e5f (\\u4e4e\\u4e5e\\u4e5f)"] sl@0: sl@0: file delete [file join [temporaryDirectory] iso2022.txt] sl@0: sl@0: # sl@0: # Begin jajp encoding round-trip conformity tests sl@0: # sl@0: proc foreach-jisx0208 {varName command} { sl@0: upvar 1 $varName code sl@0: foreach range { sl@0: {2121 217E} sl@0: {2221 222E} sl@0: {223A 2241} sl@0: {224A 2250} sl@0: {225C 226A} sl@0: {2272 2279} sl@0: {227E 227E} sl@0: {2330 2339} sl@0: {2421 2473} sl@0: {2521 2576} sl@0: {2821 2821} sl@0: {282C 282C} sl@0: {2837 2837} sl@0: sl@0: {30 21 4E 7E} sl@0: {4F21 4F53} sl@0: sl@0: {50 21 73 7E} sl@0: {7421 7426} sl@0: } { sl@0: if {[llength $range] == 2} { sl@0: # for adhoc range. simple {first last}. inclusive. sl@0: set first [scan [lindex $range 0] %x] sl@0: set last [scan [lindex $range 1] %x] sl@0: for {set i $first} {$i <= $last} {incr i} { sl@0: set code $i sl@0: uplevel 1 $command sl@0: } sl@0: } elseif {[llength $range] == 4} { sl@0: # for uniform range. sl@0: set h0 [scan [lindex $range 0] %x] sl@0: set l0 [scan [lindex $range 1] %x] sl@0: set hend [scan [lindex $range 2] %x] sl@0: set lend [scan [lindex $range 3] %x] sl@0: for {set hi $h0} {$hi <= $hend} {incr hi} { sl@0: for {set lo $l0} {$lo <= $lend} {incr lo} { sl@0: set code [expr {$hi << 8 | ($lo & 0xff)}] sl@0: uplevel 1 $command sl@0: } sl@0: } sl@0: } else { sl@0: error "really?" sl@0: } sl@0: } sl@0: } sl@0: proc gen-jisx0208-euc-jp {code} { sl@0: binary format cc \ sl@0: [expr {($code >> 8) | 0x80}] [expr {($code & 0xff) | 0x80}] sl@0: } sl@0: proc gen-jisx0208-iso2022-jp {code} { sl@0: binary format a3cca3 \ sl@0: "\x1b\$B" [expr {$code >> 8}] [expr {$code & 0xff}] "\x1b(B" sl@0: } sl@0: proc gen-jisx0208-cp932 {code} { sl@0: set c1 [expr {($code >> 8) | 0x80}] sl@0: set c2 [expr {($code & 0xff)| 0x80}] sl@0: if {$c1 % 2} { sl@0: set c1 [expr {($c1 >> 1) + ($c1 < 0xdf ? 0x31 : 0x71)}] sl@0: incr c2 [expr {- (0x60 + ($c2 < 0xe0))}] sl@0: } else { sl@0: set c1 [expr {($c1 >> 1) + ($c1 < 0xdf ? 0x30 : 0x70)}] sl@0: incr c2 -2 sl@0: } sl@0: binary format cc $c1 $c2 sl@0: } sl@0: proc channel-diff {fa fb} { sl@0: set diff {} sl@0: while {[gets $fa la] >= 0 && [gets $fb lb] >= 0} { sl@0: if {[string compare $la $lb] == 0} continue sl@0: # lappend diff $la $lb sl@0: sl@0: # For more readable (easy to analyze) output. sl@0: set code [lindex $la 0] sl@0: binary scan [lindex $la 1] H* expected sl@0: binary scan [lindex $lb 1] H* got sl@0: lappend diff [list $code $expected $got] sl@0: } sl@0: set diff sl@0: } sl@0: sl@0: # Create char tables. sl@0: cd [temporaryDirectory] sl@0: foreach enc {cp932 euc-jp iso2022-jp} { sl@0: set f [open $enc.chars w] sl@0: fconfigure $f -encoding binary sl@0: foreach-jisx0208 code { sl@0: puts $f [format "%04X %s" $code [gen-jisx0208-$enc $code]] sl@0: } sl@0: close $f sl@0: } sl@0: # shiftjis == cp932 for jisx0208. sl@0: file copy -force cp932.chars shiftjis.chars sl@0: sl@0: set NUM 0 sl@0: foreach from {cp932 shiftjis euc-jp iso2022-jp} { sl@0: foreach to {cp932 shiftjis euc-jp iso2022-jp} { sl@0: test encoding-25.[incr NUM] "jisx0208 $from => $to" { sl@0: cd [temporaryDirectory] sl@0: set f [open $from.chars] sl@0: fconfigure $f -encoding $from sl@0: set out [open $from.$to.out w] sl@0: fconfigure $out -encoding $to sl@0: puts -nonewline $out [read $f] sl@0: close $out sl@0: close $f sl@0: sl@0: # then compare $to.chars <=> $from.to.out as binary. sl@0: set fa [open $to.chars] sl@0: fconfigure $fa -encoding binary sl@0: set fb [open $from.$to.out] sl@0: fconfigure $fb -encoding binary sl@0: set diff [channel-diff $fa $fb] sl@0: close $fa sl@0: close $fb sl@0: sl@0: # Difference should be empty. sl@0: set diff sl@0: } {} sl@0: } sl@0: } sl@0: sl@0: eval [list file delete] [glob -directory [temporaryDirectory] *.chars *.out] sl@0: # ===> Cut here <=== sl@0: sl@0: # EscapeFreeProc, GetTableEncoding, unilen sl@0: # are fully tested by the rest of this file sl@0: sl@0: # cleanup sl@0: ::tcltest::cleanupTests sl@0: return