os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/tests/encoding.test
Update contrib.
1 # This file contains a collection of tests for tclEncoding.c
2 # Sourcing this file into Tcl runs the tests and generates output for
3 # errors. No output means no errors were found.
5 # Copyright (c) 1997 Sun Microsystems, Inc.
6 # Copyright (c) 1998-1999 by Scriptics Corporation.
8 # See the file "license.terms" for information on usage and redistribution
9 # of this file, and for a DISCLAIMER OF ALL WARRANTIES.
11 # RCS: @(#) $Id: encoding.test,v 1.16.2.3 2006/10/05 21:24:56 hobbs Exp $
13 package require tcltest 2
14 namespace import -force ::tcltest::*
18 lappend x "toutf $args"
22 lappend x "fromutf $args"
25 # Some tests require the testencoding command
26 testConstraint testencoding [llength [info commands testencoding]]
27 testConstraint exec [llength [info commands exec]]
29 # TclInitEncodingSubsystem is tested by the rest of this file
30 # TclFinalizeEncodingSubsystem is not currently tested
32 test encoding-1.1 {Tcl_GetEncoding: system encoding} {testencoding} {
33 testencoding create foo toutf fromutf
34 set old [encoding system]
37 encoding convertto abcd
39 testencoding delete foo
42 test encoding-1.2 {Tcl_GetEncoding: existing encoding} {testencoding} {
43 testencoding create foo toutf fromutf
45 encoding convertto foo abcd
46 testencoding delete foo
49 test encoding-1.3 {Tcl_GetEncoding: load encoding} {
50 list [encoding convertto jis0208 \u4e4e] \
51 [encoding convertfrom jis0208 8C]
54 test encoding-2.1 {Tcl_FreeEncoding: refcount == 0} {
55 encoding convertto jis0208 \u4e4e
57 test encoding-2.2 {Tcl_FreeEncoding: refcount != 0} {testencoding} {
58 set system [encoding system]
59 set path [testencoding path]
60 encoding system shiftjis ;# incr ref count
61 testencoding path [list [pwd]]
62 set x [encoding convertto shiftjis \u4e4e] ;# old one found
63 encoding system identity
64 lappend x [catch {encoding convertto shiftjis \u4e4e} msg] $msg
65 encoding system identity
66 testencoding path $path
67 encoding system $system
69 } "\u008c\u00c1 1 {unknown encoding \"shiftjis\"}"
71 test encoding-3.1 {Tcl_GetEncodingName, NULL} {
72 set old [encoding system]
73 encoding system shiftjis
74 set x [encoding system]
78 test encoding-3.2 {Tcl_GetEncodingName, non-null} {
79 set old [fconfigure stdout -encoding]
80 fconfigure stdout -encoding jis0208
81 set x [fconfigure stdout -encoding]
82 fconfigure stdout -encoding $old
86 test encoding-4.1 {Tcl_GetEncodingNames} {testencoding} {
87 cd [makeDirectory tmp]
88 makeDirectory [file join tmp encoding]
89 makeFile {} [file join tmp encoding junk.enc]
90 makeFile {} [file join tmp encoding junk2.enc]
91 set path [testencoding path]
93 catch {unset encodings}
95 foreach encoding [encoding names] {
96 set encodings($encoding) 1
98 testencoding path [list [pwd]]
99 foreach encoding [encoding names] {
100 if {![info exists encodings($encoding)]} {
104 testencoding path $path
105 cd [workingDirectory]
106 removeFile [file join tmp encoding junk2.enc]
107 removeFile [file join tmp encoding junk.enc]
108 removeDirectory [file join tmp encoding]
113 test encoding-5.1 {Tcl_SetSystemEncoding} {
114 set old [encoding system]
115 encoding system jis0208
116 set x [encoding convertto \u4e4e]
117 encoding system identity
121 test encoding-5.2 {Tcl_SetSystemEncoding: test ref count} {
122 set old [encoding system]
124 string compare $old [encoding system]
127 test encoding-6.1 {Tcl_CreateEncoding: new} {testencoding} {
128 testencoding create foo {toutf 1} {fromutf 2}
130 encoding convertfrom foo abcd
131 encoding convertto foo abcd
132 testencoding delete foo
134 } {{toutf 1} {fromutf 2}}
135 test encoding-6.2 {Tcl_CreateEncoding: replace encoding} {testencoding} {
136 testencoding create foo {toutf a} {fromutf b}
138 encoding convertfrom foo abcd
139 encoding convertto foo abcd
140 testencoding delete foo
142 } {{toutf a} {fromutf b}}
144 test encoding-7.1 {Tcl_ExternalToUtfDString: small buffer} {
145 encoding convertfrom jis0208 8c8c8c8c
146 } "\u543e\u543e\u543e\u543e"
147 test encoding-7.2 {Tcl_UtfToExternalDString: big buffer} {
148 set a 8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C
153 set x [encoding convertfrom jis0208 $a]
154 list [string length $x] [string index $x 0]
157 test encoding-8.1 {Tcl_ExternalToUtf} {
158 set f [open [file join [temporaryDirectory] dummy] w]
159 fconfigure $f -translation binary -encoding iso8859-1
160 puts -nonewline $f "ab\x8c\xc1g"
162 set f [open [file join [temporaryDirectory] dummy] r]
163 fconfigure $f -translation binary -encoding shiftjis
166 file delete [file join [temporaryDirectory] dummy]
170 test encoding-9.1 {Tcl_UtfToExternalDString: small buffer} {
171 encoding convertto jis0208 "\u543e\u543e\u543e\u543e"
173 test encoding-9.2 {Tcl_UtfToExternalDString: big buffer} {
174 set a \u4e4e\u4e4e\u4e4e\u4e4e\u4e4e\u4e4e\u4e4e\u4e4e
181 set x [encoding convertto jis0208 $a]
182 list [string length $x] [string range $x 0 1]
185 test encoding-10.1 {Tcl_UtfToExternal} {
186 set f [open [file join [temporaryDirectory] dummy] w]
187 fconfigure $f -translation binary -encoding shiftjis
188 puts -nonewline $f "ab\u4e4eg"
190 set f [open [file join [temporaryDirectory] dummy] r]
191 fconfigure $f -translation binary -encoding iso8859-1
194 file delete [file join [temporaryDirectory] dummy]
198 proc viewable {str} {
200 foreach c [split $str {}] {
201 if {[string is print $c] && [string is ascii $c]} {
204 append res "\\u[format %4.4x [scan $c %c]]"
210 test encoding-11.1 {LoadEncodingFile: unknown encoding} {testencoding} {
211 set system [encoding system]
212 set path [testencoding path]
213 encoding system iso8859-1
215 set x [list [catch {encoding convertto jis0208 \u4e4e} msg] $msg]
216 testencoding path $path
217 encoding system $system
218 lappend x [encoding convertto jis0208 \u4e4e]
219 } {1 {unknown encoding "jis0208"} 8C}
220 test encoding-11.2 {LoadEncodingFile: single-byte} {
221 encoding convertfrom jis0201 \xa1
223 test encoding-11.3 {LoadEncodingFile: double-byte} {
224 encoding convertfrom jis0208 8C
226 test encoding-11.4 {LoadEncodingFile: multi-byte} {
227 encoding convertfrom shiftjis \x8c\xc1
229 test encoding-11.5 {LoadEncodingFile: escape file} {
230 viewable [encoding convertto iso2022 \u4e4e]
231 } [viewable "\x1b\$B8C\x1b(B"]
232 test encoding-11.5.1 {LoadEncodingFile: escape file} {
233 viewable [encoding convertto iso2022-jp \u4e4e]
234 } [viewable "\x1b\$B8C\x1b(B"]
235 test encoding-11.6 {LoadEncodingFile: invalid file} {testencoding} {
236 set system [encoding system]
237 set path [testencoding path]
238 encoding system identity
239 cd [temporaryDirectory]
240 testencoding path tmp
242 makeDirectory [file join tmp encoding]
243 set f [open [file join tmp encoding splat.enc] w]
244 fconfigure $f -translation binary
245 puts $f "abcdefghijklmnop"
247 set x [list [catch {encoding convertto splat \u4e4e} msg] $msg]
248 file delete [file join [temporaryDirectory] tmp encoding splat.enc]
249 removeDirectory [file join tmp encoding]
251 cd [workingDirectory]
252 testencoding path $path
253 encoding system $system
255 } {1 {invalid encoding file "splat"}}
257 # OpenEncodingFile is fully tested by the rest of the tests in this file.
259 test encoding-12.1 {LoadTableEncoding: normal encoding} {
260 set x [encoding convertto iso8859-3 \u120]
261 append x [encoding convertto iso8859-3 \ud5]
262 append x [encoding convertfrom iso8859-3 \xd5]
264 test encoding-12.2 {LoadTableEncoding: single-byte encoding} {
265 set x [encoding convertto iso8859-3 ab\u0120g]
266 append x [encoding convertfrom iso8859-3 ab\xd5g]
268 test encoding-12.3 {LoadTableEncoding: multi-byte encoding} {
269 set x [encoding convertto shiftjis ab\u4e4eg]
270 append x [encoding convertfrom shiftjis ab\x8c\xc1g]
271 } "ab\x8c\xc1gab\u4e4eg"
272 test encoding-12.4 {LoadTableEncoding: double-byte encoding} {
273 set x [encoding convertto jis0208 \u4e4e\u3b1]
274 append x [encoding convertfrom jis0208 8C&A]
276 test encoding-12.5 {LoadTableEncoding: symbol encoding} {
277 set x [encoding convertto symbol \u3b3]
278 append x [encoding convertto symbol \u67]
279 append x [encoding convertfrom symbol \x67]
282 test encoding-13.1 {LoadEscapeTable} {
283 viewable [set x [encoding convertto iso2022 ab\u4e4e\u68d9g]]
284 } [viewable "ab\x1b\$B8C\x1b\$\(DD%\x1b(Bg"]
286 test encoding-14.1 {BinaryProc} {
287 encoding convertto identity \x12\x34\x56\xff\x69
288 } "\x12\x34\x56\xc3\xbf\x69"
290 test encoding-15.1 {UtfToUtfProc} {
291 encoding convertto utf-8 \xa3
294 test encoding-15.2 {UtfToUtfProc null character output} {
296 set y [encoding convertto utf-8 \u0000]
297 set y [encoding convertfrom identity $y]
299 list [string bytelength $x] [string bytelength $y] $z
302 test encoding-15.3 {UtfToUtfProc null character input} {
303 set x [encoding convertfrom identity \x00]
304 set y [encoding convertfrom utf-8 $x]
305 binary scan [encoding convertto identity $y] H* z
306 list [string bytelength $x] [string bytelength $y] $z
309 test encoding-16.1 {UnicodeToUtfProc} {
310 set val [encoding convertfrom unicode NN]
311 list $val [format %x [scan $val %c]]
314 test encoding-17.1 {UtfToUnicodeProc} {
317 test encoding-18.1 {TableToUtfProc} {
320 test encoding-19.1 {TableFromUtfProc} {
323 test encoding-20.1 {TableFreefProc} {
326 test encoding-21.1 {EscapeToUtfProc} {
329 test encoding-22.1 {EscapeFromUtfProc} {
332 set ::iso2022encData "\u001b\$B;d\$I\$b\$G\$O!\"%A%C%W\$49XF~;~\$K\$4EPO?\$\$\$?\$@\$\$\$?\$4=;=j\$r%-%c%C%7%e%\"%&%H\$N:]\$N\u001b(B
333 \u001b\$B>.@Z<jAwIU@h\$H\$7\$F;HMQ\$7\$F\$*\$j\$^\$9!#62\$lF~\$j\$^\$9\$,!\"@5\$7\$\$=;=j\$r\$4EPO?\$7\$J\$*\u001b(B
334 \u001b\$B\$*4j\$\$\$\$\$?\$7\$^\$9!#\$^\$?!\"BgJQ62=L\$G\$9\$,!\"=;=jJQ99\$N\$\"\$H!\"F|K\\8l%5!<%S%9It!J\u001b(B
335 casino_japanese@___.com \u001b\$B!K\$^\$G\$4=;=jJQ99:Q\$NO\"Mm\$r\$\$\$?\$@\$1\$J\$\$\$G\u001b(B
336 \u001b\$B\$7\$g\$&\$+!)\u001b(B"
338 set ::iso2022uniData [encoding convertfrom iso2022-jp $::iso2022encData]
339 set ::iso2022uniData2 "\u79c1\u3069\u3082\u3067\u306f\u3001\u30c1\u30c3\u30d7\u3054\u8cfc\u5165\u6642\u306b\u3054\u767b\u9332\u3044\u305f\u3060\u3044\u305f\u3054\u4f4f\u6240\u3092\u30ad\u30e3\u30c3\u30b7\u30e5\u30a2\u30a6\u30c8\u306e\u969b\u306e
340 \u5c0f\u5207\u624b\u9001\u4ed8\u5148\u3068\u3057\u3066\u4f7f\u7528\u3057\u3066\u304a\u308a\u307e\u3059\u3002\u6050\u308c\u5165\u308a\u307e\u3059\u304c\u3001\u6b63\u3057\u3044\u4f4f\u6240\u3092\u3054\u767b\u9332\u3057\u306a\u304a
341 \u304a\u9858\u3044\u3044\u305f\u3057\u307e\u3059\u3002\u307e\u305f\u3001\u5927\u5909\u6050\u7e2e\u3067\u3059\u304c\u3001\u4f4f\u6240\u5909\u66f4\u306e\u3042\u3068\u3001\u65e5\u672c\u8a9e\u30b5\u30fc\u30d3\u30b9\u90e8\uff08
342 \u0063\u0061\u0073\u0069\u006e\u006f\u005f\u006a\u0061\u0070\u0061\u006e\u0065\u0073\u0065\u0040\u005f\u005f\u005f\u002e\u0063\u006f\u006d\u0020\uff09\u307e\u3067\u3054\u4f4f\u6240\u5909\u66f4\u6e08\u306e\u9023\u7d61\u3092\u3044\u305f\u3060\u3051\u306a\u3044\u3067
343 \u3057\u3087\u3046\u304b\uff1f"
345 cd [temporaryDirectory]
346 set fid [open iso2022.txt w]
347 fconfigure $fid -encoding binary
348 puts -nonewline $fid $::iso2022encData
351 test encoding-23.1 {iso2022-jp escape encoding test} {
352 string equal $::iso2022uniData $::iso2022uniData2
354 test encoding-23.2 {iso2022-jp escape encoding test} {
355 # This checks that 'gets' isn't resetting the encoding inappropriately.
357 set fid [open iso2022.txt r]
358 fconfigure $fid -encoding iso2022-jp
361 while {[set num [gets $fid line]] >= 0} {
363 incr count 1 ; # account for newline
370 if {[string compare $::iso2022uniData $out]} {
371 return -code error "iso2022-jp read in doesn't match original"
374 } [list [string length $::iso2022uniData] $::iso2022uniData]
375 test encoding-23.3 {iso2022-jp escape encoding test} {
376 # read $fis <size> reads size in chars, not raw bytes.
377 set fid [open iso2022.txt r]
378 fconfigure $fid -encoding iso2022-jp
379 set data [read $fid 50]
382 } [string range $::iso2022uniData 0 49] ; # 0 .. 49 inclusive == 50
383 cd [workingDirectory]
385 test encoding-24.1 {EscapeFreeProc on open channels} -constraints {
390 set f [open [file join [file dirname [info script]] iso2022.txt]]
391 fconfigure $f -encoding iso2022-jp
395 exec [interpreter] $file
397 removeFile iso2022.tcl
400 test encoding-24.2 {EscapeFreeProc on open channels} -constraints {
405 fconfigure stdout -encoding iso2022-jp
410 viewable [exec [interpreter] $file]
412 removeFile iso2022.tcl
413 } -result "ab\x1b\$B8C\x1b\$(DD%\x1b(Bg (ab\\u001b\$B8C\\u001b\$(DD%\\u001b(Bg)"
415 test encoding-24.3 {EscapeFreeProc on open channels} {stdio} {
416 # Bug #219314 - if we don't free escape encodings correctly on
417 # channel closure, we go boom
419 encoding system iso2022-jp
420 set a "\u4e4e\u4e5e\u4e5f"; # 3 Japanese Kanji letters
423 set f [open "|[list [interpreter] $file]"]
424 fconfigure $f -encoding iso2022-jp
425 set count [gets $f line]
427 removeFile iso2022.tcl
428 list $count [viewable $line]
429 } [list 3 "\u4e4e\u4e5e\u4e5f (\\u4e4e\\u4e5e\\u4e5f)"]
431 file delete [file join [temporaryDirectory] iso2022.txt]
434 # Begin jajp encoding round-trip conformity tests
436 proc foreach-jisx0208 {varName command} {
437 upvar 1 $varName code
459 if {[llength $range] == 2} {
460 # for adhoc range. simple {first last}. inclusive.
461 set first [scan [lindex $range 0] %x]
462 set last [scan [lindex $range 1] %x]
463 for {set i $first} {$i <= $last} {incr i} {
467 } elseif {[llength $range] == 4} {
469 set h0 [scan [lindex $range 0] %x]
470 set l0 [scan [lindex $range 1] %x]
471 set hend [scan [lindex $range 2] %x]
472 set lend [scan [lindex $range 3] %x]
473 for {set hi $h0} {$hi <= $hend} {incr hi} {
474 for {set lo $l0} {$lo <= $lend} {incr lo} {
475 set code [expr {$hi << 8 | ($lo & 0xff)}]
484 proc gen-jisx0208-euc-jp {code} {
486 [expr {($code >> 8) | 0x80}] [expr {($code & 0xff) | 0x80}]
488 proc gen-jisx0208-iso2022-jp {code} {
489 binary format a3cca3 \
490 "\x1b\$B" [expr {$code >> 8}] [expr {$code & 0xff}] "\x1b(B"
492 proc gen-jisx0208-cp932 {code} {
493 set c1 [expr {($code >> 8) | 0x80}]
494 set c2 [expr {($code & 0xff)| 0x80}]
496 set c1 [expr {($c1 >> 1) + ($c1 < 0xdf ? 0x31 : 0x71)}]
497 incr c2 [expr {- (0x60 + ($c2 < 0xe0))}]
499 set c1 [expr {($c1 >> 1) + ($c1 < 0xdf ? 0x30 : 0x70)}]
502 binary format cc $c1 $c2
504 proc channel-diff {fa fb} {
506 while {[gets $fa la] >= 0 && [gets $fb lb] >= 0} {
507 if {[string compare $la $lb] == 0} continue
508 # lappend diff $la $lb
510 # For more readable (easy to analyze) output.
511 set code [lindex $la 0]
512 binary scan [lindex $la 1] H* expected
513 binary scan [lindex $lb 1] H* got
514 lappend diff [list $code $expected $got]
519 # Create char tables.
520 cd [temporaryDirectory]
521 foreach enc {cp932 euc-jp iso2022-jp} {
522 set f [open $enc.chars w]
523 fconfigure $f -encoding binary
524 foreach-jisx0208 code {
525 puts $f [format "%04X %s" $code [gen-jisx0208-$enc $code]]
529 # shiftjis == cp932 for jisx0208.
530 file copy -force cp932.chars shiftjis.chars
533 foreach from {cp932 shiftjis euc-jp iso2022-jp} {
534 foreach to {cp932 shiftjis euc-jp iso2022-jp} {
535 test encoding-25.[incr NUM] "jisx0208 $from => $to" {
536 cd [temporaryDirectory]
537 set f [open $from.chars]
538 fconfigure $f -encoding $from
539 set out [open $from.$to.out w]
540 fconfigure $out -encoding $to
541 puts -nonewline $out [read $f]
545 # then compare $to.chars <=> $from.to.out as binary.
546 set fa [open $to.chars]
547 fconfigure $fa -encoding binary
548 set fb [open $from.$to.out]
549 fconfigure $fb -encoding binary
550 set diff [channel-diff $fa $fb]
554 # Difference should be empty.
560 eval [list file delete] [glob -directory [temporaryDirectory] *.chars *.out]
563 # EscapeFreeProc, GetTableEncoding, unilen
564 # are fully tested by the rest of this file
567 ::tcltest::cleanupTests