sl@0: # This file contains a collection of tests for tclUtf.c sl@0: # Sourcing this file into Tcl runs the tests and generates output for sl@0: # errors. No output means no errors were found. sl@0: # sl@0: # Copyright (c) 1997 Sun Microsystems, Inc. sl@0: # Copyright (c) 1998-1999 by Scriptics Corporation. sl@0: # sl@0: # See the file "license.terms" for information on usage and redistribution sl@0: # of this file, and for a DISCLAIMER OF ALL WARRANTIES. sl@0: # sl@0: # RCS: @(#) $Id: utf.test,v 1.8.14.5 2005/09/07 14:35:56 dgp Exp $ sl@0: sl@0: if {[lsearch [namespace children] ::tcltest] == -1} { sl@0: package require tcltest 2 sl@0: namespace import -force ::tcltest::* sl@0: } sl@0: sl@0: catch {unset x} sl@0: sl@0: test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} { sl@0: set x \x01 sl@0: } [bytestring "\x01"] sl@0: test utf-1.2 {Tcl_UniCharToUtf: 2 byte sequences} { sl@0: set x "\x00" sl@0: } [bytestring "\xc0\x80"] sl@0: test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} { sl@0: set x "\xe0" sl@0: } [bytestring "\xc3\xa0"] sl@0: test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} { sl@0: set x "\u4e4e" sl@0: } [bytestring "\xe4\xb9\x8e"] sl@0: test utf-1.5 {Tcl_UniCharToUtf: negative Tcl_UniChar} { sl@0: string length [format %c -1] sl@0: } 1 sl@0: sl@0: test utf-2.1 {Tcl_UtfToUniChar: low ascii} { sl@0: string length "abc" sl@0: } {3} sl@0: test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} { sl@0: string length [bytestring "\x82\x83\x84"] sl@0: } {3} sl@0: test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} { sl@0: string length [bytestring "\xC2"] sl@0: } {1} sl@0: test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} { sl@0: string length [bytestring "\xC2\xa2"] sl@0: } {1} sl@0: test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} { sl@0: string length [bytestring "\xE2"] sl@0: } {1} sl@0: test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} { sl@0: string length [bytestring "\xE2\xA2"] sl@0: } {2} sl@0: test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} { sl@0: string length [bytestring "\xE4\xb9\x8e"] sl@0: } {1} sl@0: test utf-2.8 {Tcl_UtfToUniChar: longer UTF sequences not supported} { sl@0: string length [bytestring "\xF4\xA2\xA2\xA2"] sl@0: } {4} sl@0: sl@0: test utf-3.1 {Tcl_UtfCharComplete} { sl@0: } {} sl@0: sl@0: testConstraint testnumutfchars [llength [info commands testnumutfchars]] sl@0: test utf-4.1 {Tcl_NumUtfChars: zero length} testnumutfchars { sl@0: testnumutfchars "" sl@0: } {0} sl@0: test utf-4.2 {Tcl_NumUtfChars: length 1} testnumutfchars { sl@0: testnumutfchars [bytestring "\xC2\xA2"] sl@0: } {1} sl@0: test utf-4.3 {Tcl_NumUtfChars: long string} testnumutfchars { sl@0: testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] sl@0: } {7} sl@0: test utf-4.4 {Tcl_NumUtfChars: #u0000} testnumutfchars { sl@0: testnumutfchars [bytestring "\xC0\x80"] sl@0: } {1} sl@0: test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars { sl@0: testnumutfchars "" 1 sl@0: } {0} sl@0: test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} testnumutfchars { sl@0: testnumutfchars [bytestring "\xC2\xA2"] 1 sl@0: } {1} sl@0: test utf-4.7 {Tcl_NumUtfChars: long string, calc len} testnumutfchars { sl@0: testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] 1 sl@0: } {7} sl@0: test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} testnumutfchars { sl@0: testnumutfchars [bytestring "\xC0\x80"] 1 sl@0: } {1} sl@0: sl@0: test utf-5.1 {Tcl_UtfFindFirsts} { sl@0: } {} sl@0: sl@0: test utf-6.1 {Tcl_UtfNext} { sl@0: } {} sl@0: sl@0: test utf-7.1 {Tcl_UtfPrev} { sl@0: } {} sl@0: sl@0: test utf-8.1 {Tcl_UniCharAtIndex: index = 0} { sl@0: string index abcd 0 sl@0: } {a} sl@0: test utf-8.2 {Tcl_UniCharAtIndex: index = 0} { sl@0: string index \u4e4e\u25a 0 sl@0: } "\u4e4e" sl@0: test utf-8.3 {Tcl_UniCharAtIndex: index > 0} { sl@0: string index abcd 2 sl@0: } {c} sl@0: test utf-8.4 {Tcl_UniCharAtIndex: index > 0} { sl@0: string index \u4e4e\u25a\xff\u543 2 sl@0: } "\uff" sl@0: sl@0: test utf-9.1 {Tcl_UtfAtIndex: index = 0} { sl@0: string range abcd 0 2 sl@0: } {abc} sl@0: test utf-9.2 {Tcl_UtfAtIndex: index > 0} { sl@0: string range \u4e4e\u25a\xff\u543klmnop 1 5 sl@0: } "\u25a\xff\u543kl" sl@0: sl@0: sl@0: test utf-10.1 {Tcl_UtfBackslash: dst == NULL} { sl@0: set x \n sl@0: } { sl@0: } sl@0: test utf-10.2 {Tcl_UtfBackslash: \u subst} { sl@0: set x \ua2 sl@0: } [bytestring "\xc2\xa2"] sl@0: test utf-10.3 {Tcl_UtfBackslash: longer \u subst} { sl@0: set x \u4e21 sl@0: } [bytestring "\xe4\xb8\xa1"] sl@0: test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} { sl@0: set x \u4e2k sl@0: } "[bytestring \xd3\xa2]k" sl@0: test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} { sl@0: set x \u4e216 sl@0: } "[bytestring \xe4\xb8\xa1]6" sl@0: proc bsCheck {char num} { sl@0: global errNum sl@0: test utf-10.$errNum {backslash substitution} { sl@0: scan $char %c value sl@0: set value sl@0: } $num sl@0: incr errNum sl@0: } sl@0: set errNum 6 sl@0: bsCheck \b 8 sl@0: bsCheck \e 101 sl@0: bsCheck \f 12 sl@0: bsCheck \n 10 sl@0: bsCheck \r 13 sl@0: bsCheck \t 9 sl@0: bsCheck \v 11 sl@0: bsCheck \{ 123 sl@0: bsCheck \} 125 sl@0: bsCheck \[ 91 sl@0: bsCheck \] 93 sl@0: bsCheck \$ 36 sl@0: bsCheck \ 32 sl@0: bsCheck \; 59 sl@0: bsCheck \\ 92 sl@0: bsCheck \Ca 67 sl@0: bsCheck \Ma 77 sl@0: bsCheck \CMa 67 sl@0: # prior to 8.3, this returned 8, as \8 as accepted as an sl@0: # octal value - but it isn't! [Bug: 3975] sl@0: bsCheck \8a 56 sl@0: bsCheck \14 12 sl@0: bsCheck \141 97 sl@0: bsCheck b\0 98 sl@0: bsCheck \x 120 sl@0: bsCheck \xa 10 sl@0: bsCheck \xA 10 sl@0: bsCheck \x41 65 sl@0: bsCheck \x541 65 sl@0: bsCheck \u 117 sl@0: bsCheck \uk 117 sl@0: bsCheck \u41 65 sl@0: bsCheck \ua 10 sl@0: bsCheck \uA 10 sl@0: bsCheck \340 224 sl@0: bsCheck \ua1 161 sl@0: bsCheck \u4e21 20001 sl@0: sl@0: test utf-11.1 {Tcl_UtfToUpper} { sl@0: string toupper {} sl@0: } {} sl@0: test utf-11.2 {Tcl_UtfToUpper} { sl@0: string toupper abc sl@0: } ABC sl@0: test utf-11.3 {Tcl_UtfToUpper} { sl@0: string toupper \u00e3ab sl@0: } \u00c3AB sl@0: test utf-11.4 {Tcl_UtfToUpper} { sl@0: string toupper \u01e3ab sl@0: } \u01e2AB sl@0: sl@0: test utf-12.1 {Tcl_UtfToLower} { sl@0: string tolower {} sl@0: } {} sl@0: test utf-12.2 {Tcl_UtfToLower} { sl@0: string tolower ABC sl@0: } abc sl@0: test utf-12.3 {Tcl_UtfToLower} { sl@0: string tolower \u00c3AB sl@0: } \u00e3ab sl@0: test utf-12.4 {Tcl_UtfToLower} { sl@0: string tolower \u01e2AB sl@0: } \u01e3ab sl@0: sl@0: test utf-13.1 {Tcl_UtfToTitle} { sl@0: string totitle {} sl@0: } {} sl@0: test utf-13.2 {Tcl_UtfToTitle} { sl@0: string totitle abc sl@0: } Abc sl@0: test utf-13.3 {Tcl_UtfToTitle} { sl@0: string totitle \u00e3ab sl@0: } \u00c3ab sl@0: test utf-13.4 {Tcl_UtfToTitle} { sl@0: string totitle \u01f3ab sl@0: } \u01f2ab sl@0: sl@0: test utf-14.1 {Tcl_UtfNcasecmp} { sl@0: string compare -nocase a b sl@0: } -1 sl@0: test utf-14.2 {Tcl_UtfNcasecmp} { sl@0: string compare -nocase b a sl@0: } 1 sl@0: test utf-14.3 {Tcl_UtfNcasecmp} { sl@0: string compare -nocase B a sl@0: } 1 sl@0: test utf-14.4 {Tcl_UtfNcasecmp} { sl@0: string compare -nocase aBcB abca sl@0: } 1 sl@0: sl@0: test utf-15.1 {Tcl_UniCharToUpper, negative delta} { sl@0: string toupper aA sl@0: } AA sl@0: test utf-15.2 {Tcl_UniCharToUpper, positive delta} { sl@0: string toupper \u0178\u00ff sl@0: } \u0178\u0178 sl@0: test utf-15.3 {Tcl_UniCharToUpper, no delta} { sl@0: string toupper ! sl@0: } ! sl@0: sl@0: test utf-16.1 {Tcl_UniCharToLower, negative delta} { sl@0: string tolower aA sl@0: } aa sl@0: test utf-16.2 {Tcl_UniCharToLower, positive delta} { sl@0: string tolower \u0178\u00ff sl@0: } \u00ff\u00ff sl@0: test utf-17.1 {Tcl_UniCharToLower, no delta} { sl@0: string tolower ! sl@0: } ! sl@0: sl@0: test utf-18.1 {Tcl_UniCharToTitle, add one for title} { sl@0: string totitle \u01c4 sl@0: } \u01c5 sl@0: test utf-18.2 {Tcl_UniCharToTitle, subtract one for title} { sl@0: string totitle \u01c6 sl@0: } \u01c5 sl@0: test utf-18.3 {Tcl_UniCharToTitle, subtract delta for title (positive)} { sl@0: string totitle \u017f sl@0: } \u0053 sl@0: test utf-18.4 {Tcl_UniCharToTitle, subtract delta for title (negative)} { sl@0: string totitle \u00ff sl@0: } \u0178 sl@0: test utf-18.5 {Tcl_UniCharToTitle, no delta} { sl@0: string totitle ! sl@0: } ! sl@0: sl@0: test utf-19.1 {TclUniCharLen} { sl@0: list [regexp \\d abc456def foo] $foo sl@0: } {1 4} sl@0: sl@0: test utf-20.1 {TclUniCharNcmp} { sl@0: } {} sl@0: sl@0: test utf-21.1 {TclUniCharIsAlnum} { sl@0: # this returns 1 with Unicode 3 compliance sl@0: string is alnum \u1040\u021f sl@0: } {1} sl@0: test utf-21.2 {unicode alnum char in regc_locale.c} { sl@0: # this returns 1 with Unicode 3 compliance sl@0: list [regexp {^[[:alnum:]]+$} \u1040\u021f] [regexp {^\w+$} \u1040\u021f] sl@0: } {1 1} sl@0: sl@0: test utf-22.1 {TclUniCharIsWordChar} { sl@0: string wordend "xyz123_bar fg" 0 sl@0: } 10 sl@0: test utf-22.2 {TclUniCharIsWordChar} { sl@0: string wordend "x\u5080z123_bar\u203c fg" 0 sl@0: } 10 sl@0: sl@0: test utf-23.1 {TclUniCharIsAlpha} { sl@0: # this returns 1 with Unicode 3 compliance sl@0: string is alpha \u021f sl@0: } {1} sl@0: test utf-23.2 {unicode alpha char in regc_locale.c} { sl@0: # this returns 1 with Unicode 3 compliance sl@0: regexp {^[[:alpha:]]+$} \u021f sl@0: } {1} sl@0: sl@0: test utf-24.1 {TclUniCharIsDigit} { sl@0: # this returns 1 with Unicode 3 compliance sl@0: string is digit \u1040 sl@0: } {1} sl@0: test utf-24.2 {unicode digit char in regc_locale.c} { sl@0: # this returns 1 with Unicode 3 compliance sl@0: list [regexp {^[[:digit:]]+$} \u1040] [regexp {^\d+$} \u1040] sl@0: } {1 1} sl@0: sl@0: test utf-24.3 {TclUniCharIsSpace} { sl@0: # this returns 1 with Unicode 3 compliance sl@0: string is space \u1680 sl@0: } {1} sl@0: test utf-24.4 {unicode space char in regc_locale.c} { sl@0: # this returns 1 with Unicode 3 compliance sl@0: list [regexp {^[[:space:]]+$} \u1680] [regexp {^\s+$} \u1680] sl@0: } {1 1} sl@0: sl@0: testConstraint teststringobj [llength [info commands teststringobj]] sl@0: test utf-25.1 {Tcl_UniCharNcasecmp} teststringobj { sl@0: testobj freeallvars sl@0: teststringobj set 1 a sl@0: teststringobj set 2 b sl@0: teststringobj getunicode 1 sl@0: teststringobj getunicode 2 sl@0: string compare -nocase [teststringobj get 1] [teststringobj get 2] sl@0: } -1 sl@0: test utf-25.2 {Tcl_UniCharNcasecmp} teststringobj { sl@0: testobj freeallvars sl@0: teststringobj set 1 b sl@0: teststringobj set 2 a sl@0: teststringobj getunicode 1 sl@0: teststringobj getunicode 2 sl@0: string compare -nocase [teststringobj get 1] [teststringobj get 2] sl@0: } 1 sl@0: test utf-25.3 {Tcl_UniCharNcasecmp} teststringobj { sl@0: testobj freeallvars sl@0: teststringobj set 1 B sl@0: teststringobj set 2 a sl@0: teststringobj getunicode 1 sl@0: teststringobj getunicode 2 sl@0: string compare -nocase [teststringobj get 1] [teststringobj get 2] sl@0: } 1 sl@0: test utf-25.4 {Tcl_UniCharNcasecmp} teststringobj { sl@0: testobj freeallvars sl@0: teststringobj set 1 aBcB sl@0: teststringobj set 2 abca sl@0: teststringobj getunicode 1 sl@0: teststringobj getunicode 2 sl@0: string compare -nocase [teststringobj get 1] [teststringobj get 2] sl@0: } 1 sl@0: sl@0: # cleanup sl@0: ::tcltest::cleanupTests sl@0: return