os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/tests/utf.test
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/tests/utf.test	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,357 @@
     1.4 +# This file contains a collection of tests for tclUtf.c
     1.5 +# Sourcing this file into Tcl runs the tests and generates output for
     1.6 +# errors.  No output means no errors were found.
     1.7 +#
     1.8 +# Copyright (c) 1997 Sun Microsystems, Inc.
     1.9 +# Copyright (c) 1998-1999 by Scriptics Corporation.
    1.10 +#
    1.11 +# See the file "license.terms" for information on usage and redistribution
    1.12 +# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
    1.13 +#
    1.14 +# RCS: @(#) $Id: utf.test,v 1.8.14.5 2005/09/07 14:35:56 dgp Exp $
    1.15 +
    1.16 +if {[lsearch [namespace children] ::tcltest] == -1} {
    1.17 +    package require tcltest 2
    1.18 +    namespace import -force ::tcltest::*
    1.19 +}
    1.20 +
    1.21 +catch {unset x}
    1.22 +
    1.23 +test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} {
    1.24 +    set x \x01
    1.25 +} [bytestring "\x01"]
    1.26 +test utf-1.2 {Tcl_UniCharToUtf: 2 byte sequences} {
    1.27 +    set x "\x00"
    1.28 +} [bytestring "\xc0\x80"]
    1.29 +test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} {
    1.30 +    set x "\xe0"
    1.31 +} [bytestring "\xc3\xa0"]
    1.32 +test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} {
    1.33 +    set x "\u4e4e"
    1.34 +} [bytestring "\xe4\xb9\x8e"]
    1.35 +test utf-1.5 {Tcl_UniCharToUtf: negative Tcl_UniChar} {
    1.36 +    string length [format %c -1]
    1.37 +} 1
    1.38 +
    1.39 +test utf-2.1 {Tcl_UtfToUniChar: low ascii} {
    1.40 +    string length "abc"
    1.41 +} {3}
    1.42 +test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} {
    1.43 +    string length [bytestring "\x82\x83\x84"]
    1.44 +} {3}
    1.45 +test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} {
    1.46 +    string length [bytestring "\xC2"]
    1.47 +} {1}
    1.48 +test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} {
    1.49 +    string length [bytestring "\xC2\xa2"]
    1.50 +} {1}
    1.51 +test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} {
    1.52 +    string length [bytestring "\xE2"]
    1.53 +} {1}
    1.54 +test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} {
    1.55 +    string length [bytestring "\xE2\xA2"]
    1.56 +} {2}
    1.57 +test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} {
    1.58 +    string length [bytestring "\xE4\xb9\x8e"]
    1.59 +} {1}
    1.60 +test utf-2.8 {Tcl_UtfToUniChar: longer UTF sequences not supported} {
    1.61 +    string length [bytestring "\xF4\xA2\xA2\xA2"]
    1.62 +} {4}
    1.63 +
    1.64 +test utf-3.1 {Tcl_UtfCharComplete} {
    1.65 +} {}
    1.66 +
    1.67 +testConstraint testnumutfchars [llength [info commands testnumutfchars]]
    1.68 +test utf-4.1 {Tcl_NumUtfChars: zero length} testnumutfchars {
    1.69 +    testnumutfchars ""
    1.70 +} {0}
    1.71 +test utf-4.2 {Tcl_NumUtfChars: length 1} testnumutfchars {
    1.72 +    testnumutfchars [bytestring "\xC2\xA2"]
    1.73 +} {1}
    1.74 +test utf-4.3 {Tcl_NumUtfChars: long string} testnumutfchars {
    1.75 +    testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"]
    1.76 +} {7}
    1.77 +test utf-4.4 {Tcl_NumUtfChars: #u0000} testnumutfchars {
    1.78 +    testnumutfchars [bytestring "\xC0\x80"]
    1.79 +} {1}
    1.80 +test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars {
    1.81 +    testnumutfchars "" 1
    1.82 +} {0}
    1.83 +test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} testnumutfchars {
    1.84 +    testnumutfchars [bytestring "\xC2\xA2"] 1
    1.85 +} {1}
    1.86 +test utf-4.7 {Tcl_NumUtfChars: long string, calc len} testnumutfchars {
    1.87 +    testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] 1
    1.88 +} {7}
    1.89 +test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} testnumutfchars {
    1.90 +    testnumutfchars [bytestring "\xC0\x80"] 1
    1.91 +} {1}
    1.92 +
    1.93 +test utf-5.1 {Tcl_UtfFindFirst} {
    1.94 +} {}  ;# placeholder: empty body compared against an empty expected result
    1.95 +
    1.96 +test utf-6.1 {Tcl_UtfNext} {
    1.97 +} {}
    1.98 +
    1.99 +test utf-7.1 {Tcl_UtfPrev} {
   1.100 +} {}
   1.101 +
   1.102 +test utf-8.1 {Tcl_UniCharAtIndex: index = 0} {
   1.103 +    string index abcd 0
   1.104 +} {a}
   1.105 +test utf-8.2 {Tcl_UniCharAtIndex: index = 0} {
   1.106 +    string index \u4e4e\u25a 0
   1.107 +} "\u4e4e"
   1.108 +test utf-8.3 {Tcl_UniCharAtIndex: index > 0} {
   1.109 +    string index abcd 2
   1.110 +} {c}
   1.111 +test utf-8.4 {Tcl_UniCharAtIndex: index > 0} {
   1.112 +    string index \u4e4e\u25a\xff\u543 2
   1.113 +} "\uff"
   1.114 +
   1.115 +test utf-9.1 {Tcl_UtfAtIndex: index = 0} {
   1.116 +    string range abcd 0 2
   1.117 +} {abc}
   1.118 +test utf-9.2 {Tcl_UtfAtIndex: index > 0} {
   1.119 +    string range \u4e4e\u25a\xff\u543klmnop 1 5
   1.120 +} "\u25a\xff\u543kl"
   1.121 +
   1.122 +
   1.123 +test utf-10.1 {Tcl_UtfBackslash: dst == NULL} {
   1.124 +    set x \n
   1.125 +} {
   1.126 +}
   1.127 +test utf-10.2 {Tcl_UtfBackslash: \u subst} {
   1.128 +    set x \ua2
   1.129 +} [bytestring "\xc2\xa2"]
   1.130 +test utf-10.3 {Tcl_UtfBackslash: longer \u subst} {
   1.131 +    set x \u4e21
   1.132 +} [bytestring "\xe4\xb8\xa1"]
   1.133 +test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} {
   1.134 +    set x \u4e2k
   1.135 +} "[bytestring \xd3\xa2]k"
   1.136 +test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} {
   1.137 +    set x \u4e216
   1.138 +} "[bytestring \xe4\xb8\xa1]6"
   1.139 +# bsCheck char num -- run test utf-10.$errNum: first character of char must scan (%c) to num.
   1.140 +proc bsCheck {char num} {
   1.141 +    test utf-10.$::errNum {backslash substitution} {
   1.142 +	scan $char %c code
   1.143 +	set code
   1.144 +    } $num
   1.145 +    incr ::errNum    ;# advance the global counter so the next call gets the next test number
   1.146 +}
   1.147 +set errNum 6
   1.148 +bsCheck \b	8
   1.149 +bsCheck \e	101
   1.150 +bsCheck \f	12
   1.151 +bsCheck \n	10
   1.152 +bsCheck \r	13
   1.153 +bsCheck \t	9
   1.154 +bsCheck \v	11
   1.155 +bsCheck \{	123
   1.156 +bsCheck \}	125
   1.157 +bsCheck \[	91
   1.158 +bsCheck \]	93
   1.159 +bsCheck \$	36
   1.160 +bsCheck \ 	32
   1.161 +bsCheck \;	59
   1.162 +bsCheck \\	92
   1.163 +bsCheck \Ca	67
   1.164 +bsCheck \Ma	77
   1.165 +bsCheck \CMa	67
   1.166 +# prior to 8.3, this returned 8, as \8 was accepted as an
   1.167 +# octal value - but it isn't! [Bug: 3975]
   1.168 +bsCheck \8a	56
   1.169 +bsCheck \14	12
   1.170 +bsCheck \141	97
   1.171 +bsCheck b\0	98
   1.172 +bsCheck \x	120
   1.173 +bsCheck \xa	10
   1.174 +bsCheck \xA	10
   1.175 +bsCheck \x41	65
   1.176 +bsCheck \x541	65
   1.177 +bsCheck \u	117
   1.178 +bsCheck \uk	117
   1.179 +bsCheck \u41	65
   1.180 +bsCheck \ua	10
   1.181 +bsCheck \uA	10
   1.182 +bsCheck \340	224
   1.183 +bsCheck \ua1	161
   1.184 +bsCheck \u4e21	20001
   1.185 +
   1.186 +test utf-11.1 {Tcl_UtfToUpper} {
   1.187 +    string toupper {}
   1.188 +} {}
   1.189 +test utf-11.2 {Tcl_UtfToUpper} {
   1.190 +    string toupper abc
   1.191 +} ABC
   1.192 +test utf-11.3 {Tcl_UtfToUpper} {
   1.193 +    string toupper \u00e3ab
   1.194 +} \u00c3AB
   1.195 +test utf-11.4 {Tcl_UtfToUpper} {
   1.196 +    string toupper \u01e3ab
   1.197 +} \u01e2AB
   1.198 +
   1.199 +test utf-12.1 {Tcl_UtfToLower} {
   1.200 +    string tolower {}
   1.201 +} {}
   1.202 +test utf-12.2 {Tcl_UtfToLower} {
   1.203 +    string tolower ABC
   1.204 +} abc
   1.205 +test utf-12.3 {Tcl_UtfToLower} {
   1.206 +    string tolower \u00c3AB
   1.207 +} \u00e3ab
   1.208 +test utf-12.4 {Tcl_UtfToLower} {
   1.209 +    string tolower \u01e2AB
   1.210 +} \u01e3ab
   1.211 +
   1.212 +test utf-13.1 {Tcl_UtfToTitle} {
   1.213 +    string totitle {}
   1.214 +} {}
   1.215 +test utf-13.2 {Tcl_UtfToTitle} {
   1.216 +    string totitle abc
   1.217 +} Abc
   1.218 +test utf-13.3 {Tcl_UtfToTitle} {
   1.219 +    string totitle \u00e3ab
   1.220 +} \u00c3ab
   1.221 +test utf-13.4 {Tcl_UtfToTitle} {
   1.222 +    string totitle \u01f3ab
   1.223 +} \u01f2ab
   1.224 +
   1.225 +test utf-14.1 {Tcl_UtfNcasecmp} {
   1.226 +    string compare -nocase a b
   1.227 +} -1
   1.228 +test utf-14.2 {Tcl_UtfNcasecmp} {
   1.229 +    string compare -nocase b a
   1.230 +} 1
   1.231 +test utf-14.3 {Tcl_UtfNcasecmp} {
   1.232 +    string compare -nocase B a
   1.233 +} 1
   1.234 +test utf-14.4 {Tcl_UtfNcasecmp} {
   1.235 +    string compare -nocase aBcB abca
   1.236 +} 1
   1.237 +
   1.238 +test utf-15.1 {Tcl_UniCharToUpper, negative delta} {
   1.239 +    string toupper aA
   1.240 +} AA
   1.241 +test utf-15.2 {Tcl_UniCharToUpper, positive delta} {
   1.242 +    string toupper \u0178\u00ff
   1.243 +} \u0178\u0178
   1.244 +test utf-15.3 {Tcl_UniCharToUpper, no delta} {
   1.245 +    string toupper !
   1.246 +} !
   1.247 +
   1.248 +test utf-16.1 {Tcl_UniCharToLower, negative delta} {
   1.249 +    string tolower aA
   1.250 +} aa
   1.251 +test utf-16.2 {Tcl_UniCharToLower, positive delta} {
   1.252 +    string tolower \u0178\u00ff
   1.253 +} \u00ff\u00ff
   1.254 +test utf-17.1 {Tcl_UniCharToLower, no delta} {
   1.255 +    string tolower !
   1.256 +} !
   1.257 +
   1.258 +test utf-18.1 {Tcl_UniCharToTitle, add one for title} {
   1.259 +    string totitle \u01c4
   1.260 +} \u01c5
   1.261 +test utf-18.2 {Tcl_UniCharToTitle, subtract one for title} {
   1.262 +    string totitle \u01c6
   1.263 +} \u01c5
   1.264 +test utf-18.3 {Tcl_UniCharToTitle, subtract delta for title (positive)} {
   1.265 +    string totitle \u017f
   1.266 +} \u0053
   1.267 +test utf-18.4 {Tcl_UniCharToTitle, subtract delta for title (negative)} {
   1.268 +    string totitle \u00ff
   1.269 +} \u0178
   1.270 +test utf-18.5 {Tcl_UniCharToTitle, no delta} {
   1.271 +    string totitle !
   1.272 +} !
   1.273 +
   1.274 +test utf-19.1 {TclUniCharLen} {
   1.275 +    list [regexp \\d abc456def foo] $foo
   1.276 +} {1 4}
   1.277 +
   1.278 +test utf-20.1 {TclUniCharNcmp} {
   1.279 +} {}
   1.280 +
   1.281 +test utf-21.1 {TclUniCharIsAlnum} {
   1.282 +    # this returns 1 with Unicode 3 compliance
   1.283 +    string is alnum \u1040\u021f
   1.284 +} {1}
   1.285 +test utf-21.2 {unicode alnum char in regc_locale.c} {
   1.286 +    # this returns 1 with Unicode 3 compliance
   1.287 +    list [regexp {^[[:alnum:]]+$} \u1040\u021f] [regexp {^\w+$} \u1040\u021f]
   1.288 +} {1 1}
   1.289 +
   1.290 +test utf-22.1 {TclUniCharIsWordChar} {
   1.291 +    string wordend "xyz123_bar fg" 0
   1.292 +} 10
   1.293 +test utf-22.2 {TclUniCharIsWordChar} {
   1.294 +    string wordend "x\u5080z123_bar\u203c fg" 0
   1.295 +} 10
   1.296 +
   1.297 +test utf-23.1 {TclUniCharIsAlpha} {
   1.298 +    # this returns 1 with Unicode 3 compliance
   1.299 +    string is alpha \u021f
   1.300 +} {1}
   1.301 +test utf-23.2 {unicode alpha char in regc_locale.c} {
   1.302 +    # this returns 1 with Unicode 3 compliance
   1.303 +    regexp {^[[:alpha:]]+$} \u021f
   1.304 +} {1}
   1.305 +
   1.306 +test utf-24.1 {TclUniCharIsDigit} {
   1.307 +    # this returns 1 with Unicode 3 compliance
   1.308 +    string is digit \u1040
   1.309 +} {1}
   1.310 +test utf-24.2 {unicode digit char in regc_locale.c} {
   1.311 +    # this returns 1 with Unicode 3 compliance
   1.312 +    list [regexp {^[[:digit:]]+$} \u1040] [regexp {^\d+$} \u1040]
   1.313 +} {1 1}
   1.314 +
   1.315 +test utf-24.3 {TclUniCharIsSpace} {
   1.316 +    # this returns 1 with Unicode 3 compliance
   1.317 +    string is space \u1680
   1.318 +} {1}
   1.319 +test utf-24.4 {unicode space char in regc_locale.c} {
   1.320 +    # this returns 1 with Unicode 3 compliance
   1.321 +    list [regexp {^[[:space:]]+$} \u1680] [regexp {^\s+$} \u1680]
   1.322 +} {1 1}
   1.323 +
   1.324 +testConstraint teststringobj [llength [info commands teststringobj]]
   1.325 +test utf-25.1 {Tcl_UniCharNcasecmp} teststringobj {
   1.326 +    testobj freeallvars
   1.327 +    teststringobj set 1 a
   1.328 +    teststringobj set 2 b
   1.329 +    teststringobj getunicode 1
   1.330 +    teststringobj getunicode 2
   1.331 +    string compare -nocase [teststringobj get 1] [teststringobj get 2]
   1.332 +} -1
   1.333 +test utf-25.2 {Tcl_UniCharNcasecmp} teststringobj {
   1.334 +    testobj freeallvars
   1.335 +    teststringobj set 1 b
   1.336 +    teststringobj set 2 a
   1.337 +    teststringobj getunicode 1
   1.338 +    teststringobj getunicode 2
   1.339 +    string compare -nocase [teststringobj get 1] [teststringobj get 2]
   1.340 +} 1
   1.341 +test utf-25.3 {Tcl_UniCharNcasecmp} teststringobj {
   1.342 +    testobj freeallvars
   1.343 +    teststringobj set 1 B
   1.344 +    teststringobj set 2 a
   1.345 +    teststringobj getunicode 1
   1.346 +    teststringobj getunicode 2
   1.347 +    string compare -nocase [teststringobj get 1] [teststringobj get 2]
   1.348 +} 1
   1.349 +test utf-25.4 {Tcl_UniCharNcasecmp} teststringobj {
   1.350 +    testobj freeallvars
   1.351 +    teststringobj set 1 aBcB
   1.352 +    teststringobj set 2 abca
   1.353 +    teststringobj getunicode 1
   1.354 +    teststringobj getunicode 2
   1.355 +    string compare -nocase [teststringobj get 1] [teststringobj get 2]
   1.356 +} 1
   1.357 +
   1.358 +# cleanup
   1.359 +::tcltest::cleanupTests
   1.360 +return