os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/tests/utf.test
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/tests/utf.test Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,357 @@
1.4 +# This file contains a collection of tests for tclUtf.c
1.5 +# Sourcing this file into Tcl runs the tests and generates output for
1.6 +# errors. No output means no errors were found.
1.7 +#
1.8 +# Copyright (c) 1997 Sun Microsystems, Inc.
1.9 +# Copyright (c) 1998-1999 by Scriptics Corporation.
1.10 +#
1.11 +# See the file "license.terms" for information on usage and redistribution
1.12 +# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
1.13 +#
1.14 +# RCS: @(#) $Id: utf.test,v 1.8.14.5 2005/09/07 14:35:56 dgp Exp $
1.15 +
1.16 +if {[lsearch [namespace children] ::tcltest] == -1} {
1.17 + package require tcltest 2 ;# reached only when ::tcltest is not yet a child of the current namespace
1.18 + namespace import -force ::tcltest::* ;# import the commands tcltest exports; -force replaces same-named existing commands
1.19 +}
1.20 +
1.21 +catch {unset x} ;# drop any pre-existing x; catch ignores the error raised when x is not set
1.22 +
1.23 +test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} {
1.24 + set x \x01
1.25 +} [bytestring "\x01"]
1.26 +test utf-1.2 {Tcl_UniCharToUtf: 2 byte sequences} {
1.27 + set x "\x00"
1.28 +} [bytestring "\xc0\x80"]
1.29 +test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} {
1.30 + set x "\xe0"
1.31 +} [bytestring "\xc3\xa0"]
1.32 +test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} {
1.33 + set x "\u4e4e"
1.34 +} [bytestring "\xe4\xb9\x8e"]
1.35 +test utf-1.5 {Tcl_UniCharToUtf: negative Tcl_UniChar} {
1.36 + string length [format %c -1]
1.37 +} 1
1.38 +
1.39 +test utf-2.1 {Tcl_UtfToUniChar: low ascii} {
1.40 + string length "abc"
1.41 +} {3}
1.42 +test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} {
1.43 + string length [bytestring "\x82\x83\x84"]
1.44 +} {3}
1.45 +test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} {
1.46 + string length [bytestring "\xC2"]
1.47 +} {1}
1.48 +test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} {
1.49 + string length [bytestring "\xC2\xa2"]
1.50 +} {1}
1.51 +test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} {
1.52 + string length [bytestring "\xE2"]
1.53 +} {1}
1.54 +test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} {
1.55 + string length [bytestring "\xE2\xA2"]
1.56 +} {2}
1.57 +test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} {
1.58 + string length [bytestring "\xE4\xb9\x8e"]
1.59 +} {1}
1.60 +test utf-2.8 {Tcl_UtfToUniChar: longer UTF sequences not supported} {
1.61 + string length [bytestring "\xF4\xA2\xA2\xA2"]
1.62 +} {4}
1.63 +
1.64 +test utf-3.1 {Tcl_UtfCharComplete} {
1.65 +} {}
1.66 +
1.67 +testConstraint testnumutfchars [llength [info commands testnumutfchars]] ;# true only when a testnumutfchars command exists; the utf-4.* tests tagged with this constraint are skipped otherwise
1.68 +test utf-4.1 {Tcl_NumUtfChars: zero length} testnumutfchars {
1.69 + testnumutfchars ""
1.70 +} {0}
1.71 +test utf-4.2 {Tcl_NumUtfChars: length 1} testnumutfchars {
1.72 + testnumutfchars [bytestring "\xC2\xA2"]
1.73 +} {1}
1.74 +test utf-4.3 {Tcl_NumUtfChars: long string} testnumutfchars {
1.75 + testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"]
1.76 +} {7}
1.77 +test utf-4.4 {Tcl_NumUtfChars: #u0000} testnumutfchars {
1.78 + testnumutfchars [bytestring "\xC0\x80"]
1.79 +} {1}
1.80 +test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars {
1.81 + testnumutfchars "" 1
1.82 +} {0}
1.83 +test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} testnumutfchars {
1.84 + testnumutfchars [bytestring "\xC2\xA2"] 1
1.85 +} {1}
1.86 +test utf-4.7 {Tcl_NumUtfChars: long string, calc len} testnumutfchars {
1.87 + testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] 1
1.88 +} {7}
1.89 +test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} testnumutfchars {
1.90 + testnumutfchars [bytestring "\xC0\x80"] 1
1.91 +} {1}
1.92 +
1.93 +test utf-5.1 {Tcl_UtfFindFirst} { ;# placeholder: empty script, expected result is the empty string
1.94 +} {}
1.95 +
1.96 +test utf-6.1 {Tcl_UtfNext} {
1.97 +} {}
1.98 +
1.99 +test utf-7.1 {Tcl_UtfPrev} {
1.100 +} {}
1.101 +
1.102 +test utf-8.1 {Tcl_UniCharAtIndex: index = 0} {
1.103 + string index abcd 0
1.104 +} {a}
1.105 +test utf-8.2 {Tcl_UniCharAtIndex: index = 0} {
1.106 + string index \u4e4e\u25a 0
1.107 +} "\u4e4e"
1.108 +test utf-8.3 {Tcl_UniCharAtIndex: index > 0} {
1.109 + string index abcd 2
1.110 +} {c}
1.111 +test utf-8.4 {Tcl_UniCharAtIndex: index > 0} {
1.112 + string index \u4e4e\u25a\xff\u543 2
1.113 +} "\uff"
1.114 +
1.115 +test utf-9.1 {Tcl_UtfAtIndex: index = 0} {
1.116 + string range abcd 0 2
1.117 +} {abc}
1.118 +test utf-9.2 {Tcl_UtfAtIndex: index > 0} {
1.119 + string range \u4e4e\u25a\xff\u543klmnop 1 5
1.120 +} "\u25a\xff\u543kl"
1.121 +
1.122 +
1.123 +test utf-10.1 {Tcl_UtfBackslash: dst == NULL} {
1.124 + set x \n
1.125 +} {
1.126 +}
1.127 +test utf-10.2 {Tcl_UtfBackslash: \u subst} {
1.128 + set x \ua2
1.129 +} [bytestring "\xc2\xa2"]
1.130 +test utf-10.3 {Tcl_UtfBackslash: longer \u subst} {
1.131 + set x \u4e21
1.132 +} [bytestring "\xe4\xb8\xa1"]
1.133 +test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} {
1.134 + set x \u4e2k
1.135 +} "[bytestring \xd3\xa2]k"
1.136 +test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} {
1.137 + set x \u4e216
1.138 +} "[bytestring \xe4\xb8\xa1]6"
1.139 +proc bsCheck {char num} { ;# register one backslash-substitution test: the first character of char must have integer code num
1.140 + global errNum ;# running sequence number for the utf-10.N test names; set by the caller before the first call
1.141 + test utf-10.$errNum {backslash substitution} {
1.142 + scan $char %c value ;# %c stores the integer value of the first character of char in value
1.143 + set value ;# the test result is that integer, compared against num
1.144 + } $num
1.145 + incr errNum ;# advance so the next bsCheck call gets the next test number
1.146 +}
1.147 +set errNum 6
1.148 +bsCheck \b 8
1.149 +bsCheck \e 101
1.150 +bsCheck \f 12
1.151 +bsCheck \n 10
1.152 +bsCheck \r 13
1.153 +bsCheck \t 9
1.154 +bsCheck \v 11
1.155 +bsCheck \{ 123
1.156 +bsCheck \} 125
1.157 +bsCheck \[ 91
1.158 +bsCheck \] 93
1.159 +bsCheck \$ 36
1.160 +bsCheck \ 32
1.161 +bsCheck \; 59
1.162 +bsCheck \\ 92
1.163 +bsCheck \Ca 67
1.164 +bsCheck \Ma 77
1.165 +bsCheck \CMa 67
1.166 +# Prior to 8.3 this returned 8, because \8 was accepted as an
1.167 +# octal value - but 8 is not an octal digit! [Bug: 3975]
1.168 +bsCheck \8a 56
1.169 +bsCheck \14 12
1.170 +bsCheck \141 97
1.171 +bsCheck b\0 98
1.172 +bsCheck \x 120
1.173 +bsCheck \xa 10
1.174 +bsCheck \xA 10
1.175 +bsCheck \x41 65
1.176 +bsCheck \x541 65
1.177 +bsCheck \u 117
1.178 +bsCheck \uk 117
1.179 +bsCheck \u41 65
1.180 +bsCheck \ua 10
1.181 +bsCheck \uA 10
1.182 +bsCheck \340 224
1.183 +bsCheck \ua1 161
1.184 +bsCheck \u4e21 20001
1.185 +
1.186 +test utf-11.1 {Tcl_UtfToUpper} {
1.187 + string toupper {}
1.188 +} {}
1.189 +test utf-11.2 {Tcl_UtfToUpper} {
1.190 + string toupper abc
1.191 +} ABC
1.192 +test utf-11.3 {Tcl_UtfToUpper} {
1.193 + string toupper \u00e3ab
1.194 +} \u00c3AB
1.195 +test utf-11.4 {Tcl_UtfToUpper} {
1.196 + string toupper \u01e3ab
1.197 +} \u01e2AB
1.198 +
1.199 +test utf-12.1 {Tcl_UtfToLower} {
1.200 + string tolower {}
1.201 +} {}
1.202 +test utf-12.2 {Tcl_UtfToLower} {
1.203 + string tolower ABC
1.204 +} abc
1.205 +test utf-12.3 {Tcl_UtfToLower} {
1.206 + string tolower \u00c3AB
1.207 +} \u00e3ab
1.208 +test utf-12.4 {Tcl_UtfToLower} {
1.209 + string tolower \u01e2AB
1.210 +} \u01e3ab
1.211 +
1.212 +test utf-13.1 {Tcl_UtfToTitle} {
1.213 + string totitle {}
1.214 +} {}
1.215 +test utf-13.2 {Tcl_UtfToTitle} {
1.216 + string totitle abc
1.217 +} Abc
1.218 +test utf-13.3 {Tcl_UtfToTitle} {
1.219 + string totitle \u00e3ab
1.220 +} \u00c3ab
1.221 +test utf-13.4 {Tcl_UtfToTitle} {
1.222 + string totitle \u01f3ab
1.223 +} \u01f2ab
1.224 +
1.225 +test utf-14.1 {Tcl_UtfNcasecmp} {
1.226 + string compare -nocase a b
1.227 +} -1
1.228 +test utf-14.2 {Tcl_UtfNcasecmp} {
1.229 + string compare -nocase b a
1.230 +} 1
1.231 +test utf-14.3 {Tcl_UtfNcasecmp} {
1.232 + string compare -nocase B a
1.233 +} 1
1.234 +test utf-14.4 {Tcl_UtfNcasecmp} {
1.235 + string compare -nocase aBcB abca
1.236 +} 1
1.237 +
1.238 +test utf-15.1 {Tcl_UniCharToUpper, negative delta} {
1.239 + string toupper aA
1.240 +} AA
1.241 +test utf-15.2 {Tcl_UniCharToUpper, positive delta} {
1.242 + string toupper \u0178\u00ff
1.243 +} \u0178\u0178
1.244 +test utf-15.3 {Tcl_UniCharToUpper, no delta} {
1.245 + string toupper !
1.246 +} !
1.247 +
1.248 +test utf-16.1 {Tcl_UniCharToLower, negative delta} {
1.249 + string tolower aA
1.250 +} aa
1.251 +test utf-16.2 {Tcl_UniCharToLower, positive delta} {
1.252 + string tolower \u0178\u00ff
1.253 +} \u00ff\u00ff
1.254 +test utf-17.1 {Tcl_UniCharToLower, no delta} {
1.255 + string tolower !
1.256 +} !
1.257 +
1.258 +test utf-18.1 {Tcl_UniCharToTitle, add one for title} {
1.259 + string totitle \u01c4
1.260 +} \u01c5
1.261 +test utf-18.2 {Tcl_UniCharToTitle, subtract one for title} {
1.262 + string totitle \u01c6
1.263 +} \u01c5
1.264 +test utf-18.3 {Tcl_UniCharToTitle, subtract delta for title (positive)} {
1.265 + string totitle \u017f
1.266 +} \u0053
1.267 +test utf-18.4 {Tcl_UniCharToTitle, subtract delta for title (negative)} {
1.268 + string totitle \u00ff
1.269 +} \u0178
1.270 +test utf-18.5 {Tcl_UniCharToTitle, no delta} {
1.271 + string totitle !
1.272 +} !
1.273 +
1.274 +test utf-19.1 {TclUniCharLen} {
1.275 + list [regexp \\d abc456def foo] $foo
1.276 +} {1 4}
1.277 +
1.278 +test utf-20.1 {TclUniCharNcmp} {
1.279 +} {}
1.280 +
1.281 +test utf-21.1 {TclUniCharIsAlnum} {
1.282 + # this returns 1 with Unicode 3 compliance
1.283 + string is alnum \u1040\u021f
1.284 +} {1}
1.285 +test utf-21.2 {unicode alnum char in regc_locale.c} {
1.286 + # this returns 1 with Unicode 3 compliance
1.287 + list [regexp {^[[:alnum:]]+$} \u1040\u021f] [regexp {^\w+$} \u1040\u021f]
1.288 +} {1 1}
1.289 +
1.290 +test utf-22.1 {TclUniCharIsWordChar} {
1.291 + string wordend "xyz123_bar fg" 0
1.292 +} 10
1.293 +test utf-22.2 {TclUniCharIsWordChar} {
1.294 + string wordend "x\u5080z123_bar\u203c fg" 0
1.295 +} 10
1.296 +
1.297 +test utf-23.1 {TclUniCharIsAlpha} {
1.298 + # this returns 1 with Unicode 3 compliance
1.299 + string is alpha \u021f
1.300 +} {1}
1.301 +test utf-23.2 {unicode alpha char in regc_locale.c} {
1.302 + # this returns 1 with Unicode 3 compliance
1.303 + regexp {^[[:alpha:]]+$} \u021f
1.304 +} {1}
1.305 +
1.306 +test utf-24.1 {TclUniCharIsDigit} {
1.307 + # this returns 1 with Unicode 3 compliance
1.308 + string is digit \u1040
1.309 +} {1}
1.310 +test utf-24.2 {unicode digit char in regc_locale.c} {
1.311 + # this returns 1 with Unicode 3 compliance
1.312 + list [regexp {^[[:digit:]]+$} \u1040] [regexp {^\d+$} \u1040]
1.313 +} {1 1}
1.314 +
1.315 +test utf-24.3 {TclUniCharIsSpace} {
1.316 + # this returns 1 with Unicode 3 compliance
1.317 + string is space \u1680
1.318 +} {1}
1.319 +test utf-24.4 {unicode space char in regc_locale.c} {
1.320 + # this returns 1 with Unicode 3 compliance
1.321 + list [regexp {^[[:space:]]+$} \u1680] [regexp {^\s+$} \u1680]
1.322 +} {1 1}
1.323 +
1.324 +testConstraint teststringobj [llength [info commands teststringobj]] ;# true only when a teststringobj command exists; the utf-25.* tests tagged with this constraint are skipped otherwise
1.325 +test utf-25.1 {Tcl_UniCharNcasecmp} teststringobj {
1.326 + testobj freeallvars
1.327 + teststringobj set 1 a
1.328 + teststringobj set 2 b
1.329 + teststringobj getunicode 1
1.330 + teststringobj getunicode 2
1.331 + string compare -nocase [teststringobj get 1] [teststringobj get 2]
1.332 +} -1
1.333 +test utf-25.2 {Tcl_UniCharNcasecmp} teststringobj {
1.334 + testobj freeallvars
1.335 + teststringobj set 1 b
1.336 + teststringobj set 2 a
1.337 + teststringobj getunicode 1
1.338 + teststringobj getunicode 2
1.339 + string compare -nocase [teststringobj get 1] [teststringobj get 2]
1.340 +} 1
1.341 +test utf-25.3 {Tcl_UniCharNcasecmp} teststringobj {
1.342 + testobj freeallvars
1.343 + teststringobj set 1 B
1.344 + teststringobj set 2 a
1.345 + teststringobj getunicode 1
1.346 + teststringobj getunicode 2
1.347 + string compare -nocase [teststringobj get 1] [teststringobj get 2]
1.348 +} 1
1.349 +test utf-25.4 {Tcl_UniCharNcasecmp} teststringobj {
1.350 + testobj freeallvars
1.351 + teststringobj set 1 aBcB
1.352 + teststringobj set 2 abca
1.353 + teststringobj getunicode 1
1.354 + teststringobj getunicode 2
1.355 + string compare -nocase [teststringobj get 1] [teststringobj get 2]
1.356 +} 1
1.357 +
1.358 +# cleanup
1.359 +::tcltest::cleanupTests
1.360 +return