os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/tests/utf.test
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 # This file contains a collection of tests for tclUtf.c
     2 # Sourcing this file into Tcl runs the tests and generates output for
     3 # errors.  No output means no errors were found.
     4 #
     5 # Copyright (c) 1997 Sun Microsystems, Inc.
     6 # Copyright (c) 1998-1999 by Scriptics Corporation.
     7 #
     8 # See the file "license.terms" for information on usage and redistribution
     9 # of this file, and for a DISCLAIMER OF ALL WARRANTIES.
    10 #
    11 # RCS: @(#) $Id: utf.test,v 1.8.14.5 2005/09/07 14:35:56 dgp Exp $
    12 
    13 if {[lsearch [namespace children] ::tcltest] == -1} {
    14     package require tcltest 2
    15     namespace import -force ::tcltest::*
    16 }
    17 
    18 catch {unset x}
    19 
    20 test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} {
    21     set x \x01
    22 } [bytestring "\x01"]
    23 test utf-1.2 {Tcl_UniCharToUtf: 2 byte sequences} {
    24     set x "\x00"
    25 } [bytestring "\xc0\x80"]
    26 test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} {
    27     set x "\xe0"
    28 } [bytestring "\xc3\xa0"]
    29 test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} {
    30     set x "\u4e4e"
    31 } [bytestring "\xe4\xb9\x8e"]
    32 test utf-1.5 {Tcl_UniCharToUtf: negative Tcl_UniChar} {
    33     string length [format %c -1]
    34 } 1
    35 
    36 test utf-2.1 {Tcl_UtfToUniChar: low ascii} {
    37     string length "abc"
    38 } {3}
    39 test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} {
    40     string length [bytestring "\x82\x83\x84"]
    41 } {3}
    42 test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} {
    43     string length [bytestring "\xC2"]
    44 } {1}
    45 test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} {
    46     string length [bytestring "\xC2\xa2"]
    47 } {1}
    48 test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} {
    49     string length [bytestring "\xE2"]
    50 } {1}
    51 test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} {
    52     string length [bytestring "\xE2\xA2"]
    53 } {2}
    54 test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} {
    55     string length [bytestring "\xE4\xb9\x8e"]
    56 } {1}
    57 test utf-2.8 {Tcl_UtfToUniChar: longer UTF sequences not supported} {
    58     string length [bytestring "\xF4\xA2\xA2\xA2"]
    59 } {4}
    60 
    61 test utf-3.1 {Tcl_UtfCharComplete} {
    62 } {}
    63 
    64 testConstraint testnumutfchars [llength [info commands testnumutfchars]]
    65 test utf-4.1 {Tcl_NumUtfChars: zero length} testnumutfchars {
    66     testnumutfchars ""
    67 } {0}
    68 test utf-4.2 {Tcl_NumUtfChars: length 1} testnumutfchars {
    69     testnumutfchars [bytestring "\xC2\xA2"]
    70 } {1}
    71 test utf-4.3 {Tcl_NumUtfChars: long string} testnumutfchars {
    72     testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"]
    73 } {7}
    74 test utf-4.4 {Tcl_NumUtfChars: #u0000} testnumutfchars {
    75     testnumutfchars [bytestring "\xC0\x80"]
    76 } {1}
    77 test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars {
    78     testnumutfchars "" 1
    79 } {0}
    80 test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} testnumutfchars {
    81     testnumutfchars [bytestring "\xC2\xA2"] 1
    82 } {1}
    83 test utf-4.7 {Tcl_NumUtfChars: long string, calc len} testnumutfchars {
    84     testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] 1
    85 } {7}
    86 test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} testnumutfchars {
    87     testnumutfchars [bytestring "\xC0\x80"] 1
    88 } {1}
    89 
    90 test utf-5.1 {Tcl_UtfFindFirsts} {
    91 } {}
    92 
    93 test utf-6.1 {Tcl_UtfNext} {
    94 } {}
    95 
    96 test utf-7.1 {Tcl_UtfPrev} {
    97 } {}
    98 
    99 test utf-8.1 {Tcl_UniCharAtIndex: index = 0} {
   100     string index abcd 0
   101 } {a}
   102 test utf-8.2 {Tcl_UniCharAtIndex: index = 0} {
   103     string index \u4e4e\u25a 0
   104 } "\u4e4e"
   105 test utf-8.3 {Tcl_UniCharAtIndex: index > 0} {
   106     string index abcd 2
   107 } {c}
   108 test utf-8.4 {Tcl_UniCharAtIndex: index > 0} {
   109     string index \u4e4e\u25a\xff\u543 2
   110 } "\uff"
   111 
   112 test utf-9.1 {Tcl_UtfAtIndex: index = 0} {
   113     string range abcd 0 2
   114 } {abc}
   115 test utf-9.2 {Tcl_UtfAtIndex: index > 0} {
   116     string range \u4e4e\u25a\xff\u543klmnop 1 5
   117 } "\u25a\xff\u543kl"
   118 
   119 
   120 test utf-10.1 {Tcl_UtfBackslash: dst == NULL} {
   121     set x \n
   122 } {
   123 }
   124 test utf-10.2 {Tcl_UtfBackslash: \u subst} {
   125     set x \ua2
   126 } [bytestring "\xc2\xa2"]
   127 test utf-10.3 {Tcl_UtfBackslash: longer \u subst} {
   128     set x \u4e21
   129 } [bytestring "\xe4\xb8\xa1"]
   130 test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} {
   131     set x \u4e2k
   132 } "[bytestring \xd3\xa2]k"
   133 test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} {
   134     set x \u4e216
   135 } "[bytestring \xe4\xb8\xa1]6"
   136 proc bsCheck {char num} {
   137     global errNum
   138     test utf-10.$errNum {backslash substitution} {
   139 	scan $char %c value
   140 	set value
   141     } $num
   142     incr errNum
   143 }
   144 set errNum 6
   145 bsCheck \b	8
   146 bsCheck \e	101
   147 bsCheck \f	12
   148 bsCheck \n	10
   149 bsCheck \r	13
   150 bsCheck \t	9
   151 bsCheck \v	11
   152 bsCheck \{	123
   153 bsCheck \}	125
   154 bsCheck \[	91
   155 bsCheck \]	93
   156 bsCheck \$	36
   157 bsCheck \ 	32
   158 bsCheck \;	59
   159 bsCheck \\	92
   160 bsCheck \Ca	67
   161 bsCheck \Ma	77
   162 bsCheck \CMa	67
   163 # prior to 8.3, this returned 8, as \8 as accepted as an
   164 # octal value - but it isn't! [Bug: 3975]
   165 bsCheck \8a	56
   166 bsCheck \14	12
   167 bsCheck \141	97
   168 bsCheck b\0	98
   169 bsCheck \x	120
   170 bsCheck \xa	10
   171 bsCheck \xA	10
   172 bsCheck \x41	65
   173 bsCheck \x541	65
   174 bsCheck \u	117
   175 bsCheck \uk	117
   176 bsCheck \u41	65
   177 bsCheck \ua	10
   178 bsCheck \uA	10
   179 bsCheck \340	224
   180 bsCheck \ua1	161
   181 bsCheck \u4e21	20001
   182 
   183 test utf-11.1 {Tcl_UtfToUpper} {
   184     string toupper {}
   185 } {}
   186 test utf-11.2 {Tcl_UtfToUpper} {
   187     string toupper abc
   188 } ABC
   189 test utf-11.3 {Tcl_UtfToUpper} {
   190     string toupper \u00e3ab
   191 } \u00c3AB
   192 test utf-11.4 {Tcl_UtfToUpper} {
   193     string toupper \u01e3ab
   194 } \u01e2AB
   195 
   196 test utf-12.1 {Tcl_UtfToLower} {
   197     string tolower {}
   198 } {}
   199 test utf-12.2 {Tcl_UtfToLower} {
   200     string tolower ABC
   201 } abc
   202 test utf-12.3 {Tcl_UtfToLower} {
   203     string tolower \u00c3AB
   204 } \u00e3ab
   205 test utf-12.4 {Tcl_UtfToLower} {
   206     string tolower \u01e2AB
   207 } \u01e3ab
   208 
   209 test utf-13.1 {Tcl_UtfToTitle} {
   210     string totitle {}
   211 } {}
   212 test utf-13.2 {Tcl_UtfToTitle} {
   213     string totitle abc
   214 } Abc
   215 test utf-13.3 {Tcl_UtfToTitle} {
   216     string totitle \u00e3ab
   217 } \u00c3ab
   218 test utf-13.4 {Tcl_UtfToTitle} {
   219     string totitle \u01f3ab
   220 } \u01f2ab
   221 
   222 test utf-14.1 {Tcl_UtfNcasecmp} {
   223     string compare -nocase a b
   224 } -1
   225 test utf-14.2 {Tcl_UtfNcasecmp} {
   226     string compare -nocase b a
   227 } 1
   228 test utf-14.3 {Tcl_UtfNcasecmp} {
   229     string compare -nocase B a
   230 } 1
   231 test utf-14.4 {Tcl_UtfNcasecmp} {
   232     string compare -nocase aBcB abca
   233 } 1
   234 
   235 test utf-15.1 {Tcl_UniCharToUpper, negative delta} {
   236     string toupper aA
   237 } AA
   238 test utf-15.2 {Tcl_UniCharToUpper, positive delta} {
   239     string toupper \u0178\u00ff
   240 } \u0178\u0178
   241 test utf-15.3 {Tcl_UniCharToUpper, no delta} {
   242     string toupper !
   243 } !
   244 
   245 test utf-16.1 {Tcl_UniCharToLower, negative delta} {
   246     string tolower aA
   247 } aa
   248 test utf-16.2 {Tcl_UniCharToLower, positive delta} {
   249     string tolower \u0178\u00ff
   250 } \u00ff\u00ff
   251 test utf-17.1 {Tcl_UniCharToLower, no delta} {
   252     string tolower !
   253 } !
   254 
   255 test utf-18.1 {Tcl_UniCharToTitle, add one for title} {
   256     string totitle \u01c4
   257 } \u01c5
   258 test utf-18.2 {Tcl_UniCharToTitle, subtract one for title} {
   259     string totitle \u01c6
   260 } \u01c5
   261 test utf-18.3 {Tcl_UniCharToTitle, subtract delta for title (positive)} {
   262     string totitle \u017f
   263 } \u0053
   264 test utf-18.4 {Tcl_UniCharToTitle, subtract delta for title (negative)} {
   265     string totitle \u00ff
   266 } \u0178
   267 test utf-18.5 {Tcl_UniCharToTitle, no delta} {
   268     string totitle !
   269 } !
   270 
   271 test utf-19.1 {TclUniCharLen} {
   272     list [regexp \\d abc456def foo] $foo
   273 } {1 4}
   274 
   275 test utf-20.1 {TclUniCharNcmp} {
   276 } {}
   277 
   278 test utf-21.1 {TclUniCharIsAlnum} {
   279     # this returns 1 with Unicode 3 compliance
   280     string is alnum \u1040\u021f
   281 } {1}
   282 test utf-21.2 {unicode alnum char in regc_locale.c} {
   283     # this returns 1 with Unicode 3 compliance
   284     list [regexp {^[[:alnum:]]+$} \u1040\u021f] [regexp {^\w+$} \u1040\u021f]
   285 } {1 1}
   286 
   287 test utf-22.1 {TclUniCharIsWordChar} {
   288     string wordend "xyz123_bar fg" 0
   289 } 10
   290 test utf-22.2 {TclUniCharIsWordChar} {
   291     string wordend "x\u5080z123_bar\u203c fg" 0
   292 } 10
   293 
   294 test utf-23.1 {TclUniCharIsAlpha} {
   295     # this returns 1 with Unicode 3 compliance
   296     string is alpha \u021f
   297 } {1}
   298 test utf-23.2 {unicode alpha char in regc_locale.c} {
   299     # this returns 1 with Unicode 3 compliance
   300     regexp {^[[:alpha:]]+$} \u021f
   301 } {1}
   302 
   303 test utf-24.1 {TclUniCharIsDigit} {
   304     # this returns 1 with Unicode 3 compliance
   305     string is digit \u1040
   306 } {1}
   307 test utf-24.2 {unicode digit char in regc_locale.c} {
   308     # this returns 1 with Unicode 3 compliance
   309     list [regexp {^[[:digit:]]+$} \u1040] [regexp {^\d+$} \u1040]
   310 } {1 1}
   311 
   312 test utf-24.3 {TclUniCharIsSpace} {
   313     # this returns 1 with Unicode 3 compliance
   314     string is space \u1680
   315 } {1}
   316 test utf-24.4 {unicode space char in regc_locale.c} {
   317     # this returns 1 with Unicode 3 compliance
   318     list [regexp {^[[:space:]]+$} \u1680] [regexp {^\s+$} \u1680]
   319 } {1 1}
   320 
   321 testConstraint teststringobj [llength [info commands teststringobj]]
   322 test utf-25.1 {Tcl_UniCharNcasecmp} teststringobj {
   323     testobj freeallvars
   324     teststringobj set 1 a
   325     teststringobj set 2 b
   326     teststringobj getunicode 1
   327     teststringobj getunicode 2
   328     string compare -nocase [teststringobj get 1] [teststringobj get 2]
   329 } -1
   330 test utf-25.2 {Tcl_UniCharNcasecmp} teststringobj {
   331     testobj freeallvars
   332     teststringobj set 1 b
   333     teststringobj set 2 a
   334     teststringobj getunicode 1
   335     teststringobj getunicode 2
   336     string compare -nocase [teststringobj get 1] [teststringobj get 2]
   337 } 1
   338 test utf-25.3 {Tcl_UniCharNcasecmp} teststringobj {
   339     testobj freeallvars
   340     teststringobj set 1 B
   341     teststringobj set 2 a
   342     teststringobj getunicode 1
   343     teststringobj getunicode 2
   344     string compare -nocase [teststringobj get 1] [teststringobj get 2]
   345 } 1
   346 test utf-25.4 {Tcl_UniCharNcasecmp} teststringobj {
   347     testobj freeallvars
   348     teststringobj set 1 aBcB
   349     teststringobj set 2 abca
   350     teststringobj getunicode 1
   351     teststringobj getunicode 2
   352     string compare -nocase [teststringobj get 1] [teststringobj get 2]
   353 } 1
   354 
   355 # cleanup
   356 ::tcltest::cleanupTests
   357 return