os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/tests/utf.test
author sl@SLION-WIN7.fritz.box
Fri, 15 Jun 2012 03:10:57 +0200
changeset 0 bde4ae8d615e
permissions -rw-r--r--
First public contribution.
sl@0
     1
# This file contains a collection of tests for tclUtf.c
sl@0
     2
# Sourcing this file into Tcl runs the tests and generates output for
sl@0
     3
# errors.  No output means no errors were found.
sl@0
     4
#
sl@0
     5
# Copyright (c) 1997 Sun Microsystems, Inc.
sl@0
     6
# Copyright (c) 1998-1999 by Scriptics Corporation.
sl@0
     7
#
sl@0
     8
# See the file "license.terms" for information on usage and redistribution
sl@0
     9
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
sl@0
    10
#
sl@0
    11
# RCS: @(#) $Id: utf.test,v 1.8.14.5 2005/09/07 14:35:56 dgp Exp $
sl@0
    12
sl@0
    13
if {[lsearch [namespace children] ::tcltest] == -1} {
sl@0
    14
    package require tcltest 2
sl@0
    15
    namespace import -force ::tcltest::*
sl@0
    16
}
sl@0
    17
sl@0
    18
catch {unset x}
sl@0
    19
sl@0
    20
test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} {
sl@0
    21
    set x \x01
sl@0
    22
} [bytestring "\x01"]
sl@0
    23
test utf-1.2 {Tcl_UniCharToUtf: 2 byte sequences} {
sl@0
    24
    set x "\x00"
sl@0
    25
} [bytestring "\xc0\x80"]
sl@0
    26
test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} {
sl@0
    27
    set x "\xe0"
sl@0
    28
} [bytestring "\xc3\xa0"]
sl@0
    29
test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} {
sl@0
    30
    set x "\u4e4e"
sl@0
    31
} [bytestring "\xe4\xb9\x8e"]
sl@0
    32
test utf-1.5 {Tcl_UniCharToUtf: negative Tcl_UniChar} {
sl@0
    33
    string length [format %c -1]
sl@0
    34
} 1
sl@0
    35
sl@0
    36
test utf-2.1 {Tcl_UtfToUniChar: low ascii} {
sl@0
    37
    string length "abc"
sl@0
    38
} {3}
sl@0
    39
test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} {
sl@0
    40
    string length [bytestring "\x82\x83\x84"]
sl@0
    41
} {3}
sl@0
    42
test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} {
sl@0
    43
    string length [bytestring "\xC2"]
sl@0
    44
} {1}
sl@0
    45
test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} {
sl@0
    46
    string length [bytestring "\xC2\xa2"]
sl@0
    47
} {1}
sl@0
    48
test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} {
sl@0
    49
    string length [bytestring "\xE2"]
sl@0
    50
} {1}
sl@0
    51
test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} {
sl@0
    52
    string length [bytestring "\xE2\xA2"]
sl@0
    53
} {2}
sl@0
    54
test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} {
sl@0
    55
    string length [bytestring "\xE4\xb9\x8e"]
sl@0
    56
} {1}
sl@0
    57
test utf-2.8 {Tcl_UtfToUniChar: longer UTF sequences not supported} {
sl@0
    58
    string length [bytestring "\xF4\xA2\xA2\xA2"]
sl@0
    59
} {4}
sl@0
    60
sl@0
    61
test utf-3.1 {Tcl_UtfCharComplete} {
sl@0
    62
} {}
sl@0
    63
sl@0
    64
testConstraint testnumutfchars [llength [info commands testnumutfchars]]
sl@0
    65
test utf-4.1 {Tcl_NumUtfChars: zero length} testnumutfchars {
sl@0
    66
    testnumutfchars ""
sl@0
    67
} {0}
sl@0
    68
test utf-4.2 {Tcl_NumUtfChars: length 1} testnumutfchars {
sl@0
    69
    testnumutfchars [bytestring "\xC2\xA2"]
sl@0
    70
} {1}
sl@0
    71
test utf-4.3 {Tcl_NumUtfChars: long string} testnumutfchars {
sl@0
    72
    testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"]
sl@0
    73
} {7}
sl@0
    74
test utf-4.4 {Tcl_NumUtfChars: #u0000} testnumutfchars {
sl@0
    75
    testnumutfchars [bytestring "\xC0\x80"]
sl@0
    76
} {1}
sl@0
    77
test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars {
sl@0
    78
    testnumutfchars "" 1
sl@0
    79
} {0}
sl@0
    80
test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} testnumutfchars {
sl@0
    81
    testnumutfchars [bytestring "\xC2\xA2"] 1
sl@0
    82
} {1}
sl@0
    83
test utf-4.7 {Tcl_NumUtfChars: long string, calc len} testnumutfchars {
sl@0
    84
    testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] 1
sl@0
    85
} {7}
sl@0
    86
test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} testnumutfchars {
sl@0
    87
    testnumutfchars [bytestring "\xC0\x80"] 1
sl@0
    88
} {1}
sl@0
    89
sl@0
    90
test utf-5.1 {Tcl_UtfFindFirsts} {
sl@0
    91
} {}
sl@0
    92
sl@0
    93
test utf-6.1 {Tcl_UtfNext} {
sl@0
    94
} {}
sl@0
    95
sl@0
    96
test utf-7.1 {Tcl_UtfPrev} {
sl@0
    97
} {}
sl@0
    98
sl@0
    99
test utf-8.1 {Tcl_UniCharAtIndex: index = 0} {
sl@0
   100
    string index abcd 0
sl@0
   101
} {a}
sl@0
   102
test utf-8.2 {Tcl_UniCharAtIndex: index = 0} {
sl@0
   103
    string index \u4e4e\u25a 0
sl@0
   104
} "\u4e4e"
sl@0
   105
test utf-8.3 {Tcl_UniCharAtIndex: index > 0} {
sl@0
   106
    string index abcd 2
sl@0
   107
} {c}
sl@0
   108
test utf-8.4 {Tcl_UniCharAtIndex: index > 0} {
sl@0
   109
    string index \u4e4e\u25a\xff\u543 2
sl@0
   110
} "\uff"
sl@0
   111
sl@0
   112
test utf-9.1 {Tcl_UtfAtIndex: index = 0} {
sl@0
   113
    string range abcd 0 2
sl@0
   114
} {abc}
sl@0
   115
test utf-9.2 {Tcl_UtfAtIndex: index > 0} {
sl@0
   116
    string range \u4e4e\u25a\xff\u543klmnop 1 5
sl@0
   117
} "\u25a\xff\u543kl"
sl@0
   118
sl@0
   119
sl@0
   120
test utf-10.1 {Tcl_UtfBackslash: dst == NULL} {
sl@0
   121
    set x \n
sl@0
   122
} {
sl@0
   123
}
sl@0
   124
test utf-10.2 {Tcl_UtfBackslash: \u subst} {
sl@0
   125
    set x \ua2
sl@0
   126
} [bytestring "\xc2\xa2"]
sl@0
   127
test utf-10.3 {Tcl_UtfBackslash: longer \u subst} {
sl@0
   128
    set x \u4e21
sl@0
   129
} [bytestring "\xe4\xb8\xa1"]
sl@0
   130
test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} {
sl@0
   131
    set x \u4e2k
sl@0
   132
} "[bytestring \xd3\xa2]k"
sl@0
   133
test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} {
sl@0
   134
    set x \u4e216
sl@0
   135
} "[bytestring \xe4\xb8\xa1]6"
sl@0
   136
proc bsCheck {char num} {
sl@0
   137
    global errNum
sl@0
   138
    test utf-10.$errNum {backslash substitution} {
sl@0
   139
	scan $char %c value
sl@0
   140
	set value
sl@0
   141
    } $num
sl@0
   142
    incr errNum
sl@0
   143
}
sl@0
   144
set errNum 6
sl@0
   145
bsCheck \b	8
sl@0
   146
bsCheck \e	101
sl@0
   147
bsCheck \f	12
sl@0
   148
bsCheck \n	10
sl@0
   149
bsCheck \r	13
sl@0
   150
bsCheck \t	9
sl@0
   151
bsCheck \v	11
sl@0
   152
bsCheck \{	123
sl@0
   153
bsCheck \}	125
sl@0
   154
bsCheck \[	91
sl@0
   155
bsCheck \]	93
sl@0
   156
bsCheck \$	36
sl@0
   157
bsCheck \ 	32
sl@0
   158
bsCheck \;	59
sl@0
   159
bsCheck \\	92
sl@0
   160
bsCheck \Ca	67
sl@0
   161
bsCheck \Ma	77
sl@0
   162
bsCheck \CMa	67
sl@0
   163
# prior to 8.3, this returned 8, as \8 as accepted as an
sl@0
   164
# octal value - but it isn't! [Bug: 3975]
sl@0
   165
bsCheck \8a	56
sl@0
   166
bsCheck \14	12
sl@0
   167
bsCheck \141	97
sl@0
   168
bsCheck b\0	98
sl@0
   169
bsCheck \x	120
sl@0
   170
bsCheck \xa	10
sl@0
   171
bsCheck \xA	10
sl@0
   172
bsCheck \x41	65
sl@0
   173
bsCheck \x541	65
sl@0
   174
bsCheck \u	117
sl@0
   175
bsCheck \uk	117
sl@0
   176
bsCheck \u41	65
sl@0
   177
bsCheck \ua	10
sl@0
   178
bsCheck \uA	10
sl@0
   179
bsCheck \340	224
sl@0
   180
bsCheck \ua1	161
sl@0
   181
bsCheck \u4e21	20001
sl@0
   182
sl@0
   183
test utf-11.1 {Tcl_UtfToUpper} {
sl@0
   184
    string toupper {}
sl@0
   185
} {}
sl@0
   186
test utf-11.2 {Tcl_UtfToUpper} {
sl@0
   187
    string toupper abc
sl@0
   188
} ABC
sl@0
   189
test utf-11.3 {Tcl_UtfToUpper} {
sl@0
   190
    string toupper \u00e3ab
sl@0
   191
} \u00c3AB
sl@0
   192
test utf-11.4 {Tcl_UtfToUpper} {
sl@0
   193
    string toupper \u01e3ab
sl@0
   194
} \u01e2AB
sl@0
   195
sl@0
   196
test utf-12.1 {Tcl_UtfToLower} {
sl@0
   197
    string tolower {}
sl@0
   198
} {}
sl@0
   199
test utf-12.2 {Tcl_UtfToLower} {
sl@0
   200
    string tolower ABC
sl@0
   201
} abc
sl@0
   202
test utf-12.3 {Tcl_UtfToLower} {
sl@0
   203
    string tolower \u00c3AB
sl@0
   204
} \u00e3ab
sl@0
   205
test utf-12.4 {Tcl_UtfToLower} {
sl@0
   206
    string tolower \u01e2AB
sl@0
   207
} \u01e3ab
sl@0
   208
sl@0
   209
test utf-13.1 {Tcl_UtfToTitle} {
sl@0
   210
    string totitle {}
sl@0
   211
} {}
sl@0
   212
test utf-13.2 {Tcl_UtfToTitle} {
sl@0
   213
    string totitle abc
sl@0
   214
} Abc
sl@0
   215
test utf-13.3 {Tcl_UtfToTitle} {
sl@0
   216
    string totitle \u00e3ab
sl@0
   217
} \u00c3ab
sl@0
   218
test utf-13.4 {Tcl_UtfToTitle} {
sl@0
   219
    string totitle \u01f3ab
sl@0
   220
} \u01f2ab
sl@0
   221
sl@0
   222
test utf-14.1 {Tcl_UtfNcasecmp} {
sl@0
   223
    string compare -nocase a b
sl@0
   224
} -1
sl@0
   225
test utf-14.2 {Tcl_UtfNcasecmp} {
sl@0
   226
    string compare -nocase b a
sl@0
   227
} 1
sl@0
   228
test utf-14.3 {Tcl_UtfNcasecmp} {
sl@0
   229
    string compare -nocase B a
sl@0
   230
} 1
sl@0
   231
test utf-14.4 {Tcl_UtfNcasecmp} {
sl@0
   232
    string compare -nocase aBcB abca
sl@0
   233
} 1
sl@0
   234
sl@0
   235
test utf-15.1 {Tcl_UniCharToUpper, negative delta} {
sl@0
   236
    string toupper aA
sl@0
   237
} AA
sl@0
   238
test utf-15.2 {Tcl_UniCharToUpper, positive delta} {
sl@0
   239
    string toupper \u0178\u00ff
sl@0
   240
} \u0178\u0178
sl@0
   241
test utf-15.3 {Tcl_UniCharToUpper, no delta} {
sl@0
   242
    string toupper !
sl@0
   243
} !
sl@0
   244
sl@0
   245
test utf-16.1 {Tcl_UniCharToLower, negative delta} {
sl@0
   246
    string tolower aA
sl@0
   247
} aa
sl@0
   248
test utf-16.2 {Tcl_UniCharToLower, positive delta} {
sl@0
   249
    string tolower \u0178\u00ff
sl@0
   250
} \u00ff\u00ff
sl@0
   251
test utf-17.1 {Tcl_UniCharToLower, no delta} {
sl@0
   252
    string tolower !
sl@0
   253
} !
sl@0
   254
sl@0
   255
test utf-18.1 {Tcl_UniCharToTitle, add one for title} {
sl@0
   256
    string totitle \u01c4
sl@0
   257
} \u01c5
sl@0
   258
test utf-18.2 {Tcl_UniCharToTitle, subtract one for title} {
sl@0
   259
    string totitle \u01c6
sl@0
   260
} \u01c5
sl@0
   261
test utf-18.3 {Tcl_UniCharToTitle, subtract delta for title (positive)} {
sl@0
   262
    string totitle \u017f
sl@0
   263
} \u0053
sl@0
   264
test utf-18.4 {Tcl_UniCharToTitle, subtract delta for title (negative)} {
sl@0
   265
    string totitle \u00ff
sl@0
   266
} \u0178
sl@0
   267
test utf-18.5 {Tcl_UniCharToTitle, no delta} {
sl@0
   268
    string totitle !
sl@0
   269
} !
sl@0
   270
sl@0
   271
test utf-19.1 {TclUniCharLen} {
sl@0
   272
    list [regexp \\d abc456def foo] $foo
sl@0
   273
} {1 4}
sl@0
   274
sl@0
   275
test utf-20.1 {TclUniCharNcmp} {
sl@0
   276
} {}
sl@0
   277
sl@0
   278
test utf-21.1 {TclUniCharIsAlnum} {
sl@0
   279
    # this returns 1 with Unicode 3 compliance
sl@0
   280
    string is alnum \u1040\u021f
sl@0
   281
} {1}
sl@0
   282
test utf-21.2 {unicode alnum char in regc_locale.c} {
sl@0
   283
    # this returns 1 with Unicode 3 compliance
sl@0
   284
    list [regexp {^[[:alnum:]]+$} \u1040\u021f] [regexp {^\w+$} \u1040\u021f]
sl@0
   285
} {1 1}
sl@0
   286
sl@0
   287
test utf-22.1 {TclUniCharIsWordChar} {
sl@0
   288
    string wordend "xyz123_bar fg" 0
sl@0
   289
} 10
sl@0
   290
test utf-22.2 {TclUniCharIsWordChar} {
sl@0
   291
    string wordend "x\u5080z123_bar\u203c fg" 0
sl@0
   292
} 10
sl@0
   293
sl@0
   294
test utf-23.1 {TclUniCharIsAlpha} {
sl@0
   295
    # this returns 1 with Unicode 3 compliance
sl@0
   296
    string is alpha \u021f
sl@0
   297
} {1}
sl@0
   298
test utf-23.2 {unicode alpha char in regc_locale.c} {
sl@0
   299
    # this returns 1 with Unicode 3 compliance
sl@0
   300
    regexp {^[[:alpha:]]+$} \u021f
sl@0
   301
} {1}
sl@0
   302
sl@0
   303
test utf-24.1 {TclUniCharIsDigit} {
sl@0
   304
    # this returns 1 with Unicode 3 compliance
sl@0
   305
    string is digit \u1040
sl@0
   306
} {1}
sl@0
   307
test utf-24.2 {unicode digit char in regc_locale.c} {
sl@0
   308
    # this returns 1 with Unicode 3 compliance
sl@0
   309
    list [regexp {^[[:digit:]]+$} \u1040] [regexp {^\d+$} \u1040]
sl@0
   310
} {1 1}
sl@0
   311
sl@0
   312
test utf-24.3 {TclUniCharIsSpace} {
sl@0
   313
    # this returns 1 with Unicode 3 compliance
sl@0
   314
    string is space \u1680
sl@0
   315
} {1}
sl@0
   316
test utf-24.4 {unicode space char in regc_locale.c} {
sl@0
   317
    # this returns 1 with Unicode 3 compliance
sl@0
   318
    list [regexp {^[[:space:]]+$} \u1680] [regexp {^\s+$} \u1680]
sl@0
   319
} {1 1}
sl@0
   320
sl@0
   321
testConstraint teststringobj [llength [info commands teststringobj]]
sl@0
   322
test utf-25.1 {Tcl_UniCharNcasecmp} teststringobj {
sl@0
   323
    testobj freeallvars
sl@0
   324
    teststringobj set 1 a
sl@0
   325
    teststringobj set 2 b
sl@0
   326
    teststringobj getunicode 1
sl@0
   327
    teststringobj getunicode 2
sl@0
   328
    string compare -nocase [teststringobj get 1] [teststringobj get 2]
sl@0
   329
} -1
sl@0
   330
test utf-25.2 {Tcl_UniCharNcasecmp} teststringobj {
sl@0
   331
    testobj freeallvars
sl@0
   332
    teststringobj set 1 b
sl@0
   333
    teststringobj set 2 a
sl@0
   334
    teststringobj getunicode 1
sl@0
   335
    teststringobj getunicode 2
sl@0
   336
    string compare -nocase [teststringobj get 1] [teststringobj get 2]
sl@0
   337
} 1
sl@0
   338
test utf-25.3 {Tcl_UniCharNcasecmp} teststringobj {
sl@0
   339
    testobj freeallvars
sl@0
   340
    teststringobj set 1 B
sl@0
   341
    teststringobj set 2 a
sl@0
   342
    teststringobj getunicode 1
sl@0
   343
    teststringobj getunicode 2
sl@0
   344
    string compare -nocase [teststringobj get 1] [teststringobj get 2]
sl@0
   345
} 1
sl@0
   346
test utf-25.4 {Tcl_UniCharNcasecmp} teststringobj {
sl@0
   347
    testobj freeallvars
sl@0
   348
    teststringobj set 1 aBcB
sl@0
   349
    teststringobj set 2 abca
sl@0
   350
    teststringobj getunicode 1
sl@0
   351
    teststringobj getunicode 2
sl@0
   352
    string compare -nocase [teststringobj get 1] [teststringobj get 2]
sl@0
   353
} 1
sl@0
   354
sl@0
   355
# cleanup
sl@0
   356
::tcltest::cleanupTests
sl@0
   357
return