sl@0
|
1 |
# This file contains a collection of tests for tclUtf.c
|
sl@0
|
2 |
# Sourcing this file into Tcl runs the tests and generates output for
|
sl@0
|
3 |
# errors. No output means no errors were found.
|
sl@0
|
4 |
#
|
sl@0
|
5 |
# Copyright (c) 1997 Sun Microsystems, Inc.
|
sl@0
|
6 |
# Copyright (c) 1998-1999 by Scriptics Corporation.
|
sl@0
|
7 |
#
|
sl@0
|
8 |
# See the file "license.terms" for information on usage and redistribution
|
sl@0
|
9 |
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
|
sl@0
|
10 |
#
|
sl@0
|
11 |
# RCS: @(#) $Id: utf.test,v 1.8.14.5 2005/09/07 14:35:56 dgp Exp $
|
sl@0
|
12 |
|
sl@0
|
13 |
if {[lsearch [namespace children] ::tcltest] == -1} {
|
sl@0
|
14 |
package require tcltest 2
|
sl@0
|
15 |
namespace import -force ::tcltest::*
|
sl@0
|
16 |
}
|
sl@0
|
17 |
|
sl@0
|
18 |
catch {unset x}
|
sl@0
|
19 |
|
sl@0
|
20 |
test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} {
|
sl@0
|
21 |
set x \x01
|
sl@0
|
22 |
} [bytestring "\x01"]
|
sl@0
|
23 |
test utf-1.2 {Tcl_UniCharToUtf: 2 byte sequences} {
|
sl@0
|
24 |
set x "\x00"
|
sl@0
|
25 |
} [bytestring "\xc0\x80"]
|
sl@0
|
26 |
test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} {
|
sl@0
|
27 |
set x "\xe0"
|
sl@0
|
28 |
} [bytestring "\xc3\xa0"]
|
sl@0
|
29 |
test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} {
|
sl@0
|
30 |
set x "\u4e4e"
|
sl@0
|
31 |
} [bytestring "\xe4\xb9\x8e"]
|
sl@0
|
32 |
test utf-1.5 {Tcl_UniCharToUtf: negative Tcl_UniChar} {
|
sl@0
|
33 |
string length [format %c -1]
|
sl@0
|
34 |
} 1
|
sl@0
|
35 |
|
sl@0
|
36 |
test utf-2.1 {Tcl_UtfToUniChar: low ascii} {
|
sl@0
|
37 |
string length "abc"
|
sl@0
|
38 |
} {3}
|
sl@0
|
39 |
test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} {
|
sl@0
|
40 |
string length [bytestring "\x82\x83\x84"]
|
sl@0
|
41 |
} {3}
|
sl@0
|
42 |
test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} {
|
sl@0
|
43 |
string length [bytestring "\xC2"]
|
sl@0
|
44 |
} {1}
|
sl@0
|
45 |
test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} {
|
sl@0
|
46 |
string length [bytestring "\xC2\xa2"]
|
sl@0
|
47 |
} {1}
|
sl@0
|
48 |
test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} {
|
sl@0
|
49 |
string length [bytestring "\xE2"]
|
sl@0
|
50 |
} {1}
|
sl@0
|
51 |
test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} {
|
sl@0
|
52 |
string length [bytestring "\xE2\xA2"]
|
sl@0
|
53 |
} {2}
|
sl@0
|
54 |
test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} {
|
sl@0
|
55 |
string length [bytestring "\xE4\xb9\x8e"]
|
sl@0
|
56 |
} {1}
|
sl@0
|
57 |
test utf-2.8 {Tcl_UtfToUniChar: longer UTF sequences not supported} {
|
sl@0
|
58 |
string length [bytestring "\xF4\xA2\xA2\xA2"]
|
sl@0
|
59 |
} {4}
|
sl@0
|
60 |
|
sl@0
|
61 |
test utf-3.1 {Tcl_UtfCharComplete} {
|
sl@0
|
62 |
} {}
|
sl@0
|
63 |
|
sl@0
|
64 |
testConstraint testnumutfchars [llength [info commands testnumutfchars]]
|
sl@0
|
65 |
test utf-4.1 {Tcl_NumUtfChars: zero length} testnumutfchars {
|
sl@0
|
66 |
testnumutfchars ""
|
sl@0
|
67 |
} {0}
|
sl@0
|
68 |
test utf-4.2 {Tcl_NumUtfChars: length 1} testnumutfchars {
|
sl@0
|
69 |
testnumutfchars [bytestring "\xC2\xA2"]
|
sl@0
|
70 |
} {1}
|
sl@0
|
71 |
test utf-4.3 {Tcl_NumUtfChars: long string} testnumutfchars {
|
sl@0
|
72 |
testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"]
|
sl@0
|
73 |
} {7}
|
sl@0
|
74 |
test utf-4.4 {Tcl_NumUtfChars: #u0000} testnumutfchars {
|
sl@0
|
75 |
testnumutfchars [bytestring "\xC0\x80"]
|
sl@0
|
76 |
} {1}
|
sl@0
|
77 |
test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars {
|
sl@0
|
78 |
testnumutfchars "" 1
|
sl@0
|
79 |
} {0}
|
sl@0
|
80 |
test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} testnumutfchars {
|
sl@0
|
81 |
testnumutfchars [bytestring "\xC2\xA2"] 1
|
sl@0
|
82 |
} {1}
|
sl@0
|
83 |
test utf-4.7 {Tcl_NumUtfChars: long string, calc len} testnumutfchars {
|
sl@0
|
84 |
testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] 1
|
sl@0
|
85 |
} {7}
|
sl@0
|
86 |
test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} testnumutfchars {
|
sl@0
|
87 |
testnumutfchars [bytestring "\xC0\x80"] 1
|
sl@0
|
88 |
} {1}
|
sl@0
|
89 |
|
sl@0
|
90 |
test utf-5.1 {Tcl_UtfFindFirsts} {
|
sl@0
|
91 |
} {}
|
sl@0
|
92 |
|
sl@0
|
93 |
test utf-6.1 {Tcl_UtfNext} {
|
sl@0
|
94 |
} {}
|
sl@0
|
95 |
|
sl@0
|
96 |
test utf-7.1 {Tcl_UtfPrev} {
|
sl@0
|
97 |
} {}
|
sl@0
|
98 |
|
sl@0
|
99 |
test utf-8.1 {Tcl_UniCharAtIndex: index = 0} {
|
sl@0
|
100 |
string index abcd 0
|
sl@0
|
101 |
} {a}
|
sl@0
|
102 |
test utf-8.2 {Tcl_UniCharAtIndex: index = 0} {
|
sl@0
|
103 |
string index \u4e4e\u25a 0
|
sl@0
|
104 |
} "\u4e4e"
|
sl@0
|
105 |
test utf-8.3 {Tcl_UniCharAtIndex: index > 0} {
|
sl@0
|
106 |
string index abcd 2
|
sl@0
|
107 |
} {c}
|
sl@0
|
108 |
test utf-8.4 {Tcl_UniCharAtIndex: index > 0} {
|
sl@0
|
109 |
string index \u4e4e\u25a\xff\u543 2
|
sl@0
|
110 |
} "\uff"
|
sl@0
|
111 |
|
sl@0
|
112 |
test utf-9.1 {Tcl_UtfAtIndex: index = 0} {
|
sl@0
|
113 |
string range abcd 0 2
|
sl@0
|
114 |
} {abc}
|
sl@0
|
115 |
test utf-9.2 {Tcl_UtfAtIndex: index > 0} {
|
sl@0
|
116 |
string range \u4e4e\u25a\xff\u543klmnop 1 5
|
sl@0
|
117 |
} "\u25a\xff\u543kl"
|
sl@0
|
118 |
|
sl@0
|
119 |
|
sl@0
|
120 |
test utf-10.1 {Tcl_UtfBackslash: dst == NULL} {
|
sl@0
|
121 |
set x \n
|
sl@0
|
122 |
} {
|
sl@0
|
123 |
}
|
sl@0
|
124 |
test utf-10.2 {Tcl_UtfBackslash: \u subst} {
|
sl@0
|
125 |
set x \ua2
|
sl@0
|
126 |
} [bytestring "\xc2\xa2"]
|
sl@0
|
127 |
test utf-10.3 {Tcl_UtfBackslash: longer \u subst} {
|
sl@0
|
128 |
set x \u4e21
|
sl@0
|
129 |
} [bytestring "\xe4\xb8\xa1"]
|
sl@0
|
130 |
test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} {
|
sl@0
|
131 |
set x \u4e2k
|
sl@0
|
132 |
} "[bytestring \xd3\xa2]k"
|
sl@0
|
133 |
test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} {
|
sl@0
|
134 |
set x \u4e216
|
sl@0
|
135 |
} "[bytestring \xe4\xb8\xa1]6"
|
sl@0
|
136 |
proc bsCheck {char num} {
|
sl@0
|
137 |
global errNum
|
sl@0
|
138 |
test utf-10.$errNum {backslash substitution} {
|
sl@0
|
139 |
scan $char %c value
|
sl@0
|
140 |
set value
|
sl@0
|
141 |
} $num
|
sl@0
|
142 |
incr errNum
|
sl@0
|
143 |
}
|
sl@0
|
144 |
set errNum 6
|
sl@0
|
145 |
bsCheck \b 8
|
sl@0
|
146 |
bsCheck \e 101
|
sl@0
|
147 |
bsCheck \f 12
|
sl@0
|
148 |
bsCheck \n 10
|
sl@0
|
149 |
bsCheck \r 13
|
sl@0
|
150 |
bsCheck \t 9
|
sl@0
|
151 |
bsCheck \v 11
|
sl@0
|
152 |
bsCheck \{ 123
|
sl@0
|
153 |
bsCheck \} 125
|
sl@0
|
154 |
bsCheck \[ 91
|
sl@0
|
155 |
bsCheck \] 93
|
sl@0
|
156 |
bsCheck \$ 36
|
sl@0
|
157 |
bsCheck \ 32
|
sl@0
|
158 |
bsCheck \; 59
|
sl@0
|
159 |
bsCheck \\ 92
|
sl@0
|
160 |
bsCheck \Ca 67
|
sl@0
|
161 |
bsCheck \Ma 77
|
sl@0
|
162 |
bsCheck \CMa 67
|
sl@0
|
163 |
# prior to 8.3, this returned 8, as \8 as accepted as an
|
sl@0
|
164 |
# octal value - but it isn't! [Bug: 3975]
|
sl@0
|
165 |
bsCheck \8a 56
|
sl@0
|
166 |
bsCheck \14 12
|
sl@0
|
167 |
bsCheck \141 97
|
sl@0
|
168 |
bsCheck b\0 98
|
sl@0
|
169 |
bsCheck \x 120
|
sl@0
|
170 |
bsCheck \xa 10
|
sl@0
|
171 |
bsCheck \xA 10
|
sl@0
|
172 |
bsCheck \x41 65
|
sl@0
|
173 |
bsCheck \x541 65
|
sl@0
|
174 |
bsCheck \u 117
|
sl@0
|
175 |
bsCheck \uk 117
|
sl@0
|
176 |
bsCheck \u41 65
|
sl@0
|
177 |
bsCheck \ua 10
|
sl@0
|
178 |
bsCheck \uA 10
|
sl@0
|
179 |
bsCheck \340 224
|
sl@0
|
180 |
bsCheck \ua1 161
|
sl@0
|
181 |
bsCheck \u4e21 20001
|
sl@0
|
182 |
|
sl@0
|
183 |
test utf-11.1 {Tcl_UtfToUpper} {
|
sl@0
|
184 |
string toupper {}
|
sl@0
|
185 |
} {}
|
sl@0
|
186 |
test utf-11.2 {Tcl_UtfToUpper} {
|
sl@0
|
187 |
string toupper abc
|
sl@0
|
188 |
} ABC
|
sl@0
|
189 |
test utf-11.3 {Tcl_UtfToUpper} {
|
sl@0
|
190 |
string toupper \u00e3ab
|
sl@0
|
191 |
} \u00c3AB
|
sl@0
|
192 |
test utf-11.4 {Tcl_UtfToUpper} {
|
sl@0
|
193 |
string toupper \u01e3ab
|
sl@0
|
194 |
} \u01e2AB
|
sl@0
|
195 |
|
sl@0
|
196 |
test utf-12.1 {Tcl_UtfToLower} {
|
sl@0
|
197 |
string tolower {}
|
sl@0
|
198 |
} {}
|
sl@0
|
199 |
test utf-12.2 {Tcl_UtfToLower} {
|
sl@0
|
200 |
string tolower ABC
|
sl@0
|
201 |
} abc
|
sl@0
|
202 |
test utf-12.3 {Tcl_UtfToLower} {
|
sl@0
|
203 |
string tolower \u00c3AB
|
sl@0
|
204 |
} \u00e3ab
|
sl@0
|
205 |
test utf-12.4 {Tcl_UtfToLower} {
|
sl@0
|
206 |
string tolower \u01e2AB
|
sl@0
|
207 |
} \u01e3ab
|
sl@0
|
208 |
|
sl@0
|
209 |
test utf-13.1 {Tcl_UtfToTitle} {
|
sl@0
|
210 |
string totitle {}
|
sl@0
|
211 |
} {}
|
sl@0
|
212 |
test utf-13.2 {Tcl_UtfToTitle} {
|
sl@0
|
213 |
string totitle abc
|
sl@0
|
214 |
} Abc
|
sl@0
|
215 |
test utf-13.3 {Tcl_UtfToTitle} {
|
sl@0
|
216 |
string totitle \u00e3ab
|
sl@0
|
217 |
} \u00c3ab
|
sl@0
|
218 |
test utf-13.4 {Tcl_UtfToTitle} {
|
sl@0
|
219 |
string totitle \u01f3ab
|
sl@0
|
220 |
} \u01f2ab
|
sl@0
|
221 |
|
sl@0
|
222 |
test utf-14.1 {Tcl_UtfNcasecmp} {
|
sl@0
|
223 |
string compare -nocase a b
|
sl@0
|
224 |
} -1
|
sl@0
|
225 |
test utf-14.2 {Tcl_UtfNcasecmp} {
|
sl@0
|
226 |
string compare -nocase b a
|
sl@0
|
227 |
} 1
|
sl@0
|
228 |
test utf-14.3 {Tcl_UtfNcasecmp} {
|
sl@0
|
229 |
string compare -nocase B a
|
sl@0
|
230 |
} 1
|
sl@0
|
231 |
test utf-14.4 {Tcl_UtfNcasecmp} {
|
sl@0
|
232 |
string compare -nocase aBcB abca
|
sl@0
|
233 |
} 1
|
sl@0
|
234 |
|
sl@0
|
235 |
test utf-15.1 {Tcl_UniCharToUpper, negative delta} {
|
sl@0
|
236 |
string toupper aA
|
sl@0
|
237 |
} AA
|
sl@0
|
238 |
test utf-15.2 {Tcl_UniCharToUpper, positive delta} {
|
sl@0
|
239 |
string toupper \u0178\u00ff
|
sl@0
|
240 |
} \u0178\u0178
|
sl@0
|
241 |
test utf-15.3 {Tcl_UniCharToUpper, no delta} {
|
sl@0
|
242 |
string toupper !
|
sl@0
|
243 |
} !
|
sl@0
|
244 |
|
sl@0
|
245 |
test utf-16.1 {Tcl_UniCharToLower, negative delta} {
|
sl@0
|
246 |
string tolower aA
|
sl@0
|
247 |
} aa
|
sl@0
|
248 |
test utf-16.2 {Tcl_UniCharToLower, positive delta} {
|
sl@0
|
249 |
string tolower \u0178\u00ff
|
sl@0
|
250 |
} \u00ff\u00ff
|
sl@0
|
251 |
test utf-17.1 {Tcl_UniCharToLower, no delta} {
|
sl@0
|
252 |
string tolower !
|
sl@0
|
253 |
} !
|
sl@0
|
254 |
|
sl@0
|
255 |
test utf-18.1 {Tcl_UniCharToTitle, add one for title} {
|
sl@0
|
256 |
string totitle \u01c4
|
sl@0
|
257 |
} \u01c5
|
sl@0
|
258 |
test utf-18.2 {Tcl_UniCharToTitle, subtract one for title} {
|
sl@0
|
259 |
string totitle \u01c6
|
sl@0
|
260 |
} \u01c5
|
sl@0
|
261 |
test utf-18.3 {Tcl_UniCharToTitle, subtract delta for title (positive)} {
|
sl@0
|
262 |
string totitle \u017f
|
sl@0
|
263 |
} \u0053
|
sl@0
|
264 |
test utf-18.4 {Tcl_UniCharToTitle, subtract delta for title (negative)} {
|
sl@0
|
265 |
string totitle \u00ff
|
sl@0
|
266 |
} \u0178
|
sl@0
|
267 |
test utf-18.5 {Tcl_UniCharToTitle, no delta} {
|
sl@0
|
268 |
string totitle !
|
sl@0
|
269 |
} !
|
sl@0
|
270 |
|
sl@0
|
271 |
test utf-19.1 {TclUniCharLen} {
|
sl@0
|
272 |
list [regexp \\d abc456def foo] $foo
|
sl@0
|
273 |
} {1 4}
|
sl@0
|
274 |
|
sl@0
|
275 |
test utf-20.1 {TclUniCharNcmp} {
|
sl@0
|
276 |
} {}
|
sl@0
|
277 |
|
sl@0
|
278 |
test utf-21.1 {TclUniCharIsAlnum} {
|
sl@0
|
279 |
# this returns 1 with Unicode 3 compliance
|
sl@0
|
280 |
string is alnum \u1040\u021f
|
sl@0
|
281 |
} {1}
|
sl@0
|
282 |
test utf-21.2 {unicode alnum char in regc_locale.c} {
|
sl@0
|
283 |
# this returns 1 with Unicode 3 compliance
|
sl@0
|
284 |
list [regexp {^[[:alnum:]]+$} \u1040\u021f] [regexp {^\w+$} \u1040\u021f]
|
sl@0
|
285 |
} {1 1}
|
sl@0
|
286 |
|
sl@0
|
287 |
test utf-22.1 {TclUniCharIsWordChar} {
|
sl@0
|
288 |
string wordend "xyz123_bar fg" 0
|
sl@0
|
289 |
} 10
|
sl@0
|
290 |
test utf-22.2 {TclUniCharIsWordChar} {
|
sl@0
|
291 |
string wordend "x\u5080z123_bar\u203c fg" 0
|
sl@0
|
292 |
} 10
|
sl@0
|
293 |
|
sl@0
|
294 |
test utf-23.1 {TclUniCharIsAlpha} {
|
sl@0
|
295 |
# this returns 1 with Unicode 3 compliance
|
sl@0
|
296 |
string is alpha \u021f
|
sl@0
|
297 |
} {1}
|
sl@0
|
298 |
test utf-23.2 {unicode alpha char in regc_locale.c} {
|
sl@0
|
299 |
# this returns 1 with Unicode 3 compliance
|
sl@0
|
300 |
regexp {^[[:alpha:]]+$} \u021f
|
sl@0
|
301 |
} {1}
|
sl@0
|
302 |
|
sl@0
|
303 |
test utf-24.1 {TclUniCharIsDigit} {
|
sl@0
|
304 |
# this returns 1 with Unicode 3 compliance
|
sl@0
|
305 |
string is digit \u1040
|
sl@0
|
306 |
} {1}
|
sl@0
|
307 |
test utf-24.2 {unicode digit char in regc_locale.c} {
|
sl@0
|
308 |
# this returns 1 with Unicode 3 compliance
|
sl@0
|
309 |
list [regexp {^[[:digit:]]+$} \u1040] [regexp {^\d+$} \u1040]
|
sl@0
|
310 |
} {1 1}
|
sl@0
|
311 |
|
sl@0
|
312 |
test utf-24.3 {TclUniCharIsSpace} {
|
sl@0
|
313 |
# this returns 1 with Unicode 3 compliance
|
sl@0
|
314 |
string is space \u1680
|
sl@0
|
315 |
} {1}
|
sl@0
|
316 |
test utf-24.4 {unicode space char in regc_locale.c} {
|
sl@0
|
317 |
# this returns 1 with Unicode 3 compliance
|
sl@0
|
318 |
list [regexp {^[[:space:]]+$} \u1680] [regexp {^\s+$} \u1680]
|
sl@0
|
319 |
} {1 1}
|
sl@0
|
320 |
|
sl@0
|
321 |
testConstraint teststringobj [llength [info commands teststringobj]]
|
sl@0
|
322 |
test utf-25.1 {Tcl_UniCharNcasecmp} teststringobj {
|
sl@0
|
323 |
testobj freeallvars
|
sl@0
|
324 |
teststringobj set 1 a
|
sl@0
|
325 |
teststringobj set 2 b
|
sl@0
|
326 |
teststringobj getunicode 1
|
sl@0
|
327 |
teststringobj getunicode 2
|
sl@0
|
328 |
string compare -nocase [teststringobj get 1] [teststringobj get 2]
|
sl@0
|
329 |
} -1
|
sl@0
|
330 |
test utf-25.2 {Tcl_UniCharNcasecmp} teststringobj {
|
sl@0
|
331 |
testobj freeallvars
|
sl@0
|
332 |
teststringobj set 1 b
|
sl@0
|
333 |
teststringobj set 2 a
|
sl@0
|
334 |
teststringobj getunicode 1
|
sl@0
|
335 |
teststringobj getunicode 2
|
sl@0
|
336 |
string compare -nocase [teststringobj get 1] [teststringobj get 2]
|
sl@0
|
337 |
} 1
|
sl@0
|
338 |
test utf-25.3 {Tcl_UniCharNcasecmp} teststringobj {
|
sl@0
|
339 |
testobj freeallvars
|
sl@0
|
340 |
teststringobj set 1 B
|
sl@0
|
341 |
teststringobj set 2 a
|
sl@0
|
342 |
teststringobj getunicode 1
|
sl@0
|
343 |
teststringobj getunicode 2
|
sl@0
|
344 |
string compare -nocase [teststringobj get 1] [teststringobj get 2]
|
sl@0
|
345 |
} 1
|
sl@0
|
346 |
test utf-25.4 {Tcl_UniCharNcasecmp} teststringobj {
|
sl@0
|
347 |
testobj freeallvars
|
sl@0
|
348 |
teststringobj set 1 aBcB
|
sl@0
|
349 |
teststringobj set 2 abca
|
sl@0
|
350 |
teststringobj getunicode 1
|
sl@0
|
351 |
teststringobj getunicode 2
|
sl@0
|
352 |
string compare -nocase [teststringobj get 1] [teststringobj get 2]
|
sl@0
|
353 |
} 1
|
sl@0
|
354 |
|
sl@0
|
355 |
# cleanup
|
sl@0
|
356 |
::tcltest::cleanupTests
|
sl@0
|
357 |
return
|