sl@0
|
1 |
# 2002 May 24
|
sl@0
|
2 |
#
|
sl@0
|
3 |
# The author disclaims copyright to this source code. In place of
|
sl@0
|
4 |
# a legal notice, here is a blessing:
|
sl@0
|
5 |
#
|
sl@0
|
6 |
# May you do good and not evil.
|
sl@0
|
7 |
# May you find forgiveness for yourself and forgive others.
|
sl@0
|
8 |
# May you share freely, never taking more than you give.
|
sl@0
|
9 |
#
|
sl@0
|
10 |
#***********************************************************************
|
sl@0
|
11 |
# This file implements regression tests for SQLite library. The focus of
|
sl@0
|
12 |
# this file is testing the SQLite routines used for converting between the
|
sl@0
|
13 |
# various supported unicode encodings (UTF-8, UTF-16, UTF-16le and
|
sl@0
|
14 |
# UTF-16be).
|
sl@0
|
15 |
#
|
sl@0
|
16 |
# $Id: enc.test,v 1.7 2007/05/23 16:23:09 danielk1977 Exp $
|
sl@0
|
17 |
|
sl@0
|
18 |
# Load the shared test harness (tester.tcl) from the directory that holds
# this script.
set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Every test below needs UTF-16 support. When the build was made without
# it, close out the test run immediately and stop evaluating this script.
ifcapable !utf16 {
  finish_test
  return
}
|
sl@0
|
27 |
|
sl@0
|
28 |
# Compare two binary strings byte-for-byte through do_test.
#
#   testname - name under which the comparison is reported
#   got      - binary string actually produced
#   expect   - binary string that was expected
#
# Both strings are expanded into lists of signed 8-bit integers, so the
# values handed to do_test are plain lists of numbers rather than raw
# binary data.
proc do_bincmp_test {testname got expect} {
  binary scan $got    c* actual_bytes
  binary scan $expect c* wanted_bytes

  # The "test body" simply yields the list of bytes that was produced.
  set body [list set dummy $actual_bytes]
  do_test $testname $body $wanted_bytes
}
|
sl@0
|
33 |
|
sl@0
|
34 |
# Reverse the byte order of a UTF-16 encoded string.
#
#   utf16 - binary string holding UTF-16 code units (two bytes each)
#
# Returns a binary string of the same length in which the two bytes of
# every 16-bit unit have been exchanged. An empty input yields an empty
# result. Raises an error if the input holds an odd number of bytes, since
# such a string cannot be split into whole 16-bit units.
proc swap_byte_order {utf16} {
  # Pre-set the scan target so it is defined whatever the input holds.
  set ints {}
  binary scan $utf16 c* ints

  if {[llength $ints] % 2} {
    error "swap_byte_order: input must contain an even number of bytes"
  }

  # Start from an empty list so that empty input still produces a value.
  set swapped {}
  foreach {first second} $ints {
    lappend swapped $second $first
  }

  return [binary format c* $swapped]
}
|
sl@0
|
46 |
|
sl@0
|
47 |
#
|
sl@0
|
48 |
# Test that the SQLite routines for converting between UTF encodings
|
sl@0
|
49 |
# produce the same results as their TCL counterparts.
|
sl@0
|
50 |
#
|
sl@0
|
51 |
# $testname is the prefix to be used for the test names.
|
sl@0
|
52 |
# $str is a string to use for testing (encoded in UTF-8, as normal for TCL).
|
sl@0
|
53 |
#
|
sl@0
|
54 |
# The test procedure is:
|
sl@0
|
55 |
# 1. Convert the string from UTF-8 to UTF-16le and check that the TCL and
|
sl@0
|
56 |
# SQLite routines produce the same results.
|
sl@0
|
57 |
#
|
sl@0
|
58 |
# 2. Convert the string from UTF-8 to UTF-16be and check that the TCL and
|
sl@0
|
59 |
# SQLite routines produce the same results.
|
sl@0
|
60 |
#
|
sl@0
|
61 |
# 3. Use the SQLite routines to convert the native machine order UTF-16
|
sl@0
|
62 |
# representation back to the original UTF-8. Check that the result
|
sl@0
|
63 |
# matches the original representation.
|
sl@0
|
64 |
#
|
sl@0
|
65 |
# 4. Add a byte-order mark to each of the UTF-16 representations and
|
sl@0
|
66 |
# check that the SQLite routines can convert them back to UTF-8. For
|
sl@0
|
67 |
# byte-order mark info, refer to section 3.10 of the unicode standard.
|
sl@0
|
68 |
#
|
sl@0
|
69 |
# 5. Take the byte-order marked UTF-16 strings from step 4 and ensure
|
sl@0
|
70 |
# that SQLite can convert them both to native byte order UTF-16
|
sl@0
|
71 |
# strings, sans BOM.
|
sl@0
|
72 |
#
|
sl@0
|
73 |
# Coverage:
|
sl@0
|
74 |
#
|
sl@0
|
75 |
# sqlite_utf8to16be (step 2)
|
sl@0
|
76 |
# sqlite_utf8to16le (step 1)
|
sl@0
|
77 |
# sqlite_utf16to8 (steps 3, 4)
|
sl@0
|
78 |
# sqlite_utf16to16le (step 5)
|
sl@0
|
79 |
# sqlite_utf16to16be (step 5)
|
sl@0
|
80 |
#
|
sl@0
|
81 |
# Run the full set of encoding-conversion checks for one string.
#
#   testname - prefix for the names of the individual tests
#   str      - the text to convert
#
# Reports tests $testname.1 to $testname.3, $testname.4.le/.be and
# $testname.5.{le.le,be.be,be.le,le.be}, in that order. test_translate,
# binarize and do_test (reached through do_bincmp_test) are not defined in
# this file.
proc test_conversion {testname str} {
  set host_is_le [expr {$::tcl_platform(byteOrder) eq "littleEndian"}]

  # Step 1: UTF-8 -> UTF-16le. The reference value is Tcl's own "unicode"
  # conversion plus a two-byte terminator, byte-swapped when the host is
  # not little-endian.
  set from_sqlite [test_translate $str UTF8 UTF16LE]
  set utf16le [encoding convertto unicode $str]
  append utf16le "\x00\x00"
  if {!$host_is_le} {
    set utf16le [swap_byte_order $utf16le]
  }
  do_bincmp_test $testname.1 $from_sqlite $utf16le

  # Step 2: UTF-8 -> UTF-16be. Same construction, swapped when the host
  # is little-endian.
  set from_sqlite [test_translate $str UTF8 UTF16BE]
  set utf16be [encoding convertto unicode $str]
  append utf16be "\x00\x00"
  if {$host_is_le} {
    set utf16be [swap_byte_order $utf16be]
  }
  do_bincmp_test $testname.2 $from_sqlite $utf16be

  # Step 3: host-order UTF-16 back to UTF-8.
  if {$host_is_le} {
    set native $utf16le
  } else {
    set native $utf16be
  }
  do_bincmp_test $testname.3 \
      [test_translate $native UTF16 UTF8] [binarize $str]

  # Step 4: UTF-16 with a leading byte-order mark back to UTF-8.
  # Only the little-endian call passes the extra trailing argument "1".
  set le_bom "\xFF\xFE"
  append le_bom $utf16le
  do_bincmp_test $testname.4.le \
      [test_translate $le_bom UTF16 UTF8 1] [binarize $str]

  set be_bom "\xFE\xFF"
  append be_bom $utf16be
  do_bincmp_test $testname.4.be \
      [test_translate $be_bom UTF16 UTF8] [binarize $str]

  # Step 5: BOM-prefixed UTF-16 to UTF-16 of a fixed byte order; the
  # result is compared against the BOM-free reference strings. Only the
  # le.le case names UTF16LE as its source encoding.
  foreach {suffix input from to expected} [list \
      le.le $le_bom UTF16LE UTF16LE $utf16le \
      be.be $be_bom UTF16   UTF16BE $utf16be \
      be.le $be_bom UTF16   UTF16LE $utf16le \
      le.be $le_bom UTF16   UTF16BE $utf16be] {
    do_bincmp_test $testname.5.$suffix \
        [test_translate $input $from $to] $expected
  }
}
|
sl@0
|
138 |
|
sl@0
|
139 |
# NOTE(review): translate_selftest is not defined in this file; presumably
# it is a self-check supplied by the test harness -- confirm.
translate_selftest

# Conversion cases as name/text pairs, run in the order listed. The
# repeated strings in enc-8 and enc-9 use code points on either side of
# U+0080 and U+0800, where the UTF-8 encoded length changes.
foreach {enc_case_name enc_case_text} [list \
    enc-1  "hello world" \
    enc-2  "sqlite" \
    enc-3  "" \
    enc-X  "\u0100" \
    enc-4  "\u1234" \
    enc-5  "\u4321abc" \
    enc-6  "\u4321\u1234" \
    enc-7  [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100] \
    enc-8  [string repeat "\u007E\u007F\u0080\u0081" 100] \
    enc-9  [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100] \
    enc-10 [string repeat "\uE000" 100]] {
  test_conversion $enc_case_name $enc_case_text
}

# Do not leave the loop variables behind in the enclosing scope.
unset enc_case_name enc_case_text
|
sl@0
|
152 |
|
sl@0
|
153 |
# Comparison callback used as the "test_collate" collation.
#
#   enc    - accepted but not used by the comparison
#   zLeft  - left-hand string
#   zRight - right-hand string
#
# Returns the result of [string compare]: -1, 0 or 1.
proc test_collate {enc zLeft zRight} {
  string compare $zLeft $zRight
}
|
sl@0
|
156 |
# NOTE(review): add_test_collate is not defined in this file; presumably it
# registers the test_collate procedure as a collation on the database
# handle, with the three flags selecting encodings -- confirm.
add_test_collate $::DB 0 0 1

# enc-11.1: create a table whose first column uses test_collate and insert
# two rows. The first value, X'C388', is the UTF-8 encoding of U+00C8. The
# second is not well-formed UTF-8: a 0xC0 lead byte, a long run of 0x80
# bytes, then 0x83 0x88. An index is then built over both columns.
do_test enc-11.1 {
  execsql {
    CREATE TABLE ab(a COLLATE test_collate, b);
    INSERT INTO ab VALUES(CAST (X'C388' AS TEXT), X'888800');
    INSERT INTO ab VALUES(CAST (X'C0808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808388' AS TEXT), X'888800');
    CREATE INDEX ab_i ON ab(a, b);
  }
} {}

# enc-11.2: both rows are expected to compare equal to U+00C8 (code point
# 200) under the column's collation.
do_test enc-11.2 {
  set cp200 "\u00C8"
  execsql {
    SELECT count(*) FROM ab WHERE a = $::cp200;
  }
} 2

finish_test
|