# 2002 May 24
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The focus of
# this file is testing the SQLite routines used for converting between the
# various supported unicode encodings (UTF-8, UTF-16, UTF-16le and
# UTF-16be).
#
# $Id: enc.test,v 1.7 2007/05/23 16:23:09 danielk1977 Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Skip this test if the build does not support multiple encodings.
#
ifcapable {!utf16} {
  finish_test
  return
}

# Compare two binary strings byte by byte.
#
# Both $got and $expect are decoded into lists of signed 8-bit integers
# so that a mismatch is reported as readable numbers rather than as raw
# bytes. The comparison itself is delegated to do_test under the name
# $testname.
#
proc do_bincmp_test {testname got expect} {
  binary scan $expect \c* expectvals
  binary scan $got \c* gotvals
  do_test $testname [list set dummy $gotvals] $expectvals
}

# $utf16 is a UTF-16 encoded string. Swap each pair of bytes around
# to change the byte-order of the string.
#
# Returns the byte-swapped binary string. An empty input yields an
# empty result.
#
proc swap_byte_order {utf16} {
  binary scan $utf16 \c* ints

  # Start from an empty list so that an empty input (for which the loop
  # body never runs) does not leave ints2 undefined.
  set ints2 {}
  foreach {a b} $ints {
    lappend ints2 $b
    lappend ints2 $a
  }

  return [binary format \c* $ints2]
}

#
# Test that the SQLite routines for converting between UTF encodings
# produce the same results as their TCL counterparts.
#
# $testname is the prefix to be used for the test names.
# $str is a string to use for testing (encoded in UTF-8, as normal for TCL).
#
# The test procedure is:
#   1. Convert the string from UTF-8 to UTF-16le and check that the TCL and
#      SQLite routines produce the same results.
#
#   2. Convert the string from UTF-8 to UTF-16be and check that the TCL and
#      SQLite routines produce the same results.
#
#   3. Use the SQLite routines to convert the native machine order UTF-16
#      representation back to the original UTF-8. Check that the result
#      matches the original representation.
#
#   4. Add a byte-order mark to each of the UTF-16 representations and
#      check that the SQLite routines can convert them back to UTF-8.  For
#      byte-order mark info, refer to section 3.10 of the unicode standard.
#
#   5. Take the byte-order marked UTF-16 strings from step 4 and ensure
#      that SQLite can convert them both to native byte order UTF-16
#      strings, sans BOM.
#
# Coverage:
#
#   sqlite_utf8to16be (step 2)
#   sqlite_utf8to16le (step 1)
#   sqlite_utf16to8 (steps 3, 4)
#   sqlite_utf16to16le (step 5)
#   sqlite_utf16to16be (step 5)
#
proc test_conversion {testname str} {

  # Step 1.
  set utf16le_sqlite3 [test_translate $str UTF8 UTF16LE]
  set utf16le_tcl [encoding convertto unicode $str]
  # A two-byte zero terminator is appended to the TCL result before the
  # comparison with the SQLite result.
  append utf16le_tcl "\x00\x00"
  if { $::tcl_platform(byteOrder)!="littleEndian" } {
    set utf16le_tcl [swap_byte_order $utf16le_tcl]
  }
  do_bincmp_test $testname.1 $utf16le_sqlite3 $utf16le_tcl
  set utf16le $utf16le_tcl

  # Step 2.
  set utf16be_sqlite3 [test_translate $str UTF8 UTF16BE]
  set utf16be_tcl [encoding convertto unicode $str]
  append utf16be_tcl "\x00\x00"
  if { $::tcl_platform(byteOrder)=="littleEndian" } {
    set utf16be_tcl [swap_byte_order $utf16be_tcl]
  }
  do_bincmp_test $testname.2 $utf16be_sqlite3 $utf16be_tcl
  set utf16be $utf16be_tcl

  # Step 3.
  if { $::tcl_platform(byteOrder)=="littleEndian" } {
    set utf16 $utf16le
  } else {
    set utf16 $utf16be
  }
  set utf8_sqlite3 [test_translate $utf16 UTF16 UTF8]
  do_bincmp_test $testname.3 $utf8_sqlite3 [binarize $str]

  # Step 4 (little endian).
  # utf16le_bom does not exist yet, so [append] creates it as BOM + data.
  append utf16le_bom "\xFF\xFE" $utf16le
  set utf8_sqlite3 [test_translate $utf16le_bom UTF16 UTF8 1]
  do_bincmp_test $testname.4.le $utf8_sqlite3 [binarize $str]

  # Step 4 (big endian).
  append utf16be_bom "\xFE\xFF" $utf16be
  set utf8_sqlite3 [test_translate $utf16be_bom UTF16 UTF8]
  do_bincmp_test $testname.4.be $utf8_sqlite3 [binarize $str]

  # Step 5 (little endian to little endian).
  set utf16_sqlite3 [test_translate $utf16le_bom UTF16LE UTF16LE]
  do_bincmp_test $testname.5.le.le $utf16_sqlite3 $utf16le

  # Step 5 (big endian to big endian).
  set utf16_sqlite3 [test_translate $utf16be_bom UTF16 UTF16BE]
  do_bincmp_test $testname.5.be.be $utf16_sqlite3 $utf16be

  # Step 5 (big endian to little endian).
  set utf16_sqlite3 [test_translate $utf16be_bom UTF16 UTF16LE]
  do_bincmp_test $testname.5.be.le $utf16_sqlite3 $utf16le

  # Step 5 (little endian to big endian).
  set utf16_sqlite3 [test_translate $utf16le_bom UTF16 UTF16BE]
  do_bincmp_test $testname.5.le.be $utf16_sqlite3 $utf16be
}

translate_selftest

test_conversion enc-1 "hello world"
test_conversion enc-2 "sqlite"
test_conversion enc-3 ""
test_conversion enc-X "\u0100"
test_conversion enc-4 "\u1234"
test_conversion enc-5 "\u4321abc"
test_conversion enc-6 "\u4321\u1234"
test_conversion enc-7 [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100]
test_conversion enc-8 [string repeat "\u007E\u007F\u0080\u0081" 100]
test_conversion enc-9 [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100]
test_conversion enc-10 [string repeat "\uE000" 100]

# Collation callback used by the "test_collate" collating sequence: orders
# the two strings with [string compare]. The $enc argument is ignored.
#
proc test_collate {enc zLeft zRight} {
  return [string compare $zLeft $zRight]
}
# NOTE(review): the three flags presumably select which text encodings the
# collation is registered for -- confirm against the test harness.
add_test_collate $::DB 0 0 1
do_test enc-11.1 {
  execsql {
    CREATE TABLE ab(a COLLATE test_collate, b);
    INSERT INTO ab VALUES(CAST (X'C388' AS TEXT), X'888800');
    INSERT INTO ab VALUES(CAST (X'C0808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808388' AS TEXT), X'888800');
    CREATE INDEX ab_i ON ab(a, b);
  }
} {}
do_test enc-11.2 {
  # X'C388' is the UTF-8 encoding of U+00C8; the test expects both rows
  # inserted above to compare equal to it under test_collate.
  set cp200 "\u00C8"
  execsql {
    SELECT count(*) FROM ab WHERE a = $::cp200;
  }
} {2}

finish_test