# 2002 May 24
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The focus of
# this file is testing the SQLite routines used for converting between the
# various supported unicode encodings (UTF-8, UTF-16, UTF-16le and
# UTF-16be).
#
# $Id: enc2.test,v 1.29 2007/10/09 08:29:32 danielk1977 Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# If UTF16 support is disabled, ignore the tests in this file
#
ifcapable {!utf16} {
  finish_test
  return
}

# The rough organisation of tests in this file is:
#
# enc2.1.*: Simple tests with a UTF-8 db.
# enc2.2.*: Simple tests with a UTF-16LE db.
# enc2.3.*: Simple tests with a UTF-16BE db.
# enc2.4.*: Test that attached databases must have the same text encoding
#           as the main database.
# enc2.5.*: Test the behaviour of the library when a collation sequence is
#           not available for the most desirable text encoding.
# enc2.6.*: Similar test for user functions.
# enc2.7.*: Test that the VerifyCookie opcode protects against assuming the
#           wrong text encoding for the database.
# enc2.8.*: Test sqlite3_complete16()
#

db close

# Return the UTF-8 representation of the supplied UTF-16 string $str.
#
# A trailing pair of 0x00 bytes (the UTF-16 terminator), if present, is
# removed before the string is handed to TCL for conversion.
proc utf8 {str} {
  binary scan $str c* bytes
  set last   [lindex $bytes end]
  set penult [lindex $bytes end-1]
  if {$last==0 && $penult==0} {
    set str [binary format c* [lrange $bytes 0 end-2]]
  }
  return [encoding convertfrom unicode $str]
}

#
# The test procedure below is run three times, once per text encoding.
# Each time it starts, the file 'test.db' holds a database with the
# following contents:
set dbcontents {
  CREATE TABLE t1(a PRIMARY KEY, b, c);
  INSERT INTO t1 VALUES('one', 'I', 1);
}

# Check that the test.db database can be opened and manipulated, and
# that values can be retrieved from it in various text encodings.
#
#   t    Prefix used to build the name of every test case ($t.1 .. $t.10).
#   enc  Encoding name that "PRAGMA encoding" is expected to report.
#
proc run_test_script {t enc} {

# Open the database and read back the single pre-existing row.
do_test $t.1 {
  sqlite3 db test.db
  set DB [sqlite3_connection_pointer db]
  execsql {SELECT * FROM t1}
} {one I 1}

# Add one row.
do_test $t.2 {
  execsql {INSERT INTO t1 VALUES('two', 'II', 2);}
  execsql {SELECT * FROM t1}
} {one I 1 two II 2}

# Add several rows with a single multi-statement call.
do_test $t.3 {
  execsql {
    INSERT INTO t1 VALUES('three','III',3);
    INSERT INTO t1 VALUES('four','IV',4);
    INSERT INTO t1 VALUES('five','V',5);
  }
  execsql {SELECT * FROM t1}
} {one I 1 two II 2 three III 3 four IV 4 five V 5}

# Look rows up through the PRIMARY KEY column.
do_test $t.4 {
  execsql {
    SELECT * FROM t1 WHERE a = 'one';
  }
} {one I 1}
do_test $t.5 {
  execsql {
    SELECT * FROM t1 WHERE a = 'four';
  }
} {four IV 4}
ifcapable subquery {
  do_test $t.6 {
    execsql {
      SELECT * FROM t1 WHERE a IN ('one', 'two');
    }
  } {one I 1 two II 2}
}

# Fetch one row through the UTF-8 column accessor ...
do_test $t.7 {
  set STMT [sqlite3_prepare $DB "SELECT a FROM t1 WHERE c>3;" -1 TAIL]
  sqlite3_step $STMT
  sqlite3_column_text $STMT 0
} {four}

# ... and the next row through the UTF-16 accessor, converted back to
# UTF-8 so that it can be compared.
do_test $t.8 {
  sqlite3_step $STMT
  utf8 [sqlite3_column_text16 $STMT 0]
} {five}

do_test $t.9 {
  sqlite3_finalize $STMT
} SQLITE_OK

ifcapable vacuum {
  execsql VACUUM
}

# After all of the above the database must still report the encoding it
# was created with.
do_test $t.10 {
  db eval {PRAGMA encoding}
} $enc

}

# The three unicode encodings understood by SQLite.
set encodings {UTF-8 UTF-16le UTF-16be}

set sqlite_os_trace 0
set i 1
foreach enc $encodings {
  # Build a fresh test.db that uses encoding $enc.
  file delete -force test.db
  sqlite3 db test.db
  db eval "PRAGMA encoding = \"$enc\""
  execsql $dbcontents
  do_test enc2-$i.0.1 {
    db eval {PRAGMA encoding}
  } $enc

  # Now that the database has content, every attempt to switch to another
  # encoding is expected to leave the reported encoding at $enc
  # (tests enc2-$i.0.2 .. enc2-$i.0.4).
  set sub 2
  foreach alias {UTF8 UTF16le UTF16be} {
    do_test enc2-$i.0.$sub {
      db eval "PRAGMA encoding=$alias"
      db eval {PRAGMA encoding}
    } $enc
    incr sub
  }

  db close
  run_test_script enc2-$i $enc
  db close
  incr i
}

# Test that it is an error to try to attach a database with a different
# encoding to the main database.
ifcapable attach {
  do_test enc2-4.1 {
    file delete -force test.db
    sqlite3 db test.db
    db eval "PRAGMA encoding = 'UTF-8'"
    db eval "CREATE TABLE abc(a, b, c);"
  } {}
  do_test enc2-4.2 {
    file delete -force test2.db
    sqlite3 db2 test2.db
    db2 eval "PRAGMA encoding = 'UTF-16'"
    db2 eval "CREATE TABLE abc(a, b, c);"
  } {}
  do_test enc2-4.3 {
    catchsql {
      ATTACH 'test2.db' as aux;
    }
  } {1 {attached databases must use the same text encoding as main database}}
  db2 close
  db close
}

# The following tests - enc2-5.* - test that SQLite selects the correct
# collation sequence when more than one is available.

set ::values [list one two three four five]
set ::test_collate_enc INVALID

# Collation callback used by the enc2-5.* tests.
#
#   enc  Name of the text encoding this instance was registered for.  It is
#        stored in ::test_collate_enc so that a test can tell which
#        registration SQLite actually invoked.
#   lhs  Left-hand value to compare.
#   rhs  Right-hand value to compare.
#
# Returns the difference between the positions of $lhs and $rhs within
# ::values, so rows sort in the order "one two three four five".
proc test_collate {enc lhs rhs} {
  set ::test_collate_enc $enc
  set l [lsearch -exact $::values $lhs]
  set r [lsearch -exact $::values $rhs]
  # Braced so the operands are substituted exactly once, by expr itself.
  set res [expr {$l - $r}]
  # puts "enc=$enc lhs=$lhs/$l rhs=$rhs/$r res=$res"
  return $res
}

# The three flags given to add_test_collate appear, judging by the results
# expected below, to enable the UTF-8, UTF-16LE and UTF-16BE registrations
# of test_collate, in that order.

# Database with the default encoding: the expected results show UTF-8 being
# picked when all three are registered.
file delete -force test.db
sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
do_test enc2-5.0 {
  execsql {
    CREATE TABLE t5(a);
    INSERT INTO t5 VALUES('one');
    INSERT INTO t5 VALUES('two');
    INSERT INTO t5 VALUES('five');
    INSERT INTO t5 VALUES('three');
    INSERT INTO t5 VALUES('four');
  }
} {}
do_test enc2-5.1 {
  add_test_collate $DB 1 1 1
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate;}]
  lappend res $::test_collate_enc
} {one two three four five UTF-8}
do_test enc2-5.2 {
  add_test_collate $DB 0 1 0
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16LE}
do_test enc2-5.3 {
  add_test_collate $DB 0 0 1
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16BE}

# UTF-16LE database: the native encoding is expected first, then the other
# UTF-16 variant, then UTF-8.
db close
file delete -force test.db
sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
execsql {pragma encoding = 'UTF-16LE'}
do_test enc2-5.4 {
  execsql {
    CREATE TABLE t5(a);
    INSERT INTO t5 VALUES('one');
    INSERT INTO t5 VALUES('two');
    INSERT INTO t5 VALUES('five');
    INSERT INTO t5 VALUES('three');
    INSERT INTO t5 VALUES('four');
  }
} {}
do_test enc2-5.5 {
  add_test_collate $DB 1 1 1
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16LE}
do_test enc2-5.6 {
  add_test_collate $DB 1 0 1
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16BE}
do_test enc2-5.7 {
  add_test_collate $DB 1 0 0
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-8}

# UTF-16BE database: same idea with the two UTF-16 variants swapped.
db close
file delete -force test.db
sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
execsql {pragma encoding = 'UTF-16BE'}
do_test enc2-5.8 {
  execsql {
    CREATE TABLE t5(a);
    INSERT INTO t5 VALUES('one');
    INSERT INTO t5 VALUES('two');
    INSERT INTO t5 VALUES('five');
    INSERT INTO t5 VALUES('three');
    INSERT INTO t5 VALUES('four');
  }
} {}
do_test enc2-5.9 {
  add_test_collate $DB 1 1 1
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16BE}
do_test enc2-5.10 {
  add_test_collate $DB 1 1 0
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16LE}
do_test enc2-5.11 {
  add_test_collate $DB 1 0 0
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-8}

# Also test that a UTF-16 collation factory works.
#
# With every registration removed the collation must be reported as
# missing (enc2-5.12); once the collation-needed callback is installed the
# same query is expected to succeed again.
do_test enc2-5.12 {
  add_test_collate $DB 0 0 0
  catchsql {
    SELECT * FROM t5 ORDER BY 1 COLLATE test_collate
  }
} {1 {no such collation sequence: test_collate}}
do_test enc2-5.13 {
  add_test_collate_needed $DB
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate; }]
  lappend res $::test_collate_enc
} {one two three four five UTF-16BE}
do_test enc2-5.14 {
  set ::sqlite_last_needed_collation
} test_collate

db close
file delete -force test.db

# On a fresh connection no collation has been requested yet; creating a
# table whose column names the collation is expected to trigger the
# callback.
do_test enc2-5.15 {
  sqlite3 db test.db; set ::DB [sqlite3_connection_pointer db]
  add_test_collate_needed $::DB
  set ::sqlite_last_needed_collation
} {}
do_test enc2-5.16 {
  execsql {CREATE TABLE t1(a varchar collate test_collate);}
} {}
do_test enc2-5.17 {
  set ::sqlite_last_needed_collation
} {test_collate}

# The following tests - enc2-6.* - test that SQLite selects the correct
# user function when more than one is available.

# SQL user function used by the enc2-6.* tests.
#
#   enc  Name of the text encoding this instance was registered for.
#   arg  The SQL argument passed to test_function().
#
# Returns "$enc $arg", which lets each test see which registration SQLite
# chose to invoke.
proc test_function {enc arg} {
  return "$enc $arg"
}

# The three flags given to add_test_function appear, judging by the results
# expected below, to enable the UTF-8, UTF-16LE and UTF-16BE registrations
# of test_function, in that order.  The connection is reopened before each
# new registration.

# UTF-8 database.
db close
file delete -force test.db
sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
execsql {pragma encoding = 'UTF-8'}
do_test enc2-6.0 {
  execsql {
    CREATE TABLE t5(a);
    INSERT INTO t5 VALUES('one');
  }
} {}
do_test enc2-6.1 {
  add_test_function $DB 1 1 1
  execsql {
    SELECT test_function('sqlite')
  }
} {{UTF-8 sqlite}}
db close
sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
do_test enc2-6.2 {
  add_test_function $DB 0 1 0
  execsql {
    SELECT test_function('sqlite')
  }
} {{UTF-16LE sqlite}}
db close
sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
do_test enc2-6.3 {
  add_test_function $DB 0 0 1
  execsql {
    SELECT test_function('sqlite')
  }
} {{UTF-16BE sqlite}}

# UTF-16LE database.
db close
file delete -force test.db
sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
execsql {pragma encoding = 'UTF-16LE'}
# This setup step used to reuse the id "enc2-6.3", which made a failure
# report ambiguous.  It now has its own id; the ids that follow are left
# untouched.
do_test enc2-6.3.1 {
  execsql {
    CREATE TABLE t5(a);
    INSERT INTO t5 VALUES('sqlite');
  }
} {}
do_test enc2-6.4 {
  add_test_function $DB 1 1 1
  execsql {
    SELECT test_function('sqlite')
  }
} {{UTF-16LE sqlite}}
db close
sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
do_test enc2-6.5 {
  add_test_function $DB 0 1 0
  execsql {
    SELECT test_function('sqlite')
  }
} {{UTF-16LE sqlite}}
db close
sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
do_test enc2-6.6 {
  add_test_function $DB 0 0 1
  execsql {
    SELECT test_function('sqlite')
  }
} {{UTF-16BE sqlite}}

# UTF-16BE database.
db close
file delete -force test.db
sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
execsql {pragma encoding = 'UTF-16BE'}
do_test enc2-6.7 {
  execsql {
    CREATE TABLE t5(a);
    INSERT INTO t5 VALUES('sqlite');
  }
} {}
do_test enc2-6.8 {
  add_test_function $DB 1 1 1
  execsql {
    SELECT test_function('sqlite')
  }
} {{UTF-16BE sqlite}}
db close
sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
do_test enc2-6.9 {
  add_test_function $DB 0 1 0
  execsql {
    SELECT test_function('sqlite')
  }
} {{UTF-16LE sqlite}}
db close
sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
do_test enc2-6.10 {
  add_test_function $DB 0 0 1
  execsql {
    SELECT test_function('sqlite')
  }
} {{UTF-16BE sqlite}}


db close
file delete -force test.db

# The following tests - enc2-7.* - function as follows:
#
# 1: Open an empty database file assuming UTF-16 encoding.
# 2: Open the same database with a different handle assuming UTF-8. Create
#    a table using this handle.
# 3: Read the sqlite_master table from the first handle.
# 4: Ensure the first handle recognises the database encoding is UTF-8.
#
# First handle: request UTF-16 on the still-empty database file.
do_test enc2-7.1 {
  sqlite3 db test.db
  execsql {
    PRAGMA encoding = 'UTF-16';
    SELECT * FROM sqlite_master;
  }
} {}
do_test enc2-7.2 {
  set enc [execsql {
    PRAGMA encoding;
  }]
  string range $enc 0 end-2 ;# Chop off the "le" or "be"
} {UTF-16}

# Second handle: create the first table, using UTF-8.
do_test enc2-7.3 {
  sqlite3 db2 test.db
  execsql {
    PRAGMA encoding = 'UTF-8';
    CREATE TABLE abc(a, b, c);
  } db2
} {}

# The first handle must read the schema written through the second one.
# The expected root page is 3 when $AUTOVACUUM is true and 2 otherwise.
# The expr argument is braced so that $AUTOVACUUM is substituted exactly
# once, by expr itself.
do_test enc2-7.4 {
  execsql {
    SELECT * FROM sqlite_master;
  }
} "table abc abc [expr {$AUTOVACUUM?3:2}] {CREATE TABLE abc(a, b, c)}"

# ... and must now report the encoding the file actually has.
do_test enc2-7.5 {
  execsql {
    PRAGMA encoding;
  }
} {UTF-8}

db close
db2 close

# Return $utf8 converted with TCL's "unicode" encoding, followed by two
# 0x00 bytes that act as the string terminator.
proc utf16 {utf8} {
  set utf16 [encoding convertto unicode $utf8]
  append utf16 "\x00\x00"
  return $utf16
}

# enc2-8.*: sqlite3_complete16() is expected to report 1 for a statement
# that ends with a semicolon and 0 for an unfinished one.
ifcapable {complete} {
  do_test enc2-8.1 {
    sqlite3_complete16 [utf16 "SELECT * FROM t1;"]
  } {1}
  do_test enc2-8.2 {
    sqlite3_complete16 [utf16 "SELECT * FROM"]
  } {0}
}

# Test that the encoding of an empty database may still be set after the
# (empty) schema has been initialized.
file delete -force test.db

# Each entry is: test id, SQL to run on a freshly opened handle, and the
# result that SQL is expected to produce.  While the file holds no table
# the encoding can still be changed (9.1 - 9.3), even after the empty
# schema has been read (9.3).  Once a table has been created (9.4) a later
# request for a different encoding has no effect (9.5).
foreach {id stmts want} {
  enc2-9.1 {
    PRAGMA encoding = 'UTF-8';
    PRAGMA encoding;
  } UTF-8
  enc2-9.2 {
    PRAGMA encoding = 'UTF-16le';
    PRAGMA encoding;
  } UTF-16le
  enc2-9.3 {
    SELECT * FROM sqlite_master;
    PRAGMA encoding = 'UTF-8';
    PRAGMA encoding;
  } UTF-8
  enc2-9.4 {
    PRAGMA encoding = 'UTF-16le';
    CREATE TABLE abc(a, b, c);
    PRAGMA encoding;
  } UTF-16le
  enc2-9.5 {
    PRAGMA encoding = 'UTF-8';
    PRAGMA encoding;
  } UTF-16le
} {
  do_test $id {
    sqlite3 db test.db
    execsql $stmts
  } $want
}

# Ticket #1987.
# Disallow encoding changes once the encoding has been set.
#
# Both tables must be visible after the database is reopened, even though
# a different encoding was requested between the two CREATE statements.
#
do_test enc2-10.1 {
  db close
  file delete -force test.db test.db-journal
  sqlite3 db test.db
  db eval {
    PRAGMA encoding=UTF16;
    CREATE TABLE t1(a);
    PRAGMA encoding=UTF8;
    CREATE TABLE t2(b);
  }
  db close
  sqlite3 db test.db
  db eval {
    SELECT name FROM sqlite_master
  }
} {t1 t2}

finish_test