First public contribution.
     3 # The author disclaims copyright to this source code.  In place of
 
     4 # a legal notice, here is a blessing:
 
     6 #    May you do good and not evil.
 
     7 #    May you find forgiveness for yourself and forgive others.
 
     8 #    May you share freely, never taking more than you give.
 
    10 #***********************************************************************
 
    11 # This file implements regression tests for SQLite library.  The focus of
 
    12 # this file is testing the SQLite routines used for converting between the
 
     13 # various supported unicode encodings (UTF-8, UTF-16, UTF-16le and
 
    16 # $Id: enc2.test,v 1.29 2007/10/09 08:29:32 danielk1977 Exp $
 
    18 set testdir [file dirname $argv0]
 
    19 source $testdir/tester.tcl
 
    21 # If UTF16 support is disabled, ignore the tests in this file
 
    28 # The rough organisation of tests in this file is:
 
    30 # enc2.1.*: Simple tests with a UTF-8 db.
 
    31 # enc2.2.*: Simple tests with a UTF-16LE db.
 
    32 # enc2.3.*: Simple tests with a UTF-16BE db.
 
    33 # enc2.4.*: Test that attached databases must have the same text encoding
 
    34 #           as the main database.
 
    35 # enc2.5.*: Test the behaviour of the library when a collation sequence is
 
    36 #           not available for the most desirable text encoding.
 
    37 # enc2.6.*: Similar test for user functions.
 
    38 # enc2.7.*: Test that the VerifyCookie opcode protects against assuming the
 
    39 #           wrong text encoding for the database.
 
    40 # enc2.8.*: Test sqlite3_complete16()
 
    45 # Return the UTF-8 representation of the supplied UTF-16 string $str. 
 
    47   # If $str ends in two 0x00 0x00 bytes, knock these off before
 
    48   # converting to UTF-8 using TCL.
 
    49   binary scan $str \c* vals
 
    50   if {[lindex $vals end]==0 && [lindex $vals end-1]==0} {
 
    51     set str [binary format \c* [lrange $vals 0 end-2]]
 
    54   set r [encoding convertfrom unicode $str]
 
    59 # This proc contains all the tests in this file. It is run
 
    60 # three times. Each time the file 'test.db' contains a database
 
    61 # with the following contents:
 
    63   CREATE TABLE t1(a PRIMARY KEY, b, c);
 
    64   INSERT INTO t1 VALUES('one', 'I', 1);
 
    66 # This proc tests that we can open and manipulate the test.db 
 
     67 # database, and that it is possible to retrieve values in
 
    68 # various text encodings.
 
    70 proc run_test_script {t enc} {
 
    72 # Open the database and pull out a (the) row.
 
    74   sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
 
    75   execsql {SELECT * FROM t1}
 
    80   execsql {INSERT INTO t1 VALUES('two', 'II', 2);}
 
    81   execsql {SELECT * FROM t1}
 
    87     INSERT INTO t1 VALUES('three','III',3);
 
    88     INSERT INTO t1 VALUES('four','IV',4);
 
    89     INSERT INTO t1 VALUES('five','V',5);
 
    91   execsql {SELECT * FROM t1}
 
    92 } {one I 1 two II 2 three III 3 four IV 4 five V 5}
 
    97     SELECT * FROM t1 WHERE a = 'one';
 
   102     SELECT * FROM t1 WHERE a = 'four';
 
   108       SELECT * FROM t1 WHERE a IN ('one', 'two');
 
   113 # Now check that we can retrieve data in both UTF-16 and UTF-8
 
   115   set STMT [sqlite3_prepare $DB "SELECT a FROM t1 WHERE c>3;" -1 TAIL]
 
   117   sqlite3_column_text $STMT 0
 
   122   utf8 [sqlite3_column_text16 $STMT 0]
 
   126   sqlite3_finalize $STMT
 
   134   db eval {PRAGMA encoding}
 
   139 # The three unicode encodings understood by SQLite.
 
   140 set encodings [list UTF-8 UTF-16le UTF-16be]
 
   142 set sqlite_os_trace 0
 
   144 foreach enc $encodings {
 
   145   file delete -force test.db
 
   147   db eval "PRAGMA encoding = \"$enc\""
 
   149   do_test enc2-$i.0.1 {
 
   150     db eval {PRAGMA encoding}
 
   152   do_test enc2-$i.0.2 {
 
   153     db eval {PRAGMA encoding=UTF8}
 
   154     db eval {PRAGMA encoding}
 
   156   do_test enc2-$i.0.3 {
 
   157     db eval {PRAGMA encoding=UTF16le}
 
   158     db eval {PRAGMA encoding}
 
   160   do_test enc2-$i.0.4 {
 
   161     db eval {PRAGMA encoding=UTF16be}
 
   162     db eval {PRAGMA encoding}
 
   166   run_test_script enc2-$i $enc
 
   171 # Test that it is an error to try to attach a database with a different
 
   172 # encoding to the main database.
 
   175     file delete -force test.db
 
   177     db eval "PRAGMA encoding = 'UTF-8'"
 
   178     db eval "CREATE TABLE abc(a, b, c);"
 
   181     file delete -force test2.db
 
   183     db2 eval "PRAGMA encoding = 'UTF-16'"
 
   184     db2 eval "CREATE TABLE abc(a, b, c);"
 
   188       ATTACH 'test2.db' as aux;
 
   190   } {1 {attached databases must use the same text encoding as main database}}
 
   195 # The following tests - enc2-5.* - test that SQLite selects the correct
 
   196 # collation sequence when more than one is available.
 
   198 set ::values [list one two three four five]
 
   199 set ::test_collate_enc INVALID
 
   200 proc test_collate {enc lhs rhs} {
 
   201   set ::test_collate_enc $enc
 
   202   set l [lsearch -exact $::values $lhs]
 
   203   set r [lsearch -exact $::values $rhs]
 
   204   set res [expr $l - $r]
 
   205   # puts "enc=$enc lhs=$lhs/$l rhs=$rhs/$r res=$res"
 
   209 file delete -force test.db
 
   210 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
 
   214     INSERT INTO t5 VALUES('one');
 
   215     INSERT INTO t5 VALUES('two');
 
   216     INSERT INTO t5 VALUES('five');
 
   217     INSERT INTO t5 VALUES('three');
 
   218     INSERT INTO t5 VALUES('four');
 
   222   add_test_collate $DB 1 1 1
 
   223   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate;}]
 
   224   lappend res $::test_collate_enc
 
   225 } {one two three four five UTF-8}
 
   227   add_test_collate $DB 0 1 0
 
   228   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
 
   229   lappend res $::test_collate_enc
 
   230 } {one two three four five UTF-16LE}
 
   232   add_test_collate $DB 0 0 1
 
   233   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
 
   234   lappend res $::test_collate_enc
 
   235 } {one two three four five UTF-16BE}
 
   238 file delete -force test.db
 
   239 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
 
   240 execsql {pragma encoding = 'UTF-16LE'}
 
   244     INSERT INTO t5 VALUES('one');
 
   245     INSERT INTO t5 VALUES('two');
 
   246     INSERT INTO t5 VALUES('five');
 
   247     INSERT INTO t5 VALUES('three');
 
   248     INSERT INTO t5 VALUES('four');
 
   252   add_test_collate $DB 1 1 1
 
   253   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
 
   254   lappend res $::test_collate_enc
 
   255 } {one two three four five UTF-16LE}
 
   257   add_test_collate $DB 1 0 1
 
   258   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
 
   259   lappend res $::test_collate_enc
 
   260 } {one two three four five UTF-16BE}
 
   262   add_test_collate $DB 1 0 0
 
   263   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
 
   264   lappend res $::test_collate_enc
 
   265 } {one two three four five UTF-8}
 
   268 file delete -force test.db
 
   269 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
 
   270 execsql {pragma encoding = 'UTF-16BE'}
 
   274     INSERT INTO t5 VALUES('one');
 
   275     INSERT INTO t5 VALUES('two');
 
   276     INSERT INTO t5 VALUES('five');
 
   277     INSERT INTO t5 VALUES('three');
 
   278     INSERT INTO t5 VALUES('four');
 
   282   add_test_collate $DB 1 1 1
 
   283   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
 
   284   lappend res $::test_collate_enc
 
   285 } {one two three four five UTF-16BE}
 
   287   add_test_collate $DB 1 1 0
 
   288   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
 
   289   lappend res $::test_collate_enc
 
   290 } {one two three four five UTF-16LE}
 
   292   add_test_collate $DB 1 0 0
 
   293   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
 
   294   lappend res $::test_collate_enc
 
   295 } {one two three four five UTF-8}
 
   297 # Also test that a UTF-16 collation factory works.
 
   299   add_test_collate $DB 0 0 0
 
   301     SELECT * FROM t5 ORDER BY 1 COLLATE test_collate
 
   303 } {1 {no such collation sequence: test_collate}}
 
   305   add_test_collate_needed $DB 
 
   306   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate; }]
 
   307   lappend res $::test_collate_enc
 
   308 } {one two three four five UTF-16BE}
 
   310   set ::sqlite_last_needed_collation
 
   314 file delete -force test.db
 
   317   sqlite3 db test.db; set ::DB [sqlite3_connection_pointer db]
 
   318   add_test_collate_needed $::DB
 
   319   set ::sqlite_last_needed_collation
 
   322   execsql {CREATE TABLE t1(a varchar collate test_collate);}
 
   325   set ::sqlite_last_needed_collation
 
   328 # The following tests - enc2-6.* - test that SQLite selects the correct
 
   329 # user function when more than one is available.
 
   331 proc test_function {enc arg} {
 
   336 file delete -force test.db
 
   337 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
 
   338 execsql {pragma encoding = 'UTF-8'}
 
   342     INSERT INTO t5 VALUES('one');
 
   346   add_test_function $DB 1 1 1
 
   348     SELECT test_function('sqlite')
 
   352 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
 
   354   add_test_function $DB 0 1 0
 
   356     SELECT test_function('sqlite')
 
   358 } {{UTF-16LE sqlite}}
 
   360 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
 
   362   add_test_function $DB 0 0 1
 
   364     SELECT test_function('sqlite')
 
   366 } {{UTF-16BE sqlite}}
 
   369 file delete -force test.db
 
   370 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
 
   371 execsql {pragma encoding = 'UTF-16LE'}
 
   375     INSERT INTO t5 VALUES('sqlite');
 
   379   add_test_function $DB 1 1 1
 
   381     SELECT test_function('sqlite')
 
   383 } {{UTF-16LE sqlite}}
 
   385 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
 
   387   add_test_function $DB 0 1 0
 
   389     SELECT test_function('sqlite')
 
   391 } {{UTF-16LE sqlite}}
 
   393 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
 
   395   add_test_function $DB 0 0 1
 
   397     SELECT test_function('sqlite')
 
   399 } {{UTF-16BE sqlite}}
 
   402 file delete -force test.db
 
   403 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
 
   404 execsql {pragma encoding = 'UTF-16BE'}
 
   408     INSERT INTO t5 VALUES('sqlite');
 
   412   add_test_function $DB 1 1 1
 
   414     SELECT test_function('sqlite')
 
   416 } {{UTF-16BE sqlite}}
 
   418 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
 
   420   add_test_function $DB 0 1 0
 
   422     SELECT test_function('sqlite')
 
   424 } {{UTF-16LE sqlite}}
 
   426 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
 
   428   add_test_function $DB 0 0 1
 
   430     SELECT test_function('sqlite')
 
   432 } {{UTF-16BE sqlite}}
 
   436 file delete -force test.db
 
   438 # The following tests - enc2-7.* - function as follows:
 
   440 # 1: Open an empty database file assuming UTF-16 encoding.
 
   441 # 2: Open the same database with a different handle assuming UTF-8. Create
 
   442 #    a table using this handle.
 
   443 # 3: Read the sqlite_master table from the first handle. 
 
   444 # 4: Ensure the first handle recognises the database encoding is UTF-8.
 
   449     PRAGMA encoding = 'UTF-16';
 
   450     SELECT * FROM sqlite_master;
 
   457   string range $enc 0 end-2 ;# Chop off the "le" or "be"
 
   462     PRAGMA encoding = 'UTF-8';
 
   463     CREATE TABLE abc(a, b, c);
 
   468     SELECT * FROM sqlite_master;
 
   470 } "table abc abc [expr $AUTOVACUUM?3:2] {CREATE TABLE abc(a, b, c)}"
 
   481   set utf16 [encoding convertto unicode $utf8]
 
   482   append utf16 "\x00\x00"
 
   485 ifcapable {complete} {
 
   487     sqlite3_complete16 [utf16 "SELECT * FROM t1;"]
 
   490     sqlite3_complete16 [utf16 "SELECT * FROM"]
 
   494 # Test that the encoding of an empty database may still be set after the
 
   495 # (empty) schema has been initialized.
 
   496 file delete -force test.db
 
   500     PRAGMA encoding = 'UTF-8';
 
   507     PRAGMA encoding = 'UTF-16le';
 
   514     SELECT * FROM sqlite_master;
 
   515     PRAGMA encoding = 'UTF-8';
 
   522     PRAGMA encoding = 'UTF-16le';
 
   523     CREATE TABLE abc(a, b, c);
 
   530     PRAGMA encoding = 'UTF-8';
 
   536 # Disallow encoding changes once the encoding has been set.
 
   540   file delete -force test.db test.db-journal
 
   543     PRAGMA encoding=UTF16;
 
   545     PRAGMA encoding=UTF8;
 
   551     SELECT name FROM sqlite_master