sl@0: # 2007 April 26 sl@0: # sl@0: # The author disclaims copyright to this source code. sl@0: # sl@0: #************************************************************************* sl@0: # This file implements tests for prefix-searching in the fts3 sl@0: # component of the SQLite library. sl@0: # sl@0: # $Id: fts3an.test,v 1.2 2007/12/13 21:54:11 drh Exp $ sl@0: # sl@0: sl@0: set testdir [file dirname $argv0] sl@0: source $testdir/tester.tcl sl@0: sl@0: # If SQLITE_ENABLE_FTS3 is defined, omit this file. sl@0: ifcapable !fts3 { sl@0: finish_test sl@0: return sl@0: } sl@0: sl@0: # A large string to prime the pump with. sl@0: set text { sl@0: Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Maecenas sl@0: iaculis mollis ipsum. Praesent rhoncus placerat justo. Duis non quam sl@0: sed turpis posuere placerat. Curabitur et lorem in lorem porttitor sl@0: aliquet. Pellentesque bibendum tincidunt diam. Vestibulum blandit sl@0: ante nec elit. In sapien diam, facilisis eget, dictum sed, viverra sl@0: at, felis. Vestibulum magna. Sed magna dolor, vestibulum rhoncus, sl@0: ornare vel, vulputate sit amet, felis. Integer malesuada, tellus at sl@0: luctus gravida, diam nunc porta nibh, nec imperdiet massa metus eu sl@0: lectus. Aliquam nisi. Nunc fringilla nulla at lectus. Suspendisse sl@0: potenti. Cum sociis natoque penatibus et magnis dis parturient sl@0: montes, nascetur ridiculus mus. Pellentesque odio nulla, feugiat eu, sl@0: suscipit nec, consequat quis, risus. sl@0: } sl@0: sl@0: db eval { sl@0: CREATE VIRTUAL TABLE t1 USING fts3(c); sl@0: sl@0: INSERT INTO t1(rowid, c) VALUES(1, $text); sl@0: INSERT INTO t1(rowid, c) VALUES(2, 'Another lovely row'); sl@0: } sl@0: sl@0: # Exact match sl@0: do_test fts3an-1.1 { sl@0: execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lorem'" sl@0: } {1} sl@0: sl@0: # And a prefix sl@0: do_test fts3an-1.2 { sl@0: execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lore*'" sl@0: } {1} sl@0: sl@0: # Prefix includes exact match sl@0: do_test fts3an-1.3 { sl@0: execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lorem*'" sl@0: } {1} sl@0: sl@0: # Make certain everything isn't considered a prefix! sl@0: do_test fts3an-1.4 { sl@0: execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lore'" sl@0: } {} sl@0: sl@0: # Prefix across multiple rows. sl@0: do_test fts3an-1.5 { sl@0: execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lo*'" sl@0: } {1 2} sl@0: sl@0: # Likewise, with multiple hits in one document. sl@0: do_test fts3an-1.6 { sl@0: execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'l*'" sl@0: } {1 2} sl@0: sl@0: # Prefix which should only hit one document. sl@0: do_test fts3an-1.7 { sl@0: execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lov*'" sl@0: } {2} sl@0: sl@0: # * not at end is dropped. sl@0: do_test fts3an-1.8 { sl@0: execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lo *'" sl@0: } {} sl@0: sl@0: # Stand-alone * is dropped. sl@0: do_test fts3an-1.9 { sl@0: execsql "SELECT rowid FROM t1 WHERE t1 MATCH '*'" sl@0: } {} sl@0: sl@0: # Phrase-query prefix. sl@0: do_test fts3an-1.10 { sl@0: execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"lovely r*\"'" sl@0: } {2} sl@0: do_test fts3an-1.11 { sl@0: execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"lovely r\"'" sl@0: } {} sl@0: sl@0: # Phrase query with multiple prefix matches. sl@0: do_test fts3an-1.12 { sl@0: execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"a* l*\"'" sl@0: } {1 2} sl@0: sl@0: # Phrase query with multiple prefix matches. sl@0: do_test fts3an-1.13 { sl@0: execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"a* l* row\"'" sl@0: } {2} sl@0: sl@0: sl@0: sl@0: sl@0: # Test across updates (and, by implication, deletes). sl@0: sl@0: # Version of text without "lorem". sl@0: regsub -all {[Ll]orem} $text '' ntext sl@0: sl@0: db eval { sl@0: CREATE VIRTUAL TABLE t2 USING fts3(c); sl@0: sl@0: INSERT INTO t2(rowid, c) VALUES(1, $text); sl@0: INSERT INTO t2(rowid, c) VALUES(2, 'Another lovely row'); sl@0: UPDATE t2 SET c = $ntext WHERE rowid = 1; sl@0: } sl@0: sl@0: # Can't see lorem as an exact match. sl@0: do_test fts3an-2.1 { sl@0: execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lorem'" sl@0: } {} sl@0: sl@0: # Can't see a prefix of lorem, either. sl@0: do_test fts3an-2.2 { sl@0: execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lore*'" sl@0: } {} sl@0: sl@0: # Can see lovely in the other document. sl@0: do_test fts3an-2.3 { sl@0: execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lo*'" sl@0: } {2} sl@0: sl@0: # Can still see other hits. sl@0: do_test fts3an-2.4 { sl@0: execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'l*'" sl@0: } {1 2} sl@0: sl@0: # Prefix which should only hit one document. sl@0: do_test fts3an-2.5 { sl@0: execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lov*'" sl@0: } {2} sl@0: sl@0: sl@0: sl@0: # Test with a segment which will have multiple levels in the tree. sl@0: sl@0: # Build a big document with lots of unique terms. sl@0: set bigtext $text sl@0: foreach c {a b c d e} { sl@0: regsub -all {[A-Za-z]+} $bigtext "&$c" t sl@0: append bigtext $t sl@0: } sl@0: sl@0: # Populate a table with many copies of the big document, so that we sl@0: # can test the number of hits found. Populate $ret with the expected sl@0: # hit counts for each row. offsets() returns 4 elements for every sl@0: # hit. We'll have 6 hits for row 1, 1 for row 2, and 6*(2^5)==192 for sl@0: # $bigtext. sl@0: set ret {6 1} sl@0: db eval { sl@0: BEGIN; sl@0: CREATE VIRTUAL TABLE t3 USING fts3(c); sl@0: sl@0: INSERT INTO t3(rowid, c) VALUES(1, $text); sl@0: INSERT INTO t3(rowid, c) VALUES(2, 'Another lovely row'); sl@0: } sl@0: for {set i 0} {$i<100} {incr i} { sl@0: db eval {INSERT INTO t3(rowid, c) VALUES(3+$i, $bigtext)} sl@0: lappend ret 192 sl@0: } sl@0: db eval {COMMIT;} sl@0: sl@0: # Test that we get the expected number of hits. sl@0: do_test fts3an-3.1 { sl@0: set t {} sl@0: db eval {SELECT offsets(t3) as o FROM t3 WHERE t3 MATCH 'l*'} { sl@0: set l [llength $o] sl@0: lappend t [expr {$l/4}] sl@0: } sl@0: set t sl@0: } $ret sl@0: sl@0: # TODO(shess) It would be useful to test a couple edge cases, but I sl@0: # don't know if we have the precision to manage it from here at this sl@0: # time. Prefix hits can cross leaves, which the code above _should_ sl@0: # hit by virtue of size. There are two variations on this. If the sl@0: # tree is 2 levels high, the code will find the leaf-node extent sl@0: # directly, but if it is higher, the code will have to follow two sl@0: # separate interior branches down the tree. Both should be tested. sl@0: sl@0: finish_test