sl@0: # 2007 April 26
sl@0: #
sl@0: # The author disclaims copyright to this source code.
sl@0: #
sl@0: #*************************************************************************
sl@0: # This file implements tests for prefix-searching in the fts3
sl@0: # component of the SQLite library.
sl@0: #
sl@0: # $Id: fts3an.test,v 1.2 2007/12/13 21:54:11 drh Exp $
sl@0: #
sl@0: 
sl@0: set testdir [file dirname $argv0]
sl@0: source $testdir/tester.tcl
sl@0: 
sl@0: # If SQLITE_ENABLE_FTS3 is defined, omit this file.
sl@0: ifcapable !fts3 {
sl@0:   finish_test
sl@0:   return
sl@0: }
sl@0: 
sl@0: # A large string to prime the pump with.
sl@0: set text {
sl@0:   Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Maecenas
sl@0:   iaculis mollis ipsum. Praesent rhoncus placerat justo. Duis non quam
sl@0:   sed turpis posuere placerat. Curabitur et lorem in lorem porttitor
sl@0:   aliquet. Pellentesque bibendum tincidunt diam. Vestibulum blandit
sl@0:   ante nec elit. In sapien diam, facilisis eget, dictum sed, viverra
sl@0:   at, felis. Vestibulum magna. Sed magna dolor, vestibulum rhoncus,
sl@0:   ornare vel, vulputate sit amet, felis. Integer malesuada, tellus at
sl@0:   luctus gravida, diam nunc porta nibh, nec imperdiet massa metus eu
sl@0:   lectus. Aliquam nisi. Nunc fringilla nulla at lectus. Suspendisse
sl@0:   potenti. Cum sociis natoque penatibus et magnis dis parturient
sl@0:   montes, nascetur ridiculus mus. Pellentesque odio nulla, feugiat eu,
sl@0:   suscipit nec, consequat quis, risus.
sl@0: }
sl@0: 
sl@0: db eval {
sl@0:   CREATE VIRTUAL TABLE t1 USING fts3(c);
sl@0: 
sl@0:   INSERT INTO t1(rowid, c) VALUES(1, $text);
sl@0:   INSERT INTO t1(rowid, c) VALUES(2, 'Another lovely row');
sl@0: }
sl@0: 
sl@0: # Exact match
sl@0: do_test fts3an-1.1 {
sl@0:   execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lorem'"
sl@0: } {1}
sl@0: 
sl@0: # And a prefix
sl@0: do_test fts3an-1.2 {
sl@0:   execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lore*'"
sl@0: } {1}
sl@0: 
sl@0: # Prefix includes exact match
sl@0: do_test fts3an-1.3 {
sl@0:   execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lorem*'"
sl@0: } {1}
sl@0: 
sl@0: # Make certain everything isn't considered a prefix!
sl@0: do_test fts3an-1.4 {
sl@0:   execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lore'"
sl@0: } {}
sl@0: 
sl@0: # Prefix across multiple rows.
sl@0: do_test fts3an-1.5 {
sl@0:   execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lo*'"
sl@0: } {1 2}
sl@0: 
sl@0: # Likewise, with multiple hits in one document.
sl@0: do_test fts3an-1.6 {
sl@0:   execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'l*'"
sl@0: } {1 2}
sl@0: 
sl@0: # Prefix which should only hit one document.
sl@0: do_test fts3an-1.7 {
sl@0:   execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lov*'"
sl@0: } {2}
sl@0: 
sl@0: # * not at end is dropped.
sl@0: do_test fts3an-1.8 {
sl@0:   execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lo *'"
sl@0: } {}
sl@0: 
sl@0: # Stand-alone * is dropped.
sl@0: do_test fts3an-1.9 {
sl@0:   execsql "SELECT rowid FROM t1 WHERE t1 MATCH '*'"
sl@0: } {}
sl@0: 
sl@0: # Phrase-query prefix.
sl@0: do_test fts3an-1.10 {
sl@0:   execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"lovely r*\"'"
sl@0: } {2}
sl@0: do_test fts3an-1.11 {
sl@0:   execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"lovely r\"'"
sl@0: } {}
sl@0: 
sl@0: # Phrase query with multiple prefix matches.
sl@0: do_test fts3an-1.12 {
sl@0:   execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"a* l*\"'"
sl@0: } {1 2}
sl@0: 
sl@0: # Phrase query with multiple prefix matches.
sl@0: do_test fts3an-1.13 {
sl@0:   execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"a* l* row\"'"
sl@0: } {2}
sl@0: 
sl@0: 
sl@0: 
sl@0: 
sl@0: # Test across updates (and, by implication, deletes).
sl@0: 
sl@0: # Version of text without "lorem".
sl@0: regsub -all {[Ll]orem} $text '' ntext
sl@0: 
sl@0: db eval {
sl@0:   CREATE VIRTUAL TABLE t2 USING fts3(c);
sl@0: 
sl@0:   INSERT INTO t2(rowid, c) VALUES(1, $text);
sl@0:   INSERT INTO t2(rowid, c) VALUES(2, 'Another lovely row');
sl@0:   UPDATE t2 SET c = $ntext WHERE rowid = 1;
sl@0: }
sl@0: 
sl@0: # Can't see lorem as an exact match.
sl@0: do_test fts3an-2.1 {
sl@0:   execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lorem'"
sl@0: } {}
sl@0: 
sl@0: # Can't see a prefix of lorem, either.
sl@0: do_test fts3an-2.2 {
sl@0:   execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lore*'"
sl@0: } {}
sl@0: 
sl@0: # Can see lovely in the other document.
sl@0: do_test fts3an-2.3 {
sl@0:   execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lo*'"
sl@0: } {2}
sl@0: 
sl@0: # Can still see other hits.
sl@0: do_test fts3an-2.4 {
sl@0:   execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'l*'"
sl@0: } {1 2}
sl@0: 
sl@0: # Prefix which should only hit one document.
sl@0: do_test fts3an-2.5 {
sl@0:   execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lov*'"
sl@0: } {2}
sl@0: 
sl@0: 
sl@0: 
sl@0: # Test with a segment which will have multiple levels in the tree.
sl@0: 
sl@0: # Build a big document with lots of unique terms.
sl@0: set bigtext $text
sl@0: foreach c {a b c d e} {
sl@0:   regsub -all {[A-Za-z]+} $bigtext "&$c" t
sl@0:   append bigtext $t
sl@0: }
sl@0: 
sl@0: # Populate a table with many copies of the big document, so that we
sl@0: # can test the number of hits found.  Populate $ret with the expected
sl@0: # hit counts for each row.  offsets() returns 4 elements for every
sl@0: # hit.  We'll have 6 hits for row 1, 1 for row 2, and 6*(2^5)==192 for
sl@0: # $bigtext.
sl@0: set ret {6 1}
sl@0: db eval {
sl@0:   BEGIN;
sl@0:   CREATE VIRTUAL TABLE t3 USING fts3(c);
sl@0: 
sl@0:   INSERT INTO t3(rowid, c) VALUES(1, $text);
sl@0:   INSERT INTO t3(rowid, c) VALUES(2, 'Another lovely row');
sl@0: }
sl@0: for {set i 0} {$i<100} {incr i} {
sl@0:   db eval {INSERT INTO t3(rowid, c) VALUES(3+$i, $bigtext)}
sl@0:   lappend ret 192
sl@0: }
sl@0: db eval {COMMIT;}
sl@0: 
sl@0: # Test that we get the expected number of hits.
sl@0: do_test fts3an-3.1 {
sl@0:   set t {}
sl@0:   db eval {SELECT offsets(t3) as o FROM t3 WHERE t3 MATCH 'l*'} {
sl@0:     set l [llength $o]
sl@0:     lappend t [expr {$l/4}]
sl@0:   }
sl@0:   set t
sl@0: } $ret
sl@0: 
sl@0: # TODO(shess) It would be useful to test a couple edge cases, but I
sl@0: # don't know if we have the precision to manage it from here at this
sl@0: # time.  Prefix hits can cross leaves, which the code above _should_
sl@0: # hit by virtue of size.  There are two variations on this.  If the
sl@0: # tree is 2 levels high, the code will find the leaf-node extent
sl@0: # directly, but if it is higher, the code will have to follow two
sl@0: # separate interior branches down the tree.  Both should be tested.
sl@0: 
sl@0: finish_test