sl@0
|
1 |
# 2007 April 26
|
sl@0
|
2 |
#
|
sl@0
|
3 |
# The author disclaims copyright to this source code.
|
sl@0
|
4 |
#
|
sl@0
|
5 |
#*************************************************************************
|
sl@0
|
6 |
# This file implements tests for prefix-searching in the fts2
|
sl@0
|
7 |
# component of the SQLite library.
|
sl@0
|
8 |
#
|
sl@0
|
9 |
# $Id: fts2n.test,v 1.2 2007/12/13 21:54:11 drh Exp $
|
sl@0
|
10 |
#
|
sl@0
|
11 |
|
sl@0
|
12 |
set testdir [file dirname $argv0]
|
sl@0
|
13 |
source $testdir/tester.tcl
|
sl@0
|
14 |
|
sl@0
|
15 |
# If SQLITE_ENABLE_FTS2 is not defined, omit this file.
|
sl@0
|
16 |
# Without fts2 support there is nothing to exercise: finish the test run
# and return from this script immediately.
ifcapable !fts2 { finish_test; return }
|
sl@0
|
20 |
|
sl@0
|
21 |
# Sample document used throughout the file.  It contains several words
# beginning with "l" (Lorem/lorem, luctus, lectus), which the prefix
# queries below rely on.
set text {
Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Maecenas
iaculis mollis ipsum. Praesent rhoncus placerat justo. Duis non quam
sed turpis posuere placerat. Curabitur et lorem in lorem porttitor
aliquet. Pellentesque bibendum tincidunt diam. Vestibulum blandit
ante nec elit. In sapien diam, facilisis eget, dictum sed, viverra
at, felis. Vestibulum magna. Sed magna dolor, vestibulum rhoncus,
ornare vel, vulputate sit amet, felis. Integer malesuada, tellus at
luctus gravida, diam nunc porta nibh, nec imperdiet massa metus eu
lectus. Aliquam nisi. Nunc fringilla nulla at lectus. Suspendisse
potenti. Cum sociis natoque penatibus et magnis dis parturient
montes, nascetur ridiculus mus. Pellentesque odio nulla, feugiat eu,
suscipit nec, consequat quis, risus.
}

# Table t1: row 1 holds the sample document, row 2 a short second document.
db eval {CREATE VIRTUAL TABLE t1 USING fts2(c)}
db eval {INSERT INTO t1(rowid, c) VALUES(1, $text)}
db eval {INSERT INTO t1(rowid, c) VALUES(2, 'Another lovely row')}
|
sl@0
|
43 |
|
sl@0
|
44 |
# A complete term matches the document containing it.
do_test fts2n-1.1 {
  execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'lorem'}
} {1}

# A proper prefix followed by * matches as well.
do_test fts2n-1.2 {
  execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'lore*'}
} {1}

# A prefix query also matches a term equal to the prefix itself.
do_test fts2n-1.3 {
  execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'lorem*'}
} {1}

# Without the *, a partial term must not behave like a prefix.
do_test fts2n-1.4 {
  execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'lore'}
} {}

# A prefix shared by terms in both rows returns both rows.
do_test fts2n-1.5 {
  execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'lo*'}
} {1 2}

# Same again, with the prefix hitting several terms within one document.
do_test fts2n-1.6 {
  execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'l*'}
} {1 2}

# A prefix that occurs only in row 2.
do_test fts2n-1.7 {
  execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'lov*'}
} {2}

# A * that is not attached to the end of a term is dropped, so this is
# an exact search for "lo", which matches nothing.
do_test fts2n-1.8 {
  execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'lo *'}
} {}

# A * on its own is dropped too.
do_test fts2n-1.9 {
  execsql {SELECT rowid FROM t1 WHERE t1 MATCH '*'}
} {}
|
sl@0
|
88 |
|
sl@0
|
89 |
# Prefix on the last term of a phrase query ...
do_test fts2n-1.10 {
  execsql {SELECT rowid FROM t1 WHERE t1 MATCH '"lovely r*"'}
} {2}
# ... and the same phrase without the *, which must not match.
do_test fts2n-1.11 {
  execsql {SELECT rowid FROM t1 WHERE t1 MATCH '"lovely r"'}
} {}

# Phrase query in which every term is a prefix.
do_test fts2n-1.12 {
  execsql {SELECT rowid FROM t1 WHERE t1 MATCH '"a* l*"'}
} {1 2}

# Phrase query mixing two prefix terms with a trailing exact term.
do_test fts2n-1.13 {
  execsql {SELECT rowid FROM t1 WHERE t1 MATCH '"a* l* row"'}
} {2}
|
sl@0
|
106 |
|
sl@0
|
107 |
|
sl@0
|
108 |
|
sl@0
|
109 |
|
sl@0
|
110 |
# Test across updates (and, by implication, deletes).
|
sl@0
|
111 |
|
sl@0
|
112 |
# Version of text without "lorem".
|
sl@0
|
113 |
# Strip every "Lorem"/"lorem" from $text, storing the result in ntext.
# The replacement must be {} (the Tcl empty string): '' is not an empty
# string in Tcl but a literal pair of quote characters, which would be
# inserted in place of each match.
regsub -all {[Ll]orem} $text {} ntext
|
sl@0
|
114 |
|
sl@0
|
115 |
# Table t2 starts with the same two rows as t1; row 1 is then overwritten
# with $ntext so that the index has to reflect an update.
db eval {CREATE VIRTUAL TABLE t2 USING fts2(c)}
db eval {INSERT INTO t2(rowid, c) VALUES(1, $text)}
db eval {INSERT INTO t2(rowid, c) VALUES(2, 'Another lovely row')}
db eval {UPDATE t2 SET c = $ntext WHERE rowid = 1}
|
sl@0
|
122 |
|
sl@0
|
123 |
# After the update, "lorem" is no longer found as an exact term.
do_test fts2n-2.1 {
  execsql {SELECT rowid FROM t2 WHERE t2 MATCH 'lorem'}
} {}

# Nor is it found through a prefix of "lorem".
do_test fts2n-2.2 {
  execsql {SELECT rowid FROM t2 WHERE t2 MATCH 'lore*'}
} {}

# "lo*" now only reaches "lovely" in row 2.
do_test fts2n-2.3 {
  execsql {SELECT rowid FROM t2 WHERE t2 MATCH 'lo*'}
} {2}

# Other l-words are still present in row 1, so "l*" hits both rows.
do_test fts2n-2.4 {
  execsql {SELECT rowid FROM t2 WHERE t2 MATCH 'l*'}
} {1 2}

# A prefix that occurs only in row 2.
do_test fts2n-2.5 {
  execsql {SELECT rowid FROM t2 WHERE t2 MATCH 'lov*'}
} {2}
|
sl@0
|
147 |
|
sl@0
|
148 |
|
sl@0
|
149 |
|
sl@0
|
150 |
# Test with a segment which is intended to have multiple levels in the
# tree.
#
# Build a large document with many distinct terms: on each pass, a copy
# of the text so far, with the current letter appended to every word, is
# added to the end.  The document therefore doubles on each of the five
# passes.
set bigtext $text
foreach suffix {a b c d e} {
  regsub -all {[A-Za-z]+} $bigtext "&$suffix" suffixed
  append bigtext $suffixed
}
|
sl@0
|
158 |
|
sl@0
|
159 |
# Fill t3, inside a single transaction, with the two usual rows followed
# by 100 copies of $bigtext (rowids 3 through 102), so that hit counts
# can be checked.  $ret accumulates the expected number of 'l*' hits per
# row: 6 for row 1, 1 for row 2, and 6*(2^5)==192 for each $bigtext row.
# offsets() returns 4 elements for every hit.
set ret [list 6 1]
db eval {BEGIN}
db eval {CREATE VIRTUAL TABLE t3 USING fts2(c)}
db eval {INSERT INTO t3(rowid, c) VALUES(1, $text)}
db eval {INSERT INTO t3(rowid, c) VALUES(2, 'Another lovely row')}
for {set id 3} {$id < 103} {incr id} {
  db eval {INSERT INTO t3(rowid, c) VALUES($id, $bigtext)}
  lappend ret 192
}
db eval {COMMIT}
|
sl@0
|
177 |
|
sl@0
|
178 |
# Each row must report the expected number of hits: the length of its
# offsets() list divided by 4, compared row by row against $ret.
do_test fts2n-3.1 {
  set counts [list]
  db eval {SELECT offsets(t3) as o FROM t3 WHERE t3 MATCH 'l*'} {
    lappend counts [expr {[llength $o] / 4}]
  }
  set counts
} $ret
|
sl@0
|
187 |
|
sl@0
|
188 |
# TODO(shess) It would be useful to test a couple edge cases, but I
|
sl@0
|
189 |
# don't know if we have the precision to manage it from here at this
|
sl@0
|
190 |
# time. Prefix hits can cross leaves, which the code above _should_
|
sl@0
|
191 |
# hit by virtue of size. There are two variations on this. If the
|
sl@0
|
192 |
# tree is 2 levels high, the code will find the leaf-node extent
|
sl@0
|
193 |
# directly, but if it's higher, the code will have to follow two
|
sl@0
|
194 |
# separate interior branches down the tree. Both should be tested.
|
sl@0
|
195 |
|
sl@0
|
196 |
# End of the fts2n prefix-search tests.
finish_test
|