os/persistentdata/persistentstorage/sqlite3api/TEST/TclScript/fts2n.test
author sl@SLION-WIN7.fritz.box
Fri, 15 Jun 2012 03:10:57 +0200
changeset 0 bde4ae8d615e
permissions -rw-r--r--
First public contribution.
sl@0
     1
# 2007 April 26
sl@0
     2
#
sl@0
     3
# The author disclaims copyright to this source code.
sl@0
     4
#
sl@0
     5
#*************************************************************************
sl@0
     6
# This file implements tests for prefix-searching in the fts2
sl@0
     7
# component of the SQLite library.
sl@0
     8
#
sl@0
     9
# $Id: fts2n.test,v 1.2 2007/12/13 21:54:11 drh Exp $
sl@0
    10
#
sl@0
    11
sl@0
    12
# Load the shared test harness from the directory this script lives in.
set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Skip the whole file when the fts2 capability is NOT available
# (i.e. the library was built without SQLITE_ENABLE_FTS2).
ifcapable {!fts2} { finish_test; return }
sl@0
    20
sl@0
    21
# Sample document shared by every section of this file.  It contains six
# words starting with "l" (three of them "lorem" in some case).
set text {
  Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Maecenas
  iaculis mollis ipsum. Praesent rhoncus placerat justo. Duis non quam
  sed turpis posuere placerat. Curabitur et lorem in lorem porttitor
  aliquet. Pellentesque bibendum tincidunt diam. Vestibulum blandit
  ante nec elit. In sapien diam, facilisis eget, dictum sed, viverra
  at, felis. Vestibulum magna. Sed magna dolor, vestibulum rhoncus,
  ornare vel, vulputate sit amet, felis. Integer malesuada, tellus at
  luctus gravida, diam nunc porta nibh, nec imperdiet massa metus eu
  lectus. Aliquam nisi. Nunc fringilla nulla at lectus. Suspendisse
  potenti. Cum sociis natoque penatibus et magnis dis parturient
  montes, nascetur ridiculus mus. Pellentesque odio nulla, feugiat eu,
  suscipit nec, consequat quis, risus.
}

# Table t1: row 1 holds the sample document, row 2 a short second document.
db eval {CREATE VIRTUAL TABLE t1 USING fts2(c);}
db eval {INSERT INTO t1(rowid, c) VALUES(1, $text);}
db eval {INSERT INTO t1(rowid, c) VALUES(2, 'Another lovely row');}
sl@0
    43
sl@0
    44
# A complete term matches the row that contains it.
do_test fts2n-1.1 {
  execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'lorem'}
} {1}

# A proper prefix of that term, marked with a trailing *, matches too.
do_test fts2n-1.2 {
  execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'lore*'}
} {1}

# A prefix query also matches the term that equals the prefix exactly.
do_test fts2n-1.3 {
  execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'lorem*'}
} {1}

# Without the trailing * a partial term must not be treated as a prefix.
do_test fts2n-1.4 {
  execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'lore'}
} {}
sl@0
    63
sl@0
    64
# A prefix that occurs in both rows returns both rowids.
do_test fts2n-1.5 {
  execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'lo*'}
} {1 2}

# Same again with a prefix that hits several terms within one document.
do_test fts2n-1.6 {
  execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'l*'}
} {1 2}

# A prefix that is present in only one of the documents.
do_test fts2n-1.7 {
  execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'lov*'}
} {2}

# A * that is not attached to the end of a term is dropped.
do_test fts2n-1.8 {
  execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'lo *'}
} {}

# A * on its own is dropped as well.
do_test fts2n-1.9 {
  execsql {SELECT rowid FROM t1 WHERE t1 MATCH '*'}
} {}
sl@0
    88
sl@0
    89
# Inside a phrase, the last term may be a prefix ...
do_test fts2n-1.10 {
  execsql {SELECT rowid FROM t1 WHERE t1 MATCH '"lovely r*"'}
} {2}

# ... but without the * the partial last term matches nothing.
do_test fts2n-1.11 {
  execsql {SELECT rowid FROM t1 WHERE t1 MATCH '"lovely r"'}
} {}

# Phrase made of two prefix terms.
do_test fts2n-1.12 {
  execsql {SELECT rowid FROM t1 WHERE t1 MATCH '"a* l*"'}
} {1 2}

# Phrase made of two prefix terms followed by an exact term.
do_test fts2n-1.13 {
  execsql {SELECT rowid FROM t1 WHERE t1 MATCH '"a* l* row"'}
} {2}
sl@0
   106
sl@0
   107
sl@0
   108
sl@0
   109
sl@0
   110
# Test across updates (and, by implication, deletes).
sl@0
   111
sl@0
   112
# Version of text without "lorem".  Tcl has no single-quoted strings: a
# replacement spelled '' is the literal two-character word '' and would be
# spliced into the text wherever "lorem" stood.  {} is the empty string, so
# the matches are actually removed.
regsub -all {[Ll]orem} $text {} ntext

# Table t2 starts out identical to t1; row 1 is then rewritten with the
# lorem-free text so the queries below run against an updated document.
db eval {
  CREATE VIRTUAL TABLE t2 USING fts2(c);

  INSERT INTO t2(rowid, c) VALUES(1, $text);
  INSERT INTO t2(rowid, c) VALUES(2, 'Another lovely row');
  UPDATE t2 SET c = $ntext WHERE rowid = 1;
}
sl@0
   122
sl@0
   123
# After the update, lorem is no longer found as an exact term.
do_test fts2n-2.1 {
  execsql {SELECT rowid FROM t2 WHERE t2 MATCH 'lorem'}
} {}

# Nor is it found through one of its prefixes.
do_test fts2n-2.2 {
  execsql {SELECT rowid FROM t2 WHERE t2 MATCH 'lore*'}
} {}

# "lovely" in the second document is still reachable by prefix.
do_test fts2n-2.3 {
  execsql {SELECT rowid FROM t2 WHERE t2 MATCH 'lo*'}
} {2}

# The remaining l-words of the updated row still produce hits.
do_test fts2n-2.4 {
  execsql {SELECT rowid FROM t2 WHERE t2 MATCH 'l*'}
} {1 2}

# A prefix that is present in only one of the documents.
do_test fts2n-2.5 {
  execsql {SELECT rowid FROM t2 WHERE t2 MATCH 'lov*'}
} {2}
sl@0
   147
sl@0
   148
sl@0
   149
sl@0
   150
# Test with a segment which will have multiple levels in the tree.
sl@0
   151
sl@0
   152
# Build a big document with lots of unique terms.  Each pass appends a copy
# of everything built so far in which every word has one more letter tacked
# on, so the document doubles five times.
set bigtext $text
foreach suffix {a b c d e} {
  regsub -all {[A-Za-z]+} $bigtext "&$suffix" tagged
  append bigtext $tagged
}

# Fill t3 with the two usual rows plus 100 copies of the big document, and
# record in $ret the hit count expected for each row, in rowid order:
# 6 for row 1, 1 for row 2, and 6*(2^5)==192 for every $bigtext row.
# (offsets() returns 4 elements for every hit, which is how the hits are
# counted later on.)
set ret {6 1}
db eval {BEGIN;}
db eval {
  CREATE VIRTUAL TABLE t3 USING fts2(c);

  INSERT INTO t3(rowid, c) VALUES(1, $text);
  INSERT INTO t3(rowid, c) VALUES(2, 'Another lovely row');
}
set i 0
while {$i<100} {
  db eval {INSERT INTO t3(rowid, c) VALUES(3+$i, $bigtext)}
  lappend ret 192
  incr i
}
db eval {COMMIT;}
sl@0
   177
sl@0
   178
# Check the number of hits reported for every matching row.  The length of
# each offsets() result divided by 4 is that row's hit count; the collected
# counts must equal $ret.
do_test fts2n-3.1 {
  set hits {}
  db eval {SELECT offsets(t3) as o FROM t3 WHERE t3 MATCH 'l*'} {
    lappend hits [expr {[llength $o]/4}]
  }
  set hits
} $ret

# TODO(shess) It would be useful to test a couple of edge cases, but I
# don't know if we have the precision to manage it from here at this
# time.  Prefix hits can cross leaves, which the code above _should_
# hit by virtue of size.  There are two variations on this.  If the
# tree is 2 levels high, the code will find the leaf-node extent
# directly, but if it is higher, the code will have to follow two
# separate interior branches down the tree.  Both should be tested.

finish_test