# 2007 March 28
#
# The author disclaims copyright to this source code.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The focus
# of this script is testing isspace/isalnum/tolower problems with the
# FTS2 module.  Unfortunately, this code isn't a really principled set
# of tests, because it is impossible to know where new uses of these
# functions might appear.
#
# $Id: fts2l.test,v 1.2 2007/12/13 21:54:11 drh Exp $
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# If SQLITE_ENABLE_FTS2 is not defined, omit this file.
ifcapable !fts2 {
  finish_test
  return
}

# Tests that startsWith() (calls isspace, tolower, isalnum) can handle
# hi-bit chars.  parseSpec() also calls isalnum here.
do_test fts2l-1.1 {
  execsql "CREATE VIRTUAL TABLE t1 USING fts2(content, \x80)"
} {}

# Additionally tests isspace() call in getToken(), and isalnum() call
# in tokenListToIdList().  The CREATE is expected to fail, so the error
# is swallowed by catch and the message is checked instead.
do_test fts2l-1.2 {
  catch {
    execsql "CREATE VIRTUAL TABLE t2 USING fts2(content, tokenize \x80)"
  }
  sqlite3_errmsg $DB
} "unknown tokenizer: \x80"

# Additionally test final isalnum() in startsWith().
do_test fts2l-1.3 {
  execsql "CREATE VIRTUAL TABLE t3 USING fts2(content, tokenize\x80)"
} {}

# The snippet-generation code has calls to isspace() which are sort of
# hard to get to.  It finds convenient breakpoints by starting ~40
# chars before and after the matched term, and scanning ~10 chars
# around that position for isspace() characters.  The long word with
# embedded hi-bit chars causes one of these isspace() calls to be
# exercised.  The version with a couple extra spaces should cause the
# other isspace() call to be exercised.  [Both cases have been tested
# in the debugger, but I'm hoping to continue to catch it if simple
# constant changes change things slightly.]
#
# The trailing and leading hi-bit chars help with code which tests for
# isspace() to coalesce multiple spaces.

set word "\x80xxxxx\x80xxxxx\x80xxxxx\x80xxxxx\x80xxxxx\x80xxxxx\x80"

# phrase1 separates every token with a single space.
set phrase1 "$word $word $word target $word $word $word"

# phrase2 is phrase1 with extra spaces on both sides of "target".  The
# gap is built with [string repeat] so that whitespace-normalising tools
# cannot silently turn phrase2 into a duplicate of phrase1.
# NOTE(review): the gap width (4 spaces, i.e. 3 extra per side) is
# inferred from the expected snippet lengths below differing by 6
# (111 vs 117) -- confirm against the pristine upstream test.
set gap [string repeat " " 4]
set phrase2 "$word $word $word${gap}target${gap}$word $word $word"

db eval {CREATE VIRTUAL TABLE t4 USING fts2(content)}
db eval "INSERT INTO t4 (content) VALUES ('$phrase1')"
db eval "INSERT INTO t4 (content) VALUES ('$phrase2')"

# Row 1 holds phrase1 and row 2 holds phrase2; only the snippet lengths
# are compared.
do_test fts2l-1.4 {
  execsql {SELECT rowid, length(snippet(t4)) FROM t4 WHERE t4 MATCH 'target'}
} {1 111 2 117}

finish_test