os/persistentdata/persistentstorage/sqlite3api/TEST/TclScript/fts2l.test
author sl@SLION-WIN7.fritz.box
Fri, 15 Jun 2012 03:10:57 +0200
changeset 0 bde4ae8d615e
permissions -rw-r--r--
First public contribution.
sl@0
     1
# 2007 March 28
sl@0
     2
#
sl@0
     3
# The author disclaims copyright to this source code.
sl@0
     4
#
sl@0
     5
#*************************************************************************
sl@0
     6
# This file implements regression tests for SQLite library.  The focus
sl@0
     7
# of this script is testing isspace/isalnum/tolower problems with the
sl@0
     8
# FTS2 module.  Unfortunately, this code isn't a really principled set
sl@0
     9
# of tests, because it is impossible to know where new uses of these
sl@0
    10
# functions might appear.
sl@0
    11
#
sl@0
    12
# $Id: fts2l.test,v 1.2 2007/12/13 21:54:11 drh Exp $
sl@0
    13
#
sl@0
    14
sl@0
    15
# Resolve the directory that holds this script (taken from $argv0) and
# source tester.tcl from that same directory.
set testdir [file dirname $argv0]
sl@0
    16
source $testdir/tester.tcl
sl@0
    17
sl@0
    18
# If SQLITE_ENABLE_FTS2 is not defined, omit this file: the guard below
# fires when the fts2 capability is absent, ends the test run and returns
# before any FTS2 statement is executed.
sl@0
    19
ifcapable !fts2 {
sl@0
    20
  finish_test
sl@0
    21
  return
sl@0
    22
}
sl@0
    23
sl@0
    24
# Tests that startsWith() (calls isspace, tolower, isalnum) can handle
sl@0
    25
# hi-bit chars.  parseSpec() also calls isalnum here.
sl@0
    26
# The expected result is the empty list: the CREATE VIRTUAL TABLE must
# complete without error even though the second fts2() argument is the
# lone hi-bit character \x80 (presumably taken as a column name - confirm
# against parseSpec()).
do_test fts2l-1.1 {
sl@0
    27
  execsql "CREATE VIRTUAL TABLE t1 USING fts2(content, \x80)"
sl@0
    28
} {}
sl@0
    29
sl@0
    30
# Additionally tests isspace() call in getToken(), and isalnum() call
sl@0
    31
# in tokenListToIdList().
sl@0
    32
# The CREATE is expected to fail, so it is wrapped in catch to keep the
# Tcl error from aborting the test body.  The value actually compared is
# the message returned by sqlite3_errmsg for $DB, which must name the
# hi-bit character as an unknown tokenizer.
do_test fts2l-1.2 {
sl@0
    33
  catch {
sl@0
    34
    execsql "CREATE VIRTUAL TABLE t2 USING fts2(content, tokenize \x80)"
sl@0
    35
  }
sl@0
    36
  sqlite3_errmsg $DB
sl@0
    37
} "unknown tokenizer: \x80"
sl@0
    38
sl@0
    39
# Additionally test final isalnum() in startsWith().
sl@0
    40
# Unlike fts2l-1.2 there is no space between "tokenize" and \x80, and the
# statement is expected to succeed with an empty result (presumably the
# whole token is then not treated as a tokenizer clause - confirm against
# startsWith()).
do_test fts2l-1.3 {
sl@0
    41
  execsql "CREATE VIRTUAL TABLE t3 USING fts2(content, tokenize\x80)"
sl@0
    42
} {}
sl@0
    43
sl@0
    44
# The snippet-generation code has calls to isspace() which are sort of
sl@0
    45
# hard to get to.  It finds convenient breakpoints by starting ~40
sl@0
    46
# chars before and after the matched term, and scanning ~10 chars
sl@0
    47
# around that position for isspace() characters.  The long word with
sl@0
    48
# embedded hi-bit chars causes one of these isspace() calls to be
sl@0
    49
# exercised.  The version with a couple extra spaces should cause the
sl@0
    50
# other isspace() call to be exercised.  [Both cases have been tested
sl@0
    51
# in the debugger, but I'm hoping to continue to catch it if simple
sl@0
    52
# constant changes change things slightly.]
sl@0
    53
#
sl@0
    54
# The trailing and leading hi-bit chars help with code which tests for
sl@0
    55
# isspace() to coalesce multiple spaces.
sl@0
    56
sl@0
    57
# Fixture for the snippet test.
#   word    - six runs of "xxxxx", each bounded on both sides by the
#             hi-bit character \x80 (so the word starts and ends with it).
#   phrase1 - three words, "target", three words, single-space separated.
#   phrase2 - same as phrase1 but with extra spaces on each side of
#             "target".
set word "\x80xxxxx\x80xxxxx\x80xxxxx\x80xxxxx\x80xxxxx\x80xxxxx\x80"
sl@0
    58
set phrase1 "$word $word $word target $word $word $word"
sl@0
    59
set phrase2 "$word $word $word    target    $word $word $word"
sl@0
    60
sl@0
    61
# Create the table and load one row per phrase.  The phrases are spliced
# directly into the SQL text; they contain no single quotes, so the
# literals stay well-formed.
db eval {CREATE VIRTUAL TABLE t4 USING fts2(content)}
sl@0
    62
db eval "INSERT INTO t4 (content) VALUES ('$phrase1')"
sl@0
    63
db eval "INSERT INTO t4 (content) VALUES ('$phrase2')"
sl@0
    64
sl@0
    65
# Both rows of t4 must match 'target'.  The expected list is
# rowid/snippet-length pairs: length 111 for row 1 and 117 for row 2.
do_test fts2l-1.4 {
sl@0
    66
  execsql {SELECT rowid, length(snippet(t4)) FROM t4 WHERE t4 MATCH 'target'}
sl@0
    67
} {1 111 2 117}
sl@0
    68
sl@0
    69
# End of the script: hand control back to the test harness.
finish_test