--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/os/persistentdata/persistentstorage/sqlite3api/TEST/TclScript/fts2token.test	Fri Jun 15 03:10:57 2012 +0200
@@ -0,0 +1,174 @@
+# 2007 June 21
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+#    May you do good and not evil.
+#    May you find forgiveness for yourself and forgive others.
+#    May you share freely, never taking more than you give.
+#
+#*************************************************************************
+# This file implements regression tests for the SQLite library. The focus
+# of this script is testing the pluggable tokeniser feature of the
+# FTS2 module.
+#
+# $Id: fts2token.test,v 1.3 2007/06/25 12:05:40 danielk1977 Exp $
+#
+
+set testdir [file dirname $argv0]
+source $testdir/tester.tcl
+
+# If SQLITE_ENABLE_FTS2 is not defined, omit this file.
+ifcapable !fts2 {
+  finish_test
+  return
+}
+
+# escape_string -- Return a copy of $str in which every character outside
+# the ASCII range is replaced by a four-digit \xNNNN escape sequence.
+proc escape_string {str} {
+  set out ""
+  foreach char [split $str ""] {
+    scan $char %c i
+    if {$i<=127} {
+      append out $char
+    } else {
+      append out [format {\x%.4x} $i]
+    }
+  }
+  set out     ;# a proc returns the result of its last command
+}
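+
+# A quick sanity check of escape_string itself (illustrative only; the
+# name fts2token-0.1 is not part of the original numbered sequence).
+# The e-acute character (U+00E9) should come back as a \x00e9 escape:
+do_test fts2token-0.1 {
+  escape_string "caf\u00e9"
+} {caf\x00e9}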
+
+#--------------------------------------------------------------------------
+# Test cases fts2token-1.* are the warm-body test for the SQL scalar
+# function fts2_tokenizer(). The procedure is as follows:
+#
+#   1: Verify that there is no such fts2 tokenizer as 'blah'.
+#
+#   2: Query for the built-in tokenizer 'simple'. Insert a copy of the
+#      retrieved value as tokenizer 'blah'.
+#
+#   3: Test that the value returned for tokenizer 'blah' is now the
+#      same as that retrieved for 'simple'.
+#
+#   4: Test that it is now possible to create an fts2 table using
+#      tokenizer 'blah' (it was not possible in step 1).
+#
+#   5: Test that the table created to use tokenizer 'blah' is usable.
+#
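+# A note on the calling convention, as inferred from the tests below:
+# called with one argument, fts2_tokenizer(<name>) returns the registered
+# tokenizer's module pointer encoded as an SQL blob; called with two,
+# fts2_tokenizer(<name>, <blob>) registers that pointer under <name> and
+# returns it (hence the non-NULL result expected in test 1.2).
+#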
+do_test fts2token-1.1 {
+  catchsql {
+    CREATE VIRTUAL TABLE t1 USING fts2(content, tokenize blah);
+  }
+} {1 {unknown tokenizer: blah}}
+do_test fts2token-1.2 {
+  execsql {
+    SELECT fts2_tokenizer('blah', fts2_tokenizer('simple')) IS NULL;
+  }
+} {0}
+do_test fts2token-1.3 {
+  execsql {
+    SELECT fts2_tokenizer('blah') == fts2_tokenizer('simple');
+  }
+} {1}
+do_test fts2token-1.4 {
+  catchsql {
+    CREATE VIRTUAL TABLE t1 USING fts2(content, tokenize blah);
+  }
+} {0 {}}
+do_test fts2token-1.5 {
+  execsql {
+    INSERT INTO t1(content) VALUES('There was movement at the station');
+    INSERT INTO t1(content) VALUES('For the word has passed around');
+    INSERT INTO t1(content) VALUES('That the colt from old regret had got away');
+    SELECT content FROM t1 WHERE content MATCH 'movement'
+  }
+} {{There was movement at the station}}
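+
+# As a further illustrative check (reusing the rows inserted by test 1.5;
+# this case is an addition, not part of the original sequence), any other
+# indexed term is matchable through the copied tokenizer too:
+do_test fts2token-1.6 {
+  execsql {
+    SELECT content FROM t1 WHERE content MATCH 'word'
+  }
+} {{For the word has passed around}}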
+
+#--------------------------------------------------------------------------
+# Test cases fts2token-2.* test error cases in the scalar-function-based
+# API for getting and setting tokenizers.
+#
+do_test fts2token-2.1 {
+  catchsql {
+    SELECT fts2_tokenizer('nosuchtokenizer');
+  }
+} {1 {unknown tokenizer: nosuchtokenizer}}
+
+#--------------------------------------------------------------------------
+# Test cases fts2token-3.* test the three built-in tokenizers with a
+# simple input string via the built-in test function. This is as much
+# to test the test function as the tokenizer implementations.
+#
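+# As the expected results below indicate, fts2_tokenizer_test() appears
+# to return a flat list of triples: the token's position, the token as
+# indexed (case-folded), and the original text it was derived from.
+#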
+do_test fts2token-3.1 {
+  execsql {
+    SELECT fts2_tokenizer_test('simple', 'I don''t see how');
+  }
+} {{0 i I 1 don don 2 t t 3 see see 4 how how}}
+do_test fts2token-3.2 {
+  execsql {
+    SELECT fts2_tokenizer_test('porter', 'I don''t see how');
+  }
+} {{0 i I 1 don don 2 t t 3 see see 4 how how}}
+ifcapable icu {
+  do_test fts2token-3.3 {
+    execsql {
+      SELECT fts2_tokenizer_test('icu', 'I don''t see how');
+    }
+  } {{0 i I 1 don't don't 2 see see 3 how how}}
+}
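+
+# Note the difference above: 'simple' and 'porter' split "don't" at the
+# apostrophe, while ICU keeps it as a single token. This matches Unicode
+# word-boundary behaviour (UAX #29), which treats an apostrophe between
+# letters as word-internal.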
+
+#--------------------------------------------------------------------------
+# Test cases fts2token-4.* test the ICU tokenizer. In practice, this
+# tokenizer only has two modes - "thai" and "everybody else". Some other
+# Asian languages (Lao, Khmer etc.) require the same special treatment as
+# Thai, but ICU doesn't support them yet.
+#
+ifcapable icu {
+
+  # do_icu_test -- Run fts2_tokenizer_test() with the 'icu' tokenizer and
+  # the given $locale and $input, then check the result against $output.
+  proc do_icu_test {name locale input output} {
+    set ::out [db eval { SELECT fts2_tokenizer_test('icu', $locale, $input) }]
+    do_test $name {
+      lindex $::out 0
+    } $output
+  }
+
+  do_icu_test fts2token-4.1 en_US {} {}
+  do_icu_test fts2token-4.2 en_US {Test cases fts2} [list \
+    0 test Test 1 cases cases 2 fts2 fts2
+  ]
+
+  # The following test shows that ICU is smart enough to recognise
+  # Thai characters, even when the locale is set to English/United
+  # States.
+  #
+  set input "\u0e2d\u0e30\u0e44\u0e23\u0e19\u0e30\u0e04\u0e23\u0e31\u0e1a"
+  set output    "0 \u0e2d\u0e30\u0e44\u0e23 \u0e2d\u0e30\u0e44\u0e23 "
+  append output "1 \u0e19\u0e30 \u0e19\u0e30 "
+  append output "2 \u0e04\u0e23\u0e31\u0e1a \u0e04\u0e23\u0e31\u0e1a"
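+
+  # Thai is written without spaces between words, so $input above contains
+  # no delimiters at all; the three tokens expected in $output come from
+  # ICU's dictionary-based Thai break iterator.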
+
+  do_icu_test fts2token-4.3 th_TH $input $output
+  do_icu_test fts2token-4.4 en_US $input $output
+
+  # ICU handles an unknown locale by falling back to the default.
+  # So this is not an error.
+  do_icu_test fts2token-4.5 MiddleOfTheOcean $input $output
+
+  set longtoken "AReallyReallyLongTokenOneThatWillSurelyRequire"
+  append longtoken "AReallocInTheIcuTokenizerCode"
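+
+  # As the token's own name advertises, it is long enough to overflow the
+  # tokenizer's initial output buffer, forcing the realloc path in the ICU
+  # tokenizer code that tests 4.6 to 4.8 exercise.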
+
+  set input "short tokens then "
+  append input $longtoken
+  set output    "0 short short "
+  append output "1 tokens tokens "
+  append output "2 then then "
+  append output "3 [string tolower $longtoken] $longtoken"
+
+  do_icu_test fts2token-4.6 MiddleOfTheOcean $input $output
+  do_icu_test fts2token-4.7 th_TH $input $output
+  do_icu_test fts2token-4.8 en_US $input $output
+}
+
+do_test fts2token-internal {
+  execsql { SELECT fts2_tokenizer_internal_test() }
+} {ok}
+
+finish_test