os/persistentdata/persistentstorage/sqlite3api/SQLite/fts2_tokenizer.c
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
sl@0
     1
/*
sl@0
     2
** 2007 June 22
sl@0
     3
**
sl@0
     4
** The author disclaims copyright to this source code.  In place of
sl@0
     5
** a legal notice, here is a blessing:
sl@0
     6
**
sl@0
     7
**    May you do good and not evil.
sl@0
     8
**    May you find forgiveness for yourself and forgive others.
sl@0
     9
**    May you share freely, never taking more than you give.
sl@0
    10
**
sl@0
    11
******************************************************************************
sl@0
    12
**
sl@0
    13
** This is part of an SQLite module implementing full-text search.
sl@0
    14
** This particular file implements the generic tokenizer interface.
sl@0
    15
*/
sl@0
    16
sl@0
    17
/*
sl@0
    18
** The code in this file is only compiled if:
sl@0
    19
**
sl@0
    20
**     * The FTS2 module is being built as an extension
sl@0
    21
**       (in which case SQLITE_CORE is not defined), or
sl@0
    22
**
sl@0
    23
**     * The FTS2 module is being built into the core of
sl@0
    24
**       SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
sl@0
    25
*/
sl@0
    26
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
sl@0
    27
sl@0
    28
sl@0
    29
#include "sqlite3.h"
sl@0
    30
#include "sqlite3ext.h"
sl@0
    31
SQLITE_EXTENSION_INIT1
sl@0
    32
sl@0
    33
#include "fts2_hash.h"
sl@0
    34
#include "fts2_tokenizer.h"
sl@0
    35
#include <assert.h>
sl@0
    36
sl@0
    37
/*
sl@0
    38
** Implementation of the SQL scalar function for accessing the underlying 
sl@0
    39
** hash table. This function may be called as follows:
sl@0
    40
**
sl@0
    41
**   SELECT <function-name>(<key-name>);
sl@0
    42
**   SELECT <function-name>(<key-name>, <pointer>);
sl@0
    43
**
sl@0
    44
** where <function-name> is the name passed as the second argument
sl@0
    45
** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer').
sl@0
    46
**
sl@0
    47
** If the <pointer> argument is specified, it must be a blob value
sl@0
    48
** containing a pointer to be stored as the hash data corresponding
sl@0
    49
** to the string <key-name>. If <pointer> is not specified, then
sl@0
    50
** the string <key-name> must already exist in the hash table. Otherwise,
sl@0
    51
** an error is returned.
sl@0
    52
**
sl@0
    53
** Whether or not the <pointer> argument is specified, the value returned
sl@0
    54
** is a blob containing the pointer stored as the hash data corresponding
sl@0
    55
** to string <key-name> (after the hash-table is updated, if applicable).
sl@0
    56
*/
sl@0
    57
static void scalarFunc(
sl@0
    58
  sqlite3_context *context,
sl@0
    59
  int argc,
sl@0
    60
  sqlite3_value **argv
sl@0
    61
){
sl@0
    62
  fts2Hash *pHash;
sl@0
    63
  void *pPtr = 0;
sl@0
    64
  const unsigned char *zName;
sl@0
    65
  int nName;
sl@0
    66
sl@0
    67
  assert( argc==1 || argc==2 );
sl@0
    68
sl@0
    69
  pHash = (fts2Hash *)sqlite3_user_data(context);
sl@0
    70
sl@0
    71
  zName = sqlite3_value_text(argv[0]);
sl@0
    72
  nName = sqlite3_value_bytes(argv[0])+1;
sl@0
    73
sl@0
    74
  if( argc==2 ){
sl@0
    75
    void *pOld;
sl@0
    76
    int n = sqlite3_value_bytes(argv[1]);
sl@0
    77
    if( n!=sizeof(pPtr) ){
sl@0
    78
      sqlite3_result_error(context, "argument type mismatch", -1);
sl@0
    79
      return;
sl@0
    80
    }
sl@0
    81
    pPtr = *(void **)sqlite3_value_blob(argv[1]);
sl@0
    82
    pOld = sqlite3Fts2HashInsert(pHash, (void *)zName, nName, pPtr);
sl@0
    83
    if( pOld==pPtr ){
sl@0
    84
      sqlite3_result_error(context, "out of memory", -1);
sl@0
    85
      return;
sl@0
    86
    }
sl@0
    87
  }else{
sl@0
    88
    pPtr = sqlite3Fts2HashFind(pHash, zName, nName);
sl@0
    89
    if( !pPtr ){
sl@0
    90
      char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
sl@0
    91
      sqlite3_result_error(context, zErr, -1);
sl@0
    92
      sqlite3_free(zErr);
sl@0
    93
      return;
sl@0
    94
    }
sl@0
    95
  }
sl@0
    96
sl@0
    97
  sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT);
sl@0
    98
}
sl@0
    99
sl@0
   100
#ifdef SQLITE_TEST
sl@0
   101
sl@0
   102
#include "tcl.h"
sl@0
   103
#include <string.h>
sl@0
   104
sl@0
   105
/*
sl@0
   106
** Implementation of a special SQL scalar function for testing tokenizers 
sl@0
   107
** designed to be used in concert with the Tcl testing framework. This
sl@0
   108
** function must be called with two or three arguments:
sl@0
   109
**
sl@0
   110
**   SELECT <function-name>(<key-name>, <input-string>);
sl@0
   111
**   SELECT <function-name>(<key-name>, <tokenizer-arg>, <input-string>);
sl@0
   112
**
sl@0
   113
** where <function-name> is the name passed as the second argument
sl@0
   114
** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer')
sl@0
   115
** concatenated with the string '_test' (e.g. 'fts2_tokenizer_test').
sl@0
   116
**
sl@0
   117
** The return value is a string that may be interpreted as a Tcl
sl@0
   118
** list. For each token in the <input-string>, three elements are
sl@0
   119
** added to the returned list. The first is the token position, the 
sl@0
   120
** second is the token text (folded, stemmed, etc.) and the third is the
sl@0
   121
** substring of <input-string> associated with the token. For example, 
sl@0
   122
** using the built-in "simple" tokenizer:
sl@0
   123
**
sl@0
   124
**   SELECT fts_tokenizer_test('simple', 'I don't see how');
sl@0
   125
**
sl@0
   126
** will return the string:
sl@0
   127
**
sl@0
   128
**   "{0 i I 1 dont don't 2 see see 3 how how}"
sl@0
   129
**   
sl@0
   130
*/
sl@0
   131
static void testFunc(
sl@0
   132
  sqlite3_context *context,
sl@0
   133
  int argc,
sl@0
   134
  sqlite3_value **argv
sl@0
   135
){
sl@0
   136
  fts2Hash *pHash;
sl@0
   137
  sqlite3_tokenizer_module *p;
sl@0
   138
  sqlite3_tokenizer *pTokenizer = 0;
sl@0
   139
  sqlite3_tokenizer_cursor *pCsr = 0;
sl@0
   140
sl@0
   141
  const char *zErr = 0;
sl@0
   142
sl@0
   143
  const char *zName;
sl@0
   144
  int nName;
sl@0
   145
  const char *zInput;
sl@0
   146
  int nInput;
sl@0
   147
sl@0
   148
  const char *zArg = 0;
sl@0
   149
sl@0
   150
  const char *zToken;
sl@0
   151
  int nToken;
sl@0
   152
  int iStart;
sl@0
   153
  int iEnd;
sl@0
   154
  int iPos;
sl@0
   155
sl@0
   156
  Tcl_Obj *pRet;
sl@0
   157
sl@0
   158
  assert( argc==2 || argc==3 );
sl@0
   159
sl@0
   160
  nName = sqlite3_value_bytes(argv[0]);
sl@0
   161
  zName = (const char *)sqlite3_value_text(argv[0]);
sl@0
   162
  nInput = sqlite3_value_bytes(argv[argc-1]);
sl@0
   163
  zInput = (const char *)sqlite3_value_text(argv[argc-1]);
sl@0
   164
sl@0
   165
  if( argc==3 ){
sl@0
   166
    zArg = (const char *)sqlite3_value_text(argv[1]);
sl@0
   167
  }
sl@0
   168
sl@0
   169
  pHash = (fts2Hash *)sqlite3_user_data(context);
sl@0
   170
  p = (sqlite3_tokenizer_module *)sqlite3Fts2HashFind(pHash, zName, nName+1);
sl@0
   171
sl@0
   172
  if( !p ){
sl@0
   173
    char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
sl@0
   174
    sqlite3_result_error(context, zErr, -1);
sl@0
   175
    sqlite3_free(zErr);
sl@0
   176
    return;
sl@0
   177
  }
sl@0
   178
sl@0
   179
  pRet = Tcl_NewObj();
sl@0
   180
  Tcl_IncrRefCount(pRet);
sl@0
   181
sl@0
   182
  if( SQLITE_OK!=p->xCreate(zArg ? 1 : 0, &zArg, &pTokenizer) ){
sl@0
   183
    zErr = "error in xCreate()";
sl@0
   184
    goto finish;
sl@0
   185
  }
sl@0
   186
  pTokenizer->pModule = p;
sl@0
   187
  if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){
sl@0
   188
    zErr = "error in xOpen()";
sl@0
   189
    goto finish;
sl@0
   190
  }
sl@0
   191
  pCsr->pTokenizer = pTokenizer;
sl@0
   192
sl@0
   193
  while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
sl@0
   194
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
sl@0
   195
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
sl@0
   196
    zToken = &zInput[iStart];
sl@0
   197
    nToken = iEnd-iStart;
sl@0
   198
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
sl@0
   199
  }
sl@0
   200
sl@0
   201
  if( SQLITE_OK!=p->xClose(pCsr) ){
sl@0
   202
    zErr = "error in xClose()";
sl@0
   203
    goto finish;
sl@0
   204
  }
sl@0
   205
  if( SQLITE_OK!=p->xDestroy(pTokenizer) ){
sl@0
   206
    zErr = "error in xDestroy()";
sl@0
   207
    goto finish;
sl@0
   208
  }
sl@0
   209
sl@0
   210
finish:
sl@0
   211
  if( zErr ){
sl@0
   212
    sqlite3_result_error(context, zErr, -1);
sl@0
   213
  }else{
sl@0
   214
    sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT);
sl@0
   215
  }
sl@0
   216
  Tcl_DecrRefCount(pRet);
sl@0
   217
}
sl@0
   218
sl@0
   219
static
sl@0
   220
int registerTokenizer(
sl@0
   221
  sqlite3 *db, 
sl@0
   222
  char *zName, 
sl@0
   223
  const sqlite3_tokenizer_module *p
sl@0
   224
){
sl@0
   225
  int rc;
sl@0
   226
  sqlite3_stmt *pStmt;
sl@0
   227
  const char zSql[] = "SELECT fts2_tokenizer(?, ?)";
sl@0
   228
sl@0
   229
  rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
sl@0
   230
  if( rc!=SQLITE_OK ){
sl@0
   231
    return rc;
sl@0
   232
  }
sl@0
   233
sl@0
   234
  sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
sl@0
   235
  sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
sl@0
   236
  sqlite3_step(pStmt);
sl@0
   237
sl@0
   238
  return sqlite3_finalize(pStmt);
sl@0
   239
}
sl@0
   240
sl@0
   241
static
sl@0
   242
int queryTokenizer(
sl@0
   243
  sqlite3 *db, 
sl@0
   244
  char *zName,  
sl@0
   245
  const sqlite3_tokenizer_module **pp
sl@0
   246
){
sl@0
   247
  int rc;
sl@0
   248
  sqlite3_stmt *pStmt;
sl@0
   249
  const char zSql[] = "SELECT fts2_tokenizer(?)";
sl@0
   250
sl@0
   251
  *pp = 0;
sl@0
   252
  rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
sl@0
   253
  if( rc!=SQLITE_OK ){
sl@0
   254
    return rc;
sl@0
   255
  }
sl@0
   256
sl@0
   257
  sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
sl@0
   258
  if( SQLITE_ROW==sqlite3_step(pStmt) ){
sl@0
   259
    if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
sl@0
   260
      memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
sl@0
   261
    }
sl@0
   262
  }
sl@0
   263
sl@0
   264
  return sqlite3_finalize(pStmt);
sl@0
   265
}
sl@0
   266
sl@0
   267
/*
** Forward declaration. intTestFunc() below calls this to obtain the module
** pointer that it compares against the result of looking up the "simple"
** tokenizer. The definition is not visible in this file.
*/
void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
sl@0
   268
sl@0
   269
/*
sl@0
   270
** Implementation of the scalar function fts2_tokenizer_internal_test().
sl@0
   271
** This function is used for testing only, it is not included in the
sl@0
   272
** build unless SQLITE_TEST is defined.
sl@0
   273
**
sl@0
   274
** The purpose of this is to test that the fts2_tokenizer() function
sl@0
   275
** can be used as designed by the C-code in the queryTokenizer and
sl@0
   276
** registerTokenizer() functions above. These two functions are repeated
sl@0
   277
** in the README.tokenizer file as an example, so it is important to
sl@0
   278
** test them.
sl@0
   279
**
sl@0
   280
** To run the tests, evaluate the fts2_tokenizer_internal_test() scalar
sl@0
   281
** function with no arguments. An assert() will fail if a problem is
sl@0
   282
** detected. i.e.:
sl@0
   283
**
sl@0
   284
**     SELECT fts2_tokenizer_internal_test();
sl@0
   285
**
sl@0
   286
*/
sl@0
   287
static void intTestFunc(
sl@0
   288
  sqlite3_context *context,
sl@0
   289
  int argc,
sl@0
   290
  sqlite3_value **argv
sl@0
   291
){
sl@0
   292
  int rc;
sl@0
   293
  const sqlite3_tokenizer_module *p1;
sl@0
   294
  const sqlite3_tokenizer_module *p2;
sl@0
   295
  sqlite3 *db = (sqlite3 *)sqlite3_user_data(context);
sl@0
   296
sl@0
   297
  /* Test the query function */
sl@0
   298
  sqlite3Fts2SimpleTokenizerModule(&p1);
sl@0
   299
  rc = queryTokenizer(db, "simple", &p2);
sl@0
   300
  assert( rc==SQLITE_OK );
sl@0
   301
  assert( p1==p2 );
sl@0
   302
  rc = queryTokenizer(db, "nosuchtokenizer", &p2);
sl@0
   303
  assert( rc==SQLITE_ERROR );
sl@0
   304
  assert( p2==0 );
sl@0
   305
  assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") );
sl@0
   306
sl@0
   307
  /* Test the storage function */
sl@0
   308
  rc = registerTokenizer(db, "nosuchtokenizer", p1);
sl@0
   309
  assert( rc==SQLITE_OK );
sl@0
   310
  rc = queryTokenizer(db, "nosuchtokenizer", &p2);
sl@0
   311
  assert( rc==SQLITE_OK );
sl@0
   312
  assert( p2==p1 );
sl@0
   313
sl@0
   314
  sqlite3_result_text(context, "ok", -1, SQLITE_STATIC);
sl@0
   315
}
sl@0
   316
sl@0
   317
#endif
sl@0
   318
sl@0
   319
/*
sl@0
   320
** Set up SQL objects in database db used to access the contents of
sl@0
   321
** the hash table pointed to by argument pHash. The hash table must
sl@0
   322
** have been initialised to use string keys, and to take a private copy 
sl@0
   323
** of the key when a value is inserted. i.e. by a call similar to:
sl@0
   324
**
sl@0
   325
**    sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1);
sl@0
   326
**
sl@0
   327
** This function adds a scalar function (see header comment above
sl@0
   328
** scalarFunc() in this file for details) and, if ENABLE_TABLE is
sl@0
   329
** defined at compilation time, a temporary virtual table (see header 
sl@0
   330
** comment above struct HashTableVtab) to the database schema. Both 
sl@0
   331
** provide read/write access to the contents of *pHash.
sl@0
   332
**
sl@0
   333
** The third argument to this function, zName, is used as the name
sl@0
   334
** of both the scalar and, if created, the virtual table.
sl@0
   335
*/
sl@0
   336
int sqlite3Fts2InitHashTable(
sl@0
   337
  sqlite3 *db, 
sl@0
   338
  fts2Hash *pHash, 
sl@0
   339
  const char *zName
sl@0
   340
){
sl@0
   341
  int rc = SQLITE_OK;
sl@0
   342
  void *p = (void *)pHash;
sl@0
   343
  const int any = SQLITE_ANY;
sl@0
   344
  char *zTest = 0;
sl@0
   345
  char *zTest2 = 0;
sl@0
   346
sl@0
   347
#ifdef SQLITE_TEST
sl@0
   348
  void *pdb = (void *)db;
sl@0
   349
  zTest = sqlite3_mprintf("%s_test", zName);
sl@0
   350
  zTest2 = sqlite3_mprintf("%s_internal_test", zName);
sl@0
   351
  if( !zTest || !zTest2 ){
sl@0
   352
    rc = SQLITE_NOMEM;
sl@0
   353
  }
sl@0
   354
#endif
sl@0
   355
sl@0
   356
  if( rc!=SQLITE_OK
sl@0
   357
   || (rc = sqlite3_create_function(db, zName, 1, any, p, scalarFunc, 0, 0))
sl@0
   358
   || (rc = sqlite3_create_function(db, zName, 2, any, p, scalarFunc, 0, 0))
sl@0
   359
#ifdef SQLITE_TEST
sl@0
   360
   || (rc = sqlite3_create_function(db, zTest, 2, any, p, testFunc, 0, 0))
sl@0
   361
   || (rc = sqlite3_create_function(db, zTest, 3, any, p, testFunc, 0, 0))
sl@0
   362
   || (rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0))
sl@0
   363
#endif
sl@0
   364
  );
sl@0
   365
sl@0
   366
  sqlite3_free(zTest);
sl@0
   367
  sqlite3_free(zTest2);
sl@0
   368
  return rc;
sl@0
   369
}
sl@0
   370
sl@0
   371
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */