1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/persistentdata/persistentstorage/sqlite3api/SQLite/fts1.c Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,3345 @@
1.4 +/* fts1 has a design flaw which can lead to database corruption (see
1.5 +** below). It is recommended not to use it any longer, instead use
1.6 +** fts3 (or higher). If you believe that your use of fts1 is safe,
1.7 +** add -DSQLITE_ENABLE_BROKEN_FTS1=1 to your CFLAGS.
1.8 +*/
1.9 +#if (!defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)) \
1.10 + && !defined(SQLITE_ENABLE_BROKEN_FTS1)
1.11 +#error fts1 has a design flaw and has been deprecated.
1.12 +#endif
1.13 +/* The flaw is that fts1 uses the content table's unaliased rowid as
1.14 +** the unique docid. fts1 embeds the rowid in the index it builds,
1.15 +** and expects the rowid to not change. The SQLite VACUUM operation
1.16 +** will renumber such rowids, thereby breaking fts1. If you are using
1.17 +** fts1 in a system which has disabled VACUUM, then you can continue
1.18 +** to use it safely. Note that PRAGMA auto_vacuum does NOT disable
1.19 +** VACUUM, though systems using auto_vacuum are unlikely to invoke
1.20 +** VACUUM.
1.21 +**
1.22 +** fts1 should be safe even across VACUUM if you only insert documents
1.23 +** and never delete.
1.24 +*/
1.25 +
1.26 +/* The author disclaims copyright to this source code.
1.27 + *
1.28 + * This is an SQLite module implementing full-text search.
1.29 + */
1.30 +
1.31 +/*
1.32 +** The code in this file is only compiled if:
1.33 +**
1.34 +** * The FTS1 module is being built as an extension
1.35 +** (in which case SQLITE_CORE is not defined), or
1.36 +**
1.37 +** * The FTS1 module is being built into the core of
1.38 +** SQLite (in which case SQLITE_ENABLE_FTS1 is defined).
1.39 +*/
1.40 +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)
1.41 +
1.42 +#if defined(SQLITE_ENABLE_FTS1) && !defined(SQLITE_CORE)
1.43 +# define SQLITE_CORE 1
1.44 +#endif
1.45 +
1.46 +#include <assert.h>
1.47 +#include <stdlib.h>
1.48 +#include <stdio.h>
1.49 +#include <string.h>
1.50 +#include <ctype.h>
1.51 +
1.52 +#include "fts1.h"
1.53 +#include "fts1_hash.h"
1.54 +#include "fts1_tokenizer.h"
1.55 +#include "sqlite3.h"
1.56 +#include "sqlite3ext.h"
1.57 +SQLITE_EXTENSION_INIT1
1.58 +
1.59 +
1.60 +#if 0
1.61 +# define TRACE(A) printf A; fflush(stdout)
1.62 +#else
1.63 +# define TRACE(A)
1.64 +#endif
1.65 +
1.66 +/* utility functions */
1.67 +
1.68 +typedef struct StringBuffer {
1.69 + int len; /* length, not including null terminator */
1.70 + int alloced; /* Space allocated for s[] */
1.71 + char *s; /* Content of the string */
1.72 +} StringBuffer;
1.73 +
1.74 +static void initStringBuffer(StringBuffer *sb){
1.75 + sb->len = 0;
1.76 + sb->alloced = 100;
1.77 + sb->s = malloc(100);
1.78 + sb->s[0] = '\0';
1.79 +}
1.80 +
1.81 +static void nappend(StringBuffer *sb, const char *zFrom, int nFrom){
1.82 + if( sb->len + nFrom >= sb->alloced ){
1.83 + sb->alloced = sb->len + nFrom + 100;
1.84 + sb->s = realloc(sb->s, sb->alloced+1);
1.85 + if( sb->s==0 ){
1.86 + initStringBuffer(sb);
1.87 + return;
1.88 + }
1.89 + }
1.90 + memcpy(sb->s + sb->len, zFrom, nFrom);
1.91 + sb->len += nFrom;
1.92 + sb->s[sb->len] = 0;
1.93 +}
1.94 +static void append(StringBuffer *sb, const char *zFrom){
1.95 + nappend(sb, zFrom, strlen(zFrom));
1.96 +}
1.97 +
1.98 +/* We encode variable-length integers in little-endian order using seven bits
1.99 + * per byte as follows:
1.100 +**
1.101 +** KEY:
1.102 +** A = 0xxxxxxx 7 bits of data and one flag bit
1.103 +** B = 1xxxxxxx 7 bits of data and one flag bit
1.104 +**
1.105 +** 7 bits - A
1.106 +** 14 bits - BA
1.107 +** 21 bits - BBA
1.108 +** and so on.
1.109 +*/
1.110 +
1.111 +/* We may need up to VARINT_MAX bytes to store an encoded 64-bit integer. */
1.112 +#define VARINT_MAX 10
1.113 +
1.114 +/* Write a 64-bit variable-length integer to memory starting at p[0].
1.115 + * The length of data written will be between 1 and VARINT_MAX bytes.
1.116 + * The number of bytes written is returned. */
1.117 +static int putVarint(char *p, sqlite_int64 v){
1.118 + unsigned char *q = (unsigned char *) p;
1.119 + sqlite_uint64 vu = v;
1.120 + do{
1.121 + *q++ = (unsigned char) ((vu & 0x7f) | 0x80);
1.122 + vu >>= 7;
1.123 + }while( vu!=0 );
1.124 + q[-1] &= 0x7f; /* turn off high bit in final byte */
1.125 + assert( q - (unsigned char *)p <= VARINT_MAX );
1.126 + return (int) (q - (unsigned char *)p);
1.127 +}
1.128 +
1.129 +/* Read a 64-bit variable-length integer from memory starting at p[0].
1.130 + * Return the number of bytes read, or 0 on error.
1.131 + * The value is stored in *v. */
1.132 +static int getVarint(const char *p, sqlite_int64 *v){
1.133 + const unsigned char *q = (const unsigned char *) p;
1.134 + sqlite_uint64 x = 0, y = 1;
1.135 + while( (*q & 0x80) == 0x80 ){
1.136 + x += y * (*q++ & 0x7f);
1.137 + y <<= 7;
1.138 + if( q - (unsigned char *)p >= VARINT_MAX ){ /* bad data */
1.139 + assert( 0 );
1.140 + return 0;
1.141 + }
1.142 + }
1.143 + x += y * (*q++);
1.144 + *v = (sqlite_int64) x;
1.145 + return (int) (q - (unsigned char *)p);
1.146 +}
1.147 +
1.148 +static int getVarint32(const char *p, int *pi){
1.149 + sqlite_int64 i;
1.150 + int ret = getVarint(p, &i);
1.151 + *pi = (int) i;
1.152 + assert( *pi==i );
1.153 + return ret;
1.154 +}
1.155 +
1.156 +/*** Document lists ***
1.157 + *
1.158 + * A document list holds a sorted list of varint-encoded document IDs.
1.159 + *
1.160 + * A doclist with type DL_POSITIONS_OFFSETS is stored like this:
1.161 + *
1.162 + * array {
1.163 + * varint docid;
1.164 + * array {
1.165 + * varint position; (delta from previous position plus POS_BASE)
1.166 + * varint startOffset; (delta from previous startOffset)
1.167 + * varint endOffset; (delta from startOffset)
1.168 + * }
1.169 + * }
1.170 + *
1.171 + * Here, array { X } means zero or more occurrences of X, adjacent in memory.
1.172 + *
1.173 + * A position list may hold positions for text in multiple columns. A position
1.174 + * POS_COLUMN is followed by a varint containing the index of the column for
1.175 + * following positions in the list. Any positions appearing before any
1.176 + * occurrences of POS_COLUMN are for column 0.
1.177 + *
1.178 + * A doclist with type DL_POSITIONS is like the above, but holds only docids
1.179 + * and positions without offset information.
1.180 + *
1.181 + * A doclist with type DL_DOCIDS is like the above, but holds only docids
1.182 + * without positions or offset information.
1.183 + *
1.184 + * On disk, every document list has positions and offsets, so we don't bother
1.185 + * to serialize a doclist's type.
1.186 + *
1.187 + * We don't yet delta-encode document IDs; doing so will probably be a
1.188 + * modest win.
1.189 + *
1.190 + * NOTE(shess) I've thought of a slightly (1%) better offset encoding.
1.191 + * After the first offset, estimate the next offset by using the
1.192 + * current token position and the previous token position and offset,
1.193 + * offset to handle some variance. So the estimate would be
1.194 + * (iPosition*w->iStartOffset/w->iPosition-64), which is delta-encoded
1.195 + * as normal. Offsets more than 64 chars from the estimate are
1.196 + * encoded as the delta to the previous start offset + 128. An
1.197 + * additional tiny increment can be gained by using the end offset of
1.198 + * the previous token to make the estimate a tiny bit more precise.
1.199 +*/
1.200 +
1.201 +/* It is not safe to call isspace(), tolower(), or isalnum() on
1.202 +** hi-bit-set characters. This is the same solution used in the
1.203 +** tokenizer.
1.204 +*/
1.205 +/* TODO(shess) The snippet-generation code should be using the
1.206 +** tokenizer-generated tokens rather than doing its own local
1.207 +** tokenization.
1.208 +*/
1.209 +/* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */
1.210 +static int safe_isspace(char c){
1.211 + return (c&0x80)==0 ? isspace(c) : 0;
1.212 +}
1.213 +static int safe_tolower(char c){
1.214 + return (c&0x80)==0 ? tolower(c) : c;
1.215 +}
1.216 +static int safe_isalnum(char c){
1.217 + return (c&0x80)==0 ? isalnum(c) : 0;
1.218 +}
1.219 +
1.220 +typedef enum DocListType {
1.221 + DL_DOCIDS, /* docids only */
1.222 + DL_POSITIONS, /* docids + positions */
1.223 + DL_POSITIONS_OFFSETS /* docids + positions + offsets */
1.224 +} DocListType;
1.225 +
1.226 +/*
1.227 +** By default, only positions and not offsets are stored in the doclists.
1.228 +** To change this so that offsets are stored too, compile with
1.229 +**
1.230 +** -DDL_DEFAULT=DL_POSITIONS_OFFSETS
1.231 +**
1.232 +*/
1.233 +#ifndef DL_DEFAULT
1.234 +# define DL_DEFAULT DL_POSITIONS
1.235 +#endif
1.236 +
1.237 +typedef struct DocList {
1.238 + char *pData;
1.239 + int nData;
1.240 + DocListType iType;
1.241 + int iLastColumn; /* the last column written */
1.242 + int iLastPos; /* the last position written */
1.243 + int iLastOffset; /* the last start offset written */
1.244 +} DocList;
1.245 +
1.246 +enum {
1.247 + POS_END = 0, /* end of this position list */
1.248 + POS_COLUMN, /* followed by new column number */
1.249 + POS_BASE
1.250 +};
1.251 +
1.252 +/* Initialize a new DocList to hold the given data. */
1.253 +static void docListInit(DocList *d, DocListType iType,
1.254 + const char *pData, int nData){
1.255 + d->nData = nData;
1.256 + if( nData>0 ){
1.257 + d->pData = malloc(nData);
1.258 + memcpy(d->pData, pData, nData);
1.259 + } else {
1.260 + d->pData = NULL;
1.261 + }
1.262 + d->iType = iType;
1.263 + d->iLastColumn = 0;
1.264 + d->iLastPos = d->iLastOffset = 0;
1.265 +}
1.266 +
1.267 +/* Create a new dynamically-allocated DocList. */
1.268 +static DocList *docListNew(DocListType iType){
1.269 + DocList *d = (DocList *) malloc(sizeof(DocList));
1.270 + docListInit(d, iType, 0, 0);
1.271 + return d;
1.272 +}
1.273 +
1.274 +static void docListDestroy(DocList *d){
1.275 + free(d->pData);
1.276 +#ifndef NDEBUG
1.277 + memset(d, 0x55, sizeof(*d));
1.278 +#endif
1.279 +}
1.280 +
1.281 +static void docListDelete(DocList *d){
1.282 + docListDestroy(d);
1.283 + free(d);
1.284 +}
1.285 +
1.286 +static char *docListEnd(DocList *d){
1.287 + return d->pData + d->nData;
1.288 +}
1.289 +
1.290 +/* Append a varint to a DocList's data. */
1.291 +static void appendVarint(DocList *d, sqlite_int64 i){
1.292 + char c[VARINT_MAX];
1.293 + int n = putVarint(c, i);
1.294 + d->pData = realloc(d->pData, d->nData + n);
1.295 + memcpy(d->pData + d->nData, c, n);
1.296 + d->nData += n;
1.297 +}
1.298 +
1.299 +static void docListAddDocid(DocList *d, sqlite_int64 iDocid){
1.300 + appendVarint(d, iDocid);
1.301 + if( d->iType>=DL_POSITIONS ){
1.302 + appendVarint(d, POS_END); /* initially empty position list */
1.303 + d->iLastColumn = 0;
1.304 + d->iLastPos = d->iLastOffset = 0;
1.305 + }
1.306 +}
1.307 +
1.308 +/* helper function for docListAddPos and docListAddPosOffset */
1.309 +static void addPos(DocList *d, int iColumn, int iPos){
1.310 + assert( d->nData>0 );
1.311 + --d->nData; /* remove previous terminator */
1.312 + if( iColumn!=d->iLastColumn ){
1.313 + assert( iColumn>d->iLastColumn );
1.314 + appendVarint(d, POS_COLUMN);
1.315 + appendVarint(d, iColumn);
1.316 + d->iLastColumn = iColumn;
1.317 + d->iLastPos = d->iLastOffset = 0;
1.318 + }
1.319 + assert( iPos>=d->iLastPos );
1.320 + appendVarint(d, iPos-d->iLastPos+POS_BASE);
1.321 + d->iLastPos = iPos;
1.322 +}
1.323 +
1.324 +/* Add a position to the last position list in a doclist. */
1.325 +static void docListAddPos(DocList *d, int iColumn, int iPos){
1.326 + assert( d->iType==DL_POSITIONS );
1.327 + addPos(d, iColumn, iPos);
1.328 + appendVarint(d, POS_END); /* add new terminator */
1.329 +}
1.330 +
1.331 +/*
1.332 +** Add a position and starting and ending offsets to a doclist.
1.333 +**
1.334 +** If the doclist is setup to handle only positions, then insert
1.335 +** the position only and ignore the offsets.
1.336 +*/
1.337 +static void docListAddPosOffset(
1.338 + DocList *d, /* Doclist under construction */
1.339 + int iColumn, /* Column the inserted term is part of */
1.340 + int iPos, /* Position of the inserted term */
1.341 + int iStartOffset, /* Starting offset of inserted term */
1.342 + int iEndOffset /* Ending offset of inserted term */
1.343 +){
1.344 + assert( d->iType>=DL_POSITIONS );
1.345 + addPos(d, iColumn, iPos);
1.346 + if( d->iType==DL_POSITIONS_OFFSETS ){
1.347 + assert( iStartOffset>=d->iLastOffset );
1.348 + appendVarint(d, iStartOffset-d->iLastOffset);
1.349 + d->iLastOffset = iStartOffset;
1.350 + assert( iEndOffset>=iStartOffset );
1.351 + appendVarint(d, iEndOffset-iStartOffset);
1.352 + }
1.353 + appendVarint(d, POS_END); /* add new terminator */
1.354 +}
1.355 +
1.356 +/*
1.357 +** A DocListReader object is a cursor into a doclist. Initialize
1.358 +** the cursor to the beginning of the doclist by calling readerInit().
1.359 +** Then use routines
1.360 +**
1.361 +** peekDocid()
1.362 +** readDocid()
1.363 +** readPosition()
1.364 +** skipPositionList()
1.365 +** and so forth...
1.366 +**
1.367 +** to read information out of the doclist. When we reach the end
1.368 +** of the doclist, atEnd() returns TRUE.
1.369 +*/
1.370 +typedef struct DocListReader {
1.371 + DocList *pDoclist; /* The document list we are stepping through */
1.372 + char *p; /* Pointer to next unread byte in the doclist */
1.373 + int iLastColumn;
1.374 + int iLastPos; /* the last position read, or -1 when not in a position list */
1.375 +} DocListReader;
1.376 +
1.377 +/*
1.378 +** Initialize the DocListReader r to point to the beginning of pDoclist.
1.379 +*/
1.380 +static void readerInit(DocListReader *r, DocList *pDoclist){
1.381 + r->pDoclist = pDoclist;
1.382 + if( pDoclist!=NULL ){
1.383 + r->p = pDoclist->pData;
1.384 + }
1.385 + r->iLastColumn = -1;
1.386 + r->iLastPos = -1;
1.387 +}
1.388 +
1.389 +/*
1.390 +** Return TRUE if we have reached then end of pReader and there is
1.391 +** nothing else left to read.
1.392 +*/
1.393 +static int atEnd(DocListReader *pReader){
1.394 + return pReader->pDoclist==0 || (pReader->p >= docListEnd(pReader->pDoclist));
1.395 +}
1.396 +
1.397 +/* Peek at the next docid without advancing the read pointer.
1.398 +*/
1.399 +static sqlite_int64 peekDocid(DocListReader *pReader){
1.400 + sqlite_int64 ret;
1.401 + assert( !atEnd(pReader) );
1.402 + assert( pReader->iLastPos==-1 );
1.403 + getVarint(pReader->p, &ret);
1.404 + return ret;
1.405 +}
1.406 +
1.407 +/* Read the next docid. See also nextDocid().
1.408 +*/
1.409 +static sqlite_int64 readDocid(DocListReader *pReader){
1.410 + sqlite_int64 ret;
1.411 + assert( !atEnd(pReader) );
1.412 + assert( pReader->iLastPos==-1 );
1.413 + pReader->p += getVarint(pReader->p, &ret);
1.414 + if( pReader->pDoclist->iType>=DL_POSITIONS ){
1.415 + pReader->iLastColumn = 0;
1.416 + pReader->iLastPos = 0;
1.417 + }
1.418 + return ret;
1.419 +}
1.420 +
1.421 +/* Read the next position and column index from a position list.
1.422 + * Returns the position, or -1 at the end of the list. */
1.423 +static int readPosition(DocListReader *pReader, int *iColumn){
1.424 + int i;
1.425 + int iType = pReader->pDoclist->iType;
1.426 +
1.427 + if( pReader->iLastPos==-1 ){
1.428 + return -1;
1.429 + }
1.430 + assert( !atEnd(pReader) );
1.431 +
1.432 + if( iType<DL_POSITIONS ){
1.433 + return -1;
1.434 + }
1.435 + pReader->p += getVarint32(pReader->p, &i);
1.436 + if( i==POS_END ){
1.437 + pReader->iLastColumn = pReader->iLastPos = -1;
1.438 + *iColumn = -1;
1.439 + return -1;
1.440 + }
1.441 + if( i==POS_COLUMN ){
1.442 + pReader->p += getVarint32(pReader->p, &pReader->iLastColumn);
1.443 + pReader->iLastPos = 0;
1.444 + pReader->p += getVarint32(pReader->p, &i);
1.445 + assert( i>=POS_BASE );
1.446 + }
1.447 + pReader->iLastPos += ((int) i)-POS_BASE;
1.448 + if( iType>=DL_POSITIONS_OFFSETS ){
1.449 + /* Skip over offsets, ignoring them for now. */
1.450 + int iStart, iEnd;
1.451 + pReader->p += getVarint32(pReader->p, &iStart);
1.452 + pReader->p += getVarint32(pReader->p, &iEnd);
1.453 + }
1.454 + *iColumn = pReader->iLastColumn;
1.455 + return pReader->iLastPos;
1.456 +}
1.457 +
1.458 +/* Skip past the end of a position list. */
1.459 +static void skipPositionList(DocListReader *pReader){
1.460 + DocList *p = pReader->pDoclist;
1.461 + if( p && p->iType>=DL_POSITIONS ){
1.462 + int iColumn;
1.463 + while( readPosition(pReader, &iColumn)!=-1 ){}
1.464 + }
1.465 +}
1.466 +
1.467 +/* Skip over a docid, including its position list if the doclist has
1.468 + * positions. */
1.469 +static void skipDocument(DocListReader *pReader){
1.470 + readDocid(pReader);
1.471 + skipPositionList(pReader);
1.472 +}
1.473 +
1.474 +/* Skip past all docids which are less than [iDocid]. Returns 1 if a docid
1.475 + * matching [iDocid] was found. */
1.476 +static int skipToDocid(DocListReader *pReader, sqlite_int64 iDocid){
1.477 + sqlite_int64 d = 0;
1.478 + while( !atEnd(pReader) && (d=peekDocid(pReader))<iDocid ){
1.479 + skipDocument(pReader);
1.480 + }
1.481 + return !atEnd(pReader) && d==iDocid;
1.482 +}
1.483 +
1.484 +/* Return the first document in a document list.
1.485 +*/
1.486 +static sqlite_int64 firstDocid(DocList *d){
1.487 + DocListReader r;
1.488 + readerInit(&r, d);
1.489 + return readDocid(&r);
1.490 +}
1.491 +
1.492 +#ifdef SQLITE_DEBUG
1.493 +/*
1.494 +** This routine is used for debugging purpose only.
1.495 +**
1.496 +** Write the content of a doclist to standard output.
1.497 +*/
1.498 +static void printDoclist(DocList *p){
1.499 + DocListReader r;
1.500 + const char *zSep = "";
1.501 +
1.502 + readerInit(&r, p);
1.503 + while( !atEnd(&r) ){
1.504 + sqlite_int64 docid = readDocid(&r);
1.505 + if( docid==0 ){
1.506 + skipPositionList(&r);
1.507 + continue;
1.508 + }
1.509 + printf("%s%lld", zSep, docid);
1.510 + zSep = ",";
1.511 + if( p->iType>=DL_POSITIONS ){
1.512 + int iPos, iCol;
1.513 + const char *zDiv = "";
1.514 + printf("(");
1.515 + while( (iPos = readPosition(&r, &iCol))>=0 ){
1.516 + printf("%s%d:%d", zDiv, iCol, iPos);
1.517 + zDiv = ":";
1.518 + }
1.519 + printf(")");
1.520 + }
1.521 + }
1.522 + printf("\n");
1.523 + fflush(stdout);
1.524 +}
1.525 +#endif /* SQLITE_DEBUG */
1.526 +
1.527 +/* Trim the given doclist to contain only positions in column
1.528 + * [iRestrictColumn]. */
1.529 +static void docListRestrictColumn(DocList *in, int iRestrictColumn){
1.530 + DocListReader r;
1.531 + DocList out;
1.532 +
1.533 + assert( in->iType>=DL_POSITIONS );
1.534 + readerInit(&r, in);
1.535 + docListInit(&out, DL_POSITIONS, NULL, 0);
1.536 +
1.537 + while( !atEnd(&r) ){
1.538 + sqlite_int64 iDocid = readDocid(&r);
1.539 + int iPos, iColumn;
1.540 +
1.541 + docListAddDocid(&out, iDocid);
1.542 + while( (iPos = readPosition(&r, &iColumn)) != -1 ){
1.543 + if( iColumn==iRestrictColumn ){
1.544 + docListAddPos(&out, iColumn, iPos);
1.545 + }
1.546 + }
1.547 + }
1.548 +
1.549 + docListDestroy(in);
1.550 + *in = out;
1.551 +}
1.552 +
1.553 +/* Trim the given doclist by discarding any docids without any remaining
1.554 + * positions. */
1.555 +static void docListDiscardEmpty(DocList *in) {
1.556 + DocListReader r;
1.557 + DocList out;
1.558 +
1.559 + /* TODO: It would be nice to implement this operation in place; that
1.560 + * could save a significant amount of memory in queries with long doclists. */
1.561 + assert( in->iType>=DL_POSITIONS );
1.562 + readerInit(&r, in);
1.563 + docListInit(&out, DL_POSITIONS, NULL, 0);
1.564 +
1.565 + while( !atEnd(&r) ){
1.566 + sqlite_int64 iDocid = readDocid(&r);
1.567 + int match = 0;
1.568 + int iPos, iColumn;
1.569 + while( (iPos = readPosition(&r, &iColumn)) != -1 ){
1.570 + if( !match ){
1.571 + docListAddDocid(&out, iDocid);
1.572 + match = 1;
1.573 + }
1.574 + docListAddPos(&out, iColumn, iPos);
1.575 + }
1.576 + }
1.577 +
1.578 + docListDestroy(in);
1.579 + *in = out;
1.580 +}
1.581 +
1.582 +/* Helper function for docListUpdate() and docListAccumulate().
1.583 +** Splices a doclist element into the doclist represented by r,
1.584 +** leaving r pointing after the newly spliced element.
1.585 +*/
1.586 +static void docListSpliceElement(DocListReader *r, sqlite_int64 iDocid,
1.587 + const char *pSource, int nSource){
1.588 + DocList *d = r->pDoclist;
1.589 + char *pTarget;
1.590 + int nTarget, found;
1.591 +
1.592 + found = skipToDocid(r, iDocid);
1.593 +
1.594 + /* Describe slice in d to place pSource/nSource. */
1.595 + pTarget = r->p;
1.596 + if( found ){
1.597 + skipDocument(r);
1.598 + nTarget = r->p-pTarget;
1.599 + }else{
1.600 + nTarget = 0;
1.601 + }
1.602 +
1.603 + /* The sense of the following is that there are three possibilities.
1.604 + ** If nTarget==nSource, we should not move any memory nor realloc.
1.605 + ** If nTarget>nSource, trim target and realloc.
1.606 + ** If nTarget<nSource, realloc then expand target.
1.607 + */
1.608 + if( nTarget>nSource ){
1.609 + memmove(pTarget+nSource, pTarget+nTarget, docListEnd(d)-(pTarget+nTarget));
1.610 + }
1.611 + if( nTarget!=nSource ){
1.612 + int iDoclist = pTarget-d->pData;
1.613 + d->pData = realloc(d->pData, d->nData+nSource-nTarget);
1.614 + pTarget = d->pData+iDoclist;
1.615 + }
1.616 + if( nTarget<nSource ){
1.617 + memmove(pTarget+nSource, pTarget+nTarget, docListEnd(d)-(pTarget+nTarget));
1.618 + }
1.619 +
1.620 + memcpy(pTarget, pSource, nSource);
1.621 + d->nData += nSource-nTarget;
1.622 + r->p = pTarget+nSource;
1.623 +}
1.624 +
1.625 +/* Insert/update pUpdate into the doclist. */
1.626 +static void docListUpdate(DocList *d, DocList *pUpdate){
1.627 + DocListReader reader;
1.628 +
1.629 + assert( d!=NULL && pUpdate!=NULL );
1.630 + assert( d->iType==pUpdate->iType);
1.631 +
1.632 + readerInit(&reader, d);
1.633 + docListSpliceElement(&reader, firstDocid(pUpdate),
1.634 + pUpdate->pData, pUpdate->nData);
1.635 +}
1.636 +
1.637 +/* Propagate elements from pUpdate to pAcc, overwriting elements with
1.638 +** matching docids.
1.639 +*/
1.640 +static void docListAccumulate(DocList *pAcc, DocList *pUpdate){
1.641 + DocListReader accReader, updateReader;
1.642 +
1.643 + /* Handle edge cases where one doclist is empty. */
1.644 + assert( pAcc!=NULL );
1.645 + if( pUpdate==NULL || pUpdate->nData==0 ) return;
1.646 + if( pAcc->nData==0 ){
1.647 + pAcc->pData = malloc(pUpdate->nData);
1.648 + memcpy(pAcc->pData, pUpdate->pData, pUpdate->nData);
1.649 + pAcc->nData = pUpdate->nData;
1.650 + return;
1.651 + }
1.652 +
1.653 + readerInit(&accReader, pAcc);
1.654 + readerInit(&updateReader, pUpdate);
1.655 +
1.656 + while( !atEnd(&updateReader) ){
1.657 + char *pSource = updateReader.p;
1.658 + sqlite_int64 iDocid = readDocid(&updateReader);
1.659 + skipPositionList(&updateReader);
1.660 + docListSpliceElement(&accReader, iDocid, pSource, updateReader.p-pSource);
1.661 + }
1.662 +}
1.663 +
1.664 +/*
1.665 +** Read the next docid off of pIn. Return 0 if we reach the end.
1.666 +*
1.667 +* TODO: This assumes that docids are never 0, but they may actually be 0 since
1.668 +* users can choose docids when inserting into a full-text table. Fix this.
1.669 +*/
1.670 +static sqlite_int64 nextDocid(DocListReader *pIn){
1.671 + skipPositionList(pIn);
1.672 + return atEnd(pIn) ? 0 : readDocid(pIn);
1.673 +}
1.674 +
1.675 +/*
1.676 +** pLeft and pRight are two DocListReaders that are pointing to
1.677 +** positions lists of the same document: iDocid.
1.678 +**
1.679 +** If there are no instances in pLeft or pRight where the position
1.680 +** of pLeft is one less than the position of pRight, then this
1.681 +** routine adds nothing to pOut.
1.682 +**
1.683 +** If there are one or more instances where positions from pLeft
1.684 +** are exactly one less than positions from pRight, then add a new
1.685 +** document record to pOut. If pOut wants to hold positions, then
1.686 +** include the positions from pRight that are one more than a
1.687 +** position in pLeft. In other words: pRight.iPos==pLeft.iPos+1.
1.688 +**
1.689 +** pLeft and pRight are left pointing at the next document record.
1.690 +*/
1.691 +static void mergePosList(
1.692 + DocListReader *pLeft, /* Left position list */
1.693 + DocListReader *pRight, /* Right position list */
1.694 + sqlite_int64 iDocid, /* The docid from pLeft and pRight */
1.695 + DocList *pOut /* Write the merged document record here */
1.696 +){
1.697 + int iLeftCol, iLeftPos = readPosition(pLeft, &iLeftCol);
1.698 + int iRightCol, iRightPos = readPosition(pRight, &iRightCol);
1.699 + int match = 0;
1.700 +
1.701 + /* Loop until we've reached the end of both position lists. */
1.702 + while( iLeftPos!=-1 && iRightPos!=-1 ){
1.703 + if( iLeftCol==iRightCol && iLeftPos+1==iRightPos ){
1.704 + if( !match ){
1.705 + docListAddDocid(pOut, iDocid);
1.706 + match = 1;
1.707 + }
1.708 + if( pOut->iType>=DL_POSITIONS ){
1.709 + docListAddPos(pOut, iRightCol, iRightPos);
1.710 + }
1.711 + iLeftPos = readPosition(pLeft, &iLeftCol);
1.712 + iRightPos = readPosition(pRight, &iRightCol);
1.713 + }else if( iRightCol<iLeftCol ||
1.714 + (iRightCol==iLeftCol && iRightPos<iLeftPos+1) ){
1.715 + iRightPos = readPosition(pRight, &iRightCol);
1.716 + }else{
1.717 + iLeftPos = readPosition(pLeft, &iLeftCol);
1.718 + }
1.719 + }
1.720 + if( iLeftPos>=0 ) skipPositionList(pLeft);
1.721 + if( iRightPos>=0 ) skipPositionList(pRight);
1.722 +}
1.723 +
1.724 +/* We have two doclists: pLeft and pRight.
1.725 +** Write the phrase intersection of these two doclists into pOut.
1.726 +**
1.727 +** A phrase intersection means that two documents only match
1.728 +** if pLeft.iPos+1==pRight.iPos.
1.729 +**
1.730 +** The output pOut may or may not contain positions. If pOut
1.731 +** does contain positions, they are the positions of pRight.
1.732 +*/
1.733 +static void docListPhraseMerge(
1.734 + DocList *pLeft, /* Doclist resulting from the words on the left */
1.735 + DocList *pRight, /* Doclist for the next word to the right */
1.736 + DocList *pOut /* Write the combined doclist here */
1.737 +){
1.738 + DocListReader left, right;
1.739 + sqlite_int64 docidLeft, docidRight;
1.740 +
1.741 + readerInit(&left, pLeft);
1.742 + readerInit(&right, pRight);
1.743 + docidLeft = nextDocid(&left);
1.744 + docidRight = nextDocid(&right);
1.745 +
1.746 + while( docidLeft>0 && docidRight>0 ){
1.747 + if( docidLeft<docidRight ){
1.748 + docidLeft = nextDocid(&left);
1.749 + }else if( docidRight<docidLeft ){
1.750 + docidRight = nextDocid(&right);
1.751 + }else{
1.752 + mergePosList(&left, &right, docidLeft, pOut);
1.753 + docidLeft = nextDocid(&left);
1.754 + docidRight = nextDocid(&right);
1.755 + }
1.756 + }
1.757 +}
1.758 +
1.759 +/* We have two doclists: pLeft and pRight.
1.760 +** Write the intersection of these two doclists into pOut.
1.761 +** Only docids are matched. Position information is ignored.
1.762 +**
1.763 +** The output pOut never holds positions.
1.764 +*/
1.765 +static void docListAndMerge(
1.766 + DocList *pLeft, /* Doclist resulting from the words on the left */
1.767 + DocList *pRight, /* Doclist for the next word to the right */
1.768 + DocList *pOut /* Write the combined doclist here */
1.769 +){
1.770 + DocListReader left, right;
1.771 + sqlite_int64 docidLeft, docidRight;
1.772 +
1.773 + assert( pOut->iType<DL_POSITIONS );
1.774 +
1.775 + readerInit(&left, pLeft);
1.776 + readerInit(&right, pRight);
1.777 + docidLeft = nextDocid(&left);
1.778 + docidRight = nextDocid(&right);
1.779 +
1.780 + while( docidLeft>0 && docidRight>0 ){
1.781 + if( docidLeft<docidRight ){
1.782 + docidLeft = nextDocid(&left);
1.783 + }else if( docidRight<docidLeft ){
1.784 + docidRight = nextDocid(&right);
1.785 + }else{
1.786 + docListAddDocid(pOut, docidLeft);
1.787 + docidLeft = nextDocid(&left);
1.788 + docidRight = nextDocid(&right);
1.789 + }
1.790 + }
1.791 +}
1.792 +
1.793 +/* We have two doclists: pLeft and pRight.
1.794 +** Write the union of these two doclists into pOut.
1.795 +** Only docids are matched. Position information is ignored.
1.796 +**
1.797 +** The output pOut never holds positions.
1.798 +*/
1.799 +static void docListOrMerge(
1.800 + DocList *pLeft, /* Doclist resulting from the words on the left */
1.801 + DocList *pRight, /* Doclist for the next word to the right */
1.802 + DocList *pOut /* Write the combined doclist here */
1.803 +){
1.804 + DocListReader left, right;
1.805 + sqlite_int64 docidLeft, docidRight, priorLeft;
1.806 +
1.807 + readerInit(&left, pLeft);
1.808 + readerInit(&right, pRight);
1.809 + docidLeft = nextDocid(&left);
1.810 + docidRight = nextDocid(&right);
1.811 +
1.812 + while( docidLeft>0 && docidRight>0 ){
1.813 + if( docidLeft<=docidRight ){
1.814 + docListAddDocid(pOut, docidLeft);
1.815 + }else{
1.816 + docListAddDocid(pOut, docidRight);
1.817 + }
1.818 + priorLeft = docidLeft;
1.819 + if( docidLeft<=docidRight ){
1.820 + docidLeft = nextDocid(&left);
1.821 + }
1.822 + if( docidRight>0 && docidRight<=priorLeft ){
1.823 + docidRight = nextDocid(&right);
1.824 + }
1.825 + }
1.826 + while( docidLeft>0 ){
1.827 + docListAddDocid(pOut, docidLeft);
1.828 + docidLeft = nextDocid(&left);
1.829 + }
1.830 + while( docidRight>0 ){
1.831 + docListAddDocid(pOut, docidRight);
1.832 + docidRight = nextDocid(&right);
1.833 + }
1.834 +}
1.835 +
1.836 +/* We have two doclists: pLeft and pRight.
1.837 +** Write into pOut all documents that occur in pLeft but not
1.838 +** in pRight.
1.839 +**
1.840 +** Only docids are matched. Position information is ignored.
1.841 +**
1.842 +** The output pOut never holds positions.
1.843 +*/
1.844 +static void docListExceptMerge(
1.845 + DocList *pLeft, /* Doclist resulting from the words on the left */
1.846 + DocList *pRight, /* Doclist for the next word to the right */
1.847 + DocList *pOut /* Write the combined doclist here */
1.848 +){
1.849 + DocListReader left, right;
1.850 + sqlite_int64 docidLeft, docidRight, priorLeft;
1.851 +
1.852 + readerInit(&left, pLeft);
1.853 + readerInit(&right, pRight);
1.854 + docidLeft = nextDocid(&left);
1.855 + docidRight = nextDocid(&right);
1.856 +
1.857 + while( docidLeft>0 && docidRight>0 ){
1.858 + priorLeft = docidLeft;
1.859 + if( docidLeft<docidRight ){
1.860 + docListAddDocid(pOut, docidLeft);
1.861 + }
1.862 + if( docidLeft<=docidRight ){
1.863 + docidLeft = nextDocid(&left);
1.864 + }
1.865 + if( docidRight>0 && docidRight<=priorLeft ){
1.866 + docidRight = nextDocid(&right);
1.867 + }
1.868 + }
1.869 + while( docidLeft>0 ){
1.870 + docListAddDocid(pOut, docidLeft);
1.871 + docidLeft = nextDocid(&left);
1.872 + }
1.873 +}
1.874 +
1.875 +static char *string_dup_n(const char *s, int n){
1.876 + char *str = malloc(n + 1);
1.877 + memcpy(str, s, n);
1.878 + str[n] = '\0';
1.879 + return str;
1.880 +}
1.881 +
1.882 +/* Duplicate a string; the caller must free() the returned string.
1.883 + * (We don't use strdup() since it is not part of the standard C library and
1.884 + * may not be available everywhere.) */
1.885 +static char *string_dup(const char *s){
1.886 + return string_dup_n(s, strlen(s));
1.887 +}
1.888 +
1.889 +/* Format a string, replacing each occurrence of the % character with
1.890 + * zDb.zName. This may be more convenient than sqlite_mprintf()
1.891 + * when one string is used repeatedly in a format string.
1.892 + * The caller must free() the returned string. */
1.893 +static char *string_format(const char *zFormat,
1.894 + const char *zDb, const char *zName){
1.895 + const char *p;
1.896 + size_t len = 0;
1.897 + size_t nDb = strlen(zDb);
1.898 + size_t nName = strlen(zName);
1.899 + size_t nFullTableName = nDb+1+nName;
1.900 + char *result;
1.901 + char *r;
1.902 +
1.903 + /* first compute length needed */
1.904 + for(p = zFormat ; *p ; ++p){
1.905 + len += (*p=='%' ? nFullTableName : 1);
1.906 + }
1.907 + len += 1; /* for null terminator */
1.908 +
1.909 + r = result = malloc(len);
1.910 + for(p = zFormat; *p; ++p){
1.911 + if( *p=='%' ){
1.912 + memcpy(r, zDb, nDb);
1.913 + r += nDb;
1.914 + *r++ = '.';
1.915 + memcpy(r, zName, nName);
1.916 + r += nName;
1.917 + } else {
1.918 + *r++ = *p;
1.919 + }
1.920 + }
1.921 + *r++ = '\0';
1.922 + assert( r == result + len );
1.923 + return result;
1.924 +}
1.925 +
1.926 +static int sql_exec(sqlite3 *db, const char *zDb, const char *zName,
1.927 + const char *zFormat){
1.928 + char *zCommand = string_format(zFormat, zDb, zName);
1.929 + int rc;
1.930 + TRACE(("FTS1 sql: %s\n", zCommand));
1.931 + rc = sqlite3_exec(db, zCommand, NULL, 0, NULL);
1.932 + free(zCommand);
1.933 + return rc;
1.934 +}
1.935 +
1.936 +static int sql_prepare(sqlite3 *db, const char *zDb, const char *zName,
1.937 + sqlite3_stmt **ppStmt, const char *zFormat){
1.938 + char *zCommand = string_format(zFormat, zDb, zName);
1.939 + int rc;
1.940 + TRACE(("FTS1 prepare: %s\n", zCommand));
1.941 + rc = sqlite3_prepare(db, zCommand, -1, ppStmt, NULL);
1.942 + free(zCommand);
1.943 + return rc;
1.944 +}
1.945 +
1.946 +/* end utility functions */
1.947 +
1.948 +/* Forward reference */
1.949 +typedef struct fulltext_vtab fulltext_vtab;
1.950 +
1.951 +/* A single term in a query is represented by an instances of
1.952 +** the following structure.
1.953 +*/
1.954 +typedef struct QueryTerm {
1.955 + short int nPhrase; /* How many following terms are part of the same phrase */
1.956 + short int iPhrase; /* This is the i-th term of a phrase. */
1.957 + short int iColumn; /* Column of the index that must match this term */
1.958 + signed char isOr; /* this term is preceded by "OR" */
1.959 + signed char isNot; /* this term is preceded by "-" */
1.960 + char *pTerm; /* text of the term. '\000' terminated. malloced */
1.961 + int nTerm; /* Number of bytes in pTerm[] */
1.962 +} QueryTerm;
1.963 +
1.964 +
1.965 +/* A query string is parsed into a Query structure.
1.966 + *
1.967 + * We could, in theory, allow query strings to be complicated
1.968 + * nested expressions with precedence determined by parentheses.
1.969 + * But none of the major search engines do this. (Perhaps the
1.970 + * feeling is that an parenthesized expression is two complex of
1.971 + * an idea for the average user to grasp.) Taking our lead from
1.972 + * the major search engines, we will allow queries to be a list
1.973 + * of terms (with an implied AND operator) or phrases in double-quotes,
1.974 + * with a single optional "-" before each non-phrase term to designate
1.975 + * negation and an optional OR connector.
1.976 + *
1.977 + * OR binds more tightly than the implied AND, which is what the
1.978 + * major search engines seem to do. So, for example:
1.979 + *
1.980 + * [one two OR three] ==> one AND (two OR three)
1.981 + * [one OR two three] ==> (one OR two) AND three
1.982 + *
1.983 + * A "-" before a term matches all entries that lack that term.
1.984 + * The "-" must occur immediately before the term with in intervening
1.985 + * space. This is how the search engines do it.
1.986 + *
1.987 + * A NOT term cannot be the right-hand operand of an OR. If this
1.988 + * occurs in the query string, the NOT is ignored:
1.989 + *
1.990 + * [one OR -two] ==> one OR two
1.991 + *
1.992 + */
1.993 +typedef struct Query {
1.994 + fulltext_vtab *pFts; /* The full text index */
1.995 + int nTerms; /* Number of terms in the query */
1.996 + QueryTerm *pTerms; /* Array of terms. Space obtained from malloc() */
1.997 + int nextIsOr; /* Set the isOr flag on the next inserted term */
1.998 + int nextColumn; /* Next word parsed must be in this column */
1.999 + int dfltColumn; /* The default column */
1.1000 +} Query;
1.1001 +
1.1002 +
1.1003 +/*
1.1004 +** An instance of the following structure keeps track of generated
1.1005 +** matching-word offset information and snippets.
1.1006 +*/
1.1007 +typedef struct Snippet {
1.1008 + int nMatch; /* Total number of matches */
1.1009 + int nAlloc; /* Space allocated for aMatch[] */
1.1010 + struct snippetMatch { /* One entry for each matching term */
1.1011 + char snStatus; /* Status flag for use while constructing snippets */
1.1012 + short int iCol; /* The column that contains the match */
1.1013 + short int iTerm; /* The index in Query.pTerms[] of the matching term */
1.1014 + short int nByte; /* Number of bytes in the term */
1.1015 + int iStart; /* The offset to the first character of the term */
1.1016 + } *aMatch; /* Points to space obtained from malloc */
1.1017 + char *zOffset; /* Text rendering of aMatch[] */
1.1018 + int nOffset; /* strlen(zOffset) */
1.1019 + char *zSnippet; /* Snippet text */
1.1020 + int nSnippet; /* strlen(zSnippet) */
1.1021 +} Snippet;
1.1022 +
1.1023 +
1.1024 +typedef enum QueryType {
1.1025 + QUERY_GENERIC, /* table scan */
1.1026 + QUERY_ROWID, /* lookup by rowid */
1.1027 + QUERY_FULLTEXT /* QUERY_FULLTEXT + [i] is a full-text search for column i*/
1.1028 +} QueryType;
1.1029 +
1.1030 +/* TODO(shess) CHUNK_MAX controls how much data we allow in segment 0
1.1031 +** before we start aggregating into larger segments. Lower CHUNK_MAX
1.1032 +** means that for a given input we have more individual segments per
1.1033 +** term, which means more rows in the table and a bigger index (due to
1.1034 +** both more rows and bigger rowids). But it also reduces the average
1.1035 +** cost of adding new elements to the segment 0 doclist, and it seems
1.1036 +** to reduce the number of pages read and written during inserts. 256
1.1037 +** was chosen by measuring insertion times for a certain input (first
1.1038 +** 10k documents of Enron corpus), though including query performance
1.1039 +** in the decision may argue for a larger value.
1.1040 +*/
1.1041 +#define CHUNK_MAX 256
1.1042 +
1.1043 +typedef enum fulltext_statement {
1.1044 + CONTENT_INSERT_STMT,
1.1045 + CONTENT_SELECT_STMT,
1.1046 + CONTENT_UPDATE_STMT,
1.1047 + CONTENT_DELETE_STMT,
1.1048 +
1.1049 + TERM_SELECT_STMT,
1.1050 + TERM_SELECT_ALL_STMT,
1.1051 + TERM_INSERT_STMT,
1.1052 + TERM_UPDATE_STMT,
1.1053 + TERM_DELETE_STMT,
1.1054 +
1.1055 + MAX_STMT /* Always at end! */
1.1056 +} fulltext_statement;
1.1057 +
1.1058 +/* These must exactly match the enum above. */
1.1059 +/* TODO(adam): Is there some risk that a statement (in particular,
1.1060 +** pTermSelectStmt) will be used in two cursors at once, e.g. if a
1.1061 +** query joins a virtual table to itself? If so perhaps we should
1.1062 +** move some of these to the cursor object.
1.1063 +*/
1.1064 +static const char *const fulltext_zStatement[MAX_STMT] = {
1.1065 + /* CONTENT_INSERT */ NULL, /* generated in contentInsertStatement() */
1.1066 + /* CONTENT_SELECT */ "select * from %_content where rowid = ?",
1.1067 + /* CONTENT_UPDATE */ NULL, /* generated in contentUpdateStatement() */
1.1068 + /* CONTENT_DELETE */ "delete from %_content where rowid = ?",
1.1069 +
1.1070 + /* TERM_SELECT */
1.1071 + "select rowid, doclist from %_term where term = ? and segment = ?",
1.1072 + /* TERM_SELECT_ALL */
1.1073 + "select doclist from %_term where term = ? order by segment",
1.1074 + /* TERM_INSERT */
1.1075 + "insert into %_term (rowid, term, segment, doclist) values (?, ?, ?, ?)",
1.1076 + /* TERM_UPDATE */ "update %_term set doclist = ? where rowid = ?",
1.1077 + /* TERM_DELETE */ "delete from %_term where rowid = ?",
1.1078 +};
1.1079 +
1.1080 +/*
1.1081 +** A connection to a fulltext index is an instance of the following
1.1082 +** structure. The xCreate and xConnect methods create an instance
1.1083 +** of this structure and xDestroy and xDisconnect free that instance.
1.1084 +** All other methods receive a pointer to the structure as one of their
1.1085 +** arguments.
1.1086 +*/
1.1087 +struct fulltext_vtab {
1.1088 + sqlite3_vtab base; /* Base class used by SQLite core */
1.1089 + sqlite3 *db; /* The database connection */
1.1090 + const char *zDb; /* logical database name */
1.1091 + const char *zName; /* virtual table name */
1.1092 + int nColumn; /* number of columns in virtual table */
1.1093 + char **azColumn; /* column names. malloced */
1.1094 + char **azContentColumn; /* column names in content table; malloced */
1.1095 + sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */
1.1096 +
1.1097 + /* Precompiled statements which we keep as long as the table is
1.1098 + ** open.
1.1099 + */
1.1100 + sqlite3_stmt *pFulltextStatements[MAX_STMT];
1.1101 +};
1.1102 +
1.1103 +/*
1.1104 +** When the core wants to do a query, it create a cursor using a
1.1105 +** call to xOpen. This structure is an instance of a cursor. It
1.1106 +** is destroyed by xClose.
1.1107 +*/
1.1108 +typedef struct fulltext_cursor {
1.1109 + sqlite3_vtab_cursor base; /* Base class used by SQLite core */
1.1110 + QueryType iCursorType; /* Copy of sqlite3_index_info.idxNum */
1.1111 + sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */
1.1112 + int eof; /* True if at End Of Results */
1.1113 + Query q; /* Parsed query string */
1.1114 + Snippet snippet; /* Cached snippet for the current row */
1.1115 + int iColumn; /* Column being searched */
1.1116 + DocListReader result; /* used when iCursorType == QUERY_FULLTEXT */
1.1117 +} fulltext_cursor;
1.1118 +
1.1119 +static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
1.1120 + return (fulltext_vtab *) c->base.pVtab;
1.1121 +}
1.1122 +
1.1123 +static const sqlite3_module fulltextModule; /* forward declaration */
1.1124 +
1.1125 +/* Append a list of strings separated by commas to a StringBuffer. */
1.1126 +static void appendList(StringBuffer *sb, int nString, char **azString){
1.1127 + int i;
1.1128 + for(i=0; i<nString; ++i){
1.1129 + if( i>0 ) append(sb, ", ");
1.1130 + append(sb, azString[i]);
1.1131 + }
1.1132 +}
1.1133 +
1.1134 +/* Return a dynamically generated statement of the form
1.1135 + * insert into %_content (rowid, ...) values (?, ...)
1.1136 + */
1.1137 +static const char *contentInsertStatement(fulltext_vtab *v){
1.1138 + StringBuffer sb;
1.1139 + int i;
1.1140 +
1.1141 + initStringBuffer(&sb);
1.1142 + append(&sb, "insert into %_content (rowid, ");
1.1143 + appendList(&sb, v->nColumn, v->azContentColumn);
1.1144 + append(&sb, ") values (?");
1.1145 + for(i=0; i<v->nColumn; ++i)
1.1146 + append(&sb, ", ?");
1.1147 + append(&sb, ")");
1.1148 + return sb.s;
1.1149 +}
1.1150 +
1.1151 +/* Return a dynamically generated statement of the form
1.1152 + * update %_content set [col_0] = ?, [col_1] = ?, ...
1.1153 + * where rowid = ?
1.1154 + */
1.1155 +static const char *contentUpdateStatement(fulltext_vtab *v){
1.1156 + StringBuffer sb;
1.1157 + int i;
1.1158 +
1.1159 + initStringBuffer(&sb);
1.1160 + append(&sb, "update %_content set ");
1.1161 + for(i=0; i<v->nColumn; ++i) {
1.1162 + if( i>0 ){
1.1163 + append(&sb, ", ");
1.1164 + }
1.1165 + append(&sb, v->azContentColumn[i]);
1.1166 + append(&sb, " = ?");
1.1167 + }
1.1168 + append(&sb, " where rowid = ?");
1.1169 + return sb.s;
1.1170 +}
1.1171 +
1.1172 +/* Puts a freshly-prepared statement determined by iStmt in *ppStmt.
1.1173 +** If the indicated statement has never been prepared, it is prepared
1.1174 +** and cached, otherwise the cached version is reset.
1.1175 +*/
1.1176 +static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt,
1.1177 + sqlite3_stmt **ppStmt){
1.1178 + assert( iStmt<MAX_STMT );
1.1179 + if( v->pFulltextStatements[iStmt]==NULL ){
1.1180 + const char *zStmt;
1.1181 + int rc;
1.1182 + switch( iStmt ){
1.1183 + case CONTENT_INSERT_STMT:
1.1184 + zStmt = contentInsertStatement(v); break;
1.1185 + case CONTENT_UPDATE_STMT:
1.1186 + zStmt = contentUpdateStatement(v); break;
1.1187 + default:
1.1188 + zStmt = fulltext_zStatement[iStmt];
1.1189 + }
1.1190 + rc = sql_prepare(v->db, v->zDb, v->zName, &v->pFulltextStatements[iStmt],
1.1191 + zStmt);
1.1192 + if( zStmt != fulltext_zStatement[iStmt]) free((void *) zStmt);
1.1193 + if( rc!=SQLITE_OK ) return rc;
1.1194 + } else {
1.1195 + int rc = sqlite3_reset(v->pFulltextStatements[iStmt]);
1.1196 + if( rc!=SQLITE_OK ) return rc;
1.1197 + }
1.1198 +
1.1199 + *ppStmt = v->pFulltextStatements[iStmt];
1.1200 + return SQLITE_OK;
1.1201 +}
1.1202 +
1.1203 +/* Step the indicated statement, handling errors SQLITE_BUSY (by
1.1204 +** retrying) and SQLITE_SCHEMA (by re-preparing and transferring
1.1205 +** bindings to the new statement).
1.1206 +** TODO(adam): We should extend this function so that it can work with
1.1207 +** statements declared locally, not only globally cached statements.
1.1208 +*/
1.1209 +static int sql_step_statement(fulltext_vtab *v, fulltext_statement iStmt,
1.1210 + sqlite3_stmt **ppStmt){
1.1211 + int rc;
1.1212 + sqlite3_stmt *s = *ppStmt;
1.1213 + assert( iStmt<MAX_STMT );
1.1214 + assert( s==v->pFulltextStatements[iStmt] );
1.1215 +
1.1216 + while( (rc=sqlite3_step(s))!=SQLITE_DONE && rc!=SQLITE_ROW ){
1.1217 + if( rc==SQLITE_BUSY ) continue;
1.1218 + if( rc!=SQLITE_ERROR ) return rc;
1.1219 +
1.1220 + /* If an SQLITE_SCHEMA error has occured, then finalizing this
1.1221 + * statement is going to delete the fulltext_vtab structure. If
1.1222 + * the statement just executed is in the pFulltextStatements[]
1.1223 + * array, it will be finalized twice. So remove it before
1.1224 + * calling sqlite3_finalize().
1.1225 + */
1.1226 + v->pFulltextStatements[iStmt] = NULL;
1.1227 + rc = sqlite3_finalize(s);
1.1228 + break;
1.1229 + }
1.1230 + return rc;
1.1231 +
1.1232 + err:
1.1233 + sqlite3_finalize(s);
1.1234 + return rc;
1.1235 +}
1.1236 +
1.1237 +/* Like sql_step_statement(), but convert SQLITE_DONE to SQLITE_OK.
1.1238 +** Useful for statements like UPDATE, where we expect no results.
1.1239 +*/
1.1240 +static int sql_single_step_statement(fulltext_vtab *v,
1.1241 + fulltext_statement iStmt,
1.1242 + sqlite3_stmt **ppStmt){
1.1243 + int rc = sql_step_statement(v, iStmt, ppStmt);
1.1244 + return (rc==SQLITE_DONE) ? SQLITE_OK : rc;
1.1245 +}
1.1246 +
1.1247 +/* insert into %_content (rowid, ...) values ([rowid], [pValues]) */
1.1248 +static int content_insert(fulltext_vtab *v, sqlite3_value *rowid,
1.1249 + sqlite3_value **pValues){
1.1250 + sqlite3_stmt *s;
1.1251 + int i;
1.1252 + int rc = sql_get_statement(v, CONTENT_INSERT_STMT, &s);
1.1253 + if( rc!=SQLITE_OK ) return rc;
1.1254 +
1.1255 + rc = sqlite3_bind_value(s, 1, rowid);
1.1256 + if( rc!=SQLITE_OK ) return rc;
1.1257 +
1.1258 + for(i=0; i<v->nColumn; ++i){
1.1259 + rc = sqlite3_bind_value(s, 2+i, pValues[i]);
1.1260 + if( rc!=SQLITE_OK ) return rc;
1.1261 + }
1.1262 +
1.1263 + return sql_single_step_statement(v, CONTENT_INSERT_STMT, &s);
1.1264 +}
1.1265 +
1.1266 +/* update %_content set col0 = pValues[0], col1 = pValues[1], ...
1.1267 + * where rowid = [iRowid] */
1.1268 +static int content_update(fulltext_vtab *v, sqlite3_value **pValues,
1.1269 + sqlite_int64 iRowid){
1.1270 + sqlite3_stmt *s;
1.1271 + int i;
1.1272 + int rc = sql_get_statement(v, CONTENT_UPDATE_STMT, &s);
1.1273 + if( rc!=SQLITE_OK ) return rc;
1.1274 +
1.1275 + for(i=0; i<v->nColumn; ++i){
1.1276 + rc = sqlite3_bind_value(s, 1+i, pValues[i]);
1.1277 + if( rc!=SQLITE_OK ) return rc;
1.1278 + }
1.1279 +
1.1280 + rc = sqlite3_bind_int64(s, 1+v->nColumn, iRowid);
1.1281 + if( rc!=SQLITE_OK ) return rc;
1.1282 +
1.1283 + return sql_single_step_statement(v, CONTENT_UPDATE_STMT, &s);
1.1284 +}
1.1285 +
1.1286 +static void freeStringArray(int nString, const char **pString){
1.1287 + int i;
1.1288 +
1.1289 + for (i=0 ; i < nString ; ++i) {
1.1290 + if( pString[i]!=NULL ) free((void *) pString[i]);
1.1291 + }
1.1292 + free((void *) pString);
1.1293 +}
1.1294 +
1.1295 +/* select * from %_content where rowid = [iRow]
1.1296 + * The caller must delete the returned array and all strings in it.
1.1297 + * null fields will be NULL in the returned array.
1.1298 + *
1.1299 + * TODO: Perhaps we should return pointer/length strings here for consistency
1.1300 + * with other code which uses pointer/length. */
1.1301 +static int content_select(fulltext_vtab *v, sqlite_int64 iRow,
1.1302 + const char ***pValues){
1.1303 + sqlite3_stmt *s;
1.1304 + const char **values;
1.1305 + int i;
1.1306 + int rc;
1.1307 +
1.1308 + *pValues = NULL;
1.1309 +
1.1310 + rc = sql_get_statement(v, CONTENT_SELECT_STMT, &s);
1.1311 + if( rc!=SQLITE_OK ) return rc;
1.1312 +
1.1313 + rc = sqlite3_bind_int64(s, 1, iRow);
1.1314 + if( rc!=SQLITE_OK ) return rc;
1.1315 +
1.1316 + rc = sql_step_statement(v, CONTENT_SELECT_STMT, &s);
1.1317 + if( rc!=SQLITE_ROW ) return rc;
1.1318 +
1.1319 + values = (const char **) malloc(v->nColumn * sizeof(const char *));
1.1320 + for(i=0; i<v->nColumn; ++i){
1.1321 + if( sqlite3_column_type(s, i)==SQLITE_NULL ){
1.1322 + values[i] = NULL;
1.1323 + }else{
1.1324 + values[i] = string_dup((char*)sqlite3_column_text(s, i));
1.1325 + }
1.1326 + }
1.1327 +
1.1328 + /* We expect only one row. We must execute another sqlite3_step()
1.1329 + * to complete the iteration; otherwise the table will remain locked. */
1.1330 + rc = sqlite3_step(s);
1.1331 + if( rc==SQLITE_DONE ){
1.1332 + *pValues = values;
1.1333 + return SQLITE_OK;
1.1334 + }
1.1335 +
1.1336 + freeStringArray(v->nColumn, values);
1.1337 + return rc;
1.1338 +}
1.1339 +
1.1340 +/* delete from %_content where rowid = [iRow ] */
1.1341 +static int content_delete(fulltext_vtab *v, sqlite_int64 iRow){
1.1342 + sqlite3_stmt *s;
1.1343 + int rc = sql_get_statement(v, CONTENT_DELETE_STMT, &s);
1.1344 + if( rc!=SQLITE_OK ) return rc;
1.1345 +
1.1346 + rc = sqlite3_bind_int64(s, 1, iRow);
1.1347 + if( rc!=SQLITE_OK ) return rc;
1.1348 +
1.1349 + return sql_single_step_statement(v, CONTENT_DELETE_STMT, &s);
1.1350 +}
1.1351 +
1.1352 +/* select rowid, doclist from %_term
1.1353 + * where term = [pTerm] and segment = [iSegment]
1.1354 + * If found, returns SQLITE_ROW; the caller must free the
1.1355 + * returned doclist. If no rows found, returns SQLITE_DONE. */
1.1356 +static int term_select(fulltext_vtab *v, const char *pTerm, int nTerm,
1.1357 + int iSegment,
1.1358 + sqlite_int64 *rowid, DocList *out){
1.1359 + sqlite3_stmt *s;
1.1360 + int rc = sql_get_statement(v, TERM_SELECT_STMT, &s);
1.1361 + if( rc!=SQLITE_OK ) return rc;
1.1362 +
1.1363 + rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC);
1.1364 + if( rc!=SQLITE_OK ) return rc;
1.1365 +
1.1366 + rc = sqlite3_bind_int(s, 2, iSegment);
1.1367 + if( rc!=SQLITE_OK ) return rc;
1.1368 +
1.1369 + rc = sql_step_statement(v, TERM_SELECT_STMT, &s);
1.1370 + if( rc!=SQLITE_ROW ) return rc;
1.1371 +
1.1372 + *rowid = sqlite3_column_int64(s, 0);
1.1373 + docListInit(out, DL_DEFAULT,
1.1374 + sqlite3_column_blob(s, 1), sqlite3_column_bytes(s, 1));
1.1375 +
1.1376 + /* We expect only one row. We must execute another sqlite3_step()
1.1377 + * to complete the iteration; otherwise the table will remain locked. */
1.1378 + rc = sqlite3_step(s);
1.1379 + return rc==SQLITE_DONE ? SQLITE_ROW : rc;
1.1380 +}
1.1381 +
1.1382 +/* Load the segment doclists for term pTerm and merge them in
1.1383 +** appropriate order into out. Returns SQLITE_OK if successful. If
1.1384 +** there are no segments for pTerm, successfully returns an empty
1.1385 +** doclist in out.
1.1386 +**
1.1387 +** Each document consists of 1 or more "columns". The number of
1.1388 +** columns is v->nColumn. If iColumn==v->nColumn, then return
1.1389 +** position information about all columns. If iColumn<v->nColumn,
1.1390 +** then only return position information about the iColumn-th column
1.1391 +** (where the first column is 0).
1.1392 +*/
1.1393 +static int term_select_all(
1.1394 + fulltext_vtab *v, /* The fulltext index we are querying against */
1.1395 + int iColumn, /* If <nColumn, only look at the iColumn-th column */
1.1396 + const char *pTerm, /* The term whose posting lists we want */
1.1397 + int nTerm, /* Number of bytes in pTerm */
1.1398 + DocList *out /* Write the resulting doclist here */
1.1399 +){
1.1400 + DocList doclist;
1.1401 + sqlite3_stmt *s;
1.1402 + int rc = sql_get_statement(v, TERM_SELECT_ALL_STMT, &s);
1.1403 + if( rc!=SQLITE_OK ) return rc;
1.1404 +
1.1405 + rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC);
1.1406 + if( rc!=SQLITE_OK ) return rc;
1.1407 +
1.1408 + docListInit(&doclist, DL_DEFAULT, 0, 0);
1.1409 +
1.1410 + /* TODO(shess) Handle schema and busy errors. */
1.1411 + while( (rc=sql_step_statement(v, TERM_SELECT_ALL_STMT, &s))==SQLITE_ROW ){
1.1412 + DocList old;
1.1413 +
1.1414 + /* TODO(shess) If we processed doclists from oldest to newest, we
1.1415 + ** could skip the malloc() involved with the following call. For
1.1416 + ** now, I'd rather keep this logic similar to index_insert_term().
1.1417 + ** We could additionally drop elements when we see deletes, but
1.1418 + ** that would require a distinct version of docListAccumulate().
1.1419 + */
1.1420 + docListInit(&old, DL_DEFAULT,
1.1421 + sqlite3_column_blob(s, 0), sqlite3_column_bytes(s, 0));
1.1422 +
1.1423 + if( iColumn<v->nColumn ){ /* querying a single column */
1.1424 + docListRestrictColumn(&old, iColumn);
1.1425 + }
1.1426 +
1.1427 + /* doclist contains the newer data, so write it over old. Then
1.1428 + ** steal accumulated result for doclist.
1.1429 + */
1.1430 + docListAccumulate(&old, &doclist);
1.1431 + docListDestroy(&doclist);
1.1432 + doclist = old;
1.1433 + }
1.1434 + if( rc!=SQLITE_DONE ){
1.1435 + docListDestroy(&doclist);
1.1436 + return rc;
1.1437 + }
1.1438 +
1.1439 + docListDiscardEmpty(&doclist);
1.1440 + *out = doclist;
1.1441 + return SQLITE_OK;
1.1442 +}
1.1443 +
1.1444 +/* insert into %_term (rowid, term, segment, doclist)
1.1445 + values ([piRowid], [pTerm], [iSegment], [doclist])
1.1446 +** Lets sqlite select rowid if piRowid is NULL, else uses *piRowid.
1.1447 +**
1.1448 +** NOTE(shess) piRowid is IN, with values of "space of int64" plus
1.1449 +** null, it is not used to pass data back to the caller.
1.1450 +*/
1.1451 +static int term_insert(fulltext_vtab *v, sqlite_int64 *piRowid,
1.1452 + const char *pTerm, int nTerm,
1.1453 + int iSegment, DocList *doclist){
1.1454 + sqlite3_stmt *s;
1.1455 + int rc = sql_get_statement(v, TERM_INSERT_STMT, &s);
1.1456 + if( rc!=SQLITE_OK ) return rc;
1.1457 +
1.1458 + if( piRowid==NULL ){
1.1459 + rc = sqlite3_bind_null(s, 1);
1.1460 + }else{
1.1461 + rc = sqlite3_bind_int64(s, 1, *piRowid);
1.1462 + }
1.1463 + if( rc!=SQLITE_OK ) return rc;
1.1464 +
1.1465 + rc = sqlite3_bind_text(s, 2, pTerm, nTerm, SQLITE_STATIC);
1.1466 + if( rc!=SQLITE_OK ) return rc;
1.1467 +
1.1468 + rc = sqlite3_bind_int(s, 3, iSegment);
1.1469 + if( rc!=SQLITE_OK ) return rc;
1.1470 +
1.1471 + rc = sqlite3_bind_blob(s, 4, doclist->pData, doclist->nData, SQLITE_STATIC);
1.1472 + if( rc!=SQLITE_OK ) return rc;
1.1473 +
1.1474 + return sql_single_step_statement(v, TERM_INSERT_STMT, &s);
1.1475 +}
1.1476 +
1.1477 +/* update %_term set doclist = [doclist] where rowid = [rowid] */
1.1478 +static int term_update(fulltext_vtab *v, sqlite_int64 rowid,
1.1479 + DocList *doclist){
1.1480 + sqlite3_stmt *s;
1.1481 + int rc = sql_get_statement(v, TERM_UPDATE_STMT, &s);
1.1482 + if( rc!=SQLITE_OK ) return rc;
1.1483 +
1.1484 + rc = sqlite3_bind_blob(s, 1, doclist->pData, doclist->nData, SQLITE_STATIC);
1.1485 + if( rc!=SQLITE_OK ) return rc;
1.1486 +
1.1487 + rc = sqlite3_bind_int64(s, 2, rowid);
1.1488 + if( rc!=SQLITE_OK ) return rc;
1.1489 +
1.1490 + return sql_single_step_statement(v, TERM_UPDATE_STMT, &s);
1.1491 +}
1.1492 +
1.1493 +static int term_delete(fulltext_vtab *v, sqlite_int64 rowid){
1.1494 + sqlite3_stmt *s;
1.1495 + int rc = sql_get_statement(v, TERM_DELETE_STMT, &s);
1.1496 + if( rc!=SQLITE_OK ) return rc;
1.1497 +
1.1498 + rc = sqlite3_bind_int64(s, 1, rowid);
1.1499 + if( rc!=SQLITE_OK ) return rc;
1.1500 +
1.1501 + return sql_single_step_statement(v, TERM_DELETE_STMT, &s);
1.1502 +}
1.1503 +
1.1504 +/*
1.1505 +** Free the memory used to contain a fulltext_vtab structure.
1.1506 +*/
1.1507 +static void fulltext_vtab_destroy(fulltext_vtab *v){
1.1508 + int iStmt, i;
1.1509 +
1.1510 + TRACE(("FTS1 Destroy %p\n", v));
1.1511 + for( iStmt=0; iStmt<MAX_STMT; iStmt++ ){
1.1512 + if( v->pFulltextStatements[iStmt]!=NULL ){
1.1513 + sqlite3_finalize(v->pFulltextStatements[iStmt]);
1.1514 + v->pFulltextStatements[iStmt] = NULL;
1.1515 + }
1.1516 + }
1.1517 +
1.1518 + if( v->pTokenizer!=NULL ){
1.1519 + v->pTokenizer->pModule->xDestroy(v->pTokenizer);
1.1520 + v->pTokenizer = NULL;
1.1521 + }
1.1522 +
1.1523 + free(v->azColumn);
1.1524 + for(i = 0; i < v->nColumn; ++i) {
1.1525 + sqlite3_free(v->azContentColumn[i]);
1.1526 + }
1.1527 + free(v->azContentColumn);
1.1528 + free(v);
1.1529 +}
1.1530 +
1.1531 +/*
1.1532 +** Token types for parsing the arguments to xConnect or xCreate.
1.1533 +*/
1.1534 +#define TOKEN_EOF 0 /* End of file */
1.1535 +#define TOKEN_SPACE 1 /* Any kind of whitespace */
1.1536 +#define TOKEN_ID 2 /* An identifier */
1.1537 +#define TOKEN_STRING 3 /* A string literal */
1.1538 +#define TOKEN_PUNCT 4 /* A single punctuation character */
1.1539 +
1.1540 +/*
1.1541 +** If X is a character that can be used in an identifier then
1.1542 +** IdChar(X) will be true. Otherwise it is false.
1.1543 +**
1.1544 +** For ASCII, any character with the high-order bit set is
1.1545 +** allowed in an identifier. For 7-bit characters,
1.1546 +** sqlite3IsIdChar[X] must be 1.
1.1547 +**
1.1548 +** Ticket #1066. the SQL standard does not allow '$' in the
1.1549 +** middle of identfiers. But many SQL implementations do.
1.1550 +** SQLite will allow '$' in identifiers for compatibility.
1.1551 +** But the feature is undocumented.
1.1552 +*/
1.1553 +static const char isIdChar[] = {
1.1554 +/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
1.1555 + 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */
1.1556 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
1.1557 + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
1.1558 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
1.1559 + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
1.1560 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
1.1561 +};
1.1562 +#define IdChar(C) (((c=C)&0x80)!=0 || (c>0x1f && isIdChar[c-0x20]))
1.1563 +
1.1564 +
1.1565 +/*
1.1566 +** Return the length of the token that begins at z[0].
1.1567 +** Store the token type in *tokenType before returning.
1.1568 +*/
1.1569 +static int getToken(const char *z, int *tokenType){
1.1570 + int i, c;
1.1571 + switch( *z ){
1.1572 + case 0: {
1.1573 + *tokenType = TOKEN_EOF;
1.1574 + return 0;
1.1575 + }
1.1576 + case ' ': case '\t': case '\n': case '\f': case '\r': {
1.1577 + for(i=1; safe_isspace(z[i]); i++){}
1.1578 + *tokenType = TOKEN_SPACE;
1.1579 + return i;
1.1580 + }
1.1581 + case '`':
1.1582 + case '\'':
1.1583 + case '"': {
1.1584 + int delim = z[0];
1.1585 + for(i=1; (c=z[i])!=0; i++){
1.1586 + if( c==delim ){
1.1587 + if( z[i+1]==delim ){
1.1588 + i++;
1.1589 + }else{
1.1590 + break;
1.1591 + }
1.1592 + }
1.1593 + }
1.1594 + *tokenType = TOKEN_STRING;
1.1595 + return i + (c!=0);
1.1596 + }
1.1597 + case '[': {
1.1598 + for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){}
1.1599 + *tokenType = TOKEN_ID;
1.1600 + return i;
1.1601 + }
1.1602 + default: {
1.1603 + if( !IdChar(*z) ){
1.1604 + break;
1.1605 + }
1.1606 + for(i=1; IdChar(z[i]); i++){}
1.1607 + *tokenType = TOKEN_ID;
1.1608 + return i;
1.1609 + }
1.1610 + }
1.1611 + *tokenType = TOKEN_PUNCT;
1.1612 + return 1;
1.1613 +}
1.1614 +
1.1615 +/*
1.1616 +** A token extracted from a string is an instance of the following
1.1617 +** structure.
1.1618 +*/
1.1619 +typedef struct Token {
1.1620 + const char *z; /* Pointer to token text. Not '\000' terminated */
1.1621 + short int n; /* Length of the token text in bytes. */
1.1622 +} Token;
1.1623 +
1.1624 +/*
1.1625 +** Given a input string (which is really one of the argv[] parameters
1.1626 +** passed into xConnect or xCreate) split the string up into tokens.
1.1627 +** Return an array of pointers to '\000' terminated strings, one string
1.1628 +** for each non-whitespace token.
1.1629 +**
1.1630 +** The returned array is terminated by a single NULL pointer.
1.1631 +**
1.1632 +** Space to hold the returned array is obtained from a single
1.1633 +** malloc and should be freed by passing the return value to free().
1.1634 +** The individual strings within the token list are all a part of
1.1635 +** the single memory allocation and will all be freed at once.
1.1636 +*/
1.1637 +static char **tokenizeString(const char *z, int *pnToken){
1.1638 + int nToken = 0;
1.1639 + Token *aToken = malloc( strlen(z) * sizeof(aToken[0]) );
1.1640 + int n = 1;
1.1641 + int e, i;
1.1642 + int totalSize = 0;
1.1643 + char **azToken;
1.1644 + char *zCopy;
1.1645 + while( n>0 ){
1.1646 + n = getToken(z, &e);
1.1647 + if( e!=TOKEN_SPACE ){
1.1648 + aToken[nToken].z = z;
1.1649 + aToken[nToken].n = n;
1.1650 + nToken++;
1.1651 + totalSize += n+1;
1.1652 + }
1.1653 + z += n;
1.1654 + }
1.1655 + azToken = (char**)malloc( nToken*sizeof(char*) + totalSize );
1.1656 + zCopy = (char*)&azToken[nToken];
1.1657 + nToken--;
1.1658 + for(i=0; i<nToken; i++){
1.1659 + azToken[i] = zCopy;
1.1660 + n = aToken[i].n;
1.1661 + memcpy(zCopy, aToken[i].z, n);
1.1662 + zCopy[n] = 0;
1.1663 + zCopy += n+1;
1.1664 + }
1.1665 + azToken[nToken] = 0;
1.1666 + free(aToken);
1.1667 + *pnToken = nToken;
1.1668 + return azToken;
1.1669 +}
1.1670 +
1.1671 +/*
1.1672 +** Convert an SQL-style quoted string into a normal string by removing
1.1673 +** the quote characters. The conversion is done in-place. If the
1.1674 +** input does not begin with a quote character, then this routine
1.1675 +** is a no-op.
1.1676 +**
1.1677 +** Examples:
1.1678 +**
1.1679 +** "abc" becomes abc
1.1680 +** 'xyz' becomes xyz
1.1681 +** [pqr] becomes pqr
1.1682 +** `mno` becomes mno
1.1683 +*/
1.1684 +static void dequoteString(char *z){
1.1685 + int quote;
1.1686 + int i, j;
1.1687 + if( z==0 ) return;
1.1688 + quote = z[0];
1.1689 + switch( quote ){
1.1690 + case '\'': break;
1.1691 + case '"': break;
1.1692 + case '`': break; /* For MySQL compatibility */
1.1693 + case '[': quote = ']'; break; /* For MS SqlServer compatibility */
1.1694 + default: return;
1.1695 + }
1.1696 + for(i=1, j=0; z[i]; i++){
1.1697 + if( z[i]==quote ){
1.1698 + if( z[i+1]==quote ){
1.1699 + z[j++] = quote;
1.1700 + i++;
1.1701 + }else{
1.1702 + z[j++] = 0;
1.1703 + break;
1.1704 + }
1.1705 + }else{
1.1706 + z[j++] = z[i];
1.1707 + }
1.1708 + }
1.1709 +}
1.1710 +
1.1711 +/*
1.1712 +** The input azIn is a NULL-terminated list of tokens. Remove the first
1.1713 +** token and all punctuation tokens. Remove the quotes from
1.1714 +** around string literal tokens.
1.1715 +**
1.1716 +** Example:
1.1717 +**
1.1718 +** input: tokenize chinese ( 'simplifed' , 'mixed' )
1.1719 +** output: chinese simplifed mixed
1.1720 +**
1.1721 +** Another example:
1.1722 +**
1.1723 +** input: delimiters ( '[' , ']' , '...' )
1.1724 +** output: [ ] ...
1.1725 +*/
1.1726 +static void tokenListToIdList(char **azIn){
1.1727 + int i, j;
1.1728 + if( azIn ){
1.1729 + for(i=0, j=-1; azIn[i]; i++){
1.1730 + if( safe_isalnum(azIn[i][0]) || azIn[i][1] ){
1.1731 + dequoteString(azIn[i]);
1.1732 + if( j>=0 ){
1.1733 + azIn[j] = azIn[i];
1.1734 + }
1.1735 + j++;
1.1736 + }
1.1737 + }
1.1738 + azIn[j] = 0;
1.1739 + }
1.1740 +}
1.1741 +
1.1742 +
1.1743 +/*
1.1744 +** Find the first alphanumeric token in the string zIn. Null-terminate
1.1745 +** this token. Remove any quotation marks. And return a pointer to
1.1746 +** the result.
1.1747 +*/
1.1748 +static char *firstToken(char *zIn, char **pzTail){
1.1749 + int n, ttype;
1.1750 + while(1){
1.1751 + n = getToken(zIn, &ttype);
1.1752 + if( ttype==TOKEN_SPACE ){
1.1753 + zIn += n;
1.1754 + }else if( ttype==TOKEN_EOF ){
1.1755 + *pzTail = zIn;
1.1756 + return 0;
1.1757 + }else{
1.1758 + zIn[n] = 0;
1.1759 + *pzTail = &zIn[1];
1.1760 + dequoteString(zIn);
1.1761 + return zIn;
1.1762 + }
1.1763 + }
1.1764 + /*NOTREACHED*/
1.1765 +}
1.1766 +
1.1767 +/* Return true if...
1.1768 +**
1.1769 +** * s begins with the string t, ignoring case
1.1770 +** * s is longer than t
1.1771 +** * The first character of s beyond t is not a alphanumeric
1.1772 +**
1.1773 +** Ignore leading space in *s.
1.1774 +**
1.1775 +** To put it another way, return true if the first token of
1.1776 +** s[] is t[].
1.1777 +*/
1.1778 +static int startsWith(const char *s, const char *t){
1.1779 + while( safe_isspace(*s) ){ s++; }
1.1780 + while( *t ){
1.1781 + if( safe_tolower(*s++)!=safe_tolower(*t++) ) return 0;
1.1782 + }
1.1783 + return *s!='_' && !safe_isalnum(*s);
1.1784 +}
1.1785 +
1.1786 +/*
1.1787 +** An instance of this structure defines the "spec" of a
1.1788 +** full text index. This structure is populated by parseSpec
1.1789 +** and use by fulltextConnect and fulltextCreate.
1.1790 +*/
1.1791 +typedef struct TableSpec {
1.1792 + const char *zDb; /* Logical database name */
1.1793 + const char *zName; /* Name of the full-text index */
1.1794 + int nColumn; /* Number of columns to be indexed */
1.1795 + char **azColumn; /* Original names of columns to be indexed */
1.1796 + char **azContentColumn; /* Column names for %_content */
1.1797 + char **azTokenizer; /* Name of tokenizer and its arguments */
1.1798 +} TableSpec;
1.1799 +
1.1800 +/*
1.1801 +** Reclaim all of the memory used by a TableSpec
1.1802 +*/
1.1803 +static void clearTableSpec(TableSpec *p) {
1.1804 + free(p->azColumn);
1.1805 + free(p->azContentColumn);
1.1806 + free(p->azTokenizer);
1.1807 +}
1.1808 +
1.1809 +/* Parse a CREATE VIRTUAL TABLE statement, which looks like this:
1.1810 + *
1.1811 + * CREATE VIRTUAL TABLE email
1.1812 + * USING fts1(subject, body, tokenize mytokenizer(myarg))
1.1813 + *
1.1814 + * We return parsed information in a TableSpec structure.
1.1815 + *
1.1816 + */
1.1817 +static int parseSpec(TableSpec *pSpec, int argc, const char *const*argv,
1.1818 + char**pzErr){
1.1819 + int i, n;
1.1820 + char *z, *zDummy;
1.1821 + char **azArg;
1.1822 + const char *zTokenizer = 0; /* argv[] entry describing the tokenizer */
1.1823 +
1.1824 + assert( argc>=3 );
1.1825 + /* Current interface:
1.1826 + ** argv[0] - module name
1.1827 + ** argv[1] - database name
1.1828 + ** argv[2] - table name
1.1829 + ** argv[3..] - columns, optionally followed by tokenizer specification
1.1830 + ** and snippet delimiters specification.
1.1831 + */
1.1832 +
1.1833 + /* Make a copy of the complete argv[][] array in a single allocation.
1.1834 + ** The argv[][] array is read-only and transient. We can write to the
1.1835 + ** copy in order to modify things and the copy is persistent.
1.1836 + */
1.1837 + memset(pSpec, 0, sizeof(*pSpec));
1.1838 + for(i=n=0; i<argc; i++){
1.1839 + n += strlen(argv[i]) + 1;
1.1840 + }
1.1841 + azArg = malloc( sizeof(char*)*argc + n );
1.1842 + if( azArg==0 ){
1.1843 + return SQLITE_NOMEM;
1.1844 + }
1.1845 + z = (char*)&azArg[argc];
1.1846 + for(i=0; i<argc; i++){
1.1847 + azArg[i] = z;
1.1848 + strcpy(z, argv[i]);
1.1849 + z += strlen(z)+1;
1.1850 + }
1.1851 +
1.1852 + /* Identify the column names and the tokenizer and delimiter arguments
1.1853 + ** in the argv[][] array.
1.1854 + */
1.1855 + pSpec->zDb = azArg[1];
1.1856 + pSpec->zName = azArg[2];
1.1857 + pSpec->nColumn = 0;
1.1858 + pSpec->azColumn = azArg;
1.1859 + zTokenizer = "tokenize simple";
1.1860 + for(i=3; i<argc; ++i){
1.1861 + if( startsWith(azArg[i],"tokenize") ){
1.1862 + zTokenizer = azArg[i];
1.1863 + }else{
1.1864 + z = azArg[pSpec->nColumn] = firstToken(azArg[i], &zDummy);
1.1865 + pSpec->nColumn++;
1.1866 + }
1.1867 + }
1.1868 + if( pSpec->nColumn==0 ){
1.1869 + azArg[0] = "content";
1.1870 + pSpec->nColumn = 1;
1.1871 + }
1.1872 +
1.1873 + /*
1.1874 + ** Construct the list of content column names.
1.1875 + **
1.1876 + ** Each content column name will be of the form cNNAAAA
1.1877 + ** where NN is the column number and AAAA is the sanitized
1.1878 + ** column name. "sanitized" means that special characters are
1.1879 + ** converted to "_". The cNN prefix guarantees that all column
1.1880 + ** names are unique.
1.1881 + **
1.1882 + ** The AAAA suffix is not strictly necessary. It is included
1.1883 + ** for the convenience of people who might examine the generated
1.1884 + ** %_content table and wonder what the columns are used for.
1.1885 + */
1.1886 + pSpec->azContentColumn = malloc( pSpec->nColumn * sizeof(char *) );
1.1887 + if( pSpec->azContentColumn==0 ){
1.1888 + clearTableSpec(pSpec);
1.1889 + return SQLITE_NOMEM;
1.1890 + }
1.1891 + for(i=0; i<pSpec->nColumn; i++){
1.1892 + char *p;
1.1893 + pSpec->azContentColumn[i] = sqlite3_mprintf("c%d%s", i, azArg[i]);
1.1894 + for (p = pSpec->azContentColumn[i]; *p ; ++p) {
1.1895 + if( !safe_isalnum(*p) ) *p = '_';
1.1896 + }
1.1897 + }
1.1898 +
1.1899 + /*
1.1900 + ** Parse the tokenizer specification string.
1.1901 + */
1.1902 + pSpec->azTokenizer = tokenizeString(zTokenizer, &n);
1.1903 + tokenListToIdList(pSpec->azTokenizer);
1.1904 +
1.1905 + return SQLITE_OK;
1.1906 +}
1.1907 +
1.1908 +/*
1.1909 +** Generate a CREATE TABLE statement that describes the schema of
1.1910 +** the virtual table. Return a pointer to this schema string.
1.1911 +**
1.1912 +** Space is obtained from sqlite3_mprintf() and should be freed
1.1913 +** using sqlite3_free().
1.1914 +*/
1.1915 +static char *fulltextSchema(
1.1916 + int nColumn, /* Number of columns */
1.1917 + const char *const* azColumn, /* List of columns */
1.1918 + const char *zTableName /* Name of the table */
1.1919 +){
1.1920 + int i;
1.1921 + char *zSchema, *zNext;
1.1922 + const char *zSep = "(";
1.1923 + zSchema = sqlite3_mprintf("CREATE TABLE x");
1.1924 + for(i=0; i<nColumn; i++){
1.1925 + zNext = sqlite3_mprintf("%s%s%Q", zSchema, zSep, azColumn[i]);
1.1926 + sqlite3_free(zSchema);
1.1927 + zSchema = zNext;
1.1928 + zSep = ",";
1.1929 + }
1.1930 + zNext = sqlite3_mprintf("%s,%Q)", zSchema, zTableName);
1.1931 + sqlite3_free(zSchema);
1.1932 + return zNext;
1.1933 +}
1.1934 +
1.1935 +/*
1.1936 +** Build a new sqlite3_vtab structure that will describe the
1.1937 +** fulltext index defined by spec.
1.1938 +*/
1.1939 +static int constructVtab(
1.1940 + sqlite3 *db, /* The SQLite database connection */
1.1941 + TableSpec *spec, /* Parsed spec information from parseSpec() */
1.1942 + sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */
1.1943 + char **pzErr /* Write any error message here */
1.1944 +){
1.1945 + int rc;
1.1946 + int n;
1.1947 + fulltext_vtab *v = 0;
1.1948 + const sqlite3_tokenizer_module *m = NULL;
1.1949 + char *schema;
1.1950 +
1.1951 + v = (fulltext_vtab *) malloc(sizeof(fulltext_vtab));
1.1952 + if( v==0 ) return SQLITE_NOMEM;
1.1953 + memset(v, 0, sizeof(*v));
1.1954 + /* sqlite will initialize v->base */
1.1955 + v->db = db;
1.1956 + v->zDb = spec->zDb; /* Freed when azColumn is freed */
1.1957 + v->zName = spec->zName; /* Freed when azColumn is freed */
1.1958 + v->nColumn = spec->nColumn;
1.1959 + v->azContentColumn = spec->azContentColumn;
1.1960 + spec->azContentColumn = 0;
1.1961 + v->azColumn = spec->azColumn;
1.1962 + spec->azColumn = 0;
1.1963 +
1.1964 + if( spec->azTokenizer==0 ){
1.1965 + return SQLITE_NOMEM;
1.1966 + }
1.1967 + /* TODO(shess) For now, add new tokenizers as else if clauses. */
1.1968 + if( spec->azTokenizer[0]==0 || startsWith(spec->azTokenizer[0], "simple") ){
1.1969 + sqlite3Fts1SimpleTokenizerModule(&m);
1.1970 + }else if( startsWith(spec->azTokenizer[0], "porter") ){
1.1971 + sqlite3Fts1PorterTokenizerModule(&m);
1.1972 + }else{
1.1973 + *pzErr = sqlite3_mprintf("unknown tokenizer: %s", spec->azTokenizer[0]);
1.1974 + rc = SQLITE_ERROR;
1.1975 + goto err;
1.1976 + }
1.1977 + for(n=0; spec->azTokenizer[n]; n++){}
1.1978 + if( n ){
1.1979 + rc = m->xCreate(n-1, (const char*const*)&spec->azTokenizer[1],
1.1980 + &v->pTokenizer);
1.1981 + }else{
1.1982 + rc = m->xCreate(0, 0, &v->pTokenizer);
1.1983 + }
1.1984 + if( rc!=SQLITE_OK ) goto err;
1.1985 + v->pTokenizer->pModule = m;
1.1986 +
1.1987 + /* TODO: verify the existence of backing tables foo_content, foo_term */
1.1988 +
1.1989 + schema = fulltextSchema(v->nColumn, (const char*const*)v->azColumn,
1.1990 + spec->zName);
1.1991 + rc = sqlite3_declare_vtab(db, schema);
1.1992 + sqlite3_free(schema);
1.1993 + if( rc!=SQLITE_OK ) goto err;
1.1994 +
1.1995 + memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements));
1.1996 +
1.1997 + *ppVTab = &v->base;
1.1998 + TRACE(("FTS1 Connect %p\n", v));
1.1999 +
1.2000 + return rc;
1.2001 +
1.2002 +err:
1.2003 + fulltext_vtab_destroy(v);
1.2004 + return rc;
1.2005 +}
1.2006 +
1.2007 +static int fulltextConnect(
1.2008 + sqlite3 *db,
1.2009 + void *pAux,
1.2010 + int argc, const char *const*argv,
1.2011 + sqlite3_vtab **ppVTab,
1.2012 + char **pzErr
1.2013 +){
1.2014 + TableSpec spec;
1.2015 + int rc = parseSpec(&spec, argc, argv, pzErr);
1.2016 + if( rc!=SQLITE_OK ) return rc;
1.2017 +
1.2018 + rc = constructVtab(db, &spec, ppVTab, pzErr);
1.2019 + clearTableSpec(&spec);
1.2020 + return rc;
1.2021 +}
1.2022 +
1.2023 + /* The %_content table holds the text of each document, with
1.2024 + ** the rowid used as the docid.
1.2025 + **
1.2026 + ** The %_term table maps each term to a document list blob
1.2027 + ** containing elements sorted by ascending docid, each element
1.2028 + ** encoded as:
1.2029 + **
1.2030 + ** docid varint-encoded
1.2031 + ** token elements:
1.2032 + ** position+1 varint-encoded as delta from previous position
1.2033 + ** start offset varint-encoded as delta from previous start offset
1.2034 + ** end offset varint-encoded as delta from start offset
1.2035 + **
1.2036 + ** The sentinel position of 0 indicates the end of the token list.
1.2037 + **
1.2038 + ** Additionally, doclist blobs are chunked into multiple segments,
1.2039 + ** using segment to order the segments. New elements are added to
1.2040 + ** the segment at segment 0, until it exceeds CHUNK_MAX. Then
1.2041 + ** segment 0 is deleted, and the doclist is inserted at segment 1.
1.2042 + ** If there is already a doclist at segment 1, the segment 0 doclist
1.2043 + ** is merged with it, the segment 1 doclist is deleted, and the
1.2044 + ** merged doclist is inserted at segment 2, repeating those
1.2045 + ** operations until an insert succeeds.
1.2046 + **
1.2047 + ** Since this structure doesn't allow us to update elements in place
1.2048 + ** in case of deletion or update, these are simply written to
1.2049 + ** segment 0 (with an empty token list in case of deletion), with
1.2050 + ** docListAccumulate() taking care to retain lower-segment
1.2051 + ** information in preference to higher-segment information.
1.2052 + */
1.2053 + /* TODO(shess) Provide a VACUUM type operation which both removes
1.2054 + ** deleted elements which are no longer necessary, and duplicated
1.2055 + ** elements. I suspect this will probably not be necessary in
1.2056 + ** practice, though.
1.2057 + */
1.2058 +static int fulltextCreate(sqlite3 *db, void *pAux,
1.2059 + int argc, const char * const *argv,
1.2060 + sqlite3_vtab **ppVTab, char **pzErr){
1.2061 + int rc;
1.2062 + TableSpec spec;
1.2063 + StringBuffer schema;
1.2064 + TRACE(("FTS1 Create\n"));
1.2065 +
1.2066 + rc = parseSpec(&spec, argc, argv, pzErr);
1.2067 + if( rc!=SQLITE_OK ) return rc;
1.2068 +
1.2069 + initStringBuffer(&schema);
1.2070 + append(&schema, "CREATE TABLE %_content(");
1.2071 + appendList(&schema, spec.nColumn, spec.azContentColumn);
1.2072 + append(&schema, ")");
1.2073 + rc = sql_exec(db, spec.zDb, spec.zName, schema.s);
1.2074 + free(schema.s);
1.2075 + if( rc!=SQLITE_OK ) goto out;
1.2076 +
1.2077 + rc = sql_exec(db, spec.zDb, spec.zName,
1.2078 + "create table %_term(term text, segment integer, doclist blob, "
1.2079 + "primary key(term, segment));");
1.2080 + if( rc!=SQLITE_OK ) goto out;
1.2081 +
1.2082 + rc = constructVtab(db, &spec, ppVTab, pzErr);
1.2083 +
1.2084 +out:
1.2085 + clearTableSpec(&spec);
1.2086 + return rc;
1.2087 +}
1.2088 +
1.2089 +/* Decide how to handle an SQL query. */
1.2090 +static int fulltextBestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
1.2091 + int i;
1.2092 + TRACE(("FTS1 BestIndex\n"));
1.2093 +
1.2094 + for(i=0; i<pInfo->nConstraint; ++i){
1.2095 + const struct sqlite3_index_constraint *pConstraint;
1.2096 + pConstraint = &pInfo->aConstraint[i];
1.2097 + if( pConstraint->usable ) {
1.2098 + if( pConstraint->iColumn==-1 &&
1.2099 + pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ ){
1.2100 + pInfo->idxNum = QUERY_ROWID; /* lookup by rowid */
1.2101 + TRACE(("FTS1 QUERY_ROWID\n"));
1.2102 + } else if( pConstraint->iColumn>=0 &&
1.2103 + pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH ){
1.2104 + /* full-text search */
1.2105 + pInfo->idxNum = QUERY_FULLTEXT + pConstraint->iColumn;
1.2106 + TRACE(("FTS1 QUERY_FULLTEXT %d\n", pConstraint->iColumn));
1.2107 + } else continue;
1.2108 +
1.2109 + pInfo->aConstraintUsage[i].argvIndex = 1;
1.2110 + pInfo->aConstraintUsage[i].omit = 1;
1.2111 +
1.2112 + /* An arbitrary value for now.
1.2113 + * TODO: Perhaps rowid matches should be considered cheaper than
1.2114 + * full-text searches. */
1.2115 + pInfo->estimatedCost = 1.0;
1.2116 +
1.2117 + return SQLITE_OK;
1.2118 + }
1.2119 + }
1.2120 + pInfo->idxNum = QUERY_GENERIC;
1.2121 + return SQLITE_OK;
1.2122 +}
1.2123 +
1.2124 +static int fulltextDisconnect(sqlite3_vtab *pVTab){
1.2125 + TRACE(("FTS1 Disconnect %p\n", pVTab));
1.2126 + fulltext_vtab_destroy((fulltext_vtab *)pVTab);
1.2127 + return SQLITE_OK;
1.2128 +}
1.2129 +
1.2130 +static int fulltextDestroy(sqlite3_vtab *pVTab){
1.2131 + fulltext_vtab *v = (fulltext_vtab *)pVTab;
1.2132 + int rc;
1.2133 +
1.2134 + TRACE(("FTS1 Destroy %p\n", pVTab));
1.2135 + rc = sql_exec(v->db, v->zDb, v->zName,
1.2136 + "drop table if exists %_content;"
1.2137 + "drop table if exists %_term;"
1.2138 + );
1.2139 + if( rc!=SQLITE_OK ) return rc;
1.2140 +
1.2141 + fulltext_vtab_destroy((fulltext_vtab *)pVTab);
1.2142 + return SQLITE_OK;
1.2143 +}
1.2144 +
1.2145 +static int fulltextOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
1.2146 + fulltext_cursor *c;
1.2147 +
1.2148 + c = (fulltext_cursor *) calloc(sizeof(fulltext_cursor), 1);
1.2149 + /* sqlite will initialize c->base */
1.2150 + *ppCursor = &c->base;
1.2151 + TRACE(("FTS1 Open %p: %p\n", pVTab, c));
1.2152 +
1.2153 + return SQLITE_OK;
1.2154 +}
1.2155 +
1.2156 +
1.2157 +/* Free all of the dynamically allocated memory held by *q
1.2158 +*/
1.2159 +static void queryClear(Query *q){
1.2160 + int i;
1.2161 + for(i = 0; i < q->nTerms; ++i){
1.2162 + free(q->pTerms[i].pTerm);
1.2163 + }
1.2164 + free(q->pTerms);
1.2165 + memset(q, 0, sizeof(*q));
1.2166 +}
1.2167 +
1.2168 +/* Free all of the dynamically allocated memory held by the
1.2169 +** Snippet
1.2170 +*/
1.2171 +static void snippetClear(Snippet *p){
1.2172 + free(p->aMatch);
1.2173 + free(p->zOffset);
1.2174 + free(p->zSnippet);
1.2175 + memset(p, 0, sizeof(*p));
1.2176 +}
1.2177 +/*
1.2178 +** Append a single entry to the p->aMatch[] log.
1.2179 +*/
1.2180 +static void snippetAppendMatch(
1.2181 + Snippet *p, /* Append the entry to this snippet */
1.2182 + int iCol, int iTerm, /* The column and query term */
1.2183 + int iStart, int nByte /* Offset and size of the match */
1.2184 +){
1.2185 + int i;
1.2186 + struct snippetMatch *pMatch;
1.2187 + if( p->nMatch+1>=p->nAlloc ){
1.2188 + p->nAlloc = p->nAlloc*2 + 10;
1.2189 + p->aMatch = realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) );
1.2190 + if( p->aMatch==0 ){
1.2191 + p->nMatch = 0;
1.2192 + p->nAlloc = 0;
1.2193 + return;
1.2194 + }
1.2195 + }
1.2196 + i = p->nMatch++;
1.2197 + pMatch = &p->aMatch[i];
1.2198 + pMatch->iCol = iCol;
1.2199 + pMatch->iTerm = iTerm;
1.2200 + pMatch->iStart = iStart;
1.2201 + pMatch->nByte = nByte;
1.2202 +}
1.2203 +
1.2204 +/*
1.2205 +** Sizing information for the circular buffer used in snippetOffsetsOfColumn()
1.2206 +*/
1.2207 +#define FTS1_ROTOR_SZ (32)
1.2208 +#define FTS1_ROTOR_MASK (FTS1_ROTOR_SZ-1)
1.2209 +
1.2210 +/*
1.2211 +** Add entries to pSnippet->aMatch[] for every match that occurs against
1.2212 +** document zDoc[0..nDoc-1] which is stored in column iColumn.
1.2213 +*/
1.2214 +static void snippetOffsetsOfColumn(
1.2215 + Query *pQuery,
1.2216 + Snippet *pSnippet,
1.2217 + int iColumn,
1.2218 + const char *zDoc,
1.2219 + int nDoc
1.2220 +){
1.2221 + const sqlite3_tokenizer_module *pTModule; /* The tokenizer module */
1.2222 + sqlite3_tokenizer *pTokenizer; /* The specific tokenizer */
1.2223 + sqlite3_tokenizer_cursor *pTCursor; /* Tokenizer cursor */
1.2224 + fulltext_vtab *pVtab; /* The full text index */
1.2225 + int nColumn; /* Number of columns in the index */
1.2226 + const QueryTerm *aTerm; /* Query string terms */
1.2227 + int nTerm; /* Number of query string terms */
1.2228 + int i, j; /* Loop counters */
1.2229 + int rc; /* Return code */
1.2230 + unsigned int match, prevMatch; /* Phrase search bitmasks */
1.2231 + const char *zToken; /* Next token from the tokenizer */
1.2232 + int nToken; /* Size of zToken */
1.2233 + int iBegin, iEnd, iPos; /* Offsets of beginning and end */
1.2234 +
1.2235 + /* The following variables keep a circular buffer of the last
1.2236 + ** few tokens */
1.2237 + unsigned int iRotor = 0; /* Index of current token */
1.2238 + int iRotorBegin[FTS1_ROTOR_SZ]; /* Beginning offset of token */
1.2239 + int iRotorLen[FTS1_ROTOR_SZ]; /* Length of token */
1.2240 +
1.2241 + pVtab = pQuery->pFts;
1.2242 + nColumn = pVtab->nColumn;
1.2243 + pTokenizer = pVtab->pTokenizer;
1.2244 + pTModule = pTokenizer->pModule;
1.2245 + rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor);
1.2246 + if( rc ) return;
1.2247 + pTCursor->pTokenizer = pTokenizer;
1.2248 + aTerm = pQuery->pTerms;
1.2249 + nTerm = pQuery->nTerms;
1.2250 + if( nTerm>=FTS1_ROTOR_SZ ){
1.2251 + nTerm = FTS1_ROTOR_SZ - 1;
1.2252 + }
1.2253 + prevMatch = 0;
1.2254 + while(1){
1.2255 + rc = pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
1.2256 + if( rc ) break;
1.2257 + iRotorBegin[iRotor&FTS1_ROTOR_MASK] = iBegin;
1.2258 + iRotorLen[iRotor&FTS1_ROTOR_MASK] = iEnd-iBegin;
1.2259 + match = 0;
1.2260 + for(i=0; i<nTerm; i++){
1.2261 + int iCol;
1.2262 + iCol = aTerm[i].iColumn;
1.2263 + if( iCol>=0 && iCol<nColumn && iCol!=iColumn ) continue;
1.2264 + if( aTerm[i].nTerm!=nToken ) continue;
1.2265 + if( memcmp(aTerm[i].pTerm, zToken, nToken) ) continue;
1.2266 + if( aTerm[i].iPhrase>1 && (prevMatch & (1<<i))==0 ) continue;
1.2267 + match |= 1<<i;
1.2268 + if( i==nTerm-1 || aTerm[i+1].iPhrase==1 ){
1.2269 + for(j=aTerm[i].iPhrase-1; j>=0; j--){
1.2270 + int k = (iRotor-j) & FTS1_ROTOR_MASK;
1.2271 + snippetAppendMatch(pSnippet, iColumn, i-j,
1.2272 + iRotorBegin[k], iRotorLen[k]);
1.2273 + }
1.2274 + }
1.2275 + }
1.2276 + prevMatch = match<<1;
1.2277 + iRotor++;
1.2278 + }
1.2279 + pTModule->xClose(pTCursor);
1.2280 +}
1.2281 +
1.2282 +
1.2283 +/*
1.2284 +** Compute all offsets for the current row of the query.
1.2285 +** If the offsets have already been computed, this routine is a no-op.
1.2286 +*/
1.2287 +static void snippetAllOffsets(fulltext_cursor *p){
1.2288 + int nColumn;
1.2289 + int iColumn, i;
1.2290 + int iFirst, iLast;
1.2291 + fulltext_vtab *pFts;
1.2292 +
1.2293 + if( p->snippet.nMatch ) return;
1.2294 + if( p->q.nTerms==0 ) return;
1.2295 + pFts = p->q.pFts;
1.2296 + nColumn = pFts->nColumn;
1.2297 + iColumn = p->iCursorType - QUERY_FULLTEXT;
1.2298 + if( iColumn<0 || iColumn>=nColumn ){
1.2299 + iFirst = 0;
1.2300 + iLast = nColumn-1;
1.2301 + }else{
1.2302 + iFirst = iColumn;
1.2303 + iLast = iColumn;
1.2304 + }
1.2305 + for(i=iFirst; i<=iLast; i++){
1.2306 + const char *zDoc;
1.2307 + int nDoc;
1.2308 + zDoc = (const char*)sqlite3_column_text(p->pStmt, i+1);
1.2309 + nDoc = sqlite3_column_bytes(p->pStmt, i+1);
1.2310 + snippetOffsetsOfColumn(&p->q, &p->snippet, i, zDoc, nDoc);
1.2311 + }
1.2312 +}
1.2313 +
1.2314 +/*
1.2315 +** Convert the information in the aMatch[] array of the snippet
1.2316 +** into the string zOffset[0..nOffset-1].
1.2317 +*/
1.2318 +static void snippetOffsetText(Snippet *p){
1.2319 + int i;
1.2320 + int cnt = 0;
1.2321 + StringBuffer sb;
1.2322 + char zBuf[200];
1.2323 + if( p->zOffset ) return;
1.2324 + initStringBuffer(&sb);
1.2325 + for(i=0; i<p->nMatch; i++){
1.2326 + struct snippetMatch *pMatch = &p->aMatch[i];
1.2327 + zBuf[0] = ' ';
1.2328 + sqlite3_snprintf(sizeof(zBuf)-1, &zBuf[cnt>0], "%d %d %d %d",
1.2329 + pMatch->iCol, pMatch->iTerm, pMatch->iStart, pMatch->nByte);
1.2330 + append(&sb, zBuf);
1.2331 + cnt++;
1.2332 + }
1.2333 + p->zOffset = sb.s;
1.2334 + p->nOffset = sb.len;
1.2335 +}
1.2336 +
1.2337 +/*
1.2338 +** zDoc[0..nDoc-1] is phrase of text. aMatch[0..nMatch-1] are a set
1.2339 +** of matching words some of which might be in zDoc. zDoc is column
1.2340 +** number iCol.
1.2341 +**
1.2342 +** iBreak is suggested spot in zDoc where we could begin or end an
1.2343 +** excerpt. Return a value similar to iBreak but possibly adjusted
1.2344 +** to be a little left or right so that the break point is better.
1.2345 +*/
1.2346 +static int wordBoundary(
1.2347 + int iBreak, /* The suggested break point */
1.2348 + const char *zDoc, /* Document text */
1.2349 + int nDoc, /* Number of bytes in zDoc[] */
1.2350 + struct snippetMatch *aMatch, /* Matching words */
1.2351 + int nMatch, /* Number of entries in aMatch[] */
1.2352 + int iCol /* The column number for zDoc[] */
1.2353 +){
1.2354 + int i;
1.2355 + if( iBreak<=10 ){
1.2356 + return 0;
1.2357 + }
1.2358 + if( iBreak>=nDoc-10 ){
1.2359 + return nDoc;
1.2360 + }
1.2361 + for(i=0; i<nMatch && aMatch[i].iCol<iCol; i++){}
1.2362 + while( i<nMatch && aMatch[i].iStart+aMatch[i].nByte<iBreak ){ i++; }
1.2363 + if( i<nMatch ){
1.2364 + if( aMatch[i].iStart<iBreak+10 ){
1.2365 + return aMatch[i].iStart;
1.2366 + }
1.2367 + if( i>0 && aMatch[i-1].iStart+aMatch[i-1].nByte>=iBreak ){
1.2368 + return aMatch[i-1].iStart;
1.2369 + }
1.2370 + }
1.2371 + for(i=1; i<=10; i++){
1.2372 + if( safe_isspace(zDoc[iBreak-i]) ){
1.2373 + return iBreak - i + 1;
1.2374 + }
1.2375 + if( safe_isspace(zDoc[iBreak+i]) ){
1.2376 + return iBreak + i + 1;
1.2377 + }
1.2378 + }
1.2379 + return iBreak;
1.2380 +}
1.2381 +
1.2382 +/*
1.2383 +** If the StringBuffer does not end in white space, add a single
1.2384 +** space character to the end.
1.2385 +*/
1.2386 +static void appendWhiteSpace(StringBuffer *p){
1.2387 + if( p->len==0 ) return;
1.2388 + if( safe_isspace(p->s[p->len-1]) ) return;
1.2389 + append(p, " ");
1.2390 +}
1.2391 +
1.2392 +/*
1.2393 +** Remove white space from teh end of the StringBuffer
1.2394 +*/
1.2395 +static void trimWhiteSpace(StringBuffer *p){
1.2396 + while( p->len>0 && safe_isspace(p->s[p->len-1]) ){
1.2397 + p->len--;
1.2398 + }
1.2399 +}
1.2400 +
1.2401 +
1.2402 +
1.2403 +/*
1.2404 +** Allowed values for Snippet.aMatch[].snStatus
1.2405 +*/
1.2406 +#define SNIPPET_IGNORE 0 /* It is ok to omit this match from the snippet */
1.2407 +#define SNIPPET_DESIRED 1 /* We want to include this match in the snippet */
1.2408 +
1.2409 +/*
1.2410 +** Generate the text of a snippet.
1.2411 +*/
1.2412 +static void snippetText(
1.2413 + fulltext_cursor *pCursor, /* The cursor we need the snippet for */
1.2414 + const char *zStartMark, /* Markup to appear before each match */
1.2415 + const char *zEndMark, /* Markup to appear after each match */
1.2416 + const char *zEllipsis /* Ellipsis mark */
1.2417 +){
1.2418 + int i, j;
1.2419 + struct snippetMatch *aMatch;
1.2420 + int nMatch;
1.2421 + int nDesired;
1.2422 + StringBuffer sb;
1.2423 + int tailCol;
1.2424 + int tailOffset;
1.2425 + int iCol;
1.2426 + int nDoc;
1.2427 + const char *zDoc;
1.2428 + int iStart, iEnd;
1.2429 + int tailEllipsis = 0;
1.2430 + int iMatch;
1.2431 +
1.2432 +
1.2433 + free(pCursor->snippet.zSnippet);
1.2434 + pCursor->snippet.zSnippet = 0;
1.2435 + aMatch = pCursor->snippet.aMatch;
1.2436 + nMatch = pCursor->snippet.nMatch;
1.2437 + initStringBuffer(&sb);
1.2438 +
1.2439 + for(i=0; i<nMatch; i++){
1.2440 + aMatch[i].snStatus = SNIPPET_IGNORE;
1.2441 + }
1.2442 + nDesired = 0;
1.2443 + for(i=0; i<pCursor->q.nTerms; i++){
1.2444 + for(j=0; j<nMatch; j++){
1.2445 + if( aMatch[j].iTerm==i ){
1.2446 + aMatch[j].snStatus = SNIPPET_DESIRED;
1.2447 + nDesired++;
1.2448 + break;
1.2449 + }
1.2450 + }
1.2451 + }
1.2452 +
1.2453 + iMatch = 0;
1.2454 + tailCol = -1;
1.2455 + tailOffset = 0;
1.2456 + for(i=0; i<nMatch && nDesired>0; i++){
1.2457 + if( aMatch[i].snStatus!=SNIPPET_DESIRED ) continue;
1.2458 + nDesired--;
1.2459 + iCol = aMatch[i].iCol;
1.2460 + zDoc = (const char*)sqlite3_column_text(pCursor->pStmt, iCol+1);
1.2461 + nDoc = sqlite3_column_bytes(pCursor->pStmt, iCol+1);
1.2462 + iStart = aMatch[i].iStart - 40;
1.2463 + iStart = wordBoundary(iStart, zDoc, nDoc, aMatch, nMatch, iCol);
1.2464 + if( iStart<=10 ){
1.2465 + iStart = 0;
1.2466 + }
1.2467 + if( iCol==tailCol && iStart<=tailOffset+20 ){
1.2468 + iStart = tailOffset;
1.2469 + }
1.2470 + if( (iCol!=tailCol && tailCol>=0) || iStart!=tailOffset ){
1.2471 + trimWhiteSpace(&sb);
1.2472 + appendWhiteSpace(&sb);
1.2473 + append(&sb, zEllipsis);
1.2474 + appendWhiteSpace(&sb);
1.2475 + }
1.2476 + iEnd = aMatch[i].iStart + aMatch[i].nByte + 40;
1.2477 + iEnd = wordBoundary(iEnd, zDoc, nDoc, aMatch, nMatch, iCol);
1.2478 + if( iEnd>=nDoc-10 ){
1.2479 + iEnd = nDoc;
1.2480 + tailEllipsis = 0;
1.2481 + }else{
1.2482 + tailEllipsis = 1;
1.2483 + }
1.2484 + while( iMatch<nMatch && aMatch[iMatch].iCol<iCol ){ iMatch++; }
1.2485 + while( iStart<iEnd ){
1.2486 + while( iMatch<nMatch && aMatch[iMatch].iStart<iStart
1.2487 + && aMatch[iMatch].iCol<=iCol ){
1.2488 + iMatch++;
1.2489 + }
1.2490 + if( iMatch<nMatch && aMatch[iMatch].iStart<iEnd
1.2491 + && aMatch[iMatch].iCol==iCol ){
1.2492 + nappend(&sb, &zDoc[iStart], aMatch[iMatch].iStart - iStart);
1.2493 + iStart = aMatch[iMatch].iStart;
1.2494 + append(&sb, zStartMark);
1.2495 + nappend(&sb, &zDoc[iStart], aMatch[iMatch].nByte);
1.2496 + append(&sb, zEndMark);
1.2497 + iStart += aMatch[iMatch].nByte;
1.2498 + for(j=iMatch+1; j<nMatch; j++){
1.2499 + if( aMatch[j].iTerm==aMatch[iMatch].iTerm
1.2500 + && aMatch[j].snStatus==SNIPPET_DESIRED ){
1.2501 + nDesired--;
1.2502 + aMatch[j].snStatus = SNIPPET_IGNORE;
1.2503 + }
1.2504 + }
1.2505 + }else{
1.2506 + nappend(&sb, &zDoc[iStart], iEnd - iStart);
1.2507 + iStart = iEnd;
1.2508 + }
1.2509 + }
1.2510 + tailCol = iCol;
1.2511 + tailOffset = iEnd;
1.2512 + }
1.2513 + trimWhiteSpace(&sb);
1.2514 + if( tailEllipsis ){
1.2515 + appendWhiteSpace(&sb);
1.2516 + append(&sb, zEllipsis);
1.2517 + }
1.2518 + pCursor->snippet.zSnippet = sb.s;
1.2519 + pCursor->snippet.nSnippet = sb.len;
1.2520 +}
1.2521 +
1.2522 +
1.2523 +/*
1.2524 +** Close the cursor. For additional information see the documentation
1.2525 +** on the xClose method of the virtual table interface.
1.2526 +*/
1.2527 +static int fulltextClose(sqlite3_vtab_cursor *pCursor){
1.2528 + fulltext_cursor *c = (fulltext_cursor *) pCursor;
1.2529 + TRACE(("FTS1 Close %p\n", c));
1.2530 + sqlite3_finalize(c->pStmt);
1.2531 + queryClear(&c->q);
1.2532 + snippetClear(&c->snippet);
1.2533 + if( c->result.pDoclist!=NULL ){
1.2534 + docListDelete(c->result.pDoclist);
1.2535 + }
1.2536 + free(c);
1.2537 + return SQLITE_OK;
1.2538 +}
1.2539 +
1.2540 +static int fulltextNext(sqlite3_vtab_cursor *pCursor){
1.2541 + fulltext_cursor *c = (fulltext_cursor *) pCursor;
1.2542 + sqlite_int64 iDocid;
1.2543 + int rc;
1.2544 +
1.2545 + TRACE(("FTS1 Next %p\n", pCursor));
1.2546 + snippetClear(&c->snippet);
1.2547 + if( c->iCursorType < QUERY_FULLTEXT ){
1.2548 + /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
1.2549 + rc = sqlite3_step(c->pStmt);
1.2550 + switch( rc ){
1.2551 + case SQLITE_ROW:
1.2552 + c->eof = 0;
1.2553 + return SQLITE_OK;
1.2554 + case SQLITE_DONE:
1.2555 + c->eof = 1;
1.2556 + return SQLITE_OK;
1.2557 + default:
1.2558 + c->eof = 1;
1.2559 + return rc;
1.2560 + }
1.2561 + } else { /* full-text query */
1.2562 + rc = sqlite3_reset(c->pStmt);
1.2563 + if( rc!=SQLITE_OK ) return rc;
1.2564 +
1.2565 + iDocid = nextDocid(&c->result);
1.2566 + if( iDocid==0 ){
1.2567 + c->eof = 1;
1.2568 + return SQLITE_OK;
1.2569 + }
1.2570 + rc = sqlite3_bind_int64(c->pStmt, 1, iDocid);
1.2571 + if( rc!=SQLITE_OK ) return rc;
1.2572 + /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
1.2573 + rc = sqlite3_step(c->pStmt);
1.2574 + if( rc==SQLITE_ROW ){ /* the case we expect */
1.2575 + c->eof = 0;
1.2576 + return SQLITE_OK;
1.2577 + }
1.2578 + /* an error occurred; abort */
1.2579 + return rc==SQLITE_DONE ? SQLITE_ERROR : rc;
1.2580 + }
1.2581 +}
1.2582 +
1.2583 +
1.2584 +/* Return a DocList corresponding to the query term *pTerm. If *pTerm
1.2585 +** is the first term of a phrase query, go ahead and evaluate the phrase
1.2586 +** query and return the doclist for the entire phrase query.
1.2587 +**
1.2588 +** The result is stored in pTerm->doclist.
1.2589 +*/
1.2590 +static int docListOfTerm(
1.2591 + fulltext_vtab *v, /* The full text index */
1.2592 + int iColumn, /* column to restrict to. No restrition if >=nColumn */
1.2593 + QueryTerm *pQTerm, /* Term we are looking for, or 1st term of a phrase */
1.2594 + DocList **ppResult /* Write the result here */
1.2595 +){
1.2596 + DocList *pLeft, *pRight, *pNew;
1.2597 + int i, rc;
1.2598 +
1.2599 + pLeft = docListNew(DL_POSITIONS);
1.2600 + rc = term_select_all(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, pLeft);
1.2601 + if( rc ){
1.2602 + docListDelete(pLeft);
1.2603 + return rc;
1.2604 + }
1.2605 + for(i=1; i<=pQTerm->nPhrase; i++){
1.2606 + pRight = docListNew(DL_POSITIONS);
1.2607 + rc = term_select_all(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm, pRight);
1.2608 + if( rc ){
1.2609 + docListDelete(pLeft);
1.2610 + return rc;
1.2611 + }
1.2612 + pNew = docListNew(i<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS);
1.2613 + docListPhraseMerge(pLeft, pRight, pNew);
1.2614 + docListDelete(pLeft);
1.2615 + docListDelete(pRight);
1.2616 + pLeft = pNew;
1.2617 + }
1.2618 + *ppResult = pLeft;
1.2619 + return SQLITE_OK;
1.2620 +}
1.2621 +
1.2622 +/* Add a new term pTerm[0..nTerm-1] to the query *q.
1.2623 +*/
1.2624 +static void queryAdd(Query *q, const char *pTerm, int nTerm){
1.2625 + QueryTerm *t;
1.2626 + ++q->nTerms;
1.2627 + q->pTerms = realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0]));
1.2628 + if( q->pTerms==0 ){
1.2629 + q->nTerms = 0;
1.2630 + return;
1.2631 + }
1.2632 + t = &q->pTerms[q->nTerms - 1];
1.2633 + memset(t, 0, sizeof(*t));
1.2634 + t->pTerm = malloc(nTerm+1);
1.2635 + memcpy(t->pTerm, pTerm, nTerm);
1.2636 + t->pTerm[nTerm] = 0;
1.2637 + t->nTerm = nTerm;
1.2638 + t->isOr = q->nextIsOr;
1.2639 + q->nextIsOr = 0;
1.2640 + t->iColumn = q->nextColumn;
1.2641 + q->nextColumn = q->dfltColumn;
1.2642 +}
1.2643 +
1.2644 +/*
1.2645 +** Check to see if the string zToken[0...nToken-1] matches any
1.2646 +** column name in the virtual table. If it does,
1.2647 +** return the zero-indexed column number. If not, return -1.
1.2648 +*/
1.2649 +static int checkColumnSpecifier(
1.2650 + fulltext_vtab *pVtab, /* The virtual table */
1.2651 + const char *zToken, /* Text of the token */
1.2652 + int nToken /* Number of characters in the token */
1.2653 +){
1.2654 + int i;
1.2655 + for(i=0; i<pVtab->nColumn; i++){
1.2656 + if( memcmp(pVtab->azColumn[i], zToken, nToken)==0
1.2657 + && pVtab->azColumn[i][nToken]==0 ){
1.2658 + return i;
1.2659 + }
1.2660 + }
1.2661 + return -1;
1.2662 +}
1.2663 +
1.2664 +/*
1.2665 +** Parse the text at pSegment[0..nSegment-1]. Add additional terms
1.2666 +** to the query being assemblied in pQuery.
1.2667 +**
1.2668 +** inPhrase is true if pSegment[0..nSegement-1] is contained within
1.2669 +** double-quotes. If inPhrase is true, then the first term
1.2670 +** is marked with the number of terms in the phrase less one and
1.2671 +** OR and "-" syntax is ignored. If inPhrase is false, then every
1.2672 +** term found is marked with nPhrase=0 and OR and "-" syntax is significant.
1.2673 +*/
1.2674 +static int tokenizeSegment(
1.2675 + sqlite3_tokenizer *pTokenizer, /* The tokenizer to use */
1.2676 + const char *pSegment, int nSegment, /* Query expression being parsed */
1.2677 + int inPhrase, /* True if within "..." */
1.2678 + Query *pQuery /* Append results here */
1.2679 +){
1.2680 + const sqlite3_tokenizer_module *pModule = pTokenizer->pModule;
1.2681 + sqlite3_tokenizer_cursor *pCursor;
1.2682 + int firstIndex = pQuery->nTerms;
1.2683 + int iCol;
1.2684 + int nTerm = 1;
1.2685 +
1.2686 + int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor);
1.2687 + if( rc!=SQLITE_OK ) return rc;
1.2688 + pCursor->pTokenizer = pTokenizer;
1.2689 +
1.2690 + while( 1 ){
1.2691 + const char *pToken;
1.2692 + int nToken, iBegin, iEnd, iPos;
1.2693 +
1.2694 + rc = pModule->xNext(pCursor,
1.2695 + &pToken, &nToken,
1.2696 + &iBegin, &iEnd, &iPos);
1.2697 + if( rc!=SQLITE_OK ) break;
1.2698 + if( !inPhrase &&
1.2699 + pSegment[iEnd]==':' &&
1.2700 + (iCol = checkColumnSpecifier(pQuery->pFts, pToken, nToken))>=0 ){
1.2701 + pQuery->nextColumn = iCol;
1.2702 + continue;
1.2703 + }
1.2704 + if( !inPhrase && pQuery->nTerms>0 && nToken==2
1.2705 + && pSegment[iBegin]=='O' && pSegment[iBegin+1]=='R' ){
1.2706 + pQuery->nextIsOr = 1;
1.2707 + continue;
1.2708 + }
1.2709 + queryAdd(pQuery, pToken, nToken);
1.2710 + if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){
1.2711 + pQuery->pTerms[pQuery->nTerms-1].isNot = 1;
1.2712 + }
1.2713 + pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm;
1.2714 + if( inPhrase ){
1.2715 + nTerm++;
1.2716 + }
1.2717 + }
1.2718 +
1.2719 + if( inPhrase && pQuery->nTerms>firstIndex ){
1.2720 + pQuery->pTerms[firstIndex].nPhrase = pQuery->nTerms - firstIndex - 1;
1.2721 + }
1.2722 +
1.2723 + return pModule->xClose(pCursor);
1.2724 +}
1.2725 +
1.2726 +/* Parse a query string, yielding a Query object pQuery.
1.2727 +**
1.2728 +** The calling function will need to queryClear() to clean up
1.2729 +** the dynamically allocated memory held by pQuery.
1.2730 +*/
1.2731 +static int parseQuery(
1.2732 + fulltext_vtab *v, /* The fulltext index */
1.2733 + const char *zInput, /* Input text of the query string */
1.2734 + int nInput, /* Size of the input text */
1.2735 + int dfltColumn, /* Default column of the index to match against */
1.2736 + Query *pQuery /* Write the parse results here. */
1.2737 +){
1.2738 + int iInput, inPhrase = 0;
1.2739 +
1.2740 + if( zInput==0 ) nInput = 0;
1.2741 + if( nInput<0 ) nInput = strlen(zInput);
1.2742 + pQuery->nTerms = 0;
1.2743 + pQuery->pTerms = NULL;
1.2744 + pQuery->nextIsOr = 0;
1.2745 + pQuery->nextColumn = dfltColumn;
1.2746 + pQuery->dfltColumn = dfltColumn;
1.2747 + pQuery->pFts = v;
1.2748 +
1.2749 + for(iInput=0; iInput<nInput; ++iInput){
1.2750 + int i;
1.2751 + for(i=iInput; i<nInput && zInput[i]!='"'; ++i){}
1.2752 + if( i>iInput ){
1.2753 + tokenizeSegment(v->pTokenizer, zInput+iInput, i-iInput, inPhrase,
1.2754 + pQuery);
1.2755 + }
1.2756 + iInput = i;
1.2757 + if( i<nInput ){
1.2758 + assert( zInput[i]=='"' );
1.2759 + inPhrase = !inPhrase;
1.2760 + }
1.2761 + }
1.2762 +
1.2763 + if( inPhrase ){
1.2764 + /* unmatched quote */
1.2765 + queryClear(pQuery);
1.2766 + return SQLITE_ERROR;
1.2767 + }
1.2768 + return SQLITE_OK;
1.2769 +}
1.2770 +
1.2771 +/* Perform a full-text query using the search expression in
1.2772 +** zInput[0..nInput-1]. Return a list of matching documents
1.2773 +** in pResult.
1.2774 +**
1.2775 +** Queries must match column iColumn. Or if iColumn>=nColumn
1.2776 +** they are allowed to match against any column.
1.2777 +*/
1.2778 +static int fulltextQuery(
1.2779 + fulltext_vtab *v, /* The full text index */
1.2780 + int iColumn, /* Match against this column by default */
1.2781 + const char *zInput, /* The query string */
1.2782 + int nInput, /* Number of bytes in zInput[] */
1.2783 + DocList **pResult, /* Write the result doclist here */
1.2784 + Query *pQuery /* Put parsed query string here */
1.2785 +){
1.2786 + int i, iNext, rc;
1.2787 + DocList *pLeft = NULL;
1.2788 + DocList *pRight, *pNew, *pOr;
1.2789 + int nNot = 0;
1.2790 + QueryTerm *aTerm;
1.2791 +
1.2792 + rc = parseQuery(v, zInput, nInput, iColumn, pQuery);
1.2793 + if( rc!=SQLITE_OK ) return rc;
1.2794 +
1.2795 + /* Merge AND terms. */
1.2796 + aTerm = pQuery->pTerms;
1.2797 + for(i = 0; i<pQuery->nTerms; i=iNext){
1.2798 + if( aTerm[i].isNot ){
1.2799 + /* Handle all NOT terms in a separate pass */
1.2800 + nNot++;
1.2801 + iNext = i + aTerm[i].nPhrase+1;
1.2802 + continue;
1.2803 + }
1.2804 + iNext = i + aTerm[i].nPhrase + 1;
1.2805 + rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight);
1.2806 + if( rc ){
1.2807 + queryClear(pQuery);
1.2808 + return rc;
1.2809 + }
1.2810 + while( iNext<pQuery->nTerms && aTerm[iNext].isOr ){
1.2811 + rc = docListOfTerm(v, aTerm[iNext].iColumn, &aTerm[iNext], &pOr);
1.2812 + iNext += aTerm[iNext].nPhrase + 1;
1.2813 + if( rc ){
1.2814 + queryClear(pQuery);
1.2815 + return rc;
1.2816 + }
1.2817 + pNew = docListNew(DL_DOCIDS);
1.2818 + docListOrMerge(pRight, pOr, pNew);
1.2819 + docListDelete(pRight);
1.2820 + docListDelete(pOr);
1.2821 + pRight = pNew;
1.2822 + }
1.2823 + if( pLeft==0 ){
1.2824 + pLeft = pRight;
1.2825 + }else{
1.2826 + pNew = docListNew(DL_DOCIDS);
1.2827 + docListAndMerge(pLeft, pRight, pNew);
1.2828 + docListDelete(pRight);
1.2829 + docListDelete(pLeft);
1.2830 + pLeft = pNew;
1.2831 + }
1.2832 + }
1.2833 +
1.2834 + if( nNot && pLeft==0 ){
1.2835 + /* We do not yet know how to handle a query of only NOT terms */
1.2836 + return SQLITE_ERROR;
1.2837 + }
1.2838 +
1.2839 + /* Do the EXCEPT terms */
1.2840 + for(i=0; i<pQuery->nTerms; i += aTerm[i].nPhrase + 1){
1.2841 + if( !aTerm[i].isNot ) continue;
1.2842 + rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight);
1.2843 + if( rc ){
1.2844 + queryClear(pQuery);
1.2845 + docListDelete(pLeft);
1.2846 + return rc;
1.2847 + }
1.2848 + pNew = docListNew(DL_DOCIDS);
1.2849 + docListExceptMerge(pLeft, pRight, pNew);
1.2850 + docListDelete(pRight);
1.2851 + docListDelete(pLeft);
1.2852 + pLeft = pNew;
1.2853 + }
1.2854 +
1.2855 + *pResult = pLeft;
1.2856 + return rc;
1.2857 +}
1.2858 +
1.2859 +/*
1.2860 +** This is the xFilter interface for the virtual table. See
1.2861 +** the virtual table xFilter method documentation for additional
1.2862 +** information.
1.2863 +**
1.2864 +** If idxNum==QUERY_GENERIC then do a full table scan against
1.2865 +** the %_content table.
1.2866 +**
1.2867 +** If idxNum==QUERY_ROWID then do a rowid lookup for a single entry
1.2868 +** in the %_content table.
1.2869 +**
1.2870 +** If idxNum>=QUERY_FULLTEXT then use the full text index. The
1.2871 +** column on the left-hand side of the MATCH operator is column
1.2872 +** number idxNum-QUERY_FULLTEXT, 0 indexed. argv[0] is the right-hand
1.2873 +** side of the MATCH operator.
1.2874 +*/
1.2875 +/* TODO(shess) Upgrade the cursor initialization and destruction to
1.2876 +** account for fulltextFilter() being called multiple times on the
1.2877 +** same cursor. The current solution is very fragile. Apply fix to
1.2878 +** fts2 as appropriate.
1.2879 +*/
1.2880 +static int fulltextFilter(
1.2881 + sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
1.2882 + int idxNum, const char *idxStr, /* Which indexing scheme to use */
1.2883 + int argc, sqlite3_value **argv /* Arguments for the indexing scheme */
1.2884 +){
1.2885 + fulltext_cursor *c = (fulltext_cursor *) pCursor;
1.2886 + fulltext_vtab *v = cursor_vtab(c);
1.2887 + int rc;
1.2888 + char *zSql;
1.2889 +
1.2890 + TRACE(("FTS1 Filter %p\n",pCursor));
1.2891 +
1.2892 + zSql = sqlite3_mprintf("select rowid, * from %%_content %s",
1.2893 + idxNum==QUERY_GENERIC ? "" : "where rowid=?");
1.2894 + sqlite3_finalize(c->pStmt);
1.2895 + rc = sql_prepare(v->db, v->zDb, v->zName, &c->pStmt, zSql);
1.2896 + sqlite3_free(zSql);
1.2897 + if( rc!=SQLITE_OK ) return rc;
1.2898 +
1.2899 + c->iCursorType = idxNum;
1.2900 + switch( idxNum ){
1.2901 + case QUERY_GENERIC:
1.2902 + break;
1.2903 +
1.2904 + case QUERY_ROWID:
1.2905 + rc = sqlite3_bind_int64(c->pStmt, 1, sqlite3_value_int64(argv[0]));
1.2906 + if( rc!=SQLITE_OK ) return rc;
1.2907 + break;
1.2908 +
1.2909 + default: /* full-text search */
1.2910 + {
1.2911 + const char *zQuery = (const char *)sqlite3_value_text(argv[0]);
1.2912 + DocList *pResult;
1.2913 + assert( idxNum<=QUERY_FULLTEXT+v->nColumn);
1.2914 + assert( argc==1 );
1.2915 + queryClear(&c->q);
1.2916 + rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &pResult, &c->q);
1.2917 + if( rc!=SQLITE_OK ) return rc;
1.2918 + if( c->result.pDoclist!=NULL ) docListDelete(c->result.pDoclist);
1.2919 + readerInit(&c->result, pResult);
1.2920 + break;
1.2921 + }
1.2922 + }
1.2923 +
1.2924 + return fulltextNext(pCursor);
1.2925 +}
1.2926 +
1.2927 +/* This is the xEof method of the virtual table. The SQLite core
1.2928 +** calls this routine to find out if it has reached the end of
1.2929 +** a query's results set.
1.2930 +*/
1.2931 +static int fulltextEof(sqlite3_vtab_cursor *pCursor){
1.2932 + fulltext_cursor *c = (fulltext_cursor *) pCursor;
1.2933 + return c->eof;
1.2934 +}
1.2935 +
1.2936 +/* This is the xColumn method of the virtual table. The SQLite
1.2937 +** core calls this method during a query when it needs the value
1.2938 +** of a column from the virtual table. This method needs to use
1.2939 +** one of the sqlite3_result_*() routines to store the requested
1.2940 +** value back in the pContext.
1.2941 +*/
1.2942 +static int fulltextColumn(sqlite3_vtab_cursor *pCursor,
1.2943 + sqlite3_context *pContext, int idxCol){
1.2944 + fulltext_cursor *c = (fulltext_cursor *) pCursor;
1.2945 + fulltext_vtab *v = cursor_vtab(c);
1.2946 +
1.2947 + if( idxCol<v->nColumn ){
1.2948 + sqlite3_value *pVal = sqlite3_column_value(c->pStmt, idxCol+1);
1.2949 + sqlite3_result_value(pContext, pVal);
1.2950 + }else if( idxCol==v->nColumn ){
1.2951 + /* The extra column whose name is the same as the table.
1.2952 + ** Return a blob which is a pointer to the cursor
1.2953 + */
1.2954 + sqlite3_result_blob(pContext, &c, sizeof(c), SQLITE_TRANSIENT);
1.2955 + }
1.2956 + return SQLITE_OK;
1.2957 +}
1.2958 +
1.2959 +/* This is the xRowid method. The SQLite core calls this routine to
1.2960 +** retrive the rowid for the current row of the result set. The
1.2961 +** rowid should be written to *pRowid.
1.2962 +*/
1.2963 +static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
1.2964 + fulltext_cursor *c = (fulltext_cursor *) pCursor;
1.2965 +
1.2966 + *pRowid = sqlite3_column_int64(c->pStmt, 0);
1.2967 + return SQLITE_OK;
1.2968 +}
1.2969 +
1.2970 +/* Add all terms in [zText] to the given hash table. If [iColumn] > 0,
1.2971 + * we also store positions and offsets in the hash table using the given
1.2972 + * column number. */
1.2973 +static int buildTerms(fulltext_vtab *v, fts1Hash *terms, sqlite_int64 iDocid,
1.2974 + const char *zText, int iColumn){
1.2975 + sqlite3_tokenizer *pTokenizer = v->pTokenizer;
1.2976 + sqlite3_tokenizer_cursor *pCursor;
1.2977 + const char *pToken;
1.2978 + int nTokenBytes;
1.2979 + int iStartOffset, iEndOffset, iPosition;
1.2980 + int rc;
1.2981 +
1.2982 + rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor);
1.2983 + if( rc!=SQLITE_OK ) return rc;
1.2984 +
1.2985 + pCursor->pTokenizer = pTokenizer;
1.2986 + while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor,
1.2987 + &pToken, &nTokenBytes,
1.2988 + &iStartOffset, &iEndOffset,
1.2989 + &iPosition) ){
1.2990 + DocList *p;
1.2991 +
1.2992 + /* Positions can't be negative; we use -1 as a terminator internally. */
1.2993 + if( iPosition<0 ){
1.2994 + pTokenizer->pModule->xClose(pCursor);
1.2995 + return SQLITE_ERROR;
1.2996 + }
1.2997 +
1.2998 + p = fts1HashFind(terms, pToken, nTokenBytes);
1.2999 + if( p==NULL ){
1.3000 + p = docListNew(DL_DEFAULT);
1.3001 + docListAddDocid(p, iDocid);
1.3002 + fts1HashInsert(terms, pToken, nTokenBytes, p);
1.3003 + }
1.3004 + if( iColumn>=0 ){
1.3005 + docListAddPosOffset(p, iColumn, iPosition, iStartOffset, iEndOffset);
1.3006 + }
1.3007 + }
1.3008 +
1.3009 + /* TODO(shess) Check return? Should this be able to cause errors at
1.3010 + ** this point? Actually, same question about sqlite3_finalize(),
1.3011 + ** though one could argue that failure there means that the data is
1.3012 + ** not durable. *ponder*
1.3013 + */
1.3014 + pTokenizer->pModule->xClose(pCursor);
1.3015 + return rc;
1.3016 +}
1.3017 +
1.3018 +/* Update the %_terms table to map the term [pTerm] to the given rowid. */
1.3019 +static int index_insert_term(fulltext_vtab *v, const char *pTerm, int nTerm,
1.3020 + DocList *d){
1.3021 + sqlite_int64 iIndexRow;
1.3022 + DocList doclist;
1.3023 + int iSegment = 0, rc;
1.3024 +
1.3025 + rc = term_select(v, pTerm, nTerm, iSegment, &iIndexRow, &doclist);
1.3026 + if( rc==SQLITE_DONE ){
1.3027 + docListInit(&doclist, DL_DEFAULT, 0, 0);
1.3028 + docListUpdate(&doclist, d);
1.3029 + /* TODO(shess) Consider length(doclist)>CHUNK_MAX? */
1.3030 + rc = term_insert(v, NULL, pTerm, nTerm, iSegment, &doclist);
1.3031 + goto err;
1.3032 + }
1.3033 + if( rc!=SQLITE_ROW ) return SQLITE_ERROR;
1.3034 +
1.3035 + docListUpdate(&doclist, d);
1.3036 + if( doclist.nData<=CHUNK_MAX ){
1.3037 + rc = term_update(v, iIndexRow, &doclist);
1.3038 + goto err;
1.3039 + }
1.3040 +
1.3041 + /* Doclist doesn't fit, delete what's there, and accumulate
1.3042 + ** forward.
1.3043 + */
1.3044 + rc = term_delete(v, iIndexRow);
1.3045 + if( rc!=SQLITE_OK ) goto err;
1.3046 +
1.3047 + /* Try to insert the doclist into a higher segment bucket. On
1.3048 + ** failure, accumulate existing doclist with the doclist from that
1.3049 + ** bucket, and put results in the next bucket.
1.3050 + */
1.3051 + iSegment++;
1.3052 + while( (rc=term_insert(v, &iIndexRow, pTerm, nTerm, iSegment,
1.3053 + &doclist))!=SQLITE_OK ){
1.3054 + sqlite_int64 iSegmentRow;
1.3055 + DocList old;
1.3056 + int rc2;
1.3057 +
1.3058 + /* Retain old error in case the term_insert() error was really an
1.3059 + ** error rather than a bounced insert.
1.3060 + */
1.3061 + rc2 = term_select(v, pTerm, nTerm, iSegment, &iSegmentRow, &old);
1.3062 + if( rc2!=SQLITE_ROW ) goto err;
1.3063 +
1.3064 + rc = term_delete(v, iSegmentRow);
1.3065 + if( rc!=SQLITE_OK ) goto err;
1.3066 +
1.3067 + /* Reusing lowest-number deleted row keeps the index smaller. */
1.3068 + if( iSegmentRow<iIndexRow ) iIndexRow = iSegmentRow;
1.3069 +
1.3070 + /* doclist contains the newer data, so accumulate it over old.
1.3071 + ** Then steal accumulated data for doclist.
1.3072 + */
1.3073 + docListAccumulate(&old, &doclist);
1.3074 + docListDestroy(&doclist);
1.3075 + doclist = old;
1.3076 +
1.3077 + iSegment++;
1.3078 + }
1.3079 +
1.3080 + err:
1.3081 + docListDestroy(&doclist);
1.3082 + return rc;
1.3083 +}
1.3084 +
1.3085 +/* Add doclists for all terms in [pValues] to the hash table [terms]. */
1.3086 +static int insertTerms(fulltext_vtab *v, fts1Hash *terms, sqlite_int64 iRowid,
1.3087 + sqlite3_value **pValues){
1.3088 + int i;
1.3089 + for(i = 0; i < v->nColumn ; ++i){
1.3090 + char *zText = (char*)sqlite3_value_text(pValues[i]);
1.3091 + int rc = buildTerms(v, terms, iRowid, zText, i);
1.3092 + if( rc!=SQLITE_OK ) return rc;
1.3093 + }
1.3094 + return SQLITE_OK;
1.3095 +}
1.3096 +
1.3097 +/* Add empty doclists for all terms in the given row's content to the hash
1.3098 + * table [pTerms]. */
1.3099 +static int deleteTerms(fulltext_vtab *v, fts1Hash *pTerms, sqlite_int64 iRowid){
1.3100 + const char **pValues;
1.3101 + int i;
1.3102 +
1.3103 + int rc = content_select(v, iRowid, &pValues);
1.3104 + if( rc!=SQLITE_OK ) return rc;
1.3105 +
1.3106 + for(i = 0 ; i < v->nColumn; ++i) {
1.3107 + rc = buildTerms(v, pTerms, iRowid, pValues[i], -1);
1.3108 + if( rc!=SQLITE_OK ) break;
1.3109 + }
1.3110 +
1.3111 + freeStringArray(v->nColumn, pValues);
1.3112 + return SQLITE_OK;
1.3113 +}
1.3114 +
1.3115 +/* Insert a row into the %_content table; set *piRowid to be the ID of the
1.3116 + * new row. Fill [pTerms] with new doclists for the %_term table. */
1.3117 +static int index_insert(fulltext_vtab *v, sqlite3_value *pRequestRowid,
1.3118 + sqlite3_value **pValues,
1.3119 + sqlite_int64 *piRowid, fts1Hash *pTerms){
1.3120 + int rc;
1.3121 +
1.3122 + rc = content_insert(v, pRequestRowid, pValues); /* execute an SQL INSERT */
1.3123 + if( rc!=SQLITE_OK ) return rc;
1.3124 + *piRowid = sqlite3_last_insert_rowid(v->db);
1.3125 + return insertTerms(v, pTerms, *piRowid, pValues);
1.3126 +}
1.3127 +
1.3128 +/* Delete a row from the %_content table; fill [pTerms] with empty doclists
1.3129 + * to be written to the %_term table. */
1.3130 +static int index_delete(fulltext_vtab *v, sqlite_int64 iRow, fts1Hash *pTerms){
1.3131 + int rc = deleteTerms(v, pTerms, iRow);
1.3132 + if( rc!=SQLITE_OK ) return rc;
1.3133 + return content_delete(v, iRow); /* execute an SQL DELETE */
1.3134 +}
1.3135 +
1.3136 +/* Update a row in the %_content table; fill [pTerms] with new doclists for the
1.3137 + * %_term table. */
1.3138 +static int index_update(fulltext_vtab *v, sqlite_int64 iRow,
1.3139 + sqlite3_value **pValues, fts1Hash *pTerms){
1.3140 + /* Generate an empty doclist for each term that previously appeared in this
1.3141 + * row. */
1.3142 + int rc = deleteTerms(v, pTerms, iRow);
1.3143 + if( rc!=SQLITE_OK ) return rc;
1.3144 +
1.3145 + rc = content_update(v, pValues, iRow); /* execute an SQL UPDATE */
1.3146 + if( rc!=SQLITE_OK ) return rc;
1.3147 +
1.3148 + /* Now add positions for terms which appear in the updated row. */
1.3149 + return insertTerms(v, pTerms, iRow, pValues);
1.3150 +}
1.3151 +
1.3152 +/* This function implements the xUpdate callback; it is the top-level entry
1.3153 + * point for inserting, deleting or updating a row in a full-text table. */
1.3154 +static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg,
1.3155 + sqlite_int64 *pRowid){
1.3156 + fulltext_vtab *v = (fulltext_vtab *) pVtab;
1.3157 + fts1Hash terms; /* maps term string -> PosList */
1.3158 + int rc;
1.3159 + fts1HashElem *e;
1.3160 +
1.3161 + TRACE(("FTS1 Update %p\n", pVtab));
1.3162 +
1.3163 + fts1HashInit(&terms, FTS1_HASH_STRING, 1);
1.3164 +
1.3165 + if( nArg<2 ){
1.3166 + rc = index_delete(v, sqlite3_value_int64(ppArg[0]), &terms);
1.3167 + } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){
1.3168 + /* An update:
1.3169 + * ppArg[0] = old rowid
1.3170 + * ppArg[1] = new rowid
1.3171 + * ppArg[2..2+v->nColumn-1] = values
1.3172 + * ppArg[2+v->nColumn] = value for magic column (we ignore this)
1.3173 + */
1.3174 + sqlite_int64 rowid = sqlite3_value_int64(ppArg[0]);
1.3175 + if( sqlite3_value_type(ppArg[1]) != SQLITE_INTEGER ||
1.3176 + sqlite3_value_int64(ppArg[1]) != rowid ){
1.3177 + rc = SQLITE_ERROR; /* we don't allow changing the rowid */
1.3178 + } else {
1.3179 + assert( nArg==2+v->nColumn+1);
1.3180 + rc = index_update(v, rowid, &ppArg[2], &terms);
1.3181 + }
1.3182 + } else {
1.3183 + /* An insert:
1.3184 + * ppArg[1] = requested rowid
1.3185 + * ppArg[2..2+v->nColumn-1] = values
1.3186 + * ppArg[2+v->nColumn] = value for magic column (we ignore this)
1.3187 + */
1.3188 + assert( nArg==2+v->nColumn+1);
1.3189 + rc = index_insert(v, ppArg[1], &ppArg[2], pRowid, &terms);
1.3190 + }
1.3191 +
1.3192 + if( rc==SQLITE_OK ){
1.3193 + /* Write updated doclists to disk. */
1.3194 + for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){
1.3195 + DocList *p = fts1HashData(e);
1.3196 + rc = index_insert_term(v, fts1HashKey(e), fts1HashKeysize(e), p);
1.3197 + if( rc!=SQLITE_OK ) break;
1.3198 + }
1.3199 + }
1.3200 +
1.3201 + /* clean up */
1.3202 + for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){
1.3203 + DocList *p = fts1HashData(e);
1.3204 + docListDelete(p);
1.3205 + }
1.3206 + fts1HashClear(&terms);
1.3207 +
1.3208 + return rc;
1.3209 +}
1.3210 +
1.3211 +/*
1.3212 +** Implementation of the snippet() function for FTS1
1.3213 +*/
1.3214 +static void snippetFunc(
1.3215 + sqlite3_context *pContext,
1.3216 + int argc,
1.3217 + sqlite3_value **argv
1.3218 +){
1.3219 + fulltext_cursor *pCursor;
1.3220 + if( argc<1 ) return;
1.3221 + if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
1.3222 + sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
1.3223 + sqlite3_result_error(pContext, "illegal first argument to html_snippet",-1);
1.3224 + }else{
1.3225 + const char *zStart = "<b>";
1.3226 + const char *zEnd = "</b>";
1.3227 + const char *zEllipsis = "<b>...</b>";
1.3228 + memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
1.3229 + if( argc>=2 ){
1.3230 + zStart = (const char*)sqlite3_value_text(argv[1]);
1.3231 + if( argc>=3 ){
1.3232 + zEnd = (const char*)sqlite3_value_text(argv[2]);
1.3233 + if( argc>=4 ){
1.3234 + zEllipsis = (const char*)sqlite3_value_text(argv[3]);
1.3235 + }
1.3236 + }
1.3237 + }
1.3238 + snippetAllOffsets(pCursor);
1.3239 + snippetText(pCursor, zStart, zEnd, zEllipsis);
1.3240 + sqlite3_result_text(pContext, pCursor->snippet.zSnippet,
1.3241 + pCursor->snippet.nSnippet, SQLITE_STATIC);
1.3242 + }
1.3243 +}
1.3244 +
1.3245 +/*
1.3246 +** Implementation of the offsets() function for FTS1
1.3247 +*/
1.3248 +static void snippetOffsetsFunc(
1.3249 + sqlite3_context *pContext,
1.3250 + int argc,
1.3251 + sqlite3_value **argv
1.3252 +){
1.3253 + fulltext_cursor *pCursor;
1.3254 + if( argc<1 ) return;
1.3255 + if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
1.3256 + sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
1.3257 + sqlite3_result_error(pContext, "illegal first argument to offsets",-1);
1.3258 + }else{
1.3259 + memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
1.3260 + snippetAllOffsets(pCursor);
1.3261 + snippetOffsetText(&pCursor->snippet);
1.3262 + sqlite3_result_text(pContext,
1.3263 + pCursor->snippet.zOffset, pCursor->snippet.nOffset,
1.3264 + SQLITE_STATIC);
1.3265 + }
1.3266 +}
1.3267 +
1.3268 +/*
1.3269 +** This routine implements the xFindFunction method for the FTS1
1.3270 +** virtual table.
1.3271 +*/
1.3272 +static int fulltextFindFunction(
1.3273 + sqlite3_vtab *pVtab,
1.3274 + int nArg,
1.3275 + const char *zName,
1.3276 + void (**pxFunc)(sqlite3_context*,int,sqlite3_value**),
1.3277 + void **ppArg
1.3278 +){
1.3279 + if( strcmp(zName,"snippet")==0 ){
1.3280 + *pxFunc = snippetFunc;
1.3281 + return 1;
1.3282 + }else if( strcmp(zName,"offsets")==0 ){
1.3283 + *pxFunc = snippetOffsetsFunc;
1.3284 + return 1;
1.3285 + }
1.3286 + return 0;
1.3287 +}
1.3288 +
1.3289 +/*
1.3290 +** Rename an fts1 table.
1.3291 +*/
1.3292 +static int fulltextRename(
1.3293 + sqlite3_vtab *pVtab,
1.3294 + const char *zName
1.3295 +){
1.3296 + fulltext_vtab *p = (fulltext_vtab *)pVtab;
1.3297 + int rc = SQLITE_NOMEM;
1.3298 + char *zSql = sqlite3_mprintf(
1.3299 + "ALTER TABLE %Q.'%q_content' RENAME TO '%q_content';"
1.3300 + "ALTER TABLE %Q.'%q_term' RENAME TO '%q_term';"
1.3301 + , p->zDb, p->zName, zName
1.3302 + , p->zDb, p->zName, zName
1.3303 + );
1.3304 + if( zSql ){
1.3305 + rc = sqlite3_exec(p->db, zSql, 0, 0, 0);
1.3306 + sqlite3_free(zSql);
1.3307 + }
1.3308 + return rc;
1.3309 +}
1.3310 +
1.3311 +static const sqlite3_module fulltextModule = {
1.3312 + /* iVersion */ 0,
1.3313 + /* xCreate */ fulltextCreate,
1.3314 + /* xConnect */ fulltextConnect,
1.3315 + /* xBestIndex */ fulltextBestIndex,
1.3316 + /* xDisconnect */ fulltextDisconnect,
1.3317 + /* xDestroy */ fulltextDestroy,
1.3318 + /* xOpen */ fulltextOpen,
1.3319 + /* xClose */ fulltextClose,
1.3320 + /* xFilter */ fulltextFilter,
1.3321 + /* xNext */ fulltextNext,
1.3322 + /* xEof */ fulltextEof,
1.3323 + /* xColumn */ fulltextColumn,
1.3324 + /* xRowid */ fulltextRowid,
1.3325 + /* xUpdate */ fulltextUpdate,
1.3326 + /* xBegin */ 0,
1.3327 + /* xSync */ 0,
1.3328 + /* xCommit */ 0,
1.3329 + /* xRollback */ 0,
1.3330 + /* xFindFunction */ fulltextFindFunction,
1.3331 + /* xRename */ fulltextRename,
1.3332 +};
1.3333 +
1.3334 +int sqlite3Fts1Init(sqlite3 *db){
1.3335 + sqlite3_overload_function(db, "snippet", -1);
1.3336 + sqlite3_overload_function(db, "offsets", -1);
1.3337 + return sqlite3_create_module(db, "fts1", &fulltextModule, 0);
1.3338 +}
1.3339 +
1.3340 +#if !SQLITE_CORE
1.3341 +int sqlite3_extension_init(sqlite3 *db, char **pzErrMsg,
1.3342 + const sqlite3_api_routines *pApi){
1.3343 + SQLITE_EXTENSION_INIT2(pApi)
1.3344 + return sqlite3Fts1Init(db);
1.3345 +}
1.3346 +#endif
1.3347 +
1.3348 +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */