os/persistentdata/persistentstorage/sqlite3api/SQLite/fts1.c
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/persistentdata/persistentstorage/sqlite3api/SQLite/fts1.c	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,3345 @@
     1.4 +/* fts1 has a design flaw which can lead to database corruption (see
     1.5 +** below).  It is recommended not to use it any longer, instead use
     1.6 +** fts3 (or higher).  If you believe that your use of fts1 is safe,
     1.7 +** add -DSQLITE_ENABLE_BROKEN_FTS1=1 to your CFLAGS.
     1.8 +*/
     1.9 +#if (!defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)) \
    1.10 +        && !defined(SQLITE_ENABLE_BROKEN_FTS1)
    1.11 +#error fts1 has a design flaw and has been deprecated.
    1.12 +#endif
    1.13 +/* The flaw is that fts1 uses the content table's unaliased rowid as
    1.14 +** the unique docid.  fts1 embeds the rowid in the index it builds,
    1.15 +** and expects the rowid to not change.  The SQLite VACUUM operation
    1.16 +** will renumber such rowids, thereby breaking fts1.  If you are using
    1.17 +** fts1 in a system which has disabled VACUUM, then you can continue
    1.18 +** to use it safely.  Note that PRAGMA auto_vacuum does NOT disable
    1.19 +** VACUUM, though systems using auto_vacuum are unlikely to invoke
    1.20 +** VACUUM.
    1.21 +**
    1.22 +** fts1 should be safe even across VACUUM if you only insert documents
    1.23 +** and never delete.
    1.24 +*/
    1.25 +
    1.26 +/* The author disclaims copyright to this source code.
    1.27 + *
    1.28 + * This is an SQLite module implementing full-text search.
    1.29 + */
    1.30 +
    1.31 +/*
    1.32 +** The code in this file is only compiled if:
    1.33 +**
    1.34 +**     * The FTS1 module is being built as an extension
    1.35 +**       (in which case SQLITE_CORE is not defined), or
    1.36 +**
    1.37 +**     * The FTS1 module is being built into the core of
    1.38 +**       SQLite (in which case SQLITE_ENABLE_FTS1 is defined).
    1.39 +*/
    1.40 +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)
    1.41 +
    1.42 +#if defined(SQLITE_ENABLE_FTS1) && !defined(SQLITE_CORE)
    1.43 +# define SQLITE_CORE 1
    1.44 +#endif
    1.45 +
    1.46 +#include <assert.h>
    1.47 +#include <stdlib.h>
    1.48 +#include <stdio.h>
    1.49 +#include <string.h>
    1.50 +#include <ctype.h>
    1.51 +
    1.52 +#include "fts1.h"
    1.53 +#include "fts1_hash.h"
    1.54 +#include "fts1_tokenizer.h"
    1.55 +#include "sqlite3.h"
    1.56 +#include "sqlite3ext.h"
    1.57 +SQLITE_EXTENSION_INIT1
    1.58 +
    1.59 +
    1.60 +#if 0
    1.61 +# define TRACE(A)  printf A; fflush(stdout)
    1.62 +#else
    1.63 +# define TRACE(A)
    1.64 +#endif
    1.65 +
    1.66 +/* utility functions */
    1.67 +
    1.68 +typedef struct StringBuffer {
    1.69 +  int len;      /* length, not including null terminator */
    1.70 +  int alloced;  /* Space allocated for s[] */ 
    1.71 +  char *s;      /* Content of the string */
    1.72 +} StringBuffer;
    1.73 +
    1.74 +static void initStringBuffer(StringBuffer *sb){
    1.75 +  sb->len = 0;
    1.76 +  sb->alloced = 100;
    1.77 +  sb->s = malloc(100);
    1.78 +  sb->s[0] = '\0';
    1.79 +}
    1.80 +
    1.81 +static void nappend(StringBuffer *sb, const char *zFrom, int nFrom){
    1.82 +  if( sb->len + nFrom >= sb->alloced ){
    1.83 +    sb->alloced = sb->len + nFrom + 100;
    1.84 +    sb->s = realloc(sb->s, sb->alloced+1);
    1.85 +    if( sb->s==0 ){
    1.86 +      initStringBuffer(sb);
    1.87 +      return;
    1.88 +    }
    1.89 +  }
    1.90 +  memcpy(sb->s + sb->len, zFrom, nFrom);
    1.91 +  sb->len += nFrom;
    1.92 +  sb->s[sb->len] = 0;
    1.93 +}
    1.94 +static void append(StringBuffer *sb, const char *zFrom){
    1.95 +  nappend(sb, zFrom, strlen(zFrom));
    1.96 +}
    1.97 +
    1.98 +/* We encode variable-length integers in little-endian order using seven bits
    1.99 + * per byte as follows:
   1.100 +**
   1.101 +** KEY:
   1.102 +**         A = 0xxxxxxx    7 bits of data and one flag bit
   1.103 +**         B = 1xxxxxxx    7 bits of data and one flag bit
   1.104 +**
   1.105 +**  7 bits - A
   1.106 +** 14 bits - BA
   1.107 +** 21 bits - BBA
   1.108 +** and so on.
   1.109 +*/
   1.110 +
   1.111 +/* We may need up to VARINT_MAX bytes to store an encoded 64-bit integer. */
   1.112 +#define VARINT_MAX 10
   1.113 +
   1.114 +/* Write a 64-bit variable-length integer to memory starting at p[0].
   1.115 + * The length of data written will be between 1 and VARINT_MAX bytes.
   1.116 + * The number of bytes written is returned. */
   1.117 +static int putVarint(char *p, sqlite_int64 v){
   1.118 +  unsigned char *q = (unsigned char *) p;
   1.119 +  sqlite_uint64 vu = v;
   1.120 +  do{
   1.121 +    *q++ = (unsigned char) ((vu & 0x7f) | 0x80);
   1.122 +    vu >>= 7;
   1.123 +  }while( vu!=0 );
   1.124 +  q[-1] &= 0x7f;  /* turn off high bit in final byte */
   1.125 +  assert( q - (unsigned char *)p <= VARINT_MAX );
   1.126 +  return (int) (q - (unsigned char *)p);
   1.127 +}
   1.128 +
   1.129 +/* Read a 64-bit variable-length integer from memory starting at p[0].
   1.130 + * Return the number of bytes read, or 0 on error.
   1.131 + * The value is stored in *v. */
   1.132 +static int getVarint(const char *p, sqlite_int64 *v){
   1.133 +  const unsigned char *q = (const unsigned char *) p;
   1.134 +  sqlite_uint64 x = 0, y = 1;
   1.135 +  while( (*q & 0x80) == 0x80 ){
   1.136 +    x += y * (*q++ & 0x7f);
   1.137 +    y <<= 7;
   1.138 +    if( q - (unsigned char *)p >= VARINT_MAX ){  /* bad data */
   1.139 +      assert( 0 );
   1.140 +      return 0;
   1.141 +    }
   1.142 +  }
   1.143 +  x += y * (*q++);
   1.144 +  *v = (sqlite_int64) x;
   1.145 +  return (int) (q - (unsigned char *)p);
   1.146 +}
   1.147 +
   1.148 +static int getVarint32(const char *p, int *pi){
   1.149 + sqlite_int64 i;
   1.150 + int ret = getVarint(p, &i);
   1.151 + *pi = (int) i;
   1.152 + assert( *pi==i );
   1.153 + return ret;
   1.154 +}
   1.155 +
   1.156 +/*** Document lists ***
   1.157 + *
   1.158 + * A document list holds a sorted list of varint-encoded document IDs.
   1.159 + *
   1.160 + * A doclist with type DL_POSITIONS_OFFSETS is stored like this:
   1.161 + *
   1.162 + * array {
   1.163 + *   varint docid;
   1.164 + *   array {
   1.165 + *     varint position;     (delta from previous position plus POS_BASE)
   1.166 + *     varint startOffset;  (delta from previous startOffset)
   1.167 + *     varint endOffset;    (delta from startOffset)
   1.168 + *   }
   1.169 + * }
   1.170 + *
   1.171 + * Here, array { X } means zero or more occurrences of X, adjacent in memory.
   1.172 + *
   1.173 + * A position list may hold positions for text in multiple columns.  A position
   1.174 + * POS_COLUMN is followed by a varint containing the index of the column for
   1.175 + * following positions in the list.  Any positions appearing before any
   1.176 + * occurrences of POS_COLUMN are for column 0.
   1.177 + *
   1.178 + * A doclist with type DL_POSITIONS is like the above, but holds only docids
   1.179 + * and positions without offset information.
   1.180 + *
   1.181 + * A doclist with type DL_DOCIDS is like the above, but holds only docids
   1.182 + * without positions or offset information.
   1.183 + *
   1.184 + * On disk, every document list has positions and offsets, so we don't bother
   1.185 + * to serialize a doclist's type.
   1.186 + * 
   1.187 + * We don't yet delta-encode document IDs; doing so will probably be a
   1.188 + * modest win.
   1.189 + *
   1.190 + * NOTE(shess) I've thought of a slightly (1%) better offset encoding.
   1.191 + * After the first offset, estimate the next offset by using the
   1.192 + * current token position and the previous token position and offset,
   1.193 + * offset to handle some variance.  So the estimate would be
   1.194 + * (iPosition*w->iStartOffset/w->iPosition-64), which is delta-encoded
   1.195 + * as normal.  Offsets more than 64 chars from the estimate are
   1.196 + * encoded as the delta to the previous start offset + 128.  An
   1.197 + * additional tiny increment can be gained by using the end offset of
   1.198 + * the previous token to make the estimate a tiny bit more precise.
   1.199 +*/
   1.200 +
   1.201 +/* It is not safe to call isspace(), tolower(), or isalnum() on
   1.202 +** hi-bit-set characters.  This is the same solution used in the
   1.203 +** tokenizer.
   1.204 +*/
   1.205 +/* TODO(shess) The snippet-generation code should be using the
   1.206 +** tokenizer-generated tokens rather than doing its own local
   1.207 +** tokenization.
   1.208 +*/
   1.209 +/* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */
   1.210 +static int safe_isspace(char c){
   1.211 +  return (c&0x80)==0 ? isspace(c) : 0;
   1.212 +}
   1.213 +static int safe_tolower(char c){
   1.214 +  return (c&0x80)==0 ? tolower(c) : c;
   1.215 +}
   1.216 +static int safe_isalnum(char c){
   1.217 +  return (c&0x80)==0 ? isalnum(c) : 0;
   1.218 +}
   1.219 +
   1.220 +typedef enum DocListType {
   1.221 +  DL_DOCIDS,              /* docids only */
   1.222 +  DL_POSITIONS,           /* docids + positions */
   1.223 +  DL_POSITIONS_OFFSETS    /* docids + positions + offsets */
   1.224 +} DocListType;
   1.225 +
   1.226 +/*
   1.227 +** By default, only positions and not offsets are stored in the doclists.
   1.228 +** To change this so that offsets are stored too, compile with
   1.229 +**
   1.230 +**          -DDL_DEFAULT=DL_POSITIONS_OFFSETS
   1.231 +**
   1.232 +*/
   1.233 +#ifndef DL_DEFAULT
   1.234 +# define DL_DEFAULT DL_POSITIONS
   1.235 +#endif
   1.236 +
   1.237 +typedef struct DocList {
   1.238 +  char *pData;
   1.239 +  int nData;
   1.240 +  DocListType iType;
   1.241 +  int iLastColumn;    /* the last column written */
   1.242 +  int iLastPos;       /* the last position written */
   1.243 +  int iLastOffset;    /* the last start offset written */
   1.244 +} DocList;
   1.245 +
   1.246 +enum {
   1.247 +  POS_END = 0,        /* end of this position list */
   1.248 +  POS_COLUMN,         /* followed by new column number */
   1.249 +  POS_BASE
   1.250 +};
   1.251 +
   1.252 +/* Initialize a new DocList to hold the given data. */
   1.253 +static void docListInit(DocList *d, DocListType iType,
   1.254 +                        const char *pData, int nData){
   1.255 +  d->nData = nData;
   1.256 +  if( nData>0 ){
   1.257 +    d->pData = malloc(nData);
   1.258 +    memcpy(d->pData, pData, nData);
   1.259 +  } else {
   1.260 +    d->pData = NULL;
   1.261 +  }
   1.262 +  d->iType = iType;
   1.263 +  d->iLastColumn = 0;
   1.264 +  d->iLastPos = d->iLastOffset = 0;
   1.265 +}
   1.266 +
   1.267 +/* Create a new dynamically-allocated DocList. */
   1.268 +static DocList *docListNew(DocListType iType){
   1.269 +  DocList *d = (DocList *) malloc(sizeof(DocList));
   1.270 +  docListInit(d, iType, 0, 0);
   1.271 +  return d;
   1.272 +}
   1.273 +
   1.274 +static void docListDestroy(DocList *d){
   1.275 +  free(d->pData);
   1.276 +#ifndef NDEBUG
   1.277 +  memset(d, 0x55, sizeof(*d));
   1.278 +#endif
   1.279 +}
   1.280 +
   1.281 +static void docListDelete(DocList *d){
   1.282 +  docListDestroy(d);
   1.283 +  free(d);
   1.284 +}
   1.285 +
   1.286 +static char *docListEnd(DocList *d){
   1.287 +  return d->pData + d->nData;
   1.288 +}
   1.289 +
   1.290 +/* Append a varint to a DocList's data. */
   1.291 +static void appendVarint(DocList *d, sqlite_int64 i){
   1.292 +  char c[VARINT_MAX];
   1.293 +  int n = putVarint(c, i);
   1.294 +  d->pData = realloc(d->pData, d->nData + n);
   1.295 +  memcpy(d->pData + d->nData, c, n);
   1.296 +  d->nData += n;
   1.297 +}
   1.298 +
   1.299 +static void docListAddDocid(DocList *d, sqlite_int64 iDocid){
   1.300 +  appendVarint(d, iDocid);
   1.301 +  if( d->iType>=DL_POSITIONS ){
   1.302 +    appendVarint(d, POS_END);  /* initially empty position list */
   1.303 +    d->iLastColumn = 0;
   1.304 +    d->iLastPos = d->iLastOffset = 0;
   1.305 +  }
   1.306 +}
   1.307 +
   1.308 +/* helper function for docListAddPos and docListAddPosOffset */
   1.309 +static void addPos(DocList *d, int iColumn, int iPos){
   1.310 +  assert( d->nData>0 );
   1.311 +  --d->nData;  /* remove previous terminator */
   1.312 +  if( iColumn!=d->iLastColumn ){
   1.313 +    assert( iColumn>d->iLastColumn );
   1.314 +    appendVarint(d, POS_COLUMN);
   1.315 +    appendVarint(d, iColumn);
   1.316 +    d->iLastColumn = iColumn;
   1.317 +    d->iLastPos = d->iLastOffset = 0;
   1.318 +  }
   1.319 +  assert( iPos>=d->iLastPos );
   1.320 +  appendVarint(d, iPos-d->iLastPos+POS_BASE);
   1.321 +  d->iLastPos = iPos;
   1.322 +}
   1.323 +
   1.324 +/* Add a position to the last position list in a doclist. */
   1.325 +static void docListAddPos(DocList *d, int iColumn, int iPos){
   1.326 +  assert( d->iType==DL_POSITIONS );
   1.327 +  addPos(d, iColumn, iPos);
   1.328 +  appendVarint(d, POS_END);  /* add new terminator */
   1.329 +}
   1.330 +
   1.331 +/*
   1.332 +** Add a position and starting and ending offsets to a doclist.
   1.333 +**
   1.334 +** If the doclist is setup to handle only positions, then insert
   1.335 +** the position only and ignore the offsets.
   1.336 +*/
   1.337 +static void docListAddPosOffset(
   1.338 +  DocList *d,             /* Doclist under construction */
   1.339 +  int iColumn,            /* Column the inserted term is part of */
   1.340 +  int iPos,               /* Position of the inserted term */
   1.341 +  int iStartOffset,       /* Starting offset of inserted term */
   1.342 +  int iEndOffset          /* Ending offset of inserted term */
   1.343 +){
   1.344 +  assert( d->iType>=DL_POSITIONS );
   1.345 +  addPos(d, iColumn, iPos);
   1.346 +  if( d->iType==DL_POSITIONS_OFFSETS ){
   1.347 +    assert( iStartOffset>=d->iLastOffset );
   1.348 +    appendVarint(d, iStartOffset-d->iLastOffset);
   1.349 +    d->iLastOffset = iStartOffset;
   1.350 +    assert( iEndOffset>=iStartOffset );
   1.351 +    appendVarint(d, iEndOffset-iStartOffset);
   1.352 +  }
   1.353 +  appendVarint(d, POS_END);  /* add new terminator */
   1.354 +}
   1.355 +
   1.356 +/*
   1.357 +** A DocListReader object is a cursor into a doclist.  Initialize
   1.358 +** the cursor to the beginning of the doclist by calling readerInit().
   1.359 +** Then use routines
   1.360 +**
   1.361 +**      peekDocid()
   1.362 +**      readDocid()
   1.363 +**      readPosition()
   1.364 +**      skipPositionList()
   1.365 +**      and so forth...
   1.366 +**
   1.367 +** to read information out of the doclist.  When we reach the end
   1.368 +** of the doclist, atEnd() returns TRUE.
   1.369 +*/
   1.370 +typedef struct DocListReader {
   1.371 +  DocList *pDoclist;  /* The document list we are stepping through */
   1.372 +  char *p;            /* Pointer to next unread byte in the doclist */
   1.373 +  int iLastColumn;
   1.374 +  int iLastPos;  /* the last position read, or -1 when not in a position list */
   1.375 +} DocListReader;
   1.376 +
   1.377 +/*
   1.378 +** Initialize the DocListReader r to point to the beginning of pDoclist.
   1.379 +*/
   1.380 +static void readerInit(DocListReader *r, DocList *pDoclist){
   1.381 +  r->pDoclist = pDoclist;
   1.382 +  if( pDoclist!=NULL ){
   1.383 +    r->p = pDoclist->pData;
   1.384 +  }
   1.385 +  r->iLastColumn = -1;
   1.386 +  r->iLastPos = -1;
   1.387 +}
   1.388 +
   1.389 +/*
   1.390 +** Return TRUE if we have reached then end of pReader and there is
   1.391 +** nothing else left to read.
   1.392 +*/
   1.393 +static int atEnd(DocListReader *pReader){
   1.394 +  return pReader->pDoclist==0 || (pReader->p >= docListEnd(pReader->pDoclist));
   1.395 +}
   1.396 +
   1.397 +/* Peek at the next docid without advancing the read pointer. 
   1.398 +*/
   1.399 +static sqlite_int64 peekDocid(DocListReader *pReader){
   1.400 +  sqlite_int64 ret;
   1.401 +  assert( !atEnd(pReader) );
   1.402 +  assert( pReader->iLastPos==-1 );
   1.403 +  getVarint(pReader->p, &ret);
   1.404 +  return ret;
   1.405 +}
   1.406 +
   1.407 +/* Read the next docid.   See also nextDocid().
   1.408 +*/
   1.409 +static sqlite_int64 readDocid(DocListReader *pReader){
   1.410 +  sqlite_int64 ret;
   1.411 +  assert( !atEnd(pReader) );
   1.412 +  assert( pReader->iLastPos==-1 );
   1.413 +  pReader->p += getVarint(pReader->p, &ret);
   1.414 +  if( pReader->pDoclist->iType>=DL_POSITIONS ){
   1.415 +    pReader->iLastColumn = 0;
   1.416 +    pReader->iLastPos = 0;
   1.417 +  }
   1.418 +  return ret;
   1.419 +}
   1.420 +
   1.421 +/* Read the next position and column index from a position list.
   1.422 + * Returns the position, or -1 at the end of the list. */
   1.423 +static int readPosition(DocListReader *pReader, int *iColumn){
   1.424 +  int i;
   1.425 +  int iType = pReader->pDoclist->iType;
   1.426 +
   1.427 +  if( pReader->iLastPos==-1 ){
   1.428 +    return -1;
   1.429 +  }
   1.430 +  assert( !atEnd(pReader) );
   1.431 +
   1.432 +  if( iType<DL_POSITIONS ){
   1.433 +    return -1;
   1.434 +  }
   1.435 +  pReader->p += getVarint32(pReader->p, &i);
   1.436 +  if( i==POS_END ){
   1.437 +    pReader->iLastColumn = pReader->iLastPos = -1;
   1.438 +    *iColumn = -1;
   1.439 +    return -1;
   1.440 +  }
   1.441 +  if( i==POS_COLUMN ){
   1.442 +    pReader->p += getVarint32(pReader->p, &pReader->iLastColumn);
   1.443 +    pReader->iLastPos = 0;
   1.444 +    pReader->p += getVarint32(pReader->p, &i);
   1.445 +    assert( i>=POS_BASE );
   1.446 +  }
   1.447 +  pReader->iLastPos += ((int) i)-POS_BASE;
   1.448 +  if( iType>=DL_POSITIONS_OFFSETS ){
   1.449 +    /* Skip over offsets, ignoring them for now. */
   1.450 +    int iStart, iEnd;
   1.451 +    pReader->p += getVarint32(pReader->p, &iStart);
   1.452 +    pReader->p += getVarint32(pReader->p, &iEnd);
   1.453 +  }
   1.454 +  *iColumn = pReader->iLastColumn;
   1.455 +  return pReader->iLastPos;
   1.456 +}
   1.457 +
   1.458 +/* Skip past the end of a position list. */
   1.459 +static void skipPositionList(DocListReader *pReader){
   1.460 +  DocList *p = pReader->pDoclist;
   1.461 +  if( p && p->iType>=DL_POSITIONS ){
   1.462 +    int iColumn;
   1.463 +    while( readPosition(pReader, &iColumn)!=-1 ){}
   1.464 +  }
   1.465 +}
   1.466 +
   1.467 +/* Skip over a docid, including its position list if the doclist has
   1.468 + * positions. */
   1.469 +static void skipDocument(DocListReader *pReader){
   1.470 +  readDocid(pReader);
   1.471 +  skipPositionList(pReader);
   1.472 +}
   1.473 +
   1.474 +/* Skip past all docids which are less than [iDocid].  Returns 1 if a docid
   1.475 + * matching [iDocid] was found.  */
   1.476 +static int skipToDocid(DocListReader *pReader, sqlite_int64 iDocid){
   1.477 +  sqlite_int64 d = 0;
   1.478 +  while( !atEnd(pReader) && (d=peekDocid(pReader))<iDocid ){
   1.479 +    skipDocument(pReader);
   1.480 +  }
   1.481 +  return !atEnd(pReader) && d==iDocid;
   1.482 +}
   1.483 +
   1.484 +/* Return the first document in a document list.
   1.485 +*/
   1.486 +static sqlite_int64 firstDocid(DocList *d){
   1.487 +  DocListReader r;
   1.488 +  readerInit(&r, d);
   1.489 +  return readDocid(&r);
   1.490 +}
   1.491 +
   1.492 +#ifdef SQLITE_DEBUG
   1.493 +/*
   1.494 +** This routine is used for debugging purpose only.
   1.495 +**
   1.496 +** Write the content of a doclist to standard output.
   1.497 +*/
   1.498 +static void printDoclist(DocList *p){
   1.499 +  DocListReader r;
   1.500 +  const char *zSep = "";
   1.501 +
   1.502 +  readerInit(&r, p);
   1.503 +  while( !atEnd(&r) ){
   1.504 +    sqlite_int64 docid = readDocid(&r);
   1.505 +    if( docid==0 ){
   1.506 +      skipPositionList(&r);
   1.507 +      continue;
   1.508 +    }
   1.509 +    printf("%s%lld", zSep, docid);
   1.510 +    zSep =  ",";
   1.511 +    if( p->iType>=DL_POSITIONS ){
   1.512 +      int iPos, iCol;
   1.513 +      const char *zDiv = "";
   1.514 +      printf("(");
   1.515 +      while( (iPos = readPosition(&r, &iCol))>=0 ){
   1.516 +        printf("%s%d:%d", zDiv, iCol, iPos);
   1.517 +        zDiv = ":";
   1.518 +      }
   1.519 +      printf(")");
   1.520 +    }
   1.521 +  }
   1.522 +  printf("\n");
   1.523 +  fflush(stdout);
   1.524 +}
   1.525 +#endif /* SQLITE_DEBUG */
   1.526 +
   1.527 +/* Trim the given doclist to contain only positions in column
   1.528 + * [iRestrictColumn]. */
   1.529 +static void docListRestrictColumn(DocList *in, int iRestrictColumn){
   1.530 +  DocListReader r;
   1.531 +  DocList out;
   1.532 +
   1.533 +  assert( in->iType>=DL_POSITIONS );
   1.534 +  readerInit(&r, in);
   1.535 +  docListInit(&out, DL_POSITIONS, NULL, 0);
   1.536 +
   1.537 +  while( !atEnd(&r) ){
   1.538 +    sqlite_int64 iDocid = readDocid(&r);
   1.539 +    int iPos, iColumn;
   1.540 +
   1.541 +    docListAddDocid(&out, iDocid);
   1.542 +    while( (iPos = readPosition(&r, &iColumn)) != -1 ){
   1.543 +      if( iColumn==iRestrictColumn ){
   1.544 +        docListAddPos(&out, iColumn, iPos);
   1.545 +      }
   1.546 +    }
   1.547 +  }
   1.548 +
   1.549 +  docListDestroy(in);
   1.550 +  *in = out;
   1.551 +}
   1.552 +
   1.553 +/* Trim the given doclist by discarding any docids without any remaining
   1.554 + * positions. */
   1.555 +static void docListDiscardEmpty(DocList *in) {
   1.556 +  DocListReader r;
   1.557 +  DocList out;
   1.558 +
   1.559 +  /* TODO: It would be nice to implement this operation in place; that
   1.560 +   * could save a significant amount of memory in queries with long doclists. */
   1.561 +  assert( in->iType>=DL_POSITIONS );
   1.562 +  readerInit(&r, in);
   1.563 +  docListInit(&out, DL_POSITIONS, NULL, 0);
   1.564 +
   1.565 +  while( !atEnd(&r) ){
   1.566 +    sqlite_int64 iDocid = readDocid(&r);
   1.567 +    int match = 0;
   1.568 +    int iPos, iColumn;
   1.569 +    while( (iPos = readPosition(&r, &iColumn)) != -1 ){
   1.570 +      if( !match ){
   1.571 +        docListAddDocid(&out, iDocid);
   1.572 +        match = 1;
   1.573 +      }
   1.574 +      docListAddPos(&out, iColumn, iPos);
   1.575 +    }
   1.576 +  }
   1.577 +
   1.578 +  docListDestroy(in);
   1.579 +  *in = out;
   1.580 +}
   1.581 +
   1.582 +/* Helper function for docListUpdate() and docListAccumulate().
   1.583 +** Splices a doclist element into the doclist represented by r,
   1.584 +** leaving r pointing after the newly spliced element.
   1.585 +*/
   1.586 +static void docListSpliceElement(DocListReader *r, sqlite_int64 iDocid,
   1.587 +                                 const char *pSource, int nSource){
   1.588 +  DocList *d = r->pDoclist;
   1.589 +  char *pTarget;
   1.590 +  int nTarget, found;
   1.591 +
   1.592 +  found = skipToDocid(r, iDocid);
   1.593 +
   1.594 +  /* Describe slice in d to place pSource/nSource. */
   1.595 +  pTarget = r->p;
   1.596 +  if( found ){
   1.597 +    skipDocument(r);
   1.598 +    nTarget = r->p-pTarget;
   1.599 +  }else{
   1.600 +    nTarget = 0;
   1.601 +  }
   1.602 +
   1.603 +  /* The sense of the following is that there are three possibilities.
   1.604 +  ** If nTarget==nSource, we should not move any memory nor realloc.
   1.605 +  ** If nTarget>nSource, trim target and realloc.
   1.606 +  ** If nTarget<nSource, realloc then expand target.
   1.607 +  */
   1.608 +  if( nTarget>nSource ){
   1.609 +    memmove(pTarget+nSource, pTarget+nTarget, docListEnd(d)-(pTarget+nTarget));
   1.610 +  }
   1.611 +  if( nTarget!=nSource ){
   1.612 +    int iDoclist = pTarget-d->pData;
   1.613 +    d->pData = realloc(d->pData, d->nData+nSource-nTarget);
   1.614 +    pTarget = d->pData+iDoclist;
   1.615 +  }
   1.616 +  if( nTarget<nSource ){
   1.617 +    memmove(pTarget+nSource, pTarget+nTarget, docListEnd(d)-(pTarget+nTarget));
   1.618 +  }
   1.619 +
   1.620 +  memcpy(pTarget, pSource, nSource);
   1.621 +  d->nData += nSource-nTarget;
   1.622 +  r->p = pTarget+nSource;
   1.623 +}
   1.624 +
   1.625 +/* Insert/update pUpdate into the doclist. */
   1.626 +static void docListUpdate(DocList *d, DocList *pUpdate){
   1.627 +  DocListReader reader;
   1.628 +
   1.629 +  assert( d!=NULL && pUpdate!=NULL );
   1.630 +  assert( d->iType==pUpdate->iType);
   1.631 +
   1.632 +  readerInit(&reader, d);
   1.633 +  docListSpliceElement(&reader, firstDocid(pUpdate),
   1.634 +                       pUpdate->pData, pUpdate->nData);
   1.635 +}
   1.636 +
   1.637 +/* Propagate elements from pUpdate to pAcc, overwriting elements with
   1.638 +** matching docids.
   1.639 +*/
   1.640 +static void docListAccumulate(DocList *pAcc, DocList *pUpdate){
   1.641 +  DocListReader accReader, updateReader;
   1.642 +
   1.643 +  /* Handle edge cases where one doclist is empty. */
   1.644 +  assert( pAcc!=NULL );
   1.645 +  if( pUpdate==NULL || pUpdate->nData==0 ) return;
   1.646 +  if( pAcc->nData==0 ){
   1.647 +    pAcc->pData = malloc(pUpdate->nData);
   1.648 +    memcpy(pAcc->pData, pUpdate->pData, pUpdate->nData);
   1.649 +    pAcc->nData = pUpdate->nData;
   1.650 +    return;
   1.651 +  }
   1.652 +
   1.653 +  readerInit(&accReader, pAcc);
   1.654 +  readerInit(&updateReader, pUpdate);
   1.655 +
   1.656 +  while( !atEnd(&updateReader) ){
   1.657 +    char *pSource = updateReader.p;
   1.658 +    sqlite_int64 iDocid = readDocid(&updateReader);
   1.659 +    skipPositionList(&updateReader);
   1.660 +    docListSpliceElement(&accReader, iDocid, pSource, updateReader.p-pSource);
   1.661 +  }
   1.662 +}
   1.663 +
   1.664 +/*
   1.665 +** Read the next docid off of pIn.  Return 0 if we reach the end.
   1.666 +*
   1.667 +* TODO: This assumes that docids are never 0, but they may actually be 0 since
   1.668 +* users can choose docids when inserting into a full-text table.  Fix this.
   1.669 +*/
   1.670 +static sqlite_int64 nextDocid(DocListReader *pIn){
   1.671 +  skipPositionList(pIn);
   1.672 +  return atEnd(pIn) ? 0 : readDocid(pIn);
   1.673 +}
   1.674 +
   1.675 +/*
   1.676 +** pLeft and pRight are two DocListReaders that are pointing to
   1.677 +** positions lists of the same document: iDocid. 
   1.678 +**
   1.679 +** If there are no instances in pLeft or pRight where the position
   1.680 +** of pLeft is one less than the position of pRight, then this
   1.681 +** routine adds nothing to pOut.
   1.682 +**
   1.683 +** If there are one or more instances where positions from pLeft
   1.684 +** are exactly one less than positions from pRight, then add a new
   1.685 +** document record to pOut.  If pOut wants to hold positions, then
   1.686 +** include the positions from pRight that are one more than a
   1.687 +** position in pLeft.  In other words:  pRight.iPos==pLeft.iPos+1.
   1.688 +**
   1.689 +** pLeft and pRight are left pointing at the next document record.
   1.690 +*/
   1.691 +static void mergePosList(
   1.692 +  DocListReader *pLeft,    /* Left position list */
   1.693 +  DocListReader *pRight,   /* Right position list */
   1.694 +  sqlite_int64 iDocid,     /* The docid from pLeft and pRight */
   1.695 +  DocList *pOut            /* Write the merged document record here */
   1.696 +){
   1.697 +  int iLeftCol, iLeftPos = readPosition(pLeft, &iLeftCol);
   1.698 +  int iRightCol, iRightPos = readPosition(pRight, &iRightCol);
   1.699 +  int match = 0;
   1.700 +
   1.701 +  /* Loop until we've reached the end of both position lists. */
   1.702 +  while( iLeftPos!=-1 && iRightPos!=-1 ){
   1.703 +    if( iLeftCol==iRightCol && iLeftPos+1==iRightPos ){
   1.704 +      if( !match ){
   1.705 +        docListAddDocid(pOut, iDocid);
   1.706 +        match = 1;
   1.707 +      }
   1.708 +      if( pOut->iType>=DL_POSITIONS ){
   1.709 +        docListAddPos(pOut, iRightCol, iRightPos);
   1.710 +      }
   1.711 +      iLeftPos = readPosition(pLeft, &iLeftCol);
   1.712 +      iRightPos = readPosition(pRight, &iRightCol);
   1.713 +    }else if( iRightCol<iLeftCol ||
   1.714 +              (iRightCol==iLeftCol && iRightPos<iLeftPos+1) ){
   1.715 +      iRightPos = readPosition(pRight, &iRightCol);
   1.716 +    }else{
   1.717 +      iLeftPos = readPosition(pLeft, &iLeftCol);
   1.718 +    }
   1.719 +  }
   1.720 +  if( iLeftPos>=0 ) skipPositionList(pLeft);
   1.721 +  if( iRightPos>=0 ) skipPositionList(pRight);
   1.722 +}
   1.723 +
   1.724 +/* We have two doclists:  pLeft and pRight.
   1.725 +** Write the phrase intersection of these two doclists into pOut.
   1.726 +**
   1.727 +** A phrase intersection means that two documents only match
   1.728 +** if pLeft.iPos+1==pRight.iPos.
   1.729 +**
   1.730 +** The output pOut may or may not contain positions.  If pOut
   1.731 +** does contain positions, they are the positions of pRight.
   1.732 +*/
   1.733 +static void docListPhraseMerge(
   1.734 +  DocList *pLeft,    /* Doclist resulting from the words on the left */
   1.735 +  DocList *pRight,   /* Doclist for the next word to the right */
   1.736 +  DocList *pOut      /* Write the combined doclist here */
   1.737 +){
   1.738 +  DocListReader left, right;
   1.739 +  sqlite_int64 docidLeft, docidRight;
   1.740 +
   1.741 +  readerInit(&left, pLeft);
   1.742 +  readerInit(&right, pRight);
   1.743 +  docidLeft = nextDocid(&left);
   1.744 +  docidRight = nextDocid(&right);
   1.745 +
   1.746 +  while( docidLeft>0 && docidRight>0 ){
   1.747 +    if( docidLeft<docidRight ){
   1.748 +      docidLeft = nextDocid(&left);
   1.749 +    }else if( docidRight<docidLeft ){
   1.750 +      docidRight = nextDocid(&right);
   1.751 +    }else{
   1.752 +      mergePosList(&left, &right, docidLeft, pOut);
   1.753 +      docidLeft = nextDocid(&left);
   1.754 +      docidRight = nextDocid(&right);
   1.755 +    }
   1.756 +  }
   1.757 +}
   1.758 +
   1.759 +/* We have two doclists:  pLeft and pRight.
   1.760 +** Write the intersection of these two doclists into pOut.
   1.761 +** Only docids are matched.  Position information is ignored.
   1.762 +**
   1.763 +** The output pOut never holds positions.
   1.764 +*/
   1.765 +static void docListAndMerge(
   1.766 +  DocList *pLeft,    /* Doclist resulting from the words on the left */
   1.767 +  DocList *pRight,   /* Doclist for the next word to the right */
   1.768 +  DocList *pOut      /* Write the combined doclist here */
   1.769 +){
   1.770 +  DocListReader left, right;
   1.771 +  sqlite_int64 docidLeft, docidRight;
   1.772 +
   1.773 +  assert( pOut->iType<DL_POSITIONS );
   1.774 +
   1.775 +  readerInit(&left, pLeft);
   1.776 +  readerInit(&right, pRight);
   1.777 +  docidLeft = nextDocid(&left);
   1.778 +  docidRight = nextDocid(&right);
   1.779 +
   1.780 +  while( docidLeft>0 && docidRight>0 ){
   1.781 +    if( docidLeft<docidRight ){
   1.782 +      docidLeft = nextDocid(&left);
   1.783 +    }else if( docidRight<docidLeft ){
   1.784 +      docidRight = nextDocid(&right);
   1.785 +    }else{
   1.786 +      docListAddDocid(pOut, docidLeft);
   1.787 +      docidLeft = nextDocid(&left);
   1.788 +      docidRight = nextDocid(&right);
   1.789 +    }
   1.790 +  }
   1.791 +}
   1.792 +
   1.793 +/* We have two doclists:  pLeft and pRight.
   1.794 +** Write the union of these two doclists into pOut.
   1.795 +** Only docids are matched.  Position information is ignored.
   1.796 +**
   1.797 +** The output pOut never holds positions.
   1.798 +*/
   1.799 +static void docListOrMerge(
   1.800 +  DocList *pLeft,    /* Doclist resulting from the words on the left */
   1.801 +  DocList *pRight,   /* Doclist for the next word to the right */
   1.802 +  DocList *pOut      /* Write the combined doclist here */
   1.803 +){
   1.804 +  DocListReader left, right;
   1.805 +  sqlite_int64 docidLeft, docidRight, priorLeft;
   1.806 +
   1.807 +  readerInit(&left, pLeft);
   1.808 +  readerInit(&right, pRight);
   1.809 +  docidLeft = nextDocid(&left);
   1.810 +  docidRight = nextDocid(&right);
   1.811 +
   1.812 +  while( docidLeft>0 && docidRight>0 ){
   1.813 +    if( docidLeft<=docidRight ){
   1.814 +      docListAddDocid(pOut, docidLeft);
   1.815 +    }else{
   1.816 +      docListAddDocid(pOut, docidRight);
   1.817 +    }
   1.818 +    priorLeft = docidLeft;
   1.819 +    if( docidLeft<=docidRight ){
   1.820 +      docidLeft = nextDocid(&left);
   1.821 +    }
   1.822 +    if( docidRight>0 && docidRight<=priorLeft ){
   1.823 +      docidRight = nextDocid(&right);
   1.824 +    }
   1.825 +  }
   1.826 +  while( docidLeft>0 ){
   1.827 +    docListAddDocid(pOut, docidLeft);
   1.828 +    docidLeft = nextDocid(&left);
   1.829 +  }
   1.830 +  while( docidRight>0 ){
   1.831 +    docListAddDocid(pOut, docidRight);
   1.832 +    docidRight = nextDocid(&right);
   1.833 +  }
   1.834 +}
   1.835 +
   1.836 +/* We have two doclists:  pLeft and pRight.
   1.837 +** Write into pOut all documents that occur in pLeft but not
   1.838 +** in pRight.
   1.839 +**
   1.840 +** Only docids are matched.  Position information is ignored.
   1.841 +**
   1.842 +** The output pOut never holds positions.
   1.843 +*/
   1.844 +static void docListExceptMerge(
   1.845 +  DocList *pLeft,    /* Doclist resulting from the words on the left */
   1.846 +  DocList *pRight,   /* Doclist for the next word to the right */
   1.847 +  DocList *pOut      /* Write the combined doclist here */
   1.848 +){
   1.849 +  DocListReader left, right;
   1.850 +  sqlite_int64 docidLeft, docidRight, priorLeft;
   1.851 +
   1.852 +  readerInit(&left, pLeft);
   1.853 +  readerInit(&right, pRight);
   1.854 +  docidLeft = nextDocid(&left);
   1.855 +  docidRight = nextDocid(&right);
   1.856 +
   1.857 +  while( docidLeft>0 && docidRight>0 ){
   1.858 +    priorLeft = docidLeft;
   1.859 +    if( docidLeft<docidRight ){
   1.860 +      docListAddDocid(pOut, docidLeft);
   1.861 +    }
   1.862 +    if( docidLeft<=docidRight ){
   1.863 +      docidLeft = nextDocid(&left);
   1.864 +    }
   1.865 +    if( docidRight>0 && docidRight<=priorLeft ){
   1.866 +      docidRight = nextDocid(&right);
   1.867 +    }
   1.868 +  }
   1.869 +  while( docidLeft>0 ){
   1.870 +    docListAddDocid(pOut, docidLeft);
   1.871 +    docidLeft = nextDocid(&left);
   1.872 +  }
   1.873 +}
   1.874 +
   1.875 +static char *string_dup_n(const char *s, int n){
   1.876 +  char *str = malloc(n + 1);
   1.877 +  memcpy(str, s, n);
   1.878 +  str[n] = '\0';
   1.879 +  return str;
   1.880 +}
   1.881 +
   1.882 +/* Duplicate a string; the caller must free() the returned string.
   1.883 + * (We don't use strdup() since it is not part of the standard C library and
   1.884 + * may not be available everywhere.) */
   1.885 +static char *string_dup(const char *s){
   1.886 +  return string_dup_n(s, strlen(s));
   1.887 +}
   1.888 +
   1.889 +/* Format a string, replacing each occurrence of the % character with
   1.890 + * zDb.zName.  This may be more convenient than sqlite_mprintf()
   1.891 + * when one string is used repeatedly in a format string.
   1.892 + * The caller must free() the returned string. */
   1.893 +static char *string_format(const char *zFormat,
   1.894 +                           const char *zDb, const char *zName){
   1.895 +  const char *p;
   1.896 +  size_t len = 0;
   1.897 +  size_t nDb = strlen(zDb);
   1.898 +  size_t nName = strlen(zName);
   1.899 +  size_t nFullTableName = nDb+1+nName;
   1.900 +  char *result;
   1.901 +  char *r;
   1.902 +
   1.903 +  /* first compute length needed */
   1.904 +  for(p = zFormat ; *p ; ++p){
   1.905 +    len += (*p=='%' ? nFullTableName : 1);
   1.906 +  }
   1.907 +  len += 1;  /* for null terminator */
   1.908 +
   1.909 +  r = result = malloc(len);
   1.910 +  for(p = zFormat; *p; ++p){
   1.911 +    if( *p=='%' ){
   1.912 +      memcpy(r, zDb, nDb);
   1.913 +      r += nDb;
   1.914 +      *r++ = '.';
   1.915 +      memcpy(r, zName, nName);
   1.916 +      r += nName;
   1.917 +    } else {
   1.918 +      *r++ = *p;
   1.919 +    }
   1.920 +  }
   1.921 +  *r++ = '\0';
   1.922 +  assert( r == result + len );
   1.923 +  return result;
   1.924 +}
   1.925 +
   1.926 +static int sql_exec(sqlite3 *db, const char *zDb, const char *zName,
   1.927 +                    const char *zFormat){
   1.928 +  char *zCommand = string_format(zFormat, zDb, zName);
   1.929 +  int rc;
   1.930 +  TRACE(("FTS1 sql: %s\n", zCommand));
   1.931 +  rc = sqlite3_exec(db, zCommand, NULL, 0, NULL);
   1.932 +  free(zCommand);
   1.933 +  return rc;
   1.934 +}
   1.935 +
   1.936 +static int sql_prepare(sqlite3 *db, const char *zDb, const char *zName,
   1.937 +                       sqlite3_stmt **ppStmt, const char *zFormat){
   1.938 +  char *zCommand = string_format(zFormat, zDb, zName);
   1.939 +  int rc;
   1.940 +  TRACE(("FTS1 prepare: %s\n", zCommand));
   1.941 +  rc = sqlite3_prepare(db, zCommand, -1, ppStmt, NULL);
   1.942 +  free(zCommand);
   1.943 +  return rc;
   1.944 +}
   1.945 +
   1.946 +/* end utility functions */
   1.947 +
   1.948 +/* Forward reference */
   1.949 +typedef struct fulltext_vtab fulltext_vtab;
   1.950 +
   1.951 +/* A single term in a query is represented by an instances of
   1.952 +** the following structure.
   1.953 +*/
   1.954 +typedef struct QueryTerm {
   1.955 +  short int nPhrase; /* How many following terms are part of the same phrase */
   1.956 +  short int iPhrase; /* This is the i-th term of a phrase. */
   1.957 +  short int iColumn; /* Column of the index that must match this term */
   1.958 +  signed char isOr;  /* this term is preceded by "OR" */
   1.959 +  signed char isNot; /* this term is preceded by "-" */
   1.960 +  char *pTerm;       /* text of the term.  '\000' terminated.  malloced */
   1.961 +  int nTerm;         /* Number of bytes in pTerm[] */
   1.962 +} QueryTerm;
   1.963 +
   1.964 +
   1.965 +/* A query string is parsed into a Query structure.
   1.966 + *
   1.967 + * We could, in theory, allow query strings to be complicated
   1.968 + * nested expressions with precedence determined by parentheses.
   1.969 + * But none of the major search engines do this.  (Perhaps the
   1.970 + * feeling is that an parenthesized expression is two complex of
   1.971 + * an idea for the average user to grasp.)  Taking our lead from
   1.972 + * the major search engines, we will allow queries to be a list
   1.973 + * of terms (with an implied AND operator) or phrases in double-quotes,
   1.974 + * with a single optional "-" before each non-phrase term to designate
   1.975 + * negation and an optional OR connector.
   1.976 + *
   1.977 + * OR binds more tightly than the implied AND, which is what the
   1.978 + * major search engines seem to do.  So, for example:
   1.979 + * 
   1.980 + *    [one two OR three]     ==>    one AND (two OR three)
   1.981 + *    [one OR two three]     ==>    (one OR two) AND three
   1.982 + *
   1.983 + * A "-" before a term matches all entries that lack that term.
   1.984 + * The "-" must occur immediately before the term with in intervening
   1.985 + * space.  This is how the search engines do it.
   1.986 + *
   1.987 + * A NOT term cannot be the right-hand operand of an OR.  If this
   1.988 + * occurs in the query string, the NOT is ignored:
   1.989 + *
   1.990 + *    [one OR -two]          ==>    one OR two
   1.991 + *
   1.992 + */
   1.993 +typedef struct Query {
   1.994 +  fulltext_vtab *pFts;  /* The full text index */
   1.995 +  int nTerms;           /* Number of terms in the query */
   1.996 +  QueryTerm *pTerms;    /* Array of terms.  Space obtained from malloc() */
   1.997 +  int nextIsOr;         /* Set the isOr flag on the next inserted term */
   1.998 +  int nextColumn;       /* Next word parsed must be in this column */
   1.999 +  int dfltColumn;       /* The default column */
  1.1000 +} Query;
  1.1001 +
  1.1002 +
  1.1003 +/*
  1.1004 +** An instance of the following structure keeps track of generated
  1.1005 +** matching-word offset information and snippets.
  1.1006 +*/
  1.1007 +typedef struct Snippet {
  1.1008 +  int nMatch;     /* Total number of matches */
  1.1009 +  int nAlloc;     /* Space allocated for aMatch[] */
  1.1010 +  struct snippetMatch { /* One entry for each matching term */
  1.1011 +    char snStatus;       /* Status flag for use while constructing snippets */
  1.1012 +    short int iCol;      /* The column that contains the match */
  1.1013 +    short int iTerm;     /* The index in Query.pTerms[] of the matching term */
  1.1014 +    short int nByte;     /* Number of bytes in the term */
  1.1015 +    int iStart;          /* The offset to the first character of the term */
  1.1016 +  } *aMatch;      /* Points to space obtained from malloc */
  1.1017 +  char *zOffset;  /* Text rendering of aMatch[] */
  1.1018 +  int nOffset;    /* strlen(zOffset) */
  1.1019 +  char *zSnippet; /* Snippet text */
  1.1020 +  int nSnippet;   /* strlen(zSnippet) */
  1.1021 +} Snippet;
  1.1022 +
  1.1023 +
  1.1024 +typedef enum QueryType {
  1.1025 +  QUERY_GENERIC,   /* table scan */
  1.1026 +  QUERY_ROWID,     /* lookup by rowid */
  1.1027 +  QUERY_FULLTEXT   /* QUERY_FULLTEXT + [i] is a full-text search for column i*/
  1.1028 +} QueryType;
  1.1029 +
  1.1030 +/* TODO(shess) CHUNK_MAX controls how much data we allow in segment 0
  1.1031 +** before we start aggregating into larger segments.  Lower CHUNK_MAX
  1.1032 +** means that for a given input we have more individual segments per
  1.1033 +** term, which means more rows in the table and a bigger index (due to
  1.1034 +** both more rows and bigger rowids).  But it also reduces the average
  1.1035 +** cost of adding new elements to the segment 0 doclist, and it seems
  1.1036 +** to reduce the number of pages read and written during inserts.  256
  1.1037 +** was chosen by measuring insertion times for a certain input (first
  1.1038 +** 10k documents of Enron corpus), though including query performance
  1.1039 +** in the decision may argue for a larger value.
  1.1040 +*/
  1.1041 +#define CHUNK_MAX 256
  1.1042 +
  1.1043 +typedef enum fulltext_statement {
  1.1044 +  CONTENT_INSERT_STMT,
  1.1045 +  CONTENT_SELECT_STMT,
  1.1046 +  CONTENT_UPDATE_STMT,
  1.1047 +  CONTENT_DELETE_STMT,
  1.1048 +
  1.1049 +  TERM_SELECT_STMT,
  1.1050 +  TERM_SELECT_ALL_STMT,
  1.1051 +  TERM_INSERT_STMT,
  1.1052 +  TERM_UPDATE_STMT,
  1.1053 +  TERM_DELETE_STMT,
  1.1054 +
  1.1055 +  MAX_STMT                     /* Always at end! */
  1.1056 +} fulltext_statement;
  1.1057 +
  1.1058 +/* These must exactly match the enum above. */
  1.1059 +/* TODO(adam): Is there some risk that a statement (in particular,
  1.1060 +** pTermSelectStmt) will be used in two cursors at once, e.g.  if a
  1.1061 +** query joins a virtual table to itself?  If so perhaps we should
  1.1062 +** move some of these to the cursor object.
  1.1063 +*/
  1.1064 +static const char *const fulltext_zStatement[MAX_STMT] = {
  1.1065 +  /* CONTENT_INSERT */ NULL,  /* generated in contentInsertStatement() */
  1.1066 +  /* CONTENT_SELECT */ "select * from %_content where rowid = ?",
  1.1067 +  /* CONTENT_UPDATE */ NULL,  /* generated in contentUpdateStatement() */
  1.1068 +  /* CONTENT_DELETE */ "delete from %_content where rowid = ?",
  1.1069 +
  1.1070 +  /* TERM_SELECT */
  1.1071 +  "select rowid, doclist from %_term where term = ? and segment = ?",
  1.1072 +  /* TERM_SELECT_ALL */
  1.1073 +  "select doclist from %_term where term = ? order by segment",
  1.1074 +  /* TERM_INSERT */
  1.1075 +  "insert into %_term (rowid, term, segment, doclist) values (?, ?, ?, ?)",
  1.1076 +  /* TERM_UPDATE */ "update %_term set doclist = ? where rowid = ?",
  1.1077 +  /* TERM_DELETE */ "delete from %_term where rowid = ?",
  1.1078 +};
  1.1079 +
  1.1080 +/*
  1.1081 +** A connection to a fulltext index is an instance of the following
  1.1082 +** structure.  The xCreate and xConnect methods create an instance
  1.1083 +** of this structure and xDestroy and xDisconnect free that instance.
  1.1084 +** All other methods receive a pointer to the structure as one of their
  1.1085 +** arguments.
  1.1086 +*/
  1.1087 +struct fulltext_vtab {
  1.1088 +  sqlite3_vtab base;               /* Base class used by SQLite core */
  1.1089 +  sqlite3 *db;                     /* The database connection */
  1.1090 +  const char *zDb;                 /* logical database name */
  1.1091 +  const char *zName;               /* virtual table name */
  1.1092 +  int nColumn;                     /* number of columns in virtual table */
  1.1093 +  char **azColumn;                 /* column names.  malloced */
  1.1094 +  char **azContentColumn;          /* column names in content table; malloced */
  1.1095 +  sqlite3_tokenizer *pTokenizer;   /* tokenizer for inserts and queries */
  1.1096 +
  1.1097 +  /* Precompiled statements which we keep as long as the table is
  1.1098 +  ** open.
  1.1099 +  */
  1.1100 +  sqlite3_stmt *pFulltextStatements[MAX_STMT];
  1.1101 +};
  1.1102 +
  1.1103 +/*
  1.1104 +** When the core wants to do a query, it create a cursor using a
  1.1105 +** call to xOpen.  This structure is an instance of a cursor.  It
  1.1106 +** is destroyed by xClose.
  1.1107 +*/
  1.1108 +typedef struct fulltext_cursor {
  1.1109 +  sqlite3_vtab_cursor base;        /* Base class used by SQLite core */
  1.1110 +  QueryType iCursorType;           /* Copy of sqlite3_index_info.idxNum */
  1.1111 +  sqlite3_stmt *pStmt;             /* Prepared statement in use by the cursor */
  1.1112 +  int eof;                         /* True if at End Of Results */
  1.1113 +  Query q;                         /* Parsed query string */
  1.1114 +  Snippet snippet;                 /* Cached snippet for the current row */
  1.1115 +  int iColumn;                     /* Column being searched */
  1.1116 +  DocListReader result;  /* used when iCursorType == QUERY_FULLTEXT */ 
  1.1117 +} fulltext_cursor;
  1.1118 +
  1.1119 +static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
  1.1120 +  return (fulltext_vtab *) c->base.pVtab;
  1.1121 +}
  1.1122 +
  1.1123 +static const sqlite3_module fulltextModule;   /* forward declaration */
  1.1124 +
  1.1125 +/* Append a list of strings separated by commas to a StringBuffer. */
  1.1126 +static void appendList(StringBuffer *sb, int nString, char **azString){
  1.1127 +  int i;
  1.1128 +  for(i=0; i<nString; ++i){
  1.1129 +    if( i>0 ) append(sb, ", ");
  1.1130 +    append(sb, azString[i]);
  1.1131 +  }
  1.1132 +}
  1.1133 +
  1.1134 +/* Return a dynamically generated statement of the form
  1.1135 + *   insert into %_content (rowid, ...) values (?, ...)
  1.1136 + */
  1.1137 +static const char *contentInsertStatement(fulltext_vtab *v){
  1.1138 +  StringBuffer sb;
  1.1139 +  int i;
  1.1140 +
  1.1141 +  initStringBuffer(&sb);
  1.1142 +  append(&sb, "insert into %_content (rowid, ");
  1.1143 +  appendList(&sb, v->nColumn, v->azContentColumn);
  1.1144 +  append(&sb, ") values (?");
  1.1145 +  for(i=0; i<v->nColumn; ++i)
  1.1146 +    append(&sb, ", ?");
  1.1147 +  append(&sb, ")");
  1.1148 +  return sb.s;
  1.1149 +}
  1.1150 +
  1.1151 +/* Return a dynamically generated statement of the form
  1.1152 + *   update %_content set [col_0] = ?, [col_1] = ?, ...
  1.1153 + *                    where rowid = ?
  1.1154 + */
  1.1155 +static const char *contentUpdateStatement(fulltext_vtab *v){
  1.1156 +  StringBuffer sb;
  1.1157 +  int i;
  1.1158 +
  1.1159 +  initStringBuffer(&sb);
  1.1160 +  append(&sb, "update %_content set ");
  1.1161 +  for(i=0; i<v->nColumn; ++i) {
  1.1162 +    if( i>0 ){
  1.1163 +      append(&sb, ", ");
  1.1164 +    }
  1.1165 +    append(&sb, v->azContentColumn[i]);
  1.1166 +    append(&sb, " = ?");
  1.1167 +  }
  1.1168 +  append(&sb, " where rowid = ?");
  1.1169 +  return sb.s;
  1.1170 +}
  1.1171 +
  1.1172 +/* Puts a freshly-prepared statement determined by iStmt in *ppStmt.
  1.1173 +** If the indicated statement has never been prepared, it is prepared
  1.1174 +** and cached, otherwise the cached version is reset.
  1.1175 +*/
  1.1176 +static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt,
  1.1177 +                             sqlite3_stmt **ppStmt){
  1.1178 +  assert( iStmt<MAX_STMT );
  1.1179 +  if( v->pFulltextStatements[iStmt]==NULL ){
  1.1180 +    const char *zStmt;
  1.1181 +    int rc;
  1.1182 +    switch( iStmt ){
  1.1183 +      case CONTENT_INSERT_STMT:
  1.1184 +        zStmt = contentInsertStatement(v); break;
  1.1185 +      case CONTENT_UPDATE_STMT:
  1.1186 +        zStmt = contentUpdateStatement(v); break;
  1.1187 +      default:
  1.1188 +        zStmt = fulltext_zStatement[iStmt];
  1.1189 +    }
  1.1190 +    rc = sql_prepare(v->db, v->zDb, v->zName, &v->pFulltextStatements[iStmt],
  1.1191 +                         zStmt);
  1.1192 +    if( zStmt != fulltext_zStatement[iStmt]) free((void *) zStmt);
  1.1193 +    if( rc!=SQLITE_OK ) return rc;
  1.1194 +  } else {
  1.1195 +    int rc = sqlite3_reset(v->pFulltextStatements[iStmt]);
  1.1196 +    if( rc!=SQLITE_OK ) return rc;
  1.1197 +  }
  1.1198 +
  1.1199 +  *ppStmt = v->pFulltextStatements[iStmt];
  1.1200 +  return SQLITE_OK;
  1.1201 +}
  1.1202 +
  1.1203 +/* Step the indicated statement, handling errors SQLITE_BUSY (by
  1.1204 +** retrying) and SQLITE_SCHEMA (by re-preparing and transferring
  1.1205 +** bindings to the new statement).
  1.1206 +** TODO(adam): We should extend this function so that it can work with
  1.1207 +** statements declared locally, not only globally cached statements.
  1.1208 +*/
  1.1209 +static int sql_step_statement(fulltext_vtab *v, fulltext_statement iStmt,
  1.1210 +                              sqlite3_stmt **ppStmt){
  1.1211 +  int rc;
  1.1212 +  sqlite3_stmt *s = *ppStmt;
  1.1213 +  assert( iStmt<MAX_STMT );
  1.1214 +  assert( s==v->pFulltextStatements[iStmt] );
  1.1215 +
  1.1216 +  while( (rc=sqlite3_step(s))!=SQLITE_DONE && rc!=SQLITE_ROW ){
  1.1217 +    if( rc==SQLITE_BUSY ) continue;
  1.1218 +    if( rc!=SQLITE_ERROR ) return rc;
  1.1219 +
  1.1220 +    /* If an SQLITE_SCHEMA error has occured, then finalizing this
  1.1221 +     * statement is going to delete the fulltext_vtab structure. If
  1.1222 +     * the statement just executed is in the pFulltextStatements[]
  1.1223 +     * array, it will be finalized twice. So remove it before
  1.1224 +     * calling sqlite3_finalize().
  1.1225 +     */
  1.1226 +    v->pFulltextStatements[iStmt] = NULL;
  1.1227 +    rc = sqlite3_finalize(s);
  1.1228 +    break;
  1.1229 +  }
  1.1230 +  return rc;
  1.1231 +
  1.1232 + err:
  1.1233 +  sqlite3_finalize(s);
  1.1234 +  return rc;
  1.1235 +}
  1.1236 +
  1.1237 +/* Like sql_step_statement(), but convert SQLITE_DONE to SQLITE_OK.
  1.1238 +** Useful for statements like UPDATE, where we expect no results.
  1.1239 +*/
  1.1240 +static int sql_single_step_statement(fulltext_vtab *v,
  1.1241 +                                     fulltext_statement iStmt,
  1.1242 +                                     sqlite3_stmt **ppStmt){
  1.1243 +  int rc = sql_step_statement(v, iStmt, ppStmt);
  1.1244 +  return (rc==SQLITE_DONE) ? SQLITE_OK : rc;
  1.1245 +}
  1.1246 +
  1.1247 +/* insert into %_content (rowid, ...) values ([rowid], [pValues]) */
  1.1248 +static int content_insert(fulltext_vtab *v, sqlite3_value *rowid,
  1.1249 +                          sqlite3_value **pValues){
  1.1250 +  sqlite3_stmt *s;
  1.1251 +  int i;
  1.1252 +  int rc = sql_get_statement(v, CONTENT_INSERT_STMT, &s);
  1.1253 +  if( rc!=SQLITE_OK ) return rc;
  1.1254 +
  1.1255 +  rc = sqlite3_bind_value(s, 1, rowid);
  1.1256 +  if( rc!=SQLITE_OK ) return rc;
  1.1257 +
  1.1258 +  for(i=0; i<v->nColumn; ++i){
  1.1259 +    rc = sqlite3_bind_value(s, 2+i, pValues[i]);
  1.1260 +    if( rc!=SQLITE_OK ) return rc;
  1.1261 +  }
  1.1262 +
  1.1263 +  return sql_single_step_statement(v, CONTENT_INSERT_STMT, &s);
  1.1264 +}
  1.1265 +
  1.1266 +/* update %_content set col0 = pValues[0], col1 = pValues[1], ...
  1.1267 + *                  where rowid = [iRowid] */
  1.1268 +static int content_update(fulltext_vtab *v, sqlite3_value **pValues,
  1.1269 +                          sqlite_int64 iRowid){
  1.1270 +  sqlite3_stmt *s;
  1.1271 +  int i;
  1.1272 +  int rc = sql_get_statement(v, CONTENT_UPDATE_STMT, &s);
  1.1273 +  if( rc!=SQLITE_OK ) return rc;
  1.1274 +
  1.1275 +  for(i=0; i<v->nColumn; ++i){
  1.1276 +    rc = sqlite3_bind_value(s, 1+i, pValues[i]);
  1.1277 +    if( rc!=SQLITE_OK ) return rc;
  1.1278 +  }
  1.1279 +
  1.1280 +  rc = sqlite3_bind_int64(s, 1+v->nColumn, iRowid);
  1.1281 +  if( rc!=SQLITE_OK ) return rc;
  1.1282 +
  1.1283 +  return sql_single_step_statement(v, CONTENT_UPDATE_STMT, &s);
  1.1284 +}
  1.1285 +
  1.1286 +static void freeStringArray(int nString, const char **pString){
  1.1287 +  int i;
  1.1288 +
  1.1289 +  for (i=0 ; i < nString ; ++i) {
  1.1290 +    if( pString[i]!=NULL ) free((void *) pString[i]);
  1.1291 +  }
  1.1292 +  free((void *) pString);
  1.1293 +}
  1.1294 +
  1.1295 +/* select * from %_content where rowid = [iRow]
  1.1296 + * The caller must delete the returned array and all strings in it.
  1.1297 + * null fields will be NULL in the returned array.
  1.1298 + *
  1.1299 + * TODO: Perhaps we should return pointer/length strings here for consistency
  1.1300 + * with other code which uses pointer/length. */
  1.1301 +static int content_select(fulltext_vtab *v, sqlite_int64 iRow,
  1.1302 +                          const char ***pValues){
  1.1303 +  sqlite3_stmt *s;
  1.1304 +  const char **values;
  1.1305 +  int i;
  1.1306 +  int rc;
  1.1307 +
  1.1308 +  *pValues = NULL;
  1.1309 +
  1.1310 +  rc = sql_get_statement(v, CONTENT_SELECT_STMT, &s);
  1.1311 +  if( rc!=SQLITE_OK ) return rc;
  1.1312 +
  1.1313 +  rc = sqlite3_bind_int64(s, 1, iRow);
  1.1314 +  if( rc!=SQLITE_OK ) return rc;
  1.1315 +
  1.1316 +  rc = sql_step_statement(v, CONTENT_SELECT_STMT, &s);
  1.1317 +  if( rc!=SQLITE_ROW ) return rc;
  1.1318 +
  1.1319 +  values = (const char **) malloc(v->nColumn * sizeof(const char *));
  1.1320 +  for(i=0; i<v->nColumn; ++i){
  1.1321 +    if( sqlite3_column_type(s, i)==SQLITE_NULL ){
  1.1322 +      values[i] = NULL;
  1.1323 +    }else{
  1.1324 +      values[i] = string_dup((char*)sqlite3_column_text(s, i));
  1.1325 +    }
  1.1326 +  }
  1.1327 +
  1.1328 +  /* We expect only one row.  We must execute another sqlite3_step()
  1.1329 +   * to complete the iteration; otherwise the table will remain locked. */
  1.1330 +  rc = sqlite3_step(s);
  1.1331 +  if( rc==SQLITE_DONE ){
  1.1332 +    *pValues = values;
  1.1333 +    return SQLITE_OK;
  1.1334 +  }
  1.1335 +
  1.1336 +  freeStringArray(v->nColumn, values);
  1.1337 +  return rc;
  1.1338 +}
  1.1339 +
  1.1340 +/* delete from %_content where rowid = [iRow ] */
  1.1341 +static int content_delete(fulltext_vtab *v, sqlite_int64 iRow){
  1.1342 +  sqlite3_stmt *s;
  1.1343 +  int rc = sql_get_statement(v, CONTENT_DELETE_STMT, &s);
  1.1344 +  if( rc!=SQLITE_OK ) return rc;
  1.1345 +
  1.1346 +  rc = sqlite3_bind_int64(s, 1, iRow);
  1.1347 +  if( rc!=SQLITE_OK ) return rc;
  1.1348 +
  1.1349 +  return sql_single_step_statement(v, CONTENT_DELETE_STMT, &s);
  1.1350 +}
  1.1351 +
  1.1352 +/* select rowid, doclist from %_term
  1.1353 + *  where term = [pTerm] and segment = [iSegment]
  1.1354 + * If found, returns SQLITE_ROW; the caller must free the
  1.1355 + * returned doclist.  If no rows found, returns SQLITE_DONE. */
  1.1356 +static int term_select(fulltext_vtab *v, const char *pTerm, int nTerm,
  1.1357 +                       int iSegment,
  1.1358 +                       sqlite_int64 *rowid, DocList *out){
  1.1359 +  sqlite3_stmt *s;
  1.1360 +  int rc = sql_get_statement(v, TERM_SELECT_STMT, &s);
  1.1361 +  if( rc!=SQLITE_OK ) return rc;
  1.1362 +
  1.1363 +  rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC);
  1.1364 +  if( rc!=SQLITE_OK ) return rc;
  1.1365 +
  1.1366 +  rc = sqlite3_bind_int(s, 2, iSegment);
  1.1367 +  if( rc!=SQLITE_OK ) return rc;
  1.1368 +
  1.1369 +  rc = sql_step_statement(v, TERM_SELECT_STMT, &s);
  1.1370 +  if( rc!=SQLITE_ROW ) return rc;
  1.1371 +
  1.1372 +  *rowid = sqlite3_column_int64(s, 0);
  1.1373 +  docListInit(out, DL_DEFAULT,
  1.1374 +              sqlite3_column_blob(s, 1), sqlite3_column_bytes(s, 1));
  1.1375 +
  1.1376 +  /* We expect only one row.  We must execute another sqlite3_step()
  1.1377 +   * to complete the iteration; otherwise the table will remain locked. */
  1.1378 +  rc = sqlite3_step(s);
  1.1379 +  return rc==SQLITE_DONE ? SQLITE_ROW : rc;
  1.1380 +}
  1.1381 +
  1.1382 +/* Load the segment doclists for term pTerm and merge them in
  1.1383 +** appropriate order into out.  Returns SQLITE_OK if successful.  If
  1.1384 +** there are no segments for pTerm, successfully returns an empty
  1.1385 +** doclist in out.
  1.1386 +**
  1.1387 +** Each document consists of 1 or more "columns".  The number of
  1.1388 +** columns is v->nColumn.  If iColumn==v->nColumn, then return
  1.1389 +** position information about all columns.  If iColumn<v->nColumn,
  1.1390 +** then only return position information about the iColumn-th column
  1.1391 +** (where the first column is 0).
  1.1392 +*/
  1.1393 +static int term_select_all(
  1.1394 +  fulltext_vtab *v,     /* The fulltext index we are querying against */
  1.1395 +  int iColumn,          /* If <nColumn, only look at the iColumn-th column */
  1.1396 +  const char *pTerm,    /* The term whose posting lists we want */
  1.1397 +  int nTerm,            /* Number of bytes in pTerm */
  1.1398 +  DocList *out          /* Write the resulting doclist here */
  1.1399 +){
  1.1400 +  DocList doclist;
  1.1401 +  sqlite3_stmt *s;
  1.1402 +  int rc = sql_get_statement(v, TERM_SELECT_ALL_STMT, &s);
  1.1403 +  if( rc!=SQLITE_OK ) return rc;
  1.1404 +
  1.1405 +  rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC);
  1.1406 +  if( rc!=SQLITE_OK ) return rc;
  1.1407 +
  1.1408 +  docListInit(&doclist, DL_DEFAULT, 0, 0);
  1.1409 +
  1.1410 +  /* TODO(shess) Handle schema and busy errors. */
  1.1411 +  while( (rc=sql_step_statement(v, TERM_SELECT_ALL_STMT, &s))==SQLITE_ROW ){
  1.1412 +    DocList old;
  1.1413 +
  1.1414 +    /* TODO(shess) If we processed doclists from oldest to newest, we
  1.1415 +    ** could skip the malloc() involved with the following call.  For
  1.1416 +    ** now, I'd rather keep this logic similar to index_insert_term().
  1.1417 +    ** We could additionally drop elements when we see deletes, but
  1.1418 +    ** that would require a distinct version of docListAccumulate().
  1.1419 +    */
  1.1420 +    docListInit(&old, DL_DEFAULT,
  1.1421 +                sqlite3_column_blob(s, 0), sqlite3_column_bytes(s, 0));
  1.1422 +
  1.1423 +    if( iColumn<v->nColumn ){   /* querying a single column */
  1.1424 +      docListRestrictColumn(&old, iColumn);
  1.1425 +    }
  1.1426 +
  1.1427 +    /* doclist contains the newer data, so write it over old.  Then
  1.1428 +    ** steal accumulated result for doclist.
  1.1429 +    */
  1.1430 +    docListAccumulate(&old, &doclist);
  1.1431 +    docListDestroy(&doclist);
  1.1432 +    doclist = old;
  1.1433 +  }
  1.1434 +  if( rc!=SQLITE_DONE ){
  1.1435 +    docListDestroy(&doclist);
  1.1436 +    return rc;
  1.1437 +  }
  1.1438 +
  1.1439 +  docListDiscardEmpty(&doclist);
  1.1440 +  *out = doclist;
  1.1441 +  return SQLITE_OK;
  1.1442 +}
  1.1443 +
  1.1444 +/* insert into %_term (rowid, term, segment, doclist)
  1.1445 +               values ([piRowid], [pTerm], [iSegment], [doclist])
  1.1446 +** Lets sqlite select rowid if piRowid is NULL, else uses *piRowid.
  1.1447 +**
  1.1448 +** NOTE(shess) piRowid is IN, with values of "space of int64" plus
  1.1449 +** null, it is not used to pass data back to the caller.
  1.1450 +*/
  1.1451 +static int term_insert(fulltext_vtab *v, sqlite_int64 *piRowid,
  1.1452 +                       const char *pTerm, int nTerm,
  1.1453 +                       int iSegment, DocList *doclist){
  1.1454 +  sqlite3_stmt *s;
  1.1455 +  int rc = sql_get_statement(v, TERM_INSERT_STMT, &s);
  1.1456 +  if( rc!=SQLITE_OK ) return rc;
  1.1457 +
  1.1458 +  if( piRowid==NULL ){
  1.1459 +    rc = sqlite3_bind_null(s, 1);
  1.1460 +  }else{
  1.1461 +    rc = sqlite3_bind_int64(s, 1, *piRowid);
  1.1462 +  }
  1.1463 +  if( rc!=SQLITE_OK ) return rc;
  1.1464 +
  1.1465 +  rc = sqlite3_bind_text(s, 2, pTerm, nTerm, SQLITE_STATIC);
  1.1466 +  if( rc!=SQLITE_OK ) return rc;
  1.1467 +
  1.1468 +  rc = sqlite3_bind_int(s, 3, iSegment);
  1.1469 +  if( rc!=SQLITE_OK ) return rc;
  1.1470 +
  1.1471 +  rc = sqlite3_bind_blob(s, 4, doclist->pData, doclist->nData, SQLITE_STATIC);
  1.1472 +  if( rc!=SQLITE_OK ) return rc;
  1.1473 +
  1.1474 +  return sql_single_step_statement(v, TERM_INSERT_STMT, &s);
  1.1475 +}
  1.1476 +
  1.1477 +/* update %_term set doclist = [doclist] where rowid = [rowid] */
  1.1478 +static int term_update(fulltext_vtab *v, sqlite_int64 rowid,
  1.1479 +                       DocList *doclist){
  1.1480 +  sqlite3_stmt *s;
  1.1481 +  int rc = sql_get_statement(v, TERM_UPDATE_STMT, &s);
  1.1482 +  if( rc!=SQLITE_OK ) return rc;
  1.1483 +
  1.1484 +  rc = sqlite3_bind_blob(s, 1, doclist->pData, doclist->nData, SQLITE_STATIC);
  1.1485 +  if( rc!=SQLITE_OK ) return rc;
  1.1486 +
  1.1487 +  rc = sqlite3_bind_int64(s, 2, rowid);
  1.1488 +  if( rc!=SQLITE_OK ) return rc;
  1.1489 +
  1.1490 +  return sql_single_step_statement(v, TERM_UPDATE_STMT, &s);
  1.1491 +}
  1.1492 +
  1.1493 +static int term_delete(fulltext_vtab *v, sqlite_int64 rowid){
  1.1494 +  sqlite3_stmt *s;
  1.1495 +  int rc = sql_get_statement(v, TERM_DELETE_STMT, &s);
  1.1496 +  if( rc!=SQLITE_OK ) return rc;
  1.1497 +
  1.1498 +  rc = sqlite3_bind_int64(s, 1, rowid);
  1.1499 +  if( rc!=SQLITE_OK ) return rc;
  1.1500 +
  1.1501 +  return sql_single_step_statement(v, TERM_DELETE_STMT, &s);
  1.1502 +}
  1.1503 +
  1.1504 +/*
  1.1505 +** Free the memory used to contain a fulltext_vtab structure.
  1.1506 +*/
  1.1507 +static void fulltext_vtab_destroy(fulltext_vtab *v){
  1.1508 +  int iStmt, i;
  1.1509 +
  1.1510 +  TRACE(("FTS1 Destroy %p\n", v));
  1.1511 +  for( iStmt=0; iStmt<MAX_STMT; iStmt++ ){
  1.1512 +    if( v->pFulltextStatements[iStmt]!=NULL ){
  1.1513 +      sqlite3_finalize(v->pFulltextStatements[iStmt]);
  1.1514 +      v->pFulltextStatements[iStmt] = NULL;
  1.1515 +    }
  1.1516 +  }
  1.1517 +
  1.1518 +  if( v->pTokenizer!=NULL ){
  1.1519 +    v->pTokenizer->pModule->xDestroy(v->pTokenizer);
  1.1520 +    v->pTokenizer = NULL;
  1.1521 +  }
  1.1522 +  
  1.1523 +  free(v->azColumn);
  1.1524 +  for(i = 0; i < v->nColumn; ++i) {
  1.1525 +    sqlite3_free(v->azContentColumn[i]);
  1.1526 +  }
  1.1527 +  free(v->azContentColumn);
  1.1528 +  free(v);
  1.1529 +}
  1.1530 +
  1.1531 +/*
  1.1532 +** Token types for parsing the arguments to xConnect or xCreate.
  1.1533 +*/
  1.1534 +#define TOKEN_EOF         0    /* End of file */
  1.1535 +#define TOKEN_SPACE       1    /* Any kind of whitespace */
  1.1536 +#define TOKEN_ID          2    /* An identifier */
  1.1537 +#define TOKEN_STRING      3    /* A string literal */
  1.1538 +#define TOKEN_PUNCT       4    /* A single punctuation character */
  1.1539 +
  1.1540 +/*
  1.1541 +** If X is a character that can be used in an identifier then
  1.1542 +** IdChar(X) will be true.  Otherwise it is false.
  1.1543 +**
  1.1544 +** For ASCII, any character with the high-order bit set is
  1.1545 +** allowed in an identifier.  For 7-bit characters, 
  1.1546 +** sqlite3IsIdChar[X] must be 1.
  1.1547 +**
  1.1548 +** Ticket #1066.  the SQL standard does not allow '$' in the
  1.1549 +** middle of identfiers.  But many SQL implementations do. 
  1.1550 +** SQLite will allow '$' in identifiers for compatibility.
  1.1551 +** But the feature is undocumented.
  1.1552 +*/
  1.1553 +static const char isIdChar[] = {
  1.1554 +/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
  1.1555 +    0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 2x */
  1.1556 +    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
  1.1557 +    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
  1.1558 +    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
  1.1559 +    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
  1.1560 +    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
  1.1561 +};
  1.1562 +#define IdChar(C)  (((c=C)&0x80)!=0 || (c>0x1f && isIdChar[c-0x20]))
  1.1563 +
  1.1564 +
  1.1565 +/*
  1.1566 +** Return the length of the token that begins at z[0]. 
  1.1567 +** Store the token type in *tokenType before returning.
  1.1568 +*/
  1.1569 +static int getToken(const char *z, int *tokenType){
  1.1570 +  int i, c;
  1.1571 +  switch( *z ){
  1.1572 +    case 0: {
  1.1573 +      *tokenType = TOKEN_EOF;
  1.1574 +      return 0;
  1.1575 +    }
  1.1576 +    case ' ': case '\t': case '\n': case '\f': case '\r': {
  1.1577 +      for(i=1; safe_isspace(z[i]); i++){}
  1.1578 +      *tokenType = TOKEN_SPACE;
  1.1579 +      return i;
  1.1580 +    }
  1.1581 +    case '`':
  1.1582 +    case '\'':
  1.1583 +    case '"': {
  1.1584 +      int delim = z[0];
  1.1585 +      for(i=1; (c=z[i])!=0; i++){
  1.1586 +        if( c==delim ){
  1.1587 +          if( z[i+1]==delim ){
  1.1588 +            i++;
  1.1589 +          }else{
  1.1590 +            break;
  1.1591 +          }
  1.1592 +        }
  1.1593 +      }
  1.1594 +      *tokenType = TOKEN_STRING;
  1.1595 +      return i + (c!=0);
  1.1596 +    }
  1.1597 +    case '[': {
  1.1598 +      for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){}
  1.1599 +      *tokenType = TOKEN_ID;
  1.1600 +      return i;
  1.1601 +    }
  1.1602 +    default: {
  1.1603 +      if( !IdChar(*z) ){
  1.1604 +        break;
  1.1605 +      }
  1.1606 +      for(i=1; IdChar(z[i]); i++){}
  1.1607 +      *tokenType = TOKEN_ID;
  1.1608 +      return i;
  1.1609 +    }
  1.1610 +  }
  1.1611 +  *tokenType = TOKEN_PUNCT;
  1.1612 +  return 1;
  1.1613 +}
  1.1614 +
  1.1615 +/*
  1.1616 +** A token extracted from a string is an instance of the following
  1.1617 +** structure.
  1.1618 +*/
  1.1619 +typedef struct Token {
  1.1620 +  const char *z;       /* Pointer to token text.  Not '\000' terminated */
  1.1621 +  short int n;         /* Length of the token text in bytes. */
  1.1622 +} Token;
  1.1623 +
  1.1624 +/*
  1.1625 +** Given a input string (which is really one of the argv[] parameters
  1.1626 +** passed into xConnect or xCreate) split the string up into tokens.
  1.1627 +** Return an array of pointers to '\000' terminated strings, one string
  1.1628 +** for each non-whitespace token.
  1.1629 +**
  1.1630 +** The returned array is terminated by a single NULL pointer.
  1.1631 +**
  1.1632 +** Space to hold the returned array is obtained from a single
  1.1633 +** malloc and should be freed by passing the return value to free().
  1.1634 +** The individual strings within the token list are all a part of
  1.1635 +** the single memory allocation and will all be freed at once.
  1.1636 +*/
  1.1637 +static char **tokenizeString(const char *z, int *pnToken){
  1.1638 +  int nToken = 0;
  1.1639 +  Token *aToken = malloc( strlen(z) * sizeof(aToken[0]) );
  1.1640 +  int n = 1;
  1.1641 +  int e, i;
  1.1642 +  int totalSize = 0;
  1.1643 +  char **azToken;
  1.1644 +  char *zCopy;
  1.1645 +  while( n>0 ){
  1.1646 +    n = getToken(z, &e);
  1.1647 +    if( e!=TOKEN_SPACE ){
  1.1648 +      aToken[nToken].z = z;
  1.1649 +      aToken[nToken].n = n;
  1.1650 +      nToken++;
  1.1651 +      totalSize += n+1;
  1.1652 +    }
  1.1653 +    z += n;
  1.1654 +  }
  1.1655 +  azToken = (char**)malloc( nToken*sizeof(char*) + totalSize );
  1.1656 +  zCopy = (char*)&azToken[nToken];
  1.1657 +  nToken--;
  1.1658 +  for(i=0; i<nToken; i++){
  1.1659 +    azToken[i] = zCopy;
  1.1660 +    n = aToken[i].n;
  1.1661 +    memcpy(zCopy, aToken[i].z, n);
  1.1662 +    zCopy[n] = 0;
  1.1663 +    zCopy += n+1;
  1.1664 +  }
  1.1665 +  azToken[nToken] = 0;
  1.1666 +  free(aToken);
  1.1667 +  *pnToken = nToken;
  1.1668 +  return azToken;
  1.1669 +}
  1.1670 +
  1.1671 +/*
  1.1672 +** Convert an SQL-style quoted string into a normal string by removing
  1.1673 +** the quote characters.  The conversion is done in-place.  If the
  1.1674 +** input does not begin with a quote character, then this routine
  1.1675 +** is a no-op.
  1.1676 +**
  1.1677 +** Examples:
  1.1678 +**
  1.1679 +**     "abc"   becomes   abc
  1.1680 +**     'xyz'   becomes   xyz
  1.1681 +**     [pqr]   becomes   pqr
  1.1682 +**     `mno`   becomes   mno
  1.1683 +*/
  1.1684 +static void dequoteString(char *z){
  1.1685 +  int quote;
  1.1686 +  int i, j;
  1.1687 +  if( z==0 ) return;
  1.1688 +  quote = z[0];
  1.1689 +  switch( quote ){
  1.1690 +    case '\'':  break;
  1.1691 +    case '"':   break;
  1.1692 +    case '`':   break;                /* For MySQL compatibility */
  1.1693 +    case '[':   quote = ']';  break;  /* For MS SqlServer compatibility */
  1.1694 +    default:    return;
  1.1695 +  }
  1.1696 +  for(i=1, j=0; z[i]; i++){
  1.1697 +    if( z[i]==quote ){
  1.1698 +      if( z[i+1]==quote ){
  1.1699 +        z[j++] = quote;
  1.1700 +        i++;
  1.1701 +      }else{
  1.1702 +        z[j++] = 0;
  1.1703 +        break;
  1.1704 +      }
  1.1705 +    }else{
  1.1706 +      z[j++] = z[i];
  1.1707 +    }
  1.1708 +  }
  1.1709 +}
  1.1710 +
  1.1711 +/*
  1.1712 +** The input azIn is a NULL-terminated list of tokens.  Remove the first
  1.1713 +** token and all punctuation tokens.  Remove the quotes from
  1.1714 +** around string literal tokens.
  1.1715 +**
  1.1716 +** Example:
  1.1717 +**
  1.1718 +**     input:      tokenize chinese ( 'simplifed' , 'mixed' )
  1.1719 +**     output:     chinese simplifed mixed
  1.1720 +**
  1.1721 +** Another example:
  1.1722 +**
  1.1723 +**     input:      delimiters ( '[' , ']' , '...' )
  1.1724 +**     output:     [ ] ...
  1.1725 +*/
  1.1726 +static void tokenListToIdList(char **azIn){
  1.1727 +  int i, j;
  1.1728 +  if( azIn ){
  1.1729 +    for(i=0, j=-1; azIn[i]; i++){
  1.1730 +      if( safe_isalnum(azIn[i][0]) || azIn[i][1] ){
  1.1731 +        dequoteString(azIn[i]);
  1.1732 +        if( j>=0 ){
  1.1733 +          azIn[j] = azIn[i];
  1.1734 +        }
  1.1735 +        j++;
  1.1736 +      }
  1.1737 +    }
  1.1738 +    azIn[j] = 0;
  1.1739 +  }
  1.1740 +}
  1.1741 +
  1.1742 +
  1.1743 +/*
  1.1744 +** Find the first alphanumeric token in the string zIn.  Null-terminate
  1.1745 +** this token.  Remove any quotation marks.  And return a pointer to
  1.1746 +** the result.
  1.1747 +*/
  1.1748 +static char *firstToken(char *zIn, char **pzTail){
  1.1749 +  int n, ttype;
  1.1750 +  while(1){
  1.1751 +    n = getToken(zIn, &ttype);
  1.1752 +    if( ttype==TOKEN_SPACE ){
  1.1753 +      zIn += n;
  1.1754 +    }else if( ttype==TOKEN_EOF ){
  1.1755 +      *pzTail = zIn;
  1.1756 +      return 0;
  1.1757 +    }else{
  1.1758 +      zIn[n] = 0;
  1.1759 +      *pzTail = &zIn[1];
  1.1760 +      dequoteString(zIn);
  1.1761 +      return zIn;
  1.1762 +    }
  1.1763 +  }
  1.1764 +  /*NOTREACHED*/
  1.1765 +}
  1.1766 +
  1.1767 +/* Return true if...
  1.1768 +**
  1.1769 +**   *  s begins with the string t, ignoring case
  1.1770 +**   *  s is longer than t
  1.1771 +**   *  The first character of s beyond t is not a alphanumeric
  1.1772 +** 
  1.1773 +** Ignore leading space in *s.
  1.1774 +**
  1.1775 +** To put it another way, return true if the first token of
  1.1776 +** s[] is t[].
  1.1777 +*/
  1.1778 +static int startsWith(const char *s, const char *t){
  1.1779 +  while( safe_isspace(*s) ){ s++; }
  1.1780 +  while( *t ){
  1.1781 +    if( safe_tolower(*s++)!=safe_tolower(*t++) ) return 0;
  1.1782 +  }
  1.1783 +  return *s!='_' && !safe_isalnum(*s);
  1.1784 +}
  1.1785 +
  1.1786 +/*
  1.1787 +** An instance of this structure defines the "spec" of a
  1.1788 +** full text index.  This structure is populated by parseSpec
  1.1789 +** and use by fulltextConnect and fulltextCreate.
  1.1790 +*/
  1.1791 +typedef struct TableSpec {
  1.1792 +  const char *zDb;         /* Logical database name */
  1.1793 +  const char *zName;       /* Name of the full-text index */
  1.1794 +  int nColumn;             /* Number of columns to be indexed */
  1.1795 +  char **azColumn;         /* Original names of columns to be indexed */
  1.1796 +  char **azContentColumn;  /* Column names for %_content */
  1.1797 +  char **azTokenizer;      /* Name of tokenizer and its arguments */
  1.1798 +} TableSpec;
  1.1799 +
  1.1800 +/*
  1.1801 +** Reclaim all of the memory used by a TableSpec
  1.1802 +*/
  1.1803 +static void clearTableSpec(TableSpec *p) {
  1.1804 +  free(p->azColumn);
  1.1805 +  free(p->azContentColumn);
  1.1806 +  free(p->azTokenizer);
  1.1807 +}
  1.1808 +
  1.1809 +/* Parse a CREATE VIRTUAL TABLE statement, which looks like this:
  1.1810 + *
  1.1811 + * CREATE VIRTUAL TABLE email
  1.1812 + *        USING fts1(subject, body, tokenize mytokenizer(myarg))
  1.1813 + *
  1.1814 + * We return parsed information in a TableSpec structure.
  1.1815 + * 
  1.1816 + */
  1.1817 +static int parseSpec(TableSpec *pSpec, int argc, const char *const*argv,
  1.1818 +                     char**pzErr){
  1.1819 +  int i, n;
  1.1820 +  char *z, *zDummy;
  1.1821 +  char **azArg;
  1.1822 +  const char *zTokenizer = 0;    /* argv[] entry describing the tokenizer */
  1.1823 +
  1.1824 +  assert( argc>=3 );
  1.1825 +  /* Current interface:
  1.1826 +  ** argv[0] - module name
  1.1827 +  ** argv[1] - database name
  1.1828 +  ** argv[2] - table name
  1.1829 +  ** argv[3..] - columns, optionally followed by tokenizer specification
  1.1830 +  **             and snippet delimiters specification.
  1.1831 +  */
  1.1832 +
  1.1833 +  /* Make a copy of the complete argv[][] array in a single allocation.
  1.1834 +  ** The argv[][] array is read-only and transient.  We can write to the
  1.1835 +  ** copy in order to modify things and the copy is persistent.
  1.1836 +  */
  1.1837 +  memset(pSpec, 0, sizeof(*pSpec));
  1.1838 +  for(i=n=0; i<argc; i++){
  1.1839 +    n += strlen(argv[i]) + 1;
  1.1840 +  }
  1.1841 +  azArg = malloc( sizeof(char*)*argc + n );
  1.1842 +  if( azArg==0 ){
  1.1843 +    return SQLITE_NOMEM;
  1.1844 +  }
  1.1845 +  z = (char*)&azArg[argc];
  1.1846 +  for(i=0; i<argc; i++){
  1.1847 +    azArg[i] = z;
  1.1848 +    strcpy(z, argv[i]);
  1.1849 +    z += strlen(z)+1;
  1.1850 +  }
  1.1851 +
  1.1852 +  /* Identify the column names and the tokenizer and delimiter arguments
  1.1853 +  ** in the argv[][] array.
  1.1854 +  */
  1.1855 +  pSpec->zDb = azArg[1];
  1.1856 +  pSpec->zName = azArg[2];
  1.1857 +  pSpec->nColumn = 0;
  1.1858 +  pSpec->azColumn = azArg;
  1.1859 +  zTokenizer = "tokenize simple";
  1.1860 +  for(i=3; i<argc; ++i){
  1.1861 +    if( startsWith(azArg[i],"tokenize") ){
  1.1862 +      zTokenizer = azArg[i];
  1.1863 +    }else{
  1.1864 +      z = azArg[pSpec->nColumn] = firstToken(azArg[i], &zDummy);
  1.1865 +      pSpec->nColumn++;
  1.1866 +    }
  1.1867 +  }
  1.1868 +  if( pSpec->nColumn==0 ){
  1.1869 +    azArg[0] = "content";
  1.1870 +    pSpec->nColumn = 1;
  1.1871 +  }
  1.1872 +
  1.1873 +  /*
  1.1874 +  ** Construct the list of content column names.
  1.1875 +  **
  1.1876 +  ** Each content column name will be of the form cNNAAAA
  1.1877 +  ** where NN is the column number and AAAA is the sanitized
  1.1878 +  ** column name.  "sanitized" means that special characters are
  1.1879 +  ** converted to "_".  The cNN prefix guarantees that all column
  1.1880 +  ** names are unique.
  1.1881 +  **
  1.1882 +  ** The AAAA suffix is not strictly necessary.  It is included
  1.1883 +  ** for the convenience of people who might examine the generated
  1.1884 +  ** %_content table and wonder what the columns are used for.
  1.1885 +  */
  1.1886 +  pSpec->azContentColumn = malloc( pSpec->nColumn * sizeof(char *) );
  1.1887 +  if( pSpec->azContentColumn==0 ){
  1.1888 +    clearTableSpec(pSpec);
  1.1889 +    return SQLITE_NOMEM;
  1.1890 +  }
  1.1891 +  for(i=0; i<pSpec->nColumn; i++){
  1.1892 +    char *p;
  1.1893 +    pSpec->azContentColumn[i] = sqlite3_mprintf("c%d%s", i, azArg[i]);
  1.1894 +    for (p = pSpec->azContentColumn[i]; *p ; ++p) {
  1.1895 +      if( !safe_isalnum(*p) ) *p = '_';
  1.1896 +    }
  1.1897 +  }
  1.1898 +
  1.1899 +  /*
  1.1900 +  ** Parse the tokenizer specification string.
  1.1901 +  */
  1.1902 +  pSpec->azTokenizer = tokenizeString(zTokenizer, &n);
  1.1903 +  tokenListToIdList(pSpec->azTokenizer);
  1.1904 +
  1.1905 +  return SQLITE_OK;
  1.1906 +}
  1.1907 +
  1.1908 +/*
  1.1909 +** Generate a CREATE TABLE statement that describes the schema of
  1.1910 +** the virtual table.  Return a pointer to this schema string.
  1.1911 +**
  1.1912 +** Space is obtained from sqlite3_mprintf() and should be freed
  1.1913 +** using sqlite3_free().
  1.1914 +*/
  1.1915 +static char *fulltextSchema(
  1.1916 +  int nColumn,                  /* Number of columns */
  1.1917 +  const char *const* azColumn,  /* List of columns */
  1.1918 +  const char *zTableName        /* Name of the table */
  1.1919 +){
  1.1920 +  int i;
  1.1921 +  char *zSchema, *zNext;
  1.1922 +  const char *zSep = "(";
  1.1923 +  zSchema = sqlite3_mprintf("CREATE TABLE x");
  1.1924 +  for(i=0; i<nColumn; i++){
  1.1925 +    zNext = sqlite3_mprintf("%s%s%Q", zSchema, zSep, azColumn[i]);
  1.1926 +    sqlite3_free(zSchema);
  1.1927 +    zSchema = zNext;
  1.1928 +    zSep = ",";
  1.1929 +  }
  1.1930 +  zNext = sqlite3_mprintf("%s,%Q)", zSchema, zTableName);
  1.1931 +  sqlite3_free(zSchema);
  1.1932 +  return zNext;
  1.1933 +}
  1.1934 +
  1.1935 +/*
  1.1936 +** Build a new sqlite3_vtab structure that will describe the
  1.1937 +** fulltext index defined by spec.
  1.1938 +*/
  1.1939 +static int constructVtab(
  1.1940 +  sqlite3 *db,              /* The SQLite database connection */
  1.1941 +  TableSpec *spec,          /* Parsed spec information from parseSpec() */
  1.1942 +  sqlite3_vtab **ppVTab,    /* Write the resulting vtab structure here */
  1.1943 +  char **pzErr              /* Write any error message here */
  1.1944 +){
  1.1945 +  int rc;
  1.1946 +  int n;
  1.1947 +  fulltext_vtab *v = 0;
  1.1948 +  const sqlite3_tokenizer_module *m = NULL;
  1.1949 +  char *schema;
  1.1950 +
  1.1951 +  v = (fulltext_vtab *) malloc(sizeof(fulltext_vtab));
  1.1952 +  if( v==0 ) return SQLITE_NOMEM;
  1.1953 +  memset(v, 0, sizeof(*v));
  1.1954 +  /* sqlite will initialize v->base */
  1.1955 +  v->db = db;
  1.1956 +  v->zDb = spec->zDb;       /* Freed when azColumn is freed */
  1.1957 +  v->zName = spec->zName;   /* Freed when azColumn is freed */
  1.1958 +  v->nColumn = spec->nColumn;
  1.1959 +  v->azContentColumn = spec->azContentColumn;
  1.1960 +  spec->azContentColumn = 0;
  1.1961 +  v->azColumn = spec->azColumn;
  1.1962 +  spec->azColumn = 0;
  1.1963 +
  1.1964 +  if( spec->azTokenizer==0 ){
  1.1965 +    return SQLITE_NOMEM;
  1.1966 +  }
  1.1967 +  /* TODO(shess) For now, add new tokenizers as else if clauses. */
  1.1968 +  if( spec->azTokenizer[0]==0 || startsWith(spec->azTokenizer[0], "simple") ){
  1.1969 +    sqlite3Fts1SimpleTokenizerModule(&m);
  1.1970 +  }else if( startsWith(spec->azTokenizer[0], "porter") ){
  1.1971 +    sqlite3Fts1PorterTokenizerModule(&m);
  1.1972 +  }else{
  1.1973 +    *pzErr = sqlite3_mprintf("unknown tokenizer: %s", spec->azTokenizer[0]);
  1.1974 +    rc = SQLITE_ERROR;
  1.1975 +    goto err;
  1.1976 +  }
  1.1977 +  for(n=0; spec->azTokenizer[n]; n++){}
  1.1978 +  if( n ){
  1.1979 +    rc = m->xCreate(n-1, (const char*const*)&spec->azTokenizer[1],
  1.1980 +                    &v->pTokenizer);
  1.1981 +  }else{
  1.1982 +    rc = m->xCreate(0, 0, &v->pTokenizer);
  1.1983 +  }
  1.1984 +  if( rc!=SQLITE_OK ) goto err;
  1.1985 +  v->pTokenizer->pModule = m;
  1.1986 +
  1.1987 +  /* TODO: verify the existence of backing tables foo_content, foo_term */
  1.1988 +
  1.1989 +  schema = fulltextSchema(v->nColumn, (const char*const*)v->azColumn,
  1.1990 +                          spec->zName);
  1.1991 +  rc = sqlite3_declare_vtab(db, schema);
  1.1992 +  sqlite3_free(schema);
  1.1993 +  if( rc!=SQLITE_OK ) goto err;
  1.1994 +
  1.1995 +  memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements));
  1.1996 +
  1.1997 +  *ppVTab = &v->base;
  1.1998 +  TRACE(("FTS1 Connect %p\n", v));
  1.1999 +
  1.2000 +  return rc;
  1.2001 +
  1.2002 +err:
  1.2003 +  fulltext_vtab_destroy(v);
  1.2004 +  return rc;
  1.2005 +}
  1.2006 +
  1.2007 +static int fulltextConnect(
  1.2008 +  sqlite3 *db,
  1.2009 +  void *pAux,
  1.2010 +  int argc, const char *const*argv,
  1.2011 +  sqlite3_vtab **ppVTab,
  1.2012 +  char **pzErr
  1.2013 +){
  1.2014 +  TableSpec spec;
  1.2015 +  int rc = parseSpec(&spec, argc, argv, pzErr);
  1.2016 +  if( rc!=SQLITE_OK ) return rc;
  1.2017 +
  1.2018 +  rc = constructVtab(db, &spec, ppVTab, pzErr);
  1.2019 +  clearTableSpec(&spec);
  1.2020 +  return rc;
  1.2021 +}
  1.2022 +
  1.2023 +  /* The %_content table holds the text of each document, with
  1.2024 +  ** the rowid used as the docid.
  1.2025 +  **
  1.2026 +  ** The %_term table maps each term to a document list blob
  1.2027 +  ** containing elements sorted by ascending docid, each element
  1.2028 +  ** encoded as:
  1.2029 +  **
  1.2030 +  **   docid varint-encoded
  1.2031 +  **   token elements:
  1.2032 +  **     position+1 varint-encoded as delta from previous position
  1.2033 +  **     start offset varint-encoded as delta from previous start offset
  1.2034 +  **     end offset varint-encoded as delta from start offset
  1.2035 +  **
  1.2036 +  ** The sentinel position of 0 indicates the end of the token list.
  1.2037 +  **
  1.2038 +  ** Additionally, doclist blobs are chunked into multiple segments,
  1.2039 +  ** using segment to order the segments.  New elements are added to
  1.2040 +  ** the segment at segment 0, until it exceeds CHUNK_MAX.  Then
  1.2041 +  ** segment 0 is deleted, and the doclist is inserted at segment 1.
  1.2042 +  ** If there is already a doclist at segment 1, the segment 0 doclist
  1.2043 +  ** is merged with it, the segment 1 doclist is deleted, and the
  1.2044 +  ** merged doclist is inserted at segment 2, repeating those
  1.2045 +  ** operations until an insert succeeds.
  1.2046 +  **
  1.2047 +  ** Since this structure doesn't allow us to update elements in place
  1.2048 +  ** in case of deletion or update, these are simply written to
  1.2049 +  ** segment 0 (with an empty token list in case of deletion), with
  1.2050 +  ** docListAccumulate() taking care to retain lower-segment
  1.2051 +  ** information in preference to higher-segment information.
  1.2052 +  */
  1.2053 +  /* TODO(shess) Provide a VACUUM type operation which both removes
  1.2054 +  ** deleted elements which are no longer necessary, and duplicated
  1.2055 +  ** elements.  I suspect this will probably not be necessary in
  1.2056 +  ** practice, though.
  1.2057 +  */
  1.2058 +static int fulltextCreate(sqlite3 *db, void *pAux,
  1.2059 +                          int argc, const char * const *argv,
  1.2060 +                          sqlite3_vtab **ppVTab, char **pzErr){
  1.2061 +  int rc;
  1.2062 +  TableSpec spec;
  1.2063 +  StringBuffer schema;
  1.2064 +  TRACE(("FTS1 Create\n"));
  1.2065 +
  1.2066 +  rc = parseSpec(&spec, argc, argv, pzErr);
  1.2067 +  if( rc!=SQLITE_OK ) return rc;
  1.2068 +
  1.2069 +  initStringBuffer(&schema);
  1.2070 +  append(&schema, "CREATE TABLE %_content(");
  1.2071 +  appendList(&schema, spec.nColumn, spec.azContentColumn);
  1.2072 +  append(&schema, ")");
  1.2073 +  rc = sql_exec(db, spec.zDb, spec.zName, schema.s);
  1.2074 +  free(schema.s);
  1.2075 +  if( rc!=SQLITE_OK ) goto out;
  1.2076 +
  1.2077 +  rc = sql_exec(db, spec.zDb, spec.zName,
  1.2078 +    "create table %_term(term text, segment integer, doclist blob, "
  1.2079 +                        "primary key(term, segment));");
  1.2080 +  if( rc!=SQLITE_OK ) goto out;
  1.2081 +
  1.2082 +  rc = constructVtab(db, &spec, ppVTab, pzErr);
  1.2083 +
  1.2084 +out:
  1.2085 +  clearTableSpec(&spec);
  1.2086 +  return rc;
  1.2087 +}
  1.2088 +
  1.2089 +/* Decide how to handle an SQL query. */
  1.2090 +static int fulltextBestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
  1.2091 +  int i;
  1.2092 +  TRACE(("FTS1 BestIndex\n"));
  1.2093 +
  1.2094 +  for(i=0; i<pInfo->nConstraint; ++i){
  1.2095 +    const struct sqlite3_index_constraint *pConstraint;
  1.2096 +    pConstraint = &pInfo->aConstraint[i];
  1.2097 +    if( pConstraint->usable ) {
  1.2098 +      if( pConstraint->iColumn==-1 &&
  1.2099 +          pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ ){
  1.2100 +        pInfo->idxNum = QUERY_ROWID;      /* lookup by rowid */
  1.2101 +        TRACE(("FTS1 QUERY_ROWID\n"));
  1.2102 +      } else if( pConstraint->iColumn>=0 &&
  1.2103 +                 pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH ){
  1.2104 +        /* full-text search */
  1.2105 +        pInfo->idxNum = QUERY_FULLTEXT + pConstraint->iColumn;
  1.2106 +        TRACE(("FTS1 QUERY_FULLTEXT %d\n", pConstraint->iColumn));
  1.2107 +      } else continue;
  1.2108 +
  1.2109 +      pInfo->aConstraintUsage[i].argvIndex = 1;
  1.2110 +      pInfo->aConstraintUsage[i].omit = 1;
  1.2111 +
  1.2112 +      /* An arbitrary value for now.
  1.2113 +       * TODO: Perhaps rowid matches should be considered cheaper than
  1.2114 +       * full-text searches. */
  1.2115 +      pInfo->estimatedCost = 1.0;   
  1.2116 +
  1.2117 +      return SQLITE_OK;
  1.2118 +    }
  1.2119 +  }
  1.2120 +  pInfo->idxNum = QUERY_GENERIC;
  1.2121 +  return SQLITE_OK;
  1.2122 +}
  1.2123 +
  1.2124 +static int fulltextDisconnect(sqlite3_vtab *pVTab){
  1.2125 +  TRACE(("FTS1 Disconnect %p\n", pVTab));
  1.2126 +  fulltext_vtab_destroy((fulltext_vtab *)pVTab);
  1.2127 +  return SQLITE_OK;
  1.2128 +}
  1.2129 +
  1.2130 +static int fulltextDestroy(sqlite3_vtab *pVTab){
  1.2131 +  fulltext_vtab *v = (fulltext_vtab *)pVTab;
  1.2132 +  int rc;
  1.2133 +
  1.2134 +  TRACE(("FTS1 Destroy %p\n", pVTab));
  1.2135 +  rc = sql_exec(v->db, v->zDb, v->zName,
  1.2136 +                "drop table if exists %_content;"
  1.2137 +                "drop table if exists %_term;"
  1.2138 +                );
  1.2139 +  if( rc!=SQLITE_OK ) return rc;
  1.2140 +
  1.2141 +  fulltext_vtab_destroy((fulltext_vtab *)pVTab);
  1.2142 +  return SQLITE_OK;
  1.2143 +}
  1.2144 +
  1.2145 +static int fulltextOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
  1.2146 +  fulltext_cursor *c;
  1.2147 +
  1.2148 +  c = (fulltext_cursor *) calloc(sizeof(fulltext_cursor), 1);
  1.2149 +  /* sqlite will initialize c->base */
  1.2150 +  *ppCursor = &c->base;
  1.2151 +  TRACE(("FTS1 Open %p: %p\n", pVTab, c));
  1.2152 +
  1.2153 +  return SQLITE_OK;
  1.2154 +}
  1.2155 +
  1.2156 +
  1.2157 +/* Free all of the dynamically allocated memory held by *q
  1.2158 +*/
  1.2159 +static void queryClear(Query *q){
  1.2160 +  int i;
  1.2161 +  for(i = 0; i < q->nTerms; ++i){
  1.2162 +    free(q->pTerms[i].pTerm);
  1.2163 +  }
  1.2164 +  free(q->pTerms);
  1.2165 +  memset(q, 0, sizeof(*q));
  1.2166 +}
  1.2167 +
  1.2168 +/* Free all of the dynamically allocated memory held by the
  1.2169 +** Snippet
  1.2170 +*/
  1.2171 +static void snippetClear(Snippet *p){
  1.2172 +  free(p->aMatch);
  1.2173 +  free(p->zOffset);
  1.2174 +  free(p->zSnippet);
  1.2175 +  memset(p, 0, sizeof(*p));
  1.2176 +}
  1.2177 +/*
  1.2178 +** Append a single entry to the p->aMatch[] log.
  1.2179 +*/
  1.2180 +static void snippetAppendMatch(
  1.2181 +  Snippet *p,               /* Append the entry to this snippet */
  1.2182 +  int iCol, int iTerm,      /* The column and query term */
  1.2183 +  int iStart, int nByte     /* Offset and size of the match */
  1.2184 +){
  1.2185 +  int i;
  1.2186 +  struct snippetMatch *pMatch;
  1.2187 +  if( p->nMatch+1>=p->nAlloc ){
  1.2188 +    p->nAlloc = p->nAlloc*2 + 10;
  1.2189 +    p->aMatch = realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) );
  1.2190 +    if( p->aMatch==0 ){
  1.2191 +      p->nMatch = 0;
  1.2192 +      p->nAlloc = 0;
  1.2193 +      return;
  1.2194 +    }
  1.2195 +  }
  1.2196 +  i = p->nMatch++;
  1.2197 +  pMatch = &p->aMatch[i];
  1.2198 +  pMatch->iCol = iCol;
  1.2199 +  pMatch->iTerm = iTerm;
  1.2200 +  pMatch->iStart = iStart;
  1.2201 +  pMatch->nByte = nByte;
  1.2202 +}
  1.2203 +
  1.2204 +/*
  1.2205 +** Sizing information for the circular buffer used in snippetOffsetsOfColumn()
  1.2206 +*/
  1.2207 +#define FTS1_ROTOR_SZ   (32)
  1.2208 +#define FTS1_ROTOR_MASK (FTS1_ROTOR_SZ-1)
  1.2209 +
  1.2210 +/*
  1.2211 +** Add entries to pSnippet->aMatch[] for every match that occurs against
  1.2212 +** document zDoc[0..nDoc-1] which is stored in column iColumn.
  1.2213 +*/
  1.2214 +static void snippetOffsetsOfColumn(
  1.2215 +  Query *pQuery,
  1.2216 +  Snippet *pSnippet,
  1.2217 +  int iColumn,
  1.2218 +  const char *zDoc,
  1.2219 +  int nDoc
  1.2220 +){
  1.2221 +  const sqlite3_tokenizer_module *pTModule;  /* The tokenizer module */
  1.2222 +  sqlite3_tokenizer *pTokenizer;             /* The specific tokenizer */
  1.2223 +  sqlite3_tokenizer_cursor *pTCursor;        /* Tokenizer cursor */
  1.2224 +  fulltext_vtab *pVtab;                /* The full text index */
  1.2225 +  int nColumn;                         /* Number of columns in the index */
  1.2226 +  const QueryTerm *aTerm;              /* Query string terms */
  1.2227 +  int nTerm;                           /* Number of query string terms */  
  1.2228 +  int i, j;                            /* Loop counters */
  1.2229 +  int rc;                              /* Return code */
  1.2230 +  unsigned int match, prevMatch;       /* Phrase search bitmasks */
  1.2231 +  const char *zToken;                  /* Next token from the tokenizer */
  1.2232 +  int nToken;                          /* Size of zToken */
  1.2233 +  int iBegin, iEnd, iPos;              /* Offsets of beginning and end */
  1.2234 +
  1.2235 +  /* The following variables keep a circular buffer of the last
  1.2236 +  ** few tokens */
  1.2237 +  unsigned int iRotor = 0;             /* Index of current token */
  1.2238 +  int iRotorBegin[FTS1_ROTOR_SZ];      /* Beginning offset of token */
  1.2239 +  int iRotorLen[FTS1_ROTOR_SZ];        /* Length of token */
  1.2240 +
  1.2241 +  pVtab = pQuery->pFts;
  1.2242 +  nColumn = pVtab->nColumn;
  1.2243 +  pTokenizer = pVtab->pTokenizer;
  1.2244 +  pTModule = pTokenizer->pModule;
  1.2245 +  rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor);
  1.2246 +  if( rc ) return;
  1.2247 +  pTCursor->pTokenizer = pTokenizer;
  1.2248 +  aTerm = pQuery->pTerms;
  1.2249 +  nTerm = pQuery->nTerms;
  1.2250 +  if( nTerm>=FTS1_ROTOR_SZ ){
  1.2251 +    nTerm = FTS1_ROTOR_SZ - 1;
  1.2252 +  }
  1.2253 +  prevMatch = 0;
  1.2254 +  while(1){
  1.2255 +    rc = pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
  1.2256 +    if( rc ) break;
  1.2257 +    iRotorBegin[iRotor&FTS1_ROTOR_MASK] = iBegin;
  1.2258 +    iRotorLen[iRotor&FTS1_ROTOR_MASK] = iEnd-iBegin;
  1.2259 +    match = 0;
  1.2260 +    for(i=0; i<nTerm; i++){
  1.2261 +      int iCol;
  1.2262 +      iCol = aTerm[i].iColumn;
  1.2263 +      if( iCol>=0 && iCol<nColumn && iCol!=iColumn ) continue;
  1.2264 +      if( aTerm[i].nTerm!=nToken ) continue;
  1.2265 +      if( memcmp(aTerm[i].pTerm, zToken, nToken) ) continue;
  1.2266 +      if( aTerm[i].iPhrase>1 && (prevMatch & (1<<i))==0 ) continue;
  1.2267 +      match |= 1<<i;
  1.2268 +      if( i==nTerm-1 || aTerm[i+1].iPhrase==1 ){
  1.2269 +        for(j=aTerm[i].iPhrase-1; j>=0; j--){
  1.2270 +          int k = (iRotor-j) & FTS1_ROTOR_MASK;
  1.2271 +          snippetAppendMatch(pSnippet, iColumn, i-j,
  1.2272 +                iRotorBegin[k], iRotorLen[k]);
  1.2273 +        }
  1.2274 +      }
  1.2275 +    }
  1.2276 +    prevMatch = match<<1;
  1.2277 +    iRotor++;
  1.2278 +  }
  1.2279 +  pTModule->xClose(pTCursor);  
  1.2280 +}
  1.2281 +
  1.2282 +
  1.2283 +/*
  1.2284 +** Compute all offsets for the current row of the query.  
  1.2285 +** If the offsets have already been computed, this routine is a no-op.
  1.2286 +*/
  1.2287 +static void snippetAllOffsets(fulltext_cursor *p){
  1.2288 +  int nColumn;
  1.2289 +  int iColumn, i;
  1.2290 +  int iFirst, iLast;
  1.2291 +  fulltext_vtab *pFts;
  1.2292 +
  1.2293 +  if( p->snippet.nMatch ) return;
  1.2294 +  if( p->q.nTerms==0 ) return;
  1.2295 +  pFts = p->q.pFts;
  1.2296 +  nColumn = pFts->nColumn;
  1.2297 +  iColumn = p->iCursorType - QUERY_FULLTEXT;
  1.2298 +  if( iColumn<0 || iColumn>=nColumn ){
  1.2299 +    iFirst = 0;
  1.2300 +    iLast = nColumn-1;
  1.2301 +  }else{
  1.2302 +    iFirst = iColumn;
  1.2303 +    iLast = iColumn;
  1.2304 +  }
  1.2305 +  for(i=iFirst; i<=iLast; i++){
  1.2306 +    const char *zDoc;
  1.2307 +    int nDoc;
  1.2308 +    zDoc = (const char*)sqlite3_column_text(p->pStmt, i+1);
  1.2309 +    nDoc = sqlite3_column_bytes(p->pStmt, i+1);
  1.2310 +    snippetOffsetsOfColumn(&p->q, &p->snippet, i, zDoc, nDoc);
  1.2311 +  }
  1.2312 +}
  1.2313 +
  1.2314 +/*
  1.2315 +** Convert the information in the aMatch[] array of the snippet
  1.2316 +** into the string zOffset[0..nOffset-1].
  1.2317 +*/
  1.2318 +static void snippetOffsetText(Snippet *p){
  1.2319 +  int i;
  1.2320 +  int cnt = 0;
  1.2321 +  StringBuffer sb;
  1.2322 +  char zBuf[200];
  1.2323 +  if( p->zOffset ) return;
  1.2324 +  initStringBuffer(&sb);
  1.2325 +  for(i=0; i<p->nMatch; i++){
  1.2326 +    struct snippetMatch *pMatch = &p->aMatch[i];
  1.2327 +    zBuf[0] = ' ';
  1.2328 +    sqlite3_snprintf(sizeof(zBuf)-1, &zBuf[cnt>0], "%d %d %d %d",
  1.2329 +        pMatch->iCol, pMatch->iTerm, pMatch->iStart, pMatch->nByte);
  1.2330 +    append(&sb, zBuf);
  1.2331 +    cnt++;
  1.2332 +  }
  1.2333 +  p->zOffset = sb.s;
  1.2334 +  p->nOffset = sb.len;
  1.2335 +}
  1.2336 +
  1.2337 +/*
  1.2338 +** zDoc[0..nDoc-1] is phrase of text.  aMatch[0..nMatch-1] are a set
  1.2339 +** of matching words some of which might be in zDoc.  zDoc is column
  1.2340 +** number iCol.
  1.2341 +**
  1.2342 +** iBreak is suggested spot in zDoc where we could begin or end an
  1.2343 +** excerpt.  Return a value similar to iBreak but possibly adjusted
  1.2344 +** to be a little left or right so that the break point is better.
  1.2345 +*/
  1.2346 +static int wordBoundary(
  1.2347 +  int iBreak,                   /* The suggested break point */
  1.2348 +  const char *zDoc,             /* Document text */
  1.2349 +  int nDoc,                     /* Number of bytes in zDoc[] */
  1.2350 +  struct snippetMatch *aMatch,  /* Matching words */
  1.2351 +  int nMatch,                   /* Number of entries in aMatch[] */
  1.2352 +  int iCol                      /* The column number for zDoc[] */
  1.2353 +){
  1.2354 +  int i;
  1.2355 +  if( iBreak<=10 ){
  1.2356 +    return 0;
  1.2357 +  }
  1.2358 +  if( iBreak>=nDoc-10 ){
  1.2359 +    return nDoc;
  1.2360 +  }
  1.2361 +  for(i=0; i<nMatch && aMatch[i].iCol<iCol; i++){}
  1.2362 +  while( i<nMatch && aMatch[i].iStart+aMatch[i].nByte<iBreak ){ i++; }
  1.2363 +  if( i<nMatch ){
  1.2364 +    if( aMatch[i].iStart<iBreak+10 ){
  1.2365 +      return aMatch[i].iStart;
  1.2366 +    }
  1.2367 +    if( i>0 && aMatch[i-1].iStart+aMatch[i-1].nByte>=iBreak ){
  1.2368 +      return aMatch[i-1].iStart;
  1.2369 +    }
  1.2370 +  }
  1.2371 +  for(i=1; i<=10; i++){
  1.2372 +    if( safe_isspace(zDoc[iBreak-i]) ){
  1.2373 +      return iBreak - i + 1;
  1.2374 +    }
  1.2375 +    if( safe_isspace(zDoc[iBreak+i]) ){
  1.2376 +      return iBreak + i + 1;
  1.2377 +    }
  1.2378 +  }
  1.2379 +  return iBreak;
  1.2380 +}
  1.2381 +
  1.2382 +/*
  1.2383 +** If the StringBuffer does not end in white space, add a single
  1.2384 +** space character to the end.
  1.2385 +*/
  1.2386 +static void appendWhiteSpace(StringBuffer *p){
  1.2387 +  if( p->len==0 ) return;
  1.2388 +  if( safe_isspace(p->s[p->len-1]) ) return;
  1.2389 +  append(p, " ");
  1.2390 +}
  1.2391 +
  1.2392 +/*
  1.2393 +** Remove white space from teh end of the StringBuffer
  1.2394 +*/
  1.2395 +static void trimWhiteSpace(StringBuffer *p){
  1.2396 +  while( p->len>0 && safe_isspace(p->s[p->len-1]) ){
  1.2397 +    p->len--;
  1.2398 +  }
  1.2399 +}
  1.2400 +
  1.2401 +
  1.2402 +
  1.2403 +/*
  1.2404 +** Allowed values for Snippet.aMatch[].snStatus
  1.2405 +*/
  1.2406 +#define SNIPPET_IGNORE  0   /* It is ok to omit this match from the snippet */
  1.2407 +#define SNIPPET_DESIRED 1   /* We want to include this match in the snippet */
  1.2408 +
  1.2409 +/*
  1.2410 +** Generate the text of a snippet.
  1.2411 +*/
  1.2412 +static void snippetText(
  1.2413 +  fulltext_cursor *pCursor,   /* The cursor we need the snippet for */
  1.2414 +  const char *zStartMark,     /* Markup to appear before each match */
  1.2415 +  const char *zEndMark,       /* Markup to appear after each match */
  1.2416 +  const char *zEllipsis       /* Ellipsis mark */
  1.2417 +){
  1.2418 +  int i, j;
  1.2419 +  struct snippetMatch *aMatch;
  1.2420 +  int nMatch;
  1.2421 +  int nDesired;
  1.2422 +  StringBuffer sb;
  1.2423 +  int tailCol;
  1.2424 +  int tailOffset;
  1.2425 +  int iCol;
  1.2426 +  int nDoc;
  1.2427 +  const char *zDoc;
  1.2428 +  int iStart, iEnd;
  1.2429 +  int tailEllipsis = 0;
  1.2430 +  int iMatch;
  1.2431 +  
  1.2432 +
  1.2433 +  free(pCursor->snippet.zSnippet);
  1.2434 +  pCursor->snippet.zSnippet = 0;
  1.2435 +  aMatch = pCursor->snippet.aMatch;
  1.2436 +  nMatch = pCursor->snippet.nMatch;
  1.2437 +  initStringBuffer(&sb);
  1.2438 +
  1.2439 +  for(i=0; i<nMatch; i++){
  1.2440 +    aMatch[i].snStatus = SNIPPET_IGNORE;
  1.2441 +  }
  1.2442 +  nDesired = 0;
  1.2443 +  for(i=0; i<pCursor->q.nTerms; i++){
  1.2444 +    for(j=0; j<nMatch; j++){
  1.2445 +      if( aMatch[j].iTerm==i ){
  1.2446 +        aMatch[j].snStatus = SNIPPET_DESIRED;
  1.2447 +        nDesired++;
  1.2448 +        break;
  1.2449 +      }
  1.2450 +    }
  1.2451 +  }
  1.2452 +
  1.2453 +  iMatch = 0;
  1.2454 +  tailCol = -1;
  1.2455 +  tailOffset = 0;
  1.2456 +  for(i=0; i<nMatch && nDesired>0; i++){
  1.2457 +    if( aMatch[i].snStatus!=SNIPPET_DESIRED ) continue;
  1.2458 +    nDesired--;
  1.2459 +    iCol = aMatch[i].iCol;
  1.2460 +    zDoc = (const char*)sqlite3_column_text(pCursor->pStmt, iCol+1);
  1.2461 +    nDoc = sqlite3_column_bytes(pCursor->pStmt, iCol+1);
  1.2462 +    iStart = aMatch[i].iStart - 40;
  1.2463 +    iStart = wordBoundary(iStart, zDoc, nDoc, aMatch, nMatch, iCol);
  1.2464 +    if( iStart<=10 ){
  1.2465 +      iStart = 0;
  1.2466 +    }
  1.2467 +    if( iCol==tailCol && iStart<=tailOffset+20 ){
  1.2468 +      iStart = tailOffset;
  1.2469 +    }
  1.2470 +    if( (iCol!=tailCol && tailCol>=0) || iStart!=tailOffset ){
  1.2471 +      trimWhiteSpace(&sb);
  1.2472 +      appendWhiteSpace(&sb);
  1.2473 +      append(&sb, zEllipsis);
  1.2474 +      appendWhiteSpace(&sb);
  1.2475 +    }
  1.2476 +    iEnd = aMatch[i].iStart + aMatch[i].nByte + 40;
  1.2477 +    iEnd = wordBoundary(iEnd, zDoc, nDoc, aMatch, nMatch, iCol);
  1.2478 +    if( iEnd>=nDoc-10 ){
  1.2479 +      iEnd = nDoc;
  1.2480 +      tailEllipsis = 0;
  1.2481 +    }else{
  1.2482 +      tailEllipsis = 1;
  1.2483 +    }
  1.2484 +    while( iMatch<nMatch && aMatch[iMatch].iCol<iCol ){ iMatch++; }
  1.2485 +    while( iStart<iEnd ){
  1.2486 +      while( iMatch<nMatch && aMatch[iMatch].iStart<iStart
  1.2487 +             && aMatch[iMatch].iCol<=iCol ){
  1.2488 +        iMatch++;
  1.2489 +      }
  1.2490 +      if( iMatch<nMatch && aMatch[iMatch].iStart<iEnd
  1.2491 +             && aMatch[iMatch].iCol==iCol ){
  1.2492 +        nappend(&sb, &zDoc[iStart], aMatch[iMatch].iStart - iStart);
  1.2493 +        iStart = aMatch[iMatch].iStart;
  1.2494 +        append(&sb, zStartMark);
  1.2495 +        nappend(&sb, &zDoc[iStart], aMatch[iMatch].nByte);
  1.2496 +        append(&sb, zEndMark);
  1.2497 +        iStart += aMatch[iMatch].nByte;
  1.2498 +        for(j=iMatch+1; j<nMatch; j++){
  1.2499 +          if( aMatch[j].iTerm==aMatch[iMatch].iTerm
  1.2500 +              && aMatch[j].snStatus==SNIPPET_DESIRED ){
  1.2501 +            nDesired--;
  1.2502 +            aMatch[j].snStatus = SNIPPET_IGNORE;
  1.2503 +          }
  1.2504 +        }
  1.2505 +      }else{
  1.2506 +        nappend(&sb, &zDoc[iStart], iEnd - iStart);
  1.2507 +        iStart = iEnd;
  1.2508 +      }
  1.2509 +    }
  1.2510 +    tailCol = iCol;
  1.2511 +    tailOffset = iEnd;
  1.2512 +  }
  1.2513 +  trimWhiteSpace(&sb);
  1.2514 +  if( tailEllipsis ){
  1.2515 +    appendWhiteSpace(&sb);
  1.2516 +    append(&sb, zEllipsis);
  1.2517 +  }
  1.2518 +  pCursor->snippet.zSnippet = sb.s;
  1.2519 +  pCursor->snippet.nSnippet = sb.len;  
  1.2520 +}
  1.2521 +
  1.2522 +
  1.2523 +/*
  1.2524 +** Close the cursor.  For additional information see the documentation
  1.2525 +** on the xClose method of the virtual table interface.
  1.2526 +*/
  1.2527 +static int fulltextClose(sqlite3_vtab_cursor *pCursor){
  1.2528 +  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  1.2529 +  TRACE(("FTS1 Close %p\n", c));
  1.2530 +  sqlite3_finalize(c->pStmt);
  1.2531 +  queryClear(&c->q);
  1.2532 +  snippetClear(&c->snippet);
  1.2533 +  if( c->result.pDoclist!=NULL ){
  1.2534 +    docListDelete(c->result.pDoclist);
  1.2535 +  }
  1.2536 +  free(c);
  1.2537 +  return SQLITE_OK;
  1.2538 +}
  1.2539 +
  1.2540 +static int fulltextNext(sqlite3_vtab_cursor *pCursor){
  1.2541 +  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  1.2542 +  sqlite_int64 iDocid;
  1.2543 +  int rc;
  1.2544 +
  1.2545 +  TRACE(("FTS1 Next %p\n", pCursor));
  1.2546 +  snippetClear(&c->snippet);
  1.2547 +  if( c->iCursorType < QUERY_FULLTEXT ){
  1.2548 +    /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
  1.2549 +    rc = sqlite3_step(c->pStmt);
  1.2550 +    switch( rc ){
  1.2551 +      case SQLITE_ROW:
  1.2552 +        c->eof = 0;
  1.2553 +        return SQLITE_OK;
  1.2554 +      case SQLITE_DONE:
  1.2555 +        c->eof = 1;
  1.2556 +        return SQLITE_OK;
  1.2557 +      default:
  1.2558 +        c->eof = 1;
  1.2559 +        return rc;
  1.2560 +    }
  1.2561 +  } else {  /* full-text query */
  1.2562 +    rc = sqlite3_reset(c->pStmt);
  1.2563 +    if( rc!=SQLITE_OK ) return rc;
  1.2564 +
  1.2565 +    iDocid = nextDocid(&c->result);
  1.2566 +    if( iDocid==0 ){
  1.2567 +      c->eof = 1;
  1.2568 +      return SQLITE_OK;
  1.2569 +    }
  1.2570 +    rc = sqlite3_bind_int64(c->pStmt, 1, iDocid);
  1.2571 +    if( rc!=SQLITE_OK ) return rc;
  1.2572 +    /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
  1.2573 +    rc = sqlite3_step(c->pStmt);
  1.2574 +    if( rc==SQLITE_ROW ){   /* the case we expect */
  1.2575 +      c->eof = 0;
  1.2576 +      return SQLITE_OK;
  1.2577 +    }
  1.2578 +    /* an error occurred; abort */
  1.2579 +    return rc==SQLITE_DONE ? SQLITE_ERROR : rc;
  1.2580 +  }
  1.2581 +}
  1.2582 +
  1.2583 +
  1.2584 +/* Return a DocList corresponding to the query term *pTerm.  If *pTerm
  1.2585 +** is the first term of a phrase query, go ahead and evaluate the phrase
  1.2586 +** query and return the doclist for the entire phrase query.
  1.2587 +**
  1.2588 +** The result is stored in pTerm->doclist.
  1.2589 +*/
  1.2590 +static int docListOfTerm(
  1.2591 +  fulltext_vtab *v,     /* The full text index */
  1.2592 +  int iColumn,          /* column to restrict to.  No restrition if >=nColumn */
  1.2593 +  QueryTerm *pQTerm,    /* Term we are looking for, or 1st term of a phrase */
  1.2594 +  DocList **ppResult    /* Write the result here */
  1.2595 +){
  1.2596 +  DocList *pLeft, *pRight, *pNew;
  1.2597 +  int i, rc;
  1.2598 +
  1.2599 +  pLeft = docListNew(DL_POSITIONS);
  1.2600 +  rc = term_select_all(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, pLeft);
  1.2601 +  if( rc ){
  1.2602 +    docListDelete(pLeft);
  1.2603 +    return rc;
  1.2604 +  }
  1.2605 +  for(i=1; i<=pQTerm->nPhrase; i++){
  1.2606 +    pRight = docListNew(DL_POSITIONS);
  1.2607 +    rc = term_select_all(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm, pRight);
  1.2608 +    if( rc ){
  1.2609 +      docListDelete(pLeft);
  1.2610 +      return rc;
  1.2611 +    }
  1.2612 +    pNew = docListNew(i<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS);
  1.2613 +    docListPhraseMerge(pLeft, pRight, pNew);
  1.2614 +    docListDelete(pLeft);
  1.2615 +    docListDelete(pRight);
  1.2616 +    pLeft = pNew;
  1.2617 +  }
  1.2618 +  *ppResult = pLeft;
  1.2619 +  return SQLITE_OK;
  1.2620 +}
  1.2621 +
  1.2622 +/* Add a new term pTerm[0..nTerm-1] to the query *q.
  1.2623 +*/
  1.2624 +static void queryAdd(Query *q, const char *pTerm, int nTerm){
  1.2625 +  QueryTerm *t;
  1.2626 +  ++q->nTerms;
  1.2627 +  q->pTerms = realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0]));
  1.2628 +  if( q->pTerms==0 ){
  1.2629 +    q->nTerms = 0;
  1.2630 +    return;
  1.2631 +  }
  1.2632 +  t = &q->pTerms[q->nTerms - 1];
  1.2633 +  memset(t, 0, sizeof(*t));
  1.2634 +  t->pTerm = malloc(nTerm+1);
  1.2635 +  memcpy(t->pTerm, pTerm, nTerm);
  1.2636 +  t->pTerm[nTerm] = 0;
  1.2637 +  t->nTerm = nTerm;
  1.2638 +  t->isOr = q->nextIsOr;
  1.2639 +  q->nextIsOr = 0;
  1.2640 +  t->iColumn = q->nextColumn;
  1.2641 +  q->nextColumn = q->dfltColumn;
  1.2642 +}
  1.2643 +
  1.2644 +/*
  1.2645 +** Check to see if the string zToken[0...nToken-1] matches any
  1.2646 +** column name in the virtual table.   If it does,
  1.2647 +** return the zero-indexed column number.  If not, return -1.
  1.2648 +*/
  1.2649 +static int checkColumnSpecifier(
  1.2650 +  fulltext_vtab *pVtab,    /* The virtual table */
  1.2651 +  const char *zToken,      /* Text of the token */
  1.2652 +  int nToken               /* Number of characters in the token */
  1.2653 +){
  1.2654 +  int i;
  1.2655 +  for(i=0; i<pVtab->nColumn; i++){
  1.2656 +    if( memcmp(pVtab->azColumn[i], zToken, nToken)==0
  1.2657 +        && pVtab->azColumn[i][nToken]==0 ){
  1.2658 +      return i;
  1.2659 +    }
  1.2660 +  }
  1.2661 +  return -1;
  1.2662 +}
  1.2663 +
  1.2664 +/*
  1.2665 +** Parse the text at pSegment[0..nSegment-1].  Add additional terms
  1.2666 +** to the query being assemblied in pQuery.
  1.2667 +**
  1.2668 +** inPhrase is true if pSegment[0..nSegement-1] is contained within
  1.2669 +** double-quotes.  If inPhrase is true, then the first term
  1.2670 +** is marked with the number of terms in the phrase less one and
  1.2671 +** OR and "-" syntax is ignored.  If inPhrase is false, then every
  1.2672 +** term found is marked with nPhrase=0 and OR and "-" syntax is significant.
  1.2673 +*/
  1.2674 +static int tokenizeSegment(
  1.2675 +  sqlite3_tokenizer *pTokenizer,          /* The tokenizer to use */
  1.2676 +  const char *pSegment, int nSegment,     /* Query expression being parsed */
  1.2677 +  int inPhrase,                           /* True if within "..." */
  1.2678 +  Query *pQuery                           /* Append results here */
  1.2679 +){
  1.2680 +  const sqlite3_tokenizer_module *pModule = pTokenizer->pModule;
  1.2681 +  sqlite3_tokenizer_cursor *pCursor;
  1.2682 +  int firstIndex = pQuery->nTerms;
  1.2683 +  int iCol;
  1.2684 +  int nTerm = 1;
  1.2685 +  
  1.2686 +  int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor);
  1.2687 +  if( rc!=SQLITE_OK ) return rc;
  1.2688 +  pCursor->pTokenizer = pTokenizer;
  1.2689 +
  1.2690 +  while( 1 ){
  1.2691 +    const char *pToken;
  1.2692 +    int nToken, iBegin, iEnd, iPos;
  1.2693 +
  1.2694 +    rc = pModule->xNext(pCursor,
  1.2695 +                        &pToken, &nToken,
  1.2696 +                        &iBegin, &iEnd, &iPos);
  1.2697 +    if( rc!=SQLITE_OK ) break;
  1.2698 +    if( !inPhrase &&
  1.2699 +        pSegment[iEnd]==':' &&
  1.2700 +         (iCol = checkColumnSpecifier(pQuery->pFts, pToken, nToken))>=0 ){
  1.2701 +      pQuery->nextColumn = iCol;
  1.2702 +      continue;
  1.2703 +    }
  1.2704 +    if( !inPhrase && pQuery->nTerms>0 && nToken==2
  1.2705 +         && pSegment[iBegin]=='O' && pSegment[iBegin+1]=='R' ){
  1.2706 +      pQuery->nextIsOr = 1;
  1.2707 +      continue;
  1.2708 +    }
  1.2709 +    queryAdd(pQuery, pToken, nToken);
  1.2710 +    if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){
  1.2711 +      pQuery->pTerms[pQuery->nTerms-1].isNot = 1;
  1.2712 +    }
  1.2713 +    pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm;
  1.2714 +    if( inPhrase ){
  1.2715 +      nTerm++;
  1.2716 +    }
  1.2717 +  }
  1.2718 +
  1.2719 +  if( inPhrase && pQuery->nTerms>firstIndex ){
  1.2720 +    pQuery->pTerms[firstIndex].nPhrase = pQuery->nTerms - firstIndex - 1;
  1.2721 +  }
  1.2722 +
  1.2723 +  return pModule->xClose(pCursor);
  1.2724 +}
  1.2725 +
  1.2726 +/* Parse a query string, yielding a Query object pQuery.
  1.2727 +**
  1.2728 +** The calling function will need to queryClear() to clean up
  1.2729 +** the dynamically allocated memory held by pQuery.
  1.2730 +*/
  1.2731 +static int parseQuery(
  1.2732 +  fulltext_vtab *v,        /* The fulltext index */
  1.2733 +  const char *zInput,      /* Input text of the query string */
  1.2734 +  int nInput,              /* Size of the input text */
  1.2735 +  int dfltColumn,          /* Default column of the index to match against */
  1.2736 +  Query *pQuery            /* Write the parse results here. */
  1.2737 +){
  1.2738 +  int iInput, inPhrase = 0;
  1.2739 +
  1.2740 +  if( zInput==0 ) nInput = 0;
  1.2741 +  if( nInput<0 ) nInput = strlen(zInput);
  1.2742 +  pQuery->nTerms = 0;
  1.2743 +  pQuery->pTerms = NULL;
  1.2744 +  pQuery->nextIsOr = 0;
  1.2745 +  pQuery->nextColumn = dfltColumn;
  1.2746 +  pQuery->dfltColumn = dfltColumn;
  1.2747 +  pQuery->pFts = v;
  1.2748 +
  1.2749 +  for(iInput=0; iInput<nInput; ++iInput){
  1.2750 +    int i;
  1.2751 +    for(i=iInput; i<nInput && zInput[i]!='"'; ++i){}
  1.2752 +    if( i>iInput ){
  1.2753 +      tokenizeSegment(v->pTokenizer, zInput+iInput, i-iInput, inPhrase,
  1.2754 +                       pQuery);
  1.2755 +    }
  1.2756 +    iInput = i;
  1.2757 +    if( i<nInput ){
  1.2758 +      assert( zInput[i]=='"' );
  1.2759 +      inPhrase = !inPhrase;
  1.2760 +    }
  1.2761 +  }
  1.2762 +
  1.2763 +  if( inPhrase ){
  1.2764 +    /* unmatched quote */
  1.2765 +    queryClear(pQuery);
  1.2766 +    return SQLITE_ERROR;
  1.2767 +  }
  1.2768 +  return SQLITE_OK;
  1.2769 +}
  1.2770 +
  1.2771 +/* Perform a full-text query using the search expression in
  1.2772 +** zInput[0..nInput-1].  Return a list of matching documents
  1.2773 +** in pResult.
  1.2774 +**
  1.2775 +** Queries must match column iColumn.  Or if iColumn>=nColumn
  1.2776 +** they are allowed to match against any column.
  1.2777 +*/
  1.2778 +static int fulltextQuery(
  1.2779 +  fulltext_vtab *v,      /* The full text index */
  1.2780 +  int iColumn,           /* Match against this column by default */
  1.2781 +  const char *zInput,    /* The query string */
  1.2782 +  int nInput,            /* Number of bytes in zInput[] */
  1.2783 +  DocList **pResult,     /* Write the result doclist here */
  1.2784 +  Query *pQuery          /* Put parsed query string here */
  1.2785 +){
  1.2786 +  int i, iNext, rc;
  1.2787 +  DocList *pLeft = NULL;
  1.2788 +  DocList *pRight, *pNew, *pOr;
  1.2789 +  int nNot = 0;
  1.2790 +  QueryTerm *aTerm;
  1.2791 +
  1.2792 +  rc = parseQuery(v, zInput, nInput, iColumn, pQuery);
  1.2793 +  if( rc!=SQLITE_OK ) return rc;
  1.2794 +
  1.2795 +  /* Merge AND terms. */
  1.2796 +  aTerm = pQuery->pTerms;
  1.2797 +  for(i = 0; i<pQuery->nTerms; i=iNext){
  1.2798 +    if( aTerm[i].isNot ){
  1.2799 +      /* Handle all NOT terms in a separate pass */
  1.2800 +      nNot++;
  1.2801 +      iNext = i + aTerm[i].nPhrase+1;
  1.2802 +      continue;
  1.2803 +    }
  1.2804 +    iNext = i + aTerm[i].nPhrase + 1;
  1.2805 +    rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight);
  1.2806 +    if( rc ){
  1.2807 +      queryClear(pQuery);
  1.2808 +      return rc;
  1.2809 +    }
  1.2810 +    while( iNext<pQuery->nTerms && aTerm[iNext].isOr ){
  1.2811 +      rc = docListOfTerm(v, aTerm[iNext].iColumn, &aTerm[iNext], &pOr);
  1.2812 +      iNext += aTerm[iNext].nPhrase + 1;
  1.2813 +      if( rc ){
  1.2814 +        queryClear(pQuery);
  1.2815 +        return rc;
  1.2816 +      }
  1.2817 +      pNew = docListNew(DL_DOCIDS);
  1.2818 +      docListOrMerge(pRight, pOr, pNew);
  1.2819 +      docListDelete(pRight);
  1.2820 +      docListDelete(pOr);
  1.2821 +      pRight = pNew;
  1.2822 +    }
  1.2823 +    if( pLeft==0 ){
  1.2824 +      pLeft = pRight;
  1.2825 +    }else{
  1.2826 +      pNew = docListNew(DL_DOCIDS);
  1.2827 +      docListAndMerge(pLeft, pRight, pNew);
  1.2828 +      docListDelete(pRight);
  1.2829 +      docListDelete(pLeft);
  1.2830 +      pLeft = pNew;
  1.2831 +    }
  1.2832 +  }
  1.2833 +
  1.2834 +  if( nNot && pLeft==0 ){
  1.2835 +    /* We do not yet know how to handle a query of only NOT terms */
  1.2836 +    return SQLITE_ERROR;
  1.2837 +  }
  1.2838 +
  1.2839 +  /* Do the EXCEPT terms */
  1.2840 +  for(i=0; i<pQuery->nTerms;  i += aTerm[i].nPhrase + 1){
  1.2841 +    if( !aTerm[i].isNot ) continue;
  1.2842 +    rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight);
  1.2843 +    if( rc ){
  1.2844 +      queryClear(pQuery);
  1.2845 +      docListDelete(pLeft);
  1.2846 +      return rc;
  1.2847 +    }
  1.2848 +    pNew = docListNew(DL_DOCIDS);
  1.2849 +    docListExceptMerge(pLeft, pRight, pNew);
  1.2850 +    docListDelete(pRight);
  1.2851 +    docListDelete(pLeft);
  1.2852 +    pLeft = pNew;
  1.2853 +  }
  1.2854 +
  1.2855 +  *pResult = pLeft;
  1.2856 +  return rc;
  1.2857 +}
  1.2858 +
  1.2859 +/*
  1.2860 +** This is the xFilter interface for the virtual table.  See
  1.2861 +** the virtual table xFilter method documentation for additional
  1.2862 +** information.
  1.2863 +**
  1.2864 +** If idxNum==QUERY_GENERIC then do a full table scan against
  1.2865 +** the %_content table.
  1.2866 +**
  1.2867 +** If idxNum==QUERY_ROWID then do a rowid lookup for a single entry
  1.2868 +** in the %_content table.
  1.2869 +**
  1.2870 +** If idxNum>=QUERY_FULLTEXT then use the full text index.  The
  1.2871 +** column on the left-hand side of the MATCH operator is column
  1.2872 +** number idxNum-QUERY_FULLTEXT, 0 indexed.  argv[0] is the right-hand
  1.2873 +** side of the MATCH operator.
  1.2874 +*/
  1.2875 +/* TODO(shess) Upgrade the cursor initialization and destruction to
  1.2876 +** account for fulltextFilter() being called multiple times on the
  1.2877 +** same cursor.  The current solution is very fragile.  Apply fix to
  1.2878 +** fts2 as appropriate.
  1.2879 +*/
  1.2880 +static int fulltextFilter(
  1.2881 +  sqlite3_vtab_cursor *pCursor,     /* The cursor used for this query */
  1.2882 +  int idxNum, const char *idxStr,   /* Which indexing scheme to use */
  1.2883 +  int argc, sqlite3_value **argv    /* Arguments for the indexing scheme */
  1.2884 +){
  1.2885 +  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  1.2886 +  fulltext_vtab *v = cursor_vtab(c);
  1.2887 +  int rc;
  1.2888 +  char *zSql;
  1.2889 +
  1.2890 +  TRACE(("FTS1 Filter %p\n",pCursor));
  1.2891 +
  1.2892 +  zSql = sqlite3_mprintf("select rowid, * from %%_content %s",
  1.2893 +                          idxNum==QUERY_GENERIC ? "" : "where rowid=?");
  1.2894 +  sqlite3_finalize(c->pStmt);
  1.2895 +  rc = sql_prepare(v->db, v->zDb, v->zName, &c->pStmt, zSql);
  1.2896 +  sqlite3_free(zSql);
  1.2897 +  if( rc!=SQLITE_OK ) return rc;
  1.2898 +
  1.2899 +  c->iCursorType = idxNum;
  1.2900 +  switch( idxNum ){
  1.2901 +    case QUERY_GENERIC:
  1.2902 +      break;
  1.2903 +
  1.2904 +    case QUERY_ROWID:
  1.2905 +      rc = sqlite3_bind_int64(c->pStmt, 1, sqlite3_value_int64(argv[0]));
  1.2906 +      if( rc!=SQLITE_OK ) return rc;
  1.2907 +      break;
  1.2908 +
  1.2909 +    default:   /* full-text search */
  1.2910 +    {
  1.2911 +      const char *zQuery = (const char *)sqlite3_value_text(argv[0]);
  1.2912 +      DocList *pResult;
  1.2913 +      assert( idxNum<=QUERY_FULLTEXT+v->nColumn);
  1.2914 +      assert( argc==1 );
  1.2915 +      queryClear(&c->q);
  1.2916 +      rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &pResult, &c->q);
  1.2917 +      if( rc!=SQLITE_OK ) return rc;
  1.2918 +      if( c->result.pDoclist!=NULL ) docListDelete(c->result.pDoclist);
  1.2919 +      readerInit(&c->result, pResult);
  1.2920 +      break;
  1.2921 +    }
  1.2922 +  }
  1.2923 +
  1.2924 +  return fulltextNext(pCursor);
  1.2925 +}
  1.2926 +
  1.2927 +/* This is the xEof method of the virtual table.  The SQLite core
  1.2928 +** calls this routine to find out if it has reached the end of
  1.2929 +** a query's results set.
  1.2930 +*/
  1.2931 +static int fulltextEof(sqlite3_vtab_cursor *pCursor){
  1.2932 +  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  1.2933 +  return c->eof;
  1.2934 +}
  1.2935 +
  1.2936 +/* This is the xColumn method of the virtual table.  The SQLite
  1.2937 +** core calls this method during a query when it needs the value
  1.2938 +** of a column from the virtual table.  This method needs to use
  1.2939 +** one of the sqlite3_result_*() routines to store the requested
  1.2940 +** value back in the pContext.
  1.2941 +*/
  1.2942 +static int fulltextColumn(sqlite3_vtab_cursor *pCursor,
  1.2943 +                          sqlite3_context *pContext, int idxCol){
  1.2944 +  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  1.2945 +  fulltext_vtab *v = cursor_vtab(c);
  1.2946 +
  1.2947 +  if( idxCol<v->nColumn ){
  1.2948 +    sqlite3_value *pVal = sqlite3_column_value(c->pStmt, idxCol+1);
  1.2949 +    sqlite3_result_value(pContext, pVal);
  1.2950 +  }else if( idxCol==v->nColumn ){
  1.2951 +    /* The extra column whose name is the same as the table.
  1.2952 +    ** Return a blob which is a pointer to the cursor
  1.2953 +    */
  1.2954 +    sqlite3_result_blob(pContext, &c, sizeof(c), SQLITE_TRANSIENT);
  1.2955 +  }
  1.2956 +  return SQLITE_OK;
  1.2957 +}
  1.2958 +
  1.2959 +/* This is the xRowid method.  The SQLite core calls this routine to
  1.2960 +** retrive the rowid for the current row of the result set.  The
  1.2961 +** rowid should be written to *pRowid.
  1.2962 +*/
  1.2963 +static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
  1.2964 +  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  1.2965 +
  1.2966 +  *pRowid = sqlite3_column_int64(c->pStmt, 0);
  1.2967 +  return SQLITE_OK;
  1.2968 +}
  1.2969 +
  1.2970 +/* Add all terms in [zText] to the given hash table.  If [iColumn] > 0,
  1.2971 + * we also store positions and offsets in the hash table using the given
  1.2972 + * column number. */
  1.2973 +static int buildTerms(fulltext_vtab *v, fts1Hash *terms, sqlite_int64 iDocid,
  1.2974 +                      const char *zText, int iColumn){
  1.2975 +  sqlite3_tokenizer *pTokenizer = v->pTokenizer;
  1.2976 +  sqlite3_tokenizer_cursor *pCursor;
  1.2977 +  const char *pToken;
  1.2978 +  int nTokenBytes;
  1.2979 +  int iStartOffset, iEndOffset, iPosition;
  1.2980 +  int rc;
  1.2981 +
  1.2982 +  rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor);
  1.2983 +  if( rc!=SQLITE_OK ) return rc;
  1.2984 +
  1.2985 +  pCursor->pTokenizer = pTokenizer;
  1.2986 +  while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor,
  1.2987 +                                               &pToken, &nTokenBytes,
  1.2988 +                                               &iStartOffset, &iEndOffset,
  1.2989 +                                               &iPosition) ){
  1.2990 +    DocList *p;
  1.2991 +
  1.2992 +    /* Positions can't be negative; we use -1 as a terminator internally. */
  1.2993 +    if( iPosition<0 ){
  1.2994 +      pTokenizer->pModule->xClose(pCursor);
  1.2995 +      return SQLITE_ERROR;
  1.2996 +    }
  1.2997 +
  1.2998 +    p = fts1HashFind(terms, pToken, nTokenBytes);
  1.2999 +    if( p==NULL ){
  1.3000 +      p = docListNew(DL_DEFAULT);
  1.3001 +      docListAddDocid(p, iDocid);
  1.3002 +      fts1HashInsert(terms, pToken, nTokenBytes, p);
  1.3003 +    }
  1.3004 +    if( iColumn>=0 ){
  1.3005 +      docListAddPosOffset(p, iColumn, iPosition, iStartOffset, iEndOffset);
  1.3006 +    }
  1.3007 +  }
  1.3008 +
  1.3009 +  /* TODO(shess) Check return?  Should this be able to cause errors at
  1.3010 +  ** this point?  Actually, same question about sqlite3_finalize(),
  1.3011 +  ** though one could argue that failure there means that the data is
  1.3012 +  ** not durable.  *ponder*
  1.3013 +  */
  1.3014 +  pTokenizer->pModule->xClose(pCursor);
  1.3015 +  return rc;
  1.3016 +}
  1.3017 +
  1.3018 +/* Update the %_terms table to map the term [pTerm] to the given rowid. */
  1.3019 +static int index_insert_term(fulltext_vtab *v, const char *pTerm, int nTerm,
  1.3020 +                             DocList *d){
  1.3021 +  sqlite_int64 iIndexRow;
  1.3022 +  DocList doclist;
  1.3023 +  int iSegment = 0, rc;
  1.3024 +
  1.3025 +  rc = term_select(v, pTerm, nTerm, iSegment, &iIndexRow, &doclist);
  1.3026 +  if( rc==SQLITE_DONE ){
  1.3027 +    docListInit(&doclist, DL_DEFAULT, 0, 0);
  1.3028 +    docListUpdate(&doclist, d);
  1.3029 +    /* TODO(shess) Consider length(doclist)>CHUNK_MAX? */
  1.3030 +    rc = term_insert(v, NULL, pTerm, nTerm, iSegment, &doclist);
  1.3031 +    goto err;
  1.3032 +  }
  1.3033 +  if( rc!=SQLITE_ROW ) return SQLITE_ERROR;
  1.3034 +
  1.3035 +  docListUpdate(&doclist, d);
  1.3036 +  if( doclist.nData<=CHUNK_MAX ){
  1.3037 +    rc = term_update(v, iIndexRow, &doclist);
  1.3038 +    goto err;
  1.3039 +  }
  1.3040 +
  1.3041 +  /* Doclist doesn't fit, delete what's there, and accumulate
  1.3042 +  ** forward.
  1.3043 +  */
  1.3044 +  rc = term_delete(v, iIndexRow);
  1.3045 +  if( rc!=SQLITE_OK ) goto err;
  1.3046 +
  1.3047 +  /* Try to insert the doclist into a higher segment bucket.  On
  1.3048 +  ** failure, accumulate existing doclist with the doclist from that
  1.3049 +  ** bucket, and put results in the next bucket.
  1.3050 +  */
  1.3051 +  iSegment++;
  1.3052 +  while( (rc=term_insert(v, &iIndexRow, pTerm, nTerm, iSegment,
  1.3053 +                         &doclist))!=SQLITE_OK ){
  1.3054 +    sqlite_int64 iSegmentRow;
  1.3055 +    DocList old;
  1.3056 +    int rc2;
  1.3057 +
  1.3058 +    /* Retain old error in case the term_insert() error was really an
  1.3059 +    ** error rather than a bounced insert.
  1.3060 +    */
  1.3061 +    rc2 = term_select(v, pTerm, nTerm, iSegment, &iSegmentRow, &old);
  1.3062 +    if( rc2!=SQLITE_ROW ) goto err;
  1.3063 +
  1.3064 +    rc = term_delete(v, iSegmentRow);
  1.3065 +    if( rc!=SQLITE_OK ) goto err;
  1.3066 +
  1.3067 +    /* Reusing lowest-number deleted row keeps the index smaller. */
  1.3068 +    if( iSegmentRow<iIndexRow ) iIndexRow = iSegmentRow;
  1.3069 +
  1.3070 +    /* doclist contains the newer data, so accumulate it over old.
  1.3071 +    ** Then steal accumulated data for doclist.
  1.3072 +    */
  1.3073 +    docListAccumulate(&old, &doclist);
  1.3074 +    docListDestroy(&doclist);
  1.3075 +    doclist = old;
  1.3076 +
  1.3077 +    iSegment++;
  1.3078 +  }
  1.3079 +
  1.3080 + err:
  1.3081 +  docListDestroy(&doclist);
  1.3082 +  return rc;
  1.3083 +}
  1.3084 +
  1.3085 +/* Add doclists for all terms in [pValues] to the hash table [terms]. */
  1.3086 +static int insertTerms(fulltext_vtab *v, fts1Hash *terms, sqlite_int64 iRowid,
  1.3087 +                sqlite3_value **pValues){
  1.3088 +  int i;
  1.3089 +  for(i = 0; i < v->nColumn ; ++i){
  1.3090 +    char *zText = (char*)sqlite3_value_text(pValues[i]);
  1.3091 +    int rc = buildTerms(v, terms, iRowid, zText, i);
  1.3092 +    if( rc!=SQLITE_OK ) return rc;
  1.3093 +  }
  1.3094 +  return SQLITE_OK;
  1.3095 +}
  1.3096 +
  1.3097 +/* Add empty doclists for all terms in the given row's content to the hash
  1.3098 + * table [pTerms]. */
  1.3099 +static int deleteTerms(fulltext_vtab *v, fts1Hash *pTerms, sqlite_int64 iRowid){
  1.3100 +  const char **pValues;
  1.3101 +  int i;
  1.3102 +
  1.3103 +  int rc = content_select(v, iRowid, &pValues);
  1.3104 +  if( rc!=SQLITE_OK ) return rc;
  1.3105 +
  1.3106 +  for(i = 0 ; i < v->nColumn; ++i) {
  1.3107 +    rc = buildTerms(v, pTerms, iRowid, pValues[i], -1);
  1.3108 +    if( rc!=SQLITE_OK ) break;
  1.3109 +  }
  1.3110 +
  1.3111 +  freeStringArray(v->nColumn, pValues);
  1.3112 +  return SQLITE_OK;
  1.3113 +}
  1.3114 +
  1.3115 +/* Insert a row into the %_content table; set *piRowid to be the ID of the
  1.3116 + * new row.  Fill [pTerms] with new doclists for the %_term table. */
  1.3117 +static int index_insert(fulltext_vtab *v, sqlite3_value *pRequestRowid,
  1.3118 +                        sqlite3_value **pValues,
  1.3119 +                        sqlite_int64 *piRowid, fts1Hash *pTerms){
  1.3120 +  int rc;
  1.3121 +
  1.3122 +  rc = content_insert(v, pRequestRowid, pValues);  /* execute an SQL INSERT */
  1.3123 +  if( rc!=SQLITE_OK ) return rc;
  1.3124 +  *piRowid = sqlite3_last_insert_rowid(v->db);
  1.3125 +  return insertTerms(v, pTerms, *piRowid, pValues);
  1.3126 +}
  1.3127 +
  1.3128 +/* Delete a row from the %_content table; fill [pTerms] with empty doclists
  1.3129 + * to be written to the %_term table. */
  1.3130 +static int index_delete(fulltext_vtab *v, sqlite_int64 iRow, fts1Hash *pTerms){
  1.3131 +  int rc = deleteTerms(v, pTerms, iRow);
  1.3132 +  if( rc!=SQLITE_OK ) return rc;
  1.3133 +  return content_delete(v, iRow);  /* execute an SQL DELETE */
  1.3134 +}
  1.3135 +
  1.3136 +/* Update a row in the %_content table; fill [pTerms] with new doclists for the
  1.3137 + * %_term table. */
  1.3138 +static int index_update(fulltext_vtab *v, sqlite_int64 iRow,
  1.3139 +                        sqlite3_value **pValues, fts1Hash *pTerms){
  1.3140 +  /* Generate an empty doclist for each term that previously appeared in this
  1.3141 +   * row. */
  1.3142 +  int rc = deleteTerms(v, pTerms, iRow);
  1.3143 +  if( rc!=SQLITE_OK ) return rc;
  1.3144 +
  1.3145 +  rc = content_update(v, pValues, iRow);  /* execute an SQL UPDATE */
  1.3146 +  if( rc!=SQLITE_OK ) return rc;
  1.3147 +
  1.3148 +  /* Now add positions for terms which appear in the updated row. */
  1.3149 +  return insertTerms(v, pTerms, iRow, pValues);
  1.3150 +}
  1.3151 +
  1.3152 +/* This function implements the xUpdate callback; it is the top-level entry
  1.3153 + * point for inserting, deleting or updating a row in a full-text table. */
  1.3154 +static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg,
  1.3155 +                   sqlite_int64 *pRowid){
  1.3156 +  fulltext_vtab *v = (fulltext_vtab *) pVtab;
  1.3157 +  fts1Hash terms;   /* maps term string -> PosList */
  1.3158 +  int rc;
  1.3159 +  fts1HashElem *e;
  1.3160 +
  1.3161 +  TRACE(("FTS1 Update %p\n", pVtab));
  1.3162 +  
  1.3163 +  fts1HashInit(&terms, FTS1_HASH_STRING, 1);
  1.3164 +
  1.3165 +  if( nArg<2 ){
  1.3166 +    rc = index_delete(v, sqlite3_value_int64(ppArg[0]), &terms);
  1.3167 +  } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){
  1.3168 +    /* An update:
  1.3169 +     * ppArg[0] = old rowid
  1.3170 +     * ppArg[1] = new rowid
  1.3171 +     * ppArg[2..2+v->nColumn-1] = values
  1.3172 +     * ppArg[2+v->nColumn] = value for magic column (we ignore this)
  1.3173 +     */
  1.3174 +    sqlite_int64 rowid = sqlite3_value_int64(ppArg[0]);
  1.3175 +    if( sqlite3_value_type(ppArg[1]) != SQLITE_INTEGER ||
  1.3176 +      sqlite3_value_int64(ppArg[1]) != rowid ){
  1.3177 +      rc = SQLITE_ERROR;  /* we don't allow changing the rowid */
  1.3178 +    } else {
  1.3179 +      assert( nArg==2+v->nColumn+1);
  1.3180 +      rc = index_update(v, rowid, &ppArg[2], &terms);
  1.3181 +    }
  1.3182 +  } else {
  1.3183 +    /* An insert:
  1.3184 +     * ppArg[1] = requested rowid
  1.3185 +     * ppArg[2..2+v->nColumn-1] = values
  1.3186 +     * ppArg[2+v->nColumn] = value for magic column (we ignore this)
  1.3187 +     */
  1.3188 +    assert( nArg==2+v->nColumn+1);
  1.3189 +    rc = index_insert(v, ppArg[1], &ppArg[2], pRowid, &terms);
  1.3190 +  }
  1.3191 +
  1.3192 +  if( rc==SQLITE_OK ){
  1.3193 +    /* Write updated doclists to disk. */
  1.3194 +    for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){
  1.3195 +      DocList *p = fts1HashData(e);
  1.3196 +      rc = index_insert_term(v, fts1HashKey(e), fts1HashKeysize(e), p);
  1.3197 +      if( rc!=SQLITE_OK ) break;
  1.3198 +    }
  1.3199 +  }
  1.3200 +
  1.3201 +  /* clean up */
  1.3202 +  for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){
  1.3203 +    DocList *p = fts1HashData(e);
  1.3204 +    docListDelete(p);
  1.3205 +  }
  1.3206 +  fts1HashClear(&terms);
  1.3207 +
  1.3208 +  return rc;
  1.3209 +}
  1.3210 +
  1.3211 +/*
  1.3212 +** Implementation of the snippet() function for FTS1
  1.3213 +*/
  1.3214 +static void snippetFunc(
  1.3215 +  sqlite3_context *pContext,
  1.3216 +  int argc,
  1.3217 +  sqlite3_value **argv
  1.3218 +){
  1.3219 +  fulltext_cursor *pCursor;
  1.3220 +  if( argc<1 ) return;
  1.3221 +  if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
  1.3222 +      sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
  1.3223 +    sqlite3_result_error(pContext, "illegal first argument to html_snippet",-1);
  1.3224 +  }else{
  1.3225 +    const char *zStart = "<b>";
  1.3226 +    const char *zEnd = "</b>";
  1.3227 +    const char *zEllipsis = "<b>...</b>";
  1.3228 +    memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
  1.3229 +    if( argc>=2 ){
  1.3230 +      zStart = (const char*)sqlite3_value_text(argv[1]);
  1.3231 +      if( argc>=3 ){
  1.3232 +        zEnd = (const char*)sqlite3_value_text(argv[2]);
  1.3233 +        if( argc>=4 ){
  1.3234 +          zEllipsis = (const char*)sqlite3_value_text(argv[3]);
  1.3235 +        }
  1.3236 +      }
  1.3237 +    }
  1.3238 +    snippetAllOffsets(pCursor);
  1.3239 +    snippetText(pCursor, zStart, zEnd, zEllipsis);
  1.3240 +    sqlite3_result_text(pContext, pCursor->snippet.zSnippet,
  1.3241 +                        pCursor->snippet.nSnippet, SQLITE_STATIC);
  1.3242 +  }
  1.3243 +}
  1.3244 +
  1.3245 +/*
  1.3246 +** Implementation of the offsets() function for FTS1
  1.3247 +*/
  1.3248 +static void snippetOffsetsFunc(
  1.3249 +  sqlite3_context *pContext,
  1.3250 +  int argc,
  1.3251 +  sqlite3_value **argv
  1.3252 +){
  1.3253 +  fulltext_cursor *pCursor;
  1.3254 +  if( argc<1 ) return;
  1.3255 +  if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
  1.3256 +      sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
  1.3257 +    sqlite3_result_error(pContext, "illegal first argument to offsets",-1);
  1.3258 +  }else{
  1.3259 +    memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
  1.3260 +    snippetAllOffsets(pCursor);
  1.3261 +    snippetOffsetText(&pCursor->snippet);
  1.3262 +    sqlite3_result_text(pContext,
  1.3263 +                        pCursor->snippet.zOffset, pCursor->snippet.nOffset,
  1.3264 +                        SQLITE_STATIC);
  1.3265 +  }
  1.3266 +}
  1.3267 +
  1.3268 +/*
  1.3269 +** This routine implements the xFindFunction method for the FTS1
  1.3270 +** virtual table.
  1.3271 +*/
  1.3272 +static int fulltextFindFunction(
  1.3273 +  sqlite3_vtab *pVtab,
  1.3274 +  int nArg,
  1.3275 +  const char *zName,
  1.3276 +  void (**pxFunc)(sqlite3_context*,int,sqlite3_value**),
  1.3277 +  void **ppArg
  1.3278 +){
  1.3279 +  if( strcmp(zName,"snippet")==0 ){
  1.3280 +    *pxFunc = snippetFunc;
  1.3281 +    return 1;
  1.3282 +  }else if( strcmp(zName,"offsets")==0 ){
  1.3283 +    *pxFunc = snippetOffsetsFunc;
  1.3284 +    return 1;
  1.3285 +  }
  1.3286 +  return 0;
  1.3287 +}
  1.3288 +
  1.3289 +/*
  1.3290 +** Rename an fts1 table.
  1.3291 +*/
  1.3292 +static int fulltextRename(
  1.3293 +  sqlite3_vtab *pVtab,
  1.3294 +  const char *zName
  1.3295 +){
  1.3296 +  fulltext_vtab *p = (fulltext_vtab *)pVtab;
  1.3297 +  int rc = SQLITE_NOMEM;
  1.3298 +  char *zSql = sqlite3_mprintf(
  1.3299 +    "ALTER TABLE %Q.'%q_content'  RENAME TO '%q_content';"
  1.3300 +    "ALTER TABLE %Q.'%q_term' RENAME TO '%q_term';"
  1.3301 +    , p->zDb, p->zName, zName
  1.3302 +    , p->zDb, p->zName, zName
  1.3303 +  );
  1.3304 +  if( zSql ){
  1.3305 +    rc = sqlite3_exec(p->db, zSql, 0, 0, 0);
  1.3306 +    sqlite3_free(zSql);
  1.3307 +  }
  1.3308 +  return rc;
  1.3309 +}
  1.3310 +
  1.3311 +static const sqlite3_module fulltextModule = {
  1.3312 +  /* iVersion      */ 0,
  1.3313 +  /* xCreate       */ fulltextCreate,
  1.3314 +  /* xConnect      */ fulltextConnect,
  1.3315 +  /* xBestIndex    */ fulltextBestIndex,
  1.3316 +  /* xDisconnect   */ fulltextDisconnect,
  1.3317 +  /* xDestroy      */ fulltextDestroy,
  1.3318 +  /* xOpen         */ fulltextOpen,
  1.3319 +  /* xClose        */ fulltextClose,
  1.3320 +  /* xFilter       */ fulltextFilter,
  1.3321 +  /* xNext         */ fulltextNext,
  1.3322 +  /* xEof          */ fulltextEof,
  1.3323 +  /* xColumn       */ fulltextColumn,
  1.3324 +  /* xRowid        */ fulltextRowid,
  1.3325 +  /* xUpdate       */ fulltextUpdate,
  1.3326 +  /* xBegin        */ 0, 
  1.3327 +  /* xSync         */ 0,
  1.3328 +  /* xCommit       */ 0,
  1.3329 +  /* xRollback     */ 0,
  1.3330 +  /* xFindFunction */ fulltextFindFunction,
  1.3331 +  /* xRename       */ fulltextRename,
  1.3332 +};
  1.3333 +
  1.3334 +int sqlite3Fts1Init(sqlite3 *db){
  1.3335 +  sqlite3_overload_function(db, "snippet", -1);
  1.3336 +  sqlite3_overload_function(db, "offsets", -1);
  1.3337 +  return sqlite3_create_module(db, "fts1", &fulltextModule, 0);
  1.3338 +}
  1.3339 +
  1.3340 +#if !SQLITE_CORE
  1.3341 +int sqlite3_extension_init(sqlite3 *db, char **pzErrMsg,
  1.3342 +                           const sqlite3_api_routines *pApi){
  1.3343 +  SQLITE_EXTENSION_INIT2(pApi)
  1.3344 +  return sqlite3Fts1Init(db);
  1.3345 +}
  1.3346 +#endif
  1.3347 +
  1.3348 +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */