os/persistentdata/persistentstorage/sqlite3api/SQLite/fts1.c
author sl@SLION-WIN7.fritz.box
Fri, 15 Jun 2012 03:10:57 +0200
changeset 0 bde4ae8d615e
permissions -rw-r--r--
First public contribution.
sl@0
     1
/* fts1 has a design flaw which can lead to database corruption (see
sl@0
     2
** below).  It is recommended not to use it any longer, instead use
sl@0
     3
** fts3 (or higher).  If you believe that your use of fts1 is safe,
sl@0
     4
** add -DSQLITE_ENABLE_BROKEN_FTS1=1 to your CFLAGS.
sl@0
     5
*/
sl@0
     6
#if (!defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)) \
sl@0
     7
        && !defined(SQLITE_ENABLE_BROKEN_FTS1)
sl@0
     8
#error fts1 has a design flaw and has been deprecated.
sl@0
     9
#endif
sl@0
    10
/* The flaw is that fts1 uses the content table's unaliased rowid as
sl@0
    11
** the unique docid.  fts1 embeds the rowid in the index it builds,
sl@0
    12
** and expects the rowid to not change.  The SQLite VACUUM operation
sl@0
    13
** will renumber such rowids, thereby breaking fts1.  If you are using
sl@0
    14
** fts1 in a system which has disabled VACUUM, then you can continue
sl@0
    15
** to use it safely.  Note that PRAGMA auto_vacuum does NOT disable
sl@0
    16
** VACUUM, though systems using auto_vacuum are unlikely to invoke
sl@0
    17
** VACUUM.
sl@0
    18
**
sl@0
    19
** fts1 should be safe even across VACUUM if you only insert documents
sl@0
    20
** and never delete.
sl@0
    21
*/
sl@0
    22
sl@0
    23
/* The author disclaims copyright to this source code.
sl@0
    24
 *
sl@0
    25
 * This is an SQLite module implementing full-text search.
sl@0
    26
 */
sl@0
    27
sl@0
    28
/*
sl@0
    29
** The code in this file is only compiled if:
sl@0
    30
**
sl@0
    31
**     * The FTS1 module is being built as an extension
sl@0
    32
**       (in which case SQLITE_CORE is not defined), or
sl@0
    33
**
sl@0
    34
**     * The FTS1 module is being built into the core of
sl@0
    35
**       SQLite (in which case SQLITE_ENABLE_FTS1 is defined).
sl@0
    36
*/
sl@0
    37
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)
sl@0
    38
sl@0
    39
#if defined(SQLITE_ENABLE_FTS1) && !defined(SQLITE_CORE)
sl@0
    40
# define SQLITE_CORE 1
sl@0
    41
#endif
sl@0
    42
sl@0
    43
#include <assert.h>
sl@0
    44
#include <stdlib.h>
sl@0
    45
#include <stdio.h>
sl@0
    46
#include <string.h>
sl@0
    47
#include <ctype.h>
sl@0
    48
sl@0
    49
#include "fts1.h"
sl@0
    50
#include "fts1_hash.h"
sl@0
    51
#include "fts1_tokenizer.h"
sl@0
    52
#include "sqlite3.h"
sl@0
    53
#include "sqlite3ext.h"
sl@0
    54
SQLITE_EXTENSION_INIT1
sl@0
    55
sl@0
    56
sl@0
    57
#if 0
sl@0
    58
# define TRACE(A)  printf A; fflush(stdout)
sl@0
    59
#else
sl@0
    60
# define TRACE(A)
sl@0
    61
#endif
sl@0
    62
sl@0
    63
/* utility functions */
sl@0
    64
sl@0
    65
typedef struct StringBuffer {
sl@0
    66
  int len;      /* length, not including null terminator */
sl@0
    67
  int alloced;  /* Space allocated for s[] */ 
sl@0
    68
  char *s;      /* Content of the string */
sl@0
    69
} StringBuffer;
sl@0
    70
sl@0
    71
static void initStringBuffer(StringBuffer *sb){
sl@0
    72
  sb->len = 0;
sl@0
    73
  sb->alloced = 100;
sl@0
    74
  sb->s = malloc(100);
sl@0
    75
  sb->s[0] = '\0';
sl@0
    76
}
sl@0
    77
sl@0
    78
static void nappend(StringBuffer *sb, const char *zFrom, int nFrom){
sl@0
    79
  if( sb->len + nFrom >= sb->alloced ){
sl@0
    80
    sb->alloced = sb->len + nFrom + 100;
sl@0
    81
    sb->s = realloc(sb->s, sb->alloced+1);
sl@0
    82
    if( sb->s==0 ){
sl@0
    83
      initStringBuffer(sb);
sl@0
    84
      return;
sl@0
    85
    }
sl@0
    86
  }
sl@0
    87
  memcpy(sb->s + sb->len, zFrom, nFrom);
sl@0
    88
  sb->len += nFrom;
sl@0
    89
  sb->s[sb->len] = 0;
sl@0
    90
}
sl@0
    91
static void append(StringBuffer *sb, const char *zFrom){
sl@0
    92
  nappend(sb, zFrom, strlen(zFrom));
sl@0
    93
}
sl@0
    94
sl@0
    95
/* We encode variable-length integers in little-endian order using seven bits
sl@0
    96
 * per byte as follows:
sl@0
    97
**
sl@0
    98
** KEY:
sl@0
    99
**         A = 0xxxxxxx    7 bits of data and one flag bit
sl@0
   100
**         B = 1xxxxxxx    7 bits of data and one flag bit
sl@0
   101
**
sl@0
   102
**  7 bits - A
sl@0
   103
** 14 bits - BA
sl@0
   104
** 21 bits - BBA
sl@0
   105
** and so on.
sl@0
   106
*/
sl@0
   107
sl@0
   108
/* We may need up to VARINT_MAX bytes to store an encoded 64-bit integer. */
sl@0
   109
#define VARINT_MAX 10
sl@0
   110
sl@0
   111
/* Write a 64-bit variable-length integer to memory starting at p[0].
sl@0
   112
 * The length of data written will be between 1 and VARINT_MAX bytes.
sl@0
   113
 * The number of bytes written is returned. */
sl@0
   114
static int putVarint(char *p, sqlite_int64 v){
sl@0
   115
  unsigned char *q = (unsigned char *) p;
sl@0
   116
  sqlite_uint64 vu = v;
sl@0
   117
  do{
sl@0
   118
    *q++ = (unsigned char) ((vu & 0x7f) | 0x80);
sl@0
   119
    vu >>= 7;
sl@0
   120
  }while( vu!=0 );
sl@0
   121
  q[-1] &= 0x7f;  /* turn off high bit in final byte */
sl@0
   122
  assert( q - (unsigned char *)p <= VARINT_MAX );
sl@0
   123
  return (int) (q - (unsigned char *)p);
sl@0
   124
}
sl@0
   125
sl@0
   126
/* Read a 64-bit variable-length integer from memory starting at p[0].
sl@0
   127
 * Return the number of bytes read, or 0 on error.
sl@0
   128
 * The value is stored in *v. */
sl@0
   129
static int getVarint(const char *p, sqlite_int64 *v){
sl@0
   130
  const unsigned char *q = (const unsigned char *) p;
sl@0
   131
  sqlite_uint64 x = 0, y = 1;
sl@0
   132
  while( (*q & 0x80) == 0x80 ){
sl@0
   133
    x += y * (*q++ & 0x7f);
sl@0
   134
    y <<= 7;
sl@0
   135
    if( q - (unsigned char *)p >= VARINT_MAX ){  /* bad data */
sl@0
   136
      assert( 0 );
sl@0
   137
      return 0;
sl@0
   138
    }
sl@0
   139
  }
sl@0
   140
  x += y * (*q++);
sl@0
   141
  *v = (sqlite_int64) x;
sl@0
   142
  return (int) (q - (unsigned char *)p);
sl@0
   143
}
sl@0
   144
sl@0
   145
static int getVarint32(const char *p, int *pi){
sl@0
   146
 sqlite_int64 i;
sl@0
   147
 int ret = getVarint(p, &i);
sl@0
   148
 *pi = (int) i;
sl@0
   149
 assert( *pi==i );
sl@0
   150
 return ret;
sl@0
   151
}
sl@0
   152
sl@0
   153
/*** Document lists ***
sl@0
   154
 *
sl@0
   155
 * A document list holds a sorted list of varint-encoded document IDs.
sl@0
   156
 *
sl@0
   157
 * A doclist with type DL_POSITIONS_OFFSETS is stored like this:
sl@0
   158
 *
sl@0
   159
 * array {
sl@0
   160
 *   varint docid;
sl@0
   161
 *   array {
sl@0
   162
 *     varint position;     (delta from previous position plus POS_BASE)
sl@0
   163
 *     varint startOffset;  (delta from previous startOffset)
sl@0
   164
 *     varint endOffset;    (delta from startOffset)
sl@0
   165
 *   }
sl@0
   166
 * }
sl@0
   167
 *
sl@0
   168
 * Here, array { X } means zero or more occurrences of X, adjacent in memory.
sl@0
   169
 *
sl@0
   170
 * A position list may hold positions for text in multiple columns.  A position
sl@0
   171
 * POS_COLUMN is followed by a varint containing the index of the column for
sl@0
   172
 * following positions in the list.  Any positions appearing before any
sl@0
   173
 * occurrences of POS_COLUMN are for column 0.
sl@0
   174
 *
sl@0
   175
 * A doclist with type DL_POSITIONS is like the above, but holds only docids
sl@0
   176
 * and positions without offset information.
sl@0
   177
 *
sl@0
   178
 * A doclist with type DL_DOCIDS is like the above, but holds only docids
sl@0
   179
 * without positions or offset information.
sl@0
   180
 *
sl@0
   181
 * On disk, every document list has positions and offsets, so we don't bother
sl@0
   182
 * to serialize a doclist's type.
sl@0
   183
 * 
sl@0
   184
 * We don't yet delta-encode document IDs; doing so will probably be a
sl@0
   185
 * modest win.
sl@0
   186
 *
sl@0
   187
 * NOTE(shess) I've thought of a slightly (1%) better offset encoding.
sl@0
   188
 * After the first offset, estimate the next offset by using the
sl@0
   189
 * current token position and the previous token position and offset,
sl@0
   190
 * offset to handle some variance.  So the estimate would be
sl@0
   191
 * (iPosition*w->iStartOffset/w->iPosition-64), which is delta-encoded
sl@0
   192
 * as normal.  Offsets more than 64 chars from the estimate are
sl@0
   193
 * encoded as the delta to the previous start offset + 128.  An
sl@0
   194
 * additional tiny increment can be gained by using the end offset of
sl@0
   195
 * the previous token to make the estimate a tiny bit more precise.
sl@0
   196
*/
sl@0
   197
sl@0
   198
/* It is not safe to call isspace(), tolower(), or isalnum() on
sl@0
   199
** hi-bit-set characters.  This is the same solution used in the
sl@0
   200
** tokenizer.
sl@0
   201
*/
sl@0
   202
/* TODO(shess) The snippet-generation code should be using the
sl@0
   203
** tokenizer-generated tokens rather than doing its own local
sl@0
   204
** tokenization.
sl@0
   205
*/
sl@0
   206
/* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */
sl@0
   207
static int safe_isspace(char c){
sl@0
   208
  return (c&0x80)==0 ? isspace(c) : 0;
sl@0
   209
}
sl@0
   210
static int safe_tolower(char c){
sl@0
   211
  return (c&0x80)==0 ? tolower(c) : c;
sl@0
   212
}
sl@0
   213
static int safe_isalnum(char c){
sl@0
   214
  return (c&0x80)==0 ? isalnum(c) : 0;
sl@0
   215
}
sl@0
   216
sl@0
   217
typedef enum DocListType {
sl@0
   218
  DL_DOCIDS,              /* docids only */
sl@0
   219
  DL_POSITIONS,           /* docids + positions */
sl@0
   220
  DL_POSITIONS_OFFSETS    /* docids + positions + offsets */
sl@0
   221
} DocListType;
sl@0
   222
sl@0
   223
/*
sl@0
   224
** By default, only positions and not offsets are stored in the doclists.
sl@0
   225
** To change this so that offsets are stored too, compile with
sl@0
   226
**
sl@0
   227
**          -DDL_DEFAULT=DL_POSITIONS_OFFSETS
sl@0
   228
**
sl@0
   229
*/
sl@0
   230
#ifndef DL_DEFAULT
sl@0
   231
# define DL_DEFAULT DL_POSITIONS
sl@0
   232
#endif
sl@0
   233
sl@0
   234
typedef struct DocList {
sl@0
   235
  char *pData;
sl@0
   236
  int nData;
sl@0
   237
  DocListType iType;
sl@0
   238
  int iLastColumn;    /* the last column written */
sl@0
   239
  int iLastPos;       /* the last position written */
sl@0
   240
  int iLastOffset;    /* the last start offset written */
sl@0
   241
} DocList;
sl@0
   242
sl@0
   243
enum {
sl@0
   244
  POS_END = 0,        /* end of this position list */
sl@0
   245
  POS_COLUMN,         /* followed by new column number */
sl@0
   246
  POS_BASE
sl@0
   247
};
sl@0
   248
sl@0
   249
/* Initialize a new DocList to hold the given data. */
sl@0
   250
static void docListInit(DocList *d, DocListType iType,
sl@0
   251
                        const char *pData, int nData){
sl@0
   252
  d->nData = nData;
sl@0
   253
  if( nData>0 ){
sl@0
   254
    d->pData = malloc(nData);
sl@0
   255
    memcpy(d->pData, pData, nData);
sl@0
   256
  } else {
sl@0
   257
    d->pData = NULL;
sl@0
   258
  }
sl@0
   259
  d->iType = iType;
sl@0
   260
  d->iLastColumn = 0;
sl@0
   261
  d->iLastPos = d->iLastOffset = 0;
sl@0
   262
}
sl@0
   263
sl@0
   264
/* Create a new dynamically-allocated DocList. */
sl@0
   265
static DocList *docListNew(DocListType iType){
sl@0
   266
  DocList *d = (DocList *) malloc(sizeof(DocList));
sl@0
   267
  docListInit(d, iType, 0, 0);
sl@0
   268
  return d;
sl@0
   269
}
sl@0
   270
sl@0
   271
static void docListDestroy(DocList *d){
sl@0
   272
  free(d->pData);
sl@0
   273
#ifndef NDEBUG
sl@0
   274
  memset(d, 0x55, sizeof(*d));
sl@0
   275
#endif
sl@0
   276
}
sl@0
   277
sl@0
   278
static void docListDelete(DocList *d){
sl@0
   279
  docListDestroy(d);
sl@0
   280
  free(d);
sl@0
   281
}
sl@0
   282
sl@0
   283
static char *docListEnd(DocList *d){
sl@0
   284
  return d->pData + d->nData;
sl@0
   285
}
sl@0
   286
sl@0
   287
/* Append a varint to a DocList's data. */
sl@0
   288
static void appendVarint(DocList *d, sqlite_int64 i){
sl@0
   289
  char c[VARINT_MAX];
sl@0
   290
  int n = putVarint(c, i);
sl@0
   291
  d->pData = realloc(d->pData, d->nData + n);
sl@0
   292
  memcpy(d->pData + d->nData, c, n);
sl@0
   293
  d->nData += n;
sl@0
   294
}
sl@0
   295
sl@0
   296
static void docListAddDocid(DocList *d, sqlite_int64 iDocid){
sl@0
   297
  appendVarint(d, iDocid);
sl@0
   298
  if( d->iType>=DL_POSITIONS ){
sl@0
   299
    appendVarint(d, POS_END);  /* initially empty position list */
sl@0
   300
    d->iLastColumn = 0;
sl@0
   301
    d->iLastPos = d->iLastOffset = 0;
sl@0
   302
  }
sl@0
   303
}
sl@0
   304
sl@0
   305
/* helper function for docListAddPos and docListAddPosOffset */
sl@0
   306
static void addPos(DocList *d, int iColumn, int iPos){
sl@0
   307
  assert( d->nData>0 );
sl@0
   308
  --d->nData;  /* remove previous terminator */
sl@0
   309
  if( iColumn!=d->iLastColumn ){
sl@0
   310
    assert( iColumn>d->iLastColumn );
sl@0
   311
    appendVarint(d, POS_COLUMN);
sl@0
   312
    appendVarint(d, iColumn);
sl@0
   313
    d->iLastColumn = iColumn;
sl@0
   314
    d->iLastPos = d->iLastOffset = 0;
sl@0
   315
  }
sl@0
   316
  assert( iPos>=d->iLastPos );
sl@0
   317
  appendVarint(d, iPos-d->iLastPos+POS_BASE);
sl@0
   318
  d->iLastPos = iPos;
sl@0
   319
}
sl@0
   320
sl@0
   321
/* Add a position to the last position list in a doclist. */
sl@0
   322
static void docListAddPos(DocList *d, int iColumn, int iPos){
sl@0
   323
  assert( d->iType==DL_POSITIONS );
sl@0
   324
  addPos(d, iColumn, iPos);
sl@0
   325
  appendVarint(d, POS_END);  /* add new terminator */
sl@0
   326
}
sl@0
   327
sl@0
   328
/*
sl@0
   329
** Add a position and starting and ending offsets to a doclist.
sl@0
   330
**
sl@0
   331
** If the doclist is setup to handle only positions, then insert
sl@0
   332
** the position only and ignore the offsets.
sl@0
   333
*/
sl@0
   334
static void docListAddPosOffset(
sl@0
   335
  DocList *d,             /* Doclist under construction */
sl@0
   336
  int iColumn,            /* Column the inserted term is part of */
sl@0
   337
  int iPos,               /* Position of the inserted term */
sl@0
   338
  int iStartOffset,       /* Starting offset of inserted term */
sl@0
   339
  int iEndOffset          /* Ending offset of inserted term */
sl@0
   340
){
sl@0
   341
  assert( d->iType>=DL_POSITIONS );
sl@0
   342
  addPos(d, iColumn, iPos);
sl@0
   343
  if( d->iType==DL_POSITIONS_OFFSETS ){
sl@0
   344
    assert( iStartOffset>=d->iLastOffset );
sl@0
   345
    appendVarint(d, iStartOffset-d->iLastOffset);
sl@0
   346
    d->iLastOffset = iStartOffset;
sl@0
   347
    assert( iEndOffset>=iStartOffset );
sl@0
   348
    appendVarint(d, iEndOffset-iStartOffset);
sl@0
   349
  }
sl@0
   350
  appendVarint(d, POS_END);  /* add new terminator */
sl@0
   351
}
sl@0
   352
sl@0
   353
/*
sl@0
   354
** A DocListReader object is a cursor into a doclist.  Initialize
sl@0
   355
** the cursor to the beginning of the doclist by calling readerInit().
sl@0
   356
** Then use routines
sl@0
   357
**
sl@0
   358
**      peekDocid()
sl@0
   359
**      readDocid()
sl@0
   360
**      readPosition()
sl@0
   361
**      skipPositionList()
sl@0
   362
**      and so forth...
sl@0
   363
**
sl@0
   364
** to read information out of the doclist.  When we reach the end
sl@0
   365
** of the doclist, atEnd() returns TRUE.
sl@0
   366
*/
sl@0
   367
typedef struct DocListReader {
sl@0
   368
  DocList *pDoclist;  /* The document list we are stepping through */
sl@0
   369
  char *p;            /* Pointer to next unread byte in the doclist */
sl@0
   370
  int iLastColumn;
sl@0
   371
  int iLastPos;  /* the last position read, or -1 when not in a position list */
sl@0
   372
} DocListReader;
sl@0
   373
sl@0
   374
/*
sl@0
   375
** Initialize the DocListReader r to point to the beginning of pDoclist.
sl@0
   376
*/
sl@0
   377
static void readerInit(DocListReader *r, DocList *pDoclist){
sl@0
   378
  r->pDoclist = pDoclist;
sl@0
   379
  if( pDoclist!=NULL ){
sl@0
   380
    r->p = pDoclist->pData;
sl@0
   381
  }
sl@0
   382
  r->iLastColumn = -1;
sl@0
   383
  r->iLastPos = -1;
sl@0
   384
}
sl@0
   385
sl@0
   386
/*
sl@0
   387
** Return TRUE if we have reached then end of pReader and there is
sl@0
   388
** nothing else left to read.
sl@0
   389
*/
sl@0
   390
static int atEnd(DocListReader *pReader){
sl@0
   391
  return pReader->pDoclist==0 || (pReader->p >= docListEnd(pReader->pDoclist));
sl@0
   392
}
sl@0
   393
sl@0
   394
/* Peek at the next docid without advancing the read pointer. 
sl@0
   395
*/
sl@0
   396
static sqlite_int64 peekDocid(DocListReader *pReader){
sl@0
   397
  sqlite_int64 ret;
sl@0
   398
  assert( !atEnd(pReader) );
sl@0
   399
  assert( pReader->iLastPos==-1 );
sl@0
   400
  getVarint(pReader->p, &ret);
sl@0
   401
  return ret;
sl@0
   402
}
sl@0
   403
sl@0
   404
/* Read the next docid.   See also nextDocid().
sl@0
   405
*/
sl@0
   406
static sqlite_int64 readDocid(DocListReader *pReader){
sl@0
   407
  sqlite_int64 ret;
sl@0
   408
  assert( !atEnd(pReader) );
sl@0
   409
  assert( pReader->iLastPos==-1 );
sl@0
   410
  pReader->p += getVarint(pReader->p, &ret);
sl@0
   411
  if( pReader->pDoclist->iType>=DL_POSITIONS ){
sl@0
   412
    pReader->iLastColumn = 0;
sl@0
   413
    pReader->iLastPos = 0;
sl@0
   414
  }
sl@0
   415
  return ret;
sl@0
   416
}
sl@0
   417
sl@0
   418
/* Read the next position and column index from a position list.
sl@0
   419
 * Returns the position, or -1 at the end of the list. */
sl@0
   420
static int readPosition(DocListReader *pReader, int *iColumn){
sl@0
   421
  int i;
sl@0
   422
  int iType = pReader->pDoclist->iType;
sl@0
   423
sl@0
   424
  if( pReader->iLastPos==-1 ){
sl@0
   425
    return -1;
sl@0
   426
  }
sl@0
   427
  assert( !atEnd(pReader) );
sl@0
   428
sl@0
   429
  if( iType<DL_POSITIONS ){
sl@0
   430
    return -1;
sl@0
   431
  }
sl@0
   432
  pReader->p += getVarint32(pReader->p, &i);
sl@0
   433
  if( i==POS_END ){
sl@0
   434
    pReader->iLastColumn = pReader->iLastPos = -1;
sl@0
   435
    *iColumn = -1;
sl@0
   436
    return -1;
sl@0
   437
  }
sl@0
   438
  if( i==POS_COLUMN ){
sl@0
   439
    pReader->p += getVarint32(pReader->p, &pReader->iLastColumn);
sl@0
   440
    pReader->iLastPos = 0;
sl@0
   441
    pReader->p += getVarint32(pReader->p, &i);
sl@0
   442
    assert( i>=POS_BASE );
sl@0
   443
  }
sl@0
   444
  pReader->iLastPos += ((int) i)-POS_BASE;
sl@0
   445
  if( iType>=DL_POSITIONS_OFFSETS ){
sl@0
   446
    /* Skip over offsets, ignoring them for now. */
sl@0
   447
    int iStart, iEnd;
sl@0
   448
    pReader->p += getVarint32(pReader->p, &iStart);
sl@0
   449
    pReader->p += getVarint32(pReader->p, &iEnd);
sl@0
   450
  }
sl@0
   451
  *iColumn = pReader->iLastColumn;
sl@0
   452
  return pReader->iLastPos;
sl@0
   453
}
sl@0
   454
sl@0
   455
/* Skip past the end of a position list. */
sl@0
   456
static void skipPositionList(DocListReader *pReader){
sl@0
   457
  DocList *p = pReader->pDoclist;
sl@0
   458
  if( p && p->iType>=DL_POSITIONS ){
sl@0
   459
    int iColumn;
sl@0
   460
    while( readPosition(pReader, &iColumn)!=-1 ){}
sl@0
   461
  }
sl@0
   462
}
sl@0
   463
sl@0
   464
/* Skip over a docid, including its position list if the doclist has
sl@0
   465
 * positions. */
sl@0
   466
static void skipDocument(DocListReader *pReader){
sl@0
   467
  readDocid(pReader);
sl@0
   468
  skipPositionList(pReader);
sl@0
   469
}
sl@0
   470
sl@0
   471
/* Skip past all docids which are less than [iDocid].  Returns 1 if a docid
sl@0
   472
 * matching [iDocid] was found.  */
sl@0
   473
static int skipToDocid(DocListReader *pReader, sqlite_int64 iDocid){
sl@0
   474
  sqlite_int64 d = 0;
sl@0
   475
  while( !atEnd(pReader) && (d=peekDocid(pReader))<iDocid ){
sl@0
   476
    skipDocument(pReader);
sl@0
   477
  }
sl@0
   478
  return !atEnd(pReader) && d==iDocid;
sl@0
   479
}
sl@0
   480
sl@0
   481
/* Return the first document in a document list.
sl@0
   482
*/
sl@0
   483
static sqlite_int64 firstDocid(DocList *d){
sl@0
   484
  DocListReader r;
sl@0
   485
  readerInit(&r, d);
sl@0
   486
  return readDocid(&r);
sl@0
   487
}
sl@0
   488
sl@0
   489
#ifdef SQLITE_DEBUG
sl@0
   490
/*
sl@0
   491
** This routine is used for debugging purpose only.
sl@0
   492
**
sl@0
   493
** Write the content of a doclist to standard output.
sl@0
   494
*/
sl@0
   495
static void printDoclist(DocList *p){
sl@0
   496
  DocListReader r;
sl@0
   497
  const char *zSep = "";
sl@0
   498
sl@0
   499
  readerInit(&r, p);
sl@0
   500
  while( !atEnd(&r) ){
sl@0
   501
    sqlite_int64 docid = readDocid(&r);
sl@0
   502
    if( docid==0 ){
sl@0
   503
      skipPositionList(&r);
sl@0
   504
      continue;
sl@0
   505
    }
sl@0
   506
    printf("%s%lld", zSep, docid);
sl@0
   507
    zSep =  ",";
sl@0
   508
    if( p->iType>=DL_POSITIONS ){
sl@0
   509
      int iPos, iCol;
sl@0
   510
      const char *zDiv = "";
sl@0
   511
      printf("(");
sl@0
   512
      while( (iPos = readPosition(&r, &iCol))>=0 ){
sl@0
   513
        printf("%s%d:%d", zDiv, iCol, iPos);
sl@0
   514
        zDiv = ":";
sl@0
   515
      }
sl@0
   516
      printf(")");
sl@0
   517
    }
sl@0
   518
  }
sl@0
   519
  printf("\n");
sl@0
   520
  fflush(stdout);
sl@0
   521
}
sl@0
   522
#endif /* SQLITE_DEBUG */
sl@0
   523
sl@0
   524
/* Trim the given doclist to contain only positions in column
sl@0
   525
 * [iRestrictColumn]. */
sl@0
   526
static void docListRestrictColumn(DocList *in, int iRestrictColumn){
sl@0
   527
  DocListReader r;
sl@0
   528
  DocList out;
sl@0
   529
sl@0
   530
  assert( in->iType>=DL_POSITIONS );
sl@0
   531
  readerInit(&r, in);
sl@0
   532
  docListInit(&out, DL_POSITIONS, NULL, 0);
sl@0
   533
sl@0
   534
  while( !atEnd(&r) ){
sl@0
   535
    sqlite_int64 iDocid = readDocid(&r);
sl@0
   536
    int iPos, iColumn;
sl@0
   537
sl@0
   538
    docListAddDocid(&out, iDocid);
sl@0
   539
    while( (iPos = readPosition(&r, &iColumn)) != -1 ){
sl@0
   540
      if( iColumn==iRestrictColumn ){
sl@0
   541
        docListAddPos(&out, iColumn, iPos);
sl@0
   542
      }
sl@0
   543
    }
sl@0
   544
  }
sl@0
   545
sl@0
   546
  docListDestroy(in);
sl@0
   547
  *in = out;
sl@0
   548
}
sl@0
   549
sl@0
   550
/* Trim the given doclist by discarding any docids without any remaining
sl@0
   551
 * positions. */
sl@0
   552
static void docListDiscardEmpty(DocList *in) {
sl@0
   553
  DocListReader r;
sl@0
   554
  DocList out;
sl@0
   555
sl@0
   556
  /* TODO: It would be nice to implement this operation in place; that
sl@0
   557
   * could save a significant amount of memory in queries with long doclists. */
sl@0
   558
  assert( in->iType>=DL_POSITIONS );
sl@0
   559
  readerInit(&r, in);
sl@0
   560
  docListInit(&out, DL_POSITIONS, NULL, 0);
sl@0
   561
sl@0
   562
  while( !atEnd(&r) ){
sl@0
   563
    sqlite_int64 iDocid = readDocid(&r);
sl@0
   564
    int match = 0;
sl@0
   565
    int iPos, iColumn;
sl@0
   566
    while( (iPos = readPosition(&r, &iColumn)) != -1 ){
sl@0
   567
      if( !match ){
sl@0
   568
        docListAddDocid(&out, iDocid);
sl@0
   569
        match = 1;
sl@0
   570
      }
sl@0
   571
      docListAddPos(&out, iColumn, iPos);
sl@0
   572
    }
sl@0
   573
  }
sl@0
   574
sl@0
   575
  docListDestroy(in);
sl@0
   576
  *in = out;
sl@0
   577
}
sl@0
   578
sl@0
   579
/* Helper function for docListUpdate() and docListAccumulate().
sl@0
   580
** Splices a doclist element into the doclist represented by r,
sl@0
   581
** leaving r pointing after the newly spliced element.
sl@0
   582
*/
sl@0
   583
static void docListSpliceElement(DocListReader *r, sqlite_int64 iDocid,
sl@0
   584
                                 const char *pSource, int nSource){
sl@0
   585
  DocList *d = r->pDoclist;
sl@0
   586
  char *pTarget;
sl@0
   587
  int nTarget, found;
sl@0
   588
sl@0
   589
  found = skipToDocid(r, iDocid);
sl@0
   590
sl@0
   591
  /* Describe slice in d to place pSource/nSource. */
sl@0
   592
  pTarget = r->p;
sl@0
   593
  if( found ){
sl@0
   594
    skipDocument(r);
sl@0
   595
    nTarget = r->p-pTarget;
sl@0
   596
  }else{
sl@0
   597
    nTarget = 0;
sl@0
   598
  }
sl@0
   599
sl@0
   600
  /* The sense of the following is that there are three possibilities.
sl@0
   601
  ** If nTarget==nSource, we should not move any memory nor realloc.
sl@0
   602
  ** If nTarget>nSource, trim target and realloc.
sl@0
   603
  ** If nTarget<nSource, realloc then expand target.
sl@0
   604
  */
sl@0
   605
  if( nTarget>nSource ){
sl@0
   606
    memmove(pTarget+nSource, pTarget+nTarget, docListEnd(d)-(pTarget+nTarget));
sl@0
   607
  }
sl@0
   608
  if( nTarget!=nSource ){
sl@0
   609
    int iDoclist = pTarget-d->pData;
sl@0
   610
    d->pData = realloc(d->pData, d->nData+nSource-nTarget);
sl@0
   611
    pTarget = d->pData+iDoclist;
sl@0
   612
  }
sl@0
   613
  if( nTarget<nSource ){
sl@0
   614
    memmove(pTarget+nSource, pTarget+nTarget, docListEnd(d)-(pTarget+nTarget));
sl@0
   615
  }
sl@0
   616
sl@0
   617
  memcpy(pTarget, pSource, nSource);
sl@0
   618
  d->nData += nSource-nTarget;
sl@0
   619
  r->p = pTarget+nSource;
sl@0
   620
}
sl@0
   621
sl@0
   622
/* Insert/update pUpdate into the doclist. */
sl@0
   623
static void docListUpdate(DocList *d, DocList *pUpdate){
sl@0
   624
  DocListReader reader;
sl@0
   625
sl@0
   626
  assert( d!=NULL && pUpdate!=NULL );
sl@0
   627
  assert( d->iType==pUpdate->iType);
sl@0
   628
sl@0
   629
  readerInit(&reader, d);
sl@0
   630
  docListSpliceElement(&reader, firstDocid(pUpdate),
sl@0
   631
                       pUpdate->pData, pUpdate->nData);
sl@0
   632
}
sl@0
   633
sl@0
   634
/* Propagate elements from pUpdate to pAcc, overwriting elements with
sl@0
   635
** matching docids.
sl@0
   636
*/
sl@0
   637
static void docListAccumulate(DocList *pAcc, DocList *pUpdate){
sl@0
   638
  DocListReader accReader, updateReader;
sl@0
   639
sl@0
   640
  /* Handle edge cases where one doclist is empty. */
sl@0
   641
  assert( pAcc!=NULL );
sl@0
   642
  if( pUpdate==NULL || pUpdate->nData==0 ) return;
sl@0
   643
  if( pAcc->nData==0 ){
sl@0
   644
    pAcc->pData = malloc(pUpdate->nData);
sl@0
   645
    memcpy(pAcc->pData, pUpdate->pData, pUpdate->nData);
sl@0
   646
    pAcc->nData = pUpdate->nData;
sl@0
   647
    return;
sl@0
   648
  }
sl@0
   649
sl@0
   650
  readerInit(&accReader, pAcc);
sl@0
   651
  readerInit(&updateReader, pUpdate);
sl@0
   652
sl@0
   653
  while( !atEnd(&updateReader) ){
sl@0
   654
    char *pSource = updateReader.p;
sl@0
   655
    sqlite_int64 iDocid = readDocid(&updateReader);
sl@0
   656
    skipPositionList(&updateReader);
sl@0
   657
    docListSpliceElement(&accReader, iDocid, pSource, updateReader.p-pSource);
sl@0
   658
  }
sl@0
   659
}
sl@0
   660
sl@0
   661
/*
sl@0
   662
** Read the next docid off of pIn.  Return 0 if we reach the end.
sl@0
   663
*
sl@0
   664
* TODO: This assumes that docids are never 0, but they may actually be 0 since
sl@0
   665
* users can choose docids when inserting into a full-text table.  Fix this.
sl@0
   666
*/
sl@0
   667
static sqlite_int64 nextDocid(DocListReader *pIn){
sl@0
   668
  skipPositionList(pIn);
sl@0
   669
  return atEnd(pIn) ? 0 : readDocid(pIn);
sl@0
   670
}
sl@0
   671
sl@0
   672
/*
sl@0
   673
** pLeft and pRight are two DocListReaders that are pointing to
sl@0
   674
** positions lists of the same document: iDocid. 
sl@0
   675
**
sl@0
   676
** If there are no instances in pLeft or pRight where the position
sl@0
   677
** of pLeft is one less than the position of pRight, then this
sl@0
   678
** routine adds nothing to pOut.
sl@0
   679
**
sl@0
   680
** If there are one or more instances where positions from pLeft
sl@0
   681
** are exactly one less than positions from pRight, then add a new
sl@0
   682
** document record to pOut.  If pOut wants to hold positions, then
sl@0
   683
** include the positions from pRight that are one more than a
sl@0
   684
** position in pLeft.  In other words:  pRight.iPos==pLeft.iPos+1.
sl@0
   685
**
sl@0
   686
** pLeft and pRight are left pointing at the next document record.
sl@0
   687
*/
sl@0
   688
static void mergePosList(
sl@0
   689
  DocListReader *pLeft,    /* Left position list */
sl@0
   690
  DocListReader *pRight,   /* Right position list */
sl@0
   691
  sqlite_int64 iDocid,     /* The docid from pLeft and pRight */
sl@0
   692
  DocList *pOut            /* Write the merged document record here */
sl@0
   693
){
sl@0
   694
  int iLeftCol, iLeftPos = readPosition(pLeft, &iLeftCol);
sl@0
   695
  int iRightCol, iRightPos = readPosition(pRight, &iRightCol);
sl@0
   696
  int match = 0;
sl@0
   697
sl@0
   698
  /* Loop until we've reached the end of both position lists. */
sl@0
   699
  while( iLeftPos!=-1 && iRightPos!=-1 ){
sl@0
   700
    if( iLeftCol==iRightCol && iLeftPos+1==iRightPos ){
sl@0
   701
      if( !match ){
sl@0
   702
        docListAddDocid(pOut, iDocid);
sl@0
   703
        match = 1;
sl@0
   704
      }
sl@0
   705
      if( pOut->iType>=DL_POSITIONS ){
sl@0
   706
        docListAddPos(pOut, iRightCol, iRightPos);
sl@0
   707
      }
sl@0
   708
      iLeftPos = readPosition(pLeft, &iLeftCol);
sl@0
   709
      iRightPos = readPosition(pRight, &iRightCol);
sl@0
   710
    }else if( iRightCol<iLeftCol ||
sl@0
   711
              (iRightCol==iLeftCol && iRightPos<iLeftPos+1) ){
sl@0
   712
      iRightPos = readPosition(pRight, &iRightCol);
sl@0
   713
    }else{
sl@0
   714
      iLeftPos = readPosition(pLeft, &iLeftCol);
sl@0
   715
    }
sl@0
   716
  }
sl@0
   717
  if( iLeftPos>=0 ) skipPositionList(pLeft);
sl@0
   718
  if( iRightPos>=0 ) skipPositionList(pRight);
sl@0
   719
}
sl@0
   720
sl@0
   721
/* We have two doclists:  pLeft and pRight.
sl@0
   722
** Write the phrase intersection of these two doclists into pOut.
sl@0
   723
**
sl@0
   724
** A phrase intersection means that two documents only match
sl@0
   725
** if pLeft.iPos+1==pRight.iPos.
sl@0
   726
**
sl@0
   727
** The output pOut may or may not contain positions.  If pOut
sl@0
   728
** does contain positions, they are the positions of pRight.
sl@0
   729
*/
sl@0
   730
static void docListPhraseMerge(
sl@0
   731
  DocList *pLeft,    /* Doclist resulting from the words on the left */
sl@0
   732
  DocList *pRight,   /* Doclist for the next word to the right */
sl@0
   733
  DocList *pOut      /* Write the combined doclist here */
sl@0
   734
){
sl@0
   735
  DocListReader left, right;
sl@0
   736
  sqlite_int64 docidLeft, docidRight;
sl@0
   737
sl@0
   738
  readerInit(&left, pLeft);
sl@0
   739
  readerInit(&right, pRight);
sl@0
   740
  docidLeft = nextDocid(&left);
sl@0
   741
  docidRight = nextDocid(&right);
sl@0
   742
sl@0
   743
  while( docidLeft>0 && docidRight>0 ){
sl@0
   744
    if( docidLeft<docidRight ){
sl@0
   745
      docidLeft = nextDocid(&left);
sl@0
   746
    }else if( docidRight<docidLeft ){
sl@0
   747
      docidRight = nextDocid(&right);
sl@0
   748
    }else{
sl@0
   749
      mergePosList(&left, &right, docidLeft, pOut);
sl@0
   750
      docidLeft = nextDocid(&left);
sl@0
   751
      docidRight = nextDocid(&right);
sl@0
   752
    }
sl@0
   753
  }
sl@0
   754
}
sl@0
   755
sl@0
   756
/* We have two doclists:  pLeft and pRight.
sl@0
   757
** Write the intersection of these two doclists into pOut.
sl@0
   758
** Only docids are matched.  Position information is ignored.
sl@0
   759
**
sl@0
   760
** The output pOut never holds positions.
sl@0
   761
*/
sl@0
   762
static void docListAndMerge(
sl@0
   763
  DocList *pLeft,    /* Doclist resulting from the words on the left */
sl@0
   764
  DocList *pRight,   /* Doclist for the next word to the right */
sl@0
   765
  DocList *pOut      /* Write the combined doclist here */
sl@0
   766
){
sl@0
   767
  DocListReader left, right;
sl@0
   768
  sqlite_int64 docidLeft, docidRight;
sl@0
   769
sl@0
   770
  assert( pOut->iType<DL_POSITIONS );
sl@0
   771
sl@0
   772
  readerInit(&left, pLeft);
sl@0
   773
  readerInit(&right, pRight);
sl@0
   774
  docidLeft = nextDocid(&left);
sl@0
   775
  docidRight = nextDocid(&right);
sl@0
   776
sl@0
   777
  while( docidLeft>0 && docidRight>0 ){
sl@0
   778
    if( docidLeft<docidRight ){
sl@0
   779
      docidLeft = nextDocid(&left);
sl@0
   780
    }else if( docidRight<docidLeft ){
sl@0
   781
      docidRight = nextDocid(&right);
sl@0
   782
    }else{
sl@0
   783
      docListAddDocid(pOut, docidLeft);
sl@0
   784
      docidLeft = nextDocid(&left);
sl@0
   785
      docidRight = nextDocid(&right);
sl@0
   786
    }
sl@0
   787
  }
sl@0
   788
}
sl@0
   789
sl@0
   790
/* We have two doclists:  pLeft and pRight.
sl@0
   791
** Write the union of these two doclists into pOut.
sl@0
   792
** Only docids are matched.  Position information is ignored.
sl@0
   793
**
sl@0
   794
** The output pOut never holds positions.
sl@0
   795
*/
sl@0
   796
static void docListOrMerge(
sl@0
   797
  DocList *pLeft,    /* Doclist resulting from the words on the left */
sl@0
   798
  DocList *pRight,   /* Doclist for the next word to the right */
sl@0
   799
  DocList *pOut      /* Write the combined doclist here */
sl@0
   800
){
sl@0
   801
  DocListReader left, right;
sl@0
   802
  sqlite_int64 docidLeft, docidRight, priorLeft;
sl@0
   803
sl@0
   804
  readerInit(&left, pLeft);
sl@0
   805
  readerInit(&right, pRight);
sl@0
   806
  docidLeft = nextDocid(&left);
sl@0
   807
  docidRight = nextDocid(&right);
sl@0
   808
sl@0
   809
  while( docidLeft>0 && docidRight>0 ){
sl@0
   810
    if( docidLeft<=docidRight ){
sl@0
   811
      docListAddDocid(pOut, docidLeft);
sl@0
   812
    }else{
sl@0
   813
      docListAddDocid(pOut, docidRight);
sl@0
   814
    }
sl@0
   815
    priorLeft = docidLeft;
sl@0
   816
    if( docidLeft<=docidRight ){
sl@0
   817
      docidLeft = nextDocid(&left);
sl@0
   818
    }
sl@0
   819
    if( docidRight>0 && docidRight<=priorLeft ){
sl@0
   820
      docidRight = nextDocid(&right);
sl@0
   821
    }
sl@0
   822
  }
sl@0
   823
  while( docidLeft>0 ){
sl@0
   824
    docListAddDocid(pOut, docidLeft);
sl@0
   825
    docidLeft = nextDocid(&left);
sl@0
   826
  }
sl@0
   827
  while( docidRight>0 ){
sl@0
   828
    docListAddDocid(pOut, docidRight);
sl@0
   829
    docidRight = nextDocid(&right);
sl@0
   830
  }
sl@0
   831
}
sl@0
   832
sl@0
   833
/* We have two doclists:  pLeft and pRight.
sl@0
   834
** Write into pOut all documents that occur in pLeft but not
sl@0
   835
** in pRight.
sl@0
   836
**
sl@0
   837
** Only docids are matched.  Position information is ignored.
sl@0
   838
**
sl@0
   839
** The output pOut never holds positions.
sl@0
   840
*/
sl@0
   841
static void docListExceptMerge(
sl@0
   842
  DocList *pLeft,    /* Doclist resulting from the words on the left */
sl@0
   843
  DocList *pRight,   /* Doclist for the next word to the right */
sl@0
   844
  DocList *pOut      /* Write the combined doclist here */
sl@0
   845
){
sl@0
   846
  DocListReader left, right;
sl@0
   847
  sqlite_int64 docidLeft, docidRight, priorLeft;
sl@0
   848
sl@0
   849
  readerInit(&left, pLeft);
sl@0
   850
  readerInit(&right, pRight);
sl@0
   851
  docidLeft = nextDocid(&left);
sl@0
   852
  docidRight = nextDocid(&right);
sl@0
   853
sl@0
   854
  while( docidLeft>0 && docidRight>0 ){
sl@0
   855
    priorLeft = docidLeft;
sl@0
   856
    if( docidLeft<docidRight ){
sl@0
   857
      docListAddDocid(pOut, docidLeft);
sl@0
   858
    }
sl@0
   859
    if( docidLeft<=docidRight ){
sl@0
   860
      docidLeft = nextDocid(&left);
sl@0
   861
    }
sl@0
   862
    if( docidRight>0 && docidRight<=priorLeft ){
sl@0
   863
      docidRight = nextDocid(&right);
sl@0
   864
    }
sl@0
   865
  }
sl@0
   866
  while( docidLeft>0 ){
sl@0
   867
    docListAddDocid(pOut, docidLeft);
sl@0
   868
    docidLeft = nextDocid(&left);
sl@0
   869
  }
sl@0
   870
}
sl@0
   871
sl@0
   872
static char *string_dup_n(const char *s, int n){
sl@0
   873
  char *str = malloc(n + 1);
sl@0
   874
  memcpy(str, s, n);
sl@0
   875
  str[n] = '\0';
sl@0
   876
  return str;
sl@0
   877
}
sl@0
   878
sl@0
   879
/* Duplicate a string; the caller must free() the returned string.
sl@0
   880
 * (We don't use strdup() since it is not part of the standard C library and
sl@0
   881
 * may not be available everywhere.) */
sl@0
   882
static char *string_dup(const char *s){
sl@0
   883
  return string_dup_n(s, strlen(s));
sl@0
   884
}
sl@0
   885
sl@0
   886
/* Format a string, replacing each occurrence of the % character with
sl@0
   887
 * zDb.zName.  This may be more convenient than sqlite_mprintf()
sl@0
   888
 * when one string is used repeatedly in a format string.
sl@0
   889
 * The caller must free() the returned string. */
sl@0
   890
static char *string_format(const char *zFormat,
sl@0
   891
                           const char *zDb, const char *zName){
sl@0
   892
  const char *p;
sl@0
   893
  size_t len = 0;
sl@0
   894
  size_t nDb = strlen(zDb);
sl@0
   895
  size_t nName = strlen(zName);
sl@0
   896
  size_t nFullTableName = nDb+1+nName;
sl@0
   897
  char *result;
sl@0
   898
  char *r;
sl@0
   899
sl@0
   900
  /* first compute length needed */
sl@0
   901
  for(p = zFormat ; *p ; ++p){
sl@0
   902
    len += (*p=='%' ? nFullTableName : 1);
sl@0
   903
  }
sl@0
   904
  len += 1;  /* for null terminator */
sl@0
   905
sl@0
   906
  r = result = malloc(len);
sl@0
   907
  for(p = zFormat; *p; ++p){
sl@0
   908
    if( *p=='%' ){
sl@0
   909
      memcpy(r, zDb, nDb);
sl@0
   910
      r += nDb;
sl@0
   911
      *r++ = '.';
sl@0
   912
      memcpy(r, zName, nName);
sl@0
   913
      r += nName;
sl@0
   914
    } else {
sl@0
   915
      *r++ = *p;
sl@0
   916
    }
sl@0
   917
  }
sl@0
   918
  *r++ = '\0';
sl@0
   919
  assert( r == result + len );
sl@0
   920
  return result;
sl@0
   921
}
sl@0
   922
sl@0
   923
static int sql_exec(sqlite3 *db, const char *zDb, const char *zName,
sl@0
   924
                    const char *zFormat){
sl@0
   925
  char *zCommand = string_format(zFormat, zDb, zName);
sl@0
   926
  int rc;
sl@0
   927
  TRACE(("FTS1 sql: %s\n", zCommand));
sl@0
   928
  rc = sqlite3_exec(db, zCommand, NULL, 0, NULL);
sl@0
   929
  free(zCommand);
sl@0
   930
  return rc;
sl@0
   931
}
sl@0
   932
sl@0
   933
static int sql_prepare(sqlite3 *db, const char *zDb, const char *zName,
sl@0
   934
                       sqlite3_stmt **ppStmt, const char *zFormat){
sl@0
   935
  char *zCommand = string_format(zFormat, zDb, zName);
sl@0
   936
  int rc;
sl@0
   937
  TRACE(("FTS1 prepare: %s\n", zCommand));
sl@0
   938
  rc = sqlite3_prepare(db, zCommand, -1, ppStmt, NULL);
sl@0
   939
  free(zCommand);
sl@0
   940
  return rc;
sl@0
   941
}
sl@0
   942
sl@0
   943
/* end utility functions */
sl@0
   944
sl@0
   945
/* Forward reference */
sl@0
   946
typedef struct fulltext_vtab fulltext_vtab;
sl@0
   947
sl@0
   948
/* A single term in a query is represented by an instances of
sl@0
   949
** the following structure.
sl@0
   950
*/
sl@0
   951
typedef struct QueryTerm {
sl@0
   952
  short int nPhrase; /* How many following terms are part of the same phrase */
sl@0
   953
  short int iPhrase; /* This is the i-th term of a phrase. */
sl@0
   954
  short int iColumn; /* Column of the index that must match this term */
sl@0
   955
  signed char isOr;  /* this term is preceded by "OR" */
sl@0
   956
  signed char isNot; /* this term is preceded by "-" */
sl@0
   957
  char *pTerm;       /* text of the term.  '\000' terminated.  malloced */
sl@0
   958
  int nTerm;         /* Number of bytes in pTerm[] */
sl@0
   959
} QueryTerm;
sl@0
   960
sl@0
   961
sl@0
   962
/* A query string is parsed into a Query structure.
sl@0
   963
 *
sl@0
   964
 * We could, in theory, allow query strings to be complicated
sl@0
   965
 * nested expressions with precedence determined by parentheses.
sl@0
   966
 * But none of the major search engines do this.  (Perhaps the
sl@0
   967
 * feeling is that an parenthesized expression is two complex of
sl@0
   968
 * an idea for the average user to grasp.)  Taking our lead from
sl@0
   969
 * the major search engines, we will allow queries to be a list
sl@0
   970
 * of terms (with an implied AND operator) or phrases in double-quotes,
sl@0
   971
 * with a single optional "-" before each non-phrase term to designate
sl@0
   972
 * negation and an optional OR connector.
sl@0
   973
 *
sl@0
   974
 * OR binds more tightly than the implied AND, which is what the
sl@0
   975
 * major search engines seem to do.  So, for example:
sl@0
   976
 * 
sl@0
   977
 *    [one two OR three]     ==>    one AND (two OR three)
sl@0
   978
 *    [one OR two three]     ==>    (one OR two) AND three
sl@0
   979
 *
sl@0
   980
 * A "-" before a term matches all entries that lack that term.
sl@0
   981
 * The "-" must occur immediately before the term with in intervening
sl@0
   982
 * space.  This is how the search engines do it.
sl@0
   983
 *
sl@0
   984
 * A NOT term cannot be the right-hand operand of an OR.  If this
sl@0
   985
 * occurs in the query string, the NOT is ignored:
sl@0
   986
 *
sl@0
   987
 *    [one OR -two]          ==>    one OR two
sl@0
   988
 *
sl@0
   989
 */
sl@0
   990
typedef struct Query {
sl@0
   991
  fulltext_vtab *pFts;  /* The full text index */
sl@0
   992
  int nTerms;           /* Number of terms in the query */
sl@0
   993
  QueryTerm *pTerms;    /* Array of terms.  Space obtained from malloc() */
sl@0
   994
  int nextIsOr;         /* Set the isOr flag on the next inserted term */
sl@0
   995
  int nextColumn;       /* Next word parsed must be in this column */
sl@0
   996
  int dfltColumn;       /* The default column */
sl@0
   997
} Query;
sl@0
   998
sl@0
   999
sl@0
  1000
/*
sl@0
  1001
** An instance of the following structure keeps track of generated
sl@0
  1002
** matching-word offset information and snippets.
sl@0
  1003
*/
sl@0
  1004
typedef struct Snippet {
sl@0
  1005
  int nMatch;     /* Total number of matches */
sl@0
  1006
  int nAlloc;     /* Space allocated for aMatch[] */
sl@0
  1007
  struct snippetMatch { /* One entry for each matching term */
sl@0
  1008
    char snStatus;       /* Status flag for use while constructing snippets */
sl@0
  1009
    short int iCol;      /* The column that contains the match */
sl@0
  1010
    short int iTerm;     /* The index in Query.pTerms[] of the matching term */
sl@0
  1011
    short int nByte;     /* Number of bytes in the term */
sl@0
  1012
    int iStart;          /* The offset to the first character of the term */
sl@0
  1013
  } *aMatch;      /* Points to space obtained from malloc */
sl@0
  1014
  char *zOffset;  /* Text rendering of aMatch[] */
sl@0
  1015
  int nOffset;    /* strlen(zOffset) */
sl@0
  1016
  char *zSnippet; /* Snippet text */
sl@0
  1017
  int nSnippet;   /* strlen(zSnippet) */
sl@0
  1018
} Snippet;
sl@0
  1019
sl@0
  1020
sl@0
  1021
typedef enum QueryType {
sl@0
  1022
  QUERY_GENERIC,   /* table scan */
sl@0
  1023
  QUERY_ROWID,     /* lookup by rowid */
sl@0
  1024
  QUERY_FULLTEXT   /* QUERY_FULLTEXT + [i] is a full-text search for column i*/
sl@0
  1025
} QueryType;
sl@0
  1026
sl@0
  1027
/* TODO(shess) CHUNK_MAX controls how much data we allow in segment 0
sl@0
  1028
** before we start aggregating into larger segments.  Lower CHUNK_MAX
sl@0
  1029
** means that for a given input we have more individual segments per
sl@0
  1030
** term, which means more rows in the table and a bigger index (due to
sl@0
  1031
** both more rows and bigger rowids).  But it also reduces the average
sl@0
  1032
** cost of adding new elements to the segment 0 doclist, and it seems
sl@0
  1033
** to reduce the number of pages read and written during inserts.  256
sl@0
  1034
** was chosen by measuring insertion times for a certain input (first
sl@0
  1035
** 10k documents of Enron corpus), though including query performance
sl@0
  1036
** in the decision may argue for a larger value.
sl@0
  1037
*/
sl@0
  1038
#define CHUNK_MAX 256
sl@0
  1039
sl@0
  1040
typedef enum fulltext_statement {
sl@0
  1041
  CONTENT_INSERT_STMT,
sl@0
  1042
  CONTENT_SELECT_STMT,
sl@0
  1043
  CONTENT_UPDATE_STMT,
sl@0
  1044
  CONTENT_DELETE_STMT,
sl@0
  1045
sl@0
  1046
  TERM_SELECT_STMT,
sl@0
  1047
  TERM_SELECT_ALL_STMT,
sl@0
  1048
  TERM_INSERT_STMT,
sl@0
  1049
  TERM_UPDATE_STMT,
sl@0
  1050
  TERM_DELETE_STMT,
sl@0
  1051
sl@0
  1052
  MAX_STMT                     /* Always at end! */
sl@0
  1053
} fulltext_statement;
sl@0
  1054
sl@0
  1055
/* These must exactly match the enum above. */
sl@0
  1056
/* TODO(adam): Is there some risk that a statement (in particular,
sl@0
  1057
** pTermSelectStmt) will be used in two cursors at once, e.g.  if a
sl@0
  1058
** query joins a virtual table to itself?  If so perhaps we should
sl@0
  1059
** move some of these to the cursor object.
sl@0
  1060
*/
sl@0
  1061
static const char *const fulltext_zStatement[MAX_STMT] = {
sl@0
  1062
  /* CONTENT_INSERT */ NULL,  /* generated in contentInsertStatement() */
sl@0
  1063
  /* CONTENT_SELECT */ "select * from %_content where rowid = ?",
sl@0
  1064
  /* CONTENT_UPDATE */ NULL,  /* generated in contentUpdateStatement() */
sl@0
  1065
  /* CONTENT_DELETE */ "delete from %_content where rowid = ?",
sl@0
  1066
sl@0
  1067
  /* TERM_SELECT */
sl@0
  1068
  "select rowid, doclist from %_term where term = ? and segment = ?",
sl@0
  1069
  /* TERM_SELECT_ALL */
sl@0
  1070
  "select doclist from %_term where term = ? order by segment",
sl@0
  1071
  /* TERM_INSERT */
sl@0
  1072
  "insert into %_term (rowid, term, segment, doclist) values (?, ?, ?, ?)",
sl@0
  1073
  /* TERM_UPDATE */ "update %_term set doclist = ? where rowid = ?",
sl@0
  1074
  /* TERM_DELETE */ "delete from %_term where rowid = ?",
sl@0
  1075
};
sl@0
  1076
sl@0
  1077
/*
sl@0
  1078
** A connection to a fulltext index is an instance of the following
sl@0
  1079
** structure.  The xCreate and xConnect methods create an instance
sl@0
  1080
** of this structure and xDestroy and xDisconnect free that instance.
sl@0
  1081
** All other methods receive a pointer to the structure as one of their
sl@0
  1082
** arguments.
sl@0
  1083
*/
sl@0
  1084
struct fulltext_vtab {
sl@0
  1085
  sqlite3_vtab base;               /* Base class used by SQLite core */
sl@0
  1086
  sqlite3 *db;                     /* The database connection */
sl@0
  1087
  const char *zDb;                 /* logical database name */
sl@0
  1088
  const char *zName;               /* virtual table name */
sl@0
  1089
  int nColumn;                     /* number of columns in virtual table */
sl@0
  1090
  char **azColumn;                 /* column names.  malloced */
sl@0
  1091
  char **azContentColumn;          /* column names in content table; malloced */
sl@0
  1092
  sqlite3_tokenizer *pTokenizer;   /* tokenizer for inserts and queries */
sl@0
  1093
sl@0
  1094
  /* Precompiled statements which we keep as long as the table is
sl@0
  1095
  ** open.
sl@0
  1096
  */
sl@0
  1097
  sqlite3_stmt *pFulltextStatements[MAX_STMT];
sl@0
  1098
};
sl@0
  1099
sl@0
  1100
/*
sl@0
  1101
** When the core wants to do a query, it create a cursor using a
sl@0
  1102
** call to xOpen.  This structure is an instance of a cursor.  It
sl@0
  1103
** is destroyed by xClose.
sl@0
  1104
*/
sl@0
  1105
typedef struct fulltext_cursor {
sl@0
  1106
  sqlite3_vtab_cursor base;        /* Base class used by SQLite core */
sl@0
  1107
  QueryType iCursorType;           /* Copy of sqlite3_index_info.idxNum */
sl@0
  1108
  sqlite3_stmt *pStmt;             /* Prepared statement in use by the cursor */
sl@0
  1109
  int eof;                         /* True if at End Of Results */
sl@0
  1110
  Query q;                         /* Parsed query string */
sl@0
  1111
  Snippet snippet;                 /* Cached snippet for the current row */
sl@0
  1112
  int iColumn;                     /* Column being searched */
sl@0
  1113
  DocListReader result;  /* used when iCursorType == QUERY_FULLTEXT */ 
sl@0
  1114
} fulltext_cursor;
sl@0
  1115
sl@0
  1116
static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
sl@0
  1117
  return (fulltext_vtab *) c->base.pVtab;
sl@0
  1118
}
sl@0
  1119
sl@0
  1120
static const sqlite3_module fulltextModule;   /* forward declaration */
sl@0
  1121
sl@0
  1122
/* Append a list of strings separated by commas to a StringBuffer. */
sl@0
  1123
static void appendList(StringBuffer *sb, int nString, char **azString){
sl@0
  1124
  int i;
sl@0
  1125
  for(i=0; i<nString; ++i){
sl@0
  1126
    if( i>0 ) append(sb, ", ");
sl@0
  1127
    append(sb, azString[i]);
sl@0
  1128
  }
sl@0
  1129
}
sl@0
  1130
sl@0
  1131
/* Return a dynamically generated statement of the form
sl@0
  1132
 *   insert into %_content (rowid, ...) values (?, ...)
sl@0
  1133
 */
sl@0
  1134
static const char *contentInsertStatement(fulltext_vtab *v){
sl@0
  1135
  StringBuffer sb;
sl@0
  1136
  int i;
sl@0
  1137
sl@0
  1138
  initStringBuffer(&sb);
sl@0
  1139
  append(&sb, "insert into %_content (rowid, ");
sl@0
  1140
  appendList(&sb, v->nColumn, v->azContentColumn);
sl@0
  1141
  append(&sb, ") values (?");
sl@0
  1142
  for(i=0; i<v->nColumn; ++i)
sl@0
  1143
    append(&sb, ", ?");
sl@0
  1144
  append(&sb, ")");
sl@0
  1145
  return sb.s;
sl@0
  1146
}
sl@0
  1147
sl@0
  1148
/* Return a dynamically generated statement of the form
sl@0
  1149
 *   update %_content set [col_0] = ?, [col_1] = ?, ...
sl@0
  1150
 *                    where rowid = ?
sl@0
  1151
 */
sl@0
  1152
static const char *contentUpdateStatement(fulltext_vtab *v){
sl@0
  1153
  StringBuffer sb;
sl@0
  1154
  int i;
sl@0
  1155
sl@0
  1156
  initStringBuffer(&sb);
sl@0
  1157
  append(&sb, "update %_content set ");
sl@0
  1158
  for(i=0; i<v->nColumn; ++i) {
sl@0
  1159
    if( i>0 ){
sl@0
  1160
      append(&sb, ", ");
sl@0
  1161
    }
sl@0
  1162
    append(&sb, v->azContentColumn[i]);
sl@0
  1163
    append(&sb, " = ?");
sl@0
  1164
  }
sl@0
  1165
  append(&sb, " where rowid = ?");
sl@0
  1166
  return sb.s;
sl@0
  1167
}
sl@0
  1168
sl@0
  1169
/* Puts a freshly-prepared statement determined by iStmt in *ppStmt.
sl@0
  1170
** If the indicated statement has never been prepared, it is prepared
sl@0
  1171
** and cached, otherwise the cached version is reset.
sl@0
  1172
*/
sl@0
  1173
static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt,
sl@0
  1174
                             sqlite3_stmt **ppStmt){
sl@0
  1175
  assert( iStmt<MAX_STMT );
sl@0
  1176
  if( v->pFulltextStatements[iStmt]==NULL ){
sl@0
  1177
    const char *zStmt;
sl@0
  1178
    int rc;
sl@0
  1179
    switch( iStmt ){
sl@0
  1180
      case CONTENT_INSERT_STMT:
sl@0
  1181
        zStmt = contentInsertStatement(v); break;
sl@0
  1182
      case CONTENT_UPDATE_STMT:
sl@0
  1183
        zStmt = contentUpdateStatement(v); break;
sl@0
  1184
      default:
sl@0
  1185
        zStmt = fulltext_zStatement[iStmt];
sl@0
  1186
    }
sl@0
  1187
    rc = sql_prepare(v->db, v->zDb, v->zName, &v->pFulltextStatements[iStmt],
sl@0
  1188
                         zStmt);
sl@0
  1189
    if( zStmt != fulltext_zStatement[iStmt]) free((void *) zStmt);
sl@0
  1190
    if( rc!=SQLITE_OK ) return rc;
sl@0
  1191
  } else {
sl@0
  1192
    int rc = sqlite3_reset(v->pFulltextStatements[iStmt]);
sl@0
  1193
    if( rc!=SQLITE_OK ) return rc;
sl@0
  1194
  }
sl@0
  1195
sl@0
  1196
  *ppStmt = v->pFulltextStatements[iStmt];
sl@0
  1197
  return SQLITE_OK;
sl@0
  1198
}
sl@0
  1199
sl@0
  1200
/* Step the indicated statement, handling errors SQLITE_BUSY (by
sl@0
  1201
** retrying) and SQLITE_SCHEMA (by re-preparing and transferring
sl@0
  1202
** bindings to the new statement).
sl@0
  1203
** TODO(adam): We should extend this function so that it can work with
sl@0
  1204
** statements declared locally, not only globally cached statements.
sl@0
  1205
*/
sl@0
  1206
static int sql_step_statement(fulltext_vtab *v, fulltext_statement iStmt,
sl@0
  1207
                              sqlite3_stmt **ppStmt){
sl@0
  1208
  int rc;
sl@0
  1209
  sqlite3_stmt *s = *ppStmt;
sl@0
  1210
  assert( iStmt<MAX_STMT );
sl@0
  1211
  assert( s==v->pFulltextStatements[iStmt] );
sl@0
  1212
sl@0
  1213
  while( (rc=sqlite3_step(s))!=SQLITE_DONE && rc!=SQLITE_ROW ){
sl@0
  1214
    if( rc==SQLITE_BUSY ) continue;
sl@0
  1215
    if( rc!=SQLITE_ERROR ) return rc;
sl@0
  1216
sl@0
  1217
    /* If an SQLITE_SCHEMA error has occured, then finalizing this
sl@0
  1218
     * statement is going to delete the fulltext_vtab structure. If
sl@0
  1219
     * the statement just executed is in the pFulltextStatements[]
sl@0
  1220
     * array, it will be finalized twice. So remove it before
sl@0
  1221
     * calling sqlite3_finalize().
sl@0
  1222
     */
sl@0
  1223
    v->pFulltextStatements[iStmt] = NULL;
sl@0
  1224
    rc = sqlite3_finalize(s);
sl@0
  1225
    break;
sl@0
  1226
  }
sl@0
  1227
  return rc;
sl@0
  1228
sl@0
  1229
 err:
sl@0
  1230
  sqlite3_finalize(s);
sl@0
  1231
  return rc;
sl@0
  1232
}
sl@0
  1233
sl@0
  1234
/* Like sql_step_statement(), but convert SQLITE_DONE to SQLITE_OK.
sl@0
  1235
** Useful for statements like UPDATE, where we expect no results.
sl@0
  1236
*/
sl@0
  1237
static int sql_single_step_statement(fulltext_vtab *v,
sl@0
  1238
                                     fulltext_statement iStmt,
sl@0
  1239
                                     sqlite3_stmt **ppStmt){
sl@0
  1240
  int rc = sql_step_statement(v, iStmt, ppStmt);
sl@0
  1241
  return (rc==SQLITE_DONE) ? SQLITE_OK : rc;
sl@0
  1242
}
sl@0
  1243
sl@0
  1244
/* insert into %_content (rowid, ...) values ([rowid], [pValues]) */
sl@0
  1245
static int content_insert(fulltext_vtab *v, sqlite3_value *rowid,
sl@0
  1246
                          sqlite3_value **pValues){
sl@0
  1247
  sqlite3_stmt *s;
sl@0
  1248
  int i;
sl@0
  1249
  int rc = sql_get_statement(v, CONTENT_INSERT_STMT, &s);
sl@0
  1250
  if( rc!=SQLITE_OK ) return rc;
sl@0
  1251
sl@0
  1252
  rc = sqlite3_bind_value(s, 1, rowid);
sl@0
  1253
  if( rc!=SQLITE_OK ) return rc;
sl@0
  1254
sl@0
  1255
  for(i=0; i<v->nColumn; ++i){
sl@0
  1256
    rc = sqlite3_bind_value(s, 2+i, pValues[i]);
sl@0
  1257
    if( rc!=SQLITE_OK ) return rc;
sl@0
  1258
  }
sl@0
  1259
sl@0
  1260
  return sql_single_step_statement(v, CONTENT_INSERT_STMT, &s);
sl@0
  1261
}
sl@0
  1262
sl@0
  1263
/* update %_content set col0 = pValues[0], col1 = pValues[1], ...
sl@0
  1264
 *                  where rowid = [iRowid] */
sl@0
  1265
static int content_update(fulltext_vtab *v, sqlite3_value **pValues,
sl@0
  1266
                          sqlite_int64 iRowid){
sl@0
  1267
  sqlite3_stmt *s;
sl@0
  1268
  int i;
sl@0
  1269
  int rc = sql_get_statement(v, CONTENT_UPDATE_STMT, &s);
sl@0
  1270
  if( rc!=SQLITE_OK ) return rc;
sl@0
  1271
sl@0
  1272
  for(i=0; i<v->nColumn; ++i){
sl@0
  1273
    rc = sqlite3_bind_value(s, 1+i, pValues[i]);
sl@0
  1274
    if( rc!=SQLITE_OK ) return rc;
sl@0
  1275
  }
sl@0
  1276
sl@0
  1277
  rc = sqlite3_bind_int64(s, 1+v->nColumn, iRowid);
sl@0
  1278
  if( rc!=SQLITE_OK ) return rc;
sl@0
  1279
sl@0
  1280
  return sql_single_step_statement(v, CONTENT_UPDATE_STMT, &s);
sl@0
  1281
}
sl@0
  1282
sl@0
  1283
static void freeStringArray(int nString, const char **pString){
sl@0
  1284
  int i;
sl@0
  1285
sl@0
  1286
  for (i=0 ; i < nString ; ++i) {
sl@0
  1287
    if( pString[i]!=NULL ) free((void *) pString[i]);
sl@0
  1288
  }
sl@0
  1289
  free((void *) pString);
sl@0
  1290
}
sl@0
  1291
sl@0
  1292
/* select * from %_content where rowid = [iRow]
sl@0
  1293
 * The caller must delete the returned array and all strings in it.
sl@0
  1294
 * null fields will be NULL in the returned array.
sl@0
  1295
 *
sl@0
  1296
 * TODO: Perhaps we should return pointer/length strings here for consistency
sl@0
  1297
 * with other code which uses pointer/length. */
sl@0
  1298
static int content_select(fulltext_vtab *v, sqlite_int64 iRow,
sl@0
  1299
                          const char ***pValues){
sl@0
  1300
  sqlite3_stmt *s;
sl@0
  1301
  const char **values;
sl@0
  1302
  int i;
sl@0
  1303
  int rc;
sl@0
  1304
sl@0
  1305
  *pValues = NULL;
sl@0
  1306
sl@0
  1307
  rc = sql_get_statement(v, CONTENT_SELECT_STMT, &s);
sl@0
  1308
  if( rc!=SQLITE_OK ) return rc;
sl@0
  1309
sl@0
  1310
  rc = sqlite3_bind_int64(s, 1, iRow);
sl@0
  1311
  if( rc!=SQLITE_OK ) return rc;
sl@0
  1312
sl@0
  1313
  rc = sql_step_statement(v, CONTENT_SELECT_STMT, &s);
sl@0
  1314
  if( rc!=SQLITE_ROW ) return rc;
sl@0
  1315
sl@0
  1316
  values = (const char **) malloc(v->nColumn * sizeof(const char *));
sl@0
  1317
  for(i=0; i<v->nColumn; ++i){
sl@0
  1318
    if( sqlite3_column_type(s, i)==SQLITE_NULL ){
sl@0
  1319
      values[i] = NULL;
sl@0
  1320
    }else{
sl@0
  1321
      values[i] = string_dup((char*)sqlite3_column_text(s, i));
sl@0
  1322
    }
sl@0
  1323
  }
sl@0
  1324
sl@0
  1325
  /* We expect only one row.  We must execute another sqlite3_step()
sl@0
  1326
   * to complete the iteration; otherwise the table will remain locked. */
sl@0
  1327
  rc = sqlite3_step(s);
sl@0
  1328
  if( rc==SQLITE_DONE ){
sl@0
  1329
    *pValues = values;
sl@0
  1330
    return SQLITE_OK;
sl@0
  1331
  }
sl@0
  1332
sl@0
  1333
  freeStringArray(v->nColumn, values);
sl@0
  1334
  return rc;
sl@0
  1335
}
sl@0
  1336
sl@0
  1337
/* delete from %_content where rowid = [iRow ] */
sl@0
  1338
static int content_delete(fulltext_vtab *v, sqlite_int64 iRow){
sl@0
  1339
  sqlite3_stmt *s;
sl@0
  1340
  int rc = sql_get_statement(v, CONTENT_DELETE_STMT, &s);
sl@0
  1341
  if( rc!=SQLITE_OK ) return rc;
sl@0
  1342
sl@0
  1343
  rc = sqlite3_bind_int64(s, 1, iRow);
sl@0
  1344
  if( rc!=SQLITE_OK ) return rc;
sl@0
  1345
sl@0
  1346
  return sql_single_step_statement(v, CONTENT_DELETE_STMT, &s);
sl@0
  1347
}
sl@0
  1348
sl@0
  1349
/* select rowid, doclist from %_term
sl@0
  1350
 *  where term = [pTerm] and segment = [iSegment]
sl@0
  1351
 * If found, returns SQLITE_ROW; the caller must free the
sl@0
  1352
 * returned doclist.  If no rows found, returns SQLITE_DONE. */
sl@0
  1353
static int term_select(fulltext_vtab *v, const char *pTerm, int nTerm,
sl@0
  1354
                       int iSegment,
sl@0
  1355
                       sqlite_int64 *rowid, DocList *out){
sl@0
  1356
  sqlite3_stmt *s;
sl@0
  1357
  int rc = sql_get_statement(v, TERM_SELECT_STMT, &s);
sl@0
  1358
  if( rc!=SQLITE_OK ) return rc;
sl@0
  1359
sl@0
  1360
  rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC);
sl@0
  1361
  if( rc!=SQLITE_OK ) return rc;
sl@0
  1362
sl@0
  1363
  rc = sqlite3_bind_int(s, 2, iSegment);
sl@0
  1364
  if( rc!=SQLITE_OK ) return rc;
sl@0
  1365
sl@0
  1366
  rc = sql_step_statement(v, TERM_SELECT_STMT, &s);
sl@0
  1367
  if( rc!=SQLITE_ROW ) return rc;
sl@0
  1368
sl@0
  1369
  *rowid = sqlite3_column_int64(s, 0);
sl@0
  1370
  docListInit(out, DL_DEFAULT,
sl@0
  1371
              sqlite3_column_blob(s, 1), sqlite3_column_bytes(s, 1));
sl@0
  1372
sl@0
  1373
  /* We expect only one row.  We must execute another sqlite3_step()
sl@0
  1374
   * to complete the iteration; otherwise the table will remain locked. */
sl@0
  1375
  rc = sqlite3_step(s);
sl@0
  1376
  return rc==SQLITE_DONE ? SQLITE_ROW : rc;
sl@0
  1377
}
sl@0
  1378
sl@0
  1379
/* Load the segment doclists for term pTerm and merge them in
sl@0
  1380
** appropriate order into out.  Returns SQLITE_OK if successful.  If
sl@0
  1381
** there are no segments for pTerm, successfully returns an empty
sl@0
  1382
** doclist in out.
sl@0
  1383
**
sl@0
  1384
** Each document consists of 1 or more "columns".  The number of
sl@0
  1385
** columns is v->nColumn.  If iColumn==v->nColumn, then return
sl@0
  1386
** position information about all columns.  If iColumn<v->nColumn,
sl@0
  1387
** then only return position information about the iColumn-th column
sl@0
  1388
** (where the first column is 0).
sl@0
  1389
*/
sl@0
  1390
static int term_select_all(
sl@0
  1391
  fulltext_vtab *v,     /* The fulltext index we are querying against */
sl@0
  1392
  int iColumn,          /* If <nColumn, only look at the iColumn-th column */
sl@0
  1393
  const char *pTerm,    /* The term whose posting lists we want */
sl@0
  1394
  int nTerm,            /* Number of bytes in pTerm */
sl@0
  1395
  DocList *out          /* Write the resulting doclist here */
sl@0
  1396
){
sl@0
  1397
  DocList doclist;
sl@0
  1398
  sqlite3_stmt *s;
sl@0
  1399
  int rc = sql_get_statement(v, TERM_SELECT_ALL_STMT, &s);
sl@0
  1400
  if( rc!=SQLITE_OK ) return rc;
sl@0
  1401
sl@0
  1402
  rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC);
sl@0
  1403
  if( rc!=SQLITE_OK ) return rc;
sl@0
  1404
sl@0
  1405
  docListInit(&doclist, DL_DEFAULT, 0, 0);
sl@0
  1406
sl@0
  1407
  /* TODO(shess) Handle schema and busy errors. */
sl@0
  1408
  while( (rc=sql_step_statement(v, TERM_SELECT_ALL_STMT, &s))==SQLITE_ROW ){
sl@0
  1409
    DocList old;
sl@0
  1410
sl@0
  1411
    /* TODO(shess) If we processed doclists from oldest to newest, we
sl@0
  1412
    ** could skip the malloc() involved with the following call.  For
sl@0
  1413
    ** now, I'd rather keep this logic similar to index_insert_term().
sl@0
  1414
    ** We could additionally drop elements when we see deletes, but
sl@0
  1415
    ** that would require a distinct version of docListAccumulate().
sl@0
  1416
    */
sl@0
  1417
    docListInit(&old, DL_DEFAULT,
sl@0
  1418
                sqlite3_column_blob(s, 0), sqlite3_column_bytes(s, 0));
sl@0
  1419
sl@0
  1420
    if( iColumn<v->nColumn ){   /* querying a single column */
sl@0
  1421
      docListRestrictColumn(&old, iColumn);
sl@0
  1422
    }
sl@0
  1423
sl@0
  1424
    /* doclist contains the newer data, so write it over old.  Then
sl@0
  1425
    ** steal accumulated result for doclist.
sl@0
  1426
    */
sl@0
  1427
    docListAccumulate(&old, &doclist);
sl@0
  1428
    docListDestroy(&doclist);
sl@0
  1429
    doclist = old;
sl@0
  1430
  }
sl@0
  1431
  if( rc!=SQLITE_DONE ){
sl@0
  1432
    docListDestroy(&doclist);
sl@0
  1433
    return rc;
sl@0
  1434
  }
sl@0
  1435
sl@0
  1436
  docListDiscardEmpty(&doclist);
sl@0
  1437
  *out = doclist;
sl@0
  1438
  return SQLITE_OK;
sl@0
  1439
}
sl@0
  1440
sl@0
  1441
/* insert into %_term (rowid, term, segment, doclist)
sl@0
  1442
               values ([piRowid], [pTerm], [iSegment], [doclist])
sl@0
  1443
** Lets sqlite select rowid if piRowid is NULL, else uses *piRowid.
sl@0
  1444
**
sl@0
  1445
** NOTE(shess) piRowid is IN, with values of "space of int64" plus
sl@0
  1446
** null, it is not used to pass data back to the caller.
sl@0
  1447
*/
sl@0
  1448
static int term_insert(fulltext_vtab *v, sqlite_int64 *piRowid,
sl@0
  1449
                       const char *pTerm, int nTerm,
sl@0
  1450
                       int iSegment, DocList *doclist){
sl@0
  1451
  sqlite3_stmt *s;
sl@0
  1452
  int rc = sql_get_statement(v, TERM_INSERT_STMT, &s);
sl@0
  1453
  if( rc!=SQLITE_OK ) return rc;
sl@0
  1454
sl@0
  1455
  if( piRowid==NULL ){
sl@0
  1456
    rc = sqlite3_bind_null(s, 1);
sl@0
  1457
  }else{
sl@0
  1458
    rc = sqlite3_bind_int64(s, 1, *piRowid);
sl@0
  1459
  }
sl@0
  1460
  if( rc!=SQLITE_OK ) return rc;
sl@0
  1461
sl@0
  1462
  rc = sqlite3_bind_text(s, 2, pTerm, nTerm, SQLITE_STATIC);
sl@0
  1463
  if( rc!=SQLITE_OK ) return rc;
sl@0
  1464
sl@0
  1465
  rc = sqlite3_bind_int(s, 3, iSegment);
sl@0
  1466
  if( rc!=SQLITE_OK ) return rc;
sl@0
  1467
sl@0
  1468
  rc = sqlite3_bind_blob(s, 4, doclist->pData, doclist->nData, SQLITE_STATIC);
sl@0
  1469
  if( rc!=SQLITE_OK ) return rc;
sl@0
  1470
sl@0
  1471
  return sql_single_step_statement(v, TERM_INSERT_STMT, &s);
sl@0
  1472
}
sl@0
  1473
sl@0
  1474
/* update %_term set doclist = [doclist] where rowid = [rowid] */
sl@0
  1475
static int term_update(fulltext_vtab *v, sqlite_int64 rowid,
sl@0
  1476
                       DocList *doclist){
sl@0
  1477
  sqlite3_stmt *s;
sl@0
  1478
  int rc = sql_get_statement(v, TERM_UPDATE_STMT, &s);
sl@0
  1479
  if( rc!=SQLITE_OK ) return rc;
sl@0
  1480
sl@0
  1481
  rc = sqlite3_bind_blob(s, 1, doclist->pData, doclist->nData, SQLITE_STATIC);
sl@0
  1482
  if( rc!=SQLITE_OK ) return rc;
sl@0
  1483
sl@0
  1484
  rc = sqlite3_bind_int64(s, 2, rowid);
sl@0
  1485
  if( rc!=SQLITE_OK ) return rc;
sl@0
  1486
sl@0
  1487
  return sql_single_step_statement(v, TERM_UPDATE_STMT, &s);
sl@0
  1488
}
sl@0
  1489
sl@0
  1490
static int term_delete(fulltext_vtab *v, sqlite_int64 rowid){
sl@0
  1491
  sqlite3_stmt *s;
sl@0
  1492
  int rc = sql_get_statement(v, TERM_DELETE_STMT, &s);
sl@0
  1493
  if( rc!=SQLITE_OK ) return rc;
sl@0
  1494
sl@0
  1495
  rc = sqlite3_bind_int64(s, 1, rowid);
sl@0
  1496
  if( rc!=SQLITE_OK ) return rc;
sl@0
  1497
sl@0
  1498
  return sql_single_step_statement(v, TERM_DELETE_STMT, &s);
sl@0
  1499
}
sl@0
  1500
sl@0
  1501
/*
sl@0
  1502
** Free the memory used to contain a fulltext_vtab structure.
sl@0
  1503
*/
sl@0
  1504
static void fulltext_vtab_destroy(fulltext_vtab *v){
sl@0
  1505
  int iStmt, i;
sl@0
  1506
sl@0
  1507
  TRACE(("FTS1 Destroy %p\n", v));
sl@0
  1508
  for( iStmt=0; iStmt<MAX_STMT; iStmt++ ){
sl@0
  1509
    if( v->pFulltextStatements[iStmt]!=NULL ){
sl@0
  1510
      sqlite3_finalize(v->pFulltextStatements[iStmt]);
sl@0
  1511
      v->pFulltextStatements[iStmt] = NULL;
sl@0
  1512
    }
sl@0
  1513
  }
sl@0
  1514
sl@0
  1515
  if( v->pTokenizer!=NULL ){
sl@0
  1516
    v->pTokenizer->pModule->xDestroy(v->pTokenizer);
sl@0
  1517
    v->pTokenizer = NULL;
sl@0
  1518
  }
sl@0
  1519
  
sl@0
  1520
  free(v->azColumn);
sl@0
  1521
  for(i = 0; i < v->nColumn; ++i) {
sl@0
  1522
    sqlite3_free(v->azContentColumn[i]);
sl@0
  1523
  }
sl@0
  1524
  free(v->azContentColumn);
sl@0
  1525
  free(v);
sl@0
  1526
}
sl@0
  1527
sl@0
  1528
/*
sl@0
  1529
** Token types for parsing the arguments to xConnect or xCreate.
sl@0
  1530
*/
sl@0
  1531
#define TOKEN_EOF         0    /* End of file */
sl@0
  1532
#define TOKEN_SPACE       1    /* Any kind of whitespace */
sl@0
  1533
#define TOKEN_ID          2    /* An identifier */
sl@0
  1534
#define TOKEN_STRING      3    /* A string literal */
sl@0
  1535
#define TOKEN_PUNCT       4    /* A single punctuation character */
sl@0
  1536
sl@0
  1537
/*
sl@0
  1538
** If X is a character that can be used in an identifier then
sl@0
  1539
** IdChar(X) will be true.  Otherwise it is false.
sl@0
  1540
**
sl@0
  1541
** For ASCII, any character with the high-order bit set is
sl@0
  1542
** allowed in an identifier.  For 7-bit characters, 
sl@0
  1543
** sqlite3IsIdChar[X] must be 1.
sl@0
  1544
**
sl@0
  1545
** Ticket #1066.  the SQL standard does not allow '$' in the
sl@0
  1546
** middle of identfiers.  But many SQL implementations do. 
sl@0
  1547
** SQLite will allow '$' in identifiers for compatibility.
sl@0
  1548
** But the feature is undocumented.
sl@0
  1549
*/
sl@0
  1550
static const char isIdChar[] = {
sl@0
  1551
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
sl@0
  1552
    0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 2x */
sl@0
  1553
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
sl@0
  1554
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
sl@0
  1555
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
sl@0
  1556
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
sl@0
  1557
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
sl@0
  1558
};
sl@0
  1559
#define IdChar(C)  (((c=C)&0x80)!=0 || (c>0x1f && isIdChar[c-0x20]))
sl@0
  1560
sl@0
  1561
sl@0
  1562
/*
sl@0
  1563
** Return the length of the token that begins at z[0]. 
sl@0
  1564
** Store the token type in *tokenType before returning.
sl@0
  1565
*/
sl@0
  1566
static int getToken(const char *z, int *tokenType){
sl@0
  1567
  int i, c;
sl@0
  1568
  switch( *z ){
sl@0
  1569
    case 0: {
sl@0
  1570
      *tokenType = TOKEN_EOF;
sl@0
  1571
      return 0;
sl@0
  1572
    }
sl@0
  1573
    case ' ': case '\t': case '\n': case '\f': case '\r': {
sl@0
  1574
      for(i=1; safe_isspace(z[i]); i++){}
sl@0
  1575
      *tokenType = TOKEN_SPACE;
sl@0
  1576
      return i;
sl@0
  1577
    }
sl@0
  1578
    case '`':
sl@0
  1579
    case '\'':
sl@0
  1580
    case '"': {
sl@0
  1581
      int delim = z[0];
sl@0
  1582
      for(i=1; (c=z[i])!=0; i++){
sl@0
  1583
        if( c==delim ){
sl@0
  1584
          if( z[i+1]==delim ){
sl@0
  1585
            i++;
sl@0
  1586
          }else{
sl@0
  1587
            break;
sl@0
  1588
          }
sl@0
  1589
        }
sl@0
  1590
      }
sl@0
  1591
      *tokenType = TOKEN_STRING;
sl@0
  1592
      return i + (c!=0);
sl@0
  1593
    }
sl@0
  1594
    case '[': {
sl@0
  1595
      for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){}
sl@0
  1596
      *tokenType = TOKEN_ID;
sl@0
  1597
      return i;
sl@0
  1598
    }
sl@0
  1599
    default: {
sl@0
  1600
      if( !IdChar(*z) ){
sl@0
  1601
        break;
sl@0
  1602
      }
sl@0
  1603
      for(i=1; IdChar(z[i]); i++){}
sl@0
  1604
      *tokenType = TOKEN_ID;
sl@0
  1605
      return i;
sl@0
  1606
    }
sl@0
  1607
  }
sl@0
  1608
  *tokenType = TOKEN_PUNCT;
sl@0
  1609
  return 1;
sl@0
  1610
}
sl@0
  1611
sl@0
  1612
/*
sl@0
  1613
** A token extracted from a string is an instance of the following
sl@0
  1614
** structure.
sl@0
  1615
*/
sl@0
  1616
typedef struct Token {
sl@0
  1617
  const char *z;       /* Pointer to token text.  Not '\000' terminated */
sl@0
  1618
  short int n;         /* Length of the token text in bytes. */
sl@0
  1619
} Token;
sl@0
  1620
sl@0
  1621
/*
sl@0
  1622
** Given a input string (which is really one of the argv[] parameters
sl@0
  1623
** passed into xConnect or xCreate) split the string up into tokens.
sl@0
  1624
** Return an array of pointers to '\000' terminated strings, one string
sl@0
  1625
** for each non-whitespace token.
sl@0
  1626
**
sl@0
  1627
** The returned array is terminated by a single NULL pointer.
sl@0
  1628
**
sl@0
  1629
** Space to hold the returned array is obtained from a single
sl@0
  1630
** malloc and should be freed by passing the return value to free().
sl@0
  1631
** The individual strings within the token list are all a part of
sl@0
  1632
** the single memory allocation and will all be freed at once.
sl@0
  1633
*/
sl@0
  1634
static char **tokenizeString(const char *z, int *pnToken){
sl@0
  1635
  int nToken = 0;
sl@0
  1636
  Token *aToken = malloc( strlen(z) * sizeof(aToken[0]) );
sl@0
  1637
  int n = 1;
sl@0
  1638
  int e, i;
sl@0
  1639
  int totalSize = 0;
sl@0
  1640
  char **azToken;
sl@0
  1641
  char *zCopy;
sl@0
  1642
  while( n>0 ){
sl@0
  1643
    n = getToken(z, &e);
sl@0
  1644
    if( e!=TOKEN_SPACE ){
sl@0
  1645
      aToken[nToken].z = z;
sl@0
  1646
      aToken[nToken].n = n;
sl@0
  1647
      nToken++;
sl@0
  1648
      totalSize += n+1;
sl@0
  1649
    }
sl@0
  1650
    z += n;
sl@0
  1651
  }
sl@0
  1652
  azToken = (char**)malloc( nToken*sizeof(char*) + totalSize );
sl@0
  1653
  zCopy = (char*)&azToken[nToken];
sl@0
  1654
  nToken--;
sl@0
  1655
  for(i=0; i<nToken; i++){
sl@0
  1656
    azToken[i] = zCopy;
sl@0
  1657
    n = aToken[i].n;
sl@0
  1658
    memcpy(zCopy, aToken[i].z, n);
sl@0
  1659
    zCopy[n] = 0;
sl@0
  1660
    zCopy += n+1;
sl@0
  1661
  }
sl@0
  1662
  azToken[nToken] = 0;
sl@0
  1663
  free(aToken);
sl@0
  1664
  *pnToken = nToken;
sl@0
  1665
  return azToken;
sl@0
  1666
}
sl@0
  1667
sl@0
  1668
/*
sl@0
  1669
** Convert an SQL-style quoted string into a normal string by removing
sl@0
  1670
** the quote characters.  The conversion is done in-place.  If the
sl@0
  1671
** input does not begin with a quote character, then this routine
sl@0
  1672
** is a no-op.
sl@0
  1673
**
sl@0
  1674
** Examples:
sl@0
  1675
**
sl@0
  1676
**     "abc"   becomes   abc
sl@0
  1677
**     'xyz'   becomes   xyz
sl@0
  1678
**     [pqr]   becomes   pqr
sl@0
  1679
**     `mno`   becomes   mno
sl@0
  1680
*/
sl@0
  1681
static void dequoteString(char *z){
sl@0
  1682
  int quote;
sl@0
  1683
  int i, j;
sl@0
  1684
  if( z==0 ) return;
sl@0
  1685
  quote = z[0];
sl@0
  1686
  switch( quote ){
sl@0
  1687
    case '\'':  break;
sl@0
  1688
    case '"':   break;
sl@0
  1689
    case '`':   break;                /* For MySQL compatibility */
sl@0
  1690
    case '[':   quote = ']';  break;  /* For MS SqlServer compatibility */
sl@0
  1691
    default:    return;
sl@0
  1692
  }
sl@0
  1693
  for(i=1, j=0; z[i]; i++){
sl@0
  1694
    if( z[i]==quote ){
sl@0
  1695
      if( z[i+1]==quote ){
sl@0
  1696
        z[j++] = quote;
sl@0
  1697
        i++;
sl@0
  1698
      }else{
sl@0
  1699
        z[j++] = 0;
sl@0
  1700
        break;
sl@0
  1701
      }
sl@0
  1702
    }else{
sl@0
  1703
      z[j++] = z[i];
sl@0
  1704
    }
sl@0
  1705
  }
sl@0
  1706
}
sl@0
  1707
sl@0
  1708
/*
sl@0
  1709
** The input azIn is a NULL-terminated list of tokens.  Remove the first
sl@0
  1710
** token and all punctuation tokens.  Remove the quotes from
sl@0
  1711
** around string literal tokens.
sl@0
  1712
**
sl@0
  1713
** Example:
sl@0
  1714
**
sl@0
  1715
**     input:      tokenize chinese ( 'simplifed' , 'mixed' )
sl@0
  1716
**     output:     chinese simplifed mixed
sl@0
  1717
**
sl@0
  1718
** Another example:
sl@0
  1719
**
sl@0
  1720
**     input:      delimiters ( '[' , ']' , '...' )
sl@0
  1721
**     output:     [ ] ...
sl@0
  1722
*/
sl@0
  1723
static void tokenListToIdList(char **azIn){
sl@0
  1724
  int i, j;
sl@0
  1725
  if( azIn ){
sl@0
  1726
    for(i=0, j=-1; azIn[i]; i++){
sl@0
  1727
      if( safe_isalnum(azIn[i][0]) || azIn[i][1] ){
sl@0
  1728
        dequoteString(azIn[i]);
sl@0
  1729
        if( j>=0 ){
sl@0
  1730
          azIn[j] = azIn[i];
sl@0
  1731
        }
sl@0
  1732
        j++;
sl@0
  1733
      }
sl@0
  1734
    }
sl@0
  1735
    azIn[j] = 0;
sl@0
  1736
  }
sl@0
  1737
}
sl@0
  1738
sl@0
  1739
sl@0
  1740
/*
sl@0
  1741
** Find the first alphanumeric token in the string zIn.  Null-terminate
sl@0
  1742
** this token.  Remove any quotation marks.  And return a pointer to
sl@0
  1743
** the result.
sl@0
  1744
*/
sl@0
  1745
static char *firstToken(char *zIn, char **pzTail){
sl@0
  1746
  int n, ttype;
sl@0
  1747
  while(1){
sl@0
  1748
    n = getToken(zIn, &ttype);
sl@0
  1749
    if( ttype==TOKEN_SPACE ){
sl@0
  1750
      zIn += n;
sl@0
  1751
    }else if( ttype==TOKEN_EOF ){
sl@0
  1752
      *pzTail = zIn;
sl@0
  1753
      return 0;
sl@0
  1754
    }else{
sl@0
  1755
      zIn[n] = 0;
sl@0
  1756
      *pzTail = &zIn[1];
sl@0
  1757
      dequoteString(zIn);
sl@0
  1758
      return zIn;
sl@0
  1759
    }
sl@0
  1760
  }
sl@0
  1761
  /*NOTREACHED*/
sl@0
  1762
}
sl@0
  1763
sl@0
  1764
/* Return true if...
sl@0
  1765
**
sl@0
  1766
**   *  s begins with the string t, ignoring case
sl@0
  1767
**   *  s is longer than t
sl@0
  1768
**   *  The first character of s beyond t is not a alphanumeric
sl@0
  1769
** 
sl@0
  1770
** Ignore leading space in *s.
sl@0
  1771
**
sl@0
  1772
** To put it another way, return true if the first token of
sl@0
  1773
** s[] is t[].
sl@0
  1774
*/
sl@0
  1775
static int startsWith(const char *s, const char *t){
sl@0
  1776
  while( safe_isspace(*s) ){ s++; }
sl@0
  1777
  while( *t ){
sl@0
  1778
    if( safe_tolower(*s++)!=safe_tolower(*t++) ) return 0;
sl@0
  1779
  }
sl@0
  1780
  return *s!='_' && !safe_isalnum(*s);
sl@0
  1781
}
sl@0
  1782
sl@0
  1783
/*
sl@0
  1784
** An instance of this structure defines the "spec" of a
sl@0
  1785
** full text index.  This structure is populated by parseSpec
sl@0
  1786
** and use by fulltextConnect and fulltextCreate.
sl@0
  1787
*/
sl@0
  1788
typedef struct TableSpec {
sl@0
  1789
  const char *zDb;         /* Logical database name */
sl@0
  1790
  const char *zName;       /* Name of the full-text index */
sl@0
  1791
  int nColumn;             /* Number of columns to be indexed */
sl@0
  1792
  char **azColumn;         /* Original names of columns to be indexed */
sl@0
  1793
  char **azContentColumn;  /* Column names for %_content */
sl@0
  1794
  char **azTokenizer;      /* Name of tokenizer and its arguments */
sl@0
  1795
} TableSpec;
sl@0
  1796
sl@0
  1797
/*
sl@0
  1798
** Reclaim all of the memory used by a TableSpec
sl@0
  1799
*/
sl@0
  1800
static void clearTableSpec(TableSpec *p) {
sl@0
  1801
  free(p->azColumn);
sl@0
  1802
  free(p->azContentColumn);
sl@0
  1803
  free(p->azTokenizer);
sl@0
  1804
}
sl@0
  1805
sl@0
  1806
/* Parse a CREATE VIRTUAL TABLE statement, which looks like this:
sl@0
  1807
 *
sl@0
  1808
 * CREATE VIRTUAL TABLE email
sl@0
  1809
 *        USING fts1(subject, body, tokenize mytokenizer(myarg))
sl@0
  1810
 *
sl@0
  1811
 * We return parsed information in a TableSpec structure.
sl@0
  1812
 * 
sl@0
  1813
 */
sl@0
  1814
static int parseSpec(TableSpec *pSpec, int argc, const char *const*argv,
sl@0
  1815
                     char**pzErr){
sl@0
  1816
  int i, n;
sl@0
  1817
  char *z, *zDummy;
sl@0
  1818
  char **azArg;
sl@0
  1819
  const char *zTokenizer = 0;    /* argv[] entry describing the tokenizer */
sl@0
  1820
sl@0
  1821
  assert( argc>=3 );
sl@0
  1822
  /* Current interface:
sl@0
  1823
  ** argv[0] - module name
sl@0
  1824
  ** argv[1] - database name
sl@0
  1825
  ** argv[2] - table name
sl@0
  1826
  ** argv[3..] - columns, optionally followed by tokenizer specification
sl@0
  1827
  **             and snippet delimiters specification.
sl@0
  1828
  */
sl@0
  1829
sl@0
  1830
  /* Make a copy of the complete argv[][] array in a single allocation.
sl@0
  1831
  ** The argv[][] array is read-only and transient.  We can write to the
sl@0
  1832
  ** copy in order to modify things and the copy is persistent.
sl@0
  1833
  */
sl@0
  1834
  memset(pSpec, 0, sizeof(*pSpec));
sl@0
  1835
  for(i=n=0; i<argc; i++){
sl@0
  1836
    n += strlen(argv[i]) + 1;
sl@0
  1837
  }
sl@0
  1838
  azArg = malloc( sizeof(char*)*argc + n );
sl@0
  1839
  if( azArg==0 ){
sl@0
  1840
    return SQLITE_NOMEM;
sl@0
  1841
  }
sl@0
  1842
  z = (char*)&azArg[argc];
sl@0
  1843
  for(i=0; i<argc; i++){
sl@0
  1844
    azArg[i] = z;
sl@0
  1845
    strcpy(z, argv[i]);
sl@0
  1846
    z += strlen(z)+1;
sl@0
  1847
  }
sl@0
  1848
sl@0
  1849
  /* Identify the column names and the tokenizer and delimiter arguments
sl@0
  1850
  ** in the argv[][] array.
sl@0
  1851
  */
sl@0
  1852
  pSpec->zDb = azArg[1];
sl@0
  1853
  pSpec->zName = azArg[2];
sl@0
  1854
  pSpec->nColumn = 0;
sl@0
  1855
  pSpec->azColumn = azArg;
sl@0
  1856
  zTokenizer = "tokenize simple";
sl@0
  1857
  for(i=3; i<argc; ++i){
sl@0
  1858
    if( startsWith(azArg[i],"tokenize") ){
sl@0
  1859
      zTokenizer = azArg[i];
sl@0
  1860
    }else{
sl@0
  1861
      z = azArg[pSpec->nColumn] = firstToken(azArg[i], &zDummy);
sl@0
  1862
      pSpec->nColumn++;
sl@0
  1863
    }
sl@0
  1864
  }
sl@0
  1865
  if( pSpec->nColumn==0 ){
sl@0
  1866
    azArg[0] = "content";
sl@0
  1867
    pSpec->nColumn = 1;
sl@0
  1868
  }
sl@0
  1869
sl@0
  1870
  /*
sl@0
  1871
  ** Construct the list of content column names.
sl@0
  1872
  **
sl@0
  1873
  ** Each content column name will be of the form cNNAAAA
sl@0
  1874
  ** where NN is the column number and AAAA is the sanitized
sl@0
  1875
  ** column name.  "sanitized" means that special characters are
sl@0
  1876
  ** converted to "_".  The cNN prefix guarantees that all column
sl@0
  1877
  ** names are unique.
sl@0
  1878
  **
sl@0
  1879
  ** The AAAA suffix is not strictly necessary.  It is included
sl@0
  1880
  ** for the convenience of people who might examine the generated
sl@0
  1881
  ** %_content table and wonder what the columns are used for.
sl@0
  1882
  */
sl@0
  1883
  pSpec->azContentColumn = malloc( pSpec->nColumn * sizeof(char *) );
sl@0
  1884
  if( pSpec->azContentColumn==0 ){
sl@0
  1885
    clearTableSpec(pSpec);
sl@0
  1886
    return SQLITE_NOMEM;
sl@0
  1887
  }
sl@0
  1888
  for(i=0; i<pSpec->nColumn; i++){
sl@0
  1889
    char *p;
sl@0
  1890
    pSpec->azContentColumn[i] = sqlite3_mprintf("c%d%s", i, azArg[i]);
sl@0
  1891
    for (p = pSpec->azContentColumn[i]; *p ; ++p) {
sl@0
  1892
      if( !safe_isalnum(*p) ) *p = '_';
sl@0
  1893
    }
sl@0
  1894
  }
sl@0
  1895
sl@0
  1896
  /*
sl@0
  1897
  ** Parse the tokenizer specification string.
sl@0
  1898
  */
sl@0
  1899
  pSpec->azTokenizer = tokenizeString(zTokenizer, &n);
sl@0
  1900
  tokenListToIdList(pSpec->azTokenizer);
sl@0
  1901
sl@0
  1902
  return SQLITE_OK;
sl@0
  1903
}
sl@0
  1904
sl@0
  1905
/*
sl@0
  1906
** Generate a CREATE TABLE statement that describes the schema of
sl@0
  1907
** the virtual table.  Return a pointer to this schema string.
sl@0
  1908
**
sl@0
  1909
** Space is obtained from sqlite3_mprintf() and should be freed
sl@0
  1910
** using sqlite3_free().
sl@0
  1911
*/
sl@0
  1912
static char *fulltextSchema(
sl@0
  1913
  int nColumn,                  /* Number of columns */
sl@0
  1914
  const char *const* azColumn,  /* List of columns */
sl@0
  1915
  const char *zTableName        /* Name of the table */
sl@0
  1916
){
sl@0
  1917
  int i;
sl@0
  1918
  char *zSchema, *zNext;
sl@0
  1919
  const char *zSep = "(";
sl@0
  1920
  zSchema = sqlite3_mprintf("CREATE TABLE x");
sl@0
  1921
  for(i=0; i<nColumn; i++){
sl@0
  1922
    zNext = sqlite3_mprintf("%s%s%Q", zSchema, zSep, azColumn[i]);
sl@0
  1923
    sqlite3_free(zSchema);
sl@0
  1924
    zSchema = zNext;
sl@0
  1925
    zSep = ",";
sl@0
  1926
  }
sl@0
  1927
  zNext = sqlite3_mprintf("%s,%Q)", zSchema, zTableName);
sl@0
  1928
  sqlite3_free(zSchema);
sl@0
  1929
  return zNext;
sl@0
  1930
}
sl@0
  1931
sl@0
  1932
/*
sl@0
  1933
** Build a new sqlite3_vtab structure that will describe the
sl@0
  1934
** fulltext index defined by spec.
sl@0
  1935
*/
sl@0
  1936
static int constructVtab(
sl@0
  1937
  sqlite3 *db,              /* The SQLite database connection */
sl@0
  1938
  TableSpec *spec,          /* Parsed spec information from parseSpec() */
sl@0
  1939
  sqlite3_vtab **ppVTab,    /* Write the resulting vtab structure here */
sl@0
  1940
  char **pzErr              /* Write any error message here */
sl@0
  1941
){
sl@0
  1942
  int rc;
sl@0
  1943
  int n;
sl@0
  1944
  fulltext_vtab *v = 0;
sl@0
  1945
  const sqlite3_tokenizer_module *m = NULL;
sl@0
  1946
  char *schema;
sl@0
  1947
sl@0
  1948
  v = (fulltext_vtab *) malloc(sizeof(fulltext_vtab));
sl@0
  1949
  if( v==0 ) return SQLITE_NOMEM;
sl@0
  1950
  memset(v, 0, sizeof(*v));
sl@0
  1951
  /* sqlite will initialize v->base */
sl@0
  1952
  v->db = db;
sl@0
  1953
  v->zDb = spec->zDb;       /* Freed when azColumn is freed */
sl@0
  1954
  v->zName = spec->zName;   /* Freed when azColumn is freed */
sl@0
  1955
  v->nColumn = spec->nColumn;
sl@0
  1956
  v->azContentColumn = spec->azContentColumn;
sl@0
  1957
  spec->azContentColumn = 0;
sl@0
  1958
  v->azColumn = spec->azColumn;
sl@0
  1959
  spec->azColumn = 0;
sl@0
  1960
sl@0
  1961
  if( spec->azTokenizer==0 ){
sl@0
  1962
    return SQLITE_NOMEM;
sl@0
  1963
  }
sl@0
  1964
  /* TODO(shess) For now, add new tokenizers as else if clauses. */
sl@0
  1965
  if( spec->azTokenizer[0]==0 || startsWith(spec->azTokenizer[0], "simple") ){
sl@0
  1966
    sqlite3Fts1SimpleTokenizerModule(&m);
sl@0
  1967
  }else if( startsWith(spec->azTokenizer[0], "porter") ){
sl@0
  1968
    sqlite3Fts1PorterTokenizerModule(&m);
sl@0
  1969
  }else{
sl@0
  1970
    *pzErr = sqlite3_mprintf("unknown tokenizer: %s", spec->azTokenizer[0]);
sl@0
  1971
    rc = SQLITE_ERROR;
sl@0
  1972
    goto err;
sl@0
  1973
  }
sl@0
  1974
  for(n=0; spec->azTokenizer[n]; n++){}
sl@0
  1975
  if( n ){
sl@0
  1976
    rc = m->xCreate(n-1, (const char*const*)&spec->azTokenizer[1],
sl@0
  1977
                    &v->pTokenizer);
sl@0
  1978
  }else{
sl@0
  1979
    rc = m->xCreate(0, 0, &v->pTokenizer);
sl@0
  1980
  }
sl@0
  1981
  if( rc!=SQLITE_OK ) goto err;
sl@0
  1982
  v->pTokenizer->pModule = m;
sl@0
  1983
sl@0
  1984
  /* TODO: verify the existence of backing tables foo_content, foo_term */
sl@0
  1985
sl@0
  1986
  schema = fulltextSchema(v->nColumn, (const char*const*)v->azColumn,
sl@0
  1987
                          spec->zName);
sl@0
  1988
  rc = sqlite3_declare_vtab(db, schema);
sl@0
  1989
  sqlite3_free(schema);
sl@0
  1990
  if( rc!=SQLITE_OK ) goto err;
sl@0
  1991
sl@0
  1992
  memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements));
sl@0
  1993
sl@0
  1994
  *ppVTab = &v->base;
sl@0
  1995
  TRACE(("FTS1 Connect %p\n", v));
sl@0
  1996
sl@0
  1997
  return rc;
sl@0
  1998
sl@0
  1999
err:
sl@0
  2000
  fulltext_vtab_destroy(v);
sl@0
  2001
  return rc;
sl@0
  2002
}
sl@0
  2003
sl@0
  2004
static int fulltextConnect(
sl@0
  2005
  sqlite3 *db,
sl@0
  2006
  void *pAux,
sl@0
  2007
  int argc, const char *const*argv,
sl@0
  2008
  sqlite3_vtab **ppVTab,
sl@0
  2009
  char **pzErr
sl@0
  2010
){
sl@0
  2011
  TableSpec spec;
sl@0
  2012
  int rc = parseSpec(&spec, argc, argv, pzErr);
sl@0
  2013
  if( rc!=SQLITE_OK ) return rc;
sl@0
  2014
sl@0
  2015
  rc = constructVtab(db, &spec, ppVTab, pzErr);
sl@0
  2016
  clearTableSpec(&spec);
sl@0
  2017
  return rc;
sl@0
  2018
}
sl@0
  2019
sl@0
  2020
  /* The %_content table holds the text of each document, with
sl@0
  2021
  ** the rowid used as the docid.
sl@0
  2022
  **
sl@0
  2023
  ** The %_term table maps each term to a document list blob
sl@0
  2024
  ** containing elements sorted by ascending docid, each element
sl@0
  2025
  ** encoded as:
sl@0
  2026
  **
sl@0
  2027
  **   docid varint-encoded
sl@0
  2028
  **   token elements:
sl@0
  2029
  **     position+1 varint-encoded as delta from previous position
sl@0
  2030
  **     start offset varint-encoded as delta from previous start offset
sl@0
  2031
  **     end offset varint-encoded as delta from start offset
sl@0
  2032
  **
sl@0
  2033
  ** The sentinel position of 0 indicates the end of the token list.
sl@0
  2034
  **
sl@0
  2035
  ** Additionally, doclist blobs are chunked into multiple segments,
sl@0
  2036
  ** using segment to order the segments.  New elements are added to
sl@0
  2037
  ** the segment at segment 0, until it exceeds CHUNK_MAX.  Then
sl@0
  2038
  ** segment 0 is deleted, and the doclist is inserted at segment 1.
sl@0
  2039
  ** If there is already a doclist at segment 1, the segment 0 doclist
sl@0
  2040
  ** is merged with it, the segment 1 doclist is deleted, and the
sl@0
  2041
  ** merged doclist is inserted at segment 2, repeating those
sl@0
  2042
  ** operations until an insert succeeds.
sl@0
  2043
  **
sl@0
  2044
  ** Since this structure doesn't allow us to update elements in place
sl@0
  2045
  ** in case of deletion or update, these are simply written to
sl@0
  2046
  ** segment 0 (with an empty token list in case of deletion), with
sl@0
  2047
  ** docListAccumulate() taking care to retain lower-segment
sl@0
  2048
  ** information in preference to higher-segment information.
sl@0
  2049
  */
sl@0
  2050
  /* TODO(shess) Provide a VACUUM type operation which both removes
sl@0
  2051
  ** deleted elements which are no longer necessary, and duplicated
sl@0
  2052
  ** elements.  I suspect this will probably not be necessary in
sl@0
  2053
  ** practice, though.
sl@0
  2054
  */
sl@0
  2055
static int fulltextCreate(sqlite3 *db, void *pAux,
sl@0
  2056
                          int argc, const char * const *argv,
sl@0
  2057
                          sqlite3_vtab **ppVTab, char **pzErr){
sl@0
  2058
  int rc;
sl@0
  2059
  TableSpec spec;
sl@0
  2060
  StringBuffer schema;
sl@0
  2061
  TRACE(("FTS1 Create\n"));
sl@0
  2062
sl@0
  2063
  rc = parseSpec(&spec, argc, argv, pzErr);
sl@0
  2064
  if( rc!=SQLITE_OK ) return rc;
sl@0
  2065
sl@0
  2066
  initStringBuffer(&schema);
sl@0
  2067
  append(&schema, "CREATE TABLE %_content(");
sl@0
  2068
  appendList(&schema, spec.nColumn, spec.azContentColumn);
sl@0
  2069
  append(&schema, ")");
sl@0
  2070
  rc = sql_exec(db, spec.zDb, spec.zName, schema.s);
sl@0
  2071
  free(schema.s);
sl@0
  2072
  if( rc!=SQLITE_OK ) goto out;
sl@0
  2073
sl@0
  2074
  rc = sql_exec(db, spec.zDb, spec.zName,
sl@0
  2075
    "create table %_term(term text, segment integer, doclist blob, "
sl@0
  2076
                        "primary key(term, segment));");
sl@0
  2077
  if( rc!=SQLITE_OK ) goto out;
sl@0
  2078
sl@0
  2079
  rc = constructVtab(db, &spec, ppVTab, pzErr);
sl@0
  2080
sl@0
  2081
out:
sl@0
  2082
  clearTableSpec(&spec);
sl@0
  2083
  return rc;
sl@0
  2084
}
sl@0
  2085
sl@0
  2086
/* Decide how to handle an SQL query. */
sl@0
  2087
static int fulltextBestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
sl@0
  2088
  int i;
sl@0
  2089
  TRACE(("FTS1 BestIndex\n"));
sl@0
  2090
sl@0
  2091
  for(i=0; i<pInfo->nConstraint; ++i){
sl@0
  2092
    const struct sqlite3_index_constraint *pConstraint;
sl@0
  2093
    pConstraint = &pInfo->aConstraint[i];
sl@0
  2094
    if( pConstraint->usable ) {
sl@0
  2095
      if( pConstraint->iColumn==-1 &&
sl@0
  2096
          pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ ){
sl@0
  2097
        pInfo->idxNum = QUERY_ROWID;      /* lookup by rowid */
sl@0
  2098
        TRACE(("FTS1 QUERY_ROWID\n"));
sl@0
  2099
      } else if( pConstraint->iColumn>=0 &&
sl@0
  2100
                 pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH ){
sl@0
  2101
        /* full-text search */
sl@0
  2102
        pInfo->idxNum = QUERY_FULLTEXT + pConstraint->iColumn;
sl@0
  2103
        TRACE(("FTS1 QUERY_FULLTEXT %d\n", pConstraint->iColumn));
sl@0
  2104
      } else continue;
sl@0
  2105
sl@0
  2106
      pInfo->aConstraintUsage[i].argvIndex = 1;
sl@0
  2107
      pInfo->aConstraintUsage[i].omit = 1;
sl@0
  2108
sl@0
  2109
      /* An arbitrary value for now.
sl@0
  2110
       * TODO: Perhaps rowid matches should be considered cheaper than
sl@0
  2111
       * full-text searches. */
sl@0
  2112
      pInfo->estimatedCost = 1.0;   
sl@0
  2113
sl@0
  2114
      return SQLITE_OK;
sl@0
  2115
    }
sl@0
  2116
  }
sl@0
  2117
  pInfo->idxNum = QUERY_GENERIC;
sl@0
  2118
  return SQLITE_OK;
sl@0
  2119
}
sl@0
  2120
sl@0
  2121
static int fulltextDisconnect(sqlite3_vtab *pVTab){
sl@0
  2122
  TRACE(("FTS1 Disconnect %p\n", pVTab));
sl@0
  2123
  fulltext_vtab_destroy((fulltext_vtab *)pVTab);
sl@0
  2124
  return SQLITE_OK;
sl@0
  2125
}
sl@0
  2126
sl@0
  2127
static int fulltextDestroy(sqlite3_vtab *pVTab){
sl@0
  2128
  fulltext_vtab *v = (fulltext_vtab *)pVTab;
sl@0
  2129
  int rc;
sl@0
  2130
sl@0
  2131
  TRACE(("FTS1 Destroy %p\n", pVTab));
sl@0
  2132
  rc = sql_exec(v->db, v->zDb, v->zName,
sl@0
  2133
                "drop table if exists %_content;"
sl@0
  2134
                "drop table if exists %_term;"
sl@0
  2135
                );
sl@0
  2136
  if( rc!=SQLITE_OK ) return rc;
sl@0
  2137
sl@0
  2138
  fulltext_vtab_destroy((fulltext_vtab *)pVTab);
sl@0
  2139
  return SQLITE_OK;
sl@0
  2140
}
sl@0
  2141
sl@0
  2142
static int fulltextOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
sl@0
  2143
  fulltext_cursor *c;
sl@0
  2144
sl@0
  2145
  c = (fulltext_cursor *) calloc(sizeof(fulltext_cursor), 1);
sl@0
  2146
  /* sqlite will initialize c->base */
sl@0
  2147
  *ppCursor = &c->base;
sl@0
  2148
  TRACE(("FTS1 Open %p: %p\n", pVTab, c));
sl@0
  2149
sl@0
  2150
  return SQLITE_OK;
sl@0
  2151
}
sl@0
  2152
sl@0
  2153
sl@0
  2154
/* Free all of the dynamically allocated memory held by *q
sl@0
  2155
*/
sl@0
  2156
static void queryClear(Query *q){
sl@0
  2157
  int i;
sl@0
  2158
  for(i = 0; i < q->nTerms; ++i){
sl@0
  2159
    free(q->pTerms[i].pTerm);
sl@0
  2160
  }
sl@0
  2161
  free(q->pTerms);
sl@0
  2162
  memset(q, 0, sizeof(*q));
sl@0
  2163
}
sl@0
  2164
sl@0
  2165
/* Free all of the dynamically allocated memory held by the
sl@0
  2166
** Snippet
sl@0
  2167
*/
sl@0
  2168
static void snippetClear(Snippet *p){
sl@0
  2169
  free(p->aMatch);
sl@0
  2170
  free(p->zOffset);
sl@0
  2171
  free(p->zSnippet);
sl@0
  2172
  memset(p, 0, sizeof(*p));
sl@0
  2173
}
sl@0
  2174
/*
sl@0
  2175
** Append a single entry to the p->aMatch[] log.
sl@0
  2176
*/
sl@0
  2177
static void snippetAppendMatch(
sl@0
  2178
  Snippet *p,               /* Append the entry to this snippet */
sl@0
  2179
  int iCol, int iTerm,      /* The column and query term */
sl@0
  2180
  int iStart, int nByte     /* Offset and size of the match */
sl@0
  2181
){
sl@0
  2182
  int i;
sl@0
  2183
  struct snippetMatch *pMatch;
sl@0
  2184
  if( p->nMatch+1>=p->nAlloc ){
sl@0
  2185
    p->nAlloc = p->nAlloc*2 + 10;
sl@0
  2186
    p->aMatch = realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) );
sl@0
  2187
    if( p->aMatch==0 ){
sl@0
  2188
      p->nMatch = 0;
sl@0
  2189
      p->nAlloc = 0;
sl@0
  2190
      return;
sl@0
  2191
    }
sl@0
  2192
  }
sl@0
  2193
  i = p->nMatch++;
sl@0
  2194
  pMatch = &p->aMatch[i];
sl@0
  2195
  pMatch->iCol = iCol;
sl@0
  2196
  pMatch->iTerm = iTerm;
sl@0
  2197
  pMatch->iStart = iStart;
sl@0
  2198
  pMatch->nByte = nByte;
sl@0
  2199
}
sl@0
  2200
sl@0
  2201
/*
sl@0
  2202
** Sizing information for the circular buffer used in snippetOffsetsOfColumn()
sl@0
  2203
*/
sl@0
  2204
#define FTS1_ROTOR_SZ   (32)
sl@0
  2205
#define FTS1_ROTOR_MASK (FTS1_ROTOR_SZ-1)
sl@0
  2206
sl@0
  2207
/*
sl@0
  2208
** Add entries to pSnippet->aMatch[] for every match that occurs against
sl@0
  2209
** document zDoc[0..nDoc-1] which is stored in column iColumn.
sl@0
  2210
*/
sl@0
  2211
static void snippetOffsetsOfColumn(
sl@0
  2212
  Query *pQuery,
sl@0
  2213
  Snippet *pSnippet,
sl@0
  2214
  int iColumn,
sl@0
  2215
  const char *zDoc,
sl@0
  2216
  int nDoc
sl@0
  2217
){
sl@0
  2218
  const sqlite3_tokenizer_module *pTModule;  /* The tokenizer module */
sl@0
  2219
  sqlite3_tokenizer *pTokenizer;             /* The specific tokenizer */
sl@0
  2220
  sqlite3_tokenizer_cursor *pTCursor;        /* Tokenizer cursor */
sl@0
  2221
  fulltext_vtab *pVtab;                /* The full text index */
sl@0
  2222
  int nColumn;                         /* Number of columns in the index */
sl@0
  2223
  const QueryTerm *aTerm;              /* Query string terms */
sl@0
  2224
  int nTerm;                           /* Number of query string terms */  
sl@0
  2225
  int i, j;                            /* Loop counters */
sl@0
  2226
  int rc;                              /* Return code */
sl@0
  2227
  unsigned int match, prevMatch;       /* Phrase search bitmasks */
sl@0
  2228
  const char *zToken;                  /* Next token from the tokenizer */
sl@0
  2229
  int nToken;                          /* Size of zToken */
sl@0
  2230
  int iBegin, iEnd, iPos;              /* Offsets of beginning and end */
sl@0
  2231
sl@0
  2232
  /* The following variables keep a circular buffer of the last
sl@0
  2233
  ** few tokens */
sl@0
  2234
  unsigned int iRotor = 0;             /* Index of current token */
sl@0
  2235
  int iRotorBegin[FTS1_ROTOR_SZ];      /* Beginning offset of token */
sl@0
  2236
  int iRotorLen[FTS1_ROTOR_SZ];        /* Length of token */
sl@0
  2237
sl@0
  2238
  pVtab = pQuery->pFts;
sl@0
  2239
  nColumn = pVtab->nColumn;
sl@0
  2240
  pTokenizer = pVtab->pTokenizer;
sl@0
  2241
  pTModule = pTokenizer->pModule;
sl@0
  2242
  rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor);
sl@0
  2243
  if( rc ) return;
sl@0
  2244
  pTCursor->pTokenizer = pTokenizer;
sl@0
  2245
  aTerm = pQuery->pTerms;
sl@0
  2246
  nTerm = pQuery->nTerms;
sl@0
  2247
  if( nTerm>=FTS1_ROTOR_SZ ){
sl@0
  2248
    nTerm = FTS1_ROTOR_SZ - 1;
sl@0
  2249
  }
sl@0
  2250
  prevMatch = 0;
sl@0
  2251
  while(1){
sl@0
  2252
    rc = pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
sl@0
  2253
    if( rc ) break;
sl@0
  2254
    iRotorBegin[iRotor&FTS1_ROTOR_MASK] = iBegin;
sl@0
  2255
    iRotorLen[iRotor&FTS1_ROTOR_MASK] = iEnd-iBegin;
sl@0
  2256
    match = 0;
sl@0
  2257
    for(i=0; i<nTerm; i++){
sl@0
  2258
      int iCol;
sl@0
  2259
      iCol = aTerm[i].iColumn;
sl@0
  2260
      if( iCol>=0 && iCol<nColumn && iCol!=iColumn ) continue;
sl@0
  2261
      if( aTerm[i].nTerm!=nToken ) continue;
sl@0
  2262
      if( memcmp(aTerm[i].pTerm, zToken, nToken) ) continue;
sl@0
  2263
      if( aTerm[i].iPhrase>1 && (prevMatch & (1<<i))==0 ) continue;
sl@0
  2264
      match |= 1<<i;
sl@0
  2265
      if( i==nTerm-1 || aTerm[i+1].iPhrase==1 ){
sl@0
  2266
        for(j=aTerm[i].iPhrase-1; j>=0; j--){
sl@0
  2267
          int k = (iRotor-j) & FTS1_ROTOR_MASK;
sl@0
  2268
          snippetAppendMatch(pSnippet, iColumn, i-j,
sl@0
  2269
                iRotorBegin[k], iRotorLen[k]);
sl@0
  2270
        }
sl@0
  2271
      }
sl@0
  2272
    }
sl@0
  2273
    prevMatch = match<<1;
sl@0
  2274
    iRotor++;
sl@0
  2275
  }
sl@0
  2276
  pTModule->xClose(pTCursor);  
sl@0
  2277
}
sl@0
  2278
sl@0
  2279
sl@0
  2280
/*
sl@0
  2281
** Compute all offsets for the current row of the query.  
sl@0
  2282
** If the offsets have already been computed, this routine is a no-op.
sl@0
  2283
*/
sl@0
  2284
static void snippetAllOffsets(fulltext_cursor *p){
sl@0
  2285
  int nColumn;
sl@0
  2286
  int iColumn, i;
sl@0
  2287
  int iFirst, iLast;
sl@0
  2288
  fulltext_vtab *pFts;
sl@0
  2289
sl@0
  2290
  if( p->snippet.nMatch ) return;
sl@0
  2291
  if( p->q.nTerms==0 ) return;
sl@0
  2292
  pFts = p->q.pFts;
sl@0
  2293
  nColumn = pFts->nColumn;
sl@0
  2294
  iColumn = p->iCursorType - QUERY_FULLTEXT;
sl@0
  2295
  if( iColumn<0 || iColumn>=nColumn ){
sl@0
  2296
    iFirst = 0;
sl@0
  2297
    iLast = nColumn-1;
sl@0
  2298
  }else{
sl@0
  2299
    iFirst = iColumn;
sl@0
  2300
    iLast = iColumn;
sl@0
  2301
  }
sl@0
  2302
  for(i=iFirst; i<=iLast; i++){
sl@0
  2303
    const char *zDoc;
sl@0
  2304
    int nDoc;
sl@0
  2305
    zDoc = (const char*)sqlite3_column_text(p->pStmt, i+1);
sl@0
  2306
    nDoc = sqlite3_column_bytes(p->pStmt, i+1);
sl@0
  2307
    snippetOffsetsOfColumn(&p->q, &p->snippet, i, zDoc, nDoc);
sl@0
  2308
  }
sl@0
  2309
}
sl@0
  2310
sl@0
  2311
/*
sl@0
  2312
** Convert the information in the aMatch[] array of the snippet
sl@0
  2313
** into the string zOffset[0..nOffset-1].
sl@0
  2314
*/
sl@0
  2315
static void snippetOffsetText(Snippet *p){
sl@0
  2316
  int i;
sl@0
  2317
  int cnt = 0;
sl@0
  2318
  StringBuffer sb;
sl@0
  2319
  char zBuf[200];
sl@0
  2320
  if( p->zOffset ) return;
sl@0
  2321
  initStringBuffer(&sb);
sl@0
  2322
  for(i=0; i<p->nMatch; i++){
sl@0
  2323
    struct snippetMatch *pMatch = &p->aMatch[i];
sl@0
  2324
    zBuf[0] = ' ';
sl@0
  2325
    sqlite3_snprintf(sizeof(zBuf)-1, &zBuf[cnt>0], "%d %d %d %d",
sl@0
  2326
        pMatch->iCol, pMatch->iTerm, pMatch->iStart, pMatch->nByte);
sl@0
  2327
    append(&sb, zBuf);
sl@0
  2328
    cnt++;
sl@0
  2329
  }
sl@0
  2330
  p->zOffset = sb.s;
sl@0
  2331
  p->nOffset = sb.len;
sl@0
  2332
}
sl@0
  2333
sl@0
  2334
/*
sl@0
  2335
** zDoc[0..nDoc-1] is phrase of text.  aMatch[0..nMatch-1] are a set
sl@0
  2336
** of matching words some of which might be in zDoc.  zDoc is column
sl@0
  2337
** number iCol.
sl@0
  2338
**
sl@0
  2339
** iBreak is suggested spot in zDoc where we could begin or end an
sl@0
  2340
** excerpt.  Return a value similar to iBreak but possibly adjusted
sl@0
  2341
** to be a little left or right so that the break point is better.
sl@0
  2342
*/
sl@0
  2343
static int wordBoundary(
sl@0
  2344
  int iBreak,                   /* The suggested break point */
sl@0
  2345
  const char *zDoc,             /* Document text */
sl@0
  2346
  int nDoc,                     /* Number of bytes in zDoc[] */
sl@0
  2347
  struct snippetMatch *aMatch,  /* Matching words */
sl@0
  2348
  int nMatch,                   /* Number of entries in aMatch[] */
sl@0
  2349
  int iCol                      /* The column number for zDoc[] */
sl@0
  2350
){
sl@0
  2351
  int i;
sl@0
  2352
  if( iBreak<=10 ){
sl@0
  2353
    return 0;
sl@0
  2354
  }
sl@0
  2355
  if( iBreak>=nDoc-10 ){
sl@0
  2356
    return nDoc;
sl@0
  2357
  }
sl@0
  2358
  for(i=0; i<nMatch && aMatch[i].iCol<iCol; i++){}
sl@0
  2359
  while( i<nMatch && aMatch[i].iStart+aMatch[i].nByte<iBreak ){ i++; }
sl@0
  2360
  if( i<nMatch ){
sl@0
  2361
    if( aMatch[i].iStart<iBreak+10 ){
sl@0
  2362
      return aMatch[i].iStart;
sl@0
  2363
    }
sl@0
  2364
    if( i>0 && aMatch[i-1].iStart+aMatch[i-1].nByte>=iBreak ){
sl@0
  2365
      return aMatch[i-1].iStart;
sl@0
  2366
    }
sl@0
  2367
  }
sl@0
  2368
  for(i=1; i<=10; i++){
sl@0
  2369
    if( safe_isspace(zDoc[iBreak-i]) ){
sl@0
  2370
      return iBreak - i + 1;
sl@0
  2371
    }
sl@0
  2372
    if( safe_isspace(zDoc[iBreak+i]) ){
sl@0
  2373
      return iBreak + i + 1;
sl@0
  2374
    }
sl@0
  2375
  }
sl@0
  2376
  return iBreak;
sl@0
  2377
}
sl@0
  2378
sl@0
  2379
/*
sl@0
  2380
** If the StringBuffer does not end in white space, add a single
sl@0
  2381
** space character to the end.
sl@0
  2382
*/
sl@0
  2383
static void appendWhiteSpace(StringBuffer *p){
sl@0
  2384
  if( p->len==0 ) return;
sl@0
  2385
  if( safe_isspace(p->s[p->len-1]) ) return;
sl@0
  2386
  append(p, " ");
sl@0
  2387
}
sl@0
  2388
sl@0
  2389
/*
sl@0
  2390
** Remove white space from teh end of the StringBuffer
sl@0
  2391
*/
sl@0
  2392
static void trimWhiteSpace(StringBuffer *p){
sl@0
  2393
  while( p->len>0 && safe_isspace(p->s[p->len-1]) ){
sl@0
  2394
    p->len--;
sl@0
  2395
  }
sl@0
  2396
}
sl@0
  2397
sl@0
  2398
sl@0
  2399
sl@0
  2400
/*
sl@0
  2401
** Allowed values for Snippet.aMatch[].snStatus
sl@0
  2402
*/
sl@0
  2403
#define SNIPPET_IGNORE  0   /* It is ok to omit this match from the snippet */
sl@0
  2404
#define SNIPPET_DESIRED 1   /* We want to include this match in the snippet */
sl@0
  2405
sl@0
  2406
/*
sl@0
  2407
** Generate the text of a snippet.
sl@0
  2408
*/
sl@0
  2409
static void snippetText(
sl@0
  2410
  fulltext_cursor *pCursor,   /* The cursor we need the snippet for */
sl@0
  2411
  const char *zStartMark,     /* Markup to appear before each match */
sl@0
  2412
  const char *zEndMark,       /* Markup to appear after each match */
sl@0
  2413
  const char *zEllipsis       /* Ellipsis mark */
sl@0
  2414
){
sl@0
  2415
  int i, j;
sl@0
  2416
  struct snippetMatch *aMatch;
sl@0
  2417
  int nMatch;
sl@0
  2418
  int nDesired;
sl@0
  2419
  StringBuffer sb;
sl@0
  2420
  int tailCol;
sl@0
  2421
  int tailOffset;
sl@0
  2422
  int iCol;
sl@0
  2423
  int nDoc;
sl@0
  2424
  const char *zDoc;
sl@0
  2425
  int iStart, iEnd;
sl@0
  2426
  int tailEllipsis = 0;
sl@0
  2427
  int iMatch;
sl@0
  2428
  
sl@0
  2429
sl@0
  2430
  free(pCursor->snippet.zSnippet);
sl@0
  2431
  pCursor->snippet.zSnippet = 0;
sl@0
  2432
  aMatch = pCursor->snippet.aMatch;
sl@0
  2433
  nMatch = pCursor->snippet.nMatch;
sl@0
  2434
  initStringBuffer(&sb);
sl@0
  2435
sl@0
  2436
  for(i=0; i<nMatch; i++){
sl@0
  2437
    aMatch[i].snStatus = SNIPPET_IGNORE;
sl@0
  2438
  }
sl@0
  2439
  nDesired = 0;
sl@0
  2440
  for(i=0; i<pCursor->q.nTerms; i++){
sl@0
  2441
    for(j=0; j<nMatch; j++){
sl@0
  2442
      if( aMatch[j].iTerm==i ){
sl@0
  2443
        aMatch[j].snStatus = SNIPPET_DESIRED;
sl@0
  2444
        nDesired++;
sl@0
  2445
        break;
sl@0
  2446
      }
sl@0
  2447
    }
sl@0
  2448
  }
sl@0
  2449
sl@0
  2450
  iMatch = 0;
sl@0
  2451
  tailCol = -1;
sl@0
  2452
  tailOffset = 0;
sl@0
  2453
  for(i=0; i<nMatch && nDesired>0; i++){
sl@0
  2454
    if( aMatch[i].snStatus!=SNIPPET_DESIRED ) continue;
sl@0
  2455
    nDesired--;
sl@0
  2456
    iCol = aMatch[i].iCol;
sl@0
  2457
    zDoc = (const char*)sqlite3_column_text(pCursor->pStmt, iCol+1);
sl@0
  2458
    nDoc = sqlite3_column_bytes(pCursor->pStmt, iCol+1);
sl@0
  2459
    iStart = aMatch[i].iStart - 40;
sl@0
  2460
    iStart = wordBoundary(iStart, zDoc, nDoc, aMatch, nMatch, iCol);
sl@0
  2461
    if( iStart<=10 ){
sl@0
  2462
      iStart = 0;
sl@0
  2463
    }
sl@0
  2464
    if( iCol==tailCol && iStart<=tailOffset+20 ){
sl@0
  2465
      iStart = tailOffset;
sl@0
  2466
    }
sl@0
  2467
    if( (iCol!=tailCol && tailCol>=0) || iStart!=tailOffset ){
sl@0
  2468
      trimWhiteSpace(&sb);
sl@0
  2469
      appendWhiteSpace(&sb);
sl@0
  2470
      append(&sb, zEllipsis);
sl@0
  2471
      appendWhiteSpace(&sb);
sl@0
  2472
    }
sl@0
  2473
    iEnd = aMatch[i].iStart + aMatch[i].nByte + 40;
sl@0
  2474
    iEnd = wordBoundary(iEnd, zDoc, nDoc, aMatch, nMatch, iCol);
sl@0
  2475
    if( iEnd>=nDoc-10 ){
sl@0
  2476
      iEnd = nDoc;
sl@0
  2477
      tailEllipsis = 0;
sl@0
  2478
    }else{
sl@0
  2479
      tailEllipsis = 1;
sl@0
  2480
    }
sl@0
  2481
    while( iMatch<nMatch && aMatch[iMatch].iCol<iCol ){ iMatch++; }
sl@0
  2482
    while( iStart<iEnd ){
sl@0
  2483
      while( iMatch<nMatch && aMatch[iMatch].iStart<iStart
sl@0
  2484
             && aMatch[iMatch].iCol<=iCol ){
sl@0
  2485
        iMatch++;
sl@0
  2486
      }
sl@0
  2487
      if( iMatch<nMatch && aMatch[iMatch].iStart<iEnd
sl@0
  2488
             && aMatch[iMatch].iCol==iCol ){
sl@0
  2489
        nappend(&sb, &zDoc[iStart], aMatch[iMatch].iStart - iStart);
sl@0
  2490
        iStart = aMatch[iMatch].iStart;
sl@0
  2491
        append(&sb, zStartMark);
sl@0
  2492
        nappend(&sb, &zDoc[iStart], aMatch[iMatch].nByte);
sl@0
  2493
        append(&sb, zEndMark);
sl@0
  2494
        iStart += aMatch[iMatch].nByte;
sl@0
  2495
        for(j=iMatch+1; j<nMatch; j++){
sl@0
  2496
          if( aMatch[j].iTerm==aMatch[iMatch].iTerm
sl@0
  2497
              && aMatch[j].snStatus==SNIPPET_DESIRED ){
sl@0
  2498
            nDesired--;
sl@0
  2499
            aMatch[j].snStatus = SNIPPET_IGNORE;
sl@0
  2500
          }
sl@0
  2501
        }
sl@0
  2502
      }else{
sl@0
  2503
        nappend(&sb, &zDoc[iStart], iEnd - iStart);
sl@0
  2504
        iStart = iEnd;
sl@0
  2505
      }
sl@0
  2506
    }
sl@0
  2507
    tailCol = iCol;
sl@0
  2508
    tailOffset = iEnd;
sl@0
  2509
  }
sl@0
  2510
  trimWhiteSpace(&sb);
sl@0
  2511
  if( tailEllipsis ){
sl@0
  2512
    appendWhiteSpace(&sb);
sl@0
  2513
    append(&sb, zEllipsis);
sl@0
  2514
  }
sl@0
  2515
  pCursor->snippet.zSnippet = sb.s;
sl@0
  2516
  pCursor->snippet.nSnippet = sb.len;  
sl@0
  2517
}
sl@0
  2518
sl@0
  2519
sl@0
  2520
/*
sl@0
  2521
** Close the cursor.  For additional information see the documentation
sl@0
  2522
** on the xClose method of the virtual table interface.
sl@0
  2523
*/
sl@0
  2524
static int fulltextClose(sqlite3_vtab_cursor *pCursor){
sl@0
  2525
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
sl@0
  2526
  TRACE(("FTS1 Close %p\n", c));
sl@0
  2527
  sqlite3_finalize(c->pStmt);
sl@0
  2528
  queryClear(&c->q);
sl@0
  2529
  snippetClear(&c->snippet);
sl@0
  2530
  if( c->result.pDoclist!=NULL ){
sl@0
  2531
    docListDelete(c->result.pDoclist);
sl@0
  2532
  }
sl@0
  2533
  free(c);
sl@0
  2534
  return SQLITE_OK;
sl@0
  2535
}
sl@0
  2536
sl@0
  2537
static int fulltextNext(sqlite3_vtab_cursor *pCursor){
sl@0
  2538
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
sl@0
  2539
  sqlite_int64 iDocid;
sl@0
  2540
  int rc;
sl@0
  2541
sl@0
  2542
  TRACE(("FTS1 Next %p\n", pCursor));
sl@0
  2543
  snippetClear(&c->snippet);
sl@0
  2544
  if( c->iCursorType < QUERY_FULLTEXT ){
sl@0
  2545
    /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
sl@0
  2546
    rc = sqlite3_step(c->pStmt);
sl@0
  2547
    switch( rc ){
sl@0
  2548
      case SQLITE_ROW:
sl@0
  2549
        c->eof = 0;
sl@0
  2550
        return SQLITE_OK;
sl@0
  2551
      case SQLITE_DONE:
sl@0
  2552
        c->eof = 1;
sl@0
  2553
        return SQLITE_OK;
sl@0
  2554
      default:
sl@0
  2555
        c->eof = 1;
sl@0
  2556
        return rc;
sl@0
  2557
    }
sl@0
  2558
  } else {  /* full-text query */
sl@0
  2559
    rc = sqlite3_reset(c->pStmt);
sl@0
  2560
    if( rc!=SQLITE_OK ) return rc;
sl@0
  2561
sl@0
  2562
    iDocid = nextDocid(&c->result);
sl@0
  2563
    if( iDocid==0 ){
sl@0
  2564
      c->eof = 1;
sl@0
  2565
      return SQLITE_OK;
sl@0
  2566
    }
sl@0
  2567
    rc = sqlite3_bind_int64(c->pStmt, 1, iDocid);
sl@0
  2568
    if( rc!=SQLITE_OK ) return rc;
sl@0
  2569
    /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
sl@0
  2570
    rc = sqlite3_step(c->pStmt);
sl@0
  2571
    if( rc==SQLITE_ROW ){   /* the case we expect */
sl@0
  2572
      c->eof = 0;
sl@0
  2573
      return SQLITE_OK;
sl@0
  2574
    }
sl@0
  2575
    /* an error occurred; abort */
sl@0
  2576
    return rc==SQLITE_DONE ? SQLITE_ERROR : rc;
sl@0
  2577
  }
sl@0
  2578
}
sl@0
  2579
sl@0
  2580
sl@0
  2581
/* Return a DocList corresponding to the query term *pTerm.  If *pTerm
sl@0
  2582
** is the first term of a phrase query, go ahead and evaluate the phrase
sl@0
  2583
** query and return the doclist for the entire phrase query.
sl@0
  2584
**
sl@0
  2585
** The result is stored in pTerm->doclist.
sl@0
  2586
*/
sl@0
  2587
static int docListOfTerm(
sl@0
  2588
  fulltext_vtab *v,     /* The full text index */
sl@0
  2589
  int iColumn,          /* column to restrict to.  No restrition if >=nColumn */
sl@0
  2590
  QueryTerm *pQTerm,    /* Term we are looking for, or 1st term of a phrase */
sl@0
  2591
  DocList **ppResult    /* Write the result here */
sl@0
  2592
){
sl@0
  2593
  DocList *pLeft, *pRight, *pNew;
sl@0
  2594
  int i, rc;
sl@0
  2595
sl@0
  2596
  pLeft = docListNew(DL_POSITIONS);
sl@0
  2597
  rc = term_select_all(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, pLeft);
sl@0
  2598
  if( rc ){
sl@0
  2599
    docListDelete(pLeft);
sl@0
  2600
    return rc;
sl@0
  2601
  }
sl@0
  2602
  for(i=1; i<=pQTerm->nPhrase; i++){
sl@0
  2603
    pRight = docListNew(DL_POSITIONS);
sl@0
  2604
    rc = term_select_all(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm, pRight);
sl@0
  2605
    if( rc ){
sl@0
  2606
      docListDelete(pLeft);
sl@0
  2607
      return rc;
sl@0
  2608
    }
sl@0
  2609
    pNew = docListNew(i<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS);
sl@0
  2610
    docListPhraseMerge(pLeft, pRight, pNew);
sl@0
  2611
    docListDelete(pLeft);
sl@0
  2612
    docListDelete(pRight);
sl@0
  2613
    pLeft = pNew;
sl@0
  2614
  }
sl@0
  2615
  *ppResult = pLeft;
sl@0
  2616
  return SQLITE_OK;
sl@0
  2617
}
sl@0
  2618
sl@0
  2619
/* Add a new term pTerm[0..nTerm-1] to the query *q.
sl@0
  2620
*/
sl@0
  2621
static void queryAdd(Query *q, const char *pTerm, int nTerm){
sl@0
  2622
  QueryTerm *t;
sl@0
  2623
  ++q->nTerms;
sl@0
  2624
  q->pTerms = realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0]));
sl@0
  2625
  if( q->pTerms==0 ){
sl@0
  2626
    q->nTerms = 0;
sl@0
  2627
    return;
sl@0
  2628
  }
sl@0
  2629
  t = &q->pTerms[q->nTerms - 1];
sl@0
  2630
  memset(t, 0, sizeof(*t));
sl@0
  2631
  t->pTerm = malloc(nTerm+1);
sl@0
  2632
  memcpy(t->pTerm, pTerm, nTerm);
sl@0
  2633
  t->pTerm[nTerm] = 0;
sl@0
  2634
  t->nTerm = nTerm;
sl@0
  2635
  t->isOr = q->nextIsOr;
sl@0
  2636
  q->nextIsOr = 0;
sl@0
  2637
  t->iColumn = q->nextColumn;
sl@0
  2638
  q->nextColumn = q->dfltColumn;
sl@0
  2639
}
sl@0
  2640
sl@0
  2641
/*
sl@0
  2642
** Check to see if the string zToken[0...nToken-1] matches any
sl@0
  2643
** column name in the virtual table.   If it does,
sl@0
  2644
** return the zero-indexed column number.  If not, return -1.
sl@0
  2645
*/
sl@0
  2646
static int checkColumnSpecifier(
sl@0
  2647
  fulltext_vtab *pVtab,    /* The virtual table */
sl@0
  2648
  const char *zToken,      /* Text of the token */
sl@0
  2649
  int nToken               /* Number of characters in the token */
sl@0
  2650
){
sl@0
  2651
  int i;
sl@0
  2652
  for(i=0; i<pVtab->nColumn; i++){
sl@0
  2653
    if( memcmp(pVtab->azColumn[i], zToken, nToken)==0
sl@0
  2654
        && pVtab->azColumn[i][nToken]==0 ){
sl@0
  2655
      return i;
sl@0
  2656
    }
sl@0
  2657
  }
sl@0
  2658
  return -1;
sl@0
  2659
}
sl@0
  2660
sl@0
  2661
/*
sl@0
  2662
** Parse the text at pSegment[0..nSegment-1].  Add additional terms
sl@0
  2663
** to the query being assemblied in pQuery.
sl@0
  2664
**
sl@0
  2665
** inPhrase is true if pSegment[0..nSegement-1] is contained within
sl@0
  2666
** double-quotes.  If inPhrase is true, then the first term
sl@0
  2667
** is marked with the number of terms in the phrase less one and
sl@0
  2668
** OR and "-" syntax is ignored.  If inPhrase is false, then every
sl@0
  2669
** term found is marked with nPhrase=0 and OR and "-" syntax is significant.
sl@0
  2670
*/
sl@0
  2671
static int tokenizeSegment(
sl@0
  2672
  sqlite3_tokenizer *pTokenizer,          /* The tokenizer to use */
sl@0
  2673
  const char *pSegment, int nSegment,     /* Query expression being parsed */
sl@0
  2674
  int inPhrase,                           /* True if within "..." */
sl@0
  2675
  Query *pQuery                           /* Append results here */
sl@0
  2676
){
sl@0
  2677
  const sqlite3_tokenizer_module *pModule = pTokenizer->pModule;
sl@0
  2678
  sqlite3_tokenizer_cursor *pCursor;
sl@0
  2679
  int firstIndex = pQuery->nTerms;
sl@0
  2680
  int iCol;
sl@0
  2681
  int nTerm = 1;
sl@0
  2682
  
sl@0
  2683
  int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor);
sl@0
  2684
  if( rc!=SQLITE_OK ) return rc;
sl@0
  2685
  pCursor->pTokenizer = pTokenizer;
sl@0
  2686
sl@0
  2687
  while( 1 ){
sl@0
  2688
    const char *pToken;
sl@0
  2689
    int nToken, iBegin, iEnd, iPos;
sl@0
  2690
sl@0
  2691
    rc = pModule->xNext(pCursor,
sl@0
  2692
                        &pToken, &nToken,
sl@0
  2693
                        &iBegin, &iEnd, &iPos);
sl@0
  2694
    if( rc!=SQLITE_OK ) break;
sl@0
  2695
    if( !inPhrase &&
sl@0
  2696
        pSegment[iEnd]==':' &&
sl@0
  2697
         (iCol = checkColumnSpecifier(pQuery->pFts, pToken, nToken))>=0 ){
sl@0
  2698
      pQuery->nextColumn = iCol;
sl@0
  2699
      continue;
sl@0
  2700
    }
sl@0
  2701
    if( !inPhrase && pQuery->nTerms>0 && nToken==2
sl@0
  2702
         && pSegment[iBegin]=='O' && pSegment[iBegin+1]=='R' ){
sl@0
  2703
      pQuery->nextIsOr = 1;
sl@0
  2704
      continue;
sl@0
  2705
    }
sl@0
  2706
    queryAdd(pQuery, pToken, nToken);
sl@0
  2707
    if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){
sl@0
  2708
      pQuery->pTerms[pQuery->nTerms-1].isNot = 1;
sl@0
  2709
    }
sl@0
  2710
    pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm;
sl@0
  2711
    if( inPhrase ){
sl@0
  2712
      nTerm++;
sl@0
  2713
    }
sl@0
  2714
  }
sl@0
  2715
sl@0
  2716
  if( inPhrase && pQuery->nTerms>firstIndex ){
sl@0
  2717
    pQuery->pTerms[firstIndex].nPhrase = pQuery->nTerms - firstIndex - 1;
sl@0
  2718
  }
sl@0
  2719
sl@0
  2720
  return pModule->xClose(pCursor);
sl@0
  2721
}
sl@0
  2722
sl@0
  2723
/* Parse a query string, yielding a Query object pQuery.
sl@0
  2724
**
sl@0
  2725
** The calling function will need to queryClear() to clean up
sl@0
  2726
** the dynamically allocated memory held by pQuery.
sl@0
  2727
*/
sl@0
  2728
static int parseQuery(
sl@0
  2729
  fulltext_vtab *v,        /* The fulltext index */
sl@0
  2730
  const char *zInput,      /* Input text of the query string */
sl@0
  2731
  int nInput,              /* Size of the input text */
sl@0
  2732
  int dfltColumn,          /* Default column of the index to match against */
sl@0
  2733
  Query *pQuery            /* Write the parse results here. */
sl@0
  2734
){
sl@0
  2735
  int iInput, inPhrase = 0;
sl@0
  2736
sl@0
  2737
  if( zInput==0 ) nInput = 0;
sl@0
  2738
  if( nInput<0 ) nInput = strlen(zInput);
sl@0
  2739
  pQuery->nTerms = 0;
sl@0
  2740
  pQuery->pTerms = NULL;
sl@0
  2741
  pQuery->nextIsOr = 0;
sl@0
  2742
  pQuery->nextColumn = dfltColumn;
sl@0
  2743
  pQuery->dfltColumn = dfltColumn;
sl@0
  2744
  pQuery->pFts = v;
sl@0
  2745
sl@0
  2746
  for(iInput=0; iInput<nInput; ++iInput){
sl@0
  2747
    int i;
sl@0
  2748
    for(i=iInput; i<nInput && zInput[i]!='"'; ++i){}
sl@0
  2749
    if( i>iInput ){
sl@0
  2750
      tokenizeSegment(v->pTokenizer, zInput+iInput, i-iInput, inPhrase,
sl@0
  2751
                       pQuery);
sl@0
  2752
    }
sl@0
  2753
    iInput = i;
sl@0
  2754
    if( i<nInput ){
sl@0
  2755
      assert( zInput[i]=='"' );
sl@0
  2756
      inPhrase = !inPhrase;
sl@0
  2757
    }
sl@0
  2758
  }
sl@0
  2759
sl@0
  2760
  if( inPhrase ){
sl@0
  2761
    /* unmatched quote */
sl@0
  2762
    queryClear(pQuery);
sl@0
  2763
    return SQLITE_ERROR;
sl@0
  2764
  }
sl@0
  2765
  return SQLITE_OK;
sl@0
  2766
}
sl@0
  2767
sl@0
  2768
/* Perform a full-text query using the search expression in
sl@0
  2769
** zInput[0..nInput-1].  Return a list of matching documents
sl@0
  2770
** in pResult.
sl@0
  2771
**
sl@0
  2772
** Queries must match column iColumn.  Or if iColumn>=nColumn
sl@0
  2773
** they are allowed to match against any column.
sl@0
  2774
*/
sl@0
  2775
static int fulltextQuery(
sl@0
  2776
  fulltext_vtab *v,      /* The full text index */
sl@0
  2777
  int iColumn,           /* Match against this column by default */
sl@0
  2778
  const char *zInput,    /* The query string */
sl@0
  2779
  int nInput,            /* Number of bytes in zInput[] */
sl@0
  2780
  DocList **pResult,     /* Write the result doclist here */
sl@0
  2781
  Query *pQuery          /* Put parsed query string here */
sl@0
  2782
){
sl@0
  2783
  int i, iNext, rc;
sl@0
  2784
  DocList *pLeft = NULL;
sl@0
  2785
  DocList *pRight, *pNew, *pOr;
sl@0
  2786
  int nNot = 0;
sl@0
  2787
  QueryTerm *aTerm;
sl@0
  2788
sl@0
  2789
  rc = parseQuery(v, zInput, nInput, iColumn, pQuery);
sl@0
  2790
  if( rc!=SQLITE_OK ) return rc;
sl@0
  2791
sl@0
  2792
  /* Merge AND terms. */
sl@0
  2793
  aTerm = pQuery->pTerms;
sl@0
  2794
  for(i = 0; i<pQuery->nTerms; i=iNext){
sl@0
  2795
    if( aTerm[i].isNot ){
sl@0
  2796
      /* Handle all NOT terms in a separate pass */
sl@0
  2797
      nNot++;
sl@0
  2798
      iNext = i + aTerm[i].nPhrase+1;
sl@0
  2799
      continue;
sl@0
  2800
    }
sl@0
  2801
    iNext = i + aTerm[i].nPhrase + 1;
sl@0
  2802
    rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight);
sl@0
  2803
    if( rc ){
sl@0
  2804
      queryClear(pQuery);
sl@0
  2805
      return rc;
sl@0
  2806
    }
sl@0
  2807
    while( iNext<pQuery->nTerms && aTerm[iNext].isOr ){
sl@0
  2808
      rc = docListOfTerm(v, aTerm[iNext].iColumn, &aTerm[iNext], &pOr);
sl@0
  2809
      iNext += aTerm[iNext].nPhrase + 1;
sl@0
  2810
      if( rc ){
sl@0
  2811
        queryClear(pQuery);
sl@0
  2812
        return rc;
sl@0
  2813
      }
sl@0
  2814
      pNew = docListNew(DL_DOCIDS);
sl@0
  2815
      docListOrMerge(pRight, pOr, pNew);
sl@0
  2816
      docListDelete(pRight);
sl@0
  2817
      docListDelete(pOr);
sl@0
  2818
      pRight = pNew;
sl@0
  2819
    }
sl@0
  2820
    if( pLeft==0 ){
sl@0
  2821
      pLeft = pRight;
sl@0
  2822
    }else{
sl@0
  2823
      pNew = docListNew(DL_DOCIDS);
sl@0
  2824
      docListAndMerge(pLeft, pRight, pNew);
sl@0
  2825
      docListDelete(pRight);
sl@0
  2826
      docListDelete(pLeft);
sl@0
  2827
      pLeft = pNew;
sl@0
  2828
    }
sl@0
  2829
  }
sl@0
  2830
sl@0
  2831
  if( nNot && pLeft==0 ){
sl@0
  2832
    /* We do not yet know how to handle a query of only NOT terms */
sl@0
  2833
    return SQLITE_ERROR;
sl@0
  2834
  }
sl@0
  2835
sl@0
  2836
  /* Do the EXCEPT terms */
sl@0
  2837
  for(i=0; i<pQuery->nTerms;  i += aTerm[i].nPhrase + 1){
sl@0
  2838
    if( !aTerm[i].isNot ) continue;
sl@0
  2839
    rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight);
sl@0
  2840
    if( rc ){
sl@0
  2841
      queryClear(pQuery);
sl@0
  2842
      docListDelete(pLeft);
sl@0
  2843
      return rc;
sl@0
  2844
    }
sl@0
  2845
    pNew = docListNew(DL_DOCIDS);
sl@0
  2846
    docListExceptMerge(pLeft, pRight, pNew);
sl@0
  2847
    docListDelete(pRight);
sl@0
  2848
    docListDelete(pLeft);
sl@0
  2849
    pLeft = pNew;
sl@0
  2850
  }
sl@0
  2851
sl@0
  2852
  *pResult = pLeft;
sl@0
  2853
  return rc;
sl@0
  2854
}
sl@0
  2855
sl@0
  2856
/*
sl@0
  2857
** This is the xFilter interface for the virtual table.  See
sl@0
  2858
** the virtual table xFilter method documentation for additional
sl@0
  2859
** information.
sl@0
  2860
**
sl@0
  2861
** If idxNum==QUERY_GENERIC then do a full table scan against
sl@0
  2862
** the %_content table.
sl@0
  2863
**
sl@0
  2864
** If idxNum==QUERY_ROWID then do a rowid lookup for a single entry
sl@0
  2865
** in the %_content table.
sl@0
  2866
**
sl@0
  2867
** If idxNum>=QUERY_FULLTEXT then use the full text index.  The
sl@0
  2868
** column on the left-hand side of the MATCH operator is column
sl@0
  2869
** number idxNum-QUERY_FULLTEXT, 0 indexed.  argv[0] is the right-hand
sl@0
  2870
** side of the MATCH operator.
sl@0
  2871
*/
sl@0
  2872
/* TODO(shess) Upgrade the cursor initialization and destruction to
sl@0
  2873
** account for fulltextFilter() being called multiple times on the
sl@0
  2874
** same cursor.  The current solution is very fragile.  Apply fix to
sl@0
  2875
** fts2 as appropriate.
sl@0
  2876
*/
sl@0
  2877
static int fulltextFilter(
sl@0
  2878
  sqlite3_vtab_cursor *pCursor,     /* The cursor used for this query */
sl@0
  2879
  int idxNum, const char *idxStr,   /* Which indexing scheme to use */
sl@0
  2880
  int argc, sqlite3_value **argv    /* Arguments for the indexing scheme */
sl@0
  2881
){
sl@0
  2882
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
sl@0
  2883
  fulltext_vtab *v = cursor_vtab(c);
sl@0
  2884
  int rc;
sl@0
  2885
  char *zSql;
sl@0
  2886
sl@0
  2887
  TRACE(("FTS1 Filter %p\n",pCursor));
sl@0
  2888
sl@0
  2889
  zSql = sqlite3_mprintf("select rowid, * from %%_content %s",
sl@0
  2890
                          idxNum==QUERY_GENERIC ? "" : "where rowid=?");
sl@0
  2891
  sqlite3_finalize(c->pStmt);
sl@0
  2892
  rc = sql_prepare(v->db, v->zDb, v->zName, &c->pStmt, zSql);
sl@0
  2893
  sqlite3_free(zSql);
sl@0
  2894
  if( rc!=SQLITE_OK ) return rc;
sl@0
  2895
sl@0
  2896
  c->iCursorType = idxNum;
sl@0
  2897
  switch( idxNum ){
sl@0
  2898
    case QUERY_GENERIC:
sl@0
  2899
      break;
sl@0
  2900
sl@0
  2901
    case QUERY_ROWID:
sl@0
  2902
      rc = sqlite3_bind_int64(c->pStmt, 1, sqlite3_value_int64(argv[0]));
sl@0
  2903
      if( rc!=SQLITE_OK ) return rc;
sl@0
  2904
      break;
sl@0
  2905
sl@0
  2906
    default:   /* full-text search */
sl@0
  2907
    {
sl@0
  2908
      const char *zQuery = (const char *)sqlite3_value_text(argv[0]);
sl@0
  2909
      DocList *pResult;
sl@0
  2910
      assert( idxNum<=QUERY_FULLTEXT+v->nColumn);
sl@0
  2911
      assert( argc==1 );
sl@0
  2912
      queryClear(&c->q);
sl@0
  2913
      rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &pResult, &c->q);
sl@0
  2914
      if( rc!=SQLITE_OK ) return rc;
sl@0
  2915
      if( c->result.pDoclist!=NULL ) docListDelete(c->result.pDoclist);
sl@0
  2916
      readerInit(&c->result, pResult);
sl@0
  2917
      break;
sl@0
  2918
    }
sl@0
  2919
  }
sl@0
  2920
sl@0
  2921
  return fulltextNext(pCursor);
sl@0
  2922
}
sl@0
  2923
sl@0
  2924
/* This is the xEof method of the virtual table.  The SQLite core
sl@0
  2925
** calls this routine to find out if it has reached the end of
sl@0
  2926
** a query's results set.
sl@0
  2927
*/
sl@0
  2928
static int fulltextEof(sqlite3_vtab_cursor *pCursor){
sl@0
  2929
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
sl@0
  2930
  return c->eof;
sl@0
  2931
}
sl@0
  2932
sl@0
  2933
/* This is the xColumn method of the virtual table.  The SQLite
sl@0
  2934
** core calls this method during a query when it needs the value
sl@0
  2935
** of a column from the virtual table.  This method needs to use
sl@0
  2936
** one of the sqlite3_result_*() routines to store the requested
sl@0
  2937
** value back in the pContext.
sl@0
  2938
*/
sl@0
  2939
static int fulltextColumn(sqlite3_vtab_cursor *pCursor,
sl@0
  2940
                          sqlite3_context *pContext, int idxCol){
sl@0
  2941
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
sl@0
  2942
  fulltext_vtab *v = cursor_vtab(c);
sl@0
  2943
sl@0
  2944
  if( idxCol<v->nColumn ){
sl@0
  2945
    sqlite3_value *pVal = sqlite3_column_value(c->pStmt, idxCol+1);
sl@0
  2946
    sqlite3_result_value(pContext, pVal);
sl@0
  2947
  }else if( idxCol==v->nColumn ){
sl@0
  2948
    /* The extra column whose name is the same as the table.
sl@0
  2949
    ** Return a blob which is a pointer to the cursor
sl@0
  2950
    */
sl@0
  2951
    sqlite3_result_blob(pContext, &c, sizeof(c), SQLITE_TRANSIENT);
sl@0
  2952
  }
sl@0
  2953
  return SQLITE_OK;
sl@0
  2954
}
sl@0
  2955
sl@0
  2956
/* This is the xRowid method.  The SQLite core calls this routine to
sl@0
  2957
** retrive the rowid for the current row of the result set.  The
sl@0
  2958
** rowid should be written to *pRowid.
sl@0
  2959
*/
sl@0
  2960
static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
sl@0
  2961
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
sl@0
  2962
sl@0
  2963
  *pRowid = sqlite3_column_int64(c->pStmt, 0);
sl@0
  2964
  return SQLITE_OK;
sl@0
  2965
}
sl@0
  2966
sl@0
  2967
/* Add all terms in [zText] to the given hash table.  If [iColumn] > 0,
sl@0
  2968
 * we also store positions and offsets in the hash table using the given
sl@0
  2969
 * column number. */
sl@0
  2970
static int buildTerms(fulltext_vtab *v, fts1Hash *terms, sqlite_int64 iDocid,
sl@0
  2971
                      const char *zText, int iColumn){
sl@0
  2972
  sqlite3_tokenizer *pTokenizer = v->pTokenizer;
sl@0
  2973
  sqlite3_tokenizer_cursor *pCursor;
sl@0
  2974
  const char *pToken;
sl@0
  2975
  int nTokenBytes;
sl@0
  2976
  int iStartOffset, iEndOffset, iPosition;
sl@0
  2977
  int rc;
sl@0
  2978
sl@0
  2979
  rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor);
sl@0
  2980
  if( rc!=SQLITE_OK ) return rc;
sl@0
  2981
sl@0
  2982
  pCursor->pTokenizer = pTokenizer;
sl@0
  2983
  while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor,
sl@0
  2984
                                               &pToken, &nTokenBytes,
sl@0
  2985
                                               &iStartOffset, &iEndOffset,
sl@0
  2986
                                               &iPosition) ){
sl@0
  2987
    DocList *p;
sl@0
  2988
sl@0
  2989
    /* Positions can't be negative; we use -1 as a terminator internally. */
sl@0
  2990
    if( iPosition<0 ){
sl@0
  2991
      pTokenizer->pModule->xClose(pCursor);
sl@0
  2992
      return SQLITE_ERROR;
sl@0
  2993
    }
sl@0
  2994
sl@0
  2995
    p = fts1HashFind(terms, pToken, nTokenBytes);
sl@0
  2996
    if( p==NULL ){
sl@0
  2997
      p = docListNew(DL_DEFAULT);
sl@0
  2998
      docListAddDocid(p, iDocid);
sl@0
  2999
      fts1HashInsert(terms, pToken, nTokenBytes, p);
sl@0
  3000
    }
sl@0
  3001
    if( iColumn>=0 ){
sl@0
  3002
      docListAddPosOffset(p, iColumn, iPosition, iStartOffset, iEndOffset);
sl@0
  3003
    }
sl@0
  3004
  }
sl@0
  3005
sl@0
  3006
  /* TODO(shess) Check return?  Should this be able to cause errors at
sl@0
  3007
  ** this point?  Actually, same question about sqlite3_finalize(),
sl@0
  3008
  ** though one could argue that failure there means that the data is
sl@0
  3009
  ** not durable.  *ponder*
sl@0
  3010
  */
sl@0
  3011
  pTokenizer->pModule->xClose(pCursor);
sl@0
  3012
  return rc;
sl@0
  3013
}
sl@0
  3014
sl@0
  3015
/* Update the %_terms table to map the term [pTerm] to the given rowid. */
sl@0
  3016
static int index_insert_term(fulltext_vtab *v, const char *pTerm, int nTerm,
sl@0
  3017
                             DocList *d){
sl@0
  3018
  sqlite_int64 iIndexRow;
sl@0
  3019
  DocList doclist;
sl@0
  3020
  int iSegment = 0, rc;
sl@0
  3021
sl@0
  3022
  rc = term_select(v, pTerm, nTerm, iSegment, &iIndexRow, &doclist);
sl@0
  3023
  if( rc==SQLITE_DONE ){
sl@0
  3024
    docListInit(&doclist, DL_DEFAULT, 0, 0);
sl@0
  3025
    docListUpdate(&doclist, d);
sl@0
  3026
    /* TODO(shess) Consider length(doclist)>CHUNK_MAX? */
sl@0
  3027
    rc = term_insert(v, NULL, pTerm, nTerm, iSegment, &doclist);
sl@0
  3028
    goto err;
sl@0
  3029
  }
sl@0
  3030
  if( rc!=SQLITE_ROW ) return SQLITE_ERROR;
sl@0
  3031
sl@0
  3032
  docListUpdate(&doclist, d);
sl@0
  3033
  if( doclist.nData<=CHUNK_MAX ){
sl@0
  3034
    rc = term_update(v, iIndexRow, &doclist);
sl@0
  3035
    goto err;
sl@0
  3036
  }
sl@0
  3037
sl@0
  3038
  /* Doclist doesn't fit, delete what's there, and accumulate
sl@0
  3039
  ** forward.
sl@0
  3040
  */
sl@0
  3041
  rc = term_delete(v, iIndexRow);
sl@0
  3042
  if( rc!=SQLITE_OK ) goto err;
sl@0
  3043
sl@0
  3044
  /* Try to insert the doclist into a higher segment bucket.  On
sl@0
  3045
  ** failure, accumulate existing doclist with the doclist from that
sl@0
  3046
  ** bucket, and put results in the next bucket.
sl@0
  3047
  */
sl@0
  3048
  iSegment++;
sl@0
  3049
  while( (rc=term_insert(v, &iIndexRow, pTerm, nTerm, iSegment,
sl@0
  3050
                         &doclist))!=SQLITE_OK ){
sl@0
  3051
    sqlite_int64 iSegmentRow;
sl@0
  3052
    DocList old;
sl@0
  3053
    int rc2;
sl@0
  3054
sl@0
  3055
    /* Retain old error in case the term_insert() error was really an
sl@0
  3056
    ** error rather than a bounced insert.
sl@0
  3057
    */
sl@0
  3058
    rc2 = term_select(v, pTerm, nTerm, iSegment, &iSegmentRow, &old);
sl@0
  3059
    if( rc2!=SQLITE_ROW ) goto err;
sl@0
  3060
sl@0
  3061
    rc = term_delete(v, iSegmentRow);
sl@0
  3062
    if( rc!=SQLITE_OK ) goto err;
sl@0
  3063
sl@0
  3064
    /* Reusing lowest-number deleted row keeps the index smaller. */
sl@0
  3065
    if( iSegmentRow<iIndexRow ) iIndexRow = iSegmentRow;
sl@0
  3066
sl@0
  3067
    /* doclist contains the newer data, so accumulate it over old.
sl@0
  3068
    ** Then steal accumulated data for doclist.
sl@0
  3069
    */
sl@0
  3070
    docListAccumulate(&old, &doclist);
sl@0
  3071
    docListDestroy(&doclist);
sl@0
  3072
    doclist = old;
sl@0
  3073
sl@0
  3074
    iSegment++;
sl@0
  3075
  }
sl@0
  3076
sl@0
  3077
 err:
sl@0
  3078
  docListDestroy(&doclist);
sl@0
  3079
  return rc;
sl@0
  3080
}
sl@0
  3081
sl@0
  3082
/* Add doclists for all terms in [pValues] to the hash table [terms]. */
sl@0
  3083
static int insertTerms(fulltext_vtab *v, fts1Hash *terms, sqlite_int64 iRowid,
sl@0
  3084
                sqlite3_value **pValues){
sl@0
  3085
  int i;
sl@0
  3086
  for(i = 0; i < v->nColumn ; ++i){
sl@0
  3087
    char *zText = (char*)sqlite3_value_text(pValues[i]);
sl@0
  3088
    int rc = buildTerms(v, terms, iRowid, zText, i);
sl@0
  3089
    if( rc!=SQLITE_OK ) return rc;
sl@0
  3090
  }
sl@0
  3091
  return SQLITE_OK;
sl@0
  3092
}
sl@0
  3093
sl@0
  3094
/* Add empty doclists for all terms in the given row's content to the hash
sl@0
  3095
 * table [pTerms]. */
sl@0
  3096
static int deleteTerms(fulltext_vtab *v, fts1Hash *pTerms, sqlite_int64 iRowid){
sl@0
  3097
  const char **pValues;
sl@0
  3098
  int i;
sl@0
  3099
sl@0
  3100
  int rc = content_select(v, iRowid, &pValues);
sl@0
  3101
  if( rc!=SQLITE_OK ) return rc;
sl@0
  3102
sl@0
  3103
  for(i = 0 ; i < v->nColumn; ++i) {
sl@0
  3104
    rc = buildTerms(v, pTerms, iRowid, pValues[i], -1);
sl@0
  3105
    if( rc!=SQLITE_OK ) break;
sl@0
  3106
  }
sl@0
  3107
sl@0
  3108
  freeStringArray(v->nColumn, pValues);
sl@0
  3109
  return SQLITE_OK;
sl@0
  3110
}
sl@0
  3111
sl@0
  3112
/* Insert a row into the %_content table; set *piRowid to be the ID of the
sl@0
  3113
 * new row.  Fill [pTerms] with new doclists for the %_term table. */
sl@0
  3114
static int index_insert(fulltext_vtab *v, sqlite3_value *pRequestRowid,
sl@0
  3115
                        sqlite3_value **pValues,
sl@0
  3116
                        sqlite_int64 *piRowid, fts1Hash *pTerms){
sl@0
  3117
  int rc;
sl@0
  3118
sl@0
  3119
  rc = content_insert(v, pRequestRowid, pValues);  /* execute an SQL INSERT */
sl@0
  3120
  if( rc!=SQLITE_OK ) return rc;
sl@0
  3121
  *piRowid = sqlite3_last_insert_rowid(v->db);
sl@0
  3122
  return insertTerms(v, pTerms, *piRowid, pValues);
sl@0
  3123
}
sl@0
  3124
sl@0
  3125
/* Delete a row from the %_content table; fill [pTerms] with empty doclists
sl@0
  3126
 * to be written to the %_term table. */
sl@0
  3127
static int index_delete(fulltext_vtab *v, sqlite_int64 iRow, fts1Hash *pTerms){
sl@0
  3128
  int rc = deleteTerms(v, pTerms, iRow);
sl@0
  3129
  if( rc!=SQLITE_OK ) return rc;
sl@0
  3130
  return content_delete(v, iRow);  /* execute an SQL DELETE */
sl@0
  3131
}
sl@0
  3132
sl@0
  3133
/* Update a row in the %_content table; fill [pTerms] with new doclists for the
sl@0
  3134
 * %_term table. */
sl@0
  3135
static int index_update(fulltext_vtab *v, sqlite_int64 iRow,
sl@0
  3136
                        sqlite3_value **pValues, fts1Hash *pTerms){
sl@0
  3137
  /* Generate an empty doclist for each term that previously appeared in this
sl@0
  3138
   * row. */
sl@0
  3139
  int rc = deleteTerms(v, pTerms, iRow);
sl@0
  3140
  if( rc!=SQLITE_OK ) return rc;
sl@0
  3141
sl@0
  3142
  rc = content_update(v, pValues, iRow);  /* execute an SQL UPDATE */
sl@0
  3143
  if( rc!=SQLITE_OK ) return rc;
sl@0
  3144
sl@0
  3145
  /* Now add positions for terms which appear in the updated row. */
sl@0
  3146
  return insertTerms(v, pTerms, iRow, pValues);
sl@0
  3147
}
sl@0
  3148
sl@0
  3149
/* This function implements the xUpdate callback; it is the top-level entry
sl@0
  3150
 * point for inserting, deleting or updating a row in a full-text table. */
sl@0
  3151
static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg,
sl@0
  3152
                   sqlite_int64 *pRowid){
sl@0
  3153
  fulltext_vtab *v = (fulltext_vtab *) pVtab;
sl@0
  3154
  fts1Hash terms;   /* maps term string -> PosList */
sl@0
  3155
  int rc;
sl@0
  3156
  fts1HashElem *e;
sl@0
  3157
sl@0
  3158
  TRACE(("FTS1 Update %p\n", pVtab));
sl@0
  3159
  
sl@0
  3160
  fts1HashInit(&terms, FTS1_HASH_STRING, 1);
sl@0
  3161
sl@0
  3162
  if( nArg<2 ){
sl@0
  3163
    rc = index_delete(v, sqlite3_value_int64(ppArg[0]), &terms);
sl@0
  3164
  } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){
sl@0
  3165
    /* An update:
sl@0
  3166
     * ppArg[0] = old rowid
sl@0
  3167
     * ppArg[1] = new rowid
sl@0
  3168
     * ppArg[2..2+v->nColumn-1] = values
sl@0
  3169
     * ppArg[2+v->nColumn] = value for magic column (we ignore this)
sl@0
  3170
     */
sl@0
  3171
    sqlite_int64 rowid = sqlite3_value_int64(ppArg[0]);
sl@0
  3172
    if( sqlite3_value_type(ppArg[1]) != SQLITE_INTEGER ||
sl@0
  3173
      sqlite3_value_int64(ppArg[1]) != rowid ){
sl@0
  3174
      rc = SQLITE_ERROR;  /* we don't allow changing the rowid */
sl@0
  3175
    } else {
sl@0
  3176
      assert( nArg==2+v->nColumn+1);
sl@0
  3177
      rc = index_update(v, rowid, &ppArg[2], &terms);
sl@0
  3178
    }
sl@0
  3179
  } else {
sl@0
  3180
    /* An insert:
sl@0
  3181
     * ppArg[1] = requested rowid
sl@0
  3182
     * ppArg[2..2+v->nColumn-1] = values
sl@0
  3183
     * ppArg[2+v->nColumn] = value for magic column (we ignore this)
sl@0
  3184
     */
sl@0
  3185
    assert( nArg==2+v->nColumn+1);
sl@0
  3186
    rc = index_insert(v, ppArg[1], &ppArg[2], pRowid, &terms);
sl@0
  3187
  }
sl@0
  3188
sl@0
  3189
  if( rc==SQLITE_OK ){
sl@0
  3190
    /* Write updated doclists to disk. */
sl@0
  3191
    for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){
sl@0
  3192
      DocList *p = fts1HashData(e);
sl@0
  3193
      rc = index_insert_term(v, fts1HashKey(e), fts1HashKeysize(e), p);
sl@0
  3194
      if( rc!=SQLITE_OK ) break;
sl@0
  3195
    }
sl@0
  3196
  }
sl@0
  3197
sl@0
  3198
  /* clean up */
sl@0
  3199
  for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){
sl@0
  3200
    DocList *p = fts1HashData(e);
sl@0
  3201
    docListDelete(p);
sl@0
  3202
  }
sl@0
  3203
  fts1HashClear(&terms);
sl@0
  3204
sl@0
  3205
  return rc;
sl@0
  3206
}
sl@0
  3207
sl@0
  3208
/*
sl@0
  3209
** Implementation of the snippet() function for FTS1
sl@0
  3210
*/
sl@0
  3211
static void snippetFunc(
sl@0
  3212
  sqlite3_context *pContext,
sl@0
  3213
  int argc,
sl@0
  3214
  sqlite3_value **argv
sl@0
  3215
){
sl@0
  3216
  fulltext_cursor *pCursor;
sl@0
  3217
  if( argc<1 ) return;
sl@0
  3218
  if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
sl@0
  3219
      sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
sl@0
  3220
    sqlite3_result_error(pContext, "illegal first argument to html_snippet",-1);
sl@0
  3221
  }else{
sl@0
  3222
    const char *zStart = "<b>";
sl@0
  3223
    const char *zEnd = "</b>";
sl@0
  3224
    const char *zEllipsis = "<b>...</b>";
sl@0
  3225
    memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
sl@0
  3226
    if( argc>=2 ){
sl@0
  3227
      zStart = (const char*)sqlite3_value_text(argv[1]);
sl@0
  3228
      if( argc>=3 ){
sl@0
  3229
        zEnd = (const char*)sqlite3_value_text(argv[2]);
sl@0
  3230
        if( argc>=4 ){
sl@0
  3231
          zEllipsis = (const char*)sqlite3_value_text(argv[3]);
sl@0
  3232
        }
sl@0
  3233
      }
sl@0
  3234
    }
sl@0
  3235
    snippetAllOffsets(pCursor);
sl@0
  3236
    snippetText(pCursor, zStart, zEnd, zEllipsis);
sl@0
  3237
    sqlite3_result_text(pContext, pCursor->snippet.zSnippet,
sl@0
  3238
                        pCursor->snippet.nSnippet, SQLITE_STATIC);
sl@0
  3239
  }
sl@0
  3240
}
sl@0
  3241
sl@0
  3242
/*
sl@0
  3243
** Implementation of the offsets() function for FTS1
sl@0
  3244
*/
sl@0
  3245
static void snippetOffsetsFunc(
sl@0
  3246
  sqlite3_context *pContext,
sl@0
  3247
  int argc,
sl@0
  3248
  sqlite3_value **argv
sl@0
  3249
){
sl@0
  3250
  fulltext_cursor *pCursor;
sl@0
  3251
  if( argc<1 ) return;
sl@0
  3252
  if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
sl@0
  3253
      sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
sl@0
  3254
    sqlite3_result_error(pContext, "illegal first argument to offsets",-1);
sl@0
  3255
  }else{
sl@0
  3256
    memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
sl@0
  3257
    snippetAllOffsets(pCursor);
sl@0
  3258
    snippetOffsetText(&pCursor->snippet);
sl@0
  3259
    sqlite3_result_text(pContext,
sl@0
  3260
                        pCursor->snippet.zOffset, pCursor->snippet.nOffset,
sl@0
  3261
                        SQLITE_STATIC);
sl@0
  3262
  }
sl@0
  3263
}
sl@0
  3264
sl@0
  3265
/*
sl@0
  3266
** This routine implements the xFindFunction method for the FTS1
sl@0
  3267
** virtual table.
sl@0
  3268
*/
sl@0
  3269
static int fulltextFindFunction(
sl@0
  3270
  sqlite3_vtab *pVtab,
sl@0
  3271
  int nArg,
sl@0
  3272
  const char *zName,
sl@0
  3273
  void (**pxFunc)(sqlite3_context*,int,sqlite3_value**),
sl@0
  3274
  void **ppArg
sl@0
  3275
){
sl@0
  3276
  if( strcmp(zName,"snippet")==0 ){
sl@0
  3277
    *pxFunc = snippetFunc;
sl@0
  3278
    return 1;
sl@0
  3279
  }else if( strcmp(zName,"offsets")==0 ){
sl@0
  3280
    *pxFunc = snippetOffsetsFunc;
sl@0
  3281
    return 1;
sl@0
  3282
  }
sl@0
  3283
  return 0;
sl@0
  3284
}
sl@0
  3285
sl@0
  3286
/*
sl@0
  3287
** Rename an fts1 table.
sl@0
  3288
*/
sl@0
  3289
static int fulltextRename(
sl@0
  3290
  sqlite3_vtab *pVtab,
sl@0
  3291
  const char *zName
sl@0
  3292
){
sl@0
  3293
  fulltext_vtab *p = (fulltext_vtab *)pVtab;
sl@0
  3294
  int rc = SQLITE_NOMEM;
sl@0
  3295
  char *zSql = sqlite3_mprintf(
sl@0
  3296
    "ALTER TABLE %Q.'%q_content'  RENAME TO '%q_content';"
sl@0
  3297
    "ALTER TABLE %Q.'%q_term' RENAME TO '%q_term';"
sl@0
  3298
    , p->zDb, p->zName, zName
sl@0
  3299
    , p->zDb, p->zName, zName
sl@0
  3300
  );
sl@0
  3301
  if( zSql ){
sl@0
  3302
    rc = sqlite3_exec(p->db, zSql, 0, 0, 0);
sl@0
  3303
    sqlite3_free(zSql);
sl@0
  3304
  }
sl@0
  3305
  return rc;
sl@0
  3306
}
sl@0
  3307
sl@0
  3308
static const sqlite3_module fulltextModule = {
sl@0
  3309
  /* iVersion      */ 0,
sl@0
  3310
  /* xCreate       */ fulltextCreate,
sl@0
  3311
  /* xConnect      */ fulltextConnect,
sl@0
  3312
  /* xBestIndex    */ fulltextBestIndex,
sl@0
  3313
  /* xDisconnect   */ fulltextDisconnect,
sl@0
  3314
  /* xDestroy      */ fulltextDestroy,
sl@0
  3315
  /* xOpen         */ fulltextOpen,
sl@0
  3316
  /* xClose        */ fulltextClose,
sl@0
  3317
  /* xFilter       */ fulltextFilter,
sl@0
  3318
  /* xNext         */ fulltextNext,
sl@0
  3319
  /* xEof          */ fulltextEof,
sl@0
  3320
  /* xColumn       */ fulltextColumn,
sl@0
  3321
  /* xRowid        */ fulltextRowid,
sl@0
  3322
  /* xUpdate       */ fulltextUpdate,
sl@0
  3323
  /* xBegin        */ 0, 
sl@0
  3324
  /* xSync         */ 0,
sl@0
  3325
  /* xCommit       */ 0,
sl@0
  3326
  /* xRollback     */ 0,
sl@0
  3327
  /* xFindFunction */ fulltextFindFunction,
sl@0
  3328
  /* xRename       */ fulltextRename,
sl@0
  3329
};
sl@0
  3330
sl@0
  3331
int sqlite3Fts1Init(sqlite3 *db){
sl@0
  3332
  sqlite3_overload_function(db, "snippet", -1);
sl@0
  3333
  sqlite3_overload_function(db, "offsets", -1);
sl@0
  3334
  return sqlite3_create_module(db, "fts1", &fulltextModule, 0);
sl@0
  3335
}
sl@0
  3336
sl@0
  3337
#if !SQLITE_CORE
sl@0
  3338
int sqlite3_extension_init(sqlite3 *db, char **pzErrMsg,
sl@0
  3339
                           const sqlite3_api_routines *pApi){
sl@0
  3340
  SQLITE_EXTENSION_INIT2(pApi)
sl@0
  3341
  return sqlite3Fts1Init(db);
sl@0
  3342
}
sl@0
  3343
#endif
sl@0
  3344
sl@0
  3345
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */