os/persistentdata/persistentstorage/sqlite3api/SQLite/fts3_tokenizer.h
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/persistentdata/persistentstorage/sqlite3api/SQLite/fts3_tokenizer.h	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,145 @@
     1.4 +/*
     1.5 +** 2006 July 10
     1.6 +**
     1.7 +** The author disclaims copyright to this source code.
     1.8 +**
     1.9 +*************************************************************************
    1.10 +** Defines the interface to tokenizers used by fulltext-search.  There
    1.11 +** are three basic components:
    1.12 +**
    1.13 +** sqlite3_tokenizer_module is a singleton defining the tokenizer
    1.14 +** interface functions.  This is essentially the class structure for
    1.15 +** tokenizers.
    1.16 +**
    1.17 +** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
    1.18 +** including customization information defined at creation time.
    1.19 +**
    1.20 +** sqlite3_tokenizer_cursor is created by a tokenizer to produce
    1.21 +** tokens from a particular input.
    1.22 +*/
    1.23 +#ifndef _FTS3_TOKENIZER_H_
    1.24 +#define _FTS3_TOKENIZER_H_
    1.25 +
    1.26 +/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
    1.27 +** If tokenizers are to be allowed to call sqlite3_*() functions, then
    1.28 +** we will need a way to register the API consistently.
    1.29 +*/
    1.30 +#include "sqlite3.h"
    1.31 +
    1.32 +/*
    1.33 +** Structures used by the tokenizer interface. When a new tokenizer
    1.34 +** implementation is registered, the caller provides a pointer to
    1.35 +** an sqlite3_tokenizer_module containing pointers to the callback
    1.36 +** functions that make up the implementation.
    1.37 +**
    1.38 +** When an fts3 table is created, any arguments given in the tokenizer
    1.39 +** clause of the CREATE VIRTUAL TABLE statement are forwarded to the
    1.40 +** sqlite3_tokenizer_module.xCreate() function of the requested tokenizer
    1.41 +** implementation. The xCreate() function in turn hands back, through its
    1.42 +** ppTokenizer output parameter, an sqlite3_tokenizer structure representing
    1.43 +** the tokenizer to be used for the fts3 table (customized by those arguments).
    1.44 +**
    1.45 +** To tokenize an input buffer, the sqlite3_tokenizer_module.xOpen()
    1.46 +** method is called. It hands back, through its ppCursor output parameter,
    1.47 +** an sqlite3_tokenizer_cursor object that is used to tokenize that one
    1.48 +** input buffer according to the rules of the sqlite3_tokenizer object
    1.49 +** it was opened on.
    1.50 +*/
    1.51 +typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
    1.52 +typedef struct sqlite3_tokenizer sqlite3_tokenizer;
    1.53 +typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
    1.54 +
    1.55 +struct sqlite3_tokenizer_module {
    1.56 +
    1.57 +  /*
    1.58 +  ** Structure version. Implementations should always set this to 0.
    1.59 +  */
    1.60 +  int iVersion;
    1.61 +
    1.62 +  /*
    1.63 +  ** Create a new tokenizer. The values in the argv[] array are the
    1.64 +  ** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL
    1.65 +  ** TABLE statement that created the fts3 table. For example, if
    1.66 +  ** the following SQL is executed:
    1.67 +  **
    1.68 +  **   CREATE .. USING fts3( ... , tokenizer <tokenizer-name> arg1 arg2)
    1.69 +  **
    1.70 +  ** then argc is set to 2 and argv[] holds pointers to the strings "arg1"
    1.71 +  ** and "arg2"; <tokenizer-name> itself is not counted.
    1.72 +  **
    1.73 +  ** This method should return either SQLITE_OK (0), or an SQLite error
    1.74 +  ** code. If SQLITE_OK is returned, then *ppTokenizer should be set
    1.75 +  ** to point at the newly created tokenizer structure. The generic
    1.76 +  ** sqlite3_tokenizer.pModule member should not be initialised by
    1.77 +  ** this callback; the caller is responsible for setting it.
    1.78 +  */
    1.79 +  int (*xCreate)(
    1.80 +    int argc,                           /* Size of argv array */
    1.81 +    const char *const*argv,             /* Tokenizer argument strings */
    1.82 +    sqlite3_tokenizer **ppTokenizer     /* OUT: Created tokenizer */
    1.83 +  );
    1.84 +
    1.85 +  /*
    1.86 +  ** Destroy an existing tokenizer. The fts3 module calls this method
    1.87 +  ** exactly once for each successful call to xCreate().
    1.88 +  */
    1.89 +  int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
    1.90 +
    1.91 +  /*
    1.92 +  ** Create a tokenizer cursor to tokenize the input buffer pInput/nBytes;
    1.93 +  ** the cursor is handed back via *ppCursor. The caller must keep the
    1.94 +  ** input buffer valid until the cursor is closed (using xClose()).
    1.95 +  */
    1.96 +  int (*xOpen)(
    1.97 +    sqlite3_tokenizer *pTokenizer,       /* Tokenizer object */
    1.98 +    const char *pInput, int nBytes,      /* Input buffer */
    1.99 +    sqlite3_tokenizer_cursor **ppCursor  /* OUT: Created tokenizer cursor */
   1.100 +  );
   1.101 +
   1.102 +  /*
   1.103 +  ** Destroy an existing tokenizer cursor. The fts3 module calls this
   1.104 +  ** method exactly once for each successful call to xOpen().
   1.105 +  */
   1.106 +  int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
   1.107 +
   1.108 +  /*
   1.109 +  ** Retrieve the next token from the tokenizer cursor pCursor. This
   1.110 +  ** method should either return SQLITE_OK and set the values of the
   1.111 +  ** "OUT" variables identified below, or SQLITE_DONE to indicate that
   1.112 +  ** the end of the buffer has been reached, or an SQLite error code.
   1.113 +  **
   1.114 +  ** *ppToken should be set to point at a buffer containing the
   1.115 +  ** normalized version of the token (i.e. after any case-folding and/or
   1.116 +  ** stemming has been performed), and *pnBytes to the length of that
   1.117 +  ** buffer in bytes. *piStartOffset and *piEndOffset identify, as byte
   1.118 +  ** offsets, the input text that generated the token. *piPosition is
   1.119 +  ** the number of tokens returned before this one.
   1.120 +  **
   1.121 +  ** The buffer that *ppToken is set to point at is managed by the
   1.122 +  ** tokenizer implementation. It is only required to remain valid until
   1.123 +  ** the next call to xNext() or xClose().
   1.124 +  */
   1.125 +  /* TODO(shess) current implementation requires pInput (the input buffer
   1.126 +  ** given to xOpen()) to be nul-terminated.  This should either be fixed,
   1.127 +  ** or pInput/nBytes should be converted to zInput.
   1.128 +  */
   1.129 +  int (*xNext)(
   1.130 +    sqlite3_tokenizer_cursor *pCursor,   /* Tokenizer cursor */
   1.131 +    const char **ppToken, int *pnBytes,  /* OUT: Normalized text for token */
   1.132 +    int *piStartOffset,  /* OUT: Byte offset of token start in input buffer */
   1.133 +    int *piEndOffset,    /* OUT: Byte offset of token end in input buffer */
   1.134 +    int *piPosition      /* OUT: Number of tokens returned before this one */
   1.135 +  );
   1.136 +};
   1.137 +
   1.138 +struct sqlite3_tokenizer {
   1.139 +  const sqlite3_tokenizer_module *pModule;  /* The module for this tokenizer */
   1.140 +  /* pModule is set by xCreate()'s caller; tokenizers typically add fields */
   1.141 +};
   1.142 +
   1.143 +struct sqlite3_tokenizer_cursor {
   1.144 +  sqlite3_tokenizer *pTokenizer;       /* Tokenizer object this cursor is for */
   1.145 +  /* Generic part only; tokenizer implementations typically add more fields */
   1.146 +};
   1.147 +
   1.148 +#endif /* _FTS3_TOKENIZER_H_ */