1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/persistentdata/persistentstorage/sqlite3api/SQLite/fts3_tokenizer.h Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,145 @@
1.4 +/*
1.5 +** 2006 July 10
1.6 +**
1.7 +** The author disclaims copyright to this source code.
1.8 +**
1.9 +*************************************************************************
1.10 +** Defines the interface to tokenizers used by fulltext-search. There
1.11 +** are three basic components:
1.12 +**
1.13 +** sqlite3_tokenizer_module is a singleton defining the tokenizer
1.14 +** interface functions. This is essentially the class structure for
1.15 +** tokenizers.
1.16 +**
1.17 +** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
1.18 +** including customization information defined at creation time.
1.19 +**
1.20 +** sqlite3_tokenizer_cursor is created by a tokenizer to produce
1.21 +** tokens from a particular input.
1.22 +*/
1.23 +#ifndef _FTS3_TOKENIZER_H_
1.24 +#define _FTS3_TOKENIZER_H_
1.25 +
1.26 +/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
1.27 +** If tokenizers are to be allowed to call sqlite3_*() functions, then
1.28 +** we will need a way to register the API consistently.
1.29 +*/
1.30 +#include "sqlite3.h"
1.31 +
1.32 +/*
1.33 +** Structures used by the tokenizer interface. When a new tokenizer
1.34 +** implementation is registered, the caller provides a pointer to
1.35 +** an sqlite3_tokenizer_module containing pointers to the callback
1.36 +** functions that make up an implementation.
1.37 +**
1.38 +** When an fts3 table is created, any arguments supplied in the
1.39 +** tokenizer clause of the CREATE VIRTUAL TABLE statement are passed to the
1.40 +** sqlite3_tokenizer_module.xCreate() function of the requested tokenizer
1.41 +** implementation. The xCreate() function in turn returns an
1.42 +** sqlite3_tokenizer structure representing the specific tokenizer to
1.43 +** be used for the fts3 table (customized by the tokenizer clause arguments).
1.44 +**
1.45 +** To tokenize an input buffer, the sqlite3_tokenizer_module.xOpen()
1.46 +** method is called. It returns an sqlite3_tokenizer_cursor object
1.47 +** that may be used to tokenize a specific input buffer based on
1.48 +** the tokenization rules supplied by a specific sqlite3_tokenizer
1.49 +** object.
1.50 +*/
1.51 +typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module; /* Tokenizer interface functions */
1.52 +typedef struct sqlite3_tokenizer sqlite3_tokenizer; /* One particular tokenizer */
1.53 +typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor; /* Token stream over one input */
1.54 +
1.55 +struct sqlite3_tokenizer_module {
1.56 +
1.57 + /*
1.58 + ** Structure version. Should always be set to 0.
1.59 + */
1.60 + int iVersion;
1.61 +
1.62 + /*
1.63 + ** Create a new tokenizer. The values in the argv[] array are the
1.64 + ** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL
1.65 + ** TABLE statement that created the fts3 table. For example, if
1.66 + ** the following SQL is executed:
1.67 + **
1.68 + ** CREATE .. USING fts3( ... , tokenizer <tokenizer-name> arg1 arg2)
1.69 + **
1.70 + ** then argc is set to 2, and the argv[] array contains pointers
1.71 + ** to the strings "arg1" and "arg2".
1.72 + **
1.73 + ** This method should return either SQLITE_OK (0), or an SQLite error
1.74 + ** code. If SQLITE_OK is returned, then *ppTokenizer should be set
1.75 + ** to point at the newly created tokenizer structure. The generic
1.76 + ** sqlite3_tokenizer.pModule variable should not be initialised by
1.77 + ** this callback. The caller will do so.
1.78 + */
1.79 + int (*xCreate)(
1.80 + int argc, /* Number of entries in argv[] */
1.81 + const char *const*argv, /* Tokenizer argument strings */
1.82 + sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */
1.83 + );
1.84 +
1.85 + /*
1.86 + ** Destroy an existing tokenizer. The fts3 module calls this method
1.87 + ** exactly once for each successful call to xCreate().
1.88 + */
1.89 + int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
1.90 +
1.91 + /*
1.92 + ** Create a tokenizer cursor to tokenize an input buffer. The caller
1.93 + ** is responsible for ensuring that the input buffer remains valid
1.94 + ** until the cursor is closed (using the xClose() method).
1.95 + */
1.96 + int (*xOpen)(
1.97 + sqlite3_tokenizer *pTokenizer, /* Tokenizer object */
1.98 + const char *pInput, int nBytes, /* Input buffer; currently must be nul-terminated */
1.99 + sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */
1.100 + );
1.101 +
1.102 + /*
1.103 + ** Destroy an existing tokenizer cursor. The fts3 module calls this
1.104 + ** method exactly once for each successful call to xOpen().
1.105 + */
1.106 + int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
1.107 +
1.108 + /*
1.109 + ** Retrieve the next token from the tokenizer cursor pCursor. This
1.110 + ** method should either return SQLITE_OK and set the values of the
1.111 + ** "OUT" variables identified below, or SQLITE_DONE to indicate that
1.112 + ** the end of the buffer has been reached, or an SQLite error code.
1.113 + **
1.114 + ** *ppToken should be set to point at a buffer containing the
1.115 + ** normalized version of the token (i.e. after any case-folding and/or
1.116 + ** stemming has been performed). *pnBytes should be set to the length
1.117 + ** of this buffer in bytes. The input text that generated the token is
1.118 + ** identified by the byte offsets returned in *piStartOffset and
1.119 + ** *piEndOffset.
1.120 + **
1.121 + ** The buffer *ppToken is set to point at is managed by the tokenizer
1.122 + ** implementation. It is only required to be valid until the next call
1.123 + ** to xNext() or xClose().
1.124 + */
1.125 + /* TODO(shess) current implementation requires the pInput buffer
1.126 + ** passed to xOpen() to be nul-terminated. This should either be
1.127 + ** fixed, or pInput/nBytes should be converted to zInput.
1.128 + */
1.129 + int (*xNext)(
1.130 + sqlite3_tokenizer_cursor *pCursor, /* Tokenizer cursor */
1.131 + const char **ppToken, int *pnBytes, /* OUT: Normalized token text and its length in bytes */
1.132 + int *piStartOffset, /* OUT: Byte offset of start of token in input buffer */
1.133 + int *piEndOffset, /* OUT: Byte offset of end of token in input buffer */
1.134 + int *piPosition /* OUT: Number of tokens returned before this one */
1.135 + );
1.136 +};
1.137 +
1.138 +struct sqlite3_tokenizer {
1.139 + const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer; set by the caller of xCreate(), not by xCreate() itself */
1.140 + /* Tokenizer implementations will typically add additional fields */
1.141 +};
1.142 +
1.143 +struct sqlite3_tokenizer_cursor {
1.144 + sqlite3_tokenizer *pTokenizer; /* Tokenizer that this cursor belongs to. */
1.145 + /* Tokenizer implementations will typically add additional fields */
1.146 +};
1.147 +
1.148 +#endif /* _FTS3_TOKENIZER_H_ */