os/persistentdata/persistentstorage/sqlite3api/SQLite/icu.c
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/persistentdata/persistentstorage/sqlite3api/SQLite/icu.c	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,499 @@
     1.4 +/*
     1.5 +** 2007 May 6
     1.6 +**
     1.7 +** The author disclaims copyright to this source code.  In place of
     1.8 +** a legal notice, here is a blessing:
     1.9 +**
    1.10 +**    May you do good and not evil.
    1.11 +**    May you find forgiveness for yourself and forgive others.
    1.12 +**    May you share freely, never taking more than you give.
    1.13 +**
    1.14 +*************************************************************************
    1.15 +** $Id: icu.c,v 1.7 2007/12/13 21:54:11 drh Exp $
    1.16 +**
    1.17 +** This file implements an integration between the ICU library 
    1.18 +** ("International Components for Unicode", an open-source library 
    1.19 +** for handling unicode data) and SQLite. The integration uses 
    1.20 +** ICU to provide the following to SQLite:
    1.21 +**
    1.22 +**   * An implementation of the SQL regexp() function (and hence REGEXP
    1.23 +**     operator) using the ICU uregex_XX() APIs.
    1.24 +**
    1.25 +**   * Implementations of the SQL scalar upper() and lower() functions
    1.26 +**     for case mapping.
    1.27 +**
    1.28 +**   * Integration of ICU and SQLite collation sequences.
    1.29 +**
    1.30 +**   * An implementation of the LIKE operator that uses ICU to 
    1.31 +**     provide case-independent matching.
    1.32 +*/
    1.33 +
    1.34 +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU)
    1.35 +
    1.36 +/* Include ICU headers */
    1.37 +#include <unicode/utypes.h>
    1.38 +#include <unicode/uregex.h>
    1.39 +#include <unicode/ustring.h>
    1.40 +#include <unicode/ucol.h>
    1.41 +
    1.42 +#include <assert.h>
    1.43 +
    1.44 +#ifndef SQLITE_CORE
    1.45 +  #include "sqlite3ext.h"
    1.46 +  SQLITE_EXTENSION_INIT1
    1.47 +#else
    1.48 +  #include "sqlite3.h"
    1.49 +#endif
    1.50 +
    1.51 +/*
    1.52 +** Maximum length (in bytes) of the pattern in a LIKE or GLOB
    1.53 +** operator.
    1.54 +*/
    1.55 +#ifndef SQLITE_MAX_LIKE_PATTERN_LENGTH
    1.56 +# define SQLITE_MAX_LIKE_PATTERN_LENGTH 50000
    1.57 +#endif
    1.58 +
    /*
    ** Out-of-line wrapper around sqlite3_free().
    **
    ** sqlite3_free() may be provided as a macro, in which case its address
    ** cannot be taken.  This wrapper is always a real function, so it can be
    ** handed to SQLite as a destructor callback (see icuCaseFunc16()).
    */
    static void xFree(void *pMem){
      sqlite3_free(pMem);
    }
    1.65 +
    1.66 +/*
    1.67 +** Compare two UTF-8 strings for equality where the first string is
    1.68 +** a "LIKE" expression. Return true (1) if they are the same and 
    1.69 +** false (0) if they are different.
    1.70 +*/
    1.71 +static int icuLikeCompare(
    1.72 +  const uint8_t *zPattern,   /* LIKE pattern */
    1.73 +  const uint8_t *zString,    /* The UTF-8 string to compare against */
    1.74 +  const UChar32 uEsc         /* The escape character */
    1.75 +){
    1.76 +  static const int MATCH_ONE = (UChar32)'_';
    1.77 +  static const int MATCH_ALL = (UChar32)'%';
    1.78 +
    1.79 +  int iPattern = 0;       /* Current byte index in zPattern */
    1.80 +  int iString = 0;        /* Current byte index in zString */
    1.81 +
    1.82 +  int prevEscape = 0;     /* True if the previous character was uEsc */
    1.83 +
    1.84 +  while( zPattern[iPattern]!=0 ){
    1.85 +
    1.86 +    /* Read (and consume) the next character from the input pattern. */
    1.87 +    UChar32 uPattern;
    1.88 +    U8_NEXT_UNSAFE(zPattern, iPattern, uPattern);
    1.89 +    assert(uPattern!=0);
    1.90 +
    1.91 +    /* There are now 4 possibilities:
    1.92 +    **
    1.93 +    **     1. uPattern is an unescaped match-all character "%",
    1.94 +    **     2. uPattern is an unescaped match-one character "_",
    1.95 +    **     3. uPattern is an unescaped escape character, or
    1.96 +    **     4. uPattern is to be handled as an ordinary character
    1.97 +    */
    1.98 +    if( !prevEscape && uPattern==MATCH_ALL ){
    1.99 +      /* Case 1. */
   1.100 +      uint8_t c;
   1.101 +
   1.102 +      /* Skip any MATCH_ALL or MATCH_ONE characters that follow a
   1.103 +      ** MATCH_ALL. For each MATCH_ONE, skip one character in the 
   1.104 +      ** test string.
   1.105 +      */
   1.106 +      while( (c=zPattern[iPattern]) == MATCH_ALL || c == MATCH_ONE ){
   1.107 +        if( c==MATCH_ONE ){
   1.108 +          if( zString[iString]==0 ) return 0;
   1.109 +          U8_FWD_1_UNSAFE(zString, iString);
   1.110 +        }
   1.111 +        iPattern++;
   1.112 +      }
   1.113 +
   1.114 +      if( zPattern[iPattern]==0 ) return 1;
   1.115 +
   1.116 +      while( zString[iString] ){
   1.117 +        if( icuLikeCompare(&zPattern[iPattern], &zString[iString], uEsc) ){
   1.118 +          return 1;
   1.119 +        }
   1.120 +        U8_FWD_1_UNSAFE(zString, iString);
   1.121 +      }
   1.122 +      return 0;
   1.123 +
   1.124 +    }else if( !prevEscape && uPattern==MATCH_ONE ){
   1.125 +      /* Case 2. */
   1.126 +      if( zString[iString]==0 ) return 0;
   1.127 +      U8_FWD_1_UNSAFE(zString, iString);
   1.128 +
   1.129 +    }else if( !prevEscape && uPattern==uEsc){
   1.130 +      /* Case 3. */
   1.131 +      prevEscape = 1;
   1.132 +
   1.133 +    }else{
   1.134 +      /* Case 4. */
   1.135 +      UChar32 uString;
   1.136 +      U8_NEXT_UNSAFE(zString, iString, uString);
   1.137 +      uString = u_foldCase(uString, U_FOLD_CASE_DEFAULT);
   1.138 +      uPattern = u_foldCase(uPattern, U_FOLD_CASE_DEFAULT);
   1.139 +      if( uString!=uPattern ){
   1.140 +        return 0;
   1.141 +      }
   1.142 +      prevEscape = 0;
   1.143 +    }
   1.144 +  }
   1.145 +
   1.146 +  return zString[iString]==0;
   1.147 +}
   1.148 +
   1.149 +/*
   1.150 +** Implementation of the like() SQL function.  This function implements
   1.151 +** the build-in LIKE operator.  The first argument to the function is the
   1.152 +** pattern and the second argument is the string.  So, the SQL statements:
   1.153 +**
   1.154 +**       A LIKE B
   1.155 +**
   1.156 +** is implemented as like(B, A). If there is an escape character E, 
   1.157 +**
   1.158 +**       A LIKE B ESCAPE E
   1.159 +**
   1.160 +** is mapped to like(B, A, E).
   1.161 +*/
   1.162 +static void icuLikeFunc(
   1.163 +  sqlite3_context *context, 
   1.164 +  int argc, 
   1.165 +  sqlite3_value **argv
   1.166 +){
   1.167 +  const unsigned char *zA = sqlite3_value_text(argv[0]);
   1.168 +  const unsigned char *zB = sqlite3_value_text(argv[1]);
   1.169 +  UChar32 uEsc = 0;
   1.170 +
   1.171 +  /* Limit the length of the LIKE or GLOB pattern to avoid problems
   1.172 +  ** of deep recursion and N*N behavior in patternCompare().
   1.173 +  */
   1.174 +  if( sqlite3_value_bytes(argv[0])>SQLITE_MAX_LIKE_PATTERN_LENGTH ){
   1.175 +    sqlite3_result_error(context, "LIKE or GLOB pattern too complex", -1);
   1.176 +    return;
   1.177 +  }
   1.178 +
   1.179 +
   1.180 +  if( argc==3 ){
   1.181 +    /* The escape character string must consist of a single UTF-8 character.
   1.182 +    ** Otherwise, return an error.
   1.183 +    */
   1.184 +    int nE= sqlite3_value_bytes(argv[2]);
   1.185 +    const unsigned char *zE = sqlite3_value_text(argv[2]);
   1.186 +    int i = 0;
   1.187 +    if( zE==0 ) return;
   1.188 +    U8_NEXT(zE, i, nE, uEsc);
   1.189 +    if( i!=nE){
   1.190 +      sqlite3_result_error(context, 
   1.191 +          "ESCAPE expression must be a single character", -1);
   1.192 +      return;
   1.193 +    }
   1.194 +  }
   1.195 +
   1.196 +  if( zA && zB ){
   1.197 +    sqlite3_result_int(context, icuLikeCompare(zA, zB, uEsc));
   1.198 +  }
   1.199 +}
   1.200 +
   1.201 +/*
   1.202 +** This function is called when an ICU function called from within
   1.203 +** the implementation of an SQL scalar function returns an error.
   1.204 +**
   1.205 +** The scalar function context passed as the first argument is 
   1.206 +** loaded with an error message based on the following two args.
   1.207 +*/
   1.208 +static void icuFunctionError(
   1.209 +  sqlite3_context *pCtx,       /* SQLite scalar function context */
   1.210 +  const char *zName,           /* Name of ICU function that failed */
   1.211 +  UErrorCode e                 /* Error code returned by ICU function */
   1.212 +){
   1.213 +  char zBuf[128];
   1.214 +  sqlite3_snprintf(128, zBuf, "ICU error: %s(): %s", zName, u_errorName(e));
   1.215 +  zBuf[127] = '\0';
   1.216 +  sqlite3_result_error(pCtx, zBuf, -1);
   1.217 +}
   1.218 +
   1.219 +/*
   1.220 +** Function to delete compiled regexp objects. Registered as
   1.221 +** a destructor function with sqlite3_set_auxdata().
   1.222 +*/
   1.223 +static void icuRegexpDelete(void *p){
   1.224 +  URegularExpression *pExpr = (URegularExpression *)p;
   1.225 +  uregex_close(pExpr);
   1.226 +}
   1.227 +
   1.228 +/*
   1.229 +** Implementation of SQLite REGEXP operator. This scalar function takes
   1.230 +** two arguments. The first is a regular expression pattern to compile
   1.231 +** the second is a string to match against that pattern. If either 
   1.232 +** argument is an SQL NULL, then NULL Is returned. Otherwise, the result
   1.233 +** is 1 if the string matches the pattern, or 0 otherwise.
   1.234 +**
   1.235 +** SQLite maps the regexp() function to the regexp() operator such
   1.236 +** that the following two are equivalent:
   1.237 +**
   1.238 +**     zString REGEXP zPattern
   1.239 +**     regexp(zPattern, zString)
   1.240 +**
   1.241 +** Uses the following ICU regexp APIs:
   1.242 +**
   1.243 +**     uregex_open()
   1.244 +**     uregex_matches()
   1.245 +**     uregex_close()
   1.246 +*/
   1.247 +static void icuRegexpFunc(sqlite3_context *p, int nArg, sqlite3_value **apArg){
   1.248 +  UErrorCode status = U_ZERO_ERROR;
   1.249 +  URegularExpression *pExpr;
   1.250 +  UBool res;
   1.251 +  const UChar *zString = sqlite3_value_text16(apArg[1]);
   1.252 +
   1.253 +  /* If the left hand side of the regexp operator is NULL, 
   1.254 +  ** then the result is also NULL. 
   1.255 +  */
   1.256 +  if( !zString ){
   1.257 +    return;
   1.258 +  }
   1.259 +
   1.260 +  pExpr = sqlite3_get_auxdata(p, 0);
   1.261 +  if( !pExpr ){
   1.262 +    const UChar *zPattern = sqlite3_value_text16(apArg[0]);
   1.263 +    if( !zPattern ){
   1.264 +      return;
   1.265 +    }
   1.266 +    pExpr = uregex_open(zPattern, -1, 0, 0, &status);
   1.267 +
   1.268 +    if( U_SUCCESS(status) ){
   1.269 +      sqlite3_set_auxdata(p, 0, pExpr, icuRegexpDelete);
   1.270 +    }else{
   1.271 +      assert(!pExpr);
   1.272 +      icuFunctionError(p, "uregex_open", status);
   1.273 +      return;
   1.274 +    }
   1.275 +  }
   1.276 +
   1.277 +  /* Configure the text that the regular expression operates on. */
   1.278 +  uregex_setText(pExpr, zString, -1, &status);
   1.279 +  if( !U_SUCCESS(status) ){
   1.280 +    icuFunctionError(p, "uregex_setText", status);
   1.281 +    return;
   1.282 +  }
   1.283 +
   1.284 +  /* Attempt the match */
   1.285 +  res = uregex_matches(pExpr, 0, &status);
   1.286 +  if( !U_SUCCESS(status) ){
   1.287 +    icuFunctionError(p, "uregex_matches", status);
   1.288 +    return;
   1.289 +  }
   1.290 +
   1.291 +  /* Set the text that the regular expression operates on to a NULL
   1.292 +  ** pointer. This is not really necessary, but it is tidier than 
   1.293 +  ** leaving the regular expression object configured with an invalid
   1.294 +  ** pointer after this function returns.
   1.295 +  */
   1.296 +  uregex_setText(pExpr, 0, 0, &status);
   1.297 +
   1.298 +  /* Return 1 or 0. */
   1.299 +  sqlite3_result_int(p, res ? 1 : 0);
   1.300 +}
   1.301 +
   1.302 +/*
   1.303 +** Implementations of scalar functions for case mapping - upper() and 
   1.304 +** lower(). Function upper() converts its input to upper-case (ABC).
   1.305 +** Function lower() converts to lower-case (abc).
   1.306 +**
   1.307 +** ICU provides two types of case mapping, "general" case mapping and
   1.308 +** "language specific". Refer to ICU documentation for the differences
   1.309 +** between the two.
   1.310 +**
   1.311 +** To utilise "general" case mapping, the upper() or lower() scalar 
   1.312 +** functions are invoked with one argument:
   1.313 +**
   1.314 +**     upper('ABC') -> 'abc'
   1.315 +**     lower('abc') -> 'ABC'
   1.316 +**
   1.317 +** To access ICU "language specific" case mapping, upper() or lower()
   1.318 +** should be invoked with two arguments. The second argument is the name
   1.319 +** of the locale to use. Passing an empty string ("") or SQL NULL value
   1.320 +** as the second argument is the same as invoking the 1 argument version
   1.321 +** of upper() or lower().
   1.322 +**
   1.323 +**     lower('I', 'en_us') -> 'i'
   1.324 +**     lower('I', 'tr_tr') -> 'ı' (small dotless i)
   1.325 +**
   1.326 +** http://www.icu-project.org/userguide/posix.html#case_mappings
   1.327 +*/
   1.328 +static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){
   1.329 +  const UChar *zInput;
   1.330 +  UChar *zOutput;
   1.331 +  int nInput;
   1.332 +  int nOutput;
   1.333 +
   1.334 +  UErrorCode status = U_ZERO_ERROR;
   1.335 +  const char *zLocale = 0;
   1.336 +
   1.337 +  assert(nArg==1 || nArg==2);
   1.338 +  if( nArg==2 ){
   1.339 +    zLocale = (const char *)sqlite3_value_text(apArg[1]);
   1.340 +  }
   1.341 +
   1.342 +  zInput = sqlite3_value_text16(apArg[0]);
   1.343 +  if( !zInput ){
   1.344 +    return;
   1.345 +  }
   1.346 +  nInput = sqlite3_value_bytes16(apArg[0]);
   1.347 +
   1.348 +  nOutput = nInput * 2 + 2;
   1.349 +  zOutput = sqlite3_malloc(nOutput);
   1.350 +  if( !zOutput ){
   1.351 +    return;
   1.352 +  }
   1.353 +
   1.354 +  if( sqlite3_user_data(p) ){
   1.355 +    u_strToUpper(zOutput, nOutput/2, zInput, nInput/2, zLocale, &status);
   1.356 +  }else{
   1.357 +    u_strToLower(zOutput, nOutput/2, zInput, nInput/2, zLocale, &status);
   1.358 +  }
   1.359 +
   1.360 +  if( !U_SUCCESS(status) ){
   1.361 +    icuFunctionError(p, "u_strToLower()/u_strToUpper", status);
   1.362 +    return;
   1.363 +  }
   1.364 +
   1.365 +  sqlite3_result_text16(p, zOutput, -1, xFree);
   1.366 +}
   1.367 +
   1.368 +/*
   1.369 +** Collation sequence destructor function. The pCtx argument points to
   1.370 +** a UCollator structure previously allocated using ucol_open().
   1.371 +*/
   1.372 +static void icuCollationDel(void *pCtx){
   1.373 +  UCollator *p = (UCollator *)pCtx;
   1.374 +  ucol_close(p);
   1.375 +}
   1.376 +
   1.377 +/*
   1.378 +** Collation sequence comparison function. The pCtx argument points to
   1.379 +** a UCollator structure previously allocated using ucol_open().
   1.380 +*/
   1.381 +static int icuCollationColl(
   1.382 +  void *pCtx,
   1.383 +  int nLeft,
   1.384 +  const void *zLeft,
   1.385 +  int nRight,
   1.386 +  const void *zRight
   1.387 +){
   1.388 +  UCollationResult res;
   1.389 +  UCollator *p = (UCollator *)pCtx;
   1.390 +  res = ucol_strcoll(p, (UChar *)zLeft, nLeft/2, (UChar *)zRight, nRight/2);
   1.391 +  switch( res ){
   1.392 +    case UCOL_LESS:    return -1;
   1.393 +    case UCOL_GREATER: return +1;
   1.394 +    case UCOL_EQUAL:   return 0;
   1.395 +  }
   1.396 +  assert(!"Unexpected return value from ucol_strcoll()");
   1.397 +  return 0;
   1.398 +}
   1.399 +
   1.400 +/*
   1.401 +** Implementation of the scalar function icu_load_collation().
   1.402 +**
   1.403 +** This scalar function is used to add ICU collation based collation 
   1.404 +** types to an SQLite database connection. It is intended to be called
   1.405 +** as follows:
   1.406 +**
   1.407 +**     SELECT icu_load_collation(<locale>, <collation-name>);
   1.408 +**
   1.409 +** Where <locale> is a string containing an ICU locale identifier (i.e.
   1.410 +** "en_AU", "tr_TR" etc.) and <collation-name> is the name of the
   1.411 +** collation sequence to create.
   1.412 +*/
   1.413 +static void icuLoadCollation(
   1.414 +  sqlite3_context *p, 
   1.415 +  int nArg, 
   1.416 +  sqlite3_value **apArg
   1.417 +){
   1.418 +  sqlite3 *db = (sqlite3 *)sqlite3_user_data(p);
   1.419 +  UErrorCode status = U_ZERO_ERROR;
   1.420 +  const char *zLocale;      /* Locale identifier - (eg. "jp_JP") */
   1.421 +  const char *zName;        /* SQL Collation sequence name (eg. "japanese") */
   1.422 +  UCollator *pUCollator;    /* ICU library collation object */
   1.423 +  int rc;                   /* Return code from sqlite3_create_collation_x() */
   1.424 +
   1.425 +  assert(nArg==2);
   1.426 +  zLocale = (const char *)sqlite3_value_text(apArg[0]);
   1.427 +  zName = (const char *)sqlite3_value_text(apArg[1]);
   1.428 +
   1.429 +  if( !zLocale || !zName ){
   1.430 +    return;
   1.431 +  }
   1.432 +
   1.433 +  pUCollator = ucol_open(zLocale, &status);
   1.434 +  if( !U_SUCCESS(status) ){
   1.435 +    icuFunctionError(p, "ucol_open", status);
   1.436 +    return;
   1.437 +  }
   1.438 +  assert(p);
   1.439 +
   1.440 +  rc = sqlite3_create_collation_v2(db, zName, SQLITE_UTF16, (void *)pUCollator, 
   1.441 +      icuCollationColl, icuCollationDel
   1.442 +  );
   1.443 +  if( rc!=SQLITE_OK ){
   1.444 +    ucol_close(pUCollator);
   1.445 +    sqlite3_result_error(p, "Error registering collation function", -1);
   1.446 +  }
   1.447 +}
   1.448 +
   1.449 +/*
   1.450 +** Register the ICU extension functions with database db.
   1.451 +*/
   1.452 +int sqlite3IcuInit(sqlite3 *db){
   1.453 +  struct IcuScalar {
   1.454 +    const char *zName;                        /* Function name */
   1.455 +    int nArg;                                 /* Number of arguments */
   1.456 +    int enc;                                  /* Optimal text encoding */
   1.457 +    void *pContext;                           /* sqlite3_user_data() context */
   1.458 +    void (*xFunc)(sqlite3_context*,int,sqlite3_value**);
   1.459 +  } scalars[] = {
   1.460 +    {"regexp",-1, SQLITE_ANY,          0, icuRegexpFunc},
   1.461 +
   1.462 +    {"lower",  1, SQLITE_UTF16,        0, icuCaseFunc16},
   1.463 +    {"lower",  2, SQLITE_UTF16,        0, icuCaseFunc16},
   1.464 +    {"upper",  1, SQLITE_UTF16, (void*)1, icuCaseFunc16},
   1.465 +    {"upper",  2, SQLITE_UTF16, (void*)1, icuCaseFunc16},
   1.466 +
   1.467 +    {"lower",  1, SQLITE_UTF8,         0, icuCaseFunc16},
   1.468 +    {"lower",  2, SQLITE_UTF8,         0, icuCaseFunc16},
   1.469 +    {"upper",  1, SQLITE_UTF8,  (void*)1, icuCaseFunc16},
   1.470 +    {"upper",  2, SQLITE_UTF8,  (void*)1, icuCaseFunc16},
   1.471 +
   1.472 +    {"like",   2, SQLITE_UTF8,         0, icuLikeFunc},
   1.473 +    {"like",   3, SQLITE_UTF8,         0, icuLikeFunc},
   1.474 +
   1.475 +    {"icu_load_collation",  2, SQLITE_UTF8, (void*)db, icuLoadCollation},
   1.476 +  };
   1.477 +
   1.478 +  int rc = SQLITE_OK;
   1.479 +  int i;
   1.480 +
   1.481 +  for(i=0; rc==SQLITE_OK && i<(sizeof(scalars)/sizeof(struct IcuScalar)); i++){
   1.482 +    struct IcuScalar *p = &scalars[i];
   1.483 +    rc = sqlite3_create_function(
   1.484 +        db, p->zName, p->nArg, p->enc, p->pContext, p->xFunc, 0, 0
   1.485 +    );
   1.486 +  }
   1.487 +
   1.488 +  return rc;
   1.489 +}
   1.490 +
   1.491 +#if !SQLITE_CORE
   1.492 +int sqlite3_extension_init(
   1.493 +  sqlite3 *db, 
   1.494 +  char **pzErrMsg,
   1.495 +  const sqlite3_api_routines *pApi
   1.496 +){
   1.497 +  SQLITE_EXTENSION_INIT2(pApi)
   1.498 +  return sqlite3IcuInit(db);
   1.499 +}
   1.500 +#endif
   1.501 +
   1.502 +#endif