os/persistentdata/persistentstorage/sql/SQLite364/utf.c
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/persistentdata/persistentstorage/sql/SQLite364/utf.c	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,530 @@
     1.4 +/*
     1.5 +** 2004 April 13
     1.6 +**
     1.7 +** The author disclaims copyright to this source code.  In place of
     1.8 +** a legal notice, here is a blessing:
     1.9 +**
    1.10 +**    May you do good and not evil.
    1.11 +**    May you find forgiveness for yourself and forgive others.
    1.12 +**    May you share freely, never taking more than you give.
    1.13 +**
    1.14 +*************************************************************************
    1.15 +** This file contains routines used to translate between UTF-8, 
    1.16 +** UTF-16, UTF-16BE, and UTF-16LE.
    1.17 +**
    1.18 +** $Id: utf.c,v 1.65 2008/08/12 15:04:59 danielk1977 Exp $
    1.19 +**
    1.20 +** Notes on UTF-8:
    1.21 +**
    1.22 +**   Byte-0    Byte-1    Byte-2    Byte-3    Value
    1.23 +**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
    1.24 +**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
    1.25 +**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
    1.26 +**  11110uuu  10uuzzzz  10yyyyyy  10xxxxxx   000uuuuu zzzzyyyy yyxxxxxx
    1.27 +**
    1.28 +**
    1.29 +** Notes on UTF-16:  (with wwww+1==uuuuu)
    1.30 +**
    1.31 +**      Word-0               Word-1          Value
    1.32 +**  110110ww wwzzzzyy   110111yy yyxxxxxx    000uuuuu zzzzyyyy yyxxxxxx
    1.33 +**  zzzzyyyy yyxxxxxx                        00000000 zzzzyyyy yyxxxxxx
    1.34 +**
    1.35 +**
    1.36 +** BOM or Byte Order Mark:
    1.37 +**     0xff 0xfe   little-endian utf-16 follows
    1.38 +**     0xfe 0xff   big-endian utf-16 follows
    1.39 +**
    1.40 +*/
    1.41 +#include "sqliteInt.h"
    1.42 +#include <assert.h>
    1.43 +#include "vdbeInt.h"
    1.44 +
/*
** The following constant value is used by the SQLITE_BIGENDIAN and
** SQLITE_LITTLEENDIAN macros.
**
** NOTE(review): those macros are defined outside this file; presumably
** they look at the first byte of this integer to detect the byte order
** of the host at run time -- confirm against sqliteInt.h.
**
** The symbol has external linkage, so this must remain its only
** definition.
*/
const int sqlite3one = 1;
    1.50 +
    1.51 +/*
    1.52 +** This lookup table is used to help decode the first byte of
    1.53 +** a multi-byte UTF8 character.
    1.54 +*/
    1.55 +static const unsigned char sqlite3UtfTrans1[] = {
    1.56 +  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    1.57 +  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    1.58 +  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    1.59 +  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    1.60 +  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    1.61 +  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    1.62 +  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    1.63 +  0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
    1.64 +};
    1.65 +
    1.66 +
    1.67 +#define WRITE_UTF8(zOut, c) {                          \
    1.68 +  if( c<0x00080 ){                                     \
    1.69 +    *zOut++ = (c&0xFF);                                \
    1.70 +  }                                                    \
    1.71 +  else if( c<0x00800 ){                                \
    1.72 +    *zOut++ = 0xC0 + ((c>>6)&0x1F);                    \
    1.73 +    *zOut++ = 0x80 + (c & 0x3F);                       \
    1.74 +  }                                                    \
    1.75 +  else if( c<0x10000 ){                                \
    1.76 +    *zOut++ = 0xE0 + ((c>>12)&0x0F);                   \
    1.77 +    *zOut++ = 0x80 + ((c>>6) & 0x3F);                  \
    1.78 +    *zOut++ = 0x80 + (c & 0x3F);                       \
    1.79 +  }else{                                               \
    1.80 +    *zOut++ = 0xF0 + ((c>>18) & 0x07);                 \
    1.81 +    *zOut++ = 0x80 + ((c>>12) & 0x3F);                 \
    1.82 +    *zOut++ = 0x80 + ((c>>6) & 0x3F);                  \
    1.83 +    *zOut++ = 0x80 + (c & 0x3F);                       \
    1.84 +  }                                                    \
    1.85 +}
    1.86 +
    1.87 +#define WRITE_UTF16LE(zOut, c) {                                \
    1.88 +  if( c<=0xFFFF ){                                              \
    1.89 +    *zOut++ = (c&0x00FF);                                       \
    1.90 +    *zOut++ = ((c>>8)&0x00FF);                                  \
    1.91 +  }else{                                                        \
    1.92 +    *zOut++ = (((c>>10)&0x003F) + (((c-0x10000)>>10)&0x00C0));  \
    1.93 +    *zOut++ = (0x00D8 + (((c-0x10000)>>18)&0x03));              \
    1.94 +    *zOut++ = (c&0x00FF);                                       \
    1.95 +    *zOut++ = (0x00DC + ((c>>8)&0x03));                         \
    1.96 +  }                                                             \
    1.97 +}
    1.98 +
    1.99 +#define WRITE_UTF16BE(zOut, c) {                                \
   1.100 +  if( c<=0xFFFF ){                                              \
   1.101 +    *zOut++ = ((c>>8)&0x00FF);                                  \
   1.102 +    *zOut++ = (c&0x00FF);                                       \
   1.103 +  }else{                                                        \
   1.104 +    *zOut++ = (0x00D8 + (((c-0x10000)>>18)&0x03));              \
   1.105 +    *zOut++ = (((c>>10)&0x003F) + (((c-0x10000)>>10)&0x00C0));  \
   1.106 +    *zOut++ = (0x00DC + ((c>>8)&0x03));                         \
   1.107 +    *zOut++ = (c&0x00FF);                                       \
   1.108 +  }                                                             \
   1.109 +}
   1.110 +
   1.111 +#define READ_UTF16LE(zIn, c){                                         \
   1.112 +  c = (*zIn++);                                                       \
   1.113 +  c += ((*zIn++)<<8);                                                 \
   1.114 +  if( c>=0xD800 && c<0xE000 ){                                       \
   1.115 +    int c2 = (*zIn++);                                                \
   1.116 +    c2 += ((*zIn++)<<8);                                              \
   1.117 +    c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10);   \
   1.118 +    if( (c & 0xFFFF0000)==0 ) c = 0xFFFD;                             \
   1.119 +  }                                                                   \
   1.120 +}
   1.121 +
   1.122 +#define READ_UTF16BE(zIn, c){                                         \
   1.123 +  c = ((*zIn++)<<8);                                                  \
   1.124 +  c += (*zIn++);                                                      \
   1.125 +  if( c>=0xD800 && c<0xE000 ){                                       \
   1.126 +    int c2 = ((*zIn++)<<8);                                           \
   1.127 +    c2 += (*zIn++);                                                   \
   1.128 +    c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10);   \
   1.129 +    if( (c & 0xFFFF0000)==0 ) c = 0xFFFD;                             \
   1.130 +  }                                                                   \
   1.131 +}
   1.132 +
   1.133 +/*
   1.134 +** Translate a single UTF-8 character.  Return the unicode value.
   1.135 +**
    1.136 +** During translation, assume that the byte that zTerm points to
    1.137 +** is a 0x00.
   1.138 +**
   1.139 +** Write a pointer to the next unread byte back into *pzNext.
   1.140 +**
   1.141 +** Notes On Invalid UTF-8:
   1.142 +**
   1.143 +**  *  This routine never allows a 7-bit character (0x00 through 0x7f) to
   1.144 +**     be encoded as a multi-byte character.  Any multi-byte character that
   1.145 +**     attempts to encode a value between 0x00 and 0x7f is rendered as 0xfffd.
   1.146 +**
   1.147 +**  *  This routine never allows a UTF16 surrogate value to be encoded.
   1.148 +**     If a multi-byte character attempts to encode a value between
   1.149 +**     0xd800 and 0xe000 then it is rendered as 0xfffd.
   1.150 +**
   1.151 +**  *  Bytes in the range of 0x80 through 0xbf which occur as the first
   1.152 +**     byte of a character are interpreted as single-byte characters
   1.153 +**     and rendered as themselves even though they are technically
   1.154 +**     invalid characters.
   1.155 +**
   1.156 +**  *  This routine accepts an infinite number of different UTF8 encodings
    1.157 +**     for unicode values 0x80 and greater.  It does not change over-length
   1.158 +**     encodings to 0xfffd as some systems recommend.
   1.159 +*/
   1.160 +#define READ_UTF8(zIn, zTerm, c)                           \
   1.161 +  c = *(zIn++);                                            \
   1.162 +  if( c>=0xc0 ){                                           \
   1.163 +    c = sqlite3UtfTrans1[c-0xc0];                          \
   1.164 +    while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){            \
   1.165 +      c = (c<<6) + (0x3f & *(zIn++));                      \
   1.166 +    }                                                      \
   1.167 +    if( c<0x80                                             \
   1.168 +        || (c&0xFFFFF800)==0xD800                          \
   1.169 +        || (c&0xFFFFFFFE)==0xFFFE ){  c = 0xFFFD; }        \
   1.170 +  }
   1.171 +int sqlite3Utf8Read(
   1.172 +  const unsigned char *z,         /* First byte of UTF-8 character */
   1.173 +  const unsigned char *zTerm,     /* Pretend this byte is 0x00 */
   1.174 +  const unsigned char **pzNext    /* Write first byte past UTF-8 char here */
   1.175 +){
   1.176 +  int c;
   1.177 +  READ_UTF8(z, zTerm, c);
   1.178 +  *pzNext = z;
   1.179 +  return c;
   1.180 +}
   1.181 +
   1.182 +
   1.183 +
   1.184 +
   1.185 +/*
   1.186 +** If the TRANSLATE_TRACE macro is defined, the value of each Mem is
   1.187 +** printed on stderr on the way into and out of sqlite3VdbeMemTranslate().
   1.188 +*/ 
   1.189 +/* #define TRANSLATE_TRACE 1 */
   1.190 +
   1.191 +#ifndef SQLITE_OMIT_UTF16
   1.192 +/*
   1.193 +** This routine transforms the internal text encoding used by pMem to
   1.194 +** desiredEnc. It is an error if the string is already of the desired
   1.195 +** encoding, or if *pMem does not contain a string value.
   1.196 +*/
   1.197 +int sqlite3VdbeMemTranslate(Mem *pMem, u8 desiredEnc){
   1.198 +  int len;                    /* Maximum length of output string in bytes */
   1.199 +  unsigned char *zOut;                  /* Output buffer */
   1.200 +  unsigned char *zIn;                   /* Input iterator */
   1.201 +  unsigned char *zTerm;                 /* End of input */
   1.202 +  unsigned char *z;                     /* Output iterator */
   1.203 +  unsigned int c;
   1.204 +
   1.205 +  assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) );
   1.206 +  assert( pMem->flags&MEM_Str );
   1.207 +  assert( pMem->enc!=desiredEnc );
   1.208 +  assert( pMem->enc!=0 );
   1.209 +  assert( pMem->n>=0 );
   1.210 +
   1.211 +#if defined(TRANSLATE_TRACE) && defined(SQLITE_DEBUG)
   1.212 +  {
   1.213 +    char zBuf[100];
   1.214 +    sqlite3VdbeMemPrettyPrint(pMem, zBuf);
   1.215 +    fprintf(stderr, "INPUT:  %s\n", zBuf);
   1.216 +  }
   1.217 +#endif
   1.218 +
   1.219 +  /* If the translation is between UTF-16 little and big endian, then 
   1.220 +  ** all that is required is to swap the byte order. This case is handled
   1.221 +  ** differently from the others.
   1.222 +  */
   1.223 +  if( pMem->enc!=SQLITE_UTF8 && desiredEnc!=SQLITE_UTF8 ){
   1.224 +    u8 temp;
   1.225 +    int rc;
   1.226 +    rc = sqlite3VdbeMemMakeWriteable(pMem);
   1.227 +    if( rc!=SQLITE_OK ){
   1.228 +      assert( rc==SQLITE_NOMEM );
   1.229 +      return SQLITE_NOMEM;
   1.230 +    }
   1.231 +    zIn = (u8*)pMem->z;
   1.232 +    zTerm = &zIn[pMem->n];
   1.233 +    while( zIn<zTerm ){
   1.234 +      temp = *zIn;
   1.235 +      *zIn = *(zIn+1);
   1.236 +      zIn++;
   1.237 +      *zIn++ = temp;
   1.238 +    }
   1.239 +    pMem->enc = desiredEnc;
   1.240 +    goto translate_out;
   1.241 +  }
   1.242 +
   1.243 +  /* Set len to the maximum number of bytes required in the output buffer. */
   1.244 +  if( desiredEnc==SQLITE_UTF8 ){
   1.245 +    /* When converting from UTF-16, the maximum growth results from
   1.246 +    ** translating a 2-byte character to a 4-byte UTF-8 character.
   1.247 +    ** A single byte is required for the output string
   1.248 +    ** nul-terminator.
   1.249 +    */
   1.250 +    len = pMem->n * 2 + 1;
   1.251 +  }else{
   1.252 +    /* When converting from UTF-8 to UTF-16 the maximum growth is caused
   1.253 +    ** when a 1-byte UTF-8 character is translated into a 2-byte UTF-16
   1.254 +    ** character. Two bytes are required in the output buffer for the
   1.255 +    ** nul-terminator.
   1.256 +    */
   1.257 +    len = pMem->n * 2 + 2;
   1.258 +  }
   1.259 +
   1.260 +  /* Set zIn to point at the start of the input buffer and zTerm to point 1
   1.261 +  ** byte past the end.
   1.262 +  **
   1.263 +  ** Variable zOut is set to point at the output buffer, space obtained
   1.264 +  ** from sqlite3_malloc().
   1.265 +  */
   1.266 +  zIn = (u8*)pMem->z;
   1.267 +  zTerm = &zIn[pMem->n];
   1.268 +  zOut = sqlite3DbMallocRaw(pMem->db, len);
   1.269 +  if( !zOut ){
   1.270 +    return SQLITE_NOMEM;
   1.271 +  }
   1.272 +  z = zOut;
   1.273 +
   1.274 +  if( pMem->enc==SQLITE_UTF8 ){
   1.275 +    if( desiredEnc==SQLITE_UTF16LE ){
   1.276 +      /* UTF-8 -> UTF-16 Little-endian */
   1.277 +      while( zIn<zTerm ){
   1.278 +        /* c = sqlite3Utf8Read(zIn, zTerm, (const u8**)&zIn); */
   1.279 +        READ_UTF8(zIn, zTerm, c);
   1.280 +        WRITE_UTF16LE(z, c);
   1.281 +      }
   1.282 +    }else{
   1.283 +      assert( desiredEnc==SQLITE_UTF16BE );
   1.284 +      /* UTF-8 -> UTF-16 Big-endian */
   1.285 +      while( zIn<zTerm ){
   1.286 +        /* c = sqlite3Utf8Read(zIn, zTerm, (const u8**)&zIn); */
   1.287 +        READ_UTF8(zIn, zTerm, c);
   1.288 +        WRITE_UTF16BE(z, c);
   1.289 +      }
   1.290 +    }
   1.291 +    pMem->n = z - zOut;
   1.292 +    *z++ = 0;
   1.293 +  }else{
   1.294 +    assert( desiredEnc==SQLITE_UTF8 );
   1.295 +    if( pMem->enc==SQLITE_UTF16LE ){
   1.296 +      /* UTF-16 Little-endian -> UTF-8 */
   1.297 +      while( zIn<zTerm ){
   1.298 +        READ_UTF16LE(zIn, c); 
   1.299 +        WRITE_UTF8(z, c);
   1.300 +      }
   1.301 +    }else{
   1.302 +      /* UTF-16 Big-endian -> UTF-8 */
   1.303 +      while( zIn<zTerm ){
   1.304 +        READ_UTF16BE(zIn, c); 
   1.305 +        WRITE_UTF8(z, c);
   1.306 +      }
   1.307 +    }
   1.308 +    pMem->n = z - zOut;
   1.309 +  }
   1.310 +  *z = 0;
   1.311 +  assert( (pMem->n+(desiredEnc==SQLITE_UTF8?1:2))<=len );
   1.312 +
   1.313 +  sqlite3VdbeMemRelease(pMem);
   1.314 +  pMem->flags &= ~(MEM_Static|MEM_Dyn|MEM_Ephem);
   1.315 +  pMem->enc = desiredEnc;
   1.316 +  pMem->flags |= (MEM_Term|MEM_Dyn);
   1.317 +  pMem->z = (char*)zOut;
   1.318 +  pMem->zMalloc = pMem->z;
   1.319 +
   1.320 +translate_out:
   1.321 +#if defined(TRANSLATE_TRACE) && defined(SQLITE_DEBUG)
   1.322 +  {
   1.323 +    char zBuf[100];
   1.324 +    sqlite3VdbeMemPrettyPrint(pMem, zBuf);
   1.325 +    fprintf(stderr, "OUTPUT: %s\n", zBuf);
   1.326 +  }
   1.327 +#endif
   1.328 +  return SQLITE_OK;
   1.329 +}
   1.330 +
   1.331 +/*
   1.332 +** This routine checks for a byte-order mark at the beginning of the 
   1.333 +** UTF-16 string stored in *pMem. If one is present, it is removed and
   1.334 +** the encoding of the Mem adjusted. This routine does not do any
   1.335 +** byte-swapping, it just sets Mem.enc appropriately.
   1.336 +**
   1.337 +** The allocation (static, dynamic etc.) and encoding of the Mem may be
   1.338 +** changed by this function.
   1.339 +*/
   1.340 +int sqlite3VdbeMemHandleBom(Mem *pMem){
   1.341 +  int rc = SQLITE_OK;
   1.342 +  u8 bom = 0;
   1.343 +
   1.344 +  if( pMem->n<0 || pMem->n>1 ){
   1.345 +    u8 b1 = *(u8 *)pMem->z;
   1.346 +    u8 b2 = *(((u8 *)pMem->z) + 1);
   1.347 +    if( b1==0xFE && b2==0xFF ){
   1.348 +      bom = SQLITE_UTF16BE;
   1.349 +    }
   1.350 +    if( b1==0xFF && b2==0xFE ){
   1.351 +      bom = SQLITE_UTF16LE;
   1.352 +    }
   1.353 +  }
   1.354 +  
   1.355 +  if( bom ){
   1.356 +    rc = sqlite3VdbeMemMakeWriteable(pMem);
   1.357 +    if( rc==SQLITE_OK ){
   1.358 +      pMem->n -= 2;
   1.359 +      memmove(pMem->z, &pMem->z[2], pMem->n);
   1.360 +      pMem->z[pMem->n] = '\0';
   1.361 +      pMem->z[pMem->n+1] = '\0';
   1.362 +      pMem->flags |= MEM_Term;
   1.363 +      pMem->enc = bom;
   1.364 +    }
   1.365 +  }
   1.366 +  return rc;
   1.367 +}
   1.368 +#endif /* SQLITE_OMIT_UTF16 */
   1.369 +
   1.370 +/*
   1.371 +** pZ is a UTF-8 encoded unicode string. If nByte is less than zero,
   1.372 +** return the number of unicode characters in pZ up to (but not including)
   1.373 +** the first 0x00 byte. If nByte is not less than zero, return the
   1.374 +** number of unicode characters in the first nByte of pZ (or up to 
   1.375 +** the first 0x00, whichever comes first).
   1.376 +*/
   1.377 +int sqlite3Utf8CharLen(const char *zIn, int nByte){
   1.378 +  int r = 0;
   1.379 +  const u8 *z = (const u8*)zIn;
   1.380 +  const u8 *zTerm;
   1.381 +  if( nByte>=0 ){
   1.382 +    zTerm = &z[nByte];
   1.383 +  }else{
   1.384 +    zTerm = (const u8*)(-1);
   1.385 +  }
   1.386 +  assert( z<=zTerm );
   1.387 +  while( *z!=0 && z<zTerm ){
   1.388 +    SQLITE_SKIP_UTF8(z);
   1.389 +    r++;
   1.390 +  }
   1.391 +  return r;
   1.392 +}
   1.393 +
   1.394 +/* This test function is not currently used by the automated test-suite. 
   1.395 +** Hence it is only available in debug builds.
   1.396 +*/
   1.397 +#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
   1.398 +/*
   1.399 +** Translate UTF-8 to UTF-8.
   1.400 +**
   1.401 +** This has the effect of making sure that the string is well-formed
   1.402 +** UTF-8.  Miscoded characters are removed.
   1.403 +**
   1.404 +** The translation is done in-place (since it is impossible for the
   1.405 +** correct UTF-8 encoding to be longer than a malformed encoding).
   1.406 +*/
   1.407 +int sqlite3Utf8To8(unsigned char *zIn){
   1.408 +  unsigned char *zOut = zIn;
   1.409 +  unsigned char *zStart = zIn;
   1.410 +  unsigned char *zTerm = &zIn[strlen((char *)zIn)];
   1.411 +  u32 c;
   1.412 +
   1.413 +  while( zIn[0] ){
   1.414 +    c = sqlite3Utf8Read(zIn, zTerm, (const u8**)&zIn);
   1.415 +    if( c!=0xfffd ){
   1.416 +      WRITE_UTF8(zOut, c);
   1.417 +    }
   1.418 +  }
   1.419 +  *zOut = 0;
   1.420 +  return zOut - zStart;
   1.421 +}
   1.422 +#endif
   1.423 +
   1.424 +#ifndef SQLITE_OMIT_UTF16
   1.425 +/*
   1.426 +** Convert a UTF-16 string in the native encoding into a UTF-8 string.
   1.427 +** Memory to hold the UTF-8 string is obtained from sqlite3_malloc and must
   1.428 +** be freed by the calling function.
   1.429 +**
   1.430 +** NULL is returned if there is an allocation error.
   1.431 +*/
   1.432 +char *sqlite3Utf16to8(sqlite3 *db, const void *z, int nByte){
   1.433 +  Mem m;
   1.434 +  memset(&m, 0, sizeof(m));
   1.435 +  m.db = db;
   1.436 +  sqlite3VdbeMemSetStr(&m, z, nByte, SQLITE_UTF16NATIVE, SQLITE_STATIC);
   1.437 +  sqlite3VdbeChangeEncoding(&m, SQLITE_UTF8);
   1.438 +  if( db->mallocFailed ){
   1.439 +    sqlite3VdbeMemRelease(&m);
   1.440 +    m.z = 0;
   1.441 +  }
   1.442 +  assert( (m.flags & MEM_Term)!=0 || db->mallocFailed );
   1.443 +  assert( (m.flags & MEM_Str)!=0 || db->mallocFailed );
   1.444 +  return (m.flags & MEM_Dyn)!=0 ? m.z : sqlite3DbStrDup(db, m.z);
   1.445 +}
   1.446 +
   1.447 +/*
   1.448 +** pZ is a UTF-16 encoded unicode string. If nChar is less than zero,
   1.449 +** return the number of bytes up to (but not including), the first pair
   1.450 +** of consecutive 0x00 bytes in pZ. If nChar is not less than zero,
   1.451 +** then return the number of bytes in the first nChar unicode characters
   1.452 +** in pZ (or up until the first pair of 0x00 bytes, whichever comes first).
   1.453 +*/
   1.454 +int sqlite3Utf16ByteLen(const void *zIn, int nChar){
   1.455 +  unsigned int c = 1;
   1.456 +  char const *z = zIn;
   1.457 +  int n = 0;
   1.458 +  if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){
   1.459 +    /* Using an "if (SQLITE_UTF16NATIVE==SQLITE_UTF16BE)" construct here
   1.460 +    ** and in other parts of this file means that at one branch will
   1.461 +    ** not be covered by coverage testing on any single host. But coverage
   1.462 +    ** will be complete if the tests are run on both a little-endian and 
   1.463 +    ** big-endian host. Because both the UTF16NATIVE and SQLITE_UTF16BE
   1.464 +    ** macros are constant at compile time the compiler can determine
   1.465 +    ** which branch will be followed. It is therefore assumed that no runtime
   1.466 +    ** penalty is paid for this "if" statement.
   1.467 +    */
   1.468 +    while( c && ((nChar<0) || n<nChar) ){
   1.469 +      READ_UTF16BE(z, c);
   1.470 +      n++;
   1.471 +    }
   1.472 +  }else{
   1.473 +    while( c && ((nChar<0) || n<nChar) ){
   1.474 +      READ_UTF16LE(z, c);
   1.475 +      n++;
   1.476 +    }
   1.477 +  }
   1.478 +  return (z-(char const *)zIn)-((c==0)?2:0);
   1.479 +}
   1.480 +
   1.481 +#if defined(SQLITE_TEST)
   1.482 +/*
   1.483 +** This routine is called from the TCL test function "translate_selftest".
   1.484 +** It checks that the primitives for serializing and deserializing
   1.485 +** characters in each encoding are inverses of each other.
   1.486 +*/
   1.487 +void sqlite3UtfSelfTest(void){
   1.488 +  unsigned int i, t;
   1.489 +  unsigned char zBuf[20];
   1.490 +  unsigned char *z;
   1.491 +  unsigned char *zTerm;
   1.492 +  int n;
   1.493 +  unsigned int c;
   1.494 +
   1.495 +  for(i=0; i<0x00110000; i++){
   1.496 +    z = zBuf;
   1.497 +    WRITE_UTF8(z, i);
   1.498 +    n = z-zBuf;
   1.499 +    z[0] = 0;
   1.500 +    zTerm = z;
   1.501 +    z = zBuf;
   1.502 +    c = sqlite3Utf8Read(z, zTerm, (const u8**)&z);
   1.503 +    t = i;
   1.504 +    if( i>=0xD800 && i<=0xDFFF ) t = 0xFFFD;
   1.505 +    if( (i&0xFFFFFFFE)==0xFFFE ) t = 0xFFFD;
   1.506 +    assert( c==t );
   1.507 +    assert( (z-zBuf)==n );
   1.508 +  }
   1.509 +  for(i=0; i<0x00110000; i++){
   1.510 +    if( i>=0xD800 && i<0xE000 ) continue;
   1.511 +    z = zBuf;
   1.512 +    WRITE_UTF16LE(z, i);
   1.513 +    n = z-zBuf;
   1.514 +    z[0] = 0;
   1.515 +    z = zBuf;
   1.516 +    READ_UTF16LE(z, c);
   1.517 +    assert( c==i );
   1.518 +    assert( (z-zBuf)==n );
   1.519 +  }
   1.520 +  for(i=0; i<0x00110000; i++){
   1.521 +    if( i>=0xD800 && i<0xE000 ) continue;
   1.522 +    z = zBuf;
   1.523 +    WRITE_UTF16BE(z, i);
   1.524 +    n = z-zBuf;
   1.525 +    z[0] = 0;
   1.526 +    z = zBuf;
   1.527 +    READ_UTF16BE(z, c);
   1.528 +    assert( c==i );
   1.529 +    assert( (z-zBuf)==n );
   1.530 +  }
   1.531 +}
   1.532 +#endif /* SQLITE_TEST */
   1.533 +#endif /* SQLITE_OMIT_UTF16 */