os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/generic/tclUtf.c
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/generic/tclUtf.c Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,1951 @@
1.4 +/*
1.5 + * tclUtf.c --
1.6 + *
1.7 + * Routines for manipulating UTF-8 strings.
1.8 + *
1.9 + * Copyright (c) 1997-1998 Sun Microsystems, Inc.
1.10 + * Portions Copyright (c) 2007-2008 Nokia Corporation and/or its subsidiaries. All rights reserved.
1.11 + *
1.12 + * See the file "license.terms" for information on usage and redistribution
1.13 + * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
1.14 + *
1.15 + * RCS: @(#) $Id: tclUtf.c,v 1.30.2.3 2005/09/07 14:35:56 dgp Exp $
1.16 + */
1.17 +
1.18 +#include "tclInt.h"
1.19 +
1.20 +/*
1.21 + * Include the static character classification tables and macros.
1.22 + */
1.23 +
1.24 +#include "tclUniData.c"
1.25 +
/*
 * The following macros are used for fast character category tests.  Each
 * x_BITS value is a bit set with one bit per Unicode general category that
 * belongs to the class, placed at the bit position given by that category's
 * constant.  The x_BITS values are shifted right by the category value to
 * determine whether the given category is included in the set.
 *
 * NOTE(review): the category constants (UPPERCASE_LETTER, ...) are not
 * defined in this file; presumably they come from the tclUniData.c include
 * above -- confirm.
 */

/* Letter categories: upper, lower, title case, modifier and other letters. */
#define ALPHA_BITS ((1 << UPPERCASE_LETTER) | (1 << LOWERCASE_LETTER) \
    | (1 << TITLECASE_LETTER) | (1 << MODIFIER_LETTER) | (1 << OTHER_LETTER))

/* Decimal digits only. */
#define DIGIT_BITS (1 << DECIMAL_DIGIT_NUMBER)

/* Space, line and paragraph separators. */
#define SPACE_BITS ((1 << SPACE_SEPARATOR) | (1 << LINE_SEPARATOR) \
    | (1 << PARAGRAPH_SEPARATOR))

/* Connector punctuation only. */
#define CONNECTOR_BITS (1 << CONNECTOR_PUNCTUATION)

/*
 * Everything in ALPHA_BITS, DIGIT_BITS and SPACE_BITS, plus the mark,
 * letter/other number, punctuation and symbol categories.
 */
#define PRINT_BITS (ALPHA_BITS | DIGIT_BITS | SPACE_BITS | \
    (1 << NON_SPACING_MARK) | (1 << ENCLOSING_MARK) | \
    (1 << COMBINING_SPACING_MARK) | (1 << LETTER_NUMBER) | \
    (1 << OTHER_NUMBER) | (1 << CONNECTOR_PUNCTUATION) | \
    (1 << DASH_PUNCTUATION) | (1 << OPEN_PUNCTUATION) | \
    (1 << CLOSE_PUNCTUATION) | (1 << INITIAL_QUOTE_PUNCTUATION) | \
    (1 << FINAL_QUOTE_PUNCTUATION) | (1 << OTHER_PUNCTUATION) | \
    (1 << MATH_SYMBOL) | (1 << CURRENCY_SYMBOL) | \
    (1 << MODIFIER_SYMBOL) | (1 << OTHER_SYMBOL))

/* All seven punctuation categories. */
#define PUNCT_BITS ((1 << CONNECTOR_PUNCTUATION) | \
    (1 << DASH_PUNCTUATION) | (1 << OPEN_PUNCTUATION) | \
    (1 << CLOSE_PUNCTUATION) | (1 << INITIAL_QUOTE_PUNCTUATION) | \
    (1 << FINAL_QUOTE_PUNCTUATION) | (1 << OTHER_PUNCTUATION))

/*
 * Unicode characters less than this value are represented by themselves
 * in UTF-8 strings, i.e. as a single byte.  The encoder in this file
 * applies that rule only to values strictly greater than zero.
 */

#define UNICODE_SELF 0x80
1.63 +
/*
 * The following table is used when mapping between Unicode (UCS-2)
 * and UTF-8.  It is indexed by the first byte of a UTF-8 sequence and
 * gives the total number of bytes that sequence is expected to occupy:
 *
 *	0x00 - 0xBF	1   (single bytes and naked trail bytes)
 *	0xC0 - 0xDF	2
 *	0xE0 - 0xEF	3
 *	0xF0 - 0xF7	4 when TCL_UTF_MAX > 3, otherwise 1
 *	0xF8 - 0xFB	5 when TCL_UTF_MAX > 4, otherwise 1
 *	0xFC - 0xFF	6 when TCL_UTF_MAX > 5, otherwise 1
 *
 * Lead bytes for sequences longer than the build supports are therefore
 * treated as one-byte characters.
 */

static CONST unsigned char totalBytes[256] = {
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
#if TCL_UTF_MAX > 3
    4,4,4,4,4,4,4,4,
#else
    1,1,1,1,1,1,1,1,
#endif
#if TCL_UTF_MAX > 4
    5,5,5,5,
#else
    1,1,1,1,
#endif
#if TCL_UTF_MAX > 5
    6,6,6,6
#else
    1,1,1,1
#endif
};
1.94 +
1.95 +/*
1.96 + * Procedures used only in this module.
1.97 + */
1.98 +
1.99 +static int UtfCount _ANSI_ARGS_((int ch));
1.100 +
1.101 +
1.102 +/*
1.103 + *---------------------------------------------------------------------------
1.104 + *
1.105 + * UtfCount --
1.106 + *
1.107 + * Find the number of bytes in the Utf character "ch".
1.108 + *
1.109 + * Results:
1.110 + * The return values is the number of bytes in the Utf character "ch".
1.111 + *
1.112 + * Side effects:
1.113 + * None.
1.114 + *
1.115 + *---------------------------------------------------------------------------
1.116 + */
1.117 +
1.118 +INLINE static int
1.119 +UtfCount(ch)
1.120 + int ch; /* The Tcl_UniChar whose size is returned. */
1.121 +{
1.122 + if ((ch > 0) && (ch < UNICODE_SELF)) {
1.123 + return 1;
1.124 + }
1.125 + if (ch <= 0x7FF) {
1.126 + return 2;
1.127 + }
1.128 + if (ch <= 0xFFFF) {
1.129 + return 3;
1.130 + }
1.131 +#if TCL_UTF_MAX > 3
1.132 + if (ch <= 0x1FFFFF) {
1.133 + return 4;
1.134 + }
1.135 + if (ch <= 0x3FFFFFF) {
1.136 + return 5;
1.137 + }
1.138 + if (ch <= 0x7FFFFFFF) {
1.139 + return 6;
1.140 + }
1.141 +#endif
1.142 + return 3;
1.143 +}
1.144 +
1.145 +/*
1.146 + *---------------------------------------------------------------------------
1.147 + *
1.148 + * Tcl_UniCharToUtf --
1.149 + *
1.150 + * Store the given Tcl_UniChar as a sequence of UTF-8 bytes in the
1.151 + * provided buffer. Equivalent to Plan 9 runetochar().
1.152 + *
1.153 + * Results:
1.154 + * The return values is the number of bytes in the buffer that
1.155 + * were consumed.
1.156 + *
1.157 + * Side effects:
1.158 + * None.
1.159 + *
1.160 + *---------------------------------------------------------------------------
1.161 + */
1.162 +
1.163 +EXPORT_C INLINE int
1.164 +Tcl_UniCharToUtf(ch, str)
1.165 + int ch; /* The Tcl_UniChar to be stored in the
1.166 + * buffer. */
1.167 + char *str; /* Buffer in which the UTF-8 representation
1.168 + * of the Tcl_UniChar is stored. Buffer must
1.169 + * be large enough to hold the UTF-8 character
1.170 + * (at most TCL_UTF_MAX bytes). */
1.171 +{
1.172 + if ((ch > 0) && (ch < UNICODE_SELF)) {
1.173 + str[0] = (char) ch;
1.174 + return 1;
1.175 + }
1.176 + if (ch >= 0) {
1.177 + if (ch <= 0x7FF) {
1.178 + str[1] = (char) ((ch | 0x80) & 0xBF);
1.179 + str[0] = (char) ((ch >> 6) | 0xC0);
1.180 + return 2;
1.181 + }
1.182 + if (ch <= 0xFFFF) {
1.183 + three:
1.184 + str[2] = (char) ((ch | 0x80) & 0xBF);
1.185 + str[1] = (char) (((ch >> 6) | 0x80) & 0xBF);
1.186 + str[0] = (char) ((ch >> 12) | 0xE0);
1.187 + return 3;
1.188 + }
1.189 +
1.190 +#if TCL_UTF_MAX > 3
1.191 + if (ch <= 0x1FFFFF) {
1.192 + str[3] = (char) ((ch | 0x80) & 0xBF);
1.193 + str[2] = (char) (((ch >> 6) | 0x80) & 0xBF);
1.194 + str[1] = (char) (((ch >> 12) | 0x80) & 0xBF);
1.195 + str[0] = (char) ((ch >> 18) | 0xF0);
1.196 + return 4;
1.197 + }
1.198 + if (ch <= 0x3FFFFFF) {
1.199 + str[4] = (char) ((ch | 0x80) & 0xBF);
1.200 + str[3] = (char) (((ch >> 6) | 0x80) & 0xBF);
1.201 + str[2] = (char) (((ch >> 12) | 0x80) & 0xBF);
1.202 + str[1] = (char) (((ch >> 18) | 0x80) & 0xBF);
1.203 + str[0] = (char) ((ch >> 24) | 0xF8);
1.204 + return 5;
1.205 + }
1.206 + if (ch <= 0x7FFFFFFF) {
1.207 + str[5] = (char) ((ch | 0x80) & 0xBF);
1.208 + str[4] = (char) (((ch >> 6) | 0x80) & 0xBF);
1.209 + str[3] = (char) (((ch >> 12) | 0x80) & 0xBF);
1.210 + str[2] = (char) (((ch >> 18) | 0x80) & 0xBF);
1.211 + str[1] = (char) (((ch >> 24) | 0x80) & 0xBF);
1.212 + str[0] = (char) ((ch >> 30) | 0xFC);
1.213 + return 6;
1.214 + }
1.215 +#endif
1.216 + }
1.217 +
1.218 + ch = 0xFFFD;
1.219 + goto three;
1.220 +}
1.221 +
1.222 +/*
1.223 + *---------------------------------------------------------------------------
1.224 + *
1.225 + * Tcl_UniCharToUtfDString --
1.226 + *
1.227 + * Convert the given Unicode string to UTF-8.
1.228 + *
1.229 + * Results:
1.230 + * The return value is a pointer to the UTF-8 representation of the
1.231 + * Unicode string. Storage for the return value is appended to the
1.232 + * end of dsPtr.
1.233 + *
1.234 + * Side effects:
1.235 + * None.
1.236 + *
1.237 + *---------------------------------------------------------------------------
1.238 + */
1.239 +
1.240 +EXPORT_C char *
1.241 +Tcl_UniCharToUtfDString(wString, numChars, dsPtr)
1.242 + CONST Tcl_UniChar *wString; /* Unicode string to convert to UTF-8. */
1.243 + int numChars; /* Length of Unicode string in Tcl_UniChars
1.244 + * (must be >= 0). */
1.245 + Tcl_DString *dsPtr; /* UTF-8 representation of string is
1.246 + * appended to this previously initialized
1.247 + * DString. */
1.248 +{
1.249 + CONST Tcl_UniChar *w, *wEnd;
1.250 + char *p, *string;
1.251 + int oldLength;
1.252 +
1.253 + /*
1.254 + * UTF-8 string length in bytes will be <= Unicode string length *
1.255 + * TCL_UTF_MAX.
1.256 + */
1.257 +
1.258 + oldLength = Tcl_DStringLength(dsPtr);
1.259 + Tcl_DStringSetLength(dsPtr, (oldLength + numChars + 1) * TCL_UTF_MAX);
1.260 + string = Tcl_DStringValue(dsPtr) + oldLength;
1.261 +
1.262 + p = string;
1.263 + wEnd = wString + numChars;
1.264 + for (w = wString; w < wEnd; ) {
1.265 + p += Tcl_UniCharToUtf(*w, p);
1.266 + w++;
1.267 + }
1.268 + Tcl_DStringSetLength(dsPtr, oldLength + (p - string));
1.269 +
1.270 + return string;
1.271 +}
1.272 +
1.273 +/*
1.274 + *---------------------------------------------------------------------------
1.275 + *
1.276 + * Tcl_UtfToUniChar --
1.277 + *
1.278 + * Extract the Tcl_UniChar represented by the UTF-8 string. Bad
1.279 + * UTF-8 sequences are converted to valid Tcl_UniChars and processing
1.280 + * continues. Equivalent to Plan 9 chartorune().
1.281 + *
1.282 + * The caller must ensure that the source buffer is long enough that
1.283 + * this routine does not run off the end and dereference non-existent
1.284 + * memory looking for trail bytes. If the source buffer is known to
1.285 + * be '\0' terminated, this cannot happen. Otherwise, the caller
1.286 + * should call Tcl_UtfCharComplete() before calling this routine to
1.287 + * ensure that enough bytes remain in the string.
1.288 + *
1.289 + * Results:
1.290 + * *chPtr is filled with the Tcl_UniChar, and the return value is the
1.291 + * number of bytes from the UTF-8 string that were consumed.
1.292 + *
1.293 + * Side effects:
1.294 + * None.
1.295 + *
1.296 + *---------------------------------------------------------------------------
1.297 + */
1.298 +
1.299 +EXPORT_C int
1.300 +Tcl_UtfToUniChar(str, chPtr)
1.301 + register CONST char *str; /* The UTF-8 string. */
1.302 + register Tcl_UniChar *chPtr; /* Filled with the Tcl_UniChar represented
1.303 + * by the UTF-8 string. */
1.304 +{
1.305 + register int byte;
1.306 +
1.307 + /*
1.308 + * Unroll 1 to 3 byte UTF-8 sequences, use loop to handle longer ones.
1.309 + */
1.310 +
1.311 + byte = *((unsigned char *) str);
1.312 + if (byte < 0xC0) {
1.313 + /*
1.314 + * Handles properly formed UTF-8 characters between 0x01 and 0x7F.
1.315 + * Also treats \0 and naked trail bytes 0x80 to 0xBF as valid
1.316 + * characters representing themselves.
1.317 + */
1.318 +
1.319 + *chPtr = (Tcl_UniChar) byte;
1.320 + return 1;
1.321 + } else if (byte < 0xE0) {
1.322 + if ((str[1] & 0xC0) == 0x80) {
1.323 + /*
1.324 + * Two-byte-character lead-byte followed by a trail-byte.
1.325 + */
1.326 +
1.327 + *chPtr = (Tcl_UniChar) (((byte & 0x1F) << 6) | (str[1] & 0x3F));
1.328 + return 2;
1.329 + }
1.330 + /*
1.331 + * A two-byte-character lead-byte not followed by trail-byte
1.332 + * represents itself.
1.333 + */
1.334 +
1.335 + *chPtr = (Tcl_UniChar) byte;
1.336 + return 1;
1.337 + } else if (byte < 0xF0) {
1.338 + if (((str[1] & 0xC0) == 0x80) && ((str[2] & 0xC0) == 0x80)) {
1.339 + /*
1.340 + * Three-byte-character lead byte followed by two trail bytes.
1.341 + */
1.342 +
1.343 + *chPtr = (Tcl_UniChar) (((byte & 0x0F) << 12)
1.344 + | ((str[1] & 0x3F) << 6) | (str[2] & 0x3F));
1.345 + return 3;
1.346 + }
1.347 + /*
1.348 + * A three-byte-character lead-byte not followed by two trail-bytes
1.349 + * represents itself.
1.350 + */
1.351 +
1.352 + *chPtr = (Tcl_UniChar) byte;
1.353 + return 1;
1.354 + }
1.355 +#if TCL_UTF_MAX > 3
1.356 + else {
1.357 + int ch, total, trail;
1.358 +
1.359 + total = totalBytes[byte];
1.360 + trail = total - 1;
1.361 + if (trail > 0) {
1.362 + ch = byte & (0x3F >> trail);
1.363 + do {
1.364 + str++;
1.365 + if ((*str & 0xC0) != 0x80) {
1.366 + *chPtr = byte;
1.367 + return 1;
1.368 + }
1.369 + ch <<= 6;
1.370 + ch |= (*str & 0x3F);
1.371 + trail--;
1.372 + } while (trail > 0);
1.373 + *chPtr = ch;
1.374 + return total;
1.375 + }
1.376 + }
1.377 +#endif
1.378 +
1.379 + *chPtr = (Tcl_UniChar) byte;
1.380 + return 1;
1.381 +}
1.382 +
1.383 +/*
1.384 + *---------------------------------------------------------------------------
1.385 + *
1.386 + * Tcl_UtfToUniCharDString --
1.387 + *
1.388 + * Convert the UTF-8 string to Unicode.
1.389 + *
1.390 + * Results:
1.391 + * The return value is a pointer to the Unicode representation of the
1.392 + * UTF-8 string. Storage for the return value is appended to the
1.393 + * end of dsPtr. The Unicode string is terminated with a Unicode
1.394 + * NULL character.
1.395 + *
1.396 + * Side effects:
1.397 + * None.
1.398 + *
1.399 + *---------------------------------------------------------------------------
1.400 + */
1.401 +
1.402 +EXPORT_C Tcl_UniChar *
1.403 +Tcl_UtfToUniCharDString(string, length, dsPtr)
1.404 + CONST char *string; /* UTF-8 string to convert to Unicode. */
1.405 + int length; /* Length of UTF-8 string in bytes, or -1
1.406 + * for strlen(). */
1.407 + Tcl_DString *dsPtr; /* Unicode representation of string is
1.408 + * appended to this previously initialized
1.409 + * DString. */
1.410 +{
1.411 + Tcl_UniChar *w, *wString;
1.412 + CONST char *p, *end;
1.413 + int oldLength;
1.414 +
1.415 + if (length < 0) {
1.416 + length = strlen(string);
1.417 + }
1.418 +
1.419 + /*
1.420 + * Unicode string length in Tcl_UniChars will be <= UTF-8 string length
1.421 + * in bytes.
1.422 + */
1.423 +
1.424 + oldLength = Tcl_DStringLength(dsPtr);
1.425 + Tcl_DStringSetLength(dsPtr,
1.426 + (int) ((oldLength + length + 1) * sizeof(Tcl_UniChar)));
1.427 + wString = (Tcl_UniChar *) (Tcl_DStringValue(dsPtr) + oldLength);
1.428 +
1.429 + w = wString;
1.430 + end = string + length;
1.431 + for (p = string; p < end; ) {
1.432 + p += TclUtfToUniChar(p, w);
1.433 + w++;
1.434 + }
1.435 + *w = '\0';
1.436 + Tcl_DStringSetLength(dsPtr,
1.437 + (oldLength + ((char *) w - (char *) wString)));
1.438 +
1.439 + return wString;
1.440 +}
1.441 +
1.442 +/*
1.443 + *---------------------------------------------------------------------------
1.444 + *
1.445 + * Tcl_UtfCharComplete --
1.446 + *
1.447 + * Determine if the UTF-8 string of the given length is long enough
1.448 + * to be decoded by Tcl_UtfToUniChar(). This does not ensure that the
1.449 + * UTF-8 string is properly formed. Equivalent to Plan 9 fullrune().
1.450 + *
1.451 + * Results:
1.452 + * The return value is 0 if the string is not long enough, non-zero
1.453 + * otherwise.
1.454 + *
1.455 + * Side effects:
1.456 + * None.
1.457 + *
1.458 + *---------------------------------------------------------------------------
1.459 + */
1.460 +
1.461 +EXPORT_C int
1.462 +Tcl_UtfCharComplete(str, len)
1.463 + CONST char *str; /* String to check if first few bytes
1.464 + * contain a complete UTF-8 character. */
1.465 + int len; /* Length of above string in bytes. */
1.466 +{
1.467 + int ch;
1.468 +
1.469 + ch = *((unsigned char *) str);
1.470 + return len >= totalBytes[ch];
1.471 +}
1.472 +
1.473 +/*
1.474 + *---------------------------------------------------------------------------
1.475 + *
1.476 + * Tcl_NumUtfChars --
1.477 + *
1.478 + * Returns the number of characters (not bytes) in the UTF-8 string,
1.479 + * not including the terminating NULL byte. This is equivalent to
1.480 + * Plan 9 utflen() and utfnlen().
1.481 + *
1.482 + * Results:
1.483 + * As above.
1.484 + *
1.485 + * Side effects:
1.486 + * None.
1.487 + *
1.488 + *---------------------------------------------------------------------------
1.489 + */
1.490 +
1.491 +EXPORT_C int
1.492 +Tcl_NumUtfChars(str, len)
1.493 + register CONST char *str; /* The UTF-8 string to measure. */
1.494 + int len; /* The length of the string in bytes, or -1
1.495 + * for strlen(string). */
1.496 +{
1.497 + Tcl_UniChar ch;
1.498 + register Tcl_UniChar *chPtr = &ch;
1.499 + register int i;
1.500 +
1.501 + /*
1.502 + * The separate implementations are faster.
1.503 + *
1.504 + * Since this is a time-sensitive function, we also do the check for
1.505 + * the single-byte char case specially.
1.506 + */
1.507 +
1.508 + i = 0;
1.509 + if (len < 0) {
1.510 + while (*str != '\0') {
1.511 + str += TclUtfToUniChar(str, chPtr);
1.512 + i++;
1.513 + }
1.514 + } else {
1.515 + register int n;
1.516 +
1.517 + while (len > 0) {
1.518 + if (UCHAR(*str) < 0xC0) {
1.519 + len--;
1.520 + str++;
1.521 + } else {
1.522 + n = Tcl_UtfToUniChar(str, chPtr);
1.523 + len -= n;
1.524 + str += n;
1.525 + }
1.526 + i++;
1.527 + }
1.528 + }
1.529 + return i;
1.530 +}
1.531 +
1.532 +/*
1.533 + *---------------------------------------------------------------------------
1.534 + *
1.535 + * Tcl_UtfFindFirst --
1.536 + *
1.537 + * Returns a pointer to the first occurance of the given Tcl_UniChar
1.538 + * in the NULL-terminated UTF-8 string. The NULL terminator is
1.539 + * considered part of the UTF-8 string. Equivalent to Plan 9
1.540 + * utfrune().
1.541 + *
1.542 + * Results:
1.543 + * As above. If the Tcl_UniChar does not exist in the given string,
1.544 + * the return value is NULL.
1.545 + *
1.546 + * Side effects:
1.547 + * None.
1.548 + *
1.549 + *---------------------------------------------------------------------------
1.550 + */
1.551 +EXPORT_C CONST char *
1.552 +Tcl_UtfFindFirst(string, ch)
1.553 + CONST char *string; /* The UTF-8 string to be searched. */
1.554 + int ch; /* The Tcl_UniChar to search for. */
1.555 +{
1.556 + int len;
1.557 + Tcl_UniChar find;
1.558 +
1.559 + while (1) {
1.560 + len = TclUtfToUniChar(string, &find);
1.561 + if (find == ch) {
1.562 + return string;
1.563 + }
1.564 + if (*string == '\0') {
1.565 + return NULL;
1.566 + }
1.567 + string += len;
1.568 + }
1.569 +}
1.570 +
1.571 +/*
1.572 + *---------------------------------------------------------------------------
1.573 + *
1.574 + * Tcl_UtfFindLast --
1.575 + *
1.576 + * Returns a pointer to the last occurance of the given Tcl_UniChar
1.577 + * in the NULL-terminated UTF-8 string. The NULL terminator is
1.578 + * considered part of the UTF-8 string. Equivalent to Plan 9
1.579 + * utfrrune().
1.580 + *
1.581 + * Results:
1.582 + * As above. If the Tcl_UniChar does not exist in the given string,
1.583 + * the return value is NULL.
1.584 + *
1.585 + * Side effects:
1.586 + * None.
1.587 + *
1.588 + *---------------------------------------------------------------------------
1.589 + */
1.590 +
1.591 +EXPORT_C CONST char *
1.592 +Tcl_UtfFindLast(string, ch)
1.593 + CONST char *string; /* The UTF-8 string to be searched. */
1.594 + int ch; /* The Tcl_UniChar to search for. */
1.595 +{
1.596 + int len;
1.597 + Tcl_UniChar find;
1.598 + CONST char *last;
1.599 +
1.600 + last = NULL;
1.601 + while (1) {
1.602 + len = TclUtfToUniChar(string, &find);
1.603 + if (find == ch) {
1.604 + last = string;
1.605 + }
1.606 + if (*string == '\0') {
1.607 + break;
1.608 + }
1.609 + string += len;
1.610 + }
1.611 + return last;
1.612 +}
1.613 +
1.614 +/*
1.615 + *---------------------------------------------------------------------------
1.616 + *
1.617 + * Tcl_UtfNext --
1.618 + *
1.619 + * Given a pointer to some current location in a UTF-8 string,
1.620 + * move forward one character. The caller must ensure that they
1.621 + * are not asking for the next character after the last character
1.622 + * in the string.
1.623 + *
1.624 + * Results:
1.625 + * The return value is the pointer to the next character in
1.626 + * the UTF-8 string.
1.627 + *
1.628 + * Side effects:
1.629 + * None.
1.630 + *
1.631 + *---------------------------------------------------------------------------
1.632 + */
1.633 +
1.634 +EXPORT_C CONST char *
1.635 +Tcl_UtfNext(str)
1.636 + CONST char *str; /* The current location in the string. */
1.637 +{
1.638 + Tcl_UniChar ch;
1.639 +
1.640 + return str + TclUtfToUniChar(str, &ch);
1.641 +}
1.642 +
1.643 +/*
1.644 + *---------------------------------------------------------------------------
1.645 + *
1.646 + * Tcl_UtfPrev --
1.647 + *
1.648 + * Given a pointer to some current location in a UTF-8 string,
1.649 + * move backwards one character. This works correctly when the
1.650 + * pointer is in the middle of a UTF-8 character.
1.651 + *
1.652 + * Results:
1.653 + * The return value is a pointer to the previous character in the
1.654 + * UTF-8 string. If the current location was already at the
1.655 + * beginning of the string, the return value will also be a
1.656 + * pointer to the beginning of the string.
1.657 + *
1.658 + * Side effects:
1.659 + * None.
1.660 + *
1.661 + *---------------------------------------------------------------------------
1.662 + */
1.663 +
1.664 +EXPORT_C CONST char *
1.665 +Tcl_UtfPrev(str, start)
1.666 + CONST char *str; /* The current location in the string. */
1.667 + CONST char *start; /* Pointer to the beginning of the
1.668 + * string, to avoid going backwards too
1.669 + * far. */
1.670 +{
1.671 + CONST char *look;
1.672 + int i, byte;
1.673 +
1.674 + str--;
1.675 + look = str;
1.676 + for (i = 0; i < TCL_UTF_MAX; i++) {
1.677 + if (look < start) {
1.678 + if (str < start) {
1.679 + str = start;
1.680 + }
1.681 + break;
1.682 + }
1.683 + byte = *((unsigned char *) look);
1.684 + if (byte < 0x80) {
1.685 + break;
1.686 + }
1.687 + if (byte >= 0xC0) {
1.688 + return look;
1.689 + }
1.690 + look--;
1.691 + }
1.692 + return str;
1.693 +}
1.694 +
1.695 +/*
1.696 + *---------------------------------------------------------------------------
1.697 + *
1.698 + * Tcl_UniCharAtIndex --
1.699 + *
1.700 + * Returns the Unicode character represented at the specified
1.701 + * character (not byte) position in the UTF-8 string.
1.702 + *
1.703 + * Results:
1.704 + * As above.
1.705 + *
1.706 + * Side effects:
1.707 + * None.
1.708 + *
1.709 + *---------------------------------------------------------------------------
1.710 + */
1.711 +
1.712 +EXPORT_C Tcl_UniChar
1.713 +Tcl_UniCharAtIndex(src, index)
1.714 + register CONST char *src; /* The UTF-8 string to dereference. */
1.715 + register int index; /* The position of the desired character. */
1.716 +{
1.717 + Tcl_UniChar ch;
1.718 +
1.719 + while (index >= 0) {
1.720 + index--;
1.721 + src += TclUtfToUniChar(src, &ch);
1.722 + }
1.723 + return ch;
1.724 +}
1.725 +
1.726 +/*
1.727 + *---------------------------------------------------------------------------
1.728 + *
1.729 + * Tcl_UtfAtIndex --
1.730 + *
1.731 + * Returns a pointer to the specified character (not byte) position
1.732 + * in the UTF-8 string.
1.733 + *
1.734 + * Results:
1.735 + * As above.
1.736 + *
1.737 + * Side effects:
1.738 + * None.
1.739 + *
1.740 + *---------------------------------------------------------------------------
1.741 + */
1.742 +
1.743 +EXPORT_C CONST char *
1.744 +Tcl_UtfAtIndex(src, index)
1.745 + register CONST char *src; /* The UTF-8 string. */
1.746 + register int index; /* The position of the desired character. */
1.747 +{
1.748 + Tcl_UniChar ch;
1.749 +
1.750 + while (index > 0) {
1.751 + index--;
1.752 + src += TclUtfToUniChar(src, &ch);
1.753 + }
1.754 + return src;
1.755 +}
1.756 +
1.757 +/*
1.758 + *---------------------------------------------------------------------------
1.759 + *
1.760 + * Tcl_UtfBackslash --
1.761 + *
1.762 + * Figure out how to handle a backslash sequence.
1.763 + *
1.764 + * Results:
1.765 + * Stores the bytes represented by the backslash sequence in dst and
1.766 + * returns the number of bytes written to dst. At most TCL_UTF_MAX
1.767 + * bytes are written to dst; dst must have been large enough to accept
1.768 + * those bytes. If readPtr isn't NULL then it is filled in with a
1.769 + * count of the number of bytes in the backslash sequence.
1.770 + *
1.771 + * Side effects:
1.772 + * The maximum number of bytes it takes to represent a Unicode
1.773 + * character in UTF-8 is guaranteed to be less than the number of
1.774 + * bytes used to express the backslash sequence that represents
1.775 + * that Unicode character. If the target buffer into which the
1.776 + * caller is going to store the bytes that represent the Unicode
1.777 + * character is at least as large as the source buffer from which
1.778 + * the backslashed sequence was extracted, no buffer overruns should
1.779 + * occur.
1.780 + *
1.781 + *---------------------------------------------------------------------------
1.782 + */
1.783 +
1.784 +EXPORT_C int
1.785 +Tcl_UtfBackslash(src, readPtr, dst)
1.786 + CONST char *src; /* Points to the backslash character of
1.787 + * a backslash sequence. */
1.788 + int *readPtr; /* Fill in with number of characters read
1.789 + * from src, unless NULL. */
1.790 + char *dst; /* Filled with the bytes represented by the
1.791 + * backslash sequence. */
1.792 +{
1.793 +#define LINE_LENGTH 128
1.794 + int numRead;
1.795 + int result;
1.796 +
1.797 + result = TclParseBackslash(src, LINE_LENGTH, &numRead, dst);
1.798 + if (numRead == LINE_LENGTH) {
1.799 + /* We ate a whole line. Pay the price of a strlen() */
1.800 + result = TclParseBackslash(src, (int)strlen(src), &numRead, dst);
1.801 + }
1.802 + if (readPtr != NULL) {
1.803 + *readPtr = numRead;
1.804 + }
1.805 + return result;
1.806 +}
1.807 +
1.808 +/*
1.809 + *----------------------------------------------------------------------
1.810 + *
1.811 + * Tcl_UtfToUpper --
1.812 + *
1.813 + * Convert lowercase characters to uppercase characters in a UTF
1.814 + * string in place. The conversion may shrink the UTF string.
1.815 + *
1.816 + * Results:
1.817 + * Returns the number of bytes in the resulting string
1.818 + * excluding the trailing null.
1.819 + *
1.820 + * Side effects:
1.821 + * Writes a terminating null after the last converted character.
1.822 + *
1.823 + *----------------------------------------------------------------------
1.824 + */
1.825 +
1.826 +EXPORT_C int
1.827 +Tcl_UtfToUpper(str)
1.828 + char *str; /* String to convert in place. */
1.829 +{
1.830 + Tcl_UniChar ch, upChar;
1.831 + char *src, *dst;
1.832 + int bytes;
1.833 +
1.834 + /*
1.835 + * Iterate over the string until we hit the terminating null.
1.836 + */
1.837 +
1.838 + src = dst = str;
1.839 + while (*src) {
1.840 + bytes = TclUtfToUniChar(src, &ch);
1.841 + upChar = Tcl_UniCharToUpper(ch);
1.842 +
1.843 + /*
1.844 + * To keep badly formed Utf strings from getting inflated by
1.845 + * the conversion (thereby causing a segfault), only copy the
1.846 + * upper case char to dst if its size is <= the original char.
1.847 + */
1.848 +
1.849 + if (bytes < UtfCount(upChar)) {
1.850 + memcpy(dst, src, (size_t) bytes);
1.851 + dst += bytes;
1.852 + } else {
1.853 + dst += Tcl_UniCharToUtf(upChar, dst);
1.854 + }
1.855 + src += bytes;
1.856 + }
1.857 + *dst = '\0';
1.858 + return (dst - str);
1.859 +}
1.860 +
1.861 +/*
1.862 + *----------------------------------------------------------------------
1.863 + *
1.864 + * Tcl_UtfToLower --
1.865 + *
1.866 + * Convert uppercase characters to lowercase characters in a UTF
1.867 + * string in place. The conversion may shrink the UTF string.
1.868 + *
1.869 + * Results:
1.870 + * Returns the number of bytes in the resulting string
1.871 + * excluding the trailing null.
1.872 + *
1.873 + * Side effects:
1.874 + * Writes a terminating null after the last converted character.
1.875 + *
1.876 + *----------------------------------------------------------------------
1.877 + */
1.878 +
1.879 +EXPORT_C int
1.880 +Tcl_UtfToLower(str)
1.881 + char *str; /* String to convert in place. */
1.882 +{
1.883 + Tcl_UniChar ch, lowChar;
1.884 + char *src, *dst;
1.885 + int bytes;
1.886 +
1.887 + /*
1.888 + * Iterate over the string until we hit the terminating null.
1.889 + */
1.890 +
1.891 + src = dst = str;
1.892 + while (*src) {
1.893 + bytes = TclUtfToUniChar(src, &ch);
1.894 + lowChar = Tcl_UniCharToLower(ch);
1.895 +
1.896 + /*
1.897 + * To keep badly formed Utf strings from getting inflated by
1.898 + * the conversion (thereby causing a segfault), only copy the
1.899 + * lower case char to dst if its size is <= the original char.
1.900 + */
1.901 +
1.902 + if (bytes < UtfCount(lowChar)) {
1.903 + memcpy(dst, src, (size_t) bytes);
1.904 + dst += bytes;
1.905 + } else {
1.906 + dst += Tcl_UniCharToUtf(lowChar, dst);
1.907 + }
1.908 + src += bytes;
1.909 + }
1.910 + *dst = '\0';
1.911 + return (dst - str);
1.912 +}
1.913 +
1.914 +/*
1.915 + *----------------------------------------------------------------------
1.916 + *
1.917 + * Tcl_UtfToTitle --
1.918 + *
1.919 + * Changes the first character of a UTF string to title case or
1.920 + * uppercase and the rest of the string to lowercase. The
1.921 + * conversion happens in place and may shrink the UTF string.
1.922 + *
1.923 + * Results:
1.924 + * Returns the number of bytes in the resulting string
1.925 + * excluding the trailing null.
1.926 + *
1.927 + * Side effects:
1.928 + * Writes a terminating null after the last converted character.
1.929 + *
1.930 + *----------------------------------------------------------------------
1.931 + */
1.932 +
1.933 +EXPORT_C int
1.934 +Tcl_UtfToTitle(str)
1.935 + char *str; /* String to convert in place. */
1.936 +{
1.937 + Tcl_UniChar ch, titleChar, lowChar;
1.938 + char *src, *dst;
1.939 + int bytes;
1.940 +
1.941 + /*
1.942 + * Capitalize the first character and then lowercase the rest of the
1.943 + * characters until we get to a null.
1.944 + */
1.945 +
1.946 + src = dst = str;
1.947 +
1.948 + if (*src) {
1.949 + bytes = TclUtfToUniChar(src, &ch);
1.950 + titleChar = Tcl_UniCharToTitle(ch);
1.951 +
1.952 + if (bytes < UtfCount(titleChar)) {
1.953 + memcpy(dst, src, (size_t) bytes);
1.954 + dst += bytes;
1.955 + } else {
1.956 + dst += Tcl_UniCharToUtf(titleChar, dst);
1.957 + }
1.958 + src += bytes;
1.959 + }
1.960 + while (*src) {
1.961 + bytes = TclUtfToUniChar(src, &ch);
1.962 + lowChar = Tcl_UniCharToLower(ch);
1.963 +
1.964 + if (bytes < UtfCount(lowChar)) {
1.965 + memcpy(dst, src, (size_t) bytes);
1.966 + dst += bytes;
1.967 + } else {
1.968 + dst += Tcl_UniCharToUtf(lowChar, dst);
1.969 + }
1.970 + src += bytes;
1.971 + }
1.972 + *dst = '\0';
1.973 + return (dst - str);
1.974 +}
1.975 +
1.976 +/*
1.977 + *----------------------------------------------------------------------
1.978 + *
1.979 + * TclpUtfNcmp2 --
1.980 + *
1.981 + * Compare at most n bytes of utf-8 strings cs and ct. Both cs
1.982 + * and ct are assumed to be at least n bytes long.
1.983 + *
1.984 + * Results:
1.985 + * Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct.
1.986 + *
1.987 + * Side effects:
1.988 + * None.
1.989 + *
1.990 + *----------------------------------------------------------------------
1.991 + */
1.992 +
1.993 +int
1.994 +TclpUtfNcmp2(cs, ct, n)
1.995 + CONST char *cs; /* UTF string to compare to ct. */
1.996 + CONST char *ct; /* UTF string cs is compared to. */
1.997 + unsigned long n; /* Number of *bytes* to compare. */
1.998 +{
1.999 + /*
1.1000 + * We can't simply call 'memcmp(cs, ct, n);' because we need to check
1.1001 + * for Tcl's \xC0\x80 non-utf-8 null encoding.
1.1002 + * Otherwise utf-8 lexes fine in the strcmp manner.
1.1003 + */
1.1004 + register int result = 0;
1.1005 +
1.1006 + for ( ; n != 0; n--, cs++, ct++) {
1.1007 + if (*cs != *ct) {
1.1008 + result = UCHAR(*cs) - UCHAR(*ct);
1.1009 + break;
1.1010 + }
1.1011 + }
1.1012 + if (n && ((UCHAR(*cs) == 0xC0) || (UCHAR(*ct) == 0xC0))) {
1.1013 + unsigned char c1, c2;
1.1014 + c1 = ((UCHAR(*cs) == 0xC0) && (UCHAR(cs[1]) == 0x80)) ? 0 : UCHAR(*cs);
1.1015 + c2 = ((UCHAR(*ct) == 0xC0) && (UCHAR(ct[1]) == 0x80)) ? 0 : UCHAR(*ct);
1.1016 + result = (c1 - c2);
1.1017 + }
1.1018 + return result;
1.1019 +}
1.1020 +
1.1021 +/*
1.1022 + *----------------------------------------------------------------------
1.1023 + *
1.1024 + * Tcl_UtfNcmp --
1.1025 + *
1.1026 + * Compare at most n UTF chars of string cs to string ct. Both cs
1.1027 + * and ct are assumed to be at least n UTF chars long.
1.1028 + *
1.1029 + * Results:
1.1030 + * Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct.
1.1031 + *
1.1032 + * Side effects:
1.1033 + * None.
1.1034 + *
1.1035 + *----------------------------------------------------------------------
1.1036 + */
1.1037 +
1.1038 +EXPORT_C int
1.1039 +Tcl_UtfNcmp(cs, ct, n)
1.1040 + CONST char *cs; /* UTF string to compare to ct. */
1.1041 + CONST char *ct; /* UTF string cs is compared to. */
1.1042 + unsigned long n; /* Number of UTF chars to compare. */
1.1043 +{
1.1044 + Tcl_UniChar ch1, ch2;
1.1045 + /*
1.1046 + * Cannot use 'memcmp(cs, ct, n);' as byte representation of
1.1047 + * \u0000 (the pair of bytes 0xc0,0x80) is larger than byte
1.1048 + * representation of \u0001 (the byte 0x01.)
1.1049 + */
1.1050 + while (n-- > 0) {
1.1051 + /*
1.1052 + * n must be interpreted as chars, not bytes.
1.1053 + * This should be called only when both strings are of
1.1054 + * at least n chars long (no need for \0 check)
1.1055 + */
1.1056 + cs += TclUtfToUniChar(cs, &ch1);
1.1057 + ct += TclUtfToUniChar(ct, &ch2);
1.1058 + if (ch1 != ch2) {
1.1059 + return (ch1 - ch2);
1.1060 + }
1.1061 + }
1.1062 + return 0;
1.1063 +}
1.1064 +
1.1065 +/*
1.1066 + *----------------------------------------------------------------------
1.1067 + *
1.1068 + * Tcl_UtfNcasecmp --
1.1069 + *
1.1070 + * Compare at most n UTF chars of string cs to string ct case
1.1071 + * insensitive. Both cs and ct are assumed to be at least n
1.1072 + * UTF chars long.
1.1073 + *
1.1074 + * Results:
1.1075 + * Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct.
1.1076 + *
1.1077 + * Side effects:
1.1078 + * None.
1.1079 + *
1.1080 + *----------------------------------------------------------------------
1.1081 + */
1.1082 +
1.1083 +EXPORT_C int
1.1084 +Tcl_UtfNcasecmp(cs, ct, n)
1.1085 + CONST char *cs; /* UTF string to compare to ct. */
1.1086 + CONST char *ct; /* UTF string cs is compared to. */
1.1087 + unsigned long n; /* Number of UTF chars to compare. */
1.1088 +{
1.1089 + Tcl_UniChar ch1, ch2;
1.1090 + while (n-- > 0) {
1.1091 + /*
1.1092 + * n must be interpreted as chars, not bytes.
1.1093 + * This should be called only when both strings are of
1.1094 + * at least n chars long (no need for \0 check)
1.1095 + */
1.1096 + cs += TclUtfToUniChar(cs, &ch1);
1.1097 + ct += TclUtfToUniChar(ct, &ch2);
1.1098 + if (ch1 != ch2) {
1.1099 + ch1 = Tcl_UniCharToLower(ch1);
1.1100 + ch2 = Tcl_UniCharToLower(ch2);
1.1101 + if (ch1 != ch2) {
1.1102 + return (ch1 - ch2);
1.1103 + }
1.1104 + }
1.1105 + }
1.1106 + return 0;
1.1107 +}
1.1108 +
1.1109 +/*
1.1110 + *----------------------------------------------------------------------
1.1111 + *
1.1112 + * Tcl_UniCharToUpper --
1.1113 + *
1.1114 + * Compute the uppercase equivalent of the given Unicode character.
1.1115 + *
1.1116 + * Results:
1.1117 + * Returns the uppercase Unicode character.
1.1118 + *
1.1119 + * Side effects:
1.1120 + * None.
1.1121 + *
1.1122 + *----------------------------------------------------------------------
1.1123 + */
1.1124 +
1.1125 +EXPORT_C Tcl_UniChar
1.1126 +Tcl_UniCharToUpper(ch)
1.1127 + int ch; /* Unicode character to convert. */
1.1128 +{
1.1129 + int info = GetUniCharInfo(ch);
1.1130 +
1.1131 + if (GetCaseType(info) & 0x04) {
1.1132 + return (Tcl_UniChar) (ch - GetDelta(info));
1.1133 + } else {
1.1134 + return ch;
1.1135 + }
1.1136 +}
1.1137 +
1.1138 +/*
1.1139 + *----------------------------------------------------------------------
1.1140 + *
1.1141 + * Tcl_UniCharToLower --
1.1142 + *
1.1143 + * Compute the lowercase equivalent of the given Unicode character.
1.1144 + *
1.1145 + * Results:
1.1146 + * Returns the lowercase Unicode character.
1.1147 + *
1.1148 + * Side effects:
1.1149 + * None.
1.1150 + *
1.1151 + *----------------------------------------------------------------------
1.1152 + */
1.1153 +
1.1154 +EXPORT_C Tcl_UniChar
1.1155 +Tcl_UniCharToLower(ch)
1.1156 + int ch; /* Unicode character to convert. */
1.1157 +{
1.1158 + int info = GetUniCharInfo(ch);
1.1159 +
1.1160 + if (GetCaseType(info) & 0x02) {
1.1161 + return (Tcl_UniChar) (ch + GetDelta(info));
1.1162 + } else {
1.1163 + return ch;
1.1164 + }
1.1165 +}
1.1166 +
1.1167 +/*
1.1168 + *----------------------------------------------------------------------
1.1169 + *
1.1170 + * Tcl_UniCharToTitle --
1.1171 + *
1.1172 + * Compute the titlecase equivalent of the given Unicode character.
1.1173 + *
1.1174 + * Results:
1.1175 + * Returns the titlecase Unicode character.
1.1176 + *
1.1177 + * Side effects:
1.1178 + * None.
1.1179 + *
1.1180 + *----------------------------------------------------------------------
1.1181 + */
1.1182 +
1.1183 +EXPORT_C Tcl_UniChar
1.1184 +Tcl_UniCharToTitle(ch)
1.1185 + int ch; /* Unicode character to convert. */
1.1186 +{
1.1187 + int info = GetUniCharInfo(ch);
1.1188 + int mode = GetCaseType(info);
1.1189 +
1.1190 + if (mode & 0x1) {
1.1191 + /*
1.1192 + * Subtract or add one depending on the original case.
1.1193 + */
1.1194 +
1.1195 + return (Tcl_UniChar) (ch + ((mode & 0x4) ? -1 : 1));
1.1196 + } else if (mode == 0x4) {
1.1197 + return (Tcl_UniChar) (ch - GetDelta(info));
1.1198 + } else {
1.1199 + return ch;
1.1200 + }
1.1201 +}
1.1202 +
1.1203 +/*
1.1204 + *----------------------------------------------------------------------
1.1205 + *
1.1206 + * Tcl_UniCharLen --
1.1207 + *
1.1208 + * Find the length of a UniChar string. The str input must be null
1.1209 + * terminated.
1.1210 + *
1.1211 + * Results:
1.1212 + * Returns the length of str in UniChars (not bytes).
1.1213 + *
1.1214 + * Side effects:
1.1215 + * None.
1.1216 + *
1.1217 + *----------------------------------------------------------------------
1.1218 + */
1.1219 +
1.1220 +EXPORT_C int
1.1221 +Tcl_UniCharLen(str)
1.1222 + CONST Tcl_UniChar *str; /* Unicode string to find length of. */
1.1223 +{
1.1224 + int len = 0;
1.1225 +
1.1226 + while (*str != '\0') {
1.1227 + len++;
1.1228 + str++;
1.1229 + }
1.1230 + return len;
1.1231 +}
1.1232 +
1.1233 +/*
1.1234 + *----------------------------------------------------------------------
1.1235 + *
1.1236 + * Tcl_UniCharNcmp --
1.1237 + *
1.1238 + * Compare at most n unichars of string cs to string ct. Both cs
1.1239 + * and ct are assumed to be at least n unichars long.
1.1240 + *
1.1241 + * Results:
1.1242 + * Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct.
1.1243 + *
1.1244 + * Side effects:
1.1245 + * None.
1.1246 + *
1.1247 + *----------------------------------------------------------------------
1.1248 + */
1.1249 +
1.1250 +EXPORT_C int
1.1251 +Tcl_UniCharNcmp(cs, ct, n)
1.1252 + CONST Tcl_UniChar *cs; /* Unicode string to compare to ct. */
1.1253 + CONST Tcl_UniChar *ct; /* Unicode string cs is compared to. */
1.1254 + unsigned long n; /* Number of unichars to compare. */
1.1255 +{
1.1256 +#ifdef WORDS_BIGENDIAN
1.1257 + /*
1.1258 + * We are definitely on a big-endian machine; memcmp() is safe
1.1259 + */
1.1260 + return memcmp(cs, ct, n*sizeof(Tcl_UniChar));
1.1261 +
1.1262 +#else /* !WORDS_BIGENDIAN */
1.1263 + /*
1.1264 + * We can't simply call memcmp() because that is not lexically correct.
1.1265 + */
1.1266 + for ( ; n != 0; cs++, ct++, n--) {
1.1267 + if (*cs != *ct) {
1.1268 + return (*cs - *ct);
1.1269 + }
1.1270 + }
1.1271 + return 0;
1.1272 +#endif /* WORDS_BIGENDIAN */
1.1273 +}
1.1274 +
1.1275 +/*
1.1276 + *----------------------------------------------------------------------
1.1277 + *
1.1278 + * Tcl_UniCharNcasecmp --
1.1279 + *
1.1280 + * Compare at most n unichars of string cs to string ct case
1.1281 + * insensitive. Both cs and ct are assumed to be at least n
1.1282 + * unichars long.
1.1283 + *
1.1284 + * Results:
1.1285 + * Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct.
1.1286 + *
1.1287 + * Side effects:
1.1288 + * None.
1.1289 + *
1.1290 + *----------------------------------------------------------------------
1.1291 + */
1.1292 +
1.1293 +EXPORT_C int
1.1294 +Tcl_UniCharNcasecmp(cs, ct, n)
1.1295 + CONST Tcl_UniChar *cs; /* Unicode string to compare to ct. */
1.1296 + CONST Tcl_UniChar *ct; /* Unicode string cs is compared to. */
1.1297 + unsigned long n; /* Number of unichars to compare. */
1.1298 +{
1.1299 + for ( ; n != 0; n--, cs++, ct++) {
1.1300 + if (*cs != *ct) {
1.1301 + Tcl_UniChar lcs = Tcl_UniCharToLower(*cs);
1.1302 + Tcl_UniChar lct = Tcl_UniCharToLower(*ct);
1.1303 + if (lcs != lct) {
1.1304 + return (lcs - lct);
1.1305 + }
1.1306 + }
1.1307 + }
1.1308 + return 0;
1.1309 +}
1.1310 +
1.1311 +/*
1.1312 + *----------------------------------------------------------------------
1.1313 + *
1.1314 + * Tcl_UniCharIsAlnum --
1.1315 + *
1.1316 + * Test if a character is an alphanumeric Unicode character.
1.1317 + *
1.1318 + * Results:
1.1319 + * Returns 1 if character is alphanumeric.
1.1320 + *
1.1321 + * Side effects:
1.1322 + * None.
1.1323 + *
1.1324 + *----------------------------------------------------------------------
1.1325 + */
1.1326 +
1.1327 +EXPORT_C int
1.1328 +Tcl_UniCharIsAlnum(ch)
1.1329 + int ch; /* Unicode character to test. */
1.1330 +{
1.1331 + register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
1.1332 +
1.1333 + return (((ALPHA_BITS | DIGIT_BITS) >> category) & 1);
1.1334 +}
1.1335 +
1.1336 +/*
1.1337 + *----------------------------------------------------------------------
1.1338 + *
1.1339 + * Tcl_UniCharIsAlpha --
1.1340 + *
1.1341 + * Test if a character is an alphabetic Unicode character.
1.1342 + *
1.1343 + * Results:
1.1344 + * Returns 1 if character is alphabetic.
1.1345 + *
1.1346 + * Side effects:
1.1347 + * None.
1.1348 + *
1.1349 + *----------------------------------------------------------------------
1.1350 + */
1.1351 +
1.1352 +EXPORT_C int
1.1353 +Tcl_UniCharIsAlpha(ch)
1.1354 + int ch; /* Unicode character to test. */
1.1355 +{
1.1356 + register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
1.1357 + return ((ALPHA_BITS >> category) & 1);
1.1358 +}
1.1359 +
1.1360 +/*
1.1361 + *----------------------------------------------------------------------
1.1362 + *
1.1363 + * Tcl_UniCharIsControl --
1.1364 + *
1.1365 + * Test if a character is a Unicode control character.
1.1366 + *
1.1367 + * Results:
1.1368 + * Returns non-zero if character is a control.
1.1369 + *
1.1370 + * Side effects:
1.1371 + * None.
1.1372 + *
1.1373 + *----------------------------------------------------------------------
1.1374 + */
1.1375 +
1.1376 +EXPORT_C int
1.1377 +Tcl_UniCharIsControl(ch)
1.1378 + int ch; /* Unicode character to test. */
1.1379 +{
1.1380 + return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == CONTROL);
1.1381 +}
1.1382 +
1.1383 +/*
1.1384 + *----------------------------------------------------------------------
1.1385 + *
1.1386 + * Tcl_UniCharIsDigit --
1.1387 + *
1.1388 + * Test if a character is a numeric Unicode character.
1.1389 + *
1.1390 + * Results:
1.1391 + * Returns non-zero if character is a digit.
1.1392 + *
1.1393 + * Side effects:
1.1394 + * None.
1.1395 + *
1.1396 + *----------------------------------------------------------------------
1.1397 + */
1.1398 +
1.1399 +EXPORT_C int
1.1400 +Tcl_UniCharIsDigit(ch)
1.1401 + int ch; /* Unicode character to test. */
1.1402 +{
1.1403 + return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK)
1.1404 + == DECIMAL_DIGIT_NUMBER);
1.1405 +}
1.1406 +
1.1407 +/*
1.1408 + *----------------------------------------------------------------------
1.1409 + *
1.1410 + * Tcl_UniCharIsGraph --
1.1411 + *
1.1412 + * Test if a character is any Unicode print character except space.
1.1413 + *
1.1414 + * Results:
1.1415 + * Returns non-zero if character is printable, but not space.
1.1416 + *
1.1417 + * Side effects:
1.1418 + * None.
1.1419 + *
1.1420 + *----------------------------------------------------------------------
1.1421 + */
1.1422 +
1.1423 +EXPORT_C int
1.1424 +Tcl_UniCharIsGraph(ch)
1.1425 + int ch; /* Unicode character to test. */
1.1426 +{
1.1427 + register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
1.1428 + return (((PRINT_BITS >> category) & 1) && ((unsigned char) ch != ' '));
1.1429 +}
1.1430 +
1.1431 +/*
1.1432 + *----------------------------------------------------------------------
1.1433 + *
1.1434 + * Tcl_UniCharIsLower --
1.1435 + *
1.1436 + * Test if a character is a lowercase Unicode character.
1.1437 + *
1.1438 + * Results:
1.1439 + * Returns non-zero if character is lowercase.
1.1440 + *
1.1441 + * Side effects:
1.1442 + * None.
1.1443 + *
1.1444 + *----------------------------------------------------------------------
1.1445 + */
1.1446 +
1.1447 +EXPORT_C int
1.1448 +Tcl_UniCharIsLower(ch)
1.1449 + int ch; /* Unicode character to test. */
1.1450 +{
1.1451 + return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == LOWERCASE_LETTER);
1.1452 +}
1.1453 +
1.1454 +/*
1.1455 + *----------------------------------------------------------------------
1.1456 + *
1.1457 + * Tcl_UniCharIsPrint --
1.1458 + *
1.1459 + * Test if a character is a Unicode print character.
1.1460 + *
1.1461 + * Results:
1.1462 + * Returns non-zero if character is printable.
1.1463 + *
1.1464 + * Side effects:
1.1465 + * None.
1.1466 + *
1.1467 + *----------------------------------------------------------------------
1.1468 + */
1.1469 +
1.1470 +EXPORT_C int
1.1471 +Tcl_UniCharIsPrint(ch)
1.1472 + int ch; /* Unicode character to test. */
1.1473 +{
1.1474 + register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
1.1475 + return ((PRINT_BITS >> category) & 1);
1.1476 +}
1.1477 +
1.1478 +/*
1.1479 + *----------------------------------------------------------------------
1.1480 + *
1.1481 + * Tcl_UniCharIsPunct --
1.1482 + *
1.1483 + * Test if a character is a Unicode punctuation character.
1.1484 + *
1.1485 + * Results:
1.1486 + * Returns non-zero if character is punct.
1.1487 + *
1.1488 + * Side effects:
1.1489 + * None.
1.1490 + *
1.1491 + *----------------------------------------------------------------------
1.1492 + */
1.1493 +
1.1494 +EXPORT_C int
1.1495 +Tcl_UniCharIsPunct(ch)
1.1496 + int ch; /* Unicode character to test. */
1.1497 +{
1.1498 + register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
1.1499 + return ((PUNCT_BITS >> category) & 1);
1.1500 +}
1.1501 +
1.1502 +/*
1.1503 + *----------------------------------------------------------------------
1.1504 + *
1.1505 + * Tcl_UniCharIsSpace --
1.1506 + *
1.1507 + * Test if a character is a whitespace Unicode character.
1.1508 + *
1.1509 + * Results:
1.1510 + * Returns non-zero if character is a space.
1.1511 + *
1.1512 + * Side effects:
1.1513 + * None.
1.1514 + *
1.1515 + *----------------------------------------------------------------------
1.1516 + */
1.1517 +
1.1518 +EXPORT_C int
1.1519 +Tcl_UniCharIsSpace(ch)
1.1520 + int ch; /* Unicode character to test. */
1.1521 +{
1.1522 + register int category;
1.1523 +
1.1524 + /*
1.1525 + * If the character is within the first 127 characters, just use the
1.1526 + * standard C function, otherwise consult the Unicode table.
1.1527 + */
1.1528 +
1.1529 + if (ch < 0x80) {
1.1530 + return isspace(UCHAR(ch)); /* INTL: ISO space */
1.1531 + } else {
1.1532 + category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
1.1533 + return ((SPACE_BITS >> category) & 1);
1.1534 + }
1.1535 +}
1.1536 +
1.1537 +/*
1.1538 + *----------------------------------------------------------------------
1.1539 + *
1.1540 + * Tcl_UniCharIsUpper --
1.1541 + *
1.1542 + * Test if a character is a uppercase Unicode character.
1.1543 + *
1.1544 + * Results:
1.1545 + * Returns non-zero if character is uppercase.
1.1546 + *
1.1547 + * Side effects:
1.1548 + * None.
1.1549 + *
1.1550 + *----------------------------------------------------------------------
1.1551 + */
1.1552 +
1.1553 +EXPORT_C int
1.1554 +Tcl_UniCharIsUpper(ch)
1.1555 + int ch; /* Unicode character to test. */
1.1556 +{
1.1557 + return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == UPPERCASE_LETTER);
1.1558 +}
1.1559 +
1.1560 +/*
1.1561 + *----------------------------------------------------------------------
1.1562 + *
1.1563 + * Tcl_UniCharIsWordChar --
1.1564 + *
1.1565 + * Test if a character is alphanumeric or a connector punctuation
1.1566 + * mark.
1.1567 + *
1.1568 + * Results:
1.1569 + * Returns 1 if character is a word character.
1.1570 + *
1.1571 + * Side effects:
1.1572 + * None.
1.1573 + *
1.1574 + *----------------------------------------------------------------------
1.1575 + */
1.1576 +
1.1577 +EXPORT_C int
1.1578 +Tcl_UniCharIsWordChar(ch)
1.1579 + int ch; /* Unicode character to test. */
1.1580 +{
1.1581 + register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
1.1582 +
1.1583 + return (((ALPHA_BITS | DIGIT_BITS | CONNECTOR_BITS) >> category) & 1);
1.1584 +}
1.1585 +
1.1586 +/*
1.1587 + *----------------------------------------------------------------------
1.1588 + *
1.1589 + * Tcl_UniCharCaseMatch --
1.1590 + *
1.1591 + * See if a particular Unicode string matches a particular pattern.
1.1592 + * Allows case insensitivity. This is the Unicode equivalent of
1.1593 + * the char* Tcl_StringCaseMatch. The UniChar strings must be
1.1594 + * NULL-terminated. This has no provision for counted UniChar
1.1595 + * strings, thus should not be used where NULLs are expected in the
1.1596 + * UniChar string. Use TclUniCharMatch where possible.
1.1597 + *
1.1598 + * Results:
1.1599 + * The return value is 1 if string matches pattern, and
1.1600 + * 0 otherwise. The matching operation permits the following
1.1601 + * special characters in the pattern: *?\[] (see the manual
1.1602 + * entry for details on what these mean).
1.1603 + *
1.1604 + * Side effects:
1.1605 + * None.
1.1606 + *
1.1607 + *----------------------------------------------------------------------
1.1608 + */
1.1609 +
1.1610 +EXPORT_C int
1.1611 +Tcl_UniCharCaseMatch(string, pattern, nocase)
1.1612 + CONST Tcl_UniChar *string; /* Unicode String. */
1.1613 + CONST Tcl_UniChar *pattern; /* Pattern, which may contain special
1.1614 + * characters. */
1.1615 + int nocase; /* 0 for case sensitive, 1 for insensitive */
1.1616 +{
1.1617 + Tcl_UniChar ch1, p;
1.1618 +
1.1619 + while (1) {
1.1620 + p = *pattern;
1.1621 +
1.1622 + /*
1.1623 + * See if we're at the end of both the pattern and the string. If
1.1624 + * so, we succeeded. If we're at the end of the pattern but not at
1.1625 + * the end of the string, we failed.
1.1626 + */
1.1627 +
1.1628 + if (p == 0) {
1.1629 + return (*string == 0);
1.1630 + }
1.1631 + if ((*string == 0) && (p != '*')) {
1.1632 + return 0;
1.1633 + }
1.1634 +
1.1635 + /*
1.1636 + * Check for a "*" as the next pattern character. It matches any
1.1637 + * substring. We handle this by skipping all the characters up to the
1.1638 + * next matching one in the pattern, and then calling ourselves
1.1639 + * recursively for each postfix of string, until either we match or we
1.1640 + * reach the end of the string.
1.1641 + */
1.1642 +
1.1643 + if (p == '*') {
1.1644 + /*
1.1645 + * Skip all successive *'s in the pattern
1.1646 + */
1.1647 + while (*(++pattern) == '*') {}
1.1648 + p = *pattern;
1.1649 + if (p == 0) {
1.1650 + return 1;
1.1651 + }
1.1652 + if (nocase) {
1.1653 + p = Tcl_UniCharToLower(p);
1.1654 + }
1.1655 + while (1) {
1.1656 + /*
1.1657 + * Optimization for matching - cruise through the string
1.1658 + * quickly if the next char in the pattern isn't a special
1.1659 + * character
1.1660 + */
1.1661 + if ((p != '[') && (p != '?') && (p != '\\')) {
1.1662 + if (nocase) {
1.1663 + while (*string && (p != *string)
1.1664 + && (p != Tcl_UniCharToLower(*string))) {
1.1665 + string++;
1.1666 + }
1.1667 + } else {
1.1668 + while (*string && (p != *string)) { string++; }
1.1669 + }
1.1670 + }
1.1671 + if (Tcl_UniCharCaseMatch(string, pattern, nocase)) {
1.1672 + return 1;
1.1673 + }
1.1674 + if (*string == 0) {
1.1675 + return 0;
1.1676 + }
1.1677 + string++;
1.1678 + }
1.1679 + }
1.1680 +
1.1681 + /*
1.1682 + * Check for a "?" as the next pattern character. It matches
1.1683 + * any single character.
1.1684 + */
1.1685 +
1.1686 + if (p == '?') {
1.1687 + pattern++;
1.1688 + string++;
1.1689 + continue;
1.1690 + }
1.1691 +
1.1692 + /*
1.1693 + * Check for a "[" as the next pattern character. It is followed
1.1694 + * by a list of characters that are acceptable, or by a range
1.1695 + * (two characters separated by "-").
1.1696 + */
1.1697 +
1.1698 + if (p == '[') {
1.1699 + Tcl_UniChar startChar, endChar;
1.1700 +
1.1701 + pattern++;
1.1702 + ch1 = (nocase ? Tcl_UniCharToLower(*string) : *string);
1.1703 + string++;
1.1704 + while (1) {
1.1705 + if ((*pattern == ']') || (*pattern == 0)) {
1.1706 + return 0;
1.1707 + }
1.1708 + startChar = (nocase ? Tcl_UniCharToLower(*pattern) : *pattern);
1.1709 + pattern++;
1.1710 + if (*pattern == '-') {
1.1711 + pattern++;
1.1712 + if (*pattern == 0) {
1.1713 + return 0;
1.1714 + }
1.1715 + endChar = (nocase ? Tcl_UniCharToLower(*pattern)
1.1716 + : *pattern);
1.1717 + pattern++;
1.1718 + if (((startChar <= ch1) && (ch1 <= endChar))
1.1719 + || ((endChar <= ch1) && (ch1 <= startChar))) {
1.1720 + /*
1.1721 + * Matches ranges of form [a-z] or [z-a].
1.1722 + */
1.1723 + break;
1.1724 + }
1.1725 + } else if (startChar == ch1) {
1.1726 + break;
1.1727 + }
1.1728 + }
1.1729 + while (*pattern != ']') {
1.1730 + if (*pattern == 0) {
1.1731 + pattern--;
1.1732 + break;
1.1733 + }
1.1734 + pattern++;
1.1735 + }
1.1736 + pattern++;
1.1737 + continue;
1.1738 + }
1.1739 +
1.1740 + /*
1.1741 + * If the next pattern character is '\', just strip off the '\'
1.1742 + * so we do exact matching on the character that follows.
1.1743 + */
1.1744 +
1.1745 + if (p == '\\') {
1.1746 + if (*(++pattern) == '\0') {
1.1747 + return 0;
1.1748 + }
1.1749 + }
1.1750 +
1.1751 + /*
1.1752 + * There's no special character. Just make sure that the next
1.1753 + * bytes of each string match.
1.1754 + */
1.1755 +
1.1756 + if (nocase) {
1.1757 + if (Tcl_UniCharToLower(*string) != Tcl_UniCharToLower(*pattern)) {
1.1758 + return 0;
1.1759 + }
1.1760 + } else if (*string != *pattern) {
1.1761 + return 0;
1.1762 + }
1.1763 + string++;
1.1764 + pattern++;
1.1765 + }
1.1766 +}
1.1767 +
1.1768 +/*
1.1769 + *----------------------------------------------------------------------
1.1770 + *
1.1771 + * TclUniCharMatch --
1.1772 + *
1.1773 + * See if a particular Unicode string matches a particular pattern.
1.1774 + * Allows case insensitivity. This is the Unicode equivalent of the
1.1775 + * char* Tcl_StringCaseMatch. This variant of Tcl_UniCharCaseMatch
1.1776 + * uses counted Strings, so embedded NULLs are allowed.
1.1777 + *
1.1778 + * Results:
1.1779 + * The return value is 1 if string matches pattern, and
1.1780 + * 0 otherwise. The matching operation permits the following
1.1781 + * special characters in the pattern: *?\[] (see the manual
1.1782 + * entry for details on what these mean).
1.1783 + *
1.1784 + * Side effects:
1.1785 + * None.
1.1786 + *
1.1787 + *----------------------------------------------------------------------
1.1788 + */
1.1789 +
1.1790 +int
1.1791 +TclUniCharMatch(string, strLen, pattern, ptnLen, nocase)
1.1792 + CONST Tcl_UniChar *string; /* Unicode String. */
1.1793 + int strLen; /* length of String */
1.1794 + CONST Tcl_UniChar *pattern; /* Pattern, which may contain special
1.1795 + * characters. */
1.1796 + int ptnLen; /* length of Pattern */
1.1797 + int nocase; /* 0 for case sensitive, 1 for insensitive */
1.1798 +{
1.1799 + CONST Tcl_UniChar *stringEnd, *patternEnd;
1.1800 + Tcl_UniChar p;
1.1801 +
1.1802 + stringEnd = string + strLen;
1.1803 + patternEnd = pattern + ptnLen;
1.1804 +
1.1805 + while (1) {
1.1806 + /*
1.1807 + * See if we're at the end of both the pattern and the string. If
1.1808 + * so, we succeeded. If we're at the end of the pattern but not at
1.1809 + * the end of the string, we failed.
1.1810 + */
1.1811 +
1.1812 + if (pattern == patternEnd) {
1.1813 + return (string == stringEnd);
1.1814 + }
1.1815 + p = *pattern;
1.1816 + if ((string == stringEnd) && (p != '*')) {
1.1817 + return 0;
1.1818 + }
1.1819 +
1.1820 + /*
1.1821 + * Check for a "*" as the next pattern character. It matches any
1.1822 + * substring. We handle this by skipping all the characters up to the
1.1823 + * next matching one in the pattern, and then calling ourselves
1.1824 + * recursively for each postfix of string, until either we match or we
1.1825 + * reach the end of the string.
1.1826 + */
1.1827 +
1.1828 + if (p == '*') {
1.1829 + /*
1.1830 + * Skip all successive *'s in the pattern
1.1831 + */
1.1832 + while (*(++pattern) == '*') {}
1.1833 + if (pattern == patternEnd) {
1.1834 + return 1;
1.1835 + }
1.1836 + p = *pattern;
1.1837 + if (nocase) {
1.1838 + p = Tcl_UniCharToLower(p);
1.1839 + }
1.1840 + while (1) {
1.1841 + /*
1.1842 + * Optimization for matching - cruise through the string
1.1843 + * quickly if the next char in the pattern isn't a special
1.1844 + * character
1.1845 + */
1.1846 + if ((p != '[') && (p != '?') && (p != '\\')) {
1.1847 + if (nocase) {
1.1848 + while ((string < stringEnd) && (p != *string)
1.1849 + && (p != Tcl_UniCharToLower(*string))) {
1.1850 + string++;
1.1851 + }
1.1852 + } else {
1.1853 + while ((string < stringEnd) && (p != *string)) {
1.1854 + string++;
1.1855 + }
1.1856 + }
1.1857 + }
1.1858 + if (TclUniCharMatch(string, stringEnd - string,
1.1859 + pattern, patternEnd - pattern, nocase)) {
1.1860 + return 1;
1.1861 + }
1.1862 + if (string == stringEnd) {
1.1863 + return 0;
1.1864 + }
1.1865 + string++;
1.1866 + }
1.1867 + }
1.1868 +
1.1869 + /*
1.1870 + * Check for a "?" as the next pattern character. It matches
1.1871 + * any single character.
1.1872 + */
1.1873 +
1.1874 + if (p == '?') {
1.1875 + pattern++;
1.1876 + string++;
1.1877 + continue;
1.1878 + }
1.1879 +
1.1880 + /*
1.1881 + * Check for a "[" as the next pattern character. It is followed
1.1882 + * by a list of characters that are acceptable, or by a range
1.1883 + * (two characters separated by "-").
1.1884 + */
1.1885 +
1.1886 + if (p == '[') {
1.1887 + Tcl_UniChar ch1, startChar, endChar;
1.1888 +
1.1889 + pattern++;
1.1890 + ch1 = (nocase ? Tcl_UniCharToLower(*string) : *string);
1.1891 + string++;
1.1892 + while (1) {
1.1893 + if ((*pattern == ']') || (pattern == patternEnd)) {
1.1894 + return 0;
1.1895 + }
1.1896 + startChar = (nocase ? Tcl_UniCharToLower(*pattern) : *pattern);
1.1897 + pattern++;
1.1898 + if (*pattern == '-') {
1.1899 + pattern++;
1.1900 + if (pattern == patternEnd) {
1.1901 + return 0;
1.1902 + }
1.1903 + endChar = (nocase ? Tcl_UniCharToLower(*pattern)
1.1904 + : *pattern);
1.1905 + pattern++;
1.1906 + if (((startChar <= ch1) && (ch1 <= endChar))
1.1907 + || ((endChar <= ch1) && (ch1 <= startChar))) {
1.1908 + /*
1.1909 + * Matches ranges of form [a-z] or [z-a].
1.1910 + */
1.1911 + break;
1.1912 + }
1.1913 + } else if (startChar == ch1) {
1.1914 + break;
1.1915 + }
1.1916 + }
1.1917 + while (*pattern != ']') {
1.1918 + if (pattern == patternEnd) {
1.1919 + pattern--;
1.1920 + break;
1.1921 + }
1.1922 + pattern++;
1.1923 + }
1.1924 + pattern++;
1.1925 + continue;
1.1926 + }
1.1927 +
1.1928 + /*
1.1929 + * If the next pattern character is '\', just strip off the '\'
1.1930 + * so we do exact matching on the character that follows.
1.1931 + */
1.1932 +
1.1933 + if (p == '\\') {
1.1934 + if (++pattern == patternEnd) {
1.1935 + return 0;
1.1936 + }
1.1937 + }
1.1938 +
1.1939 + /*
1.1940 + * There's no special character. Just make sure that the next
1.1941 + * bytes of each string match.
1.1942 + */
1.1943 +
1.1944 + if (nocase) {
1.1945 + if (Tcl_UniCharToLower(*string) != Tcl_UniCharToLower(*pattern)) {
1.1946 + return 0;
1.1947 + }
1.1948 + } else if (*string != *pattern) {
1.1949 + return 0;
1.1950 + }
1.1951 + string++;
1.1952 + pattern++;
1.1953 + }
1.1954 +}