os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/generic/tclRegexp.c
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/generic/tclRegexp.c Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,1041 @@
1.4 +/*
1.5 + * tclRegexp.c --
1.6 + *
1.7 + * This file contains the public interfaces to the Tcl regular
1.8 + * expression mechanism.
1.9 + *
1.10 + * Copyright (c) 1998 by Sun Microsystems, Inc.
1.11 + * Copyright (c) 1998-1999 by Scriptics Corporation.
1.12 + * Portions Copyright (c) 2007-2008 Nokia Corporation and/or its subsidiaries. All rights reserved.
1.13 + *
1.14 + * See the file "license.terms" for information on usage and redistribution
1.15 + * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
1.16 + *
1.17 + * RCS: @(#) $Id: tclRegexp.c,v 1.14.4.2 2006/04/07 01:14:28 hobbs Exp $
1.18 + */
1.19 +
1.20 +#include "tclInt.h"
1.21 +#include "tclPort.h"
1.22 +#include "tclRegexp.h"
1.23 +#if defined(__SYMBIAN32__) && defined(__WINSCW__)
1.24 +#include "tclSymbianGlobals.h"
1.25 +#define dataKey getdataKey(6)
1.26 +#endif
1.27 +
1.28 +/*
1.29 + *----------------------------------------------------------------------
1.30 + * The routines in this file use Henry Spencer's regular expression
1.31 + * package contained in the following additional source files:
1.32 + *
1.33 + * regc_color.c regc_cvec.c regc_lex.c
1.34 + * regc_nfa.c regcomp.c regcustom.h
1.35 + * rege_dfa.c regerror.c regerrs.h
1.36 + * regex.h regexec.c regfree.c
1.37 + * regfronts.c regguts.h
1.38 + *
1.39 + * Copyright (c) 1998 Henry Spencer. All rights reserved.
1.40 + *
1.41 + * Development of this software was funded, in part, by Cray Research Inc.,
1.42 + * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
1.43 + * Corporation, none of whom are responsible for the results. The author
1.44 + * thanks all of them.
1.45 + *
1.46 + * Redistribution and use in source and binary forms -- with or without
1.47 + * modification -- are permitted for any purpose, provided that
1.48 + * redistributions in source form retain this entire copyright notice and
1.49 + * indicate the origin and nature of any modifications.
1.50 + *
1.51 + * I'd appreciate being given credit for this package in the documentation
1.52 + * of software which uses it, but that is not a requirement.
1.53 + *
1.54 + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
1.55 + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
1.56 + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
1.57 + * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
1.58 + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
1.59 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
1.60 + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
1.61 + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
1.62 + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
1.63 + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1.64 + *
1.65 + * *** NOTE: this code has been altered slightly for use in Tcl: ***
1.66 + * *** 1. Names have been changed, e.g. from re_comp to ***
1.67 + * *** TclRegComp, to avoid clashes with other ***
1.68 + * *** regexp implementations used by applications. ***
1.69 + */
1.70 +
1.71 +/*
1.72 + * Thread local storage used to maintain a per-thread cache of compiled
1.73 + * regular expressions.
1.74 + */
1.75 +
1.76 +#define NUM_REGEXPS 30
1.77 +
1.78 +typedef struct ThreadSpecificData {
1.79 + int initialized; /* Set to 1 when the module is initialized. */
1.80 + char *patterns[NUM_REGEXPS];/* Strings corresponding to compiled
1.81 + * regular expression patterns. NULL
1.82 + * means that this slot isn't used.
1.83 + * Malloc-ed. */
1.84 + int patLengths[NUM_REGEXPS];/* Number of non-null characters in
1.85 + * corresponding entry in patterns.
1.86 + * -1 means entry isn't used. */
1.87 + struct TclRegexp *regexps[NUM_REGEXPS];
1.88 + /* Compiled forms of above strings. Also
1.89 + * malloc-ed, or NULL if not in use yet. */
1.90 +} ThreadSpecificData;
1.91 +
1.92 +#if !defined(__SYMBIAN32__) || !defined(__WINSCW__)
1.93 +static Tcl_ThreadDataKey dataKey;
1.94 +#endif
1.95 +
1.96 +/*
1.97 + * Declarations for functions used only in this file.
1.98 + */
1.99 +
1.100 +static TclRegexp * CompileRegexp _ANSI_ARGS_((Tcl_Interp *interp,
1.101 + CONST char *pattern, int length, int flags));
1.102 +static void DupRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr,
1.103 + Tcl_Obj *copyPtr));
1.104 +static void FinalizeRegexp _ANSI_ARGS_((ClientData clientData));
1.105 +static void FreeRegexp _ANSI_ARGS_((TclRegexp *regexpPtr));
1.106 +static void FreeRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr));
1.107 +static int RegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp,
1.108 + Tcl_RegExp re, CONST Tcl_UniChar *uniString,
1.109 + int numChars, int nmatches, int flags));
1.110 +static int SetRegexpFromAny _ANSI_ARGS_((Tcl_Interp *interp,
1.111 + Tcl_Obj *objPtr));
1.112 +
1.113 +/*
1.114 + * The regular expression Tcl object type. This serves as a cache
1.115 + * of the compiled form of the regular expression.
1.116 + */
1.117 +
1.118 +static Tcl_ObjType tclRegexpType = {
1.119 + "regexp", /* name */
1.120 + FreeRegexpInternalRep, /* freeIntRepProc */
1.121 + DupRegexpInternalRep, /* dupIntRepProc */
1.122 + NULL, /* updateStringProc */
1.123 + SetRegexpFromAny /* setFromAnyProc */
1.124 +};
1.125 +
1.126 +
1.127 +/*
1.128 + *----------------------------------------------------------------------
1.129 + *
1.130 + * Tcl_RegExpCompile --
1.131 + *
1.132 + * Compile a regular expression into a form suitable for fast
1.133 + * matching. This procedure is DEPRECATED in favor of the
1.134 + * object version of the command.
1.135 + *
1.136 + * Results:
1.137 + * The return value is a pointer to the compiled form of string,
1.138 + * suitable for passing to Tcl_RegExpExec. This compiled form
1.139 + * is only valid up until the next call to this procedure, so
1.140 + * don't keep these around for a long time! If an error occurred
1.141 + * while compiling the pattern, then NULL is returned and an error
1.142 + * message is left in the interp's result.
1.143 + *
1.144 + * Side effects:
1.145 + * Updates the cache of compiled regexps.
1.146 + *
1.147 + *----------------------------------------------------------------------
1.148 + */
1.149 +
1.150 +EXPORT_C Tcl_RegExp
1.151 +Tcl_RegExpCompile(interp, string)
1.152 + Tcl_Interp *interp; /* For use in error reporting and
1.153 + * to access the interp regexp cache. */
1.154 + CONST char *string; /* String for which to produce
1.155 + * compiled regular expression. */
1.156 +{
1.157 + return (Tcl_RegExp) CompileRegexp(interp, string, (int) strlen(string),
1.158 + REG_ADVANCED);
1.159 +}
1.160 +
1.161 +/*
1.162 + *----------------------------------------------------------------------
1.163 + *
1.164 + * Tcl_RegExpExec --
1.165 + *
1.166 + * Execute the regular expression matcher using a compiled form
1.167 + * of a regular expression and save information about any match
1.168 + * that is found.
1.169 + *
1.170 + * Results:
1.171 + * If an error occurs during the matching operation then -1
1.172 + * is returned and the interp's result contains an error message.
1.173 + * Otherwise the return value is 1 if a matching range is
1.174 + * found and 0 if there is no matching range.
1.175 + *
1.176 + * Side effects:
1.177 + * None.
1.178 + *
1.179 + *----------------------------------------------------------------------
1.180 + */
1.181 +
1.182 +EXPORT_C int
1.183 +Tcl_RegExpExec(interp, re, string, start)
1.184 + Tcl_Interp *interp; /* Interpreter to use for error reporting. */
1.185 + Tcl_RegExp re; /* Compiled regular expression; must have
1.186 + * been returned by previous call to
1.187 + * Tcl_GetRegExpFromObj. */
1.188 + CONST char *string; /* String against which to match re. */
1.189 + CONST char *start; /* If string is part of a larger string,
1.190 + * this identifies beginning of larger
1.191 + * string, so that "^" won't match. */
1.192 +{
1.193 + int flags, result, numChars;
1.194 + TclRegexp *regexp = (TclRegexp *)re;
1.195 + Tcl_DString ds;
1.196 + CONST Tcl_UniChar *ustr;
1.197 +
1.198 + /*
1.199 + * If the starting point is offset from the beginning of the buffer,
1.200 + * then we need to tell the regexp engine not to match "^".
1.201 + */
1.202 +
1.203 + if (string > start) {
1.204 + flags = REG_NOTBOL;
1.205 + } else {
1.206 + flags = 0;
1.207 + }
1.208 +
1.209 + /*
1.210 + * Remember the string for use by Tcl_RegExpRange().
1.211 + */
1.212 +
1.213 + regexp->string = string;
1.214 + regexp->objPtr = NULL;
1.215 +
1.216 + /*
1.217 + * Convert the string to Unicode and perform the match.
1.218 + */
1.219 +
1.220 + Tcl_DStringInit(&ds);
1.221 + ustr = Tcl_UtfToUniCharDString(string, -1, &ds);
1.222 + numChars = Tcl_DStringLength(&ds) / sizeof(Tcl_UniChar);
1.223 + result = RegExpExecUniChar(interp, re, ustr, numChars,
1.224 + -1 /* nmatches */, flags);
1.225 + Tcl_DStringFree(&ds);
1.226 +
1.227 + return result;
1.228 +}
1.229 +
1.230 +/*
1.231 + *---------------------------------------------------------------------------
1.232 + *
1.233 + * Tcl_RegExpRange --
1.234 + *
1.235 + * Returns pointers describing the range of a regular expression match,
1.236 + * or one of the subranges within the match.
1.237 + *
1.238 + * Results:
1.239 + * The variables at *startPtr and *endPtr are modified to hold the
1.240 + * addresses of the endpoints of the range given by index. If the
1.241 + * specified range doesn't exist then NULLs are returned.
1.242 + *
1.243 + * Side effects:
1.244 + * None.
1.245 + *
1.246 + *---------------------------------------------------------------------------
1.247 + */
1.248 +
1.249 +EXPORT_C void
1.250 +Tcl_RegExpRange(re, index, startPtr, endPtr)
1.251 + Tcl_RegExp re; /* Compiled regular expression that has
1.252 + * been passed to Tcl_RegExpExec. */
1.253 + int index; /* 0 means give the range of the entire
1.254 + * match, > 0 means give the range of
1.255 + * a matching subrange. */
1.256 + CONST char **startPtr; /* Store address of first character in
1.257 + * (sub-) range here. */
1.258 + CONST char **endPtr; /* Store address of character just after last
1.259 + * in (sub-) range here. */
1.260 +{
1.261 + TclRegexp *regexpPtr = (TclRegexp *) re;
1.262 + CONST char *string;
1.263 +
1.264 + if ((size_t) index > regexpPtr->re.re_nsub) {
1.265 + *startPtr = *endPtr = NULL;
1.266 + } else if (regexpPtr->matches[index].rm_so < 0) {
1.267 + *startPtr = *endPtr = NULL;
1.268 + } else {
1.269 + if (regexpPtr->objPtr) {
1.270 + string = Tcl_GetString(regexpPtr->objPtr);
1.271 + } else {
1.272 + string = regexpPtr->string;
1.273 + }
1.274 + *startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so);
1.275 + *endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo);
1.276 + }
1.277 +}
1.278 +
1.279 +/*
1.280 + *---------------------------------------------------------------------------
1.281 + *
1.282 + * RegExpExecUniChar --
1.283 + *
1.284 + * Execute the regular expression matcher using a compiled form of a
1.285 + * regular expression and save information about any match that is
1.286 + * found.
1.287 + *
1.288 + * Results:
1.289 + * If an error occurs during the matching operation then -1 is
1.290 + * returned and an error message is left in interp's result.
1.291 + * Otherwise the return value is 1 if a matching range was found or
1.292 + * 0 if there was no matching range.
1.293 + *
1.294 + * Side effects:
1.295 + * None.
1.296 + *
1.297 + *----------------------------------------------------------------------
1.298 + */
1.299 +
1.300 +static int
1.301 +RegExpExecUniChar(interp, re, wString, numChars, nmatches, flags)
1.302 + Tcl_Interp *interp; /* Interpreter to use for error reporting. */
1.303 + Tcl_RegExp re; /* Compiled regular expression; returned by
1.304 + * a previous call to Tcl_GetRegExpFromObj */
1.305 + CONST Tcl_UniChar *wString; /* String against which to match re. */
1.306 + int numChars; /* Length of Tcl_UniChar string (must
1.307 + * be >= 0). */
1.308 + int nmatches; /* How many subexpression matches (counting
1.309 + * the whole match as subexpression 0) are
1.310 + * of interest. -1 means "don't know". */
1.311 + int flags; /* Regular expression flags. */
1.312 +{
1.313 + int status;
1.314 + TclRegexp *regexpPtr = (TclRegexp *) re;
1.315 + size_t last = regexpPtr->re.re_nsub + 1;
1.316 + size_t nm = last;
1.317 +
1.318 + if (nmatches >= 0 && (size_t) nmatches < nm) {
1.319 + nm = (size_t) nmatches;
1.320 + }
1.321 +
1.322 + status = TclReExec(®expPtr->re, wString, (size_t) numChars,
1.323 + ®expPtr->details, nm, regexpPtr->matches, flags);
1.324 +
1.325 + /*
1.326 + * Check for errors.
1.327 + */
1.328 +
1.329 + if (status != REG_OKAY) {
1.330 + if (status == REG_NOMATCH) {
1.331 + return 0;
1.332 + }
1.333 + if (interp != NULL) {
1.334 + TclRegError(interp, "error while matching regular expression: ",
1.335 + status);
1.336 + }
1.337 + return -1;
1.338 + }
1.339 + return 1;
1.340 +}
1.341 +
1.342 +/*
1.343 + *---------------------------------------------------------------------------
1.344 + *
1.345 + * TclRegExpRangeUniChar --
1.346 + *
1.347 + * Returns pointers describing the range of a regular expression match,
1.348 + * or one of the subranges within the match, or the hypothetical range
1.349 + * represented by the rm_extend field of the rm_detail_t.
1.350 + *
1.351 + * Results:
1.352 + * The variables at *startPtr and *endPtr are modified to hold the
1.353 + * offsets of the endpoints of the range given by index. If the
1.354 + * specified range doesn't exist then -1s are supplied.
1.355 + *
1.356 + * Side effects:
1.357 + * None.
1.358 + *
1.359 + *---------------------------------------------------------------------------
1.360 + */
1.361 +
1.362 +void
1.363 +TclRegExpRangeUniChar(re, index, startPtr, endPtr)
1.364 + Tcl_RegExp re; /* Compiled regular expression that has
1.365 + * been passed to Tcl_RegExpExec. */
1.366 + int index; /* 0 means give the range of the entire
1.367 + * match, > 0 means give the range of
1.368 + * a matching subrange, -1 means the
1.369 + * range of the rm_extend field. */
1.370 + int *startPtr; /* Store address of first character in
1.371 + * (sub-) range here. */
1.372 + int *endPtr; /* Store address of character just after last
1.373 + * in (sub-) range here. */
1.374 +{
1.375 + TclRegexp *regexpPtr = (TclRegexp *) re;
1.376 +
1.377 + if ((regexpPtr->flags®_EXPECT) && index == -1) {
1.378 + *startPtr = regexpPtr->details.rm_extend.rm_so;
1.379 + *endPtr = regexpPtr->details.rm_extend.rm_eo;
1.380 + } else if ((size_t) index > regexpPtr->re.re_nsub) {
1.381 + *startPtr = -1;
1.382 + *endPtr = -1;
1.383 + } else {
1.384 + *startPtr = regexpPtr->matches[index].rm_so;
1.385 + *endPtr = regexpPtr->matches[index].rm_eo;
1.386 + }
1.387 +}
1.388 +
1.389 +/*
1.390 + *----------------------------------------------------------------------
1.391 + *
1.392 + * Tcl_RegExpMatch --
1.393 + *
1.394 + * See if a string matches a regular expression.
1.395 + *
1.396 + * Results:
1.397 + * If an error occurs during the matching operation then -1
1.398 + * is returned and the interp's result contains an error message.
1.399 + * Otherwise the return value is 1 if "string" matches "pattern"
1.400 + * and 0 otherwise.
1.401 + *
1.402 + * Side effects:
1.403 + * None.
1.404 + *
1.405 + *----------------------------------------------------------------------
1.406 + */
1.407 +
1.408 +EXPORT_C int
1.409 +Tcl_RegExpMatch(interp, string, pattern)
1.410 + Tcl_Interp *interp; /* Used for error reporting. May be NULL. */
1.411 + CONST char *string; /* String. */
1.412 + CONST char *pattern; /* Regular expression to match against
1.413 + * string. */
1.414 +{
1.415 + Tcl_RegExp re;
1.416 +
1.417 + re = Tcl_RegExpCompile(interp, pattern);
1.418 + if (re == NULL) {
1.419 + return -1;
1.420 + }
1.421 + return Tcl_RegExpExec(interp, re, string, string);
1.422 +}
1.423 +
1.424 +/*
1.425 + *----------------------------------------------------------------------
1.426 + *
1.427 + * Tcl_RegExpExecObj --
1.428 + *
1.429 + * Execute a precompiled regexp against the given object.
1.430 + *
1.431 + * Results:
1.432 + * If an error occurs during the matching operation then -1
1.433 + * is returned and the interp's result contains an error message.
1.434 + * Otherwise the return value is 1 if "string" matches "pattern"
1.435 + * and 0 otherwise.
1.436 + *
1.437 + * Side effects:
1.438 + * Converts the object to a Unicode object.
1.439 + *
1.440 + *----------------------------------------------------------------------
1.441 + */
1.442 +
1.443 +EXPORT_C int
1.444 +Tcl_RegExpExecObj(interp, re, objPtr, offset, nmatches, flags)
1.445 + Tcl_Interp *interp; /* Interpreter to use for error reporting. */
1.446 + Tcl_RegExp re; /* Compiled regular expression; must have
1.447 + * been returned by previous call to
1.448 + * Tcl_GetRegExpFromObj. */
1.449 + Tcl_Obj *objPtr; /* String against which to match re. */
1.450 + int offset; /* Character index that marks where matching
1.451 + * should begin. */
1.452 + int nmatches; /* How many subexpression matches (counting
1.453 + * the whole match as subexpression 0) are
1.454 + * of interest. -1 means all of them. */
1.455 + int flags; /* Regular expression execution flags. */
1.456 +{
1.457 + TclRegexp *regexpPtr = (TclRegexp *) re;
1.458 + Tcl_UniChar *udata;
1.459 + int length;
1.460 +
1.461 + /*
1.462 + * Save the target object so we can extract strings from it later.
1.463 + */
1.464 +
1.465 + regexpPtr->string = NULL;
1.466 + regexpPtr->objPtr = objPtr;
1.467 +
1.468 + udata = Tcl_GetUnicodeFromObj(objPtr, &length);
1.469 +
1.470 + if (offset > length) {
1.471 + offset = length;
1.472 + }
1.473 + udata += offset;
1.474 + length -= offset;
1.475 +
1.476 + return RegExpExecUniChar(interp, re, udata, length, nmatches, flags);
1.477 +}
1.478 +
1.479 +/*
1.480 + *----------------------------------------------------------------------
1.481 + *
1.482 + * Tcl_RegExpMatchObj --
1.483 + *
1.484 + * See if an object matches a regular expression.
1.485 + *
1.486 + * Results:
1.487 + * If an error occurs during the matching operation then -1
1.488 + * is returned and the interp's result contains an error message.
1.489 + * Otherwise the return value is 1 if "string" matches "pattern"
1.490 + * and 0 otherwise.
1.491 + *
1.492 + * Side effects:
1.493 + * Changes the internal rep of the pattern and string objects.
1.494 + *
1.495 + *----------------------------------------------------------------------
1.496 + */
1.497 +
1.498 +EXPORT_C int
1.499 +Tcl_RegExpMatchObj(interp, stringObj, patternObj)
1.500 + Tcl_Interp *interp; /* Used for error reporting. May be NULL. */
1.501 + Tcl_Obj *stringObj; /* Object containing the String to search. */
1.502 + Tcl_Obj *patternObj; /* Regular expression to match against
1.503 + * string. */
1.504 +{
1.505 + Tcl_RegExp re;
1.506 +
1.507 + re = Tcl_GetRegExpFromObj(interp, patternObj,
1.508 + TCL_REG_ADVANCED | TCL_REG_NOSUB);
1.509 + if (re == NULL) {
1.510 + return -1;
1.511 + }
1.512 + return Tcl_RegExpExecObj(interp, re, stringObj, 0 /* offset */,
1.513 + 0 /* nmatches */, 0 /* flags */);
1.514 +}
1.515 +
1.516 +/*
1.517 + *----------------------------------------------------------------------
1.518 + *
1.519 + * Tcl_RegExpGetInfo --
1.520 + *
1.521 + * Retrieve information about the current match.
1.522 + *
1.523 + * Results:
1.524 + * None.
1.525 + *
1.526 + * Side effects:
1.527 + * None.
1.528 + *
1.529 + *----------------------------------------------------------------------
1.530 + */
1.531 +
1.532 +EXPORT_C void
1.533 +Tcl_RegExpGetInfo(regexp, infoPtr)
1.534 + Tcl_RegExp regexp; /* Pattern from which to get subexpressions. */
1.535 + Tcl_RegExpInfo *infoPtr; /* Match information is stored here. */
1.536 +{
1.537 + TclRegexp *regexpPtr = (TclRegexp *) regexp;
1.538 +
1.539 + infoPtr->nsubs = regexpPtr->re.re_nsub;
1.540 + infoPtr->matches = (Tcl_RegExpIndices *) regexpPtr->matches;
1.541 + infoPtr->extendStart = regexpPtr->details.rm_extend.rm_so;
1.542 +}
1.543 +
1.544 +/*
1.545 + *----------------------------------------------------------------------
1.546 + *
1.547 + * Tcl_GetRegExpFromObj --
1.548 + *
1.549 + * Compile a regular expression into a form suitable for fast
1.550 + * matching. This procedure caches the result in a Tcl_Obj.
1.551 + *
1.552 + * Results:
1.553 + * The return value is a pointer to the compiled form of string,
1.554 + * suitable for passing to Tcl_RegExpExec. If an error occurred
1.555 + * while compiling the pattern, then NULL is returned and an error
1.556 + * message is left in the interp's result.
1.557 + *
1.558 + * Side effects:
1.559 + * Updates the native rep of the Tcl_Obj.
1.560 + *
1.561 + *----------------------------------------------------------------------
1.562 + */
1.563 +
1.564 +EXPORT_C Tcl_RegExp
1.565 +Tcl_GetRegExpFromObj(interp, objPtr, flags)
1.566 + Tcl_Interp *interp; /* For use in error reporting, and to access
1.567 + * the interp regexp cache. */
1.568 + Tcl_Obj *objPtr; /* Object whose string rep contains regular
1.569 + * expression pattern. Internal rep will be
1.570 + * changed to compiled form of this regular
1.571 + * expression. */
1.572 + int flags; /* Regular expression compilation flags. */
1.573 +{
1.574 + int length;
1.575 + Tcl_ObjType *typePtr;
1.576 + TclRegexp *regexpPtr;
1.577 + char *pattern;
1.578 +
1.579 + typePtr = objPtr->typePtr;
1.580 + regexpPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
1.581 +
1.582 + if ((typePtr != &tclRegexpType) || (regexpPtr->flags != flags)) {
1.583 + pattern = Tcl_GetStringFromObj(objPtr, &length);
1.584 +
1.585 + regexpPtr = CompileRegexp(interp, pattern, length, flags);
1.586 + if (regexpPtr == NULL) {
1.587 + return NULL;
1.588 + }
1.589 +
1.590 + /*
1.591 + * Add a reference to the regexp so it will persist even if it is
1.592 + * pushed out of the current thread's regexp cache. This reference
1.593 + * will be removed when the object's internal rep is freed.
1.594 + */
1.595 +
1.596 + regexpPtr->refCount++;
1.597 +
1.598 + /*
1.599 + * Free the old representation and set our type.
1.600 + */
1.601 +
1.602 + if ((typePtr != NULL) && (typePtr->freeIntRepProc != NULL)) {
1.603 + (*typePtr->freeIntRepProc)(objPtr);
1.604 + }
1.605 + objPtr->internalRep.otherValuePtr = (VOID *) regexpPtr;
1.606 + objPtr->typePtr = &tclRegexpType;
1.607 + }
1.608 + return (Tcl_RegExp) regexpPtr;
1.609 +}
1.610 +
1.611 +/*
1.612 + *----------------------------------------------------------------------
1.613 + *
1.614 + * TclRegAbout --
1.615 + *
1.616 + * Return information about a compiled regular expression.
1.617 + *
1.618 + * Results:
1.619 + * The return value is -1 for failure, 0 for success, although at
1.620 + * the moment there's nothing that could fail. On success, a list
1.621 + * is left in the interp's result: first element is the subexpression
1.622 + * count, second is a list of re_info bit names.
1.623 + *
1.624 + * Side effects:
1.625 + * None.
1.626 + *
1.627 + *----------------------------------------------------------------------
1.628 + */
1.629 +
1.630 +int
1.631 +TclRegAbout(interp, re)
1.632 + Tcl_Interp *interp; /* For use in variable assignment. */
1.633 + Tcl_RegExp re; /* The compiled regular expression. */
1.634 +{
1.635 + TclRegexp *regexpPtr = (TclRegexp *)re;
1.636 + char buf[TCL_INTEGER_SPACE];
1.637 + static struct infoname {
1.638 + int bit;
1.639 + char *text;
1.640 + } infonames[] = {
1.641 + {REG_UBACKREF, "REG_UBACKREF"},
1.642 + {REG_ULOOKAHEAD, "REG_ULOOKAHEAD"},
1.643 + {REG_UBOUNDS, "REG_UBOUNDS"},
1.644 + {REG_UBRACES, "REG_UBRACES"},
1.645 + {REG_UBSALNUM, "REG_UBSALNUM"},
1.646 + {REG_UPBOTCH, "REG_UPBOTCH"},
1.647 + {REG_UBBS, "REG_UBBS"},
1.648 + {REG_UNONPOSIX, "REG_UNONPOSIX"},
1.649 + {REG_UUNSPEC, "REG_UUNSPEC"},
1.650 + {REG_UUNPORT, "REG_UUNPORT"},
1.651 + {REG_ULOCALE, "REG_ULOCALE"},
1.652 + {REG_UEMPTYMATCH, "REG_UEMPTYMATCH"},
1.653 + {REG_UIMPOSSIBLE, "REG_UIMPOSSIBLE"},
1.654 + {REG_USHORTEST, "REG_USHORTEST"},
1.655 + {0, ""}
1.656 + };
1.657 + struct infoname *inf;
1.658 + int n;
1.659 +
1.660 + Tcl_ResetResult(interp);
1.661 +
1.662 + sprintf(buf, "%u", (unsigned)(regexpPtr->re.re_nsub));
1.663 + Tcl_AppendElement(interp, buf);
1.664 +
1.665 + /*
1.666 + * Must count bits before generating list, because we must know
1.667 + * whether {} are needed before we start appending names.
1.668 + */
1.669 + n = 0;
1.670 + for (inf = infonames; inf->bit != 0; inf++) {
1.671 + if (regexpPtr->re.re_info&inf->bit) {
1.672 + n++;
1.673 + }
1.674 + }
1.675 + if (n != 1) {
1.676 + Tcl_AppendResult(interp, " {", NULL);
1.677 + }
1.678 + for (inf = infonames; inf->bit != 0; inf++) {
1.679 + if (regexpPtr->re.re_info&inf->bit) {
1.680 + Tcl_AppendElement(interp, inf->text);
1.681 + }
1.682 + }
1.683 + if (n != 1) {
1.684 + Tcl_AppendResult(interp, "}", NULL);
1.685 + }
1.686 +
1.687 + return 0;
1.688 +}
1.689 +
1.690 +/*
1.691 + *----------------------------------------------------------------------
1.692 + *
1.693 + * TclRegError --
1.694 + *
1.695 + * Generate an error message based on the regexp status code.
1.696 + *
1.697 + * Results:
1.698 + * Places an error in the interpreter.
1.699 + *
1.700 + * Side effects:
1.701 + * Sets errorCode as well.
1.702 + *
1.703 + *----------------------------------------------------------------------
1.704 + */
1.705 +
1.706 +void
1.707 +TclRegError(interp, msg, status)
1.708 + Tcl_Interp *interp; /* Interpreter for error reporting. */
1.709 + CONST char *msg; /* Message to prepend to error. */
1.710 + int status; /* Status code to report. */
1.711 +{
1.712 + char buf[100]; /* ample in practice */
1.713 + char cbuf[100]; /* lots in practice */
1.714 + size_t n;
1.715 + char *p;
1.716 +
1.717 + Tcl_ResetResult(interp);
1.718 + n = TclReError(status, (regex_t *)NULL, buf, sizeof(buf));
1.719 + p = (n > sizeof(buf)) ? "..." : "";
1.720 + Tcl_AppendResult(interp, msg, buf, p, NULL);
1.721 +
1.722 + sprintf(cbuf, "%d", status);
1.723 + (VOID) TclReError(REG_ITOA, (regex_t *)NULL, cbuf, sizeof(cbuf));
1.724 + Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, NULL);
1.725 +}
1.726 +
1.727 +
1.728 +/*
1.729 + *----------------------------------------------------------------------
1.730 + *
1.731 + * FreeRegexpInternalRep --
1.732 + *
1.733 + * Deallocate the storage associated with a regexp object's internal
1.734 + * representation.
1.735 + *
1.736 + * Results:
1.737 + * None.
1.738 + *
1.739 + * Side effects:
1.740 + * Frees the compiled regular expression.
1.741 + *
1.742 + *----------------------------------------------------------------------
1.743 + */
1.744 +
1.745 +static void
1.746 +FreeRegexpInternalRep(objPtr)
1.747 + Tcl_Obj *objPtr; /* Regexp object with internal rep to free. */
1.748 +{
1.749 + TclRegexp *regexpRepPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
1.750 +
1.751 + /*
1.752 + * If this is the last reference to the regexp, free it.
1.753 + */
1.754 +
1.755 + if (--(regexpRepPtr->refCount) <= 0) {
1.756 + FreeRegexp(regexpRepPtr);
1.757 + }
1.758 +}
1.759 +
1.760 +/*
1.761 + *----------------------------------------------------------------------
1.762 + *
1.763 + * DupRegexpInternalRep --
1.764 + *
1.765 + * We copy the reference to the compiled regexp and bump its
1.766 + * reference count.
1.767 + *
1.768 + * Results:
1.769 + * None.
1.770 + *
1.771 + * Side effects:
1.772 + * Increments the reference count of the regexp.
1.773 + *
1.774 + *----------------------------------------------------------------------
1.775 + */
1.776 +
1.777 +static void
1.778 +DupRegexpInternalRep(srcPtr, copyPtr)
1.779 + Tcl_Obj *srcPtr; /* Object with internal rep to copy. */
1.780 + Tcl_Obj *copyPtr; /* Object with internal rep to set. */
1.781 +{
1.782 + TclRegexp *regexpPtr = (TclRegexp *) srcPtr->internalRep.otherValuePtr;
1.783 + regexpPtr->refCount++;
1.784 + copyPtr->internalRep.otherValuePtr = srcPtr->internalRep.otherValuePtr;
1.785 + copyPtr->typePtr = &tclRegexpType;
1.786 +}
1.787 +
1.788 +/*
1.789 + *----------------------------------------------------------------------
1.790 + *
1.791 + * SetRegexpFromAny --
1.792 + *
1.793 + * Attempt to generate a compiled regular expression for the Tcl object
1.794 + * "objPtr".
1.795 + *
1.796 + * Results:
1.797 + * The return value is TCL_OK or TCL_ERROR. If an error occurs during
1.798 + * conversion, an error message is left in the interpreter's result
1.799 + * unless "interp" is NULL.
1.800 + *
1.801 + * Side effects:
1.802 + * If no error occurs, a regular expression is stored as "objPtr"s
1.803 + * internal representation.
1.804 + *
1.805 + *----------------------------------------------------------------------
1.806 + */
1.807 +
1.808 +static int
1.809 +SetRegexpFromAny(interp, objPtr)
1.810 + Tcl_Interp *interp; /* Used for error reporting if not NULL. */
1.811 + Tcl_Obj *objPtr; /* The object to convert. */
1.812 +{
1.813 + if (Tcl_GetRegExpFromObj(interp, objPtr, REG_ADVANCED) == NULL) {
1.814 + return TCL_ERROR;
1.815 + }
1.816 + return TCL_OK;
1.817 +}
1.818 +
1.819 +/*
1.820 + *---------------------------------------------------------------------------
1.821 + *
1.822 + * CompileRegexp --
1.823 + *
1.824 + * Attempt to compile the given regexp pattern. If the compiled
1.825 + * regular expression can be found in the per-thread cache, it
1.826 + * will be used instead of compiling a new copy.
1.827 + *
1.828 + * Results:
1.829 + * The return value is a pointer to a newly allocated TclRegexp
1.830 + * that represents the compiled pattern, or NULL if the pattern
1.831 + * could not be compiled. If NULL is returned, an error message is
1.832 + * left in the interp's result.
1.833 + *
1.834 + * Side effects:
1.835 + * The thread-local regexp cache is updated and a new TclRegexp may
1.836 + * be allocated.
1.837 + *
1.838 + *----------------------------------------------------------------------
1.839 + */
1.840 +
1.841 +static TclRegexp *
1.842 +CompileRegexp(interp, string, length, flags)
1.843 + Tcl_Interp *interp; /* Used for error reporting if not NULL. */
1.844 + CONST char *string; /* The regexp to compile (UTF-8). */
1.845 + int length; /* The length of the string in bytes. */
1.846 + int flags; /* Compilation flags. */
1.847 +{
1.848 + TclRegexp *regexpPtr;
1.849 + CONST Tcl_UniChar *uniString;
1.850 + int numChars;
1.851 + Tcl_DString stringBuf;
1.852 + int status, i;
1.853 + ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
1.854 +
1.855 + if (!tsdPtr->initialized) {
1.856 + tsdPtr->initialized = 1;
1.857 + Tcl_CreateThreadExitHandler(FinalizeRegexp, NULL);
1.858 + }
1.859 +
1.860 + /*
1.861 + * This routine maintains a second-level regular expression cache in
1.862 + * addition to the per-object regexp cache. The per-thread cache is needed
1.863 + * to handle the case where for various reasons the object is lost between
1.864 + * invocations of the regexp command, but the literal pattern is the same.
1.865 + */
1.866 +
1.867 + /*
1.868 + * Check the per-thread compiled regexp cache. We can only reuse
1.869 + * a regexp if it has the same pattern and the same flags.
1.870 + */
1.871 +
1.872 + for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {
1.873 + if ((length == tsdPtr->patLengths[i])
1.874 + && (tsdPtr->regexps[i]->flags == flags)
1.875 + && (strcmp(string, tsdPtr->patterns[i]) == 0)) {
1.876 + /*
1.877 + * Move the matched pattern to the first slot in the
1.878 + * cache and shift the other patterns down one position.
1.879 + */
1.880 +
1.881 + if (i != 0) {
1.882 + int j;
1.883 + char *cachedString;
1.884 +
1.885 + cachedString = tsdPtr->patterns[i];
1.886 + regexpPtr = tsdPtr->regexps[i];
1.887 + for (j = i-1; j >= 0; j--) {
1.888 + tsdPtr->patterns[j+1] = tsdPtr->patterns[j];
1.889 + tsdPtr->patLengths[j+1] = tsdPtr->patLengths[j];
1.890 + tsdPtr->regexps[j+1] = tsdPtr->regexps[j];
1.891 + }
1.892 + tsdPtr->patterns[0] = cachedString;
1.893 + tsdPtr->patLengths[0] = length;
1.894 + tsdPtr->regexps[0] = regexpPtr;
1.895 + }
1.896 + return tsdPtr->regexps[0];
1.897 + }
1.898 + }
1.899 +
1.900 + /*
1.901 + * This is a new expression, so compile it and add it to the cache.
1.902 + */
1.903 +
1.904 + regexpPtr = (TclRegexp *) ckalloc(sizeof(TclRegexp));
1.905 + regexpPtr->objPtr = NULL;
1.906 + regexpPtr->string = NULL;
1.907 + regexpPtr->details.rm_extend.rm_so = -1;
1.908 + regexpPtr->details.rm_extend.rm_eo = -1;
1.909 +
1.910 + /*
1.911 + * Get the up-to-date string representation and map to unicode.
1.912 + */
1.913 +
1.914 + Tcl_DStringInit(&stringBuf);
1.915 + uniString = Tcl_UtfToUniCharDString(string, length, &stringBuf);
1.916 + numChars = Tcl_DStringLength(&stringBuf) / sizeof(Tcl_UniChar);
1.917 +
1.918 + /*
1.919 + * Compile the string and check for errors.
1.920 + */
1.921 +
1.922 + regexpPtr->flags = flags;
1.923 + status = TclReComp(®expPtr->re, uniString, (size_t) numChars, flags);
1.924 + Tcl_DStringFree(&stringBuf);
1.925 +
1.926 + if (status != REG_OKAY) {
1.927 + /*
1.928 + * Clean up and report errors in the interpreter, if possible.
1.929 + */
1.930 +
1.931 + ckfree((char *)regexpPtr);
1.932 + if (interp) {
1.933 + TclRegError(interp,
1.934 + "couldn't compile regular expression pattern: ",
1.935 + status);
1.936 + }
1.937 + return NULL;
1.938 + }
1.939 +
1.940 + /*
1.941 + * Allocate enough space for all of the subexpressions, plus one
1.942 + * extra for the entire pattern.
1.943 + */
1.944 +
1.945 + regexpPtr->matches = (regmatch_t *) ckalloc(
1.946 + sizeof(regmatch_t) * (regexpPtr->re.re_nsub + 1));
1.947 +
1.948 + /*
1.949 + * Initialize the refcount to one initially, since it is in the cache.
1.950 + */
1.951 +
1.952 + regexpPtr->refCount = 1;
1.953 +
1.954 + /*
1.955 + * Free the last regexp, if necessary, and make room at the head of the
1.956 + * list for the new regexp.
1.957 + */
1.958 +
1.959 + if (tsdPtr->patterns[NUM_REGEXPS-1] != NULL) {
1.960 + TclRegexp *oldRegexpPtr = tsdPtr->regexps[NUM_REGEXPS-1];
1.961 + if (--(oldRegexpPtr->refCount) <= 0) {
1.962 + FreeRegexp(oldRegexpPtr);
1.963 + }
1.964 + ckfree(tsdPtr->patterns[NUM_REGEXPS-1]);
1.965 + }
1.966 + for (i = NUM_REGEXPS - 2; i >= 0; i--) {
1.967 + tsdPtr->patterns[i+1] = tsdPtr->patterns[i];
1.968 + tsdPtr->patLengths[i+1] = tsdPtr->patLengths[i];
1.969 + tsdPtr->regexps[i+1] = tsdPtr->regexps[i];
1.970 + }
1.971 + tsdPtr->patterns[0] = (char *) ckalloc((unsigned) (length+1));
1.972 + strcpy(tsdPtr->patterns[0], string);
1.973 + tsdPtr->patLengths[0] = length;
1.974 + tsdPtr->regexps[0] = regexpPtr;
1.975 +
1.976 + return regexpPtr;
1.977 +}
1.978 +
1.979 +/*
1.980 + *----------------------------------------------------------------------
1.981 + *
1.982 + * FreeRegexp --
1.983 + *
1.984 + * Release the storage associated with a TclRegexp.
1.985 + *
1.986 + * Results:
1.987 + * None.
1.988 + *
1.989 + * Side effects:
1.990 + * None.
1.991 + *
1.992 + *----------------------------------------------------------------------
1.993 + */
1.994 +
1.995 +static void
1.996 +FreeRegexp(regexpPtr)
1.997 + TclRegexp *regexpPtr; /* Compiled regular expression to free. */
1.998 +{
1.999 + TclReFree(®expPtr->re);
1.1000 + if (regexpPtr->matches) {
1.1001 + ckfree((char *) regexpPtr->matches);
1.1002 + }
1.1003 + ckfree((char *) regexpPtr);
1.1004 +}
1.1005 +
1.1006 +/*
1.1007 + *----------------------------------------------------------------------
1.1008 + *
1.1009 + * FinalizeRegexp --
1.1010 + *
1.1011 + * Release the storage associated with the per-thread regexp
1.1012 + * cache.
1.1013 + *
1.1014 + * Results:
1.1015 + * None.
1.1016 + *
1.1017 + * Side effects:
1.1018 + * None.
1.1019 + *
1.1020 + *----------------------------------------------------------------------
1.1021 + */
1.1022 +
1.1023 +static void
1.1024 +FinalizeRegexp(clientData)
1.1025 + ClientData clientData; /* Not used. */
1.1026 +{
1.1027 + int i;
1.1028 + TclRegexp *regexpPtr;
1.1029 + ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
1.1030 +
1.1031 + for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {
1.1032 + regexpPtr = tsdPtr->regexps[i];
1.1033 + if (--(regexpPtr->refCount) <= 0) {
1.1034 + FreeRegexp(regexpPtr);
1.1035 + }
1.1036 + ckfree(tsdPtr->patterns[i]);
1.1037 + tsdPtr->patterns[i] = NULL;
1.1038 + }
1.1039 + /*
1.1040 + * We may find ourselves reinitialized if another finalization routine
1.1041 + * invokes regexps.
1.1042 + */
1.1043 + tsdPtr->initialized = 0;
1.1044 +}