diff -r 000000000000 -r bde4ae8d615e os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/generic/tclRegexp.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/generic/tclRegexp.c Fri Jun 15 03:10:57 2012 +0200 @@ -0,0 +1,1041 @@ +/* + * tclRegexp.c -- + * + * This file contains the public interfaces to the Tcl regular + * expression mechanism. + * + * Copyright (c) 1998 by Sun Microsystems, Inc. + * Copyright (c) 1998-1999 by Scriptics Corporation. + * Portions Copyright (c) 2007-2008 Nokia Corporation and/or its subsidiaries. All rights reserved. + * + * See the file "license.terms" for information on usage and redistribution + * of this file, and for a DISCLAIMER OF ALL WARRANTIES. + * + * RCS: @(#) $Id: tclRegexp.c,v 1.14.4.2 2006/04/07 01:14:28 hobbs Exp $ + */ + +#include "tclInt.h" +#include "tclPort.h" +#include "tclRegexp.h" +#if defined(__SYMBIAN32__) && defined(__WINSCW__) +#include "tclSymbianGlobals.h" +#define dataKey getdataKey(6) +#endif + +/* + *---------------------------------------------------------------------- + * The routines in this file use Henry Spencer's regular expression + * package contained in the following additional source files: + * + * regc_color.c regc_cvec.c regc_lex.c + * regc_nfa.c regcomp.c regcustom.h + * rege_dfa.c regerror.c regerrs.h + * regex.h regexec.c regfree.c + * regfronts.c regguts.h + * + * Copyright (c) 1998 Henry Spencer. All rights reserved. + * + * Development of this software was funded, in part, by Cray Research Inc., + * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics + * Corporation, none of whom are responsible for the results. The author + * thanks all of them. + * + * Redistribution and use in source and binary forms -- with or without + * modification -- are permitted for any purpose, provided that + * redistributions in source form retain this entire copyright notice and + * indicate the origin and nature of any modifications. + * + * I'd appreciate being given credit for this package in the documentation + * of software which uses it, but that is not a requirement. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * *** NOTE: this code has been altered slightly for use in Tcl: *** + * *** 1. Names have been changed, e.g. from re_comp to *** + * *** TclRegComp, to avoid clashes with other *** + * *** regexp implementations used by applications. *** + */ + +/* + * Thread local storage used to maintain a per-thread cache of compiled + * regular expressions. + */ + +#define NUM_REGEXPS 30 + +typedef struct ThreadSpecificData { + int initialized; /* Set to 1 when the module is initialized. */ + char *patterns[NUM_REGEXPS];/* Strings corresponding to compiled + * regular expression patterns. NULL + * means that this slot isn't used. + * Malloc-ed. */ + int patLengths[NUM_REGEXPS];/* Number of non-null characters in + * corresponding entry in patterns. + * -1 means entry isn't used. */ + struct TclRegexp *regexps[NUM_REGEXPS]; + /* Compiled forms of above strings. Also + * malloc-ed, or NULL if not in use yet. */ +} ThreadSpecificData; + +#if !defined(__SYMBIAN32__) || !defined(__WINSCW__) +static Tcl_ThreadDataKey dataKey; +#endif + +/* + * Declarations for functions used only in this file. + */ + +static TclRegexp * CompileRegexp _ANSI_ARGS_((Tcl_Interp *interp, + CONST char *pattern, int length, int flags)); +static void DupRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr, + Tcl_Obj *copyPtr)); +static void FinalizeRegexp _ANSI_ARGS_((ClientData clientData)); +static void FreeRegexp _ANSI_ARGS_((TclRegexp *regexpPtr)); +static void FreeRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr)); +static int RegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp, + Tcl_RegExp re, CONST Tcl_UniChar *uniString, + int numChars, int nmatches, int flags)); +static int SetRegexpFromAny _ANSI_ARGS_((Tcl_Interp *interp, + Tcl_Obj *objPtr)); + +/* + * The regular expression Tcl object type. This serves as a cache + * of the compiled form of the regular expression. + */ + +static Tcl_ObjType tclRegexpType = { + "regexp", /* name */ + FreeRegexpInternalRep, /* freeIntRepProc */ + DupRegexpInternalRep, /* dupIntRepProc */ + NULL, /* updateStringProc */ + SetRegexpFromAny /* setFromAnyProc */ +}; + + +/* + *---------------------------------------------------------------------- + * + * Tcl_RegExpCompile -- + * + * Compile a regular expression into a form suitable for fast + * matching. This procedure is DEPRECATED in favor of the + * object version of the command. + * + * Results: + * The return value is a pointer to the compiled form of string, + * suitable for passing to Tcl_RegExpExec. This compiled form + * is only valid up until the next call to this procedure, so + * don't keep these around for a long time! If an error occurred + * while compiling the pattern, then NULL is returned and an error + * message is left in the interp's result. + * + * Side effects: + * Updates the cache of compiled regexps. + * + *---------------------------------------------------------------------- + */ + +EXPORT_C Tcl_RegExp +Tcl_RegExpCompile(interp, string) + Tcl_Interp *interp; /* For use in error reporting and + * to access the interp regexp cache. */ + CONST char *string; /* String for which to produce + * compiled regular expression. */ +{ + return (Tcl_RegExp) CompileRegexp(interp, string, (int) strlen(string), + REG_ADVANCED); +} + +/* + *---------------------------------------------------------------------- + * + * Tcl_RegExpExec -- + * + * Execute the regular expression matcher using a compiled form + * of a regular expression and save information about any match + * that is found. + * + * Results: + * If an error occurs during the matching operation then -1 + * is returned and the interp's result contains an error message. + * Otherwise the return value is 1 if a matching range is + * found and 0 if there is no matching range. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +EXPORT_C int +Tcl_RegExpExec(interp, re, string, start) + Tcl_Interp *interp; /* Interpreter to use for error reporting. */ + Tcl_RegExp re; /* Compiled regular expression; must have + * been returned by previous call to + * Tcl_GetRegExpFromObj. */ + CONST char *string; /* String against which to match re. */ + CONST char *start; /* If string is part of a larger string, + * this identifies beginning of larger + * string, so that "^" won't match. */ +{ + int flags, result, numChars; + TclRegexp *regexp = (TclRegexp *)re; + Tcl_DString ds; + CONST Tcl_UniChar *ustr; + + /* + * If the starting point is offset from the beginning of the buffer, + * then we need to tell the regexp engine not to match "^". + */ + + if (string > start) { + flags = REG_NOTBOL; + } else { + flags = 0; + } + + /* + * Remember the string for use by Tcl_RegExpRange(). + */ + + regexp->string = string; + regexp->objPtr = NULL; + + /* + * Convert the string to Unicode and perform the match. + */ + + Tcl_DStringInit(&ds); + ustr = Tcl_UtfToUniCharDString(string, -1, &ds); + numChars = Tcl_DStringLength(&ds) / sizeof(Tcl_UniChar); + result = RegExpExecUniChar(interp, re, ustr, numChars, + -1 /* nmatches */, flags); + Tcl_DStringFree(&ds); + + return result; +} + +/* + *--------------------------------------------------------------------------- + * + * Tcl_RegExpRange -- + * + * Returns pointers describing the range of a regular expression match, + * or one of the subranges within the match. + * + * Results: + * The variables at *startPtr and *endPtr are modified to hold the + * addresses of the endpoints of the range given by index. If the + * specified range doesn't exist then NULLs are returned. + * + * Side effects: + * None. + * + *--------------------------------------------------------------------------- + */ + +EXPORT_C void +Tcl_RegExpRange(re, index, startPtr, endPtr) + Tcl_RegExp re; /* Compiled regular expression that has + * been passed to Tcl_RegExpExec. */ + int index; /* 0 means give the range of the entire + * match, > 0 means give the range of + * a matching subrange. */ + CONST char **startPtr; /* Store address of first character in + * (sub-) range here. */ + CONST char **endPtr; /* Store address of character just after last + * in (sub-) range here. */ +{ + TclRegexp *regexpPtr = (TclRegexp *) re; + CONST char *string; + + if ((size_t) index > regexpPtr->re.re_nsub) { + *startPtr = *endPtr = NULL; + } else if (regexpPtr->matches[index].rm_so < 0) { + *startPtr = *endPtr = NULL; + } else { + if (regexpPtr->objPtr) { + string = Tcl_GetString(regexpPtr->objPtr); + } else { + string = regexpPtr->string; + } + *startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so); + *endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo); + } +} + +/* + *--------------------------------------------------------------------------- + * + * RegExpExecUniChar -- + * + * Execute the regular expression matcher using a compiled form of a + * regular expression and save information about any match that is + * found. + * + * Results: + * If an error occurs during the matching operation then -1 is + * returned and an error message is left in interp's result. + * Otherwise the return value is 1 if a matching range was found or + * 0 if there was no matching range. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +static int +RegExpExecUniChar(interp, re, wString, numChars, nmatches, flags) + Tcl_Interp *interp; /* Interpreter to use for error reporting. */ + Tcl_RegExp re; /* Compiled regular expression; returned by + * a previous call to Tcl_GetRegExpFromObj */ + CONST Tcl_UniChar *wString; /* String against which to match re. */ + int numChars; /* Length of Tcl_UniChar string (must + * be >= 0). */ + int nmatches; /* How many subexpression matches (counting + * the whole match as subexpression 0) are + * of interest. -1 means "don't know". */ + int flags; /* Regular expression flags. */ +{ + int status; + TclRegexp *regexpPtr = (TclRegexp *) re; + size_t last = regexpPtr->re.re_nsub + 1; + size_t nm = last; + + if (nmatches >= 0 && (size_t) nmatches < nm) { + nm = (size_t) nmatches; + } + + status = TclReExec(®expPtr->re, wString, (size_t) numChars, + ®expPtr->details, nm, regexpPtr->matches, flags); + + /* + * Check for errors. + */ + + if (status != REG_OKAY) { + if (status == REG_NOMATCH) { + return 0; + } + if (interp != NULL) { + TclRegError(interp, "error while matching regular expression: ", + status); + } + return -1; + } + return 1; +} + +/* + *--------------------------------------------------------------------------- + * + * TclRegExpRangeUniChar -- + * + * Returns pointers describing the range of a regular expression match, + * or one of the subranges within the match, or the hypothetical range + * represented by the rm_extend field of the rm_detail_t. + * + * Results: + * The variables at *startPtr and *endPtr are modified to hold the + * offsets of the endpoints of the range given by index. If the + * specified range doesn't exist then -1s are supplied. + * + * Side effects: + * None. + * + *--------------------------------------------------------------------------- + */ + +void +TclRegExpRangeUniChar(re, index, startPtr, endPtr) + Tcl_RegExp re; /* Compiled regular expression that has + * been passed to Tcl_RegExpExec. */ + int index; /* 0 means give the range of the entire + * match, > 0 means give the range of + * a matching subrange, -1 means the + * range of the rm_extend field. */ + int *startPtr; /* Store address of first character in + * (sub-) range here. */ + int *endPtr; /* Store address of character just after last + * in (sub-) range here. */ +{ + TclRegexp *regexpPtr = (TclRegexp *) re; + + if ((regexpPtr->flags®_EXPECT) && index == -1) { + *startPtr = regexpPtr->details.rm_extend.rm_so; + *endPtr = regexpPtr->details.rm_extend.rm_eo; + } else if ((size_t) index > regexpPtr->re.re_nsub) { + *startPtr = -1; + *endPtr = -1; + } else { + *startPtr = regexpPtr->matches[index].rm_so; + *endPtr = regexpPtr->matches[index].rm_eo; + } +} + +/* + *---------------------------------------------------------------------- + * + * Tcl_RegExpMatch -- + * + * See if a string matches a regular expression. + * + * Results: + * If an error occurs during the matching operation then -1 + * is returned and the interp's result contains an error message. + * Otherwise the return value is 1 if "string" matches "pattern" + * and 0 otherwise. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +EXPORT_C int +Tcl_RegExpMatch(interp, string, pattern) + Tcl_Interp *interp; /* Used for error reporting. May be NULL. */ + CONST char *string; /* String. */ + CONST char *pattern; /* Regular expression to match against + * string. */ +{ + Tcl_RegExp re; + + re = Tcl_RegExpCompile(interp, pattern); + if (re == NULL) { + return -1; + } + return Tcl_RegExpExec(interp, re, string, string); +} + +/* + *---------------------------------------------------------------------- + * + * Tcl_RegExpExecObj -- + * + * Execute a precompiled regexp against the given object. + * + * Results: + * If an error occurs during the matching operation then -1 + * is returned and the interp's result contains an error message. + * Otherwise the return value is 1 if "string" matches "pattern" + * and 0 otherwise. + * + * Side effects: + * Converts the object to a Unicode object. + * + *---------------------------------------------------------------------- + */ + +EXPORT_C int +Tcl_RegExpExecObj(interp, re, objPtr, offset, nmatches, flags) + Tcl_Interp *interp; /* Interpreter to use for error reporting. */ + Tcl_RegExp re; /* Compiled regular expression; must have + * been returned by previous call to + * Tcl_GetRegExpFromObj. */ + Tcl_Obj *objPtr; /* String against which to match re. */ + int offset; /* Character index that marks where matching + * should begin. */ + int nmatches; /* How many subexpression matches (counting + * the whole match as subexpression 0) are + * of interest. -1 means all of them. */ + int flags; /* Regular expression execution flags. */ +{ + TclRegexp *regexpPtr = (TclRegexp *) re; + Tcl_UniChar *udata; + int length; + + /* + * Save the target object so we can extract strings from it later. + */ + + regexpPtr->string = NULL; + regexpPtr->objPtr = objPtr; + + udata = Tcl_GetUnicodeFromObj(objPtr, &length); + + if (offset > length) { + offset = length; + } + udata += offset; + length -= offset; + + return RegExpExecUniChar(interp, re, udata, length, nmatches, flags); +} + +/* + *---------------------------------------------------------------------- + * + * Tcl_RegExpMatchObj -- + * + * See if an object matches a regular expression. + * + * Results: + * If an error occurs during the matching operation then -1 + * is returned and the interp's result contains an error message. + * Otherwise the return value is 1 if "string" matches "pattern" + * and 0 otherwise. + * + * Side effects: + * Changes the internal rep of the pattern and string objects. + * + *---------------------------------------------------------------------- + */ + +EXPORT_C int +Tcl_RegExpMatchObj(interp, stringObj, patternObj) + Tcl_Interp *interp; /* Used for error reporting. May be NULL. */ + Tcl_Obj *stringObj; /* Object containing the String to search. */ + Tcl_Obj *patternObj; /* Regular expression to match against + * string. */ +{ + Tcl_RegExp re; + + re = Tcl_GetRegExpFromObj(interp, patternObj, + TCL_REG_ADVANCED | TCL_REG_NOSUB); + if (re == NULL) { + return -1; + } + return Tcl_RegExpExecObj(interp, re, stringObj, 0 /* offset */, + 0 /* nmatches */, 0 /* flags */); +} + +/* + *---------------------------------------------------------------------- + * + * Tcl_RegExpGetInfo -- + * + * Retrieve information about the current match. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +EXPORT_C void +Tcl_RegExpGetInfo(regexp, infoPtr) + Tcl_RegExp regexp; /* Pattern from which to get subexpressions. */ + Tcl_RegExpInfo *infoPtr; /* Match information is stored here. */ +{ + TclRegexp *regexpPtr = (TclRegexp *) regexp; + + infoPtr->nsubs = regexpPtr->re.re_nsub; + infoPtr->matches = (Tcl_RegExpIndices *) regexpPtr->matches; + infoPtr->extendStart = regexpPtr->details.rm_extend.rm_so; +} + +/* + *---------------------------------------------------------------------- + * + * Tcl_GetRegExpFromObj -- + * + * Compile a regular expression into a form suitable for fast + * matching. This procedure caches the result in a Tcl_Obj. + * + * Results: + * The return value is a pointer to the compiled form of string, + * suitable for passing to Tcl_RegExpExec. If an error occurred + * while compiling the pattern, then NULL is returned and an error + * message is left in the interp's result. + * + * Side effects: + * Updates the native rep of the Tcl_Obj. + * + *---------------------------------------------------------------------- + */ + +EXPORT_C Tcl_RegExp +Tcl_GetRegExpFromObj(interp, objPtr, flags) + Tcl_Interp *interp; /* For use in error reporting, and to access + * the interp regexp cache. */ + Tcl_Obj *objPtr; /* Object whose string rep contains regular + * expression pattern. Internal rep will be + * changed to compiled form of this regular + * expression. */ + int flags; /* Regular expression compilation flags. */ +{ + int length; + Tcl_ObjType *typePtr; + TclRegexp *regexpPtr; + char *pattern; + + typePtr = objPtr->typePtr; + regexpPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr; + + if ((typePtr != &tclRegexpType) || (regexpPtr->flags != flags)) { + pattern = Tcl_GetStringFromObj(objPtr, &length); + + regexpPtr = CompileRegexp(interp, pattern, length, flags); + if (regexpPtr == NULL) { + return NULL; + } + + /* + * Add a reference to the regexp so it will persist even if it is + * pushed out of the current thread's regexp cache. This reference + * will be removed when the object's internal rep is freed. + */ + + regexpPtr->refCount++; + + /* + * Free the old representation and set our type. + */ + + if ((typePtr != NULL) && (typePtr->freeIntRepProc != NULL)) { + (*typePtr->freeIntRepProc)(objPtr); + } + objPtr->internalRep.otherValuePtr = (VOID *) regexpPtr; + objPtr->typePtr = &tclRegexpType; + } + return (Tcl_RegExp) regexpPtr; +} + +/* + *---------------------------------------------------------------------- + * + * TclRegAbout -- + * + * Return information about a compiled regular expression. + * + * Results: + * The return value is -1 for failure, 0 for success, although at + * the moment there's nothing that could fail. On success, a list + * is left in the interp's result: first element is the subexpression + * count, second is a list of re_info bit names. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +TclRegAbout(interp, re) + Tcl_Interp *interp; /* For use in variable assignment. */ + Tcl_RegExp re; /* The compiled regular expression. */ +{ + TclRegexp *regexpPtr = (TclRegexp *)re; + char buf[TCL_INTEGER_SPACE]; + static struct infoname { + int bit; + char *text; + } infonames[] = { + {REG_UBACKREF, "REG_UBACKREF"}, + {REG_ULOOKAHEAD, "REG_ULOOKAHEAD"}, + {REG_UBOUNDS, "REG_UBOUNDS"}, + {REG_UBRACES, "REG_UBRACES"}, + {REG_UBSALNUM, "REG_UBSALNUM"}, + {REG_UPBOTCH, "REG_UPBOTCH"}, + {REG_UBBS, "REG_UBBS"}, + {REG_UNONPOSIX, "REG_UNONPOSIX"}, + {REG_UUNSPEC, "REG_UUNSPEC"}, + {REG_UUNPORT, "REG_UUNPORT"}, + {REG_ULOCALE, "REG_ULOCALE"}, + {REG_UEMPTYMATCH, "REG_UEMPTYMATCH"}, + {REG_UIMPOSSIBLE, "REG_UIMPOSSIBLE"}, + {REG_USHORTEST, "REG_USHORTEST"}, + {0, ""} + }; + struct infoname *inf; + int n; + + Tcl_ResetResult(interp); + + sprintf(buf, "%u", (unsigned)(regexpPtr->re.re_nsub)); + Tcl_AppendElement(interp, buf); + + /* + * Must count bits before generating list, because we must know + * whether {} are needed before we start appending names. + */ + n = 0; + for (inf = infonames; inf->bit != 0; inf++) { + if (regexpPtr->re.re_info&inf->bit) { + n++; + } + } + if (n != 1) { + Tcl_AppendResult(interp, " {", NULL); + } + for (inf = infonames; inf->bit != 0; inf++) { + if (regexpPtr->re.re_info&inf->bit) { + Tcl_AppendElement(interp, inf->text); + } + } + if (n != 1) { + Tcl_AppendResult(interp, "}", NULL); + } + + return 0; +} + +/* + *---------------------------------------------------------------------- + * + * TclRegError -- + * + * Generate an error message based on the regexp status code. + * + * Results: + * Places an error in the interpreter. + * + * Side effects: + * Sets errorCode as well. + * + *---------------------------------------------------------------------- + */ + +void +TclRegError(interp, msg, status) + Tcl_Interp *interp; /* Interpreter for error reporting. */ + CONST char *msg; /* Message to prepend to error. */ + int status; /* Status code to report. */ +{ + char buf[100]; /* ample in practice */ + char cbuf[100]; /* lots in practice */ + size_t n; + char *p; + + Tcl_ResetResult(interp); + n = TclReError(status, (regex_t *)NULL, buf, sizeof(buf)); + p = (n > sizeof(buf)) ? "..." : ""; + Tcl_AppendResult(interp, msg, buf, p, NULL); + + sprintf(cbuf, "%d", status); + (VOID) TclReError(REG_ITOA, (regex_t *)NULL, cbuf, sizeof(cbuf)); + Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, NULL); +} + + +/* + *---------------------------------------------------------------------- + * + * FreeRegexpInternalRep -- + * + * Deallocate the storage associated with a regexp object's internal + * representation. + * + * Results: + * None. + * + * Side effects: + * Frees the compiled regular expression. + * + *---------------------------------------------------------------------- + */ + +static void +FreeRegexpInternalRep(objPtr) + Tcl_Obj *objPtr; /* Regexp object with internal rep to free. */ +{ + TclRegexp *regexpRepPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr; + + /* + * If this is the last reference to the regexp, free it. + */ + + if (--(regexpRepPtr->refCount) <= 0) { + FreeRegexp(regexpRepPtr); + } +} + +/* + *---------------------------------------------------------------------- + * + * DupRegexpInternalRep -- + * + * We copy the reference to the compiled regexp and bump its + * reference count. + * + * Results: + * None. + * + * Side effects: + * Increments the reference count of the regexp. + * + *---------------------------------------------------------------------- + */ + +static void +DupRegexpInternalRep(srcPtr, copyPtr) + Tcl_Obj *srcPtr; /* Object with internal rep to copy. */ + Tcl_Obj *copyPtr; /* Object with internal rep to set. */ +{ + TclRegexp *regexpPtr = (TclRegexp *) srcPtr->internalRep.otherValuePtr; + regexpPtr->refCount++; + copyPtr->internalRep.otherValuePtr = srcPtr->internalRep.otherValuePtr; + copyPtr->typePtr = &tclRegexpType; +} + +/* + *---------------------------------------------------------------------- + * + * SetRegexpFromAny -- + * + * Attempt to generate a compiled regular expression for the Tcl object + * "objPtr". + * + * Results: + * The return value is TCL_OK or TCL_ERROR. If an error occurs during + * conversion, an error message is left in the interpreter's result + * unless "interp" is NULL. + * + * Side effects: + * If no error occurs, a regular expression is stored as "objPtr"s + * internal representation. + * + *---------------------------------------------------------------------- + */ + +static int +SetRegexpFromAny(interp, objPtr) + Tcl_Interp *interp; /* Used for error reporting if not NULL. */ + Tcl_Obj *objPtr; /* The object to convert. */ +{ + if (Tcl_GetRegExpFromObj(interp, objPtr, REG_ADVANCED) == NULL) { + return TCL_ERROR; + } + return TCL_OK; +} + +/* + *--------------------------------------------------------------------------- + * + * CompileRegexp -- + * + * Attempt to compile the given regexp pattern. If the compiled + * regular expression can be found in the per-thread cache, it + * will be used instead of compiling a new copy. + * + * Results: + * The return value is a pointer to a newly allocated TclRegexp + * that represents the compiled pattern, or NULL if the pattern + * could not be compiled. If NULL is returned, an error message is + * left in the interp's result. + * + * Side effects: + * The thread-local regexp cache is updated and a new TclRegexp may + * be allocated. + * + *---------------------------------------------------------------------- + */ + +static TclRegexp * +CompileRegexp(interp, string, length, flags) + Tcl_Interp *interp; /* Used for error reporting if not NULL. */ + CONST char *string; /* The regexp to compile (UTF-8). */ + int length; /* The length of the string in bytes. */ + int flags; /* Compilation flags. */ +{ + TclRegexp *regexpPtr; + CONST Tcl_UniChar *uniString; + int numChars; + Tcl_DString stringBuf; + int status, i; + ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey); + + if (!tsdPtr->initialized) { + tsdPtr->initialized = 1; + Tcl_CreateThreadExitHandler(FinalizeRegexp, NULL); + } + + /* + * This routine maintains a second-level regular expression cache in + * addition to the per-object regexp cache. The per-thread cache is needed + * to handle the case where for various reasons the object is lost between + * invocations of the regexp command, but the literal pattern is the same. + */ + + /* + * Check the per-thread compiled regexp cache. We can only reuse + * a regexp if it has the same pattern and the same flags. + */ + + for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) { + if ((length == tsdPtr->patLengths[i]) + && (tsdPtr->regexps[i]->flags == flags) + && (strcmp(string, tsdPtr->patterns[i]) == 0)) { + /* + * Move the matched pattern to the first slot in the + * cache and shift the other patterns down one position. + */ + + if (i != 0) { + int j; + char *cachedString; + + cachedString = tsdPtr->patterns[i]; + regexpPtr = tsdPtr->regexps[i]; + for (j = i-1; j >= 0; j--) { + tsdPtr->patterns[j+1] = tsdPtr->patterns[j]; + tsdPtr->patLengths[j+1] = tsdPtr->patLengths[j]; + tsdPtr->regexps[j+1] = tsdPtr->regexps[j]; + } + tsdPtr->patterns[0] = cachedString; + tsdPtr->patLengths[0] = length; + tsdPtr->regexps[0] = regexpPtr; + } + return tsdPtr->regexps[0]; + } + } + + /* + * This is a new expression, so compile it and add it to the cache. + */ + + regexpPtr = (TclRegexp *) ckalloc(sizeof(TclRegexp)); + regexpPtr->objPtr = NULL; + regexpPtr->string = NULL; + regexpPtr->details.rm_extend.rm_so = -1; + regexpPtr->details.rm_extend.rm_eo = -1; + + /* + * Get the up-to-date string representation and map to unicode. + */ + + Tcl_DStringInit(&stringBuf); + uniString = Tcl_UtfToUniCharDString(string, length, &stringBuf); + numChars = Tcl_DStringLength(&stringBuf) / sizeof(Tcl_UniChar); + + /* + * Compile the string and check for errors. + */ + + regexpPtr->flags = flags; + status = TclReComp(®expPtr->re, uniString, (size_t) numChars, flags); + Tcl_DStringFree(&stringBuf); + + if (status != REG_OKAY) { + /* + * Clean up and report errors in the interpreter, if possible. + */ + + ckfree((char *)regexpPtr); + if (interp) { + TclRegError(interp, + "couldn't compile regular expression pattern: ", + status); + } + return NULL; + } + + /* + * Allocate enough space for all of the subexpressions, plus one + * extra for the entire pattern. + */ + + regexpPtr->matches = (regmatch_t *) ckalloc( + sizeof(regmatch_t) * (regexpPtr->re.re_nsub + 1)); + + /* + * Initialize the refcount to one initially, since it is in the cache. + */ + + regexpPtr->refCount = 1; + + /* + * Free the last regexp, if necessary, and make room at the head of the + * list for the new regexp. + */ + + if (tsdPtr->patterns[NUM_REGEXPS-1] != NULL) { + TclRegexp *oldRegexpPtr = tsdPtr->regexps[NUM_REGEXPS-1]; + if (--(oldRegexpPtr->refCount) <= 0) { + FreeRegexp(oldRegexpPtr); + } + ckfree(tsdPtr->patterns[NUM_REGEXPS-1]); + } + for (i = NUM_REGEXPS - 2; i >= 0; i--) { + tsdPtr->patterns[i+1] = tsdPtr->patterns[i]; + tsdPtr->patLengths[i+1] = tsdPtr->patLengths[i]; + tsdPtr->regexps[i+1] = tsdPtr->regexps[i]; + } + tsdPtr->patterns[0] = (char *) ckalloc((unsigned) (length+1)); + strcpy(tsdPtr->patterns[0], string); + tsdPtr->patLengths[0] = length; + tsdPtr->regexps[0] = regexpPtr; + + return regexpPtr; +} + +/* + *---------------------------------------------------------------------- + * + * FreeRegexp -- + * + * Release the storage associated with a TclRegexp. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +static void +FreeRegexp(regexpPtr) + TclRegexp *regexpPtr; /* Compiled regular expression to free. */ +{ + TclReFree(®expPtr->re); + if (regexpPtr->matches) { + ckfree((char *) regexpPtr->matches); + } + ckfree((char *) regexpPtr); +} + +/* + *---------------------------------------------------------------------- + * + * FinalizeRegexp -- + * + * Release the storage associated with the per-thread regexp + * cache. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +static void +FinalizeRegexp(clientData) + ClientData clientData; /* Not used. */ +{ + int i; + TclRegexp *regexpPtr; + ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey); + + for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) { + regexpPtr = tsdPtr->regexps[i]; + if (--(regexpPtr->refCount) <= 0) { + FreeRegexp(regexpPtr); + } + ckfree(tsdPtr->patterns[i]); + tsdPtr->patterns[i] = NULL; + } + /* + * We may find ourselves reinitialized if another finalization routine + * invokes regexps. + */ + tsdPtr->initialized = 0; +}