os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/generic/tclRegexp.c
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/generic/tclRegexp.c	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,1041 @@
     1.4 +/* 
     1.5 + * tclRegexp.c --
     1.6 + *
     1.7 + *	This file contains the public interfaces to the Tcl regular
     1.8 + *	expression mechanism.
     1.9 + *
    1.10 + * Copyright (c) 1998 by Sun Microsystems, Inc.
    1.11 + * Copyright (c) 1998-1999 by Scriptics Corporation.
    1.12 + * Portions Copyright (c) 2007-2008 Nokia Corporation and/or its subsidiaries. All rights reserved.  
    1.13 + *
    1.14 + * See the file "license.terms" for information on usage and redistribution
    1.15 + * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
    1.16 + *
    1.17 + * RCS: @(#) $Id: tclRegexp.c,v 1.14.4.2 2006/04/07 01:14:28 hobbs Exp $
    1.18 + */
    1.19 +
    1.20 +#include "tclInt.h"
    1.21 +#include "tclPort.h"
    1.22 +#include "tclRegexp.h"
    1.23 +#if defined(__SYMBIAN32__) && defined(__WINSCW__)
    1.24 +#include "tclSymbianGlobals.h"
    1.25 +#define dataKey getdataKey(6)
    1.26 +#endif 
    1.27 +
    1.28 +/*
    1.29 + *----------------------------------------------------------------------
    1.30 + * The routines in this file use Henry Spencer's regular expression
    1.31 + * package contained in the following additional source files:
    1.32 + *
    1.33 + *	regc_color.c	regc_cvec.c	regc_lex.c
    1.34 + *	regc_nfa.c	regcomp.c	regcustom.h
    1.35 + *	rege_dfa.c	regerror.c	regerrs.h
    1.36 + *	regex.h		regexec.c	regfree.c
    1.37 + *	regfronts.c	regguts.h
    1.38 + *
    1.39 + * Copyright (c) 1998 Henry Spencer.  All rights reserved.
    1.40 + * 
    1.41 + * Development of this software was funded, in part, by Cray Research Inc.,
    1.42 + * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
    1.43 + * Corporation, none of whom are responsible for the results.  The author
    1.44 + * thanks all of them. 
    1.45 + * 
    1.46 + * Redistribution and use in source and binary forms -- with or without
    1.47 + * modification -- are permitted for any purpose, provided that
    1.48 + * redistributions in source form retain this entire copyright notice and
    1.49 + * indicate the origin and nature of any modifications.
    1.50 + * 
    1.51 + * I'd appreciate being given credit for this package in the documentation
    1.52 + * of software which uses it, but that is not a requirement.
    1.53 + * 
    1.54 + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
    1.55 + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
    1.56 + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
    1.57 + * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    1.58 + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    1.59 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    1.60 + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
    1.61 + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
    1.62 + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
    1.63 + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    1.64 + *
    1.65 + * *** NOTE: this code has been altered slightly for use in Tcl: ***
    1.66 + * *** 1. Names have been changed, e.g. from re_comp to		 ***
    1.67 + * ***    TclRegComp, to avoid clashes with other 		 ***
    1.68 + * ***    regexp implementations used by applications. 		 ***
    1.69 + */
    1.70 +
    1.71 +/*
    1.72 + * Thread local storage used to maintain a per-thread cache of compiled
    1.73 + * regular expressions.
    1.74 + */
    1.75 +
    1.76 +#define NUM_REGEXPS 30
    1.77 +
    1.78 +typedef struct ThreadSpecificData {
    1.79 +    int initialized;		/* Set to 1 when the module is initialized. */
    1.80 +    char *patterns[NUM_REGEXPS];/* Strings corresponding to compiled
    1.81 +				 * regular expression patterns.	 NULL
    1.82 +				 * means that this slot isn't used.
    1.83 +				 * Malloc-ed. */
    1.84 +    int patLengths[NUM_REGEXPS];/* Number of non-null characters in
    1.85 +				 * corresponding entry in patterns.
    1.86 +				 * -1 means entry isn't used. */
    1.87 +    struct TclRegexp *regexps[NUM_REGEXPS];
    1.88 +				/* Compiled forms of above strings.  Also
    1.89 +				 * malloc-ed, or NULL if not in use yet. */
    1.90 +} ThreadSpecificData;
    1.91 +
    1.92 +#if !defined(__SYMBIAN32__) || !defined(__WINSCW__)
    1.93 +static Tcl_ThreadDataKey dataKey;
    1.94 +#endif
    1.95 +
    1.96 +/*
    1.97 + * Declarations for functions used only in this file.
    1.98 + */
    1.99 +
   1.100 +static TclRegexp *	CompileRegexp _ANSI_ARGS_((Tcl_Interp *interp,
   1.101 +			    CONST char *pattern, int length, int flags));
   1.102 +static void		DupRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr,
   1.103 +			    Tcl_Obj *copyPtr));
   1.104 +static void		FinalizeRegexp _ANSI_ARGS_((ClientData clientData));
   1.105 +static void		FreeRegexp _ANSI_ARGS_((TclRegexp *regexpPtr));
   1.106 +static void		FreeRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr));
   1.107 +static int		RegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp,
   1.108 +			    Tcl_RegExp re, CONST Tcl_UniChar *uniString,
   1.109 +			    int numChars, int nmatches, int flags));
   1.110 +static int		SetRegexpFromAny _ANSI_ARGS_((Tcl_Interp *interp,
   1.111 +			    Tcl_Obj *objPtr));
   1.112 +
   1.113 +/*
   1.114 + * The regular expression Tcl object type.  This serves as a cache
   1.115 + * of the compiled form of the regular expression.
   1.116 + */
   1.117 +
   1.118 +static Tcl_ObjType tclRegexpType = {
   1.119 +    "regexp",				/* name */
   1.120 +    FreeRegexpInternalRep,		/* freeIntRepProc */
   1.121 +    DupRegexpInternalRep,		/* dupIntRepProc */
   1.122 +    NULL,				/* updateStringProc */
   1.123 +    SetRegexpFromAny			/* setFromAnyProc */
   1.124 +};
   1.125 +
   1.126 +
   1.127 +/*
   1.128 + *----------------------------------------------------------------------
   1.129 + *
   1.130 + * Tcl_RegExpCompile --
   1.131 + *
   1.132 + *	Compile a regular expression into a form suitable for fast
   1.133 + *	matching.  This procedure is DEPRECATED in favor of the
   1.134 + *	object version of the command.
   1.135 + *
   1.136 + * Results:
   1.137 + *	The return value is a pointer to the compiled form of string,
   1.138 + *	suitable for passing to Tcl_RegExpExec.  This compiled form
   1.139 + *	is only valid up until the next call to this procedure, so
   1.140 + *	don't keep these around for a long time!  If an error occurred
   1.141 + *	while compiling the pattern, then NULL is returned and an error
   1.142 + *	message is left in the interp's result.
   1.143 + *
   1.144 + * Side effects:
   1.145 + *	Updates the cache of compiled regexps.
   1.146 + *
   1.147 + *----------------------------------------------------------------------
   1.148 + */
   1.149 +
   1.150 +EXPORT_C Tcl_RegExp
   1.151 +Tcl_RegExpCompile(interp, string)
   1.152 +    Tcl_Interp *interp;		/* For use in error reporting and
   1.153 +				 * to access the interp regexp cache. */
   1.154 +    CONST char *string;		/* String for which to produce
   1.155 +				 * compiled regular expression. */
   1.156 +{
   1.157 +    return (Tcl_RegExp) CompileRegexp(interp, string, (int) strlen(string),
   1.158 +	    REG_ADVANCED);
   1.159 +}
   1.160 +
   1.161 +/*
   1.162 + *----------------------------------------------------------------------
   1.163 + *
   1.164 + * Tcl_RegExpExec --
   1.165 + *
   1.166 + *	Execute the regular expression matcher using a compiled form
   1.167 + *	of a regular expression and save information about any match
   1.168 + *	that is found.
   1.169 + *
   1.170 + * Results:
   1.171 + *	If an error occurs during the matching operation then -1
   1.172 + *	is returned and the interp's result contains an error message.
   1.173 + *	Otherwise the return value is 1 if a matching range is
   1.174 + *	found and 0 if there is no matching range.
   1.175 + *
   1.176 + * Side effects:
   1.177 + *	None.
   1.178 + *
   1.179 + *----------------------------------------------------------------------
   1.180 + */
   1.181 +
   1.182 +EXPORT_C int
   1.183 +Tcl_RegExpExec(interp, re, string, start)
   1.184 +    Tcl_Interp *interp;		/* Interpreter to use for error reporting. */
   1.185 +    Tcl_RegExp re;		/* Compiled regular expression;  must have
   1.186 +				 * been returned by previous call to
   1.187 +				 * Tcl_GetRegExpFromObj. */
   1.188 +    CONST char *string;		/* String against which to match re. */
   1.189 +    CONST char *start;		/* If string is part of a larger string,
   1.190 +				 * this identifies beginning of larger
   1.191 +				 * string, so that "^" won't match. */
   1.192 +{
   1.193 +    int flags, result, numChars;
   1.194 +    TclRegexp *regexp = (TclRegexp *)re;
   1.195 +    Tcl_DString ds;
   1.196 +    CONST Tcl_UniChar *ustr;
   1.197 +
   1.198 +    /*
   1.199 +     * If the starting point is offset from the beginning of the buffer,
   1.200 +     * then we need to tell the regexp engine not to match "^".
   1.201 +     */
   1.202 +
   1.203 +    if (string > start) {
   1.204 +	flags = REG_NOTBOL;
   1.205 +    } else {
   1.206 +	flags = 0;
   1.207 +    }
   1.208 +
   1.209 +    /*
   1.210 +     * Remember the string for use by Tcl_RegExpRange().
   1.211 +     */
   1.212 +
   1.213 +    regexp->string = string;
   1.214 +    regexp->objPtr = NULL;
   1.215 +
   1.216 +    /*
   1.217 +     * Convert the string to Unicode and perform the match.
   1.218 +     */
   1.219 +
   1.220 +    Tcl_DStringInit(&ds);
   1.221 +    ustr = Tcl_UtfToUniCharDString(string, -1, &ds);
   1.222 +    numChars = Tcl_DStringLength(&ds) / sizeof(Tcl_UniChar);
   1.223 +    result = RegExpExecUniChar(interp, re, ustr, numChars,
   1.224 +	    -1 /* nmatches */, flags);
   1.225 +    Tcl_DStringFree(&ds);
   1.226 +
   1.227 +    return result;
   1.228 +}
   1.229 +
   1.230 +/*
   1.231 + *---------------------------------------------------------------------------
   1.232 + *
   1.233 + * Tcl_RegExpRange --
   1.234 + *
   1.235 + *	Returns pointers describing the range of a regular expression match,
   1.236 + *	or one of the subranges within the match.
   1.237 + *
   1.238 + * Results:
   1.239 + *	The variables at *startPtr and *endPtr are modified to hold the
   1.240 + *	addresses of the endpoints of the range given by index.  If the
   1.241 + *	specified range doesn't exist then NULLs are returned.
   1.242 + *
   1.243 + * Side effects:
   1.244 + *	None.
   1.245 + *
   1.246 + *---------------------------------------------------------------------------
   1.247 + */
   1.248 +
   1.249 +EXPORT_C void
   1.250 +Tcl_RegExpRange(re, index, startPtr, endPtr)
   1.251 +    Tcl_RegExp re;		/* Compiled regular expression that has
   1.252 +				 * been passed to Tcl_RegExpExec. */
   1.253 +    int index;			/* 0 means give the range of the entire
   1.254 +				 * match, > 0 means give the range of
   1.255 +				 * a matching subrange. */
   1.256 +    CONST char **startPtr;	/* Store address of first character in
   1.257 +				 * (sub-) range here. */
   1.258 +    CONST char **endPtr;	/* Store address of character just after last
   1.259 +				 * in (sub-) range here. */
   1.260 +{
   1.261 +    TclRegexp *regexpPtr = (TclRegexp *) re;
   1.262 +    CONST char *string;
   1.263 +
   1.264 +    if ((size_t) index > regexpPtr->re.re_nsub) {
   1.265 +	*startPtr = *endPtr = NULL;
   1.266 +    } else if (regexpPtr->matches[index].rm_so < 0) {
   1.267 +	*startPtr = *endPtr = NULL;
   1.268 +    } else {
   1.269 +	if (regexpPtr->objPtr) {
   1.270 +	    string = Tcl_GetString(regexpPtr->objPtr);
   1.271 +	} else {
   1.272 +	    string = regexpPtr->string;
   1.273 +	}
   1.274 +	*startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so);
   1.275 +	*endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo);
   1.276 +    }
   1.277 +}
   1.278 +
   1.279 +/*
   1.280 + *---------------------------------------------------------------------------
   1.281 + *
   1.282 + * RegExpExecUniChar --
   1.283 + *
   1.284 + *	Execute the regular expression matcher using a compiled form of a
   1.285 + *	regular expression and save information about any match that is
   1.286 + *	found.
   1.287 + *
   1.288 + * Results:
   1.289 + *	If an error occurs during the matching operation then -1 is
   1.290 + *	returned and an error message is left in interp's result.
   1.291 + *	Otherwise the return value is 1 if a matching range was found or
   1.292 + *	0 if there was no matching range.
   1.293 + *
   1.294 + * Side effects:
   1.295 + *	None.
   1.296 + *
   1.297 + *----------------------------------------------------------------------
   1.298 + */
   1.299 +
   1.300 +static int
   1.301 +RegExpExecUniChar(interp, re, wString, numChars, nmatches, flags)
   1.302 +    Tcl_Interp *interp;		/* Interpreter to use for error reporting. */
   1.303 +    Tcl_RegExp re;		/* Compiled regular expression; returned by
   1.304 +				 * a previous call to Tcl_GetRegExpFromObj */
   1.305 +    CONST Tcl_UniChar *wString;	/* String against which to match re. */
   1.306 +    int numChars;		/* Length of Tcl_UniChar string (must
   1.307 +				 * be >= 0). */
   1.308 +    int nmatches;		/* How many subexpression matches (counting
   1.309 +				 * the whole match as subexpression 0) are
   1.310 +				 * of interest.  -1 means "don't know". */
   1.311 +    int flags;			/* Regular expression flags. */
   1.312 +{
   1.313 +    int status;
   1.314 +    TclRegexp *regexpPtr = (TclRegexp *) re;
   1.315 +    size_t last = regexpPtr->re.re_nsub + 1;
   1.316 +    size_t nm = last;
   1.317 +
   1.318 +    if (nmatches >= 0 && (size_t) nmatches < nm) {
   1.319 +	nm = (size_t) nmatches;
   1.320 +    }
   1.321 +
   1.322 +    status = TclReExec(&regexpPtr->re, wString, (size_t) numChars,
   1.323 +	    &regexpPtr->details, nm, regexpPtr->matches, flags);
   1.324 +
   1.325 +    /*
   1.326 +     * Check for errors.
   1.327 +     */
   1.328 +
   1.329 +    if (status != REG_OKAY) {
   1.330 +	if (status == REG_NOMATCH) {
   1.331 +	    return 0;
   1.332 +	}
   1.333 +	if (interp != NULL) {
   1.334 +	    TclRegError(interp, "error while matching regular expression: ",
   1.335 +		    status);
   1.336 +	}
   1.337 +	return -1;
   1.338 +    }
   1.339 +    return 1;
   1.340 +}
   1.341 +
   1.342 +/*
   1.343 + *---------------------------------------------------------------------------
   1.344 + *
   1.345 + * TclRegExpRangeUniChar --
   1.346 + *
   1.347 + *	Returns pointers describing the range of a regular expression match,
   1.348 + *	or one of the subranges within the match, or the hypothetical range
   1.349 + *	represented by the rm_extend field of the rm_detail_t.
   1.350 + *
   1.351 + * Results:
   1.352 + *	The variables at *startPtr and *endPtr are modified to hold the
   1.353 + *	offsets of the endpoints of the range given by index.  If the
   1.354 + *	specified range doesn't exist then -1s are supplied.
   1.355 + *
   1.356 + * Side effects:
   1.357 + *	None.
   1.358 + *
   1.359 + *---------------------------------------------------------------------------
   1.360 + */
   1.361 +
   1.362 +void
   1.363 +TclRegExpRangeUniChar(re, index, startPtr, endPtr)
   1.364 +    Tcl_RegExp re;		/* Compiled regular expression that has
   1.365 +				 * been passed to Tcl_RegExpExec. */
   1.366 +    int index;			/* 0 means give the range of the entire
   1.367 +				 * match, > 0 means give the range of
   1.368 +				 * a matching subrange, -1 means the
   1.369 +				 * range of the rm_extend field. */
   1.370 +    int *startPtr;		/* Store address of first character in
   1.371 +				 * (sub-) range here. */
   1.372 +    int *endPtr;		/* Store address of character just after last
   1.373 +				 * in (sub-) range here. */
   1.374 +{
   1.375 +    TclRegexp *regexpPtr = (TclRegexp *) re;
   1.376 +
   1.377 +    if ((regexpPtr->flags&REG_EXPECT) && index == -1) {
   1.378 +	*startPtr = regexpPtr->details.rm_extend.rm_so;
   1.379 +	*endPtr = regexpPtr->details.rm_extend.rm_eo;
   1.380 +    } else if ((size_t) index > regexpPtr->re.re_nsub) {
   1.381 +	*startPtr = -1;
   1.382 +	*endPtr = -1;
   1.383 +    } else {
   1.384 +	*startPtr = regexpPtr->matches[index].rm_so;
   1.385 +	*endPtr = regexpPtr->matches[index].rm_eo;
   1.386 +    }
   1.387 +}
   1.388 +
   1.389 +/*
   1.390 + *----------------------------------------------------------------------
   1.391 + *
   1.392 + * Tcl_RegExpMatch --
   1.393 + *
   1.394 + *	See if a string matches a regular expression.
   1.395 + *
   1.396 + * Results:
   1.397 + *	If an error occurs during the matching operation then -1
   1.398 + *	is returned and the interp's result contains an error message.
   1.399 + *	Otherwise the return value is 1 if "string" matches "pattern"
   1.400 + *	and 0 otherwise.
   1.401 + *
   1.402 + * Side effects:
   1.403 + *	None.
   1.404 + *
   1.405 + *----------------------------------------------------------------------
   1.406 + */
   1.407 +
   1.408 +EXPORT_C int
   1.409 +Tcl_RegExpMatch(interp, string, pattern)
   1.410 +    Tcl_Interp *interp;		/* Used for error reporting. May be NULL. */
   1.411 +    CONST char *string;		/* String. */
   1.412 +    CONST char *pattern;	/* Regular expression to match against
   1.413 +				 * string. */
   1.414 +{
   1.415 +    Tcl_RegExp re;
   1.416 +
   1.417 +    re = Tcl_RegExpCompile(interp, pattern);
   1.418 +    if (re == NULL) {
   1.419 +	return -1;
   1.420 +    }
   1.421 +    return Tcl_RegExpExec(interp, re, string, string);
   1.422 +}
   1.423 +
   1.424 +/*
   1.425 + *----------------------------------------------------------------------
   1.426 + *
   1.427 + * Tcl_RegExpExecObj --
   1.428 + *
   1.429 + *	Execute a precompiled regexp against the given object.
   1.430 + *
   1.431 + * Results:
   1.432 + *	If an error occurs during the matching operation then -1
   1.433 + *	is returned and the interp's result contains an error message.
   1.434 + *	Otherwise the return value is 1 if "string" matches "pattern"
   1.435 + *	and 0 otherwise.
   1.436 + *
   1.437 + * Side effects:
   1.438 + *	Converts the object to a Unicode object.
   1.439 + *
   1.440 + *----------------------------------------------------------------------
   1.441 + */
   1.442 +
   1.443 +EXPORT_C int
   1.444 +Tcl_RegExpExecObj(interp, re, objPtr, offset, nmatches, flags)
   1.445 +    Tcl_Interp *interp;		/* Interpreter to use for error reporting. */
   1.446 +    Tcl_RegExp re;		/* Compiled regular expression;  must have
   1.447 +				 * been returned by previous call to
   1.448 +				 * Tcl_GetRegExpFromObj. */
   1.449 +    Tcl_Obj *objPtr;		/* String against which to match re. */
   1.450 +    int offset;			/* Character index that marks where matching
   1.451 +				 * should begin. */
   1.452 +    int nmatches;		/* How many subexpression matches (counting
   1.453 +				 * the whole match as subexpression 0) are
   1.454 +				 * of interest.  -1 means all of them. */
   1.455 +    int flags;			/* Regular expression execution flags. */
   1.456 +{
   1.457 +    TclRegexp *regexpPtr = (TclRegexp *) re;
   1.458 +    Tcl_UniChar *udata;
   1.459 +    int length;
   1.460 +
   1.461 +    /*
   1.462 +     * Save the target object so we can extract strings from it later.
   1.463 +     */
   1.464 +
   1.465 +    regexpPtr->string = NULL;
   1.466 +    regexpPtr->objPtr = objPtr;
   1.467 +
   1.468 +    udata = Tcl_GetUnicodeFromObj(objPtr, &length);
   1.469 +
   1.470 +    if (offset > length) {
   1.471 +	offset = length;
   1.472 +    }
   1.473 +    udata += offset;
   1.474 +    length -= offset;
   1.475 +    
   1.476 +    return RegExpExecUniChar(interp, re, udata, length, nmatches, flags);
   1.477 +}
   1.478 +
   1.479 +/*
   1.480 + *----------------------------------------------------------------------
   1.481 + *
   1.482 + * Tcl_RegExpMatchObj --
   1.483 + *
   1.484 + *	See if an object matches a regular expression.
   1.485 + *
   1.486 + * Results:
   1.487 + *	If an error occurs during the matching operation then -1
   1.488 + *	is returned and the interp's result contains an error message.
   1.489 + *	Otherwise the return value is 1 if "string" matches "pattern"
   1.490 + *	and 0 otherwise.
   1.491 + *
   1.492 + * Side effects:
   1.493 + *	Changes the internal rep of the pattern and string objects.
   1.494 + *
   1.495 + *----------------------------------------------------------------------
   1.496 + */
   1.497 +
   1.498 +EXPORT_C int
   1.499 +Tcl_RegExpMatchObj(interp, stringObj, patternObj)
   1.500 +    Tcl_Interp *interp;		/* Used for error reporting. May be NULL. */
   1.501 +    Tcl_Obj *stringObj;		/* Object containing the String to search. */
   1.502 +    Tcl_Obj *patternObj;	/* Regular expression to match against
   1.503 +				 * string. */
   1.504 +{
   1.505 +    Tcl_RegExp re;
   1.506 +
   1.507 +    re = Tcl_GetRegExpFromObj(interp, patternObj,
   1.508 +	    TCL_REG_ADVANCED | TCL_REG_NOSUB);
   1.509 +    if (re == NULL) {
   1.510 +	return -1;
   1.511 +    }
   1.512 +    return Tcl_RegExpExecObj(interp, re, stringObj, 0 /* offset */,
   1.513 +	    0 /* nmatches */, 0 /* flags */);
   1.514 +}
   1.515 +
   1.516 +/*
   1.517 + *----------------------------------------------------------------------
   1.518 + *
   1.519 + * Tcl_RegExpGetInfo --
   1.520 + *
   1.521 + *	Retrieve information about the current match.
   1.522 + *
   1.523 + * Results:
   1.524 + *	None.
   1.525 + *
   1.526 + * Side effects:
   1.527 + *	None.
   1.528 + *
   1.529 + *----------------------------------------------------------------------
   1.530 + */
   1.531 +
   1.532 +EXPORT_C void
   1.533 +Tcl_RegExpGetInfo(regexp, infoPtr)
   1.534 +    Tcl_RegExp regexp;		/* Pattern from which to get subexpressions. */
   1.535 +    Tcl_RegExpInfo *infoPtr;	/* Match information is stored here.  */
   1.536 +{
   1.537 +    TclRegexp *regexpPtr = (TclRegexp *) regexp;
   1.538 +
   1.539 +    infoPtr->nsubs = regexpPtr->re.re_nsub;
   1.540 +    infoPtr->matches = (Tcl_RegExpIndices *) regexpPtr->matches;
   1.541 +    infoPtr->extendStart = regexpPtr->details.rm_extend.rm_so;
   1.542 +}
   1.543 +
   1.544 +/*
   1.545 + *----------------------------------------------------------------------
   1.546 + *
   1.547 + * Tcl_GetRegExpFromObj --
   1.548 + *
   1.549 + *	Compile a regular expression into a form suitable for fast
   1.550 + *	matching.  This procedure caches the result in a Tcl_Obj.
   1.551 + *
   1.552 + * Results:
   1.553 + *	The return value is a pointer to the compiled form of string,
   1.554 + *	suitable for passing to Tcl_RegExpExec.  If an error occurred
   1.555 + *	while compiling the pattern, then NULL is returned and an error
   1.556 + *	message is left in the interp's result.
   1.557 + *
   1.558 + * Side effects:
   1.559 + *	Updates the native rep of the Tcl_Obj.
   1.560 + *
   1.561 + *----------------------------------------------------------------------
   1.562 + */
   1.563 +
   1.564 +EXPORT_C Tcl_RegExp
   1.565 +Tcl_GetRegExpFromObj(interp, objPtr, flags)
   1.566 +    Tcl_Interp *interp;		/* For use in error reporting, and to access
   1.567 +				 * the interp regexp cache. */
   1.568 +    Tcl_Obj *objPtr;		/* Object whose string rep contains regular
   1.569 +				 * expression pattern.  Internal rep will be
   1.570 +				 * changed to compiled form of this regular
   1.571 +				 * expression. */
   1.572 +    int flags;			/* Regular expression compilation flags. */
   1.573 +{
   1.574 +    int length;
   1.575 +    Tcl_ObjType *typePtr;
   1.576 +    TclRegexp *regexpPtr;
   1.577 +    char *pattern;
   1.578 +
   1.579 +    typePtr = objPtr->typePtr;
   1.580 +    regexpPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
   1.581 +
   1.582 +    if ((typePtr != &tclRegexpType) || (regexpPtr->flags != flags)) {
   1.583 +	pattern = Tcl_GetStringFromObj(objPtr, &length);
   1.584 +
   1.585 +	regexpPtr = CompileRegexp(interp, pattern, length, flags);
   1.586 +	if (regexpPtr == NULL) {
   1.587 +	    return NULL;
   1.588 +	}
   1.589 +
   1.590 +	/*
   1.591 +	 * Add a reference to the regexp so it will persist even if it is
   1.592 +	 * pushed out of the current thread's regexp cache.  This reference
   1.593 +	 * will be removed when the object's internal rep is freed.
   1.594 +	 */
   1.595 +
   1.596 +	regexpPtr->refCount++;
   1.597 +
   1.598 +	/*
   1.599 +	 * Free the old representation and set our type.
   1.600 +	 */
   1.601 +
   1.602 +	if ((typePtr != NULL) && (typePtr->freeIntRepProc != NULL)) {
   1.603 +	    (*typePtr->freeIntRepProc)(objPtr);
   1.604 +	}
   1.605 +	objPtr->internalRep.otherValuePtr = (VOID *) regexpPtr;
   1.606 +	objPtr->typePtr = &tclRegexpType;
   1.607 +    }
   1.608 +    return (Tcl_RegExp) regexpPtr;
   1.609 +}
   1.610 +
   1.611 +/*
   1.612 + *----------------------------------------------------------------------
   1.613 + *
   1.614 + * TclRegAbout --
   1.615 + *
   1.616 + *	Return information about a compiled regular expression.
   1.617 + *
   1.618 + * Results:
   1.619 + *	The return value is -1 for failure, 0 for success, although at
   1.620 + *	the moment there's nothing that could fail.  On success, a list
   1.621 + *	is left in the interp's result:  first element is the subexpression
   1.622 + *	count, second is a list of re_info bit names.
   1.623 + *
   1.624 + * Side effects:
   1.625 + *	None.
   1.626 + *
   1.627 + *----------------------------------------------------------------------
   1.628 + */
   1.629 +
   1.630 +int
   1.631 +TclRegAbout(interp, re)
   1.632 +    Tcl_Interp *interp;		/* For use in variable assignment. */
   1.633 +    Tcl_RegExp re;		/* The compiled regular expression. */
   1.634 +{
   1.635 +    TclRegexp *regexpPtr = (TclRegexp *)re;
   1.636 +    char buf[TCL_INTEGER_SPACE];
   1.637 +    static struct infoname {
   1.638 +	int bit;
   1.639 +	char *text;
   1.640 +    } infonames[] = {
   1.641 +	{REG_UBACKREF,		"REG_UBACKREF"},
   1.642 +	{REG_ULOOKAHEAD,	"REG_ULOOKAHEAD"},
   1.643 +	{REG_UBOUNDS,		"REG_UBOUNDS"},
   1.644 +	{REG_UBRACES,		"REG_UBRACES"},
   1.645 +	{REG_UBSALNUM,		"REG_UBSALNUM"},
   1.646 +	{REG_UPBOTCH,		"REG_UPBOTCH"},
   1.647 +	{REG_UBBS,		"REG_UBBS"},
   1.648 +	{REG_UNONPOSIX,		"REG_UNONPOSIX"},
   1.649 +	{REG_UUNSPEC,		"REG_UUNSPEC"},
   1.650 +	{REG_UUNPORT,		"REG_UUNPORT"},
   1.651 +	{REG_ULOCALE,		"REG_ULOCALE"},
   1.652 +	{REG_UEMPTYMATCH,	"REG_UEMPTYMATCH"},
   1.653 +	{REG_UIMPOSSIBLE,	"REG_UIMPOSSIBLE"},
   1.654 +	{REG_USHORTEST,		"REG_USHORTEST"},
   1.655 +	{0,			""}
   1.656 +    };
   1.657 +    struct infoname *inf;
   1.658 +    int n;
   1.659 +
   1.660 +    Tcl_ResetResult(interp);
   1.661 +
   1.662 +    sprintf(buf, "%u", (unsigned)(regexpPtr->re.re_nsub));
   1.663 +    Tcl_AppendElement(interp, buf);
   1.664 +
   1.665 +    /*
   1.666 +     * Must count bits before generating list, because we must know
   1.667 +     * whether {} are needed before we start appending names.
   1.668 +     */
   1.669 +    n = 0;
   1.670 +    for (inf = infonames; inf->bit != 0; inf++) {
   1.671 +	if (regexpPtr->re.re_info&inf->bit) {
   1.672 +	    n++;
   1.673 +	}
   1.674 +    }
   1.675 +    if (n != 1) {
   1.676 +	Tcl_AppendResult(interp, " {", NULL);
   1.677 +    }
   1.678 +    for (inf = infonames; inf->bit != 0; inf++) {
   1.679 +	if (regexpPtr->re.re_info&inf->bit) {
   1.680 +	    Tcl_AppendElement(interp, inf->text);
   1.681 +	}
   1.682 +    }
   1.683 +    if (n != 1) {
   1.684 +	Tcl_AppendResult(interp, "}", NULL);
   1.685 +    }
   1.686 +
   1.687 +    return 0;
   1.688 +}
   1.689 +
   1.690 +/*
   1.691 + *----------------------------------------------------------------------
   1.692 + *
   1.693 + * TclRegError --
   1.694 + *
   1.695 + *	Generate an error message based on the regexp status code.
   1.696 + *
   1.697 + * Results:
   1.698 + *	Places an error in the interpreter.
   1.699 + *
   1.700 + * Side effects:
   1.701 + *	Sets errorCode as well.
   1.702 + *
   1.703 + *----------------------------------------------------------------------
   1.704 + */
   1.705 +
   1.706 +void
   1.707 +TclRegError(interp, msg, status)
   1.708 +    Tcl_Interp *interp;		/* Interpreter for error reporting. */
   1.709 +    CONST char *msg;		/* Message to prepend to error. */
   1.710 +    int status;			/* Status code to report. */
   1.711 +{
   1.712 +    char buf[100];		/* ample in practice */
   1.713 +    char cbuf[100];		/* lots in practice */
   1.714 +    size_t n;
   1.715 +    char *p;
   1.716 +
   1.717 +    Tcl_ResetResult(interp);
   1.718 +    n = TclReError(status, (regex_t *)NULL, buf, sizeof(buf));
   1.719 +    p = (n > sizeof(buf)) ? "..." : "";
   1.720 +    Tcl_AppendResult(interp, msg, buf, p, NULL);
   1.721 +
   1.722 +    sprintf(cbuf, "%d", status);
   1.723 +    (VOID) TclReError(REG_ITOA, (regex_t *)NULL, cbuf, sizeof(cbuf));
   1.724 +    Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, NULL);
   1.725 +}
   1.726 +
   1.727 +
   1.728 +/*
   1.729 + *----------------------------------------------------------------------
   1.730 + *
   1.731 + * FreeRegexpInternalRep --
   1.732 + *
   1.733 + *	Deallocate the storage associated with a regexp object's internal
   1.734 + *	representation.
   1.735 + *
   1.736 + * Results:
   1.737 + *	None.
   1.738 + *
   1.739 + * Side effects:
   1.740 + *	Frees the compiled regular expression.
   1.741 + *
   1.742 + *----------------------------------------------------------------------
   1.743 + */
   1.744 +
   1.745 +static void
   1.746 +FreeRegexpInternalRep(objPtr)
   1.747 +    Tcl_Obj *objPtr;		/* Regexp object with internal rep to free. */
   1.748 +{
   1.749 +    TclRegexp *regexpRepPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
   1.750 +
   1.751 +    /*
   1.752 +     * If this is the last reference to the regexp, free it.
   1.753 +     */
   1.754 +
   1.755 +    if (--(regexpRepPtr->refCount) <= 0) {
   1.756 +	FreeRegexp(regexpRepPtr);
   1.757 +    }
   1.758 +}
   1.759 +
   1.760 +/*
   1.761 + *----------------------------------------------------------------------
   1.762 + *
   1.763 + * DupRegexpInternalRep --
   1.764 + *
   1.765 + *	We copy the reference to the compiled regexp and bump its
   1.766 + *	reference count.
   1.767 + *
   1.768 + * Results:
   1.769 + *	None.
   1.770 + *
   1.771 + * Side effects:
   1.772 + *	Increments the reference count of the regexp.
   1.773 + *
   1.774 + *----------------------------------------------------------------------
   1.775 + */
   1.776 +
   1.777 +static void
   1.778 +DupRegexpInternalRep(srcPtr, copyPtr)
   1.779 +    Tcl_Obj *srcPtr;		/* Object with internal rep to copy. */
   1.780 +    Tcl_Obj *copyPtr;		/* Object with internal rep to set. */
   1.781 +{
   1.782 +    TclRegexp *regexpPtr = (TclRegexp *) srcPtr->internalRep.otherValuePtr;
   1.783 +    regexpPtr->refCount++;
   1.784 +    copyPtr->internalRep.otherValuePtr = srcPtr->internalRep.otherValuePtr;
   1.785 +    copyPtr->typePtr = &tclRegexpType;
   1.786 +}
   1.787 +
   1.788 +/*
   1.789 + *----------------------------------------------------------------------
   1.790 + *
   1.791 + * SetRegexpFromAny --
   1.792 + *
   1.793 + *	Attempt to generate a compiled regular expression for the Tcl object
   1.794 + *	"objPtr".
   1.795 + *
   1.796 + * Results:
   1.797 + *	The return value is TCL_OK or TCL_ERROR. If an error occurs during
   1.798 + *	conversion, an error message is left in the interpreter's result
   1.799 + *	unless "interp" is NULL.
   1.800 + *
   1.801 + * Side effects:
   1.802 + *	If no error occurs, a regular expression is stored as "objPtr"s
   1.803 + *	internal representation.
   1.804 + *
   1.805 + *----------------------------------------------------------------------
   1.806 + */
   1.807 +
   1.808 +static int
   1.809 +SetRegexpFromAny(interp, objPtr)
   1.810 +    Tcl_Interp *interp;		/* Used for error reporting if not NULL. */
   1.811 +    Tcl_Obj *objPtr;		/* The object to convert. */
   1.812 +{
   1.813 +    if (Tcl_GetRegExpFromObj(interp, objPtr, REG_ADVANCED) == NULL) {
   1.814 +	return TCL_ERROR;
   1.815 +    }
   1.816 +    return TCL_OK;
   1.817 +}
   1.818 +
   1.819 +/*
   1.820 + *---------------------------------------------------------------------------
   1.821 + *
   1.822 + * CompileRegexp --
   1.823 + *
   1.824 + *	Attempt to compile the given regexp pattern.  If the compiled
   1.825 + *	regular expression can be found in the per-thread cache, it
   1.826 + *	will be used instead of compiling a new copy.
   1.827 + *
   1.828 + * Results:
   1.829 + *	The return value is a pointer to a newly allocated TclRegexp
   1.830 + *	that represents the compiled pattern, or NULL if the pattern
   1.831 + *	could not be compiled.  If NULL is returned, an error message is
   1.832 + *	left in the interp's result.
   1.833 + *
   1.834 + * Side effects:
   1.835 + *	The thread-local regexp cache is updated and a new TclRegexp may
   1.836 + *	be allocated.
   1.837 + *
   1.838 + *----------------------------------------------------------------------
   1.839 + */
   1.840 +
   1.841 +static TclRegexp *
   1.842 +CompileRegexp(interp, string, length, flags)
   1.843 +    Tcl_Interp *interp;		/* Used for error reporting if not NULL. */
   1.844 +    CONST char *string;		/* The regexp to compile (UTF-8). */
   1.845 +    int length;			/* The length of the string in bytes. */
   1.846 +    int flags;			/* Compilation flags. */
   1.847 +{
   1.848 +    TclRegexp *regexpPtr;
   1.849 +    CONST Tcl_UniChar *uniString;
   1.850 +    int numChars;
   1.851 +    Tcl_DString stringBuf;
   1.852 +    int status, i;
   1.853 +    ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
   1.854 + 
   1.855 +    if (!tsdPtr->initialized) {
   1.856 +	tsdPtr->initialized = 1;
   1.857 +	Tcl_CreateThreadExitHandler(FinalizeRegexp, NULL);
   1.858 +    }
   1.859 +
   1.860 +    /*
   1.861 +     * This routine maintains a second-level regular expression cache in
   1.862 +     * addition to the per-object regexp cache.  The per-thread cache is needed
   1.863 +     * to handle the case where for various reasons the object is lost between
   1.864 +     * invocations of the regexp command, but the literal pattern is the same.
   1.865 +     */
   1.866 +
   1.867 +    /*
   1.868 +     * Check the per-thread compiled regexp cache.  We can only reuse
   1.869 +     * a regexp if it has the same pattern and the same flags.
   1.870 +     */
   1.871 +
   1.872 +    for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {
   1.873 +	if ((length == tsdPtr->patLengths[i])
   1.874 +		&& (tsdPtr->regexps[i]->flags == flags)
   1.875 +		&& (strcmp(string, tsdPtr->patterns[i]) == 0)) {
   1.876 +	    /*
   1.877 +	     * Move the matched pattern to the first slot in the
   1.878 +	     * cache and shift the other patterns down one position.
   1.879 +	     */
   1.880 +
   1.881 +	    if (i != 0) {
   1.882 +		int j;
   1.883 +		char *cachedString;
   1.884 +
   1.885 +		cachedString = tsdPtr->patterns[i];
   1.886 +		regexpPtr = tsdPtr->regexps[i];
   1.887 +		for (j = i-1; j >= 0; j--) {
   1.888 +		    tsdPtr->patterns[j+1] = tsdPtr->patterns[j];
   1.889 +		    tsdPtr->patLengths[j+1] = tsdPtr->patLengths[j];
   1.890 +		    tsdPtr->regexps[j+1] = tsdPtr->regexps[j];
   1.891 +		}
   1.892 +		tsdPtr->patterns[0] = cachedString;
   1.893 +		tsdPtr->patLengths[0] = length;
   1.894 +		tsdPtr->regexps[0] = regexpPtr;
   1.895 +	    }
   1.896 +	    return tsdPtr->regexps[0];
   1.897 +	}
   1.898 +    }
   1.899 +
   1.900 +    /*
   1.901 +     * This is a new expression, so compile it and add it to the cache.
   1.902 +     */
   1.903 +    
   1.904 +    regexpPtr = (TclRegexp *) ckalloc(sizeof(TclRegexp));
   1.905 +    regexpPtr->objPtr = NULL;
   1.906 +    regexpPtr->string = NULL;
   1.907 +    regexpPtr->details.rm_extend.rm_so = -1;
   1.908 +    regexpPtr->details.rm_extend.rm_eo = -1;
   1.909 +
   1.910 +    /*
   1.911 +     * Get the up-to-date string representation and map to unicode.
   1.912 +     */
   1.913 +
   1.914 +    Tcl_DStringInit(&stringBuf);
   1.915 +    uniString = Tcl_UtfToUniCharDString(string, length, &stringBuf);
   1.916 +    numChars = Tcl_DStringLength(&stringBuf) / sizeof(Tcl_UniChar);
   1.917 +
   1.918 +    /*
   1.919 +     * Compile the string and check for errors.
   1.920 +     */
   1.921 +
   1.922 +    regexpPtr->flags = flags;
   1.923 +    status = TclReComp(&regexpPtr->re, uniString, (size_t) numChars, flags);
   1.924 +    Tcl_DStringFree(&stringBuf);
   1.925 +
   1.926 +    if (status != REG_OKAY) {
   1.927 +	/*
   1.928 +	 * Clean up and report errors in the interpreter, if possible.
   1.929 +	 */
   1.930 +
   1.931 +	ckfree((char *)regexpPtr);
   1.932 +	if (interp) {
   1.933 +	    TclRegError(interp,
   1.934 +		    "couldn't compile regular expression pattern: ",
   1.935 +		    status);
   1.936 +	}
   1.937 +	return NULL;
   1.938 +    }
   1.939 +
   1.940 +    /*
   1.941 +     * Allocate enough space for all of the subexpressions, plus one
   1.942 +     * extra for the entire pattern.
   1.943 +     */
   1.944 +
   1.945 +    regexpPtr->matches = (regmatch_t *) ckalloc(
   1.946 +	    sizeof(regmatch_t) * (regexpPtr->re.re_nsub + 1));
   1.947 +
   1.948 +    /*
   1.949 +     * Initialize the refcount to one initially, since it is in the cache.
   1.950 +     */
   1.951 +
   1.952 +    regexpPtr->refCount = 1;
   1.953 +
   1.954 +    /*
   1.955 +     * Free the last regexp, if necessary, and make room at the head of the
   1.956 +     * list for the new regexp.
   1.957 +     */
   1.958 +
   1.959 +    if (tsdPtr->patterns[NUM_REGEXPS-1] != NULL) {
   1.960 +	TclRegexp *oldRegexpPtr = tsdPtr->regexps[NUM_REGEXPS-1];
   1.961 +	if (--(oldRegexpPtr->refCount) <= 0) {
   1.962 +	    FreeRegexp(oldRegexpPtr);
   1.963 +	}
   1.964 +	ckfree(tsdPtr->patterns[NUM_REGEXPS-1]);
   1.965 +    }
   1.966 +    for (i = NUM_REGEXPS - 2; i >= 0; i--) {
   1.967 +	tsdPtr->patterns[i+1] = tsdPtr->patterns[i];
   1.968 +	tsdPtr->patLengths[i+1] = tsdPtr->patLengths[i];
   1.969 +	tsdPtr->regexps[i+1] = tsdPtr->regexps[i];
   1.970 +    }
   1.971 +    tsdPtr->patterns[0] = (char *) ckalloc((unsigned) (length+1));
   1.972 +    strcpy(tsdPtr->patterns[0], string);
   1.973 +    tsdPtr->patLengths[0] = length;
   1.974 +    tsdPtr->regexps[0] = regexpPtr;
   1.975 +
   1.976 +    return regexpPtr;
   1.977 +}
   1.978 +
   1.979 +/*
   1.980 + *----------------------------------------------------------------------
   1.981 + *
   1.982 + * FreeRegexp --
   1.983 + *
   1.984 + *	Release the storage associated with a TclRegexp.
   1.985 + *
   1.986 + * Results:
   1.987 + *	None.
   1.988 + *
   1.989 + * Side effects:
   1.990 + *	None.
   1.991 + *
   1.992 + *----------------------------------------------------------------------
   1.993 + */
   1.994 +
   1.995 +static void
   1.996 +FreeRegexp(regexpPtr)
   1.997 +    TclRegexp *regexpPtr;	/* Compiled regular expression to free. */
   1.998 +{
   1.999 +    TclReFree(&regexpPtr->re);
  1.1000 +    if (regexpPtr->matches) {
  1.1001 +	ckfree((char *) regexpPtr->matches);
  1.1002 +    }
  1.1003 +    ckfree((char *) regexpPtr);
  1.1004 +}
  1.1005 +
  1.1006 +/*
  1.1007 + *----------------------------------------------------------------------
  1.1008 + *
  1.1009 + * FinalizeRegexp --
  1.1010 + *
  1.1011 + *	Release the storage associated with the per-thread regexp
  1.1012 + *	cache.
  1.1013 + *
  1.1014 + * Results:
  1.1015 + *	None.
  1.1016 + *
  1.1017 + * Side effects:
  1.1018 + *	None.
  1.1019 + *
  1.1020 + *----------------------------------------------------------------------
  1.1021 + */
  1.1022 +
  1.1023 +static void
  1.1024 +FinalizeRegexp(clientData)
  1.1025 +    ClientData clientData;	/* Not used. */
  1.1026 +{
  1.1027 +    int i;
  1.1028 +    TclRegexp *regexpPtr;
  1.1029 +    ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
  1.1030 +
  1.1031 +    for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {
  1.1032 +	regexpPtr = tsdPtr->regexps[i];
  1.1033 +	if (--(regexpPtr->refCount) <= 0) {
  1.1034 +	    FreeRegexp(regexpPtr);
  1.1035 +	}
  1.1036 +	ckfree(tsdPtr->patterns[i]);
  1.1037 +	tsdPtr->patterns[i] = NULL;
  1.1038 +    }
  1.1039 +    /*
  1.1040 +     * We may find ourselves reinitialized if another finalization routine
  1.1041 +     * invokes regexps.
  1.1042 +     */
  1.1043 +    tsdPtr->initialized = 0;
  1.1044 +}