os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/generic/tclRegexp.c
author sl@SLION-WIN7.fritz.box
Fri, 15 Jun 2012 03:10:57 +0200
changeset 0 bde4ae8d615e
permissions -rw-r--r--
First public contribution.
     1 /* 
     2  * tclRegexp.c --
     3  *
     4  *	This file contains the public interfaces to the Tcl regular
     5  *	expression mechanism.
     6  *
     7  * Copyright (c) 1998 by Sun Microsystems, Inc.
     8  * Copyright (c) 1998-1999 by Scriptics Corporation.
     9  * Portions Copyright (c) 2007-2008 Nokia Corporation and/or its subsidiaries. All rights reserved.  
    10  *
    11  * See the file "license.terms" for information on usage and redistribution
    12  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
    13  *
    14  * RCS: @(#) $Id: tclRegexp.c,v 1.14.4.2 2006/04/07 01:14:28 hobbs Exp $
    15  */
    16 
    17 #include "tclInt.h"
    18 #include "tclPort.h"
    19 #include "tclRegexp.h"
    20 #if defined(__SYMBIAN32__) && defined(__WINSCW__)
    21 #include "tclSymbianGlobals.h"
    22 #define dataKey getdataKey(6)
    23 #endif 
    24 
    25 /*
    26  *----------------------------------------------------------------------
    27  * The routines in this file use Henry Spencer's regular expression
    28  * package contained in the following additional source files:
    29  *
    30  *	regc_color.c	regc_cvec.c	regc_lex.c
    31  *	regc_nfa.c	regcomp.c	regcustom.h
    32  *	rege_dfa.c	regerror.c	regerrs.h
    33  *	regex.h		regexec.c	regfree.c
    34  *	regfronts.c	regguts.h
    35  *
    36  * Copyright (c) 1998 Henry Spencer.  All rights reserved.
    37  * 
    38  * Development of this software was funded, in part, by Cray Research Inc.,
    39  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
    40  * Corporation, none of whom are responsible for the results.  The author
    41  * thanks all of them. 
    42  * 
    43  * Redistribution and use in source and binary forms -- with or without
    44  * modification -- are permitted for any purpose, provided that
    45  * redistributions in source form retain this entire copyright notice and
    46  * indicate the origin and nature of any modifications.
    47  * 
    48  * I'd appreciate being given credit for this package in the documentation
    49  * of software which uses it, but that is not a requirement.
    50  * 
    51  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
    52  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
    53  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
    54  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    55  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    56  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    57  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
    58  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
    59  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
    60  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    61  *
    62  * *** NOTE: this code has been altered slightly for use in Tcl: ***
    63  * *** 1. Names have been changed, e.g. from re_comp to		 ***
    64  * ***    TclRegComp, to avoid clashes with other 		 ***
    65  * ***    regexp implementations used by applications. 		 ***
    66  */
    67 
    68 /*
    69  * Thread local storage used to maintain a per-thread cache of compiled
    70  * regular expressions.
    71  */
    72 
    73 #define NUM_REGEXPS 30
    74 
    75 typedef struct ThreadSpecificData {
    76     int initialized;		/* Set to 1 when the module is initialized. */
    77     char *patterns[NUM_REGEXPS];/* Strings corresponding to compiled
    78 				 * regular expression patterns.	 NULL
    79 				 * means that this slot isn't used.
    80 				 * Malloc-ed. */
    81     int patLengths[NUM_REGEXPS];/* Number of non-null characters in
    82 				 * corresponding entry in patterns.
    83 				 * -1 means entry isn't used. */
    84     struct TclRegexp *regexps[NUM_REGEXPS];
    85 				/* Compiled forms of above strings.  Also
    86 				 * malloc-ed, or NULL if not in use yet. */
    87 } ThreadSpecificData;
    88 
    89 #if !defined(__SYMBIAN32__) || !defined(__WINSCW__)
    90 static Tcl_ThreadDataKey dataKey;
    91 #endif
    92 
    93 /*
    94  * Declarations for functions used only in this file.
    95  */
    96 
    97 static TclRegexp *	CompileRegexp _ANSI_ARGS_((Tcl_Interp *interp,
    98 			    CONST char *pattern, int length, int flags));
    99 static void		DupRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr,
   100 			    Tcl_Obj *copyPtr));
   101 static void		FinalizeRegexp _ANSI_ARGS_((ClientData clientData));
   102 static void		FreeRegexp _ANSI_ARGS_((TclRegexp *regexpPtr));
   103 static void		FreeRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr));
   104 static int		RegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp,
   105 			    Tcl_RegExp re, CONST Tcl_UniChar *uniString,
   106 			    int numChars, int nmatches, int flags));
   107 static int		SetRegexpFromAny _ANSI_ARGS_((Tcl_Interp *interp,
   108 			    Tcl_Obj *objPtr));
   109 
   110 /*
   111  * The regular expression Tcl object type.  This serves as a cache
   112  * of the compiled form of the regular expression.
   113  */
   114 
   115 static Tcl_ObjType tclRegexpType = {
   116     "regexp",				/* name */
   117     FreeRegexpInternalRep,		/* freeIntRepProc */
   118     DupRegexpInternalRep,		/* dupIntRepProc */
   119     NULL,				/* updateStringProc */
   120     SetRegexpFromAny			/* setFromAnyProc */
   121 };
   122 
   123 
   124 /*
   125  *----------------------------------------------------------------------
   126  *
   127  * Tcl_RegExpCompile --
   128  *
   129  *	Compile a regular expression into a form suitable for fast
   130  *	matching.  This procedure is DEPRECATED in favor of the
   131  *	object version of the command.
   132  *
   133  * Results:
   134  *	The return value is a pointer to the compiled form of string,
   135  *	suitable for passing to Tcl_RegExpExec.  This compiled form
   136  *	is only valid up until the next call to this procedure, so
   137  *	don't keep these around for a long time!  If an error occurred
   138  *	while compiling the pattern, then NULL is returned and an error
   139  *	message is left in the interp's result.
   140  *
   141  * Side effects:
   142  *	Updates the cache of compiled regexps.
   143  *
   144  *----------------------------------------------------------------------
   145  */
   146 
   147 EXPORT_C Tcl_RegExp
   148 Tcl_RegExpCompile(interp, string)
   149     Tcl_Interp *interp;		/* For use in error reporting and
   150 				 * to access the interp regexp cache. */
   151     CONST char *string;		/* String for which to produce
   152 				 * compiled regular expression. */
   153 {
   154     return (Tcl_RegExp) CompileRegexp(interp, string, (int) strlen(string),
   155 	    REG_ADVANCED);
   156 }
   157 
   158 /*
   159  *----------------------------------------------------------------------
   160  *
   161  * Tcl_RegExpExec --
   162  *
   163  *	Execute the regular expression matcher using a compiled form
   164  *	of a regular expression and save information about any match
   165  *	that is found.
   166  *
   167  * Results:
   168  *	If an error occurs during the matching operation then -1
   169  *	is returned and the interp's result contains an error message.
   170  *	Otherwise the return value is 1 if a matching range is
   171  *	found and 0 if there is no matching range.
   172  *
   173  * Side effects:
   174  *	None.
   175  *
   176  *----------------------------------------------------------------------
   177  */
   178 
   179 EXPORT_C int
   180 Tcl_RegExpExec(interp, re, string, start)
   181     Tcl_Interp *interp;		/* Interpreter to use for error reporting. */
   182     Tcl_RegExp re;		/* Compiled regular expression;  must have
   183 				 * been returned by previous call to
   184 				 * Tcl_GetRegExpFromObj. */
   185     CONST char *string;		/* String against which to match re. */
   186     CONST char *start;		/* If string is part of a larger string,
   187 				 * this identifies beginning of larger
   188 				 * string, so that "^" won't match. */
   189 {
   190     int flags, result, numChars;
   191     TclRegexp *regexp = (TclRegexp *)re;
   192     Tcl_DString ds;
   193     CONST Tcl_UniChar *ustr;
   194 
   195     /*
   196      * If the starting point is offset from the beginning of the buffer,
   197      * then we need to tell the regexp engine not to match "^".
   198      */
   199 
   200     if (string > start) {
   201 	flags = REG_NOTBOL;
   202     } else {
   203 	flags = 0;
   204     }
   205 
   206     /*
   207      * Remember the string for use by Tcl_RegExpRange().
   208      */
   209 
   210     regexp->string = string;
   211     regexp->objPtr = NULL;
   212 
   213     /*
   214      * Convert the string to Unicode and perform the match.
   215      */
   216 
   217     Tcl_DStringInit(&ds);
   218     ustr = Tcl_UtfToUniCharDString(string, -1, &ds);
   219     numChars = Tcl_DStringLength(&ds) / sizeof(Tcl_UniChar);
   220     result = RegExpExecUniChar(interp, re, ustr, numChars,
   221 	    -1 /* nmatches */, flags);
   222     Tcl_DStringFree(&ds);
   223 
   224     return result;
   225 }
   226 
   227 /*
   228  *---------------------------------------------------------------------------
   229  *
   230  * Tcl_RegExpRange --
   231  *
   232  *	Returns pointers describing the range of a regular expression match,
   233  *	or one of the subranges within the match.
   234  *
   235  * Results:
   236  *	The variables at *startPtr and *endPtr are modified to hold the
   237  *	addresses of the endpoints of the range given by index.  If the
   238  *	specified range doesn't exist then NULLs are returned.
   239  *
   240  * Side effects:
   241  *	None.
   242  *
   243  *---------------------------------------------------------------------------
   244  */
   245 
   246 EXPORT_C void
   247 Tcl_RegExpRange(re, index, startPtr, endPtr)
   248     Tcl_RegExp re;		/* Compiled regular expression that has
   249 				 * been passed to Tcl_RegExpExec. */
   250     int index;			/* 0 means give the range of the entire
   251 				 * match, > 0 means give the range of
   252 				 * a matching subrange. */
   253     CONST char **startPtr;	/* Store address of first character in
   254 				 * (sub-) range here. */
   255     CONST char **endPtr;	/* Store address of character just after last
   256 				 * in (sub-) range here. */
   257 {
   258     TclRegexp *regexpPtr = (TclRegexp *) re;
   259     CONST char *string;
   260 
   261     if ((size_t) index > regexpPtr->re.re_nsub) {
   262 	*startPtr = *endPtr = NULL;
   263     } else if (regexpPtr->matches[index].rm_so < 0) {
   264 	*startPtr = *endPtr = NULL;
   265     } else {
   266 	if (regexpPtr->objPtr) {
   267 	    string = Tcl_GetString(regexpPtr->objPtr);
   268 	} else {
   269 	    string = regexpPtr->string;
   270 	}
   271 	*startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so);
   272 	*endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo);
   273     }
   274 }
   275 
   276 /*
   277  *---------------------------------------------------------------------------
   278  *
   279  * RegExpExecUniChar --
   280  *
   281  *	Execute the regular expression matcher using a compiled form of a
   282  *	regular expression and save information about any match that is
   283  *	found.
   284  *
   285  * Results:
   286  *	If an error occurs during the matching operation then -1 is
   287  *	returned and an error message is left in interp's result.
   288  *	Otherwise the return value is 1 if a matching range was found or
   289  *	0 if there was no matching range.
   290  *
   291  * Side effects:
   292  *	None.
   293  *
   294  *----------------------------------------------------------------------
   295  */
   296 
   297 static int
   298 RegExpExecUniChar(interp, re, wString, numChars, nmatches, flags)
   299     Tcl_Interp *interp;		/* Interpreter to use for error reporting. */
   300     Tcl_RegExp re;		/* Compiled regular expression; returned by
   301 				 * a previous call to Tcl_GetRegExpFromObj */
   302     CONST Tcl_UniChar *wString;	/* String against which to match re. */
   303     int numChars;		/* Length of Tcl_UniChar string (must
   304 				 * be >= 0). */
   305     int nmatches;		/* How many subexpression matches (counting
   306 				 * the whole match as subexpression 0) are
   307 				 * of interest.  -1 means "don't know". */
   308     int flags;			/* Regular expression flags. */
   309 {
   310     int status;
   311     TclRegexp *regexpPtr = (TclRegexp *) re;
   312     size_t last = regexpPtr->re.re_nsub + 1;
   313     size_t nm = last;
   314 
   315     if (nmatches >= 0 && (size_t) nmatches < nm) {
   316 	nm = (size_t) nmatches;
   317     }
   318 
   319     status = TclReExec(&regexpPtr->re, wString, (size_t) numChars,
   320 	    &regexpPtr->details, nm, regexpPtr->matches, flags);
   321 
   322     /*
   323      * Check for errors.
   324      */
   325 
   326     if (status != REG_OKAY) {
   327 	if (status == REG_NOMATCH) {
   328 	    return 0;
   329 	}
   330 	if (interp != NULL) {
   331 	    TclRegError(interp, "error while matching regular expression: ",
   332 		    status);
   333 	}
   334 	return -1;
   335     }
   336     return 1;
   337 }
   338 
   339 /*
   340  *---------------------------------------------------------------------------
   341  *
   342  * TclRegExpRangeUniChar --
   343  *
   344  *	Returns pointers describing the range of a regular expression match,
   345  *	or one of the subranges within the match, or the hypothetical range
   346  *	represented by the rm_extend field of the rm_detail_t.
   347  *
   348  * Results:
   349  *	The variables at *startPtr and *endPtr are modified to hold the
   350  *	offsets of the endpoints of the range given by index.  If the
   351  *	specified range doesn't exist then -1s are supplied.
   352  *
   353  * Side effects:
   354  *	None.
   355  *
   356  *---------------------------------------------------------------------------
   357  */
   358 
   359 void
   360 TclRegExpRangeUniChar(re, index, startPtr, endPtr)
   361     Tcl_RegExp re;		/* Compiled regular expression that has
   362 				 * been passed to Tcl_RegExpExec. */
   363     int index;			/* 0 means give the range of the entire
   364 				 * match, > 0 means give the range of
   365 				 * a matching subrange, -1 means the
   366 				 * range of the rm_extend field. */
   367     int *startPtr;		/* Store address of first character in
   368 				 * (sub-) range here. */
   369     int *endPtr;		/* Store address of character just after last
   370 				 * in (sub-) range here. */
   371 {
   372     TclRegexp *regexpPtr = (TclRegexp *) re;
   373 
   374     if ((regexpPtr->flags&REG_EXPECT) && index == -1) {
   375 	*startPtr = regexpPtr->details.rm_extend.rm_so;
   376 	*endPtr = regexpPtr->details.rm_extend.rm_eo;
   377     } else if ((size_t) index > regexpPtr->re.re_nsub) {
   378 	*startPtr = -1;
   379 	*endPtr = -1;
   380     } else {
   381 	*startPtr = regexpPtr->matches[index].rm_so;
   382 	*endPtr = regexpPtr->matches[index].rm_eo;
   383     }
   384 }
   385 
   386 /*
   387  *----------------------------------------------------------------------
   388  *
   389  * Tcl_RegExpMatch --
   390  *
   391  *	See if a string matches a regular expression.
   392  *
   393  * Results:
   394  *	If an error occurs during the matching operation then -1
   395  *	is returned and the interp's result contains an error message.
   396  *	Otherwise the return value is 1 if "string" matches "pattern"
   397  *	and 0 otherwise.
   398  *
   399  * Side effects:
   400  *	None.
   401  *
   402  *----------------------------------------------------------------------
   403  */
   404 
   405 EXPORT_C int
   406 Tcl_RegExpMatch(interp, string, pattern)
   407     Tcl_Interp *interp;		/* Used for error reporting. May be NULL. */
   408     CONST char *string;		/* String. */
   409     CONST char *pattern;	/* Regular expression to match against
   410 				 * string. */
   411 {
   412     Tcl_RegExp re;
   413 
   414     re = Tcl_RegExpCompile(interp, pattern);
   415     if (re == NULL) {
   416 	return -1;
   417     }
   418     return Tcl_RegExpExec(interp, re, string, string);
   419 }
   420 
   421 /*
   422  *----------------------------------------------------------------------
   423  *
   424  * Tcl_RegExpExecObj --
   425  *
   426  *	Execute a precompiled regexp against the given object.
   427  *
   428  * Results:
   429  *	If an error occurs during the matching operation then -1
   430  *	is returned and the interp's result contains an error message.
   431  *	Otherwise the return value is 1 if "string" matches "pattern"
   432  *	and 0 otherwise.
   433  *
   434  * Side effects:
   435  *	Converts the object to a Unicode object.
   436  *
   437  *----------------------------------------------------------------------
   438  */
   439 
   440 EXPORT_C int
   441 Tcl_RegExpExecObj(interp, re, objPtr, offset, nmatches, flags)
   442     Tcl_Interp *interp;		/* Interpreter to use for error reporting. */
   443     Tcl_RegExp re;		/* Compiled regular expression;  must have
   444 				 * been returned by previous call to
   445 				 * Tcl_GetRegExpFromObj. */
   446     Tcl_Obj *objPtr;		/* String against which to match re. */
   447     int offset;			/* Character index that marks where matching
   448 				 * should begin. */
   449     int nmatches;		/* How many subexpression matches (counting
   450 				 * the whole match as subexpression 0) are
   451 				 * of interest.  -1 means all of them. */
   452     int flags;			/* Regular expression execution flags. */
   453 {
   454     TclRegexp *regexpPtr = (TclRegexp *) re;
   455     Tcl_UniChar *udata;
   456     int length;
   457 
   458     /*
   459      * Save the target object so we can extract strings from it later.
   460      */
   461 
   462     regexpPtr->string = NULL;
   463     regexpPtr->objPtr = objPtr;
   464 
   465     udata = Tcl_GetUnicodeFromObj(objPtr, &length);
   466 
   467     if (offset > length) {
   468 	offset = length;
   469     }
   470     udata += offset;
   471     length -= offset;
   472     
   473     return RegExpExecUniChar(interp, re, udata, length, nmatches, flags);
   474 }
   475 
   476 /*
   477  *----------------------------------------------------------------------
   478  *
   479  * Tcl_RegExpMatchObj --
   480  *
   481  *	See if an object matches a regular expression.
   482  *
   483  * Results:
   484  *	If an error occurs during the matching operation then -1
   485  *	is returned and the interp's result contains an error message.
   486  *	Otherwise the return value is 1 if "string" matches "pattern"
   487  *	and 0 otherwise.
   488  *
   489  * Side effects:
   490  *	Changes the internal rep of the pattern and string objects.
   491  *
   492  *----------------------------------------------------------------------
   493  */
   494 
   495 EXPORT_C int
   496 Tcl_RegExpMatchObj(interp, stringObj, patternObj)
   497     Tcl_Interp *interp;		/* Used for error reporting. May be NULL. */
   498     Tcl_Obj *stringObj;		/* Object containing the String to search. */
   499     Tcl_Obj *patternObj;	/* Regular expression to match against
   500 				 * string. */
   501 {
   502     Tcl_RegExp re;
   503 
   504     re = Tcl_GetRegExpFromObj(interp, patternObj,
   505 	    TCL_REG_ADVANCED | TCL_REG_NOSUB);
   506     if (re == NULL) {
   507 	return -1;
   508     }
   509     return Tcl_RegExpExecObj(interp, re, stringObj, 0 /* offset */,
   510 	    0 /* nmatches */, 0 /* flags */);
   511 }
   512 
   513 /*
   514  *----------------------------------------------------------------------
   515  *
   516  * Tcl_RegExpGetInfo --
   517  *
   518  *	Retrieve information about the current match.
   519  *
   520  * Results:
   521  *	None.
   522  *
   523  * Side effects:
   524  *	None.
   525  *
   526  *----------------------------------------------------------------------
   527  */
   528 
   529 EXPORT_C void
   530 Tcl_RegExpGetInfo(regexp, infoPtr)
   531     Tcl_RegExp regexp;		/* Pattern from which to get subexpressions. */
   532     Tcl_RegExpInfo *infoPtr;	/* Match information is stored here.  */
   533 {
   534     TclRegexp *regexpPtr = (TclRegexp *) regexp;
   535 
   536     infoPtr->nsubs = regexpPtr->re.re_nsub;
   537     infoPtr->matches = (Tcl_RegExpIndices *) regexpPtr->matches;
   538     infoPtr->extendStart = regexpPtr->details.rm_extend.rm_so;
   539 }
   540 
   541 /*
   542  *----------------------------------------------------------------------
   543  *
   544  * Tcl_GetRegExpFromObj --
   545  *
   546  *	Compile a regular expression into a form suitable for fast
   547  *	matching.  This procedure caches the result in a Tcl_Obj.
   548  *
   549  * Results:
   550  *	The return value is a pointer to the compiled form of string,
   551  *	suitable for passing to Tcl_RegExpExec.  If an error occurred
   552  *	while compiling the pattern, then NULL is returned and an error
   553  *	message is left in the interp's result.
   554  *
   555  * Side effects:
   556  *	Updates the native rep of the Tcl_Obj.
   557  *
   558  *----------------------------------------------------------------------
   559  */
   560 
   561 EXPORT_C Tcl_RegExp
   562 Tcl_GetRegExpFromObj(interp, objPtr, flags)
   563     Tcl_Interp *interp;		/* For use in error reporting, and to access
   564 				 * the interp regexp cache. */
   565     Tcl_Obj *objPtr;		/* Object whose string rep contains regular
   566 				 * expression pattern.  Internal rep will be
   567 				 * changed to compiled form of this regular
   568 				 * expression. */
   569     int flags;			/* Regular expression compilation flags. */
   570 {
   571     int length;
   572     Tcl_ObjType *typePtr;
   573     TclRegexp *regexpPtr;
   574     char *pattern;
   575 
   576     typePtr = objPtr->typePtr;
   577     regexpPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
   578 
   579     if ((typePtr != &tclRegexpType) || (regexpPtr->flags != flags)) {
   580 	pattern = Tcl_GetStringFromObj(objPtr, &length);
   581 
   582 	regexpPtr = CompileRegexp(interp, pattern, length, flags);
   583 	if (regexpPtr == NULL) {
   584 	    return NULL;
   585 	}
   586 
   587 	/*
   588 	 * Add a reference to the regexp so it will persist even if it is
   589 	 * pushed out of the current thread's regexp cache.  This reference
   590 	 * will be removed when the object's internal rep is freed.
   591 	 */
   592 
   593 	regexpPtr->refCount++;
   594 
   595 	/*
   596 	 * Free the old representation and set our type.
   597 	 */
   598 
   599 	if ((typePtr != NULL) && (typePtr->freeIntRepProc != NULL)) {
   600 	    (*typePtr->freeIntRepProc)(objPtr);
   601 	}
   602 	objPtr->internalRep.otherValuePtr = (VOID *) regexpPtr;
   603 	objPtr->typePtr = &tclRegexpType;
   604     }
   605     return (Tcl_RegExp) regexpPtr;
   606 }
   607 
   608 /*
   609  *----------------------------------------------------------------------
   610  *
   611  * TclRegAbout --
   612  *
   613  *	Return information about a compiled regular expression.
   614  *
   615  * Results:
   616  *	The return value is -1 for failure, 0 for success, although at
   617  *	the moment there's nothing that could fail.  On success, a list
   618  *	is left in the interp's result:  first element is the subexpression
   619  *	count, second is a list of re_info bit names.
   620  *
   621  * Side effects:
   622  *	None.
   623  *
   624  *----------------------------------------------------------------------
   625  */
   626 
   627 int
   628 TclRegAbout(interp, re)
   629     Tcl_Interp *interp;		/* For use in variable assignment. */
   630     Tcl_RegExp re;		/* The compiled regular expression. */
   631 {
   632     TclRegexp *regexpPtr = (TclRegexp *)re;
   633     char buf[TCL_INTEGER_SPACE];
   634     static struct infoname {
   635 	int bit;
   636 	char *text;
   637     } infonames[] = {
   638 	{REG_UBACKREF,		"REG_UBACKREF"},
   639 	{REG_ULOOKAHEAD,	"REG_ULOOKAHEAD"},
   640 	{REG_UBOUNDS,		"REG_UBOUNDS"},
   641 	{REG_UBRACES,		"REG_UBRACES"},
   642 	{REG_UBSALNUM,		"REG_UBSALNUM"},
   643 	{REG_UPBOTCH,		"REG_UPBOTCH"},
   644 	{REG_UBBS,		"REG_UBBS"},
   645 	{REG_UNONPOSIX,		"REG_UNONPOSIX"},
   646 	{REG_UUNSPEC,		"REG_UUNSPEC"},
   647 	{REG_UUNPORT,		"REG_UUNPORT"},
   648 	{REG_ULOCALE,		"REG_ULOCALE"},
   649 	{REG_UEMPTYMATCH,	"REG_UEMPTYMATCH"},
   650 	{REG_UIMPOSSIBLE,	"REG_UIMPOSSIBLE"},
   651 	{REG_USHORTEST,		"REG_USHORTEST"},
   652 	{0,			""}
   653     };
   654     struct infoname *inf;
   655     int n;
   656 
   657     Tcl_ResetResult(interp);
   658 
   659     sprintf(buf, "%u", (unsigned)(regexpPtr->re.re_nsub));
   660     Tcl_AppendElement(interp, buf);
   661 
   662     /*
   663      * Must count bits before generating list, because we must know
   664      * whether {} are needed before we start appending names.
   665      */
   666     n = 0;
   667     for (inf = infonames; inf->bit != 0; inf++) {
   668 	if (regexpPtr->re.re_info&inf->bit) {
   669 	    n++;
   670 	}
   671     }
   672     if (n != 1) {
   673 	Tcl_AppendResult(interp, " {", NULL);
   674     }
   675     for (inf = infonames; inf->bit != 0; inf++) {
   676 	if (regexpPtr->re.re_info&inf->bit) {
   677 	    Tcl_AppendElement(interp, inf->text);
   678 	}
   679     }
   680     if (n != 1) {
   681 	Tcl_AppendResult(interp, "}", NULL);
   682     }
   683 
   684     return 0;
   685 }
   686 
   687 /*
   688  *----------------------------------------------------------------------
   689  *
   690  * TclRegError --
   691  *
   692  *	Generate an error message based on the regexp status code.
   693  *
   694  * Results:
   695  *	Places an error in the interpreter.
   696  *
   697  * Side effects:
   698  *	Sets errorCode as well.
   699  *
   700  *----------------------------------------------------------------------
   701  */
   702 
   703 void
   704 TclRegError(interp, msg, status)
   705     Tcl_Interp *interp;		/* Interpreter for error reporting. */
   706     CONST char *msg;		/* Message to prepend to error. */
   707     int status;			/* Status code to report. */
   708 {
   709     char buf[100];		/* ample in practice */
   710     char cbuf[100];		/* lots in practice */
   711     size_t n;
   712     char *p;
   713 
   714     Tcl_ResetResult(interp);
   715     n = TclReError(status, (regex_t *)NULL, buf, sizeof(buf));
   716     p = (n > sizeof(buf)) ? "..." : "";
   717     Tcl_AppendResult(interp, msg, buf, p, NULL);
   718 
   719     sprintf(cbuf, "%d", status);
   720     (VOID) TclReError(REG_ITOA, (regex_t *)NULL, cbuf, sizeof(cbuf));
   721     Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, NULL);
   722 }
   723 
   724 
   725 /*
   726  *----------------------------------------------------------------------
   727  *
   728  * FreeRegexpInternalRep --
   729  *
   730  *	Deallocate the storage associated with a regexp object's internal
   731  *	representation.
   732  *
   733  * Results:
   734  *	None.
   735  *
   736  * Side effects:
   737  *	Frees the compiled regular expression.
   738  *
   739  *----------------------------------------------------------------------
   740  */
   741 
   742 static void
   743 FreeRegexpInternalRep(objPtr)
   744     Tcl_Obj *objPtr;		/* Regexp object with internal rep to free. */
   745 {
   746     TclRegexp *regexpRepPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
   747 
   748     /*
   749      * If this is the last reference to the regexp, free it.
   750      */
   751 
   752     if (--(regexpRepPtr->refCount) <= 0) {
   753 	FreeRegexp(regexpRepPtr);
   754     }
   755 }
   756 
   757 /*
   758  *----------------------------------------------------------------------
   759  *
   760  * DupRegexpInternalRep --
   761  *
   762  *	We copy the reference to the compiled regexp and bump its
   763  *	reference count.
   764  *
   765  * Results:
   766  *	None.
   767  *
   768  * Side effects:
   769  *	Increments the reference count of the regexp.
   770  *
   771  *----------------------------------------------------------------------
   772  */
   773 
   774 static void
   775 DupRegexpInternalRep(srcPtr, copyPtr)
   776     Tcl_Obj *srcPtr;		/* Object with internal rep to copy. */
   777     Tcl_Obj *copyPtr;		/* Object with internal rep to set. */
   778 {
   779     TclRegexp *regexpPtr = (TclRegexp *) srcPtr->internalRep.otherValuePtr;
   780     regexpPtr->refCount++;
   781     copyPtr->internalRep.otherValuePtr = srcPtr->internalRep.otherValuePtr;
   782     copyPtr->typePtr = &tclRegexpType;
   783 }
   784 
   785 /*
   786  *----------------------------------------------------------------------
   787  *
   788  * SetRegexpFromAny --
   789  *
   790  *	Attempt to generate a compiled regular expression for the Tcl object
   791  *	"objPtr".
   792  *
   793  * Results:
   794  *	The return value is TCL_OK or TCL_ERROR. If an error occurs during
   795  *	conversion, an error message is left in the interpreter's result
   796  *	unless "interp" is NULL.
   797  *
   798  * Side effects:
   799  *	If no error occurs, a regular expression is stored as "objPtr"s
   800  *	internal representation.
   801  *
   802  *----------------------------------------------------------------------
   803  */
   804 
   805 static int
   806 SetRegexpFromAny(interp, objPtr)
   807     Tcl_Interp *interp;		/* Used for error reporting if not NULL. */
   808     Tcl_Obj *objPtr;		/* The object to convert. */
   809 {
   810     if (Tcl_GetRegExpFromObj(interp, objPtr, REG_ADVANCED) == NULL) {
   811 	return TCL_ERROR;
   812     }
   813     return TCL_OK;
   814 }
   815 
   816 /*
   817  *---------------------------------------------------------------------------
   818  *
   819  * CompileRegexp --
   820  *
   821  *	Attempt to compile the given regexp pattern.  If the compiled
   822  *	regular expression can be found in the per-thread cache, it
   823  *	will be used instead of compiling a new copy.
   824  *
   825  * Results:
   826  *	The return value is a pointer to a newly allocated TclRegexp
   827  *	that represents the compiled pattern, or NULL if the pattern
   828  *	could not be compiled.  If NULL is returned, an error message is
   829  *	left in the interp's result.
   830  *
   831  * Side effects:
   832  *	The thread-local regexp cache is updated and a new TclRegexp may
   833  *	be allocated.
   834  *
   835  *----------------------------------------------------------------------
   836  */
   837 
   838 static TclRegexp *
   839 CompileRegexp(interp, string, length, flags)
   840     Tcl_Interp *interp;		/* Used for error reporting if not NULL. */
   841     CONST char *string;		/* The regexp to compile (UTF-8). */
   842     int length;			/* The length of the string in bytes. */
   843     int flags;			/* Compilation flags. */
   844 {
   845     TclRegexp *regexpPtr;
   846     CONST Tcl_UniChar *uniString;
   847     int numChars;
   848     Tcl_DString stringBuf;
   849     int status, i;
   850     ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
   851  
   852     if (!tsdPtr->initialized) {
   853 	tsdPtr->initialized = 1;
   854 	Tcl_CreateThreadExitHandler(FinalizeRegexp, NULL);
   855     }
   856 
   857     /*
   858      * This routine maintains a second-level regular expression cache in
   859      * addition to the per-object regexp cache.  The per-thread cache is needed
   860      * to handle the case where for various reasons the object is lost between
   861      * invocations of the regexp command, but the literal pattern is the same.
   862      */
   863 
   864     /*
   865      * Check the per-thread compiled regexp cache.  We can only reuse
   866      * a regexp if it has the same pattern and the same flags.
   867      */
   868 
   869     for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {
   870 	if ((length == tsdPtr->patLengths[i])
   871 		&& (tsdPtr->regexps[i]->flags == flags)
   872 		&& (strcmp(string, tsdPtr->patterns[i]) == 0)) {
   873 	    /*
   874 	     * Move the matched pattern to the first slot in the
   875 	     * cache and shift the other patterns down one position.
   876 	     */
   877 
   878 	    if (i != 0) {
   879 		int j;
   880 		char *cachedString;
   881 
   882 		cachedString = tsdPtr->patterns[i];
   883 		regexpPtr = tsdPtr->regexps[i];
   884 		for (j = i-1; j >= 0; j--) {
   885 		    tsdPtr->patterns[j+1] = tsdPtr->patterns[j];
   886 		    tsdPtr->patLengths[j+1] = tsdPtr->patLengths[j];
   887 		    tsdPtr->regexps[j+1] = tsdPtr->regexps[j];
   888 		}
   889 		tsdPtr->patterns[0] = cachedString;
   890 		tsdPtr->patLengths[0] = length;
   891 		tsdPtr->regexps[0] = regexpPtr;
   892 	    }
   893 	    return tsdPtr->regexps[0];
   894 	}
   895     }
   896 
   897     /*
   898      * This is a new expression, so compile it and add it to the cache.
   899      */
   900     
   901     regexpPtr = (TclRegexp *) ckalloc(sizeof(TclRegexp));
   902     regexpPtr->objPtr = NULL;
   903     regexpPtr->string = NULL;
   904     regexpPtr->details.rm_extend.rm_so = -1;
   905     regexpPtr->details.rm_extend.rm_eo = -1;
   906 
   907     /*
   908      * Get the up-to-date string representation and map to unicode.
   909      */
   910 
   911     Tcl_DStringInit(&stringBuf);
   912     uniString = Tcl_UtfToUniCharDString(string, length, &stringBuf);
   913     numChars = Tcl_DStringLength(&stringBuf) / sizeof(Tcl_UniChar);
   914 
   915     /*
   916      * Compile the string and check for errors.
   917      */
   918 
   919     regexpPtr->flags = flags;
   920     status = TclReComp(&regexpPtr->re, uniString, (size_t) numChars, flags);
   921     Tcl_DStringFree(&stringBuf);
   922 
   923     if (status != REG_OKAY) {
   924 	/*
   925 	 * Clean up and report errors in the interpreter, if possible.
   926 	 */
   927 
   928 	ckfree((char *)regexpPtr);
   929 	if (interp) {
   930 	    TclRegError(interp,
   931 		    "couldn't compile regular expression pattern: ",
   932 		    status);
   933 	}
   934 	return NULL;
   935     }
   936 
   937     /*
   938      * Allocate enough space for all of the subexpressions, plus one
   939      * extra for the entire pattern.
   940      */
   941 
   942     regexpPtr->matches = (regmatch_t *) ckalloc(
   943 	    sizeof(regmatch_t) * (regexpPtr->re.re_nsub + 1));
   944 
   945     /*
   946      * Initialize the refcount to one initially, since it is in the cache.
   947      */
   948 
   949     regexpPtr->refCount = 1;
   950 
   951     /*
   952      * Free the last regexp, if necessary, and make room at the head of the
   953      * list for the new regexp.
   954      */
   955 
   956     if (tsdPtr->patterns[NUM_REGEXPS-1] != NULL) {
   957 	TclRegexp *oldRegexpPtr = tsdPtr->regexps[NUM_REGEXPS-1];
   958 	if (--(oldRegexpPtr->refCount) <= 0) {
   959 	    FreeRegexp(oldRegexpPtr);
   960 	}
   961 	ckfree(tsdPtr->patterns[NUM_REGEXPS-1]);
   962     }
   963     for (i = NUM_REGEXPS - 2; i >= 0; i--) {
   964 	tsdPtr->patterns[i+1] = tsdPtr->patterns[i];
   965 	tsdPtr->patLengths[i+1] = tsdPtr->patLengths[i];
   966 	tsdPtr->regexps[i+1] = tsdPtr->regexps[i];
   967     }
   968     tsdPtr->patterns[0] = (char *) ckalloc((unsigned) (length+1));
   969     strcpy(tsdPtr->patterns[0], string);
   970     tsdPtr->patLengths[0] = length;
   971     tsdPtr->regexps[0] = regexpPtr;
   972 
   973     return regexpPtr;
   974 }
   975 
   976 /*
   977  *----------------------------------------------------------------------
   978  *
   979  * FreeRegexp --
   980  *
   981  *	Release the storage associated with a TclRegexp.
   982  *
   983  * Results:
   984  *	None.
   985  *
   986  * Side effects:
   987  *	None.
   988  *
   989  *----------------------------------------------------------------------
   990  */
   991 
   992 static void
   993 FreeRegexp(regexpPtr)
   994     TclRegexp *regexpPtr;	/* Compiled regular expression to free. */
   995 {
   996     TclReFree(&regexpPtr->re);
   997     if (regexpPtr->matches) {
   998 	ckfree((char *) regexpPtr->matches);
   999     }
  1000     ckfree((char *) regexpPtr);
  1001 }
  1002 
  1003 /*
  1004  *----------------------------------------------------------------------
  1005  *
  1006  * FinalizeRegexp --
  1007  *
  1008  *	Release the storage associated with the per-thread regexp
  1009  *	cache.
  1010  *
  1011  * Results:
  1012  *	None.
  1013  *
  1014  * Side effects:
  1015  *	None.
  1016  *
  1017  *----------------------------------------------------------------------
  1018  */
  1019 
  1020 static void
  1021 FinalizeRegexp(clientData)
  1022     ClientData clientData;	/* Not used. */
  1023 {
  1024     int i;
  1025     TclRegexp *regexpPtr;
  1026     ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
  1027 
  1028     for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {
  1029 	regexpPtr = tsdPtr->regexps[i];
  1030 	if (--(regexpPtr->refCount) <= 0) {
  1031 	    FreeRegexp(regexpPtr);
  1032 	}
  1033 	ckfree(tsdPtr->patterns[i]);
  1034 	tsdPtr->patterns[i] = NULL;
  1035     }
  1036     /*
  1037      * We may find ourselves reinitialized if another finalization routine
  1038      * invokes regexps.
  1039      */
  1040     tsdPtr->initialized = 0;
  1041 }