os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/generic/tclRegexp.c
First public contribution.
4 * This file contains the public interfaces to the Tcl regular
5 * expression mechanism.
7 * Copyright (c) 1998 by Sun Microsystems, Inc.
8 * Copyright (c) 1998-1999 by Scriptics Corporation.
9 * Portions Copyright (c) 2007-2008 Nokia Corporation and/or its subsidiaries. All rights reserved.
11 * See the file "license.terms" for information on usage and redistribution
12 * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
14 * RCS: @(#) $Id: tclRegexp.c,v 1.14.4.2 2006/04/07 01:14:28 hobbs Exp $
19 #include "tclRegexp.h"
20 #if defined(__SYMBIAN32__) && defined(__WINSCW__)
21 #include "tclSymbianGlobals.h"
22 #define dataKey getdataKey(6)
26 *----------------------------------------------------------------------
27 * The routines in this file use Henry Spencer's regular expression
28 * package contained in the following additional source files:
30 * regc_color.c regc_cvec.c regc_lex.c
31 * regc_nfa.c regcomp.c regcustom.h
32 * rege_dfa.c regerror.c regerrs.h
33 * regex.h regexec.c regfree.c
34 * regfronts.c regguts.h
36 * Copyright (c) 1998 Henry Spencer. All rights reserved.
38 * Development of this software was funded, in part, by Cray Research Inc.,
39 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
40 * Corporation, none of whom are responsible for the results. The author
43 * Redistribution and use in source and binary forms -- with or without
44 * modification -- are permitted for any purpose, provided that
45 * redistributions in source form retain this entire copyright notice and
46 * indicate the origin and nature of any modifications.
48 * I'd appreciate being given credit for this package in the documentation
49 * of software which uses it, but that is not a requirement.
51 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
52 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
53 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
54 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
55 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
56 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
57 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
58 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
59 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
60 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
62 * *** NOTE: this code has been altered slightly for use in Tcl: ***
63 * *** 1. Names have been changed, e.g. from re_comp to ***
64 * *** TclRegComp, to avoid clashes with other ***
65 * *** regexp implementations used by applications. ***
69 * Thread local storage used to maintain a per-thread cache of compiled
70 * regular expressions.
73 #define NUM_REGEXPS 30
75 typedef struct ThreadSpecificData {
76 int initialized; /* Set to 1 when the module is initialized. */
77 char *patterns[NUM_REGEXPS];/* Strings corresponding to compiled
78 * regular expression patterns. NULL
79 * means that this slot isn't used.
81 int patLengths[NUM_REGEXPS];/* Number of non-null characters in
82 * corresponding entry in patterns.
83 * -1 means entry isn't used. */
84 struct TclRegexp *regexps[NUM_REGEXPS];
85 /* Compiled forms of above strings. Also
86 * malloc-ed, or NULL if not in use yet. */
89 #if !defined(__SYMBIAN32__) || !defined(__WINSCW__)
90 static Tcl_ThreadDataKey dataKey;
94 * Declarations for functions used only in this file.
97 static TclRegexp * CompileRegexp _ANSI_ARGS_((Tcl_Interp *interp,
98 CONST char *pattern, int length, int flags));
99 static void DupRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr,
101 static void FinalizeRegexp _ANSI_ARGS_((ClientData clientData));
102 static void FreeRegexp _ANSI_ARGS_((TclRegexp *regexpPtr));
103 static void FreeRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr));
104 static int RegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp,
105 Tcl_RegExp re, CONST Tcl_UniChar *uniString,
106 int numChars, int nmatches, int flags));
107 static int SetRegexpFromAny _ANSI_ARGS_((Tcl_Interp *interp,
111 * The regular expression Tcl object type. This serves as a cache
112 * of the compiled form of the regular expression.
115 static Tcl_ObjType tclRegexpType = {
117 FreeRegexpInternalRep, /* freeIntRepProc */
118 DupRegexpInternalRep, /* dupIntRepProc */
119 NULL, /* updateStringProc */
120 SetRegexpFromAny /* setFromAnyProc */
125 *----------------------------------------------------------------------
127 * Tcl_RegExpCompile --
129 * Compile a regular expression into a form suitable for fast
130 * matching. This procedure is DEPRECATED in favor of the
131 * object version of the command.
134 * The return value is a pointer to the compiled form of string,
135 * suitable for passing to Tcl_RegExpExec. This compiled form
136 * is only valid up until the next call to this procedure, so
137 * don't keep these around for a long time! If an error occurred
138 * while compiling the pattern, then NULL is returned and an error
139 * message is left in the interp's result.
142 * Updates the cache of compiled regexps.
144 *----------------------------------------------------------------------
148 Tcl_RegExpCompile(interp, string)
149 Tcl_Interp *interp; /* For use in error reporting and
150 * to access the interp regexp cache. */
151 CONST char *string; /* String for which to produce
152 * compiled regular expression. */
154 return (Tcl_RegExp) CompileRegexp(interp, string, (int) strlen(string),
159 *----------------------------------------------------------------------
163 * Execute the regular expression matcher using a compiled form
164 * of a regular expression and save information about any match
168 * If an error occurs during the matching operation then -1
169 * is returned and the interp's result contains an error message.
170 * Otherwise the return value is 1 if a matching range is
171 * found and 0 if there is no matching range.
176 *----------------------------------------------------------------------
180 Tcl_RegExpExec(interp, re, string, start)
181 Tcl_Interp *interp; /* Interpreter to use for error reporting. */
182 Tcl_RegExp re; /* Compiled regular expression; must have
183 * been returned by previous call to
184 * Tcl_GetRegExpFromObj. */
185 CONST char *string; /* String against which to match re. */
186 CONST char *start; /* If string is part of a larger string,
187 * this identifies beginning of larger
188 * string, so that "^" won't match. */
190 int flags, result, numChars;
191 TclRegexp *regexp = (TclRegexp *)re;
193 CONST Tcl_UniChar *ustr;
196 * If the starting point is offset from the beginning of the buffer,
197 * then we need to tell the regexp engine not to match "^".
200 if (string > start) {
207 * Remember the string for use by Tcl_RegExpRange().
210 regexp->string = string;
211 regexp->objPtr = NULL;
214 * Convert the string to Unicode and perform the match.
217 Tcl_DStringInit(&ds);
218 ustr = Tcl_UtfToUniCharDString(string, -1, &ds);
219 numChars = Tcl_DStringLength(&ds) / sizeof(Tcl_UniChar);
220 result = RegExpExecUniChar(interp, re, ustr, numChars,
221 -1 /* nmatches */, flags);
222 Tcl_DStringFree(&ds);
228 *---------------------------------------------------------------------------
232 * Returns pointers describing the range of a regular expression match,
233 * or one of the subranges within the match.
236 * The variables at *startPtr and *endPtr are modified to hold the
237 * addresses of the endpoints of the range given by index. If the
238 * specified range doesn't exist then NULLs are returned.
243 *---------------------------------------------------------------------------
247 Tcl_RegExpRange(re, index, startPtr, endPtr)
248 Tcl_RegExp re; /* Compiled regular expression that has
249 * been passed to Tcl_RegExpExec. */
250 int index; /* 0 means give the range of the entire
251 * match, > 0 means give the range of
252 * a matching subrange. */
253 CONST char **startPtr; /* Store address of first character in
254 * (sub-) range here. */
255 CONST char **endPtr; /* Store address of character just after last
256 * in (sub-) range here. */
258 TclRegexp *regexpPtr = (TclRegexp *) re;
261 if ((size_t) index > regexpPtr->re.re_nsub) {
262 *startPtr = *endPtr = NULL;
263 } else if (regexpPtr->matches[index].rm_so < 0) {
264 *startPtr = *endPtr = NULL;
266 if (regexpPtr->objPtr) {
267 string = Tcl_GetString(regexpPtr->objPtr);
269 string = regexpPtr->string;
271 *startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so);
272 *endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo);
277 *---------------------------------------------------------------------------
279 * RegExpExecUniChar --
281 * Execute the regular expression matcher using a compiled form of a
282 * regular expression and save information about any match that is
286 * If an error occurs during the matching operation then -1 is
287 * returned and an error message is left in interp's result.
288 * Otherwise the return value is 1 if a matching range was found or
289 * 0 if there was no matching range.
294 *----------------------------------------------------------------------
298 RegExpExecUniChar(interp, re, wString, numChars, nmatches, flags)
299 Tcl_Interp *interp; /* Interpreter to use for error reporting. */
300 Tcl_RegExp re; /* Compiled regular expression; returned by
301 * a previous call to Tcl_GetRegExpFromObj */
302 CONST Tcl_UniChar *wString; /* String against which to match re. */
303 int numChars; /* Length of Tcl_UniChar string (must
305 int nmatches; /* How many subexpression matches (counting
306 * the whole match as subexpression 0) are
307 * of interest. -1 means "don't know". */
308 int flags; /* Regular expression flags. */
311 TclRegexp *regexpPtr = (TclRegexp *) re;
312 size_t last = regexpPtr->re.re_nsub + 1;
315 if (nmatches >= 0 && (size_t) nmatches < nm) {
316 nm = (size_t) nmatches;
319 status = TclReExec(®expPtr->re, wString, (size_t) numChars,
320 ®expPtr->details, nm, regexpPtr->matches, flags);
326 if (status != REG_OKAY) {
327 if (status == REG_NOMATCH) {
330 if (interp != NULL) {
331 TclRegError(interp, "error while matching regular expression: ",
340 *---------------------------------------------------------------------------
342 * TclRegExpRangeUniChar --
344 * Returns pointers describing the range of a regular expression match,
345 * or one of the subranges within the match, or the hypothetical range
346 * represented by the rm_extend field of the rm_detail_t.
349 * The variables at *startPtr and *endPtr are modified to hold the
350 * offsets of the endpoints of the range given by index. If the
351 * specified range doesn't exist then -1s are supplied.
356 *---------------------------------------------------------------------------
360 TclRegExpRangeUniChar(re, index, startPtr, endPtr)
361 Tcl_RegExp re; /* Compiled regular expression that has
362 * been passed to Tcl_RegExpExec. */
363 int index; /* 0 means give the range of the entire
364 * match, > 0 means give the range of
365 * a matching subrange, -1 means the
366 * range of the rm_extend field. */
367 int *startPtr; /* Store address of first character in
368 * (sub-) range here. */
369 int *endPtr; /* Store address of character just after last
370 * in (sub-) range here. */
372 TclRegexp *regexpPtr = (TclRegexp *) re;
374 if ((regexpPtr->flags®_EXPECT) && index == -1) {
375 *startPtr = regexpPtr->details.rm_extend.rm_so;
376 *endPtr = regexpPtr->details.rm_extend.rm_eo;
377 } else if ((size_t) index > regexpPtr->re.re_nsub) {
381 *startPtr = regexpPtr->matches[index].rm_so;
382 *endPtr = regexpPtr->matches[index].rm_eo;
387 *----------------------------------------------------------------------
391 * See if a string matches a regular expression.
394 * If an error occurs during the matching operation then -1
395 * is returned and the interp's result contains an error message.
396 * Otherwise the return value is 1 if "string" matches "pattern"
402 *----------------------------------------------------------------------
406 Tcl_RegExpMatch(interp, string, pattern)
407 Tcl_Interp *interp; /* Used for error reporting. May be NULL. */
408 CONST char *string; /* String. */
409 CONST char *pattern; /* Regular expression to match against
414 re = Tcl_RegExpCompile(interp, pattern);
418 return Tcl_RegExpExec(interp, re, string, string);
422 *----------------------------------------------------------------------
424 * Tcl_RegExpExecObj --
426 * Execute a precompiled regexp against the given object.
429 * If an error occurs during the matching operation then -1
430 * is returned and the interp's result contains an error message.
431 * Otherwise the return value is 1 if "string" matches "pattern"
435 * Converts the object to a Unicode object.
437 *----------------------------------------------------------------------
441 Tcl_RegExpExecObj(interp, re, objPtr, offset, nmatches, flags)
442 Tcl_Interp *interp; /* Interpreter to use for error reporting. */
443 Tcl_RegExp re; /* Compiled regular expression; must have
444 * been returned by previous call to
445 * Tcl_GetRegExpFromObj. */
446 Tcl_Obj *objPtr; /* String against which to match re. */
447 int offset; /* Character index that marks where matching
449 int nmatches; /* How many subexpression matches (counting
450 * the whole match as subexpression 0) are
451 * of interest. -1 means all of them. */
452 int flags; /* Regular expression execution flags. */
454 TclRegexp *regexpPtr = (TclRegexp *) re;
459 * Save the target object so we can extract strings from it later.
462 regexpPtr->string = NULL;
463 regexpPtr->objPtr = objPtr;
465 udata = Tcl_GetUnicodeFromObj(objPtr, &length);
467 if (offset > length) {
473 return RegExpExecUniChar(interp, re, udata, length, nmatches, flags);
477 *----------------------------------------------------------------------
479 * Tcl_RegExpMatchObj --
481 * See if an object matches a regular expression.
484 * If an error occurs during the matching operation then -1
485 * is returned and the interp's result contains an error message.
486 * Otherwise the return value is 1 if "string" matches "pattern"
490 * Changes the internal rep of the pattern and string objects.
492 *----------------------------------------------------------------------
496 Tcl_RegExpMatchObj(interp, stringObj, patternObj)
497 Tcl_Interp *interp; /* Used for error reporting. May be NULL. */
498 Tcl_Obj *stringObj; /* Object containing the String to search. */
499 Tcl_Obj *patternObj; /* Regular expression to match against
504 re = Tcl_GetRegExpFromObj(interp, patternObj,
505 TCL_REG_ADVANCED | TCL_REG_NOSUB);
509 return Tcl_RegExpExecObj(interp, re, stringObj, 0 /* offset */,
510 0 /* nmatches */, 0 /* flags */);
514 *----------------------------------------------------------------------
516 * Tcl_RegExpGetInfo --
518 * Retrieve information about the current match.
526 *----------------------------------------------------------------------
530 Tcl_RegExpGetInfo(regexp, infoPtr)
531 Tcl_RegExp regexp; /* Pattern from which to get subexpressions. */
532 Tcl_RegExpInfo *infoPtr; /* Match information is stored here. */
534 TclRegexp *regexpPtr = (TclRegexp *) regexp;
536 infoPtr->nsubs = regexpPtr->re.re_nsub;
537 infoPtr->matches = (Tcl_RegExpIndices *) regexpPtr->matches;
538 infoPtr->extendStart = regexpPtr->details.rm_extend.rm_so;
542 *----------------------------------------------------------------------
544 * Tcl_GetRegExpFromObj --
546 * Compile a regular expression into a form suitable for fast
547 * matching. This procedure caches the result in a Tcl_Obj.
550 * The return value is a pointer to the compiled form of string,
551 * suitable for passing to Tcl_RegExpExec. If an error occurred
552 * while compiling the pattern, then NULL is returned and an error
553 * message is left in the interp's result.
556 * Updates the native rep of the Tcl_Obj.
558 *----------------------------------------------------------------------
562 Tcl_GetRegExpFromObj(interp, objPtr, flags)
563 Tcl_Interp *interp; /* For use in error reporting, and to access
564 * the interp regexp cache. */
565 Tcl_Obj *objPtr; /* Object whose string rep contains regular
566 * expression pattern. Internal rep will be
567 * changed to compiled form of this regular
569 int flags; /* Regular expression compilation flags. */
572 Tcl_ObjType *typePtr;
573 TclRegexp *regexpPtr;
576 typePtr = objPtr->typePtr;
577 regexpPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
579 if ((typePtr != &tclRegexpType) || (regexpPtr->flags != flags)) {
580 pattern = Tcl_GetStringFromObj(objPtr, &length);
582 regexpPtr = CompileRegexp(interp, pattern, length, flags);
583 if (regexpPtr == NULL) {
588 * Add a reference to the regexp so it will persist even if it is
589 * pushed out of the current thread's regexp cache. This reference
590 * will be removed when the object's internal rep is freed.
593 regexpPtr->refCount++;
596 * Free the old representation and set our type.
599 if ((typePtr != NULL) && (typePtr->freeIntRepProc != NULL)) {
600 (*typePtr->freeIntRepProc)(objPtr);
602 objPtr->internalRep.otherValuePtr = (VOID *) regexpPtr;
603 objPtr->typePtr = &tclRegexpType;
605 return (Tcl_RegExp) regexpPtr;
609 *----------------------------------------------------------------------
613 * Return information about a compiled regular expression.
616 * The return value is -1 for failure, 0 for success, although at
617 * the moment there's nothing that could fail. On success, a list
618 * is left in the interp's result: first element is the subexpression
619 * count, second is a list of re_info bit names.
624 *----------------------------------------------------------------------
628 TclRegAbout(interp, re)
629 Tcl_Interp *interp; /* For use in variable assignment. */
630 Tcl_RegExp re; /* The compiled regular expression. */
632 TclRegexp *regexpPtr = (TclRegexp *)re;
633 char buf[TCL_INTEGER_SPACE];
634 static struct infoname {
638 {REG_UBACKREF, "REG_UBACKREF"},
639 {REG_ULOOKAHEAD, "REG_ULOOKAHEAD"},
640 {REG_UBOUNDS, "REG_UBOUNDS"},
641 {REG_UBRACES, "REG_UBRACES"},
642 {REG_UBSALNUM, "REG_UBSALNUM"},
643 {REG_UPBOTCH, "REG_UPBOTCH"},
644 {REG_UBBS, "REG_UBBS"},
645 {REG_UNONPOSIX, "REG_UNONPOSIX"},
646 {REG_UUNSPEC, "REG_UUNSPEC"},
647 {REG_UUNPORT, "REG_UUNPORT"},
648 {REG_ULOCALE, "REG_ULOCALE"},
649 {REG_UEMPTYMATCH, "REG_UEMPTYMATCH"},
650 {REG_UIMPOSSIBLE, "REG_UIMPOSSIBLE"},
651 {REG_USHORTEST, "REG_USHORTEST"},
654 struct infoname *inf;
657 Tcl_ResetResult(interp);
659 sprintf(buf, "%u", (unsigned)(regexpPtr->re.re_nsub));
660 Tcl_AppendElement(interp, buf);
663 * Must count bits before generating list, because we must know
664 * whether {} are needed before we start appending names.
667 for (inf = infonames; inf->bit != 0; inf++) {
668 if (regexpPtr->re.re_info&inf->bit) {
673 Tcl_AppendResult(interp, " {", NULL);
675 for (inf = infonames; inf->bit != 0; inf++) {
676 if (regexpPtr->re.re_info&inf->bit) {
677 Tcl_AppendElement(interp, inf->text);
681 Tcl_AppendResult(interp, "}", NULL);
688 *----------------------------------------------------------------------
692 * Generate an error message based on the regexp status code.
695 * Places an error in the interpreter.
698 * Sets errorCode as well.
700 *----------------------------------------------------------------------
704 TclRegError(interp, msg, status)
705 Tcl_Interp *interp; /* Interpreter for error reporting. */
706 CONST char *msg; /* Message to prepend to error. */
707 int status; /* Status code to report. */
709 char buf[100]; /* ample in practice */
710 char cbuf[100]; /* lots in practice */
714 Tcl_ResetResult(interp);
715 n = TclReError(status, (regex_t *)NULL, buf, sizeof(buf));
716 p = (n > sizeof(buf)) ? "..." : "";
717 Tcl_AppendResult(interp, msg, buf, p, NULL);
719 sprintf(cbuf, "%d", status);
720 (VOID) TclReError(REG_ITOA, (regex_t *)NULL, cbuf, sizeof(cbuf));
721 Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, NULL);
726 *----------------------------------------------------------------------
728 * FreeRegexpInternalRep --
730 * Deallocate the storage associated with a regexp object's internal
737 * Frees the compiled regular expression.
739 *----------------------------------------------------------------------
743 FreeRegexpInternalRep(objPtr)
744 Tcl_Obj *objPtr; /* Regexp object with internal rep to free. */
746 TclRegexp *regexpRepPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
749 * If this is the last reference to the regexp, free it.
752 if (--(regexpRepPtr->refCount) <= 0) {
753 FreeRegexp(regexpRepPtr);
758 *----------------------------------------------------------------------
760 * DupRegexpInternalRep --
762 * We copy the reference to the compiled regexp and bump its
769 * Increments the reference count of the regexp.
771 *----------------------------------------------------------------------
775 DupRegexpInternalRep(srcPtr, copyPtr)
776 Tcl_Obj *srcPtr; /* Object with internal rep to copy. */
777 Tcl_Obj *copyPtr; /* Object with internal rep to set. */
779 TclRegexp *regexpPtr = (TclRegexp *) srcPtr->internalRep.otherValuePtr;
780 regexpPtr->refCount++;
781 copyPtr->internalRep.otherValuePtr = srcPtr->internalRep.otherValuePtr;
782 copyPtr->typePtr = &tclRegexpType;
786 *----------------------------------------------------------------------
788 * SetRegexpFromAny --
790 * Attempt to generate a compiled regular expression for the Tcl object
794 * The return value is TCL_OK or TCL_ERROR. If an error occurs during
795 * conversion, an error message is left in the interpreter's result
796 * unless "interp" is NULL.
799 * If no error occurs, a regular expression is stored as "objPtr"s
800 * internal representation.
802 *----------------------------------------------------------------------
806 SetRegexpFromAny(interp, objPtr)
807 Tcl_Interp *interp; /* Used for error reporting if not NULL. */
808 Tcl_Obj *objPtr; /* The object to convert. */
810 if (Tcl_GetRegExpFromObj(interp, objPtr, REG_ADVANCED) == NULL) {
817 *---------------------------------------------------------------------------
821 * Attempt to compile the given regexp pattern. If the compiled
822 * regular expression can be found in the per-thread cache, it
823 * will be used instead of compiling a new copy.
826 * The return value is a pointer to a newly allocated TclRegexp
827 * that represents the compiled pattern, or NULL if the pattern
828 * could not be compiled. If NULL is returned, an error message is
829 * left in the interp's result.
832 * The thread-local regexp cache is updated and a new TclRegexp may
835 *----------------------------------------------------------------------
839 CompileRegexp(interp, string, length, flags)
840 Tcl_Interp *interp; /* Used for error reporting if not NULL. */
841 CONST char *string; /* The regexp to compile (UTF-8). */
842 int length; /* The length of the string in bytes. */
843 int flags; /* Compilation flags. */
845 TclRegexp *regexpPtr;
846 CONST Tcl_UniChar *uniString;
848 Tcl_DString stringBuf;
850 ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
852 if (!tsdPtr->initialized) {
853 tsdPtr->initialized = 1;
854 Tcl_CreateThreadExitHandler(FinalizeRegexp, NULL);
858 * This routine maintains a second-level regular expression cache in
859 * addition to the per-object regexp cache. The per-thread cache is needed
860 * to handle the case where for various reasons the object is lost between
861 * invocations of the regexp command, but the literal pattern is the same.
865 * Check the per-thread compiled regexp cache. We can only reuse
866 * a regexp if it has the same pattern and the same flags.
869 for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {
870 if ((length == tsdPtr->patLengths[i])
871 && (tsdPtr->regexps[i]->flags == flags)
872 && (strcmp(string, tsdPtr->patterns[i]) == 0)) {
874 * Move the matched pattern to the first slot in the
875 * cache and shift the other patterns down one position.
882 cachedString = tsdPtr->patterns[i];
883 regexpPtr = tsdPtr->regexps[i];
884 for (j = i-1; j >= 0; j--) {
885 tsdPtr->patterns[j+1] = tsdPtr->patterns[j];
886 tsdPtr->patLengths[j+1] = tsdPtr->patLengths[j];
887 tsdPtr->regexps[j+1] = tsdPtr->regexps[j];
889 tsdPtr->patterns[0] = cachedString;
890 tsdPtr->patLengths[0] = length;
891 tsdPtr->regexps[0] = regexpPtr;
893 return tsdPtr->regexps[0];
898 * This is a new expression, so compile it and add it to the cache.
901 regexpPtr = (TclRegexp *) ckalloc(sizeof(TclRegexp));
902 regexpPtr->objPtr = NULL;
903 regexpPtr->string = NULL;
904 regexpPtr->details.rm_extend.rm_so = -1;
905 regexpPtr->details.rm_extend.rm_eo = -1;
908 * Get the up-to-date string representation and map to unicode.
911 Tcl_DStringInit(&stringBuf);
912 uniString = Tcl_UtfToUniCharDString(string, length, &stringBuf);
913 numChars = Tcl_DStringLength(&stringBuf) / sizeof(Tcl_UniChar);
916 * Compile the string and check for errors.
919 regexpPtr->flags = flags;
920 status = TclReComp(®expPtr->re, uniString, (size_t) numChars, flags);
921 Tcl_DStringFree(&stringBuf);
923 if (status != REG_OKAY) {
925 * Clean up and report errors in the interpreter, if possible.
928 ckfree((char *)regexpPtr);
931 "couldn't compile regular expression pattern: ",
938 * Allocate enough space for all of the subexpressions, plus one
939 * extra for the entire pattern.
942 regexpPtr->matches = (regmatch_t *) ckalloc(
943 sizeof(regmatch_t) * (regexpPtr->re.re_nsub + 1));
946 * Initialize the refcount to one initially, since it is in the cache.
949 regexpPtr->refCount = 1;
952 * Free the last regexp, if necessary, and make room at the head of the
953 * list for the new regexp.
956 if (tsdPtr->patterns[NUM_REGEXPS-1] != NULL) {
957 TclRegexp *oldRegexpPtr = tsdPtr->regexps[NUM_REGEXPS-1];
958 if (--(oldRegexpPtr->refCount) <= 0) {
959 FreeRegexp(oldRegexpPtr);
961 ckfree(tsdPtr->patterns[NUM_REGEXPS-1]);
963 for (i = NUM_REGEXPS - 2; i >= 0; i--) {
964 tsdPtr->patterns[i+1] = tsdPtr->patterns[i];
965 tsdPtr->patLengths[i+1] = tsdPtr->patLengths[i];
966 tsdPtr->regexps[i+1] = tsdPtr->regexps[i];
968 tsdPtr->patterns[0] = (char *) ckalloc((unsigned) (length+1));
969 strcpy(tsdPtr->patterns[0], string);
970 tsdPtr->patLengths[0] = length;
971 tsdPtr->regexps[0] = regexpPtr;
977 *----------------------------------------------------------------------
981 * Release the storage associated with a TclRegexp.
989 *----------------------------------------------------------------------
993 FreeRegexp(regexpPtr)
994 TclRegexp *regexpPtr; /* Compiled regular expression to free. */
996 TclReFree(®expPtr->re);
997 if (regexpPtr->matches) {
998 ckfree((char *) regexpPtr->matches);
1000 ckfree((char *) regexpPtr);
1004 *----------------------------------------------------------------------
1008 * Release the storage associated with the per-thread regexp
1017 *----------------------------------------------------------------------
1021 FinalizeRegexp(clientData)
1022 ClientData clientData; /* Not used. */
1025 TclRegexp *regexpPtr;
1026 ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
1028 for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {
1029 regexpPtr = tsdPtr->regexps[i];
1030 if (--(regexpPtr->refCount) <= 0) {
1031 FreeRegexp(regexpPtr);
1033 ckfree(tsdPtr->patterns[i]);
1034 tsdPtr->patterns[i] = NULL;
1037 * We may find ourselves reinitialized if another finalization routine
1040 tsdPtr->initialized = 0;