Symaptic: os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/generic/regcomp.c@260cb5ec6c19

     1 /*

     2  * re_*comp and friends - compile REs

     3  * This file #includes several others (see the bottom).

     4  *

     5  * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.

     6  *

     7  * Development of this software was funded, in part, by Cray Research Inc.,

     8  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics

     9  * Corporation, none of whom are responsible for the results.  The author

    10  * thanks all of them.

    11  *

    12  * Redistribution and use in source and binary forms -- with or without

    13  * modification -- are permitted for any purpose, provided that

    14  * redistributions in source form retain this entire copyright notice and

    15  * indicate the origin and nature of any modifications.

    16  *

    17  * I'd appreciate being given credit for this package in the documentation

    18  * of software which uses it, but that is not a requirement.

    19  *

    20  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,

    21  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY

    22  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL

    23  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,

    24  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

    25  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;

    26  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,

    27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR

    28  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF

    29  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

    30  *

    31  */

    33 #include "regguts.h"

    35 /*

    36  * forward declarations, up here so forward datatypes etc. are defined early

    37  */

    38 /* =====^!^===== begin forwards =====^!^===== */

    39 /* automatically gathered by fwd; do not hand-edit */

    40 /* === regcomp.c === */

    41 int compile _ANSI_ARGS_((regex_t *, CONST chr *, size_t, int));

    42 static VOID moresubs _ANSI_ARGS_((struct vars *, int));

    43 static int freev _ANSI_ARGS_((struct vars *, int));

    44 static VOID makesearch _ANSI_ARGS_((struct vars *, struct nfa *));

    45 static struct subre *parse _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *));

    46 static struct subre *parsebranch _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *, int));

    47 static VOID parseqatom _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *, struct subre *));

    48 static VOID nonword _ANSI_ARGS_((struct vars *, int, struct state *, struct state *));

    49 static VOID word _ANSI_ARGS_((struct vars *, int, struct state *, struct state *));

    50 static int scannum _ANSI_ARGS_((struct vars *));

    51 static VOID repeat _ANSI_ARGS_((struct vars *, struct state *, struct state *, int, int));

    52 static VOID bracket _ANSI_ARGS_((struct vars *, struct state *, struct state *));

    53 static VOID cbracket _ANSI_ARGS_((struct vars *, struct state *, struct state *));

    54 static VOID brackpart _ANSI_ARGS_((struct vars *, struct state *, struct state *));

    55 static chr *scanplain _ANSI_ARGS_((struct vars *));

    56 static VOID leaders _ANSI_ARGS_((struct vars *, struct cvec *));

    57 static VOID onechr _ANSI_ARGS_((struct vars *, pchr, struct state *, struct state *));

    58 static VOID dovec _ANSI_ARGS_((struct vars *, struct cvec *, struct state *, struct state *));

    59 static celt nextleader _ANSI_ARGS_((struct vars *, pchr, pchr));

    60 static VOID wordchrs _ANSI_ARGS_((struct vars *));

    61 static struct subre *subre _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *));

    62 static VOID freesubre _ANSI_ARGS_((struct vars *, struct subre *));

    63 static VOID freesrnode _ANSI_ARGS_((struct vars *, struct subre *));

    64 static VOID optst _ANSI_ARGS_((struct vars *, struct subre *));

    65 static int numst _ANSI_ARGS_((struct subre *, int));

    66 static VOID markst _ANSI_ARGS_((struct subre *));

    67 static VOID cleanst _ANSI_ARGS_((struct vars *));

    68 static long nfatree _ANSI_ARGS_((struct vars *, struct subre *, FILE *));

    69 static long nfanode _ANSI_ARGS_((struct vars *, struct subre *, FILE *));

    70 static int newlacon _ANSI_ARGS_((struct vars *, struct state *, struct state *, int));

    71 static VOID freelacons _ANSI_ARGS_((struct subre *, int));

    72 static VOID rfree _ANSI_ARGS_((regex_t *));

    73 static VOID dump _ANSI_ARGS_((regex_t *, FILE *));

    74 static VOID dumpst _ANSI_ARGS_((struct subre *, FILE *, int));

    75 static VOID stdump _ANSI_ARGS_((struct subre *, FILE *, int));

    76 static char *stid _ANSI_ARGS_((struct subre *, char *, size_t));

    77 /* === regc_lex.c === */

    78 static VOID lexstart _ANSI_ARGS_((struct vars *));

    79 static VOID prefixes _ANSI_ARGS_((struct vars *));

    80 static VOID lexnest _ANSI_ARGS_((struct vars *, chr *, chr *));

    81 static VOID lexword _ANSI_ARGS_((struct vars *));

    82 static int next _ANSI_ARGS_((struct vars *));

    83 static int lexescape _ANSI_ARGS_((struct vars *));

    84 static chr lexdigits _ANSI_ARGS_((struct vars *, int, int, int));

    85 static int brenext _ANSI_ARGS_((struct vars *, pchr));

    86 static VOID skip _ANSI_ARGS_((struct vars *));

    87 static chr newline _ANSI_ARGS_((NOPARMS));

    88 #ifdef REG_DEBUG

    89 static chr *ch _ANSI_ARGS_((NOPARMS));

    90 #endif

    91 static chr chrnamed _ANSI_ARGS_((struct vars *, chr *, chr *, pchr));

    92 /* === regc_color.c === */

    93 static VOID initcm _ANSI_ARGS_((struct vars *, struct colormap *));

    94 static VOID freecm _ANSI_ARGS_((struct colormap *));

    95 static VOID cmtreefree _ANSI_ARGS_((struct colormap *, union tree *, int));

    96 static color setcolor _ANSI_ARGS_((struct colormap *, pchr, pcolor));

    97 static color maxcolor _ANSI_ARGS_((struct colormap *));

    98 static color newcolor _ANSI_ARGS_((struct colormap *));

    99 static VOID freecolor _ANSI_ARGS_((struct colormap *, pcolor));

   100 static color pseudocolor _ANSI_ARGS_((struct colormap *));

   101 static color subcolor _ANSI_ARGS_((struct colormap *, pchr c));

   102 static color newsub _ANSI_ARGS_((struct colormap *, pcolor));

   103 static VOID subrange _ANSI_ARGS_((struct vars *, pchr, pchr, struct state *, struct state *));

   104 static VOID subblock _ANSI_ARGS_((struct vars *, pchr, struct state *, struct state *));

   105 static VOID okcolors _ANSI_ARGS_((struct nfa *, struct colormap *));

   106 static VOID colorchain _ANSI_ARGS_((struct colormap *, struct arc *));

   107 static VOID uncolorchain _ANSI_ARGS_((struct colormap *, struct arc *));

   108 static int singleton _ANSI_ARGS_((struct colormap *, pchr c));

   109 static VOID rainbow _ANSI_ARGS_((struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *));

   110 static VOID colorcomplement _ANSI_ARGS_((struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *));

   111 #ifdef REG_DEBUG

   112 static VOID dumpcolors _ANSI_ARGS_((struct colormap *, FILE *));

   113 static VOID fillcheck _ANSI_ARGS_((struct colormap *, union tree *, int, FILE *));

   114 static VOID dumpchr _ANSI_ARGS_((pchr, FILE *));

   115 #endif

   116 /* === regc_nfa.c === */

   117 static struct nfa *newnfa _ANSI_ARGS_((struct vars *, struct colormap *, struct nfa *));

   118 static VOID freenfa _ANSI_ARGS_((struct nfa *));

   119 static struct state *newstate _ANSI_ARGS_((struct nfa *));

   120 static struct state *newfstate _ANSI_ARGS_((struct nfa *, int flag));

   121 static VOID dropstate _ANSI_ARGS_((struct nfa *, struct state *));

   122 static VOID freestate _ANSI_ARGS_((struct nfa *, struct state *));

   123 static VOID destroystate _ANSI_ARGS_((struct nfa *, struct state *));

   124 static VOID newarc _ANSI_ARGS_((struct nfa *, int, pcolor, struct state *, struct state *));

   125 static struct arc *allocarc _ANSI_ARGS_((struct nfa *, struct state *));

   126 static VOID freearc _ANSI_ARGS_((struct nfa *, struct arc *));

   127 static struct arc *findarc _ANSI_ARGS_((struct state *, int, pcolor));

   128 static VOID cparc _ANSI_ARGS_((struct nfa *, struct arc *, struct state *, struct state *));

   129 static VOID moveins _ANSI_ARGS_((struct nfa *, struct state *, struct state *));

   130 static VOID copyins _ANSI_ARGS_((struct nfa *, struct state *, struct state *));

   131 static VOID moveouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *));

   132 static VOID copyouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *));

   133 static VOID cloneouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *, int));

   134 static VOID delsub _ANSI_ARGS_((struct nfa *, struct state *, struct state *));

   135 static VOID deltraverse _ANSI_ARGS_((struct nfa *, struct state *, struct state *));

   136 static VOID dupnfa _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *, struct state *));

   137 static VOID duptraverse _ANSI_ARGS_((struct nfa *, struct state *, struct state *));

   138 static VOID cleartraverse _ANSI_ARGS_((struct nfa *, struct state *));

   139 static VOID specialcolors _ANSI_ARGS_((struct nfa *));

   140 static long optimize _ANSI_ARGS_((struct nfa *, FILE *));

   141 static VOID pullback _ANSI_ARGS_((struct nfa *, FILE *));

   142 static int pull _ANSI_ARGS_((struct nfa *, struct arc *));

   143 static VOID pushfwd _ANSI_ARGS_((struct nfa *, FILE *));

   144 static int push _ANSI_ARGS_((struct nfa *, struct arc *));

   145 #define	INCOMPATIBLE	1	/* destroys arc */

   146 #define	SATISFIED	2	/* constraint satisfied */

   147 #define	COMPATIBLE	3	/* compatible but not satisfied yet */

   148 static int combine _ANSI_ARGS_((struct arc *, struct arc *));

   149 static VOID fixempties _ANSI_ARGS_((struct nfa *, FILE *));

   150 static int unempty _ANSI_ARGS_((struct nfa *, struct arc *));

   151 static VOID cleanup _ANSI_ARGS_((struct nfa *));

   152 static VOID markreachable _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *));

   153 static VOID markcanreach _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *));

   154 static long analyze _ANSI_ARGS_((struct nfa *));

   155 static VOID compact _ANSI_ARGS_((struct nfa *, struct cnfa *));

   156 static VOID carcsort _ANSI_ARGS_((struct carc *, struct carc *));

   157 static VOID freecnfa _ANSI_ARGS_((struct cnfa *));

   158 static VOID dumpnfa _ANSI_ARGS_((struct nfa *, FILE *));

   159 #ifdef REG_DEBUG

   160 static VOID dumpstate _ANSI_ARGS_((struct state *, FILE *));

   161 static VOID dumparcs _ANSI_ARGS_((struct state *, FILE *));

   162 static int dumprarcs _ANSI_ARGS_((struct arc *, struct state *, FILE *, int));

   163 static VOID dumparc _ANSI_ARGS_((struct arc *, struct state *, FILE *));

   164 #endif

   165 static VOID dumpcnfa _ANSI_ARGS_((struct cnfa *, FILE *));

   166 #ifdef REG_DEBUG

   167 static VOID dumpcstate _ANSI_ARGS_((int, struct carc *, struct cnfa *, FILE *));

   168 #endif

   169 /* === regc_cvec.c === */

   170 static struct cvec *newcvec _ANSI_ARGS_((int, int, int));

   171 static struct cvec *clearcvec _ANSI_ARGS_((struct cvec *));

   172 static VOID addchr _ANSI_ARGS_((struct cvec *, pchr));

   173 static VOID addrange _ANSI_ARGS_((struct cvec *, pchr, pchr));

   174 static VOID addmcce _ANSI_ARGS_((struct cvec *, chr *, chr *));

   175 static int haschr _ANSI_ARGS_((struct cvec *, pchr));

   176 static struct cvec *getcvec _ANSI_ARGS_((struct vars *, int, int, int));

   177 static VOID freecvec _ANSI_ARGS_((struct cvec *));

   178 /* === regc_locale.c === */

   179 static int nmcces _ANSI_ARGS_((struct vars *));

   180 static int nleaders _ANSI_ARGS_((struct vars *));

   181 static struct cvec *allmcces _ANSI_ARGS_((struct vars *, struct cvec *));

   182 static celt element _ANSI_ARGS_((struct vars *, chr *, chr *));

   183 static struct cvec *range _ANSI_ARGS_((struct vars *, celt, celt, int));

   184 static int before _ANSI_ARGS_((celt, celt));

   185 static struct cvec *eclass _ANSI_ARGS_((struct vars *, celt, int));

   186 static struct cvec *cclass _ANSI_ARGS_((struct vars *, chr *, chr *, int));

   187 static struct cvec *allcases _ANSI_ARGS_((struct vars *, pchr));

   188 static int cmp _ANSI_ARGS_((CONST chr *, CONST chr *, size_t));

   189 static int casecmp _ANSI_ARGS_((CONST chr *, CONST chr *, size_t));

   190 /* automatically gathered by fwd; do not hand-edit */

   191 /* =====^!^===== end forwards =====^!^===== */

   195 /* internal variables, bundled for easy passing around */

   196 struct vars {

   197 	regex_t *re;

   198 	chr *now;		/* scan pointer into string */

   199 	chr *stop;		/* end of string */

   200 	chr *savenow;		/* saved now and stop for "subroutine call" */

   201 	chr *savestop;

   202 	int err;		/* error code (0 if none) */

   203 	int cflags;		/* copy of compile flags */

   204 	int lasttype;		/* type of previous token */

   205 	int nexttype;		/* type of next token */

   206 	chr nextvalue;		/* value (if any) of next token */

   207 	int lexcon;		/* lexical context type (see lex.c) */

   208 	int nsubexp;		/* subexpression count */

   209 	struct subre **subs;	/* subRE pointer vector */

   210 	size_t nsubs;		/* length of vector */

   211 	struct subre *sub10[10];	/* initial vector, enough for most */

   212 	struct nfa *nfa;	/* the NFA */

   213 	struct colormap *cm;	/* character color map */

   214 	color nlcolor;		/* color of newline */

   215 	struct state *wordchrs;	/* state in nfa holding word-char outarcs */

   216 	struct subre *tree;	/* subexpression tree */

   217 	struct subre *treechain;	/* all tree nodes allocated */

   218 	struct subre *treefree;		/* any free tree nodes */

   219 	int ntree;		/* number of tree nodes */

   220 	struct cvec *cv;	/* interface cvec */

   221 	struct cvec *cv2;	/* utility cvec */

   222 	struct cvec *mcces;	/* collating-element information */

   223 #		define	ISCELEADER(v,c)	(v->mcces != NULL && haschr(v->mcces, (c)))

   224 	struct state *mccepbegin;	/* in nfa, start of MCCE prototypes */

   225 	struct state *mccepend;	/* in nfa, end of MCCE prototypes */

   226 	struct subre *lacons;	/* lookahead-constraint vector */

   227 	int nlacons;		/* size of lacons */

   228 };

   230 /* parsing macros; most know that `v' is the struct vars pointer */

   231 #define	NEXT()	(next(v))		/* advance by one token */

   232 #define	SEE(t)	(v->nexttype == (t))	/* is next token this? */

   233 #define	EAT(t)	(SEE(t) && next(v))	/* if next is this, swallow it */

   234 #define	VISERR(vv)	((vv)->err != 0)	/* have we seen an error yet? */

   235 #define	ISERR()	VISERR(v)

   236 #define	VERR(vv,e)	((vv)->nexttype = EOS, ((vv)->err) ? (vv)->err :\

   237 							((vv)->err = (e)))

   238 #define	ERR(e)	VERR(v, e)		/* record an error */

   239 #define	NOERR()	{if (ISERR()) return;}	/* if error seen, return */

   240 #define	NOERRN()	{if (ISERR()) return NULL;}	/* NOERR with retval */

   241 #define	NOERRZ()	{if (ISERR()) return 0;}	/* NOERR with retval */

   242 #define	INSIST(c, e)	((c) ? 0 : ERR(e))	/* if condition false, error */

   243 #define	NOTE(b)	(v->re->re_info |= (b))		/* note visible condition */

   244 #define	EMPTYARC(x, y)	newarc(v->nfa, EMPTY, 0, x, y)

   246 /* token type codes, some also used as NFA arc types */

   247 #define	EMPTY	'n'		/* no token present */

   248 #define	EOS	'e'		/* end of string */

   249 #define	PLAIN	'p'		/* ordinary character */

   250 #define	DIGIT	'd'		/* digit (in bound) */

   251 #define	BACKREF	'b'		/* back reference */

   252 #define	COLLEL	'I'		/* start of [. */

   253 #define	ECLASS	'E'		/* start of [= */

   254 #define	CCLASS	'C'		/* start of [: */

   255 #define	END	'X'		/* end of [. [= [: */

   256 #define	RANGE	'R'		/* - within [] which might be range delim. */

   257 #define	LACON	'L'		/* lookahead constraint subRE */

   258 #define	AHEAD	'a'		/* color-lookahead arc */

   259 #define	BEHIND	'r'		/* color-lookbehind arc */

   260 #define	WBDRY	'w'		/* word boundary constraint */

   261 #define	NWBDRY	'W'		/* non-word-boundary constraint */

   262 #define	SBEGIN	'A'		/* beginning of string (even if not BOL) */

   263 #define	SEND	'Z'		/* end of string (even if not EOL) */

   264 #define	PREFER	'P'		/* length preference */

   266 /* is an arc colored, and hence on a color chain? */

   267 #define	COLORED(a)	((a)->type == PLAIN || (a)->type == AHEAD || \

   268 							(a)->type == BEHIND)

   272 /* static function list */

   273 static struct fns functions = {

   274 	rfree,			/* regfree insides */

   275 };

   279 /*

   280  - compile - compile regular expression

   281  ^ int compile(regex_t *, CONST chr *, size_t, int);

   282  */

   283 int

   284 compile(re, string, len, flags)

   285 regex_t *re;

   286 CONST chr *string;

   287 size_t len;

   288 int flags;

   289 {

   290 	struct vars var;

   291 	struct vars *v = &var;

   292 	struct guts *g;

   293 	int i;

   294 	size_t j;

   295 	FILE *debug = (flags&REG_PROGRESS) ? stdout : (FILE *)NULL;

   296 #	define	CNOERR()	{ if (ISERR()) return freev(v, v->err); }

   298 	/* sanity checks */

   300 	if (re == NULL || string == NULL)

   301 		return REG_INVARG;

   302 	if ((flags&REG_QUOTE) &&

   303 			(flags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE)))

   304 		return REG_INVARG;

   305 	if (!(flags&REG_EXTENDED) && (flags&REG_ADVF))

   306 		return REG_INVARG;

   308 	/* initial setup (after which freev() is callable) */

   309 	v->re = re;

   310 	v->now = (chr *)string;

   311 	v->stop = v->now + len;

   312 	v->savenow = v->savestop = NULL;

   313 	v->err = 0;

   314 	v->cflags = flags;

   315 	v->nsubexp = 0;

   316 	v->subs = v->sub10;

   317 	v->nsubs = 10;

   318 	for (j = 0; j < v->nsubs; j++)

   319 		v->subs[j] = NULL;

   320 	v->nfa = NULL;

   321 	v->cm = NULL;

   322 	v->nlcolor = COLORLESS;

   323 	v->wordchrs = NULL;

   324 	v->tree = NULL;

   325 	v->treechain = NULL;

   326 	v->treefree = NULL;

   327 	v->cv = NULL;

   328 	v->cv2 = NULL;

   329 	v->mcces = NULL;

   330 	v->lacons = NULL;

   331 	v->nlacons = 0;

   332 	re->re_magic = REMAGIC;

   333 	re->re_info = 0;		/* bits get set during parse */

   334 	re->re_csize = sizeof(chr);

   335 	re->re_guts = NULL;

   336 	re->re_fns = VS(&functions);

   338 	/* more complex setup, malloced things */

   339 	re->re_guts = VS(MALLOC(sizeof(struct guts)));

   340 	if (re->re_guts == NULL)

   341 		return freev(v, REG_ESPACE);

   342 	g = (struct guts *)re->re_guts;

   343 	g->tree = NULL;

   344 	initcm(v, &g->cmap);

   345 	v->cm = &g->cmap;

   346 	g->lacons = NULL;

   347 	g->nlacons = 0;

   348 	ZAPCNFA(g->search);

   349 	v->nfa = newnfa(v, v->cm, (struct nfa *)NULL);

   350 	CNOERR();

   351 	v->cv = newcvec(100, 20, 10);

   352 	if (v->cv == NULL)

   353 		return freev(v, REG_ESPACE);

   354 	i = nmcces(v);

   355 	if (i > 0) {

   356 		v->mcces = newcvec(nleaders(v), 0, i);

   357 		CNOERR();

   358 		v->mcces = allmcces(v, v->mcces);

   359 		leaders(v, v->mcces);

   360 		addmcce(v->mcces, (chr *)NULL, (chr *)NULL);	/* dummy */

   361 	}

   362 	CNOERR();

   364 	/* parsing */

   365 	lexstart(v);			/* also handles prefixes */

   366 	if ((v->cflags&REG_NLSTOP) || (v->cflags&REG_NLANCH)) {

   367 		/* assign newline a unique color */

   368 		v->nlcolor = subcolor(v->cm, newline());

   369 		okcolors(v->nfa, v->cm);

   370 	}

   371 	CNOERR();

   372 	v->tree = parse(v, EOS, PLAIN, v->nfa->init, v->nfa->final);

   373 	assert(SEE(EOS));		/* even if error; ISERR() => SEE(EOS) */

   374 	CNOERR();

   375 	assert(v->tree != NULL);

   377 	/* finish setup of nfa and its subre tree */

   378 	specialcolors(v->nfa);

   379 	CNOERR();

   380 	if (debug != NULL) {

   381 		fprintf(debug, "\n\n\n========= RAW ==========\n");

   382 		dumpnfa(v->nfa, debug);

   383 		dumpst(v->tree, debug, 1);

   384 	}

   385 	optst(v, v->tree);

   386 	v->ntree = numst(v->tree, 1);

   387 	markst(v->tree);

   388 	cleanst(v);

   389 	if (debug != NULL) {

   390 		fprintf(debug, "\n\n\n========= TREE FIXED ==========\n");

   391 		dumpst(v->tree, debug, 1);

   392 	}

   394 	/* build compacted NFAs for tree and lacons */

   395 	re->re_info |= nfatree(v, v->tree, debug);

   396 	CNOERR();

   397 	assert(v->nlacons == 0 || v->lacons != NULL);

   398 	for (i = 1; i < v->nlacons; i++) {

   399 		if (debug != NULL)

   400 			fprintf(debug, "\n\n\n========= LA%d ==========\n", i);

   401 		nfanode(v, &v->lacons[i], debug);

   402 	}

   403 	CNOERR();

   404 	if (v->tree->flags&SHORTER)

   405 		NOTE(REG_USHORTEST);

   407 	/* build compacted NFAs for tree, lacons, fast search */

   408 	if (debug != NULL)

   409 		fprintf(debug, "\n\n\n========= SEARCH ==========\n");

   410 	/* can sacrifice main NFA now, so use it as work area */

   411 	(DISCARD)optimize(v->nfa, debug);

   412 	CNOERR();

   413 	makesearch(v, v->nfa);

   414 	CNOERR();

   415 	compact(v->nfa, &g->search);

   416 	CNOERR();

   418 	/* looks okay, package it up */

   419 	re->re_nsub = v->nsubexp;

   420 	v->re = NULL;			/* freev no longer frees re */

   421 	g->magic = GUTSMAGIC;

   422 	g->cflags = v->cflags;

   423 	g->info = re->re_info;

   424 	g->nsub = re->re_nsub;

   425 	g->tree = v->tree;

   426 	v->tree = NULL;

   427 	g->ntree = v->ntree;

   428 	g->compare = (v->cflags&REG_ICASE) ? casecmp : cmp;

   429 	g->lacons = v->lacons;

   430 	v->lacons = NULL;

   431 	g->nlacons = v->nlacons;

   433 	if (flags&REG_DUMP)

   434 		dump(re, stdout);

   436 	assert(v->err == 0);

   437 	return freev(v, 0);

   438 }

   440 /*

   441  - moresubs - enlarge subRE vector

   442  ^ static VOID moresubs(struct vars *, int);

   443  */

   444 static VOID

   445 moresubs(v, wanted)

   446 struct vars *v;

   447 int wanted;			/* want enough room for this one */

   448 {

   449 	struct subre **p;

   450 	size_t n;

   452 	assert(wanted > 0 && (size_t)wanted >= v->nsubs);

   453 	n = (size_t)wanted * 3 / 2 + 1;

   454 	if (v->subs == v->sub10) {

   455 		p = (struct subre **)MALLOC(n * sizeof(struct subre *));

   456 		if (p != NULL)

   457 			memcpy(VS(p), VS(v->subs),

   458 					v->nsubs * sizeof(struct subre *));

   459 	} else

   460 		p = (struct subre **)REALLOC(v->subs, n*sizeof(struct subre *));

   461 	if (p == NULL) {

   462 		ERR(REG_ESPACE);

   463 		return;

   464 	}

   465 	v->subs = p;

   466 	for (p = &v->subs[v->nsubs]; v->nsubs < n; p++, v->nsubs++)

   467 		*p = NULL;

   468 	assert(v->nsubs == n);

   469 	assert((size_t)wanted < v->nsubs);

   470 }

   472 /*

   473  - freev - free vars struct's substructures where necessary

   474  * Optionally does error-number setting, and always returns error code

   475  * (if any), to make error-handling code terser.

   476  ^ static int freev(struct vars *, int);

   477  */

   478 static int

   479 freev(v, err)

   480 struct vars *v;

   481 int err;

   482 {

   483 	if (v->re != NULL)

   484 		rfree(v->re);

   485 	if (v->subs != v->sub10)

   486 		FREE(v->subs);

   487 	if (v->nfa != NULL)

   488 		freenfa(v->nfa);

   489 	if (v->tree != NULL)

   490 		freesubre(v, v->tree);

   491 	if (v->treechain != NULL)

   492 		cleanst(v);

   493 	if (v->cv != NULL)

   494 		freecvec(v->cv);

   495 	if (v->cv2 != NULL)

   496 		freecvec(v->cv2);

   497 	if (v->mcces != NULL)

   498 		freecvec(v->mcces);

   499 	if (v->lacons != NULL)

   500 		freelacons(v->lacons, v->nlacons);

   501 	ERR(err);			/* nop if err==0 */

   503 	return v->err;

   504 }

   506 /*

   507  - makesearch - turn an NFA into a search NFA (implicit prepend of .*?)

   508  * NFA must have been optimize()d already.

   509  ^ static VOID makesearch(struct vars *, struct nfa *);

   510  */

   511 static VOID

   512 makesearch(v, nfa)

   513 struct vars *v;

   514 struct nfa *nfa;

   515 {

   516 	struct arc *a;

   517 	struct arc *b;

   518 	struct state *pre = nfa->pre;

   519 	struct state *s;

   520 	struct state *s2;

   521 	struct state *slist;

   523 	/* no loops are needed if it's anchored */

   524 	for (a = pre->outs; a != NULL; a = a->outchain) {

   525 		assert(a->type == PLAIN);

   526 		if (a->co != nfa->bos[0] && a->co != nfa->bos[1])

   527 			break;

   528 	}

   529 	if (a != NULL) {

   530 		/* add implicit .* in front */

   531 		rainbow(nfa, v->cm, PLAIN, COLORLESS, pre, pre);

   533 		/* and ^* and \A* too -- not always necessary, but harmless */

   534 		newarc(nfa, PLAIN, nfa->bos[0], pre, pre);

   535 		newarc(nfa, PLAIN, nfa->bos[1], pre, pre);

   536 	}

   538 	/*

   539 	 * Now here's the subtle part.  Because many REs have no lookback

   540 	 * constraints, often knowing when you were in the pre state tells

   541 	 * you little; it's the next state(s) that are informative.  But

   542 	 * some of them may have other inarcs, i.e. it may be possible to

   543 	 * make actual progress and then return to one of them.  We must

   544 	 * de-optimize such cases, splitting each such state into progress

   545 	 * and no-progress states.

   546 	 */

   548 	/* first, make a list of the states */

   549 	slist = NULL;

   550 	for (a = pre->outs; a != NULL; a = a->outchain) {

   551 		s = a->to;

   552 		for (b = s->ins; b != NULL; b = b->inchain)

   553 			if (b->from != pre)

   554 				break;

   555 		if (b != NULL) {		/* must be split */

   556 			if (s->tmp == NULL) {  /* if not already in the list */

   557 			                       /* (fixes bugs 505048, 230589, */

   558 			                       /* 840258, 504785) */

   559 				s->tmp = slist;

   560 				slist = s;

   561 			}

   562 		}

   563 	}

   565 	/* do the splits */

   566 	for (s = slist; s != NULL; s = s2) {

   567 		s2 = newstate(nfa);

   568 		copyouts(nfa, s, s2);

   569 		for (a = s->ins; a != NULL; a = b) {

   570 			b = a->inchain;

   571 			if (a->from != pre) {

   572 				cparc(nfa, a, a->from, s2);

   573 				freearc(nfa, a);

   574 			}

   575 		}

   576 		s2 = s->tmp;

   577 		s->tmp = NULL;		/* clean up while we're at it */

   578 	}

   579 }

   581 /*

   582  - parse - parse an RE

   583  * This is actually just the top level, which parses a bunch of branches

   584  * tied together with '|'.  They appear in the tree as the left children

   585  * of a chain of '|' subres.

   586  ^ static struct subre *parse(struct vars *, int, int, struct state *,

   587  ^ 	struct state *);

   588  */

   589 static struct subre *

   590 parse(v, stopper, type, init, final)

   591 struct vars *v;

   592 int stopper;			/* EOS or ')' */

   593 int type;			/* LACON (lookahead subRE) or PLAIN */

   594 struct state *init;		/* initial state */

   595 struct state *final;		/* final state */

   596 {

   597 	struct state *left;	/* scaffolding for branch */

   598 	struct state *right;

   599 	struct subre *branches;	/* top level */

   600 	struct subre *branch;	/* current branch */

   601 	struct subre *t;	/* temporary */

   602 	int firstbranch;	/* is this the first branch? */

   604 	assert(stopper == ')' || stopper == EOS);

   606 	branches = subre(v, '|', LONGER, init, final);

   607 	NOERRN();

   608 	branch = branches;

   609 	firstbranch = 1;

   610 	do {	/* a branch */

   611 		if (!firstbranch) {

   612 			/* need a place to hang it */

   613 			branch->right = subre(v, '|', LONGER, init, final);

   614 			NOERRN();

   615 			branch = branch->right;

   616 		}

   617 		firstbranch = 0;

   618 		left = newstate(v->nfa);

   619 		right = newstate(v->nfa);

   620 		NOERRN();

   621 		EMPTYARC(init, left);

   622 		EMPTYARC(right, final);

   623 		NOERRN();

   624 		branch->left = parsebranch(v, stopper, type, left, right, 0);

   625 		NOERRN();

   626 		branch->flags |= UP(branch->flags | branch->left->flags);

   627 		if ((branch->flags &~ branches->flags) != 0)	/* new flags */

   628 			for (t = branches; t != branch; t = t->right)

   629 				t->flags |= branch->flags;

   630 	} while (EAT('|'));

   631 	assert(SEE(stopper) || SEE(EOS));

   633 	if (!SEE(stopper)) {

   634 		assert(stopper == ')' && SEE(EOS));

   635 		ERR(REG_EPAREN);

   636 	}

   638 	/* optimize out simple cases */

   639 	if (branch == branches) {	/* only one branch */

   640 		assert(branch->right == NULL);

   641 		t = branch->left;

   642 		branch->left = NULL;

   643 		freesubre(v, branches);

   644 		branches = t;

   645 	} else if (!MESSY(branches->flags)) {	/* no interesting innards */

   646 		freesubre(v, branches->left);

   647 		branches->left = NULL;

   648 		freesubre(v, branches->right);

   649 		branches->right = NULL;

   650 		branches->op = '=';

   651 	}

   653 	return branches;

   654 }

   656 /*

   657  - parsebranch - parse one branch of an RE

   658  * This mostly manages concatenation, working closely with parseqatom().

   659  * Concatenated things are bundled up as much as possible, with separate

   660  * ',' nodes introduced only when necessary due to substructure.

   661  ^ static struct subre *parsebranch(struct vars *, int, int, struct state *,

   662  ^ 	struct state *, int);

   663  */

   664 static struct subre *

   665 parsebranch(v, stopper, type, left, right, partial)

   666 struct vars *v;

   667 int stopper;			/* EOS or ')' */

   668 int type;			/* LACON (lookahead subRE) or PLAIN */

   669 struct state *left;		/* leftmost state */

   670 struct state *right;		/* rightmost state */

   671 int partial;			/* is this only part of a branch? */

   672 {

   673 	struct state *lp;	/* left end of current construct */

   674 	int seencontent;	/* is there anything in this branch yet? */

   675 	struct subre *t;

   677 	lp = left;

   678 	seencontent = 0;

   679 	t = subre(v, '=', 0, left, right);	/* op '=' is tentative */

   680 	NOERRN();

   681 	while (!SEE('|') && !SEE(stopper) && !SEE(EOS)) {

   682 		if (seencontent) {	/* implicit concat operator */

   683 			lp = newstate(v->nfa);

   684 			NOERRN();

   685 			moveins(v->nfa, right, lp);

   686 		}

   687 		seencontent = 1;

   689 		/* NB, recursion in parseqatom() may swallow rest of branch */

   690 		parseqatom(v, stopper, type, lp, right, t);

   691 	}

   693 	if (!seencontent) {		/* empty branch */

   694 		if (!partial)

   695 			NOTE(REG_UUNSPEC);

   696 		assert(lp == left);

   697 		EMPTYARC(left, right);

   698 	}

   700 	return t;

   701 }

   703 /*

   704  - parseqatom - parse one quantified atom or constraint of an RE

   705  * The bookkeeping near the end cooperates very closely with parsebranch();

   706  * in particular, it contains a recursion that can involve parsing the rest

   707  * of the branch, making this function's name somewhat inaccurate.

   708  ^ static VOID parseqatom(struct vars *, int, int, struct state *,

   709  ^ 	struct state *, struct subre *);

   710  */

   711 static VOID

   712 parseqatom(v, stopper, type, lp, rp, top)

   713 struct vars *v;

   714 int stopper;			/* EOS or ')' */

   715 int type;			/* LACON (lookahead subRE) or PLAIN */

   716 struct state *lp;		/* left state to hang it on */

   717 struct state *rp;		/* right state to hang it on */

   718 struct subre *top;		/* subtree top */

   719 {

   720 	struct state *s;	/* temporaries for new states */

   721 	struct state *s2;

   722 #	define	ARCV(t, val)	newarc(v->nfa, t, val, lp, rp)

   723 	int m, n;

   724 	struct subre *atom;	/* atom's subtree */

   725 	struct subre *t;

   726 	int cap;		/* capturing parens? */

   727 	int pos;		/* positive lookahead? */

   728 	int subno;		/* capturing-parens or backref number */

   729 	int atomtype;

   730 	int qprefer;		/* quantifier short/long preference */

   731 	int f;

   732 	struct subre **atomp;	/* where the pointer to atom is */

   734 	/* initial bookkeeping */

   735 	atom = NULL;

   736 	assert(lp->nouts == 0);	/* must string new code */

   737 	assert(rp->nins == 0);	/*  between lp and rp */

   738 	subno = 0;		/* just to shut lint up */

   740 	/* an atom or constraint... */

   741 	atomtype = v->nexttype;

   742 	switch (atomtype) {

   743 	/* first, constraints, which end by returning */

   744 	case '^':

   745 		ARCV('^', 1);

   746 		if (v->cflags&REG_NLANCH)

   747 			ARCV(BEHIND, v->nlcolor);

   748 		NEXT();

   749 		return;

   750 		break;

   751 	case '$':

   752 		ARCV('$', 1);

   753 		if (v->cflags&REG_NLANCH)

   754 			ARCV(AHEAD, v->nlcolor);

   755 		NEXT();

   756 		return;

   757 		break;

   758 	case SBEGIN:

   759 		ARCV('^', 1);	/* BOL */

   760 		ARCV('^', 0);	/* or BOS */

   761 		NEXT();

   762 		return;

   763 		break;

   764 	case SEND:

   765 		ARCV('$', 1);	/* EOL */

   766 		ARCV('$', 0);	/* or EOS */

   767 		NEXT();

   768 		return;

   769 		break;

   770 	case '<':

   771 		wordchrs(v);	/* does NEXT() */

   772 		s = newstate(v->nfa);

   773 		NOERR();

   774 		nonword(v, BEHIND, lp, s);

   775 		word(v, AHEAD, s, rp);

   776 		return;

   777 		break;

   778 	case '>':

   779 		wordchrs(v);	/* does NEXT() */

   780 		s = newstate(v->nfa);

   781 		NOERR();

   782 		word(v, BEHIND, lp, s);

   783 		nonword(v, AHEAD, s, rp);

   784 		return;

   785 		break;

   786 	case WBDRY:

   787 		wordchrs(v);	/* does NEXT() */

   788 		s = newstate(v->nfa);

   789 		NOERR();

   790 		nonword(v, BEHIND, lp, s);

   791 		word(v, AHEAD, s, rp);

   792 		s = newstate(v->nfa);

   793 		NOERR();

   794 		word(v, BEHIND, lp, s);

   795 		nonword(v, AHEAD, s, rp);

   796 		return;

   797 		break;

   798 	case NWBDRY:

   799 		wordchrs(v);	/* does NEXT() */

   800 		s = newstate(v->nfa);

   801 		NOERR();

   802 		word(v, BEHIND, lp, s);

   803 		word(v, AHEAD, s, rp);

   804 		s = newstate(v->nfa);

   805 		NOERR();

   806 		nonword(v, BEHIND, lp, s);

   807 		nonword(v, AHEAD, s, rp);

   808 		return;

   809 		break;

   810 	case LACON:	/* lookahead constraint */

   811 		pos = v->nextvalue;

   812 		NEXT();

   813 		s = newstate(v->nfa);

   814 		s2 = newstate(v->nfa);

   815 		NOERR();

   816 		t = parse(v, ')', LACON, s, s2);

   817 		freesubre(v, t);	/* internal structure irrelevant */

   818 		assert(SEE(')') || ISERR());

   819 		NEXT();

   820 		n = newlacon(v, s, s2, pos);

   821 		NOERR();

   822 		ARCV(LACON, n);

   823 		return;

   824 		break;

   825 	/* then errors, to get them out of the way */

   826 	case '*':

   827 	case '+':

   828 	case '?':

   829 	case '{':

   830 		ERR(REG_BADRPT);

   831 		return;

   832 		break;

   833 	default:

   834 		ERR(REG_ASSERT);

   835 		return;

   836 		break;

   837 	/* then plain characters, and minor variants on that theme */

   838 	case ')':		/* unbalanced paren */

   839 		if ((v->cflags&REG_ADVANCED) != REG_EXTENDED) {

   840 			ERR(REG_EPAREN);

   841 			return;

   842 		}

   843 		/* legal in EREs due to specification botch */

   844 		NOTE(REG_UPBOTCH);

   845 		/* fallthrough into case PLAIN */

   846 	case PLAIN:

   847 		onechr(v, v->nextvalue, lp, rp);

   848 		okcolors(v->nfa, v->cm);

   849 		NOERR();

   850 		NEXT();

   851 		break;

   852 	case '[':

   853 		if (v->nextvalue == 1)

   854 			bracket(v, lp, rp);

   855 		else

   856 			cbracket(v, lp, rp);

   857 		assert(SEE(']') || ISERR());

   858 		NEXT();

   859 		break;

   860 	case '.':

   861 		rainbow(v->nfa, v->cm, PLAIN,

   862 				(v->cflags&REG_NLSTOP) ? v->nlcolor : COLORLESS,

   863 				lp, rp);

   864 		NEXT();

   865 		break;

   866 	/* and finally the ugly stuff */

   867 	case '(':	/* value flags as capturing or non */

   868 		cap = (type == LACON) ? 0 : v->nextvalue;

   869 		if (cap) {

   870 			v->nsubexp++;

   871 			subno = v->nsubexp;

   872 			if ((size_t)subno >= v->nsubs)

   873 				moresubs(v, subno);

   874 			assert((size_t)subno < v->nsubs);

   875 		} else

   876 			atomtype = PLAIN;	/* something that's not '(' */

   877 		NEXT();

   878 		/* need new endpoints because tree will contain pointers */

   879 		s = newstate(v->nfa);

   880 		s2 = newstate(v->nfa);

   881 		NOERR();

   882 		EMPTYARC(lp, s);

   883 		EMPTYARC(s2, rp);

   884 		NOERR();

   885 		atom = parse(v, ')', PLAIN, s, s2);

   886 		assert(SEE(')') || ISERR());

   887 		NEXT();

   888 		NOERR();

   889 		if (cap) {

   890 			v->subs[subno] = atom;

   891 			t = subre(v, '(', atom->flags|CAP, lp, rp);

   892 			NOERR();

   893 			t->subno = subno;

   894 			t->left = atom;

   895 			atom = t;

   896 		}

   897 		/* postpone everything else pending possible {0} */

   898 		break;

   899 	case BACKREF:	/* the Feature From The Black Lagoon */

   900 		INSIST(type != LACON, REG_ESUBREG);

   901 		INSIST(v->nextvalue < v->nsubs, REG_ESUBREG);

   902 		INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG);

   903 		NOERR();

   904 		assert(v->nextvalue > 0);

   905 		atom = subre(v, 'b', BACKR, lp, rp);

   906 		subno = v->nextvalue;

   907 		atom->subno = subno;

   908 		EMPTYARC(lp, rp);	/* temporarily, so there's something */

   909 		NEXT();

   910 		break;

   911 	}

   913 	/* ...and an atom may be followed by a quantifier */

   914 	switch (v->nexttype) {

   915 	case '*':

   916 		m = 0;

   917 		n = INFINITY;

   918 		qprefer = (v->nextvalue) ? LONGER : SHORTER;

   919 		NEXT();

   920 		break;

   921 	case '+':

   922 		m = 1;

   923 		n = INFINITY;

   924 		qprefer = (v->nextvalue) ? LONGER : SHORTER;

   925 		NEXT();

   926 		break;

   927 	case '?':

   928 		m = 0;

   929 		n = 1;

   930 		qprefer = (v->nextvalue) ? LONGER : SHORTER;

   931 		NEXT();

   932 		break;

   933 	case '{':

   934 		NEXT();

   935 		m = scannum(v);

   936 		if (EAT(',')) {

   937 			if (SEE(DIGIT))

   938 				n = scannum(v);

   939 			else

   940 				n = INFINITY;

   941 			if (m > n) {

   942 				ERR(REG_BADBR);

   943 				return;

   944 			}

   945 			/* {m,n} exercises preference, even if it's {m,m} */

   946 			qprefer = (v->nextvalue) ? LONGER : SHORTER;

   947 		} else {

   948 			n = m;

   949 			/* {m} passes operand's preference through */

   950 			qprefer = 0;

   951 		}

   952 		if (!SEE('}')) {	/* catches errors too */

   953 			ERR(REG_BADBR);

   954 			return;

   955 		}

   956 		NEXT();

   957 		break;

   958 	default:		/* no quantifier */

   959 		m = n = 1;

   960 		qprefer = 0;

   961 		break;

   962 	}

   964 	/* annoying special case:  {0} or {0,0} cancels everything */

   965 	if (m == 0 && n == 0) {

   966 		if (atom != NULL)

   967 			freesubre(v, atom);

   968 		if (atomtype == '(')

   969 			v->subs[subno] = NULL;

   970 		delsub(v->nfa, lp, rp);

   971 		EMPTYARC(lp, rp);

   972 		return;

   973 	}

   975 	/* if not a messy case, avoid hard part */

   976 	assert(!MESSY(top->flags));

   977 	f = top->flags | qprefer | ((atom != NULL) ? atom->flags : 0);

   978 	if (atomtype != '(' && atomtype != BACKREF && !MESSY(UP(f))) {

   979 		if (!(m == 1 && n == 1))

   980 			repeat(v, lp, rp, m, n);

   981 		if (atom != NULL)

   982 			freesubre(v, atom);

   983 		top->flags = f;

   984 		return;

   985 	}

   987 	/*

   988 	 * hard part:  something messy

   989 	 * That is, capturing parens, back reference, short/long clash, or

   990 	 * an atom with substructure containing one of those.

   991 	 */

   993 	/* now we'll need a subre for the contents even if they're boring */

   994 	if (atom == NULL) {

   995 		atom = subre(v, '=', 0, lp, rp);

   996 		NOERR();

   997 	}

   999 	/*

  1000 	 * prepare a general-purpose state skeleton

  1001 	 *

  1002 	 *    ---> [s] ---prefix---> [begin] ---atom---> [end] ----rest---> [rp]

  1003 	 *   /                                            /

  1004 	 * [lp] ----> [s2] ----bypass---------------------

  1005 	 *

  1006 	 * where bypass is an empty, and prefix is some repetitions of atom

  1007 	 */

  1008 	s = newstate(v->nfa);		/* first, new endpoints for the atom */

  1009 	s2 = newstate(v->nfa);

  1010 	NOERR();

  1011 	moveouts(v->nfa, lp, s);

  1012 	moveins(v->nfa, rp, s2);

  1013 	NOERR();

  1014 	atom->begin = s;

  1015 	atom->end = s2;

  1016 	s = newstate(v->nfa);		/* and spots for prefix and bypass */

  1017 	s2 = newstate(v->nfa);

  1018 	NOERR();

  1019 	EMPTYARC(lp, s);

  1020 	EMPTYARC(lp, s2);

  1021 	NOERR();

  1023 	/* break remaining subRE into x{...} and what follows */

  1024 	t = subre(v, '.', COMBINE(qprefer, atom->flags), lp, rp);

  1025 	t->left = atom;

  1026 	atomp = &t->left;

  1027 	/* here we should recurse... but we must postpone that to the end */

  1029 	/* split top into prefix and remaining */

  1030 	assert(top->op == '=' && top->left == NULL && top->right == NULL);

  1031 	top->left = subre(v, '=', top->flags, top->begin, lp);

  1032 	top->op = '.';

  1033 	top->right = t;

  1035 	/* if it's a backref, now is the time to replicate the subNFA */

  1036 	if (atomtype == BACKREF) {

  1037 		assert(atom->begin->nouts == 1);	/* just the EMPTY */

  1038 		delsub(v->nfa, atom->begin, atom->end);

  1039 		assert(v->subs[subno] != NULL);

  1040 		/* and here's why the recursion got postponed:  it must */

  1041 		/* wait until the skeleton is filled in, because it may */

  1042 		/* hit a backref that wants to copy the filled-in skeleton */

  1043 		dupnfa(v->nfa, v->subs[subno]->begin, v->subs[subno]->end,

  1044 						atom->begin, atom->end);

  1045 		NOERR();

  1046 	}

  1048 	/* it's quantifier time; first, turn x{0,...} into x{1,...}|empty */

  1049 	if (m == 0) {

  1050 		EMPTYARC(s2, atom->end);		/* the bypass */

  1051 		assert(PREF(qprefer) != 0);

  1052 		f = COMBINE(qprefer, atom->flags);

  1053 		t = subre(v, '|', f, lp, atom->end);

  1054 		NOERR();

  1055 		t->left = atom;

  1056 		t->right = subre(v, '|', PREF(f), s2, atom->end);

  1057 		NOERR();

  1058 		t->right->left = subre(v, '=', 0, s2, atom->end);

  1059 		NOERR();

  1060 		*atomp = t;

  1061 		atomp = &t->left;

  1062 		m = 1;

  1063 	}

  1065 	/* deal with the rest of the quantifier */

  1066 	if (atomtype == BACKREF) {

  1067 		/* special case:  backrefs have internal quantifiers */

  1068 		EMPTYARC(s, atom->begin);	/* empty prefix */

  1069 		/* just stuff everything into atom */

  1070 		repeat(v, atom->begin, atom->end, m, n);

  1071 		atom->min = (short)m;

  1072 		atom->max = (short)n;

  1073 		atom->flags |= COMBINE(qprefer, atom->flags);

  1074 	} else if (m == 1 && n == 1) {

  1075 		/* no/vacuous quantifier:  done */

  1076 		EMPTYARC(s, atom->begin);	/* empty prefix */

  1077 	} else {

  1078 		/* turn x{m,n} into x{m-1,n-1}x, with capturing */

  1079 		/*  parens in only second x */

  1080 		dupnfa(v->nfa, atom->begin, atom->end, s, atom->begin);

  1081 		assert(m >= 1 && m != INFINITY && n >= 1);

  1082 		repeat(v, s, atom->begin, m-1, (n == INFINITY) ? n : n-1);

  1083 		f = COMBINE(qprefer, atom->flags);

  1084 		t = subre(v, '.', f, s, atom->end);	/* prefix and atom */

  1085 		NOERR();

  1086 		t->left = subre(v, '=', PREF(f), s, atom->begin);

  1087 		NOERR();

  1088 		t->right = atom;

  1089 		*atomp = t;

  1090 	}

  1092 	/* and finally, look after that postponed recursion */

  1093 	t = top->right;

  1094 	if (!(SEE('|') || SEE(stopper) || SEE(EOS)))

  1095 		t->right = parsebranch(v, stopper, type, atom->end, rp, 1);

  1096 	else {

  1097 		EMPTYARC(atom->end, rp);

  1098 		t->right = subre(v, '=', 0, atom->end, rp);

  1099 	}

  1100 	assert(SEE('|') || SEE(stopper) || SEE(EOS));

  1101 	t->flags |= COMBINE(t->flags, t->right->flags);

  1102 	top->flags |= COMBINE(top->flags, t->flags);

  1103 }

  1105 /*

  1106  - nonword - generate arcs for non-word-character ahead or behind

  1107  ^ static VOID nonword(struct vars *, int, struct state *, struct state *);

  1108  */

  1109 static VOID

  1110 nonword(v, dir, lp, rp)

  1111 struct vars *v;

  1112 int dir;			/* AHEAD or BEHIND */

  1113 struct state *lp;

  1114 struct state *rp;

  1115 {

  1116 	int anchor = (dir == AHEAD) ? '$' : '^';

  1118 	assert(dir == AHEAD || dir == BEHIND);

  1119 	newarc(v->nfa, anchor, 1, lp, rp);

  1120 	newarc(v->nfa, anchor, 0, lp, rp);

  1121 	colorcomplement(v->nfa, v->cm, dir, v->wordchrs, lp, rp);

  1122 	/* (no need for special attention to \n) */

  1123 }

  1125 /*

  1126  - word - generate arcs for word character ahead or behind

  1127  ^ static VOID word(struct vars *, int, struct state *, struct state *);

  1128  */

  1129 static VOID

  1130 word(v, dir, lp, rp)

  1131 struct vars *v;

  1132 int dir;			/* AHEAD or BEHIND */

  1133 struct state *lp;

  1134 struct state *rp;

  1135 {

  1136 	assert(dir == AHEAD || dir == BEHIND);

  1137 	cloneouts(v->nfa, v->wordchrs, lp, rp, dir);

  1138 	/* (no need for special attention to \n) */

  1139 }

  1141 /*

  1142  - scannum - scan a number

  1143  ^ static int scannum(struct vars *);

  1144  */

  1145 static int			/* value, <= DUPMAX */

  1146 scannum(v)

  1147 struct vars *v;

  1148 {

  1149 	int n = 0;

  1151 	while (SEE(DIGIT) && n < DUPMAX) {

  1152 		n = n*10 + v->nextvalue;

  1153 		NEXT();

  1154 	}

  1155 	if (SEE(DIGIT) || n > DUPMAX) {

  1156 		ERR(REG_BADBR);

  1157 		return 0;

  1158 	}

  1159 	return n;

  1160 }

  1162 /*

  1163  - repeat - replicate subNFA for quantifiers

  1164  * The duplication sequences used here are chosen carefully so that any

  1165  * pointers starting out pointing into the subexpression end up pointing into

  1166  * the last occurrence.  (Note that it may not be strung between the same

  1167  * left and right end states, however!)  This used to be important for the

  1168  * subRE tree, although the important bits are now handled by the in-line

  1169  * code in parse(), and when this is called, it doesn't matter any more.

  1170  ^ static VOID repeat(struct vars *, struct state *, struct state *, int, int);

  1171  */

  1172 static VOID

  1173 repeat(v, lp, rp, m, n)

  1174 struct vars *v;

  1175 struct state *lp;

  1176 struct state *rp;

  1177 int m;

  1178 int n;

  1179 {

  1180 #	define	SOME	2

  1181 #	define	INF	3

  1182 #	define	PAIR(x, y)	((x)*4 + (y))

  1183 #	define	REDUCE(x)	( ((x) == INFINITY) ? INF : (((x) > 1) ? SOME : (x)) )

  1184 	CONST int rm = REDUCE(m);

  1185 	CONST int rn = REDUCE(n);

  1186 	struct state *s;

  1187 	struct state *s2;

  1189 	switch (PAIR(rm, rn)) {

  1190 	case PAIR(0, 0):		/* empty string */

  1191 		delsub(v->nfa, lp, rp);

  1192 		EMPTYARC(lp, rp);

  1193 		break;

  1194 	case PAIR(0, 1):		/* do as x| */

  1195 		EMPTYARC(lp, rp);

  1196 		break;

  1197 	case PAIR(0, SOME):		/* do as x{1,n}| */

  1198 		repeat(v, lp, rp, 1, n);

  1199 		NOERR();

  1200 		EMPTYARC(lp, rp);

  1201 		break;

  1202 	case PAIR(0, INF):		/* loop x around */

  1203 		s = newstate(v->nfa);

  1204 		NOERR();

  1205 		moveouts(v->nfa, lp, s);

  1206 		moveins(v->nfa, rp, s);

  1207 		EMPTYARC(lp, s);

  1208 		EMPTYARC(s, rp);

  1209 		break;

  1210 	case PAIR(1, 1):		/* no action required */

  1211 		break;

  1212 	case PAIR(1, SOME):		/* do as x{0,n-1}x = (x{1,n-1}|)x */

  1213 		s = newstate(v->nfa);

  1214 		NOERR();

  1215 		moveouts(v->nfa, lp, s);

  1216 		dupnfa(v->nfa, s, rp, lp, s);

  1217 		NOERR();

  1218 		repeat(v, lp, s, 1, n-1);

  1219 		NOERR();

  1220 		EMPTYARC(lp, s);

  1221 		break;

  1222 	case PAIR(1, INF):		/* add loopback arc */

  1223 		s = newstate(v->nfa);

  1224 		s2 = newstate(v->nfa);

  1225 		NOERR();

  1226 		moveouts(v->nfa, lp, s);

  1227 		moveins(v->nfa, rp, s2);

  1228 		EMPTYARC(lp, s);

  1229 		EMPTYARC(s2, rp);

  1230 		EMPTYARC(s2, s);

  1231 		break;

  1232 	case PAIR(SOME, SOME):		/* do as x{m-1,n-1}x */

  1233 		s = newstate(v->nfa);

  1234 		NOERR();

  1235 		moveouts(v->nfa, lp, s);

  1236 		dupnfa(v->nfa, s, rp, lp, s);

  1237 		NOERR();

  1238 		repeat(v, lp, s, m-1, n-1);

  1239 		break;

  1240 	case PAIR(SOME, INF):		/* do as x{m-1,}x */

  1241 		s = newstate(v->nfa);

  1242 		NOERR();

  1243 		moveouts(v->nfa, lp, s);

  1244 		dupnfa(v->nfa, s, rp, lp, s);

  1245 		NOERR();

  1246 		repeat(v, lp, s, m-1, n);

  1247 		break;

  1248 	default:

  1249 		ERR(REG_ASSERT);

  1250 		break;

  1251 	}

  1252 }

  1254 /*

  1255  - bracket - handle non-complemented bracket expression

  1256  * Also called from cbracket for complemented bracket expressions.

  1257  ^ static VOID bracket(struct vars *, struct state *, struct state *);

  1258  */

  1259 static VOID

  1260 bracket(v, lp, rp)

  1261 struct vars *v;

  1262 struct state *lp;

  1263 struct state *rp;

  1264 {

  1265 	assert(SEE('['));

  1266 	NEXT();

  1267 	while (!SEE(']') && !SEE(EOS))

  1268 		brackpart(v, lp, rp);

  1269 	assert(SEE(']') || ISERR());

  1270 	okcolors(v->nfa, v->cm);

  1271 }

  1273 /*

  1274  - cbracket - handle complemented bracket expression

  1275  * We do it by calling bracket() with dummy endpoints, and then complementing

  1276  * the result.  The alternative would be to invoke rainbow(), and then delete

  1277  * arcs as the b.e. is seen... but that gets messy.

  1278  ^ static VOID cbracket(struct vars *, struct state *, struct state *);

  1279  */

  1280 static VOID

  1281 cbracket(v, lp, rp)

  1282 struct vars *v;

  1283 struct state *lp;

  1284 struct state *rp;

  1285 {

  1286 	struct state *left = newstate(v->nfa);

  1287 	struct state *right = newstate(v->nfa);

  1288 	struct state *s;

  1289 	struct arc *a;			/* arc from lp */

  1290 	struct arc *ba;			/* arc from left, from bracket() */

  1291 	struct arc *pa;			/* MCCE-prototype arc */

  1292 	color co;

  1293 	chr *p;

  1294 	int i;

  1296 	NOERR();

  1297 	bracket(v, left, right);

  1298 	if (v->cflags&REG_NLSTOP)

  1299 		newarc(v->nfa, PLAIN, v->nlcolor, left, right);

  1300 	NOERR();

  1302 	assert(lp->nouts == 0);		/* all outarcs will be ours */

  1304 	/* easy part of complementing */

  1305 	colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp);

  1306 	NOERR();

  1307 	if (v->mcces == NULL) {		/* no MCCEs -- we're done */

  1308 		dropstate(v->nfa, left);

  1309 		assert(right->nins == 0);

  1310 		freestate(v->nfa, right);

  1311 		return;

  1312 	}

  1314 	/* but complementing gets messy in the presence of MCCEs... */

  1315 	NOTE(REG_ULOCALE);

  1316 	for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--) {

  1317 		co = GETCOLOR(v->cm, *p);

  1318 		a = findarc(lp, PLAIN, co);

  1319 		ba = findarc(left, PLAIN, co);

  1320 		if (ba == NULL) {

  1321 			assert(a != NULL);

  1322 			freearc(v->nfa, a);

  1323 		} else {

  1324 			assert(a == NULL);

  1325 		}

  1326 		s = newstate(v->nfa);

  1327 		NOERR();

  1328 		newarc(v->nfa, PLAIN, co, lp, s);

  1329 		NOERR();

  1330 		pa = findarc(v->mccepbegin, PLAIN, co);

  1331 		assert(pa != NULL);

  1332 		if (ba == NULL) {	/* easy case, need all of them */

  1333 			cloneouts(v->nfa, pa->to, s, rp, PLAIN);

  1334 			newarc(v->nfa, '$', 1, s, rp);

  1335 			newarc(v->nfa, '$', 0, s, rp);

  1336 			colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp);

  1337 		} else {		/* must be selective */

  1338 			if (findarc(ba->to, '$', 1) == NULL) {

  1339 				newarc(v->nfa, '$', 1, s, rp);

  1340 				newarc(v->nfa, '$', 0, s, rp);

  1341 				colorcomplement(v->nfa, v->cm, AHEAD, pa->to,

  1342 									 s, rp);

  1343 			}

  1344 			for (pa = pa->to->outs; pa != NULL; pa = pa->outchain)

  1345 				if (findarc(ba->to, PLAIN, pa->co) == NULL)

  1346 					newarc(v->nfa, PLAIN, pa->co, s, rp);

  1347 			if (s->nouts == 0)	/* limit of selectivity: none */

  1348 				dropstate(v->nfa, s);	/* frees arc too */

  1349 		}

  1350 		NOERR();

  1351 	}

  1353 	delsub(v->nfa, left, right);

  1354 	assert(left->nouts == 0);

  1355 	freestate(v->nfa, left);

  1356 	assert(right->nins == 0);

  1357 	freestate(v->nfa, right);

  1358 }

  1360 /*

  1361  - brackpart - handle one item (or range) within a bracket expression

  1362  ^ static VOID brackpart(struct vars *, struct state *, struct state *);

  1363  */

  1364 static VOID

  1365 brackpart(v, lp, rp)

  1366 struct vars *v;

  1367 struct state *lp;

  1368 struct state *rp;

  1369 {

  1370 	celt startc;

  1371 	celt endc;

  1372 	struct cvec *cv;

  1373 	chr *startp;

  1374 	chr *endp;

  1375 	chr c[1];

  1377 	/* parse something, get rid of special cases, take shortcuts */

  1378 	switch (v->nexttype) {

  1379 	case RANGE:			/* a-b-c or other botch */

  1380 		ERR(REG_ERANGE);

  1381 		return;

  1382 		break;

  1383 	case PLAIN:

  1384 		c[0] = v->nextvalue;

  1385 		NEXT();

  1386 		/* shortcut for ordinary chr (not range, not MCCE leader) */

  1387 		if (!SEE(RANGE) && !ISCELEADER(v, c[0])) {

  1388 			onechr(v, c[0], lp, rp);

  1389 			return;

  1390 		}

  1391 		startc = element(v, c, c+1);

  1392 		NOERR();

  1393 		break;

  1394 	case COLLEL:

  1395 		startp = v->now;

  1396 		endp = scanplain(v);

  1397 		INSIST(startp < endp, REG_ECOLLATE);

  1398 		NOERR();

  1399 		startc = element(v, startp, endp);

  1400 		NOERR();

  1401 		break;

  1402 	case ECLASS:

  1403 		startp = v->now;

  1404 		endp = scanplain(v);

  1405 		INSIST(startp < endp, REG_ECOLLATE);

  1406 		NOERR();

  1407 		startc = element(v, startp, endp);

  1408 		NOERR();

  1409 		cv = eclass(v, startc, (v->cflags&REG_ICASE));

  1410 		NOERR();

  1411 		dovec(v, cv, lp, rp);

  1412 		return;

  1413 		break;

  1414 	case CCLASS:

  1415 		startp = v->now;

  1416 		endp = scanplain(v);

  1417 		INSIST(startp < endp, REG_ECTYPE);

  1418 		NOERR();

  1419 		cv = cclass(v, startp, endp, (v->cflags&REG_ICASE));

  1420 		NOERR();

  1421 		dovec(v, cv, lp, rp);

  1422 		return;

  1423 		break;

  1424 	default:

  1425 		ERR(REG_ASSERT);

  1426 		return;

  1427 		break;

  1428 	}

  1430 	if (SEE(RANGE)) {

  1431 		NEXT();

  1432 		switch (v->nexttype) {

  1433 		case PLAIN:

  1434 		case RANGE:

  1435 			c[0] = v->nextvalue;

  1436 			NEXT();

  1437 			endc = element(v, c, c+1);

  1438 			NOERR();

  1439 			break;

  1440 		case COLLEL:

  1441 			startp = v->now;

  1442 			endp = scanplain(v);

  1443 			INSIST(startp < endp, REG_ECOLLATE);

  1444 			NOERR();

  1445 			endc = element(v, startp, endp);

  1446 			NOERR();

  1447 			break;

  1448 		default:

  1449 			ERR(REG_ERANGE);

  1450 			return;

  1451 			break;

  1452 		}

  1453 	} else

  1454 		endc = startc;

  1456 	/*

  1457 	 * Ranges are unportable.  Actually, standard C does

  1458 	 * guarantee that digits are contiguous, but making

  1459 	 * that an exception is just too complicated.

  1460 	 */

  1461 	if (startc != endc)

  1462 		NOTE(REG_UUNPORT);

  1463 	cv = range(v, startc, endc, (v->cflags&REG_ICASE));

  1464 	NOERR();

  1465 	dovec(v, cv, lp, rp);

  1466 }

  1468 /*

  1469  - scanplain - scan PLAIN contents of [. etc.

  1470  * Certain bits of trickery in lex.c know that this code does not try

  1471  * to look past the final bracket of the [. etc.

  1472  ^ static chr *scanplain(struct vars *);

  1473  */

  1474 static chr *			/* just after end of sequence */

  1475 scanplain(v)

  1476 struct vars *v;

  1477 {

  1478 	chr *endp;

  1480 	assert(SEE(COLLEL) || SEE(ECLASS) || SEE(CCLASS));

  1481 	NEXT();

  1483 	endp = v->now;

  1484 	while (SEE(PLAIN)) {

  1485 		endp = v->now;

  1486 		NEXT();

  1487 	}

  1489 	assert(SEE(END) || ISERR());

  1490 	NEXT();

  1492 	return endp;

  1493 }

  1495 /*

  1496  - leaders - process a cvec of collating elements to also include leaders

  1497  * Also gives all characters involved their own colors, which is almost

  1498  * certainly necessary, and sets up little disconnected subNFA.

  1499  ^ static VOID leaders(struct vars *, struct cvec *);

  1500  */

  1501 static VOID

  1502 leaders(v, cv)

  1503 struct vars *v;

  1504 struct cvec *cv;

  1505 {

  1506 	int mcce;

  1507 	chr *p;

  1508 	chr leader;

  1509 	struct state *s;

  1510 	struct arc *a;

  1512 	v->mccepbegin = newstate(v->nfa);

  1513 	v->mccepend = newstate(v->nfa);

  1514 	NOERR();

  1516 	for (mcce = 0; mcce < cv->nmcces; mcce++) {

  1517 		p = cv->mcces[mcce];

  1518 		leader = *p;

  1519 		if (!haschr(cv, leader)) {

  1520 			addchr(cv, leader);

  1521 			s = newstate(v->nfa);

  1522 			newarc(v->nfa, PLAIN, subcolor(v->cm, leader),

  1523 							v->mccepbegin, s);

  1524 			okcolors(v->nfa, v->cm);

  1525 		} else {

  1526 			a = findarc(v->mccepbegin, PLAIN,

  1527 						GETCOLOR(v->cm, leader));

  1528 			assert(a != NULL);

  1529 			s = a->to;

  1530 			assert(s != v->mccepend);

  1531 		}

  1532 		p++;

  1533 		assert(*p != 0 && *(p+1) == 0);	/* only 2-char MCCEs for now */

  1534 		newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->mccepend);

  1535 		okcolors(v->nfa, v->cm);

  1536 	}

  1537 }

  1539 /*

  1540  - onechr - fill in arcs for a plain character, and possible case complements

  1541  * This is mostly a shortcut for efficient handling of the common case.

  1542  ^ static VOID onechr(struct vars *, pchr, struct state *, struct state *);

  1543  */

  1544 static VOID

  1545 onechr(v, c, lp, rp)

  1546 struct vars *v;

  1547 pchr c;

  1548 struct state *lp;

  1549 struct state *rp;

  1550 {

  1551 	if (!(v->cflags&REG_ICASE)) {

  1552 		newarc(v->nfa, PLAIN, subcolor(v->cm, c), lp, rp);

  1553 		return;

  1554 	}

  1556 	/* rats, need general case anyway... */

  1557 	dovec(v, allcases(v, c), lp, rp);

  1558 }

  1560 /*

  1561  - dovec - fill in arcs for each element of a cvec

  1562  * This one has to handle the messy cases, like MCCEs and MCCE leaders.

  1563  ^ static VOID dovec(struct vars *, struct cvec *, struct state *,

  1564  ^ 	struct state *);

  1565  */

  1566 static VOID

  1567 dovec(v, cv, lp, rp)

  1568 struct vars *v;

  1569 struct cvec *cv;

  1570 struct state *lp;

  1571 struct state *rp;

  1572 {

  1573 	chr ch, from, to;

  1574 	celt ce;

  1575 	chr *p;

  1576 	int i;

  1577 	color co;

  1578 	struct cvec *leads;

  1579 	struct arc *a;

  1580 	struct arc *pa;		/* arc in prototype */

  1581 	struct state *s;

  1582 	struct state *ps;	/* state in prototype */

  1584 	/* need a place to store leaders, if any */

  1585 	if (nmcces(v) > 0) {

  1586 		assert(v->mcces != NULL);

  1587 		if (v->cv2 == NULL || v->cv2->nchrs < v->mcces->nchrs) {

  1588 			if (v->cv2 != NULL)

  1589 				free(v->cv2);

  1590 			v->cv2 = newcvec(v->mcces->nchrs, 0, v->mcces->nmcces);

  1591 			NOERR();

  1592 			leads = v->cv2;

  1593 		} else

  1594 			leads = clearcvec(v->cv2);

  1595 	} else

  1596 		leads = NULL;

  1598 	/* first, get the ordinary characters out of the way */

  1599 	for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) {

  1600 		ch = *p;

  1601 		if (!ISCELEADER(v, ch))

  1602 			newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp);

  1603 		else {

  1604 			assert(singleton(v->cm, ch));

  1605 			assert(leads != NULL);

  1606 			if (!haschr(leads, ch))

  1607 				addchr(leads, ch);

  1608 		}

  1609 	}

  1611 	/* and the ranges */

  1612 	for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) {

  1613 		from = *p;

  1614 		to = *(p+1);

  1615 		while (from <= to && (ce = nextleader(v, from, to)) != NOCELT) {

  1616 			if (from < ce)

  1617 				subrange(v, from, ce - 1, lp, rp);

  1618 			assert(singleton(v->cm, ce));

  1619 			assert(leads != NULL);

  1620 			if (!haschr(leads, ce))

  1621 				addchr(leads, ce);

  1622 			from = ce + 1;

  1623 		}

  1624 		if (from <= to)

  1625 			subrange(v, from, to, lp, rp);

  1626 	}

  1628 	if ((leads == NULL || leads->nchrs == 0) && cv->nmcces == 0)

  1629 		return;

  1631 	/* deal with the MCCE leaders */

  1632 	NOTE(REG_ULOCALE);

  1633 	for (p = leads->chrs, i = leads->nchrs; i > 0; p++, i--) {

  1634 		co = GETCOLOR(v->cm, *p);

  1635 		a = findarc(lp, PLAIN, co);

  1636 		if (a != NULL)

  1637 			s = a->to;

  1638 		else {

  1639 			s = newstate(v->nfa);

  1640 			NOERR();

  1641 			newarc(v->nfa, PLAIN, co, lp, s);

  1642 			NOERR();

  1643 		}

  1644 		pa = findarc(v->mccepbegin, PLAIN, co);

  1645 		assert(pa != NULL);

  1646 		ps = pa->to;

  1647 		newarc(v->nfa, '$', 1, s, rp);

  1648 		newarc(v->nfa, '$', 0, s, rp);

  1649 		colorcomplement(v->nfa, v->cm, AHEAD, ps, s, rp);

  1650 		NOERR();

  1651 	}

  1653 	/* and the MCCEs */

  1654 	for (i = 0; i < cv->nmcces; i++) {

  1655 		p = cv->mcces[i];

  1656 		assert(singleton(v->cm, *p));

  1657 		if (!singleton(v->cm, *p)) {

  1658 			ERR(REG_ASSERT);

  1659 			return;

  1660 		}

  1661 		ch = *p++;

  1662 		co = GETCOLOR(v->cm, ch);

  1663 		a = findarc(lp, PLAIN, co);

  1664 		if (a != NULL)

  1665 			s = a->to;

  1666 		else {

  1667 			s = newstate(v->nfa);

  1668 			NOERR();

  1669 			newarc(v->nfa, PLAIN, co, lp, s);

  1670 			NOERR();

  1671 		}

  1672 		assert(*p != 0);	/* at least two chars */

  1673 		assert(singleton(v->cm, *p));

  1674 		ch = *p++;

  1675 		co = GETCOLOR(v->cm, ch);

  1676 		assert(*p == 0);	/* and only two, for now */

  1677 		newarc(v->nfa, PLAIN, co, s, rp);

  1678 		NOERR();

  1679 	}

  1680 }

  1682 /*

  1683  - nextleader - find next MCCE leader within range

  1684  ^ static celt nextleader(struct vars *, pchr, pchr);

  1685  */

  1686 static celt			/* NOCELT means none */

  1687 nextleader(v, from, to)

  1688 struct vars *v;

  1689 pchr from;

  1690 pchr to;

  1691 {

  1692 	int i;

  1693 	chr *p;

  1694 	chr ch;

  1695 	celt it = NOCELT;

  1697 	if (v->mcces == NULL)

  1698 		return it;

  1700 	for (i = v->mcces->nchrs, p = v->mcces->chrs; i > 0; i--, p++) {

  1701 		ch = *p;

  1702 		if (from <= ch && ch <= to)

  1703 			if (it == NOCELT || ch < it)

  1704 				it = ch;

  1705 	}

  1706 	return it;

  1707 }

  1709 /*

  1710  - wordchrs - set up word-chr list for word-boundary stuff, if needed

  1711  * The list is kept as a bunch of arcs between two dummy states; it's

  1712  * disposed of by the unreachable-states sweep in NFA optimization.

  1713  * Does NEXT().  Must not be called from any unusual lexical context.

  1714  * This should be reconciled with the \w etc. handling in lex.c, and

  1715  * should be cleaned up to reduce dependencies on input scanning.

  1716  ^ static VOID wordchrs(struct vars *);

  1717  */

  1718 static VOID

  1719 wordchrs(v)

  1720 struct vars *v;

  1721 {

  1722 	struct state *left;

  1723 	struct state *right;

  1725 	if (v->wordchrs != NULL) {

  1726 		NEXT();		/* for consistency */

  1727 		return;

  1728 	}

  1730 	left = newstate(v->nfa);

  1731 	right = newstate(v->nfa);

  1732 	NOERR();

  1733 	/* fine point:  implemented with [::], and lexer will set REG_ULOCALE */

  1734 	lexword(v);

  1735 	NEXT();

  1736 	assert(v->savenow != NULL && SEE('['));

  1737 	bracket(v, left, right);

  1738 	assert((v->savenow != NULL && SEE(']')) || ISERR());

  1739 	NEXT();

  1740 	NOERR();

  1741 	v->wordchrs = left;

  1742 }

  1744 /*

  1745  - subre - allocate a subre

  1746  ^ static struct subre *subre(struct vars *, int, int, struct state *,

  1747  ^	struct state *);

  1748  */

  1749 static struct subre *

  1750 subre(v, op, flags, begin, end)

  1751 struct vars *v;

  1752 int op;

  1753 int flags;

  1754 struct state *begin;

  1755 struct state *end;

  1756 {

  1757 	struct subre *ret;

  1759 	ret = v->treefree;

  1760 	if (ret != NULL)

  1761 		v->treefree = ret->left;

  1762 	else {

  1763 		ret = (struct subre *)MALLOC(sizeof(struct subre));

  1764 		if (ret == NULL) {

  1765 			ERR(REG_ESPACE);

  1766 			return NULL;

  1767 		}

  1768 		ret->chain = v->treechain;

  1769 		v->treechain = ret;

  1770 	}

  1772 	assert(strchr("|.b(=", op) != NULL);

  1774 	ret->op = op;

  1775 	ret->flags = flags;

  1776 	ret->retry = 0;

  1777 	ret->subno = 0;

  1778 	ret->min = ret->max = 1;

  1779 	ret->left = NULL;

  1780 	ret->right = NULL;

  1781 	ret->begin = begin;

  1782 	ret->end = end;

  1783 	ZAPCNFA(ret->cnfa);

  1785 	return ret;

  1786 }

  1788 /*

  1789  - freesubre - free a subRE subtree

  1790  ^ static VOID freesubre(struct vars *, struct subre *);

  1791  */

  1792 static VOID

  1793 freesubre(v, sr)

  1794 struct vars *v;			/* might be NULL */

  1795 struct subre *sr;

  1796 {

  1797 	if (sr == NULL)

  1798 		return;

  1800 	if (sr->left != NULL)

  1801 		freesubre(v, sr->left);

  1802 	if (sr->right != NULL)

  1803 		freesubre(v, sr->right);

  1805 	freesrnode(v, sr);

  1806 }

  1808 /*

  1809  - freesrnode - free one node in a subRE subtree

  1810  ^ static VOID freesrnode(struct vars *, struct subre *);

  1811  */

  1812 static VOID

  1813 freesrnode(v, sr)

  1814 struct vars *v;			/* might be NULL */

  1815 struct subre *sr;

  1816 {

  1817 	if (sr == NULL)

  1818 		return;

  1820 	if (!NULLCNFA(sr->cnfa))

  1821 		freecnfa(&sr->cnfa);

  1822 	sr->flags = 0;

  1824 	if (v != NULL) {

  1825 		sr->left = v->treefree;

  1826 		v->treefree = sr;

  1827 	} else

  1828 		FREE(sr);

  1829 }

  1831 /*

  1832  - optst - optimize a subRE subtree

  1833  ^ static VOID optst(struct vars *, struct subre *);

  1834  */

  1835 static VOID

  1836 optst(v, t)

  1837 struct vars *v;

  1838 struct subre *t;

  1839 {

  1840 	if (t == NULL)

  1841 		return;

  1843 	/* recurse through children */

  1844 	if (t->left != NULL)

  1845 		optst(v, t->left);

  1846 	if (t->right != NULL)

  1847 		optst(v, t->right);

  1848 }

  1850 /*

  1851  - numst - number tree nodes (assigning retry indexes)

  1852  ^ static int numst(struct subre *, int);

  1853  */

  1854 static int			/* next number */

  1855 numst(t, start)

  1856 struct subre *t;

  1857 int start;			/* starting point for subtree numbers */

  1858 {

  1859 	int i;

  1861 	assert(t != NULL);

  1863 	i = start;

  1864 	t->retry = (short)i++;

  1865 	if (t->left != NULL)

  1866 		i = numst(t->left, i);

  1867 	if (t->right != NULL)

  1868 		i = numst(t->right, i);

  1869 	return i;

  1870 }

  1872 /*

  1873  - markst - mark tree nodes as INUSE

  1874  ^ static VOID markst(struct subre *);

  1875  */

  1876 static VOID

  1877 markst(t)

  1878 struct subre *t;

  1879 {

  1880 	assert(t != NULL);

  1882 	t->flags |= INUSE;

  1883 	if (t->left != NULL)

  1884 		markst(t->left);

  1885 	if (t->right != NULL)

  1886 		markst(t->right);

  1887 }

  1889 /*

  1890  - cleanst - free any tree nodes not marked INUSE

  1891  ^ static VOID cleanst(struct vars *);

  1892  */

  1893 static VOID

  1894 cleanst(v)

  1895 struct vars *v;

  1896 {

  1897 	struct subre *t;

  1898 	struct subre *next;

  1900 	for (t = v->treechain; t != NULL; t = next) {

  1901 		next = t->chain;

  1902 		if (!(t->flags&INUSE))

  1903 			FREE(t);

  1904 	}

  1905 	v->treechain = NULL;

  1906 	v->treefree = NULL;		/* just on general principles */

  1907 }

  1909 /*

  1910  - nfatree - turn a subRE subtree into a tree of compacted NFAs

  1911  ^ static long nfatree(struct vars *, struct subre *, FILE *);

  1912  */

  1913 static long			/* optimize results from top node */

  1914 nfatree(v, t, f)

  1915 struct vars *v;

  1916 struct subre *t;

  1917 FILE *f;			/* for debug output */

  1918 {

  1919 	assert(t != NULL && t->begin != NULL);

  1921 	if (t->left != NULL)

  1922 		(DISCARD)nfatree(v, t->left, f);

  1923 	if (t->right != NULL)

  1924 		(DISCARD)nfatree(v, t->right, f);

  1926 	return nfanode(v, t, f);

  1927 }

  1929 /*

  1930  - nfanode - do one NFA for nfatree

  1931  ^ static long nfanode(struct vars *, struct subre *, FILE *);

  1932  */

  1933 static long			/* optimize results */

  1934 nfanode(v, t, f)

  1935 struct vars *v;

  1936 struct subre *t;

  1937 FILE *f;			/* for debug output */

  1938 {

  1939 	struct nfa *nfa;

  1940 	long ret = 0;

  1941 	char idbuf[50];

  1943 	assert(t->begin != NULL);

  1945 	if (f != NULL)

  1946 		fprintf(f, "\n\n\n========= TREE NODE %s ==========\n",

  1947 						stid(t, idbuf, sizeof(idbuf)));

  1948 	nfa = newnfa(v, v->cm, v->nfa);

  1949 	NOERRZ();

  1950 	dupnfa(nfa, t->begin, t->end, nfa->init, nfa->final);

  1951 	if (!ISERR()) {

  1952 		specialcolors(nfa);

  1953 		ret = optimize(nfa, f);

  1954 	}

  1955 	if (!ISERR())

  1956 		compact(nfa, &t->cnfa);

  1958 	freenfa(nfa);

  1959 	return ret;

  1960 }

  1962 /*

  1963  - newlacon - allocate a lookahead-constraint subRE

  1964  ^ static int newlacon(struct vars *, struct state *, struct state *, int);

  1965  */

  1966 static int			/* lacon number */

  1967 newlacon(v, begin, end, pos)

  1968 struct vars *v;

  1969 struct state *begin;

  1970 struct state *end;

  1971 int pos;

  1972 {

  1973 	int n;

  1974 	struct subre *sub;

  1976 	if (v->nlacons == 0) {

  1977 		v->lacons = (struct subre *)MALLOC(2 * sizeof(struct subre));

  1978 		n = 1;		/* skip 0th */

  1979 		v->nlacons = 2;

  1980 	} else {

  1981 		v->lacons = (struct subre *)REALLOC(v->lacons,

  1982 					(v->nlacons+1)*sizeof(struct subre));

  1983 		n = v->nlacons++;

  1984 	}

  1985 	if (v->lacons == NULL) {

  1986 		ERR(REG_ESPACE);

  1987 		return 0;

  1988 	}

  1989 	sub = &v->lacons[n];

  1990 	sub->begin = begin;

  1991 	sub->end = end;

  1992 	sub->subno = pos;

  1993 	ZAPCNFA(sub->cnfa);

  1994 	return n;

  1995 }

  1997 /*

  1998  - freelacons - free lookahead-constraint subRE vector

  1999  ^ static VOID freelacons(struct subre *, int);

  2000  */

  2001 static VOID

  2002 freelacons(subs, n)

  2003 struct subre *subs;

  2004 int n;

  2005 {

  2006 	struct subre *sub;

  2007 	int i;

  2009 	assert(n > 0);

  2010 	for (sub = subs + 1, i = n - 1; i > 0; sub++, i--)	/* no 0th */

  2011 		if (!NULLCNFA(sub->cnfa))

  2012 			freecnfa(&sub->cnfa);

  2013 	FREE(subs);

  2014 }

  2016 /*

  2017  - rfree - free a whole RE (insides of regfree)

  2018  ^ static VOID rfree(regex_t *);

  2019  */

  2020 static VOID

  2021 rfree(re)

  2022 regex_t *re;

  2023 {

  2024 	struct guts *g;

  2026 	if (re == NULL || re->re_magic != REMAGIC)

  2027 		return;

  2029 	re->re_magic = 0;	/* invalidate RE */

  2030 	g = (struct guts *)re->re_guts;

  2031 	re->re_guts = NULL;

  2032 	re->re_fns = NULL;

  2033 	g->magic = 0;

  2034 	freecm(&g->cmap);

  2035 	if (g->tree != NULL)

  2036 		freesubre((struct vars *)NULL, g->tree);

  2037 	if (g->lacons != NULL)

  2038 		freelacons(g->lacons, g->nlacons);

  2039 	if (!NULLCNFA(g->search))

  2040 		freecnfa(&g->search);

  2041 	FREE(g);

  2042 }

  2044 /*

  2045  - dump - dump an RE in human-readable form

  2046  ^ static VOID dump(regex_t *, FILE *);

  2047  */

  2048 static VOID

  2049 dump(re, f)

  2050 regex_t *re;

  2051 FILE *f;

  2052 {

  2053 #ifdef REG_DEBUG

  2054 	struct guts *g;

  2055 	int i;

  2057 	if (re->re_magic != REMAGIC)

  2058 		fprintf(f, "bad magic number (0x%x not 0x%x)\n", re->re_magic,

  2059 								REMAGIC);

  2060 	if (re->re_guts == NULL) {

  2061 		fprintf(f, "NULL guts!!!\n");

  2062 		return;

  2063 	}

  2064 	g = (struct guts *)re->re_guts;

  2065 	if (g->magic != GUTSMAGIC)

  2066 		fprintf(f, "bad guts magic number (0x%x not 0x%x)\n", g->magic,

  2067 								GUTSMAGIC);

  2069 	fprintf(f, "\n\n\n========= DUMP ==========\n");

  2070 	fprintf(f, "nsub %d, info 0%lo, csize %d, ntree %d\n",

  2071 		re->re_nsub, re->re_info, re->re_csize, g->ntree);

  2073 	dumpcolors(&g->cmap, f);

  2074 	if (!NULLCNFA(g->search)) {

  2075 		printf("\nsearch:\n");

  2076 		dumpcnfa(&g->search, f);

  2077 	}

  2078 	for (i = 1; i < g->nlacons; i++) {

  2079 		fprintf(f, "\nla%d (%s):\n", i,

  2080 				(g->lacons[i].subno) ? "positive" : "negative");

  2081 		dumpcnfa(&g->lacons[i].cnfa, f);

  2082 	}

  2083 	fprintf(f, "\n");

  2084 	dumpst(g->tree, f, 0);

  2085 #endif

  2086 }

  2088 /*

  2089  - dumpst - dump a subRE tree

  2090  ^ static VOID dumpst(struct subre *, FILE *, int);

  2091  */

  2092 static VOID

  2093 dumpst(t, f, nfapresent)

  2094 struct subre *t;

  2095 FILE *f;

  2096 int nfapresent;			/* is the original NFA still around? */

  2097 {

  2098 	if (t == NULL)

  2099 		fprintf(f, "null tree\n");

  2100 	else

  2101 		stdump(t, f, nfapresent);

  2102 	fflush(f);

  2103 }

  2105 /*

  2106  - stdump - recursive guts of dumpst

  2107  ^ static VOID stdump(struct subre *, FILE *, int);

  2108  */

  2109 static VOID

  2110 stdump(t, f, nfapresent)

  2111 struct subre *t;

  2112 FILE *f;

  2113 int nfapresent;			/* is the original NFA still around? */

  2114 {

  2115 	char idbuf[50];

  2117 	fprintf(f, "%s. `%c'", stid(t, idbuf, sizeof(idbuf)), t->op);

  2118 	if (t->flags&LONGER)

  2119 		fprintf(f, " longest");

  2120 	if (t->flags&SHORTER)

  2121 		fprintf(f, " shortest");

  2122 	if (t->flags&MIXED)

  2123 		fprintf(f, " hasmixed");

  2124 	if (t->flags&CAP)

  2125 		fprintf(f, " hascapture");

  2126 	if (t->flags&BACKR)

  2127 		fprintf(f, " hasbackref");

  2128 	if (!(t->flags&INUSE))

  2129 		fprintf(f, " UNUSED");

  2130 	if (t->subno != 0)

  2131 		fprintf(f, " (#%d)", t->subno);

  2132 	if (t->min != 1 || t->max != 1) {

  2133 		fprintf(f, " {%d,", t->min);

  2134 		if (t->max != INFINITY)

  2135 			fprintf(f, "%d", t->max);

  2136 		fprintf(f, "}");

  2137 	}

  2138 	if (nfapresent)

  2139 		fprintf(f, " %ld-%ld", (long)t->begin->no, (long)t->end->no);

  2140 	if (t->left != NULL)

  2141 		fprintf(f, " L:%s", stid(t->left, idbuf, sizeof(idbuf)));

  2142 	if (t->right != NULL)

  2143 		fprintf(f, " R:%s", stid(t->right, idbuf, sizeof(idbuf)));

  2144 	if (!NULLCNFA(t->cnfa)) {

  2145 		fprintf(f, "\n");

  2146 		dumpcnfa(&t->cnfa, f);

  2147 		fprintf(f, "\n");

  2148 	}

  2149 	if (t->left != NULL)

  2150 		stdump(t->left, f, nfapresent);

  2151 	if (t->right != NULL)

  2152 		stdump(t->right, f, nfapresent);

  2153 }

  2155 /*

  2156  - stid - identify a subtree node for dumping

  2157  ^ static char *stid(struct subre *, char *, size_t);

  2158  */

  2159 static char *			/* points to buf or constant string */

  2160 stid(t, buf, bufsize)

  2161 struct subre *t;

  2162 char *buf;

  2163 size_t bufsize;

  2164 {

  2165 	/* big enough for hex int or decimal t->retry? */

  2166 	if (bufsize < sizeof(void*)*2 + 3 || bufsize < sizeof(t->retry)*3 + 1)

  2167 		return "unable";

  2168 	if (t->retry != 0)

  2169 		sprintf(buf, "%d", t->retry);

  2170 	else

  2171 		sprintf(buf, "%p", t);

  2172 	return buf;

  2173 }

  2175 #include "regc_lex.c"

  2176 #include "regc_color.c"

  2177 #include "regc_nfa.c"

  2178 #include "regc_cvec.c"

  2179 #include "regc_locale.c"

author	sl
	Tue, 10 Jun 2014 14:32:02 +0200
changeset 1	260cb5ec6c19
permissions	-rw-r--r--