os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/generic/regexec.c
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 /*
     2  * re_*exec and friends - match REs
     3  *
     4  * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
     5  * 
     6  * Development of this software was funded, in part, by Cray Research Inc.,
     7  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
     8  * Corporation, none of whom are responsible for the results.  The author
     9  * thanks all of them. 
    10  * 
    11  * Redistribution and use in source and binary forms -- with or without
    12  * modification -- are permitted for any purpose, provided that
    13  * redistributions in source form retain this entire copyright notice and
    14  * indicate the origin and nature of any modifications.
    15  * 
    16  * I'd appreciate being given credit for this package in the documentation
    17  * of software which uses it, but that is not a requirement.
    18  * 
    19  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
    20  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
    21  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
    22  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    23  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    24  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    25  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
    26  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
    27  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
    28  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    29  *
    30  */
    31 
    32 #include "regguts.h"
    33 
    34 
    35 
    36 /* lazy-DFA representation */
    37 struct arcp {			/* "pointer" to an outarc */
    38 	struct sset *ss;
    39 	color co;
    40 };
    41 
    42 struct sset {			/* state set */
    43 	unsigned *states;	/* pointer to bitvector */
    44 	unsigned hash;		/* hash of bitvector */
    45 #		define	HASH(bv, nw)	(((nw) == 1) ? *(bv) : hash(bv, nw))
    46 #	define	HIT(h,bv,ss,nw)	((ss)->hash == (h) && ((nw) == 1 || \
    47 		memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
    48 	int flags;
    49 #		define	STARTER		01	/* the initial state set */
    50 #		define	POSTSTATE	02	/* includes the goal state */
    51 #		define	LOCKED		04	/* locked in cache */
    52 #		define	NOPROGRESS	010	/* zero-progress state set */
    53 	struct arcp ins;	/* chain of inarcs pointing here */
    54 	chr *lastseen;		/* last entered on arrival here */
    55 	struct sset **outs;	/* outarc vector indexed by color */
    56 	struct arcp *inchain;	/* chain-pointer vector for outarcs */
    57 };
    58 
    59 struct dfa {
    60 	int nssets;		/* size of cache */
    61 	int nssused;		/* how many entries occupied yet */
    62 	int nstates;		/* number of states */
    63 	int ncolors;		/* length of outarc and inchain vectors */
    64 	int wordsper;		/* length of state-set bitvectors */
    65 	struct sset *ssets;	/* state-set cache */
    66 	unsigned *statesarea;	/* bitvector storage */
    67 	unsigned *work;		/* pointer to work area within statesarea */
    68 	struct sset **outsarea;	/* outarc-vector storage */
    69 	struct arcp *incarea;	/* inchain storage */
    70 	struct cnfa *cnfa;
    71 	struct colormap *cm;
    72 	chr *lastpost;		/* location of last cache-flushed success */
    73 	chr *lastnopr;		/* location of last cache-flushed NOPROGRESS */
    74 	struct sset *search;	/* replacement-search-pointer memory */
    75 	int cptsmalloced;	/* were the areas individually malloced? */
    76 	char *mallocarea;	/* self, or master malloced area, or NULL */
    77 };
    78 
    79 #define	WORK	1		/* number of work bitvectors needed */
    80 
    81 /* setup for non-malloc allocation for small cases */
    82 #define	FEWSTATES	20	/* must be less than UBITS */
    83 #define	FEWCOLORS	15
    84 struct smalldfa {
    85 	struct dfa dfa;
    86 	struct sset ssets[FEWSTATES*2];
    87 	unsigned statesarea[FEWSTATES*2 + WORK];
    88 	struct sset *outsarea[FEWSTATES*2 * FEWCOLORS];
    89 	struct arcp incarea[FEWSTATES*2 * FEWCOLORS];
    90 };
    91 #define	DOMALLOC	((struct smalldfa *)NULL)	/* force malloc */
    92 
    93 
    94 
    95 /* internal variables, bundled for easy passing around */
    96 struct vars {
    97 	regex_t *re;
    98 	struct guts *g;
    99 	int eflags;		/* copies of arguments */
   100 	size_t nmatch;
   101 	regmatch_t *pmatch;
   102 	rm_detail_t *details;
   103 	chr *start;		/* start of string */
   104 	chr *stop;		/* just past end of string */
   105 	int err;		/* error code if any (0 none) */
   106 	regoff_t *mem;		/* memory vector for backtracking */
   107 	struct smalldfa dfa1;
   108 	struct smalldfa dfa2;
   109 };
   110 #define	VISERR(vv)	((vv)->err != 0)	/* have we seen an error yet? */
   111 #define	ISERR()	VISERR(v)
   112 #define	VERR(vv,e)	(((vv)->err) ? (vv)->err : ((vv)->err = (e)))
   113 #define	ERR(e)	VERR(v, e)		/* record an error */
   114 #define	NOERR()	{if (ISERR()) return v->err;}	/* if error seen, return it */
   115 #define	OFF(p)	((p) - v->start)
   116 #define	LOFF(p)	((long)OFF(p))
   117 
   118 
   119 
   120 /*
   121  * forward declarations
   122  */
   123 /* =====^!^===== begin forwards =====^!^===== */
   124 /* automatically gathered by fwd; do not hand-edit */
   125 /* === regexec.c === */
   126 int exec _ANSI_ARGS_((regex_t *, CONST chr *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
   127 static int find _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *));
   128 static int cfind _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *));
   129 static int cfindloop _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **));
   130 static VOID zapsubs _ANSI_ARGS_((regmatch_t *, size_t));
   131 static VOID zapmem _ANSI_ARGS_((struct vars *, struct subre *));
   132 static VOID subset _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
   133 static int dissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
   134 static int condissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
   135 static int altdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
   136 static int cdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
   137 static int ccondissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
   138 static int crevdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
   139 static int cbrdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
   140 static int caltdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
   141 /* === rege_dfa.c === */
   142 static chr *longest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, int *));
   143 static chr *shortest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *));
   144 static chr *lastcold _ANSI_ARGS_((struct vars *, struct dfa *));
   145 static struct dfa *newdfa _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct smalldfa *));
   146 static VOID freedfa _ANSI_ARGS_((struct dfa *));
   147 static unsigned hash _ANSI_ARGS_((unsigned *, int));
   148 static struct sset *initialize _ANSI_ARGS_((struct vars *, struct dfa *, chr *));
   149 static struct sset *miss _ANSI_ARGS_((struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *));
   150 static int lacon _ANSI_ARGS_((struct vars *, struct cnfa *, chr *, pcolor));
   151 static struct sset *getvacant _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *));
   152 static struct sset *pickss _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *));
   153 /* automatically gathered by fwd; do not hand-edit */
   154 /* =====^!^===== end forwards =====^!^===== */
   155 
   156 
   157 
   158 /*
   159  - exec - match regular expression
   160  ^ int exec(regex_t *, CONST chr *, size_t, rm_detail_t *,
   161  ^					size_t, regmatch_t [], int);
   162  */
   163 int
   164 exec(re, string, len, details, nmatch, pmatch, flags)
   165 regex_t *re;
   166 CONST chr *string;
   167 size_t len;
   168 rm_detail_t *details;
   169 size_t nmatch;
   170 regmatch_t pmatch[];
   171 int flags;
   172 {
   173 	struct vars var;
   174 	register struct vars *v = &var;
   175 	int st;
   176 	size_t n;
   177 	int backref;
   178 #	define	LOCALMAT	20
   179 	regmatch_t mat[LOCALMAT];
   180 #	define	LOCALMEM	40
   181 	regoff_t mem[LOCALMEM];
   182 
   183 	/* sanity checks */
   184 	if (re == NULL || string == NULL || re->re_magic != REMAGIC)
   185 		return REG_INVARG;
   186 	if (re->re_csize != sizeof(chr))
   187 		return REG_MIXED;
   188 
   189 	/* setup */
   190 	v->re = re;
   191 	v->g = (struct guts *)re->re_guts;
   192 	if ((v->g->cflags&REG_EXPECT) && details == NULL)
   193 		return REG_INVARG;
   194 	if (v->g->info&REG_UIMPOSSIBLE)
   195 		return REG_NOMATCH;
   196 	backref = (v->g->info&REG_UBACKREF) ? 1 : 0;
   197 	v->eflags = flags;
   198 	if (v->g->cflags&REG_NOSUB)
   199 		nmatch = 0;		/* override client */
   200 	v->nmatch = nmatch;
   201 	if (backref) {
   202 		/* need work area */
   203 		if (v->g->nsub + 1 <= LOCALMAT)
   204 			v->pmatch = mat;
   205 		else
   206 			v->pmatch = (regmatch_t *)MALLOC((v->g->nsub + 1) *
   207 							sizeof(regmatch_t));
   208 		if (v->pmatch == NULL)
   209 			return REG_ESPACE;
   210 		v->nmatch = v->g->nsub + 1;
   211 	} else
   212 		v->pmatch = pmatch;
   213 	v->details = details;
   214 	v->start = (chr *)string;
   215 	v->stop = (chr *)string + len;
   216 	v->err = 0;
   217 	if (backref) {
   218 		/* need retry memory */
   219 		assert(v->g->ntree >= 0);
   220 		n = (size_t)v->g->ntree;
   221 		if (n <= LOCALMEM)
   222 			v->mem = mem;
   223 		else
   224 			v->mem = (regoff_t *)MALLOC(n*sizeof(regoff_t));
   225 		if (v->mem == NULL) {
   226 			if (v->pmatch != pmatch && v->pmatch != mat)
   227 				FREE(v->pmatch);
   228 			return REG_ESPACE;
   229 		}
   230 	} else
   231 		v->mem = NULL;
   232 
   233 	/* do it */
   234 	assert(v->g->tree != NULL);
   235 	if (backref)
   236 		st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
   237 	else
   238 		st = find(v, &v->g->tree->cnfa, &v->g->cmap);
   239 
   240 	/* copy (portion of) match vector over if necessary */
   241 	if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) {
   242 		zapsubs(pmatch, nmatch);
   243 		n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
   244 		memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t));
   245 	}
   246 
   247 	/* clean up */
   248 	if (v->pmatch != pmatch && v->pmatch != mat)
   249 		FREE(v->pmatch);
   250 	if (v->mem != NULL && v->mem != mem)
   251 		FREE(v->mem);
   252 	return st;
   253 }
   254 
   255 /*
   256  - find - find a match for the main NFA (no-complications case)
   257  ^ static int find(struct vars *, struct cnfa *, struct colormap *);
   258  */
   259 static int
   260 find(v, cnfa, cm)
   261 struct vars *v;
   262 struct cnfa *cnfa;
   263 struct colormap *cm;
   264 {
   265 	struct dfa *s;
   266 	struct dfa *d;
   267 	chr *begin;
   268 	chr *end = NULL;
   269 	chr *cold;
   270 	chr *open;		/* open and close of range of possible starts */
   271 	chr *close;
   272 	int hitend;
   273 	int shorter = (v->g->tree->flags&SHORTER) ? 1 : 0;
   274 
   275 	/* first, a shot with the search RE */
   276 	s = newdfa(v, &v->g->search, cm, &v->dfa1);
   277 	assert(!(ISERR() && s != NULL));
   278 	NOERR();
   279 	MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
   280 	cold = NULL;
   281 	close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *)NULL);
   282 	freedfa(s);
   283 	NOERR();
   284 	if (v->g->cflags&REG_EXPECT) {
   285 		assert(v->details != NULL);
   286 		if (cold != NULL)
   287 			v->details->rm_extend.rm_so = OFF(cold);
   288 		else
   289 			v->details->rm_extend.rm_so = OFF(v->stop);
   290 		v->details->rm_extend.rm_eo = OFF(v->stop);	/* unknown */
   291 	}
   292 	if (close == NULL)		/* not found */
   293 		return REG_NOMATCH;
   294 	if (v->nmatch == 0)		/* found, don't need exact location */
   295 		return REG_OKAY;
   296 
   297 	/* find starting point and match */
   298 	assert(cold != NULL);
   299 	open = cold;
   300 	cold = NULL;
   301 	MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
   302 	d = newdfa(v, cnfa, cm, &v->dfa1);
   303 	assert(!(ISERR() && d != NULL));
   304 	NOERR();
   305 	for (begin = open; begin <= close; begin++) {
   306 		MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
   307 		if (shorter)
   308 			end = shortest(v, d, begin, begin, v->stop,
   309 							(chr **)NULL, &hitend);
   310 		else
   311 			end = longest(v, d, begin, v->stop, &hitend);
   312 		NOERR();
   313 		if (hitend && cold == NULL)
   314 			cold = begin;
   315 		if (end != NULL)
   316 			break;		/* NOTE BREAK OUT */
   317 	}
   318 	assert(end != NULL);		/* search RE succeeded so loop should */
   319 	freedfa(d);
   320 
   321 	/* and pin down details */
   322 	assert(v->nmatch > 0);
   323 	v->pmatch[0].rm_so = OFF(begin);
   324 	v->pmatch[0].rm_eo = OFF(end);
   325 	if (v->g->cflags&REG_EXPECT) {
   326 		if (cold != NULL)
   327 			v->details->rm_extend.rm_so = OFF(cold);
   328 		else
   329 			v->details->rm_extend.rm_so = OFF(v->stop);
   330 		v->details->rm_extend.rm_eo = OFF(v->stop);	/* unknown */
   331 	}
   332 	if (v->nmatch == 1)		/* no need for submatches */
   333 		return REG_OKAY;
   334 
   335 	/* submatches */
   336 	zapsubs(v->pmatch, v->nmatch);
   337 	return dissect(v, v->g->tree, begin, end);
   338 }
   339 
   340 /*
   341  - cfind - find a match for the main NFA (with complications)
   342  ^ static int cfind(struct vars *, struct cnfa *, struct colormap *);
   343  */
   344 static int
   345 cfind(v, cnfa, cm)
   346 struct vars *v;
   347 struct cnfa *cnfa;
   348 struct colormap *cm;
   349 {
   350 	struct dfa *s;
   351 	struct dfa *d;
   352 	chr *cold = NULL; /* silence gcc 4 warning */
   353 	int ret;
   354 
   355 	s = newdfa(v, &v->g->search, cm, &v->dfa1);
   356 	NOERR();
   357 	d = newdfa(v, cnfa, cm, &v->dfa2);
   358 	if (ISERR()) {
   359 		assert(d == NULL);
   360 		freedfa(s);
   361 		return v->err;
   362 	}
   363 
   364 	ret = cfindloop(v, cnfa, cm, d, s, &cold);
   365 
   366 	freedfa(d);
   367 	freedfa(s);
   368 	NOERR();
   369 	if (v->g->cflags&REG_EXPECT) {
   370 		assert(v->details != NULL);
   371 		if (cold != NULL)
   372 			v->details->rm_extend.rm_so = OFF(cold);
   373 		else
   374 			v->details->rm_extend.rm_so = OFF(v->stop);
   375 		v->details->rm_extend.rm_eo = OFF(v->stop);	/* unknown */
   376 	}
   377 	return ret;
   378 }
   379 
   380 /*
   381  - cfindloop - the heart of cfind
   382  ^ static int cfindloop(struct vars *, struct cnfa *, struct colormap *,
   383  ^	struct dfa *, struct dfa *, chr **);
   384  */
   385 static int
   386 cfindloop(v, cnfa, cm, d, s, coldp)
   387 struct vars *v;
   388 struct cnfa *cnfa;
   389 struct colormap *cm;
   390 struct dfa *d;
   391 struct dfa *s;
   392 chr **coldp;			/* where to put coldstart pointer */
   393 {
   394 	chr *begin;
   395 	chr *end;
   396 	chr *cold;
   397 	chr *open;		/* open and close of range of possible starts */
   398 	chr *close;
   399 	chr *estart;
   400 	chr *estop;
   401 	int er;
   402 	int shorter = v->g->tree->flags&SHORTER;
   403 	int hitend;
   404 
   405 	assert(d != NULL && s != NULL);
   406 	cold = NULL;
   407 	close = v->start;
   408 	do {
   409 		MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
   410 		close = shortest(v, s, close, close, v->stop, &cold, (int *)NULL);
   411 		if (close == NULL)
   412 			break;				/* NOTE BREAK */
   413 		assert(cold != NULL);
   414 		open = cold;
   415 		cold = NULL;
   416 		MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
   417 		for (begin = open; begin <= close; begin++) {
   418 			MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
   419 			estart = begin;
   420 			estop = v->stop;
   421 			for (;;) {
   422 				if (shorter)
   423 					end = shortest(v, d, begin, estart,
   424 						estop, (chr **)NULL, &hitend);
   425 				else
   426 					end = longest(v, d, begin, estop,
   427 								&hitend);
   428 				if (hitend && cold == NULL)
   429 					cold = begin;
   430 				if (end == NULL)
   431 					break;		/* NOTE BREAK OUT */
   432 				MDEBUG(("tentative end %ld\n", LOFF(end)));
   433 				zapsubs(v->pmatch, v->nmatch);
   434 				zapmem(v, v->g->tree);
   435 				er = cdissect(v, v->g->tree, begin, end);
   436 				if (er == REG_OKAY) {
   437 					if (v->nmatch > 0) {
   438 						v->pmatch[0].rm_so = OFF(begin);
   439 						v->pmatch[0].rm_eo = OFF(end);
   440 					}
   441 					*coldp = cold;
   442 					return REG_OKAY;
   443 				}
   444 				if (er != REG_NOMATCH) {
   445 					ERR(er);
   446 					return er;
   447 				}
   448 				if ((shorter) ? end == estop : end == begin) {
   449 					/* no point in trying again */
   450 					*coldp = cold;
   451 					return REG_NOMATCH;
   452 				}
   453 				/* go around and try again */
   454 				if (shorter)
   455 					estart = end + 1;
   456 				else
   457 					estop = end - 1;
   458 			}
   459 		}
   460 	} while (close < v->stop);
   461 
   462 	*coldp = cold;
   463 	return REG_NOMATCH;
   464 }
   465 
   466 /*
   467  - zapsubs - initialize the subexpression matches to "no match"
   468  ^ static VOID zapsubs(regmatch_t *, size_t);
   469  */
   470 static VOID
   471 zapsubs(p, n)
   472 regmatch_t *p;
   473 size_t n;
   474 {
   475 	size_t i;
   476 
   477 	for (i = n-1; i > 0; i--) {
   478 		p[i].rm_so = -1;
   479 		p[i].rm_eo = -1;
   480 	}
   481 }
   482 
   483 /*
   484  - zapmem - initialize the retry memory of a subtree to zeros
   485  ^ static VOID zapmem(struct vars *, struct subre *);
   486  */
   487 static VOID
   488 zapmem(v, t)
   489 struct vars *v;
   490 struct subre *t;
   491 {
   492 	if (t == NULL)
   493 		return;
   494 
   495 	assert(v->mem != NULL);
   496 	v->mem[t->retry] = 0;
   497 	if (t->op == '(') {
   498 		assert(t->subno > 0);
   499 		v->pmatch[t->subno].rm_so = -1;
   500 		v->pmatch[t->subno].rm_eo = -1;
   501 	}
   502 
   503 	if (t->left != NULL)
   504 		zapmem(v, t->left);
   505 	if (t->right != NULL)
   506 		zapmem(v, t->right);
   507 }
   508 
   509 /*
   510  - subset - set any subexpression relevant to a successful subre
   511  ^ static VOID subset(struct vars *, struct subre *, chr *, chr *);
   512  */
   513 static VOID
   514 subset(v, sub, begin, end)
   515 struct vars *v;
   516 struct subre *sub;
   517 chr *begin;
   518 chr *end;
   519 {
   520 	int n = sub->subno;
   521 
   522 	assert(n > 0);
   523 	if ((size_t)n >= v->nmatch)
   524 		return;
   525 
   526 	MDEBUG(("setting %d\n", n));
   527 	v->pmatch[n].rm_so = OFF(begin);
   528 	v->pmatch[n].rm_eo = OFF(end);
   529 }
   530 
   531 /*
   532  - dissect - determine subexpression matches (uncomplicated case)
   533  ^ static int dissect(struct vars *, struct subre *, chr *, chr *);
   534  */
   535 static int			/* regexec return code */
   536 dissect(v, t, begin, end)
   537 struct vars *v;
   538 struct subre *t;
   539 chr *begin;			/* beginning of relevant substring */
   540 chr *end;			/* end of same */
   541 {
   542 	assert(t != NULL);
   543 	MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end)));
   544 
   545 	switch (t->op) {
   546 	case '=':		/* terminal node */
   547 		assert(t->left == NULL && t->right == NULL);
   548 		return REG_OKAY;	/* no action, parent did the work */
   549 		break;
   550 	case '|':		/* alternation */
   551 		assert(t->left != NULL);
   552 		return altdissect(v, t, begin, end);
   553 		break;
   554 	case 'b':		/* back ref -- shouldn't be calling us! */
   555 		return REG_ASSERT;
   556 		break;
   557 	case '.':		/* concatenation */
   558 		assert(t->left != NULL && t->right != NULL);
   559 		return condissect(v, t, begin, end);
   560 		break;
   561 	case '(':		/* capturing */
   562 		assert(t->left != NULL && t->right == NULL);
   563 		assert(t->subno > 0);
   564 		subset(v, t, begin, end);
   565 		return dissect(v, t->left, begin, end);
   566 		break;
   567 	default:
   568 		return REG_ASSERT;
   569 		break;
   570 	}
   571 }
   572 
   573 /*
   574  - condissect - determine concatenation subexpression matches (uncomplicated)
   575  ^ static int condissect(struct vars *, struct subre *, chr *, chr *);
   576  */
   577 static int			/* regexec return code */
   578 condissect(v, t, begin, end)
   579 struct vars *v;
   580 struct subre *t;
   581 chr *begin;			/* beginning of relevant substring */
   582 chr *end;			/* end of same */
   583 {
   584 	struct dfa *d;
   585 	struct dfa *d2;
   586 	chr *mid;
   587 	int i;
   588 	int shorter = (t->left->flags&SHORTER) ? 1 : 0;
   589 	chr *stop = (shorter) ? end : begin;
   590 
   591 	assert(t->op == '.');
   592 	assert(t->left != NULL && t->left->cnfa.nstates > 0);
   593 	assert(t->right != NULL && t->right->cnfa.nstates > 0);
   594 
   595 	d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
   596 	NOERR();
   597 	d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &v->dfa2);
   598 	if (ISERR()) {
   599 		assert(d2 == NULL);
   600 		freedfa(d);
   601 		return v->err;
   602 	}
   603 
   604 	/* pick a tentative midpoint */
   605 	if (shorter)
   606 		mid = shortest(v, d, begin, begin, end, (chr **)NULL,
   607 								(int *)NULL);
   608 	else
   609 		mid = longest(v, d, begin, end, (int *)NULL);
   610 	if (mid == NULL) {
   611 		freedfa(d);
   612 		freedfa(d2);
   613 		return REG_ASSERT;
   614 	}
   615 	MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
   616 
   617 	/* iterate until satisfaction or failure */
   618 	while (longest(v, d2, mid, end, (int *)NULL) != end) {
   619 		/* that midpoint didn't work, find a new one */
   620 		if (mid == stop) {
   621 			/* all possibilities exhausted! */
   622 			MDEBUG(("no midpoint!\n"));
   623 			freedfa(d);
   624 			freedfa(d2);
   625 			return REG_ASSERT;
   626 		}
   627 		if (shorter)
   628 			mid = shortest(v, d, begin, mid+1, end, (chr **)NULL,
   629 								(int *)NULL);
   630 		else
   631 			mid = longest(v, d, begin, mid-1, (int *)NULL);
   632 		if (mid == NULL) {
   633 			/* failed to find a new one! */
   634 			MDEBUG(("failed midpoint!\n"));
   635 			freedfa(d);
   636 			freedfa(d2);
   637 			return REG_ASSERT;
   638 		}
   639 		MDEBUG(("new midpoint %ld\n", LOFF(mid)));
   640 	}
   641 
   642 	/* satisfaction */
   643 	MDEBUG(("successful\n"));
   644 	freedfa(d);
   645 	freedfa(d2);
   646 	i = dissect(v, t->left, begin, mid);
   647 	if (i != REG_OKAY)
   648 		return i;
   649 	return dissect(v, t->right, mid, end);
   650 }
   651 
   652 /*
   653  - altdissect - determine alternative subexpression matches (uncomplicated)
   654  ^ static int altdissect(struct vars *, struct subre *, chr *, chr *);
   655  */
   656 static int			/* regexec return code */
   657 altdissect(v, t, begin, end)
   658 struct vars *v;
   659 struct subre *t;
   660 chr *begin;			/* beginning of relevant substring */
   661 chr *end;			/* end of same */
   662 {
   663 	struct dfa *d;
   664 	int i;
   665 
   666 	assert(t != NULL);
   667 	assert(t->op == '|');
   668 
   669 	for (i = 0; t != NULL; t = t->right, i++) {
   670 		MDEBUG(("trying %dth\n", i));
   671 		assert(t->left != NULL && t->left->cnfa.nstates > 0);
   672 		d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
   673 		if (ISERR())
   674 			return v->err;
   675 		if (longest(v, d, begin, end, (int *)NULL) == end) {
   676 			MDEBUG(("success\n"));
   677 			freedfa(d);
   678 			return dissect(v, t->left, begin, end);
   679 		}
   680 		freedfa(d);
   681 	}
   682 	return REG_ASSERT;	/* none of them matched?!? */
   683 }
   684 
   685 /*
   686  - cdissect - determine subexpression matches (with complications)
   687  * The retry memory stores the offset of the trial midpoint from begin, 
   688  * plus 1 so that 0 uniquely means "clean slate".
   689  ^ static int cdissect(struct vars *, struct subre *, chr *, chr *);
   690  */
   691 static int			/* regexec return code */
   692 cdissect(v, t, begin, end)
   693 struct vars *v;
   694 struct subre *t;
   695 chr *begin;			/* beginning of relevant substring */
   696 chr *end;			/* end of same */
   697 {
   698 	int er;
   699 
   700 	assert(t != NULL);
   701 	MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));
   702 
   703 	switch (t->op) {
   704 	case '=':		/* terminal node */
   705 		assert(t->left == NULL && t->right == NULL);
   706 		return REG_OKAY;	/* no action, parent did the work */
   707 		break;
   708 	case '|':		/* alternation */
   709 		assert(t->left != NULL);
   710 		return caltdissect(v, t, begin, end);
   711 		break;
   712 	case 'b':		/* back ref -- shouldn't be calling us! */
   713 		assert(t->left == NULL && t->right == NULL);
   714 		return cbrdissect(v, t, begin, end);
   715 		break;
   716 	case '.':		/* concatenation */
   717 		assert(t->left != NULL && t->right != NULL);
   718 		return ccondissect(v, t, begin, end);
   719 		break;
   720 	case '(':		/* capturing */
   721 		assert(t->left != NULL && t->right == NULL);
   722 		assert(t->subno > 0);
   723 		er = cdissect(v, t->left, begin, end);
   724 		if (er == REG_OKAY)
   725 			subset(v, t, begin, end);
   726 		return er;
   727 		break;
   728 	default:
   729 		return REG_ASSERT;
   730 		break;
   731 	}
   732 }
   733 
   734 /*
   735  - ccondissect - concatenation subexpression matches (with complications)
   736  * The retry memory stores the offset of the trial midpoint from begin, 
   737  * plus 1 so that 0 uniquely means "clean slate".
   738  ^ static int ccondissect(struct vars *, struct subre *, chr *, chr *);
   739  */
   740 static int			/* regexec return code */
   741 ccondissect(v, t, begin, end)
   742 struct vars *v;
   743 struct subre *t;
   744 chr *begin;			/* beginning of relevant substring */
   745 chr *end;			/* end of same */
   746 {
   747 	struct dfa *d;
   748 	struct dfa *d2;
   749 	chr *mid;
   750 	int er;
   751 
   752 	assert(t->op == '.');
   753 	assert(t->left != NULL && t->left->cnfa.nstates > 0);
   754 	assert(t->right != NULL && t->right->cnfa.nstates > 0);
   755 
   756 	if (t->left->flags&SHORTER)		/* reverse scan */
   757 		return crevdissect(v, t, begin, end);
   758 
   759 	d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
   760 	if (ISERR())
   761 		return v->err;
   762 	d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
   763 	if (ISERR()) {
   764 		freedfa(d);
   765 		return v->err;
   766 	}
   767 	MDEBUG(("cconcat %d\n", t->retry));
   768 
   769 	/* pick a tentative midpoint */
   770 	if (v->mem[t->retry] == 0) {
   771 		mid = longest(v, d, begin, end, (int *)NULL);
   772 		if (mid == NULL) {
   773 			freedfa(d);
   774 			freedfa(d2);
   775 			return REG_NOMATCH;
   776 		}
   777 		MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
   778 		v->mem[t->retry] = (mid - begin) + 1;
   779 	} else {
   780 		mid = begin + (v->mem[t->retry] - 1);
   781 		MDEBUG(("working midpoint %ld\n", LOFF(mid)));
   782 	}
   783 
   784 	/* iterate until satisfaction or failure */
   785 	for (;;) {
   786 		/* try this midpoint on for size */
   787 		er = cdissect(v, t->left, begin, mid);
   788 		if (er == REG_OKAY &&
   789 				longest(v, d2, mid, end, (int *)NULL) == end &&
   790 				(er = cdissect(v, t->right, mid, end)) == 
   791 								REG_OKAY)
   792 			break;			/* NOTE BREAK OUT */
   793 		if (er != REG_OKAY && er != REG_NOMATCH) {
   794 			freedfa(d);
   795 			freedfa(d2);
   796 			return er;
   797 		}
   798 
   799 		/* that midpoint didn't work, find a new one */
   800 		if (mid == begin) {
   801 			/* all possibilities exhausted */
   802 			MDEBUG(("%d no midpoint\n", t->retry));
   803 			freedfa(d);
   804 			freedfa(d2);
   805 			return REG_NOMATCH;
   806 		}
   807 		mid = longest(v, d, begin, mid-1, (int *)NULL);
   808 		if (mid == NULL) {
   809 			/* failed to find a new one */
   810 			MDEBUG(("%d failed midpoint\n", t->retry));
   811 			freedfa(d);
   812 			freedfa(d2);
   813 			return REG_NOMATCH;
   814 		}
   815 		MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
   816 		v->mem[t->retry] = (mid - begin) + 1;
   817 		zapmem(v, t->left);
   818 		zapmem(v, t->right);
   819 	}
   820 
   821 	/* satisfaction */
   822 	MDEBUG(("successful\n"));
   823 	freedfa(d);
   824 	freedfa(d2);
   825 	return REG_OKAY;
   826 }
   827 
   828 /*
   829  - crevdissect - determine backref shortest-first subexpression matches
   830  * The retry memory stores the offset of the trial midpoint from begin, 
   831  * plus 1 so that 0 uniquely means "clean slate".
   832  ^ static int crevdissect(struct vars *, struct subre *, chr *, chr *);
   833  */
   834 static int			/* regexec return code */
   835 crevdissect(v, t, begin, end)
   836 struct vars *v;
   837 struct subre *t;
   838 chr *begin;			/* beginning of relevant substring */
   839 chr *end;			/* end of same */
   840 {
   841 	struct dfa *d;
   842 	struct dfa *d2;
   843 	chr *mid;
   844 	int er;
   845 
   846 	assert(t->op == '.');
   847 	assert(t->left != NULL && t->left->cnfa.nstates > 0);
   848 	assert(t->right != NULL && t->right->cnfa.nstates > 0);
   849 	assert(t->left->flags&SHORTER);
   850 
   851 	/* concatenation -- need to split the substring between parts */
   852 	d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
   853 	if (ISERR())
   854 		return v->err;
   855 	d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
   856 	if (ISERR()) {
   857 		freedfa(d);
   858 		return v->err;
   859 	}
   860 	MDEBUG(("crev %d\n", t->retry));
   861 
   862 	/* pick a tentative midpoint */
   863 	if (v->mem[t->retry] == 0) {
   864 		mid = shortest(v, d, begin, begin, end, (chr **)NULL, (int *)NULL);
   865 		if (mid == NULL) {
   866 			freedfa(d);
   867 			freedfa(d2);
   868 			return REG_NOMATCH;
   869 		}
   870 		MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
   871 		v->mem[t->retry] = (mid - begin) + 1;
   872 	} else {
   873 		mid = begin + (v->mem[t->retry] - 1);
   874 		MDEBUG(("working midpoint %ld\n", LOFF(mid)));
   875 	}
   876 
   877 	/* iterate until satisfaction or failure */
   878 	for (;;) {
   879 		/* try this midpoint on for size */
   880 		er = cdissect(v, t->left, begin, mid);
   881 		if (er == REG_OKAY &&
   882 				longest(v, d2, mid, end, (int *)NULL) == end &&
   883 				(er = cdissect(v, t->right, mid, end)) == 
   884 								REG_OKAY)
   885 			break;			/* NOTE BREAK OUT */
   886 		if (er != REG_OKAY && er != REG_NOMATCH) {
   887 			freedfa(d);
   888 			freedfa(d2);
   889 			return er;
   890 		}
   891 
   892 		/* that midpoint didn't work, find a new one */
   893 		if (mid == end) {
   894 			/* all possibilities exhausted */
   895 			MDEBUG(("%d no midpoint\n", t->retry));
   896 			freedfa(d);
   897 			freedfa(d2);
   898 			return REG_NOMATCH;
   899 		}
   900 		mid = shortest(v, d, begin, mid+1, end, (chr **)NULL, (int *)NULL);
   901 		if (mid == NULL) {
   902 			/* failed to find a new one */
   903 			MDEBUG(("%d failed midpoint\n", t->retry));
   904 			freedfa(d);
   905 			freedfa(d2);
   906 			return REG_NOMATCH;
   907 		}
   908 		MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
   909 		v->mem[t->retry] = (mid - begin) + 1;
   910 		zapmem(v, t->left);
   911 		zapmem(v, t->right);
   912 	}
   913 
   914 	/* satisfaction */
   915 	MDEBUG(("successful\n"));
   916 	freedfa(d);
   917 	freedfa(d2);
   918 	return REG_OKAY;
   919 }
   920 
   921 /*
   922  - cbrdissect - determine backref subexpression matches
   923  ^ static int cbrdissect(struct vars *, struct subre *, chr *, chr *);
   924  */
   925 static int			/* regexec return code */
   926 cbrdissect(v, t, begin, end)
   927 struct vars *v;
   928 struct subre *t;
   929 chr *begin;			/* beginning of relevant substring */
   930 chr *end;			/* end of same */
   931 {
   932 	int i;
   933 	int n = t->subno;
   934 	size_t len;
   935 	chr *paren;
   936 	chr *p;
   937 	chr *stop;
   938 	int min = t->min;
   939 	int max = t->max;
   940 
   941 	assert(t != NULL);
   942 	assert(t->op == 'b');
   943 	assert(n >= 0);
   944 	assert((size_t)n < v->nmatch);
   945 
   946 	MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max));
   947 
   948 	if (v->pmatch[n].rm_so == -1)
   949 		return REG_NOMATCH;
   950 	paren = v->start + v->pmatch[n].rm_so;
   951 	len = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
   952 
   953 	/* no room to maneuver -- retries are pointless */
   954 	if (v->mem[t->retry])
   955 		return REG_NOMATCH;
   956 	v->mem[t->retry] = 1;
   957 
   958 	/* special-case zero-length string */
   959 	if (len == 0) {
   960 		if (begin == end)
   961 			return REG_OKAY;
   962 		return REG_NOMATCH;
   963 	}
   964 
   965 	/* and too-short string */
   966 	assert(end >= begin);
   967 	if ((size_t)(end - begin) < len)
   968 		return REG_NOMATCH;
   969 	stop = end - len;
   970 
   971 	/* count occurrences */
   972 	i = 0;
   973 	for (p = begin; p <= stop && (i < max || max == INFINITY); p += len) {
   974 		if ((*v->g->compare)(paren, p, len) != 0)
   975 				break;
   976 		i++;
   977 	}
   978 	MDEBUG(("cbackref found %d\n", i));
   979 
   980 	/* and sort it out */
   981 	if (p != end)			/* didn't consume all of it */
   982 		return REG_NOMATCH;
   983 	if (min <= i && (i <= max || max == INFINITY))
   984 		return REG_OKAY;
   985 	return REG_NOMATCH;		/* out of range */
   986 }
   987 
   988 /*
   989  - caltdissect - determine alternative subexpression matches (w. complications)
   990  ^ static int caltdissect(struct vars *, struct subre *, chr *, chr *);
   991  */
   992 static int			/* regexec return code */
   993 caltdissect(v, t, begin, end)
   994 struct vars *v;
   995 struct subre *t;
   996 chr *begin;			/* beginning of relevant substring */
   997 chr *end;			/* end of same */
   998 {
   999 	struct dfa *d;
  1000 	int er;
  1001 #	define	UNTRIED	0	/* not yet tried at all */
  1002 #	define	TRYING	1	/* top matched, trying submatches */
  1003 #	define	TRIED	2	/* top didn't match or submatches exhausted */
  1004 
  1005 	if (t == NULL)
  1006 		return REG_NOMATCH;
  1007 	assert(t->op == '|');
  1008 	if (v->mem[t->retry] == TRIED)
  1009 		return caltdissect(v, t->right, begin, end);
  1010 
  1011 	MDEBUG(("calt n%d\n", t->retry));
  1012 	assert(t->left != NULL);
  1013 
  1014 	if (v->mem[t->retry] == UNTRIED) {
  1015 		d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
  1016 		if (ISERR())
  1017 			return v->err;
  1018 		if (longest(v, d, begin, end, (int *)NULL) != end) {
  1019 			freedfa(d);
  1020 			v->mem[t->retry] = TRIED;
  1021 			return caltdissect(v, t->right, begin, end);
  1022 		}
  1023 		freedfa(d);
  1024 		MDEBUG(("calt matched\n"));
  1025 		v->mem[t->retry] = TRYING;
  1026 	}
  1027 
  1028 	er = cdissect(v, t->left, begin, end);
  1029 	if (er != REG_NOMATCH)
  1030 		return er;
  1031 
  1032 	v->mem[t->retry] = TRIED;
  1033 	return caltdissect(v, t->right, begin, end);
  1034 }
  1035 
  1036 
  1037 
  1038 #include "rege_dfa.c"