os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/generic/regexec.c
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/generic/regexec.c Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,1038 @@
1.4 +/*
1.5 + * re_*exec and friends - match REs
1.6 + *
1.7 + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
1.8 + *
1.9 + * Development of this software was funded, in part, by Cray Research Inc.,
1.10 + * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
1.11 + * Corporation, none of whom are responsible for the results. The author
1.12 + * thanks all of them.
1.13 + *
1.14 + * Redistribution and use in source and binary forms -- with or without
1.15 + * modification -- are permitted for any purpose, provided that
1.16 + * redistributions in source form retain this entire copyright notice and
1.17 + * indicate the origin and nature of any modifications.
1.18 + *
1.19 + * I'd appreciate being given credit for this package in the documentation
1.20 + * of software which uses it, but that is not a requirement.
1.21 + *
1.22 + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
1.23 + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
1.24 + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
1.25 + * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
1.26 + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
1.27 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
1.28 + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
1.29 + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
1.30 + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
1.31 + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1.32 + *
1.33 + */
1.34 +
1.35 +#include "regguts.h"
1.36 +
1.37 +
1.38 +
1.39 +/* lazy-DFA representation */
1.40 +struct arcp { /* "pointer" to an outarc */
1.41 + struct sset *ss;
1.42 + color co;
1.43 +};
1.44 +
1.45 +struct sset { /* state set */
1.46 + unsigned *states; /* pointer to bitvector */
1.47 + unsigned hash; /* hash of bitvector */
1.48 +# define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw))
1.49 +# define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \
1.50 + memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
1.51 + int flags;
1.52 +# define STARTER 01 /* the initial state set */
1.53 +# define POSTSTATE 02 /* includes the goal state */
1.54 +# define LOCKED 04 /* locked in cache */
1.55 +# define NOPROGRESS 010 /* zero-progress state set */
1.56 + struct arcp ins; /* chain of inarcs pointing here */
1.57 + chr *lastseen; /* last entered on arrival here */
1.58 + struct sset **outs; /* outarc vector indexed by color */
1.59 + struct arcp *inchain; /* chain-pointer vector for outarcs */
1.60 +};
1.61 +
1.62 +struct dfa {
1.63 + int nssets; /* size of cache */
1.64 + int nssused; /* how many entries occupied yet */
1.65 + int nstates; /* number of states */
1.66 + int ncolors; /* length of outarc and inchain vectors */
1.67 + int wordsper; /* length of state-set bitvectors */
1.68 + struct sset *ssets; /* state-set cache */
1.69 + unsigned *statesarea; /* bitvector storage */
1.70 + unsigned *work; /* pointer to work area within statesarea */
1.71 + struct sset **outsarea; /* outarc-vector storage */
1.72 + struct arcp *incarea; /* inchain storage */
1.73 + struct cnfa *cnfa;
1.74 + struct colormap *cm;
1.75 + chr *lastpost; /* location of last cache-flushed success */
1.76 + chr *lastnopr; /* location of last cache-flushed NOPROGRESS */
1.77 + struct sset *search; /* replacement-search-pointer memory */
1.78 + int cptsmalloced; /* were the areas individually malloced? */
1.79 + char *mallocarea; /* self, or master malloced area, or NULL */
1.80 +};
1.81 +
1.82 +#define WORK 1 /* number of work bitvectors needed */
1.83 +
1.84 +/* setup for non-malloc allocation for small cases */
1.85 +#define FEWSTATES 20 /* must be less than UBITS */
1.86 +#define FEWCOLORS 15
1.87 +struct smalldfa {
1.88 + struct dfa dfa;
1.89 + struct sset ssets[FEWSTATES*2];
1.90 + unsigned statesarea[FEWSTATES*2 + WORK];
1.91 + struct sset *outsarea[FEWSTATES*2 * FEWCOLORS];
1.92 + struct arcp incarea[FEWSTATES*2 * FEWCOLORS];
1.93 +};
1.94 +#define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */
1.95 +
1.96 +
1.97 +
1.98 +/* internal variables, bundled for easy passing around */
1.99 +struct vars {
1.100 + regex_t *re;
1.101 + struct guts *g;
1.102 + int eflags; /* copies of arguments */
1.103 + size_t nmatch;
1.104 + regmatch_t *pmatch;
1.105 + rm_detail_t *details;
1.106 + chr *start; /* start of string */
1.107 + chr *stop; /* just past end of string */
1.108 + int err; /* error code if any (0 none) */
1.109 + regoff_t *mem; /* memory vector for backtracking */
1.110 + struct smalldfa dfa1;
1.111 + struct smalldfa dfa2;
1.112 +};
1.113 +#define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */
1.114 +#define ISERR() VISERR(v)
1.115 +#define VERR(vv,e) (((vv)->err) ? (vv)->err : ((vv)->err = (e)))
1.116 +#define ERR(e) VERR(v, e) /* record an error */
1.117 +#define NOERR() {if (ISERR()) return v->err;} /* if error seen, return it */
1.118 +#define OFF(p) ((p) - v->start)
1.119 +#define LOFF(p) ((long)OFF(p))
1.120 +
1.121 +
1.122 +
1.123 +/*
1.124 + * forward declarations
1.125 + */
1.126 +/* =====^!^===== begin forwards =====^!^===== */
1.127 +/* automatically gathered by fwd; do not hand-edit */
1.128 +/* === regexec.c === */
1.129 +int exec _ANSI_ARGS_((regex_t *, CONST chr *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
1.130 +static int find _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *));
1.131 +static int cfind _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *));
1.132 +static int cfindloop _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **));
1.133 +static VOID zapsubs _ANSI_ARGS_((regmatch_t *, size_t));
1.134 +static VOID zapmem _ANSI_ARGS_((struct vars *, struct subre *));
1.135 +static VOID subset _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
1.136 +static int dissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
1.137 +static int condissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
1.138 +static int altdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
1.139 +static int cdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
1.140 +static int ccondissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
1.141 +static int crevdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
1.142 +static int cbrdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
1.143 +static int caltdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
1.144 +/* === rege_dfa.c === */
1.145 +static chr *longest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, int *));
1.146 +static chr *shortest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *));
1.147 +static chr *lastcold _ANSI_ARGS_((struct vars *, struct dfa *));
1.148 +static struct dfa *newdfa _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct smalldfa *));
1.149 +static VOID freedfa _ANSI_ARGS_((struct dfa *));
1.150 +static unsigned hash _ANSI_ARGS_((unsigned *, int));
1.151 +static struct sset *initialize _ANSI_ARGS_((struct vars *, struct dfa *, chr *));
1.152 +static struct sset *miss _ANSI_ARGS_((struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *));
1.153 +static int lacon _ANSI_ARGS_((struct vars *, struct cnfa *, chr *, pcolor));
1.154 +static struct sset *getvacant _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *));
1.155 +static struct sset *pickss _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *));
1.156 +/* automatically gathered by fwd; do not hand-edit */
1.157 +/* =====^!^===== end forwards =====^!^===== */
1.158 +
1.159 +
1.160 +
1.161 +/*
1.162 + - exec - match regular expression
1.163 + ^ int exec(regex_t *, CONST chr *, size_t, rm_detail_t *,
1.164 + ^ size_t, regmatch_t [], int);
1.165 + */
1.166 +int
1.167 +exec(re, string, len, details, nmatch, pmatch, flags)
1.168 +regex_t *re;
1.169 +CONST chr *string;
1.170 +size_t len;
1.171 +rm_detail_t *details;
1.172 +size_t nmatch;
1.173 +regmatch_t pmatch[];
1.174 +int flags;
1.175 +{
1.176 + struct vars var;
1.177 + register struct vars *v = &var;
1.178 + int st;
1.179 + size_t n;
1.180 + int backref;
1.181 +# define LOCALMAT 20
1.182 + regmatch_t mat[LOCALMAT];
1.183 +# define LOCALMEM 40
1.184 + regoff_t mem[LOCALMEM];
1.185 +
1.186 + /* sanity checks */
1.187 + if (re == NULL || string == NULL || re->re_magic != REMAGIC)
1.188 + return REG_INVARG;
1.189 + if (re->re_csize != sizeof(chr))
1.190 + return REG_MIXED;
1.191 +
1.192 + /* setup */
1.193 + v->re = re;
1.194 + v->g = (struct guts *)re->re_guts;
1.195 + if ((v->g->cflags®_EXPECT) && details == NULL)
1.196 + return REG_INVARG;
1.197 + if (v->g->info®_UIMPOSSIBLE)
1.198 + return REG_NOMATCH;
1.199 + backref = (v->g->info®_UBACKREF) ? 1 : 0;
1.200 + v->eflags = flags;
1.201 + if (v->g->cflags®_NOSUB)
1.202 + nmatch = 0; /* override client */
1.203 + v->nmatch = nmatch;
1.204 + if (backref) {
1.205 + /* need work area */
1.206 + if (v->g->nsub + 1 <= LOCALMAT)
1.207 + v->pmatch = mat;
1.208 + else
1.209 + v->pmatch = (regmatch_t *)MALLOC((v->g->nsub + 1) *
1.210 + sizeof(regmatch_t));
1.211 + if (v->pmatch == NULL)
1.212 + return REG_ESPACE;
1.213 + v->nmatch = v->g->nsub + 1;
1.214 + } else
1.215 + v->pmatch = pmatch;
1.216 + v->details = details;
1.217 + v->start = (chr *)string;
1.218 + v->stop = (chr *)string + len;
1.219 + v->err = 0;
1.220 + if (backref) {
1.221 + /* need retry memory */
1.222 + assert(v->g->ntree >= 0);
1.223 + n = (size_t)v->g->ntree;
1.224 + if (n <= LOCALMEM)
1.225 + v->mem = mem;
1.226 + else
1.227 + v->mem = (regoff_t *)MALLOC(n*sizeof(regoff_t));
1.228 + if (v->mem == NULL) {
1.229 + if (v->pmatch != pmatch && v->pmatch != mat)
1.230 + FREE(v->pmatch);
1.231 + return REG_ESPACE;
1.232 + }
1.233 + } else
1.234 + v->mem = NULL;
1.235 +
1.236 + /* do it */
1.237 + assert(v->g->tree != NULL);
1.238 + if (backref)
1.239 + st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
1.240 + else
1.241 + st = find(v, &v->g->tree->cnfa, &v->g->cmap);
1.242 +
1.243 + /* copy (portion of) match vector over if necessary */
1.244 + if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) {
1.245 + zapsubs(pmatch, nmatch);
1.246 + n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
1.247 + memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t));
1.248 + }
1.249 +
1.250 + /* clean up */
1.251 + if (v->pmatch != pmatch && v->pmatch != mat)
1.252 + FREE(v->pmatch);
1.253 + if (v->mem != NULL && v->mem != mem)
1.254 + FREE(v->mem);
1.255 + return st;
1.256 +}
1.257 +
1.258 +/*
1.259 + - find - find a match for the main NFA (no-complications case)
1.260 + ^ static int find(struct vars *, struct cnfa *, struct colormap *);
1.261 + */
1.262 +static int
1.263 +find(v, cnfa, cm)
1.264 +struct vars *v;
1.265 +struct cnfa *cnfa;
1.266 +struct colormap *cm;
1.267 +{
1.268 + struct dfa *s;
1.269 + struct dfa *d;
1.270 + chr *begin;
1.271 + chr *end = NULL;
1.272 + chr *cold;
1.273 + chr *open; /* open and close of range of possible starts */
1.274 + chr *close;
1.275 + int hitend;
1.276 + int shorter = (v->g->tree->flags&SHORTER) ? 1 : 0;
1.277 +
1.278 + /* first, a shot with the search RE */
1.279 + s = newdfa(v, &v->g->search, cm, &v->dfa1);
1.280 + assert(!(ISERR() && s != NULL));
1.281 + NOERR();
1.282 + MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
1.283 + cold = NULL;
1.284 + close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *)NULL);
1.285 + freedfa(s);
1.286 + NOERR();
1.287 + if (v->g->cflags®_EXPECT) {
1.288 + assert(v->details != NULL);
1.289 + if (cold != NULL)
1.290 + v->details->rm_extend.rm_so = OFF(cold);
1.291 + else
1.292 + v->details->rm_extend.rm_so = OFF(v->stop);
1.293 + v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
1.294 + }
1.295 + if (close == NULL) /* not found */
1.296 + return REG_NOMATCH;
1.297 + if (v->nmatch == 0) /* found, don't need exact location */
1.298 + return REG_OKAY;
1.299 +
1.300 + /* find starting point and match */
1.301 + assert(cold != NULL);
1.302 + open = cold;
1.303 + cold = NULL;
1.304 + MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
1.305 + d = newdfa(v, cnfa, cm, &v->dfa1);
1.306 + assert(!(ISERR() && d != NULL));
1.307 + NOERR();
1.308 + for (begin = open; begin <= close; begin++) {
1.309 + MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
1.310 + if (shorter)
1.311 + end = shortest(v, d, begin, begin, v->stop,
1.312 + (chr **)NULL, &hitend);
1.313 + else
1.314 + end = longest(v, d, begin, v->stop, &hitend);
1.315 + NOERR();
1.316 + if (hitend && cold == NULL)
1.317 + cold = begin;
1.318 + if (end != NULL)
1.319 + break; /* NOTE BREAK OUT */
1.320 + }
1.321 + assert(end != NULL); /* search RE succeeded so loop should */
1.322 + freedfa(d);
1.323 +
1.324 + /* and pin down details */
1.325 + assert(v->nmatch > 0);
1.326 + v->pmatch[0].rm_so = OFF(begin);
1.327 + v->pmatch[0].rm_eo = OFF(end);
1.328 + if (v->g->cflags®_EXPECT) {
1.329 + if (cold != NULL)
1.330 + v->details->rm_extend.rm_so = OFF(cold);
1.331 + else
1.332 + v->details->rm_extend.rm_so = OFF(v->stop);
1.333 + v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
1.334 + }
1.335 + if (v->nmatch == 1) /* no need for submatches */
1.336 + return REG_OKAY;
1.337 +
1.338 + /* submatches */
1.339 + zapsubs(v->pmatch, v->nmatch);
1.340 + return dissect(v, v->g->tree, begin, end);
1.341 +}
1.342 +
1.343 +/*
1.344 + - cfind - find a match for the main NFA (with complications)
1.345 + ^ static int cfind(struct vars *, struct cnfa *, struct colormap *);
1.346 + */
1.347 +static int
1.348 +cfind(v, cnfa, cm)
1.349 +struct vars *v;
1.350 +struct cnfa *cnfa;
1.351 +struct colormap *cm;
1.352 +{
1.353 + struct dfa *s;
1.354 + struct dfa *d;
1.355 + chr *cold = NULL; /* silence gcc 4 warning */
1.356 + int ret;
1.357 +
1.358 + s = newdfa(v, &v->g->search, cm, &v->dfa1);
1.359 + NOERR();
1.360 + d = newdfa(v, cnfa, cm, &v->dfa2);
1.361 + if (ISERR()) {
1.362 + assert(d == NULL);
1.363 + freedfa(s);
1.364 + return v->err;
1.365 + }
1.366 +
1.367 + ret = cfindloop(v, cnfa, cm, d, s, &cold);
1.368 +
1.369 + freedfa(d);
1.370 + freedfa(s);
1.371 + NOERR();
1.372 + if (v->g->cflags®_EXPECT) {
1.373 + assert(v->details != NULL);
1.374 + if (cold != NULL)
1.375 + v->details->rm_extend.rm_so = OFF(cold);
1.376 + else
1.377 + v->details->rm_extend.rm_so = OFF(v->stop);
1.378 + v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
1.379 + }
1.380 + return ret;
1.381 +}
1.382 +
1.383 +/*
1.384 + - cfindloop - the heart of cfind
1.385 + ^ static int cfindloop(struct vars *, struct cnfa *, struct colormap *,
1.386 + ^ struct dfa *, struct dfa *, chr **);
1.387 + */
1.388 +static int
1.389 +cfindloop(v, cnfa, cm, d, s, coldp)
1.390 +struct vars *v;
1.391 +struct cnfa *cnfa;
1.392 +struct colormap *cm;
1.393 +struct dfa *d;
1.394 +struct dfa *s;
1.395 +chr **coldp; /* where to put coldstart pointer */
1.396 +{
1.397 + chr *begin;
1.398 + chr *end;
1.399 + chr *cold;
1.400 + chr *open; /* open and close of range of possible starts */
1.401 + chr *close;
1.402 + chr *estart;
1.403 + chr *estop;
1.404 + int er;
1.405 + int shorter = v->g->tree->flags&SHORTER;
1.406 + int hitend;
1.407 +
1.408 + assert(d != NULL && s != NULL);
1.409 + cold = NULL;
1.410 + close = v->start;
1.411 + do {
1.412 + MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
1.413 + close = shortest(v, s, close, close, v->stop, &cold, (int *)NULL);
1.414 + if (close == NULL)
1.415 + break; /* NOTE BREAK */
1.416 + assert(cold != NULL);
1.417 + open = cold;
1.418 + cold = NULL;
1.419 + MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
1.420 + for (begin = open; begin <= close; begin++) {
1.421 + MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
1.422 + estart = begin;
1.423 + estop = v->stop;
1.424 + for (;;) {
1.425 + if (shorter)
1.426 + end = shortest(v, d, begin, estart,
1.427 + estop, (chr **)NULL, &hitend);
1.428 + else
1.429 + end = longest(v, d, begin, estop,
1.430 + &hitend);
1.431 + if (hitend && cold == NULL)
1.432 + cold = begin;
1.433 + if (end == NULL)
1.434 + break; /* NOTE BREAK OUT */
1.435 + MDEBUG(("tentative end %ld\n", LOFF(end)));
1.436 + zapsubs(v->pmatch, v->nmatch);
1.437 + zapmem(v, v->g->tree);
1.438 + er = cdissect(v, v->g->tree, begin, end);
1.439 + if (er == REG_OKAY) {
1.440 + if (v->nmatch > 0) {
1.441 + v->pmatch[0].rm_so = OFF(begin);
1.442 + v->pmatch[0].rm_eo = OFF(end);
1.443 + }
1.444 + *coldp = cold;
1.445 + return REG_OKAY;
1.446 + }
1.447 + if (er != REG_NOMATCH) {
1.448 + ERR(er);
1.449 + return er;
1.450 + }
1.451 + if ((shorter) ? end == estop : end == begin) {
1.452 + /* no point in trying again */
1.453 + *coldp = cold;
1.454 + return REG_NOMATCH;
1.455 + }
1.456 + /* go around and try again */
1.457 + if (shorter)
1.458 + estart = end + 1;
1.459 + else
1.460 + estop = end - 1;
1.461 + }
1.462 + }
1.463 + } while (close < v->stop);
1.464 +
1.465 + *coldp = cold;
1.466 + return REG_NOMATCH;
1.467 +}
1.468 +
1.469 +/*
1.470 + - zapsubs - initialize the subexpression matches to "no match"
1.471 + ^ static VOID zapsubs(regmatch_t *, size_t);
1.472 + */
1.473 +static VOID
1.474 +zapsubs(p, n)
1.475 +regmatch_t *p;
1.476 +size_t n;
1.477 +{
1.478 + size_t i;
1.479 +
1.480 + for (i = n-1; i > 0; i--) {
1.481 + p[i].rm_so = -1;
1.482 + p[i].rm_eo = -1;
1.483 + }
1.484 +}
1.485 +
1.486 +/*
1.487 + - zapmem - initialize the retry memory of a subtree to zeros
1.488 + ^ static VOID zapmem(struct vars *, struct subre *);
1.489 + */
1.490 +static VOID
1.491 +zapmem(v, t)
1.492 +struct vars *v;
1.493 +struct subre *t;
1.494 +{
1.495 + if (t == NULL)
1.496 + return;
1.497 +
1.498 + assert(v->mem != NULL);
1.499 + v->mem[t->retry] = 0;
1.500 + if (t->op == '(') {
1.501 + assert(t->subno > 0);
1.502 + v->pmatch[t->subno].rm_so = -1;
1.503 + v->pmatch[t->subno].rm_eo = -1;
1.504 + }
1.505 +
1.506 + if (t->left != NULL)
1.507 + zapmem(v, t->left);
1.508 + if (t->right != NULL)
1.509 + zapmem(v, t->right);
1.510 +}
1.511 +
1.512 +/*
1.513 + - subset - set any subexpression relevant to a successful subre
1.514 + ^ static VOID subset(struct vars *, struct subre *, chr *, chr *);
1.515 + */
1.516 +static VOID
1.517 +subset(v, sub, begin, end)
1.518 +struct vars *v;
1.519 +struct subre *sub;
1.520 +chr *begin;
1.521 +chr *end;
1.522 +{
1.523 + int n = sub->subno;
1.524 +
1.525 + assert(n > 0);
1.526 + if ((size_t)n >= v->nmatch)
1.527 + return;
1.528 +
1.529 + MDEBUG(("setting %d\n", n));
1.530 + v->pmatch[n].rm_so = OFF(begin);
1.531 + v->pmatch[n].rm_eo = OFF(end);
1.532 +}
1.533 +
1.534 +/*
1.535 + - dissect - determine subexpression matches (uncomplicated case)
1.536 + ^ static int dissect(struct vars *, struct subre *, chr *, chr *);
1.537 + */
1.538 +static int /* regexec return code */
1.539 +dissect(v, t, begin, end)
1.540 +struct vars *v;
1.541 +struct subre *t;
1.542 +chr *begin; /* beginning of relevant substring */
1.543 +chr *end; /* end of same */
1.544 +{
1.545 + assert(t != NULL);
1.546 + MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end)));
1.547 +
1.548 + switch (t->op) {
1.549 + case '=': /* terminal node */
1.550 + assert(t->left == NULL && t->right == NULL);
1.551 + return REG_OKAY; /* no action, parent did the work */
1.552 + break;
1.553 + case '|': /* alternation */
1.554 + assert(t->left != NULL);
1.555 + return altdissect(v, t, begin, end);
1.556 + break;
1.557 + case 'b': /* back ref -- shouldn't be calling us! */
1.558 + return REG_ASSERT;
1.559 + break;
1.560 + case '.': /* concatenation */
1.561 + assert(t->left != NULL && t->right != NULL);
1.562 + return condissect(v, t, begin, end);
1.563 + break;
1.564 + case '(': /* capturing */
1.565 + assert(t->left != NULL && t->right == NULL);
1.566 + assert(t->subno > 0);
1.567 + subset(v, t, begin, end);
1.568 + return dissect(v, t->left, begin, end);
1.569 + break;
1.570 + default:
1.571 + return REG_ASSERT;
1.572 + break;
1.573 + }
1.574 +}
1.575 +
1.576 +/*
1.577 + - condissect - determine concatenation subexpression matches (uncomplicated)
1.578 + ^ static int condissect(struct vars *, struct subre *, chr *, chr *);
1.579 + */
1.580 +static int /* regexec return code */
1.581 +condissect(v, t, begin, end)
1.582 +struct vars *v;
1.583 +struct subre *t;
1.584 +chr *begin; /* beginning of relevant substring */
1.585 +chr *end; /* end of same */
1.586 +{
1.587 + struct dfa *d;
1.588 + struct dfa *d2;
1.589 + chr *mid;
1.590 + int i;
1.591 + int shorter = (t->left->flags&SHORTER) ? 1 : 0;
1.592 + chr *stop = (shorter) ? end : begin;
1.593 +
1.594 + assert(t->op == '.');
1.595 + assert(t->left != NULL && t->left->cnfa.nstates > 0);
1.596 + assert(t->right != NULL && t->right->cnfa.nstates > 0);
1.597 +
1.598 + d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
1.599 + NOERR();
1.600 + d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &v->dfa2);
1.601 + if (ISERR()) {
1.602 + assert(d2 == NULL);
1.603 + freedfa(d);
1.604 + return v->err;
1.605 + }
1.606 +
1.607 + /* pick a tentative midpoint */
1.608 + if (shorter)
1.609 + mid = shortest(v, d, begin, begin, end, (chr **)NULL,
1.610 + (int *)NULL);
1.611 + else
1.612 + mid = longest(v, d, begin, end, (int *)NULL);
1.613 + if (mid == NULL) {
1.614 + freedfa(d);
1.615 + freedfa(d2);
1.616 + return REG_ASSERT;
1.617 + }
1.618 + MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
1.619 +
1.620 + /* iterate until satisfaction or failure */
1.621 + while (longest(v, d2, mid, end, (int *)NULL) != end) {
1.622 + /* that midpoint didn't work, find a new one */
1.623 + if (mid == stop) {
1.624 + /* all possibilities exhausted! */
1.625 + MDEBUG(("no midpoint!\n"));
1.626 + freedfa(d);
1.627 + freedfa(d2);
1.628 + return REG_ASSERT;
1.629 + }
1.630 + if (shorter)
1.631 + mid = shortest(v, d, begin, mid+1, end, (chr **)NULL,
1.632 + (int *)NULL);
1.633 + else
1.634 + mid = longest(v, d, begin, mid-1, (int *)NULL);
1.635 + if (mid == NULL) {
1.636 + /* failed to find a new one! */
1.637 + MDEBUG(("failed midpoint!\n"));
1.638 + freedfa(d);
1.639 + freedfa(d2);
1.640 + return REG_ASSERT;
1.641 + }
1.642 + MDEBUG(("new midpoint %ld\n", LOFF(mid)));
1.643 + }
1.644 +
1.645 + /* satisfaction */
1.646 + MDEBUG(("successful\n"));
1.647 + freedfa(d);
1.648 + freedfa(d2);
1.649 + i = dissect(v, t->left, begin, mid);
1.650 + if (i != REG_OKAY)
1.651 + return i;
1.652 + return dissect(v, t->right, mid, end);
1.653 +}
1.654 +
1.655 +/*
1.656 + - altdissect - determine alternative subexpression matches (uncomplicated)
1.657 + ^ static int altdissect(struct vars *, struct subre *, chr *, chr *);
1.658 + */
1.659 +static int /* regexec return code */
1.660 +altdissect(v, t, begin, end)
1.661 +struct vars *v;
1.662 +struct subre *t;
1.663 +chr *begin; /* beginning of relevant substring */
1.664 +chr *end; /* end of same */
1.665 +{
1.666 + struct dfa *d;
1.667 + int i;
1.668 +
1.669 + assert(t != NULL);
1.670 + assert(t->op == '|');
1.671 +
1.672 + for (i = 0; t != NULL; t = t->right, i++) {
1.673 + MDEBUG(("trying %dth\n", i));
1.674 + assert(t->left != NULL && t->left->cnfa.nstates > 0);
1.675 + d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
1.676 + if (ISERR())
1.677 + return v->err;
1.678 + if (longest(v, d, begin, end, (int *)NULL) == end) {
1.679 + MDEBUG(("success\n"));
1.680 + freedfa(d);
1.681 + return dissect(v, t->left, begin, end);
1.682 + }
1.683 + freedfa(d);
1.684 + }
1.685 + return REG_ASSERT; /* none of them matched?!? */
1.686 +}
1.687 +
1.688 +/*
1.689 + - cdissect - determine subexpression matches (with complications)
1.690 + * The retry memory stores the offset of the trial midpoint from begin,
1.691 + * plus 1 so that 0 uniquely means "clean slate".
1.692 + ^ static int cdissect(struct vars *, struct subre *, chr *, chr *);
1.693 + */
1.694 +static int /* regexec return code */
1.695 +cdissect(v, t, begin, end)
1.696 +struct vars *v;
1.697 +struct subre *t;
1.698 +chr *begin; /* beginning of relevant substring */
1.699 +chr *end; /* end of same */
1.700 +{
1.701 + int er;
1.702 +
1.703 + assert(t != NULL);
1.704 + MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));
1.705 +
1.706 + switch (t->op) {
1.707 + case '=': /* terminal node */
1.708 + assert(t->left == NULL && t->right == NULL);
1.709 + return REG_OKAY; /* no action, parent did the work */
1.710 + break;
1.711 + case '|': /* alternation */
1.712 + assert(t->left != NULL);
1.713 + return caltdissect(v, t, begin, end);
1.714 + break;
1.715 + case 'b': /* back ref -- shouldn't be calling us! */
1.716 + assert(t->left == NULL && t->right == NULL);
1.717 + return cbrdissect(v, t, begin, end);
1.718 + break;
1.719 + case '.': /* concatenation */
1.720 + assert(t->left != NULL && t->right != NULL);
1.721 + return ccondissect(v, t, begin, end);
1.722 + break;
1.723 + case '(': /* capturing */
1.724 + assert(t->left != NULL && t->right == NULL);
1.725 + assert(t->subno > 0);
1.726 + er = cdissect(v, t->left, begin, end);
1.727 + if (er == REG_OKAY)
1.728 + subset(v, t, begin, end);
1.729 + return er;
1.730 + break;
1.731 + default:
1.732 + return REG_ASSERT;
1.733 + break;
1.734 + }
1.735 +}
1.736 +
1.737 +/*
1.738 + - ccondissect - concatenation subexpression matches (with complications)
1.739 + * The retry memory stores the offset of the trial midpoint from begin,
1.740 + * plus 1 so that 0 uniquely means "clean slate".
1.741 + ^ static int ccondissect(struct vars *, struct subre *, chr *, chr *);
1.742 + */
1.743 +static int /* regexec return code */
1.744 +ccondissect(v, t, begin, end)
1.745 +struct vars *v;
1.746 +struct subre *t;
1.747 +chr *begin; /* beginning of relevant substring */
1.748 +chr *end; /* end of same */
1.749 +{
1.750 + struct dfa *d;
1.751 + struct dfa *d2;
1.752 + chr *mid;
1.753 + int er;
1.754 +
1.755 + assert(t->op == '.');
1.756 + assert(t->left != NULL && t->left->cnfa.nstates > 0);
1.757 + assert(t->right != NULL && t->right->cnfa.nstates > 0);
1.758 +
1.759 + if (t->left->flags&SHORTER) /* reverse scan */
1.760 + return crevdissect(v, t, begin, end);
1.761 +
1.762 + d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
1.763 + if (ISERR())
1.764 + return v->err;
1.765 + d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
1.766 + if (ISERR()) {
1.767 + freedfa(d);
1.768 + return v->err;
1.769 + }
1.770 + MDEBUG(("cconcat %d\n", t->retry));
1.771 +
1.772 + /* pick a tentative midpoint */
1.773 + if (v->mem[t->retry] == 0) {
1.774 + mid = longest(v, d, begin, end, (int *)NULL);
1.775 + if (mid == NULL) {
1.776 + freedfa(d);
1.777 + freedfa(d2);
1.778 + return REG_NOMATCH;
1.779 + }
1.780 + MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
1.781 + v->mem[t->retry] = (mid - begin) + 1;
1.782 + } else {
1.783 + mid = begin + (v->mem[t->retry] - 1);
1.784 + MDEBUG(("working midpoint %ld\n", LOFF(mid)));
1.785 + }
1.786 +
1.787 + /* iterate until satisfaction or failure */
1.788 + for (;;) {
1.789 + /* try this midpoint on for size */
1.790 + er = cdissect(v, t->left, begin, mid);
1.791 + if (er == REG_OKAY &&
1.792 + longest(v, d2, mid, end, (int *)NULL) == end &&
1.793 + (er = cdissect(v, t->right, mid, end)) ==
1.794 + REG_OKAY)
1.795 + break; /* NOTE BREAK OUT */
1.796 + if (er != REG_OKAY && er != REG_NOMATCH) {
1.797 + freedfa(d);
1.798 + freedfa(d2);
1.799 + return er;
1.800 + }
1.801 +
1.802 + /* that midpoint didn't work, find a new one */
1.803 + if (mid == begin) {
1.804 + /* all possibilities exhausted */
1.805 + MDEBUG(("%d no midpoint\n", t->retry));
1.806 + freedfa(d);
1.807 + freedfa(d2);
1.808 + return REG_NOMATCH;
1.809 + }
1.810 + mid = longest(v, d, begin, mid-1, (int *)NULL);
1.811 + if (mid == NULL) {
1.812 + /* failed to find a new one */
1.813 + MDEBUG(("%d failed midpoint\n", t->retry));
1.814 + freedfa(d);
1.815 + freedfa(d2);
1.816 + return REG_NOMATCH;
1.817 + }
1.818 + MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
1.819 + v->mem[t->retry] = (mid - begin) + 1;
1.820 + zapmem(v, t->left);
1.821 + zapmem(v, t->right);
1.822 + }
1.823 +
1.824 + /* satisfaction */
1.825 + MDEBUG(("successful\n"));
1.826 + freedfa(d);
1.827 + freedfa(d2);
1.828 + return REG_OKAY;
1.829 +}
1.830 +
1.831 +/*
1.832 + - crevdissect - determine backref shortest-first subexpression matches
1.833 + * The retry memory stores the offset of the trial midpoint from begin,
1.834 + * plus 1 so that 0 uniquely means "clean slate".
1.835 + ^ static int crevdissect(struct vars *, struct subre *, chr *, chr *);
1.836 + */
1.837 +static int /* regexec return code */
1.838 +crevdissect(v, t, begin, end)
1.839 +struct vars *v;
1.840 +struct subre *t;
1.841 +chr *begin; /* beginning of relevant substring */
1.842 +chr *end; /* end of same */
1.843 +{
1.844 + struct dfa *d;
1.845 + struct dfa *d2;
1.846 + chr *mid;
1.847 + int er;
1.848 +
1.849 + assert(t->op == '.');
1.850 + assert(t->left != NULL && t->left->cnfa.nstates > 0);
1.851 + assert(t->right != NULL && t->right->cnfa.nstates > 0);
1.852 + assert(t->left->flags&SHORTER);
1.853 +
1.854 + /* concatenation -- need to split the substring between parts */
1.855 + d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
1.856 + if (ISERR())
1.857 + return v->err;
1.858 + d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
1.859 + if (ISERR()) {
1.860 + freedfa(d);
1.861 + return v->err;
1.862 + }
1.863 + MDEBUG(("crev %d\n", t->retry));
1.864 +
1.865 + /* pick a tentative midpoint */
1.866 + if (v->mem[t->retry] == 0) {
1.867 + mid = shortest(v, d, begin, begin, end, (chr **)NULL, (int *)NULL);
1.868 + if (mid == NULL) {
1.869 + freedfa(d);
1.870 + freedfa(d2);
1.871 + return REG_NOMATCH;
1.872 + }
1.873 + MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
1.874 + v->mem[t->retry] = (mid - begin) + 1;
1.875 + } else {
1.876 + mid = begin + (v->mem[t->retry] - 1);
1.877 + MDEBUG(("working midpoint %ld\n", LOFF(mid)));
1.878 + }
1.879 +
1.880 + /* iterate until satisfaction or failure */
1.881 + for (;;) {
1.882 + /* try this midpoint on for size */
1.883 + er = cdissect(v, t->left, begin, mid);
1.884 + if (er == REG_OKAY &&
1.885 + longest(v, d2, mid, end, (int *)NULL) == end &&
1.886 + (er = cdissect(v, t->right, mid, end)) ==
1.887 + REG_OKAY)
1.888 + break; /* NOTE BREAK OUT */
1.889 + if (er != REG_OKAY && er != REG_NOMATCH) {
1.890 + freedfa(d);
1.891 + freedfa(d2);
1.892 + return er;
1.893 + }
1.894 +
1.895 + /* that midpoint didn't work, find a new one */
1.896 + if (mid == end) {
1.897 + /* all possibilities exhausted */
1.898 + MDEBUG(("%d no midpoint\n", t->retry));
1.899 + freedfa(d);
1.900 + freedfa(d2);
1.901 + return REG_NOMATCH;
1.902 + }
1.903 + mid = shortest(v, d, begin, mid+1, end, (chr **)NULL, (int *)NULL);
1.904 + if (mid == NULL) {
1.905 + /* failed to find a new one */
1.906 + MDEBUG(("%d failed midpoint\n", t->retry));
1.907 + freedfa(d);
1.908 + freedfa(d2);
1.909 + return REG_NOMATCH;
1.910 + }
1.911 + MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
1.912 + v->mem[t->retry] = (mid - begin) + 1;
1.913 + zapmem(v, t->left);
1.914 + zapmem(v, t->right);
1.915 + }
1.916 +
1.917 + /* satisfaction */
1.918 + MDEBUG(("successful\n"));
1.919 + freedfa(d);
1.920 + freedfa(d2);
1.921 + return REG_OKAY;
1.922 +}
1.923 +
1.924 +/*
1.925 + - cbrdissect - determine backref subexpression matches
1.926 + ^ static int cbrdissect(struct vars *, struct subre *, chr *, chr *);
1.927 + */
1.928 +static int /* regexec return code */
1.929 +cbrdissect(v, t, begin, end)
1.930 +struct vars *v;
1.931 +struct subre *t;
1.932 +chr *begin; /* beginning of relevant substring */
1.933 +chr *end; /* end of same */
1.934 +{
1.935 + int i;
1.936 + int n = t->subno;
1.937 + size_t len;
1.938 + chr *paren;
1.939 + chr *p;
1.940 + chr *stop;
1.941 + int min = t->min;
1.942 + int max = t->max;
1.943 +
1.944 + assert(t != NULL);
1.945 + assert(t->op == 'b');
1.946 + assert(n >= 0);
1.947 + assert((size_t)n < v->nmatch);
1.948 +
1.949 + MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max));
1.950 +
1.951 + if (v->pmatch[n].rm_so == -1)
1.952 + return REG_NOMATCH;
1.953 + paren = v->start + v->pmatch[n].rm_so;
1.954 + len = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
1.955 +
1.956 + /* no room to maneuver -- retries are pointless */
1.957 + if (v->mem[t->retry])
1.958 + return REG_NOMATCH;
1.959 + v->mem[t->retry] = 1;
1.960 +
1.961 + /* special-case zero-length string */
1.962 + if (len == 0) {
1.963 + if (begin == end)
1.964 + return REG_OKAY;
1.965 + return REG_NOMATCH;
1.966 + }
1.967 +
1.968 + /* and too-short string */
1.969 + assert(end >= begin);
1.970 + if ((size_t)(end - begin) < len)
1.971 + return REG_NOMATCH;
1.972 + stop = end - len;
1.973 +
1.974 + /* count occurrences */
1.975 + i = 0;
1.976 + for (p = begin; p <= stop && (i < max || max == INFINITY); p += len) {
1.977 + if ((*v->g->compare)(paren, p, len) != 0)
1.978 + break;
1.979 + i++;
1.980 + }
1.981 + MDEBUG(("cbackref found %d\n", i));
1.982 +
1.983 + /* and sort it out */
1.984 + if (p != end) /* didn't consume all of it */
1.985 + return REG_NOMATCH;
1.986 + if (min <= i && (i <= max || max == INFINITY))
1.987 + return REG_OKAY;
1.988 + return REG_NOMATCH; /* out of range */
1.989 +}
1.990 +
1.991 +/*
1.992 + - caltdissect - determine alternative subexpression matches (w. complications)
1.993 + ^ static int caltdissect(struct vars *, struct subre *, chr *, chr *);
1.994 + */
1.995 +static int /* regexec return code */
1.996 +caltdissect(v, t, begin, end)
1.997 +struct vars *v;
1.998 +struct subre *t;
1.999 +chr *begin; /* beginning of relevant substring */
1.1000 +chr *end; /* end of same */
1.1001 +{
1.1002 + struct dfa *d;
1.1003 + int er;
1.1004 +# define UNTRIED 0 /* not yet tried at all */
1.1005 +# define TRYING 1 /* top matched, trying submatches */
1.1006 +# define TRIED 2 /* top didn't match or submatches exhausted */
1.1007 +
1.1008 + if (t == NULL)
1.1009 + return REG_NOMATCH;
1.1010 + assert(t->op == '|');
1.1011 + if (v->mem[t->retry] == TRIED)
1.1012 + return caltdissect(v, t->right, begin, end);
1.1013 +
1.1014 + MDEBUG(("calt n%d\n", t->retry));
1.1015 + assert(t->left != NULL);
1.1016 +
1.1017 + if (v->mem[t->retry] == UNTRIED) {
1.1018 + d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
1.1019 + if (ISERR())
1.1020 + return v->err;
1.1021 + if (longest(v, d, begin, end, (int *)NULL) != end) {
1.1022 + freedfa(d);
1.1023 + v->mem[t->retry] = TRIED;
1.1024 + return caltdissect(v, t->right, begin, end);
1.1025 + }
1.1026 + freedfa(d);
1.1027 + MDEBUG(("calt matched\n"));
1.1028 + v->mem[t->retry] = TRYING;
1.1029 + }
1.1030 +
1.1031 + er = cdissect(v, t->left, begin, end);
1.1032 + if (er != REG_NOMATCH)
1.1033 + return er;
1.1034 +
1.1035 + v->mem[t->retry] = TRIED;
1.1036 + return caltdissect(v, t->right, begin, end);
1.1037 +}
1.1038 +
1.1039 +
1.1040 +
1.1041 +#include "rege_dfa.c"