sl@0: /* sl@0: * re_*exec and friends - match REs sl@0: * sl@0: * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. sl@0: * sl@0: * Development of this software was funded, in part, by Cray Research Inc., sl@0: * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics sl@0: * Corporation, none of whom are responsible for the results. The author sl@0: * thanks all of them. sl@0: * sl@0: * Redistribution and use in source and binary forms -- with or without sl@0: * modification -- are permitted for any purpose, provided that sl@0: * redistributions in source form retain this entire copyright notice and sl@0: * indicate the origin and nature of any modifications. sl@0: * sl@0: * I'd appreciate being given credit for this package in the documentation sl@0: * of software which uses it, but that is not a requirement. sl@0: * sl@0: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, sl@0: * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY sl@0: * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL sl@0: * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, sl@0: * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, sl@0: * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; sl@0: * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, sl@0: * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR sl@0: * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF sl@0: * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. sl@0: * sl@0: */ sl@0: sl@0: #include "regguts.h" sl@0: sl@0: sl@0: sl@0: /* lazy-DFA representation */ sl@0: struct arcp { /* "pointer" to an outarc */ sl@0: struct sset *ss; sl@0: color co; sl@0: }; sl@0: sl@0: struct sset { /* state set */ sl@0: unsigned *states; /* pointer to bitvector */ sl@0: unsigned hash; /* hash of bitvector */ sl@0: # define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw)) sl@0: # define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \ sl@0: memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0)) sl@0: int flags; sl@0: # define STARTER 01 /* the initial state set */ sl@0: # define POSTSTATE 02 /* includes the goal state */ sl@0: # define LOCKED 04 /* locked in cache */ sl@0: # define NOPROGRESS 010 /* zero-progress state set */ sl@0: struct arcp ins; /* chain of inarcs pointing here */ sl@0: chr *lastseen; /* last entered on arrival here */ sl@0: struct sset **outs; /* outarc vector indexed by color */ sl@0: struct arcp *inchain; /* chain-pointer vector for outarcs */ sl@0: }; sl@0: sl@0: struct dfa { sl@0: int nssets; /* size of cache */ sl@0: int nssused; /* how many entries occupied yet */ sl@0: int nstates; /* number of states */ sl@0: int ncolors; /* length of outarc and inchain vectors */ sl@0: int wordsper; /* length of state-set bitvectors */ sl@0: struct sset *ssets; /* state-set cache */ sl@0: unsigned *statesarea; /* bitvector storage */ sl@0: unsigned *work; /* pointer to work area within statesarea */ sl@0: struct sset **outsarea; /* outarc-vector storage */ sl@0: struct arcp *incarea; /* inchain storage */ sl@0: struct cnfa *cnfa; sl@0: struct colormap *cm; sl@0: chr *lastpost; /* location of last cache-flushed success */ sl@0: chr *lastnopr; /* location of last cache-flushed NOPROGRESS */ sl@0: struct sset *search; /* replacement-search-pointer memory */ sl@0: int cptsmalloced; /* were the areas individually malloced? */ sl@0: char *mallocarea; /* self, or master malloced area, or NULL */ sl@0: }; sl@0: sl@0: #define WORK 1 /* number of work bitvectors needed */ sl@0: sl@0: /* setup for non-malloc allocation for small cases */ sl@0: #define FEWSTATES 20 /* must be less than UBITS */ sl@0: #define FEWCOLORS 15 sl@0: struct smalldfa { sl@0: struct dfa dfa; sl@0: struct sset ssets[FEWSTATES*2]; sl@0: unsigned statesarea[FEWSTATES*2 + WORK]; sl@0: struct sset *outsarea[FEWSTATES*2 * FEWCOLORS]; sl@0: struct arcp incarea[FEWSTATES*2 * FEWCOLORS]; sl@0: }; sl@0: #define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */ sl@0: sl@0: sl@0: sl@0: /* internal variables, bundled for easy passing around */ sl@0: struct vars { sl@0: regex_t *re; sl@0: struct guts *g; sl@0: int eflags; /* copies of arguments */ sl@0: size_t nmatch; sl@0: regmatch_t *pmatch; sl@0: rm_detail_t *details; sl@0: chr *start; /* start of string */ sl@0: chr *stop; /* just past end of string */ sl@0: int err; /* error code if any (0 none) */ sl@0: regoff_t *mem; /* memory vector for backtracking */ sl@0: struct smalldfa dfa1; sl@0: struct smalldfa dfa2; sl@0: }; sl@0: #define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */ sl@0: #define ISERR() VISERR(v) sl@0: #define VERR(vv,e) (((vv)->err) ? (vv)->err : ((vv)->err = (e))) sl@0: #define ERR(e) VERR(v, e) /* record an error */ sl@0: #define NOERR() {if (ISERR()) return v->err;} /* if error seen, return it */ sl@0: #define OFF(p) ((p) - v->start) sl@0: #define LOFF(p) ((long)OFF(p)) sl@0: sl@0: sl@0: sl@0: /* sl@0: * forward declarations sl@0: */ sl@0: /* =====^!^===== begin forwards =====^!^===== */ sl@0: /* automatically gathered by fwd; do not hand-edit */ sl@0: /* === regexec.c === */ sl@0: int exec _ANSI_ARGS_((regex_t *, CONST chr *, size_t, rm_detail_t *, size_t, regmatch_t [], int)); sl@0: static int find _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *)); sl@0: static int cfind _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *)); sl@0: static int cfindloop _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **)); sl@0: static VOID zapsubs _ANSI_ARGS_((regmatch_t *, size_t)); sl@0: static VOID zapmem _ANSI_ARGS_((struct vars *, struct subre *)); sl@0: static VOID subset _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); sl@0: static int dissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); sl@0: static int condissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); sl@0: static int altdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); sl@0: static int cdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); sl@0: static int ccondissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); sl@0: static int crevdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); sl@0: static int cbrdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); sl@0: static int caltdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); sl@0: /* === rege_dfa.c === */ sl@0: static chr *longest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, int *)); sl@0: static chr *shortest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *)); sl@0: static chr *lastcold _ANSI_ARGS_((struct vars *, struct dfa *)); sl@0: static struct dfa *newdfa _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct smalldfa *)); sl@0: static VOID freedfa _ANSI_ARGS_((struct dfa *)); sl@0: static unsigned hash _ANSI_ARGS_((unsigned *, int)); sl@0: static struct sset *initialize _ANSI_ARGS_((struct vars *, struct dfa *, chr *)); sl@0: static struct sset *miss _ANSI_ARGS_((struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *)); sl@0: static int lacon _ANSI_ARGS_((struct vars *, struct cnfa *, chr *, pcolor)); sl@0: static struct sset *getvacant _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *)); sl@0: static struct sset *pickss _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *)); sl@0: /* automatically gathered by fwd; do not hand-edit */ sl@0: /* =====^!^===== end forwards =====^!^===== */ sl@0: sl@0: sl@0: sl@0: /* sl@0: - exec - match regular expression sl@0: ^ int exec(regex_t *, CONST chr *, size_t, rm_detail_t *, sl@0: ^ size_t, regmatch_t [], int); sl@0: */ sl@0: int sl@0: exec(re, string, len, details, nmatch, pmatch, flags) sl@0: regex_t *re; sl@0: CONST chr *string; sl@0: size_t len; sl@0: rm_detail_t *details; sl@0: size_t nmatch; sl@0: regmatch_t pmatch[]; sl@0: int flags; sl@0: { sl@0: struct vars var; sl@0: register struct vars *v = &var; sl@0: int st; sl@0: size_t n; sl@0: int backref; sl@0: # define LOCALMAT 20 sl@0: regmatch_t mat[LOCALMAT]; sl@0: # define LOCALMEM 40 sl@0: regoff_t mem[LOCALMEM]; sl@0: sl@0: /* sanity checks */ sl@0: if (re == NULL || string == NULL || re->re_magic != REMAGIC) sl@0: return REG_INVARG; sl@0: if (re->re_csize != sizeof(chr)) sl@0: return REG_MIXED; sl@0: sl@0: /* setup */ sl@0: v->re = re; sl@0: v->g = (struct guts *)re->re_guts; sl@0: if ((v->g->cflags®_EXPECT) && details == NULL) sl@0: return REG_INVARG; sl@0: if (v->g->info®_UIMPOSSIBLE) sl@0: return REG_NOMATCH; sl@0: backref = (v->g->info®_UBACKREF) ? 1 : 0; sl@0: v->eflags = flags; sl@0: if (v->g->cflags®_NOSUB) sl@0: nmatch = 0; /* override client */ sl@0: v->nmatch = nmatch; sl@0: if (backref) { sl@0: /* need work area */ sl@0: if (v->g->nsub + 1 <= LOCALMAT) sl@0: v->pmatch = mat; sl@0: else sl@0: v->pmatch = (regmatch_t *)MALLOC((v->g->nsub + 1) * sl@0: sizeof(regmatch_t)); sl@0: if (v->pmatch == NULL) sl@0: return REG_ESPACE; sl@0: v->nmatch = v->g->nsub + 1; sl@0: } else sl@0: v->pmatch = pmatch; sl@0: v->details = details; sl@0: v->start = (chr *)string; sl@0: v->stop = (chr *)string + len; sl@0: v->err = 0; sl@0: if (backref) { sl@0: /* need retry memory */ sl@0: assert(v->g->ntree >= 0); sl@0: n = (size_t)v->g->ntree; sl@0: if (n <= LOCALMEM) sl@0: v->mem = mem; sl@0: else sl@0: v->mem = (regoff_t *)MALLOC(n*sizeof(regoff_t)); sl@0: if (v->mem == NULL) { sl@0: if (v->pmatch != pmatch && v->pmatch != mat) sl@0: FREE(v->pmatch); sl@0: return REG_ESPACE; sl@0: } sl@0: } else sl@0: v->mem = NULL; sl@0: sl@0: /* do it */ sl@0: assert(v->g->tree != NULL); sl@0: if (backref) sl@0: st = cfind(v, &v->g->tree->cnfa, &v->g->cmap); sl@0: else sl@0: st = find(v, &v->g->tree->cnfa, &v->g->cmap); sl@0: sl@0: /* copy (portion of) match vector over if necessary */ sl@0: if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) { sl@0: zapsubs(pmatch, nmatch); sl@0: n = (nmatch < v->nmatch) ? nmatch : v->nmatch; sl@0: memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t)); sl@0: } sl@0: sl@0: /* clean up */ sl@0: if (v->pmatch != pmatch && v->pmatch != mat) sl@0: FREE(v->pmatch); sl@0: if (v->mem != NULL && v->mem != mem) sl@0: FREE(v->mem); sl@0: return st; sl@0: } sl@0: sl@0: /* sl@0: - find - find a match for the main NFA (no-complications case) sl@0: ^ static int find(struct vars *, struct cnfa *, struct colormap *); sl@0: */ sl@0: static int sl@0: find(v, cnfa, cm) sl@0: struct vars *v; sl@0: struct cnfa *cnfa; sl@0: struct colormap *cm; sl@0: { sl@0: struct dfa *s; sl@0: struct dfa *d; sl@0: chr *begin; sl@0: chr *end = NULL; sl@0: chr *cold; sl@0: chr *open; /* open and close of range of possible starts */ sl@0: chr *close; sl@0: int hitend; sl@0: int shorter = (v->g->tree->flags&SHORTER) ? 1 : 0; sl@0: sl@0: /* first, a shot with the search RE */ sl@0: s = newdfa(v, &v->g->search, cm, &v->dfa1); sl@0: assert(!(ISERR() && s != NULL)); sl@0: NOERR(); sl@0: MDEBUG(("\nsearch at %ld\n", LOFF(v->start))); sl@0: cold = NULL; sl@0: close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *)NULL); sl@0: freedfa(s); sl@0: NOERR(); sl@0: if (v->g->cflags®_EXPECT) { sl@0: assert(v->details != NULL); sl@0: if (cold != NULL) sl@0: v->details->rm_extend.rm_so = OFF(cold); sl@0: else sl@0: v->details->rm_extend.rm_so = OFF(v->stop); sl@0: v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ sl@0: } sl@0: if (close == NULL) /* not found */ sl@0: return REG_NOMATCH; sl@0: if (v->nmatch == 0) /* found, don't need exact location */ sl@0: return REG_OKAY; sl@0: sl@0: /* find starting point and match */ sl@0: assert(cold != NULL); sl@0: open = cold; sl@0: cold = NULL; sl@0: MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close))); sl@0: d = newdfa(v, cnfa, cm, &v->dfa1); sl@0: assert(!(ISERR() && d != NULL)); sl@0: NOERR(); sl@0: for (begin = open; begin <= close; begin++) { sl@0: MDEBUG(("\nfind trying at %ld\n", LOFF(begin))); sl@0: if (shorter) sl@0: end = shortest(v, d, begin, begin, v->stop, sl@0: (chr **)NULL, &hitend); sl@0: else sl@0: end = longest(v, d, begin, v->stop, &hitend); sl@0: NOERR(); sl@0: if (hitend && cold == NULL) sl@0: cold = begin; sl@0: if (end != NULL) sl@0: break; /* NOTE BREAK OUT */ sl@0: } sl@0: assert(end != NULL); /* search RE succeeded so loop should */ sl@0: freedfa(d); sl@0: sl@0: /* and pin down details */ sl@0: assert(v->nmatch > 0); sl@0: v->pmatch[0].rm_so = OFF(begin); sl@0: v->pmatch[0].rm_eo = OFF(end); sl@0: if (v->g->cflags®_EXPECT) { sl@0: if (cold != NULL) sl@0: v->details->rm_extend.rm_so = OFF(cold); sl@0: else sl@0: v->details->rm_extend.rm_so = OFF(v->stop); sl@0: v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ sl@0: } sl@0: if (v->nmatch == 1) /* no need for submatches */ sl@0: return REG_OKAY; sl@0: sl@0: /* submatches */ sl@0: zapsubs(v->pmatch, v->nmatch); sl@0: return dissect(v, v->g->tree, begin, end); sl@0: } sl@0: sl@0: /* sl@0: - cfind - find a match for the main NFA (with complications) sl@0: ^ static int cfind(struct vars *, struct cnfa *, struct colormap *); sl@0: */ sl@0: static int sl@0: cfind(v, cnfa, cm) sl@0: struct vars *v; sl@0: struct cnfa *cnfa; sl@0: struct colormap *cm; sl@0: { sl@0: struct dfa *s; sl@0: struct dfa *d; sl@0: chr *cold = NULL; /* silence gcc 4 warning */ sl@0: int ret; sl@0: sl@0: s = newdfa(v, &v->g->search, cm, &v->dfa1); sl@0: NOERR(); sl@0: d = newdfa(v, cnfa, cm, &v->dfa2); sl@0: if (ISERR()) { sl@0: assert(d == NULL); sl@0: freedfa(s); sl@0: return v->err; sl@0: } sl@0: sl@0: ret = cfindloop(v, cnfa, cm, d, s, &cold); sl@0: sl@0: freedfa(d); sl@0: freedfa(s); sl@0: NOERR(); sl@0: if (v->g->cflags®_EXPECT) { sl@0: assert(v->details != NULL); sl@0: if (cold != NULL) sl@0: v->details->rm_extend.rm_so = OFF(cold); sl@0: else sl@0: v->details->rm_extend.rm_so = OFF(v->stop); sl@0: v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ sl@0: } sl@0: return ret; sl@0: } sl@0: sl@0: /* sl@0: - cfindloop - the heart of cfind sl@0: ^ static int cfindloop(struct vars *, struct cnfa *, struct colormap *, sl@0: ^ struct dfa *, struct dfa *, chr **); sl@0: */ sl@0: static int sl@0: cfindloop(v, cnfa, cm, d, s, coldp) sl@0: struct vars *v; sl@0: struct cnfa *cnfa; sl@0: struct colormap *cm; sl@0: struct dfa *d; sl@0: struct dfa *s; sl@0: chr **coldp; /* where to put coldstart pointer */ sl@0: { sl@0: chr *begin; sl@0: chr *end; sl@0: chr *cold; sl@0: chr *open; /* open and close of range of possible starts */ sl@0: chr *close; sl@0: chr *estart; sl@0: chr *estop; sl@0: int er; sl@0: int shorter = v->g->tree->flags&SHORTER; sl@0: int hitend; sl@0: sl@0: assert(d != NULL && s != NULL); sl@0: cold = NULL; sl@0: close = v->start; sl@0: do { sl@0: MDEBUG(("\ncsearch at %ld\n", LOFF(close))); sl@0: close = shortest(v, s, close, close, v->stop, &cold, (int *)NULL); sl@0: if (close == NULL) sl@0: break; /* NOTE BREAK */ sl@0: assert(cold != NULL); sl@0: open = cold; sl@0: cold = NULL; sl@0: MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close))); sl@0: for (begin = open; begin <= close; begin++) { sl@0: MDEBUG(("\ncfind trying at %ld\n", LOFF(begin))); sl@0: estart = begin; sl@0: estop = v->stop; sl@0: for (;;) { sl@0: if (shorter) sl@0: end = shortest(v, d, begin, estart, sl@0: estop, (chr **)NULL, &hitend); sl@0: else sl@0: end = longest(v, d, begin, estop, sl@0: &hitend); sl@0: if (hitend && cold == NULL) sl@0: cold = begin; sl@0: if (end == NULL) sl@0: break; /* NOTE BREAK OUT */ sl@0: MDEBUG(("tentative end %ld\n", LOFF(end))); sl@0: zapsubs(v->pmatch, v->nmatch); sl@0: zapmem(v, v->g->tree); sl@0: er = cdissect(v, v->g->tree, begin, end); sl@0: if (er == REG_OKAY) { sl@0: if (v->nmatch > 0) { sl@0: v->pmatch[0].rm_so = OFF(begin); sl@0: v->pmatch[0].rm_eo = OFF(end); sl@0: } sl@0: *coldp = cold; sl@0: return REG_OKAY; sl@0: } sl@0: if (er != REG_NOMATCH) { sl@0: ERR(er); sl@0: return er; sl@0: } sl@0: if ((shorter) ? end == estop : end == begin) { sl@0: /* no point in trying again */ sl@0: *coldp = cold; sl@0: return REG_NOMATCH; sl@0: } sl@0: /* go around and try again */ sl@0: if (shorter) sl@0: estart = end + 1; sl@0: else sl@0: estop = end - 1; sl@0: } sl@0: } sl@0: } while (close < v->stop); sl@0: sl@0: *coldp = cold; sl@0: return REG_NOMATCH; sl@0: } sl@0: sl@0: /* sl@0: - zapsubs - initialize the subexpression matches to "no match" sl@0: ^ static VOID zapsubs(regmatch_t *, size_t); sl@0: */ sl@0: static VOID sl@0: zapsubs(p, n) sl@0: regmatch_t *p; sl@0: size_t n; sl@0: { sl@0: size_t i; sl@0: sl@0: for (i = n-1; i > 0; i--) { sl@0: p[i].rm_so = -1; sl@0: p[i].rm_eo = -1; sl@0: } sl@0: } sl@0: sl@0: /* sl@0: - zapmem - initialize the retry memory of a subtree to zeros sl@0: ^ static VOID zapmem(struct vars *, struct subre *); sl@0: */ sl@0: static VOID sl@0: zapmem(v, t) sl@0: struct vars *v; sl@0: struct subre *t; sl@0: { sl@0: if (t == NULL) sl@0: return; sl@0: sl@0: assert(v->mem != NULL); sl@0: v->mem[t->retry] = 0; sl@0: if (t->op == '(') { sl@0: assert(t->subno > 0); sl@0: v->pmatch[t->subno].rm_so = -1; sl@0: v->pmatch[t->subno].rm_eo = -1; sl@0: } sl@0: sl@0: if (t->left != NULL) sl@0: zapmem(v, t->left); sl@0: if (t->right != NULL) sl@0: zapmem(v, t->right); sl@0: } sl@0: sl@0: /* sl@0: - subset - set any subexpression relevant to a successful subre sl@0: ^ static VOID subset(struct vars *, struct subre *, chr *, chr *); sl@0: */ sl@0: static VOID sl@0: subset(v, sub, begin, end) sl@0: struct vars *v; sl@0: struct subre *sub; sl@0: chr *begin; sl@0: chr *end; sl@0: { sl@0: int n = sub->subno; sl@0: sl@0: assert(n > 0); sl@0: if ((size_t)n >= v->nmatch) sl@0: return; sl@0: sl@0: MDEBUG(("setting %d\n", n)); sl@0: v->pmatch[n].rm_so = OFF(begin); sl@0: v->pmatch[n].rm_eo = OFF(end); sl@0: } sl@0: sl@0: /* sl@0: - dissect - determine subexpression matches (uncomplicated case) sl@0: ^ static int dissect(struct vars *, struct subre *, chr *, chr *); sl@0: */ sl@0: static int /* regexec return code */ sl@0: dissect(v, t, begin, end) sl@0: struct vars *v; sl@0: struct subre *t; sl@0: chr *begin; /* beginning of relevant substring */ sl@0: chr *end; /* end of same */ sl@0: { sl@0: assert(t != NULL); sl@0: MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end))); sl@0: sl@0: switch (t->op) { sl@0: case '=': /* terminal node */ sl@0: assert(t->left == NULL && t->right == NULL); sl@0: return REG_OKAY; /* no action, parent did the work */ sl@0: break; sl@0: case '|': /* alternation */ sl@0: assert(t->left != NULL); sl@0: return altdissect(v, t, begin, end); sl@0: break; sl@0: case 'b': /* back ref -- shouldn't be calling us! */ sl@0: return REG_ASSERT; sl@0: break; sl@0: case '.': /* concatenation */ sl@0: assert(t->left != NULL && t->right != NULL); sl@0: return condissect(v, t, begin, end); sl@0: break; sl@0: case '(': /* capturing */ sl@0: assert(t->left != NULL && t->right == NULL); sl@0: assert(t->subno > 0); sl@0: subset(v, t, begin, end); sl@0: return dissect(v, t->left, begin, end); sl@0: break; sl@0: default: sl@0: return REG_ASSERT; sl@0: break; sl@0: } sl@0: } sl@0: sl@0: /* sl@0: - condissect - determine concatenation subexpression matches (uncomplicated) sl@0: ^ static int condissect(struct vars *, struct subre *, chr *, chr *); sl@0: */ sl@0: static int /* regexec return code */ sl@0: condissect(v, t, begin, end) sl@0: struct vars *v; sl@0: struct subre *t; sl@0: chr *begin; /* beginning of relevant substring */ sl@0: chr *end; /* end of same */ sl@0: { sl@0: struct dfa *d; sl@0: struct dfa *d2; sl@0: chr *mid; sl@0: int i; sl@0: int shorter = (t->left->flags&SHORTER) ? 1 : 0; sl@0: chr *stop = (shorter) ? end : begin; sl@0: sl@0: assert(t->op == '.'); sl@0: assert(t->left != NULL && t->left->cnfa.nstates > 0); sl@0: assert(t->right != NULL && t->right->cnfa.nstates > 0); sl@0: sl@0: d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1); sl@0: NOERR(); sl@0: d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &v->dfa2); sl@0: if (ISERR()) { sl@0: assert(d2 == NULL); sl@0: freedfa(d); sl@0: return v->err; sl@0: } sl@0: sl@0: /* pick a tentative midpoint */ sl@0: if (shorter) sl@0: mid = shortest(v, d, begin, begin, end, (chr **)NULL, sl@0: (int *)NULL); sl@0: else sl@0: mid = longest(v, d, begin, end, (int *)NULL); sl@0: if (mid == NULL) { sl@0: freedfa(d); sl@0: freedfa(d2); sl@0: return REG_ASSERT; sl@0: } sl@0: MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); sl@0: sl@0: /* iterate until satisfaction or failure */ sl@0: while (longest(v, d2, mid, end, (int *)NULL) != end) { sl@0: /* that midpoint didn't work, find a new one */ sl@0: if (mid == stop) { sl@0: /* all possibilities exhausted! */ sl@0: MDEBUG(("no midpoint!\n")); sl@0: freedfa(d); sl@0: freedfa(d2); sl@0: return REG_ASSERT; sl@0: } sl@0: if (shorter) sl@0: mid = shortest(v, d, begin, mid+1, end, (chr **)NULL, sl@0: (int *)NULL); sl@0: else sl@0: mid = longest(v, d, begin, mid-1, (int *)NULL); sl@0: if (mid == NULL) { sl@0: /* failed to find a new one! */ sl@0: MDEBUG(("failed midpoint!\n")); sl@0: freedfa(d); sl@0: freedfa(d2); sl@0: return REG_ASSERT; sl@0: } sl@0: MDEBUG(("new midpoint %ld\n", LOFF(mid))); sl@0: } sl@0: sl@0: /* satisfaction */ sl@0: MDEBUG(("successful\n")); sl@0: freedfa(d); sl@0: freedfa(d2); sl@0: i = dissect(v, t->left, begin, mid); sl@0: if (i != REG_OKAY) sl@0: return i; sl@0: return dissect(v, t->right, mid, end); sl@0: } sl@0: sl@0: /* sl@0: - altdissect - determine alternative subexpression matches (uncomplicated) sl@0: ^ static int altdissect(struct vars *, struct subre *, chr *, chr *); sl@0: */ sl@0: static int /* regexec return code */ sl@0: altdissect(v, t, begin, end) sl@0: struct vars *v; sl@0: struct subre *t; sl@0: chr *begin; /* beginning of relevant substring */ sl@0: chr *end; /* end of same */ sl@0: { sl@0: struct dfa *d; sl@0: int i; sl@0: sl@0: assert(t != NULL); sl@0: assert(t->op == '|'); sl@0: sl@0: for (i = 0; t != NULL; t = t->right, i++) { sl@0: MDEBUG(("trying %dth\n", i)); sl@0: assert(t->left != NULL && t->left->cnfa.nstates > 0); sl@0: d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1); sl@0: if (ISERR()) sl@0: return v->err; sl@0: if (longest(v, d, begin, end, (int *)NULL) == end) { sl@0: MDEBUG(("success\n")); sl@0: freedfa(d); sl@0: return dissect(v, t->left, begin, end); sl@0: } sl@0: freedfa(d); sl@0: } sl@0: return REG_ASSERT; /* none of them matched?!? */ sl@0: } sl@0: sl@0: /* sl@0: - cdissect - determine subexpression matches (with complications) sl@0: * The retry memory stores the offset of the trial midpoint from begin, sl@0: * plus 1 so that 0 uniquely means "clean slate". sl@0: ^ static int cdissect(struct vars *, struct subre *, chr *, chr *); sl@0: */ sl@0: static int /* regexec return code */ sl@0: cdissect(v, t, begin, end) sl@0: struct vars *v; sl@0: struct subre *t; sl@0: chr *begin; /* beginning of relevant substring */ sl@0: chr *end; /* end of same */ sl@0: { sl@0: int er; sl@0: sl@0: assert(t != NULL); sl@0: MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op)); sl@0: sl@0: switch (t->op) { sl@0: case '=': /* terminal node */ sl@0: assert(t->left == NULL && t->right == NULL); sl@0: return REG_OKAY; /* no action, parent did the work */ sl@0: break; sl@0: case '|': /* alternation */ sl@0: assert(t->left != NULL); sl@0: return caltdissect(v, t, begin, end); sl@0: break; sl@0: case 'b': /* back ref -- shouldn't be calling us! */ sl@0: assert(t->left == NULL && t->right == NULL); sl@0: return cbrdissect(v, t, begin, end); sl@0: break; sl@0: case '.': /* concatenation */ sl@0: assert(t->left != NULL && t->right != NULL); sl@0: return ccondissect(v, t, begin, end); sl@0: break; sl@0: case '(': /* capturing */ sl@0: assert(t->left != NULL && t->right == NULL); sl@0: assert(t->subno > 0); sl@0: er = cdissect(v, t->left, begin, end); sl@0: if (er == REG_OKAY) sl@0: subset(v, t, begin, end); sl@0: return er; sl@0: break; sl@0: default: sl@0: return REG_ASSERT; sl@0: break; sl@0: } sl@0: } sl@0: sl@0: /* sl@0: - ccondissect - concatenation subexpression matches (with complications) sl@0: * The retry memory stores the offset of the trial midpoint from begin, sl@0: * plus 1 so that 0 uniquely means "clean slate". sl@0: ^ static int ccondissect(struct vars *, struct subre *, chr *, chr *); sl@0: */ sl@0: static int /* regexec return code */ sl@0: ccondissect(v, t, begin, end) sl@0: struct vars *v; sl@0: struct subre *t; sl@0: chr *begin; /* beginning of relevant substring */ sl@0: chr *end; /* end of same */ sl@0: { sl@0: struct dfa *d; sl@0: struct dfa *d2; sl@0: chr *mid; sl@0: int er; sl@0: sl@0: assert(t->op == '.'); sl@0: assert(t->left != NULL && t->left->cnfa.nstates > 0); sl@0: assert(t->right != NULL && t->right->cnfa.nstates > 0); sl@0: sl@0: if (t->left->flags&SHORTER) /* reverse scan */ sl@0: return crevdissect(v, t, begin, end); sl@0: sl@0: d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); sl@0: if (ISERR()) sl@0: return v->err; sl@0: d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC); sl@0: if (ISERR()) { sl@0: freedfa(d); sl@0: return v->err; sl@0: } sl@0: MDEBUG(("cconcat %d\n", t->retry)); sl@0: sl@0: /* pick a tentative midpoint */ sl@0: if (v->mem[t->retry] == 0) { sl@0: mid = longest(v, d, begin, end, (int *)NULL); sl@0: if (mid == NULL) { sl@0: freedfa(d); sl@0: freedfa(d2); sl@0: return REG_NOMATCH; sl@0: } sl@0: MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); sl@0: v->mem[t->retry] = (mid - begin) + 1; sl@0: } else { sl@0: mid = begin + (v->mem[t->retry] - 1); sl@0: MDEBUG(("working midpoint %ld\n", LOFF(mid))); sl@0: } sl@0: sl@0: /* iterate until satisfaction or failure */ sl@0: for (;;) { sl@0: /* try this midpoint on for size */ sl@0: er = cdissect(v, t->left, begin, mid); sl@0: if (er == REG_OKAY && sl@0: longest(v, d2, mid, end, (int *)NULL) == end && sl@0: (er = cdissect(v, t->right, mid, end)) == sl@0: REG_OKAY) sl@0: break; /* NOTE BREAK OUT */ sl@0: if (er != REG_OKAY && er != REG_NOMATCH) { sl@0: freedfa(d); sl@0: freedfa(d2); sl@0: return er; sl@0: } sl@0: sl@0: /* that midpoint didn't work, find a new one */ sl@0: if (mid == begin) { sl@0: /* all possibilities exhausted */ sl@0: MDEBUG(("%d no midpoint\n", t->retry)); sl@0: freedfa(d); sl@0: freedfa(d2); sl@0: return REG_NOMATCH; sl@0: } sl@0: mid = longest(v, d, begin, mid-1, (int *)NULL); sl@0: if (mid == NULL) { sl@0: /* failed to find a new one */ sl@0: MDEBUG(("%d failed midpoint\n", t->retry)); sl@0: freedfa(d); sl@0: freedfa(d2); sl@0: return REG_NOMATCH; sl@0: } sl@0: MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid))); sl@0: v->mem[t->retry] = (mid - begin) + 1; sl@0: zapmem(v, t->left); sl@0: zapmem(v, t->right); sl@0: } sl@0: sl@0: /* satisfaction */ sl@0: MDEBUG(("successful\n")); sl@0: freedfa(d); sl@0: freedfa(d2); sl@0: return REG_OKAY; sl@0: } sl@0: sl@0: /* sl@0: - crevdissect - determine backref shortest-first subexpression matches sl@0: * The retry memory stores the offset of the trial midpoint from begin, sl@0: * plus 1 so that 0 uniquely means "clean slate". sl@0: ^ static int crevdissect(struct vars *, struct subre *, chr *, chr *); sl@0: */ sl@0: static int /* regexec return code */ sl@0: crevdissect(v, t, begin, end) sl@0: struct vars *v; sl@0: struct subre *t; sl@0: chr *begin; /* beginning of relevant substring */ sl@0: chr *end; /* end of same */ sl@0: { sl@0: struct dfa *d; sl@0: struct dfa *d2; sl@0: chr *mid; sl@0: int er; sl@0: sl@0: assert(t->op == '.'); sl@0: assert(t->left != NULL && t->left->cnfa.nstates > 0); sl@0: assert(t->right != NULL && t->right->cnfa.nstates > 0); sl@0: assert(t->left->flags&SHORTER); sl@0: sl@0: /* concatenation -- need to split the substring between parts */ sl@0: d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); sl@0: if (ISERR()) sl@0: return v->err; sl@0: d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC); sl@0: if (ISERR()) { sl@0: freedfa(d); sl@0: return v->err; sl@0: } sl@0: MDEBUG(("crev %d\n", t->retry)); sl@0: sl@0: /* pick a tentative midpoint */ sl@0: if (v->mem[t->retry] == 0) { sl@0: mid = shortest(v, d, begin, begin, end, (chr **)NULL, (int *)NULL); sl@0: if (mid == NULL) { sl@0: freedfa(d); sl@0: freedfa(d2); sl@0: return REG_NOMATCH; sl@0: } sl@0: MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); sl@0: v->mem[t->retry] = (mid - begin) + 1; sl@0: } else { sl@0: mid = begin + (v->mem[t->retry] - 1); sl@0: MDEBUG(("working midpoint %ld\n", LOFF(mid))); sl@0: } sl@0: sl@0: /* iterate until satisfaction or failure */ sl@0: for (;;) { sl@0: /* try this midpoint on for size */ sl@0: er = cdissect(v, t->left, begin, mid); sl@0: if (er == REG_OKAY && sl@0: longest(v, d2, mid, end, (int *)NULL) == end && sl@0: (er = cdissect(v, t->right, mid, end)) == sl@0: REG_OKAY) sl@0: break; /* NOTE BREAK OUT */ sl@0: if (er != REG_OKAY && er != REG_NOMATCH) { sl@0: freedfa(d); sl@0: freedfa(d2); sl@0: return er; sl@0: } sl@0: sl@0: /* that midpoint didn't work, find a new one */ sl@0: if (mid == end) { sl@0: /* all possibilities exhausted */ sl@0: MDEBUG(("%d no midpoint\n", t->retry)); sl@0: freedfa(d); sl@0: freedfa(d2); sl@0: return REG_NOMATCH; sl@0: } sl@0: mid = shortest(v, d, begin, mid+1, end, (chr **)NULL, (int *)NULL); sl@0: if (mid == NULL) { sl@0: /* failed to find a new one */ sl@0: MDEBUG(("%d failed midpoint\n", t->retry)); sl@0: freedfa(d); sl@0: freedfa(d2); sl@0: return REG_NOMATCH; sl@0: } sl@0: MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid))); sl@0: v->mem[t->retry] = (mid - begin) + 1; sl@0: zapmem(v, t->left); sl@0: zapmem(v, t->right); sl@0: } sl@0: sl@0: /* satisfaction */ sl@0: MDEBUG(("successful\n")); sl@0: freedfa(d); sl@0: freedfa(d2); sl@0: return REG_OKAY; sl@0: } sl@0: sl@0: /* sl@0: - cbrdissect - determine backref subexpression matches sl@0: ^ static int cbrdissect(struct vars *, struct subre *, chr *, chr *); sl@0: */ sl@0: static int /* regexec return code */ sl@0: cbrdissect(v, t, begin, end) sl@0: struct vars *v; sl@0: struct subre *t; sl@0: chr *begin; /* beginning of relevant substring */ sl@0: chr *end; /* end of same */ sl@0: { sl@0: int i; sl@0: int n = t->subno; sl@0: size_t len; sl@0: chr *paren; sl@0: chr *p; sl@0: chr *stop; sl@0: int min = t->min; sl@0: int max = t->max; sl@0: sl@0: assert(t != NULL); sl@0: assert(t->op == 'b'); sl@0: assert(n >= 0); sl@0: assert((size_t)n < v->nmatch); sl@0: sl@0: MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max)); sl@0: sl@0: if (v->pmatch[n].rm_so == -1) sl@0: return REG_NOMATCH; sl@0: paren = v->start + v->pmatch[n].rm_so; sl@0: len = v->pmatch[n].rm_eo - v->pmatch[n].rm_so; sl@0: sl@0: /* no room to maneuver -- retries are pointless */ sl@0: if (v->mem[t->retry]) sl@0: return REG_NOMATCH; sl@0: v->mem[t->retry] = 1; sl@0: sl@0: /* special-case zero-length string */ sl@0: if (len == 0) { sl@0: if (begin == end) sl@0: return REG_OKAY; sl@0: return REG_NOMATCH; sl@0: } sl@0: sl@0: /* and too-short string */ sl@0: assert(end >= begin); sl@0: if ((size_t)(end - begin) < len) sl@0: return REG_NOMATCH; sl@0: stop = end - len; sl@0: sl@0: /* count occurrences */ sl@0: i = 0; sl@0: for (p = begin; p <= stop && (i < max || max == INFINITY); p += len) { sl@0: if ((*v->g->compare)(paren, p, len) != 0) sl@0: break; sl@0: i++; sl@0: } sl@0: MDEBUG(("cbackref found %d\n", i)); sl@0: sl@0: /* and sort it out */ sl@0: if (p != end) /* didn't consume all of it */ sl@0: return REG_NOMATCH; sl@0: if (min <= i && (i <= max || max == INFINITY)) sl@0: return REG_OKAY; sl@0: return REG_NOMATCH; /* out of range */ sl@0: } sl@0: sl@0: /* sl@0: - caltdissect - determine alternative subexpression matches (w. complications) sl@0: ^ static int caltdissect(struct vars *, struct subre *, chr *, chr *); sl@0: */ sl@0: static int /* regexec return code */ sl@0: caltdissect(v, t, begin, end) sl@0: struct vars *v; sl@0: struct subre *t; sl@0: chr *begin; /* beginning of relevant substring */ sl@0: chr *end; /* end of same */ sl@0: { sl@0: struct dfa *d; sl@0: int er; sl@0: # define UNTRIED 0 /* not yet tried at all */ sl@0: # define TRYING 1 /* top matched, trying submatches */ sl@0: # define TRIED 2 /* top didn't match or submatches exhausted */ sl@0: sl@0: if (t == NULL) sl@0: return REG_NOMATCH; sl@0: assert(t->op == '|'); sl@0: if (v->mem[t->retry] == TRIED) sl@0: return caltdissect(v, t->right, begin, end); sl@0: sl@0: MDEBUG(("calt n%d\n", t->retry)); sl@0: assert(t->left != NULL); sl@0: sl@0: if (v->mem[t->retry] == UNTRIED) { sl@0: d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); sl@0: if (ISERR()) sl@0: return v->err; sl@0: if (longest(v, d, begin, end, (int *)NULL) != end) { sl@0: freedfa(d); sl@0: v->mem[t->retry] = TRIED; sl@0: return caltdissect(v, t->right, begin, end); sl@0: } sl@0: freedfa(d); sl@0: MDEBUG(("calt matched\n")); sl@0: v->mem[t->retry] = TRYING; sl@0: } sl@0: sl@0: er = cdissect(v, t->left, begin, end); sl@0: if (er != REG_NOMATCH) sl@0: return er; sl@0: sl@0: v->mem[t->retry] = TRIED; sl@0: return caltdissect(v, t->right, begin, end); sl@0: } sl@0: sl@0: sl@0: sl@0: #include "rege_dfa.c"