sl@0: /* sl@0: * Internal interface definitions, etc., for the reg package sl@0: * sl@0: * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. sl@0: * sl@0: * Development of this software was funded, in part, by Cray Research Inc., sl@0: * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics sl@0: * Corporation, none of whom are responsible for the results. The author sl@0: * thanks all of them. sl@0: * sl@0: * Redistribution and use in source and binary forms -- with or without sl@0: * modification -- are permitted for any purpose, provided that sl@0: * redistributions in source form retain this entire copyright notice and sl@0: * indicate the origin and nature of any modifications. sl@0: * sl@0: * I'd appreciate being given credit for this package in the documentation sl@0: * of software which uses it, but that is not a requirement. sl@0: * sl@0: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, sl@0: * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY sl@0: * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL sl@0: * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, sl@0: * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, sl@0: * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; sl@0: * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, sl@0: * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR sl@0: * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF sl@0: * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. sl@0: */ sl@0: sl@0: sl@0: sl@0: /* sl@0: * Environmental customization. It should not (I hope) be necessary to sl@0: * alter the file you are now reading -- regcustom.h should handle it all, sl@0: * given care here and elsewhere. sl@0: */ sl@0: #include "regcustom.h" sl@0: sl@0: sl@0: sl@0: /* sl@0: * Things that regcustom.h might override. sl@0: */ sl@0: sl@0: /* standard header files (NULL is a reasonable indicator for them) */ sl@0: #ifndef NULL sl@0: #include sl@0: #include sl@0: #include sl@0: #include sl@0: #include sl@0: #endif sl@0: sl@0: /* assertions */ sl@0: #ifndef assert sl@0: # ifndef REG_DEBUG sl@0: # ifndef NDEBUG sl@0: # define NDEBUG /* no assertions */ sl@0: # endif sl@0: # endif sl@0: #include sl@0: #endif sl@0: sl@0: /* voids */ sl@0: #ifndef VOID sl@0: #define VOID void /* for function return values */ sl@0: #endif sl@0: #ifndef DISCARD sl@0: #define DISCARD VOID /* for throwing values away */ sl@0: #endif sl@0: #ifndef PVOID sl@0: #define PVOID VOID * /* generic pointer */ sl@0: #endif sl@0: #ifndef VS sl@0: #define VS(x) ((PVOID)(x)) /* cast something to generic ptr */ sl@0: #endif sl@0: #ifndef NOPARMS sl@0: #define NOPARMS VOID /* for empty parm lists */ sl@0: #endif sl@0: sl@0: /* const */ sl@0: #ifndef CONST sl@0: #define CONST const /* for old compilers, might be empty */ sl@0: #endif sl@0: sl@0: /* function-pointer declarator */ sl@0: #ifndef FUNCPTR sl@0: #if __STDC__ >= 1 sl@0: #define FUNCPTR(name, args) (*name)args sl@0: #else sl@0: #define FUNCPTR(name, args) (*name)() sl@0: #endif sl@0: #endif sl@0: sl@0: /* memory allocation */ sl@0: #ifndef MALLOC sl@0: #define MALLOC(n) malloc(n) sl@0: #endif sl@0: #ifndef REALLOC sl@0: #define REALLOC(p, n) realloc(VS(p), n) sl@0: #endif sl@0: #ifndef FREE sl@0: #define FREE(p) free(VS(p)) sl@0: #endif sl@0: sl@0: /* want size of a char in bits, and max value in bounded quantifiers */ sl@0: #ifndef CHAR_BIT sl@0: #include sl@0: #endif sl@0: #ifndef _POSIX2_RE_DUP_MAX sl@0: #define _POSIX2_RE_DUP_MAX 255 /* normally from */ sl@0: #endif sl@0: sl@0: sl@0: sl@0: /* sl@0: * misc sl@0: */ sl@0: sl@0: #define NOTREACHED 0 sl@0: #define xxx 1 sl@0: sl@0: #define DUPMAX _POSIX2_RE_DUP_MAX sl@0: #define INFINITY (DUPMAX+1) sl@0: sl@0: #define REMAGIC 0xfed7 /* magic number for main struct */ sl@0: sl@0: sl@0: sl@0: /* sl@0: * debugging facilities sl@0: */ sl@0: #ifdef REG_DEBUG sl@0: /* FDEBUG does finite-state tracing */ sl@0: #define FDEBUG(arglist) { if (v->eflags®_FTRACE) printf arglist; } sl@0: /* MDEBUG does higher-level tracing */ sl@0: #define MDEBUG(arglist) { if (v->eflags®_MTRACE) printf arglist; } sl@0: #else sl@0: #define FDEBUG(arglist) {} sl@0: #define MDEBUG(arglist) {} sl@0: #endif sl@0: sl@0: sl@0: sl@0: /* sl@0: * bitmap manipulation sl@0: */ sl@0: #define UBITS (CHAR_BIT * sizeof(unsigned)) sl@0: #define BSET(uv, sn) ((uv)[(sn)/UBITS] |= (unsigned)1 << ((sn)%UBITS)) sl@0: #define ISBSET(uv, sn) ((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS))) sl@0: sl@0: sl@0: sl@0: /* sl@0: * We dissect a chr into byts for colormap table indexing. Here we define sl@0: * a byt, which will be the same as a byte on most machines... The exact sl@0: * size of a byt is not critical, but about 8 bits is good, and extraction sl@0: * of 8-bit chunks is sometimes especially fast. sl@0: */ sl@0: #ifndef BYTBITS sl@0: #define BYTBITS 8 /* bits in a byt */ sl@0: #endif sl@0: #define BYTTAB (1<flags&FREECOL) sl@0: union tree *block; /* block of solid color, if any */ sl@0: }; sl@0: sl@0: /* the color map itself */ sl@0: struct colormap { sl@0: int magic; sl@0: # define CMMAGIC 0x876 sl@0: struct vars *v; /* for compile error reporting */ sl@0: size_t ncds; /* number of colordescs */ sl@0: size_t max; /* highest in use */ sl@0: color free; /* beginning of free chain (if non-0) */ sl@0: struct colordesc *cd; sl@0: # define CDEND(cm) (&(cm)->cd[(cm)->max + 1]) sl@0: # define NINLINECDS ((size_t)10) sl@0: struct colordesc cdspace[NINLINECDS]; sl@0: union tree tree[NBYTS]; /* tree top, plus fill blocks */ sl@0: }; sl@0: sl@0: /* optimization magic to do fast chr->color mapping */ sl@0: #define B0(c) ((c) & BYTMASK) sl@0: #define B1(c) (((c)>>BYTBITS) & BYTMASK) sl@0: #define B2(c) (((c)>>(2*BYTBITS)) & BYTMASK) sl@0: #define B3(c) (((c)>>(3*BYTBITS)) & BYTMASK) sl@0: #if NBYTS == 1 sl@0: #define GETCOLOR(cm, c) ((cm)->tree->tcolor[B0(c)]) sl@0: #endif sl@0: /* beware, for NBYTS>1, GETCOLOR() is unsafe -- 2nd arg used repeatedly */ sl@0: #if NBYTS == 2 sl@0: #define GETCOLOR(cm, c) ((cm)->tree->tptr[B1(c)]->tcolor[B0(c)]) sl@0: #endif sl@0: #if NBYTS == 4 sl@0: #define GETCOLOR(cm, c) ((cm)->tree->tptr[B3(c)]->tptr[B2(c)]->tptr[B1(c)]->tcolor[B0(c)]) sl@0: #endif sl@0: sl@0: sl@0: sl@0: /* sl@0: * Interface definitions for locale-interface functions in locale.c. sl@0: * Multi-character collating elements (MCCEs) cause most of the trouble. sl@0: */ sl@0: struct cvec { sl@0: int nchrs; /* number of chrs */ sl@0: int chrspace; /* number of chrs possible */ sl@0: chr *chrs; /* pointer to vector of chrs */ sl@0: int nranges; /* number of ranges (chr pairs) */ sl@0: int rangespace; /* number of chrs possible */ sl@0: chr *ranges; /* pointer to vector of chr pairs */ sl@0: int nmcces; /* number of MCCEs */ sl@0: int mccespace; /* number of MCCEs possible */ sl@0: int nmccechrs; /* number of chrs used for MCCEs */ sl@0: chr *mcces[1]; /* pointers to 0-terminated MCCEs */ sl@0: /* and both batches of chrs are on the end */ sl@0: }; sl@0: sl@0: /* caution: this value cannot be changed easily */ sl@0: #define MAXMCCE 2 /* length of longest MCCE */ sl@0: sl@0: sl@0: sl@0: /* sl@0: * definitions for NFA internal representation sl@0: * sl@0: * Having a "from" pointer within each arc may seem redundant, but it sl@0: * saves a lot of hassle. sl@0: */ sl@0: struct state; sl@0: sl@0: struct arc { sl@0: int type; sl@0: # define ARCFREE '\0' sl@0: color co; sl@0: struct state *from; /* where it's from (and contained within) */ sl@0: struct state *to; /* where it's to */ sl@0: struct arc *outchain; /* *from's outs chain or free chain */ sl@0: # define freechain outchain sl@0: struct arc *inchain; /* *to's ins chain */ sl@0: struct arc *colorchain; /* color's arc chain */ sl@0: }; sl@0: sl@0: struct arcbatch { /* for bulk allocation of arcs */ sl@0: struct arcbatch *next; sl@0: # define ABSIZE 10 sl@0: struct arc a[ABSIZE]; sl@0: }; sl@0: sl@0: struct state { sl@0: int no; sl@0: # define FREESTATE (-1) sl@0: char flag; /* marks special states */ sl@0: int nins; /* number of inarcs */ sl@0: struct arc *ins; /* chain of inarcs */ sl@0: int nouts; /* number of outarcs */ sl@0: struct arc *outs; /* chain of outarcs */ sl@0: struct arc *free; /* chain of free arcs */ sl@0: struct state *tmp; /* temporary for traversal algorithms */ sl@0: struct state *next; /* chain for traversing all */ sl@0: struct state *prev; /* back chain */ sl@0: struct arcbatch oas; /* first arcbatch, avoid malloc in easy case */ sl@0: int noas; /* number of arcs used in first arcbatch */ sl@0: }; sl@0: sl@0: struct nfa { sl@0: struct state *pre; /* pre-initial state */ sl@0: struct state *init; /* initial state */ sl@0: struct state *final; /* final state */ sl@0: struct state *post; /* post-final state */ sl@0: int nstates; /* for numbering states */ sl@0: struct state *states; /* state-chain header */ sl@0: struct state *slast; /* tail of the chain */ sl@0: struct state *free; /* free list */ sl@0: struct colormap *cm; /* the color map */ sl@0: color bos[2]; /* colors, if any, assigned to BOS and BOL */ sl@0: color eos[2]; /* colors, if any, assigned to EOS and EOL */ sl@0: struct vars *v; /* simplifies compile error reporting */ sl@0: struct nfa *parent; /* parent NFA, if any */ sl@0: }; sl@0: sl@0: sl@0: sl@0: /* sl@0: * definitions for compacted NFA sl@0: */ sl@0: struct carc { sl@0: color co; /* COLORLESS is list terminator */ sl@0: int to; /* state number */ sl@0: }; sl@0: sl@0: struct cnfa { sl@0: int nstates; /* number of states */ sl@0: int ncolors; /* number of colors */ sl@0: int flags; sl@0: # define HASLACONS 01 /* uses lookahead constraints */ sl@0: int pre; /* setup state number */ sl@0: int post; /* teardown state number */ sl@0: color bos[2]; /* colors, if any, assigned to BOS and BOL */ sl@0: color eos[2]; /* colors, if any, assigned to EOS and EOL */ sl@0: struct carc **states; /* vector of pointers to outarc lists */ sl@0: struct carc *arcs; /* the area for the lists */ sl@0: }; sl@0: #define ZAPCNFA(cnfa) ((cnfa).nstates = 0) sl@0: #define NULLCNFA(cnfa) ((cnfa).nstates == 0) sl@0: sl@0: sl@0: sl@0: /* sl@0: * subexpression tree sl@0: */ sl@0: struct subre { sl@0: char op; /* '|', '.' (concat), 'b' (backref), '(', '=' */ sl@0: char flags; sl@0: # define LONGER 01 /* prefers longer match */ sl@0: # define SHORTER 02 /* prefers shorter match */ sl@0: # define MIXED 04 /* mixed preference below */ sl@0: # define CAP 010 /* capturing parens below */ sl@0: # define BACKR 020 /* back reference below */ sl@0: # define INUSE 0100 /* in use in final tree */ sl@0: # define LOCAL 03 /* bits which may not propagate up */ sl@0: # define LMIX(f) ((f)<<2) /* LONGER -> MIXED */ sl@0: # define SMIX(f) ((f)<<1) /* SHORTER -> MIXED */ sl@0: # define UP(f) (((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED)) sl@0: # define MESSY(f) ((f)&(MIXED|CAP|BACKR)) sl@0: # define PREF(f) ((f)&LOCAL) sl@0: # define PREF2(f1, f2) ((PREF(f1) != 0) ? PREF(f1) : PREF(f2)) sl@0: # define COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2)) sl@0: short retry; /* index into retry memory */ sl@0: int subno; /* subexpression number (for 'b' and '(') */ sl@0: short min; /* min repetitions, for backref only */ sl@0: short max; /* max repetitions, for backref only */ sl@0: struct subre *left; /* left child, if any (also freelist chain) */ sl@0: struct subre *right; /* right child, if any */ sl@0: struct state *begin; /* outarcs from here... */ sl@0: struct state *end; /* ...ending in inarcs here */ sl@0: struct cnfa cnfa; /* compacted NFA, if any */ sl@0: struct subre *chain; /* for bookkeeping and error cleanup */ sl@0: }; sl@0: sl@0: sl@0: sl@0: /* sl@0: * table of function pointers for generic manipulation functions sl@0: * A regex_t's re_fns points to one of these. sl@0: */ sl@0: struct fns { sl@0: VOID FUNCPTR(free, (regex_t *)); sl@0: }; sl@0: sl@0: sl@0: sl@0: /* sl@0: * the insides of a regex_t, hidden behind a void * sl@0: */ sl@0: struct guts { sl@0: int magic; sl@0: # define GUTSMAGIC 0xfed9 sl@0: int cflags; /* copy of compile flags */ sl@0: long info; /* copy of re_info */ sl@0: size_t nsub; /* copy of re_nsub */ sl@0: struct subre *tree; sl@0: struct cnfa search; /* for fast preliminary search */ sl@0: int ntree; sl@0: struct colormap cmap; sl@0: int FUNCPTR(compare, (CONST chr *, CONST chr *, size_t)); sl@0: struct subre *lacons; /* lookahead-constraint vector */ sl@0: int nlacons; /* size of lacons */ sl@0: };