os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/generic/regcomp.c
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
sl@0
     1
/*
sl@0
     2
 * re_*comp and friends - compile REs
sl@0
     3
 * This file #includes several others (see the bottom).
sl@0
     4
 *
sl@0
     5
 * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
sl@0
     6
 * 
sl@0
     7
 * Development of this software was funded, in part, by Cray Research Inc.,
sl@0
     8
 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
sl@0
     9
 * Corporation, none of whom are responsible for the results.  The author
sl@0
    10
 * thanks all of them. 
sl@0
    11
 * 
sl@0
    12
 * Redistribution and use in source and binary forms -- with or without
sl@0
    13
 * modification -- are permitted for any purpose, provided that
sl@0
    14
 * redistributions in source form retain this entire copyright notice and
sl@0
    15
 * indicate the origin and nature of any modifications.
sl@0
    16
 * 
sl@0
    17
 * I'd appreciate being given credit for this package in the documentation
sl@0
    18
 * of software which uses it, but that is not a requirement.
sl@0
    19
 * 
sl@0
    20
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
sl@0
    21
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
sl@0
    22
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
sl@0
    23
 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
sl@0
    24
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
sl@0
    25
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
sl@0
    26
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
sl@0
    27
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
sl@0
    28
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
sl@0
    29
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
sl@0
    30
 *
sl@0
    31
 */
sl@0
    32
sl@0
    33
#include "regguts.h"
sl@0
    34
sl@0
    35
/*
sl@0
    36
 * forward declarations, up here so forward datatypes etc. are defined early
sl@0
    37
 */
sl@0
    38
/* =====^!^===== begin forwards =====^!^===== */
sl@0
    39
/* automatically gathered by fwd; do not hand-edit */
sl@0
    40
/* === regcomp.c === */
sl@0
    41
int compile _ANSI_ARGS_((regex_t *, CONST chr *, size_t, int));
sl@0
    42
static VOID moresubs _ANSI_ARGS_((struct vars *, int));
sl@0
    43
static int freev _ANSI_ARGS_((struct vars *, int));
sl@0
    44
static VOID makesearch _ANSI_ARGS_((struct vars *, struct nfa *));
sl@0
    45
static struct subre *parse _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *));
sl@0
    46
static struct subre *parsebranch _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *, int));
sl@0
    47
static VOID parseqatom _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *, struct subre *));
sl@0
    48
static VOID nonword _ANSI_ARGS_((struct vars *, int, struct state *, struct state *));
sl@0
    49
static VOID word _ANSI_ARGS_((struct vars *, int, struct state *, struct state *));
sl@0
    50
static int scannum _ANSI_ARGS_((struct vars *));
sl@0
    51
static VOID repeat _ANSI_ARGS_((struct vars *, struct state *, struct state *, int, int));
sl@0
    52
static VOID bracket _ANSI_ARGS_((struct vars *, struct state *, struct state *));
sl@0
    53
static VOID cbracket _ANSI_ARGS_((struct vars *, struct state *, struct state *));
sl@0
    54
static VOID brackpart _ANSI_ARGS_((struct vars *, struct state *, struct state *));
sl@0
    55
static chr *scanplain _ANSI_ARGS_((struct vars *));
sl@0
    56
static VOID leaders _ANSI_ARGS_((struct vars *, struct cvec *));
sl@0
    57
static VOID onechr _ANSI_ARGS_((struct vars *, pchr, struct state *, struct state *));
sl@0
    58
static VOID dovec _ANSI_ARGS_((struct vars *, struct cvec *, struct state *, struct state *));
sl@0
    59
static celt nextleader _ANSI_ARGS_((struct vars *, pchr, pchr));
sl@0
    60
static VOID wordchrs _ANSI_ARGS_((struct vars *));
sl@0
    61
static struct subre *subre _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *));
sl@0
    62
static VOID freesubre _ANSI_ARGS_((struct vars *, struct subre *));
sl@0
    63
static VOID freesrnode _ANSI_ARGS_((struct vars *, struct subre *));
sl@0
    64
static VOID optst _ANSI_ARGS_((struct vars *, struct subre *));
sl@0
    65
static int numst _ANSI_ARGS_((struct subre *, int));
sl@0
    66
static VOID markst _ANSI_ARGS_((struct subre *));
sl@0
    67
static VOID cleanst _ANSI_ARGS_((struct vars *));
sl@0
    68
static long nfatree _ANSI_ARGS_((struct vars *, struct subre *, FILE *));
sl@0
    69
static long nfanode _ANSI_ARGS_((struct vars *, struct subre *, FILE *));
sl@0
    70
static int newlacon _ANSI_ARGS_((struct vars *, struct state *, struct state *, int));
sl@0
    71
static VOID freelacons _ANSI_ARGS_((struct subre *, int));
sl@0
    72
static VOID rfree _ANSI_ARGS_((regex_t *));
sl@0
    73
static VOID dump _ANSI_ARGS_((regex_t *, FILE *));
sl@0
    74
static VOID dumpst _ANSI_ARGS_((struct subre *, FILE *, int));
sl@0
    75
static VOID stdump _ANSI_ARGS_((struct subre *, FILE *, int));
sl@0
    76
static char *stid _ANSI_ARGS_((struct subre *, char *, size_t));
sl@0
    77
/* === regc_lex.c === */
sl@0
    78
static VOID lexstart _ANSI_ARGS_((struct vars *));
sl@0
    79
static VOID prefixes _ANSI_ARGS_((struct vars *));
sl@0
    80
static VOID lexnest _ANSI_ARGS_((struct vars *, chr *, chr *));
sl@0
    81
static VOID lexword _ANSI_ARGS_((struct vars *));
sl@0
    82
static int next _ANSI_ARGS_((struct vars *));
sl@0
    83
static int lexescape _ANSI_ARGS_((struct vars *));
sl@0
    84
static chr lexdigits _ANSI_ARGS_((struct vars *, int, int, int));
sl@0
    85
static int brenext _ANSI_ARGS_((struct vars *, pchr));
sl@0
    86
static VOID skip _ANSI_ARGS_((struct vars *));
sl@0
    87
static chr newline _ANSI_ARGS_((NOPARMS));
sl@0
    88
#ifdef REG_DEBUG
sl@0
    89
static chr *ch _ANSI_ARGS_((NOPARMS));
sl@0
    90
#endif
sl@0
    91
static chr chrnamed _ANSI_ARGS_((struct vars *, chr *, chr *, pchr));
sl@0
    92
/* === regc_color.c === */
sl@0
    93
static VOID initcm _ANSI_ARGS_((struct vars *, struct colormap *));
sl@0
    94
static VOID freecm _ANSI_ARGS_((struct colormap *));
sl@0
    95
static VOID cmtreefree _ANSI_ARGS_((struct colormap *, union tree *, int));
sl@0
    96
static color setcolor _ANSI_ARGS_((struct colormap *, pchr, pcolor));
sl@0
    97
static color maxcolor _ANSI_ARGS_((struct colormap *));
sl@0
    98
static color newcolor _ANSI_ARGS_((struct colormap *));
sl@0
    99
static VOID freecolor _ANSI_ARGS_((struct colormap *, pcolor));
sl@0
   100
static color pseudocolor _ANSI_ARGS_((struct colormap *));
sl@0
   101
static color subcolor _ANSI_ARGS_((struct colormap *, pchr c));
sl@0
   102
static color newsub _ANSI_ARGS_((struct colormap *, pcolor));
sl@0
   103
static VOID subrange _ANSI_ARGS_((struct vars *, pchr, pchr, struct state *, struct state *));
sl@0
   104
static VOID subblock _ANSI_ARGS_((struct vars *, pchr, struct state *, struct state *));
sl@0
   105
static VOID okcolors _ANSI_ARGS_((struct nfa *, struct colormap *));
sl@0
   106
static VOID colorchain _ANSI_ARGS_((struct colormap *, struct arc *));
sl@0
   107
static VOID uncolorchain _ANSI_ARGS_((struct colormap *, struct arc *));
sl@0
   108
static int singleton _ANSI_ARGS_((struct colormap *, pchr c));
sl@0
   109
static VOID rainbow _ANSI_ARGS_((struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *));
sl@0
   110
static VOID colorcomplement _ANSI_ARGS_((struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *));
sl@0
   111
#ifdef REG_DEBUG
sl@0
   112
static VOID dumpcolors _ANSI_ARGS_((struct colormap *, FILE *));
sl@0
   113
static VOID fillcheck _ANSI_ARGS_((struct colormap *, union tree *, int, FILE *));
sl@0
   114
static VOID dumpchr _ANSI_ARGS_((pchr, FILE *));
sl@0
   115
#endif
sl@0
   116
/* === regc_nfa.c === */
sl@0
   117
static struct nfa *newnfa _ANSI_ARGS_((struct vars *, struct colormap *, struct nfa *));
sl@0
   118
static VOID freenfa _ANSI_ARGS_((struct nfa *));
sl@0
   119
static struct state *newstate _ANSI_ARGS_((struct nfa *));
sl@0
   120
static struct state *newfstate _ANSI_ARGS_((struct nfa *, int flag));
sl@0
   121
static VOID dropstate _ANSI_ARGS_((struct nfa *, struct state *));
sl@0
   122
static VOID freestate _ANSI_ARGS_((struct nfa *, struct state *));
sl@0
   123
static VOID destroystate _ANSI_ARGS_((struct nfa *, struct state *));
sl@0
   124
static VOID newarc _ANSI_ARGS_((struct nfa *, int, pcolor, struct state *, struct state *));
sl@0
   125
static struct arc *allocarc _ANSI_ARGS_((struct nfa *, struct state *));
sl@0
   126
static VOID freearc _ANSI_ARGS_((struct nfa *, struct arc *));
sl@0
   127
static struct arc *findarc _ANSI_ARGS_((struct state *, int, pcolor));
sl@0
   128
static VOID cparc _ANSI_ARGS_((struct nfa *, struct arc *, struct state *, struct state *));
sl@0
   129
static VOID moveins _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
sl@0
   130
static VOID copyins _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
sl@0
   131
static VOID moveouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
sl@0
   132
static VOID copyouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
sl@0
   133
static VOID cloneouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *, int));
sl@0
   134
static VOID delsub _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
sl@0
   135
static VOID deltraverse _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
sl@0
   136
static VOID dupnfa _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *, struct state *));
sl@0
   137
static VOID duptraverse _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
sl@0
   138
static VOID cleartraverse _ANSI_ARGS_((struct nfa *, struct state *));
sl@0
   139
static VOID specialcolors _ANSI_ARGS_((struct nfa *));
sl@0
   140
static long optimize _ANSI_ARGS_((struct nfa *, FILE *));
sl@0
   141
static VOID pullback _ANSI_ARGS_((struct nfa *, FILE *));
sl@0
   142
static int pull _ANSI_ARGS_((struct nfa *, struct arc *));
sl@0
   143
static VOID pushfwd _ANSI_ARGS_((struct nfa *, FILE *));
sl@0
   144
static int push _ANSI_ARGS_((struct nfa *, struct arc *));
sl@0
   145
#define	INCOMPATIBLE	1	/* destroys arc */
sl@0
   146
#define	SATISFIED	2	/* constraint satisfied */
sl@0
   147
#define	COMPATIBLE	3	/* compatible but not satisfied yet */
sl@0
   148
static int combine _ANSI_ARGS_((struct arc *, struct arc *));
sl@0
   149
static VOID fixempties _ANSI_ARGS_((struct nfa *, FILE *));
sl@0
   150
static int unempty _ANSI_ARGS_((struct nfa *, struct arc *));
sl@0
   151
static VOID cleanup _ANSI_ARGS_((struct nfa *));
sl@0
   152
static VOID markreachable _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *));
sl@0
   153
static VOID markcanreach _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *));
sl@0
   154
static long analyze _ANSI_ARGS_((struct nfa *));
sl@0
   155
static VOID compact _ANSI_ARGS_((struct nfa *, struct cnfa *));
sl@0
   156
static VOID carcsort _ANSI_ARGS_((struct carc *, struct carc *));
sl@0
   157
static VOID freecnfa _ANSI_ARGS_((struct cnfa *));
sl@0
   158
static VOID dumpnfa _ANSI_ARGS_((struct nfa *, FILE *));
sl@0
   159
#ifdef REG_DEBUG
sl@0
   160
static VOID dumpstate _ANSI_ARGS_((struct state *, FILE *));
sl@0
   161
static VOID dumparcs _ANSI_ARGS_((struct state *, FILE *));
sl@0
   162
static int dumprarcs _ANSI_ARGS_((struct arc *, struct state *, FILE *, int));
sl@0
   163
static VOID dumparc _ANSI_ARGS_((struct arc *, struct state *, FILE *));
sl@0
   164
#endif
sl@0
   165
static VOID dumpcnfa _ANSI_ARGS_((struct cnfa *, FILE *));
sl@0
   166
#ifdef REG_DEBUG
sl@0
   167
static VOID dumpcstate _ANSI_ARGS_((int, struct carc *, struct cnfa *, FILE *));
sl@0
   168
#endif
sl@0
   169
/* === regc_cvec.c === */
sl@0
   170
static struct cvec *newcvec _ANSI_ARGS_((int, int, int));
sl@0
   171
static struct cvec *clearcvec _ANSI_ARGS_((struct cvec *));
sl@0
   172
static VOID addchr _ANSI_ARGS_((struct cvec *, pchr));
sl@0
   173
static VOID addrange _ANSI_ARGS_((struct cvec *, pchr, pchr));
sl@0
   174
static VOID addmcce _ANSI_ARGS_((struct cvec *, chr *, chr *));
sl@0
   175
static int haschr _ANSI_ARGS_((struct cvec *, pchr));
sl@0
   176
static struct cvec *getcvec _ANSI_ARGS_((struct vars *, int, int, int));
sl@0
   177
static VOID freecvec _ANSI_ARGS_((struct cvec *));
sl@0
   178
/* === regc_locale.c === */
sl@0
   179
static int nmcces _ANSI_ARGS_((struct vars *));
sl@0
   180
static int nleaders _ANSI_ARGS_((struct vars *));
sl@0
   181
static struct cvec *allmcces _ANSI_ARGS_((struct vars *, struct cvec *));
sl@0
   182
static celt element _ANSI_ARGS_((struct vars *, chr *, chr *));
sl@0
   183
static struct cvec *range _ANSI_ARGS_((struct vars *, celt, celt, int));
sl@0
   184
static int before _ANSI_ARGS_((celt, celt));
sl@0
   185
static struct cvec *eclass _ANSI_ARGS_((struct vars *, celt, int));
sl@0
   186
static struct cvec *cclass _ANSI_ARGS_((struct vars *, chr *, chr *, int));
sl@0
   187
static struct cvec *allcases _ANSI_ARGS_((struct vars *, pchr));
sl@0
   188
static int cmp _ANSI_ARGS_((CONST chr *, CONST chr *, size_t));
sl@0
   189
static int casecmp _ANSI_ARGS_((CONST chr *, CONST chr *, size_t));
sl@0
   190
/* automatically gathered by fwd; do not hand-edit */
sl@0
   191
/* =====^!^===== end forwards =====^!^===== */
sl@0
   192
sl@0
   193
sl@0
   194
sl@0
   195
/* internal variables, bundled for easy passing around */
sl@0
   196
struct vars {
sl@0
   197
	regex_t *re;
sl@0
   198
	chr *now;		/* scan pointer into string */
sl@0
   199
	chr *stop;		/* end of string */
sl@0
   200
	chr *savenow;		/* saved now and stop for "subroutine call" */
sl@0
   201
	chr *savestop;
sl@0
   202
	int err;		/* error code (0 if none) */
sl@0
   203
	int cflags;		/* copy of compile flags */
sl@0
   204
	int lasttype;		/* type of previous token */
sl@0
   205
	int nexttype;		/* type of next token */
sl@0
   206
	chr nextvalue;		/* value (if any) of next token */
sl@0
   207
	int lexcon;		/* lexical context type (see lex.c) */
sl@0
   208
	int nsubexp;		/* subexpression count */
sl@0
   209
	struct subre **subs;	/* subRE pointer vector */
sl@0
   210
	size_t nsubs;		/* length of vector */
sl@0
   211
	struct subre *sub10[10];	/* initial vector, enough for most */
sl@0
   212
	struct nfa *nfa;	/* the NFA */
sl@0
   213
	struct colormap *cm;	/* character color map */
sl@0
   214
	color nlcolor;		/* color of newline */
sl@0
   215
	struct state *wordchrs;	/* state in nfa holding word-char outarcs */
sl@0
   216
	struct subre *tree;	/* subexpression tree */
sl@0
   217
	struct subre *treechain;	/* all tree nodes allocated */
sl@0
   218
	struct subre *treefree;		/* any free tree nodes */
sl@0
   219
	int ntree;		/* number of tree nodes */
sl@0
   220
	struct cvec *cv;	/* interface cvec */
sl@0
   221
	struct cvec *cv2;	/* utility cvec */
sl@0
   222
	struct cvec *mcces;	/* collating-element information */
sl@0
   223
#		define	ISCELEADER(v,c)	(v->mcces != NULL && haschr(v->mcces, (c)))
sl@0
   224
	struct state *mccepbegin;	/* in nfa, start of MCCE prototypes */
sl@0
   225
	struct state *mccepend;	/* in nfa, end of MCCE prototypes */
sl@0
   226
	struct subre *lacons;	/* lookahead-constraint vector */
sl@0
   227
	int nlacons;		/* size of lacons */
sl@0
   228
};
sl@0
   229
sl@0
   230
/* parsing macros; most know that `v' is the struct vars pointer */
sl@0
   231
#define	NEXT()	(next(v))		/* advance by one token */
sl@0
   232
#define	SEE(t)	(v->nexttype == (t))	/* is next token this? */
sl@0
   233
#define	EAT(t)	(SEE(t) && next(v))	/* if next is this, swallow it */
sl@0
   234
#define	VISERR(vv)	((vv)->err != 0)	/* have we seen an error yet? */
sl@0
   235
#define	ISERR()	VISERR(v)
sl@0
   236
#define	VERR(vv,e)	((vv)->nexttype = EOS, ((vv)->err) ? (vv)->err :\
sl@0
   237
							((vv)->err = (e)))
sl@0
   238
#define	ERR(e)	VERR(v, e)		/* record an error */
sl@0
   239
#define	NOERR()	{if (ISERR()) return;}	/* if error seen, return */
sl@0
   240
#define	NOERRN()	{if (ISERR()) return NULL;}	/* NOERR with retval */
sl@0
   241
#define	NOERRZ()	{if (ISERR()) return 0;}	/* NOERR with retval */
sl@0
   242
#define	INSIST(c, e)	((c) ? 0 : ERR(e))	/* if condition false, error */
sl@0
   243
#define	NOTE(b)	(v->re->re_info |= (b))		/* note visible condition */
sl@0
   244
#define	EMPTYARC(x, y)	newarc(v->nfa, EMPTY, 0, x, y)
sl@0
   245
sl@0
   246
/* token type codes, some also used as NFA arc types */
sl@0
   247
#define	EMPTY	'n'		/* no token present */
sl@0
   248
#define	EOS	'e'		/* end of string */
sl@0
   249
#define	PLAIN	'p'		/* ordinary character */
sl@0
   250
#define	DIGIT	'd'		/* digit (in bound) */
sl@0
   251
#define	BACKREF	'b'		/* back reference */
sl@0
   252
#define	COLLEL	'I'		/* start of [. */
sl@0
   253
#define	ECLASS	'E'		/* start of [= */
sl@0
   254
#define	CCLASS	'C'		/* start of [: */
sl@0
   255
#define	END	'X'		/* end of [. [= [: */
sl@0
   256
#define	RANGE	'R'		/* - within [] which might be range delim. */
sl@0
   257
#define	LACON	'L'		/* lookahead constraint subRE */
sl@0
   258
#define	AHEAD	'a'		/* color-lookahead arc */
sl@0
   259
#define	BEHIND	'r'		/* color-lookbehind arc */
sl@0
   260
#define	WBDRY	'w'		/* word boundary constraint */
sl@0
   261
#define	NWBDRY	'W'		/* non-word-boundary constraint */
sl@0
   262
#define	SBEGIN	'A'		/* beginning of string (even if not BOL) */
sl@0
   263
#define	SEND	'Z'		/* end of string (even if not EOL) */
sl@0
   264
#define	PREFER	'P'		/* length preference */
sl@0
   265
sl@0
   266
/* is an arc colored, and hence on a color chain? */
sl@0
   267
#define	COLORED(a)	((a)->type == PLAIN || (a)->type == AHEAD || \
sl@0
   268
							(a)->type == BEHIND)
sl@0
   269
sl@0
   270
sl@0
   271
sl@0
   272
/* static function list */
sl@0
   273
static struct fns functions = {
sl@0
   274
	rfree,			/* regfree insides */
sl@0
   275
};
sl@0
   276
sl@0
   277
sl@0
   278
sl@0
   279
/*
sl@0
   280
 - compile - compile regular expression
sl@0
   281
 ^ int compile(regex_t *, CONST chr *, size_t, int);
sl@0
   282
 */
sl@0
   283
int
sl@0
   284
compile(re, string, len, flags)
sl@0
   285
regex_t *re;
sl@0
   286
CONST chr *string;
sl@0
   287
size_t len;
sl@0
   288
int flags;
sl@0
   289
{
sl@0
   290
	struct vars var;
sl@0
   291
	struct vars *v = &var;
sl@0
   292
	struct guts *g;
sl@0
   293
	int i;
sl@0
   294
	size_t j;
sl@0
   295
	FILE *debug = (flags&REG_PROGRESS) ? stdout : (FILE *)NULL;
sl@0
   296
#	define	CNOERR()	{ if (ISERR()) return freev(v, v->err); }
sl@0
   297
sl@0
   298
	/* sanity checks */
sl@0
   299
sl@0
   300
	if (re == NULL || string == NULL)
sl@0
   301
		return REG_INVARG;
sl@0
   302
	if ((flags&REG_QUOTE) &&
sl@0
   303
			(flags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE)))
sl@0
   304
		return REG_INVARG;
sl@0
   305
	if (!(flags&REG_EXTENDED) && (flags&REG_ADVF))
sl@0
   306
		return REG_INVARG;
sl@0
   307
sl@0
   308
	/* initial setup (after which freev() is callable) */
sl@0
   309
	v->re = re;
sl@0
   310
	v->now = (chr *)string;
sl@0
   311
	v->stop = v->now + len;
sl@0
   312
	v->savenow = v->savestop = NULL;
sl@0
   313
	v->err = 0;
sl@0
   314
	v->cflags = flags;
sl@0
   315
	v->nsubexp = 0;
sl@0
   316
	v->subs = v->sub10;
sl@0
   317
	v->nsubs = 10;
sl@0
   318
	for (j = 0; j < v->nsubs; j++)
sl@0
   319
		v->subs[j] = NULL;
sl@0
   320
	v->nfa = NULL;
sl@0
   321
	v->cm = NULL;
sl@0
   322
	v->nlcolor = COLORLESS;
sl@0
   323
	v->wordchrs = NULL;
sl@0
   324
	v->tree = NULL;
sl@0
   325
	v->treechain = NULL;
sl@0
   326
	v->treefree = NULL;
sl@0
   327
	v->cv = NULL;
sl@0
   328
	v->cv2 = NULL;
sl@0
   329
	v->mcces = NULL;
sl@0
   330
	v->lacons = NULL;
sl@0
   331
	v->nlacons = 0;
sl@0
   332
	re->re_magic = REMAGIC;
sl@0
   333
	re->re_info = 0;		/* bits get set during parse */
sl@0
   334
	re->re_csize = sizeof(chr);
sl@0
   335
	re->re_guts = NULL;
sl@0
   336
	re->re_fns = VS(&functions);
sl@0
   337
sl@0
   338
	/* more complex setup, malloced things */
sl@0
   339
	re->re_guts = VS(MALLOC(sizeof(struct guts)));
sl@0
   340
	if (re->re_guts == NULL)
sl@0
   341
		return freev(v, REG_ESPACE);
sl@0
   342
	g = (struct guts *)re->re_guts;
sl@0
   343
	g->tree = NULL;
sl@0
   344
	initcm(v, &g->cmap);
sl@0
   345
	v->cm = &g->cmap;
sl@0
   346
	g->lacons = NULL;
sl@0
   347
	g->nlacons = 0;
sl@0
   348
	ZAPCNFA(g->search);
sl@0
   349
	v->nfa = newnfa(v, v->cm, (struct nfa *)NULL);
sl@0
   350
	CNOERR();
sl@0
   351
	v->cv = newcvec(100, 20, 10);
sl@0
   352
	if (v->cv == NULL)
sl@0
   353
		return freev(v, REG_ESPACE);
sl@0
   354
	i = nmcces(v);
sl@0
   355
	if (i > 0) {
sl@0
   356
		v->mcces = newcvec(nleaders(v), 0, i);
sl@0
   357
		CNOERR();
sl@0
   358
		v->mcces = allmcces(v, v->mcces);
sl@0
   359
		leaders(v, v->mcces);
sl@0
   360
		addmcce(v->mcces, (chr *)NULL, (chr *)NULL);	/* dummy */
sl@0
   361
	}
sl@0
   362
	CNOERR();
sl@0
   363
sl@0
   364
	/* parsing */
sl@0
   365
	lexstart(v);			/* also handles prefixes */
sl@0
   366
	if ((v->cflags&REG_NLSTOP) || (v->cflags&REG_NLANCH)) {
sl@0
   367
		/* assign newline a unique color */
sl@0
   368
		v->nlcolor = subcolor(v->cm, newline());
sl@0
   369
		okcolors(v->nfa, v->cm);
sl@0
   370
	}
sl@0
   371
	CNOERR();
sl@0
   372
	v->tree = parse(v, EOS, PLAIN, v->nfa->init, v->nfa->final);
sl@0
   373
	assert(SEE(EOS));		/* even if error; ISERR() => SEE(EOS) */
sl@0
   374
	CNOERR();
sl@0
   375
	assert(v->tree != NULL);
sl@0
   376
sl@0
   377
	/* finish setup of nfa and its subre tree */
sl@0
   378
	specialcolors(v->nfa);
sl@0
   379
	CNOERR();
sl@0
   380
	if (debug != NULL) {
sl@0
   381
		fprintf(debug, "\n\n\n========= RAW ==========\n");
sl@0
   382
		dumpnfa(v->nfa, debug);
sl@0
   383
		dumpst(v->tree, debug, 1);
sl@0
   384
	}
sl@0
   385
	optst(v, v->tree);
sl@0
   386
	v->ntree = numst(v->tree, 1);
sl@0
   387
	markst(v->tree);
sl@0
   388
	cleanst(v);
sl@0
   389
	if (debug != NULL) {
sl@0
   390
		fprintf(debug, "\n\n\n========= TREE FIXED ==========\n");
sl@0
   391
		dumpst(v->tree, debug, 1);
sl@0
   392
	}
sl@0
   393
sl@0
   394
	/* build compacted NFAs for tree and lacons */
sl@0
   395
	re->re_info |= nfatree(v, v->tree, debug);
sl@0
   396
	CNOERR();
sl@0
   397
	assert(v->nlacons == 0 || v->lacons != NULL);
sl@0
   398
	for (i = 1; i < v->nlacons; i++) {
sl@0
   399
		if (debug != NULL)
sl@0
   400
			fprintf(debug, "\n\n\n========= LA%d ==========\n", i);
sl@0
   401
		nfanode(v, &v->lacons[i], debug);
sl@0
   402
	}
sl@0
   403
	CNOERR();
sl@0
   404
	if (v->tree->flags&SHORTER)
sl@0
   405
		NOTE(REG_USHORTEST);
sl@0
   406
sl@0
   407
	/* build compacted NFAs for tree, lacons, fast search */
sl@0
   408
	if (debug != NULL)
sl@0
   409
		fprintf(debug, "\n\n\n========= SEARCH ==========\n");
sl@0
   410
	/* can sacrifice main NFA now, so use it as work area */
sl@0
   411
	(DISCARD)optimize(v->nfa, debug);
sl@0
   412
	CNOERR();
sl@0
   413
	makesearch(v, v->nfa);
sl@0
   414
	CNOERR();
sl@0
   415
	compact(v->nfa, &g->search);
sl@0
   416
	CNOERR();
sl@0
   417
sl@0
   418
	/* looks okay, package it up */
sl@0
   419
	re->re_nsub = v->nsubexp;
sl@0
   420
	v->re = NULL;			/* freev no longer frees re */
sl@0
   421
	g->magic = GUTSMAGIC;
sl@0
   422
	g->cflags = v->cflags;
sl@0
   423
	g->info = re->re_info;
sl@0
   424
	g->nsub = re->re_nsub;
sl@0
   425
	g->tree = v->tree;
sl@0
   426
	v->tree = NULL;
sl@0
   427
	g->ntree = v->ntree;
sl@0
   428
	g->compare = (v->cflags&REG_ICASE) ? casecmp : cmp;
sl@0
   429
	g->lacons = v->lacons;
sl@0
   430
	v->lacons = NULL;
sl@0
   431
	g->nlacons = v->nlacons;
sl@0
   432
sl@0
   433
	if (flags&REG_DUMP)
sl@0
   434
		dump(re, stdout);
sl@0
   435
sl@0
   436
	assert(v->err == 0);
sl@0
   437
	return freev(v, 0);
sl@0
   438
}
sl@0
   439
sl@0
   440
/*
sl@0
   441
 - moresubs - enlarge subRE vector
sl@0
   442
 ^ static VOID moresubs(struct vars *, int);
sl@0
   443
 */
sl@0
   444
static VOID
sl@0
   445
moresubs(v, wanted)
sl@0
   446
struct vars *v;
sl@0
   447
int wanted;			/* want enough room for this one */
sl@0
   448
{
sl@0
   449
	struct subre **p;
sl@0
   450
	size_t n;
sl@0
   451
sl@0
   452
	assert(wanted > 0 && (size_t)wanted >= v->nsubs);
sl@0
   453
	n = (size_t)wanted * 3 / 2 + 1;
sl@0
   454
	if (v->subs == v->sub10) {
sl@0
   455
		p = (struct subre **)MALLOC(n * sizeof(struct subre *));
sl@0
   456
		if (p != NULL)
sl@0
   457
			memcpy(VS(p), VS(v->subs),
sl@0
   458
					v->nsubs * sizeof(struct subre *));
sl@0
   459
	} else
sl@0
   460
		p = (struct subre **)REALLOC(v->subs, n*sizeof(struct subre *));
sl@0
   461
	if (p == NULL) {
sl@0
   462
		ERR(REG_ESPACE);
sl@0
   463
		return;
sl@0
   464
	}
sl@0
   465
	v->subs = p;
sl@0
   466
	for (p = &v->subs[v->nsubs]; v->nsubs < n; p++, v->nsubs++)
sl@0
   467
		*p = NULL;
sl@0
   468
	assert(v->nsubs == n);
sl@0
   469
	assert((size_t)wanted < v->nsubs);
sl@0
   470
}
sl@0
   471
sl@0
   472
/*
sl@0
   473
 - freev - free vars struct's substructures where necessary
sl@0
   474
 * Optionally does error-number setting, and always returns error code
sl@0
   475
 * (if any), to make error-handling code terser.
sl@0
   476
 ^ static int freev(struct vars *, int);
sl@0
   477
 */
sl@0
   478
static int
sl@0
   479
freev(v, err)
sl@0
   480
struct vars *v;
sl@0
   481
int err;
sl@0
   482
{
sl@0
   483
	if (v->re != NULL)
sl@0
   484
		rfree(v->re);
sl@0
   485
	if (v->subs != v->sub10)
sl@0
   486
		FREE(v->subs);
sl@0
   487
	if (v->nfa != NULL)
sl@0
   488
		freenfa(v->nfa);
sl@0
   489
	if (v->tree != NULL)
sl@0
   490
		freesubre(v, v->tree);
sl@0
   491
	if (v->treechain != NULL)
sl@0
   492
		cleanst(v);
sl@0
   493
	if (v->cv != NULL)
sl@0
   494
		freecvec(v->cv);
sl@0
   495
	if (v->cv2 != NULL)
sl@0
   496
		freecvec(v->cv2);
sl@0
   497
	if (v->mcces != NULL)
sl@0
   498
		freecvec(v->mcces);
sl@0
   499
	if (v->lacons != NULL)
sl@0
   500
		freelacons(v->lacons, v->nlacons);
sl@0
   501
	ERR(err);			/* nop if err==0 */
sl@0
   502
sl@0
   503
	return v->err;
sl@0
   504
}
sl@0
   505
sl@0
   506
/*
sl@0
   507
 - makesearch - turn an NFA into a search NFA (implicit prepend of .*?)
sl@0
   508
 * NFA must have been optimize()d already.
sl@0
   509
 ^ static VOID makesearch(struct vars *, struct nfa *);
sl@0
   510
 */
sl@0
   511
static VOID
sl@0
   512
makesearch(v, nfa)
sl@0
   513
struct vars *v;
sl@0
   514
struct nfa *nfa;
sl@0
   515
{
sl@0
   516
	struct arc *a;
sl@0
   517
	struct arc *b;
sl@0
   518
	struct state *pre = nfa->pre;
sl@0
   519
	struct state *s;
sl@0
   520
	struct state *s2;
sl@0
   521
	struct state *slist;
sl@0
   522
sl@0
   523
	/* no loops are needed if it's anchored */
sl@0
   524
	for (a = pre->outs; a != NULL; a = a->outchain) {
sl@0
   525
		assert(a->type == PLAIN);
sl@0
   526
		if (a->co != nfa->bos[0] && a->co != nfa->bos[1])
sl@0
   527
			break;
sl@0
   528
	}
sl@0
   529
	if (a != NULL) {
sl@0
   530
		/* add implicit .* in front */
sl@0
   531
		rainbow(nfa, v->cm, PLAIN, COLORLESS, pre, pre);
sl@0
   532
sl@0
   533
		/* and ^* and \A* too -- not always necessary, but harmless */
sl@0
   534
		newarc(nfa, PLAIN, nfa->bos[0], pre, pre);
sl@0
   535
		newarc(nfa, PLAIN, nfa->bos[1], pre, pre);
sl@0
   536
	}
sl@0
   537
sl@0
   538
	/*
sl@0
   539
	 * Now here's the subtle part.  Because many REs have no lookback
sl@0
   540
	 * constraints, often knowing when you were in the pre state tells
sl@0
   541
	 * you little; it's the next state(s) that are informative.  But
sl@0
   542
	 * some of them may have other inarcs, i.e. it may be possible to
sl@0
   543
	 * make actual progress and then return to one of them.  We must
sl@0
   544
	 * de-optimize such cases, splitting each such state into progress
sl@0
   545
	 * and no-progress states.
sl@0
   546
	 */
sl@0
   547
sl@0
   548
	/* first, make a list of the states */
sl@0
   549
	slist = NULL;
sl@0
   550
	for (a = pre->outs; a != NULL; a = a->outchain) {
sl@0
   551
		s = a->to;
sl@0
   552
		for (b = s->ins; b != NULL; b = b->inchain)
sl@0
   553
			if (b->from != pre)
sl@0
   554
				break;
sl@0
   555
		if (b != NULL) {		/* must be split */
sl@0
   556
			if (s->tmp == NULL) {  /* if not already in the list */
sl@0
   557
			                       /* (fixes bugs 505048, 230589, */
sl@0
   558
			                       /* 840258, 504785) */
sl@0
   559
				s->tmp = slist;
sl@0
   560
				slist = s;
sl@0
   561
			}
sl@0
   562
		}
sl@0
   563
	}
sl@0
   564
sl@0
   565
	/* do the splits */
sl@0
   566
	for (s = slist; s != NULL; s = s2) {
sl@0
   567
		s2 = newstate(nfa);
sl@0
   568
		copyouts(nfa, s, s2);
sl@0
   569
		for (a = s->ins; a != NULL; a = b) {
sl@0
   570
			b = a->inchain;
sl@0
   571
			if (a->from != pre) {
sl@0
   572
				cparc(nfa, a, a->from, s2);
sl@0
   573
				freearc(nfa, a);
sl@0
   574
			}
sl@0
   575
		}
sl@0
   576
		s2 = s->tmp;
sl@0
   577
		s->tmp = NULL;		/* clean up while we're at it */
sl@0
   578
	}
sl@0
   579
}
sl@0
   580
sl@0
   581
/*
sl@0
   582
 - parse - parse an RE
sl@0
   583
 * This is actually just the top level, which parses a bunch of branches
sl@0
   584
 * tied together with '|'.  They appear in the tree as the left children
sl@0
   585
 * of a chain of '|' subres.
sl@0
   586
 ^ static struct subre *parse(struct vars *, int, int, struct state *,
sl@0
   587
 ^ 	struct state *);
sl@0
   588
 */
sl@0
   589
static struct subre *
sl@0
   590
parse(v, stopper, type, init, final)
sl@0
   591
struct vars *v;
sl@0
   592
int stopper;			/* EOS or ')' */
sl@0
   593
int type;			/* LACON (lookahead subRE) or PLAIN */
sl@0
   594
struct state *init;		/* initial state */
sl@0
   595
struct state *final;		/* final state */
sl@0
   596
{
sl@0
   597
	struct state *left;	/* scaffolding for branch */
sl@0
   598
	struct state *right;
sl@0
   599
	struct subre *branches;	/* top level */
sl@0
   600
	struct subre *branch;	/* current branch */
sl@0
   601
	struct subre *t;	/* temporary */
sl@0
   602
	int firstbranch;	/* is this the first branch? */
sl@0
   603
sl@0
   604
	assert(stopper == ')' || stopper == EOS);
sl@0
   605
sl@0
   606
	branches = subre(v, '|', LONGER, init, final);
sl@0
   607
	NOERRN();
sl@0
   608
	branch = branches;
sl@0
   609
	firstbranch = 1;
sl@0
   610
	do {	/* a branch */
sl@0
   611
		if (!firstbranch) {
sl@0
   612
			/* need a place to hang it */
sl@0
   613
			branch->right = subre(v, '|', LONGER, init, final);
sl@0
   614
			NOERRN();
sl@0
   615
			branch = branch->right;
sl@0
   616
		}
sl@0
   617
		firstbranch = 0;
sl@0
   618
		left = newstate(v->nfa);
sl@0
   619
		right = newstate(v->nfa);
sl@0
   620
		NOERRN();
sl@0
   621
		EMPTYARC(init, left);
sl@0
   622
		EMPTYARC(right, final);
sl@0
   623
		NOERRN();
sl@0
   624
		branch->left = parsebranch(v, stopper, type, left, right, 0);
sl@0
   625
		NOERRN();
sl@0
   626
		branch->flags |= UP(branch->flags | branch->left->flags);
sl@0
   627
		if ((branch->flags &~ branches->flags) != 0)	/* new flags */
sl@0
   628
			for (t = branches; t != branch; t = t->right)
sl@0
   629
				t->flags |= branch->flags;
sl@0
   630
	} while (EAT('|'));
sl@0
   631
	assert(SEE(stopper) || SEE(EOS));
sl@0
   632
sl@0
   633
	if (!SEE(stopper)) {
sl@0
   634
		assert(stopper == ')' && SEE(EOS));
sl@0
   635
		ERR(REG_EPAREN);
sl@0
   636
	}
sl@0
   637
sl@0
   638
	/* optimize out simple cases */
sl@0
   639
	if (branch == branches) {	/* only one branch */
sl@0
   640
		assert(branch->right == NULL);
sl@0
   641
		t = branch->left;
sl@0
   642
		branch->left = NULL;
sl@0
   643
		freesubre(v, branches);
sl@0
   644
		branches = t;
sl@0
   645
	} else if (!MESSY(branches->flags)) {	/* no interesting innards */
sl@0
   646
		freesubre(v, branches->left);
sl@0
   647
		branches->left = NULL;
sl@0
   648
		freesubre(v, branches->right);
sl@0
   649
		branches->right = NULL;
sl@0
   650
		branches->op = '=';
sl@0
   651
	}
sl@0
   652
sl@0
   653
	return branches;
sl@0
   654
}
sl@0
   655
sl@0
   656
/*
sl@0
   657
 - parsebranch - parse one branch of an RE
sl@0
   658
 * This mostly manages concatenation, working closely with parseqatom().
sl@0
   659
 * Concatenated things are bundled up as much as possible, with separate
sl@0
   660
 * ',' nodes introduced only when necessary due to substructure.
sl@0
   661
 ^ static struct subre *parsebranch(struct vars *, int, int, struct state *,
sl@0
   662
 ^ 	struct state *, int);
sl@0
   663
 */
sl@0
   664
static struct subre *
sl@0
   665
parsebranch(v, stopper, type, left, right, partial)
sl@0
   666
struct vars *v;
sl@0
   667
int stopper;			/* EOS or ')' */
sl@0
   668
int type;			/* LACON (lookahead subRE) or PLAIN */
sl@0
   669
struct state *left;		/* leftmost state */
sl@0
   670
struct state *right;		/* rightmost state */
sl@0
   671
int partial;			/* is this only part of a branch? */
sl@0
   672
{
sl@0
   673
	struct state *lp;	/* left end of current construct */
sl@0
   674
	int seencontent;	/* is there anything in this branch yet? */
sl@0
   675
	struct subre *t;
sl@0
   676
sl@0
   677
	lp = left;
sl@0
   678
	seencontent = 0;
sl@0
   679
	t = subre(v, '=', 0, left, right);	/* op '=' is tentative */
sl@0
   680
	NOERRN();
sl@0
   681
	while (!SEE('|') && !SEE(stopper) && !SEE(EOS)) {
sl@0
   682
		if (seencontent) {	/* implicit concat operator */
sl@0
   683
			lp = newstate(v->nfa);
sl@0
   684
			NOERRN();
sl@0
   685
			moveins(v->nfa, right, lp);
sl@0
   686
		}
sl@0
   687
		seencontent = 1;
sl@0
   688
sl@0
   689
		/* NB, recursion in parseqatom() may swallow rest of branch */
sl@0
   690
		parseqatom(v, stopper, type, lp, right, t);
sl@0
   691
	}
sl@0
   692
sl@0
   693
	if (!seencontent) {		/* empty branch */
sl@0
   694
		if (!partial)
sl@0
   695
			NOTE(REG_UUNSPEC);
sl@0
   696
		assert(lp == left);
sl@0
   697
		EMPTYARC(left, right);
sl@0
   698
	}
sl@0
   699
sl@0
   700
	return t;
sl@0
   701
}
sl@0
   702
sl@0
   703
/*
sl@0
   704
 - parseqatom - parse one quantified atom or constraint of an RE
sl@0
   705
 * The bookkeeping near the end cooperates very closely with parsebranch();
sl@0
   706
 * in particular, it contains a recursion that can involve parsing the rest
sl@0
   707
 * of the branch, making this function's name somewhat inaccurate.
sl@0
   708
 ^ static VOID parseqatom(struct vars *, int, int, struct state *,
sl@0
   709
 ^ 	struct state *, struct subre *);
sl@0
   710
 */
sl@0
   711
static VOID
sl@0
   712
parseqatom(v, stopper, type, lp, rp, top)
sl@0
   713
struct vars *v;
sl@0
   714
int stopper;			/* EOS or ')' */
sl@0
   715
int type;			/* LACON (lookahead subRE) or PLAIN */
sl@0
   716
struct state *lp;		/* left state to hang it on */
sl@0
   717
struct state *rp;		/* right state to hang it on */
sl@0
   718
struct subre *top;		/* subtree top */
sl@0
   719
{
sl@0
   720
	struct state *s;	/* temporaries for new states */
sl@0
   721
	struct state *s2;
sl@0
   722
#	define	ARCV(t, val)	newarc(v->nfa, t, val, lp, rp)
sl@0
   723
	int m, n;
sl@0
   724
	struct subre *atom;	/* atom's subtree */
sl@0
   725
	struct subre *t;
sl@0
   726
	int cap;		/* capturing parens? */
sl@0
   727
	int pos;		/* positive lookahead? */
sl@0
   728
	int subno;		/* capturing-parens or backref number */
sl@0
   729
	int atomtype;
sl@0
   730
	int qprefer;		/* quantifier short/long preference */
sl@0
   731
	int f;
sl@0
   732
	struct subre **atomp;	/* where the pointer to atom is */
sl@0
   733
sl@0
   734
	/* initial bookkeeping */
sl@0
   735
	atom = NULL;
sl@0
   736
	assert(lp->nouts == 0);	/* must string new code */
sl@0
   737
	assert(rp->nins == 0);	/*  between lp and rp */
sl@0
   738
	subno = 0;		/* just to shut lint up */
sl@0
   739
sl@0
   740
	/* an atom or constraint... */
sl@0
   741
	atomtype = v->nexttype;
sl@0
   742
	switch (atomtype) {
sl@0
   743
	/* first, constraints, which end by returning */
sl@0
   744
	case '^':
sl@0
   745
		ARCV('^', 1);
sl@0
   746
		if (v->cflags&REG_NLANCH)
sl@0
   747
			ARCV(BEHIND, v->nlcolor);
sl@0
   748
		NEXT();
sl@0
   749
		return;
sl@0
   750
		break;
sl@0
   751
	case '$':
sl@0
   752
		ARCV('$', 1);
sl@0
   753
		if (v->cflags&REG_NLANCH)
sl@0
   754
			ARCV(AHEAD, v->nlcolor);
sl@0
   755
		NEXT();
sl@0
   756
		return;
sl@0
   757
		break;
sl@0
   758
	case SBEGIN:
sl@0
   759
		ARCV('^', 1);	/* BOL */
sl@0
   760
		ARCV('^', 0);	/* or BOS */
sl@0
   761
		NEXT();
sl@0
   762
		return;
sl@0
   763
		break;
sl@0
   764
	case SEND:
sl@0
   765
		ARCV('$', 1);	/* EOL */
sl@0
   766
		ARCV('$', 0);	/* or EOS */
sl@0
   767
		NEXT();
sl@0
   768
		return;
sl@0
   769
		break;
sl@0
   770
	case '<':
sl@0
   771
		wordchrs(v);	/* does NEXT() */
sl@0
   772
		s = newstate(v->nfa);
sl@0
   773
		NOERR();
sl@0
   774
		nonword(v, BEHIND, lp, s);
sl@0
   775
		word(v, AHEAD, s, rp);
sl@0
   776
		return;
sl@0
   777
		break;
sl@0
   778
	case '>':
sl@0
   779
		wordchrs(v);	/* does NEXT() */
sl@0
   780
		s = newstate(v->nfa);
sl@0
   781
		NOERR();
sl@0
   782
		word(v, BEHIND, lp, s);
sl@0
   783
		nonword(v, AHEAD, s, rp);
sl@0
   784
		return;
sl@0
   785
		break;
sl@0
   786
	case WBDRY:
sl@0
   787
		wordchrs(v);	/* does NEXT() */
sl@0
   788
		s = newstate(v->nfa);
sl@0
   789
		NOERR();
sl@0
   790
		nonword(v, BEHIND, lp, s);
sl@0
   791
		word(v, AHEAD, s, rp);
sl@0
   792
		s = newstate(v->nfa);
sl@0
   793
		NOERR();
sl@0
   794
		word(v, BEHIND, lp, s);
sl@0
   795
		nonword(v, AHEAD, s, rp);
sl@0
   796
		return;
sl@0
   797
		break;
sl@0
   798
	case NWBDRY:
sl@0
   799
		wordchrs(v);	/* does NEXT() */
sl@0
   800
		s = newstate(v->nfa);
sl@0
   801
		NOERR();
sl@0
   802
		word(v, BEHIND, lp, s);
sl@0
   803
		word(v, AHEAD, s, rp);
sl@0
   804
		s = newstate(v->nfa);
sl@0
   805
		NOERR();
sl@0
   806
		nonword(v, BEHIND, lp, s);
sl@0
   807
		nonword(v, AHEAD, s, rp);
sl@0
   808
		return;
sl@0
   809
		break;
sl@0
   810
	case LACON:	/* lookahead constraint */
sl@0
   811
		pos = v->nextvalue;
sl@0
   812
		NEXT();
sl@0
   813
		s = newstate(v->nfa);
sl@0
   814
		s2 = newstate(v->nfa);
sl@0
   815
		NOERR();
sl@0
   816
		t = parse(v, ')', LACON, s, s2);
sl@0
   817
		freesubre(v, t);	/* internal structure irrelevant */
sl@0
   818
		assert(SEE(')') || ISERR());
sl@0
   819
		NEXT();
sl@0
   820
		n = newlacon(v, s, s2, pos);
sl@0
   821
		NOERR();
sl@0
   822
		ARCV(LACON, n);
sl@0
   823
		return;
sl@0
   824
		break;
sl@0
   825
	/* then errors, to get them out of the way */
sl@0
   826
	case '*':
sl@0
   827
	case '+':
sl@0
   828
	case '?':
sl@0
   829
	case '{':
sl@0
   830
		ERR(REG_BADRPT);
sl@0
   831
		return;
sl@0
   832
		break;
sl@0
   833
	default:
sl@0
   834
		ERR(REG_ASSERT);
sl@0
   835
		return;
sl@0
   836
		break;
sl@0
   837
	/* then plain characters, and minor variants on that theme */
sl@0
   838
	case ')':		/* unbalanced paren */
sl@0
   839
		if ((v->cflags&REG_ADVANCED) != REG_EXTENDED) {
sl@0
   840
			ERR(REG_EPAREN);
sl@0
   841
			return;
sl@0
   842
		}
sl@0
   843
		/* legal in EREs due to specification botch */
sl@0
   844
		NOTE(REG_UPBOTCH);
sl@0
   845
		/* fallthrough into case PLAIN */
sl@0
   846
	case PLAIN:
sl@0
   847
		onechr(v, v->nextvalue, lp, rp);
sl@0
   848
		okcolors(v->nfa, v->cm);
sl@0
   849
		NOERR();
sl@0
   850
		NEXT();
sl@0
   851
		break;
sl@0
   852
	case '[':
sl@0
   853
		if (v->nextvalue == 1)
sl@0
   854
			bracket(v, lp, rp);
sl@0
   855
		else
sl@0
   856
			cbracket(v, lp, rp);
sl@0
   857
		assert(SEE(']') || ISERR());
sl@0
   858
		NEXT();
sl@0
   859
		break;
sl@0
   860
	case '.':
sl@0
   861
		rainbow(v->nfa, v->cm, PLAIN,
sl@0
   862
				(v->cflags&REG_NLSTOP) ? v->nlcolor : COLORLESS,
sl@0
   863
				lp, rp);
sl@0
   864
		NEXT();
sl@0
   865
		break;
sl@0
   866
	/* and finally the ugly stuff */
sl@0
   867
	case '(':	/* value flags as capturing or non */
sl@0
   868
		cap = (type == LACON) ? 0 : v->nextvalue;
sl@0
   869
		if (cap) {
sl@0
   870
			v->nsubexp++;
sl@0
   871
			subno = v->nsubexp;
sl@0
   872
			if ((size_t)subno >= v->nsubs)
sl@0
   873
				moresubs(v, subno);
sl@0
   874
			assert((size_t)subno < v->nsubs);
sl@0
   875
		} else
sl@0
   876
			atomtype = PLAIN;	/* something that's not '(' */
sl@0
   877
		NEXT();
sl@0
   878
		/* need new endpoints because tree will contain pointers */
sl@0
   879
		s = newstate(v->nfa);
sl@0
   880
		s2 = newstate(v->nfa);
sl@0
   881
		NOERR();
sl@0
   882
		EMPTYARC(lp, s);
sl@0
   883
		EMPTYARC(s2, rp);
sl@0
   884
		NOERR();
sl@0
   885
		atom = parse(v, ')', PLAIN, s, s2);
sl@0
   886
		assert(SEE(')') || ISERR());
sl@0
   887
		NEXT();
sl@0
   888
		NOERR();
sl@0
   889
		if (cap) {
sl@0
   890
			v->subs[subno] = atom;
sl@0
   891
			t = subre(v, '(', atom->flags|CAP, lp, rp);
sl@0
   892
			NOERR();
sl@0
   893
			t->subno = subno;
sl@0
   894
			t->left = atom;
sl@0
   895
			atom = t;
sl@0
   896
		}
sl@0
   897
		/* postpone everything else pending possible {0} */
sl@0
   898
		break;
sl@0
   899
	case BACKREF:	/* the Feature From The Black Lagoon */
sl@0
   900
		INSIST(type != LACON, REG_ESUBREG);
sl@0
   901
		INSIST(v->nextvalue < v->nsubs, REG_ESUBREG);
sl@0
   902
		INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG);
sl@0
   903
		NOERR();
sl@0
   904
		assert(v->nextvalue > 0);
sl@0
   905
		atom = subre(v, 'b', BACKR, lp, rp);
sl@0
   906
		subno = v->nextvalue;
sl@0
   907
		atom->subno = subno;
sl@0
   908
		EMPTYARC(lp, rp);	/* temporarily, so there's something */
sl@0
   909
		NEXT();
sl@0
   910
		break;
sl@0
   911
	}
sl@0
   912
sl@0
   913
	/* ...and an atom may be followed by a quantifier */
sl@0
   914
	switch (v->nexttype) {
sl@0
   915
	case '*':
sl@0
   916
		m = 0;
sl@0
   917
		n = INFINITY;
sl@0
   918
		qprefer = (v->nextvalue) ? LONGER : SHORTER;
sl@0
   919
		NEXT();
sl@0
   920
		break;
sl@0
   921
	case '+':
sl@0
   922
		m = 1;
sl@0
   923
		n = INFINITY;
sl@0
   924
		qprefer = (v->nextvalue) ? LONGER : SHORTER;
sl@0
   925
		NEXT();
sl@0
   926
		break;
sl@0
   927
	case '?':
sl@0
   928
		m = 0;
sl@0
   929
		n = 1;
sl@0
   930
		qprefer = (v->nextvalue) ? LONGER : SHORTER;
sl@0
   931
		NEXT();
sl@0
   932
		break;
sl@0
   933
	case '{':
sl@0
   934
		NEXT();
sl@0
   935
		m = scannum(v);
sl@0
   936
		if (EAT(',')) {
sl@0
   937
			if (SEE(DIGIT))
sl@0
   938
				n = scannum(v);
sl@0
   939
			else
sl@0
   940
				n = INFINITY;
sl@0
   941
			if (m > n) {
sl@0
   942
				ERR(REG_BADBR);
sl@0
   943
				return;
sl@0
   944
			}
sl@0
   945
			/* {m,n} exercises preference, even if it's {m,m} */
sl@0
   946
			qprefer = (v->nextvalue) ? LONGER : SHORTER;
sl@0
   947
		} else {
sl@0
   948
			n = m;
sl@0
   949
			/* {m} passes operand's preference through */
sl@0
   950
			qprefer = 0;
sl@0
   951
		}
sl@0
   952
		if (!SEE('}')) {	/* catches errors too */
sl@0
   953
			ERR(REG_BADBR);
sl@0
   954
			return;
sl@0
   955
		}
sl@0
   956
		NEXT();
sl@0
   957
		break;
sl@0
   958
	default:		/* no quantifier */
sl@0
   959
		m = n = 1;
sl@0
   960
		qprefer = 0;
sl@0
   961
		break;
sl@0
   962
	}
sl@0
   963
sl@0
   964
	/* annoying special case:  {0} or {0,0} cancels everything */
sl@0
   965
	if (m == 0 && n == 0) {
sl@0
   966
		if (atom != NULL)
sl@0
   967
			freesubre(v, atom);
sl@0
   968
		if (atomtype == '(')
sl@0
   969
			v->subs[subno] = NULL;
sl@0
   970
		delsub(v->nfa, lp, rp);
sl@0
   971
		EMPTYARC(lp, rp);
sl@0
   972
		return;
sl@0
   973
	}
sl@0
   974
sl@0
   975
	/* if not a messy case, avoid hard part */
sl@0
   976
	assert(!MESSY(top->flags));
sl@0
   977
	f = top->flags | qprefer | ((atom != NULL) ? atom->flags : 0);
sl@0
   978
	if (atomtype != '(' && atomtype != BACKREF && !MESSY(UP(f))) {
sl@0
   979
		if (!(m == 1 && n == 1))
sl@0
   980
			repeat(v, lp, rp, m, n);
sl@0
   981
		if (atom != NULL)
sl@0
   982
			freesubre(v, atom);
sl@0
   983
		top->flags = f;
sl@0
   984
		return;
sl@0
   985
	}
sl@0
   986
sl@0
   987
	/*
sl@0
   988
	 * hard part:  something messy
sl@0
   989
	 * That is, capturing parens, back reference, short/long clash, or
sl@0
   990
	 * an atom with substructure containing one of those.
sl@0
   991
	 */
sl@0
   992
sl@0
   993
	/* now we'll need a subre for the contents even if they're boring */
sl@0
   994
	if (atom == NULL) {
sl@0
   995
		atom = subre(v, '=', 0, lp, rp);
sl@0
   996
		NOERR();
sl@0
   997
	}
sl@0
   998
sl@0
   999
	/*
sl@0
  1000
	 * prepare a general-purpose state skeleton
sl@0
  1001
	 *
sl@0
  1002
	 *    ---> [s] ---prefix---> [begin] ---atom---> [end] ----rest---> [rp]
sl@0
  1003
	 *   /                                            /
sl@0
  1004
	 * [lp] ----> [s2] ----bypass---------------------
sl@0
  1005
	 *
sl@0
  1006
	 * where bypass is an empty, and prefix is some repetitions of atom
sl@0
  1007
	 */
sl@0
  1008
	s = newstate(v->nfa);		/* first, new endpoints for the atom */
sl@0
  1009
	s2 = newstate(v->nfa);
sl@0
  1010
	NOERR();
sl@0
  1011
	moveouts(v->nfa, lp, s);
sl@0
  1012
	moveins(v->nfa, rp, s2);
sl@0
  1013
	NOERR();
sl@0
  1014
	atom->begin = s;
sl@0
  1015
	atom->end = s2;
sl@0
  1016
	s = newstate(v->nfa);		/* and spots for prefix and bypass */
sl@0
  1017
	s2 = newstate(v->nfa);
sl@0
  1018
	NOERR();
sl@0
  1019
	EMPTYARC(lp, s);
sl@0
  1020
	EMPTYARC(lp, s2);
sl@0
  1021
	NOERR();
sl@0
  1022
sl@0
  1023
	/* break remaining subRE into x{...} and what follows */
sl@0
  1024
	t = subre(v, '.', COMBINE(qprefer, atom->flags), lp, rp);
sl@0
  1025
	t->left = atom;
sl@0
  1026
	atomp = &t->left;
sl@0
  1027
	/* here we should recurse... but we must postpone that to the end */
sl@0
  1028
sl@0
  1029
	/* split top into prefix and remaining */
sl@0
  1030
	assert(top->op == '=' && top->left == NULL && top->right == NULL);
sl@0
  1031
	top->left = subre(v, '=', top->flags, top->begin, lp);
sl@0
  1032
	top->op = '.';
sl@0
  1033
	top->right = t;
sl@0
  1034
sl@0
  1035
	/* if it's a backref, now is the time to replicate the subNFA */
sl@0
  1036
	if (atomtype == BACKREF) {
sl@0
  1037
		assert(atom->begin->nouts == 1);	/* just the EMPTY */
sl@0
  1038
		delsub(v->nfa, atom->begin, atom->end);
sl@0
  1039
		assert(v->subs[subno] != NULL);
sl@0
  1040
		/* and here's why the recursion got postponed:  it must */
sl@0
  1041
		/* wait until the skeleton is filled in, because it may */
sl@0
  1042
		/* hit a backref that wants to copy the filled-in skeleton */
sl@0
  1043
		dupnfa(v->nfa, v->subs[subno]->begin, v->subs[subno]->end,
sl@0
  1044
						atom->begin, atom->end);
sl@0
  1045
		NOERR();
sl@0
  1046
	}
sl@0
  1047
sl@0
  1048
	/* it's quantifier time; first, turn x{0,...} into x{1,...}|empty */
sl@0
  1049
	if (m == 0) {
sl@0
  1050
		EMPTYARC(s2, atom->end);		/* the bypass */
sl@0
  1051
		assert(PREF(qprefer) != 0);
sl@0
  1052
		f = COMBINE(qprefer, atom->flags);
sl@0
  1053
		t = subre(v, '|', f, lp, atom->end);
sl@0
  1054
		NOERR();
sl@0
  1055
		t->left = atom;
sl@0
  1056
		t->right = subre(v, '|', PREF(f), s2, atom->end);
sl@0
  1057
		NOERR();
sl@0
  1058
		t->right->left = subre(v, '=', 0, s2, atom->end);
sl@0
  1059
		NOERR();
sl@0
  1060
		*atomp = t;
sl@0
  1061
		atomp = &t->left;
sl@0
  1062
		m = 1;
sl@0
  1063
	}
sl@0
  1064
sl@0
  1065
	/* deal with the rest of the quantifier */
sl@0
  1066
	if (atomtype == BACKREF) {
sl@0
  1067
		/* special case:  backrefs have internal quantifiers */
sl@0
  1068
		EMPTYARC(s, atom->begin);	/* empty prefix */
sl@0
  1069
		/* just stuff everything into atom */
sl@0
  1070
		repeat(v, atom->begin, atom->end, m, n);
sl@0
  1071
		atom->min = (short)m;
sl@0
  1072
		atom->max = (short)n;
sl@0
  1073
		atom->flags |= COMBINE(qprefer, atom->flags);
sl@0
  1074
	} else if (m == 1 && n == 1) {
sl@0
  1075
		/* no/vacuous quantifier:  done */
sl@0
  1076
		EMPTYARC(s, atom->begin);	/* empty prefix */
sl@0
  1077
	} else {
sl@0
  1078
		/* turn x{m,n} into x{m-1,n-1}x, with capturing */
sl@0
  1079
		/*  parens in only second x */
sl@0
  1080
		dupnfa(v->nfa, atom->begin, atom->end, s, atom->begin);
sl@0
  1081
		assert(m >= 1 && m != INFINITY && n >= 1);
sl@0
  1082
		repeat(v, s, atom->begin, m-1, (n == INFINITY) ? n : n-1);
sl@0
  1083
		f = COMBINE(qprefer, atom->flags);
sl@0
  1084
		t = subre(v, '.', f, s, atom->end);	/* prefix and atom */
sl@0
  1085
		NOERR();
sl@0
  1086
		t->left = subre(v, '=', PREF(f), s, atom->begin);
sl@0
  1087
		NOERR();
sl@0
  1088
		t->right = atom;
sl@0
  1089
		*atomp = t;
sl@0
  1090
	}
sl@0
  1091
sl@0
  1092
	/* and finally, look after that postponed recursion */
sl@0
  1093
	t = top->right;
sl@0
  1094
	if (!(SEE('|') || SEE(stopper) || SEE(EOS)))
sl@0
  1095
		t->right = parsebranch(v, stopper, type, atom->end, rp, 1);
sl@0
  1096
	else {
sl@0
  1097
		EMPTYARC(atom->end, rp);
sl@0
  1098
		t->right = subre(v, '=', 0, atom->end, rp);
sl@0
  1099
	}
sl@0
  1100
	assert(SEE('|') || SEE(stopper) || SEE(EOS));
sl@0
  1101
	t->flags |= COMBINE(t->flags, t->right->flags);
sl@0
  1102
	top->flags |= COMBINE(top->flags, t->flags);
sl@0
  1103
}
sl@0
  1104
sl@0
  1105
/*
sl@0
  1106
 - nonword - generate arcs for non-word-character ahead or behind
sl@0
  1107
 ^ static VOID nonword(struct vars *, int, struct state *, struct state *);
sl@0
  1108
 */
sl@0
  1109
static VOID
sl@0
  1110
nonword(v, dir, lp, rp)
sl@0
  1111
struct vars *v;
sl@0
  1112
int dir;			/* AHEAD or BEHIND */
sl@0
  1113
struct state *lp;
sl@0
  1114
struct state *rp;
sl@0
  1115
{
sl@0
  1116
	int anchor = (dir == AHEAD) ? '$' : '^';
sl@0
  1117
sl@0
  1118
	assert(dir == AHEAD || dir == BEHIND);
sl@0
  1119
	newarc(v->nfa, anchor, 1, lp, rp);
sl@0
  1120
	newarc(v->nfa, anchor, 0, lp, rp);
sl@0
  1121
	colorcomplement(v->nfa, v->cm, dir, v->wordchrs, lp, rp);
sl@0
  1122
	/* (no need for special attention to \n) */
sl@0
  1123
}
sl@0
  1124
sl@0
  1125
/*
sl@0
  1126
 - word - generate arcs for word character ahead or behind
sl@0
  1127
 ^ static VOID word(struct vars *, int, struct state *, struct state *);
sl@0
  1128
 */
sl@0
  1129
static VOID
sl@0
  1130
word(v, dir, lp, rp)
sl@0
  1131
struct vars *v;
sl@0
  1132
int dir;			/* AHEAD or BEHIND */
sl@0
  1133
struct state *lp;
sl@0
  1134
struct state *rp;
sl@0
  1135
{
sl@0
  1136
	assert(dir == AHEAD || dir == BEHIND);
sl@0
  1137
	cloneouts(v->nfa, v->wordchrs, lp, rp, dir);
sl@0
  1138
	/* (no need for special attention to \n) */
sl@0
  1139
}
sl@0
  1140
sl@0
  1141
/*
sl@0
  1142
 - scannum - scan a number
sl@0
  1143
 ^ static int scannum(struct vars *);
sl@0
  1144
 */
sl@0
  1145
static int			/* value, <= DUPMAX */
sl@0
  1146
scannum(v)
sl@0
  1147
struct vars *v;
sl@0
  1148
{
sl@0
  1149
	int n = 0;
sl@0
  1150
sl@0
  1151
	while (SEE(DIGIT) && n < DUPMAX) {
sl@0
  1152
		n = n*10 + v->nextvalue;
sl@0
  1153
		NEXT();
sl@0
  1154
	}
sl@0
  1155
	if (SEE(DIGIT) || n > DUPMAX) {
sl@0
  1156
		ERR(REG_BADBR);
sl@0
  1157
		return 0;
sl@0
  1158
	}
sl@0
  1159
	return n;
sl@0
  1160
}
sl@0
  1161
sl@0
  1162
/*
sl@0
  1163
 - repeat - replicate subNFA for quantifiers
sl@0
  1164
 * The duplication sequences used here are chosen carefully so that any
sl@0
  1165
 * pointers starting out pointing into the subexpression end up pointing into
sl@0
  1166
 * the last occurrence.  (Note that it may not be strung between the same
sl@0
  1167
 * left and right end states, however!)  This used to be important for the
sl@0
  1168
 * subRE tree, although the important bits are now handled by the in-line
sl@0
  1169
 * code in parse(), and when this is called, it doesn't matter any more.
sl@0
  1170
 ^ static VOID repeat(struct vars *, struct state *, struct state *, int, int);
sl@0
  1171
 */
sl@0
  1172
static VOID
sl@0
  1173
repeat(v, lp, rp, m, n)
sl@0
  1174
struct vars *v;
sl@0
  1175
struct state *lp;
sl@0
  1176
struct state *rp;
sl@0
  1177
int m;
sl@0
  1178
int n;
sl@0
  1179
{
sl@0
  1180
#	define	SOME	2
sl@0
  1181
#	define	INF	3
sl@0
  1182
#	define	PAIR(x, y)	((x)*4 + (y))
sl@0
  1183
#	define	REDUCE(x)	( ((x) == INFINITY) ? INF : (((x) > 1) ? SOME : (x)) )
sl@0
  1184
	CONST int rm = REDUCE(m);
sl@0
  1185
	CONST int rn = REDUCE(n);
sl@0
  1186
	struct state *s;
sl@0
  1187
	struct state *s2;
sl@0
  1188
sl@0
  1189
	switch (PAIR(rm, rn)) {
sl@0
  1190
	case PAIR(0, 0):		/* empty string */
sl@0
  1191
		delsub(v->nfa, lp, rp);
sl@0
  1192
		EMPTYARC(lp, rp);
sl@0
  1193
		break;
sl@0
  1194
	case PAIR(0, 1):		/* do as x| */
sl@0
  1195
		EMPTYARC(lp, rp);
sl@0
  1196
		break;
sl@0
  1197
	case PAIR(0, SOME):		/* do as x{1,n}| */
sl@0
  1198
		repeat(v, lp, rp, 1, n);
sl@0
  1199
		NOERR();
sl@0
  1200
		EMPTYARC(lp, rp);
sl@0
  1201
		break;
sl@0
  1202
	case PAIR(0, INF):		/* loop x around */
sl@0
  1203
		s = newstate(v->nfa);
sl@0
  1204
		NOERR();
sl@0
  1205
		moveouts(v->nfa, lp, s);
sl@0
  1206
		moveins(v->nfa, rp, s);
sl@0
  1207
		EMPTYARC(lp, s);
sl@0
  1208
		EMPTYARC(s, rp);
sl@0
  1209
		break;
sl@0
  1210
	case PAIR(1, 1):		/* no action required */
sl@0
  1211
		break;
sl@0
  1212
	case PAIR(1, SOME):		/* do as x{0,n-1}x = (x{1,n-1}|)x */
sl@0
  1213
		s = newstate(v->nfa);
sl@0
  1214
		NOERR();
sl@0
  1215
		moveouts(v->nfa, lp, s);
sl@0
  1216
		dupnfa(v->nfa, s, rp, lp, s);
sl@0
  1217
		NOERR();
sl@0
  1218
		repeat(v, lp, s, 1, n-1);
sl@0
  1219
		NOERR();
sl@0
  1220
		EMPTYARC(lp, s);
sl@0
  1221
		break;
sl@0
  1222
	case PAIR(1, INF):		/* add loopback arc */
sl@0
  1223
		s = newstate(v->nfa);
sl@0
  1224
		s2 = newstate(v->nfa);
sl@0
  1225
		NOERR();
sl@0
  1226
		moveouts(v->nfa, lp, s);
sl@0
  1227
		moveins(v->nfa, rp, s2);
sl@0
  1228
		EMPTYARC(lp, s);
sl@0
  1229
		EMPTYARC(s2, rp);
sl@0
  1230
		EMPTYARC(s2, s);
sl@0
  1231
		break;
sl@0
  1232
	case PAIR(SOME, SOME):		/* do as x{m-1,n-1}x */
sl@0
  1233
		s = newstate(v->nfa);
sl@0
  1234
		NOERR();
sl@0
  1235
		moveouts(v->nfa, lp, s);
sl@0
  1236
		dupnfa(v->nfa, s, rp, lp, s);
sl@0
  1237
		NOERR();
sl@0
  1238
		repeat(v, lp, s, m-1, n-1);
sl@0
  1239
		break;
sl@0
  1240
	case PAIR(SOME, INF):		/* do as x{m-1,}x */
sl@0
  1241
		s = newstate(v->nfa);
sl@0
  1242
		NOERR();
sl@0
  1243
		moveouts(v->nfa, lp, s);
sl@0
  1244
		dupnfa(v->nfa, s, rp, lp, s);
sl@0
  1245
		NOERR();
sl@0
  1246
		repeat(v, lp, s, m-1, n);
sl@0
  1247
		break;
sl@0
  1248
	default:
sl@0
  1249
		ERR(REG_ASSERT);
sl@0
  1250
		break;
sl@0
  1251
	}
sl@0
  1252
}
sl@0
  1253
sl@0
  1254
/*
sl@0
  1255
 - bracket - handle non-complemented bracket expression
sl@0
  1256
 * Also called from cbracket for complemented bracket expressions.
sl@0
  1257
 ^ static VOID bracket(struct vars *, struct state *, struct state *);
sl@0
  1258
 */
sl@0
  1259
static VOID
sl@0
  1260
bracket(v, lp, rp)
sl@0
  1261
struct vars *v;
sl@0
  1262
struct state *lp;
sl@0
  1263
struct state *rp;
sl@0
  1264
{
sl@0
  1265
	assert(SEE('['));
sl@0
  1266
	NEXT();
sl@0
  1267
	while (!SEE(']') && !SEE(EOS))
sl@0
  1268
		brackpart(v, lp, rp);
sl@0
  1269
	assert(SEE(']') || ISERR());
sl@0
  1270
	okcolors(v->nfa, v->cm);
sl@0
  1271
}
sl@0
  1272
sl@0
  1273
/*
sl@0
  1274
 - cbracket - handle complemented bracket expression
sl@0
  1275
 * We do it by calling bracket() with dummy endpoints, and then complementing
sl@0
  1276
 * the result.  The alternative would be to invoke rainbow(), and then delete
sl@0
  1277
 * arcs as the b.e. is seen... but that gets messy.
sl@0
  1278
 ^ static VOID cbracket(struct vars *, struct state *, struct state *);
sl@0
  1279
 */
sl@0
  1280
static VOID
sl@0
  1281
cbracket(v, lp, rp)
sl@0
  1282
struct vars *v;
sl@0
  1283
struct state *lp;
sl@0
  1284
struct state *rp;
sl@0
  1285
{
sl@0
  1286
	struct state *left = newstate(v->nfa);
sl@0
  1287
	struct state *right = newstate(v->nfa);
sl@0
  1288
	struct state *s;
sl@0
  1289
	struct arc *a;			/* arc from lp */
sl@0
  1290
	struct arc *ba;			/* arc from left, from bracket() */
sl@0
  1291
	struct arc *pa;			/* MCCE-prototype arc */
sl@0
  1292
	color co;
sl@0
  1293
	chr *p;
sl@0
  1294
	int i;
sl@0
  1295
sl@0
  1296
	NOERR();
sl@0
  1297
	bracket(v, left, right);
sl@0
  1298
	if (v->cflags&REG_NLSTOP)
sl@0
  1299
		newarc(v->nfa, PLAIN, v->nlcolor, left, right);
sl@0
  1300
	NOERR();
sl@0
  1301
sl@0
  1302
	assert(lp->nouts == 0);		/* all outarcs will be ours */
sl@0
  1303
sl@0
  1304
	/* easy part of complementing */
sl@0
  1305
	colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp);
sl@0
  1306
	NOERR();
sl@0
  1307
	if (v->mcces == NULL) {		/* no MCCEs -- we're done */
sl@0
  1308
		dropstate(v->nfa, left);
sl@0
  1309
		assert(right->nins == 0);
sl@0
  1310
		freestate(v->nfa, right);
sl@0
  1311
		return;
sl@0
  1312
	}
sl@0
  1313
sl@0
  1314
	/* but complementing gets messy in the presence of MCCEs... */
sl@0
  1315
	NOTE(REG_ULOCALE);
sl@0
  1316
	for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--) {
sl@0
  1317
		co = GETCOLOR(v->cm, *p);
sl@0
  1318
		a = findarc(lp, PLAIN, co);
sl@0
  1319
		ba = findarc(left, PLAIN, co);
sl@0
  1320
		if (ba == NULL) {
sl@0
  1321
			assert(a != NULL);
sl@0
  1322
			freearc(v->nfa, a);
sl@0
  1323
		} else {
sl@0
  1324
			assert(a == NULL);
sl@0
  1325
		}
sl@0
  1326
		s = newstate(v->nfa);
sl@0
  1327
		NOERR();
sl@0
  1328
		newarc(v->nfa, PLAIN, co, lp, s);
sl@0
  1329
		NOERR();
sl@0
  1330
		pa = findarc(v->mccepbegin, PLAIN, co);
sl@0
  1331
		assert(pa != NULL);
sl@0
  1332
		if (ba == NULL) {	/* easy case, need all of them */
sl@0
  1333
			cloneouts(v->nfa, pa->to, s, rp, PLAIN);
sl@0
  1334
			newarc(v->nfa, '$', 1, s, rp);
sl@0
  1335
			newarc(v->nfa, '$', 0, s, rp);
sl@0
  1336
			colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp);
sl@0
  1337
		} else {		/* must be selective */
sl@0
  1338
			if (findarc(ba->to, '$', 1) == NULL) {
sl@0
  1339
				newarc(v->nfa, '$', 1, s, rp);
sl@0
  1340
				newarc(v->nfa, '$', 0, s, rp);
sl@0
  1341
				colorcomplement(v->nfa, v->cm, AHEAD, pa->to,
sl@0
  1342
									 s, rp);
sl@0
  1343
			}
sl@0
  1344
			for (pa = pa->to->outs; pa != NULL; pa = pa->outchain)
sl@0
  1345
				if (findarc(ba->to, PLAIN, pa->co) == NULL)
sl@0
  1346
					newarc(v->nfa, PLAIN, pa->co, s, rp);
sl@0
  1347
			if (s->nouts == 0)	/* limit of selectivity: none */
sl@0
  1348
				dropstate(v->nfa, s);	/* frees arc too */
sl@0
  1349
		}
sl@0
  1350
		NOERR();
sl@0
  1351
	}
sl@0
  1352
sl@0
  1353
	delsub(v->nfa, left, right);
sl@0
  1354
	assert(left->nouts == 0);
sl@0
  1355
	freestate(v->nfa, left);
sl@0
  1356
	assert(right->nins == 0);
sl@0
  1357
	freestate(v->nfa, right);
sl@0
  1358
}
sl@0
  1359
			
sl@0
  1360
/*
sl@0
  1361
 - brackpart - handle one item (or range) within a bracket expression
sl@0
  1362
 ^ static VOID brackpart(struct vars *, struct state *, struct state *);
sl@0
  1363
 */
sl@0
  1364
static VOID
sl@0
  1365
brackpart(v, lp, rp)
sl@0
  1366
struct vars *v;
sl@0
  1367
struct state *lp;
sl@0
  1368
struct state *rp;
sl@0
  1369
{
sl@0
  1370
	celt startc;
sl@0
  1371
	celt endc;
sl@0
  1372
	struct cvec *cv;
sl@0
  1373
	chr *startp;
sl@0
  1374
	chr *endp;
sl@0
  1375
	chr c[1];
sl@0
  1376
sl@0
  1377
	/* parse something, get rid of special cases, take shortcuts */
sl@0
  1378
	switch (v->nexttype) {
sl@0
  1379
	case RANGE:			/* a-b-c or other botch */
sl@0
  1380
		ERR(REG_ERANGE);
sl@0
  1381
		return;
sl@0
  1382
		break;
sl@0
  1383
	case PLAIN:
sl@0
  1384
		c[0] = v->nextvalue;
sl@0
  1385
		NEXT();
sl@0
  1386
		/* shortcut for ordinary chr (not range, not MCCE leader) */
sl@0
  1387
		if (!SEE(RANGE) && !ISCELEADER(v, c[0])) {
sl@0
  1388
			onechr(v, c[0], lp, rp);
sl@0
  1389
			return;
sl@0
  1390
		}
sl@0
  1391
		startc = element(v, c, c+1);
sl@0
  1392
		NOERR();
sl@0
  1393
		break;
sl@0
  1394
	case COLLEL:
sl@0
  1395
		startp = v->now;
sl@0
  1396
		endp = scanplain(v);
sl@0
  1397
		INSIST(startp < endp, REG_ECOLLATE);
sl@0
  1398
		NOERR();
sl@0
  1399
		startc = element(v, startp, endp);
sl@0
  1400
		NOERR();
sl@0
  1401
		break;
sl@0
  1402
	case ECLASS:
sl@0
  1403
		startp = v->now;
sl@0
  1404
		endp = scanplain(v);
sl@0
  1405
		INSIST(startp < endp, REG_ECOLLATE);
sl@0
  1406
		NOERR();
sl@0
  1407
		startc = element(v, startp, endp);
sl@0
  1408
		NOERR();
sl@0
  1409
		cv = eclass(v, startc, (v->cflags&REG_ICASE));
sl@0
  1410
		NOERR();
sl@0
  1411
		dovec(v, cv, lp, rp);
sl@0
  1412
		return;
sl@0
  1413
		break;
sl@0
  1414
	case CCLASS:
sl@0
  1415
		startp = v->now;
sl@0
  1416
		endp = scanplain(v);
sl@0
  1417
		INSIST(startp < endp, REG_ECTYPE);
sl@0
  1418
		NOERR();
sl@0
  1419
		cv = cclass(v, startp, endp, (v->cflags&REG_ICASE));
sl@0
  1420
		NOERR();
sl@0
  1421
		dovec(v, cv, lp, rp);
sl@0
  1422
		return;
sl@0
  1423
		break;
sl@0
  1424
	default:
sl@0
  1425
		ERR(REG_ASSERT);
sl@0
  1426
		return;
sl@0
  1427
		break;
sl@0
  1428
	}
sl@0
  1429
sl@0
  1430
	if (SEE(RANGE)) {
sl@0
  1431
		NEXT();
sl@0
  1432
		switch (v->nexttype) {
sl@0
  1433
		case PLAIN:
sl@0
  1434
		case RANGE:
sl@0
  1435
			c[0] = v->nextvalue;
sl@0
  1436
			NEXT();
sl@0
  1437
			endc = element(v, c, c+1);
sl@0
  1438
			NOERR();
sl@0
  1439
			break;
sl@0
  1440
		case COLLEL:
sl@0
  1441
			startp = v->now;
sl@0
  1442
			endp = scanplain(v);
sl@0
  1443
			INSIST(startp < endp, REG_ECOLLATE);
sl@0
  1444
			NOERR();
sl@0
  1445
			endc = element(v, startp, endp);
sl@0
  1446
			NOERR();
sl@0
  1447
			break;
sl@0
  1448
		default:
sl@0
  1449
			ERR(REG_ERANGE);
sl@0
  1450
			return;
sl@0
  1451
			break;
sl@0
  1452
		}
sl@0
  1453
	} else
sl@0
  1454
		endc = startc;
sl@0
  1455
sl@0
  1456
	/*
sl@0
  1457
	 * Ranges are unportable.  Actually, standard C does
sl@0
  1458
	 * guarantee that digits are contiguous, but making
sl@0
  1459
	 * that an exception is just too complicated.
sl@0
  1460
	 */
sl@0
  1461
	if (startc != endc)
sl@0
  1462
		NOTE(REG_UUNPORT);
sl@0
  1463
	cv = range(v, startc, endc, (v->cflags&REG_ICASE));
sl@0
  1464
	NOERR();
sl@0
  1465
	dovec(v, cv, lp, rp);
sl@0
  1466
}
sl@0
  1467
sl@0
  1468
/*
sl@0
  1469
 - scanplain - scan PLAIN contents of [. etc.
sl@0
  1470
 * Certain bits of trickery in lex.c know that this code does not try
sl@0
  1471
 * to look past the final bracket of the [. etc.
sl@0
  1472
 ^ static chr *scanplain(struct vars *);
sl@0
  1473
 */
sl@0
  1474
static chr *			/* just after end of sequence */
sl@0
  1475
scanplain(v)
sl@0
  1476
struct vars *v;
sl@0
  1477
{
sl@0
  1478
	chr *endp;
sl@0
  1479
sl@0
  1480
	assert(SEE(COLLEL) || SEE(ECLASS) || SEE(CCLASS));
sl@0
  1481
	NEXT();
sl@0
  1482
sl@0
  1483
	endp = v->now;
sl@0
  1484
	while (SEE(PLAIN)) {
sl@0
  1485
		endp = v->now;
sl@0
  1486
		NEXT();
sl@0
  1487
	}
sl@0
  1488
sl@0
  1489
	assert(SEE(END) || ISERR());
sl@0
  1490
	NEXT();
sl@0
  1491
sl@0
  1492
	return endp;
sl@0
  1493
}
sl@0
  1494
sl@0
  1495
/*
sl@0
  1496
 - leaders - process a cvec of collating elements to also include leaders
sl@0
  1497
 * Also gives all characters involved their own colors, which is almost
sl@0
  1498
 * certainly necessary, and sets up little disconnected subNFA.
sl@0
  1499
 ^ static VOID leaders(struct vars *, struct cvec *);
sl@0
  1500
 */
sl@0
  1501
static VOID
sl@0
  1502
leaders(v, cv)
sl@0
  1503
struct vars *v;
sl@0
  1504
struct cvec *cv;
sl@0
  1505
{
sl@0
  1506
	int mcce;
sl@0
  1507
	chr *p;
sl@0
  1508
	chr leader;
sl@0
  1509
	struct state *s;
sl@0
  1510
	struct arc *a;
sl@0
  1511
sl@0
  1512
	v->mccepbegin = newstate(v->nfa);
sl@0
  1513
	v->mccepend = newstate(v->nfa);
sl@0
  1514
	NOERR();
sl@0
  1515
sl@0
  1516
	for (mcce = 0; mcce < cv->nmcces; mcce++) {
sl@0
  1517
		p = cv->mcces[mcce];
sl@0
  1518
		leader = *p;
sl@0
  1519
		if (!haschr(cv, leader)) {
sl@0
  1520
			addchr(cv, leader);
sl@0
  1521
			s = newstate(v->nfa);
sl@0
  1522
			newarc(v->nfa, PLAIN, subcolor(v->cm, leader),
sl@0
  1523
							v->mccepbegin, s);
sl@0
  1524
			okcolors(v->nfa, v->cm);
sl@0
  1525
		} else {
sl@0
  1526
			a = findarc(v->mccepbegin, PLAIN,
sl@0
  1527
						GETCOLOR(v->cm, leader));
sl@0
  1528
			assert(a != NULL);
sl@0
  1529
			s = a->to;
sl@0
  1530
			assert(s != v->mccepend);
sl@0
  1531
		}
sl@0
  1532
		p++;
sl@0
  1533
		assert(*p != 0 && *(p+1) == 0);	/* only 2-char MCCEs for now */
sl@0
  1534
		newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->mccepend);
sl@0
  1535
		okcolors(v->nfa, v->cm);
sl@0
  1536
	}
sl@0
  1537
}
sl@0
  1538
sl@0
  1539
/*
sl@0
  1540
 - onechr - fill in arcs for a plain character, and possible case complements
sl@0
  1541
 * This is mostly a shortcut for efficient handling of the common case.
sl@0
  1542
 ^ static VOID onechr(struct vars *, pchr, struct state *, struct state *);
sl@0
  1543
 */
sl@0
  1544
static VOID
sl@0
  1545
onechr(v, c, lp, rp)
sl@0
  1546
struct vars *v;
sl@0
  1547
pchr c;
sl@0
  1548
struct state *lp;
sl@0
  1549
struct state *rp;
sl@0
  1550
{
sl@0
  1551
	if (!(v->cflags&REG_ICASE)) {
sl@0
  1552
		newarc(v->nfa, PLAIN, subcolor(v->cm, c), lp, rp);
sl@0
  1553
		return;
sl@0
  1554
	}
sl@0
  1555
sl@0
  1556
	/* rats, need general case anyway... */
sl@0
  1557
	dovec(v, allcases(v, c), lp, rp);
sl@0
  1558
}
sl@0
  1559
sl@0
  1560
/*
sl@0
  1561
 - dovec - fill in arcs for each element of a cvec
sl@0
  1562
 * This one has to handle the messy cases, like MCCEs and MCCE leaders.
sl@0
  1563
 ^ static VOID dovec(struct vars *, struct cvec *, struct state *,
sl@0
  1564
 ^ 	struct state *);
sl@0
  1565
 */
sl@0
  1566
static VOID
sl@0
  1567
dovec(v, cv, lp, rp)
sl@0
  1568
struct vars *v;
sl@0
  1569
struct cvec *cv;
sl@0
  1570
struct state *lp;
sl@0
  1571
struct state *rp;
sl@0
  1572
{
sl@0
  1573
	chr ch, from, to;
sl@0
  1574
	celt ce;
sl@0
  1575
	chr *p;
sl@0
  1576
	int i;
sl@0
  1577
	color co;
sl@0
  1578
	struct cvec *leads;
sl@0
  1579
	struct arc *a;
sl@0
  1580
	struct arc *pa;		/* arc in prototype */
sl@0
  1581
	struct state *s;
sl@0
  1582
	struct state *ps;	/* state in prototype */
sl@0
  1583
sl@0
  1584
	/* need a place to store leaders, if any */
sl@0
  1585
	if (nmcces(v) > 0) {
sl@0
  1586
		assert(v->mcces != NULL);
sl@0
  1587
		if (v->cv2 == NULL || v->cv2->nchrs < v->mcces->nchrs) {
sl@0
  1588
			if (v->cv2 != NULL)
sl@0
  1589
				free(v->cv2);
sl@0
  1590
			v->cv2 = newcvec(v->mcces->nchrs, 0, v->mcces->nmcces);
sl@0
  1591
			NOERR();
sl@0
  1592
			leads = v->cv2;
sl@0
  1593
		} else
sl@0
  1594
			leads = clearcvec(v->cv2);
sl@0
  1595
	} else
sl@0
  1596
		leads = NULL;
sl@0
  1597
sl@0
  1598
	/* first, get the ordinary characters out of the way */
sl@0
  1599
	for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) {
sl@0
  1600
		ch = *p;
sl@0
  1601
		if (!ISCELEADER(v, ch))
sl@0
  1602
			newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp);
sl@0
  1603
		else {
sl@0
  1604
			assert(singleton(v->cm, ch));
sl@0
  1605
			assert(leads != NULL);
sl@0
  1606
			if (!haschr(leads, ch))
sl@0
  1607
				addchr(leads, ch);
sl@0
  1608
		}
sl@0
  1609
	}
sl@0
  1610
sl@0
  1611
	/* and the ranges */
sl@0
  1612
	for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) {
sl@0
  1613
		from = *p;
sl@0
  1614
		to = *(p+1);
sl@0
  1615
		while (from <= to && (ce = nextleader(v, from, to)) != NOCELT) {
sl@0
  1616
			if (from < ce)
sl@0
  1617
				subrange(v, from, ce - 1, lp, rp);
sl@0
  1618
			assert(singleton(v->cm, ce));
sl@0
  1619
			assert(leads != NULL);
sl@0
  1620
			if (!haschr(leads, ce))
sl@0
  1621
				addchr(leads, ce);
sl@0
  1622
			from = ce + 1;
sl@0
  1623
		}
sl@0
  1624
		if (from <= to)
sl@0
  1625
			subrange(v, from, to, lp, rp);
sl@0
  1626
	}
sl@0
  1627
sl@0
  1628
	if ((leads == NULL || leads->nchrs == 0) && cv->nmcces == 0)
sl@0
  1629
		return;
sl@0
  1630
sl@0
  1631
	/* deal with the MCCE leaders */
sl@0
  1632
	NOTE(REG_ULOCALE);
sl@0
  1633
	for (p = leads->chrs, i = leads->nchrs; i > 0; p++, i--) {
sl@0
  1634
		co = GETCOLOR(v->cm, *p);
sl@0
  1635
		a = findarc(lp, PLAIN, co);
sl@0
  1636
		if (a != NULL)
sl@0
  1637
			s = a->to;
sl@0
  1638
		else {
sl@0
  1639
			s = newstate(v->nfa);
sl@0
  1640
			NOERR();
sl@0
  1641
			newarc(v->nfa, PLAIN, co, lp, s);
sl@0
  1642
			NOERR();
sl@0
  1643
		}
sl@0
  1644
		pa = findarc(v->mccepbegin, PLAIN, co);
sl@0
  1645
		assert(pa != NULL);
sl@0
  1646
		ps = pa->to;
sl@0
  1647
		newarc(v->nfa, '$', 1, s, rp);
sl@0
  1648
		newarc(v->nfa, '$', 0, s, rp);
sl@0
  1649
		colorcomplement(v->nfa, v->cm, AHEAD, ps, s, rp);
sl@0
  1650
		NOERR();
sl@0
  1651
	}
sl@0
  1652
sl@0
  1653
	/* and the MCCEs */
sl@0
  1654
	for (i = 0; i < cv->nmcces; i++) {
sl@0
  1655
		p = cv->mcces[i];
sl@0
  1656
		assert(singleton(v->cm, *p));
sl@0
  1657
		if (!singleton(v->cm, *p)) {
sl@0
  1658
			ERR(REG_ASSERT);
sl@0
  1659
			return;
sl@0
  1660
		}
sl@0
  1661
		ch = *p++;
sl@0
  1662
		co = GETCOLOR(v->cm, ch);
sl@0
  1663
		a = findarc(lp, PLAIN, co);
sl@0
  1664
		if (a != NULL)
sl@0
  1665
			s = a->to;
sl@0
  1666
		else {
sl@0
  1667
			s = newstate(v->nfa);
sl@0
  1668
			NOERR();
sl@0
  1669
			newarc(v->nfa, PLAIN, co, lp, s);
sl@0
  1670
			NOERR();
sl@0
  1671
		}
sl@0
  1672
		assert(*p != 0);	/* at least two chars */
sl@0
  1673
		assert(singleton(v->cm, *p));
sl@0
  1674
		ch = *p++;
sl@0
  1675
		co = GETCOLOR(v->cm, ch);
sl@0
  1676
		assert(*p == 0);	/* and only two, for now */
sl@0
  1677
		newarc(v->nfa, PLAIN, co, s, rp);
sl@0
  1678
		NOERR();
sl@0
  1679
	}
sl@0
  1680
}
sl@0
  1681
sl@0
  1682
/*
sl@0
  1683
 - nextleader - find next MCCE leader within range
sl@0
  1684
 ^ static celt nextleader(struct vars *, pchr, pchr);
sl@0
  1685
 */
sl@0
  1686
static celt			/* NOCELT means none */
sl@0
  1687
nextleader(v, from, to)
sl@0
  1688
struct vars *v;
sl@0
  1689
pchr from;
sl@0
  1690
pchr to;
sl@0
  1691
{
sl@0
  1692
	int i;
sl@0
  1693
	chr *p;
sl@0
  1694
	chr ch;
sl@0
  1695
	celt it = NOCELT;
sl@0
  1696
sl@0
  1697
	if (v->mcces == NULL)
sl@0
  1698
		return it;
sl@0
  1699
sl@0
  1700
	for (i = v->mcces->nchrs, p = v->mcces->chrs; i > 0; i--, p++) {
sl@0
  1701
		ch = *p;
sl@0
  1702
		if (from <= ch && ch <= to)
sl@0
  1703
			if (it == NOCELT || ch < it)
sl@0
  1704
				it = ch;
sl@0
  1705
	}
sl@0
  1706
	return it;
sl@0
  1707
}
sl@0
  1708
sl@0
  1709
/*
sl@0
  1710
 - wordchrs - set up word-chr list for word-boundary stuff, if needed
sl@0
  1711
 * The list is kept as a bunch of arcs between two dummy states; it's
sl@0
  1712
 * disposed of by the unreachable-states sweep in NFA optimization.
sl@0
  1713
 * Does NEXT().  Must not be called from any unusual lexical context.
sl@0
  1714
 * This should be reconciled with the \w etc. handling in lex.c, and
sl@0
  1715
 * should be cleaned up to reduce dependencies on input scanning.
sl@0
  1716
 ^ static VOID wordchrs(struct vars *);
sl@0
  1717
 */
sl@0
  1718
static VOID
sl@0
  1719
wordchrs(v)
sl@0
  1720
struct vars *v;
sl@0
  1721
{
sl@0
  1722
	struct state *left;
sl@0
  1723
	struct state *right;
sl@0
  1724
sl@0
  1725
	if (v->wordchrs != NULL) {
sl@0
  1726
		NEXT();		/* for consistency */
sl@0
  1727
		return;
sl@0
  1728
	}
sl@0
  1729
sl@0
  1730
	left = newstate(v->nfa);
sl@0
  1731
	right = newstate(v->nfa);
sl@0
  1732
	NOERR();
sl@0
  1733
	/* fine point:  implemented with [::], and lexer will set REG_ULOCALE */
sl@0
  1734
	lexword(v);
sl@0
  1735
	NEXT();
sl@0
  1736
	assert(v->savenow != NULL && SEE('['));
sl@0
  1737
	bracket(v, left, right);
sl@0
  1738
	assert((v->savenow != NULL && SEE(']')) || ISERR());
sl@0
  1739
	NEXT();
sl@0
  1740
	NOERR();
sl@0
  1741
	v->wordchrs = left;
sl@0
  1742
}
sl@0
  1743
sl@0
  1744
/*
sl@0
  1745
 - subre - allocate a subre
sl@0
  1746
 ^ static struct subre *subre(struct vars *, int, int, struct state *,
sl@0
  1747
 ^	struct state *);
sl@0
  1748
 */
sl@0
  1749
static struct subre *
sl@0
  1750
subre(v, op, flags, begin, end)
sl@0
  1751
struct vars *v;
sl@0
  1752
int op;
sl@0
  1753
int flags;
sl@0
  1754
struct state *begin;
sl@0
  1755
struct state *end;
sl@0
  1756
{
sl@0
  1757
	struct subre *ret;
sl@0
  1758
sl@0
  1759
	ret = v->treefree;
sl@0
  1760
	if (ret != NULL)
sl@0
  1761
		v->treefree = ret->left;
sl@0
  1762
	else {
sl@0
  1763
		ret = (struct subre *)MALLOC(sizeof(struct subre));
sl@0
  1764
		if (ret == NULL) {
sl@0
  1765
			ERR(REG_ESPACE);
sl@0
  1766
			return NULL;
sl@0
  1767
		}
sl@0
  1768
		ret->chain = v->treechain;
sl@0
  1769
		v->treechain = ret;
sl@0
  1770
	}
sl@0
  1771
sl@0
  1772
	assert(strchr("|.b(=", op) != NULL);
sl@0
  1773
sl@0
  1774
	ret->op = op;
sl@0
  1775
	ret->flags = flags;
sl@0
  1776
	ret->retry = 0;
sl@0
  1777
	ret->subno = 0;
sl@0
  1778
	ret->min = ret->max = 1;
sl@0
  1779
	ret->left = NULL;
sl@0
  1780
	ret->right = NULL;
sl@0
  1781
	ret->begin = begin;
sl@0
  1782
	ret->end = end;
sl@0
  1783
	ZAPCNFA(ret->cnfa);
sl@0
  1784
sl@0
  1785
	return ret;
sl@0
  1786
}
sl@0
  1787
sl@0
  1788
/*
sl@0
  1789
 - freesubre - free a subRE subtree
sl@0
  1790
 ^ static VOID freesubre(struct vars *, struct subre *);
sl@0
  1791
 */
sl@0
  1792
static VOID
sl@0
  1793
freesubre(v, sr)
sl@0
  1794
struct vars *v;			/* might be NULL */
sl@0
  1795
struct subre *sr;
sl@0
  1796
{
sl@0
  1797
	if (sr == NULL)
sl@0
  1798
		return;
sl@0
  1799
sl@0
  1800
	if (sr->left != NULL)
sl@0
  1801
		freesubre(v, sr->left);
sl@0
  1802
	if (sr->right != NULL)
sl@0
  1803
		freesubre(v, sr->right);
sl@0
  1804
sl@0
  1805
	freesrnode(v, sr);
sl@0
  1806
}
sl@0
  1807
sl@0
  1808
/*
sl@0
  1809
 - freesrnode - free one node in a subRE subtree
sl@0
  1810
 ^ static VOID freesrnode(struct vars *, struct subre *);
sl@0
  1811
 */
sl@0
  1812
static VOID
sl@0
  1813
freesrnode(v, sr)
sl@0
  1814
struct vars *v;			/* might be NULL */
sl@0
  1815
struct subre *sr;
sl@0
  1816
{
sl@0
  1817
	if (sr == NULL)
sl@0
  1818
		return;
sl@0
  1819
sl@0
  1820
	if (!NULLCNFA(sr->cnfa))
sl@0
  1821
		freecnfa(&sr->cnfa);
sl@0
  1822
	sr->flags = 0;
sl@0
  1823
sl@0
  1824
	if (v != NULL) {
sl@0
  1825
		sr->left = v->treefree;
sl@0
  1826
		v->treefree = sr;
sl@0
  1827
	} else
sl@0
  1828
		FREE(sr);
sl@0
  1829
}
sl@0
  1830
sl@0
  1831
/*
sl@0
  1832
 - optst - optimize a subRE subtree
sl@0
  1833
 ^ static VOID optst(struct vars *, struct subre *);
sl@0
  1834
 */
sl@0
  1835
static VOID
sl@0
  1836
optst(v, t)
sl@0
  1837
struct vars *v;
sl@0
  1838
struct subre *t;
sl@0
  1839
{
sl@0
  1840
	if (t == NULL)
sl@0
  1841
		return;
sl@0
  1842
sl@0
  1843
	/* recurse through children */
sl@0
  1844
	if (t->left != NULL)
sl@0
  1845
		optst(v, t->left);
sl@0
  1846
	if (t->right != NULL)
sl@0
  1847
		optst(v, t->right);
sl@0
  1848
}
sl@0
  1849
sl@0
  1850
/*
sl@0
  1851
 - numst - number tree nodes (assigning retry indexes)
sl@0
  1852
 ^ static int numst(struct subre *, int);
sl@0
  1853
 */
sl@0
  1854
static int			/* next number */
sl@0
  1855
numst(t, start)
sl@0
  1856
struct subre *t;
sl@0
  1857
int start;			/* starting point for subtree numbers */
sl@0
  1858
{
sl@0
  1859
	int i;
sl@0
  1860
sl@0
  1861
	assert(t != NULL);
sl@0
  1862
sl@0
  1863
	i = start;
sl@0
  1864
	t->retry = (short)i++;
sl@0
  1865
	if (t->left != NULL)
sl@0
  1866
		i = numst(t->left, i);
sl@0
  1867
	if (t->right != NULL)
sl@0
  1868
		i = numst(t->right, i);
sl@0
  1869
	return i;
sl@0
  1870
}
sl@0
  1871
sl@0
  1872
/*
sl@0
  1873
 - markst - mark tree nodes as INUSE
sl@0
  1874
 ^ static VOID markst(struct subre *);
sl@0
  1875
 */
sl@0
  1876
static VOID
sl@0
  1877
markst(t)
sl@0
  1878
struct subre *t;
sl@0
  1879
{
sl@0
  1880
	assert(t != NULL);
sl@0
  1881
sl@0
  1882
	t->flags |= INUSE;
sl@0
  1883
	if (t->left != NULL)
sl@0
  1884
		markst(t->left);
sl@0
  1885
	if (t->right != NULL)
sl@0
  1886
		markst(t->right);
sl@0
  1887
}
sl@0
  1888
sl@0
  1889
/*
sl@0
  1890
 - cleanst - free any tree nodes not marked INUSE
sl@0
  1891
 ^ static VOID cleanst(struct vars *);
sl@0
  1892
 */
sl@0
  1893
static VOID
sl@0
  1894
cleanst(v)
sl@0
  1895
struct vars *v;
sl@0
  1896
{
sl@0
  1897
	struct subre *t;
sl@0
  1898
	struct subre *next;
sl@0
  1899
sl@0
  1900
	for (t = v->treechain; t != NULL; t = next) {
sl@0
  1901
		next = t->chain;
sl@0
  1902
		if (!(t->flags&INUSE))
sl@0
  1903
			FREE(t);
sl@0
  1904
	}
sl@0
  1905
	v->treechain = NULL;
sl@0
  1906
	v->treefree = NULL;		/* just on general principles */
sl@0
  1907
}
sl@0
  1908
sl@0
  1909
/*
sl@0
  1910
 - nfatree - turn a subRE subtree into a tree of compacted NFAs
sl@0
  1911
 ^ static long nfatree(struct vars *, struct subre *, FILE *);
sl@0
  1912
 */
sl@0
  1913
static long			/* optimize results from top node */
sl@0
  1914
nfatree(v, t, f)
sl@0
  1915
struct vars *v;
sl@0
  1916
struct subre *t;
sl@0
  1917
FILE *f;			/* for debug output */
sl@0
  1918
{
sl@0
  1919
	assert(t != NULL && t->begin != NULL);
sl@0
  1920
sl@0
  1921
	if (t->left != NULL)
sl@0
  1922
		(DISCARD)nfatree(v, t->left, f);
sl@0
  1923
	if (t->right != NULL)
sl@0
  1924
		(DISCARD)nfatree(v, t->right, f);
sl@0
  1925
sl@0
  1926
	return nfanode(v, t, f);
sl@0
  1927
}
sl@0
  1928
sl@0
  1929
/*
sl@0
  1930
 - nfanode - do one NFA for nfatree
sl@0
  1931
 ^ static long nfanode(struct vars *, struct subre *, FILE *);
sl@0
  1932
 */
sl@0
  1933
static long			/* optimize results */
sl@0
  1934
nfanode(v, t, f)
sl@0
  1935
struct vars *v;
sl@0
  1936
struct subre *t;
sl@0
  1937
FILE *f;			/* for debug output */
sl@0
  1938
{
sl@0
  1939
	struct nfa *nfa;
sl@0
  1940
	long ret = 0;
sl@0
  1941
	char idbuf[50];
sl@0
  1942
sl@0
  1943
	assert(t->begin != NULL);
sl@0
  1944
sl@0
  1945
	if (f != NULL)
sl@0
  1946
		fprintf(f, "\n\n\n========= TREE NODE %s ==========\n",
sl@0
  1947
						stid(t, idbuf, sizeof(idbuf)));
sl@0
  1948
	nfa = newnfa(v, v->cm, v->nfa);
sl@0
  1949
	NOERRZ();
sl@0
  1950
	dupnfa(nfa, t->begin, t->end, nfa->init, nfa->final);
sl@0
  1951
	if (!ISERR()) {
sl@0
  1952
		specialcolors(nfa);
sl@0
  1953
		ret = optimize(nfa, f);
sl@0
  1954
	}
sl@0
  1955
	if (!ISERR())
sl@0
  1956
		compact(nfa, &t->cnfa);
sl@0
  1957
sl@0
  1958
	freenfa(nfa);
sl@0
  1959
	return ret;
sl@0
  1960
}
sl@0
  1961
sl@0
  1962
/*
sl@0
  1963
 - newlacon - allocate a lookahead-constraint subRE
sl@0
  1964
 ^ static int newlacon(struct vars *, struct state *, struct state *, int);
sl@0
  1965
 */
sl@0
  1966
static int			/* lacon number */
sl@0
  1967
newlacon(v, begin, end, pos)
sl@0
  1968
struct vars *v;
sl@0
  1969
struct state *begin;
sl@0
  1970
struct state *end;
sl@0
  1971
int pos;
sl@0
  1972
{
sl@0
  1973
	int n;
sl@0
  1974
	struct subre *sub;
sl@0
  1975
sl@0
  1976
	if (v->nlacons == 0) {
sl@0
  1977
		v->lacons = (struct subre *)MALLOC(2 * sizeof(struct subre));
sl@0
  1978
		n = 1;		/* skip 0th */
sl@0
  1979
		v->nlacons = 2;
sl@0
  1980
	} else {
sl@0
  1981
		v->lacons = (struct subre *)REALLOC(v->lacons,
sl@0
  1982
					(v->nlacons+1)*sizeof(struct subre));
sl@0
  1983
		n = v->nlacons++;
sl@0
  1984
	}
sl@0
  1985
	if (v->lacons == NULL) {
sl@0
  1986
		ERR(REG_ESPACE);
sl@0
  1987
		return 0;
sl@0
  1988
	}
sl@0
  1989
	sub = &v->lacons[n];
sl@0
  1990
	sub->begin = begin;
sl@0
  1991
	sub->end = end;
sl@0
  1992
	sub->subno = pos;
sl@0
  1993
	ZAPCNFA(sub->cnfa);
sl@0
  1994
	return n;
sl@0
  1995
}
sl@0
  1996
sl@0
  1997
/*
sl@0
  1998
 - freelacons - free lookahead-constraint subRE vector
sl@0
  1999
 ^ static VOID freelacons(struct subre *, int);
sl@0
  2000
 */
sl@0
  2001
static VOID
sl@0
  2002
freelacons(subs, n)
sl@0
  2003
struct subre *subs;
sl@0
  2004
int n;
sl@0
  2005
{
sl@0
  2006
	struct subre *sub;
sl@0
  2007
	int i;
sl@0
  2008
sl@0
  2009
	assert(n > 0);
sl@0
  2010
	for (sub = subs + 1, i = n - 1; i > 0; sub++, i--)	/* no 0th */
sl@0
  2011
		if (!NULLCNFA(sub->cnfa))
sl@0
  2012
			freecnfa(&sub->cnfa);
sl@0
  2013
	FREE(subs);
sl@0
  2014
}
sl@0
  2015
sl@0
  2016
/*
sl@0
  2017
 - rfree - free a whole RE (insides of regfree)
sl@0
  2018
 ^ static VOID rfree(regex_t *);
sl@0
  2019
 */
sl@0
  2020
static VOID
sl@0
  2021
rfree(re)
sl@0
  2022
regex_t *re;
sl@0
  2023
{
sl@0
  2024
	struct guts *g;
sl@0
  2025
sl@0
  2026
	if (re == NULL || re->re_magic != REMAGIC)
sl@0
  2027
		return;
sl@0
  2028
sl@0
  2029
	re->re_magic = 0;	/* invalidate RE */
sl@0
  2030
	g = (struct guts *)re->re_guts;
sl@0
  2031
	re->re_guts = NULL;
sl@0
  2032
	re->re_fns = NULL;
sl@0
  2033
	g->magic = 0;
sl@0
  2034
	freecm(&g->cmap);
sl@0
  2035
	if (g->tree != NULL)
sl@0
  2036
		freesubre((struct vars *)NULL, g->tree);
sl@0
  2037
	if (g->lacons != NULL)
sl@0
  2038
		freelacons(g->lacons, g->nlacons);
sl@0
  2039
	if (!NULLCNFA(g->search))
sl@0
  2040
		freecnfa(&g->search);
sl@0
  2041
	FREE(g);
sl@0
  2042
}
sl@0
  2043
sl@0
  2044
/*
sl@0
  2045
 - dump - dump an RE in human-readable form
sl@0
  2046
 ^ static VOID dump(regex_t *, FILE *);
sl@0
  2047
 */
sl@0
  2048
static VOID
sl@0
  2049
dump(re, f)
sl@0
  2050
regex_t *re;
sl@0
  2051
FILE *f;
sl@0
  2052
{
sl@0
  2053
#ifdef REG_DEBUG
sl@0
  2054
	struct guts *g;
sl@0
  2055
	int i;
sl@0
  2056
sl@0
  2057
	if (re->re_magic != REMAGIC)
sl@0
  2058
		fprintf(f, "bad magic number (0x%x not 0x%x)\n", re->re_magic,
sl@0
  2059
								REMAGIC);
sl@0
  2060
	if (re->re_guts == NULL) {
sl@0
  2061
		fprintf(f, "NULL guts!!!\n");
sl@0
  2062
		return;
sl@0
  2063
	}
sl@0
  2064
	g = (struct guts *)re->re_guts;
sl@0
  2065
	if (g->magic != GUTSMAGIC)
sl@0
  2066
		fprintf(f, "bad guts magic number (0x%x not 0x%x)\n", g->magic,
sl@0
  2067
								GUTSMAGIC);
sl@0
  2068
sl@0
  2069
	fprintf(f, "\n\n\n========= DUMP ==========\n");
sl@0
  2070
	fprintf(f, "nsub %d, info 0%lo, csize %d, ntree %d\n", 
sl@0
  2071
		re->re_nsub, re->re_info, re->re_csize, g->ntree);
sl@0
  2072
sl@0
  2073
	dumpcolors(&g->cmap, f);
sl@0
  2074
	if (!NULLCNFA(g->search)) {
sl@0
  2075
		printf("\nsearch:\n");
sl@0
  2076
		dumpcnfa(&g->search, f);
sl@0
  2077
	}
sl@0
  2078
	for (i = 1; i < g->nlacons; i++) {
sl@0
  2079
		fprintf(f, "\nla%d (%s):\n", i,
sl@0
  2080
				(g->lacons[i].subno) ? "positive" : "negative");
sl@0
  2081
		dumpcnfa(&g->lacons[i].cnfa, f);
sl@0
  2082
	}
sl@0
  2083
	fprintf(f, "\n");
sl@0
  2084
	dumpst(g->tree, f, 0);
sl@0
  2085
#endif
sl@0
  2086
}
sl@0
  2087
sl@0
  2088
/*
sl@0
  2089
 - dumpst - dump a subRE tree
sl@0
  2090
 ^ static VOID dumpst(struct subre *, FILE *, int);
sl@0
  2091
 */
sl@0
  2092
static VOID
sl@0
  2093
dumpst(t, f, nfapresent)
sl@0
  2094
struct subre *t;
sl@0
  2095
FILE *f;
sl@0
  2096
int nfapresent;			/* is the original NFA still around? */
sl@0
  2097
{
sl@0
  2098
	if (t == NULL)
sl@0
  2099
		fprintf(f, "null tree\n");
sl@0
  2100
	else
sl@0
  2101
		stdump(t, f, nfapresent);
sl@0
  2102
	fflush(f);
sl@0
  2103
}
sl@0
  2104
sl@0
  2105
/*
sl@0
  2106
 - stdump - recursive guts of dumpst
sl@0
  2107
 ^ static VOID stdump(struct subre *, FILE *, int);
sl@0
  2108
 */
sl@0
  2109
static VOID
sl@0
  2110
stdump(t, f, nfapresent)
sl@0
  2111
struct subre *t;
sl@0
  2112
FILE *f;
sl@0
  2113
int nfapresent;			/* is the original NFA still around? */
sl@0
  2114
{
sl@0
  2115
	char idbuf[50];
sl@0
  2116
sl@0
  2117
	fprintf(f, "%s. `%c'", stid(t, idbuf, sizeof(idbuf)), t->op);
sl@0
  2118
	if (t->flags&LONGER)
sl@0
  2119
		fprintf(f, " longest");
sl@0
  2120
	if (t->flags&SHORTER)
sl@0
  2121
		fprintf(f, " shortest");
sl@0
  2122
	if (t->flags&MIXED)
sl@0
  2123
		fprintf(f, " hasmixed");
sl@0
  2124
	if (t->flags&CAP)
sl@0
  2125
		fprintf(f, " hascapture");
sl@0
  2126
	if (t->flags&BACKR)
sl@0
  2127
		fprintf(f, " hasbackref");
sl@0
  2128
	if (!(t->flags&INUSE))
sl@0
  2129
		fprintf(f, " UNUSED");
sl@0
  2130
	if (t->subno != 0)
sl@0
  2131
		fprintf(f, " (#%d)", t->subno);
sl@0
  2132
	if (t->min != 1 || t->max != 1) {
sl@0
  2133
		fprintf(f, " {%d,", t->min);
sl@0
  2134
		if (t->max != INFINITY)
sl@0
  2135
			fprintf(f, "%d", t->max);
sl@0
  2136
		fprintf(f, "}");
sl@0
  2137
	}
sl@0
  2138
	if (nfapresent)
sl@0
  2139
		fprintf(f, " %ld-%ld", (long)t->begin->no, (long)t->end->no);
sl@0
  2140
	if (t->left != NULL)
sl@0
  2141
		fprintf(f, " L:%s", stid(t->left, idbuf, sizeof(idbuf)));
sl@0
  2142
	if (t->right != NULL)
sl@0
  2143
		fprintf(f, " R:%s", stid(t->right, idbuf, sizeof(idbuf)));
sl@0
  2144
	if (!NULLCNFA(t->cnfa)) {
sl@0
  2145
		fprintf(f, "\n");
sl@0
  2146
		dumpcnfa(&t->cnfa, f);
sl@0
  2147
		fprintf(f, "\n");
sl@0
  2148
	}
sl@0
  2149
	if (t->left != NULL)
sl@0
  2150
		stdump(t->left, f, nfapresent);
sl@0
  2151
	if (t->right != NULL)
sl@0
  2152
		stdump(t->right, f, nfapresent);
sl@0
  2153
}
sl@0
  2154
sl@0
  2155
/*
sl@0
  2156
 - stid - identify a subtree node for dumping
sl@0
  2157
 ^ static char *stid(struct subre *, char *, size_t);
sl@0
  2158
 */
sl@0
  2159
static char *			/* points to buf or constant string */
sl@0
  2160
stid(t, buf, bufsize)
sl@0
  2161
struct subre *t;
sl@0
  2162
char *buf;
sl@0
  2163
size_t bufsize;
sl@0
  2164
{
sl@0
  2165
	/* big enough for hex int or decimal t->retry? */
sl@0
  2166
	if (bufsize < sizeof(void*)*2 + 3 || bufsize < sizeof(t->retry)*3 + 1)
sl@0
  2167
		return "unable";
sl@0
  2168
	if (t->retry != 0)
sl@0
  2169
		sprintf(buf, "%d", t->retry);
sl@0
  2170
	else
sl@0
  2171
		sprintf(buf, "%p", t);
sl@0
  2172
	return buf;
sl@0
  2173
}
sl@0
  2174
sl@0
  2175
#include "regc_lex.c"
sl@0
  2176
#include "regc_color.c"
sl@0
  2177
#include "regc_nfa.c"
sl@0
  2178
#include "regc_cvec.c"
sl@0
  2179
#include "regc_locale.c"