os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/generic/regguts.h
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 /*
     2  * Internal interface definitions, etc., for the reg package
     3  *
     4  * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
     5  * 
     6  * Development of this software was funded, in part, by Cray Research Inc.,
     7  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
     8  * Corporation, none of whom are responsible for the results.  The author
     9  * thanks all of them. 
    10  * 
    11  * Redistribution and use in source and binary forms -- with or without
    12  * modification -- are permitted for any purpose, provided that
    13  * redistributions in source form retain this entire copyright notice and
    14  * indicate the origin and nature of any modifications.
    15  * 
    16  * I'd appreciate being given credit for this package in the documentation
    17  * of software which uses it, but that is not a requirement.
    18  * 
    19  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
    20  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
    21  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
    22  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    23  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    24  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    25  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
    26  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
    27  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
    28  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    29  */
    30 
    31 
    32 
    33 /*
    34  * Environmental customization.  It should not (I hope) be necessary to
    35  * alter the file you are now reading -- regcustom.h should handle it all,
    36  * given care here and elsewhere.
    37  */
    38 #include "regcustom.h"
    39 
    40 
    41 
    42 /*
    43  * Things that regcustom.h might override.
    44  */
    45 
    46 /* standard header files (NULL is a reasonable indicator for them) */
    47 #ifndef NULL
    48 #include <stdio.h>
    49 #include <stdlib.h>
    50 #include <ctype.h>
    51 #include <limits.h>
    52 #include <string.h>
    53 #endif
    54 
    55 /* assertions */
    56 #ifndef assert
    57 #	ifndef REG_DEBUG
    58 #	ifndef NDEBUG
    59 #	define	NDEBUG		/* no assertions */
    60 #	endif
    61 #	endif
    62 #include <assert.h>
    63 #endif
    64 
    65 /* voids */
    66 #ifndef VOID
    67 #define	VOID	void			/* for function return values */
    68 #endif
    69 #ifndef DISCARD
    70 #define	DISCARD	VOID			/* for throwing values away */
    71 #endif
    72 #ifndef PVOID
    73 #define	PVOID	VOID *			/* generic pointer */
    74 #endif
    75 #ifndef VS
    76 #define	VS(x)	((PVOID)(x))		/* cast something to generic ptr */
    77 #endif
    78 #ifndef NOPARMS
    79 #define	NOPARMS	VOID			/* for empty parm lists */
    80 #endif
    81 
    82 /* const */
    83 #ifndef CONST
    84 #define	CONST	const			/* for old compilers, might be empty */
    85 #endif
    86 
    87 /* function-pointer declarator */
    88 #ifndef FUNCPTR
    89 #if __STDC__ >= 1
    90 #define	FUNCPTR(name, args)	(*name)args
    91 #else
    92 #define	FUNCPTR(name, args)	(*name)()
    93 #endif
    94 #endif
    95 
    96 /* memory allocation */
    97 #ifndef MALLOC
    98 #define	MALLOC(n)	malloc(n)
    99 #endif
   100 #ifndef REALLOC
   101 #define	REALLOC(p, n)	realloc(VS(p), n)
   102 #endif
   103 #ifndef FREE
   104 #define	FREE(p)		free(VS(p))
   105 #endif
   106 
   107 /* want size of a char in bits, and max value in bounded quantifiers */
   108 #ifndef CHAR_BIT
   109 #include <limits.h>
   110 #endif
   111 #ifndef _POSIX2_RE_DUP_MAX
   112 #define	_POSIX2_RE_DUP_MAX	255	/* normally from <limits.h> */
   113 #endif
   114 
   115 
   116 
   117 /*
   118  * misc
   119  */
   120 
   121 #define	NOTREACHED	0
   122 #define	xxx		1
   123 
   124 #define	DUPMAX	_POSIX2_RE_DUP_MAX
   125 #define	INFINITY	(DUPMAX+1)
   126 
   127 #define	REMAGIC	0xfed7		/* magic number for main struct */
   128 
   129 
   130 
   131 /*
   132  * debugging facilities
   133  */
   134 #ifdef REG_DEBUG
   135 /* FDEBUG does finite-state tracing */
   136 #define	FDEBUG(arglist)	{ if (v->eflags&REG_FTRACE) printf arglist; }
   137 /* MDEBUG does higher-level tracing */
   138 #define	MDEBUG(arglist)	{ if (v->eflags&REG_MTRACE) printf arglist; }
   139 #else
   140 #define	FDEBUG(arglist)	{}
   141 #define	MDEBUG(arglist)	{}
   142 #endif
   143 
   144 
   145 
   146 /*
   147  * bitmap manipulation
   148  */
   149 #define	UBITS	(CHAR_BIT * sizeof(unsigned))
   150 #define	BSET(uv, sn)	((uv)[(sn)/UBITS] |= (unsigned)1 << ((sn)%UBITS))
   151 #define	ISBSET(uv, sn)	((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS)))
   152 
   153 
   154 
   155 /*
   156  * We dissect a chr into byts for colormap table indexing.  Here we define
   157  * a byt, which will be the same as a byte on most machines...  The exact
   158  * size of a byt is not critical, but about 8 bits is good, and extraction
   159  * of 8-bit chunks is sometimes especially fast.
   160  */
   161 #ifndef BYTBITS
   162 #define	BYTBITS	8		/* bits in a byt */
   163 #endif
   164 #define	BYTTAB	(1<<BYTBITS)	/* size of table with one entry per byt value */
   165 #define	BYTMASK	(BYTTAB-1)	/* bit mask for byt */
   166 #define	NBYTS	((CHRBITS+BYTBITS-1)/BYTBITS)
   167 /* the definition of GETCOLOR(), below, assumes NBYTS <= 4 */
   168 
   169 
   170 
   171 /*
   172  * As soon as possible, we map chrs into equivalence classes -- "colors" --
   173  * which are of much more manageable number.
   174  */
   175 typedef short color;		/* colors of characters */
   176 typedef int pcolor;		/* what color promotes to */
   177 #define	COLORLESS	(-1)	/* impossible color */
   178 #define	WHITE		0	/* default color, parent of all others */
   179 
   180 
   181 
   182 /*
   183  * A colormap is a tree -- more precisely, a DAG -- indexed at each level
   184  * by a byt of the chr, to map the chr to a color efficiently.  Because
   185  * lower sections of the tree can be shared, it can exploit the usual
   186  * sparseness of such a mapping table.  The tree is always NBYTS levels
   187  * deep (in the past it was shallower during construction but was "filled"
   188  * to full depth at the end of that); areas that are unaltered as yet point
   189  * to "fill blocks" which are entirely WHITE in color.
   190  */
   191 
   192 /* the tree itself */
   193 struct colors {
   194 	color ccolor[BYTTAB];
   195 };
   196 struct ptrs {
   197 	union tree *pptr[BYTTAB];
   198 };
   199 union tree {
   200 	struct colors colors;
   201 	struct ptrs ptrs;
   202 };
   203 #define	tcolor	colors.ccolor
   204 #define	tptr	ptrs.pptr
   205 
   206 /* internal per-color structure for the color machinery */
   207 struct colordesc {
   208 	uchr nchrs;		/* number of chars of this color */
   209 	color sub;		/* open subcolor (if any); free chain ptr */
   210 #		define	NOSUB	COLORLESS
   211 	struct arc *arcs;	/* color chain */
   212 	int flags;
   213 #		define	FREECOL	01	/* currently free */
   214 #		define	PSEUDO	02	/* pseudocolor, no real chars */
   215 #	define	UNUSEDCOLOR(cd)	((cd)->flags&FREECOL)
   216 	union tree *block;	/* block of solid color, if any */
   217 };
   218 
   219 /* the color map itself */
   220 struct colormap {
   221 	int magic;
   222 #		define	CMMAGIC	0x876
   223 	struct vars *v;			/* for compile error reporting */
   224 	size_t ncds;			/* number of colordescs */
   225 	size_t max;			/* highest in use */
   226 	color free;			/* beginning of free chain (if non-0) */
   227 	struct colordesc *cd;
   228 #	define	CDEND(cm)	(&(cm)->cd[(cm)->max + 1])
   229 #		define	NINLINECDS	((size_t)10)
   230 	struct colordesc cdspace[NINLINECDS];
   231 	union tree tree[NBYTS];		/* tree top, plus fill blocks */
   232 };
   233 
   234 /* optimization magic to do fast chr->color mapping */
   235 #define	B0(c)	((c) & BYTMASK)
   236 #define	B1(c)	(((c)>>BYTBITS) & BYTMASK)
   237 #define	B2(c)	(((c)>>(2*BYTBITS)) & BYTMASK)
   238 #define	B3(c)	(((c)>>(3*BYTBITS)) & BYTMASK)
   239 #if NBYTS == 1
   240 #define	GETCOLOR(cm, c)	((cm)->tree->tcolor[B0(c)])
   241 #endif
   242 /* beware, for NBYTS>1, GETCOLOR() is unsafe -- 2nd arg used repeatedly */
   243 #if NBYTS == 2
   244 #define	GETCOLOR(cm, c)	((cm)->tree->tptr[B1(c)]->tcolor[B0(c)])
   245 #endif
   246 #if NBYTS == 4
   247 #define	GETCOLOR(cm, c)	((cm)->tree->tptr[B3(c)]->tptr[B2(c)]->tptr[B1(c)]->tcolor[B0(c)])
   248 #endif
   249 
   250 
   251 
   252 /*
   253  * Interface definitions for locale-interface functions in locale.c.
   254  * Multi-character collating elements (MCCEs) cause most of the trouble.
   255  */
   256 struct cvec {
   257 	int nchrs;		/* number of chrs */
   258 	int chrspace;		/* number of chrs possible */
   259 	chr *chrs;		/* pointer to vector of chrs */
   260 	int nranges;		/* number of ranges (chr pairs) */
   261 	int rangespace;		/* number of chrs possible */
   262 	chr *ranges;		/* pointer to vector of chr pairs */
   263 	int nmcces;		/* number of MCCEs */
   264 	int mccespace;		/* number of MCCEs possible */
   265 	int nmccechrs;		/* number of chrs used for MCCEs */
   266 	chr *mcces[1];		/* pointers to 0-terminated MCCEs */
   267 				/* and both batches of chrs are on the end */
   268 };
   269 
   270 /* caution:  this value cannot be changed easily */
   271 #define	MAXMCCE	2		/* length of longest MCCE */
   272 
   273 
   274 
   275 /*
   276  * definitions for NFA internal representation
   277  *
   278  * Having a "from" pointer within each arc may seem redundant, but it
   279  * saves a lot of hassle.
   280  */
   281 struct state;
   282 
   283 struct arc {
   284 	int type;
   285 #		define	ARCFREE	'\0'
   286 	color co;
   287 	struct state *from;	/* where it's from (and contained within) */
   288 	struct state *to;	/* where it's to */
   289 	struct arc *outchain;	/* *from's outs chain or free chain */
   290 #	define	freechain	outchain
   291 	struct arc *inchain;	/* *to's ins chain */
   292 	struct arc *colorchain;	/* color's arc chain */
   293 };
   294 
   295 struct arcbatch {		/* for bulk allocation of arcs */
   296 	struct arcbatch *next;
   297 #	define	ABSIZE	10
   298 	struct arc a[ABSIZE];
   299 };
   300 
   301 struct state {
   302 	int no;
   303 #		define	FREESTATE	(-1)
   304 	char flag;		/* marks special states */
   305 	int nins;		/* number of inarcs */
   306 	struct arc *ins;	/* chain of inarcs */
   307 	int nouts;		/* number of outarcs */
   308 	struct arc *outs;	/* chain of outarcs */
   309 	struct arc *free;	/* chain of free arcs */
   310 	struct state *tmp;	/* temporary for traversal algorithms */
   311 	struct state *next;	/* chain for traversing all */
   312 	struct state *prev;	/* back chain */
   313 	struct arcbatch oas;	/* first arcbatch, avoid malloc in easy case */
   314 	int noas;		/* number of arcs used in first arcbatch */
   315 };
   316 
   317 struct nfa {
   318 	struct state *pre;	/* pre-initial state */
   319 	struct state *init;	/* initial state */
   320 	struct state *final;	/* final state */
   321 	struct state *post;	/* post-final state */
   322 	int nstates;		/* for numbering states */
   323 	struct state *states;	/* state-chain header */
   324 	struct state *slast;	/* tail of the chain */
   325 	struct state *free;	/* free list */
   326 	struct colormap *cm;	/* the color map */
   327 	color bos[2];		/* colors, if any, assigned to BOS and BOL */
   328 	color eos[2];		/* colors, if any, assigned to EOS and EOL */
   329 	struct vars *v;		/* simplifies compile error reporting */
   330 	struct nfa *parent;	/* parent NFA, if any */
   331 };
   332 
   333 
   334 
   335 /*
   336  * definitions for compacted NFA
   337  */
   338 struct carc {
   339 	color co;		/* COLORLESS is list terminator */
   340 	int to;			/* state number */
   341 };
   342 
   343 struct cnfa {
   344 	int nstates;		/* number of states */
   345 	int ncolors;		/* number of colors */
   346 	int flags;
   347 #		define	HASLACONS	01	/* uses lookahead constraints */
   348 	int pre;		/* setup state number */
   349 	int post;		/* teardown state number */
   350 	color bos[2];		/* colors, if any, assigned to BOS and BOL */
   351 	color eos[2];		/* colors, if any, assigned to EOS and EOL */
   352 	struct carc **states;	/* vector of pointers to outarc lists */
   353 	struct carc *arcs;	/* the area for the lists */
   354 };
   355 #define	ZAPCNFA(cnfa)	((cnfa).nstates = 0)
   356 #define	NULLCNFA(cnfa)	((cnfa).nstates == 0)
   357 
   358 
   359 
   360 /*
   361  * subexpression tree
   362  */
   363 struct subre {
   364 	char op;		/* '|', '.' (concat), 'b' (backref), '(', '=' */
   365 	char flags;
   366 #		define	LONGER	01	/* prefers longer match */
   367 #		define	SHORTER	02	/* prefers shorter match */
   368 #		define	MIXED	04	/* mixed preference below */
   369 #		define	CAP	010	/* capturing parens below */
   370 #		define	BACKR	020	/* back reference below */
   371 #		define	INUSE	0100	/* in use in final tree */
   372 #		define	LOCAL	03	/* bits which may not propagate up */
   373 #		define	LMIX(f)	((f)<<2)	/* LONGER -> MIXED */
   374 #		define	SMIX(f)	((f)<<1)	/* SHORTER -> MIXED */
   375 #		define	UP(f)	(((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED))
   376 #		define	MESSY(f)	((f)&(MIXED|CAP|BACKR))
   377 #		define	PREF(f)	((f)&LOCAL)
   378 #		define	PREF2(f1, f2)	((PREF(f1) != 0) ? PREF(f1) : PREF(f2))
   379 #		define	COMBINE(f1, f2)	(UP((f1)|(f2)) | PREF2(f1, f2))
   380 	short retry;		/* index into retry memory */
   381 	int subno;		/* subexpression number (for 'b' and '(') */
   382 	short min;		/* min repetitions, for backref only */
   383 	short max;		/* max repetitions, for backref only */
   384 	struct subre *left;	/* left child, if any (also freelist chain) */
   385 	struct subre *right;	/* right child, if any */
   386 	struct state *begin;	/* outarcs from here... */
   387 	struct state *end;	/* ...ending in inarcs here */
   388 	struct cnfa cnfa;	/* compacted NFA, if any */
   389 	struct subre *chain;	/* for bookkeeping and error cleanup */
   390 };
   391 
   392 
   393 
   394 /*
   395  * table of function pointers for generic manipulation functions
   396  * A regex_t's re_fns points to one of these.
   397  */
   398 struct fns {
   399 	VOID FUNCPTR(free, (regex_t *));
   400 };
   401 
   402 
   403 
   404 /*
   405  * the insides of a regex_t, hidden behind a void *
   406  */
   407 struct guts {
   408 	int magic;
   409 #		define	GUTSMAGIC	0xfed9
   410 	int cflags;		/* copy of compile flags */
   411 	long info;		/* copy of re_info */
   412 	size_t nsub;		/* copy of re_nsub */
   413 	struct subre *tree;
   414 	struct cnfa search;	/* for fast preliminary search */
   415 	int ntree;
   416 	struct colormap cmap;
   417 	int FUNCPTR(compare, (CONST chr *, CONST chr *, size_t));
   418 	struct subre *lacons;	/* lookahead-constraint vector */
   419 	int nlacons;		/* size of lacons */
   420 };