os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/generic/regguts.h
author sl@SLION-WIN7.fritz.box
Fri, 15 Jun 2012 03:10:57 +0200
changeset 0 bde4ae8d615e
permissions -rw-r--r--
First public contribution.
sl@0
     1
/*
sl@0
     2
 * Internal interface definitions, etc., for the reg package
sl@0
     3
 *
sl@0
     4
 * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
sl@0
     5
 * 
sl@0
     6
 * Development of this software was funded, in part, by Cray Research Inc.,
sl@0
     7
 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
sl@0
     8
 * Corporation, none of whom are responsible for the results.  The author
sl@0
     9
 * thanks all of them. 
sl@0
    10
 * 
sl@0
    11
 * Redistribution and use in source and binary forms -- with or without
sl@0
    12
 * modification -- are permitted for any purpose, provided that
sl@0
    13
 * redistributions in source form retain this entire copyright notice and
sl@0
    14
 * indicate the origin and nature of any modifications.
sl@0
    15
 * 
sl@0
    16
 * I'd appreciate being given credit for this package in the documentation
sl@0
    17
 * of software which uses it, but that is not a requirement.
sl@0
    18
 * 
sl@0
    19
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
sl@0
    20
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
sl@0
    21
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
sl@0
    22
 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
sl@0
    23
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
sl@0
    24
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
sl@0
    25
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
sl@0
    26
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
sl@0
    27
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
sl@0
    28
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
sl@0
    29
 */
sl@0
    30
sl@0
    31
sl@0
    32
sl@0
    33
/*
sl@0
    34
 * Environmental customization.  It should not (I hope) be necessary to
sl@0
    35
 * alter the file you are now reading -- regcustom.h should handle it all,
sl@0
    36
 * given care here and elsewhere.
sl@0
    37
 */
sl@0
    38
#include "regcustom.h"
sl@0
    39
sl@0
    40
sl@0
    41
sl@0
    42
/*
sl@0
    43
 * Things that regcustom.h might override.
sl@0
    44
 */
sl@0
    45
sl@0
    46
/* standard header files (NULL is a reasonable indicator for them) */
sl@0
    47
#ifndef NULL
sl@0
    48
#include <stdio.h>
sl@0
    49
#include <stdlib.h>
sl@0
    50
#include <ctype.h>
sl@0
    51
#include <limits.h>
sl@0
    52
#include <string.h>
sl@0
    53
#endif
sl@0
    54
sl@0
    55
/* assertions */
sl@0
    56
#ifndef assert
sl@0
    57
#	ifndef REG_DEBUG
sl@0
    58
#	ifndef NDEBUG
sl@0
    59
#	define	NDEBUG		/* no assertions */
sl@0
    60
#	endif
sl@0
    61
#	endif
sl@0
    62
#include <assert.h>
sl@0
    63
#endif
sl@0
    64
sl@0
    65
/* voids */
sl@0
    66
#ifndef VOID
sl@0
    67
#define	VOID	void			/* for function return values */
sl@0
    68
#endif
sl@0
    69
#ifndef DISCARD
sl@0
    70
#define	DISCARD	VOID			/* for throwing values away */
sl@0
    71
#endif
sl@0
    72
#ifndef PVOID
sl@0
    73
#define	PVOID	VOID *			/* generic pointer */
sl@0
    74
#endif
sl@0
    75
#ifndef VS
sl@0
    76
#define	VS(x)	((PVOID)(x))		/* cast something to generic ptr */
sl@0
    77
#endif
sl@0
    78
#ifndef NOPARMS
sl@0
    79
#define	NOPARMS	VOID			/* for empty parm lists */
sl@0
    80
#endif
sl@0
    81
sl@0
    82
/* const */
sl@0
    83
#ifndef CONST
sl@0
    84
#define	CONST	const			/* for old compilers, might be empty */
sl@0
    85
#endif
sl@0
    86
sl@0
    87
/* function-pointer declarator */
sl@0
    88
#ifndef FUNCPTR
sl@0
    89
#if __STDC__ >= 1
sl@0
    90
#define	FUNCPTR(name, args)	(*name)args
sl@0
    91
#else
sl@0
    92
#define	FUNCPTR(name, args)	(*name)()
sl@0
    93
#endif
sl@0
    94
#endif
sl@0
    95
sl@0
    96
/* memory allocation */
sl@0
    97
#ifndef MALLOC
sl@0
    98
#define	MALLOC(n)	malloc(n)
sl@0
    99
#endif
sl@0
   100
#ifndef REALLOC
sl@0
   101
#define	REALLOC(p, n)	realloc(VS(p), n)
sl@0
   102
#endif
sl@0
   103
#ifndef FREE
sl@0
   104
#define	FREE(p)		free(VS(p))
sl@0
   105
#endif
sl@0
   106
sl@0
   107
/* want size of a char in bits, and max value in bounded quantifiers */
sl@0
   108
#ifndef CHAR_BIT
sl@0
   109
#include <limits.h>
sl@0
   110
#endif
sl@0
   111
#ifndef _POSIX2_RE_DUP_MAX
sl@0
   112
#define	_POSIX2_RE_DUP_MAX	255	/* normally from <limits.h> */
sl@0
   113
#endif
sl@0
   114
sl@0
   115
sl@0
   116
sl@0
   117
/*
sl@0
   118
 * misc
sl@0
   119
 */
sl@0
   120
sl@0
   121
#define	NOTREACHED	0
sl@0
   122
#define	xxx		1
sl@0
   123
sl@0
   124
#define	DUPMAX	_POSIX2_RE_DUP_MAX
sl@0
   125
#define	INFINITY	(DUPMAX+1)
sl@0
   126
sl@0
   127
#define	REMAGIC	0xfed7		/* magic number for main struct */
sl@0
   128
sl@0
   129
sl@0
   130
sl@0
   131
/*
sl@0
   132
 * debugging facilities
sl@0
   133
 */
sl@0
   134
#ifdef REG_DEBUG
sl@0
   135
/* FDEBUG does finite-state tracing */
sl@0
   136
#define	FDEBUG(arglist)	{ if (v->eflags&REG_FTRACE) printf arglist; }
sl@0
   137
/* MDEBUG does higher-level tracing */
sl@0
   138
#define	MDEBUG(arglist)	{ if (v->eflags&REG_MTRACE) printf arglist; }
sl@0
   139
#else
sl@0
   140
#define	FDEBUG(arglist)	{}
sl@0
   141
#define	MDEBUG(arglist)	{}
sl@0
   142
#endif
sl@0
   143
sl@0
   144
sl@0
   145
sl@0
   146
/*
sl@0
   147
 * bitmap manipulation
sl@0
   148
 */
sl@0
   149
#define	UBITS	(CHAR_BIT * sizeof(unsigned))
sl@0
   150
#define	BSET(uv, sn)	((uv)[(sn)/UBITS] |= (unsigned)1 << ((sn)%UBITS))
sl@0
   151
#define	ISBSET(uv, sn)	((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS)))
sl@0
   152
sl@0
   153
sl@0
   154
sl@0
   155
/*
sl@0
   156
 * We dissect a chr into byts for colormap table indexing.  Here we define
sl@0
   157
 * a byt, which will be the same as a byte on most machines...  The exact
sl@0
   158
 * size of a byt is not critical, but about 8 bits is good, and extraction
sl@0
   159
 * of 8-bit chunks is sometimes especially fast.
sl@0
   160
 */
sl@0
   161
#ifndef BYTBITS
sl@0
   162
#define	BYTBITS	8		/* bits in a byt */
sl@0
   163
#endif
sl@0
   164
#define	BYTTAB	(1<<BYTBITS)	/* size of table with one entry per byt value */
sl@0
   165
#define	BYTMASK	(BYTTAB-1)	/* bit mask for byt */
sl@0
   166
#define	NBYTS	((CHRBITS+BYTBITS-1)/BYTBITS)
sl@0
   167
/* the definition of GETCOLOR(), below, assumes NBYTS <= 4 */
sl@0
   168
sl@0
   169
sl@0
   170
sl@0
   171
/*
sl@0
   172
 * As soon as possible, we map chrs into equivalence classes -- "colors" --
sl@0
   173
 * which are of much more manageable number.
sl@0
   174
 */
sl@0
   175
typedef short color;		/* colors of characters */
sl@0
   176
typedef int pcolor;		/* what color promotes to */
sl@0
   177
#define	COLORLESS	(-1)	/* impossible color */
sl@0
   178
#define	WHITE		0	/* default color, parent of all others */
sl@0
   179
sl@0
   180
sl@0
   181
sl@0
   182
/*
sl@0
   183
 * A colormap is a tree -- more precisely, a DAG -- indexed at each level
sl@0
   184
 * by a byt of the chr, to map the chr to a color efficiently.  Because
sl@0
   185
 * lower sections of the tree can be shared, it can exploit the usual
sl@0
   186
 * sparseness of such a mapping table.  The tree is always NBYTS levels
sl@0
   187
 * deep (in the past it was shallower during construction but was "filled"
sl@0
   188
 * to full depth at the end of that); areas that are unaltered as yet point
sl@0
   189
 * to "fill blocks" which are entirely WHITE in color.
sl@0
   190
 */
sl@0
   191
sl@0
   192
/* the tree itself */
sl@0
   193
struct colors {
sl@0
   194
	color ccolor[BYTTAB];
sl@0
   195
};
sl@0
   196
struct ptrs {
sl@0
   197
	union tree *pptr[BYTTAB];
sl@0
   198
};
sl@0
   199
union tree {
sl@0
   200
	struct colors colors;
sl@0
   201
	struct ptrs ptrs;
sl@0
   202
};
sl@0
   203
#define	tcolor	colors.ccolor
sl@0
   204
#define	tptr	ptrs.pptr
sl@0
   205
sl@0
   206
/* internal per-color structure for the color machinery */
sl@0
   207
struct colordesc {
sl@0
   208
	uchr nchrs;		/* number of chars of this color */
sl@0
   209
	color sub;		/* open subcolor (if any); free chain ptr */
sl@0
   210
#		define	NOSUB	COLORLESS
sl@0
   211
	struct arc *arcs;	/* color chain */
sl@0
   212
	int flags;
sl@0
   213
#		define	FREECOL	01	/* currently free */
sl@0
   214
#		define	PSEUDO	02	/* pseudocolor, no real chars */
sl@0
   215
#	define	UNUSEDCOLOR(cd)	((cd)->flags&FREECOL)
sl@0
   216
	union tree *block;	/* block of solid color, if any */
sl@0
   217
};
sl@0
   218
sl@0
   219
/* the color map itself */
sl@0
   220
struct colormap {
sl@0
   221
	int magic;
sl@0
   222
#		define	CMMAGIC	0x876
sl@0
   223
	struct vars *v;			/* for compile error reporting */
sl@0
   224
	size_t ncds;			/* number of colordescs */
sl@0
   225
	size_t max;			/* highest in use */
sl@0
   226
	color free;			/* beginning of free chain (if non-0) */
sl@0
   227
	struct colordesc *cd;
sl@0
   228
#	define	CDEND(cm)	(&(cm)->cd[(cm)->max + 1])
sl@0
   229
#		define	NINLINECDS	((size_t)10)
sl@0
   230
	struct colordesc cdspace[NINLINECDS];
sl@0
   231
	union tree tree[NBYTS];		/* tree top, plus fill blocks */
sl@0
   232
};
sl@0
   233
sl@0
   234
/* optimization magic to do fast chr->color mapping */
sl@0
   235
#define	B0(c)	((c) & BYTMASK)
sl@0
   236
#define	B1(c)	(((c)>>BYTBITS) & BYTMASK)
sl@0
   237
#define	B2(c)	(((c)>>(2*BYTBITS)) & BYTMASK)
sl@0
   238
#define	B3(c)	(((c)>>(3*BYTBITS)) & BYTMASK)
sl@0
   239
#if NBYTS == 1
sl@0
   240
#define	GETCOLOR(cm, c)	((cm)->tree->tcolor[B0(c)])
sl@0
   241
#endif
sl@0
   242
/* beware, for NBYTS>1, GETCOLOR() is unsafe -- 2nd arg used repeatedly */
sl@0
   243
#if NBYTS == 2
sl@0
   244
#define	GETCOLOR(cm, c)	((cm)->tree->tptr[B1(c)]->tcolor[B0(c)])
sl@0
   245
#endif
sl@0
   246
#if NBYTS == 4
sl@0
   247
#define	GETCOLOR(cm, c)	((cm)->tree->tptr[B3(c)]->tptr[B2(c)]->tptr[B1(c)]->tcolor[B0(c)])
sl@0
   248
#endif
sl@0
   249
sl@0
   250
sl@0
   251
sl@0
   252
/*
sl@0
   253
 * Interface definitions for locale-interface functions in locale.c.
sl@0
   254
 * Multi-character collating elements (MCCEs) cause most of the trouble.
sl@0
   255
 */
sl@0
   256
struct cvec {
sl@0
   257
	int nchrs;		/* number of chrs */
sl@0
   258
	int chrspace;		/* number of chrs possible */
sl@0
   259
	chr *chrs;		/* pointer to vector of chrs */
sl@0
   260
	int nranges;		/* number of ranges (chr pairs) */
sl@0
   261
	int rangespace;		/* number of chrs possible */
sl@0
   262
	chr *ranges;		/* pointer to vector of chr pairs */
sl@0
   263
	int nmcces;		/* number of MCCEs */
sl@0
   264
	int mccespace;		/* number of MCCEs possible */
sl@0
   265
	int nmccechrs;		/* number of chrs used for MCCEs */
sl@0
   266
	chr *mcces[1];		/* pointers to 0-terminated MCCEs */
sl@0
   267
				/* and both batches of chrs are on the end */
sl@0
   268
};
sl@0
   269
sl@0
   270
/* caution:  this value cannot be changed easily */
sl@0
   271
#define	MAXMCCE	2		/* length of longest MCCE */
sl@0
   272
sl@0
   273
sl@0
   274
sl@0
   275
/*
sl@0
   276
 * definitions for NFA internal representation
sl@0
   277
 *
sl@0
   278
 * Having a "from" pointer within each arc may seem redundant, but it
sl@0
   279
 * saves a lot of hassle.
sl@0
   280
 */
sl@0
   281
struct state;
sl@0
   282
sl@0
   283
struct arc {
sl@0
   284
	int type;
sl@0
   285
#		define	ARCFREE	'\0'
sl@0
   286
	color co;
sl@0
   287
	struct state *from;	/* where it's from (and contained within) */
sl@0
   288
	struct state *to;	/* where it's to */
sl@0
   289
	struct arc *outchain;	/* *from's outs chain or free chain */
sl@0
   290
#	define	freechain	outchain
sl@0
   291
	struct arc *inchain;	/* *to's ins chain */
sl@0
   292
	struct arc *colorchain;	/* color's arc chain */
sl@0
   293
};
sl@0
   294
sl@0
   295
struct arcbatch {		/* for bulk allocation of arcs */
sl@0
   296
	struct arcbatch *next;
sl@0
   297
#	define	ABSIZE	10
sl@0
   298
	struct arc a[ABSIZE];
sl@0
   299
};
sl@0
   300
sl@0
   301
struct state {
sl@0
   302
	int no;
sl@0
   303
#		define	FREESTATE	(-1)
sl@0
   304
	char flag;		/* marks special states */
sl@0
   305
	int nins;		/* number of inarcs */
sl@0
   306
	struct arc *ins;	/* chain of inarcs */
sl@0
   307
	int nouts;		/* number of outarcs */
sl@0
   308
	struct arc *outs;	/* chain of outarcs */
sl@0
   309
	struct arc *free;	/* chain of free arcs */
sl@0
   310
	struct state *tmp;	/* temporary for traversal algorithms */
sl@0
   311
	struct state *next;	/* chain for traversing all */
sl@0
   312
	struct state *prev;	/* back chain */
sl@0
   313
	struct arcbatch oas;	/* first arcbatch, avoid malloc in easy case */
sl@0
   314
	int noas;		/* number of arcs used in first arcbatch */
sl@0
   315
};
sl@0
   316
sl@0
   317
struct nfa {
sl@0
   318
	struct state *pre;	/* pre-initial state */
sl@0
   319
	struct state *init;	/* initial state */
sl@0
   320
	struct state *final;	/* final state */
sl@0
   321
	struct state *post;	/* post-final state */
sl@0
   322
	int nstates;		/* for numbering states */
sl@0
   323
	struct state *states;	/* state-chain header */
sl@0
   324
	struct state *slast;	/* tail of the chain */
sl@0
   325
	struct state *free;	/* free list */
sl@0
   326
	struct colormap *cm;	/* the color map */
sl@0
   327
	color bos[2];		/* colors, if any, assigned to BOS and BOL */
sl@0
   328
	color eos[2];		/* colors, if any, assigned to EOS and EOL */
sl@0
   329
	struct vars *v;		/* simplifies compile error reporting */
sl@0
   330
	struct nfa *parent;	/* parent NFA, if any */
sl@0
   331
};
sl@0
   332
sl@0
   333
sl@0
   334
sl@0
   335
/*
sl@0
   336
 * definitions for compacted NFA
sl@0
   337
 */
sl@0
   338
struct carc {
sl@0
   339
	color co;		/* COLORLESS is list terminator */
sl@0
   340
	int to;			/* state number */
sl@0
   341
};
sl@0
   342
sl@0
   343
struct cnfa {
sl@0
   344
	int nstates;		/* number of states */
sl@0
   345
	int ncolors;		/* number of colors */
sl@0
   346
	int flags;
sl@0
   347
#		define	HASLACONS	01	/* uses lookahead constraints */
sl@0
   348
	int pre;		/* setup state number */
sl@0
   349
	int post;		/* teardown state number */
sl@0
   350
	color bos[2];		/* colors, if any, assigned to BOS and BOL */
sl@0
   351
	color eos[2];		/* colors, if any, assigned to EOS and EOL */
sl@0
   352
	struct carc **states;	/* vector of pointers to outarc lists */
sl@0
   353
	struct carc *arcs;	/* the area for the lists */
sl@0
   354
};
sl@0
   355
#define	ZAPCNFA(cnfa)	((cnfa).nstates = 0)
sl@0
   356
#define	NULLCNFA(cnfa)	((cnfa).nstates == 0)
sl@0
   357
sl@0
   358
sl@0
   359
sl@0
   360
/*
sl@0
   361
 * subexpression tree
sl@0
   362
 */
sl@0
   363
struct subre {
sl@0
   364
	char op;		/* '|', '.' (concat), 'b' (backref), '(', '=' */
sl@0
   365
	char flags;
sl@0
   366
#		define	LONGER	01	/* prefers longer match */
sl@0
   367
#		define	SHORTER	02	/* prefers shorter match */
sl@0
   368
#		define	MIXED	04	/* mixed preference below */
sl@0
   369
#		define	CAP	010	/* capturing parens below */
sl@0
   370
#		define	BACKR	020	/* back reference below */
sl@0
   371
#		define	INUSE	0100	/* in use in final tree */
sl@0
   372
#		define	LOCAL	03	/* bits which may not propagate up */
sl@0
   373
#		define	LMIX(f)	((f)<<2)	/* LONGER -> MIXED */
sl@0
   374
#		define	SMIX(f)	((f)<<1)	/* SHORTER -> MIXED */
sl@0
   375
#		define	UP(f)	(((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED))
sl@0
   376
#		define	MESSY(f)	((f)&(MIXED|CAP|BACKR))
sl@0
   377
#		define	PREF(f)	((f)&LOCAL)
sl@0
   378
#		define	PREF2(f1, f2)	((PREF(f1) != 0) ? PREF(f1) : PREF(f2))
sl@0
   379
#		define	COMBINE(f1, f2)	(UP((f1)|(f2)) | PREF2(f1, f2))
sl@0
   380
	short retry;		/* index into retry memory */
sl@0
   381
	int subno;		/* subexpression number (for 'b' and '(') */
sl@0
   382
	short min;		/* min repetitions, for backref only */
sl@0
   383
	short max;		/* max repetitions, for backref only */
sl@0
   384
	struct subre *left;	/* left child, if any (also freelist chain) */
sl@0
   385
	struct subre *right;	/* right child, if any */
sl@0
   386
	struct state *begin;	/* outarcs from here... */
sl@0
   387
	struct state *end;	/* ...ending in inarcs here */
sl@0
   388
	struct cnfa cnfa;	/* compacted NFA, if any */
sl@0
   389
	struct subre *chain;	/* for bookkeeping and error cleanup */
sl@0
   390
};
sl@0
   391
sl@0
   392
sl@0
   393
sl@0
   394
/*
sl@0
   395
 * table of function pointers for generic manipulation functions
sl@0
   396
 * A regex_t's re_fns points to one of these.
sl@0
   397
 */
sl@0
   398
struct fns {
sl@0
   399
	VOID FUNCPTR(free, (regex_t *));
sl@0
   400
};
sl@0
   401
sl@0
   402
sl@0
   403
sl@0
   404
/*
sl@0
   405
 * the insides of a regex_t, hidden behind a void *
sl@0
   406
 */
sl@0
   407
struct guts {
sl@0
   408
	int magic;
sl@0
   409
#		define	GUTSMAGIC	0xfed9
sl@0
   410
	int cflags;		/* copy of compile flags */
sl@0
   411
	long info;		/* copy of re_info */
sl@0
   412
	size_t nsub;		/* copy of re_nsub */
sl@0
   413
	struct subre *tree;
sl@0
   414
	struct cnfa search;	/* for fast preliminary search */
sl@0
   415
	int ntree;
sl@0
   416
	struct colormap cmap;
sl@0
   417
	int FUNCPTR(compare, (CONST chr *, CONST chr *, size_t));
sl@0
   418
	struct subre *lacons;	/* lookahead-constraint vector */
sl@0
   419
	int nlacons;		/* size of lacons */
sl@0
   420
};