sl@0: # reg.test --
sl@0: #
sl@0: # This file contains a collection of tests for one or more of the Tcl
sl@0: # built-in commands.  Sourcing this file into Tcl runs the tests and
sl@0: # generates output for errors.  No output means no errors were found.
sl@0: # (Don't panic if you are seeing this as part of the reg distribution
sl@0: # and aren't using Tcl -- reg's own regression tester also knows how
sl@0: # to read this file, ignoring the Tcl-isms.)
sl@0: #
sl@0: # Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
sl@0: #
sl@0: # RCS: @(#) $Id: reg.test,v 1.16.2.3 2004/11/27 05:44:13 dgp Exp $
sl@0: 
sl@0: if {[lsearch [namespace children] ::tcltest] == -1} {
sl@0:     package require tcltest 2
sl@0:     namespace import -force ::tcltest::*
sl@0: }
sl@0: 
sl@0: # The table-driven tests below all need the testregexp command.  Its
sl@0: # availability is recorded as the "testregexp" constraint (the file does
sl@0: # not return early); the helper procedures attach that constraint to the
sl@0: # tests they generate.
sl@0: 
sl@0: ::tcltest::testConstraint testregexp \
sl@0: 	[expr {[llength [info commands testregexp]] > 0}]
sl@0: # Always false:  the helpers tag "+" flag cases with this constraint so
sl@0: # that they register as skipped tests.
sl@0: ::tcltest::testConstraint localeRegexp 0
sl@0: 
sl@0: # This file uses some custom procedures, defined below, for regexp regression
sl@0: # testing.  The name of the procedure indicates the general nature of the
sl@0: # test:
sl@0: #	e	compile error expected
sl@0: #	f	match failure expected
sl@0: #	m	successful match
sl@0: #	i	successful match with -indices (used in checking things like
sl@0: #		nonparticipating subexpressions)
sl@0: #	p	unsuccessful match with -indices (!!) (used in checking
sl@0: #		partial-match reporting)
sl@0: # There is also "doing" which sets up title and major test number for each
sl@0: # block of tests.
sl@0: 
sl@0: # The first 3 arguments are constant:  a minor number (which often gets
sl@0: # a letter or two suffixed to it internally), some flags, and the RE itself.
sl@0: # For e, the remaining argument is the name of the compile error expected,
sl@0: # less the leading "REG_".  For the rest, the next argument is the string
sl@0: # to try the match against.  Remaining arguments are the substring expected
sl@0: # to be matched, and any substrings expected to be matched by subexpressions.
sl@0: # (For f, these arguments are optional, and if present are ignored except
sl@0: # that they indicate how many subexpressions should be present in the RE.)
sl@0: # It is an error for the number of subexpression arguments to be wrong.
sl@0: # Cases involving nonparticipating subexpressions, checking where empty
sl@0: # substrings are located, etc. should be done using i and p.
sl@0: 
sl@0: # The flag characters are complex and a bit eclectic.  Generally speaking, 
sl@0: # lowercase letters are compile options, uppercase are expected re_info
sl@0: # bits, and nonalphabetics are match options, controls for how the test is 
sl@0: # run, or testing options.  The one small surprise is that AREs are the
sl@0: # default, and you must explicitly request lesser flavors of RE.  The flags
sl@0: # are as follows.  It is admitted that some are not very mnemonic.
sl@0: # There are some others which are purely debugging tools and are not
sl@0: # useful in this file.
sl@0: #
sl@0: #	-	no-op (placeholder)
sl@0: #	+	provide fake xy equivalence class and ch collating element
sl@0: #	%	force small state-set cache in matcher (to test cache replace)
sl@0: #	^	beginning of string is not beginning of line
sl@0: #	$	end of string is not end of line
sl@0: #	*	test is Unicode-specific, needs big character set
sl@0: #
sl@0: #	&	test as both ARE and BRE
sl@0: #	b	BRE
sl@0: #	e	ERE
sl@0: #	a	turn advanced-features bit on (error unless ERE already)
sl@0: #	q	literal string, no metacharacters at all
sl@0: #
sl@0: #	i	case-independent matching
sl@0: #	o	("opaque") no subexpression capture
sl@0: #	p	newlines are half-magic, excluded from . and [^ only
sl@0: #	w	newlines are half-magic, significant to ^ and $ only
sl@0: #	n	newlines are fully magic, both effects
sl@0: #	x	expanded RE syntax
sl@0: #	t	incomplete-match reporting
sl@0: #
sl@0: #	A	backslash-_a_lphanumeric seen
sl@0: #	B	ERE/ARE literal-_b_race heuristic used
sl@0: #	E	backslash (_e_scape) seen within []
sl@0: #	H	looka_h_ead constraint seen
sl@0: #	I	_i_mpossible to match
sl@0: #	L	_l_ocale-specific construct seen
sl@0: #	M	unportable (_m_achine-specific) construct seen
sl@0: #	N	RE can match empty (_n_ull) string
sl@0: #	P	non-_P_OSIX construct seen
sl@0: #	Q	{} _q_uantifier seen
sl@0: #	R	back _r_eference seen
sl@0: #	S	POSIX-un_s_pecified syntax seen
sl@0: #	T	prefers shortest (_t_iny)
sl@0: #	U	saw original-POSIX botch:  unmatched right paren in ERE (_u_gh)
sl@0: 
sl@0: # The one area we can't easily test is memory-allocation failures (which
sl@0: # are hard to provoke on command).  Embedded NULs also are not tested at
sl@0: # the moment, but this is a historical accident which should be fixed.
sl@0: 
sl@0: 
sl@0: 
sl@0: # test procedures and related
sl@0: 
sl@0: # Option names; the helpers splice them in as "-$ask" and "-$xflags" when
sl@0: # they assemble testregexp commands.
sl@0: set xflags "xflags"
sl@0: set ask "about"
sl@0: # Count of bypassed tests; "doing" reports it when nonzero and resets it.
sl@0: set testbypassed 0
sl@0: 
sl@0: # re_info abbreviation mapping table:  flag letter -> re_info bit name
sl@0: array set infonames {
sl@0: 	A	REG_UBSALNUM
sl@0: 	B	REG_UBRACES
sl@0: 	E	REG_UBBS
sl@0: 	H	REG_ULOOKAHEAD
sl@0: 	I	REG_UIMPOSSIBLE
sl@0: 	L	REG_ULOCALE
sl@0: 	M	REG_UUNPORT
sl@0: 	N	REG_UEMPTYMATCH
sl@0: 	P	REG_UNONPOSIX
sl@0: 	Q	REG_UBOUNDS
sl@0: 	R	REG_UBACKREF
sl@0: 	S	REG_UUNSPEC
sl@0: 	T	REG_USHORTEST
sl@0: 	U	REG_UPBOTCH
sl@0: }
sl@0: # flag letters in reporting order:  must match bit order, lsb first
sl@0: set infonameorder "RHQBAUEPSMLNIT"
sl@0: 
sl@0: # set major test number and description
sl@0: #	major	major test number; the test-name prefix becomes reg-$major
sl@0: #	desc	block title; the test description becomes "reg $desc"
sl@0: # Before switching blocks, complains on stdout if the bypass counter of
sl@0: # the previous block is nonzero, then resets that counter.
sl@0: proc doing {major desc} {
sl@0: 	if {$::testbypassed != 0} {
sl@0: 		puts stdout "!!! bypassed $::testbypassed tests in $::prefix, `$::description'"
sl@0: 	}
sl@0: 
sl@0: 	set ::prefix "reg-$major"
sl@0: 	set ::description "reg $desc"
sl@0: 	set ::testbypassed 0
sl@0: }
sl@0: 
sl@0: # build test number (internal):  the elements of testid joined with dots,
sl@0: # e.g. {3 ARE compile} -> 3.ARE.compile
sl@0: proc tno {testid} {
sl@0: 	join $testid "."
sl@0: }
sl@0: 
sl@0: # build description, with possible modifiers (internal)
sl@0: # The result is the current block description; when testid has elements
sl@0: # beyond the minor number, they are prepended in parentheses.
sl@0: proc desc {testid} {
sl@0: 	global description
sl@0: 
sl@0: 	if {[llength $testid] <= 1} {
sl@0: 		return $description
sl@0: 	}
sl@0: 	return "([lrange $testid 1 end]) $description"
sl@0: }
sl@0: 
sl@0: # build trailing options and flags argument from a flags string (internal)
sl@0: # Letters that have a dedicated testregexp switch become that switch, "-"
sl@0: # is dropped, and every other character is collected, in order, into one
sl@0: # string passed after -$xflags.
sl@0: proc flags {fl} {
sl@0: 	global xflags
sl@0: 
sl@0: 	array set switchfor {
sl@0: 		i	-nocase
sl@0: 		x	-expanded
sl@0: 		n	-line
sl@0: 		p	-linestop
sl@0: 		w	-lineanchor
sl@0: 	}
sl@0: 	set opts [list]
sl@0: 	set extra ""
sl@0: 	foreach ch [split $fl ""] {
sl@0: 		if {[info exists switchfor($ch)]} {
sl@0: 			lappend opts $switchfor($ch)
sl@0: 		} elseif {[string compare $ch "-"] != 0} {
sl@0: 			append extra $ch
sl@0: 		}
sl@0: 	}
sl@0: 	if {[string length $extra] > 0} {
sl@0: 		lappend opts -$xflags $extra
sl@0: 	}
sl@0: 	return $opts
sl@0: }
sl@0: 
sl@0: # build info-flags list from a flags string (internal)
sl@0: # Walks infonameorder, so the names come out in that order regardless of
sl@0: # where the letters appear in fl.
sl@0: proc infoflags {fl} {
sl@0: 	global infonames infonameorder
sl@0: 
sl@0: 	set names [list]
sl@0: 	set count [string length $infonameorder]
sl@0: 	for {set k 0} {$k < $count} {incr k} {
sl@0: 		set letter [string index $infonameorder $k]
sl@0: 		if {[string first $letter $fl] < 0} {
sl@0: 			continue
sl@0: 		}
sl@0: 		lappend names $infonames($letter)
sl@0: 	}
sl@0: 	return $names
sl@0: }
sl@0: 
sl@0: # compilation error expected
sl@0: # e testno flags re err
sl@0: #	testid	minor test number (ARE/BRE gets appended when & is expanded)
sl@0: #	flags	flag string, see the legend near the top of this file
sl@0: #	re	the RE that is expected to fail to compile
sl@0: #	err	name of the expected compile error, less the leading "REG_"
sl@0: # errorCode is declared global because the generated script reads it.
sl@0: # NOTE(review): this presumes tcltest evaluates the script in this
sl@0: # procedure's scope -- confirm.
sl@0: proc e {testid flags re err} {
sl@0: 	global prefix ask errorCode
sl@0: 
sl@0: 	# Tcl locale stuff doesn't do the ch/xy test fakery yet
sl@0: 	if {[string first "+" $flags] >= 0} {
sl@0: 	    # This will register as a skipped test
sl@0: 	    test $prefix.[tno $testid] [desc $testid] localeRegexp {} {}
sl@0: 	    return
sl@0: 	}
sl@0: 
sl@0: 	# if &, test as both ARE and BRE:  drop the (first) & and recurse,
sl@0: 	# once with the flags as they are and once with "b" appended
sl@0: 	set amp [string first "&" $flags]
sl@0: 	if {$amp >= 0} {
sl@0: 		set f [string range $flags 0 [expr {$amp - 1}]]
sl@0: 		append f [string range $flags [expr {$amp + 1}] end]
sl@0: 		e [linsert $testid end ARE] ${f} $re $err
sl@0: 		e [linsert $testid end BRE] ${f}b $re $err
sl@0: 		return
sl@0: 	}
sl@0: 
sl@0: 	# The script yields {catch-code error-name}; a compile failure is
sl@0: 	# expected to give {1 REG_$err}.
sl@0: 	set cmd [concat [list testregexp -$ask] [flags $flags] [list $re]]
sl@0: 	set run "list \[catch \{$cmd\}\] \[lindex \$errorCode 1\]"
sl@0: 	test $prefix.[tno $testid] [desc $testid] \
sl@0: 		{testregexp} $run [list 1 REG_$err]
sl@0: }
sl@0: 
sl@0: # match failure expected
sl@0: # f testno flags re target ?mat submat ...?
sl@0: #	testid	minor test number (ARE/BRE gets appended when & is expanded)
sl@0: #	flags	flag string, see the legend near the top of this file
sl@0: #	re	the RE to compile
sl@0: #	target	string the RE must fail to match
sl@0: #	args	optional; only their count is used, as 1 + the number of
sl@0: #		subexpressions the compile test should report
sl@0: # Generates two tests, "compile" and "execute".
sl@0: proc f {testid flags re target args} {
sl@0: 	global prefix description ask
sl@0: 
sl@0: 	# Tcl locale stuff doesn't do the ch/xy test fakery yet
sl@0: 	if {[string first "+" $flags] >= 0} {
sl@0: 	    # This will register as a skipped test
sl@0: 	    test $prefix.[tno $testid] [desc $testid] localeRegexp {} {}
sl@0: 	    return
sl@0: 	}
sl@0: 
sl@0: 	# if &, test as both ARE and BRE:  drop the (first) & and recurse,
sl@0: 	# once with the flags as they are and once with "b" appended
sl@0: 	set amp [string first "&" $flags]
sl@0: 	if {$amp >= 0} {
sl@0: 		set f [string range $flags 0 [expr {$amp - 1}]]
sl@0: 		append f [string range $flags [expr {$amp + 1}] end]
sl@0: 		eval [linsert $args 0 f [linsert $testid end ARE] ${f} $re \
sl@0: 								$target]
sl@0: 		eval [linsert $args 0 f [linsert $testid end BRE] ${f}b $re \
sl@0: 								$target]
sl@0: 		return
sl@0: 	}
sl@0: 
sl@0: 	set f [flags $flags]
sl@0: 	set infoflags [infoflags $flags]
sl@0: 	set ccmd [concat [list testregexp -$ask] $f [list $re]]
sl@0: 	set nsub [expr {[llength $args] - 1}]
sl@0: 	if {$nsub == -1} {
sl@0: 		# didn't tell us number of subexps:  strip the first element
sl@0: 		# of the compile result so that only the info flags are compared
sl@0: 		set ccmd "lreplace \[$ccmd\] 0 0"
sl@0: 		set info [list $infoflags]
sl@0: 	} else {
sl@0: 		set info [list $nsub $infoflags]
sl@0: 	}
sl@0: 	lappend testid "compile"
sl@0: 	test $prefix.[tno $testid] [desc $testid] {testregexp} $ccmd $info
sl@0: 
sl@0: 	# same test id, with "compile" swapped for "execute"; the match
sl@0: 	# attempt is expected to return 0
sl@0: 	set testid [lreplace $testid end end "execute"]
sl@0: 	set ecmd [concat [list testregexp] $f [list $re $target]]
sl@0: 	test $prefix.[tno $testid] [desc $testid] {testregexp} $ecmd 0
sl@0: }
sl@0: 
sl@0: # match expected, internal routine that does the work
sl@0: # parameters like the "real" routines (m, i, p) plus a leading "opts",
sl@0: #  which is a possibly-empty list of switches for the regexp match attempt
sl@0: # The ! flag is used to indicate expected match failure (for REG_EXPECT,
sl@0: #  which wants argument testing even in the event of failure).
sl@0: #	opts	switches placed right after "testregexp" in the match command
sl@0: #	testid	minor test number (ARE/BRE gets appended when & is expanded)
sl@0: #	flags	flag string, see the legend near the top of this file
sl@0: #	re	the RE to compile and match
sl@0: #	target	string to match against
sl@0: #	args	expected match followed by the expected submatches
sl@0: # Generates two tests, "compile" and "execute".
sl@0: proc matchexpected {opts testid flags re target args} {
sl@0: 	global prefix description ask regBug
sl@0: 
sl@0: 	# set by the knownBug wrapper while it runs its command
sl@0: 	if {[info exists regBug] && $regBug} {
sl@0: 		# This will register as a skipped test
sl@0: 		test $prefix.[tno $testid] [desc $testid] knownBug {format 0} {1}
sl@0: 		return
sl@0: 	}
sl@0: 
sl@0: 	# Tcl locale stuff doesn't do the ch/xy test fakery yet
sl@0: 	if {[string first "+" $flags] >= 0} {
sl@0: 	    # This will register as a skipped test
sl@0: 	    test $prefix.[tno $testid] [desc $testid] localeRegexp {} {}
sl@0: 	    return
sl@0: 	}
sl@0: 
sl@0: 	# if &, test as both ARE and BRE:  drop the (first) & and recurse,
sl@0: 	# once with the flags as they are and once with "b" appended
sl@0: 	set amp [string first "&" $flags]
sl@0: 	if {$amp >= 0} {
sl@0: 		set f [string range $flags 0 [expr {$amp - 1}]]
sl@0: 		append f [string range $flags [expr {$amp + 1}] end]
sl@0: 		eval [concat [list matchexpected $opts \
sl@0: 			[linsert $testid end ARE] ${f} $re $target] $args]
sl@0: 		eval [concat [list matchexpected $opts \
sl@0: 			[linsert $testid end BRE] ${f}b $re $target] $args]
sl@0: 		return
sl@0: 	}
sl@0: 
sl@0: 	set f [flags $flags]
sl@0: 	set infoflags [infoflags $flags]
sl@0: 	set ccmd [concat [list testregexp -$ask] $f [list $re]]
sl@0: 	set ecmd [concat [list testregexp] $opts $f [list $re $target]]
sl@0: 
sl@0: 	# One result variable per expected value:  match, sub1, sub2, ...
sl@0: 	# Each is preset to "" here so the result script can read all of them.
sl@0: 	# NOTE(review): this presumes tcltest evaluates the script in this
sl@0: 	# procedure's scope -- confirm.
sl@0: 	set nsub [expr {[llength $args] - 1}]
sl@0: 	set names [list]
sl@0: 	set refs ""
sl@0: 	for {set i 0} {$i <= $nsub} {incr i} {
sl@0: 		if {$i == 0} {
sl@0: 			set name match
sl@0: 		} else {
sl@0: 			set name sub$i
sl@0: 		}
sl@0: 		lappend names $name
sl@0: 		append refs " \$$name"
sl@0: 		set $name ""
sl@0: 	}
sl@0: 	# These adjustments only affect the subexpression count expected from
sl@0: 	# the compile test; names/refs above keep one entry per argument.
sl@0: 	if {[string first "o" $flags] >= 0} {	;# REG_NOSUB kludge
sl@0: 		set nsub 0		;# unsigned value cannot be -1
sl@0: 	}
sl@0: 	if {[string first "t" $flags] >= 0} {	;# REG_EXPECT
sl@0: 		incr nsub -1		;# the extra does not count
sl@0: 	}
sl@0: 	set ecmd [concat $ecmd $names]
sl@0: 	set erun "list \[$ecmd\] $refs"
sl@0: 	# expected match status:  1, or 0 when the ! flag is present
sl@0: 	set retcode [list 1]
sl@0: 	if {[string first "!" $flags] >= 0} {
sl@0: 		set retcode [list 0]
sl@0: 	}
sl@0: 	set result [concat $retcode $args]
sl@0: 
sl@0: 	set info [list $nsub $infoflags]
sl@0: 	lappend testid "compile"
sl@0: 	test $prefix.[tno $testid] [desc $testid] {testregexp} $ccmd $info
sl@0: 	set testid [lreplace $testid end end "execute"]
sl@0: 	test $prefix.[tno $testid] [desc $testid] {testregexp} $erun $result
sl@0: }
sl@0: 
sl@0: # match expected (no missing, empty, or ambiguous submatches)
sl@0: # m testno flags re target mat submat ...
sl@0: # Hands everything to matchexpected, with an empty switch list in front.
sl@0: proc m {args} {
sl@0: 	eval [linsert $args 0 matchexpected [list]]
sl@0: }
sl@0: 
sl@0: # match expected (full fanciness)
sl@0: # i testno flags re target mat submat ...
sl@0: # Same as m, except that the match attempt is made with -indices.
sl@0: proc i {args} {
sl@0: 	eval [linsert $args 0 matchexpected [list "-indices"]]
sl@0: }
sl@0: 
sl@0: # partial match expected
sl@0: # p testno flags re target mat "" ...
sl@0: # Quirk:  number of ""s must be one more than number of subREs.
sl@0: # Prefixes the flags with ! (expected match failure) and then behaves
sl@0: # like i.
sl@0: proc p {args} {
sl@0: 	set banged "![lindex $args 1]"			;# add ! flag
sl@0: 	eval [linsert [lreplace $args 1 1 $banged] 0 \
sl@0: 		matchexpected [list "-indices"]]
sl@0: }
sl@0: 
sl@0: # test is a knownBug
sl@0: # knownBug cmd ?arg ...?
sl@0: # Runs the command at global level with ::regBug set, which makes
sl@0: # matchexpected register the test under the knownBug constraint instead
sl@0: # of running it.  ::regBug is cleared again even when the command raises
sl@0: # an error, so one bad table row cannot turn every later test into a
sl@0: # skipped known bug; the error itself is passed on to the caller.
sl@0: proc knownBug {args} {
sl@0:     set ::regBug 1
sl@0:     set code [catch {uplevel #0 $args} result]
sl@0:     set ::regBug 0
sl@0:     if {$code == 1} {
sl@0: 	return -code error -errorinfo $::errorInfo \
sl@0: 		-errorcode $::errorCode $result
sl@0:     }
sl@0:     return 0
sl@0: }
sl@0: 
sl@0: 
sl@0: 
sl@0: # the tests themselves
sl@0: 
sl@0: 
sl@0: 
sl@0: # support functions and preliminary misc.
sl@0: # This is sensitive to changes in message wording, but we really have to
sl@0: # test the code->message expansion at least once.
sl@0: # Uses plain regexp rather than testregexp, so it carries no constraint.
sl@0: # The pattern (*) is the same RE that test 6.4 below expects to fail with
sl@0: # BADRPT; here the full error message text is checked.
sl@0: test reg-0.1 "regexp error reporting" {
sl@0: 	list [catch {regexp (*) ign} msg] $msg
sl@0: } {1 {couldn't compile regular expression pattern: quantifier operand invalid}}
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 1 "basic sanity checks"
sl@0: m  1	&	abc		abc	abc
sl@0: f  2	&	abc		def
sl@0: m  3	&	abc		xyabxabce	abc
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 2 "invalid option combinations"
sl@0: e  1	qe	a		INVARG
sl@0: e  2	qa	a		INVARG
sl@0: e  3	qx	a		INVARG
sl@0: e  4	qn	a		INVARG
sl@0: e  5	ba	a		INVARG
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 3 "basic syntax"
sl@0: i  1	&NS	""		a	{0 -1}
sl@0: m  2	NS	a|		a	a
sl@0: m  3	-	a|b		a	a
sl@0: m  4	-	a|b		b	b
sl@0: m  5	NS	a||b		b	b
sl@0: m  6	&	ab		ab	ab
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 4 "parentheses"
sl@0: m  1	-	(a)e		ae	ae	a
sl@0: m  2	o	(a)e		ae
sl@0: m  3	b	{\(a\)b}	ab	ab	a
sl@0: m  4	-	a((b)c)		abc	abc	bc	b
sl@0: m  5	-	a(b)(c)		abc	abc	b	c
sl@0: e  6	-	a(b		EPAREN
sl@0: e  7	b	{a\(b}		EPAREN
sl@0: # sigh, we blew it on the specs here... someday this will be fixed in POSIX,
sl@0: #  but meanwhile, it's fixed in AREs
sl@0: m  8	eU	a)b		a)b	a)b
sl@0: e  9	-	a)b		EPAREN
sl@0: e 10	b	{a\)b}		EPAREN
sl@0: m 11	P	a(?:b)c		abc	abc
sl@0: e 12	e	a(?:b)c		BADRPT
sl@0: i 13	S	a()b		ab	{0 1}	{1 0}
sl@0: m 14	SP	a(?:)b		ab	ab
sl@0: i 15	S	a(|b)c		ac	{0 1}	{1 0}
sl@0: m 16	S	a(b|)c		abc	abc	b
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 5 "simple one-char matching"
sl@0: # general case of brackets done later
sl@0: m  1	&	a.b		axb	axb
sl@0: f  2	&n	"a.b"		"a\nb"
sl@0: m  3	&	{a[bc]d}	abd	abd
sl@0: m  4	&	{a[bc]d}	acd	acd
sl@0: f  5	&	{a[bc]d}	aed
sl@0: f  6	&	{a[^bc]d}	abd
sl@0: m  7	&	{a[^bc]d}	aed	aed
sl@0: f  8	&p	"a\[^bc]d"	"a\nd"
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 6 "context-dependent syntax"
sl@0: # plus odds and ends
sl@0: e  1	-	*		BADRPT
sl@0: m  2	b	*		*	*
sl@0: m  3	b	{\(*\)}		*	*	*
sl@0: e  4	-	(*)		BADRPT
sl@0: m  5	b	^*		*	*
sl@0: e  6	-	^*		BADRPT
sl@0: f  7	&	^b		^b
sl@0: m  8	b	x^		x^	x^
sl@0: f  9	I	x^		x
sl@0: m 10	n	"\n^"		"x\nb"	"\n"
sl@0: f 11	bS	{\(^b\)}	^b
sl@0: m 12	-	(^b)		b	b	b
sl@0: m 13	&	{x$}		x	x
sl@0: m 14	bS	{\(x$\)}	x	x	x
sl@0: m 15	-	{(x$)}		x	x	x
sl@0: m 16	b	{x$y}		"x\$y"	"x\$y"
sl@0: f 17	I	{x$y}		xy
sl@0: m 18	n	"x\$\n"		"x\n"	"x\n"
sl@0: e 19	-	+		BADRPT
sl@0: e 20	-	?		BADRPT
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 7 "simple quantifiers"
sl@0: m  1	&N	a*		aa	aa
sl@0: i  2	&N	a*		b	{0 -1}
sl@0: m  3	-	a+		aa	aa
sl@0: m  4	-	a?b		ab	ab
sl@0: m  5	-	a?b		b	b
sl@0: e  6	-	**		BADRPT
sl@0: m  7	bN	**		***	***
sl@0: e  8	&	a**		BADRPT
sl@0: e  9	&	a**b		BADRPT
sl@0: e 10	&	***		BADRPT
sl@0: e 11	-	a++		BADRPT
sl@0: e 12	-	a?+		BADRPT
sl@0: e 13	-	a?*		BADRPT
sl@0: e 14	-	a+*		BADRPT
sl@0: e 15	-	a*+		BADRPT
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 8 "braces"
sl@0: m  1	NQ	"a{0,1}"	""	""
sl@0: m  2	NQ	"a{0,1}"	ac	a
sl@0: e  3	-	"a{1,0}"	BADBR
sl@0: e  4	-	"a{1,2,3}"	BADBR
sl@0: e  5	-	"a{257}"	BADBR
sl@0: e  6	-	"a{1000}"	BADBR
sl@0: e  7	-	"a{1"		EBRACE
sl@0: e  8	-	"a{1n}"		BADBR
sl@0: m  9	BS	"a{b"		"a\{b"	"a\{b"
sl@0: m 10	BS	"a{"		"a\{"	"a\{"
sl@0: m 11	bQ	"a\\{0,1\\}b"	cb	b
sl@0: e 12	b	"a\\{0,1"	EBRACE
sl@0: e 13	-	"a{0,1\\"	BADBR
sl@0: m 14	Q	"a{0}b"		ab	b
sl@0: m 15	Q	"a{0,0}b"	ab	b
sl@0: m 16	Q	"a{0,1}b"	ab	ab
sl@0: m 17	Q	"a{0,2}b"	b	b
sl@0: m 18	Q	"a{0,2}b"	aab	aab
sl@0: m 19	Q	"a{0,}b"	aab	aab
sl@0: m 20	Q	"a{1,1}b"	aab	ab
sl@0: m 21	Q	"a{1,3}b"	aaaab	aaab
sl@0: f 22	Q	"a{1,3}b"	b
sl@0: m 23	Q	"a{1,}b"	aab	aab
sl@0: f 24	Q	"a{2,3}b"	ab
sl@0: m 25	Q	"a{2,3}b"	aaaab	aaab
sl@0: f 26	Q	"a{2,}b"	ab
sl@0: m 27	Q	"a{2,}b"	aaaab	aaaab
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 9 "brackets"
sl@0: m  1	&	{a[bc]}		ac	ac
sl@0: m  2	&	{a[-]}		a-	a-
sl@0: m  3	&	{a[[.-.]]}	a-	a-
sl@0: m  4	&L	{a[[.zero.]]}	a0	a0
sl@0: m  5	&LM	{a[[.zero.]-9]}	a2	a2
sl@0: m  6	&M	{a[0-[.9.]]}	a2	a2
sl@0: m  7	&+L	{a[[=x=]]}	ax	ax
sl@0: m  8	&+L	{a[[=x=]]}	ay	ay
sl@0: f  9	&+L	{a[[=x=]]}	az
sl@0: e 10	&	{a[0-[=x=]]}	ERANGE
sl@0: m 11	&L	{a[[:digit:]]}	a0	a0
sl@0: e 12	&	{a[[:woopsie:]]}	ECTYPE
sl@0: f 13	&L	{a[[:digit:]]}	ab
sl@0: e 14	&	{a[0-[:digit:]]}	ERANGE
sl@0: m 15	&LP	{[[:<:]]a}	a	a
sl@0: m 16	&LP	{a[[:>:]]}	a	a
sl@0: e 17	&	{a[[..]]b}	ECOLLATE
sl@0: e 18	&	{a[[==]]b}	ECOLLATE
sl@0: e 19	&	{a[[::]]b}	ECTYPE
sl@0: e 20	&	{a[[.a}		EBRACK
sl@0: e 21	&	{a[[=a}		EBRACK
sl@0: e 22	&	{a[[:a}		EBRACK
sl@0: e 23	&	{a[}		EBRACK
sl@0: e 24	&	{a[b}		EBRACK
sl@0: e 25	&	{a[b-}		EBRACK
sl@0: e 26	&	{a[b-c}		EBRACK
sl@0: m 27	&M	{a[b-c]}	ab	ab
sl@0: m 28	&	{a[b-b]}	ab	ab
sl@0: m 29	&M	{a[1-2]}	a2	a2
sl@0: e 30	&	{a[c-b]}	ERANGE
sl@0: e 31	&	{a[a-b-c]}	ERANGE
sl@0: m 32	&M	{a[--?]b}	a?b	a?b
sl@0: m 33	&	{a[---]b}	a-b	a-b
sl@0: m 34	&	{a[]b]c}	a]c	a]c
sl@0: m 35	EP	{a[\]]b}	a]b	a]b
sl@0: f 36	bE	{a[\]]b}	a]b
sl@0: m 37	bE	{a[\]]b}	"a\\]b"	"a\\]b"
sl@0: m 38	eE	{a[\]]b}	"a\\]b"	"a\\]b"
sl@0: m 39	EP	{a[\\]b}	"a\\b"	"a\\b"
sl@0: m 40	eE	{a[\\]b}	"a\\b"	"a\\b"
sl@0: m 41	bE	{a[\\]b}	"a\\b"	"a\\b"
sl@0: e 42	-	{a[\Z]b}	EESCAPE
sl@0: m 43	&	{a[[b]c}	"a\[c"	"a\[c"
sl@0: m 44	EMP*	{a[\u00fe-\u0507][\u00ff-\u0300]b} \
sl@0: 			"a\u0102\u02ffb"	"a\u0102\u02ffb"
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 10 "anchors and newlines"
sl@0: m  1	&	^a		a	a
sl@0: f  2	&^	^a		a
sl@0: i  3	&N	^		a	{0 -1}
sl@0: i  4	&	{a$}		aba	{2 2}
sl@0: f  5	{&$}	{a$}		a
sl@0: i  6	&N	{$}		ab	{2 1}
sl@0: m  7	&n	^a		a	a
sl@0: m  8	&n	"^a"		"b\na"	"a"
sl@0: i  9	&w	"^a"		"a\na"	{0 0}
sl@0: i 10	&n^	"^a"		"a\na"	{2 2}
sl@0: m 11	&n	{a$}		a	a
sl@0: m 12	&n	"a\$"		"a\nb"	"a"
sl@0: i 13	&n	"a\$"		"a\na"	{0 0}
sl@0: i 14	N	^^		a	{0 -1}
sl@0: m 15	b	^^		^	^
sl@0: i 16	N	{$$}		a	{1 0}
sl@0: m 17	b	{$$}		"\$"	"\$"
sl@0: m 18	&N	{^$}		""	""
sl@0: f 19	&N	{^$}		a
sl@0: i 20	&nN	"^\$"		"a\n\nb"	{2 1}
sl@0: m 21	N	{$^}		""	""
sl@0: m 22	b	{$^}		"\$^"	"\$^"
sl@0: m 23	P	{\Aa}		a	a
sl@0: m 24	^P	{\Aa}		a	a
sl@0: f 25	^nP	{\Aa}		"b\na"
sl@0: m 26	P	{a\Z}		a	a
sl@0: m 27	{$P}	{a\Z}		a	a
sl@0: f 28	{$nP}	{a\Z}		"a\nb"
sl@0: e 29	-	^*		BADRPT
sl@0: e 30	-	{$*}		BADRPT
sl@0: e 31	-	{\A*}		BADRPT
sl@0: e 32	-	{\Z*}		BADRPT
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 11 "boundary constraints"
sl@0: m  1	&LP	{[[:<:]]a}	a	a
sl@0: m  2	&LP	{[[:<:]]a}	-a	a
sl@0: f  3	&LP	{[[:<:]]a}	ba
sl@0: m  4	&LP	{a[[:>:]]}	a	a
sl@0: m  5	&LP	{a[[:>:]]}	a-	a
sl@0: f  6	&LP	{a[[:>:]]}	ab
sl@0: m  7	bLP	{\<a}		a	a
sl@0: f  8	bLP	{\<a}		ba
sl@0: m  9	bLP	{a\>}		a	a
sl@0: f 10	bLP	{a\>}		ab
sl@0: m 11	LP	{\ya}		a	a
sl@0: f 12	LP	{\ya}		ba
sl@0: m 13	LP	{a\y}		a	a
sl@0: f 14	LP	{a\y}		ab
sl@0: m 15	LP	{a\Y}		ab	a
sl@0: f 16	LP	{a\Y}		a-
sl@0: f 17	LP	{a\Y}		a
sl@0: f 18	LP	{-\Y}		-a
sl@0: m 19	LP	{-\Y}		-%	-
sl@0: f 20	LP	{\Y-}		a-
sl@0: e 21	-	{[[:<:]]*}	BADRPT
sl@0: e 22	-	{[[:>:]]*}	BADRPT
sl@0: e 23	b	{\<*}		BADRPT
sl@0: e 24	b	{\>*}		BADRPT
sl@0: e 25	-	{\y*}		BADRPT
sl@0: e 26	-	{\Y*}		BADRPT
sl@0: m 27	LP	{\ma}		a	a
sl@0: f 28	LP	{\ma}		ba
sl@0: m 29	LP	{a\M}		a	a
sl@0: f 30	LP	{a\M}		ab
sl@0: f 31	ILP	{\Ma}		a
sl@0: f 32	ILP	{a\m}		a
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 12 "character classes"
sl@0: m  1	LP	{a\db}		a0b	a0b
sl@0: f  2	LP	{a\db}		axb
sl@0: f  3	LP	{a\Db}		a0b
sl@0: m  4	LP	{a\Db}		axb	axb
sl@0: m  5	LP	"a\\sb"		"a b"	"a b"
sl@0: m  6	LP	"a\\sb"		"a\tb"	"a\tb"
sl@0: m  7	LP	"a\\sb"		"a\nb"	"a\nb"
sl@0: f  8	LP	{a\sb}		axb
sl@0: m  9	LP	{a\Sb}		axb	axb
sl@0: f 10	LP	"a\\Sb"		"a b"
sl@0: m 11	LP	{a\wb}		axb	axb
sl@0: f 12	LP	{a\wb}		a-b
sl@0: f 13	LP	{a\Wb}		axb
sl@0: m 14	LP	{a\Wb}		a-b	a-b
sl@0: m 15	LP	{\y\w+z\y}	adze-guz	guz
sl@0: m 16	LPE	{a[\d]b}	a1b	a1b
sl@0: m 17	LPE	"a\[\\s]b"	"a b"	"a b"
sl@0: m 18	LPE	{a[\w]b}	axb	axb
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 13 "escapes"
sl@0: e  1	&	"a\\"		EESCAPE
sl@0: m  2	-	{a\<b}		a<b	a<b
sl@0: m  3	e	{a\<b}		a<b	a<b
sl@0: m  4	bAS	{a\wb}		awb	awb
sl@0: m  5	eAS	{a\wb}		awb	awb
sl@0: m  6	PL	"a\\ab"		"a\007b"	"a\007b"
sl@0: m  7	P	"a\\bb"		"a\bb"	"a\bb"
sl@0: m  8	P	{a\Bb}		"a\\b"	"a\\b"
sl@0: m  9	MP	"a\\chb"	"a\bb"	"a\bb"
sl@0: m 10	MP	"a\\cHb"	"a\bb"	"a\bb"
sl@0: m 11	LMP	"a\\e"		"a\033"	"a\033"
sl@0: m 12	P	"a\\fb"		"a\fb"	"a\fb"
sl@0: m 13	P	"a\\nb"		"a\nb"	"a\nb"
sl@0: m 14	P	"a\\rb"		"a\rb"	"a\rb"
sl@0: m 15	P	"a\\tb"		"a\tb"	"a\tb"
sl@0: m 16	P	"a\\u0008x"	"a\bx"	"a\bx"
sl@0: e 17	-	{a\u008x}	EESCAPE
sl@0: m 18	P	"a\\u00088x"	"a\b8x"	"a\b8x"
sl@0: m 19	P	"a\\U00000008x"	"a\bx"	"a\bx"
sl@0: e 20	-	{a\U0000008x}	EESCAPE
sl@0: m 21	P	"a\\vb"		"a\vb"	"a\vb"
sl@0: m 22	MP	"a\\x08x"	"a\bx"	"a\bx"
sl@0: e 23	-	{a\xq}		EESCAPE
sl@0: m 24	MP	"a\\x0008x"	"a\bx"	"a\bx"
sl@0: e 25	-	{a\z}		EESCAPE
sl@0: m 26	MP	"a\\010b"	"a\bb"	"a\bb"
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 14 "back references"
sl@0: # ugh
sl@0: m  1	RP	{a(b*)c\1}	abbcbb	abbcbb	bb
sl@0: m  2	RP	{a(b*)c\1}	ac	ac	""
sl@0: f  3	RP	{a(b*)c\1}	abbcb
sl@0: m  4	RP	{a(b*)\1}	abbcbb	abb	b
sl@0: m  5	RP	{a(b|bb)\1}	abbcbb	abb	b
sl@0: m  6	RP	{a([bc])\1}	abb	abb	b
sl@0: f  7	RP	{a([bc])\1}	abc
sl@0: m  8	RP	{a([bc])\1}	abcabb	abb	b
sl@0: f  9	RP	{a([bc])*\1}	abc
sl@0: f 10	RP	{a([bc])\1}	abB
sl@0: m 11	iRP	{a([bc])\1}	abB	abB	b
sl@0: m 12	RP	{a([bc])\1+}	abbb	abbb	b
sl@0: m 13	QRP	"a(\[bc])\\1{3,4}"	abbbb	abbbb	b
sl@0: f 14	QRP	"a(\[bc])\\1{3,4}"	abbb
sl@0: m 15	RP	{a([bc])\1*}	abbb	abbb	b
sl@0: m 16	RP	{a([bc])\1*}	ab	ab	b
sl@0: m 17	RP	{a([bc])(\1*)}	ab	ab	b	""
sl@0: e 18	-	{a((b)\1)}	ESUBREG
sl@0: e 19	-	{a(b)c\2}	ESUBREG
sl@0: m 20	bR	{a\(b*\)c\1}	abbcbb	abbcbb	bb
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 15 "octal escapes vs back references"
sl@0: # initial zero is always octal
sl@0: m  1	MP	"a\\010b"	"a\bb"	"a\bb"
sl@0: m  2	MP	"a\\0070b"	"a\0070b"	"a\0070b"
sl@0: m  3	MP	"a\\07b"	"a\007b"	"a\007b"
sl@0: m  4	MP	"a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\\07c"	"abbbbbbbbbb\007c" \
sl@0: 	"abbbbbbbbbb\007c"	"b"	"b"	"b"	"b"	"b"	"b" \
sl@0: 	"b"	"b"	"b"	"b"
sl@0: # a single digit is always a backref
sl@0: e  5	-	{a\7b}		ESUBREG
sl@0: # otherwise it's a backref only if within range (barf!)
sl@0: m  6	MP	"a\\10b"	"a\bb"	"a\bb"
sl@0: m  7	MP	{a\101b}	aAb	aAb
sl@0: m  8	RP	{a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\10c}	abbbbbbbbbbbc \
sl@0: 	abbbbbbbbbbbc	b	b	b	b	b	b	b \
sl@0: 	b	b	b
sl@0: # but we're fussy about border cases -- guys who want octal should use the zero
sl@0: e  9	-	{a((((((((((b\10))))))))))c}	ESUBREG
sl@0: # BREs don't have octal, EREs don't have backrefs
sl@0: m 10	MP	"a\\12b"	"a\nb"	"a\nb"
sl@0: e 11	b	{a\12b}		ESUBREG
sl@0: m 12	eAS	{a\12b}		a12b	a12b
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 16 "expanded syntax"
sl@0: m  1	xP	"a b c"		"abc"	"abc"
sl@0: m  2	xP	"a b #oops\nc\td"	"abcd"	"abcd"
sl@0: m  3	x	"a\\ b\\\tc"	"a b\tc"	"a b\tc"
sl@0: m  4	xP	"a b\\#c"	"ab#c"	"ab#c"
sl@0: m  5	xP	"a b\[c d]e"	"ab e"	"ab e"
sl@0: m  6	xP	"a b\[c#d]e"	"ab#e"	"ab#e"
sl@0: m  7	xP	"a b\[c#d]e"	"abde"	"abde"
sl@0: m  8	xSPB	"ab{ d"		"ab\{d"	"ab\{d"
sl@0: m  9	xPQ	"ab{ 1 , 2 }c"	"abc"	"abc"
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 17 "misc syntax"
sl@0: m  1	P	a(?#comment)b	ab	ab
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 18 "unmatchable REs"
sl@0: f  1	I	a^b		ab
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 19 "case independence"
sl@0: m  1	&i	ab		Ab	Ab
sl@0: m  2	&i	{a[bc]}		aC	aC
sl@0: f  3	&i	{a[^bc]}	aB
sl@0: m  4	&iM	{a[b-d]}	aC	aC
sl@0: f  5	&iM	{a[^b-d]}	aC
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 20 "directors and embedded options"
sl@0: e  1	&	***?		BADPAT
sl@0: m  2	q	***?		***?	***?
sl@0: m  3	&P	***=a*b		a*b	a*b
sl@0: m  4	q	***=a*b		***=a*b	***=a*b
sl@0: m  5	bLP	{***:\w+}	ab	ab
sl@0: m  6	eLP	{***:\w+}	ab	ab
sl@0: e  7	&	***:***=a*b	BADRPT
sl@0: m  8	&P	***:(?b)a+b	a+b	a+b
sl@0: m  9	P	(?b)a+b		a+b	a+b
sl@0: e 10	e	{(?b)\w+}	BADRPT
sl@0: m 11	bAS	{(?b)\w+}	(?b)w+	(?b)w+
sl@0: m 12	iP	(?c)a		a	a
sl@0: f 13	iP	(?c)a		A
sl@0: m 14	APS	{(?e)\W+}	WW	WW
sl@0: m 15	P	(?i)a+		Aa	Aa
sl@0: f 16	P	"(?m)a.b"	"a\nb"
sl@0: m 17	P	"(?m)^b"	"a\nb"	"b"
sl@0: f 18	P	"(?n)a.b"	"a\nb"
sl@0: m 19	P	"(?n)^b"	"a\nb"	"b"
sl@0: f 20	P	"(?p)a.b"	"a\nb"
sl@0: f 21	P	"(?p)^b"	"a\nb"
sl@0: m 22	P	(?q)a+b		a+b	a+b
sl@0: m 23	nP	"(?s)a.b"	"a\nb"	"a\nb"
sl@0: m 24	xP	"(?t)a b"	"a b"	"a b"
sl@0: m 25	P	"(?w)a.b"	"a\nb"	"a\nb"
sl@0: m 26	P	"(?w)^b"	"a\nb"	"b"
sl@0: m 27	P	"(?x)a b"	"ab"	"ab"
sl@0: e 28	-	(?z)ab		BADOPT
sl@0: m 29	P	(?ici)a+	Aa	Aa
sl@0: e 30	P	(?i)(?q)a+	BADRPT
sl@0: m 31	P	(?q)(?i)a+	(?i)a+	(?i)a+
sl@0: m 32	P	(?qe)a+		a	a
sl@0: m 33	xP	"(?q)a b"	"a b"	"a b"
sl@0: m 34	P	"(?qx)a b"	"a b"	"a b"
sl@0: m 35	P	(?qi)ab		Ab	Ab
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 21 "capturing"
sl@0: m  1	-	a(b)c		abc	abc	b
sl@0: m  2	P	a(?:b)c		xabc	abc
sl@0: m  3	-	a((b))c		xabcy	abc	b	b
sl@0: m  4	P	a(?:(b))c	abcy	abc	b
sl@0: m  5	P	a((?:b))c	abc	abc	b
sl@0: m  6	P	a(?:(?:b))c	abc	abc
sl@0: i  7	Q	"a(b){0}c"	ac	{0 1}	{-1 -1}
sl@0: m  8	-	a(b)c(d)e	abcde	abcde	b	d
sl@0: m  9	-	(b)c(d)e	bcde	bcde	b	d
sl@0: m 10	-	a(b)(d)e	abde	abde	b	d
sl@0: m 11	-	a(b)c(d)	abcd	abcd	b	d
sl@0: m 12	-	(ab)(cd)	xabcdy	abcd	ab	cd
sl@0: m 13	-	a(b)?c		xabcy	abc	b
sl@0: i 14	-	a(b)?c		xacy	{1 2}	{-1 -1}
sl@0: m 15	-	a(b)?c(d)?e	xabcdey	abcde	b	d
sl@0: i 16	-	a(b)?c(d)?e	xacdey	{1 4}	{-1 -1}	{3 3}
sl@0: i 17	-	a(b)?c(d)?e	xabcey	{1 4}	{2 2}	{-1 -1}
sl@0: i 18	-	a(b)?c(d)?e	xacey	{1 3}	{-1 -1}	{-1 -1}
sl@0: m 19	-	a(b)*c		xabcy	abc	b
sl@0: i 20	-	a(b)*c		xabbbcy	{1 5}	{4 4}
sl@0: i 21	-	a(b)*c		xacy	{1 2}	{-1 -1}
sl@0: m 22	-	a(b*)c		xabbbcy	abbbc	bbb
sl@0: m 23	-	a(b*)c		xacy	ac	""
sl@0: f 24	-	a(b)+c		xacy
sl@0: m 25	-	a(b)+c		xabcy	abc	b
sl@0: i 26	-	a(b)+c		xabbbcy	{1 5}	{4 4}
sl@0: m 27	-	a(b+)c		xabbbcy	abbbc	bbb
sl@0: i 28	Q	"a(b){2,3}c"	xabbbcy	{1 5}	{4 4}
sl@0: i 29	Q	"a(b){2,3}c"	xabbcy	{1 4}	{3 3}
sl@0: f 30	Q	"a(b){2,3}c"	xabcy
sl@0: m 31	LP	"\\y(\\w+)\\y"	"-- abc-"	"abc"	"abc"
sl@0: m 32	-	a((b|c)d+)+	abacdbd	acdbd	bd	b
sl@0: m 33	N	(.*).*		abc	abc	abc
sl@0: m 34	N	(a*)*		bc	""	""
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 22 "multicharacter collating elements"
sl@0: # again ugh
sl@0: m  1	&+L	{a[c]e}		ace	ace
sl@0: f  2	&+IL	{a[c]h}		ach
sl@0: m  3	&+L	{a[[.ch.]]}	ach	ach
sl@0: f  4	&+L	{a[[.ch.]]}	ace
sl@0: m  5	&+L	{a[c[.ch.]]}	ac	ac
sl@0: m  6	&+L	{a[c[.ch.]]}	ace	ac
sl@0: m  7	&+L	{a[c[.ch.]]}	ache	ach
sl@0: f  8	&+L	{a[^c]e}	ace
sl@0: m  9	&+L	{a[^c]e}	abe	abe
sl@0: m 10	&+L	{a[^c]e}	ache	ache
sl@0: f 11	&+L	{a[^[.ch.]]}	ach
sl@0: m 12	&+L	{a[^[.ch.]]}	ace	ac
sl@0: m 13	&+L	{a[^[.ch.]]}	ac	ac
sl@0: m 14	&+L	{a[^[.ch.]]}	abe	ab
sl@0: f 15	&+L	{a[^c[.ch.]]}	ach
sl@0: f 16	&+L	{a[^c[.ch.]]}	ace
sl@0: f 17	&+L	{a[^c[.ch.]]}	ac
sl@0: m 18	&+L	{a[^c[.ch.]]}	abe	ab
sl@0: m 19	&+L	{a[^b]}		ac	ac
sl@0: m 20	&+L	{a[^b]}		ace	ac
sl@0: m 21	&+L	{a[^b]}		ach	ach
sl@0: f 22	&+L	{a[^b]}		abe
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 23 "lookahead constraints"
sl@0: m  1	HP	a(?=b)b*	ab	ab
sl@0: f  2	HP	a(?=b)b*	a
sl@0: m  3	HP	a(?=b)b*(?=c)c*	abc	abc
sl@0: f  4	HP	a(?=b)b*(?=c)c*	ab
sl@0: f  5	HP	a(?!b)b*	ab
sl@0: m  6	HP	a(?!b)b*	a	a
sl@0: m  7	HP	(?=b)b		b	b
sl@0: f  8	HP	(?=b)b		a
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 24 "non-greedy quantifiers"
sl@0: m  1	PT	ab+?		abb	ab
sl@0: m  2	PT	ab+?c		abbc	abbc
sl@0: m  3	PT	ab*?		abb	a
sl@0: m  4	PT	ab*?c		abbc	abbc
sl@0: m  5	PT	ab??		ab	a
sl@0: m  6	PT	ab??c		abc	abc
sl@0: m  7	PQT	"ab{2,4}?"	abbbb	abb
sl@0: m  8	PQT	"ab{2,4}?c"	abbbbc	abbbbc
sl@0: m  9	-	3z*		123zzzz456	3zzzz
sl@0: m 10	PT	3z*?		123zzzz456	3
sl@0: m 11	-	z*4		123zzzz456	zzzz4
sl@0: m 12	PT	z*?4		123zzzz456	zzzz4
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 25 "mixed quantifiers"
sl@0: # this is very incomplete as yet
sl@0: # should include |
sl@0: m  1	PNT	{^(.*?)(a*)$}	xyza	xyza	xyz	a
sl@0: m  2	PNT	{^(.*?)(a*)$}	xyzaa	xyzaa	xyz	aa
sl@0: m  3	PNT	{^(.*?)(a*)$}	xyz	xyz	xyz	""
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 26 "tricky cases"
sl@0: # attempts to trick the matcher into accepting a short match
sl@0: m  1	-	(week|wee)(night|knights)	weeknights	weeknights \
sl@0: 	wee	knights
sl@0: m  2	RP	{a(bc*).*\1}	abccbccb	abccbccb	b
sl@0: m  3	-	{a(b.[bc]*)+}	abcbd	abcbd	bd
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 27 "implementation misc."
sl@0: # duplicate arcs are suppressed
sl@0: m  1	P	a(?:b|b)c	abc	abc
sl@0: # make color/subcolor relationship go back and forth
sl@0: m  2	&	{[ab][ab][ab]}	aba	aba
sl@0: m  3	&	{[ab][ab][ab][ab][ab][ab][ab]}	abababa	abababa
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 28 "boundary busters etc."
sl@0: # color-descriptor allocation changes at 10
sl@0: m  1	&	abcdefghijkl	abcdefghijkl	abcdefghijkl
sl@0: # so does arc allocation
sl@0: m  2	P	a(?:b|c|d|e|f|g|h|i|j|k|l|m)n	agn	agn
sl@0: # subexpression tracking also at 10
sl@0: m  3	-	a(((((((((((((b)))))))))))))c	abc	abc	b	b	b	b	b	b	b	b	b	b	b	b	b
sl@0: # state-set handling changes slightly at unsigned size (might be 64...)
sl@0: # (also stresses arc allocation)
sl@0: m  4	Q	"ab{1,100}c"	abbc	abbc
sl@0: m  5	Q	"ab{1,100}c"	abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc \
sl@0: 	abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc
sl@0: m  6	Q	"ab{1,100}c" \
sl@0: 	abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc \
sl@0: 	abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc
sl@0: # force small cache and bust it, several ways
sl@0: m  7	LP	{\w+abcdefgh}	xyzabcdefgh	xyzabcdefgh
sl@0: m  8	%LP	{\w+abcdefgh}	xyzabcdefgh	xyzabcdefgh
sl@0: m  9	%LP	{\w+abcdefghijklmnopqrst}	xyzabcdefghijklmnopqrst \
sl@0: 	xyzabcdefghijklmnopqrst
sl@0: i 10	%LP	{\w+(abcdefgh)?}	xyz	{0 2}	{-1 -1}
sl@0: i 11	%LP	{\w+(abcdefgh)?}	xyzabcdefg	{0 9}	{-1 -1}
sl@0: i 12	%LP	{\w+(abcdefghijklmnopqrst)?}	xyzabcdefghijklmnopqrs \
sl@0: 	{0 21}	{-1 -1}
sl@0: 
sl@0: 
sl@0: 
sl@0: doing 29 "incomplete matches"
sl@0: p  1	t	def		abc	{3 2}	""
sl@0: p  2	t	bcd		abc	{1 2}	""
sl@0: p  3	t	abc		abab	{0 3}	""
sl@0: p  4	t	abc		abdab	{3 4}	""
sl@0: i  5	t	abc		abc	{0 2}	{0 2}
sl@0: i  6	t	abc		xyabc	{2 4}	{2 4}
sl@0: p  7	t	abc+		xyab	{2 3}	""
sl@0: i  8	t	abc+		xyabc	{2 4}	{2 4}
sl@0: knownBug i  9	t	abc+		xyabcd	{2 4}	{6 5}
sl@0: i  10	t	abc+		xyabcdd	{2 4}	{7 6}
sl@0: p  11	tPT	abc+?		xyab	{2 3}	""
sl@0: # the retain numbers in these two may look wrong, but they aren't
sl@0: i  12	tPT	abc+?		xyabc	{2 4}	{5 4}
sl@0: i  13	tPT	abc+?		xyabcc	{2 4}	{6 5}
sl@0: i  14	tPT	abc+?		xyabcd	{2 4}	{6 5}
sl@0: i  15	tPT	abc+?		xyabcdd	{2 4}	{7 6}
sl@0: i  16	t	abcd|bc		xyabc	{3 4}	{2 4}
sl@0: p  17	tn	.*k		"xx\nyyy"	{3 5}	""
sl@0: 
sl@0: 
sl@0: doing 30 "misc. oddities and old bugs"
sl@0: e  1	&	***		BADRPT
sl@0: m  2	N	a?b*		abb	abb
sl@0: m  3	N	a?b*		bb	bb
sl@0: m  4	&	a*b		aab	aab
sl@0: m  5	&	^a*b		aaaab	aaaab
sl@0: m  6	&M	{[0-6][1-2][0-3][0-6][1-6][0-6]}	010010	010010
sl@0: # temporary REG_BOSONLY kludge
sl@0: m  7	s	abc		abcd	abc
sl@0: f  8	s	abc		xabcd
sl@0: # back to normal stuff
sl@0: m  9	HLP	{(?n)^(?![t#])\S+}	"tk\n\n#\n#\nit0"	it0
sl@0: 
sl@0: 
sl@0: # flush any leftover complaints
sl@0: # NOTE(review): block number 0 presumably makes "doing" report what is
sl@0: # pending from block 30 without opening a new block -- confirm against the
sl@0: # definition of doing earlier in this file.
sl@0: doing 0 "flush"
sl@0: 
sl@0: # Tests resulting from bugs reported by users
sl@0: # A hex digit followed by an optional run of whitespace: the whitespace
sl@0: # group must come back empty when the next character is a colon.
sl@0: test reg-31.1 {[[:xdigit:]] behaves correctly when followed by [[:space:]]} {
sl@0:     # Code used to produce {1 2:::DebugWin32 2 :::DebugWin32} !!!
sl@0:     set input {2:::DebugWin32}
sl@0:     set pattern {([[:xdigit:]])([[:space:]]*)}
sl@0:     set matched [regexp $pattern $input whole digit blanks]
sl@0:     list $matched $whole $digit $blanks
sl@0: } {1 2 2 {}}
sl@0: 
sl@0: # Can-match probe with a pattern that occurs nowhere in the subject.
sl@0: test reg-32.1 {canmatch functionality -- at end} testregexp {
sl@0:     # nothing in the line can begin a match of blah, so the reported
sl@0:     # position is expected to be 7, one past the last character
sl@0:     set text "asd asd"
sl@0:     set re {blah}
sl@0:     list [testregexp -xflags -- c $re $text resvar] $resvar
sl@0: } {0 7}
sl@0: 
sl@0: # Can-match probe where the subject contains s but never s followed by %.
sl@0: test reg-32.2 {canmatch functionality -- at end} testregexp {
sl@0:     # can only match after the end of the string, so position 7 is expected
sl@0:     set text "asd asd"
sl@0:     set re {s%$}
sl@0:     list [testregexp -xflags -- c $re $text resvar] $resvar
sl@0: } {0 7}
sl@0: 
sl@0: # Can-match probe where the final character is excluded by the pattern.
sl@0: test reg-32.3 {canmatch functionality -- not last char} testregexp {
sl@0:     # the final d is ruled out by the leading bracket expression, so a
sl@0:     # match can only begin after the end of the string (position 7)
sl@0:     set text "asd asd"
sl@0:     set re {[^d]%$}
sl@0:     list [testregexp -xflags -- c $re $text resvar] $resvar
sl@0: } {0 7}
sl@0: 
sl@0: # Can-match probe with a pattern that no extension of the subject can match.
sl@0: test reg-32.3.1 {canmatch functionality -- no match} testregexp {
sl@0:     # the pattern demands an x after the end-of-string anchor, which can
sl@0:     # never be satisfied, so no candidate position is expected (-1)
sl@0:     set text "asd asd"
sl@0:     set re {\Zx}
sl@0:     list [testregexp -xflags -- c $re $text resvar] $resvar
sl@0: } {0 -1}
sl@0: 
sl@0: # Can-match probe: any character followed by x.
sl@0: # Needs both constraints: the body calls testregexp, so the test must be
sl@0: # skipped when that command is absent even if knownBug tests are enabled.
sl@0: test reg-32.4 {canmatch functionality -- last char} {knownBug testregexp} {
sl@0:     set pat {.x}
sl@0:     set line "asd asd"
sl@0:     # can match the last char, if followed by x
sl@0:     # (position 6 is the final d of the line)
sl@0:     set res [testregexp -xflags -- c $pat $line resvar]
sl@0:     lappend res $resvar
sl@0: } {0 6}
sl@0: 
sl@0: # Can-match probe: any character followed by x at end of string.
sl@0: # Needs both constraints: the body calls testregexp, so the test must be
sl@0: # skipped when that command is absent even if knownBug tests are enabled.
sl@0: test reg-32.4.1 {canmatch functionality -- last char} {knownBug testregexp} {
sl@0:     set pat {.x$}
sl@0:     set line "asd asd"
sl@0:     # can match the last char, if followed by x
sl@0:     # (position 6 is the final d of the line)
sl@0:     set res [testregexp -xflags -- c $pat $line resvar]
sl@0:     lappend res $resvar
sl@0: } {0 6}
sl@0: 
sl@0: # Can-match probe: any character, then a non-d, then x at end of string.
sl@0: # Needs both constraints: the body calls testregexp, so the test must be
sl@0: # skipped when that command is absent even if knownBug tests are enabled.
sl@0: test reg-32.5 {canmatch functionality -- last char} {knownBug testregexp} {
sl@0:     set pat {.[^d]x$}
sl@0:     set line "asd asd"
sl@0:     # can match the last char, if followed by not-d and x.
sl@0:     # (position 6 is the final d of the line)
sl@0:     set res [testregexp -xflags -- c $pat $line resvar]
sl@0:     lappend res $resvar
sl@0: } {0 6}
sl@0: 
sl@0: # Can-match probe: a non-a character, a percent sign, then the rest of a line.
sl@0: # Needs both constraints: the body calls testregexp, so the test must be
sl@0: # skipped when that command is absent even if knownBug tests are enabled.
sl@0: test reg-32.6 {canmatch functionality -- last char} {knownBug testregexp} {
sl@0:     set pat {[^a]%[^\r\n]*$}
sl@0:     set line "asd asd"
sl@0:     # can match at the final d, if '%' follows
sl@0:     # (position 6 is the final d of the line)
sl@0:     set res [testregexp -xflags -- c $pat $line resvar]
sl@0:     lappend res $resvar
sl@0: } {0 6}
sl@0: 
sl@0: # Can-match probe: a non-a character followed by a percent sign at the end.
sl@0: # Needs both constraints: the body calls testregexp, so the test must be
sl@0: # skipped when that command is absent even if knownBug tests are enabled.
sl@0: test reg-32.7 {canmatch functionality -- last char} {knownBug testregexp} {
sl@0:     set pat {[^a]%$}
sl@0:     set line "asd asd"
sl@0:     # can match at the final d, if '%' follows
sl@0:     # (position 6 is the final d of the line)
sl@0:     set res [testregexp -xflags -- c $pat $line resvar]
sl@0:     lappend res $resvar
sl@0: } {0 6}
sl@0: 
sl@0: # Can-match probe: a non-x character followed by a percent sign at the end.
sl@0: # Needs both constraints: the body calls testregexp, so the test must be
sl@0: # skipped when that command is absent even if knownBug tests are enabled.
sl@0: test reg-32.8 {canmatch functionality -- last char} {knownBug testregexp} {
sl@0:     set pat {[^x]%$}
sl@0:     set line "asd asd"
sl@0:     # can match at the final d, if '%' follows
sl@0:     # (position 6 is the final d of the line)
sl@0:     set res [testregexp -xflags -- c $pat $line resvar]
sl@0:     lappend res $resvar
sl@0: } {0 6}
sl@0: 
sl@0: # Can-match probe with an alternation; only the last branch, which needs a
sl@0: # percent sign, is the one the inner comment expects to be reachable.
sl@0: # Needs both constraints: the body calls testregexp, so the test must be
sl@0: # skipped when that command is absent even if knownBug tests are enabled.
sl@0: test reg-32.9 {canmatch functionality -- more complex case} {knownBug testregexp} {
sl@0:     set pat {((\B\B|\Bh+line)[ \t]*|[^\B]%[^\r\n]*)$}
sl@0:     set line "asd asd"
sl@0:     # can match at the final d, if '%' follows
sl@0:     # (position 6 is the final d of the line)
sl@0:     set res [testregexp -xflags -- c $pat $line resvar]
sl@0:     lappend res $resvar
sl@0: } {0 6}
sl@0: 
sl@0: # Tests reg-33.*: Checks for bug fixes
sl@0: 
sl@0: # The subject has %V preceded by a non-% character, so the pattern must
sl@0: # match; only the return value of regexp (1) is checked.
sl@0: test reg-33.1 {Bug 230589} {
sl@0:     regexp {[ ]*(^|[^%])%V} "*%V2" m s
sl@0: } 1
sl@0: 
sl@0: # Checks the whole match plus all four captured substrings via -inline.
sl@0: test reg-33.2 {Bug 504785} {
sl@0:     regexp -inline {([^_.]*)([^.]*)\.(..)(.).*} bbcos_001_c01.q1la
sl@0: } {bbcos_001_c01.q1la bbcos _001_c01 q1 l}
sl@0: 
sl@0: # Start-anchored pattern with optional whitespace and a negated bracket
sl@0: # expression before the captured group; only the return value is checked.
sl@0: test reg-33.3 {Bug 505048} {
sl@0:     regexp {\A\s*[^<]*\s*<([^>]+)>} a<a>
sl@0: } 1
sl@0: 
sl@0: # Variant with the capture around the negated bracket expression.
sl@0: test reg-33.4 {Bug 505048} {
sl@0:     regexp {\A\s*([^b]*)b} ab
sl@0: } 1
sl@0: 
sl@0: # Variant with the capture around the trailing literal b.
sl@0: test reg-33.5 {Bug 505048} {
sl@0:     regexp {\A\s*[^b]*(b)} ab
sl@0: } 1
sl@0: 
sl@0: # Variant with captures around both the leading whitespace and the final b.
sl@0: test reg-33.6 {Bug 505048} {
sl@0:     regexp {\A(\s*)[^b]*(b)} ab
sl@0: } 1
sl@0: 
sl@0: # Variant with no capturing groups; only the return value is checked.
sl@0: test reg-33.7 {Bug 505048} {
sl@0:     regexp {\A\s*[^b]*b} ab
sl@0: } 1
sl@0: 
sl@0: # Same pattern as the capture-free variant, but -inline returns the matched
sl@0: # text, which must be the whole subject.
sl@0: test reg-33.8 {Bug 505048} {
sl@0:     regexp -inline {\A\s*[^b]*b} ab
sl@0: } ab
sl@0: 
sl@0: # Same pattern again, checked by position: the match must span indices 0-1.
sl@0: test reg-33.9 {Bug 505048} {
sl@0:     regexp -indices -inline {\A\s*[^b]*b} ab
sl@0: } {{0 1}}
sl@0: 
sl@0: # Repeated group of start-anchor-or-newline before optional dots; regsub
sl@0: # must report exactly one substitution.  The result stored in tmp is not
sl@0: # inspected.
sl@0: test reg-33.10 {Bug 840258} {
sl@0:     regsub {(^|\n)+\.*b} \n.b {} tmp
sl@0: } 1
sl@0: 
sl@0: # Longer form of the same bug, using CR/LF line ends and a non-greedy
sl@0: # quantifier; regsub must report exactly one substitution.  The result
sl@0: # stored in tmp is not inspected.
sl@0: test reg-33.11 {Bug 840258} {
sl@0:     regsub {(^|[\n\r]+)\.*\?<.*?(\n|\r)+} \
sl@0:             "TQ\r\n.?<5000267>Test already stopped\r\n" {} tmp
sl@0: } 1
sl@0: 
sl@0: # cleanup: hand control back to tcltest for its end-of-file processing,
sl@0: # then stop evaluating this file
sl@0: ::tcltest::cleanupTests
sl@0: return