Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/tests/lib/libc Atf-ify the regex test
details: https://anonhg.NetBSD.org/src/rev/f13fdb882ff2
branches: trunk
changeset: 760589:f13fdb882ff2
user: pgoyette <pgoyette%NetBSD.org@localhost>
date: Sat Jan 08 18:10:31 2011 +0000
description:
Atf-ify the regex test
diffstat:
tests/lib/libc/Makefile | 4 +-
tests/lib/libc/regex/README | 33 +
tests/lib/libc/regex/data/anchor.in | 33 +
tests/lib/libc/regex/data/backref.in | 21 +
tests/lib/libc/regex/data/basic.in | 5 +
tests/lib/libc/regex/data/bracket.in | 55 ++
tests/lib/libc/regex/data/c_comments.in | 17 +
tests/lib/libc/regex/data/complex.in | 23 +
tests/lib/libc/regex/data/error.in | 30 +
tests/lib/libc/regex/data/meta.in | 21 +
tests/lib/libc/regex/data/nospec.in | 7 +
tests/lib/libc/regex/data/nul.in | 7 +
tests/lib/libc/regex/data/paren.in | 19 +
tests/lib/libc/regex/data/regress.in | 9 +
tests/lib/libc/regex/data/repet_bounded.in | 45 ++
tests/lib/libc/regex/data/repet_multi.in | 21 +
tests/lib/libc/regex/data/repet_ordinary.in | 10 +
tests/lib/libc/regex/data/startend.in | 9 +
tests/lib/libc/regex/data/subexp.in | 57 +++
tests/lib/libc/regex/data/subtle.in | 21 +
tests/lib/libc/regex/data/word_bound.in | 13 +
tests/lib/libc/regex/debug.c | 268 ++++++++++++++
tests/lib/libc/regex/main.c | 523 ++++++++++++++++++++++++++++
tests/lib/libc/regex/split.c | 344 ++++++++++++++++++
tests/lib/libc/regex/t_regex.awk | 57 +++
tests/lib/libc/regex/t_regex.in | 61 +++
tests/lib/libc/regex/test_regex.h | 44 ++
27 files changed, 1755 insertions(+), 2 deletions(-)
diffs (truncated from 1872 to 300 lines):
diff -r aeec9433fd23 -r f13fdb882ff2 tests/lib/libc/Makefile
--- a/tests/lib/libc/Makefile Sat Jan 08 18:07:14 2011 +0000
+++ b/tests/lib/libc/Makefile Sat Jan 08 18:10:31 2011 +0000
@@ -1,9 +1,9 @@
-# $NetBSD: Makefile,v 1.25 2011/01/08 06:59:37 pgoyette Exp $
+# $NetBSD: Makefile,v 1.26 2011/01/08 18:10:31 pgoyette Exp $
.include <bsd.own.mk>
.include <bsd.sys.mk>
-TESTS_SUBDIRS+= db gen hash ieeefp rpc setjmp stdlib stdio string ttyio
+TESTS_SUBDIRS+= db gen hash ieeefp regex rpc setjmp stdlib stdio string ttyio
.if ${HAS_SSP} == "yes"
TESTS_SUBDIRS+= ssp
diff -r aeec9433fd23 -r f13fdb882ff2 tests/lib/libc/regex/README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/lib/libc/regex/README Sat Jan 08 18:10:31 2011 +0000
@@ -0,0 +1,33 @@
+regular expression test set
+Lines are at least three fields, separated by one or more tabs. "" stands
+for an empty field. The first field is an RE. The second field is flags. If the
+C flag is given, regcomp() is expected to fail, and the third field is the
+error name (minus the leading REG_).
+
+Otherwise it is expected to succeed, and the third field is the string to
+try matching it against. If there is no fourth field, the match is
+expected to fail. If there is a fourth field, it is the substring that
+the RE is expected to match. If there is a fifth field, it is a comma-
+separated list of what the subexpressions should match, with - indicating
+no match for that one. In both the fourth and fifth fields, a (sub)field
+starting with @ indicates that the (sub)expression is expected to match
+a null string followed by the stuff after the @; this provides a way to
+test where null strings match. The character `N' in REs and strings
+is newline, `S' is space, `T' is tab, `Z' is NUL.
+
+The full list of flags:
+ - placeholder, does nothing
+ b RE is a BRE, not an ERE
+ & try it as both an ERE and a BRE
+ C regcomp() error expected, third field is error name
+ i REG_ICASE
+ m ("mundane") REG_NOSPEC
+ s REG_NOSUB (not really testable)
+ n REG_NEWLINE
+ ^ REG_NOTBOL
+ $ REG_NOTEOL
+ # REG_STARTEND (see below)
+ p REG_PEND
+
+For REG_STARTEND, the start/end offsets are those of the substring
+enclosed in ().
diff -r aeec9433fd23 -r f13fdb882ff2 tests/lib/libc/regex/data/anchor.in
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/lib/libc/regex/data/anchor.in Sat Jan 08 18:10:31 2011 +0000
@@ -0,0 +1,33 @@
+# anchoring and REG_NEWLINE
+^abc$ & abc abc
+a^b - a^b
+a^b b a^b a^b
+a$b - a$b
+a$b b a$b a$b
+^ & abc @abc
+$ & abc @
+^$ & "" @
+$^ - "" @
+\($\)\(^\) b "" @
+# stop retching, those are legitimate (although disgusting)
+^^ - "" @
+$$ - "" @
+b$ & abNc
+b$ &n abNc b
+^b$ & aNbNc
+^b$ &n aNbNc b
+^$ &n aNNb @Nb
+^$ n abc
+^$ n abcN @
+$^ n aNNb @Nb
+\($\)\(^\) bn aNNb @Nb
+^^ n^ aNNb @Nb
+$$ n aNNb @NN
+^a ^ a
+a$ $ a
+^a ^n aNb
+^b ^n aNb b
+a$ $n bNa
+b$ $n bNa b
+a*(^b$)c* - b b
+a*\(^b$\)c* b b b
diff -r aeec9433fd23 -r f13fdb882ff2 tests/lib/libc/regex/data/backref.in
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/lib/libc/regex/data/backref.in Sat Jan 08 18:10:31 2011 +0000
@@ -0,0 +1,21 @@
+# back references, ugh
+a\(b\)\2c bC ESUBREG
+a\(b\1\)c bC ESUBREG
+a\(b*\)c\1d b abbcbbd abbcbbd bb
+a\(b*\)c\1d b abbcbd
+a\(b*\)c\1d b abbcbbbd
+^\(.\)\1 b abc
+a\([bc]\)\1d b abcdabbd abbd b
+a\(\([bc]\)\2\)*d b abbccd abbccd
+a\(\([bc]\)\2\)*d b abbcbd
+# actually, this next one probably ought to fail, but the spec is unclear
+a\(\(b\)*\2\)*d b abbbd abbbd
+# here is a case that no NFA implementation does right
+\(ab*\)[ab]*\1 b ababaaa ababaaa a
+# check out normal matching in the presence of back refs
+\(a\)\1bcd b aabcd aabcd
+\(a\)\1bc*d b aabcd aabcd
+\(a\)\1bc*d b aabd aabd
+\(a\)\1bc*d b aabcccd aabcccd
+\(a\)\1bc*[ce]d b aabcccd aabcccd
+^\(a\)\1b\(c\)*cd$ b aabcccd aabcccd
diff -r aeec9433fd23 -r f13fdb882ff2 tests/lib/libc/regex/data/basic.in
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/lib/libc/regex/data/basic.in Sat Jan 08 18:10:31 2011 +0000
@@ -0,0 +1,5 @@
+# basics
+a & a a
+abc & abc abc
+abc|de - abc abc
+a|b|c - abc a
diff -r aeec9433fd23 -r f13fdb882ff2 tests/lib/libc/regex/data/bracket.in
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/lib/libc/regex/data/bracket.in Sat Jan 08 18:10:31 2011 +0000
@@ -0,0 +1,55 @@
+# brackets, and numerous perversions thereof
+a[b]c & abc abc
+a[ab]c & abc abc
+a[^ab]c & adc adc
+a[]b]c & a]c a]c
+a[[b]c & a[c a[c
+a[-b]c & a-c a-c
+a[^]b]c & adc adc
+a[^-b]c & adc adc
+a[b-]c & a-c a-c
+a[b &C EBRACK
+a[] &C EBRACK
+a[1-3]c & a2c a2c
+a[3-1]c &C ERANGE
+a[1-3-5]c &C ERANGE
+a[[.-.]--]c & a-c a-c
+a[1- &C ERANGE
+a[[. &C EBRACK
+a[[.x &C EBRACK
+a[[.x. &C EBRACK
+a[[.x.] &C EBRACK
+a[[.x.]] & ax ax
+a[[.x,.]] &C ECOLLATE
+a[[.one.]]b & a1b a1b
+a[[.notdef.]]b &C ECOLLATE
+a[[.].]]b & a]b a]b
+a[[:alpha:]]c & abc abc
+a[[:notdef:]]c &C ECTYPE
+a[[: &C EBRACK
+a[[:alpha &C EBRACK
+a[[:alpha:] &C EBRACK
+a[[:alpha,:] &C ECTYPE
+a[[:]:]]b &C ECTYPE
+a[[:-:]]b &C ECTYPE
+a[[:alph:]] &C ECTYPE
+a[[:alphabet:]] &C ECTYPE
+[[:alnum:]]+ - -%@a0X- a0X
+[[:alpha:]]+ - -%@aX0- aX
+[[:blank:]]+ - aSSTb SST
+[[:cntrl:]]+ - aNTb NT
+[[:digit:]]+ - a019b 019
+[[:graph:]]+ - Sa%bS a%b
+[[:lower:]]+ - AabC ab
+[[:print:]]+ - NaSbN aSb
+[[:punct:]]+ - S%-&T %-&
+[[:space:]]+ - aSNTb SNT
+[[:upper:]]+ - aBCd BC
+[[:xdigit:]]+ - p0f3Cq 0f3C
+a[[=b=]]c & abc abc
+a[[= &C EBRACK
+a[[=b &C EBRACK
+a[[=b= &C EBRACK
+a[[=b=] &C EBRACK
+a[[=b,=]] &C ECOLLATE
+a[[=one=]]b & a1b a1b
diff -r aeec9433fd23 -r f13fdb882ff2 tests/lib/libc/regex/data/c_comments.in
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/lib/libc/regex/data/c_comments.in Sat Jan 08 18:10:31 2011 +0000
@@ -0,0 +1,17 @@
+# Let's have some fun -- try to match a C comment.
+# first the obvious, which looks okay at first glance...
+/\*.*\*/ - /*x*/ /*x*/
+# but...
+/\*.*\*/ - /*x*/y/*z*/ /*x*/y/*z*/
+# okay, we must not match */ inside; try to do that...
+/\*([^*]|\*[^/])*\*/ - /*x*/ /*x*/
+/\*([^*]|\*[^/])*\*/ - /*x*/y/*z*/ /*x*/
+# but...
+/\*([^*]|\*[^/])*\*/ - /*x**/y/*z*/ /*x**/y/*z*/
+# and a still fancier version, which does it right (I think)...
+/\*([^*]|\*+[^*/])*\*+/ - /*x*/ /*x*/
+/\*([^*]|\*+[^*/])*\*+/ - /*x*/y/*z*/ /*x*/
+/\*([^*]|\*+[^*/])*\*+/ - /*x**/y/*z*/ /*x**/
+/\*([^*]|\*+[^*/])*\*+/ - /*x****/y/*z*/ /*x****/
+/\*([^*]|\*+[^*/])*\*+/ - /*x**x*/y/*z*/ /*x**x*/
+/\*([^*]|\*+[^*/])*\*+/ - /*x***x/y/*z*/ /*x***x/y/*z*/
diff -r aeec9433fd23 -r f13fdb882ff2 tests/lib/libc/regex/data/complex.in
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/lib/libc/regex/data/complex.in Sat Jan 08 18:10:31 2011 +0000
@@ -0,0 +1,23 @@
+# complexities
+a(((b)))c - abc abc
+a(b|(c))d - abd abd
+a(b*|c)d - abbd abbd
+# just gotta have one DFA-buster, of course
+a[ab]{20} - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab
+# and an inline expansion in case somebody gets tricky
+a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab
+# and in case somebody just slips in an NFA...
+a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) - aaaaabaaaabaaaabaaaabweeknights aaaaabaaaabaaaabaaaabweeknights
+# fish for anomalies as the number of states passes 32
+12345678901234567890123456789 - a12345678901234567890123456789b 12345678901234567890123456789
+123456789012345678901234567890 - a123456789012345678901234567890b 123456789012345678901234567890
+1234567890123456789012345678901 - a1234567890123456789012345678901b 1234567890123456789012345678901
+12345678901234567890123456789012 - a12345678901234567890123456789012b 12345678901234567890123456789012
+123456789012345678901234567890123 - a123456789012345678901234567890123b 123456789012345678901234567890123
+# and one really big one, beyond any plausible word width
+1234567890123456789012345678901234567890123456789012345678901234567890 - a1234567890123456789012345678901234567890123456789012345678901234567890b 1234567890123456789012345678901234567890123456789012345678901234567890
+# fish for problems as brackets go past 8
+[ab][cd][ef][gh][ij][kl][mn] - xacegikmoq acegikm
+[ab][cd][ef][gh][ij][kl][mn][op] - xacegikmoq acegikmo
+[ab][cd][ef][gh][ij][kl][mn][op][qr] - xacegikmoqy acegikmoq
+[ab][cd][ef][gh][ij][kl][mn][op][q] - xacegikmoqy acegikmoq
diff -r aeec9433fd23 -r f13fdb882ff2 tests/lib/libc/regex/data/error.in
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/lib/libc/regex/data/error.in Sat Jan 08 18:10:31 2011 +0000
@@ -0,0 +1,30 @@
+# certain syntax errors and non-errors
+| C EMPTY
+| b | |
+* C BADRPT
+* b * *
++ C BADRPT
+? C BADRPT
+"" &C EMPTY
+() - abc @abc
+\(\) b abc @abc
+a||b C EMPTY
+|ab C EMPTY
+ab| C EMPTY
+(|a)b C EMPTY
+(a|)b C EMPTY
+(*a) C BADRPT
+(+a) C BADRPT
+(?a) C BADRPT
+({1}a) C BADRPT
+\(\{1\}a\) bC BADRPT
+(a|*b) C BADRPT
+(a|+b) C BADRPT
+(a|?b) C BADRPT
+(a|{1}b) C BADRPT
+^* C BADRPT
+^* b * *
+^+ C BADRPT
+^? C BADRPT
+^{1} C BADRPT
+^\{1\} bC BADRPT
diff -r aeec9433fd23 -r f13fdb882ff2 tests/lib/libc/regex/data/meta.in
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/lib/libc/regex/data/meta.in Sat Jan 08 18:10:31 2011 +0000
@@ -0,0 +1,21 @@
+# metacharacters, backslashes
+a.c & abc abc
+a[bc]d & abd abd
+a\*c & a*c a*c
+a\\b & a\b a\b
+a\\\*b & a\*b a\*b
+a\bc & abc abc
+a\ &C EESCAPE
+a\\bc & a\bc a\bc
+\{ bC BADRPT
+a\[b & a[b a[b
+a[b &C EBRACK
+# trailing $ is a peculiar special case for the BRE code
+a$ & a a
+a$ & a$
+a\$ & a
+a\$ & a$ a$
+a\\$ & a
+a\\$ & a$
+a\\$ & a\$
+a\\$ & a\ a\
diff -r aeec9433fd23 -r f13fdb882ff2 tests/lib/libc/regex/data/nospec.in
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/lib/libc/regex/data/nospec.in Sat Jan 08 18:10:31 2011 +0000
@@ -0,0 +1,7 @@
+# plain strings, with the NOSPEC flag
+abc m abc abc
+abc m xabcy abc
+abc m xyz
+a*b m aba*b a*b
+a*b m ab
+"" mC EMPTY
Home |
Main Index |
Thread Index |
Old Index