Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/tests/lib/libc Atf-ify the regex test



details:   https://anonhg.NetBSD.org/src/rev/f13fdb882ff2
branches:  trunk
changeset: 760589:f13fdb882ff2
user:      pgoyette <pgoyette%NetBSD.org@localhost>
date:      Sat Jan 08 18:10:31 2011 +0000

description:
Atf-ify the regex test

diffstat:

 tests/lib/libc/Makefile                     |    4 +-
 tests/lib/libc/regex/README                 |   33 +
 tests/lib/libc/regex/data/anchor.in         |   33 +
 tests/lib/libc/regex/data/backref.in        |   21 +
 tests/lib/libc/regex/data/basic.in          |    5 +
 tests/lib/libc/regex/data/bracket.in        |   55 ++
 tests/lib/libc/regex/data/c_comments.in     |   17 +
 tests/lib/libc/regex/data/complex.in        |   23 +
 tests/lib/libc/regex/data/error.in          |   30 +
 tests/lib/libc/regex/data/meta.in           |   21 +
 tests/lib/libc/regex/data/nospec.in         |    7 +
 tests/lib/libc/regex/data/nul.in            |    7 +
 tests/lib/libc/regex/data/paren.in          |   19 +
 tests/lib/libc/regex/data/regress.in        |    9 +
 tests/lib/libc/regex/data/repet_bounded.in  |   45 ++
 tests/lib/libc/regex/data/repet_multi.in    |   21 +
 tests/lib/libc/regex/data/repet_ordinary.in |   10 +
 tests/lib/libc/regex/data/startend.in       |    9 +
 tests/lib/libc/regex/data/subexp.in         |   57 +++
 tests/lib/libc/regex/data/subtle.in         |   21 +
 tests/lib/libc/regex/data/word_bound.in     |   13 +
 tests/lib/libc/regex/debug.c                |  268 ++++++++++++++
 tests/lib/libc/regex/main.c                 |  523 ++++++++++++++++++++++++++++
 tests/lib/libc/regex/split.c                |  344 ++++++++++++++++++
 tests/lib/libc/regex/t_regex.awk            |   57 +++
 tests/lib/libc/regex/t_regex.in             |   61 +++
 tests/lib/libc/regex/test_regex.h           |   44 ++
 27 files changed, 1755 insertions(+), 2 deletions(-)

diffs (truncated from 1872 to 300 lines):

diff -r aeec9433fd23 -r f13fdb882ff2 tests/lib/libc/Makefile
--- a/tests/lib/libc/Makefile   Sat Jan 08 18:07:14 2011 +0000
+++ b/tests/lib/libc/Makefile   Sat Jan 08 18:10:31 2011 +0000
@@ -1,9 +1,9 @@
-# $NetBSD: Makefile,v 1.25 2011/01/08 06:59:37 pgoyette Exp $
+# $NetBSD: Makefile,v 1.26 2011/01/08 18:10:31 pgoyette Exp $
 
 .include <bsd.own.mk>
 .include <bsd.sys.mk>
 
-TESTS_SUBDIRS+=        db gen hash ieeefp rpc setjmp stdlib stdio string ttyio
+TESTS_SUBDIRS+=        db gen hash ieeefp regex rpc setjmp stdlib stdio string ttyio
 
 .if ${HAS_SSP} == "yes"
 TESTS_SUBDIRS+=        ssp
diff -r aeec9433fd23 -r f13fdb882ff2 tests/lib/libc/regex/README
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/lib/libc/regex/README       Sat Jan 08 18:10:31 2011 +0000
@@ -0,0 +1,33 @@
+regular expression test set
+Lines are at least three fields, separated by one or more tabs.  "" stands
+for an empty field.  First field is an RE.  Second field is flags.  If
+C flag given, regcomp() is expected to fail, and the third field is the
+error name (minus the leading REG_).
+
+Otherwise it is expected to succeed, and the third field is the string to
+try matching it against.  If there is no fourth field, the match is
+expected to fail.  If there is a fourth field, it is the substring that
+the RE is expected to match.  If there is a fifth field, it is a comma-
+separated list of what the subexpressions should match, with - indicating
+no match for that one.  In both the fourth and fifth fields, a (sub)field
+starting with @ indicates that the (sub)expression is expected to match
+a null string followed by the stuff after the @; this provides a way to
+test where null strings match.  The character `N' in REs and strings
+is newline, `S' is space, `T' is tab, `Z' is NUL.
+
+The full list of flags:
+  -    placeholder, does nothing
+  b    RE is a BRE, not an ERE
+  &    try it as both an ERE and a BRE
+  C    regcomp() error expected, third field is error name
+  i    REG_ICASE
+  m    ("mundane") REG_NOSPEC
+  s    REG_NOSUB (not really testable)
+  n    REG_NEWLINE
+  ^    REG_NOTBOL
+  $    REG_NOTEOL
+  #    REG_STARTEND (see below)
+  p    REG_PEND
+
+For REG_STARTEND, the start/end offsets are those of the substring
+enclosed in ().
diff -r aeec9433fd23 -r f13fdb882ff2 tests/lib/libc/regex/data/anchor.in
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/lib/libc/regex/data/anchor.in       Sat Jan 08 18:10:31 2011 +0000
@@ -0,0 +1,33 @@
+# anchoring and REG_NEWLINE
+^abc$          &       abc     abc
+a^b            -       a^b
+a^b            b       a^b     a^b
+a$b            -       a$b
+a$b            b       a$b     a$b
+^              &       abc     @abc
+$              &       abc     @
+^$             &       ""      @
+$^             -       ""      @
+\($\)\(^\)     b       ""      @
+# stop retching, those are legitimate (although disgusting)
+^^             -       ""      @
+$$             -       ""      @
+b$             &       abNc
+b$             &n      abNc    b
+^b$            &       aNbNc
+^b$            &n      aNbNc   b
+^$             &n      aNNb    @Nb
+^$             n       abc
+^$             n       abcN    @
+$^             n       aNNb    @Nb
+\($\)\(^\)     bn      aNNb    @Nb
+^^             n^      aNNb    @Nb
+$$             n       aNNb    @NN
+^a             ^       a
+a$             $       a
+^a             ^n      aNb
+^b             ^n      aNb     b
+a$             $n      bNa
+b$             $n      bNa     b
+a*(^b$)c*      -       b       b
+a*\(^b$\)c*    b       b       b
diff -r aeec9433fd23 -r f13fdb882ff2 tests/lib/libc/regex/data/backref.in
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/lib/libc/regex/data/backref.in      Sat Jan 08 18:10:31 2011 +0000
@@ -0,0 +1,21 @@
+# back references, ugh
+a\(b\)\2c      bC      ESUBREG
+a\(b\1\)c      bC      ESUBREG
+a\(b*\)c\1d    b       abbcbbd abbcbbd bb
+a\(b*\)c\1d    b       abbcbd
+a\(b*\)c\1d    b       abbcbbbd
+^\(.\)\1       b       abc
+a\([bc]\)\1d   b       abcdabbd        abbd    b
+a\(\([bc]\)\2\)*d      b       abbccd  abbccd
+a\(\([bc]\)\2\)*d      b       abbcbd
+# actually, this next one probably ought to fail, but the spec is unclear
+a\(\(b\)*\2\)*d                b       abbbd   abbbd
+# here is a case that no NFA implementation does right
+\(ab*\)[ab]*\1 b       ababaaa ababaaa a
+# check out normal matching in the presence of back refs
+\(a\)\1bcd     b       aabcd   aabcd
+\(a\)\1bc*d    b       aabcd   aabcd
+\(a\)\1bc*d    b       aabd    aabd
+\(a\)\1bc*d    b       aabcccd aabcccd
+\(a\)\1bc*[ce]d        b       aabcccd aabcccd
+^\(a\)\1b\(c\)*cd$     b       aabcccd aabcccd
diff -r aeec9433fd23 -r f13fdb882ff2 tests/lib/libc/regex/data/basic.in
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/lib/libc/regex/data/basic.in        Sat Jan 08 18:10:31 2011 +0000
@@ -0,0 +1,5 @@
+# basics
+a              &       a       a
+abc            &       abc     abc
+abc|de         -       abc     abc
+a|b|c          -       abc     a
diff -r aeec9433fd23 -r f13fdb882ff2 tests/lib/libc/regex/data/bracket.in
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/lib/libc/regex/data/bracket.in      Sat Jan 08 18:10:31 2011 +0000
@@ -0,0 +1,55 @@
+# brackets, and numerous perversions thereof
+a[b]c          &       abc     abc
+a[ab]c         &       abc     abc
+a[^ab]c                &       adc     adc
+a[]b]c         &       a]c     a]c
+a[[b]c         &       a[c     a[c
+a[-b]c         &       a-c     a-c
+a[^]b]c                &       adc     adc
+a[^-b]c                &       adc     adc
+a[b-]c         &       a-c     a-c
+a[b            &C      EBRACK
+a[]            &C      EBRACK
+a[1-3]c                &       a2c     a2c
+a[3-1]c                &C      ERANGE
+a[1-3-5]c      &C      ERANGE
+a[[.-.]--]c    &       a-c     a-c
+a[1-           &C      ERANGE
+a[[.           &C      EBRACK
+a[[.x          &C      EBRACK
+a[[.x.         &C      EBRACK
+a[[.x.]                &C      EBRACK
+a[[.x.]]       &       ax      ax
+a[[.x,.]]      &C      ECOLLATE
+a[[.one.]]b    &       a1b     a1b
+a[[.notdef.]]b &C      ECOLLATE
+a[[.].]]b      &       a]b     a]b
+a[[:alpha:]]c  &       abc     abc
+a[[:notdef:]]c &C      ECTYPE
+a[[:           &C      EBRACK
+a[[:alpha      &C      EBRACK
+a[[:alpha:]    &C      EBRACK
+a[[:alpha,:]   &C      ECTYPE
+a[[:]:]]b      &C      ECTYPE
+a[[:-:]]b      &C      ECTYPE
+a[[:alph:]]    &C      ECTYPE
+a[[:alphabet:]]        &C      ECTYPE
+[[:alnum:]]+   -       -%@a0X- a0X
+[[:alpha:]]+   -       -%@aX0- aX
+[[:blank:]]+   -       aSSTb   SST
+[[:cntrl:]]+   -       aNTb    NT
+[[:digit:]]+   -       a019b   019
+[[:graph:]]+   -       Sa%bS   a%b
+[[:lower:]]+   -       AabC    ab
+[[:print:]]+   -       NaSbN   aSb
+[[:punct:]]+   -       S%-&T   %-&
+[[:space:]]+   -       aSNTb   SNT
+[[:upper:]]+   -       aBCd    BC
+[[:xdigit:]]+  -       p0f3Cq  0f3C
+a[[=b=]]c      &       abc     abc
+a[[=           &C      EBRACK
+a[[=b          &C      EBRACK
+a[[=b=         &C      EBRACK
+a[[=b=]                &C      EBRACK
+a[[=b,=]]      &C      ECOLLATE
+a[[=one=]]b    &       a1b     a1b
diff -r aeec9433fd23 -r f13fdb882ff2 tests/lib/libc/regex/data/c_comments.in
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/lib/libc/regex/data/c_comments.in   Sat Jan 08 18:10:31 2011 +0000
@@ -0,0 +1,17 @@
+# Let's have some fun -- try to match a C comment.
+# first the obvious, which looks okay at first glance...
+/\*.*\*/       -       /*x*/   /*x*/
+# but...
+/\*.*\*/       -       /*x*/y/*z*/     /*x*/y/*z*/
+# okay, we must not match */ inside; try to do that...
+/\*([^*]|\*[^/])*\*/   -       /*x*/   /*x*/
+/\*([^*]|\*[^/])*\*/   -       /*x*/y/*z*/     /*x*/
+# but...
+/\*([^*]|\*[^/])*\*/   -       /*x**/y/*z*/    /*x**/y/*z*/
+# and a still fancier version, which does it right (I think)...
+/\*([^*]|\*+[^*/])*\*+/        -       /*x*/   /*x*/
+/\*([^*]|\*+[^*/])*\*+/        -       /*x*/y/*z*/     /*x*/
+/\*([^*]|\*+[^*/])*\*+/        -       /*x**/y/*z*/    /*x**/
+/\*([^*]|\*+[^*/])*\*+/        -       /*x****/y/*z*/  /*x****/
+/\*([^*]|\*+[^*/])*\*+/        -       /*x**x*/y/*z*/  /*x**x*/
+/\*([^*]|\*+[^*/])*\*+/        -       /*x***x/y/*z*/  /*x***x/y/*z*/
diff -r aeec9433fd23 -r f13fdb882ff2 tests/lib/libc/regex/data/complex.in
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/lib/libc/regex/data/complex.in      Sat Jan 08 18:10:31 2011 +0000
@@ -0,0 +1,23 @@
+# complexities
+a(((b)))c      -       abc     abc
+a(b|(c))d      -       abd     abd
+a(b*|c)d       -       abbd    abbd
+# just gotta have one DFA-buster, of course
+a[ab]{20}      -       aaaaabaaaabaaaabaaaab   aaaaabaaaabaaaabaaaab
+# and an inline expansion in case somebody gets tricky
+a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab]      -       aaaaabaaaabaaaabaaaab   aaaaabaaaabaaaabaaaab
+# and in case somebody just slips in an NFA...
+a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night)     -       aaaaabaaaabaaaabaaaabweeknights aaaaabaaaabaaaabaaaabweeknights
+# fish for anomalies as the number of states passes 32
+12345678901234567890123456789  -       a12345678901234567890123456789b 12345678901234567890123456789
+123456789012345678901234567890 -       a123456789012345678901234567890b        123456789012345678901234567890
+1234567890123456789012345678901        -       a1234567890123456789012345678901b       1234567890123456789012345678901
+12345678901234567890123456789012       -       a12345678901234567890123456789012b      12345678901234567890123456789012
+123456789012345678901234567890123      -       a123456789012345678901234567890123b     123456789012345678901234567890123
+# and one really big one, beyond any plausible word width
+1234567890123456789012345678901234567890123456789012345678901234567890 -       a1234567890123456789012345678901234567890123456789012345678901234567890b        1234567890123456789012345678901234567890123456789012345678901234567890
+# fish for problems as brackets go past 8
+[ab][cd][ef][gh][ij][kl][mn]   -       xacegikmoq      acegikm
+[ab][cd][ef][gh][ij][kl][mn][op]       -       xacegikmoq      acegikmo
+[ab][cd][ef][gh][ij][kl][mn][op][qr]   -       xacegikmoqy     acegikmoq
+[ab][cd][ef][gh][ij][kl][mn][op][q]    -       xacegikmoqy     acegikmoq
diff -r aeec9433fd23 -r f13fdb882ff2 tests/lib/libc/regex/data/error.in
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/lib/libc/regex/data/error.in        Sat Jan 08 18:10:31 2011 +0000
@@ -0,0 +1,30 @@
+# certain syntax errors and non-errors
+|              C       EMPTY
+|              b       |       |
+*              C       BADRPT
+*              b       *       *
++              C       BADRPT
+?              C       BADRPT
+""             &C      EMPTY
+()             -       abc     @abc
+\(\)           b       abc     @abc
+a||b           C       EMPTY
+|ab            C       EMPTY
+ab|            C       EMPTY
+(|a)b          C       EMPTY
+(a|)b          C       EMPTY
+(*a)           C       BADRPT
+(+a)           C       BADRPT
+(?a)           C       BADRPT
+({1}a)         C       BADRPT
+\(\{1\}a\)     bC      BADRPT
+(a|*b)         C       BADRPT
+(a|+b)         C       BADRPT
+(a|?b)         C       BADRPT
+(a|{1}b)       C       BADRPT
+^*             C       BADRPT
+^*             b       *       *
+^+             C       BADRPT
+^?             C       BADRPT
+^{1}           C       BADRPT
+^\{1\}         bC      BADRPT
diff -r aeec9433fd23 -r f13fdb882ff2 tests/lib/libc/regex/data/meta.in
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/lib/libc/regex/data/meta.in Sat Jan 08 18:10:31 2011 +0000
@@ -0,0 +1,21 @@
+# metacharacters, backslashes
+a.c            &       abc     abc
+a[bc]d         &       abd     abd
+a\*c           &       a*c     a*c
+a\\b           &       a\b     a\b
+a\\\*b         &       a\*b    a\*b
+a\bc           &       abc     abc
+a\             &C      EESCAPE
+a\\bc          &       a\bc    a\bc
+\{             bC      BADRPT
+a\[b           &       a[b     a[b
+a[b            &C      EBRACK
+# trailing $ is a peculiar special case for the BRE code
+a$             &       a       a
+a$             &       a$
+a\$            &       a
+a\$            &       a$      a$
+a\\$           &       a
+a\\$           &       a$
+a\\$           &       a\$
+a\\$           &       a\      a\
diff -r aeec9433fd23 -r f13fdb882ff2 tests/lib/libc/regex/data/nospec.in
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/lib/libc/regex/data/nospec.in       Sat Jan 08 18:10:31 2011 +0000
@@ -0,0 +1,7 @@
+# plain strings, with the NOSPEC flag
+abc            m       abc     abc
+abc            m       xabcy   abc
+abc            m       xyz
+a*b            m       aba*b   a*b
+a*b            m       ab
+""             mC      EMPTY



Home | Main Index | Thread Index | Old Index