Subject: /bin/test bugs and proposed patch
To: None <tech@openbsd.org, tech-userlevel@netbsd.org, freebsd-hackers@freebsd.org>
From: Andrew_L. Moore <alm@slewsys.org>
List: tech-userlevel
Date: 02/13/1999 04:41:51
Below is a patch to *BSD /bin/test which allows TEST.sh (from
the source directory) to complete with 0 syntax errors/fails.  Some
additional cases which the patch fixes are also included below.

In particular, the current /bin/test fails on reasonably common expressions
of the form:

  test -d X -o -f X,

where the filename, X, is also a valid /bin/test option (such as `-r').

The switch{} for POSIX compatibility is removed because it actually
adds inconsistency.  For instance,

  test \( = \) -a \( = \)

evaluates to true on many (all?) test(1)'s, yet a rigid interpretation
of POSIX says

  test \( = \)

is false.  Several test(1)'s, including Korn's ksh(1), evaluate this to true.

If no one disagrees with these changes, the following patches will be
submitted for inclusion in the source tree(s).

Finally, does anyone know of a comprehensive regression suite for test(1)?
Thanks.
-Andrew Moore <alm@SlewSys.org>

---- Cut Here ----
--- test.c	1999/02/12 22:39:53	1.1
+++ test.c	1999/02/13 11:48:53
@@ -117,6 +117,8 @@
 	struct filestat fs;
 	char  c, **ap, *opname, *p;
 	int binary, nest, op = 0, pri, ret_val, skipping;
+	int expecting_unary_arg;
+	int expecting_binary_arg;
 
 	if ((p = argv[0]) == NULL)
 		errx(2, "test: argc is zero");
@@ -130,52 +132,6 @@
 	fs.name = NULL;
 
 	/*
-	 * Test(1) implements an inherently ambiguous grammer.  In order to
-	 * assure some degree of consistency, we special case the POSIX 1003.2
-	 * requirements to assure correct evaluation for POSIX scripts.  The
-	 * following special cases comply with POSIX P1003.2/D11.2 Section
-	 * 4.62.4.
-	 */
-	switch(argc - 1) {
-	case 0:				/* % test */
-		return (1);
-		break;
-	case 1:				/* % test arg */
-		return (argv[1] == NULL || *argv[1] == '\0') ? 1 : 0;
-		break;
-	case 2:				/* % test op arg */
-		opname = argv[1];
-		if (IS_BANG(opname))
-			return (*argv[2] == '\0') ? 0 : 1;
-		else {
-			ret_val = posix_unary_op(&argv[1]);
-			if (ret_val >= 0)
-				return (ret_val);
-		}
-		break;
-	case 3:				/* % test arg1 op arg2 */
-		if (IS_BANG(argv[1])) {
-			ret_val = posix_unary_op(&argv[1]);
-			if (ret_val >= 0)
-				return (!ret_val);
-		} else if (lookup_op(argv[2], andor_op) < 0) {
-			ret_val = posix_binary_op(&argv[1]);
-			if (ret_val >= 0)
-				return (ret_val);
-		}
-		break;
-	case 4:				/* % test ! arg1 op arg2 */
-		if (IS_BANG(argv[1]) && lookup_op(argv[3], andor_op) < 0 ) {
-			ret_val = posix_binary_op(&argv[2]);
-			if (ret_val >= 0)
-				return (!ret_val);
-		}
-		break;
-	default:
-		break;
-	}
-
-	/*
 	 * We use operator precedence parsing, evaluating the expression as
 	 * we parse it.  Parentheses are handled by bumping up the priority
 	 * of operators using the variable "nest."  We use the variable
@@ -187,47 +143,70 @@
 	opsp = opstack + STACKSIZE;
 	valsp = valstack;
 	nest = skipping = 0;
+	expecting_unary_arg = 0;
+	expecting_binary_arg = 0;
 	if (*ap == NULL) {
 		valstack[0].type = BOOLEAN;
 		valstack[0].u.num = 0;
 		goto done;
 	}
 	for (;;) {
-		opname = *ap++;
-		if (opname == NULL)
+		if ((opname = *ap++) == NULL)
 			syntax();
-		if (opname[0] == '(' && opname[1] == '\0') {
+		else if (!expecting_binary_arg && !expecting_unary_arg &&
+		    *ap && opname[0] == '(' && opname[1] == '\0') {
 			nest += NESTINCR;
+			expecting_unary_arg = 0;
 			continue;
-		} else if (*ap && (op = lookup_op(opname, unary_op)) >= 0) {
+		} else if (!expecting_binary_arg && !expecting_unary_arg &&
+		    *ap && (op = lookup_op(opname, unary_op)) >= 0) {
 			if (opsp == &opstack[0])
 				overflow();
 			--opsp;
 			opsp->op = op;
 			opsp->pri = op_priority[op] + nest;
+			expecting_unary_arg = op != NOT;
 			continue;
+		} else if (expecting_unary_arg &&
+		    *ap && (op = lookup_op(opname, binary_op)) >= 0 &&
+		    lookup_op(opname, andor_op) < 0) {
+			op += FIRST_BINARY_OP;
+			pri = op_priority[op] + nest;
+
+			/* 
+			 * Previous unary op is evidently a binary arg,
+			 * so move it from opstack to valstack.
+			 */
+			++opsp;
+			valsp->type = STRING;
+			valsp->u.string = *(ap - 2);
+			valsp++;
+
+			expecting_unary_arg = 0;
+			expecting_binary_arg = 1;
 		} else {
 			valsp->type = STRING;
 			valsp->u.string = opname;
 			valsp++;
-		}
-		for (;;) {
-			opname = *ap++;
-			if (opname == NULL) {
-				if (nest != 0)
-					syntax();
-				pri = 0;
-				break;
-			}
-			if (opname[0] != ')' || opname[1] != '\0') {
-				if ((op = lookup_op(opname, binary_op)) < 0)
+			expecting_binary_arg = expecting_unary_arg = 0;
+			for (;;) {
+				if ((opname = *ap++) == NULL) {
+					if (nest != 0)
+						syntax();
+					pri = 0;
+					break;
+				} else if (opname[0] != ')' ||
+				    opname[1] != '\0') {
+					if ((op=lookup_op(opname, binary_op))<0)
+						syntax();
+					op += FIRST_BINARY_OP;
+					pri = op_priority[op] + nest;
+					expecting_binary_arg =
+					    lookup_op(opname, andor_op) < 0;
+					break;
+				} else if ((nest -= NESTINCR) < 0)
 					syntax();
-				op += FIRST_BINARY_OP;
-				pri = op_priority[op] + nest;
-				break;
 			}
-			if ((nest -= NESTINCR) < 0)
-				syntax();
 		}
 		while (opsp < &opstack[STACKSIZE] && opsp->pri >= pri) {
 			binary = opsp->op;
--- TEST.sh	1999/02/13 12:18:28	1.1
+++ TEST.sh	1999/02/13 11:52:38
@@ -131,6 +131,42 @@
 t 0 '"a" -a ! ""'
 t 1 '""'
 t 0 '! ""'
+
+t 1 '-e -r'
+t 1 '-e -r -a -e -r'
+t 0 '! -e -r -o -e -r'
+t 1 '-e -r -a ! -e -r'
+t 0 '-e -r -o ! -e -r'
+
+t 1 '-e -a'
+t 1 '-e -a -a -e -a'
+t 0 '! -e -a -o -e -a'
+t 1 '-e -a -a ! -e -a'
+t 0 '-e -a -o ! -e -a'
+
+t 1 '-e -a -o -e = -a'
+t 1 '-e -a -o -a = -e'
+t 0 '-e -a -o ! -e = -a'
+t 0 '-e -a -o ! -a = -e'
+
+t 1 '! b = b' 
+t 0 '! b = 0' 
+t 0 '! b = 1' 
+
+t 1 '\( -h = -h \) -a \( a = -h \)'
+t 0 '\( -h = -h \) -o \( a = -h \)'
+t 0 '\( a = -h \) -o \( -h = -h \)'
+
+t 0 '\('
+t 0 '\( = \)'
+t 0 '\( = \) -a \( = \)'
+
+t 1 '-e \('
+t 1 '\( -e \( \)'
+t 1 '\( -e \( \) -o \( -e \( \)'
+t 1 '\( -e \( \) -o \( -e = \( \)'
+
+t 0 '\( \( \( a = -h \) \) -o \( -h = -h \) \)'
 
 echo ""
 echo "Syntax errors: $ERROR Failed: $FAILED"