Subject: bin/36092: algorithmic inefficiency in bin/test/test.c:t_lex
To: None <gnats-admin@netbsd.org, netbsd-bugs@netbsd.org>
From: None <mtbakerguy@yahoo.com>
List: netbsd-bugs
Date: 03/27/2007 05:10:00
>Number: 36092
>Category: bin
>Synopsis: algorithmic inefficiency in bin/test/test.c:t_lex
>Confidential: no
>Severity: serious
>Priority: medium
>Responsible: bin-bug-people
>State: open
>Class: change-request
>Submitter-Id: net
>Arrival-Date: Tue Mar 27 05:10:00 +0000 2007
>Originator: Knotwell
>Release: 3.1
>Organization:
home
>Environment:
NetBSD bradwkstn 3.1 NetBSD 3.1 (GENERIC.PROF) #7: Thu Dec 14 21:54:41 PST 2006 root@bradwkstn:/usr/src/usr/src/sys/arch/i386/compile/GENERIC.PROF i386
>Description:
While profiling /bin/sh for fun, I noticed several things:
o the test builtin is *extremely* expensive with massive calls to strcmp in the t_lex function. Specifically, it does linear walks of the operations to do a match.
o profiling /bin/sh is difficult because bin/sh/trap.c:exitshell uses _exit instead of exit (NB: this isn't a bug but probably belongs in a README somewhere)
o arithmetic operations are also more expensive than they need to be since there are two calls made to ioctl (via isatty) when arith_lex_reset is called. In my limited testing, I commented out the YY_NEW_FILE stanza and cut the number of syscalls substantially in a shell doing a large amount of arithmetic.
At this cost of some codespace as well as readability*, I created a substantially improved parser for the test operations that dropped strcmp and t_lex off the profiler (almost) entirely. On a shell doing a significant number of boolean comparisons, I cut the run time by approximately 15%.
I've attached a diff below as well as a small shell that I used to validate the change (I noticed TEST.csh as I was preparing the diff).
*unfortunately, my change loses the data-driven clarity of the original code and it wasn't obvious to me how you'd get a data-driven solution nearly as efficient as mine. There's probably a simpler and more elegant solution but code review's difficult on a home project.
>How-To-Repeat:
>Fix:
Index: test.c
===================================================================
RCS file: /cvsroot/src/bin/test/test.c,v
retrieving revision 1.26
diff -d -u -r1.26 test.c
--- test.c 10 Feb 2005 06:56:55 -0000 1.26
+++ test.c 27 Mar 2007 04:44:08 -0000
@@ -95,51 +95,12 @@
PAREN
};
+
+#define MAXOPSIZE 3 /* strlen("-ne") */
static struct t_op {
- const char *op_text;
+ char op_text[MAXOPSIZE+1];
short op_num, op_type;
-} const ops [] = {
- {"-r", FILRD, UNOP},
- {"-w", FILWR, UNOP},
- {"-x", FILEX, UNOP},
- {"-e", FILEXIST,UNOP},
- {"-f", FILREG, UNOP},
- {"-d", FILDIR, UNOP},
- {"-c", FILCDEV,UNOP},
- {"-b", FILBDEV,UNOP},
- {"-p", FILFIFO,UNOP},
- {"-u", FILSUID,UNOP},
- {"-g", FILSGID,UNOP},
- {"-k", FILSTCK,UNOP},
- {"-s", FILGZ, UNOP},
- {"-t", FILTT, UNOP},
- {"-z", STREZ, UNOP},
- {"-n", STRNZ, UNOP},
- {"-h", FILSYM, UNOP}, /* for backwards compat */
- {"-O", FILUID, UNOP},
- {"-G", FILGID, UNOP},
- {"-L", FILSYM, UNOP},
- {"-S", FILSOCK,UNOP},
- {"=", STREQ, BINOP},
- {"!=", STRNE, BINOP},
- {"<", STRLT, BINOP},
- {">", STRGT, BINOP},
- {"-eq", INTEQ, BINOP},
- {"-ne", INTNE, BINOP},
- {"-ge", INTGE, BINOP},
- {"-gt", INTGT, BINOP},
- {"-le", INTLE, BINOP},
- {"-lt", INTLT, BINOP},
- {"-nt", FILNT, BINOP},
- {"-ot", FILOT, BINOP},
- {"-ef", FILEQ, BINOP},
- {"!", UNOT, BUNOP},
- {"-a", BAND, BBINOP},
- {"-o", BOR, BBINOP},
- {"(", LPAREN, PAREN},
- {")", RPAREN, PAREN},
- {0, 0, 0}
-};
+} tmpop;
static char **t_wp;
static struct t_op const *t_wp_op;
@@ -157,6 +118,7 @@
static int newerf(const char *, const char *);
static int olderf(const char *, const char *);
static int equalf(const char *, const char *);
+static int findoperand(const char *txt,short *operand, short *operand_type);
#if defined(SHELL)
extern void error(const char *, ...) __attribute__((__noreturn__));
@@ -387,45 +349,118 @@
static enum token
t_lex(char *s)
{
- struct t_op const *op;
-
- op = ops;
-
if (s == 0) {
t_wp_op = NULL;
return EOI;
}
- while (op->op_text) {
- if (strcmp(s, op->op_text) == 0) {
- if ((op->op_type == UNOP && isoperand()) ||
- (op->op_num == LPAREN && *(t_wp+1) == 0))
- break;
- t_wp_op = op;
- return op->op_num;
- }
- op++;
- }
+
+ if(findoperand(s,&tmpop.op_num,&tmpop.op_type) == 1) {
+ if((tmpop.op_type == UNOP && isoperand()) ||
+ (tmpop.op_num == LPAREN && *(t_wp+1) == 0))
+ goto out;
+
+ strcpy(tmpop.op_text,s);
+ t_wp_op = &tmpop;
+ return tmpop.op_num;
+ }
+out:
t_wp_op = NULL;
return OPERAND;
}
+static int
+findoperand(const char *txt,short *operand, short *operand_type) {
+ switch(*txt) {
+ default:
+ return(0);
+ case '-': {
+ if(*(txt + 1) == '\0')
+ return(0);
+ if(*(txt + 2) == '\0') {
+ switch(*(txt+1)) {
+ default:
+ return(0);
+#define DASH2(c,op,optype) case c: *operand=op;*operand_type=optype;return(1);
+ DASH2('a',BAND,BBINOP);
+ DASH2('o',BOR,BBINOP);
+ DASH2('r',FILRD,UNOP);
+ DASH2('w',FILWR,UNOP);
+ DASH2('x',FILEX,UNOP);
+ DASH2('e',FILEXIST,UNOP);
+ DASH2('f',FILREG,UNOP);
+ DASH2('d',FILDIR,UNOP);
+ DASH2('c',FILCDEV,UNOP);
+ DASH2('b',FILBDEV,UNOP);
+ DASH2('p',FILFIFO,UNOP);
+ DASH2('u',FILSUID,UNOP);
+ DASH2('g',FILSGID,UNOP);
+ DASH2('k',FILSTCK,UNOP);
+ DASH2('s',FILGZ,UNOP);
+ DASH2('t',FILTT,UNOP);
+ DASH2('z',STREZ,UNOP);
+ DASH2('n',STRNZ,UNOP);
+ DASH2('h',FILSYM,UNOP);
+ DASH2('O',FILUID,UNOP);
+ DASH2('G',FILGID,UNOP);
+ DASH2('L',FILSYM,UNOP);
+ DASH2('S',FILSOCK,UNOP);
+ }
+ }
+ if(*(txt + 3) == '\0') {
+#define DASH3(c1,c2,op,optype) if(*(txt + 1) == c1 && *(txt + 2) == c2) { \
+ *operand = op;*operand_type = optype; \
+ return(1); }
+ DASH3('e','q',INTEQ,BINOP);
+ DASH3('n','e',INTNE,BINOP);
+ DASH3('g','e',INTGE,BINOP);
+ DASH3('g','t',INTGE,BINOP);
+ DASH3('l','e',INTLE,BINOP);
+ DASH3('l','t',INTLT,BINOP);
+ DASH3('n','t',FILNT,BINOP);
+ DASH3('o','t',FILOT,BINOP);
+ DASH3('e','f',FILEQ,BINOP);
+ return(0);
+ }
+ return(0); /* the -NNN... case */
+ }
+#define SINGLECHAR(c,op,optype) case c: *operand = op;*operand_type = optype;return(1);
+ SINGLECHAR('(',LPAREN,PAREN);
+ SINGLECHAR(')',RPAREN,PAREN);
+ SINGLECHAR('=',STREQ,BINOP);
+ SINGLECHAR('<',STRLT,BINOP);
+ SINGLECHAR('>',STRGT,BINOP);
+
+ case '!': /* ! or != */
+ switch(*(txt + 1)) {
+ default:
+ return(0);
+ case '=':
+ if(*(txt + 2) == '\0') {
+ *operand = STRNE;
+ *operand_type = BINOP;
+ return(1);
+ }
+ return(0);
+ case '\0':
+ *operand = UNOT;
+ *operand_type = BUNOP;
+ return(1);
+ }
+ }
+}
static int
isoperand(void)
{
- struct t_op const *op;
char *s, *t;
+ short op_,optype_;
- op = ops;
if ((s = *(t_wp+1)) == 0)
return 1;
if ((t = *(t_wp+2)) == 0)
return 0;
- while (op->op_text) {
- if (strcmp(s, op->op_text) == 0)
- return op->op_type == BINOP &&
- (t[0] != ')' || t[1] != '\0');
- op++;
- }
+
+ if(findoperand(s,&op_,&optype_) == 1)
+ return(optype_ == BINOP && (t[0] != ')' || t[1] != '\0'));
return 0;
}
!#/bin/sh
dotest () {
./test $*
if [ $? -eq 0 ]
then
echo "($*) passed"
return
fi
echo "($*) failed"
exit 1
}
touch /tmp/$$.tst
chmod a+x /tmp/$$.tst
dotest -x /tmp/$$.tst
chmod a-x /tmp/$$.tst
dotest ! -x /tmp/$$.tst
chmod a+w /tmp/$$.tst
dotest -w /tmp/$$.tst
chmod a+r /tmp/$$.tst
dotest -r /tmp/$$.tst
dotest -e /tmp/$$.tst
dotest ! -e /tmp/$$.fdasfdastst
dotest -f /tmp/$$.tst
dotest ! -d /tmp/$$.tst
dotest ! -c /tmp/$$.tst
dotest ! -b /tmp/$$.tst
dotest ! -p /tmp/$$.tst
dotest ! -u /tmp/$$.tst
dotest ! -g /tmp/$$.tst
dotest ! -k /tmp/$$.tst
dotest ! -h /tmp/$$.tst
dotest -O /tmp/$$.tst
dotest -G /tmp/$$.tst
dotest ! -L /tmp/$$.tst
dotest ! -S /tmp/$$.tst
dotest ! "equal" = "notequal"
dotest "equal" != "notequal"
dotest "equal" = "equal"
dotest "equal" \< "fqual"
dotest "equal" \> "dqual"
dotest -n "equal"
dotest -z ""
dotest 0 -eq 0
dotest 0 -ne 1
dotest 1 -ge 0
dotest 1 -gt 0
dotest 0 -le 1
dotest 0 -lt 1
dotest 1 -a 1
dotest ! ! 0 -a 1
dotest 1 -o 0
dotest ! 0 -o 0
dotest \( 0 -eq 0 -a 1 -ne 0 \)
rm /tmp/$$.tst