Subject: Re: bin/36092: algorithmic inefficiency in bin/test/test.c:t_lex
To: None <gnats-admin@netbsd.org, netbsd-bugs@netbsd.org,>
From: Christos Zoulas <christos@zoulas.com>
List: netbsd-bugs
Date: 03/27/2007 16:55:02
The following reply was made to PR bin/36092; it has been noted by GNATS.
From: christos@zoulas.com (Christos Zoulas)
To: gnats-bugs@NetBSD.org, gnats-admin@netbsd.org,
netbsd-bugs@netbsd.org
Cc:
Subject: Re: bin/36092: algorithmic inefficiency in bin/test/test.c:t_lex
Date: Tue, 27 Mar 2007 12:54:24 -0400
On Mar 27, 5:10am, mtbakerguy@yahoo.com (mtbakerguy@yahoo.com) wrote:
-- Subject: bin/36092: algorithmic inefficiency in bin/test/test.c:t_lex
| >Number: 36092
| >Category: bin
| >Synopsis: algorithmic inefficiency in bin/test/test.c:t_lex
| >Confidential: no
| >Severity: serious
| >Priority: medium
| >Responsible: bin-bug-people
| >State: open
| >Class: change-request
| >Submitter-Id: net
| >Arrival-Date: Tue Mar 27 05:10:00 +0000 2007
| >Originator: Knotwell
| >Release: 3.1
| >Organization:
| home
| >Environment:
| NetBSD bradwkstn 3.1 NetBSD 3.1 (GENERIC.PROF) #7: Thu Dec 14 21:54:41 PST 2006 root@bradwkstn:/usr/src/usr/src/sys/arch/i386/compile/GENERIC.PROF i386
| >Description:
| While profiling /bin/sh for fun, I noticed several things:
|
| o the test builtin is *extremely* expensive with massive calls to strcmp in the t_lex function. Specifically, it does linear walks of the operations to do a match.
| o profiling /bin/sh is difficult because bin/sh/trap.c:exitshell uses _exit instead of exit (NB: this isn't a bug but probably belongs in a README somewhere)
| o arithmetic operations are also more expensive than they need to be since there are two calls made to ioctl (via isatty) when arith_lex_reset is called. In my limited testing, I commented out the YY_NEW_FILE stanza and cut the number of syscalls substantially in a shell doing a large amount of arithmetic.
|
| At this cost of some codespace as well as readability*, I created a substantially improved parser for the test operations that dropped strcmp and t_lex off the profiler (almost) entirely. On a shell doing a significant number of boolean comparisons, I cut the run time by approximately 15%.
|
| I've attached a diff below as well as a small shell that I used to validate the change (I noticed TEST.csh as I was preparing the diff).
|
| *unfortunately, my change loses the data-driven clarity of the original code and it wasn't obvious to me how you'd get a data-driven solution nearly as efficient as mine. There's probably a simpler and more elegant solution but code review's difficult on a home project.
How about this?
christos
Index: test.c
===================================================================
RCS file: /cvsroot/src/bin/test/test.c,v
retrieving revision 1.30
diff -u -u -r1.30 test.c
--- test.c 24 Sep 2006 13:24:08 -0000 1.30
+++ test.c 27 Mar 2007 16:53:02 -0000
@@ -95,50 +95,60 @@
PAREN
};
-static struct t_op {
+struct t_op {
const char *op_text;
short op_num, op_type;
-} const ops [] = {
- {"-r", FILRD, UNOP},
- {"-w", FILWR, UNOP},
- {"-x", FILEX, UNOP},
- {"-e", FILEXIST,UNOP},
- {"-f", FILREG, UNOP},
- {"-d", FILDIR, UNOP},
- {"-c", FILCDEV,UNOP},
- {"-b", FILBDEV,UNOP},
- {"-p", FILFIFO,UNOP},
- {"-u", FILSUID,UNOP},
- {"-g", FILSGID,UNOP},
- {"-k", FILSTCK,UNOP},
- {"-s", FILGZ, UNOP},
- {"-t", FILTT, UNOP},
- {"-z", STREZ, UNOP},
- {"-n", STRNZ, UNOP},
- {"-h", FILSYM, UNOP}, /* for backwards compat */
- {"-O", FILUID, UNOP},
- {"-G", FILGID, UNOP},
- {"-L", FILSYM, UNOP},
- {"-S", FILSOCK,UNOP},
- {"=", STREQ, BINOP},
- {"!=", STRNE, BINOP},
- {"<", STRLT, BINOP},
- {">", STRGT, BINOP},
- {"-eq", INTEQ, BINOP},
- {"-ne", INTNE, BINOP},
- {"-ge", INTGE, BINOP},
- {"-gt", INTGT, BINOP},
- {"-le", INTLE, BINOP},
- {"-lt", INTLT, BINOP},
- {"-nt", FILNT, BINOP},
- {"-ot", FILOT, BINOP},
- {"-ef", FILEQ, BINOP},
+};
+
+static const struct t_op cop[] = {
{"!", UNOT, BUNOP},
- {"-a", BAND, BBINOP},
- {"-o", BOR, BBINOP},
{"(", LPAREN, PAREN},
{")", RPAREN, PAREN},
- {0, 0, 0}
+ {"<", STRLT, BINOP},
+ {"=", STREQ, BINOP},
+ {">", STRGT, BINOP},
+};
+
+static const struct t_op cop2[] = {
+ {"!=", STRNE, BINOP},
+};
+
+static const struct t_op mop3[] = {
+ {"ef", FILEQ, BINOP},
+ {"eq", INTEQ, BINOP},
+ {"ge", INTGE, BINOP},
+ {"gt", INTGT, BINOP},
+ {"le", INTLE, BINOP},
+ {"lt", INTLT, BINOP},
+ {"ne", INTNE, BINOP},
+ {"nt", FILNT, BINOP},
+ {"ot", FILOT, BINOP},
+};
+
+static const struct t_op mop2[] = {
+ {"G", FILGID, UNOP},
+ {"L", FILSYM, UNOP},
+ {"O", FILUID, UNOP},
+ {"S", FILSOCK,UNOP},
+ {"a", BAND, BBINOP},
+ {"b", FILBDEV,UNOP},
+ {"c", FILCDEV,UNOP},
+ {"d", FILDIR, UNOP},
+ {"e", FILEXIST,UNOP},
+ {"f", FILREG, UNOP},
+ {"g", FILSGID,UNOP},
+ {"h", FILSYM, UNOP}, /* for backwards compat */
+ {"k", FILSTCK,UNOP},
+ {"n", STRNZ, UNOP},
+ {"o", BOR, BBINOP},
+ {"p", FILFIFO,UNOP},
+ {"r", FILRD, UNOP},
+ {"s", FILGZ, UNOP},
+ {"t", FILTT, UNOP},
+ {"u", FILSUID,UNOP},
+ {"w", FILWR, UNOP},
+ {"x", FILEX, UNOP},
+ {"z", STREZ, UNOP},
};
static char **t_wp;
@@ -390,26 +400,68 @@
}
}
+#define VTOC(x) (const unsigned char *)((const struct t_op *)x)->op_text
+
+static int
+compare1(const void *va, const void *vb)
+{
+ const unsigned char *a = va;
+ const unsigned char *b = VTOC(vb);
+
+ return a[0] - b[0];
+}
+
+static int
+compare2(const void *va, const void *vb)
+{
+ const unsigned char *a = va;
+ const unsigned char *b = VTOC(vb);
+ int z = a[0] - b[0];
+
+ return z ? z : (a[1] - b[1]);
+}
+
+static struct t_op const *
+findop(const char *s)
+{
+ if (s[0] == '-') {
+ if (s[1] == '\0')
+ return NULL;
+ if (s[2] == '\0')
+ return bsearch(s + 1, mop2, __arraycount(mop2),
+ sizeof(*mop2), compare1);
+ else if (s[3] != '\0')
+ return NULL;
+ else
+ return bsearch(s + 1, mop3, __arraycount(mop3),
+ sizeof(*mop3), compare2);
+ } else {
+ if (s[1] == '\0')
+ return bsearch(s, cop, __arraycount(cop), sizeof(*cop),
+ compare1);
+ else if (strcmp(s, cop2[0].op_text) == 0)
+ return cop2;
+ else
+ return NULL;
+ }
+}
+
static enum token
t_lex(char *s)
{
struct t_op const *op;
- op = ops;
-
- if (s == 0) {
+ if (s == NULL) {
t_wp_op = NULL;
return EOI;
}
- while (op->op_text) {
- if (strcmp(s, op->op_text) == 0) {
- if ((op->op_type == UNOP && isoperand()) ||
- (op->op_num == LPAREN && *(t_wp+1) == 0))
- break;
+
+ if ((op = findop(s)) != NULL) {
+ if (!((op->op_type == UNOP && isoperand()) ||
+ (op->op_num == LPAREN && *(t_wp+1) == 0))) {
t_wp_op = op;
return op->op_num;
}
- op++;
}
t_wp_op = NULL;
return OPERAND;
@@ -421,17 +473,12 @@
struct t_op const *op;
char *s, *t;
- op = ops;
if ((s = *(t_wp+1)) == 0)
return 1;
if ((t = *(t_wp+2)) == 0)
return 0;
- while (op->op_text) {
- if (strcmp(s, op->op_text) == 0)
- return op->op_type == BINOP &&
- (t[0] != ')' || t[1] != '\0');
- op++;
- }
+ if ((op = findop(s)) != NULL)
+ return op->op_type == BINOP && (t[0] != ')' || t[1] != '\0');
return 0;
}