Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/bin/sh Finish the fix for PR/48631 - that is, make the parse...



details:   https://anonhg.NetBSD.org/src/rev/1b9efaac36c8
branches:  trunk
changeset: 813854:1b9efaac36c8
user:      christos <christos%NetBSD.org@localhost>
date:      Mon Feb 22 19:38:10 2016 +0000

description:
Finish the fix for PR/48631 - that is, make the parser correctly
handle the token syntax it really should be handling (including
some that posix does not require, but is right anyway.) This is
quite similar to, and to some extent inspired by the way the FreeBSD
sh parser.c works, but the actual implementation is quite different.
(from kre)

diffstat:

 bin/sh/parser.c |  298 ++++++++++++++++++++++++++++++++++++++++++-------------
 1 files changed, 224 insertions(+), 74 deletions(-)

diffs (truncated from 472 to 300 lines):

diff -r ff93df6e3781 -r 1b9efaac36c8 bin/sh/parser.c
--- a/bin/sh/parser.c   Mon Feb 22 19:25:26 2016 +0000
+++ b/bin/sh/parser.c   Mon Feb 22 19:38:10 2016 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: parser.c,v 1.95 2016/02/22 19:25:26 christos Exp $     */
+/*     $NetBSD: parser.c,v 1.96 2016/02/22 19:38:10 christos Exp $     */
 
 /*-
  * Copyright (c) 1991, 1993
@@ -37,7 +37,7 @@
 #if 0
 static char sccsid[] = "@(#)parser.c   8.7 (Berkeley) 5/16/95";
 #else
-__RCSID("$NetBSD: parser.c,v 1.95 2016/02/22 19:25:26 christos Exp $");
+__RCSID("$NetBSD: parser.c,v 1.96 2016/02/22 19:38:10 christos Exp $");
 #endif
 #endif /* not lint */
 
@@ -902,6 +902,138 @@
  * will run code that appears at the end of readtoken1.
  */
 
+/*
+ * We used to remember only the current syntax, variable nesting level,
+ * double quote state for each var nesting level, and arith nesting
+ * level (unrelated to var nesting) and one prev syntax when in arith
+ * syntax.  This worked for simple cases, but can't handle arith inside
+ * var expansion inside arith inside var with some quoted and some not.
+ *
+ * Inspired by FreeBSD's implementation (though it was the obvious way)
+ * though implemented differently, we now have a stack that keeps track
+ * of what we are doing now, and what we were doing previously.
+ * Every time something changes, which will eventually end and should
+ * revert to the previous state, we push this stack, and then pop it
+ * again later (that is, every ${} with an operator (to parse the word
+ * or pattern that follows; ${x} and $x are too simple to need it),
+ * $(( )), $( ) and "...").   Always.   Really, always!
+ *
+ * The stack is implemented as one fixed-size base block (an automatic
+ * variable, so on the C stack) containing LEVELS_PER_BLOCK (8) stack
+ * entries, which should be enough for the vast majority of cases.
+ * For torture tests, we malloc more blocks as needed.  All accesses
+ * go through the inline functions below.
+ */
+
+/*
+ * varnest & arinest will typically be 0 or 1
+ * (varnest can increment in usages like ${x=${y}} but probably
+ *  does not really need to)
+ * parenlevel allows balancing parens inside a $(( )), it is reset
+ * at each new nesting level, so $(( ( x + 3 ${unset-)} )) does not work.
+ * quoted is special - we need to know 2 things ... are we inside "..."
+ * (even if inherited from some previous nesting level) and was there
+ * an opening '"' at this level (so the next will be closing).
+ * "..." can span nesting levels, but cannot be opened in one and
+ * closed in a different one.
+ * To handle this, "quoted" has two fields, the bottom 4 (really 2)
+ * bits are 0, 1, or 2, for un, single, and double quoted (single quoted
+ * is really so special that this setting is not very important)
+ * and 0x10 that indicates that an opening quote has been seen.
+ * The bottom 4 bits are inherited, the 0x10 bit is not.
+ */
+struct tokenstate {
+       const char *ts_syntax;
+       unsigned short ts_parenlevel;   /* counters */
+       unsigned short ts_varnest;      /* 64000 levels should be enough! */
+       unsigned short ts_arinest;
+       unsigned short ts_quoted;       /* 1 -> single, 2 -> double */
+};
+
+#define        NQ      0x00
+#define        SQ      0x01
+#define        DQ      0x02
+#define        QF      0x0F
+#define        QS      0x10
+
+#define        LEVELS_PER_BLOCK        8
+#define        VSS                     volatile struct statestack
+
+struct statestack {
+       VSS *prev;              /* previous block in list */
+       int cur;                /* which of our tokenstates is current */
+       struct tokenstate tokenstate[LEVELS_PER_BLOCK];
+};
+
+static inline volatile struct tokenstate *
+currentstate(VSS *stack)
+{
+       return &stack->tokenstate[stack->cur];
+}
+
+static inline volatile struct tokenstate *
+prevstate(VSS *stack)
+{
+       if (stack->cur != 0)
+               return &stack->tokenstate[stack->cur - 1];
+       if (stack->prev == NULL)        /* cannot drop below base */
+               return &stack->tokenstate[0];
+       return &stack->prev->tokenstate[LEVELS_PER_BLOCK - 1];
+}
+
+static inline VSS *
+bump_state_level(VSS *stack)
+{
+       volatile struct tokenstate *os, *ts;
+
+       os = currentstate(stack);
+
+       if (++stack->cur >= LEVELS_PER_BLOCK) {
+               VSS *ss;
+
+               ss = (VSS *)ckmalloc(sizeof (struct statestack));
+               ss->cur = 0;
+               ss->prev = stack;
+               stack = ss;
+       }
+
+       ts = currentstate(stack);
+
+       ts->ts_parenlevel = 0;  /* parens inside never match outside */
+
+       ts->ts_quoted  = os->ts_quoted & QF;    /* these are default settings */
+       ts->ts_varnest = os->ts_varnest;
+       ts->ts_arinest = os->ts_arinest;        /* when appropriate        */
+       ts->ts_syntax  = os->ts_syntax;         /*    they will be altered */
+
+       return stack;
+}
+
+static inline VSS *
+drop_state_level(VSS *stack)
+{
+       if (stack->cur == 0) {
+               VSS *ss;
+
+               ss = stack;
+               stack = ss->prev;
+               if (stack == NULL)
+                       return ss;
+               ckfree(__UNVOLATILE(ss));
+       }
+       --stack->cur;
+       return stack;
+}
+
+static inline void
+cleanup_state_stack(VSS *stack)
+{
+       while (stack->prev != NULL) {
+               stack->cur = 0;
+               stack = drop_state_level(stack);
+       }
+}
+
 #define        CHECKEND()      {goto checkend; checkend_return:;}
 #define        PARSEREDIR()    {goto parseredir; parseredir_return:;}
 #define        PARSESUB()      {goto parsesub; parsesub_return:;}
@@ -910,60 +1042,51 @@
 #define        PARSEARITH()    {goto parsearith; parsearith_return:;}
 
 /*
- * Keep track of nested doublequotes in dblquote and doublequotep.
- * We use dblquote for the first 32 levels, and we expand to a malloc'ed
- * region for levels above that. Usually we never need to malloc.
- * This code assumes that an int is 32 bits. We don't use uint32_t,
- * because the rest of the code does not.
+ * The following macros all assume the existence of a local var "stack"
+ * which contains a pointer to the current struct statestack
  */
-#define ISDBLQUOTE() ((varnest < 32) ? (dblquote & (1 << varnest)) : \
-    (dblquotep[(varnest / 32) - 1] & (1 << (varnest % 32))))
-
-#define SETDBLQUOTE() \
-    if (varnest < 32) \
-       dblquote |= (1 << varnest); \
-    else \
-       dblquotep[(varnest / 32) - 1] |= (1 << (varnest % 32))
 
-#define CLRDBLQUOTE() \
-    if (varnest < 32) \
-       dblquote &= ~(1 << varnest); \
-    else \
-       dblquotep[(varnest / 32) - 1] &= ~(1 << (varnest % 32))
+/*
+ * These are macros rather than inline funcs to avoid code churn as much
+ * as possible - they replace macros of the same name used previously.
+ */
+#define        ISDBLQUOTE()    (currentstate(stack)->ts_quoted & QS)
+#define        SETDBLQUOTE()   (currentstate(stack)->ts_quoted = QS | DQ)
+#define        CLRDBLQUOTE()   (currentstate(stack)->ts_quoted =               \
+                           stack->cur != 0 || stack->prev ?            \
+                               prevstate(stack)->ts_quoted & QF : 0)
 
-#define INCREASENEST() \
-       do { \
-               if (varnest++ >= maxnest) { \
-                       dblquotep = ckrealloc(dblquotep, maxnest / 8); \
-                       dblquotep[(maxnest / 32) - 1] = 0; \
-                       maxnest += 32; \
-               } \
-       } while (/*CONSTCOND*/0)
+/*
+ * These are just to avoid excess typing and line lengths...
+ * The ones that "look like" var names must be implemented to be lvalues
+ */
+#define        syntax          (currentstate(stack)->ts_syntax)
+#define        parenlevel      (currentstate(stack)->ts_parenlevel)
+#define        varnest         (currentstate(stack)->ts_varnest)
+#define        arinest         (currentstate(stack)->ts_arinest)
+#define        quoted          (currentstate(stack)->ts_quoted)
+#define        TS_PUSH()       (stack = bump_state_level(stack))
+#define        TS_POP()        (stack = drop_state_level(stack))
 
 STATIC int
 readtoken1(int firstc, char const *syn, char *eofmark, int striptabs)
 {
-       char const * volatile syntax = syn;
        int c = firstc;
        char * volatile out;
        int len;
        char line[EOFMARKLEN + 1];
        struct nodelist *bqlist;
        volatile int quotef;
-       int * volatile dblquotep = NULL;
-       volatile size_t maxnest = 32;
-       volatile int dblquote;
-       volatile size_t varnest;        /* levels of variables expansion */
-       volatile int arinest;   /* levels of arithmetic expansion */
-       volatile int parenlevel;        /* levels of parens in arithmetic */
        volatile int oldstyle;
-       char const * volatile prevsyntax;       /* syntax before arithmetic */
-#ifdef __GNUC__
-       prevsyntax = NULL;      /* XXX gcc4 */
-#endif
+       VSS static_stack;
+       VSS *stack = &static_stack;
+
+       stack->prev = NULL;
+       stack->cur = 0;
+
+       syntax = syn;
 
        startlinno = plinno;
-       dblquote = 0;
        varnest = 0;
        if (syntax == DQSYNTAX) {
                SETDBLQUOTE();
@@ -972,6 +1095,7 @@
        bqlist = NULL;
        arinest = 0;
        parenlevel = 0;
+       quoted = 0;
 
        STARTSTACKSTR(out);
        loop: { /* for each line, until end of word */
@@ -1044,7 +1168,9 @@
                                        if (eofmark == NULL)
                                                USTPUTC(CTLQUOTEMARK, out);
                                        quotef = 1;
+                                       TS_PUSH();
                                        syntax = SQSYNTAX;
+                                       quoted = SQ;
                                        break;
                                }
                                if (eofmark != NULL && arinest == 0 &&
@@ -1054,13 +1180,9 @@
                                        break;
                                }
                                /* End of single quotes... */
-                               if (arinest)
-                                       syntax = ARISYNTAX;
-                               else {
-                                       syntax = BASESYNTAX;
-                                       if (varnest != 0)
-                                               USTPUTC(CTLQUOTEEND, out);
-                               }
+                               TS_POP();
+                               if (syntax == BASESYNTAX && varnest != 0)
+                                       USTPUTC(CTLQUOTEEND, out);
                                break;
                        case CDQUOTE:
                                if (eofmark != NULL && arinest == 0 &&
@@ -1072,9 +1194,9 @@
                                quotef = 1;
                                if (arinest) {
                                        if (ISDBLQUOTE()) {
-                                               syntax = ARISYNTAX;
-                                               CLRDBLQUOTE();
+                                               TS_POP();
                                        } else {
+                                               TS_PUSH();
                                                syntax = DQSYNTAX;
                                                SETDBLQUOTE();
                                                USTPUTC(CTLQUOTEMARK, out);
@@ -1084,11 +1206,11 @@
                                if (eofmark != NULL)
                                        break;
                                if (ISDBLQUOTE()) {
+                                       TS_POP();
                                        if (varnest != 0)



Home | Main Index | Thread Index | Old Index