Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/bin/sh lexical analysis fixes. This fixes the tests just c...



details:   https://anonhg.NetBSD.org/src/rev/f0ff8f70b819
branches:  trunk
changeset: 448128:f0ff8f70b819
user:      kre <kre%NetBSD.org@localhost>
date:      Tue Jan 22 14:32:17 2019 +0000

description:
lexical analysis fixes.   This fixes the tests just committed in
src/tests/bin/sh/t_here.sh

The "magicq" magic was all wrong - it cannot be simply a parameter
to readtoken1() as its value needs to alter during that routine
(eg: when magicq is set - processing here doc text, or whatever -
and we encounter ${var%pattern}, "magicq" needs to be off for
"pattern" - and it wasn't).

To handle this magicq needs to be included in the token stack struct,
and simply init'd from the arg to readtoken1 (which we rename).
Then it can be manipulated as required.

Once we no longer have that problem, some other issues can be cleaned
up as well (some of this unbelievably fragile code was attempting to
cope with this in various ad-hoc - and mostly broken - ways).

Also, remove the magicq parameter from parsebackq() - it was not
used (at all) and should never be: a command substitution, wherever
it appears, always starts a new parsing context.  How that applies
to old style command substitutions is less clear, but until we see
some real examples where we're not doing the right thing (slightly
less likely now than before ... nothing has changed here in the
way command substitutions are parsed, but quoting in general is
slightly better) I don't plan on worrying about it.

There are a couple of other minor cleanups, which make no actual
difference (like adding () around the use of the parameter in the
RETURN macro ... which is generally better, but makes no difference
here as the param is always a simple constant).

All the current ATF tests pass.

diffstat:

 bin/sh/parser.c |  30 ++++++++++++++++++------------
 1 files changed, 18 insertions(+), 12 deletions(-)

diffs (144 lines):

diff -r ba24c9e30992 -r f0ff8f70b819 bin/sh/parser.c
--- a/bin/sh/parser.c   Tue Jan 22 14:31:53 2019 +0000
+++ b/bin/sh/parser.c   Tue Jan 22 14:32:17 2019 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: parser.c,v 1.163 2019/01/22 13:48:28 kre Exp $ */
+/*     $NetBSD: parser.c,v 1.164 2019/01/22 14:32:17 kre Exp $ */
 
 /*-
  * Copyright (c) 1991, 1993
@@ -37,7 +37,7 @@
 #if 0
 static char sccsid[] = "@(#)parser.c   8.7 (Berkeley) 5/16/95";
 #else
-__RCSID("$NetBSD: parser.c,v 1.163 2019/01/22 13:48:28 kre Exp $");
+__RCSID("$NetBSD: parser.c,v 1.164 2019/01/22 14:32:17 kre Exp $");
 #endif
 #endif /* not lint */
 
@@ -1066,7 +1066,7 @@
  *  have parseword (readtoken1?) handle both words and redirection.]
  */
 
-#define RETURN(token)  return lasttoken = token
+#define RETURN(token)  return lasttoken = (token)
 
 STATIC int
 xxreadtoken(void)
@@ -1240,6 +1240,7 @@
        unsigned short ts_varnest;      /* 64000 levels should be enough! */
        unsigned short ts_arinest;
        unsigned short ts_quoted;       /* 1 -> single, 2 -> double */
+       unsigned short ts_magicq;       /* heredoc or word expand */
 };
 
 #define        NQ      0x00    /* Unquoted */
@@ -1300,6 +1301,7 @@
        ts->ts_varnest = os->ts_varnest;
        ts->ts_arinest = os->ts_arinest;        /* when appropriate        */
        ts->ts_syntax  = os->ts_syntax;         /*    they will be altered */
+       ts->ts_magicq  = os->ts_magicq;
 
        return stack;
 }
@@ -1358,6 +1360,7 @@
 #define        varnest         (currentstate(stack)->ts_varnest)
 #define        arinest         (currentstate(stack)->ts_arinest)
 #define        quoted          (currentstate(stack)->ts_quoted)
+#define        magicq          (currentstate(stack)->ts_magicq)
 #define        TS_PUSH()       (stack = bump_state_level(stack))
 #define        TS_POP()        (stack = drop_state_level(stack))
 
@@ -1371,7 +1374,7 @@
  */
 static char *
 parsebackq(VSS *const stack, char * const in,
-    struct nodelist **const pbqlist, const int oldstyle, const int magicq)
+    struct nodelist **const pbqlist, const int oldstyle)
 {
        struct nodelist **nlpp;
        const int savepbq = parsebackquote;
@@ -1593,11 +1596,11 @@
                VTRACE(DBG_LEXER, ("is '%c'(%#.2x) ", c&0xFF, c&0x1FF));
                switch (c) {
                case '<':
-                       if (sizeof (struct nfile) != sizeof (struct nhere)) {
+                       /* if sizes differ, just discard the old one */
+                       if (sizeof (struct nfile) != sizeof (struct nhere))
                                np = stalloc(sizeof(struct nhere));
-                               np->nfile.fd = 0;
-                       }
                        np->type = NHERE;
+                       np->nhere.fd = 0;
                        heredoc = stalloc(sizeof(struct HereDoc));
                        heredoc->here = np;
                        heredoc->startline = plinno;
@@ -1817,7 +1820,7 @@
  * That will also need fixing, someday...
  */
 STATIC int
-readtoken1(int firstc, char const *syn, int magicq)
+readtoken1(int firstc, char const *syn, int oneword)
 {
        int c;
        char * out;
@@ -1850,6 +1853,7 @@
        arinest = 0;
        parenlevel = 0;
        elided_nl = 0;
+       magicq = oneword;
 
        CTRACE(DBG_LEXER, ("readtoken1(%c) syntax=%s %s%s(quoted=%x)\n",
            firstc&0xFF, SYNTAX, magicq ? "magic quotes" : "",
@@ -1924,7 +1928,7 @@
                        }
                        CVTRACE(DBG_LEXER, quotef==0, (" QF=1 "));
                        quotef = 1;     /* current token is quoted */
-                       if (ISDBLQUOTE() && c != '\\' && c != '`' &&
+                       if (quoted && c != '\\' && c != '`' &&
                            c != '$' && (c != '"' || magicq)) {
                                /*
                                 * retain the \ (which we *know* needs CTLESC)
@@ -1976,7 +1980,7 @@
                                USTPUTC(CTLQUOTEEND, out);
                        continue;
                case CDQUOTE:
-                       if (magicq && arinest == 0 && varnest == 0) {
+                       if (magicq && arinest == 0 /* && varnest == 0 */) {
                                VTRACE(DBG_LEXER, ("<<\">>"));
                                /* Ignore inside here document */
                                USTPUTC(c, out);
@@ -2071,7 +2075,7 @@
                        continue;
                case CBQUOTE:   /* '`' */
                        VTRACE(DBG_LEXER, ("'`' -> parsebackq()\n"));
-                       out = parsebackq(stack, out, &bqlist, 1, magicq);
+                       out = parsebackq(stack, out, &bqlist, 1);
                        VTRACE(DBG_LEXER, ("parsebackq() -> readtoken1: "));
                        continue;
                case CEOF:              /* --> c == PEOF */
@@ -2156,7 +2160,7 @@
                        VTRACE(DBG_LEXER, ("\"$(\" CSUB->parsebackq()\n"));
                        out = insert_elided_nl(out);
                        pungetc();
-                       out = parsebackq(stack, out, &bqlist, 0, magicq);
+                       out = parsebackq(stack, out, &bqlist, 0);
                        VTRACE(DBG_LEXER, ("parseback()->readtoken1(): "));
                }
        } else if (c == OPENBRACE || is_name(c) || is_special(c)) {
@@ -2301,6 +2305,7 @@
                        if (subtype > VSASSIGN) {       /* # ## % %% */
                                syntax = BASESYNTAX;
                                quoted = 0;
+                               magicq = 0;
                        }
                        VTRACE(DBG_LEXER, (" TS_PUSH->%s vn=%d%s ",
                            SYNTAX, varnest, quoted ? " Q" : ""));
@@ -2359,6 +2364,7 @@
                syntax = ARISYNTAX;
                arinest = 1;
                varnest = 0;
+               magicq = 1;
        }
        goto parsearith_return;
 }



Home | Main Index | Thread Index | Old Index