[src/trunk]: src/bin/sh PR bin/50993 - this is a significant rewrite of the w...

To: source-changes-hg%NetBSD.org@localhost
Subject: [src/trunk]: src/bin/sh PR bin/50993 - this is a significant rewrite of the w...
From: christos <christos%NetBSD.org@localhost>
Date: Tue, 31 Jul 2018 20:04:21 +0000
details:   https://anonhg.NetBSD.org/src/rev/6a7c47a063c8
branches:  trunk
changeset: 344413:6a7c47a063c8
user:      christos <christos%NetBSD.org@localhost>
date:      Sun Mar 27 14:39:33 2016 +0000

description:
PR bin/50993 - this is a significant rewrite of the way that here
documents are processed.  Now, when first detected, they are
simply read (the only change made to the text is to join lines
ended with a \ to the subsequent line, otherwise end marker detection
does not work correctly - for here docs with an unquoted endmarker
only, of course).  This patch also moves the "internal subroutine"
for looking for the end marker out of readtoken1() (which had to
happen as readtoken1 is no longer reading the here doc when it is
needed) - that uses code mostly taken from FreeBSD's sh (thanks!)
and along the way removes some restrictions on what the end
marker can be.   We still do not allow all we should.
(from kre@)

diffstat:

 bin/sh/expand.c |   10 +-
 bin/sh/parser.c |  246 +++++++++++++++++++++++++++++++++++++++----------------
 bin/sh/parser.h |    3 +-
 3 files changed, 182 insertions(+), 77 deletions(-)

diffs (truncated from 489 to 300 lines):

diff -r 7dfab634b4e2 -r 6a7c47a063c8 bin/sh/expand.c
--- a/bin/sh/expand.c   Sun Mar 27 14:36:29 2016 +0000
+++ b/bin/sh/expand.c   Sun Mar 27 14:39:33 2016 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: expand.c,v 1.98 2016/03/27 14:34:46 christos Exp $     */
+/*     $NetBSD: expand.c,v 1.99 2016/03/27 14:39:33 christos Exp $     */
 
 /*-
  * Copyright (c) 1991, 1993
@@ -37,7 +37,7 @@
 #if 0
 static char sccsid[] = "@(#)expand.c   8.5 (Berkeley) 5/15/95";
 #else
-__RCSID("$NetBSD: expand.c,v 1.98 2016/03/27 14:34:46 christos Exp $");
+__RCSID("$NetBSD: expand.c,v 1.99 2016/03/27 14:39:33 christos Exp $");
 #endif
 #endif /* not lint */
 
@@ -121,6 +121,12 @@
 void
 expandhere(union node *arg, int fd)
 {
+       /*
+        * First, parse the content of the here doc (to internal form)
+        * It was initially saved as (almost) unmodified text.
+        */
+       parse_heredoc(arg);
+
        herefd = fd;
        expandarg(arg, NULL, 0);
        xwrite(fd, stackblock(), expdest - stackblock());
diff -r 7dfab634b4e2 -r 6a7c47a063c8 bin/sh/parser.c
--- a/bin/sh/parser.c   Sun Mar 27 14:36:29 2016 +0000
+++ b/bin/sh/parser.c   Sun Mar 27 14:39:33 2016 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: parser.c,v 1.110 2016/03/27 14:36:29 christos Exp $    */
+/*     $NetBSD: parser.c,v 1.111 2016/03/27 14:39:33 christos Exp $    */
 
 /*-
  * Copyright (c) 1991, 1993
@@ -37,7 +37,7 @@
 #if 0
 static char sccsid[] = "@(#)parser.c   8.7 (Berkeley) 5/16/95";
 #else
-__RCSID("$NetBSD: parser.c,v 1.110 2016/03/27 14:36:29 christos Exp $");
+__RCSID("$NetBSD: parser.c,v 1.111 2016/03/27 14:39:33 christos Exp $");
 #endif
 #endif /* not lint */
 
@@ -69,8 +69,6 @@
  * Shell command parser.
  */
 
-#define EOFMARKLEN 79
-
 /* values returned by readtoken */
 #include "token.h"
 
@@ -111,11 +109,12 @@
 STATIC union node *simplecmd(union node **, union node *);
 STATIC union node *makename(void);
 STATIC void parsefname(void);
-STATIC void parseheredoc(void);
+STATIC void slurp_heredoc(char *const, int, int);
+STATIC void readheredocs(void);
 STATIC int peektoken(void);
 STATIC int readtoken(void);
 STATIC int xxreadtoken(void);
-STATIC int readtoken1(int, char const *, char *, int);
+STATIC int readtoken1(int, char const *, int);
 STATIC int noexpand(char *);
 STATIC void synexpect(int, const char *) __dead;
 STATIC void synerror(const char *) __dead;
@@ -196,7 +195,7 @@
                        /* FALLTHROUGH */
                case TNL:
                        if (tok == TNL) {
-                               parseheredoc();
+                               readheredocs();
                                if (nlflag)
                                        return n1;
                        } else {
@@ -208,7 +207,7 @@
                        break;
                case TEOF:
                        if (heredoclist)
-                               parseheredoc();
+                               readheredocs();
                        else
                                pungetc();      /* push back EOF on input */
                        return n1;
@@ -671,7 +670,6 @@
        if (n->type == NHERE) {
                struct heredoc *here = heredoc;
                struct heredoc *p;
-               int i;
 
                if (quoteflag == 0)
                        n->type = NXHERE;
@@ -680,8 +678,21 @@
                        while (*wordtext == '\t')
                                wordtext++;
                }
-               if (! noexpand(wordtext) || (i = strlen(wordtext)) == 0 || i > EOFMARKLEN)
+
+               /*
+                * this test is not really necessary, we are not
+                * required to expand wordtext, but there's no reason
+                * it cannot be $$ or something like that - that would
+                * not mean the pid, but literally two '$' characters.
+                * There is no need for limits on what the word can be.
+                * However, it needs to stay literal as entered, not
+                * have $ converted to CTLVAR or something, which, as
+                * the parser is at the minute, is impossible to prevent.
+                * So, leave it like this until the rest of the parser is fixed.
+                */
+               if (! noexpand(wordtext))
                        synerror("Illegal eof marker for << redirection");
+
                rmescapes(wordtext);
                here->eofmark = wordtext;
                here->next = NULL;
@@ -699,32 +710,142 @@
        }
 }
 
+/*
+ * Check to see whether we are at the end of the here document.  When this
+ * is called, c is set to the first character of the next input line.  If
+ * we are at the end of the here document, this routine sets c to PEOF.
+ * The new value of c is returned.
+ */
+
+static int
+checkend(int c, char * const eofmark, const int striptabs)
+{
+       if (striptabs) {
+               while (c == '\t')
+                       c = pgetc();
+       }
+       if (c == PEOF) {
+               if (*eofmark == '\0')
+                       return (c);
+               synerror(EOFhere);
+       }
+       if (c == *eofmark) {
+               int c2;
+               char *q;
+
+               for (q = eofmark + 1; c2 = pgetc(), *q != '\0' && c2 == *q; q++)
+                       ;
+               if ((c2 == PEOF || c2 == '\n') && *q == '\0') {
+                       c = PEOF;
+                       if (c2 == '\n') {
+                               plinno++;
+                               needprompt = doprompt;
+                       }
+               } else {
+                       pungetc();
+                       pushstring(eofmark + 1, q - (eofmark + 1), NULL);
+               }
+       } else if (c == '\n' && *eofmark == '\0') {
+               c = PEOF;
+               plinno++;
+               needprompt = doprompt;
+       }
+       return (c);
+}
+
 
 /*
  * Input any here documents.
  */
 
 STATIC void
-parseheredoc(void)
+slurp_heredoc(char *const eofmark, int striptabs, int sq)
+{
+       int c;
+       char *out;
+
+       c = pgetc();
+
+       /*
+        * If we hit EOF on the input, and the eofmark is a null string ('')
+        * we consider this empty line to be the eofmark, and exit without err.
+        */
+       if (c == PEOF && *eofmark != '\0')
+               synerror(EOFhere);
+
+       STARTSTACKSTR(out);
+
+       while ((c = checkend(c, eofmark, striptabs)) != PEOF) {
+               do {
+                       if (sq) {
+                               /*
+                                * in single quoted mode (eofmark quoted)
+                                * all we look for is \n so we can check
+                                * for the eofmark - everything saved literally.
+                                */
+                               STPUTC(c, out);
+                               if (c == '\n')
+                                       break;
+                               continue;
+                       }
+                       /*
+                        * In double quoted (non-quoted eofmark)
+                        * we must handle \ followed by \n here
+                        * otherwise we can mismatch the end mark.
+                        * All other uses of \ will be handled later
+                        * when the here doc is expanded.
+                        *
+                        * This also makes sure \\ followed by \n does
+                        * not suppress the newline (the \ quotes itself)
+                        */
+                       if (c == '\\') {                /* A backslash */
+                               c = pgetc();            /* followed by */
+                               if (c == '\n')          /* a newline?  */
+                                       continue;       /* y:drop both */
+                               STPUTC('\\', out);      /* else keep \ */
+                       }
+                       STPUTC(c, out);                 /* keep the char */
+                       if (c == '\n')                  /* at end of line */
+                               break;                  /* look for eofmark */
+
+               } while ((c = pgetc()) != PEOF);
+
+               /*
+                * If we have read a line, and reached EOF, without
+                * finding the eofmark, whether the EOF comes before
+                * or immediately after the \n, that is an error.
+                */
+               if (c == PEOF || (c = pgetc()) == PEOF)
+                       synerror(EOFhere);
+       }
+       STPUTC('\0', out);
+
+       c = out - stackblock();
+       out = stackblock();
+       grabstackblock(c);
+       wordtext = out;
+
+       TRACE(("Slurped a heredoc (to '%s')%s: len %d, \"%.16s\"...\n",
+               eofmark, striptabs ? " tab stripped" : "", c, wordtext));
+}
+
+STATIC void
+readheredocs(void)
 {
        struct heredoc *here;
        union node *n;
 
        while (heredoclist) {
-               int c;
-
                here = heredoclist;
                heredoclist = here->next;
                if (needprompt) {
                        setprompt(2);
                        needprompt = 0;
                }
-               if ((c = pgetc()) == PEOF) {
-                       synerror(EOFhere);
-                       /* NOTREACHED */
-               }
-               readtoken1(c, here->here->type == NHERE? SQSYNTAX : DQSYNTAX,
-                   here->eofmark, here->striptabs);
+
+               slurp_heredoc(here->eofmark, here->striptabs,
+                   here->here->nhere.type == NHERE);
+
                n = stalloc(sizeof(struct narg));
                n->narg.type = NARG;
                n->narg.next = NULL;
@@ -734,6 +855,25 @@
        }
 }
 
+void
+parse_heredoc(union node *n)
+{
+       if (n->narg.type != NARG)
+               abort();
+
+       if (n->narg.text[0] == '\0')            /* nothing to do */
+               return;
+
+       setinputstring(n->narg.text, 1);
+
+       readtoken1(pgetc(), DQSYNTAX, 1);
+
+       n->narg.text = wordtext;
+       n->narg.backquote = backquotelist;
+
+       popfile();
+}
+
 STATIC int
 peektoken(void)
 {
Prev by Date: [src/trunk]: src/bin/sh General KNF and source code cleanups, avoid scatterin...
Next by Date: [src/trunk]: src/bin/sh Move the parseredir internal subroutine out of readto...
Previous by Thread: [src/trunk]: src/bin/sh General KNF and source code cleanups, avoid scatterin...
Next by Thread: [src/trunk]: src/bin/sh Move the parseredir internal subroutine out of readto...
Indexes:
Home | Main Index | Thread Index | Old Index