Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/bin/sh Finish the fixes from Feb 4 for handling of random da...



details:   https://anonhg.NetBSD.org/src/rev/ef742ebf51b9
branches:  trunk
changeset: 449219:ef742ebf51b9
user:      kre <kre%NetBSD.org@localhost>
date:      Wed Feb 27 04:10:56 2019 +0000

description:
Finish the fixes from Feb 4 for handling of random data that
matches the internal CTL* chars.

The earlier fixes handled CTL* char values in var expansions,
but not in various other places they can occur (positional
parameters, $@ $* -- even potentially $0 and ~ expansions,
as well as byte strings generated from a \u in a $'' string).

These should all be correctly handled now.   There is a new
ISCTL() macro to make the test, rather than using the old
BASESYNTAX[c]==CCTL form (which is still a viable alternative)
as the new way allows compiler optimisations, and fewer memory
references, so it should be smaller and faster.

Also, be sure in all cases to remove any CTLESC (or other)
CTL* chars from all strings before they are made available
for any external use (there was one case missed - which didn't
matter when we weren't bothering to escape the CTL* chars at
all.)

XXX pullup-8 (will need to be via a patch) along with the Feb 4 fixes.

diffstat:

 bin/sh/expand.c |  39 +++++++++++++++++--------------
 bin/sh/parser.c |  70 ++++++++++++++++++++++++++++++++++++++++++++------------
 bin/sh/syntax.h |   7 ++++-
 3 files changed, 82 insertions(+), 34 deletions(-)

diffs (266 lines):

diff -r 47089b5437d0 -r ef742ebf51b9 bin/sh/expand.c
--- a/bin/sh/expand.c   Wed Feb 27 04:03:06 2019 +0000
+++ b/bin/sh/expand.c   Wed Feb 27 04:10:56 2019 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: expand.c,v 1.130 2019/02/04 09:56:26 kre Exp $ */
+/*     $NetBSD: expand.c,v 1.131 2019/02/27 04:10:56 kre Exp $ */
 
 /*-
  * Copyright (c) 1991, 1993
@@ -37,7 +37,7 @@
 #if 0
 static char sccsid[] = "@(#)expand.c   8.5 (Berkeley) 5/15/95";
 #else
-__RCSID("$NetBSD: expand.c,v 1.130 2019/02/04 09:56:26 kre Exp $");
+__RCSID("$NetBSD: expand.c,v 1.131 2019/02/27 04:10:56 kre Exp $");
 #endif
 #endif /* not lint */
 
@@ -208,9 +208,13 @@
                        expandmeta(exparg.list, flag);
                else
                        add_args(exparg.list);
+#if 0
+       } else if (flag & EXP_REDIR) {
+               /* if EXP_REDIR ever happens, it happens here */
+               /* for now just (below) remove escapes, and leave it alone */
+#endif
        } else {
-               if (flag & EXP_REDIR) /*XXX - for now, just remove escapes */
-                       rmescapes(p);
+               rmescapes(p);   /* we might have escaped CTL bytes to remove */
                sp = stalloc(sizeof(*sp));
                sp->text = p;
                *exparg.lastp = sp;
@@ -286,7 +290,7 @@
                        ifs_split = EXP_IFS_SPLIT;
                        break;
                case CTLESC:
-                       if (quotes)
+                       if (quotes || ISCTL(*p))
                                STPUTC(c, expdest);
                        c = *p++;
                        STPUTC(c, expdest);
@@ -437,7 +441,7 @@
                CTRACE(DBG_EXPAND, (": returning unused \"%s\"\n", startp));
                return startp;
        } while ((c = *home++) != '\0') {
-               if (quotes && NEEDESC(c))
+               if ((quotes && NEEDESC(c)) || ISCTL(c))
                        STPUTC(CTLESC, expdest);
                STPUTC(c, expdest);
        }
@@ -659,7 +663,8 @@
                                        }
                                        CHECKSTRSPACE(2, dest);
                                }
-                               if (quotes && quoted && NEEDESC(lastc))
+                               if ((quotes && quoted && NEEDESC(lastc)) ||
+                                   ISCTL(lastc))
                                        USTPUTC(CTLESC, dest);
                                USTPUTC(lastc, dest);
                        }
@@ -960,14 +965,9 @@
                                 * so make sure that any data char that
                                 * might be mistaken for one of our CTLxxx
                                 * magic chars is protected ... always.
-                                *
-                                * In BASESYNTAX only our internal CTLxxx
-                                * chars are CCTL, in other syntaxes other
-                                * chars are added (we could also use
-                                * ARISYNTAX, but this is safer)
                                 */
                                for (; (c = *val) != '\0'; val++) {
-                                       if (BASESYNTAX[c] == CCTL)
+                                       if (ISCTL(c))
                                                STPUTC(CTLESC, expdest);
                                        STPUTC(c, expdest);
                                }
@@ -1170,8 +1170,11 @@
                                STPUTC(*p++, expdest); \
                        } \
                } else \
-                       while (*p) \
+                       while (*p) { \
+                               if (ISCTL(*p)) \
+                                       STPUTC(CTLESC, expdest); \
                                STPUTC(*p++, expdest); \
+                       } \
        } while (0)
 
 
@@ -1990,7 +1993,7 @@
        char *p, *q;
 
        p = str;
-       while (BASESYNTAX[(int)*p] != CCTL) {
+       while (!ISCTL(*p)) {
                if (*p++ == '\0')
                        return;
        }
@@ -2008,7 +2011,7 @@
                if (*p == CTLESC)
                        p++;
 #ifdef DEBUG
-               else if (BASESYNTAX[(int)*p] == CCTL)
+               else if (ISCTL(*p))
                        abort();
 #endif
                *q++ = *p++;
@@ -2033,7 +2036,7 @@
        int nls = 0, holdnl = 0, holdlast;
 
        p = str;
-       while (BASESYNTAX[(int)*p] != CCTL) {
+       while (!ISCTL(*p)) {
                if (*p++ == '\0')
                        return;
        }
@@ -2058,7 +2061,7 @@
                if (*p == CTLESC)
                        p++;
 #ifdef DEBUG
-               else if (BASESYNTAX[(int)*p] == CCTL)
+               else if (ISCTL(*p))
                        abort();
 #endif
 
diff -r 47089b5437d0 -r ef742ebf51b9 bin/sh/parser.c
--- a/bin/sh/parser.c   Wed Feb 27 04:03:06 2019 +0000
+++ b/bin/sh/parser.c   Wed Feb 27 04:10:56 2019 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: parser.c,v 1.166 2019/02/09 09:50:31 kre Exp $ */
+/*     $NetBSD: parser.c,v 1.167 2019/02/27 04:10:56 kre Exp $ */
 
 /*-
  * Copyright (c) 1991, 1993
@@ -37,7 +37,7 @@
 #if 0
 static char sccsid[] = "@(#)parser.c   8.7 (Berkeley) 5/16/95";
 #else
-__RCSID("$NetBSD: parser.c,v 1.166 2019/02/09 09:50:31 kre Exp $");
+__RCSID("$NetBSD: parser.c,v 1.167 2019/02/27 04:10:56 kre Exp $");
 #endif
 #endif /* not lint */
 
@@ -1657,7 +1657,7 @@
        switch (c) {
        case '\0':
        case PEOF:
-               synerror("Unterminated quoted string");
+               synerror("Unterminated quoted string ($'...)");
        case '\n':
                plinno++;
                VTRACE(DBG_LEXER, ("@%d ", plinno));
@@ -1739,22 +1739,62 @@
                                synerror("Invalid \\u escape sequence");
 
                        /* XXX should we use iconv here. What locale? */
-                       CHECKSTRSPACE(4, out);
+                       CHECKSTRSPACE(12, out);
 
+/*
+ * Add a byte to output string, while checking if it needs to
+ * be escaped -- if its value happens to match the value of one
+ * of our internal CTL* chars - which would (at a minimum) be
+ * summarily removed later, if not escaped.
+ *
+ * The current definition of ISCTL() allows the compiler to
+ * optimise away either half, or all, of the test in most of
+ * the cases here (0xc0 | anything) cannot be between 0x80 and 0x9f
+ * for example, so there a test is not needed).
+ *
+ * Which tests can be removed depends upon the actual values
+ * selected for the CTL* chars.
+ */
+#define        ESC_USTPUTC(c, o) do {                          \
+               char _ch = (c);                         \
+                                                       \
+               if (ISCTL(_ch))                         \
+                       USTPUTC(CTLESC, o);             \
+               USTPUTC(_ch, o);                        \
+       } while (0)
+
+                       VTRACE(DBG_LEXER, ("CSTR(\\%c%8.8x)", n==4?'u':'U', v));
                        if (v <= 0x7ff) {
-                               USTPUTC(0xc0 | v >> 6, out);
-                               USTPUTC(0x80 | (v & 0x3f), out);
+                               ESC_USTPUTC(0xc0 | v >> 6, out);
+                               ESC_USTPUTC(0x80 | (v & 0x3f), out);
                                return out;
                        } else if (v <= 0xffff) {
-                               USTPUTC(0xe0 | v >> 12, out);
-                               USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
-                               USTPUTC(0x80 | (v & 0x3f), out);
+                               ESC_USTPUTC(0xe0 | v >> 12, out);
+                               ESC_USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
+                               ESC_USTPUTC(0x80 | (v & 0x3f), out);
                                return out;
                        } else if (v <= 0x10ffff) {
-                               USTPUTC(0xf0 | v >> 18, out);
-                               USTPUTC(0x80 | ((v >> 12) & 0x3f), out);
-                               USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
-                               USTPUTC(0x80 | (v & 0x3f), out);
+                               ESC_USTPUTC(0xf0 | v >> 18, out);
+                               ESC_USTPUTC(0x80 | ((v >> 12) & 0x3f), out);
+                               ESC_USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
+                               ESC_USTPUTC(0x80 | (v & 0x3f), out);
+                               return out;
+
+       /* these next two are not very likely, but we may as well be complete */
+                       } else if (v <= 0x3FFFFFF) {
+                               ESC_USTPUTC(0xf8 | v >> 24, out);
+                               ESC_USTPUTC(0x80 | ((v >> 18) & 0x3f), out);
+                               ESC_USTPUTC(0x80 | ((v >> 12) & 0x3f), out);
+                               ESC_USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
+                               ESC_USTPUTC(0x80 | (v & 0x3f), out);
+                               return out;
+                       } else if (v <= 0x7FFFFFFF) {
+                               ESC_USTPUTC(0xfC | v >> 30, out);
+                               ESC_USTPUTC(0x80 | ((v >> 24) & 0x3f), out);
+                               ESC_USTPUTC(0x80 | ((v >> 18) & 0x3f), out);
+                               ESC_USTPUTC(0x80 | ((v >> 12) & 0x3f), out);
+                               ESC_USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
+                               ESC_USTPUTC(0x80 | (v & 0x3f), out);
                                return out;
                        }
                        if (v > 127)
@@ -1789,7 +1829,7 @@
                        if (c == '\\')
                                c = pgetc();
                        if (c == PEOF)
-                               synerror("Unterminated quoted string");
+                               synerror("Unterminated quoted string ($'...)");
                        if (c == '\n') {
                                plinno++;
                                if (doprompt)
@@ -2408,7 +2448,7 @@
                        continue;
                if (c == CTLESC)
                        p++;
-               else if (BASESYNTAX[(int)c] == CCTL)
+               else if (ISCTL(c))
                        return 0;
        }
        return 1;
diff -r 47089b5437d0 -r ef742ebf51b9 bin/sh/syntax.h
--- a/bin/sh/syntax.h   Wed Feb 27 04:03:06 2019 +0000
+++ b/bin/sh/syntax.h   Wed Feb 27 04:10:56 2019 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: syntax.h,v 1.11 2018/12/03 06:40:26 kre Exp $  */
+/*     $NetBSD: syntax.h,v 1.12 2019/02/27 04:10:56 kre Exp $  */
 
 /*-
  * Copyright (c) 1991, 1993
@@ -91,6 +91,11 @@
 #define        NEEDESC(c)      (SQSYNTAX[(int)(c)] == CCTL || \
                         SQSYNTAX[(int)(c)] == CSBACK)
 
+#define        ISCTL(c)        ((c) >= CTL_FIRST && (c) <= CTL_LAST)
+#if 0                          /* alternative form (generally slower) */
+#define        ICCTL(c)        (BASESYNTAX[(int)(c)] == CCTL)
+#endif
+
 extern const char basesyntax[];
 extern const char dqsyntax[];
 extern const char sqsyntax[];



Home | Main Index | Thread Index | Old Index