Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/bin/sh Rationalise (slightly) the way that expansions are pr...



details:   https://anonhg.NetBSD.org/src/rev/8a39f12a1098
branches:  trunk
changeset: 837075:8a39f12a1098
user:      kre <kre%NetBSD.org@localhost>
date:      Sun Nov 18 17:23:37 2018 +0000

description:
Rationalise (slightly) the way that expansions are processed
to hide meta-characters in the result when the expansion was
in (double) quotes, and so should not be further processed.

Most of this has been OK for a long while, but \ needs hiding
as well, which complicates things, as \ cannot simply be hidden
in the syntax tables as one of the group of random special characters.

This was fixed earlier for simple variable expansions, but
every variety has its own code path ($var uses different code
than $n which is different than $(...), which is different
again from ~ expansions, and also from what $'...' produces).

This could be fixed by moving them all to a common code path,
but that's harder than it seems.  The form in which the data
is made available differs, so one common routine would need
a whole bunch of different "get the next char or indicate end"
methods - probably via passing in an accessor function.
That's all a lot of churn, and would probably slow the shell.

Instead, just make macros for doing the standard tests, and
use those instead of open coding (differently) each time.
This way some of the code paths don't end up forgetting to
handle '\' (which is different than all the others).

This removes one optimisation ... when no escaping is needed
(like just $var (unquoted) where magic chars (think '*') in
the value are intended to remain magic), the code avoided doing
two tests for each char ("do we need escapes" and "is this char
one that needs escaping") by choosing two different syntax
tables (choice made outside the loop) - one of which never
returns the magic "needs escaping" result, and the other does
when appropriate, and then just avoiding the "do we need escapes"
test for each character processed.   Then when '\' was fixed,
there needed to be another test for it, as it cannot (for other
reasons) be the same as all the others for which "this char
need escaping" is true.   So that added a 2nd test for each char...
Not all the code paths were updated.   Hence the bugs...

nb: this is all rarely seen in the wild, so it is no big
surprised that no-one ever noticed.

Now the "use two different syntax tables" is gone (the two
returned the same for '\' which is why '\' needed special
processing) - and in order to avoid two tests for each
char (plus the \ test) we duplicate the loops, one of which
tests each char to see if it needs an escape, the 2nd just
copies them.   This should be faster in the "no escapes"
code path (though that is not the point) and perhaps also
in the "escapes needed" path (no indirect reference to
the syntax table - though that would probably be in a
register) but makes the code slightly bigger.  For /bin/sh
the text segment (on amd64) has grown by 48 bytes.  But
it still uses the same number of 512 byte pages (and hence
also any bigger page size).  The resulting file size
(/bin/sh) is identical before and after.  So is /rescue/sh
(or /rescue/anything-else).

diffstat:

 bin/sh/expand.c |  44 ++++++++++++++++++++------------------------
 bin/sh/expand.h |   3 ++-
 bin/sh/parser.c |  15 +++++++++++----
 bin/sh/syntax.h |  15 ++++++++++++---
 4 files changed, 45 insertions(+), 32 deletions(-)

diffs (242 lines):

diff -r f33b62f6f49d -r 8a39f12a1098 bin/sh/expand.c
--- a/bin/sh/expand.c   Sun Nov 18 17:15:41 2018 +0000
+++ b/bin/sh/expand.c   Sun Nov 18 17:23:37 2018 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: expand.c,v 1.127 2018/07/22 23:07:48 kre Exp $ */
+/*     $NetBSD: expand.c,v 1.128 2018/11/18 17:23:37 kre Exp $ */
 
 /*-
  * Copyright (c) 1991, 1993
@@ -37,7 +37,7 @@
 #if 0
 static char sccsid[] = "@(#)expand.c   8.5 (Berkeley) 5/15/95";
 #else
-__RCSID("$NetBSD: expand.c,v 1.127 2018/07/22 23:07:48 kre Exp $");
+__RCSID("$NetBSD: expand.c,v 1.128 2018/11/18 17:23:37 kre Exp $");
 #endif
 #endif /* not lint */
 
@@ -227,7 +227,7 @@
 argstr(const char *p, int flag)
 {
        char c;
-       int quotes = flag & (EXP_GLOB | EXP_CASE | EXP_REDIR);  /* do CTLESC */
+       const int quotes = flag & EXP_QNEEDED;          /* do CTLESC */
        int firsteq = 1;
        const char *ifs = NULL;
        int ifs_split = EXP_IFS_SPLIT;
@@ -353,7 +353,7 @@
        const char *startp = p;
        struct passwd *pw;
        const char *home;
-       int quotes = flag & (EXP_GLOB | EXP_CASE);
+       const int quotes = flag & EXP_QNEEDED;
        char *user;
        struct stackmark smark;
 #ifdef DEBUG
@@ -423,7 +423,7 @@
                CTRACE(DBG_EXPAND, (": returning unused \"%s\"\n", startp));
                return startp;
        } while ((c = *home++) != '\0') {
-               if (quotes && SQSYNTAX[(int)c] == CCTL)
+               if (quotes && NEEDESC(c))
                        STPUTC(CTLESC, expdest);
                STPUTC(c, expdest);
        }
@@ -570,9 +570,8 @@
        struct nodelist *saveargbackq;
        char lastc;
        int startloc = dest - stackblock();
-       char const *syntax = quoted? DQSYNTAX : BASESYNTAX;
        int saveherefd;
-       int quotes = flag & (EXP_GLOB | EXP_CASE);
+       const int quotes = flag & EXP_QNEEDED;
        int nnl;
        struct stackmark smark;
 
@@ -646,7 +645,7 @@
                                        }
                                        CHECKSTRSPACE(2, dest);
                                }
-                               if (quotes && syntax[(int)lastc] == CCTL)
+                               if (quotes && quoted && NEEDESC(lastc))
                                        USTPUTC(CTLESC, dest);
                                USTPUTC(lastc, dest);
                        }
@@ -847,7 +846,7 @@
        int startloc;
        int varlen;
        int apply_ifs;
-       int quotes = flag & (EXP_GLOB | EXP_CASE | EXP_REDIR);
+       const int quotes = flag & EXP_QNEEDED;
 
        varflags = (unsigned char)*p++;
        subtype = varflags & VSTYPE;
@@ -907,7 +906,7 @@
                                 */
                                while (--special > 0) {
 /*                                             not needed, it is a number...
-                                       if (quotes && syntax[(int)*var] == CCTL)
+                                       if (quotes && NEEDESC(*var))
                                                STPUTC(CTLESC, expdest);
 */
                                        STPUTC(*var++, expdest);
@@ -919,20 +918,19 @@
                                STADJUST(-varlen, expdest);
                        }
                } else {
-                       char const *syntax = (varflags & VSQUOTE) ? DQSYNTAX
-                                                                 : BASESYNTAX;
 
                        if (subtype == VSLENGTH) {
                                for (;*val; val++)
                                        varlen++;
+                       } else if (quotes && varflags & VSQUOTE) {
+                               for (; (c = *val) != '\0'; val++) {
+                                       if (NEEDESC(c))
+                                               STPUTC(CTLESC, expdest);
+                                       STPUTC(c, expdest);
+                               }
                        } else {
-                               while (*val) {
-                                       if (quotes && (varflags & VSQUOTE) &&
-                                           (syntax[(int)*val] == CCTL ||
-                                            syntax[(int)*val] == CBACK))
-                                               STPUTC(CTLESC, expdest);
+                               while (*val)
                                        STPUTC(*val++, expdest);
-                               }
                        }
                }
        }
@@ -1113,17 +1111,15 @@
        int i;
        int sep;
        char **ap;
-       char const *syntax;
 
        if (subtype == VSLENGTH)        /* no magic required ... */
                flag &= ~EXP_FULL;
 
 #define STRTODEST(p) \
        do {\
-               if (flag & (EXP_GLOB | EXP_CASE) && subtype != VSLENGTH) { \
-                       syntax = quoted? DQSYNTAX : BASESYNTAX; \
+               if ((flag & EXP_QNEEDED) && quoted) { \
                        while (*p) { \
-                               if (syntax[(int)*p] == CCTL) \
+                               if (NEEDESC(*p)) \
                                        STPUTC(CTLESC, expdest); \
                                STPUTC(*p++, expdest); \
                        } \
@@ -1170,8 +1166,8 @@
                        if (!*ap)
                                break;
                        if (sep) {
-                               if (quoted && (flag & (EXP_GLOB|EXP_CASE)) &&
-                                   (SQSYNTAX[sep] == CCTL || SQSYNTAX[sep] == CSBACK))
+                               if (quoted && (flag & EXP_QNEEDED) &&
+                                   NEEDESC(sep))
                                        STPUTC(CTLESC, expdest);
                                STPUTC(sep, expdest);
                        } else
diff -r f33b62f6f49d -r 8a39f12a1098 bin/sh/expand.h
--- a/bin/sh/expand.h   Sun Nov 18 17:15:41 2018 +0000
+++ b/bin/sh/expand.h   Sun Nov 18 17:23:37 2018 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: expand.h,v 1.23 2017/06/07 05:08:32 kre Exp $  */
+/*     $NetBSD: expand.h,v 1.24 2018/11/18 17:23:37 kre Exp $  */
 
 /*-
  * Copyright (c) 1991, 1993
@@ -61,6 +61,7 @@
 #define EXP_NL         0x100   /* keep CRTNONL in output */
 
 #define EXP_FULL       (EXP_SPLIT | EXP_GLOB)
+#define EXP_QNEEDED    (EXP_GLOB | EXP_CASE | EXP_REDIR)
 
 union node;
 
diff -r f33b62f6f49d -r 8a39f12a1098 bin/sh/parser.c
--- a/bin/sh/parser.c   Sun Nov 18 17:15:41 2018 +0000
+++ b/bin/sh/parser.c   Sun Nov 18 17:23:37 2018 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: parser.c,v 1.152 2018/11/09 02:11:04 kre Exp $ */
+/*     $NetBSD: parser.c,v 1.153 2018/11/18 17:23:37 kre Exp $ */
 
 /*-
  * Copyright (c) 1991, 1993
@@ -37,7 +37,7 @@
 #if 0
 static char sccsid[] = "@(#)parser.c   8.7 (Berkeley) 5/16/95";
 #else
-__RCSID("$NetBSD: parser.c,v 1.152 2018/11/09 02:11:04 kre Exp $");
+__RCSID("$NetBSD: parser.c,v 1.153 2018/11/18 17:23:37 kre Exp $");
 #endif
 #endif /* not lint */
 
@@ -1725,7 +1725,7 @@
                pungetc();
                return out;
        }
-       if (SQSYNTAX[vc] == CCTL)
+       if (NEEDESC(vc))
                USTPUTC(CTLESC, out);
        USTPUTC(vc, out);
        return out;
@@ -1826,10 +1826,17 @@
                        quotef = 1;     /* current token is quoted */
                        if (ISDBLQUOTE() && c != '\\' && c != '`' &&
                            c != '$' && (c != '"' || magicq)) {
+                               /*
+                                * retain the \ (which we *know* needs CTLESC)
+                                * when in "..." and the following char is
+                                * not one of the magic few.)
+                                * Otherwise the \ has done its work, and
+                                * is dropped.
+                                */
                                USTPUTC(CTLESC, out);
                                USTPUTC('\\', out);
                        }
-                       if (SQSYNTAX[c] == CCTL || SQSYNTAX[c] == CSBACK)
+                       if (NEEDESC(c))
                                USTPUTC(CTLESC, out);
                        else if (!magicq) {
                                USTPUTC(CTLQUOTEMARK, out);
diff -r f33b62f6f49d -r 8a39f12a1098 bin/sh/syntax.h
--- a/bin/sh/syntax.h   Sun Nov 18 17:15:41 2018 +0000
+++ b/bin/sh/syntax.h   Sun Nov 18 17:23:37 2018 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: syntax.h,v 1.9 2017/08/21 13:20:49 kre Exp $   */
+/*     $NetBSD: syntax.h,v 1.10 2018/11/18 17:23:37 kre Exp $  */
 
 /*-
  * Copyright (c) 1991, 1993
@@ -47,9 +47,14 @@
 #define CLP 8                  /* a left paren in arithmetic */
 #define CRP 9                  /* a right paren in arithmetic */
 #define CEOF 10                        /* end of file */
-#define CCTL 11                        /* like CWORD, except it must be escaped */
-#define CSPCL 12               /* these terminate a word */
+#define CSPCL 11               /* these terminate a word */
+#define CCTL 12                        /* like CWORD, except it must be escaped */
 #define CSBACK 13              /* a backslash in a single quote syntax */
+       /*
+        * note CSBACK == (CCTL|1)
+        * the code does not rely upon that, but keeping it allows a
+        * smart enough compiler to optimise some tests
+        */
 
 /* Syntax classes for is_ functions */
 #define ISDIGIT 01             /* a digit */
@@ -80,6 +85,10 @@
 #define        is_space(c)     (sh_ctype(c) & ISSPACE)
 #define        digit_val(c)    ((c) - '0')
 
+/* true if the arg char needs CTLESC to protect it */
+#define        NEEDESC(c)      (SQSYNTAX[(int)(c)] == CCTL || \
+                        SQSYNTAX[(int)(c)] == CSBACK)
+
 extern const char basesyntax[];
 extern const char dqsyntax[];
 extern const char sqsyntax[];



Home | Main Index | Thread Index | Old Index