Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/netbsd-8]: src/bin/sh Pull up following revision(s) via patch (requested...



details:   https://anonhg.NetBSD.org/src/rev/6abeb6a8aa75
branches:  netbsd-8
changeset: 435252:6abeb6a8aa75
user:      martin <martin%NetBSD.org@localhost>
date:      Mon Sep 10 15:45:11 2018 +0000

description:
Pull up following revision(s) via patch (requested by kre in ticket #1015):

        bin/sh/expand.c: revision 1.124
        bin/sh/expand.c: revision 1.127
        bin/sh/parser.c: revision 1.148
        bin/sh/parser.c: revision 1.149
        bin/sh/syntax.c: revision 1.6
        bin/sh/syntax.h: revision 1.9 (partial)

First pass at fixing some of the more arcane pattern matching
possibilities that we do not currently handle all that well.

This mostly means (for now) making sure that quoted pattern
magic characters (as well as quoted sh syntax magic chars)
are properly marked, so they remain known as being quoted,
and do not turn into pattern magic.   Also, make sure that an
unquoted \ in a pattern always quotes whatever comes next
(which, unlike in regular expressions, includes inside []
matches).

 -

Part 2 of pattern matching (glob etc) fixes.
Attempt to correctly deal with \ (both when it is a literal,
in appropriate cases, and when it appears as CTLESC when it was
detected as a quoting character during parsing).

In a pattern, in sh, no quoted character can ever be anything other
than a literal character.   This is quite different than regular
expressions, and even different than other uses of glob matching,
where shell quoting is not an issue.

In something like
        ls ?\*.c
the ? is a meta-character, the * is a literal (it was quoted).  This
is nothing new, sh has handled that properly for ever.

But the same happens with
        VAR='?\*.c'
and
        ls $VAR
which has not always been handled correctly.   Of course, in
        ls "$VAR"
nothing in VAR is a meta-character (the entire expansion is quoted)
so even the '\' must match literally (or more accurately, no matching
happens - VAR simply contains an "unusual" filename).  But if it had
been
        ls *"$VAR"
then we would be looking for filenames that end with the literal 5
characters that make up $VAR.

The same kinds of things are required of matching patterns in case
statements, and sub-strings with the % and # operators in variable
expansions.

While here, the final remnant of the ancient !! pattern matching
hack has been removed (the code that actually implemented it was
long gone, but one small piece remained, not doing any real harm,
but potentially wasting time - if someone gave a pattern which would
once have invoked that hack.)

diffstat:

 bin/sh/expand.c |  111 ++++++++++++++++++++++++++++++++++++++++++++++---------
 bin/sh/parser.c |   15 +++++--
 bin/sh/syntax.c |   11 +++--
 bin/sh/syntax.h |    3 +-
 4 files changed, 112 insertions(+), 28 deletions(-)

diffs (truncated from 335 to 300 lines):

diff -r ef7c1e279b1b -r 6abeb6a8aa75 bin/sh/expand.c
--- a/bin/sh/expand.c   Fri Sep 07 12:38:23 2018 +0000
+++ b/bin/sh/expand.c   Mon Sep 10 15:45:11 2018 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: expand.c,v 1.110.2.4 2018/07/13 14:32:01 martin Exp $  */
+/*     $NetBSD: expand.c,v 1.110.2.5 2018/09/10 15:45:11 martin Exp $  */
 
 /*-
  * Copyright (c) 1991, 1993
@@ -37,7 +37,7 @@
 #if 0
 static char sccsid[] = "@(#)expand.c   8.5 (Berkeley) 5/15/95";
 #else
-__RCSID("$NetBSD: expand.c,v 1.110.2.4 2018/07/13 14:32:01 martin Exp $");
+__RCSID("$NetBSD: expand.c,v 1.110.2.5 2018/09/10 15:45:11 martin Exp $");
 #endif
 #endif /* not lint */
 
@@ -924,7 +924,9 @@
                                        varlen++;
                        } else {
                                while (*val) {
-                                       if (quotes && syntax[(int)*val] == CCTL)
+                                       if (quotes && (varflags & VSQUOTE) &&
+                                           (syntax[(int)*val] == CCTL ||
+                                            syntax[(int)*val] == CBACK))
                                                STPUTC(CTLESC, expdest);
                                        STPUTC(*val++, expdest);
                                }
@@ -1106,7 +1108,7 @@
        int num;
        char *p;
        int i;
-       char sep;
+       int sep;
        char **ap;
        char const *syntax;
 
@@ -1164,10 +1166,14 @@
                        STRTODEST(p);
                        if (!*ap)
                                break;
-                       if (sep)
+                       if (sep) {
+                               if (quoted && (flag & (EXP_GLOB|EXP_CASE)) &&
+                                   (SQSYNTAX[sep] == CCTL || SQSYNTAX[sep] == CSBACK))
+                                       STPUTC(CTLESC, expdest);
                                STPUTC(sep, expdest);
-                       else if ((flag & (EXP_SPLIT|EXP_IN_QUOTES)) == EXP_SPLIT
-                           && !quoted && **ap != '\0')
+                       } else
+                           if ((flag & (EXP_SPLIT|EXP_IN_QUOTES)) == EXP_SPLIT
+                             && !quoted && **ap != '\0')
                                STPUTC('\0', expdest);
                }
                return;
@@ -1457,22 +1463,59 @@
                        metaflag = 1;
                else if (*p == '[') {
                        q = p + 1;
-                       if (*q == '!')
+                       if (*q == '!' || *q == '^')
                                q++;
                        for (;;) {
                                while (*q == CTLQUOTEMARK || *q == CTLNONL)
                                        q++;
-                               if (*q == CTLESC)
+                               if (*q == ']') {
                                        q++;
-                               if (*q == '/' || *q == '\0')
-                                       break;
-                               if (*++q == ']') {
                                        metaflag = 1;
                                        break;
                                }
+                               if (*q == '[' && q[1] == ':') {
+                                       /*
+                                        * character class, look for :] ending
+                                        * also stop on ']' (end bracket expr)
+                                        * or '\0' or '/' (end pattern)
+                                        */
+                                       while (*++q != '\0' && *q != ']' &&
+                                           *q != '/') {
+                                               if (*q == CTLESC) {
+                                                       if (*++q == '\0')
+                                                               break;
+                                                       if (*q == '/')
+                                                               break;
+                                               } else if (*q == ':' &&
+                                                   q[1] == ']')
+                                                       break;
+                                       }
+                                       if (*q == ':') {
+                                               /*
+                                                * stopped at ':]'
+                                                * still in [...]
+                                                * skip ":]" and continue;
+                                                */
+                                               q += 2;
+                                               continue;
+                                       }
+
+                                       /* done at end of pattern, not [...] */
+                                       if (*q == '\0' || *q == '/')
+                                               break;
+
+                                       /* found the ']', we have a [...] */
+                                       metaflag = 1;
+                                       q++;    /* skip ']' */
+                                       break;
+                               }
+                               if (*q == CTLESC)
+                                       q++;
+                               /* end of pattern cannot be escaped */
+                               if (*q == '/' || *q == '\0')
+                                       break;
+                               q++;
                        }
-               } else if (*p == '!' && p[1] == '!'     && (p == name || p[-1] == '/')) {
-                       metaflag = 1;
                } else if (*p == '\0')
                        break;
                else if (*p == CTLQUOTEMARK || *p == CTLNONL)
@@ -1690,12 +1733,26 @@
        for (;;) {
                switch (c = *p++) {
                case '\0':
+                       if (squoted && *q == CTLESC) {
+                               if (q[1] == '\0')
+                                       q++;
+                       }
                        if (*q != '\0')
                                goto backtrack;
                        return 1;
                case CTLESC:
                        if (squoted && *q == CTLESC)
                                q++;
+                       if (*p == '\0' && *q == '\0') {
+                               VTRACE(DBG_MATCH, ("match-\\\n"));
+                               return 1;
+                       }
+                       if (*q++ != *p++)
+                               goto backtrack;
+                       break;
+               case '\\':
+                       if (squoted && *q == CTLESC)
+                               q++;
                        if (*q++ != *p++)
                                goto backtrack;
                        break;
@@ -1725,6 +1782,10 @@
                                        q++;
                                }
                        }
+                       if (c == CTLESC && p[1] == '\0') {
+                               VTRACE(DBG_MATCH, ("match+\\\n"));
+                               return 1;
+                       }
                        /*
                         * First try the shortest match for the '*' that
                         * could work. We can forget any earlier '*' since
@@ -1739,19 +1800,31 @@
                        int invert, found;
                        unsigned char chr;
 
+                       /*
+                        * First quick check to see if there is a
+                        * possible matching ']' - if not, then this
+                        * is not a char class, and the '[' is just
+                        * a literal '['.
+                        *
+                        * This check will not detect all non classes, but
+                        * that's OK - It just means that we execute the
+                        * harder code sometimes when it cannot succeed.
+                        */
                        endp = p;
-                       if (*endp == '!')
+                       if (*endp == '!' || *endp == '^')
                                endp++;
                        for (;;) {
                                while (*endp == CTLQUOTEMARK || *endp==CTLNONL)
                                        endp++;
                                if (*endp == '\0')
-                                       goto dft;               /* no matching ] */
+                                       goto dft;       /* no matching ] */
                                if (*endp == CTLESC)
                                        endp++;
                                if (*++endp == ']')
                                        break;
                        }
+                       /* end shortcut */
+
                        invert = 0;
                        savep = p, saveq = q;
                        invert = 0;
@@ -1762,6 +1835,8 @@
                        found = 0;
                        if (*q == '\0')
                                return 0;
+                       if (squoted && *q == CTLESC)
+                               q++;
                        chr = (unsigned char)*q++;
                        c = *p++;
                        do {
@@ -1779,12 +1854,12 @@
                                                continue;
                                        }
                                }
-                               if (c == CTLESC)
+                               if (c == CTLESC || c == '\\')
                                        c = *p++;
                                wc = (unsigned char)c;
                                if (*p == '-' && p[1] != ']') {
                                        p++;
-                                       if (*p == CTLESC)
+                                       if (*p == CTLESC || *p == '\\')
                                                p++;
                                        wc2 = (unsigned char)*p++;
                                        if (   collate_range_cmp(chr, wc) >= 0
diff -r ef7c1e279b1b -r 6abeb6a8aa75 bin/sh/parser.c
--- a/bin/sh/parser.c   Fri Sep 07 12:38:23 2018 +0000
+++ b/bin/sh/parser.c   Mon Sep 10 15:45:11 2018 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: parser.c,v 1.132.2.5 2018/08/25 14:45:37 martin Exp $  */
+/*     $NetBSD: parser.c,v 1.132.2.6 2018/09/10 15:45:11 martin Exp $  */
 
 /*-
  * Copyright (c) 1991, 1993
@@ -37,7 +37,7 @@
 #if 0
 static char sccsid[] = "@(#)parser.c   8.7 (Berkeley) 5/16/95";
 #else
-__RCSID("$NetBSD: parser.c,v 1.132.2.5 2018/08/25 14:45:37 martin Exp $");
+__RCSID("$NetBSD: parser.c,v 1.132.2.6 2018/09/10 15:45:11 martin Exp $");
 #endif
 #endif /* not lint */
 
@@ -1633,7 +1633,7 @@
        for (c = firstc ;; c = pgetc_macro()) { /* until of token */
                if (syntax == ARISYNTAX)
                        out = insert_elided_nl(out);
-               CHECKSTRSPACE(4, out);  /* permit 4 calls to USTPUTC */
+               CHECKSTRSPACE(6, out);  /* permit 6 calls to USTPUTC */
                switch (syntax[c]) {
                case CNL:       /* '\n' */
                        if (syntax == BASESYNTAX && varnest == 0)
@@ -1646,6 +1646,9 @@
                                setprompt(0);
                        continue;
 
+               case CSBACK:    /* single quoted backslash */
+                       USTPUTC(CTLESC, out);
+                       /* FALLTHROUGH */
                case CWORD:
                        USTPUTC(c, out);
                        continue;
@@ -1672,9 +1675,11 @@
                        }
                        quotef = 1;     /* current token is quoted */
                        if (ISDBLQUOTE() && c != '\\' && c != '`' &&
-                           c != '$' && (c != '"' || magicq))
+                           c != '$' && (c != '"' || magicq)) {
+                               USTPUTC(CTLESC, out);
                                USTPUTC('\\', out);
-                       if (SQSYNTAX[c] == CCTL)
+                       }
+                       if (SQSYNTAX[c] == CCTL || SQSYNTAX[c] == CSBACK)
                                USTPUTC(CTLESC, out);
                        else if (!magicq) {
                                USTPUTC(CTLQUOTEMARK, out);
diff -r ef7c1e279b1b -r 6abeb6a8aa75 bin/sh/syntax.c
--- a/bin/sh/syntax.c   Fri Sep 07 12:38:23 2018 +0000
+++ b/bin/sh/syntax.c   Mon Sep 10 15:45:11 2018 +0000
@@ -1,7 +1,7 @@
-/*     $NetBSD: syntax.c,v 1.3.26.1 2017/07/23 14:58:14 snj Exp $      */
+/*     $NetBSD: syntax.c,v 1.3.26.2 2018/09/10 15:45:11 martin Exp $   */
 
 #include <sys/cdefs.h>
-__RCSID("$NetBSD: syntax.c,v 1.3.26.1 2017/07/23 14:58:14 snj Exp $");
+__RCSID("$NetBSD: syntax.c,v 1.3.26.2 2018/09/10 15:45:11 martin Exp $");
 
 #include <limits.h>
 #include "shell.h"
@@ -46,7 +46,7 @@
     set('`', CBQUOTE)
     set('$', CVAR)
     set('}', CENDVAR)
-    /* ':/' for tilde expansion, '-' for [a\-x] pattern ranges */
+    /* ':/' for tilde expansion, '-]' for [a\-x] pattern ranges */
     set('!', CCTL)
     set('*', CCTL)
     set('?', CCTL)
@@ -56,6 +56,7 @@
     set(':', CCTL)
     set('/', CCTL)
     set('-', CCTL)
+    set(']', CCTL)
 };
 
 /* syntax table used when in single quotes */



Home | Main Index | Thread Index | Old Index