Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/usr.bin/make make: fix parsing of unevaluated subexpressions...



details:   https://anonhg.NetBSD.org/src/rev/36d6db11009c
branches:  trunk
changeset: 373569:36d6db11009c
user:      rillig <rillig%NetBSD.org@localhost>
date:      Sat Feb 18 11:16:09 2023 +0000

description:
make: fix parsing of unevaluated subexpressions with unbalanced '{}'

Since var.c 1.323 from 2020-07-26, modifiers containing unbalanced
braces or parentheses were parsed differently, depending on whether they
were relevant or not.

For example, the expression '${VAR:...}' is enclosed with braces. When
this expression has a modifier ':S,},}},g' that would double each '}' in
that expression, the parser got confused:

If the expression was relevant, the modifier was parsed as usual, taking
into account that the 3 '}' in the modifier are ordinary characters.

If the expression was irrelevant, the parser only counted the '{' and
the '}', without taking into account that a '}' might be escaped by a
'\' or be an ordinary character.  Parsing therefore stopped at the first
'}', assuming it would finish the expression '${VAR:S,}'.

This parsing mode of only counting balanced '{' and '}' makes sense for
the modifier ':@var@...@', which expands each word of the expression
using the template from the '...'.  These templates tend to be simple
enough that counting the '{' and '}' suffices.

diffstat:

 usr.bin/make/make.h                        |  10 ++++++-
 usr.bin/make/unit-tests/parse-var.exp      |   6 +---
 usr.bin/make/unit-tests/parse-var.mk       |  37 +++++++++++---------------
 usr.bin/make/unit-tests/var-eval-short.exp |   4 ++
 usr.bin/make/unit-tests/varmod-loop.mk     |   7 ++--
 usr.bin/make/var.c                         |  41 ++++++++++++-----------------
 6 files changed, 50 insertions(+), 55 deletions(-)

diffs (243 lines):

diff -r cdb9be804079 -r 36d6db11009c usr.bin/make/make.h
--- a/usr.bin/make/make.h       Sat Feb 18 07:58:34 2023 +0000
+++ b/usr.bin/make/make.h       Sat Feb 18 11:16:09 2023 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: make.h,v 1.316 2023/02/15 06:52:58 rillig Exp $        */
+/*     $NetBSD: make.h,v 1.317 2023/02/18 11:16:09 rillig Exp $        */
 
 /*
  * Copyright (c) 1988, 1989, 1990, 1993
@@ -911,6 +911,14 @@
         */
        VARE_PARSE_ONLY,
 
+       /*
+        * Parse text in which '${...}' and '$(...)' are not parsed as
+        * subexpressions (with all their individual escaping rules) but
+        * instead simply as text with balanced '${}' or '$()'.  Other '$'
+        * are copied verbatim.
+        */
+       VARE_PARSE_BALANCED,
+
        /* Parse and evaluate the expression. */
        VARE_WANTRES,
 
diff -r cdb9be804079 -r 36d6db11009c usr.bin/make/unit-tests/parse-var.exp
--- a/usr.bin/make/unit-tests/parse-var.exp     Sat Feb 18 07:58:34 2023 +0000
+++ b/usr.bin/make/unit-tests/parse-var.exp     Sat Feb 18 11:16:09 2023 +0000
@@ -1,5 +1,1 @@
-make: Unfinished modifier for "BRACE_GROUP" (',' missing)
-make: "parse-var.mk" line 129: Malformed conditional (0 && ${BRACE_GROUP:S,${BRACE_PAIR:S,{,{{,},<lbraces>,})
-make: Fatal errors encountered -- cannot continue
-make: stopped in unit-tests
-exit status 1
+exit status 0
diff -r cdb9be804079 -r 36d6db11009c usr.bin/make/unit-tests/parse-var.mk
--- a/usr.bin/make/unit-tests/parse-var.mk      Sat Feb 18 07:58:34 2023 +0000
+++ b/usr.bin/make/unit-tests/parse-var.mk      Sat Feb 18 11:16:09 2023 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: parse-var.mk,v 1.7 2023/02/14 21:56:48 rillig Exp $
+# $NetBSD: parse-var.mk,v 1.8 2023/02/18 11:16:09 rillig Exp $
 #
 # Tests for parsing variable expressions.
 #
@@ -85,12 +85,9 @@
 .  error
 .endif
 
-# XXX: The following paragraph already uses past tense, in the hope that the
-# parsing behavior can be cleaned up soon.
-
-# Since var.c 1.323 from 2020-07-26 18:11 and except for var.c 1.1028 from
-# 2022-08-08, the exact way of parsing an expression depended on whether the
-# expression was actually evaluated or merely parsed.
+# Since var.c 1.323 from 2020-07-26 18:11 and until var.c 1.1047 from
+# 2023-02-18, the exact way of parsing an expression with subexpressions
+# depended on whether the expression was actually evaluated or merely parsed.
 #
 # If it was evaluated, nested expressions were parsed correctly, parsing each
 # modifier according to its exact definition (see varmod.mk).
@@ -102,30 +99,28 @@
 # expression was not parsed correctly.  Instead, make only counted the opening
 # and closing delimiters, which failed for nested modifiers with unbalanced
 # braces.
-#
-# This naive brace counting was implemented in ParseModifierPartDollar.  As of
-# var.c 1.1029, there are still several other places that merely count braces
-# instead of properly parsing subexpressions.
 
 #.MAKEFLAGS: -dcpv
 # Keep these braces outside the conditions below, to keep them simple to
-# understand.  If the BRACE_PAIR had been replaced with ':U{}', the '}' would
-# have to be escaped, but not the '{'.  This asymmetry would have made the
-# example even more complicated to understand.
+# understand.  If the expression ${BRACE_PAIR:...} had been replaced with the
+# literal ${:U{}}, the '}' would have to be escaped, but not the '{'.  This
+# asymmetry would have made the example even more complicated to understand.
 BRACE_PAIR=    {}
-# In this test word, the '{{}' in the middle will be replaced.
+# In this test word, the below conditions will replace the '{{}' in the middle
+# with the string '<lbraces>'.
 BRACE_GROUP=   {{{{}}}}
 
 # The inner ':S' modifier turns the word '{}' into '{{}'.
 # The outer ':S' modifier then replaces '{{}' with '<lbraces>'.
-# In the first case, the outer expression is relevant and is parsed correctly.
+# Due to the always-true condition '1', the outer expression is relevant and
+# is parsed correctly.
 .if 1 && ${BRACE_GROUP:S,${BRACE_PAIR:S,{,{{,},<lbraces>,}
 .endif
-# In the second case, the outer expression was irrelevant.  In this case, in
-# the parts of the outer ':S' modifier, make only counted the braces, and since
-# the inner expression '${BRACE_PAIR:...}' contains more '{' than '}', parsing
-# failed with the error message 'Unfinished modifier for "BRACE_GROUP"'.  Fixed
-# in var.c 1.1028 from 2022-08-08, reverted in var.c 1.1029 from 2022-08-23.
+# Due to the always-false condition '0', the outer expression is irrelevant.
+# In this case, in the parts of the outer ':S' modifier, the expression parser
+# only counted the braces, and since the inner expression '${BRACE_PAIR:...}'
+# contains more '{' than '}', parsing failed with the error message 'Unfinished
+# modifier for "BRACE_GROUP"'.  Fixed in var.c 1.1047 from 2023-02-18.
 .if 0 && ${BRACE_GROUP:S,${BRACE_PAIR:S,{,{{,},<lbraces>,}
 .endif
 #.MAKEFLAGS: -d0
diff -r cdb9be804079 -r 36d6db11009c usr.bin/make/unit-tests/var-eval-short.exp
--- a/usr.bin/make/unit-tests/var-eval-short.exp        Sat Feb 18 07:58:34 2023 +0000
+++ b/usr.bin/make/unit-tests/var-eval-short.exp        Sat Feb 18 11:16:09 2023 +0000
@@ -7,7 +7,9 @@
 CondParser_Eval: 0 && ${0:?${FAIL}then:${FAIL}else}
 Var_Parse: ${0:?${FAIL}then:${FAIL}else} (parse-only)
 Parsing modifier ${0:?...}
+Var_Parse: ${FAIL}then:${FAIL}else} (parse-only)
 Modifier part: "${FAIL}then"
+Var_Parse: ${FAIL}else} (parse-only)
 Modifier part: "${FAIL}else"
 Result of ${0:?${FAIL}then:${FAIL}else} is "" (parse-only, defined)
 Parsing line 163: DEFINED=     defined
@@ -17,7 +19,9 @@
 Parsing modifier ${DEFINED:L}
 Result of ${DEFINED:L} is "defined" (parse-only, regular)
 Parsing modifier ${DEFINED:?...}
+Var_Parse: ${FAIL}then:${FAIL}else} (parse-only)
 Modifier part: "${FAIL}then"
+Var_Parse: ${FAIL}else} (parse-only)
 Modifier part: "${FAIL}else"
 Result of ${DEFINED:?${FAIL}then:${FAIL}else} is "defined" (parse-only, regular)
 Parsing line 166: .MAKEFLAGS: -d0
diff -r cdb9be804079 -r 36d6db11009c usr.bin/make/unit-tests/varmod-loop.mk
--- a/usr.bin/make/unit-tests/varmod-loop.mk    Sat Feb 18 07:58:34 2023 +0000
+++ b/usr.bin/make/unit-tests/varmod-loop.mk    Sat Feb 18 11:16:09 2023 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: varmod-loop.mk,v 1.21 2022/08/23 21:13:46 rillig Exp $
+# $NetBSD: varmod-loop.mk,v 1.22 2023/02/18 11:16:09 rillig Exp $
 #
 # Tests for the :@var@...${var}...@ variable modifier.
 
@@ -192,15 +192,14 @@
 # except for '$i', which is replaced with the then-current value '1' of the
 # iteration variable.
 #
-# XXX: was broken in var.c 1.1028 from 2022-08-08, reverted in var.c 1.1029
-# from 2022-08-23; see parse-var.mk, keyword 'BRACE_GROUP'.
+# See parse-var.mk, keyword 'BRACE_GROUP'.
 all: varmod-loop-literal-dollar
 varmod-loop-literal-dollar: .PHONY
        : ${:U1:@i@ t=$$(( $${t:-0} + $i ))@}
 
 
 # When parsing the loop body, each '\$', '\@' and '\\' is unescaped to '$',
-# '@' and '\'; all other backslashes are retained.
+# '@' and '\', respectively; all other backslashes are retained.
 #
 # In practice, the '$' is not escaped as '\$', as there is a second round of
 # unescaping '$$' to '$' later when the loop body is expanded after setting the
diff -r cdb9be804079 -r 36d6db11009c usr.bin/make/var.c
--- a/usr.bin/make/var.c        Sat Feb 18 07:58:34 2023 +0000
+++ b/usr.bin/make/var.c        Sat Feb 18 11:16:09 2023 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: var.c,v 1.1046 2023/02/15 06:52:58 rillig Exp $        */
+/*     $NetBSD: var.c,v 1.1047 2023/02/18 11:16:09 rillig Exp $        */
 
 /*
  * Copyright (c) 1988, 1989, 1990, 1993
@@ -139,7 +139,7 @@
 #include "metachar.h"
 
 /*     "@(#)var.c      8.3 (Berkeley) 3/19/94" */
-MAKE_RCSID("$NetBSD: var.c,v 1.1046 2023/02/15 06:52:58 rillig Exp $");
+MAKE_RCSID("$NetBSD: var.c,v 1.1047 2023/02/18 11:16:09 rillig Exp $");
 
 /*
  * Variables are defined using one of the VAR=value assignments.  Their
@@ -328,6 +328,7 @@
 
 static const char VarEvalMode_Name[][32] = {
        "parse-only",
+       "parse-balanced",
        "eval",
        "eval-defined",
        "eval-keep-dollar",
@@ -2143,31 +2144,23 @@
        FStr nested_val = Var_Parse(&p, ch->expr->scope,
            VarEvalMode_WithoutKeepDollar(emode));
        /* TODO: handle errors */
-       LazyBuf_AddStr(part, nested_val.str);
+       if (VarEvalMode_ShouldEval(emode))
+               LazyBuf_AddStr(part, nested_val.str);
+       else
+               LazyBuf_AddSubstring(part, Substring_Init(*pp, p));
        FStr_Done(&nested_val);
        *pp = p;
 }
 
 /*
- * In a part of a modifier, parse a subexpression but don't evaluate it.
- *
- * XXX: This whole block is very similar to Var_Parse with VARE_PARSE_ONLY.
- * There may be subtle edge cases though that are not yet covered in the unit
- * tests and that are parsed differently, depending on whether they are
- * evaluated or not.
- *
- * This subtle difference is not documented in the manual page, neither is
- * the difference between parsing ':D' and ':M' documented.  No code should
- * ever depend on these details, but who knows.
- *
- * TODO: Before trying to replace this code with Var_Parse, there need to be
- * more unit tests in varmod-loop.mk.  The modifier ':@' uses Var_Subst
- * internally, in which a '$' is escaped as '$$', not as '\$' like in other
- * modifiers.  When parsing the body text '$${var}', skipping over the first
- * '$' would treat '${var}' as a make expression, not as a shell variable.
+ * In a part of a modifier, parse some text that looks like a subexpression.
+ * If the text starts with '$(', any '(' and ')' must be balanced.
+ * If the text starts with '${', any '{' and '}' must be balanced.
+ * If the text starts with '$', that '$' is copied, it is not parsed as a
+ * short-name variable expression.
  */
 static void
-ParseModifierPartDollar(const char **pp, LazyBuf *part)
+ParseModifierPartBalanced(const char **pp, LazyBuf *part)
 {
        const char *p = *pp;
        const char *start = *pp;
@@ -2234,10 +2227,10 @@
                        else
                                LazyBuf_Add(part, *p);
                        p++;
-               } else if (VarEvalMode_ShouldEval(emode))
+               } else if (emode == VARE_PARSE_BALANCED)
+                       ParseModifierPartBalanced(&p, part);
+               else
                        ParseModifierPartExpr(&p, part, ch, emode);
-               else
-                       ParseModifierPartDollar(&p, part);
        }
 
        if (*p != delim) {
@@ -2437,7 +2430,7 @@
                return AMR_CLEANUP;
        }
 
-       if (!ParseModifierPart(pp, '@', VARE_PARSE_ONLY, ch, &strBuf))
+       if (!ParseModifierPart(pp, '@', VARE_PARSE_BALANCED, ch, &strBuf))
                return AMR_CLEANUP;
        str = LazyBuf_DoneGet(&strBuf);
        args.body = str.str;



Home | Main Index | Thread Index | Old Index