Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[pkgsrc/trunk]: pkgsrc mk/subst.mk: fix edge case in detection of identity substitutions



details:   https://anonhg.NetBSD.org/pkgsrc/rev/210d470e6f04
branches:  trunk
changeset: 431305:210d470e6f04
user:      rillig <rillig%pkgsrc.org@localhost>
date:      Mon May 11 19:52:13 2020 +0000

description:
mk/subst.mk: fix edge case in detection of identity substitutions

In a basic regular expression, a dollar-sign means end-of-string only if
it appears at the end of the pattern, or (at the choice of the
implementation) at the end of a \(...\) subexpression.  In any other
position it is an ordinary character that matches itself.

This affects the package converters/help2man, which uses a regular
expression containing a dollar-sign in a non-final position.  Until now,
this regular expression was not detected as an identity substitution,
even though it is one.

diffstat:

 mk/scripts/subst-identity.awk    |  10 +++++++---
 regress/infra-unittests/subst.sh |  28 ++++++++++++++++++++++++----
 2 files changed, 31 insertions(+), 7 deletions(-)

diffs (81 lines):

diff -r 1f8d85e4426c -r 210d470e6f04 mk/scripts/subst-identity.awk
--- a/mk/scripts/subst-identity.awk     Mon May 11 19:45:54 2020 +0000
+++ b/mk/scripts/subst-identity.awk     Mon May 11 19:52:13 2020 +0000
@@ -1,5 +1,5 @@
 #! /usr/bin/awk -f
-# $NetBSD: subst-identity.awk,v 1.2 2020/05/06 06:14:56 rillig Exp $
+# $NetBSD: subst-identity.awk,v 1.3 2020/05/11 19:52:14 rillig Exp $
 #
 # Tests whether a sed(1) command line consists of only identity substitutions
 # like s,id,id,.
@@ -9,13 +9,17 @@
 
 # Returns the first character of the given regular expression,
 # if it is a single-character regular expression.
-function identity_char(s) {
+function identity_char(s, sep, i) {
        if (s ~ /^[\t -~]/ && s !~ /^[$&*.\[\\\]^]/)
                return substr(s, 1, 1);
        if (s ~ /^\\[$*.\[\]^]/)
                return substr(s, 2, 1) "x";
        if (s ~ /^\[[$*.]\]/)
                return substr(s, 2, 1) "xx";
+       if (substr(s, 1, 1) == "$" && substr(s, 2, 1) != sep)
+               return substr(s, 1, 1);
+       if (substr(s, 1, 1) == "^" && i > 3)
+               return substr(s, 1, 1);
        return "";
 }
 
@@ -29,7 +33,7 @@
        i = 3;
        pat_to = "";
        while (i < len && substr(s, i, 1) != sep) {
-               ch = identity_char(substr(s, i));
+               ch = identity_char(substr(s, i), sep, i);
                if (ch == "")
                        break;
                pat_to = pat_to substr(ch, 1, 1);
diff -r 1f8d85e4426c -r 210d470e6f04 regress/infra-unittests/subst.sh
--- a/regress/infra-unittests/subst.sh  Mon May 11 19:45:54 2020 +0000
+++ b/regress/infra-unittests/subst.sh  Mon May 11 19:52:13 2020 +0000
@@ -1,5 +1,5 @@
 #! /bin/sh
-# $NetBSD: subst.sh,v 1.35 2020/05/11 19:17:22 rillig Exp $
+# $NetBSD: subst.sh,v 1.36 2020/05/11 19:52:13 rillig Exp $
 #
 # Tests for mk/subst.mk.
 #
@@ -1219,9 +1219,29 @@
        # See converters/help2man for an example.
        assert_identity 'yes'   -e 's,\$(var),$(var),'
 
-       # An unescaped dollar means end-of-line and cannot be part of an
-       # identity substitution.  This may happen, but is clearly a typo.
-       assert_identity 'no'    -e 's,$(var),$(var),'
+       # POSIX 2004 and 2018 both define in section "9.3.8 BRE Expression
+       # Anchoring" that a dollar-sign at the end of the string means
+       # end-of-string.
+       #
+       # A dollar-sign followed by \) may or may not be an anchor.
+       # In all other cases the dollar is an ordinary character.
+       assert_identity 'yes'   -e 's,$(var),$(var),'
+
+       # Since this dollar-sign may or may not be an anchor, treat the
+       # whole regular expression as not-an-identity.
+       #
+       # Since a regular expression with a subexpression must contain
+       # \( and \), it does not count as an identity substitution anyway,
+       # which makes the implementation simple.
+       assert_identity 'no'    -e 's,aaa\(aaa$\),aaa\(aaa$\),'
+
+       assert_identity 'yes'   -e 's,$a,$a,'
+       assert_identity 'no'    -e 's,a$,a$,'
+
+       # Same for the circumflex.
+       assert_identity 'yes'   -e 's,a^,a^,'
+       assert_identity 'no'    -e 's,^a,^a,'
+       assert_identity 'no'    -e 's,\(^aaa\)aaa,\(^aaa\)aaa,'
 
        test_case_end
 fi



Home | Main Index | Thread Index | Old Index