pkgsrc-Changes archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

CVS commit: pkgsrc



Module Name:    pkgsrc
Committed By:   rillig
Date:           Mon May 11 19:52:14 UTC 2020

Modified Files:
        pkgsrc/mk/scripts: subst-identity.awk
        pkgsrc/regress/infra-unittests: subst.sh

Log Message:
mk/subst.mk: fix edge case in detection of identity substitutions

In a basic regular expression, a dollar-sign means end-of-string only if
it appears at the end of the pattern, or (at the choice of the
implementation) at the end of a \(...\) subexpression.  In all other
positions it is an ordinary character.

This affects the package converters/help2man, which uses a regular
expression containing a dollar-sign in a non-final position.  Until now,
this regular expression was not detected as an identity substitution even
though it is one.


To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 pkgsrc/mk/scripts/subst-identity.awk
cvs rdiff -u -r1.35 -r1.36 pkgsrc/regress/infra-unittests/subst.sh

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: pkgsrc/mk/scripts/subst-identity.awk
diff -u pkgsrc/mk/scripts/subst-identity.awk:1.2 pkgsrc/mk/scripts/subst-identity.awk:1.3
--- pkgsrc/mk/scripts/subst-identity.awk:1.2    Wed May  6 06:14:56 2020
+++ pkgsrc/mk/scripts/subst-identity.awk        Mon May 11 19:52:14 2020
@@ -1,5 +1,5 @@
 #! /usr/bin/awk -f
-# $NetBSD: subst-identity.awk,v 1.2 2020/05/06 06:14:56 rillig Exp $
+# $NetBSD: subst-identity.awk,v 1.3 2020/05/11 19:52:14 rillig Exp $
 #
 # Tests whether a sed(1) command line consists of only identity substitutions
 # like s,id,id,.
@@ -9,13 +9,17 @@
 
 # Returns the first character of the given regular expression,
 # if it is a single-character regular expression.
-function identity_char(s) {
+function identity_char(s, sep, i) {
        if (s ~ /^[\t -~]/ && s !~ /^[$&*.\[\\\]^]/)
                return substr(s, 1, 1);
        if (s ~ /^\\[$*.\[\]^]/)
                return substr(s, 2, 1) "x";
        if (s ~ /^\[[$*.]\]/)
                return substr(s, 2, 1) "xx";
+       if (substr(s, 1, 1) == "$" && substr(s, 2, 1) != sep)
+               return substr(s, 1, 1);
+       if (substr(s, 1, 1) == "^" && i > 3)
+               return substr(s, 1, 1);
        return "";
 }
 
@@ -29,7 +33,7 @@ function is_identity_subst(s,   len, i, 
        i = 3;
        pat_to = "";
        while (i < len && substr(s, i, 1) != sep) {
-               ch = identity_char(substr(s, i));
+               ch = identity_char(substr(s, i), sep, i);
                if (ch == "")
                        break;
                pat_to = pat_to substr(ch, 1, 1);

Index: pkgsrc/regress/infra-unittests/subst.sh
diff -u pkgsrc/regress/infra-unittests/subst.sh:1.35 pkgsrc/regress/infra-unittests/subst.sh:1.36
--- pkgsrc/regress/infra-unittests/subst.sh:1.35        Mon May 11 19:17:22 2020
+++ pkgsrc/regress/infra-unittests/subst.sh     Mon May 11 19:52:13 2020
@@ -1,5 +1,5 @@
 #! /bin/sh
-# $NetBSD: subst.sh,v 1.35 2020/05/11 19:17:22 rillig Exp $
+# $NetBSD: subst.sh,v 1.36 2020/05/11 19:52:13 rillig Exp $
 #
 # Tests for mk/subst.mk.
 #
@@ -1219,9 +1219,29 @@ if test_case_begin "identity substitutio
        # See converters/help2man for an example.
        assert_identity 'yes'   -e 's,\$(var),$(var),'
 
-       # An unescaped dollar means end-of-line and cannot be part of an
-       # identity substitution.  This may happen, but is clearly a typo.
-       assert_identity 'no'    -e 's,$(var),$(var),'
+       # POSIX 2004 and 2018 both define in section "9.3.8 BRE Expression
+       # Anchoring" that a dollar-sign at the end of the string means
+       # end-of-string.
+       #
+       # A dollar-sign followed by \) may or may not be an anchor.
+       # In all other cases the dollar is an ordinary character.
+       assert_identity 'yes'   -e 's,$(var),$(var),'
+
+       # Since this dollar-sign may or may not be an anchor, treat the
+       # whole regular expression as not-an-identity.
+       #
+       # Since a regular expression with a subexpression must contain
+       # \( and \), it does not count as an identity substitution anyway,
+       # which makes the implementation simple.
+       assert_identity 'no'    -e 's,aaa\(aaa$\),aaa\(aaa$\),'
+
+       assert_identity 'yes'   -e 's,$a,$a,'
+       assert_identity 'no'    -e 's,a$,a$,'
+
+       # Same for the circumflex.
+       assert_identity 'yes'   -e 's,a^,a^,'
+       assert_identity 'no'    -e 's,^a,^a,'
+       assert_identity 'no'    -e 's,\(^aaa\)aaa,\(^aaa\)aaa,'
 
        test_case_end
 fi



Home | Main Index | Thread Index | Old Index