Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[pkgsrc/trunk]: pkgsrc mk/subst.mk: allow identity substitutions with escaped dots



details:   https://anonhg.NetBSD.org/pkgsrc/rev/b6fa1a8711b8
branches:  trunk
changeset: 430903:b6fa1a8711b8
user:      rillig <rillig%pkgsrc.org@localhost>
date:      Wed May 06 06:14:56 2020 +0000

description:
mk/subst.mk: allow identity substitutions with escaped dots

This fixes the build of converters/help2man in SUBST_NOOP_OK=no mode.

diffstat:

 mk/scripts/subst-identity.awk    |  36 +++++++++++++++++++++++++++---------
 regress/infra-unittests/subst.sh |  25 ++++++++++++++++++++++++-
 2 files changed, 51 insertions(+), 10 deletions(-)

diffs (100 lines):

diff -r 45fec838712e -r b6fa1a8711b8 mk/scripts/subst-identity.awk
--- a/mk/scripts/subst-identity.awk     Wed May 06 02:52:10 2020 +0000
+++ b/mk/scripts/subst-identity.awk     Wed May 06 06:14:56 2020 +0000
@@ -1,5 +1,5 @@
 #! /usr/bin/awk -f
-# $NetBSD: subst-identity.awk,v 1.1 2020/04/29 18:33:57 rillig Exp $
+# $NetBSD: subst-identity.awk,v 1.2 2020/05/06 06:14:56 rillig Exp $
 #
 # Tests whether a sed(1) command line consists of only identity substitutions
 # like s,id,id,.
@@ -7,23 +7,41 @@
 # See SUBST_NOOP_OK and regress/infra-unittests/subst.sh.
 #
 
-function is_safe_char(ch) {
-       return ch ~ /[\t -~]/ && ch !~ /[$&*.\[\\\]^]/;
+# Returns the first character of the given regular expression,
+# if it is a single-character regular expression.
+function identity_char(s) {
+       if (s ~ /^[\t -~]/ && s !~ /^[$&*.\[\\\]^]/)
+               return substr(s, 1, 1);
+       if (s ~ /^\\[$*.\[\]^]/)
+               return substr(s, 2, 1) "x";
+       if (s ~ /^\[[$*.]\]/)
+               return substr(s, 2, 1) "xx";
+       return "";
 }
 
-function is_identity_subst(s,   len, i, sep, pat) {
+# Tests whether a single "s,from,to," is an identity substitution.
+function is_identity_subst(s,   len, i, sep, pat_from, pat_to, ch, subst) {
        len = length(s);
        if (len < 6 || substr(s, 1, 1) != "s")
                return 0;
 
        sep = substr(s, 2, 1);
        i = 3;
-       while (i < len && substr(s, i, 1) != sep && is_safe_char(substr(s, i, 1)))
-               i++;
-       pat = substr(s, 3, i - 3);
+       pat_to = "";
+       while (i < len && substr(s, i, 1) != sep) {
+               ch = identity_char(substr(s, i));
+               if (ch == "")
+                       break;
+               pat_to = pat_to substr(ch, 1, 1);
+               i += length(ch);
+       }
 
-       return (s == "s" sep pat sep pat sep ||
-               s == "s" sep pat sep pat sep "g");
+       if (pat_to == "")
+               return 0; # only for GNU Awk 5.0.1 in -Lfatal mode
+       pat_from = substr(s, 3, i - 3);
+
+       subst = "s" sep pat_from sep pat_to sep;
+       return s == subst || s == subst "g";
 }
 
 function main(   i) {
diff -r 45fec838712e -r b6fa1a8711b8 regress/infra-unittests/subst.sh
--- a/regress/infra-unittests/subst.sh  Wed May 06 02:52:10 2020 +0000
+++ b/regress/infra-unittests/subst.sh  Wed May 06 06:14:56 2020 +0000
@@ -1,5 +1,5 @@
 #! /bin/sh
-# $NetBSD: subst.sh,v 1.31 2020/05/02 06:48:59 rillig Exp $
+# $NetBSD: subst.sh,v 1.32 2020/05/06 06:14:56 rillig Exp $
 #
 # Tests for mk/subst.mk.
 #
@@ -1189,6 +1189,29 @@
        specials='!"%'\''()+,-/:;<=>@_`{|}~'
        assert_identity "yes"   -e "sX${specials}X${specials}X"
 
+       # Regular expression meta-characters may be escaped using a
+       # backslash or be enclosed in square brackets.
+       assert_identity 'yes'   -e 's,library\.so,library.so,g'
+       assert_identity 'yes'   -e 's,library[.]so,library.so,g'
+       assert_identity 'yes'   -e 's,[*],*,'
+       assert_identity 'yes'   -e 's,[$],$,'
+
+       # When this happens, it is probably a mistake.
+       assert_identity 'no'    -e 's,,,'
+
+       # Backslashes are not considered identity substitutions since
+       # there might be tricky corner cases.
+       assert_identity 'no'    -e 's,\\,\\,'
+
+       # Back-references are not considered identity substitutions.
+       assert_identity 'no'    -e 's,\1,\1,'
+
+       # The & is interpreted specially in the replacement string.
+       assert_identity 'no'    -e 's,&&&,&&&,'
+       assert_identity 'no'    -e 's,\&,&,'
+       assert_identity 'no'    -e 's,[&],&,'
+       assert_identity 'no'    -e 's,&,\&,' # this would be an identity
+
        test_case_end
 fi
 



Home | Main Index | Thread Index | Old Index