Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/usr.bin/make make: speed up pattern matching in the ':M' and...



details:   https://anonhg.NetBSD.org/src/rev/655332ec3dbe
branches:  trunk
changeset: 376574:655332ec3dbe
user:      rillig <rillig%NetBSD.org@localhost>
date:      Thu Jun 22 12:59:54 2023 +0000

description:
make: speed up pattern matching in the ':M' and ':N' modifiers

In the code coverage report, the highest count for Str_Match goes from
5,298,924 down to 79,646.

diffstat:

 usr.bin/make/str.c                      |  67 ++++++++++++++++++++++++--------
 usr.bin/make/unit-tests/varmod-match.mk |  20 ++++++--
 2 files changed, 63 insertions(+), 24 deletions(-)

diffs (154 lines):

diff -r 2176722211aa -r 655332ec3dbe usr.bin/make/str.c
--- a/usr.bin/make/str.c        Thu Jun 22 09:09:08 2023 +0000
+++ b/usr.bin/make/str.c        Thu Jun 22 12:59:54 2023 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: str.c,v 1.94 2022/12/07 10:28:48 rillig Exp $  */
+/*     $NetBSD: str.c,v 1.95 2023/06/22 12:59:54 rillig Exp $  */
 
 /*
  * Copyright (c) 1988, 1989, 1990, 1993
@@ -71,7 +71,7 @@
 #include "make.h"
 
 /*     "@(#)str.c      5.8 (Berkeley) 6/1/90"  */
-MAKE_RCSID("$NetBSD: str.c,v 1.94 2022/12/07 10:28:48 rillig Exp $");
+MAKE_RCSID("$NetBSD: str.c,v 1.95 2023/06/22 12:59:54 rillig Exp $");
 
 
 static HashTable interned_strings;
@@ -323,19 +323,18 @@ in_range(char e1, char c, char e2)
 bool
 Str_Match(const char *str, const char *pat)
 {
-       for (; *pat != '\0'; pat++, str++) {
-               if (*pat == '*') {      /* match any substring */
-                       pat++;
-                       while (*pat == '*')
-                               pat++;
-                       if (*pat == '\0')
-                               return true;
-                       for (; *str != '\0'; str++)
-                               if (Str_Match(str, pat))
-                                       return true;
-                       return false;
-               }
+       enum { MFL_START, MFL_MIDDLE, MFL_END } mfl;
+       const char *str1, *pat1;
+       bool matched;
 
+       mfl = MFL_START;
+       str1 = str;
+       pat1 = pat;
+match_fixed_length:
+       str = str1;
+       pat = pat1;
+       matched = false;
+       for (; *pat != '\0' && *pat != '*'; str++, pat++) {
                if (*str == '\0')
                        return false;
 
@@ -350,7 +349,7 @@ Str_Match(const char *str, const char *p
                                if (*pat == ']' || *pat == '\0') {
                                        if (neg)
                                                break;
-                                       return false;
+                                       goto match_done;
                                }
                                if (*pat == *str)
                                        break;
@@ -364,7 +363,7 @@ Str_Match(const char *str, const char *p
                                pat++;
                        }
                        if (neg && *pat != ']' && *pat != '\0')
-                               return false;
+                               goto match_done;
                        while (*pat != ']' && *pat != '\0')
                                pat++;
                        if (*pat == '\0')
@@ -374,11 +373,43 @@ Str_Match(const char *str, const char *p
 
                if (*pat == '\\')       /* match the next character exactly */
                        pat++;
+               if (*pat != *str)
+                       goto match_done;
+       }
+       matched = true;
 
-               if (*pat != *str)
+match_done:
+       switch (mfl) {
+       case MFL_START:
+               if (!matched)
                        return false;
+               if (*pat == '\0')
+                       return *str == '\0';
+               mfl = MFL_MIDDLE;
+               break;
+       default:
+               if (!matched) {
+                       str1++;
+                       goto match_fixed_length;
+               }
+               if (*pat == '\0') {
+                       mfl = MFL_END;
+                       str1 = str + strlen(str) - (str - str1);
+                       goto match_fixed_length;
+               }
+               break;
+       case MFL_END:
+               return matched;
        }
-       return *str == '\0';
+
+       while (*pat == '*')
+               pat++;
+       if (*pat == '\0')
+               return true;
+       if (*str == '\0')
+               return false;
+       str1 = str, pat1 = pat;
+       goto match_fixed_length;
 }
 
 void
diff -r 2176722211aa -r 655332ec3dbe usr.bin/make/unit-tests/varmod-match.mk
--- a/usr.bin/make/unit-tests/varmod-match.mk   Thu Jun 22 09:09:08 2023 +0000
+++ b/usr.bin/make/unit-tests/varmod-match.mk   Thu Jun 22 12:59:54 2023 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: varmod-match.mk,v 1.13 2023/06/22 09:09:08 rillig Exp $
+# $NetBSD: varmod-match.mk,v 1.14 2023/06/22 12:59:54 rillig Exp $
 #
 # Tests for the :M variable modifier, which filters words that match the
 # given pattern.
@@ -33,11 +33,12 @@ NUMBERS=    One Two Three Four five six sev
 .if ${:U****************:M****************b}
 .endif
 
-# As of 2023-06-22, this expression calls Str_Match 2,621,112 times.
-# Adding another '*?' to the pattern calls Str_Match 20,630,572 times.
-# Adding another '*?' to the pattern calls Str_Match 136,405,672 times.
-# Adding another '*?' to the pattern calls Str_Match 773,168,722 times.
-# Adding another '*?' to the pattern calls Str_Match 3,815,481,072 times.
+# Before 2023-06-22, this expression called Str_Match 2,621,112 times.
+# Adding another '*?' to the pattern called Str_Match 20,630,572 times.
+# Adding another '*?' to the pattern called Str_Match 136,405,672 times.
+# Adding another '*?' to the pattern called Str_Match 773,168,722 times.
+# Adding another '*?' to the pattern called Str_Match 3,815,481,072 times.
+# Since 2023-06-22, Str_Match no longer backtracks.
 .if ${:U..................................................b:M*?*?*?*?*?a}
 .endif
 
@@ -217,6 +218,13 @@ WORDS=             - + x xx 0 1 2 3 4 [x1-3
 .  error
 .endif
 
+#      *[-x1-3 Incomplete character list after a wildcard, matches those
+#              words that end with one of the characters from the list.
+WORDS=         - + x xx 0 1 2 3 4 00 01 10 11 000 001 010 011 100 101 110 111 [x1-3
+.if ${WORDS:M*[-x1-3} != "- x xx 1 2 3 01 11 001 011 101 111 [x1-3"
+.  warning ${WORDS:M*[-x1-3}
+.endif
+
 #      [^-x1-3
 #              Incomplete negated character list, matches any character
 #              except those elements that can be parsed without lookahead.



Home | Main Index | Thread Index | Old Index