Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/usr.bin/make make(1): use hash table for looking up open dir...



details:   https://anonhg.NetBSD.org/src/rev/f77199954794
branches:  trunk
changeset: 940061:f77199954794
user:      rillig <rillig%NetBSD.org@localhost>
date:      Fri Oct 02 22:20:25 2020 +0000

description:
make(1): use hash table for looking up open directories by name

As long as there are less than 20 open directories, it's perfectly fine
to use a doubly-linked list for name lookup.  A singly linked list or
even an array list would have been better, but anyway.

When the number of directories rises above 1000, which happens with
dirdeps.mk, linear list lookup becomes too expensive, especially since
each list entry is compared using a strcmp call, in a callback function
that is not inlined.

Using a hash table is much more efficient than linear lookup.  While
here, abstract all operations regarding the openDirectories list into a
new data type that provides a simple and straight-forward API.  This
strongly typed API is especially important since the current
implementation of the list and hash table is weakly typed, using void *
for the actual data, and StringList and CachedDirList refer to the
exactly same type, they just have different names to help the human
readers but don't provide any type safety.

diffstat:

 usr.bin/make/dir.c |  87 +++++++++++++++++++++++++++++++++++++++--------------
 1 files changed, 64 insertions(+), 23 deletions(-)

diffs (169 lines):

diff -r ce7a07343954 -r f77199954794 usr.bin/make/dir.c
--- a/usr.bin/make/dir.c        Fri Oct 02 20:48:37 2020 +0000
+++ b/usr.bin/make/dir.c        Fri Oct 02 22:20:25 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: dir.c,v 1.154 2020/10/01 22:42:00 rillig Exp $ */
+/*     $NetBSD: dir.c,v 1.155 2020/10/02 22:20:25 rillig Exp $ */
 
 /*
  * Copyright (c) 1988, 1989, 1990 The Regents of the University of California.
@@ -136,7 +136,7 @@
 #include "job.h"
 
 /*     "@(#)dir.c      8.2 (Berkeley) 1/2/94"  */
-MAKE_RCSID("$NetBSD: dir.c,v 1.154 2020/10/01 22:42:00 rillig Exp $");
+MAKE_RCSID("$NetBSD: dir.c,v 1.155 2020/10/02 22:20:25 rillig Exp $");
 
 #define DIR_DEBUG0(text) DEBUG0(DIR, text)
 #define DIR_DEBUG1(fmt, arg1) DEBUG1(DIR, fmt, arg1)
@@ -219,7 +219,58 @@
 
 SearchPath *dirSearchPath;             /* main search path */
 
-static CachedDirList *openDirectories; /* the list of all open directories */
+/* A list of cached directories, with fast lookup by directory name. */
+typedef struct OpenDirs {
+    CachedDirList *list;
+    Hash_Table /* of CachedDirListNode */ table;
+} OpenDirs;
+
+static void
+OpenDirs_Init(OpenDirs *odirs)
+{
+    odirs->list = Lst_Init();
+    Hash_InitTable(&odirs->table);
+}
+
+static void MAKE_ATTR_UNUSED
+OpenDirs_Done(OpenDirs *odirs)
+{
+    Dir_ClearPath(odirs->list);
+    Lst_Free(odirs->list);
+    Hash_DeleteTable(&odirs->table);
+}
+
+static CachedDir *
+OpenDirs_Find(OpenDirs *odirs, const char *name)
+{
+    CachedDirListNode *ln = Hash_FindValue(&odirs->table, name);
+    return ln != NULL ? ln->datum : NULL;
+}
+
+static void
+OpenDirs_Add(OpenDirs *odirs, CachedDir *cdir)
+{
+    Hash_Entry *he = Hash_FindEntry(&odirs->table, cdir->name);
+    if (he != NULL)
+       return;
+    he = Hash_CreateEntry(&odirs->table, cdir->name, NULL);
+    Lst_Append(odirs->list, cdir);
+    Hash_SetValue(he, odirs->list->last);
+}
+
+static void
+OpenDirs_Remove(OpenDirs *odirs, const char *name)
+{
+    Hash_Entry *he = Hash_FindEntry(&odirs->table, name);
+    CachedDirListNode *ln;
+    if (he == NULL)
+       return;
+    ln = Hash_GetValue(he);
+    Hash_DeleteEntry(&odirs->table, he);
+    Lst_Remove(odirs->list, ln);
+}
+
+static OpenDirs openDirs;      /* the list of all open directories */
 
 /*
  * Variables for gathering statistics on the efficiency of the hashing
@@ -337,7 +388,7 @@
 Dir_Init(void)
 {
     dirSearchPath = Lst_Init();
-    openDirectories = Lst_Init();
+    OpenDirs_Init(&openDirs);
     Hash_InitTable(&mtimes);
     Hash_InitTable(&lmtimes);
 }
@@ -387,11 +438,8 @@
 Dir_InitDot(void)
 {
     if (dot != NULL) {
-       CachedDirListNode *ln;
-
-       /* Remove old entry from openDirectories, but do not destroy. */
-       ln = Lst_FindDatum(openDirectories, dot);
-       Lst_Remove(openDirectories, ln);
+       /* Remove old entry from openDirs, but do not destroy. */
+       OpenDirs_Remove(&openDirs, dot->name);
     }
 
     dot = Dir_AddDir(NULL, ".");
@@ -424,8 +472,7 @@
     Dir_Destroy(dot);
     Dir_ClearPath(dirSearchPath);
     Lst_Free(dirSearchPath);
-    Dir_ClearPath(openDirectories);
-    Lst_Free(openDirectories);
+    OpenDirs_Done(&openDirs);
     Hash_DeleteTable(&mtimes);
 #endif
 }
@@ -1482,13 +1529,12 @@
 CachedDir *
 Dir_AddDir(SearchPath *path, const char *name)
 {
-    SearchPathNode *ln = NULL;
     CachedDir *dir = NULL;     /* the added directory */
     DIR *d;
     struct dirent *dp;
 
     if (path != NULL && strcmp(name, ".DOTLAST") == 0) {
-       ln = Lst_Find(path, DirFindName, name);
+       SearchPathNode *ln = Lst_Find(path, DirFindName, name);
        if (ln != NULL)
            return LstNode_Datum(ln);
 
@@ -1497,9 +1543,8 @@
     }
 
     if (path != NULL)
-       ln = Lst_Find(openDirectories, DirFindName, name);
-    if (ln != NULL) {
-       dir = LstNode_Datum(ln);
+       dir = OpenDirs_Find(&openDirs, name);
+    if (dir != NULL) {
        if (Lst_FindDatum(path, dir) == NULL) {
            dir->refCount++;
            Lst_Append(path, dir);
@@ -1530,7 +1575,7 @@
            (void)Hash_CreateEntry(&dir->files, dp->d_name, NULL);
        }
        (void)closedir(d);
-       Lst_Append(openDirectories, dir);
+       OpenDirs_Add(&openDirs, dir);
        if (path != NULL)
            Lst_Append(path, dir);
     }
@@ -1623,11 +1668,7 @@
     dir->refCount--;
 
     if (dir->refCount == 0) {
-       CachedDirListNode *node;
-
-       node = Lst_FindDatum(openDirectories, dir);
-       if (node != NULL)
-           Lst_Remove(openDirectories, node);
+       OpenDirs_Remove(&openDirs, dir->name);
 
        Hash_DeleteTable(&dir->files);
        free(dir->name);
@@ -1712,7 +1753,7 @@
                 percentage(hits, hits + bigmisses + nearmisses));
     debug_printf("# %-20s referenced\thits\n", "directory");
 
-    for (ln = openDirectories->first; ln != NULL; ln = ln->next) {
+    for (ln = openDirs.list->first; ln != NULL; ln = ln->next) {
        CachedDir *dir = ln->datum;
        debug_printf("# %-20s %10d\t%4d\n", dir->name, dir->refCount,
                     dir->hits);



Home | Main Index | Thread Index | Old Index