Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/usr.bin/make make(1): remove mmap for loading files, only al...



details:   https://anonhg.NetBSD.org/src/rev/51eae1bb0fc4
branches:  trunk
changeset: 948390:51eae1bb0fc4
user:      rillig <rillig%NetBSD.org@localhost>
date:      Mon Dec 28 15:21:33 2020 +0000

description:
make(1): remove mmap for loading files, only allow files < 1 GiB

Using mmap is beneficial if the loaded data is read-only, or if it is
accessed in random order.  Neither of these applies here.  When loading
a file, make reads it strictly from top to bottom, once.  During
parsing, the loaded data is modified in-place to insert '\0' and '\n'
for terminating strings and lines.  Because of all of this, there is no
benefit in using mmap.

Reading the file using 2 calls to read(2) (one for the data, one for
checking for EOF) loads the data in a single pass, instead of producing
a page fault whenever the parser passes another page boundary.

Use a Buffer for loading the file data, to avoid calling bmake_realloc
directly.

Do not resize the loaded buffer at the end.  Each loaded file is
short-lived anyway, and only a few files are loaded at the same time, so
there is no point in optimizing this part for low memory usage.

diffstat:

 usr.bin/make/parse.c |  164 +++++++++++++-------------------------------------
 1 files changed, 45 insertions(+), 119 deletions(-)

diffs (257 lines):

diff -r 0a630fe77fc8 -r 51eae1bb0fc4 usr.bin/make/parse.c
--- a/usr.bin/make/parse.c      Mon Dec 28 15:08:06 2020 +0000
+++ b/usr.bin/make/parse.c      Mon Dec 28 15:21:33 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: parse.c,v 1.521 2020/12/28 00:46:24 rillig Exp $       */
+/*     $NetBSD: parse.c,v 1.522 2020/12/28 15:21:33 rillig Exp $       */
 
 /*
  * Copyright (c) 1988, 1989, 1990, 1993
@@ -98,26 +98,18 @@
  */
 
 #include <sys/types.h>
-#include <sys/mman.h>
 #include <sys/stat.h>
 #include <errno.h>
 #include <stdarg.h>
 #include <stdint.h>
 
-#ifndef MAP_FILE
-#define MAP_FILE 0
-#endif
-#ifndef MAP_COPY
-#define MAP_COPY MAP_PRIVATE
-#endif
-
 #include "make.h"
 #include "dir.h"
 #include "job.h"
 #include "pathnames.h"
 
 /*     "@(#)parse.c    8.3 (Berkeley) 3/19/94" */
-MAKE_RCSID("$NetBSD: parse.c,v 1.521 2020/12/28 00:46:24 rillig Exp $");
+MAKE_RCSID("$NetBSD: parse.c,v 1.522 2020/12/28 15:21:33 rillig Exp $");
 
 /* types and constants */
 
@@ -354,21 +346,19 @@
        const char *path;       /* name, for error reports */
        char *buf;              /* contents buffer */
        size_t len;             /* length of contents */
-       size_t maplen;          /* length of mmap area, or 0 */
        Boolean used;           /* XXX: have we used the data yet */
 };
 
 /* XXX: What is the lifetime of the path? Who manages the memory? */
 static struct loadedfile *
-loadedfile_create(const char *path)
+loadedfile_create(const char *path, char *buf, size_t buflen)
 {
        struct loadedfile *lf;
 
        lf = bmake_malloc(sizeof *lf);
        lf->path = path == NULL ? "(stdin)" : path;
-       lf->buf = NULL;
-       lf->len = 0;
-       lf->maplen = 0;
+       lf->buf = buf;
+       lf->len = buflen;
        lf->used = FALSE;
        return lf;
 }
@@ -376,12 +366,7 @@
 static void
 loadedfile_destroy(struct loadedfile *lf)
 {
-       if (lf->buf != NULL) {
-               if (lf->maplen > 0)
-                       munmap(lf->buf, lf->maplen);
-               else
-                       free(lf->buf);
-       }
+       free(lf->buf);
        free(lf);
 }
 
@@ -420,65 +405,18 @@
         * st_size is an off_t, which is 64 bits signed; *ret is
         * size_t, which might be 32 bits unsigned or 64 bits
         * unsigned. Rather than being elaborate, just punt on
-        * files that are more than 2^31 bytes. We should never
-        * see a makefile that size in practice...
+        * files that are more than 1 GiB. We should never
+        * see a makefile that size in practice.
         *
         * While we're at it reject negative sizes too, just in case.
         */
-       if (st.st_size < 0 || st.st_size > 0x7fffffff)
+       if (st.st_size < 0 || st.st_size > 0x3fffffff)
                return FALSE;
 
        *ret = (size_t)st.st_size;
        return TRUE;
 }
 
-static Boolean
-loadedfile_mmap(struct loadedfile *lf, int fd)
-{
-       static unsigned long pagesize = 0;
-
-       if (!load_getsize(fd, &lf->len))
-               return FALSE;
-
-       /* found a size, try mmap */
-       if (pagesize == 0)
-               pagesize = (unsigned long)sysconf(_SC_PAGESIZE);
-       if (pagesize == 0 || pagesize == (unsigned long)-1)
-               pagesize = 0x1000;
-
-       /* round size up to a page */
-       lf->maplen = pagesize * ((lf->len + pagesize - 1) / pagesize);
-
-       /*
-        * XXX hack for dealing with empty files; remove when
-        * we're no longer limited by interfacing to the old
-        * logic elsewhere in this file.
-        */
-       if (lf->maplen == 0)
-               lf->maplen = pagesize;
-
-       /*
-        * FUTURE: remove PROT_WRITE when the parser no longer
-        * needs to scribble on the input.
-        */
-       lf->buf = mmap(NULL, lf->maplen, PROT_READ | PROT_WRITE,
-           MAP_FILE | MAP_COPY, fd, 0);
-       if (lf->buf == MAP_FAILED)
-               return FALSE;
-
-       if (lf->len > 0 && lf->buf[lf->len - 1] != '\n') {
-               if (lf->len == lf->maplen) {
-                       char *b = bmake_malloc(lf->len + 1);
-                       memcpy(b, lf->buf, lf->len);
-                       munmap(lf->buf, lf->maplen);
-                       lf->maplen = 0;
-               }
-               lf->buf[lf->len++] = '\n';
-       }
-
-       return TRUE;
-}
-
 /*
  * Read in a file.
  *
@@ -491,75 +429,63 @@
 static struct loadedfile *
 loadfile(const char *path, int fd)
 {
-       struct loadedfile *lf;
-       ssize_t result;
-       size_t bufpos;
-
-       lf = loadedfile_create(path);
+       ssize_t n;
+       Buffer buf;
+       size_t filesize;
+
 
        if (path == NULL) {
                assert(fd == -1);
                fd = STDIN_FILENO;
-       } else {
-#if 0 /* notyet */
-               fd = open(path, O_RDONLY);
-               if (fd < 0) {
-                       ...
-                       Error("%s: %s", path, strerror(errno));
-                       exit(1);
-               }
-#endif
        }
 
-       if (loadedfile_mmap(lf, fd))
-               goto done;
-
-       /* cannot mmap; load the traditional way */
-
-       lf->maplen = 0;
-       lf->len = 1024;
-       lf->buf = bmake_malloc(lf->len);
-
-       bufpos = 0;
+       if (load_getsize(fd, &filesize)) {
+               /*
+                * Avoid resizing the buffer later for no reason.
+                *
+                * At the same time leave space for adding a final '\n',
+                * just in case it is missing in the file.
+                */
+               filesize++;
+       } else
+               filesize = 1024;
+       Buf_InitSize(&buf, filesize);
+
        for (;;) {
-               assert(bufpos <= lf->len);
-               if (bufpos == lf->len) {
-                       if (lf->len > SIZE_MAX / 2) {
+               assert(buf.len <= buf.cap);
+               if (buf.len == buf.cap) {
+                       if (buf.cap > 0x1fffffff) {
                                errno = EFBIG;
                                Error("%s: file too large", path);
                                exit(2); /* Not 1 so -q can distinguish error */
                        }
-                       lf->len *= 2;
-                       lf->buf = bmake_realloc(lf->buf, lf->len);
+                       Buf_Expand_1(&buf);
                }
-               assert(bufpos < lf->len);
-               result = read(fd, lf->buf + bufpos, lf->len - bufpos);
-               if (result < 0) {
+               assert(buf.len < buf.cap);
+               n = read(fd, buf.data + buf.len, buf.cap - buf.len);
+               if (n < 0) {
                        Error("%s: read error: %s", path, strerror(errno));
                        exit(2);        /* Not 1 so -q can distinguish error */
                }
-               if (result == 0)
+               if (n == 0)
                        break;
 
-               bufpos += (size_t)result;
+               buf.len += (size_t)n;
        }
-       assert(bufpos <= lf->len);
-       lf->len = bufpos;
-
-       /* truncate malloc region to actual length (maybe not useful) */
-       if (lf->len > 0) {
-               /* as for mmap case, ensure trailing \n */
-               if (lf->buf[lf->len - 1] != '\n')
-                       lf->len++;
-               lf->buf = bmake_realloc(lf->buf, lf->len);
-               lf->buf[lf->len - 1] = '\n';
-       }
-
-done:
+       assert(buf.len <= buf.cap);
+
+       if (!Buf_EndsWith(&buf, '\n'))
+               Buf_AddByte(&buf, '\n');
+
        if (path != NULL)
                close(fd);
 
-       return lf;
+       {
+               struct loadedfile *lf = loadedfile_create(path,
+                   buf.data, buf.len);
+               Buf_Destroy(&buf, FALSE);
+               return lf;
+       }
 }
 
 /* old code */



Home | Main Index | Thread Index | Old Index