tech-toolchain archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: cleaning up the make parser



On Mon, Dec 13, 2010 at 07:16:36AM +0000, David Holland wrote:
 > Attached is a candidate patch for step 1.

er right, the patch:

Index: usr.bin/make/for.c
===================================================================
RCS file: /cvsroot/src/usr.bin/make/for.c,v
retrieving revision 1.47
diff -u -p -r1.47 for.c
--- usr.bin/make/for.c  6 Feb 2010 20:37:13 -0000       1.47
+++ usr.bin/make/for.c  13 Dec 2010 06:37:15 -0000
@@ -366,7 +366,7 @@ for_substitute(Buffer *cmds, strlist_t *
 }
 
 static char *
-For_Iterate(void *v_arg)
+For_Iterate(void *v_arg, size_t *ret_len)
 {
     For *arg = v_arg;
     int i, len;
@@ -377,6 +377,9 @@ For_Iterate(void *v_arg)
     char ch;
     Buffer cmds;
 
+    /* we return null-terminated strings */
+    *ret_len = 0;
+
     if (arg->sub_next + strlist_num(&arg->vars) > strlist_num(&arg->items)) {
        /* No more iterations */
        For_Free(arg);
Index: usr.bin/make/main.c
===================================================================
RCS file: /cvsroot/src/usr.bin/make/main.c,v
retrieving revision 1.192
diff -u -p -r1.192 main.c
--- usr.bin/make/main.c 13 Dec 2010 01:48:50 -0000      1.192
+++ usr.bin/make/main.c 13 Dec 2010 06:37:16 -0000
@@ -1307,7 +1307,7 @@ ReadMakefile(const void *p, const void *
        char *name, *path = bmake_malloc(len);
 
        if (!strcmp(fname, "-")) {
-               Parse_File("(stdin)", dup(fileno(stdin)));
+               Parse_File(NULL /*stdin*/, -1);
                Var_Set("MAKEFILE", "", VAR_GLOBAL, 0);
        } else {
                /* if we've chdir'd, rebuild the path name */
Index: usr.bin/make/nonints.h
===================================================================
RCS file: /cvsroot/src/usr.bin/make/nonints.h,v
retrieving revision 1.61
diff -u -p -r1.61 nonints.h
--- usr.bin/make/nonints.h      9 Dec 2010 22:30:17 -0000       1.61
+++ usr.bin/make/nonints.h      13 Dec 2010 06:37:16 -0000
@@ -136,7 +136,7 @@ void Parse_AddIncludeDir(char *);
 void Parse_File(const char *, int);
 void Parse_Init(void);
 void Parse_End(void);
-void Parse_SetInput(const char *, int, int, char *(*)(void *), void *);
+void Parse_SetInput(const char *, int, int, char *(*)(void *, size_t *), void 
*);
 Lst Parse_MainName(void);
 
 /* str.c */
Index: usr.bin/make/parse.c
===================================================================
RCS file: /cvsroot/src/usr.bin/make/parse.c,v
retrieving revision 1.168
diff -u -p -r1.168 parse.c
--- usr.bin/make/parse.c        13 Dec 2010 03:36:39 -0000      1.168
+++ usr.bin/make/parse.c        13 Dec 2010 06:37:18 -0000
@@ -123,12 +123,19 @@ __RCSID("$NetBSD: parse.c,v 1.168 2010/1
  *     Parse_MainName              Returns a Lst of the main target to create.
  */
 
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <assert.h>
 #include <ctype.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <stdarg.h>
 #include <stdio.h>
 
+#ifndef MAP_COPY
+#define MAP_COPY MAP_PRIVATE
+#endif
+
 #include "make.h"
 #include "hash.h"
 #include "dir.h"
@@ -146,17 +153,15 @@ typedef struct IFile {
     const char      *fname;         /* name of file */
     int             lineno;         /* current line number in file */
     int             first_lineno;   /* line number of start of text */
-    int             fd;             /* the open file */
     int             cond_depth;     /* 'if' nesting when file opened */
     char            *P_str;         /* point to base of string buffer */
     char            *P_ptr;         /* point to next char of string buffer */
     char            *P_end;         /* point to the end of string buffer */
-    int             P_buflen;       /* current size of file buffer */
-    char            *(*nextbuf)(void *); /* Function to get more data */
+    char            *(*nextbuf)(void *, size_t *); /* Function to get more 
data */
     void            *nextbuf_arg;   /* Opaque arg for nextbuf() */
+    struct loadedfile *lf;          /* loadedfile object, if any */
 } IFile;
 
-#define IFILE_BUFLEN 0x8000
 
 /*
  * These values are returned by ParseEOF to tell Parse_File whether to
@@ -359,7 +364,189 @@ static void ParseFinishLine(void);
 static void ParseMark(GNode *);
 
 ////////////////////////////////////////////////////////////
-// code
+// file loader
+
+struct loadedfile {
+       const char *path;               /* name, for error reports */
+       char *buf;                      /* contents buffer */
+       size_t len;                     /* length of contents */
+       size_t maplen;                  /* length of mmap area, or 0 */
+       Boolean used;                   /* XXX: have we used the data yet */
+};
+
+/*
+ * Constructor/destructor for loadedfile
+ */
+static struct loadedfile *
+loadedfile_create(const char *path)
+{
+       struct loadedfile *lf;
+
+       lf = bmake_malloc(sizeof(*lf));
+       lf->path = (path == NULL ? "(stdin)" : path);
+       lf->buf = NULL;
+       lf->len = 0;
+       lf->maplen = 0;
+       lf->used = FALSE;
+       return lf;
+}
+
+static void
+loadedfile_destroy(struct loadedfile *lf)
+{
+       if (lf->buf != NULL) {
+               if (lf->maplen > 0) {
+                       munmap(lf->buf, lf->maplen);
+               } else {
+                       free(lf->buf);
+               }
+       }
+       free(lf);
+}
+
+/*
+ * nextbuf() operation for loadedfile, as needed by the weird and twisted
+ * logic below. Once that's cleaned up, we can get rid of lf->used...
+ */
+static char *
+loadedfile_nextbuf(void *x, size_t *len)
+{
+       struct loadedfile *lf = x;
+
+       if (lf->used) {
+               return NULL;
+       }
+       lf->used = TRUE;
+       *len = lf->len;
+       return lf->buf;
+}
+
+/*
+ * Try to get the size of a file.
+ */
+static ReturnStatus
+load_getsize(int fd, size_t *ret)
+{
+       struct stat st;
+
+       if (fstat(fd, &st) < 0) {
+               return FAILURE;
+       }
+
+       if (!S_ISREG(st.st_mode)) {
+               return FAILURE;
+       }
+
+       /*
+        * st_size is an off_t, which is 64 bits signed; *ret is
+        * size_t, which might be 32 bits unsigned or 64 bits
+        * unsigned. Rather than being elaborate, just punt on
+        * files that are more than 2^31 bytes. We should never
+        * see a makefile that size in practice...
+        *
+        * While we're at it reject negative sizes too, just in case.
+        */
+       if (st.st_size < 0 || st.st_size > 0x7fffffff) {
+               return FAILURE;
+       }
+
+       *ret = (size_t) st.st_size;
+       return SUCCESS;
+}
+
+/*
+ * Read in a file.
+ *
+ * Until the path search logic can be moved under here instead of
+ * being in the caller in another source file, we need to have the fd
+ * passed in already open. Bleh.
+ *
+ * If the path is NULL use stdin and (to insure against fd leaks)
+ * assert that the caller passed in -1.
+ */
+static struct loadedfile *
+loadfile(const char *path, int fd)
+{
+       struct loadedfile *lf;
+       long pagesize;
+       ssize_t result;
+       size_t bufpos;
+
+       lf = loadedfile_create(path);
+
+       if (path == NULL) {
+               assert(fd == -1);
+               fd = STDIN_FILENO;
+       } else {
+#if 0 /* notyet */
+               fd = open(path, O_RDONLY);
+               if (fd < 0) {
+                       ...
+                       Error("%s: %s", path, strerror(errno));
+                       exit(1);
+               }
+#endif
+       }
+
+       if (load_getsize(fd, &lf->len) == SUCCESS) {
+               /* found a size, try mmap */
+               pagesize = sysconf(_SC_PAGESIZE);
+               if (pagesize <= 0) {
+                       pagesize = 0x1000;
+               }
+               /* round size up to a page */
+               lf->maplen = pagesize * ((lf->len + pagesize - 1)/pagesize);
+
+               /*
+                * FUTURE: remove PROT_WRITE when the parser no longer
+                * needs to scribble on the input.
+                */
+               lf->buf = mmap(NULL, lf->maplen, PROT_READ|PROT_WRITE,
+                              MAP_FILE|MAP_COPY, fd, 0);
+               if (lf->buf != MAP_FAILED) {
+                       /* succeeded */
+                       goto done;
+               }
+       }
+
+       /* cannot mmap; load the traditional way */
+
+       lf->maplen = 0;
+       lf->len = 1024;
+       lf->buf = bmake_malloc(lf->len);
+
+       bufpos = 0;
+       while (1) {
+               assert(bufpos <= lf->len);
+               if (bufpos == lf->len) {
+                       lf->len *= 2;
+                       lf->buf = bmake_realloc(lf->buf, lf->len);
+               }
+               result = read(fd, lf->buf + bufpos, lf->len - bufpos);
+               if (result < 0) {
+                       Error("%s: read error: %s", path, strerror(errno));
+                       exit(1);
+               }
+               if (result == 0) {
+                       break;
+               }
+               bufpos += result;
+       }
+       assert(bufpos <= lf->len);
+
+       /* truncate malloc region to actual length (maybe not useful) */
+       lf->len = bufpos;
+       lf->buf = bmake_realloc(lf->buf, lf->len);
+
+done:
+       if (path != NULL) {
+               close(fd);
+       }
+       return lf;
+}
+
+////////////////////////////////////////////////////////////
+// old code
 
 /*-
  *----------------------------------------------------------------------
@@ -1835,6 +2022,7 @@ Parse_AddIncludeDir(char *dir)
 static void
 Parse_include_file(char *file, Boolean isSystem, int silent)
 {
+    struct loadedfile *lf;
     char          *fullname;   /* full pathname of file */
     char          *newName;
     char          *prefEnd, *incdir;
@@ -1925,8 +2113,12 @@ Parse_include_file(char *file, Boolean i
        return;
     }
 
+    /* load it */
+    lf = loadfile(fullname, fd);
+
     /* Start reading from this file next */
-    Parse_SetInput(fullname, 0, fd, NULL, NULL);
+    Parse_SetInput(fullname, 0, -1, loadedfile_nextbuf, lf);
+    curFile->lf = lf;
 }
 
 static void
@@ -2063,9 +2255,11 @@ ParseTrackInput(const char *name)
  *---------------------------------------------------------------------
  */
 void
-Parse_SetInput(const char *name, int line, int fd, char *(*nextbuf)(void *), 
void *arg)
+Parse_SetInput(const char *name, int line, int fd,
+       char *(*nextbuf)(void *, size_t *), void *arg)
 {
     char *buf;
+    size_t len;
 
     if (name == NULL)
        name = curFile->fname;
@@ -2096,32 +2290,26 @@ Parse_SetInput(const char *name, int lin
     curFile->fname = name;
     curFile->lineno = line;
     curFile->first_lineno = line;
-    curFile->fd = fd;
     curFile->nextbuf = nextbuf;
     curFile->nextbuf_arg = arg;
+    curFile->lf = NULL;
 
-    if (nextbuf == NULL) {
-       /*
-        * Allocate a 32k data buffer (as stdio seems to).
-        * Set pointers so that first ParseReadc has to do a file read.
-        */
-       buf = bmake_malloc(IFILE_BUFLEN);
-       buf[0] = 0;
-       curFile->P_str = buf;
-       curFile->P_ptr = buf;
-       curFile->P_end = buf;
-       curFile->P_buflen = IFILE_BUFLEN;
-    } else {
-       /* Get first block of input data */
-       buf = curFile->nextbuf(curFile->nextbuf_arg);
-       if (buf == NULL) {
-           /* Was all a waste of time ... */
-           free(curFile);
-           return;
-       }
-       curFile->P_str = buf;
-       curFile->P_ptr = buf;
+    assert(nextbuf != NULL);
+
+    /* Get first block of input data */
+    buf = curFile->nextbuf(curFile->nextbuf_arg, &len);
+    if (buf == NULL) {
+        /* Was all a waste of time ... */
+       free(curFile);
+       return;
+    }
+    curFile->P_str = buf;
+    curFile->P_ptr = buf;
+    if (len == 0) {
+        /* null-terminated string */
        curFile->P_end = NULL;
+    } else {
+       curFile->P_end = buf+len;
     }
 
     curFile->cond_depth = Cond_save_depth();
@@ -2211,26 +2399,31 @@ static int
 ParseEOF(void)
 {
     char *ptr;
+    size_t len;
 
-    if (curFile->nextbuf != NULL) {
-       /* eg .for loop data, get next iteration */
-       ptr = curFile->nextbuf(curFile->nextbuf_arg);
-       curFile->P_ptr = ptr;
-       curFile->P_str = ptr;
-       curFile->lineno = curFile->first_lineno;
-       if (ptr != NULL) {
-           /* Iterate again */
-           return CONTINUE;
-       }
+    assert(curFile->nextbuf != NULL);
+
+    /* get next input buffer, if any */
+    ptr = curFile->nextbuf(curFile->nextbuf_arg, &len);
+    curFile->P_ptr = ptr;
+    curFile->P_str = ptr;
+    curFile->P_end = len ? ptr + len : NULL;
+    curFile->lineno = curFile->first_lineno;
+    if (ptr != NULL) {
+       /* Iterate again */
+       return CONTINUE;
     }
 
     /* Ensure the makefile (or loop) didn't have mismatched conditionals */
     Cond_restore_depth(curFile->cond_depth);
 
+    if (curFile->lf != NULL) {
+           loadedfile_destroy(curFile->lf);
+           curFile->lf = NULL;
+    }
+
     /* Dispose of curFile info */
     /* Leak curFile->fname because all the gnodes have pointers to it */
-    if (curFile->fd != -1)
-       close(curFile->fd);
     free(curFile->P_str);
     free(curFile);
 
@@ -2244,8 +2437,8 @@ ParseEOF(void)
     }
 
     if (DEBUG(PARSE))
-       fprintf(debug_file, "ParseEOF: returning to file %s, line %d, fd %d\n",
-           curFile->fname, curFile->lineno, curFile->fd);
+       fprintf(debug_file, "ParseEOF: returning to file %s, line %d\n",
+           curFile->fname, curFile->lineno);
 
     /* Restore the PARSEDIR/PARSEFILE variables */
     ParseSetParseFile(curFile->fname);
@@ -2266,7 +2459,6 @@ ParseGetLine(int flags, int *length)
     char *escaped;
     char *comment;
     char *tp;
-    int len, dist;
 
     /* Loop through blank lines and comment lines */
     for (;;) {
@@ -2277,67 +2469,25 @@ ParseGetLine(int flags, int *length)
        escaped = NULL;
        comment = NULL;
        for (;;) {
+           if (cf->P_end != NULL && ptr == cf->P_end) {
+               /* end of buffer */
+               ch = 0;
+               break;
+           }
            ch = *ptr;
            if (ch == 0 || (ch == '\\' && ptr[1] == 0)) {
                if (cf->P_end == NULL)
                    /* End of string (aka for loop) data */
                    break;
-               /* End of data read from file, read more data */
-               if (ptr != cf->P_end && (ch != '\\' || ptr + 1 != cf->P_end)) {
-                   Parse_Error(PARSE_FATAL, "Zero byte read from file");
-                   return NULL;
-               }
-               /* Move existing data to (near) start of file buffer */
-               len = cf->P_end - cf->P_ptr;
-               tp = cf->P_str + 32;
-               memmove(tp, cf->P_ptr, len);
-               dist = cf->P_ptr - tp;
-               /* Update all pointers to reflect moved data */
-               ptr -= dist;
-               line -= dist;
-               line_end -= dist;
-               if (escaped)
-                   escaped -= dist;
-               if (comment)
-                   comment -= dist;
-               cf->P_ptr = tp;
-               tp += len;
-               cf->P_end = tp;
-               /* Try to read more data from file into buffer space */
-               len = cf->P_str + cf->P_buflen - tp - 32;
-               if (len <= 0) {
-                   /* We need a bigger buffer to hold this line */
-                   tp = bmake_realloc(cf->P_str, cf->P_buflen + IFILE_BUFLEN);
-                   cf->P_ptr = cf->P_ptr - cf->P_str + tp;
-                   cf->P_end = cf->P_end - cf->P_str + tp;
-                   ptr = ptr - cf->P_str + tp;
-                   line = line - cf->P_str + tp;
-                   line_end = line_end - cf->P_str + tp;
-                   if (escaped)
-                       escaped = escaped - cf->P_str + tp;
-                   if (comment)
-                       comment = comment - cf->P_str + tp;
-                   cf->P_str = tp;
-                   tp = cf->P_end;
-                   len += IFILE_BUFLEN;
-                   cf->P_buflen += IFILE_BUFLEN;
-               }
-               len = read(cf->fd, tp, len);
-               if (len <= 0) {
-                   if (len < 0) {
-                       Parse_Error(PARSE_FATAL, "Makefile read error: %s",
-                               strerror(errno));
-                       return NULL;
-                   }
-                   /* End of file */
+               if (cf->nextbuf != NULL) {
+                   /*
+                    * End of this buffer; return EOF and outer logic
+                    * will get the next one. (eww)
+                    */
                    break;
                }
-               /* 0 terminate the data, and update end pointer */
-               tp += len;
-               cf->P_end = tp;
-               *tp = 0;
-               /* Process newly read characters */
-               continue;
+               Parse_Error(PARSE_FATAL, "Zero byte read from file");
+               return NULL;
            }
 
            if (ch == '\\') {
@@ -2571,11 +2721,15 @@ Parse_File(const char *name, int fd)
 {
     char         *cp;          /* pointer into the line */
     char          *line;       /* the line we're working on */
+    struct loadedfile *lf;
+
+    lf = loadfile(name, fd);
 
     inLine = FALSE;
     fatals = 0;
 
-    Parse_SetInput(name, 0, fd, NULL, NULL);
+    Parse_SetInput(name, 0, -1, loadedfile_nextbuf, lf);
+    curFile->lf = lf;
 
     do {
        for (; (line = ParseReadLine()) != NULL; ) {


-- 
David A. Holland
dholland%netbsd.org@localhost


Home | Main Index | Thread Index | Old Index