Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/external/historical/nawk/dist PR/46155: Miguel Piñeiro Jr: F...



details:   https://anonhg.NetBSD.org/src/rev/dfe7a68b087c
branches:  trunk
changeset: 777939:dfe7a68b087c
user:      christos <christos%NetBSD.org@localhost>
date:      Sat Mar 10 19:18:48 2012 +0000

description:
PR/46155: Miguel Piñeiro Jr: Fix RS processing. Apply the gawk-like patch
from the excellent PR. Many thanks for all the work you put on this,
explanation, tests, and patch!

diffstat:

 external/historical/nawk/dist/b.c     |  90 +++++++++++++++++++++++++++++++++++
 external/historical/nawk/dist/lib.c   |  78 ++++++++++++------------------
 external/historical/nawk/dist/proto.h |   5 +-
 external/historical/nawk/dist/run.c   |  25 +++++----
 4 files changed, 138 insertions(+), 60 deletions(-)

diffs (truncated from 342 to 300 lines):

diff -r 7112af47bebc -r dfe7a68b087c external/historical/nawk/dist/b.c
--- a/external/historical/nawk/dist/b.c Sat Mar 10 19:11:36 2012 +0000
+++ b/external/historical/nawk/dist/b.c Sat Mar 10 19:18:48 2012 +0000
@@ -624,6 +624,96 @@
        return (0);
 }
 
+
+/*
+ * NAME
+ *     fnematch
+ *
+ * DESCRIPTION
+ *     A stream-fed version of nematch which transfers characters to a
+ *     null-terminated buffer. All characters up to and including the last
+ *     character of the matching text or EOF are placed in the buffer. If
+ *     a match is found, patbeg and patlen are set appropriately.
+ *
+ * RETURN VALUES
+ *     0    No match found.
+ *     1    Match found.
+ */  
+
+int fnematch(fa *pfa, FILE *f, uschar **pbuf, int *pbufsize, int quantum)      
+{
+       uschar *buf = *pbuf;
+       int bufsize = *pbufsize;
+       int c, i, j, k, ns, s;
+
+       s = pfa->initstat;
+       assert(s < pfa->state_count);
+       patlen = 0;
+
+       /*
+        * All indices relative to buf.
+        * i <= j <= k <= bufsize
+        *
+        * i: origin of active substring
+        * j: current character
+        * k: destination of next getc()
+        */
+       i = -1, k = 0;
+        do {
+               j = i++;
+               do {
+                       if (++j == k) {
+                               if (k == bufsize)
+                                       if (!adjbuf(&buf, &bufsize, bufsize+1, quantum, 0, "fnematch"))
+                                               FATAL("stream '%.30s...' too long", buf);       
+                               buf[k++] = (c = getc(f)) != EOF ? c : 0;
+                       }
+                       c = buf[j];
+                       /* assert(c < NCHARS); */
+
+                       if ((ns = pfa->gototab[s][c]) != 0)
+                               s = ns;
+                       else
+                               s = cgoto(pfa, s, c);
+                       assert(s < pfa->state_count);
+
+                       if (pfa->out[s]) {      /* final state */
+                               patlen = j - i + 1;
+                               if (c == 0)     /* don't count $ */
+                                       patlen--;
+                       }
+               } while (buf[j] && s != 1);
+               s = 2;
+       } while (buf[i] && !patlen);
+
+       /* adjbuf() may have relocated a resized buffer. Inform the world. */
+       *pbuf = buf;
+       *pbufsize = bufsize;
+
+       if (patlen) {
+               patbeg = buf + i;
+               /*
+                * Under no circumstances is the last character fed to
+                * the automaton part of the match. It is EOF's nullbyte,
+                * or it sent the automaton into a state with no further
+                * transitions available (s==1), or both. Room for a
+                * terminating nullbyte is guaranteed.
+                *
+                * ungetc any chars after the end of matching text
+                * (except for EOF's nullbyte, if present) and null
+                * terminate the buffer.
+                */
+               do
+                       if (buf[--k] && ungetc(buf[k], f) == EOF)
+                               FATAL("unable to ungetc '%c'", buf[k]); 
+               while (k > i + patlen);
+               buf[k] = 0;
+               return 1;
+       }
+       else
+               return 0;
+}
+
 Node *reparse(const char *p)   /* parses regular expression pointed to by p */
 {                      /* uses relex() to scan regular expression */
        Node *np;
diff -r 7112af47bebc -r dfe7a68b087c external/historical/nawk/dist/lib.c
--- a/external/historical/nawk/dist/lib.c       Sat Mar 10 19:11:36 2012 +0000
+++ b/external/historical/nawk/dist/lib.c       Sat Mar 10 19:18:48 2012 +0000
@@ -38,6 +38,7 @@
 
 char   EMPTY[] = { '\0' };
 FILE   *infile = NULL;
+int    innew;          /* 1 = infile has not been read by readrec */
 char   *file   = EMPTY;
 uschar *record;
 int    recsize = RECSIZE;
@@ -104,6 +105,7 @@
                argno++;
        }
        infile = stdin;         /* no filenames, so use stdin */
+       innew = 1;
 }
 
 static int firsttime = 1;
@@ -146,9 +148,12 @@
                                infile = stdin;
                        else if ((infile = fopen(file, "r")) == NULL)
                                FATAL("can't open file %s", file);
+                       innew = 1;
                        setfval(fnrloc, 0.0);
                }
-               c = readrec(&buf, &bufsize, infile);
+               c = readrec(&buf, &bufsize, infile, innew);
+               if (innew)
+                       innew = 0;
                if (c != 0 || buf[0] != '\0') { /* normal record */
                        if (isrecord) {
                                if (freeable(fldtab[0]))
@@ -186,9 +191,9 @@
        argno++;
 }
 
-int readrec(uschar **pbuf, int *pbufsize, FILE *inf)   /* read one record into buf */
+int readrec(uschar **pbuf, int *pbufsize, FILE *inf, int newflag)      /* read one record into buf */
 {
-       int sep, c;
+       int sep, c, isrec, found, tempstat;
        uschar *rr, *buf = *pbuf;
        int bufsize = *pbufsize;
        size_t len;
@@ -202,48 +207,26 @@
                        FATAL("field separator %.10s... is too long", *FS);
                memcpy(inputFS, *FS, len_inputFS);
        }
-       if ((sep = **RS) == 0) {
-               sep = '\n';
-               while ((c=getc(inf)) == '\n' && c != EOF)       /* skip leading \n's */
-                       ;
-               if (c != EOF)
-                       ungetc(c, inf);
-       } else if ((*RS)[1]) {
+       if (**RS && (*RS)[1]) {
                fa *pfa = makedfa(*RS, 1);
-               int tempstat = pfa->initstat;
-               char *brr = buf;
-               char *rrr = NULL;
-               int x;
-               for (rr = buf; ; ) {
-                       while ((c = getc(inf)) != EOF) {
-                               if (rr-buf+3 > bufsize)
-                                       if (!adjbuf(&buf, &bufsize, 3+rr-buf,
-                                           recsize, &rr, "readrec 2"))
-                                               FATAL("input record `%.30s...'"
-                                                   " too long", buf);
-                               *rr++ = c;
-                               *rr = '\0';
-                               if (!(x = nematch(pfa, brr))) {
-                                       pfa->initstat = tempstat;
-                                       if (rrr) {
-                                               rr = rrr;
-                                               ungetc(c, inf);
-                                               break;
-                                       }
-                               } else {
-                                       pfa->initstat = 2;
-                                       brr = rrr = rr = patbeg;
-                               }
-                       }
-                       if (rrr || c == EOF)
-                               break;
-                       if ((c = getc(inf)) == '\n' || c == EOF)
-                               /* 2 in a row */
-                               break;
-                       *rr++ = '\n';
-                       *rr++ = c;
+               if (newflag)
+                       found = fnematch(pfa, inf, &buf, &bufsize, recsize);
+               else {
+                       tempstat = pfa->initstat;
+                       pfa->initstat = 2;
+                       found = fnematch(pfa, inf, &buf, &bufsize, recsize);
+                       pfa->initstat = tempstat;
                }
+               if (found)
+                       *patbeg = 0;
        } else {
+               if ((sep = **RS) == 0) {
+                       sep = '\n';
+                       while ((c=getc(inf)) == '\n' && c != EOF)       /* skip leading \n's */
+                               ;
+                       if (c != EOF)
+                               ungetc(c, inf);
+               }
                for (rr = buf; ; ) {
                        for (; (c=getc(inf)) != sep && c != EOF; ) {
                                if (rr-buf+1 > bufsize)
@@ -264,14 +247,15 @@
                        *rr++ = '\n';
                        *rr++ = c;
                }
+               if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
+                       FATAL("input record `%.30s...' too long", buf);
+               *rr = 0;
        }
-       if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
-               FATAL("input record `%.30s...' too long", buf);
-       *rr = 0;
-          dprintf( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) );
        *pbuf = buf;
        *pbufsize = bufsize;
-       return c == EOF && rr == buf ? 0 : 1;
+       isrec = *buf || !feof(inf);
+          dprintf( ("readrec saw <%s>, returns %d\n", buf, isrec) );
+       return isrec;
 }
 
 char *getargv(int n)   /* get ARGV[n] */
diff -r 7112af47bebc -r dfe7a68b087c external/historical/nawk/dist/proto.h
--- a/external/historical/nawk/dist/proto.h     Sat Mar 10 19:11:36 2012 +0000
+++ b/external/historical/nawk/dist/proto.h     Sat Mar 10 19:18:48 2012 +0000
@@ -54,6 +54,7 @@
 extern int     match(fa *, const char *);
 extern int     pmatch(fa *, const char *);
 extern int     nematch(fa *, const char *);
+extern int     fnematch(fa *, FILE *, uschar **, int *, int);
 extern Node    *reparse(const char *);
 extern Node    *regexp(void);
 extern Node    *primary(void);
@@ -122,7 +123,7 @@
 extern void    growfldtab(int n);
 extern int     getrec(uschar **, int *, int);
 extern void    nextfile(void);
-extern int     readrec(uschar **buf, int *bufsize, FILE *inf);
+extern int     readrec(uschar **buf, int *bufsize, FILE *inf, int newflag);
 extern char    *getargv(int);
 extern void    setclvar(char *);
 extern void    fldbld(void);
@@ -191,7 +192,7 @@
 extern Cell    *printstat(Node **, int);
 extern Cell    *nullproc(Node **, int);
 extern FILE    *redirect(int, Node *);
-extern FILE    *openfile(int, const char *);
+extern FILE    *openfile(int, const char *, int *);
 extern const char      *filename(FILE *);
 extern Cell    *closefile(Node **, int);
 extern void    closeall(void);
diff -r 7112af47bebc -r dfe7a68b087c external/historical/nawk/dist/run.c
--- a/external/historical/nawk/dist/run.c       Sat Mar 10 19:11:36 2012 +0000
+++ b/external/historical/nawk/dist/run.c       Sat Mar 10 19:18:48 2012 +0000
@@ -406,7 +406,7 @@
        FILE *fp;
        uschar *buf;
        int bufsize = recsize;
-       int mode;
+       int mode, newflag;
 
        if ((buf = malloc(bufsize)) == NULL)
                FATAL("out of memory in getline");
@@ -418,12 +418,12 @@
                mode = ptoi(a[1]);
                if (mode == '|')                /* input pipe */
                        mode = LE;      /* arbitrary flag */
-               fp = openfile(mode, getsval(x));
+               fp = openfile(mode, getsval(x), &newflag);
                tempfree(x);
                if (fp == NULL)
                        n = -1;
                else
-                       n = readrec(&buf, &bufsize, fp);
+                       n = readrec(&buf, &bufsize, fp, newflag);
                if (n <= 0) {
                        ;
                } else if (a[0] != NULL) {      /* getline var <file */
@@ -1623,7 +1623,7 @@
                if (isrec(x) || strlen(getsval(x)) == 0) {
                        flush_all();    /* fflush() or fflush("") -> all */
                        u = 0;
-               } else if ((fp = openfile(FFLUSH, getsval(x))) == NULL)
+               } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
                        u = -1;
                else
                        u = fflush(fp);
@@ -1715,7 +1715,7 @@
 
        x = execute(b);



Home | Main Index | Thread Index | Old Index