Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/usr.bin/sort Move all the fopen() calls out of the record read routines into the callers.



details:   https://anonhg.NetBSD.org/src/rev/7ae68b17a3b8
branches:  trunk
changeset: 747670:7ae68b17a3b8
user:      dsl <dsl%NetBSD.org@localhost>
date:      Sat Sep 26 21:16:55 2009 +0000

description:
Move all the fopen() calls out of the record read routines into the callers.
Split the merge sort so that fsort() can pass the 'FILE *' of the temporary
files to be merged into the merge code.
Don't rely on realloc() not moving the end address of a buffer!
Rework merge sort so that it sorts pointers to 'struct mfile' and only
copies sort record descriptors around.
No functional change intended.

diffstat:

 usr.bin/sort/files.c |  130 ++++++----------------
 usr.bin/sort/fsort.c |   55 +++++----
 usr.bin/sort/fsort.h |   15 +--
 usr.bin/sort/msort.c |  283 ++++++++++++++++++++++++--------------------------
 usr.bin/sort/sort.c  |   20 +--
 usr.bin/sort/sort.h  |   21 +--
 usr.bin/sort/tmp.c   |    7 +-
 7 files changed, 224 insertions(+), 307 deletions(-)

diffs (truncated from 903 to 300 lines):

diff -r 230fe13da623 -r 7ae68b17a3b8 usr.bin/sort/files.c
--- a/usr.bin/sort/files.c      Sat Sep 26 19:58:53 2009 +0000
+++ b/usr.bin/sort/files.c      Sat Sep 26 21:16:55 2009 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: files.c,v 1.37 2009/09/10 22:02:40 dsl Exp $   */
+/*     $NetBSD: files.c,v 1.38 2009/09/26 21:16:55 dsl Exp $   */
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -65,7 +65,7 @@
 #include "fsort.h"
 
 #ifndef lint
-__RCSID("$NetBSD: files.c,v 1.37 2009/09/10 22:02:40 dsl Exp $");
+__RCSID("$NetBSD: files.c,v 1.38 2009/09/26 21:16:55 dsl Exp $");
 __SCCSID("@(#)files.c  8.1 (Berkeley) 6/6/93");
 #endif /* not lint */
 
@@ -78,20 +78,15 @@
  * in the first fsort pass.
  */
 int
-makeline(int flno, int top, struct filelist *filelist, int nfiles,
-    RECHEADER *recbuf, u_char *bufend, struct field *dummy2)
+makeline(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *dummy2)
 {
-       static u_char *obufend;
+       static u_char *opos = NULL;
        static size_t osz;
        u_char *pos;
-       static int filenum = 0, overflow = 0;
-       static FILE *fp = 0;
        int c;
 
-       c = 0;          /* XXXGCC -Wuninitialized [pmppc] */
-
        pos = recbuf->data;
-       if (overflow) {
+       if (opos != NULL) {
                /*
                 * Buffer shortage is solved by either of two ways:
                 * o flush previous buffered data and start using the
@@ -101,77 +96,47 @@
                 * The former is preferred, realloc is only done when
                 * there is exactly one item in buffer which does not fit. 
                 */
-               if (bufend == obufend)
-                       memmove(pos, bufend - osz, osz);
+               if (pos != opos)
+                       memmove(pos, opos, osz);
 
                pos += osz;
-               overflow = 0;
+               opos = NULL;
        }
 
-       for (;;) {
-               if (flno >= 0 && (fp = fstack[flno].fp) == NULL)
-                       return (EOF);
-               else if (fp == NULL) {
-                       if (filenum  >= nfiles)
-                               return (EOF);
-                       if (!(fp = fopen(filelist->names[filenum], "r")))
-                               err(2, "%s", filelist->names[filenum]);
-                       filenum++;
-               }
-               while ((pos < bufend) && ((c = getc(fp)) != EOF)) {
-                       *pos++ = c;
-                       if (c == REC_D) {
-                               recbuf->offset = 0;
-                               recbuf->length = pos - recbuf->data;
-                               recbuf->keylen = recbuf->length - 1;
-                               return (0);
-                       }
-               }
-               if (pos >= bufend) {
-                       if (recbuf->data < bufend) {
-                               overflow = 1;
-                               obufend = bufend;
-                               osz = (pos - recbuf->data);
+       while (pos < bufend) {
+               c = getc(fp);
+               if (c == EOF) {
+                       if (pos == recbuf->data) {
+                               FCLOSE(fp);
+                               return EOF;
                        }
-                       return (BUFFEND);
-               } else if (c == EOF) {
-                       if (recbuf->data != pos) {
-                               *pos++ = REC_D;
-                               recbuf->offset = 0;
-                               recbuf->length = pos - recbuf->data;
-                               recbuf->keylen = recbuf->length - 1;
-                               return (0);
-                       }
-                       FCLOSE(fp);
-                       fp = 0;
-                       if (flno >= 0)
-                               fstack[flno].fp = 0;
-               } else {
-                       
-                       warnx("makeline: line too long: ignoring '%.100s...'", recbuf->data);
-
-                       /* Consume the rest of line from input */
-                       while ((c = getc(fp)) != REC_D && c != EOF)
-                               ;
-
+                       /* Add terminator to partial line */
+                       c = REC_D;
+               }
+               *pos++ = c;
+               if (c == REC_D) {
                        recbuf->offset = 0;
-                       recbuf->length = 0;
-                       recbuf->keylen = 0;
-
-                       return (BUFFEND);
+                       recbuf->length = pos - recbuf->data;
+                       recbuf->keylen = recbuf->length - 1;
+                       return (0);
                }
        }
+
+       /* Ran out of buffer space... */
+       if (recbuf->data < bufend) {
+               /* Remember where the partial record is */
+               osz = pos - recbuf->data;
+               opos = recbuf->data;
+       }
+       return (BUFFEND);
 }
 
 /*
  * This generates keys. It's only called in the first fsort pass
  */
 int
-makekey(int flno, int top, struct filelist *filelist, int nfiles,
-    RECHEADER *recbuf, u_char *bufend, struct field *ftbl)
+makekey(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *ftbl)
 {
-       static int filenum = 0;
-       static FILE *dbdesc = 0;
        static u_char *line_data;
        static ssize_t line_size;
        static int overflow = 0;
@@ -182,29 +147,10 @@
                return overflow ? BUFFEND : 0;
        }
 
-       /* Loop through files until we find a line of input */
-       for (;;) {
-               if (flno >= 0) {
-                       if (!(dbdesc = fstack[flno].fp))
-                               return (EOF);
-               } else if (!dbdesc) {
-                       if (filenum  >= nfiles)
-                               return (EOF);
-                       dbdesc = fopen(filelist->names[filenum], "r");
-                       if (!dbdesc)
-                               err(2, "%s", filelist->names[filenum]);
-                       filenum++;
-               }
-               line_size = seq(dbdesc, &line_data);
-               if (line_size != 0)
-                       /* Got a line */
-                       break;
-
-               /* End of file ... */
-               FCLOSE(dbdesc);
-               dbdesc = 0;
-               if (flno >= 0)
-                       fstack[flno].fp = 0;
+       line_size = seq(fp, &line_data);
+       if (line_size == 0) {
+               FCLOSE(fp);
+               return EOF;
        }
 
        if (line_size > bufend - recbuf->data) {
@@ -299,18 +245,14 @@
  * get a record from a temporary file. (Used by merge sort.)
  */
 int
-geteasy(int flno, int top, struct filelist *filelist, int nfiles,
-    RECHEADER *rec, u_char *end, struct field *dummy2)
+geteasy(FILE *fp, RECHEADER *rec, u_char *end, struct field *dummy2)
 {
        int i;
-       FILE *fp;
 
-       fp = fstack[flno].fp;
        if ((u_char *)(rec + 1) > end)
                return (BUFFEND);
        if (!fread(rec, 1, offsetof(RECHEADER, data), fp)) {
                fclose(fp);
-               fstack[flno].fp = 0;
                return (EOF);
        }
        if (end - rec->data < (ptrdiff_t)rec->length) {
diff -r 230fe13da623 -r 7ae68b17a3b8 usr.bin/sort/fsort.c
--- a/usr.bin/sort/fsort.c      Sat Sep 26 19:58:53 2009 +0000
+++ b/usr.bin/sort/fsort.c      Sat Sep 26 21:16:55 2009 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: fsort.c,v 1.41 2009/09/10 22:02:40 dsl Exp $   */
+/*     $NetBSD: fsort.c,v 1.42 2009/09/26 21:16:55 dsl Exp $   */
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -72,15 +72,13 @@
 #include "fsort.h"
 
 #ifndef lint
-__RCSID("$NetBSD: fsort.c,v 1.41 2009/09/10 22:02:40 dsl Exp $");
+__RCSID("$NetBSD: fsort.c,v 1.42 2009/09/26 21:16:55 dsl Exp $");
 __SCCSID("@(#)fsort.c  8.1 (Berkeley) 6/6/93");
 #endif /* not lint */
 
 #include <stdlib.h>
 #include <string.h>
 
-struct tempfile fstack[MAXFCT];
-
 #define SALIGN(n) ((n+sizeof(length_t)-1) & ~(sizeof(length_t)-1))
 
 void
@@ -96,12 +94,14 @@
        get_func_t get;
        RECHEADER *crec;
        RECHEADER *nbuffer;
-       FILE *fp;
+       FILE *fp, *tmp_fp;
+       int file_no;
+       int max_recs = DEBUG('m') ? 16 : MAXNUM;
 
        buffer = malloc(bufsize);
        bufend = (u_char *)buffer + bufsize;
        /* Allocate double length keymap for radix_sort */
-       keylist = malloc(2 * MAXNUM * sizeof(*keylist));
+       keylist = malloc(2 * max_recs * sizeof(*keylist));
        if (buffer == NULL || keylist == NULL)
                err(2, "failed to malloc initial buffer or keylist");
 
@@ -112,6 +112,11 @@
                /* Key (merged key fields) added before data */
                get = makekey;
 
+       file_no = 0;
+       fp = fopen(filelist->names[0], "r");
+       if (fp == NULL)
+               err(2, "%s", filelist->names[0]);
+
        /* Loop through reads of chunk of input files that get sorted
         * and then merged together. */
        for (;;) {
@@ -121,21 +126,29 @@
 
                /* Loop reading records */
                for (;;) {
-                       c = get(-1, 0, filelist, nfiles, crec, bufend, ftbl);
+                       c = get(fp, crec, bufend, ftbl);
                        /* 'c' is 0, EOF or BUFFEND */
                        if (c == 0) {
                                /* Save start of key in input buffer */
                                *keypos++ = crec;
-                               if (++nelem == MAXNUM) {
+                               if (++nelem == max_recs) {
                                        c = BUFFEND;
                                        break;
                                }
                                crec = (RECHEADER *)(crec->data + SALIGN(crec->length));
                                continue;
                        }
-                       if (c == EOF)
-                               break;
-                       if (nelem >= MAXNUM || bufsize >= MAXBUFSIZE)
+                       if (c == EOF) {
+                               /* try next file */
+                               if (++file_no >= nfiles)
+                                       /* no more files */
+                                       break;
+                               fp = fopen(filelist->names[file_no], "r");
+                               if (fp == NULL)
+                                       err(2, "%s", filelist->names[file_no]);
+                               continue;
+                       }
+                       if (nelem >= max_recs || bufsize >= MAXBUFSIZE)
                                /* Need to sort and save this lot of data */
                                break;
 
@@ -158,7 +171,7 @@
                }
 
                /* Sort this set of records */
-               radix_sort(keylist, keylist + MAXNUM, nelem);
+               radix_sort(keylist, keylist + max_recs, nelem);
 
                if (c == EOF && mfct == 0) {
                        /* all the data is (sorted) in the buffer */
@@ -168,25 +181,17 @@



Home | Main Index | Thread Index | Old Index