Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/usr.bin/sort Now we have our own radix_sort() change the int...



details:   https://anonhg.NetBSD.org/src/rev/8f8f3b19c6c9
branches:  trunk
changeset: 747194:8f8f3b19c6c9
user:      dsl <dsl%NetBSD.org@localhost>
date:      Sat Sep 05 12:00:25 2009 +0000

description:
Now we have our own radix_sort() change the interface so that we pass
an array of 'RECHEADER *' and remove all the crappy stuff that backed up
by REC_DATA_OFFSET (etc).
Also change radix_sort() to return the number of elements, soon to be used
to drop duplicate keys (for sort -u).

diffstat:

 usr.bin/sort/append.c     |  18 ++++----
 usr.bin/sort/files.c      |  14 +++---
 usr.bin/sort/fsort.c      |  48 ++++++++++++-------------
 usr.bin/sort/fsort.h      |   5 +--
 usr.bin/sort/msort.c      |  17 ++++----
 usr.bin/sort/radix_sort.c |  90 ++++++++++++++++++++++------------------------
 usr.bin/sort/sort.h       |  14 ++-----
 7 files changed, 96 insertions(+), 110 deletions(-)

diffs (truncated from 511 to 300 lines):

diff -r 882be84ffa93 -r 8f8f3b19c6c9 usr.bin/sort/append.c
--- a/usr.bin/sort/append.c     Sat Sep 05 11:37:52 2009 +0000
+++ b/usr.bin/sort/append.c     Sat Sep 05 12:00:25 2009 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: append.c,v 1.20 2009/08/22 10:53:28 dsl Exp $  */
+/*     $NetBSD: append.c,v 1.21 2009/09/05 12:00:25 dsl Exp $  */
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -64,7 +64,7 @@
 #include "sort.h"
 
 #ifndef lint
-__RCSID("$NetBSD: append.c,v 1.20 2009/08/22 10:53:28 dsl Exp $");
+__RCSID("$NetBSD: append.c,v 1.21 2009/09/05 12:00:25 dsl Exp $");
 __SCCSID("@(#)append.c 8.1 (Berkeley) 6/6/93");
 #endif /* not lint */
 
@@ -88,16 +88,16 @@
  * copy sorted lines to output; check for uniqueness
  */
 void
-append(const u_char **keylist, int nelem, FILE *fp, put_func_t put, u_char *wts)
+append(const RECHEADER **keylist, int nelem, FILE *fp, put_func_t put, u_char *wts)
 {
-       const u_char **cpos, **lastkey;
-       const struct recheader *crec, *prec;
+       const RECHEADER **cpos, **lastkey;
+       const RECHEADER *crec, *prec;
        size_t plen;
 
        lastkey = keylist + nelem;
        if (!UNIQUE || wts == NULL) {
                for (cpos = keylist; cpos < lastkey; cpos++)
-                       put((const RECHEADER *)(*cpos - REC_DATA_OFFSET), fp);
+                       put(*cpos, fp);
                return;
        }
 
@@ -105,13 +105,13 @@
                return;
 
        cpos = keylist;
-       prec = (const RECHEADER *) (*cpos - REC_DATA_OFFSET);
+       prec = *cpos;
 
        if (!SINGL_FLD) {
                /* Key for each line is already in adjacent bytes */
                plen = prec->offset;
                for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
-                       crec = (const RECHEADER *) (*cpos - REC_DATA_OFFSET);
+                       crec = *cpos;
                        if (crec->offset == plen
                            && memcmp(crec->data, prec->data, plen) == 0) {
                                /* Duplicate key */
@@ -130,7 +130,7 @@
        /* Key for each line is already in adjacent bytes */
        plen = prec->length;
        for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
-               crec = (const RECHEADER *) (*cpos - REC_DATA_OFFSET);
+               crec = *cpos;
                if (crec->length == plen
                    && wt_cmp(crec->data, prec->data, plen, wts) == 0) {
                        /* Duplicate key */
diff -r 882be84ffa93 -r 8f8f3b19c6c9 usr.bin/sort/files.c
--- a/usr.bin/sort/files.c      Sat Sep 05 11:37:52 2009 +0000
+++ b/usr.bin/sort/files.c      Sat Sep 05 12:00:25 2009 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: files.c,v 1.35 2009/08/22 10:53:28 dsl Exp $   */
+/*     $NetBSD: files.c,v 1.36 2009/09/05 12:00:25 dsl Exp $   */
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -65,7 +65,7 @@
 #include "fsort.h"
 
 #ifndef lint
-__RCSID("$NetBSD: files.c,v 1.35 2009/08/22 10:53:28 dsl Exp $");
+__RCSID("$NetBSD: files.c,v 1.36 2009/09/05 12:00:25 dsl Exp $");
 __SCCSID("@(#)files.c  8.1 (Berkeley) 6/6/93");
 #endif /* not lint */
 
@@ -271,7 +271,7 @@
 void
 putrec(const RECHEADER *rec, FILE *fp)
 {
-       EWRITE(rec, 1, rec->length + REC_DATA_OFFSET, fp);
+       EWRITE(rec, 1, offsetof(RECHEADER, data) + rec->length, fp);
 }
 
 /*
@@ -289,7 +289,7 @@
 void
 putkeydump(const RECHEADER *rec, FILE *fp)
 {
-       EWRITE(rec, 1, rec->offset + REC_DATA_OFFSET, fp);
+       EWRITE(rec, 1, offsetof(RECHEADER, data) + rec->offset, fp);
 }
 
 /*
@@ -303,15 +303,15 @@
        FILE *fp;
 
        fp = fstack[flno].fp;
-       if ((u_char *) rec > end - REC_DATA_OFFSET)
+       if ((u_char *)(rec + 1) > end)
                return (BUFFEND);
-       if (!fread(rec, 1, REC_DATA_OFFSET, fp)) {
+       if (!fread(rec, 1, offsetof(RECHEADER, data), fp)) {
                fclose(fp);
                fstack[flno].fp = 0;
                return (EOF);
        }
        if (end - rec->data < (ptrdiff_t)rec->length) {
-               for (i = REC_DATA_OFFSET - 1; i >= 0;  i--)
+               for (i = offsetof(RECHEADER, data) - 1; i >= 0;  i--)
                        ungetc(*((char *) rec + i), fp);
                return (BUFFEND);
        }
diff -r 882be84ffa93 -r 8f8f3b19c6c9 usr.bin/sort/fsort.c
--- a/usr.bin/sort/fsort.c      Sat Sep 05 11:37:52 2009 +0000
+++ b/usr.bin/sort/fsort.c      Sat Sep 05 12:00:25 2009 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: fsort.c,v 1.39 2009/08/22 10:53:28 dsl Exp $   */
+/*     $NetBSD: fsort.c,v 1.40 2009/09/05 12:00:25 dsl Exp $   */
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -72,17 +72,13 @@
 #include "fsort.h"
 
 #ifndef lint
-__RCSID("$NetBSD: fsort.c,v 1.39 2009/08/22 10:53:28 dsl Exp $");
+__RCSID("$NetBSD: fsort.c,v 1.40 2009/09/05 12:00:25 dsl Exp $");
 __SCCSID("@(#)fsort.c  8.1 (Berkeley) 6/6/93");
 #endif /* not lint */
 
 #include <stdlib.h>
 #include <string.h>
 
-static const u_char **keylist = 0;
-u_char *buffer = 0;
-size_t bufsize = DEFBUFSIZE;
-
 struct tempfile fstack[MAXFCT];
 
 #define SALIGN(n) ((n+sizeof(length_t)-1) & ~(sizeof(length_t)-1))
@@ -90,21 +86,24 @@
 void
 fsort(struct filelist *filelist, int nfiles, FILE *outfp, struct field *ftbl)
 {
-       const u_char **keypos, **keyp;
+       const RECHEADER **keylist;
+       const RECHEADER **keypos, **keyp;
+       RECHEADER *buffer;
+       size_t bufsize = DEFBUFSIZE;
        u_char *bufend;
        int mfct = 0;
        int c, nelem;
        get_func_t get;
-       struct recheader *crec;
-       u_char *nbuffer;
+       RECHEADER *crec;
+       RECHEADER *nbuffer;
        FILE *fp;
 
-       if (!buffer) {
-               buffer = malloc(bufsize);
-               keylist = malloc(MAXNUM * sizeof(u_char *));
-               memset(keylist, 0, MAXNUM * sizeof(u_char *));
-       }
-       bufend = buffer + bufsize;
+       buffer = malloc(bufsize);
+       bufend = (u_char *)buffer + bufsize;
+       /* Allocate double length keymap for radix_sort */
+       keylist = malloc(2 * MAXNUM * sizeof(*keylist));
+       if (buffer == NULL || keylist == NULL)
+               err(2, "failed to malloc initial buffer or keylist");
 
        if (SINGL_FLD)
                /* Key and data are one! */
@@ -118,7 +117,7 @@
        for (;;) {
                keypos = keylist;
                nelem = 0;
-               crec = (RECHEADER *) buffer;
+               crec = buffer;
 
                /* Loop reading records */
                for (;;) {
@@ -126,13 +125,12 @@
                        /* 'c' is 0, EOF or BUFFEND */
                        if (c == 0) {
                                /* Save start of key in input buffer */
-                               *keypos++ = crec->data;
+                               *keypos++ = crec;
                                if (++nelem == MAXNUM) {
                                        c = BUFFEND;
                                        break;
                                }
-                               crec = (RECHEADER *)((char *) crec +
-                                   SALIGN(crec->length) + REC_DATA_OFFSET);
+                               crec = (RECHEADER *)(crec->data + SALIGN(crec->length));
                                continue;
                        }
                        if (c == EOF)
@@ -154,18 +152,18 @@
                        for (keyp = &keypos[-1]; keyp >= keylist; keyp--)
                                *keyp = nbuffer + (*keyp - buffer);
 
-                       crec = (RECHEADER *) (nbuffer + ((u_char *)crec - buffer));
+                       crec = nbuffer + (crec - buffer);
                        buffer = nbuffer;
-                       bufend = buffer + bufsize;
+                       bufend = (u_char *)buffer + bufsize;
                }
 
                /* Sort this set of records */
                if (SINGL_FLD) {
-                       if (radix_sort(keylist, nelem, ftbl[0].weights, REC_D))
-                               err(2, "single field radix_sort");
+                       nelem = radix_sort(keylist, keylist + MAXNUM, nelem,
+                           ftbl[0].weights, REC_D);
                } else {
-                       if (radix_sort(keylist, nelem, unweighted, 0))
-                               err(2, "unweighted radix_sort");
+                       nelem = radix_sort(keylist, keylist + MAXNUM, nelem,
+                           unweighted, 0);
                }
 
                if (c == EOF && mfct == 0) {
diff -r 882be84ffa93 -r 8f8f3b19c6c9 usr.bin/sort/fsort.h
--- a/usr.bin/sort/fsort.h      Sat Sep 05 11:37:52 2009 +0000
+++ b/usr.bin/sort/fsort.h      Sat Sep 05 12:00:25 2009 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: fsort.h,v 1.15 2009/08/20 06:36:25 dsl Exp $   */
+/*     $NetBSD: fsort.h,v 1.16 2009/09/05 12:00:25 dsl Exp $   */
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -84,9 +84,6 @@
  */
 #define MERGE_FNUM     16
 
-extern u_char *buffer;
-extern size_t bufsize;
-
 /* Temporary files contian data (with record headers) in sorted order */
 struct tempfile {
        FILE *fp;
diff -r 882be84ffa93 -r 8f8f3b19c6c9 usr.bin/sort/msort.c
--- a/usr.bin/sort/msort.c      Sat Sep 05 11:37:52 2009 +0000
+++ b/usr.bin/sort/msort.c      Sat Sep 05 12:00:25 2009 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: msort.c,v 1.24 2009/08/22 15:16:50 dsl Exp $   */
+/*     $NetBSD: msort.c,v 1.25 2009/09/05 12:00:25 dsl Exp $   */
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -65,7 +65,7 @@
 #include "fsort.h"
 
 #ifndef lint
-__RCSID("$NetBSD: msort.c,v 1.24 2009/08/22 15:16:50 dsl Exp $");
+__RCSID("$NetBSD: msort.c,v 1.25 2009/09/05 12:00:25 dsl Exp $");
 __SCCSID("@(#)msort.c  8.1 (Berkeley) 6/6/93");
 #endif /* not lint */
 
@@ -79,7 +79,7 @@
 typedef struct mfile {
        u_char *end;
        short flno;
-       struct recheader rec[1];
+       RECHEADER rec[1];
 } MFILE;
 
 static u_char *wts;
@@ -324,15 +324,14 @@
 void
 order(struct filelist *filelist, get_func_t get, struct field *ftbl)
 {
+       RECHEADER *crec, *prec, *trec;
        u_char *crec_end, *prec_end, *trec_end;
        int c;
-       RECHEADER *crec, *prec, *trec;
 
-       buffer = malloc(2 * (DEFLLEN + REC_DATA_OFFSET));
-       crec = (RECHEADER *) buffer;
-       crec_end = buffer + DEFLLEN + REC_DATA_OFFSET;
-       prec = (RECHEADER *) (buffer + DEFLLEN + REC_DATA_OFFSET);
-       prec_end = buffer + 2*(DEFLLEN + REC_DATA_OFFSET);
+       crec = malloc(offsetof(RECHEADER, data[DEFLLEN]));
+       crec_end = crec->data + DEFLLEN;
+       prec = malloc(offsetof(RECHEADER, data[DEFLLEN]));
+       prec_end = prec->data + DEFLLEN;
        wts = ftbl->weights;
 
        /* XXX this does exit(0) for overlong lines */
diff -r 882be84ffa93 -r 8f8f3b19c6c9 usr.bin/sort/radix_sort.c
--- a/usr.bin/sort/radix_sort.c Sat Sep 05 11:37:52 2009 +0000



Home | Main Index | Thread Index | Old Index