Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/usr.bin/sort Include a local copy of the sradixsort() code f...



details:   https://anonhg.NetBSD.org/src/rev/8283b679aee3
branches:  trunk
changeset: 747188:8283b679aee3
user:      dsl <dsl%NetBSD.org@localhost>
date:      Sat Sep 05 09:16:18 2009 +0000

description:
Include a local copy of the sradixsort() code from libc.
Currently unchanged apart from the deletion of the 'unstable' version and
other unneeded code.
Use fldtab[0]. not fldtab-> when we are referring to the global info
in the 0th entry to emphasise that this entry is different.
fldtab[0].weights is only needed in the SINGL_FLD case - so set it there.
Re-indent a big 'if' is setfield() so that the line breaks match the
logic - which looks dubious now!

diffstat:

 usr.bin/sort/Makefile     |    3 +-
 usr.bin/sort/init.c       |   16 ++-
 usr.bin/sort/radix_sort.c |  182 ++++++++++++++++++++++++++++++++++++++++++++++
 usr.bin/sort/sort.c       |   52 +++++++------
 usr.bin/sort/sort.h       |    6 +-
 5 files changed, 224 insertions(+), 35 deletions(-)

diffs (truncated from 405 to 300 lines):

diff -r f105068353ef -r 8283b679aee3 usr.bin/sort/Makefile
--- a/usr.bin/sort/Makefile     Sat Sep 05 08:55:40 2009 +0000
+++ b/usr.bin/sort/Makefile     Sat Sep 05 09:16:18 2009 +0000
@@ -1,7 +1,8 @@
-#      $NetBSD: Makefile,v 1.6 2009/04/14 22:15:26 lukem Exp $
+#      $NetBSD: Makefile,v 1.7 2009/09/05 09:16:18 dsl Exp $
 #      from: @(#)Makefile      8.1 (Berkeley) 6/6/93
 
 PROG=  sort
 SRCS=  append.c fields.c files.c fsort.c init.c msort.c sort.c tmp.c
+SRCS+= radix_sort.c
 
 .include <bsd.prog.mk>
diff -r f105068353ef -r 8283b679aee3 usr.bin/sort/init.c
--- a/usr.bin/sort/init.c       Sat Sep 05 08:55:40 2009 +0000
+++ b/usr.bin/sort/init.c       Sat Sep 05 09:16:18 2009 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: init.c,v 1.21 2009/08/22 21:50:32 dsl Exp $    */
+/*     $NetBSD: init.c,v 1.22 2009/09/05 09:16:18 dsl Exp $    */
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -64,7 +64,7 @@
 #include "sort.h"
 
 #ifndef lint
-__RCSID("$NetBSD: init.c,v 1.21 2009/08/22 21:50:32 dsl Exp $");
+__RCSID("$NetBSD: init.c,v 1.22 2009/09/05 09:16:18 dsl Exp $");
 __SCCSID("@(#)init.c   8.1 (Berkeley) 6/6/93");
 #endif /* not lint */
 
@@ -210,10 +210,12 @@
        if (!cur_fld->tcol.indent)      /* BT has no meaning at end of field */
                cur_fld->flags &= ~BT;
 
-       if (cur_fld->tcol.num && !(!(cur_fld->flags & BI)
-           && cur_fld->flags & BT) && (cur_fld->tcol.num <= cur_fld->icol.num
-           && cur_fld->tcol.indent != 0 /* == 0 -> end of field, i.e. okay */
-           && cur_fld->tcol.indent < cur_fld->icol.indent))
+       if (cur_fld->tcol.num
+           && !(!(cur_fld->flags & BI) && cur_fld->flags & BT)
+           && (cur_fld->tcol.num <= cur_fld->icol.num
+                   /* indent if 0 -> end of field, i.e. okay */
+                   && cur_fld->tcol.indent != 0
+                   && cur_fld->tcol.indent < cur_fld->icol.indent))
                errx(2, "fields out of order");
        insertcol(cur_fld);
        return (cur_fld->tcol.num);
@@ -333,7 +335,7 @@
  * and -d (only sort blank and alphanumerics).
  */
 void
-settables(int gflags)
+settables(void)
 {
        int i;
        int next_weight = SINGL_FLD ? 1 : 2;
diff -r f105068353ef -r 8283b679aee3 usr.bin/sort/radix_sort.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/usr.bin/sort/radix_sort.c Sat Sep 05 09:16:18 2009 +0000
@@ -0,0 +1,182 @@
+/*     $NetBSD: radix_sort.c,v 1.1 2009/09/05 09:16:18 dsl Exp $       */
+
+/*-
+ * Copyright (c) 1990, 1993
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy and by Dan Bernstein at New York University, 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)radixsort.c        8.2 (Berkeley) 4/28/95";
+#else
+__RCSID("$NetBSD: radix_sort.c,v 1.1 2009/09/05 09:16:18 dsl Exp $");
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+/*
+ * 'stable' radix sort initially from libc/stdlib/radixsort.c
+ */
+
+#include <sys/types.h>
+
+#include <assert.h>
+#include <errno.h>
+#include "sort.h"
+
+typedef struct {
+       const u_char **sa;
+       int sn, si;
+} stack;
+
+static inline void simplesort(const u_char **, int, int, const u_char *, u_int);
+static void r_sort_b(const u_char **,
+           const u_char **, int, int, const u_char *, u_int);
+
+#define        THRESHOLD       20              /* Divert to simplesort(). */
+#define        SIZE            512             /* Default stack size. */
+
+int
+radix_sort(const u_char **a, int n, const u_char *tab, u_int endch)
+{
+       const u_char **ta;
+
+       endch = tab[endch];
+       if (n < THRESHOLD)
+               simplesort(a, n, 0, tab, endch);
+       else {
+               if ((ta = malloc(n * sizeof(a))) == NULL)
+                       return (-1);
+               r_sort_b(a, ta, n, 0, tab, endch);
+               free(ta);
+       }
+       return (0);
+}
+
+#define empty(s)       (s >= sp)
+#define pop(a, n, i)   a = (--sp)->sa, n = sp->sn, i = sp->si
+#define push(a, n, i)  sp->sa = a, sp->sn = n, (sp++)->si = i
+#define swap(a, b, t)  t = a, a = b, b = t
+
+/* Stable sort, requiring additional memory. */
+static void
+r_sort_b(const u_char **a, const u_char **ta, int n, int i, const u_char *tr,
+    u_int endch)
+{
+       static u_int count[256], nc, bmin;
+       u_int c;
+       const u_char **ak, **ai;
+       stack s[512], *sp, *sp0, *sp1, temp;
+       const u_char **top[256];
+       u_int *cp, bigc;
+
+       _DIAGASSERT(a != NULL);
+       _DIAGASSERT(ta != NULL);
+       _DIAGASSERT(tr != NULL);
+
+       sp = s;
+       push(a, n, i);
+       while (!empty(s)) {
+               pop(a, n, i);
+               if (n < THRESHOLD) {
+                       simplesort(a, n, i, tr, endch);
+                       continue;
+               }
+
+               if (nc == 0) {
+                       bmin = 255;
+                       for (ak = a + n; --ak >= a;) {
+                               c = tr[(*ak)[i]];
+                               if (++count[c] == 1 && c != endch) {
+                                       if (c < bmin)
+                                               bmin = c;
+                                       nc++;
+                               }
+                       }
+                       if (sp + nc > s + SIZE) {
+                               r_sort_b(a, ta, n, i, tr, endch);
+                               continue;
+                       }
+               }
+
+               sp0 = sp1 = sp;
+               bigc = 2;
+               if (endch == 0) {
+                       top[0] = ak = a + count[0];
+                       count[0] = 0;
+               } else {
+                       ak = a;
+                       top[255] = a + n;
+                       count[255] = 0;
+               }
+               for (cp = count + bmin; nc > 0; cp++) {
+                       while (*cp == 0)
+                               cp++;
+                       if ((c = *cp) > 1) {
+                               if (c > bigc) {
+                                       bigc = c;
+                                       sp1 = sp;
+                               }
+                               push(ak, c, i+1);
+                       }
+                       top[cp-count] = ak += c;
+                       *cp = 0;                        /* Reset count[]. */
+                       nc--;
+               }
+               swap(*sp0, *sp1, temp);
+
+               for (ak = ta + n, ai = a+n; ak > ta;)   /* Copy to temp. */
+                       *--ak = *--ai;
+               for (ak = ta+n; --ak >= ta;)            /* Deal to piles. */
+                       *--top[tr[(*ak)[i]]] = *ak;
+       }
+}
+
+/* insertion sort */
+static inline void
+simplesort(const u_char **a, int n, int b, const u_char *tr, u_int endch)
+{
+       u_char ch;
+       const u_char  **ak, **ai, *s, *t;
+
+       _DIAGASSERT(a != NULL);
+       _DIAGASSERT(tr != NULL);
+
+       for (ak = a+1; --n >= 1; ak++)
+               for (ai = ak; ai > a; ai--) {
+                       for (s = ai[0] + b, t = ai[-1] + b;
+                           (ch = tr[*s]) != endch; s++, t++)
+                               if (ch != tr[*t])
+                                       break;
+                       if (ch >= tr[*t])
+                               break;
+                       swap(ai[0], ai[-1], s);
+               }
+}
diff -r f105068353ef -r 8283b679aee3 usr.bin/sort/sort.c
--- a/usr.bin/sort/sort.c       Sat Sep 05 08:55:40 2009 +0000
+++ b/usr.bin/sort/sort.c       Sat Sep 05 09:16:18 2009 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: sort.c,v 1.53 2009/08/22 21:43:53 dsl Exp $    */
+/*     $NetBSD: sort.c,v 1.54 2009/09/05 09:16:18 dsl Exp $    */
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -76,7 +76,7 @@
 #endif /* not lint */
 
 #ifndef lint
-__RCSID("$NetBSD: sort.c,v 1.53 2009/08/22 21:43:53 dsl Exp $");
+__RCSID("$NetBSD: sort.c,v 1.54 2009/09/05 09:16:18 dsl Exp $");
 __SCCSID("@(#)sort.c   8.1 (Berkeley) 6/6/93");
 #endif /* not lint */
 
@@ -103,11 +103,6 @@
 u_char unweighted[NBINS];
 int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0;
 
-/*
- * Default to stable sort.
- */
-int (*radix_sort)(const u_char **, int, const u_char *, u_int) = sradixsort;
-
 unsigned int debug_flags = 0;
 
 static char toutpath[MAXPATHLEN];
@@ -124,11 +119,11 @@
 main(int argc, char *argv[])
 {
        get_func_t get;
-       int ch, i, stdinflag = 0, tmp = 0;
+       int ch, i, stdinflag = 0;
        char cflag = 0, mflag = 0;
        char *outfile, *outpath = 0;
        struct field *fldtab, *p;
-       size_t fldtab_sz = 3, fidx = 0;
+       size_t fldtab_sz, fld_cnt;
        struct filelist filelist;
        int num_input_files;
        FILE *outfp = NULL;
@@ -147,6 +142,9 @@
        d_mask[REC_D = '\n'] = REC_D_F;
        d_mask['\t'] = d_mask[' '] = BLANK | FLD_D;
 
+       /* fldtab[0] is the global options. */
+       fldtab_sz = 3;
+       fld_cnt = 0;
        fldtab = malloc(fldtab_sz * sizeof(*fldtab));
        memset(fldtab, 0, fldtab_sz * sizeof(*fldtab));
 
@@ -159,7 +157,7 @@
        while ((ch = getopt(argc, argv, "bcdD:fik:mHno:rR:sSt:T:ux")) != -1) {
                switch (ch) {



Home | Main Index | Thread Index | Old Index