Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/usr.bin/sort Add some comments and clarifications to this in...
details: https://anonhg.NetBSD.org/src/rev/fb4e5171de43
branches: trunk
changeset: 746886:fb4e5171de43
user: dsl <dsl%NetBSD.org@localhost>
date: Sat Aug 22 15:16:50 2009 +0000
description:
Add some comments and clarifications to this inpeneterable code.
When merging ensure we accurable sort records with identical keys by
file-number, otherwise a 'stable' sort won't be!
diffstat:
usr.bin/sort/msort.c | 179 +++++++++++++++++++++++++++-----------------------
1 files changed, 97 insertions(+), 82 deletions(-)
diffs (237 lines):
diff -r 57c4ba7608d3 -r fb4e5171de43 usr.bin/sort/msort.c
--- a/usr.bin/sort/msort.c Sat Aug 22 10:53:28 2009 +0000
+++ b/usr.bin/sort/msort.c Sat Aug 22 15:16:50 2009 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: msort.c,v 1.23 2009/08/22 10:53:28 dsl Exp $ */
+/* $NetBSD: msort.c,v 1.24 2009/08/22 15:16:50 dsl Exp $ */
/*-
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -65,7 +65,7 @@
#include "fsort.h"
#ifndef lint
-__RCSID("$NetBSD: msort.c,v 1.23 2009/08/22 10:53:28 dsl Exp $");
+__RCSID("$NetBSD: msort.c,v 1.24 2009/08/22 15:16:50 dsl Exp $");
__SCCSID("@(#)msort.c 8.1 (Berkeley) 6/6/93");
#endif /* not lint */
@@ -139,7 +139,7 @@
{
int c, i, j, nf = nfiles;
struct mfile *flistb[MERGE_FNUM], **flist = flistb, *cfile;
- size_t availsz = bufsize;
+ size_t availsz;
static void *bufs[MERGE_FNUM + 1];
static size_t bufs_sz[MERGE_FNUM + 1];
@@ -191,47 +191,65 @@
}
}
+ /*
+ * We now loop reading a new record from the file with the
+ * 'sorted first' existing record.
+ * As each record is added, the 'first' record is written to the
+ * output file - maintaining one record from each file in the sorted
+ * list.
+ */
cfile = (struct mfile *) bufs[nf];
- cfile->flno = flist[0]->flno;
cfile->end = (u_char *) cfile + bufs_sz[nf];
- while (nfiles) {
- for (c = 1; c == 1;) {
- c = get(cfile->flno, 0, NULL, nfiles, cfile->rec,
- cfile->end, ftbl);
- if (c == EOF) {
- put(flist[0]->rec, outfp);
- if (--nfiles > 0) {
- flist++;
- cfile->flno = flist[0]->flno;
+ cfile->flno = flist[0]->flno;
+ for (;;) {
+ c = get(cfile->flno, 0, NULL, nfiles, cfile->rec,
+ cfile->end, ftbl);
+ if (c == EOF) {
+ /* Write out last record from now-empty input */
+ put(flist[0]->rec, outfp);
+ if (--nfiles == 0)
+ break;
+ /* Replace from file with now-first sorted record. */
+ /* (Moving base 'flist' saves copying everything!) */
+ flist++;
+ cfile->flno = flist[0]->flno;
+ continue;
+ }
+ if (c == BUFFEND) {
+ /* Buffer not large enough - double in size */
+ char *oldbuf = (char *) cfile;
+ availsz = (char *) cfile->end - oldbuf;
+ availsz *= 2;
+ cfile = realloc(oldbuf, availsz);
+ if (!cfile)
+ err(2, "merge: realloc");
+ cfile->end = (u_char *)cfile + availsz;
+
+ /* Update pointers we'll use for next merge */
+ for (i = 0; i < nf + 1; i++) {
+ if (bufs[i] == oldbuf) {
+ bufs[i] = (char *)cfile;
+ bufs_sz[i] = availsz;
+ break;
}
- break;
}
- if (c == BUFFEND) {
- char *oldbuf = (char *) cfile;
- availsz = (char *) cfile->end - oldbuf;
- availsz *= 2;
- cfile = realloc(oldbuf, availsz);
- if (!cfile)
- err(2, "merge: realloc");
+ /* Read again from same file into same buffer */
+ continue;
+ }
+
+ /* Add into sort, removing the original first entry */
+ c = insert(flist, &cfile, nfiles, DELETE);
- for (i = 0; i < nf + 1; i++) {
- if (bufs[i] == oldbuf) {
- bufs[i] = (char *)cfile;
- bufs_sz[i] = availsz;
- break;
- }
- }
-
- cfile->end = (u_char *)cfile + availsz;
- c = 1;
- continue;
- }
-
- c = insert(flist, &cfile, nfiles, DELETE);
- if (c == 0)
- put(cfile->rec, outfp);
- }
- }
+ /*
+ * 'cfile' is now the buffer from the old record from the
+ * file we just read, but with the file number of the
+ * current 'first record.
+ * (Unless we are rejecting a duplicate, when c == 1 and
+ * it is unchanged!)
+ */
+ if (c == 0)
+ put(cfile->rec, outfp);
+ }
}
/*
@@ -240,67 +258,64 @@
*/
static int
insert(struct mfile **flist, struct mfile **rec, int ttop, int delete)
- /* delete, ttop: delete = 0 or 1 */
{
struct mfile *tmprec = *rec;
int mid, top = ttop, bot = 0, cmpv = 1;
for (mid = top / 2; bot + 1 != top; mid = (bot + top) / 2) {
cmpv = cmp(tmprec->rec, flist[mid]->rec);
+ if (cmpv == 0 ) {
+ if (UNIQUE)
+ /* Duplicate key, read another record */
+ return 1;
+ /*
+ * Apply sort by fileno, to give priority to earlier
+ * specified files, hence providing a stable sort.
+ * We could truncate the sort is the fileno are
+ * adjacent - but that is all too hard!
+ * The fileno cannot be equal, since we only have one
+ * record from each file (+ flist[0] which never
+ * comes here).
+ */
+ cmpv = tmprec->flno - flist[mid]->flno;
+ }
if (cmpv < 0)
top = mid;
- else if (cmpv > 0)
+ else
bot = mid;
- else {
- if (UNIQUE)
- break;
-
- /*
- * Apply sort by fileno, to give priority
- * to earlier specified files, hence providing
- * more stable sort.
- * If fileno is same, the new record should
- * be put _after_ the previous entry.
- */
- /* XXX (dsl) this doesn't seem right */
- cmpv = tmprec->flno - flist[mid]->flno;
- if (cmpv >= 0)
- bot = mid;
- else
- bot = mid - 1;
-
- break;
- }
}
+ /* At this point we haven't yet compared against flist[0] */
+
if (delete) {
- if (UNIQUE) {
- if (!bot && cmpv)
- cmpv = cmp(tmprec->rec, flist[0]->rec);
- if (!cmpv)
- return (1);
- }
+ /* flist[0] came from the same file, it cannot be earlier. */
+ if (UNIQUE && bot == 0 && cmp(tmprec->rec, flist[0]->rec) == 0)
+ /* Duplicate record (key) in original file */
+ return 1;
tmprec = flist[0];
if (bot)
memmove(flist, flist + 1, bot * sizeof(MFILE **));
flist[bot] = *rec;
+ tmprec->flno = flist[0]->flno;
*rec = tmprec;
- (*rec)->flno = flist[0]->flno;
- return (0);
- } else {
- if (!bot && !(UNIQUE && !cmpv)) {
- cmpv = cmp(tmprec->rec, flist[0]->rec);
- if (cmpv < 0)
- bot = -1;
- }
- if (UNIQUE && !cmpv)
- return (1);
- bot++;
- memmove(flist + bot + 1, flist + bot,
- (ttop - bot) * sizeof(MFILE **));
- flist[bot] = *rec;
- return (0);
+ return 0;
}
+
+ /* Inserting original set of records */
+
+ if (bot == 0 && cmpv != 0) {
+ /* Doesn't match flist[1], must compare with flist[0] */
+ cmpv = cmp(tmprec->rec, flist[0]->rec);
+ if (cmpv == 0 && UNIQUE)
+ return 1;
+ /* Add matching keys in file order (ie new is later) */
+ if (cmpv < 0)
+ bot = -1;
+ }
+ bot++;
+ memmove(flist + bot + 1, flist + bot, (ttop - bot) * sizeof(MFILE **));
+ flist[bot] = *rec;
+ return 0;
}
/*
Home |
Main Index |
Thread Index |
Old Index