Subject: Re: tail(1) fix for large files
To: Michael Graff <explorer@flame.org>
From: James Chacon <jchacon@genuity.net>
List: tech-userlevel
Date: 11/14/2001 14:49:36
Looks good to me

James

>
>
>Here's a patch I made to the -current version of tail(1), which
>gets around a long-standing and damned annoying problem: tail
>won't work on files larger than 2 GB, and sometimes on smaller ones.
>
>The old "tail -f" and "tail -1234" formats would mmap() the file
>and search backwards.  The problem is, if the file is too big,
>the mmap() fails.
>
>The change I made was to walk backwards in 10 MB chunks, unmapping
>and remapping as needed, until either the front of the file is
>reached or the correct number of lines has been found.
>
>I should probably walk the file again in the forward direction,
>using a mmap(), print, munmap() method, but I don't.  I let the
>caller deal with that -- it uses a rather bad loop of fgetc()
>and fputc() to print out the remainder of the file.  In practice, it
>probably doesn't matter, since 10 MB is large, and the file _is_
>output, just less efficiently than it could be.
>
>If I can get a few others to sanity check these diffs, I'll commit
>them ASAP.  I did them on little sleep, so I may have missed some
>edge condition (off-by-one errors get me every damned time).
>
>--Michael
>
>Index: forward.c
>===================================================================
>RCS file: /cvsroot/basesrc/usr.bin/tail/forward.c,v
>retrieving revision 1.16
>diff -u -r1.16 forward.c
>--- forward.c	1999/07/21 06:38:49	1.16
>+++ forward.c	2001/11/13 01:02:07
>@@ -249,40 +249,79 @@
> 	long off;
> 	struct stat *sbp;
> {
>-	off_t size;
>+	off_t file_size;
>+	off_t file_remaining;
> 	char *p;
> 	char *start;
>+	off_t mmap_size;
>+	off_t mmap_offset;
>+	off_t mmap_remaining;
> 
>-	if (!(size = sbp->st_size))
>+#define MMAP_MAXSIZE  (10 * 1024 * 1024)
>+
>+	if (!(file_size = sbp->st_size))
> 		return (0);
>+	file_remaining = file_size;
> 
>-	if (size > SIZE_T_MAX) {
>-		err(0, "%s: %s", fname, strerror(EFBIG));
>-		return (1);
>+	if (mmap_size > MMAP_MAXSIZE) {
>+		mmap_size = MMAP_MAXSIZE;
>+		mmap_offset = file_size - MMAP_MAXSIZE;
>+	} else {
>+		mmap_size = file_size;
>+		mmap_offset = 0;
> 	}
> 
>-	if ((start = mmap(NULL, (size_t)size, PROT_READ,
>-	    MAP_FILE|MAP_SHARED, fileno(fp), (off_t)0)) == (caddr_t)-1) {
>-		err(0, "%s: %s", fname, strerror(EFBIG));
>-		return (1);
>-	}
>+	while (off) {
>+		start = mmap(NULL, (size_t)mmap_size, PROT_READ,
>+			     MAP_FILE|MAP_SHARED, fileno(fp), mmap_offset);
>+		if (start == MAP_FAILED) {
>+			err(0, "%s: %s", fname, strerror(EFBIG));
>+			return (1);
>+		}
>+
>+		mmap_remaining = mmap_size;
>+		/* Last char is special, ignore whether newline or not. */
>+		for (p = start + mmap_remaining - 1 ; --mmap_remaining ; )
>+			if (*--p == '\n' && !--off) {
>+				++p;
>+				break;
>+			}
> 
>-	/* Last char is special, ignore whether newline or not. */
>-	for (p = start + size - 1; --size;)
>-		if (*--p == '\n' && !--off) {
>-			++p;
>+		file_remaining -= mmap_size - mmap_remaining;
>+
>+		if (off == 0)
> 			break;
>+
>+		if (munmap(start, mmap_size)) {
>+			err(0, "%s: %s", fname, strerror(errno));
>+			return (1);
> 		}
> 
>-	/* Set the file pointer to reflect the length displayed. */
>-	size = sbp->st_size - size;
>-	WR(p, size);
>-	if (fseek(fp, (long)sbp->st_size, SEEK_SET) == -1) {
>-		ierr();
>-		return (1);
>+		if (mmap_offset >= MMAP_MAXSIZE) {
>+			mmap_offset -= MMAP_MAXSIZE;
>+		} else {
>+			mmap_offset = 0;
>+			mmap_size = file_remaining;
>+		}
> 	}
>-	if (munmap(start, (size_t)sbp->st_size)) {
>+
>+	/*
>+	 * Output the (perhaps partial) data in this mmap'd block.
>+	 */
>+	WR(p, mmap_size - mmap_remaining);
>+	file_remaining += mmap_size - mmap_remaining;
>+	if (munmap(start, mmap_size)) {
> 		err(0, "%s: %s", fname, strerror(errno));
>+		return (1);
>+	}
>+
>+	/*
>+	 * Set the file pointer to reflect the length displayed.
>+	 * This will cause the caller to redisplay the data if/when
>+	 * needed.
>+	 */
>+	if (fseeko(fp, file_remaining, SEEK_SET) == -1) {
>+		ierr();
> 		return (1);
> 	}
> 	return (0);
>
>
>
>