Subject: gzip (well, zcat) bzip2 decompression handler
To: None <tech-userlevel@netbsd.org>
From: Simon Burge <simonb@telstra.com.au>
List: tech-userlevel
Date: 07/03/1999 00:52:59
I've been sitting on this so long I forgot about it until I saw
Brian Grayson's message on tech-kern about adding compression and
decompression layers in a portal filesystem.

The following patches to gzip allow it to decompress
bzip2 files.  I haven't handled bzip2 compression - I don't think
it's necessary, but having "zcat" handle as many compression formats
(especially ones we ship!) seems like a "Good Thing(tm)".

Anyone mind if I commit this (after I add a bit to the man page)?

Simon.
--
diff -urN /NetBSD/src/gnu/usr.bin/gzip/Makefile ./Makefile
--- /NetBSD/src/gnu/usr.bin/gzip/Makefile	Tue Feb  2 16:41:15 1999
+++ ./Makefile	Sat Jul  3 00:38:59 1999
@@ -4,7 +4,7 @@
 
 PROG=	gzip
 SRCS=	gzip.c zip.c deflate.c trees.c bits.c unzip.c inflate.c util.c \
-	crypt.c lzw.c unlzw.c unlzh.c unpack.c getopt.c
+	crypt.c lzw.c unbzip2.c unlzw.c unlzh.c unpack.c getopt.c
 MAN=	gzexe.1 gzip.1 zdiff.1 zforce.1 zgrep.1 zmore.1 znew.1
 CPPFLAGS+=-DSTDC_HEADERS=1 -DHAVE_UNISTD_H=1 -DDIRENT=1 -Dunix
 
@@ -20,8 +20,8 @@
 .endif	# m68k || i386
 
 LDSTATIC?= -static
-LDADD+=	-lgnumalloc
-DPADD+=	${LIBGNUMALLOC}
+LDADD+=	-lgnumalloc -lbz2
+DPADD+=	${LIBGNUMALLOC} ${LIBBZ2}
 
 MLINKS+= gzip.1 gunzip.1 gzip.1 gzcat.1 gzip.1 zcat.1
 MLINKS+= zdiff.1 zcmp.1
diff -urN /NetBSD/src/gnu/usr.bin/gzip/gzip.c ./gzip.c
--- /NetBSD/src/gnu/usr.bin/gzip/gzip.c	Fri Apr 10 18:24:15 1998
+++ ./gzip.c	Sun Dec  6 07:00:52 1998
@@ -1278,6 +1278,14 @@
 	/* check_zipfile may get ofname from the local header */
 	last_member = 1;
 
+    } else if (memcmp(magic, BZIP2_MAGIC, 2) == 0 && inptr == 2
+	    && memcmp((char*)inbuf, BZIP2_MAGIC, 3) == 0
+	    && (inbuf[3] >= '0' && inbuf[3] <= '9')) {
+        inptr = 0;
+	work = unbzip2;
+	method = BZIP2ED;
+	last_member = 1;
+
     } else if (memcmp(magic, PACK_MAGIC, 2) == 0) {
 	work = unpack;
 	method = PACKED;
@@ -1327,7 +1335,8 @@
         "compr",  /* 1 */
         "pack ",  /* 2 */
         "lzh  ",  /* 3 */
-        "", "", "", "", /* 4 to 7 reserved */
+        "", "", "", /* 4 to 6 reserved */
+	"bzip2",
         "defla"}; /* 8 */
     char *date;
 
diff -urN /NetBSD/src/gnu/usr.bin/gzip/gzip.h ./gzip.h
--- /NetBSD/src/gnu/usr.bin/gzip/gzip.h	Tue Jan 20 12:21:18 1998
+++ ./gzip.h	Sun Dec  6 07:34:05 1998
@@ -55,7 +55,8 @@
 #define COMPRESSED  1
 #define PACKED      2
 #define LZHED       3
-/* methods 4 to 7 reserved */
+/* methods 4 to 6 reserved */
+#define BZIP2ED     7
 #define DEFLATED    8
 #define MAX_METHODS 9
 extern int method;         /* compression method */
@@ -157,6 +158,7 @@
 #define	OLD_GZIP_MAGIC "\037\236" /* Magic header for gzip 0.5 = freeze 1.x */
 #define	LZH_MAGIC      "\037\240" /* Magic header for SCO LZH Compress files*/
 #define PKZIP_MAGIC    "\120\113\003\004" /* Magic header for pkzip files */
+#define BZIP2_MAGIC    "\102\132\150" /* Magic header for bzip2 files */
 
 /* gzip flag byte */
 #define ASCII_FLAG   0x01 /* bit 0 set: file probably ascii text */
@@ -271,6 +273,9 @@
 
 	/* in unlzh.c */
 extern int unlzh      OF((int in, int out));
+
+	/* in unbzip2.c */
+extern int unbzip2    OF((int in, int out));
 
 	/* in gzip.c */
 RETSIGTYPE abort_gzip OF((void));
diff -urN /NetBSD/src/gnu/usr.bin/gzip/unbzip2.c ./unbzip2.c
--- /NetBSD/src/gnu/usr.bin/gzip/unbzip2.c	Thu Jan  1 10:00:00 1970
+++ ./unbzip2.c	Sun Dec  6 04:07:28 1998
@@ -0,0 +1,86 @@
+/* unbzip2.c -- decompress files in bzip2 format.
+ */
+
+#ifdef RCSID
+static char rcsid[] = "$Id$";
+#endif
+
+#define BZ_NO_STDIO
+#include <bzlib.h>
+#include <stddef.h>
+
+#include "gzip.h"
+
+
+/* ============================================================================
+ * Bunzip2 in to out.  Decoding starts with the "insize" bytes already held
+ * in "inbuf", then continues with data read from "in".  Returns OK once the
+ * end of the bzip2 stream has been decoded and written, ERROR otherwise.
+ */
+int unbzip2(in, out)
+    int in, out;    /* input and output file descriptors */
+{
+	int		n, ret, left, end_of_file, status;
+	char		*p;
+	bz_stream	bzs;
+
+	bzs.bzalloc = NULL;
+	bzs.bzfree = NULL;
+	bzs.opaque = NULL;
+
+	status = ERROR;
+	end_of_file = 0;
+	if (bzDecompressInit(&bzs, 0, 0) != BZ_OK)
+		return(ERROR);
+
+	/* Use up the remainder of "inbuf" that's been read in already */
+	bzs.next_in = (char *)inbuf;
+	bzs.avail_in = insize;
+
+	do {
+		if (bzs.avail_in == 0 && !end_of_file) {
+			n = read(in, inbuf, INBUFSIZ);
+			if (n < 0) {
+				read_error();
+				goto done;	/* in case read_error() returns */
+			}
+			if (n == 0)
+				end_of_file = 1;
+			bzs.next_in = (char *)inbuf;
+			bzs.avail_in = n;
+		}
+
+		bzs.next_out = (char *)outbuf;
+		bzs.avail_out = OUTBUFSIZ;
+		ret = bzDecompress(&bzs);
+		if (ret != BZ_OK && ret != BZ_STREAM_END) {
+			/* Report the failure once, then stop: retrying cannot help */
+			error(ret == BZ_DATA_ERROR ? "bzip2 data integrity error" :
+			    ret == BZ_DATA_ERROR_MAGIC ? "bzip2 magic number error" :
+			    ret == BZ_MEM_ERROR ? "bzip2 out of memory" :
+			    "bzip2 decompression error");
+			goto done;	/* in case error() returns */
+		}
+		if (ret == BZ_OK && end_of_file) {
+			read_error();	/* input ended before the stream did */
+			goto done;
+		}
+
+		/* write() may accept less than asked for; loop until drained */
+		p = (char *)outbuf;
+		for (left = OUTBUFSIZ - bzs.avail_out; left > 0; left -= n) {
+			n = write(out, p, left);
+			if (n < 0) {
+				write_error();
+				goto done;	/* in case write_error() returns */
+			}
+			p += n;
+		}
+	} while (ret == BZ_OK);
+	status = OK;
+
+done:
+	/* Always release the decoder state, whichever way the loop ended */
+	if (bzDecompressEnd(&bzs) != BZ_OK)
+		status = ERROR;
+	return(status);
+}