tech-userlevel: base64 support for uu{de,en}code

Subject: base64 support for uu{de,en}code
To: None <tech-userlevel@NetBSD.org>
From: Thomas Klausner <wiz@NetBSD.org>
List: tech-userlevel
Date: 06/30/2005 02:11:32
--3V7upXqbjpZ4EhLz
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

Hi!

Dillo and I have added base64 support to uu{de,en}code, per IEEE
Std 1003.1-2004. The diff and a tarball with regression tests are
attached. (The regression Makefile will need some more work.)
While there, we also added support for -o to uudecode and made
some minor style fixes.

Since uudecode is a host tool, we'd like to know if this breaks
any hosts; we only tested on NetBSD/i386.

Any comments before we commit?

Cheers,
 Thomas

--3V7upXqbjpZ4EhLz
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="uu..code-base64.diff"

? uudecode/uuencode.o.out
? uudecode/uuencode.out
Index: uudecode/uudecode.c
===================================================================
RCS file: /cvsroot/src/usr.bin/uudecode/uudecode.c,v
retrieving revision 1.20
diff -u -r1.20 uudecode.c
--- uudecode/uudecode.c	29 Jun 2005 20:35:32 -0000	1.20
+++ uudecode/uudecode.c	30 Jun 2005 00:03:07 -0000
@@ -62,26 +62,37 @@
 #include <string.h>
 #include <unistd.h>
 
-static int decode(void);
+static int base64_check_end(FILE *, int);
+static int decode(FILE *, char *, int);
+static int decode_base64(FILE *, FILE *);
+static int decode_uu(FILE *, FILE *);
 static void usage(void);
 int main(int, char *[]);
 
-int pflag;
+const char *base64_str="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+		       "abcdefghijklmnopqrstuvwxyz0123456789+/";
+
 char *filename;
 
 int
 main(int argc, char *argv[])
 {
-	int ch, rval;
+	char *outfn;
+	int ch, rval, to_stdout;
+	FILE *fin;
 
 	setlocale(LC_ALL, "");
 	setprogname(argv[0]);
 
-	pflag = 0;
-	while ((ch = getopt(argc, argv, "p")) != -1)
+	outfn = NULL;
+	to_stdout = 0;
+	while ((ch = getopt(argc, argv, "o:p")) != -1)
 		switch (ch) {
+		case 'o':
+			outfn = optarg;
+			break;
 		case 'p':
-			pflag = 1;
+			to_stdout = 1;
 			break;
 		default:
 			usage();
@@ -89,75 +100,99 @@
 	argc -= optind;
 	argv += optind;
 
+	if (to_stdout && outfn != NULL)
+		errx(1, "only one of -o and -p may be used");
+
 	if (*argv) {
 		rval = 0;
 		do {
-			if (!freopen(filename = *argv, "r", stdin)) {
+			if ((fin=fopen(filename = *argv, "r")) == NULL) {
 				warn("%s", *argv);
 				rval = 1;
 				continue;
 			}
-			rval |= decode();
+			rval |= decode(fin, outfn, to_stdout);
+			(void)fclose(fin);
 		} while (*++argv);
 	} else {
 		filename = "stdin";
-		rval = decode();
+		rval = decode(stdin, outfn, to_stdout);
 	}
 	exit(rval);
 }
 
+/* decode single uuencoded character */
+#define	DEC(c)	(((c) - ' ') & 077)
+
 static int
-decode(void)
+decode(FILE *fin, char *outfn, int to_stdout)
 {
 	struct passwd *pw;
 	int n;
-	char ch, *p;
+	char *p;
 	int n1;
 	long mode;
 	char *fn;
-	char buf[MAXPATHLEN];
+	char buf[MAXPATHLEN+32]; /* leave space for begin and mode */
+	int base64_fmt;
+	int ret;
+	FILE *fout;
 
 	/* search for header line */
-	do {
-		if (!fgets(buf, sizeof(buf), stdin)) {
+	for (;;) {
+		if (!fgets(buf, sizeof(buf), fin)) {
 			warnx("%s: no \"begin\" line", filename);
-			return(1);
+			return 1;
+		}
+		if (strncmp(buf, "begin ", 6) == 0) {
+		    base64_fmt = 0;
+		    p = buf+6;
+		    break;
 		}
-	} while (strncmp(buf, "begin ", 6));
+		if (strncmp(buf, "begin-base64 ", 13) == 0) {
+		    base64_fmt = 1;
+		    p = buf+13;
+		    break;
+		}
+	}
         /* must be followed by an octal mode and a space */
-	mode = strtol(buf + 6, &fn, 8);
-	if (fn == (buf+6) || !isspace((unsigned char)*fn) || mode==LONG_MIN || mode==LONG_MAX)
-	{
+	mode = strtol(p, &fn, 8);
+	if (fn == p || !isspace((unsigned char)*fn)
+	    || mode<0 || mode>07777) {
 	        warnx("%s: invalid mode on \"begin\" line", filename);
-		return(1);
+		return 1;
 	}
 	/* skip whitespace for file name */
-	while (*fn && isspace((unsigned char)*fn)) fn++;
+	while (*fn && isspace((unsigned char)*fn))
+	    fn++;
 	if (*fn == 0) {
                 warnx("%s: no filename on \"begin\" line", filename);
-		return(1);
+		return 1;
 	}
 	/* zap newline */
-	for (p = fn; *p && *p != '\n'; p++) 
-	        ;
-	if (*p) *p = 0;
+	p = fn+strlen(fn)-1;
+	if (*p != '\n') {
+	    warnx("%s: filename too long", filename);
+	    return 1;
+	}
+	*p = 0;
 	
 	/* handle ~user/file format */
 	if (*fn == '~') {
 		if (!(p = strchr(fn, '/'))) {
 			warnx("%s: illegal ~user.", filename);
-			return(1);
+			return 1;
 		}
 		*p++ = '\0';
 		if (!(pw = getpwnam(fn + 1))) {
 			warnx("%s: no user %s.", filename, buf);
-			return(1);
+			return 1;
 		}
 		n = strlen(pw->pw_dir);
 		n1 = strlen(p);
 		if (n + n1 + 2 > MAXPATHLEN) {
 			warnx("%s: path too long.", filename);
-			return(1);
+			return 1;
 		}
 		/* make space at beginning of buf by moving end of pathname */
 		memmove(buf + n + 1, p, n1 + 1);
@@ -166,61 +201,192 @@
 		fn = buf;
 	}
 
+	if (outfn != NULL)
+		fn = outfn;
+
 	/* create output file, set mode */
-	if (!pflag && (!freopen(fn, "w", stdout) ||
-	    fchmod(fileno(stdout), mode & 0666))) { 
-		warn("%s: %s", fn, filename);
-		return(1);
+	if (to_stdout || strcmp(fn, "-") == 0
+	    || strcmp(fn, "/dev/stdout") == 0) {
+		to_stdout = 1;
+		fout = stdout;
+	}
+	else {
+		if ((fout=fopen(fn, "w")) == NULL
+		    || fchmod(fileno(fout), mode & 0666)) {
+			warn("%s: %s", fn, filename);
+			if (fout)
+				fclose(fout);
+			return 1;
+		}
 	}
 
+	if (base64_fmt)
+		ret = decode_base64(fin, fout);
+	else
+		ret = decode_uu(fin, fout);
+
+	if (ret == 0)
+		if (ferror(fout)) {
+			warn("%s: write error", to_stdout ? "stdout" : fn);
+			ret = 1;
+		}
+	
+	if (!to_stdout)
+		fclose(fout);
+	
+	return ret;
+}
+
+static int
+decode_uu(FILE *fin, FILE *fout)
+{
+	char ch;
+	char buf[MAXPATHLEN];
+	char *p;
+	int n;
+	
 	/* for each input line */
 	for (;;) {
-		if (!fgets(p = buf, sizeof(buf), stdin)) {
-			warnx("%s: short file.", filename);
-			return(1);
+		if (!fgets(p = buf, sizeof(buf), fin)) {
+			warnx("%s: short file", filename);
+			return 1;
 		}
-#define	DEC(c)	(((c) - ' ') & 077)		/* single character decode */
 		/*
 		 * `n' is used to avoid writing out all the characters
 		 * at the end of the file.
 		 */
 		if ((n = DEC(*p)) <= 0)
 			break;
-		for (++p; n > 0; p += 4, n -= 3)
-			if (n >= 3) {
+		if (strlen(buf) < n-1) {
+			warnx("%s: short line", filename);
+			return 1;
+		}
+		for (++p; n > 0; p += 4, n -= 3) {
+			if (n >= 1) {
 				ch = DEC(p[0]) << 2 | DEC(p[1]) >> 4;
-				putchar(ch);
+				putc(ch, fout);
+			}
+			if (n >= 2) {
 				ch = DEC(p[1]) << 4 | DEC(p[2]) >> 2;
-				putchar(ch);
+				putc(ch, fout);
+			}
+			if (n >= 3) {
 				ch = DEC(p[2]) << 6 | DEC(p[3]);
-				putchar(ch);
+				putc(ch, fout);
 			}
-			else {
-				if (n >= 1) {
-					ch = DEC(p[0]) << 2 | DEC(p[1]) >> 4;
-					putchar(ch);
-				}
-				if (n >= 2) {
-					ch = DEC(p[1]) << 4 | DEC(p[2]) >> 2;
-					putchar(ch);
-				}
-				if (n >= 3) {
-					ch = DEC(p[2]) << 6 | DEC(p[3]);
-					putchar(ch);
+		}
+	}
+	
+	if (!fgets(buf, sizeof(buf), fin) || strcmp(buf, "end\n")) {
+		warnx("%s: no \"end\" line.", filename);
+		return 1;
+	}
+
+	return 0;
+}
+
+static int
+decode_base64(FILE *fin, FILE *fout)
+{
+	char b[4], *p;
+	int c, end, nl, n;
+
+	nl = 1;
+	end = n = 0;
+	for (;;) {
+		if ((c=getc(fin)) == EOF) {
+			warnx("%s: short file", filename);
+			return 1;
+		}
+		if (c == '=') {
+			ungetc(c, fin);
+			if (n == 0) {
+				if (!nl) {
+					warnx("%s: illegal end", filename);
+					return 1;
 				}
+				return base64_check_end(fin, n);
+			}
+			end = 1;
+		}
+		else if (c == '\n') {
+			nl = 1;
+			continue;
+		}
+		else {
+			nl = 0;
+			p = strchr(base64_str, c);
+			if (p)
+				b[n++] = p-base64_str;
+		}
+		if (end || n == 4) {
+			if (n == 1) {
+				warnx("%s: short tuple", filename);
+				return 1;
+			}
+			c = b[0]<<2 | b[1]>>4;
+			(void)putc(c, fout);
+			if (n > 2) {
+				c = b[1]<<4 | b[2]>>2;
+				(void)putc(c, fout);
+			}
+			if (n > 3) {
+				c = b[2]<<6 | b[3];
+				(void)putc(c, fout);
 			}
+			if (end)
+				return base64_check_end(fin, n);
+
+			n = 0;
+		}
 	}
-	if (!fgets(buf, sizeof(buf), stdin) || strcmp(buf, "end\n")) {
-		warnx("%s: no \"end\" line.", filename);
-		return(1);
+
+	return 0;
+}
+
+static int
+base64_check_end(FILE *fin, int n)
+{
+	char b[6];
+	int c;
+
+	if (n > 0) {
+		/* check padding */
+		if (fgets(b, 5-n, fin) == NULL
+		    || strlen(b) != 4-n
+		    || strncmp(b, "====", 4-n) != 0) {
+			warnx("%s: padding missing (%d, _%s_)", filename, n, b);
+			return 1;
+		}
+
+		/* skip to end of line */
+		while ((c=getc(fin)) != '\n') {
+			if (c == EOF) {
+				warnx("%s: short file", filename);
+				return 1;
+			}
+			else if (c == '='
+				 || strchr(base64_str, c) != NULL) {
+				warnx("%s: illegal end", filename);
+				return 1;
+			}
+		}
 	}
-	return(0);
+	if (fgets(b, 6, fin) == NULL
+	    || strlen(b) != 5
+	    || strcmp(b, "====\n") != 0) {
+		warnx("%s: illegal end", filename);
+		return 1;
+	}
+
+	return 0;
 }
 
 static void
 usage()
 {
-	(void)fprintf(stderr, "usage: %s [-p] [file ...]\n",
-		      getprogname());
+	(void)fprintf(stderr, "usage: %s [-p] [file ...]\n"
+		      "       %s [-o outfile] [file ...]\n",
+		      getprogname(), getprogname());
 	exit(1);
 }
Index: uuencode/uuencode.1
===================================================================
RCS file: /cvsroot/src/usr.bin/uuencode/uuencode.1,v
retrieving revision 1.14
diff -u -r1.14 uuencode.1
--- uuencode/uuencode.1	7 Aug 2003 11:16:58 -0000	1.14
+++ uuencode/uuencode.1	30 Jun 2005 00:03:07 -0000
@@ -29,7 +29,7 @@
 .\"
 .\"     @(#)uuencode.1	8.1 (Berkeley) 6/6/93
 .\"
-.Dd March 19, 1999
+.Dd June 30, 2005
 .Dt UUENCODE 1
 .Os
 .Sh NAME
@@ -38,10 +38,12 @@
 .Nd encode/decode a binary file
 .Sh SYNOPSIS
 .Nm
+.Op Fl m
 .Op Ar file
 .Ar name
 .Nm uudecode
 .Op Fl p
+.Op Fl o Ar outfile
 .Op Ar file ...
 .Sh DESCRIPTION
 .Nm
@@ -72,11 +74,33 @@
 The resulting file is named
 .Ar name
 and will have the mode of the original file except that setuid
-and execute bits are not retained; if the
-.Fl p
-option is specified, the data will be written to the standard output.
+and execute bits are not retained.
+.Nm uudecode
+supports the following options:
+.Bl -tag -width XoXoutfileXX
+.It Fl o Ar outfile
+The data will be written to
+.Ar outfile .
+If
+.Ar outfile
+is
+.Dq Pa \&-
+or
+.Dq Pa /dev/stdout ,
+data will be written to the standard output.
+.It Fl p
+The data will be written to the standard output.
+.El
+.Pp
 .Nm uudecode
 ignores any leading and trailing lines.
+.Pp
+.Nm uuencode
+supports one option,
+.Fl m ,
+to output
+.Em base64-encoded
+data instead of the historical encoding format.
 .Sh EXIT STATUS
 The
 .Nm uudecode
@@ -115,6 +139,19 @@
 .Nm
 utilities appeared in
 .Bx 4.0 .
-.Sh BUGS
+.Sh CAVEATS
 The encoded form of the file is expanded by 35% (3 bytes become 4 plus
 control information).
+.Pp
+If more than one file is given and the
+.Fl o
+option is used, you will still only get one output file.
+.Sh SECURITY CONSIDERATIONS
+.Nm uudecode
+uses the file name specified in the file,
+and overwrites any existing file of the same name.
+This file name can include directory components and ~user
+expressions.
+When decoding a file from an untrusted source, use the
+.Fl o
+option, or sanitize the internal file name before decoding.
Index: uuencode/uuencode.5
===================================================================
RCS file: /cvsroot/src/usr.bin/uuencode/uuencode.5,v
retrieving revision 1.9
diff -u -r1.9 uuencode.5
--- uuencode/uuencode.5	7 Aug 2003 11:16:59 -0000	1.9
+++ uuencode/uuencode.5	30 Jun 2005 00:03:07 -0000
@@ -29,7 +29,7 @@
 .\"
 .\"	@(#)uuencode.format.5	8.2 (Berkeley) 1/12/94
 .\"
-.Dd April 9, 1997
+.Dd June 30, 2005
 .Dt UUENCODE 5
 .Os
 .Sh NAME
@@ -50,12 +50,21 @@
 look like a header.
 .Pp
 The header line starts with the word
-.Dq begin ,
+.Dq begin
+or
+.Dq begin-base64 ,
 a space,
 a file mode (in octal),
 a space,
 and finally a string which names the file being encoded.
 .Pp
+If the header line starts with
+.Dq begin-base64 ,
+the data consists of base64-encoded bytes terminated by
+a line of four equal signs.
+Otherwise, the data is encoded in the format described
+below.
+.Pp
 The central engine of
 .Xr uuencode 1
 is a six-bit encoding function which outputs an
@@ -84,8 +93,8 @@
 .Pp
 The byte count is a six-bit integer encoded with the above function,
 representing the number of bytes encoded in the rest of the line.
-The method used to encode the data expands its size by
-133% (described below).
+The method used to encode the data expands its size to
+133% of the original (described below).
 Therefore it is important to note that the byte count describes the size of
 the chunk of data before it is encoded, not afterwards.
 The six bit size of this number effectively limits the number of bytes
Index: uuencode/uuencode.c
===================================================================
RCS file: /cvsroot/src/usr.bin/uuencode/uuencode.c,v
retrieving revision 1.11
diff -u -r1.11 uuencode.c
--- uuencode/uuencode.c	29 Jun 2005 20:35:48 -0000	1.11
+++ uuencode/uuencode.c	30 Jun 2005 00:03:07 -0000
@@ -59,21 +59,34 @@
 #include <unistd.h>
 
 int main(int, char *[]);
-static void encode(void);
+static void encode(int);
 static void usage(void);
 
+const char *base64_str="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+		       "abcdefghijklmnopqrstuvwxyz0123456789+/";
+
 int
 main(int argc, char *argv[])
 {
 	struct stat sb;
+	int base64_fmt;
 	int mode;
+	int c;
 
+	base64_fmt = 0;
 	mode = 0;
 	setlocale(LC_ALL, "");
 	setprogname(argv[0]);
 
-	while (getopt(argc, argv, "") != -1)
+	while ((c=getopt(argc, argv, "m")) != -1) {
+	    switch (c) {
+	    case 'm':
+		base64_fmt = 1;
+		break;
+	    default:
 		usage();
+	    }
+	}
 	argv += optind;
 	argc -= optind;
 
@@ -94,47 +107,69 @@
 		usage();
 	}
 
-	(void)printf("begin %o %s\n", mode, *argv);
-	encode();
-	(void)printf("end\n");
+	if (base64_fmt)
+	    (void)fputs("begin-base64", stdout);
+	else
+	    (void)fputs("begin", stdout);
+	(void)printf(" %o %s\n", mode, *argv);
+	encode(base64_fmt);
+	if (base64_fmt)
+	    (void)puts("====");
+	else
+	    (void)puts("end");
 	if (ferror(stdout))
 		err(1, "write error");
 	exit(0);
 }
 
-/* ENC is the basic 1 character encoding function to make a char printing */
-#define	ENC(c) ((c) ? ((c) & 077) + ' ': '`')
+/* ENC converts 6 bits to a printing character */
+#define ENC(fmt64, c) ((fmt64) ? ENC64(c) : ENCUU(c))
+
+/* ENCUU converts 6 bits to a uuencoded character */
+#define	ENCUU(c) ((c) ? ((c) & 077) + ' ': '`')
+
+/* ENC64 converts 6 bits to a base64-encoded character */
+#define	ENC64(c) base64_str[((c) & 077)]
 
 /*
  * copy from in to out, encoding as you go along.
  */
 static void
-encode(void)
+encode(int base64_fmt)
 {
 	int ch, n;
 	char *p;
 	char buf[80];
 
 	while ((n = fread(buf, 1, 45, stdin)) > 0) {
-		ch = ENC(n);
-		if (putchar(ch) == EOF)
-			break;
+		/* uuencode line length character */
+		if (!base64_fmt) {
+			if (putchar(ENCUU(n)) == EOF)
+				break;
+		}
+		buf[n] = '\0';
 		for (p = buf; n > 0; n -= 3, p += 3) {
 			ch = *p >> 2;
-			ch = ENC(ch);
-			if (putchar(ch) == EOF)
+			if (putchar(ENC(base64_fmt, ch)) == EOF)
 				break;
 			ch = ((*p << 4) & 060) | ((p[1] >> 4) & 017);
-			ch = ENC(ch);
-			if (putchar(ch) == EOF)
+			if (putchar(ENC(base64_fmt, ch)) == EOF)
+				break;
+			if (n == 1) {
+				(void)fputs(base64_fmt ? "==" : "``",
+					    stdout);
 				break;
+			}
 			ch = ((p[1] << 2) & 074) | ((p[2] >> 6) & 03);
-			ch = ENC(ch);
-			if (putchar(ch) == EOF)
+			if (putchar(ENC(base64_fmt, ch)) == EOF)
 				break;
+			if (n == 2) {
+				(void)fputs(base64_fmt ? "=" : "`",
+					    stdout);
+				break;
+			}
 			ch = p[2] & 077;
-			ch = ENC(ch);
-			if (putchar(ch) == EOF)
+			if (putchar(ENC(base64_fmt, ch)) == EOF)
 				break;
 		}
 		if (putchar('\n') == EOF)
@@ -142,15 +177,16 @@
 	}
 	if (ferror(stdin))
 		err(1, "read error.");
-	ch = ENC('\0');
-	(void)putchar(ch);
-	(void)putchar('\n');
+	if (!base64_fmt) {
+		(void)putchar(ENCUU('\0'));
+		(void)putchar('\n');
+	}
 }
 
 static void
 usage(void)
 {
-	(void)fprintf(stderr, "usage: %s [infile] remotefile\n",
+	(void)fprintf(stderr, "usage: %s [-m] [infile] remotefile\n",
 		      getprogname());
 	exit(1);
 }

--3V7upXqbjpZ4EhLz
Content-Type: application/x-tar-gz
Content-Disposition: attachment; filename="uudecode-regression-tests.tar.gz"
Content-Transfer-Encoding: base64

H4sIAIw2w0IAA+2a227aMBiAuV2ewlt7MU0EfIrToSFVGq2GNjp1rDtIu+AQl2ZtARHSqWx9
1r3KQoAkBLTQQpwW/k+KYiI7B/58tvMnrmvJds+SuRTBmGPTMLw1xoRzf80I9tdTcgRTgQUj
zBzXI0RgmjPSPKkZrjNsDhDK/bJH/63XHLQv7JtU/6cscKfxL9aal/LcvkrjAjHBWEzjviz+
BmZB/Knw48+8Ug6ncC4L7Hj8j6sfjupled0f3haaThs5F73BkIRFGhaZX7yyu3Jc0LSB7Ayk
45S0wnlvgMb3DrK7aP+3v8s77dns1kJ6DxUteVN0hlbPHXo1xnVL452UCy3B79Af1L7uT7ff
IX3Fpq4rlzaVXb/pbNtsHa0Z3UO0jX69SjP/nHWtILuWd+Va1iFci8D/4BbY/DGS/I/0/5gI
4fvPhQn+KyCIfyj+xo+RHH8SxJ9i04+/wQXEXwEk6xMAMmXef5qR/zT0n+Kp/wz8VwB92tMX
YE0C/2fz+hSOkeQ/YaH/3GS+/xTmf0qw74F2r8pZXxmwCvPjP8tm/KdG6L/XF/jjvwHzfxUQ
yrghzIPXOCj9BXV3h1j+pyX45o+R6D8P8z+GmIz/jBLwXwEt2bG7eqvpSMGRFyQU5AG1sgd0
BVvO/Pw/Ff0T/aeMLPhPCAf/FbDgf/B+p/n1qv+w5fL9eo2h41FHPP+fRg+QPP6LBf85gfFf
BQv+h++BtNqp5yHIuNXE8/+Px3/4/kMFy/2nE/87PfB/y4nn/zLx31zmP+T/VLDcfzbx/3N1
dFI5oyejjlGrHN3WRqfk5Gebf7QOYJK+JcTyf66bwh2e6H/k/Z9gfJL/p/D8rwLf/1jir6HJ
rgVu7wTz+b9U9E/O/0Xe/838h/y/GkL/g8RfrSSOqg9bKq/WbA1dj2Li+b80eoDk53+y4L/3
A/xXQOh/JPH3PI8bDXBxF4jn/x6P//D9nwpi/k8Sfy/yh+/A/50gnv/LxH9zif8Evv9XQcz/
SeLPyLOXdX2Pf9Hrh9/ze/uf8nV8pr99861Q/gFzdAAAAAAAAAAAAAAAgCfFP/nsQ3gAUAAA

--3V7upXqbjpZ4EhLz--