Subject: Re: base64 support for uu{de,en}code
To: Klaus Klein <kleink@mibh.de>
From: Dieter Baron <dillo@danbala.tuwien.ac.at>
List: tech-userlevel
Date: 07/07/2005 16:16:26
--EDJsL2R9iCFAt7IV
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

hi,

> The removal of optimization from the decode_uu() loop isn't a style
> fix, but the compiler should handle that.

  It reduces code duplication.  And compared to the I/O overhead, the
two extra comparisons per loop iteration shouldn't make a difference.

> There should be no magical treatment of the "/dev/stdout" pathname in
> decode(), nor should that be part of the manual page. 

  We removed it from the manual page.  In the code, however, the
special case is still needed, since the chmod of /dev/stdout might fail.
And the chmod is required when outputting to a file, since the mode is
specified on the "begin" line of the input file.

> On a related
> note, given the availability of the -p flag since 1999 you should
> reconsider whether there's an obvious need to add the ambiguity of
> a "-" option argument meaning stdout, which also isn't supported for
> the input file operand.

  True, "-" meaning stdout should only be supported for the filename
contained in the input file, not for the -o option argument.  We changed
it accordingly.

> There are several KNF problems with regard to the use of binary
> operators in the changes.

  Fixed.

  Thanks for the comments; a new patch is attached.

						yours,
						dillo

--EDJsL2R9iCFAt7IV
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="uu..code-base64v2.diff"

? uudecode/uuencode.o.out
? uudecode/uuencode.out
Index: uudecode/uudecode.c
===================================================================
RCS file: /cvsroot/src/usr.bin/uudecode/uudecode.c,v
retrieving revision 1.20
diff -u -r1.20 uudecode.c
--- uudecode/uudecode.c	29 Jun 2005 20:35:32 -0000	1.20
+++ uudecode/uudecode.c	3 Jul 2005 13:43:15 -0000
@@ -62,26 +62,37 @@
 #include <string.h>
 #include <unistd.h>
 
-static int decode(void);
+static int base64_check_end(FILE *, int);
+static int decode(FILE *, char *, int);
+static int decode_base64(FILE *, FILE *);
+static int decode_uu(FILE *, FILE *);
 static void usage(void);
 int main(int, char *[]);
 
-int pflag;
+const char *base64_str="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+		       "abcdefghijklmnopqrstuvwxyz0123456789+/";
+
 char *filename;
 
 int
 main(int argc, char *argv[])
 {
-	int ch, rval;
+	char *outfn;
+	int ch, rval, to_stdout;
+	FILE *fin;
 
 	setlocale(LC_ALL, "");
 	setprogname(argv[0]);
 
-	pflag = 0;
-	while ((ch = getopt(argc, argv, "p")) != -1)
+	outfn = NULL;
+	to_stdout = 0;
+	while ((ch = getopt(argc, argv, "o:p")) != -1)
 		switch (ch) {
+		case 'o':
+			outfn = optarg;
+			break;
 		case 'p':
-			pflag = 1;
+			to_stdout = 1;
 			break;
 		default:
 			usage();
@@ -89,75 +100,99 @@
 	argc -= optind;
 	argv += optind;
 
+	if (to_stdout && outfn != NULL)
+		errx(1, "only one of -o and -p may be used");
+
 	if (*argv) {
 		rval = 0;
 		do {
-			if (!freopen(filename = *argv, "r", stdin)) {
+			if ((fin=fopen(filename = *argv, "r")) == NULL) {
 				warn("%s", *argv);
 				rval = 1;
 				continue;
 			}
-			rval |= decode();
+			rval |= decode(fin, outfn, to_stdout);
+			(void)fclose(fin);
 		} while (*++argv);
 	} else {
 		filename = "stdin";
-		rval = decode();
+		rval = decode(stdin, outfn, to_stdout);
 	}
 	exit(rval);
 }
 
+/* decode single uuencoded character */
+#define	DEC(c)	(((c) - ' ') & 077)
+
 static int
-decode(void)
+decode(FILE *fin, char *outfn, int to_stdout)
 {
 	struct passwd *pw;
 	int n;
-	char ch, *p;
+	char *p;
 	int n1;
 	long mode;
 	char *fn;
-	char buf[MAXPATHLEN];
+	char buf[MAXPATHLEN+32]; /* leave space for begin and mode */
+	int base64_fmt;
+	int ret;
+	FILE *fout;
 
 	/* search for header line */
-	do {
-		if (!fgets(buf, sizeof(buf), stdin)) {
+	for (;;) {
+		if (!fgets(buf, sizeof(buf), fin)) {
 			warnx("%s: no \"begin\" line", filename);
-			return(1);
+			return 1;
+		}
+		if (strncmp(buf, "begin ", 6) == 0) {
+		    base64_fmt = 0;
+		    p = buf+6;
+		    break;
 		}
-	} while (strncmp(buf, "begin ", 6));
+		if (strncmp(buf, "begin-base64 ", 13) == 0) {
+		    base64_fmt = 1;
+		    p = buf+13;
+		    break;
+		}
+	}
         /* must be followed by an octal mode and a space */
-	mode = strtol(buf + 6, &fn, 8);
-	if (fn == (buf+6) || !isspace((unsigned char)*fn) || mode==LONG_MIN || mode==LONG_MAX)
-	{
+	mode = strtol(p, &fn, 8);
+	if (fn == p || !isspace((unsigned char)*fn) ||
+	    mode<0 || mode>07777) {
 	        warnx("%s: invalid mode on \"begin\" line", filename);
-		return(1);
+		return 1;
 	}
 	/* skip whitespace for file name */
-	while (*fn && isspace((unsigned char)*fn)) fn++;
+	while (*fn && isspace((unsigned char)*fn))
+	    fn++;
 	if (*fn == 0) {
                 warnx("%s: no filename on \"begin\" line", filename);
-		return(1);
+		return 1;
 	}
 	/* zap newline */
-	for (p = fn; *p && *p != '\n'; p++) 
-	        ;
-	if (*p) *p = 0;
+	p = fn+strlen(fn)-1;
+	if (*p != '\n') {
+	    warnx("%s: filename too long", filename);
+	    return 1;
+	}
+	*p = 0;
 	
 	/* handle ~user/file format */
 	if (*fn == '~') {
 		if (!(p = strchr(fn, '/'))) {
 			warnx("%s: illegal ~user.", filename);
-			return(1);
+			return 1;
 		}
 		*p++ = '\0';
 		if (!(pw = getpwnam(fn + 1))) {
 			warnx("%s: no user %s.", filename, buf);
-			return(1);
+			return 1;
 		}
 		n = strlen(pw->pw_dir);
 		n1 = strlen(p);
 		if (n + n1 + 2 > MAXPATHLEN) {
 			warnx("%s: path too long.", filename);
-			return(1);
+			return 1;
 		}
 		/* make space at beginning of buf by moving end of pathname */
 		memmove(buf + n + 1, p, n1 + 1);
@@ -166,61 +201,189 @@
 		fn = buf;
 	}
 
+	if (outfn != NULL)
+		fn = outfn;
+
 	/* create output file, set mode */
-	if (!pflag && (!freopen(fn, "w", stdout) ||
-	    fchmod(fileno(stdout), mode & 0666))) { 
-		warn("%s: %s", fn, filename);
-		return(1);
+	if (to_stdout || (strcmp(fn, "/dev/stdout") == 0) ||
+	    ((outfn == NULL) && (strcmp(fn, "-") == 0))) {
+		to_stdout = 1;
+		fout = stdout;
+	}
+	else {
+		if ((fout=fopen(fn, "w")) == NULL ||
+		    fchmod(fileno(fout), mode & 0666)) {
+			warn("%s: %s", fn, filename);
+			if (fout)
+				fclose(fout);
+			return 1;
+		}
 	}
 
+	if (base64_fmt)
+		ret = decode_base64(fin, fout);
+	else
+		ret = decode_uu(fin, fout);
+
+	if (ret == 0)
+		if (ferror(fout)) {
+			warn("%s: write error", to_stdout ? "stdout" : fn);
+			ret = 1;
+		}
+	
+	if (!to_stdout)
+		fclose(fout);
+	
+	return ret;
+}
+
+static int
+decode_uu(FILE *fin, FILE *fout)
+{
+	char ch;
+	char buf[MAXPATHLEN];
+	char *p;
+	int n;
+	
 	/* for each input line */
 	for (;;) {
-		if (!fgets(p = buf, sizeof(buf), stdin)) {
-			warnx("%s: short file.", filename);
-			return(1);
+		if (!fgets(p = buf, sizeof(buf), fin)) {
+			warnx("%s: short file", filename);
+			return 1;
 		}
-#define	DEC(c)	(((c) - ' ') & 077)		/* single character decode */
 		/*
 		 * `n' is used to avoid writing out all the characters
 		 * at the end of the file.
 		 */
 		if ((n = DEC(*p)) <= 0)
 			break;
-		for (++p; n > 0; p += 4, n -= 3)
-			if (n >= 3) {
+		if (strlen(buf) < n-1) {
+			warnx("%s: short line", filename);
+			return 1;
+		}
+		for (++p; n > 0; p += 4, n -= 3) {
+			if (n >= 1) {
 				ch = DEC(p[0]) << 2 | DEC(p[1]) >> 4;
-				putchar(ch);
+				putc(ch, fout);
+			}
+			if (n >= 2) {
 				ch = DEC(p[1]) << 4 | DEC(p[2]) >> 2;
-				putchar(ch);
+				putc(ch, fout);
+			}
+			if (n >= 3) {
 				ch = DEC(p[2]) << 6 | DEC(p[3]);
-				putchar(ch);
+				putc(ch, fout);
 			}
-			else {
-				if (n >= 1) {
-					ch = DEC(p[0]) << 2 | DEC(p[1]) >> 4;
-					putchar(ch);
-				}
-				if (n >= 2) {
-					ch = DEC(p[1]) << 4 | DEC(p[2]) >> 2;
-					putchar(ch);
-				}
-				if (n >= 3) {
-					ch = DEC(p[2]) << 6 | DEC(p[3]);
-					putchar(ch);
+		}
+	}
+	
+	if (!fgets(buf, sizeof(buf), fin) || strcmp(buf, "end\n")) {
+		warnx("%s: no \"end\" line.", filename);
+		return 1;
+	}
+
+	return 0;
+}
+
+static int
+decode_base64(FILE *fin, FILE *fout)
+{
+	char b[4], *p;
+	int c, end, nl, n;
+
+	nl = 1;
+	end = n = 0;
+	for (;;) {
+		if ((c=getc(fin)) == EOF) {
+			warnx("%s: short file", filename);
+			return 1;
+		}
+		if (c == '=') {
+			ungetc(c, fin);
+			if (n == 0) {
+				if (!nl) {
+					warnx("%s: illegal end", filename);
+					return 1;
 				}
+				return base64_check_end(fin, n);
+			}
+			end = 1;
+		}
+		else if (c == '\n') {
+			nl = 1;
+			continue;
+		}
+		else {
+			nl = 0;
+			p = strchr(base64_str, c);
+			if (p)
+				b[n++] = p-base64_str;
+		}
+		if (end || n == 4) {
+			if (n == 1) {
+				warnx("%s: short tuple", filename);
+				return 1;
+			}
+			c = b[0]<<2 | b[1]>>4;
+			(void)putc(c, fout);
+			if (n > 2) {
+				c = b[1]<<4 | b[2]>>2;
+				(void)putc(c, fout);
+			}
+			if (n > 3) {
+				c = b[2]<<6 | b[3];
+				(void)putc(c, fout);
 			}
+			if (end)
+				return base64_check_end(fin, n);
+
+			n = 0;
+		}
 	}
-	if (!fgets(buf, sizeof(buf), stdin) || strcmp(buf, "end\n")) {
-		warnx("%s: no \"end\" line.", filename);
-		return(1);
+
+	return 0;
+}
+
+static int
+base64_check_end(FILE *fin, int n)
+{
+	char b[6];
+	int c;
+
+	if (n > 0) {
+		/* check padding */
+		if (fgets(b, 5-n, fin) == NULL || strlen(b) != 4-n ||
+		    strncmp(b, "====", 4-n) != 0) {
+			warnx("%s: padding missing (%d, _%s_)", filename, n, b);
+			return 1;
+		}
+
+		/* skip to end of line */
+		while ((c=getc(fin)) != '\n') {
+			if (c == EOF) {
+				warnx("%s: short file", filename);
+				return 1;
+			}
+			else if (c == '=' || strchr(base64_str, c) != NULL) {
+				warnx("%s: illegal end", filename);
+				return 1;
+			}
+		}
 	}
-	return(0);
+	if (fgets(b, 6, fin) == NULL || strlen(b) != 5
+	    || strcmp(b, "====\n") != 0) {
+		warnx("%s: illegal end", filename);
+		return 1;
+	}
+
+	return 0;
 }
 
 static void
 usage()
 {
-	(void)fprintf(stderr, "usage: %s [-p] [file ...]\n",
-		      getprogname());
+	(void)fprintf(stderr, "usage: %s [-p] [file ...]\n"
+		      "       %s [-o outfile] [file ...]\n",
+		      getprogname(), getprogname());
 	exit(1);
 }
Index: uuencode/uuencode.1
===================================================================
RCS file: /cvsroot/src/usr.bin/uuencode/uuencode.1,v
retrieving revision 1.14
diff -u -r1.14 uuencode.1
--- uuencode/uuencode.1	7 Aug 2003 11:16:58 -0000	1.14
+++ uuencode/uuencode.1	3 Jul 2005 13:43:15 -0000
@@ -29,7 +29,7 @@
 .\"
 .\"     @(#)uuencode.1	8.1 (Berkeley) 6/6/93
 .\"
-.Dd March 19, 1999
+.Dd June 30, 2005
 .Dt UUENCODE 1
 .Os
 .Sh NAME
@@ -38,10 +38,12 @@
 .Nd encode/decode a binary file
 .Sh SYNOPSIS
 .Nm
+.Op Fl m
 .Op Ar file
 .Ar name
 .Nm uudecode
 .Op Fl p
+.Op Fl o Ar outfile
 .Op Ar file ...
 .Sh DESCRIPTION
 .Nm
@@ -72,11 +74,29 @@
 The resulting file is named
 .Ar name
 and will have the mode of the original file except that setuid
-and execute bits are not retained; if the
-.Fl p
-option is specified, the data will be written to the standard output.
+and execute bits are not retained.
+.Nm uudecode
+supports the following options:
+.Bl -tag -width XoXoutfileXX
+.It Fl o Ar outfile
+The data will be written to
+.Ar outfile .
+.It Fl p
+The data will be written to the standard output.
+.El
+.Pp
 .Nm uudecode
 ignores any leading and trailing lines.
+.Pp
+.Nm uuencode
+supports one option,
+.Fl m ,
+to output
+.Em base64-encoded
+data instead of the historical encoding format.
+When decoding, if the internal filename is
+.Dq Pa \&- ,
+the data will be written to the standard output.
 .Sh EXIT STATUS
 The
 .Nm uudecode
@@ -115,6 +135,19 @@
 .Nm
 utilities appeared in
 .Bx 4.0 .
-.Sh BUGS
+.Sh CAVEATS
 The encoded form of the file is expanded by 35% (3 bytes become 4 plus
 control information).
+.Pp
+If more than one file is given and the
+.Fl o
+option is used, you will still only get one output file.
+.Sh SECURITY CONSIDERATIONS
+.Nm uudecode
+uses the file name specified in the file,
+and overwrites any existing file of the same name.
+This file name can include directory components and ~user
+expressions.
+When decoding a file from an untrusted source, use the
+.Fl o
+option, or sanitize the internal file name before decoding.
Index: uuencode/uuencode.5
===================================================================
RCS file: /cvsroot/src/usr.bin/uuencode/uuencode.5,v
retrieving revision 1.9
diff -u -r1.9 uuencode.5
--- uuencode/uuencode.5	7 Aug 2003 11:16:59 -0000	1.9
+++ uuencode/uuencode.5	3 Jul 2005 13:43:16 -0000
@@ -29,7 +29,7 @@
 .\"
 .\"	@(#)uuencode.format.5	8.2 (Berkeley) 1/12/94
 .\"
-.Dd April 9, 1997
+.Dd June 30, 2005
 .Dt UUENCODE 5
 .Os
 .Sh NAME
@@ -50,12 +50,21 @@
 look like a header.
 .Pp
 The header line starts with the word
-.Dq begin ,
+.Dq begin
+or
+.Dq begin-base64 ,
 a space,
 a file mode (in octal),
 a space,
 and finally a string which names the file being encoded.
 .Pp
+If the header line starts with
+.Dq begin-base64 ,
+the data consists of base64-encoded bytes terminated by
+a line of four equal signs.
+Otherwise, the data is encoded in the format described
+below.
+.Pp
 The central engine of
 .Xr uuencode 1
 is a six-bit encoding function which outputs an
@@ -84,8 +93,8 @@
 .Pp
 The byte count is a six-bit integer encoded with the above function,
 representing the number of bytes encoded in the rest of the line.
-The method used to encode the data expands its size by
-133% (described below).
+The method used to encode the data expands its size to
+133% of the original (described below).
 Therefore it is important to note that the byte count describes the size of
 the chunk of data before it is encoded, not afterwards.
 The six bit size of this number effectively limits the number of bytes
Index: uuencode/uuencode.c
===================================================================
RCS file: /cvsroot/src/usr.bin/uuencode/uuencode.c,v
retrieving revision 1.11
diff -u -r1.11 uuencode.c
--- uuencode/uuencode.c	29 Jun 2005 20:35:48 -0000	1.11
+++ uuencode/uuencode.c	3 Jul 2005 13:43:16 -0000
@@ -59,21 +59,34 @@
 #include <unistd.h>
 
 int main(int, char *[]);
-static void encode(void);
+static void encode(int);
 static void usage(void);
 
+const char *base64_str="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+		       "abcdefghijklmnopqrstuvwxyz0123456789+/";
+
 int
 main(int argc, char *argv[])
 {
 	struct stat sb;
+	int base64_fmt;
 	int mode;
+	int c;
 
+	base64_fmt = 0;
 	mode = 0;
 	setlocale(LC_ALL, "");
 	setprogname(argv[0]);
 
-	while (getopt(argc, argv, "") != -1)
+	while ((c=getopt(argc, argv, "m")) != -1) {
+	    switch (c) {
+	    case 'm':
+		base64_fmt = 1;
+		break;
+	    default:
 		usage();
+	    }
+	}
 	argv += optind;
 	argc -= optind;
 
@@ -94,47 +107,69 @@
 		usage();
 	}
 
-	(void)printf("begin %o %s\n", mode, *argv);
-	encode();
-	(void)printf("end\n");
+	if (base64_fmt)
+	    (void)fputs("begin-base64", stdout);
+	else
+	    (void)fputs("begin", stdout);
+	(void)printf(" %o %s\n", mode, *argv);
+	encode(base64_fmt);
+	if (base64_fmt)
+	    (void)puts("====");
+	else
+	    (void)puts("end");
 	if (ferror(stdout))
 		err(1, "write error");
 	exit(0);
 }
 
-/* ENC is the basic 1 character encoding function to make a char printing */
-#define	ENC(c) ((c) ? ((c) & 077) + ' ': '`')
+/* ENC converts 6 bits to a printing character */
+#define ENC(fmt64, c) ((fmt64) ? ENC64(c) : ENCUU(c))
+
+/* ENCUU converts 6 bits to a uuencoded character */
+#define	ENCUU(c) ((c) ? ((c) & 077) + ' ': '`')
+
+/* ENC64 converts 6 bits to a base64-encoded character */
+#define	ENC64(c) base64_str[((c) & 077)]
 
 /*
  * copy from in to out, encoding as you go along.
  */
 static void
-encode(void)
+encode(int base64_fmt)
 {
 	int ch, n;
 	char *p;
 	char buf[80];
 
 	while ((n = fread(buf, 1, 45, stdin)) > 0) {
-		ch = ENC(n);
-		if (putchar(ch) == EOF)
-			break;
+		/* uuencode line length character */
+		if (!base64_fmt) {
+			if (putchar(ENCUU(n)) == EOF)
+				break;
+		}
+		buf[n] = '\0';
 		for (p = buf; n > 0; n -= 3, p += 3) {
 			ch = *p >> 2;
-			ch = ENC(ch);
-			if (putchar(ch) == EOF)
+			if (putchar(ENC(base64_fmt, ch)) == EOF)
 				break;
 			ch = ((*p << 4) & 060) | ((p[1] >> 4) & 017);
-			ch = ENC(ch);
-			if (putchar(ch) == EOF)
+			if (putchar(ENC(base64_fmt, ch)) == EOF)
+				break;
+			if (n == 1) {
+				(void)fputs(base64_fmt ? "==" : "``",
+					    stdout);
 				break;
+			}
 			ch = ((p[1] << 2) & 074) | ((p[2] >> 6) & 03);
-			ch = ENC(ch);
-			if (putchar(ch) == EOF)
+			if (putchar(ENC(base64_fmt, ch)) == EOF)
 				break;
+			if (n == 2) {
+				(void)fputs(base64_fmt ? "=" : "`",
+					    stdout);
+				break;
+			}
 			ch = p[2] & 077;
-			ch = ENC(ch);
-			if (putchar(ch) == EOF)
+			if (putchar(ENC(base64_fmt, ch)) == EOF)
 				break;
 		}
 		if (putchar('\n') == EOF)
@@ -142,15 +177,16 @@
 	}
 	if (ferror(stdin))
 		err(1, "read error.");
-	ch = ENC('\0');
-	(void)putchar(ch);
-	(void)putchar('\n');
+	if (!base64_fmt) {
+		(void)putchar(ENCUU('\0'));
+		(void)putchar('\n');
+	}
 }
 
 static void
 usage(void)
 {
-	(void)fprintf(stderr, "usage: %s [infile] remotefile\n",
+	(void)fprintf(stderr, "usage: %s [-m] [infile] remotefile\n",
 		      getprogname());
 	exit(1);
 }

--EDJsL2R9iCFAt7IV--