tech-userlevel archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

bin/pax: Adding support for base-256 (GNU-style) encoded file sizes



Hello, everyone.

During my recent study of tar implementations, I've noticed that
NetBSD's pax does not support base-256 / GNU-style encoded sizes.  While
it technically can read 12-digit octal sizes (i.e. up to 64 GiB), both
GNU tar and libarchive switch to base-256 after exceeding the 'safer'
11-digit size (i.e. 8 GiB).

I think adding base-256 decoding support to pax is rather
straightforward, so I went ahead and wrote a patch.  It adds support
for reading archives that use this extension (i.e. unpacking them).  It's
my first real patch to NetBSD, so I'd appreciate some pointers on whether
I'm doing things right.

One particular problem is that the code so far did not account for
a possible failure return.  Now that the values can overflow or (in the
case of broken files) be negative, I went ahead and used UINTMAX_MAX
as the error return value.  However, due to the rather ugly implicit cast
from uintmax_t to off_t, this becomes -1.  If you'd like to solve this
problem some other way, please let me know what your preference is.

I've also included a test case that constructs a test tarball from
hardwired header data and random file contents, and tests that
the patched tar extracts it correctly.

TIA
---

Index: bin/pax/gen_subs.c
===================================================================
RCS file: /pub/NetBSD-CVS/src/bin/pax/gen_subs.c,v
retrieving revision 1.36
diff -u -B -r1.36 gen_subs.c
--- bin/pax/gen_subs.c	9 Aug 2012 08:09:21 -0000	1.36
+++ bin/pax/gen_subs.c	27 Nov 2018 23:10:07 -0000
@@ -306,12 +306,10 @@
 
 /*
  * asc_umax()
- *	convert hex/octal character string into a uintmax. We do
- *	not have to to check for overflow! (the headers in all supported
- *	formats are not large enough to create an overflow).
+ *	convert hex/octal/base-256 value into a uintmax.
  *	NOTE: strings passed to us are NOT TERMINATED.
  * Return:
- *	uintmax_t value
+ *	uintmax_t value; UINTMAX_MAX for overflow/negative
  */
 
 uintmax_t
@@ -323,6 +321,31 @@
 	stop = str + len;
 
 	/*
+	 * if the highest bit of first byte is set, it's base-256 encoded
+	 * (base-256 is basically an (n-1)-bit big-endian signed number)
+	 */
+	if (*str & 0x80) {
+		/*
+		 * uintmax_t can't be negative, so fail on negative numbers
+		 */
+		if (*str & 0x40)
+			return UINTMAX_MAX;
+
+		tval = *str++ & 0x3f;
+		while (str < stop) {
+			/*
+			 * check for overflow; if shifting the value strips highest
+			 * bits, it means the value is too large
+			 */
+			if (((tval << 8) >> 8) != tval)
+				return UINTMAX_MAX;
+			tval = (tval << 8) | (*str++);
+		}
+
+		return tval;
+	}
+
+	/*
 	 * skip over leading blanks and zeros
 	 */
 	while ((str < stop) && ((*str == ' ') || (*str == '0')))
Index: bin/pax/tar.c
===================================================================
RCS file: /pub/NetBSD-CVS/src/bin/pax/tar.c,v
retrieving revision 1.73
diff -u -B -r1.73 tar.c
--- bin/pax/tar.c	19 Dec 2015 18:28:54 -0000	1.73
+++ bin/pax/tar.c	27 Nov 2018 23:10:07 -0000
@@ -486,6 +486,8 @@
 	arcn->sb.st_uid = (uid_t)asc_u32(hd->uid, sizeof(hd->uid), OCT);
 	arcn->sb.st_gid = (gid_t)asc_u32(hd->gid, sizeof(hd->gid), OCT);
 	arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT);
+	if (arcn->sb.st_size == -1)
+		return -1;
 	arcn->sb.st_mtime = (time_t)(int32_t)asc_u32(hd->mtime, sizeof(hd->mtime), OCT);
 	arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
 
@@ -860,6 +862,8 @@
 	arcn->sb.st_mode = (mode_t)(asc_u32(hd->mode, sizeof(hd->mode), OCT) &
 	    0xfff);
 	arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT);
+	if (arcn->sb.st_size == -1)
+		return -1;
 	arcn->sb.st_mtime = (time_t)(int32_t)asc_u32(hd->mtime, sizeof(hd->mtime), OCT);
 	arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
 
Index: tests/bin/tar/t_tar.sh
===================================================================
RCS file: /pub/NetBSD-CVS/src/tests/bin/tar/t_tar.sh,v
retrieving revision 1.1
diff -u -B -r1.1 t_tar.sh
--- tests/bin/tar/t_tar.sh	17 Mar 2012 16:33:11 -0000	1.1
+++ tests/bin/tar/t_tar.sh	27 Nov 2018 23:10:07 -0000
@@ -45,7 +45,51 @@
 	atf_check -s eq:0 -o empty -e empty cmp file1.tar file2.tar
 }
 
+atf_test_case rd_base256_size
+rd_base256_size_head() {
+	atf_set "descr" "Test extracting an archive whose member size" \
+	                "is encoded as base-256 number (GNU style)"
+}
+rd_base256_size_body() {
+	# prepare random file data for comparison
+	# take 0x123456 bytes in order to have the size field take up
+	# more than one byte
+	dd if=/dev/urandom of=reference.bin bs=1193046 count=1
+	# write test archive header
+	# - filename
+	printf 'output.bin' > test.tar
+	# - pad to 100 octets
+	head -c 90 /dev/zero >> test.tar
+	# - mode, uid, gid
+	printf '%07d\0%07d\0%07d\0' 644 177776 177775 >> test.tar
+	# - size (base-256)
+	printf '\x80\0\0\0\0\0\0\0\0\x12\x34\x56' >> test.tar
+	# - timestamp, checksum
+	printf '%011d\0%06d\0 0' 13377341775 12353 >> test.tar
+	# - pad empty linkname (100 octets)
+	head -c 100 /dev/zero >> test.tar
+	# - magic, user name
+	printf 'ustar  \0nobody' >> test.tar
+	# - pad user name field to 32 bytes
+	head -c 26 /dev/zero >> test.tar
+	# - group name
+	printf 'nogroup' >> test.tar
+	# - pad to full block
+	head -c 208 /dev/zero >> test.tar
+	# append file data to the test archive
+	cat reference.bin >> test.tar
+	# pad to full block + append two terminating null blocks
+	head -c 1450 /dev/zero >> test.tar
+
+	# test extracting the test archive
+	atf_check -s eq:0 -o empty -e empty tar -xf test.tar
+
+	# ensure that output.bin is equal to reference.bin
+	atf_check -s eq:0 -o empty -e empty cmp output.bin reference.bin
+}
+
 atf_init_test_cases()
 {
 	atf_add_test_case append
+	atf_add_test_case rd_base256_size
 }

-- 
Best regards,
Michał Górny

Attachment: signature.asc
Description: This is a digitally signed message part



Home | Main Index | Thread Index | Old Index