Subject: bin/9248: (gnu)tar has problems with files >2GB
To: None <gnats-bugs@gnats.netbsd.org>
From: Hubert Feyrer <feyrer@smaug.fh-regensburg.de>
List: netbsd-bugs
Date: 01/19/2000 12:39:46
>Number:         9248
>Category:       bin
>Synopsis:       (gnu)tar has problems with files >2GB
>Confidential:   no
>Severity:       critical
>Priority:       high
>Responsible:    bin-bug-people (Utility Bug People)
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Wed Jan 19 12:39:00 2000
>Last-Modified:
>Originator:     Hubert Feyrer
>Organization:
Hubert Feyrer <hubertf@channel.regensburg.org>
>Release:        20000119
>Environment:
	
System: NetBSD smaug 1.4.2_ALPHA NetBSD 1.4.2_ALPHA (SMAUG) #9: Tue Jan 18 23:24:11 MET 2000 feyrer@smaug:/disk1/cvs/src-1.4/sys/arch/sparc/compile/SMAUG sparc

I've tried gnutar from 1.4.2_ALPHA, -current as of 20000119 and the
gtar-base pkg, all show the same problems.

>Description:
	tar has problems tarring up and extracting files that are bigger
	than 2GB. The problem lies in the routines that convert file sizes
	to/from the octal representation used in the tar file's header.

>How-To-Repeat:
	Take a file >2GB
	tar plcf - 2gfile | tar plvtf -
	tar plcf - 2gfile | ( cd somedir ; tar plvxf - )

>Fix:
	pax seems the way to go, eventually. 

	Until then, the patch below will fix two problems:
	* the size in the generated tar headers will be ok for files
	  >2GB (the format allows files of ~4GB; at that size I expect
	  any tar to have a problem)
	* in "tar vt", display the size of files >2GB correctly.

	The remaining problem with this patch is that files >2GB will
	still not be extracted, as the from_oct() routine in list.c
	uses signed return values to indicate errors. 

	I have no clue how to fix this now, and will leave this for someone
	else to fix.
	

Index: create.c
===================================================================
RCS file: /cvsroot/gnusrc/gnu/usr.bin/tar/create.c,v
retrieving revision 1.8
diff -u -r1.8 create.c
--- create.c	1999/09/05 23:34:40	1.8
+++ create.c	2000/01/19 20:30:07
@@ -378,7 +378,7 @@
     {
       int f;			/* File descriptor */
       long bufsize, count;
-      long sizeleft;
+      u_long sizeleft;
       register union record *start;
       int header_moved;
       char isextended = 0;
@@ -442,7 +442,7 @@
 			 * shrunken file size was the one that showed
 			 * up.
 			 */
-	      to_oct ((long) hstat.st_size, 1 + 12,
+	      to_oct ((u_long) hstat.st_size, 1 + 12,
 		      header->header.realsize);
 
 	      /*
@@ -454,7 +454,7 @@
 
 	      find_new_file_size (&filesize, upperbound);
 	      hstat.st_size = filesize;
-	      to_oct ((long) filesize, 1 + 12,
+	      to_oct ((u_long) filesize, 1 + 12,
 		      header->header.size);
 	      /*				to_oct((long) end_nulls, 1+12,
 						header->header.ending_blanks);*/
@@ -720,7 +720,7 @@
       if (f_gnudump)
 	{
 	  int sizeleft;
-	  int totsize;
+	  u_long totsize;
 	  int bufsize;
 	  union record *start;
 	  int count;
@@ -737,7 +737,7 @@
 	      p_buf += tmp;
 	    }
 	  totsize++;
-	  to_oct ((long) totsize, 1 + 12, header->header.size);
+	  to_oct (totsize, 1 + 12, header->header.size);
 	  finish_header (header);
 	  p_buf = buf;
 	  sizeleft = totsize;
@@ -1288,7 +1288,7 @@
 	  8, header->header.mode);
   to_oct ((long) st->st_uid, 8, header->header.uid);
   to_oct ((long) st->st_gid, 8, header->header.gid);
-  to_oct ((long) st->st_size, 1 + 12, header->header.size);
+  to_oct ((u_long) st->st_size, 1 + 12, header->header.size);
   to_oct ((long) st->st_mtime, 1 + 12, header->header.mtime);
   /* header->header.linkflag is left as null */
   if (f_gnudump)
@@ -1381,7 +1381,7 @@
  */
 void
 to_oct (value, digs, where)
-     register long value;
+     register u_long value;
      register int digs;
      register char *where;
 {
Index: list.c
===================================================================
RCS file: /cvsroot/gnusrc/gnu/usr.bin/tar/list.c,v
retrieving revision 1.6
diff -u -r1.6 list.c
--- list.c	1999/08/24 18:40:10	1.6
+++ list.c	2000/01/19 20:30:14
@@ -313,13 +313,14 @@
 read_header ()
 {
   register int i;
-  register long sum, signed_sum, recsum;
+  register u_long sum, signed_sum, recsum;
   register char *p;
   register union record *header;
   long from_oct ();
   char **longp;
   char *bp, *data;
-  int size, written;
+  off_t size;
+  int written;
   static char *next_long_name, *next_long_link;
   char *name;
 
@@ -371,7 +376,7 @@
   if (header->header.linkflag == LF_LINK)
     hstat.st_size = 0;		/* Links 0 size on tape */
   else
-    hstat.st_size = from_oct (1 + 12, header->header.size);
+    hstat.st_size = (u_long)from_oct (1 + 12, header->header.size);
 
   header->header.arch_name[NAMSIZ - 1] = '\0';
   if (header->header.linkflag == LF_LONGNAME
@@ -684,11 +691,11 @@
 	  break;
 #endif
 	case LF_SPARSE:
-	  (void) sprintf (size, "%ld",
-			  from_oct (1 + 12, head->header.realsize));
+	  (void) sprintf (size, "%ul",
+			  (u_long)from_oct (1 + 12, head->header.realsize));
 	  break;
 	default:
-	  (void) sprintf (size, "%ld", (long) hstat.st_size);
+	  (void) sprintf (size, "%ul", (u_long) hstat.st_size);
 	}
 
       /* Figure out padding and print the whole line. */
>Audit-Trail:
>Unformatted: