NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

bin/41497: diff against cp(1) to copy sparse files



>Number:         41497
>Category:       bin
>Synopsis:       diff against cp(1) to copy sparse files
>Confidential:   no
>Severity:       non-critical
>Priority:       medium
>Responsible:    bin-bug-people
>State:          open
>Class:          change-request
>Submitter-Id:   net
>Arrival-Date:   Thu May 28 00:50:00 +0000 2009
>Originator:     Matthew Mondor
>Release:        NetBSD 5.0
>Organization:
>Environment:
System: NetBSD ginseng.xisop 5.0 NetBSD 5.0 (GENERIC_MM) #5: Mon Apr 27 00:04:56 EDT 2009 root%hal.xisop@localhost:/usr/obj/sys/arch/i386/compile/GENERIC_MM i386
Architecture: i386
Machine: i386
>Description:
When copying large sparse files (an example is files created using
open(2)+lseek(2)/truncate(2)/mmap(2) such as used for temporary
transfer files or file systems on vnd(4)), the resulting files
have fully allocated blocks.  Sometimes it is nice to be able to
have sparse destination files as a result.

Although Reinoud previously posted a diff on tech-kern mailing list
to allow utilities to truly detect holes, it did not seem to be
accepted (http://mail-index.netbsd.org/tech-kern/2006/09/).

So the technique used here is simply to lseek(2) past all-zero blocks
when copying, producing a sparse result, if the optional -s flag is
used.  This may result in the destination file having more holes
than the original but it does what I need.
>How-To-Repeat:
        
>Fix:


Index: ./bin/cp/cp.1
===================================================================
RCS file: /nfs/ginseng/home/data/cvsup/netbsd/src/bin/cp/cp.1,v
retrieving revision 1.30
diff -u -r1.30 cp.1
--- ./bin/cp/cp.1       26 Oct 2007 16:31:16 -0000      1.30
+++ ./bin/cp/cp.1       27 May 2009 23:03:56 -0000
@@ -137,6 +137,14 @@
 to create special files rather than copying them as normal files.
 Created directories have the same mode as the corresponding source
 directory, unmodified by the process's umask.
+.It Fl s
+Produce a sparse destination file, by seeking forward instead of writing
+a block when it is full of zeros.  This may help to copy sparse files
+containing significant holes without causing them to have a
+disproportionate destination size.  Note that the destination file may
+contain more resulting holes than the source file;  as such, this does
+not produce an identical sparse copy, unlike
+.Xr dump 8 .
 .It Fl v
 Cause
 .Nm
Index: ./bin/cp/cp.c
===================================================================
RCS file: /nfs/ginseng/home/data/cvsup/netbsd/src/bin/cp/cp.c,v
retrieving revision 1.51
diff -u -r1.51 cp.c
--- ./bin/cp/cp.c       20 Jul 2008 00:52:39 -0000      1.51
+++ ./bin/cp/cp.c       27 May 2009 23:03:31 -0000
@@ -85,7 +85,8 @@
 PATH_T to = { .p_end = to.p_path, .target_end = empty  };
 
 uid_t myuid;
-int Hflag, Lflag, Rflag, Pflag, fflag, iflag, pflag, rflag, vflag, Nflag;
+int Hflag, Lflag, Rflag, Pflag, fflag, iflag, pflag, rflag, sflag, vflag,
+    Nflag;
 mode_t myumask;
 
 enum op { FILE_TO_FILE, FILE_TO_DIR, DIR_TO_DNE };
@@ -105,8 +106,8 @@
        setprogname(argv[0]);
        (void)setlocale(LC_ALL, "");
 
-       Hflag = Lflag = Pflag = Rflag = 0;
-       while ((ch = getopt(argc, argv, "HLNPRfiprv")) != -1) 
+       Hflag = Lflag = Pflag = Rflag = sflag = 0;
+       while ((ch = getopt(argc, argv, "HLNPRfiprsv")) != -1) 
                switch (ch) {
                case 'H':
                        Hflag = 1;
@@ -140,6 +141,9 @@
                case 'r':
                        rflag = 1;
                        break;
+               case 's':
+                       sflag = 1;
+                       break;
                case 'v':
                        vflag = 1;
                        break;
Index: ./bin/cp/extern.h
===================================================================
RCS file: /nfs/ginseng/home/data/cvsup/netbsd/src/bin/cp/extern.h,v
retrieving revision 1.15
diff -u -r1.15 extern.h
--- ./bin/cp/extern.h   16 Jul 2006 16:22:24 -0000      1.15
+++ ./bin/cp/extern.h   27 May 2009 23:03:41 -0000
@@ -42,7 +42,8 @@
 
 extern PATH_T to;
 extern uid_t myuid;
-extern int Rflag, rflag, Hflag, Lflag, Pflag, fflag, iflag, pflag, Nflag;
+extern int Rflag, rflag, Hflag, Lflag, Pflag, fflag, iflag, pflag, sflag,
+       Nflag;
 extern mode_t myumask;
 
 #include <sys/cdefs.h>
Index: ./bin/cp/utils.c
===================================================================
RCS file: /nfs/ginseng/home/data/cvsup/netbsd/src/bin/cp/utils.c,v
retrieving revision 1.34
diff -u -r1.34 utils.c
--- ./bin/cp/utils.c    26 Oct 2007 16:21:25 -0000      1.34
+++ ./bin/cp/utils.c    27 May 2009 23:03:48 -0000
@@ -48,6 +48,7 @@
 #include <fcntl.h>
 #include <fts.h>
 #include <stdio.h>
+#include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
@@ -72,11 +73,20 @@
 int
 copy_file(FTSENT *entp, int dne)
 {
-       static char buf[MAXBSIZE];
+       static char *buf = NULL;
        struct stat to_stat, *fs;
        int ch, checkch, from_fd, rcount, rval, to_fd, tolnk, wcount;
+       size_t bsize, wbsize;
        char *p;
-       
+
+       /* This makes sure that our buffer is long-aligned. */
+       if (buf == NULL) {
+               if ((buf = malloc(MAXBSIZE)) == NULL) {
+                       warn("malloc: %u bytes", MAXBSIZE);
+                       return (1);
+               }
+       }
+
        if ((from_fd = open(entp->fts_path, O_RDONLY, 0)) == -1) {
                warn("%s", entp->fts_path);
                return (1);
@@ -87,6 +97,22 @@
        tolnk = ((Rflag && !(Lflag || Hflag)) || Pflag);
 
        /*
+        * For sparse files, use the source file optimal file system block
+        * size instead of the maximum physical block size in an attempt to
+        * maximize sparseness on the destination file, assuming that the
+        * destination file system block size is the same (if it's not,
+        * let's not worry, this will still be fine).
+        */
+       if (sflag) {
+               bsize = (size_t)fs->st_blksize;
+               /* Shouldn't happen, but let's be careful */
+               if (bsize > MAXBSIZE)
+                       bsize = MAXBSIZE;
+       } else
+               bsize = MAXBSIZE;
+       wbsize = bsize / sizeof(unsigned long);
+
+       /*
         * If the file exists and we're interactive, verify with the user.
         * If the file DNE, set the mode to be the from file, minus setuid
         * bits, modified by the umask; arguably wrong, but it makes copying
@@ -153,7 +179,7 @@
                 * This is really a minor hack, but it wins some CPU back.
                 */
 
-               if (fs->st_size <= 8 * 1048576) {
+               if (!sflag && fs->st_size <= 8 * 1048576) {
                        size_t fsize = (size_t)fs->st_size;
                        p = mmap(NULL, fsize, PROT_READ, MAP_FILE|MAP_SHARED,
                            from_fd, (off_t)0);
@@ -164,27 +190,78 @@
                                     MADV_SEQUENTIAL);
                                if (write(to_fd, p, fsize) !=
                                    fs->st_size) {
-                                       warn("%s", to.p_path);
+                                       warn("write: %s", to.p_path);
                                        rval = 1;
                                }
                                if (munmap(p, fsize) < 0) {
-                                       warn("%s", entp->fts_path);
+                                       warn("munmap: %s", entp->fts_path);
                                        rval = 1;
                                }
                        }
                } else {
+                       int empty;
+
 mmap_failed:
-                       while ((rcount = read(from_fd, buf, MAXBSIZE)) > 0) {
-                               wcount = write(to_fd, buf, (size_t)rcount);
-                               if (rcount != wcount || wcount == -1) {
-                                       warn("%s", to.p_path);
-                                       rval = 1;
-                                       break;
+                       empty = 0;
+                       while ((rcount = read(from_fd, buf, bsize)) > 0) {
+
+                               /*
+                                * If sflag is set, verify if block only
+                                * consists of 0x00.
+                                */
+                               if (sflag) {
+                                       unsigned long *s, *e;
+
+                                       for (empty = 1,
+                                            s = (unsigned long *)buf,
+                                            e = &s[wbsize];
+                                            s < e; s++) {
+                                               if (*s != 0) {
+                                                       empty = 0;
+                                                       break;
+                                               }
+                                       }
+                               } else
+                                       empty = 0;
+
+                               /*
+                                * Seek instead of writing in case of a zero
+                                * block if sflag is set.
+                                */
+                               if (empty) {
+                                       if (lseek(to_fd, (size_t)rcount,
+                                           SEEK_CUR) == (off_t)-1) {
+                                               warn("lseek: %s", to.p_path);
+                                               rval = 1;
+                                               break;
+                                       }
+                               } else {
+                                       wcount = write(to_fd, buf,
+                                           (size_t)rcount);
+                                       if (rcount != wcount || wcount == -1) {
+                                               warn("write: %s", to.p_path);
+                                               rval = 1;
+                                               break;
+                                       }
                                }
                        }
+
                        if (rcount < 0) {
-                               warn("%s", entp->fts_path);
+                               warn("read: %s", entp->fts_path);
                                rval = 1;
+                       } else if (sflag && empty) {
+                               /*
+                                * We seeked for the last block, make sure
+                                * that we write a single byte to set proper
+                                * file size.
+                                */
+                               if (lseek(to_fd, (off_t)-1, SEEK_CUR) == -1) {
+                                       warn("lseek: %s", to.p_path);
+                                       rval = 1;
+                               } else if (write(to_fd, "\0", 1) != 1) {
+                                       warn("write: %s", to.p_path);
+                                       rval = 1;
+                               }
                        }
                }
        }
@@ -342,8 +419,9 @@
 usage(void)
 {
        (void)fprintf(stderr,
-           "usage: %s [-R [-H | -L | -P]] [-f | -i] [-Npv] src target\n"
-           "       %s [-R [-H | -L | -P]] [-f | -i] [-Npv] src1 ... srcN directory\n",
+           "usage: %s [-R [-H | -L | -P]] [-f | -i] [-Npsv] src target\n"
+           "       %s [-R [-H | -L | -P]] [-f | -i] [-Npsv] src1 ... srcN "
+           "directory\n",
            getprogname(), getprogname());
        exit(1);
        /* NOTREACHED */



Home | Main Index | Thread Index | Old Index