NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: bin/41497: diff against cp(1) to copy sparse files



The following reply was made to PR bin/41497; it has been noted by GNATS.

From: mmondor%pulsar-zone.net@localhost
To: gnats-bugs%gnats.netbsd.org@localhost
Cc: 
Subject: Re: bin/41497: diff against cp(1) to copy sparse files
Date: Mon, 1 Jun 2009 23:10:39 -0400

 Updated diff against -current:
 
 
 Index: bin/cp/cp.1
 ===================================================================
 RCS file: /nfs/ginseng/home/data/cvsup/netbsd/src/bin/cp/cp.1,v
 retrieving revision 1.30
 diff -u -p -r1.30 cp.1
 --- bin/cp/cp.1        26 Oct 2007 16:31:16 -0000      1.30
 +++ bin/cp/cp.1        2 Jun 2009 03:00:04 -0000
 @@ -1,4 +1,4 @@
 -.\"   $NetBSD: hira $
 +.\"   $NetBSD: cp.1,v 1.30 2007/10/26 16:31:16 hira Exp $
  .\"
  .\" Copyright (c) 1989, 1990, 1993, 1994
  .\"   The Regents of the University of California.  All rights reserved.
 @@ -137,6 +137,14 @@ indirected through, and for
  to create special files rather than copying them as normal files.
  Created directories have the same mode as the corresponding source
  directory, unmodified by the process's umask.
 +.It Fl s
 +Produce a sparse destination file, by seeking forward instead of writing
 +a block when it is full of zeros.  This may help to copy sparse files
 +containing significant holes without causing them to have a
 +disproportionate destination size.  Note that the destination file may
 +contain more resulting holes than the source file;  as such, this does
 +not produce an identical sparse copy, unlike
 +.Xr dump 8 .
  .It Fl v
  Cause
  .Nm
 Index: bin/cp/cp.c
 ===================================================================
 RCS file: /nfs/ginseng/home/data/cvsup/netbsd/src/bin/cp/cp.c,v
 retrieving revision 1.51
 diff -u -p -r1.51 cp.c
 --- bin/cp/cp.c        20 Jul 2008 00:52:39 -0000      1.51
 +++ bin/cp/cp.c        2 Jun 2009 03:00:04 -0000
 @@ -1,4 +1,4 @@
 -/* $NetBSD: lukem $ */
 +/* $NetBSD: cp.c,v 1.51 2008/07/20 00:52:39 lukem Exp $ */
  
  /*
   * Copyright (c) 1988, 1993, 1994
 @@ -43,7 +43,7 @@ __COPYRIGHT(
  #if 0
  static char sccsid[] = "@(#)cp.c      8.5 (Berkeley) 4/29/95";
  #else
 -__RCSID("$NetBSD: lukem $");
 +__RCSID("$NetBSD: cp.c,v 1.51 2008/07/20 00:52:39 lukem Exp $");
  #endif
  #endif /* not lint */
  
 @@ -85,7 +85,8 @@ static char empty[] = "";
  PATH_T to = { .p_end = to.p_path, .target_end = empty  };
  
  uid_t myuid;
 -int Hflag, Lflag, Rflag, Pflag, fflag, iflag, pflag, rflag, vflag, Nflag;
 +int Hflag, Lflag, Rflag, Pflag, fflag, iflag, pflag, rflag, sflag, vflag,
 +    Nflag;
  mode_t myumask;
  
  enum op { FILE_TO_FILE, FILE_TO_DIR, DIR_TO_DNE };
 @@ -105,8 +106,8 @@ main(int argc, char *argv[])
        setprogname(argv[0]);
        (void)setlocale(LC_ALL, "");
  
 -      Hflag = Lflag = Pflag = Rflag = 0;
 -      while ((ch = getopt(argc, argv, "HLNPRfiprv")) != -1) 
 +      Hflag = Lflag = Pflag = Rflag = sflag = 0;
 +      while ((ch = getopt(argc, argv, "HLNPRfiprsv")) != -1) 
                switch (ch) {
                case 'H':
                        Hflag = 1;
 @@ -140,6 +141,9 @@ main(int argc, char *argv[])
                case 'r':
                        rflag = 1;
                        break;
 +              case 's':
 +                      sflag = 1;
 +                      break;
                case 'v':
                        vflag = 1;
                        break;
 Index: bin/cp/extern.h
 ===================================================================
 RCS file: /nfs/ginseng/home/data/cvsup/netbsd/src/bin/cp/extern.h,v
 retrieving revision 1.15
 diff -u -p -r1.15 extern.h
 --- bin/cp/extern.h    16 Jul 2006 16:22:24 -0000      1.15
 +++ bin/cp/extern.h    2 Jun 2009 03:00:04 -0000
 @@ -1,4 +1,4 @@
 -/* $NetBSD: jschauma $ */
 +/* $NetBSD: extern.h,v 1.15 2006/07/16 16:22:24 jschauma Exp $ */
  
  /*-
   * Copyright (c) 1991, 1993, 1994
 @@ -42,7 +42,8 @@ typedef struct {
  
  extern PATH_T to;
  extern uid_t myuid;
 -extern int Rflag, rflag, Hflag, Lflag, Pflag, fflag, iflag, pflag, Nflag;
 +extern int Rflag, rflag, Hflag, Lflag, Pflag, fflag, iflag, pflag, sflag,
 +      Nflag;
  extern mode_t myumask;
  
  #include <sys/cdefs.h>
 Index: bin/cp/utils.c
 ===================================================================
 RCS file: /nfs/ginseng/home/data/cvsup/netbsd/src/bin/cp/utils.c,v
 retrieving revision 1.34
 diff -u -p -r1.34 utils.c
 --- bin/cp/utils.c     26 Oct 2007 16:21:25 -0000      1.34
 +++ bin/cp/utils.c     2 Jun 2009 03:00:04 -0000
 @@ -1,4 +1,4 @@
 -/* $NetBSD: hira $ */
 +/* $NetBSD: utils.c,v 1.34 2007/10/26 16:21:25 hira Exp $ */
  
  /*-
   * Copyright (c) 1991, 1993, 1994
 @@ -34,7 +34,7 @@
  #if 0
  static char sccsid[] = "@(#)utils.c   8.3 (Berkeley) 4/1/94";
  #else
 -__RCSID("$NetBSD: hira $");
 +__RCSID("$NetBSD: utils.c,v 1.34 2007/10/26 16:21:25 hira Exp $");
  #endif
  #endif /* not lint */
  
 @@ -48,6 +48,7 @@ __RCSID("$NetBSD: hira $");
  #include <fcntl.h>
  #include <fts.h>
  #include <stdio.h>
 +#include <stdint.h>
  #include <stdlib.h>
  #include <string.h>
  #include <unistd.h>
 @@ -72,11 +73,20 @@ set_utimes(const char *file, struct stat
  int
  copy_file(FTSENT *entp, int dne)
  {
 -      static char buf[MAXBSIZE];
 +      static char *buf = NULL;
        struct stat to_stat, *fs;
        int ch, checkch, from_fd, rcount, rval, to_fd, tolnk, wcount;
 +      size_t bsize, wbsize;
        char *p;
 -      
 +
 +      /* This makes sure that our buffer is long-aligned. */
 +      if (buf == NULL) {
 +              if ((buf = malloc(MAXBSIZE)) == NULL) {
 +                      warn("malloc: %u bytes", MAXBSIZE);
 +                      return (1);
 +              }
 +      }
 +
        if ((from_fd = open(entp->fts_path, O_RDONLY, 0)) == -1) {
                warn("%s", entp->fts_path);
                return (1);
 @@ -87,6 +97,22 @@ copy_file(FTSENT *entp, int dne)
        tolnk = ((Rflag && !(Lflag || Hflag)) || Pflag);
  
        /*
 +       * For sparse files, use the source file optimal file system block
 +       * size instead of the maximum physical block size in an attempt to
 +       * maximize sparseness on the destination file, assuming that the
 +       * destination file system block size is the same (if it's not,
 +       * let's not worry, this will still be fine).
 +       */
 +      if (sflag) {
 +              bsize = (size_t)fs->st_blksize;
 +              /* Shouldn't happen, but let's be careful */
 +              if (bsize > MAXBSIZE)
 +                      bsize = MAXBSIZE;
 +      } else
 +              bsize = MAXBSIZE;
 +      wbsize = bsize / sizeof(unsigned long);
 +
 +      /*
         * If the file exists and we're interactive, verify with the user.
         * If the file DNE, set the mode to be the from file, minus setuid
         * bits, modified by the umask; arguably wrong, but it makes copying
 @@ -153,7 +179,7 @@ copy_file(FTSENT *entp, int dne)
                 * This is really a minor hack, but it wins some CPU back.
                 */
  
 -              if (fs->st_size <= 8 * 1048576) {
 +              if (!sflag && fs->st_size <= 8 * 1048576) {
                        size_t fsize = (size_t)fs->st_size;
                        p = mmap(NULL, fsize, PROT_READ, MAP_FILE|MAP_SHARED,
                            from_fd, (off_t)0);
 @@ -164,27 +190,78 @@ copy_file(FTSENT *entp, int dne)
                                     MADV_SEQUENTIAL);
                                if (write(to_fd, p, fsize) !=
                                    fs->st_size) {
 -                                      warn("%s", to.p_path);
 +                                      warn("write: %s", to.p_path);
                                        rval = 1;
                                }
                                if (munmap(p, fsize) < 0) {
 -                                      warn("%s", entp->fts_path);
 +                                      warn("munmap: %s", entp->fts_path);
                                        rval = 1;
                                }
                        }
                } else {
 +                      int empty;
 +
  mmap_failed:
 -                      while ((rcount = read(from_fd, buf, MAXBSIZE)) > 0) {
 -                              wcount = write(to_fd, buf, (size_t)rcount);
 -                              if (rcount != wcount || wcount == -1) {
 -                                      warn("%s", to.p_path);
 -                                      rval = 1;
 -                                      break;
 +                      empty = 0;
 +                      while ((rcount = read(from_fd, buf, bsize)) > 0) {
 +
 +                              /*
 +                               * If sflag is set, verify if block only
 +                               * consists of 0x00.
 +                               */
 +                              if (sflag) {
 +                                      unsigned long *s, *e;
 +
 +                                      for (empty = 1,
 +                                           s = (unsigned long *)buf,
 +                                           e = &s[wbsize];
 +                                           s < e; s++) {
 +                                              if (*s != 0) {
 +                                                      empty = 0;
 +                                                      break;
 +                                              }
 +                                      }
 +                              } else
 +                                      empty = 0;
 +
 +                              /*
 +                               * Seek instead of writing in case of a zero
 +                               * block if sflag is set.
 +                               */
 +                              if (empty) {
 +                                      if (lseek(to_fd, (size_t)rcount,
 +                                          SEEK_CUR) == (off_t)-1) {
 +                                              warn("lseek: %s", to.p_path);
 +                                              rval = 1;
 +                                              break;
 +                                      }
 +                              } else {
 +                                      wcount = write(to_fd, buf,
 +                                          (size_t)rcount);
 +                                      if (rcount != wcount || wcount == -1) {
 +                                              warn("write: %s", to.p_path);
 +                                              rval = 1;
 +                                              break;
 +                                      }
                                }
                        }
 +
                        if (rcount < 0) {
 -                              warn("%s", entp->fts_path);
 +                              warn("read: %s", entp->fts_path);
                                rval = 1;
 +                      } else if (sflag && empty) {
 +                              /*
 +                               * The last block was skipped with a seek;
 +                               * write a single byte so that the file
 +                               * ends up with its proper size.
 +                               */
 +                              if (lseek(to_fd, (off_t)-1, SEEK_CUR) == -1) {
 +                                      warn("lseek: %s", to.p_path);
 +                                      rval = 1;
 +                              } else if (write(to_fd, "\0", 1) != 1) {
 +                                      warn("write: %s", to.p_path);
 +                                      rval = 1;
 +                              }
                        }
                }
        }
 @@ -342,8 +419,9 @@ void
  usage(void)
  {
        (void)fprintf(stderr,
 -          "usage: %s [-R [-H | -L | -P]] [-f | -i] [-Npv] src target\n"
 -          "       %s [-R [-H | -L | -P]] [-f | -i] [-Npv] src1 ... srcN directory\n",
 +          "usage: %s [-R [-H | -L | -P]] [-f | -i] [-Npsv] src target\n"
 +          "       %s [-R [-H | -L | -P]] [-f | -i] [-Npsv] src1 ... srcN "
 +          "directory\n",
            getprogname(), getprogname());
        exit(1);
        /* NOTREACHED */
 


Home | Main Index | Thread Index | Old Index