Subject: Bug fixes and enhancements to cp(1)
To: None <current-users@sun-lamp.cs.berkeley.edu>
From: None <Mark_Weaver@brown.edu>
List: current-users
Date: 12/06/1993 01:40:08
I have made some fairly significant improvements to cp(1).  Here is
the patch.  For a description of the changes, see the updated header
comment in cp.c and the changes to the man page (cp.1).

	Mark
--------------------------------------------------------------------
Email: Mark_Weaver@brown.edu           | Brown University
PGP Key: finger mhw@cs.brown.edu       | Dept of Computer Science



diff -c  bin/cp/cp.c.mhw1 bin/cp/cp.c
*** bin/cp/cp.c.mhw1	Wed Nov 10 05:46:26 1993
--- bin/cp/cp.c	Mon Dec  6 01:05:29 1993
***************
*** 50,62 ****
   * 
   * The global PATH_T structures "to" and "from" always contain paths to the
   * current source and target files, respectively.  Since cp does not change
!  * directories, these paths can be either absolute or dot-realative.
   * 
   * The basic algorithm is to initialize "to" and "from", and then call the
   * recursive copy() function to do the actual work.  If "from" is a file,
   * copy copies the data.  If "from" is a directory, copy creates the
   * corresponding "to" directory, and calls itself recursively on all of
   * the entries in the "from" directory.
   */
  
  #include <sys/param.h>
--- 50,70 ----
   * 
   * The global PATH_T structures "to" and "from" always contain paths to the
   * current source and target files, respectively.  Since cp does not change
!  * directories, these paths can be either absolute or dot-relative.
   * 
   * The basic algorithm is to initialize "to" and "from", and then call the
   * recursive copy() function to do the actual work.  If "from" is a file,
   * copy copies the data.  If "from" is a directory, copy creates the
   * corresponding "to" directory, and calls itself recursively on all of
   * the entries in the "from" directory.
+  *
+  * 12/6/93: Modifications made by Mark_Weaver@brown.edu
+  *     - Option added (-l) to handle hard links properly
+  *     - Bug fixed: when overwriting an existing file/directory with
+  *       anything other than a normal file or directory, it would
+  *       ignore the (-i) flag and unlink the file blindly.  If the
+  *       euid == 0, this could unlink directories improperly as well.
+  *     - Replaced many fprintf() calls with equivalent err() calls.
   */
  
  #include <sys/param.h>
***************
*** 72,77 ****
--- 80,98 ----
  #include <string.h>
  #include "extern.h"
  
+ #define HASH_TABLE_SIZE 511
+ 
+ typedef struct _cp_inode {
+ 	struct _cp_inode *next;
+ 	ino_t src_inode;
+ 	char dest_name[1];
+ } cp_inode;
+ 
+ static void hash_init __P((void));
+ static void hash_insert __P((ino_t, char *));
+ static cp_inode *hash_find __P((ino_t));
+ static int verify_unlink __P((void));
+ static int maybe_unlink __P((void));
  static void copy __P((void));
  static void copy_dir __P((void));
  static void copy_fifo __P((struct stat *, int));
***************
*** 86,95 ****
  
  uid_t myuid;
  int exit_val, myumask;
! int iflag, pflag, orflag, rflag;
  int (*statfcn)();
  char *progname;
  
  main(argc, argv)
  	int argc;
  	char **argv;
--- 107,156 ----
  
  uid_t myuid;
  int exit_val, myumask;
! int iflag, pflag, orflag, rflag, lflag;
  int (*statfcn)();
  char *progname;
  
+ cp_inode **hash_table;
+ 
+ static void
+ hash_init()
+ {
+ 	int i;
+ 	
+ 	hash_table = (cp_inode **)malloc(HASH_TABLE_SIZE * sizeof(cp_inode *));
+ 	for (i=0;i<HASH_TABLE_SIZE;i++)
+ 		hash_table[i] = NULL;
+ }
+ 
+ static void
+ hash_insert(src, dest)
+ 	ino_t src;
+ 	char *dest;
+ {
+ 	int index;
+ 	cp_inode *new;
+ 	
+ 	index = src % HASH_TABLE_SIZE;
+ 	new = (cp_inode *)malloc(sizeof(cp_inode) + strlen(dest));
+ 	new->next = hash_table[index];
+ 	new->src_inode = src;
+ 	strcpy(new->dest_name, dest);
+ 	hash_table[index] = new;
+ }
+ 
+ static cp_inode *
+ hash_find(src)
+ 	ino_t src;
+ {
+ 	cp_inode *p;
+ 	
+ 	for (p = hash_table[src % HASH_TABLE_SIZE] ; p ; p = p->next)
+ 		if (p->src_inode == src)
+ 			return p;
+ 	return NULL;
+ }
+ 
  main(argc, argv)
  	int argc;
  	char **argv;
***************
*** 107,113 ****
  	progname = (p = rindex(*argv,'/')) ? ++p : *argv;
  
  	symfollow = 0;
! 	while ((c = getopt(argc, argv, "Rfhipr")) != EOF) {
  	switch ((char)c) {
  		case 'f':
  			iflag = 0;
--- 168,174 ----
  	progname = (p = rindex(*argv,'/')) ? ++p : *argv;
  
  	symfollow = 0;
! 	while ((c = getopt(argc, argv, "Rfhlipr")) != EOF) {
  	switch ((char)c) {
  		case 'f':
  			iflag = 0;
***************
*** 115,120 ****
--- 176,184 ----
  		case 'h':
  			symfollow = 1;
  			break;
+ 		case 'l':
+ 			lflag = 1;
+ 			break;
  		case 'i':
  			iflag = isatty(fileno(stdin));
  			break;
***************
*** 140,149 ****
  		usage();
  
  	if (rflag && orflag) {
! 		(void)fprintf(stderr,
! 		    "cp: the -R and -r options are mutually exclusive.\n");
  		exit(1);
  	}
  
  	myuid = getuid();
  
--- 204,215 ----
  		usage();
  
  	if (rflag && orflag) {
! 		err("the -R and -r options are mutually exclusive.");
  		exit(1);
  	}
+ 	
+ 	if (lflag)
+ 		hash_init();
  
  	myuid = getuid();
  
***************
*** 208,219 ****
--- 274,314 ----
  	exit(exit_val);
  }
  
+ static int
+ verify_unlink()
+ {
+ 	int checkch, ch;
+ 	
+ 	if (iflag) {
+ 		(void)fprintf(stderr, "overwrite %s? ", to.p_path);
+ 		checkch = ch = getchar();
+ 		while (ch != '\n' && ch != EOF)
+ 			ch = getchar();
+ 		if (checkch != 'y' && checkch != 'Y')
+ 			return 0;
+ 	}
+ 	return 1;
+ }
+ 
+ static int
+ maybe_unlink()
+ {
+ 	if (!verify_unlink())
+ 		return 1;
+ 	if (unlink(to.p_path)) {
+ 		err("unlink: %s: %s", to.p_path, strerror(errno));
+ 		return 1;
+ 	}
+ 	return 0;
+ }
+ 
  /* copy file or directory at "from" to "to". */
  static void
  copy()
  {
  	struct stat from_stat, to_stat;
  	int dne, statval;
+ 	cp_inode *cpi;
  
  	statval = statfcn(from.p_path, &from_stat);
  	if (statval == -1) {
***************
*** 227,240 ****
  	else {
  		if (to_stat.st_dev == from_stat.st_dev &&
  		    to_stat.st_ino == from_stat.st_ino) {
! 			(void)fprintf(stderr,
! 			    "%s: %s and %s are identical (not copied).\n",
! 			    progname, to.p_path, from.p_path);
  			exit_val = 1;
  			return;
  		}
  		dne = 0;
  	}
  
  	switch(from_stat.st_mode & S_IFMT) {
  	case S_IFLNK:
--- 322,355 ----
  	else {
  		if (to_stat.st_dev == from_stat.st_dev &&
  		    to_stat.st_ino == from_stat.st_ino) {
! 			err("%s and %s are identical (not copied).",
! 				to.p_path, from.p_path);
! 			exit_val = 1;
! 			return;
! 		}
! 		if ((to_stat.st_mode & S_IFMT) == S_IFDIR &&
! 			(from_stat.st_mode & S_IFMT) != S_IFDIR) {
! 			err("directory %s already exists (not removed).",
! 				to.p_path);
  			exit_val = 1;
  			return;
  		}
  		dne = 0;
  	}
+ 	
+ 	if (lflag) {
+ 		if (cpi = hash_find(from_stat.st_ino)) {
+ 			if (!dne && maybe_unlink())
+ 				return;
+ 			if (link(cpi->dest_name, to.p_path) == -1) {
+ 				err("link: %s: %s", to.p_path, strerror(errno));
+ 				return;
+ 			}
+ 			return;
+ 		}
+ 		else if(from_stat.st_nlink > 1)
+ 			hash_insert(from_stat.st_ino, to.p_path);
+ 	}
  
  	switch(from_stat.st_mode & S_IFMT) {
  	case S_IFLNK:
***************
*** 242,250 ****
  		return;
  	case S_IFDIR:
  		if (!rflag && !orflag) {
! 			(void)fprintf(stderr,
! 			    "%s: %s is a directory (not copied).\n",
! 			    progname, from.p_path);
  			exit_val = 1;
  			return;
  		}
--- 357,363 ----
  		return;
  	case S_IFDIR:
  		if (!rflag && !orflag) {
! 			err("%s is a directory (not copied).", from.p_path);
  			exit_val = 1;
  			return;
  		}
***************
*** 263,270 ****
  			}
  		}
  		else if (!S_ISDIR(to_stat.st_mode)) {
! 			(void)fprintf(stderr, "%s: %s: not a directory.\n",
! 			    progname, to.p_path);
  			return;
  		}
  		copy_dir();
--- 376,382 ----
  			}
  		}
  		else if (!S_ISDIR(to_stat.st_mode)) {
! 			err("%s: not a directory.", to.p_path);
  			return;
  		}
  		copy_dir();
***************
*** 319,335 ****
  	 * modified by the umask.)
  	 */
  	if (!dne) {
! 		if (iflag) {
! 			int checkch, ch;
! 
! 			(void)fprintf(stderr, "overwrite %s? ", to.p_path);
! 			checkch = ch = getchar();
! 			while (ch != '\n' && ch != EOF)
! 				ch = getchar();
! 			if (checkch != 'y' && checkch != 'Y') {
! 				(void)close(from_fd);
! 				return;
! 			}
  		}
  		to_fd = open(to.p_path, O_WRONLY|O_TRUNC, 0);
  	} else
--- 431,439 ----
  	 * modified by the umask.)
  	 */
  	if (!dne) {
! 		if (!verify_unlink()) {
! 			(void)close(from_fd);
! 			return;
  		}
  		to_fd = open(to.p_path, O_WRONLY|O_TRUNC, 0);
  	} else
***************
*** 394,401 ****
  
  	dir_cnt = scandir(from.p_path, &dir_list, NULL, NULL);
  	if (dir_cnt == -1) {
! 		(void)fprintf(stderr, "%s: can't read directory %s.\n",
! 		    progname, from.p_path);
  		exit_val = 1;
  	}
  
--- 498,504 ----
  
  	dir_cnt = scandir(from.p_path, &dir_list, NULL, NULL);
  	if (dir_cnt == -1) {
! 		err("can't read directory %s.", from.p_path);
  		exit_val = 1;
  	}
  
***************
*** 473,482 ****
  		return;
  	}
  	link[len] = '\0';
! 	if (exists && unlink(to.p_path)) {
! 		err("unlink: %s: %s", to.p_path, strerror(errno));
  		return;
- 	}
  	if (symlink(link, to.p_path)) {
  		err("symlink: %s: %s", link, strerror(errno));
  		return;
--- 576,583 ----
  		return;
  	}
  	link[len] = '\0';
! 	if (exists && maybe_unlink())
  		return;
  	if (symlink(link, to.p_path)) {
  		err("symlink: %s: %s", link, strerror(errno));
  		return;
***************
*** 488,497 ****
  	struct stat *from_stat;
  	int exists;
  {
! 	if (exists && unlink(to.p_path)) {
! 		err("unlink: %s: %s", to.p_path, strerror(errno));
  		return;
- 	}
  	if (mkfifo(to.p_path, from_stat->st_mode)) {
  		err("mkfifo: %s: %s", to.p_path, strerror(errno));
  		return;
--- 589,596 ----
  	struct stat *from_stat;
  	int exists;
  {
! 	if (exists && maybe_unlink())
  		return;
  	if (mkfifo(to.p_path, from_stat->st_mode)) {
  		err("mkfifo: %s: %s", to.p_path, strerror(errno));
  		return;
***************
*** 505,514 ****
  	struct stat *from_stat;
  	int exists;
  {
! 	if (exists && unlink(to.p_path)) {
! 		err("unlink: %s: %s", to.p_path, strerror(errno));
  		return;
- 	}
  	if (mknod(to.p_path, from_stat->st_mode,  from_stat->st_rdev)) {
  		err("mknod: %s: %s", to.p_path, strerror(errno));
  		return;
--- 604,611 ----
  	struct stat *from_stat;
  	int exists;
  {
! 	if (exists && maybe_unlink())
  		return;
  	if (mknod(to.p_path, from_stat->st_mode,  from_stat->st_rdev)) {
  		err("mknod: %s: %s", to.p_path, strerror(errno));
  		return;
diff -c  bin/cp/cp.1.mhw1 bin/cp/cp.1
*** bin/cp/cp.1.mhw1	Sun Nov  7 02:12:19 1993
--- bin/cp/cp.1	Mon Dec  6 01:38:55 1993
***************
*** 43,52 ****
  .Nd copy files
  .Sh SYNOPSIS
  .Nm cp
! .Op Fl Rfhip
  .Ar source_file target_file
  .Nm cp
! .Op Fl Rfhip
  .Ar source_file ... target_directory
  .Sh DESCRIPTION
  In the first synopsis form, the
--- 43,52 ----
  .Nd copy files
  .Sh SYNOPSIS
  .Nm cp
! .Op Fl Rfhlip
  .Ar source_file target_file
  .Nm cp
! .Op Fl Rfhlip
  .Ar source_file ... target_directory
  .Sh DESCRIPTION
  In the first synopsis form, the
***************
*** 95,100 ****
--- 95,106 ----
  Provided for the
  .Fl R
  option which does not follow symbolic links by default.
+ .It Fl l
+ Causes
+ .Nm cp
+ to attempt to preserve hard links.  A hash table is maintained of the
+ multiply-linked inodes copied; when one is seen again, a hard link to
+ the earlier copy is created instead of copying the data a second time.
  .It Fl i
  Causes
  .Nm cp
***************
*** 153,159 ****
  .Sh SEE ALSO
  .Xr mv 1 ,
  .Xr rcp 1 ,
! .Xr umask 2
  .Sh HISTORY
  The
  .Nm cp
--- 159,183 ----
  .Sh SEE ALSO
  .Xr mv 1 ,
  .Xr rcp 1 ,
! .Xr ln 1 ,
! .Xr umask 2 ,
! .Xr link 2
! .Sh BUGS
! When the
! .Fl l
! option is used, and a source file is not copied for any reason
! (for example, an existing file couldn't be overwritten), its inode is
! still recorded in the hash table.  If another file with a matching
! inode is copied later, a hard link will be made to the pre-existing
! destination file instead of to a copy of the source.
! .Pp
! Also, when the
! .Fl l
! option is used, and directories are copied onto more than one
! destination volume,
! .Nm cp
! may attempt to create some cross-device links, which will obviously
! fail.
  .Sh HISTORY
  The
  .Nm cp

------------------------------------------------------------------------------