Subject: bin/6790: find and xargs don't get along
To: None <gnats-bugs@gnats.netbsd.org>
From: Eric Fischer <eric@fudge.uchicago.edu>
List: netbsd-bugs
Date: 01/11/1999 11:54:30
>Number:         6790
>Category:       bin
>Synopsis:       find and xargs don't get along
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    bin-bug-people (Utility Bug People)
>State:          open
>Class:          change-request
>Submitter-Id:   net
>Arrival-Date:   Mon Jan 11 10:05:00 1999
>Last-Modified:
>Originator:     Eric Fischer
>Organization:
The University of Chicago
>Release:        1.2
>Environment:
	
System: NetBSD fudge 1.2 NetBSD 1.2 (FUDGE) #7: Sun Jan 19 16:26:56 CST 1997 eric@fudge:/usr/people/eric/usrsrc/sys/arch/sparc/compile/FUDGE sparc


>Description:

Find and xargs don't get along, and the -X option to find is no help
if you really need to do something to a hierarchy of files that have
spaces or quotes in their names.

Clearly, find should have the ability to produce output in a way that
xargs can use, and xargs should be able to deal reasonably with the
output of find.

>How-To-Repeat:

touch 'foo bar'
find . -print | xargs echo

>Fix:

The patch below adds a -printx primary to find, which uses the quoting
conventions xargs wants to see.  It also adds a -0 option to xargs (like
in GNU xargs), which makes xargs able to deal with the output from the
-print0 primary which find already had.

diff -rc ../../../src/usr.bin/find/extern.h find/extern.h
*** ../../../src/usr.bin/find/extern.h	Thu Dec 30 15:15:18 1993
--- find/extern.h	Mon Jan 11 10:36:45 1999
***************
*** 67,72 ****
--- 67,73 ----
  PLAN	*c_perm __P((char *));
  PLAN	*c_print __P((void));
  PLAN	*c_print0 __P((void));
+ PLAN	*c_printx __P((void));
  PLAN	*c_prune __P((void));
  PLAN	*c_size __P((char *));
  PLAN	*c_type __P((char *));
Only in find: extern.h.orig
Only in find: find
diff -rc ../../../src/usr.bin/find/find.1 find/find.1
*** ../../../src/usr.bin/find/find.1	Tue May 21 08:43:50 1996
--- find/find.1	Mon Jan 11 10:36:46 1999
***************
*** 113,118 ****
--- 113,125 ----
  is skipped.
  The delimiting characters include single (`` ' '') and double (`` " '')
  quotes, backslash (``\e''), space, tab and newline characters.
+ Alternately, the
+ .Ic -printx
+ or
+ .Ic -print0
+ primary can be used to format the output in a way
+ .Nm xargs
+ can accept.
  .It Fl x
  The
  .Fl x
***************
*** 265,272 ****
  .Ic -exec ,
  .Ic -ls ,
  .Ic -ok ,
  nor
! .Ic -print0
  is specified, the given expression shall be effectively replaced by
  .Cm \&( Ns Ar given\& expression Ns Cm \&) 
  .Ic -print .
--- 272,280 ----
  .Ic -exec ,
  .Ic -ls ,
  .Ic -ok ,
+ .Ic -print0 ,
  nor
! .Ic -printx
  is specified, the given expression shall be effectively replaced by
  .Cm \&( Ns Ar given\& expression Ns Cm \&) 
  .Ic -print .
***************
*** 274,279 ****
--- 282,295 ----
  This primary always evaluates to true.
  It prints the pathname of the current file to standard output, followed
  by a null character.
+ .It Ic -printx
+ This primary always evaluates to true.
+ It prints the pathname of the current file to standard output,
+ with each space, tab, newline, backslash, and single or double
+ quotation mark prefixed by a backslash, so the output of
+ .Nm find
+ can safely be used as input to
+ .Nm xargs .
  .It Ic -prune
  This primary always evaluates to true.
  It causes
***************
*** 457,459 ****
--- 473,481 ----
  option and the
  .Xr getopt 3
  ``--'' construct.
+ .Sh HISTORY
+ A much simpler
+ .Nm find
+ command appeared in First Edition AT&T Unix.
+ The syntax had become similar to the present version by
+ the time of the Fifth Edition.
Only in find: find.1.orig
Only in find: find.cat1
diff -rc ../../../src/usr.bin/find/find.h find/find.h
*** ../../../src/usr.bin/find/find.h	Thu Dec 30 15:15:22 1993
--- find/find.h	Mon Jan 11 10:36:46 1999
***************
*** 43,49 ****
  	N_ATIME, N_CLOSEPAREN, N_CTIME, N_DEPTH, N_EXEC, N_EXPR, N_FOLLOW,
  	N_FSTYPE, N_GROUP, N_INUM, N_LINKS, N_LS, N_MTIME, N_NAME, N_NEWER,
  	N_NOGROUP, N_NOT, N_NOUSER, N_OK, N_OPENPAREN, N_OR, N_PATH,
! 	N_PERM, N_PRINT, N_PRINT0, N_PRUNE, N_SIZE, N_TYPE, N_USER, N_XDEV,
  };
  
  /* node definition */
--- 43,50 ----
  	N_ATIME, N_CLOSEPAREN, N_CTIME, N_DEPTH, N_EXEC, N_EXPR, N_FOLLOW,
  	N_FSTYPE, N_GROUP, N_INUM, N_LINKS, N_LS, N_MTIME, N_NAME, N_NEWER,
  	N_NOGROUP, N_NOT, N_NOUSER, N_OK, N_OPENPAREN, N_OR, N_PATH,
! 	N_PERM, N_PRINT, N_PRINT0, N_PRINTX, N_PRUNE, N_SIZE, N_TYPE,
! 	N_USER, N_XDEV,
  };
  
  /* node definition */
Only in find: find.h.orig
Only in find: find.o
diff -rc ../../../src/usr.bin/find/function.c find/function.c
*** ../../../src/usr.bin/find/function.c	Tue May 21 08:43:52 1996
--- find/function.c	Mon Jan 11 10:44:27 1999
***************
*** 771,776 ****
--- 771,796 ----
  	(void)fputc('\0', stdout);
  	return(1);
  }
+ 
+ f_printx(plan, entry)
+ 	PLAN *plan;
+ 	FTSENT *entry;
+ {
+ 	/* Eric Fischer <eric@rainbow.uchicago.edu>, January, 1999 */
+ 
+ 	char *cp;
+ 
+ 	for (cp = entry->fts_path; *cp; cp++) {
+ 		if (*cp == '\'' || *cp == '\"' || *cp == ' ' ||
+ 		    *cp == '\t' || *cp == '\n' || *cp == '\\')
+ 			fputc('\\', stdout);
+ 
+ 		fputc(*cp, stdout);
+ 	}
+ 
+ 	fputc('\n', stdout);
+ 	return 1;
+ }
   
  PLAN *
  c_print()
***************
*** 786,791 ****
--- 806,819 ----
  	isoutput = 1;
  
  	return(palloc(N_PRINT0, f_print0));
+ }
+ 
+ PLAN *
+ c_printx()
+ {
+ 	isoutput = 1;
+ 
+ 	return palloc(N_PRINTX, f_printx);
  }
   
  /*
Only in find: function.c.orig
Only in find: function.o
Only in find: ls.o
Only in find: main.o
Only in find: misc.o
Only in find: operator.o
diff -rc ../../../src/usr.bin/find/option.c find/option.c
*** ../../../src/usr.bin/find/option.c	Thu Dec 30 15:15:32 1993
--- find/option.c	Mon Jan 11 10:36:47 1999
***************
*** 79,84 ****
--- 79,85 ----
  	{ "-perm",	N_PERM,		c_perm,		O_ARGV },
  	{ "-print",	N_PRINT,	c_print,	O_ZERO },
  	{ "-print0",	N_PRINT0,	c_print0,	O_ZERO },
+ 	{ "-printx",	N_PRINTX,	c_printx,	O_ZERO },
  	{ "-prune",	N_PRUNE,	c_prune,	O_ZERO },
  	{ "-size",	N_SIZE,		c_size,		O_ARGV },
  	{ "-type",	N_TYPE,		c_type,		O_ARGV },
Only in find: option.c.orig
Only in find: option.o
Only in xargs: xargs
diff -rc ../../../src/usr.bin/xargs/xargs.1 xargs/xargs.1
*** ../../../src/usr.bin/xargs/xargs.1	Mon Nov 14 00:51:40 1994
--- xargs/xargs.1	Mon Jan 11 11:40:26 1999
***************
*** 46,51 ****
--- 46,52 ----
  .Sh SYNOPSIS
  .Nm xargs
  .Op Fl t
+ .Op Fl 0
  .Oo Op Fl x
  .Fl n Ar number
  .Oc
***************
*** 121,126 ****
--- 122,131 ----
  to terminate immediately if a command line containing
  .Ar number
  arguments will not fit in the specified (or default) command line length.
+ .It Fl 0
+ Disregard the quoting conventions above;
+ every character (including spaces and newlines) is treated as part
+ of the argument, except the null character, which terminates the argument.
  .El
  .Pp
  If no
***************
*** 181,184 ****
  .St -p1003.2
  compliant.
  .Sh HISTORY
! The meaning of 123, 124, and 125 exit values were taken from GNU xargs.
--- 186,196 ----
  .St -p1003.2
  compliant.
  .Sh HISTORY
! The
! .Nm xargs
! command appeared in PWB Unix 1.0.
! It made its first BSD appearance in the 4.3 Reno release.
! .Pp
! The meaning of 123, 124, and 125 exit values and the
! .Fl 0
! option were taken from GNU xargs.
Only in xargs: xargs.1.orig
diff -rc ../../../src/usr.bin/xargs/xargs.c xargs/xargs.c
*** ../../../src/usr.bin/xargs/xargs.c	Mon Nov 14 00:51:41 1994
--- xargs/xargs.c	Mon Jan 11 11:44:20 1999
***************
*** 61,67 ****
  #include <err.h>
  #include "pathnames.h"
  
! int tflag, rval;
  
  void run __P((char **));
  void usage __P((void));
--- 61,69 ----
  #include <err.h>
  #include "pathnames.h"
  
! #define DASH_ZERO ((UCHAR_MAX + 1) * 2)  /* offset for the -0 flag */
! 
! int tflag, rval, zeroflag;
  
  void run __P((char **));
  void usage __P((void));
***************
*** 93,100 ****
  	 */
  	nargs = 5000;
  	nline = ARG_MAX - 4 * 1024;
! 	nflag = xflag = 0;
! 	while ((ch = getopt(argc, argv, "n:s:tx")) != EOF)
  		switch(ch) {
  		case 'n':
  			nflag = 1;
--- 95,102 ----
  	 */
  	nargs = 5000;
  	nline = ARG_MAX - 4 * 1024;
! 	nflag = xflag = zeroflag = 0;
! 	while ((ch = getopt(argc, argv, "n:s:tx0")) != EOF)
  		switch(ch) {
  		case 'n':
  			nflag = 1;
***************
*** 110,115 ****
--- 112,120 ----
  		case 'x':
  			xflag = 1;
  			break;
+ 		case '0':	/* Eric Fischer, January, 1999 */
+ 			zeroflag = DASH_ZERO;
+ 			break;
  		case '?':
  		default:
  			usage();
***************
*** 167,174 ****
  	ebp = (argp = p = bbp) + nline - 1;
  
  	for (insingle = indouble = 0;;)
! 		switch(ch = getchar()) {
  		case EOF:
  			/* No arguments since last exec. */
  			if (p == bbp)
  				exit(rval);
--- 172,180 ----
  	ebp = (argp = p = bbp) + nline - 1;
  
  	for (insingle = indouble = 0;;)
! 		switch ((ch = getchar()) + zeroflag) {
  		case EOF:
+ 		case EOF + DASH_ZERO:
  			/* No arguments since last exec. */
  			if (p == bbp)
  				exit(rval);
***************
*** 187,192 ****
--- 193,199 ----
  				goto addch;
  			goto arg2;
  		case '\n':
+ 		case '\0' + DASH_ZERO:
  			/* Empty lines are skipped. */
  			if (argp == p)
  				continue;
Only in xargs: xargs.c.orig
Only in xargs: xargs.cat1
Only in xargs: xargs.o
>Audit-Trail:
>Unformatted: