Subject: bin/1996: Patch to add recursive options to grep(1)
To: None <gnats-bugs@NetBSD.ORG>
From: None <thieleke@icaen.uiowa.edu>
List: netbsd-bugs
Date: 01/30/1996 04:08:38
>Number:         1996
>Category:       bin
>Synopsis:       Patch to add recursive options to grep(1)
>Confidential:   yes
>Severity:       serious
>Priority:       medium
>Responsible:    bin-bug-people (Utility Bug People)
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Tue Jan 30 05:20:01 1996
>Last-Modified:
>Originator:     Jeff Thieleke
>Organization:
noun: The state or manner of being organized.
>Release:        <NetBSD-current source date>01/30/96
>Environment:
System: NetBSD picabo.icaen.uiowa.edu 1.1A NetBSD 1.1A (PICABO) #5: Tue Jan 16 08:52:54 CST 1996 root@picabo.icaen.uiowa.edu:/usr/src/sys/arch/amiga/compile/PICABO amiga
Architecture: amiga

>Description:
This patch file adds recursive directory searching features to the standard
grep, using the FTS functions.  It was written by a poster to the FreeBSD 
hackers mailing list - I have changed it to patch cleanly to our grep, 
fixed a man page typo, and added the -r command line option.  

There was a great deal of debate about this patch on the current-users
mailing list, with most of the opposition of the opinion that 'find | xargs'
does the same job, without adding additional code bloat.

In addition to the recursive searching options, this patch adds the -a
command line option, to prevent searching binary files.  Reportedly,
grep'ing large binaries would cause grep to crash.


>How-To-Repeat:
not applicable
>Fix:
	

diff -rc grep/Makefile grep_r/Makefile
*** grep/Makefile	Thu Jan 11 06:11:21 1996
--- grep_r/Makefile	Mon Jan 22 15:49:32 1996
***************
*** 4,10 ****
  SRCS=	dfa.c grep.c getopt.c kwset.c obstack.c regex.c search.c
  CFLAGS+=-DGREP -DHAVE_STRING_H=1 -DHAVE_SYS_PARAM_H=1 -DHAVE_UNISTD_H=1 \
  	-DHAVE_GETPAGESIZE=1 -DHAVE_MEMCHR=1 -DHAVE_STRERROR=1 \
! 	-DHAVE_VALLOC=1
  
  LDADD+=	-lgnumalloc
  DPADD+=	/usr/lib/libgnumalloc.a
--- 4,10 ----
  SRCS=	dfa.c grep.c getopt.c kwset.c obstack.c regex.c search.c
  CFLAGS+=-DGREP -DHAVE_STRING_H=1 -DHAVE_SYS_PARAM_H=1 -DHAVE_UNISTD_H=1 \
  	-DHAVE_GETPAGESIZE=1 -DHAVE_MEMCHR=1 -DHAVE_STRERROR=1 \
! 	-DHAVE_VALLOC=1 -DHAVE_MMAP=1 -DHAVE_FTS=1
  
  LDADD+=	-lgnumalloc
  DPADD+=	/usr/lib/libgnumalloc.a
diff -rc grep/grep.1 grep_r/grep.1
*** grep/grep.1	Thu Jan 11 06:11:21 1996
--- grep_r/grep.1	Mon Jan 22 16:24:33 1996
***************
*** 4,25 ****
  grep, egrep, fgrep \- print lines matching a pattern
  .SH SYNOPOSIS
  .B grep
! [
! .BR \- [[ AB "] ]\c"
! .I "num"
! ]
! [
! .BR \- [ CEFGVBchilnsvwx ]
! ]
! [
! .B \-e
! ]
! .I pattern
! |
! .BI \-f file
! ] [
! .I files...
! ]
  .SH DESCRIPTION
  .PP
  .B Grep
--- 4,16 ----
  grep, egrep, fgrep \- print lines matching a pattern
  .SH SYNOPOSIS
  .B grep
! [\-[AB] num]
! [\-HPRrS]
! [\-CEFGLVabchilnqsvwx]
! [\-e expr]
! [\-f file]
! files...
! 
  .SH DESCRIPTION
  .PP
  .B Grep
***************
*** 103,108 ****
--- 94,102 ----
  to standard error.  This version number should
  be included in all bug reports (see below).
  .TP
+ .B \-a
+ Don't search in binary files.
+ .TP
  .B \-b
  Print the byte offset within the input file before
  each line of output.
***************
*** 169,174 ****
--- 163,204 ----
  .TP
  .B \-x
  Select only those matches that exactly match the whole line.
+ 
+ .PP
+ The following options are only available if compiled with the FTS library:
+ .PD 0
+ .TP
+ .BI \-H
+ If the 
+ .I \-R
+ option is specified, symbolic links on the command line
+ are followed.  (Symbolic links encountered in the tree traversal
+ are not followed.)
+ .TP
+ 
+ .BI \-P
+ If the 
+ .I \-R
+ option is specified, no symbolic links are followed.
+ .TP
+ 
+ .BI \-R
+ Search in the file hierarchies
+ rooted in the files instead of just the files themselves.
+ .TP
+ 
+ .BI \-r
+ Equivalent to
+ .BR \-RS .
+ .TP
+ 
+ .BI \-S
+ If the
+ .I \-R
+ option is specified, all symbolic links are followed.
+ .TP
+ 
+ 
  .PD
  .SH "REGULAR EXPRESSIONS"
  .PP
diff -rc grep/grep.c grep_r/grep.c
*** grep/grep.c	Thu Jan 11 06:11:21 1996
--- grep_r/grep.c	Mon Jan 22 16:49:05 1996
***************
*** 57,62 ****
--- 57,63 ----
  #include <unistd.h>
  #else
  #define O_RDONLY 0
+ #define STDIN_FILENO 0
  extern int open(), read(), close();
  #endif
  
***************
*** 94,99 ****
--- 95,110 ----
    return 0;
  }
  #endif
+ 
+ /* traverse a file hierarchy library */
+ #ifdef HAVE_FTS
+ #include <sys/types.h>
+ #include <sys/stat.h>
+ #include <fts.h>
+ #endif
+ 
+ /* don't search in binary files */
+ int aflag;
      
  /* Define flags declared in grep.h. */
  char *matcher;
***************
*** 500,505 ****
--- 511,550 ----
    return nlines;
  }
  
+ 
+ /*
+  * try to guess if fd belong to a binary file 
+  */
+ 
+ int isBinaryFile(fd)
+      int fd;
+ {
+ #define BINARY_BUF_LEN 32
+   static unsigned char buf[BINARY_BUF_LEN];
+   int i, n;
+ 
+   /* pipe, socket, fifo */
+   if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1)
+     return(0);
+ 
+  if ((n =(int) read(fd, buf, (size_t)BINARY_BUF_LEN)) == -1)
+    return(0);
+ 
+   /* look for non-printable chars */
+   for(i = 0; i < n; i++)
+     if (!isprint(buf[i]) && !isspace(buf[i]))
+       return(1);
+ 
+   /* reset fd to begin of file */
+   if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1)
+     return(0);
+ 
+   
+   return(0);
+ }
+ 
+ 
+ 
  /* Search a given file.  Return a count of lines printed. */
  static int
  grep(fd)
***************
*** 509,514 ****
--- 554,563 ----
    size_t residue, save;
    char *beg, *lim;
  
+   /* skip binary files */
+   if (aflag && isBinaryFile(fd))
+     return(0);
+ 
    reset(fd);
  
    totalcc = 0;
***************
*** 569,576 ****
  
  static char version[] = "GNU grep version 2.0";
  
  #define USAGE \
!   "usage: %s [-[[AB] ]<num>] [-[CEFGVchilnqsvwx]] [-[ef]] <expr> [<files...>]\n"
  
  static void
  usage()
--- 618,632 ----
  
  static char version[] = "GNU grep version 2.0";
  
+ #ifdef HAVE_FTS
+ #define USAGE \
+ "usage: %s [-[AB] <num>] [-HPRrS] [-CEFGLVabchilnqsvwx]\n\
+             [-e <expr>] [-f file] [files ...]\n"
+ #else
  #define USAGE \
! "usage: %s [-[AB] <num>] [-CEFGLVabchilnqsvwx]\n\
!             [-e <expr>] [-f file] [files ...]\n"
! #endif
  
  static void
  usage()
***************
*** 609,614 ****
--- 665,676 ----
    FILE *fp;
    extern char *optarg;
    extern int optind;
+ #ifdef HAVE_FTS
+   int Rflag, Hflag, Pflag, Lflag;
+   FTS *ftsp;
+   FTSENT *ftsent;
+   int fts_options;
+ #endif
  
    prog = argv[0];
    if (prog && strrchr(prog, '/'))
***************
*** 622,630 ****
    list_files = 0;
    suppress_errors = 0;
    matcher = NULL;
  
!   while ((opt = getopt(argc, argv, "0123456789A:B:CEFGVX:bce:f:hiLlnoqsvwxy"))
! 	 != EOF)
      switch (opt)
        {
        case '0':
--- 684,704 ----
    list_files = 0;
    suppress_errors = 0;
    matcher = NULL;
+   aflag = 0;
+ #ifdef HAVE_FTS
+   Rflag = Hflag = Pflag = Lflag = 0;
+ #endif
+ 
+   while ((opt = getopt(argc, argv,
+ 
+   #ifndef HAVE_FTS
+   "0123456789A:B:CEFGVX:abce:f:hiLlnqsvwxy"
+   #else
+   "0123456789A:B:CEFGHLPRSVX:abce:f:hiLlnqrsvwxy?"
+   #endif
  
!   )) != EOF)
!  
      switch (opt)
        {
        case '0':
***************
*** 676,681 ****
--- 750,788 ----
  	  fatal("matcher already specified", 0);
  	matcher = optarg;
  	break;
+ 
+ #ifdef HAVE_FTS
+ 	/* symbolic links on the command line are followed */
+       case 'H': 
+ 	Hflag = 1;
+ 	Lflag = Pflag = 0;
+ 	break;
+ 
+ 	/* no symbolic links are followed */
+       case 'P':
+ 	Pflag = 1;
+ 	Hflag = Lflag = 0;
+ 	break;
+ 
+ 	/* traverse file hierarchies */
+       case 'R':
+ 	Rflag = 1;
+ 	break;
+ 
+ 	/* all symbolic links are followed */
+       case 'S':
+ 	Lflag = 1;
+ 	Hflag = Pflag = 0;
+ 	break;
+ 
+         /* equivalent to -RS */
+       case 'r':
+         Lflag = Rflag = 1;
+         Hflag = Pflag = 0;
+ #endif
+       case 'a':
+ 	aflag = 1;
+ 	break;
        case 'b':
  	out_byte = 1;
  	break;
***************
*** 776,787 ****
--- 883,1008 ----
  
    (*compile)(keys, keycc);
  
+ #ifndef HAVE_FTS
    if (argc - optind > 1 && !no_filenames)
+ #else
+   if ((argc - optind > 1 || Rflag) && !no_filenames)
+ #endif
      out_file = 1;
  
    status = 1;
  
+ #if HAVE_FTS
+   if (Rflag) {
+     fts_options = FTS_PHYSICAL | FTS_NOCHDIR;
+ 
+     if (Hflag)
+       fts_options |= FTS_COMFOLLOW;
+ 
+     if (Lflag) {
+       fts_options |= FTS_LOGICAL;
+       fts_options &= ~FTS_PHYSICAL;
+     }
+ 
+     if (Pflag) {
+       fts_options &= ~FTS_LOGICAL & ~FTS_COMFOLLOW;
+       fts_options |= FTS_PHYSICAL;
+     }      
+   }
+ 
+   if (Rflag && optind < argc) {
+     int i;
+ 
+     /* replace "-" with "/dev/stdin" */
+     for (i = optind; i < argc; i++)
+       if (strcmp(argv[i], "-") == 0)
+ 	*(argv + i) = "/dev/stdin";
+ 
+     if ((ftsp = fts_open(argv + optind, fts_options,  
+ 			 (int(*)())NULL)) == NULL) {
+       if (!suppress_errors)
+ 	error("", errno);
+     } else {
+ 
+       while((ftsent = fts_read(ftsp)) != NULL) {
+ 	filename = ftsent->fts_accpath;
+ 
+ 	switch(ftsent->fts_info) {
+ 
+ 	  /* regular file */
+ 	case FTS_F:
+ 	  break;
+ 
+ 	  /* directory */
+ 	case FTS_D:
+ 	case FTS_DC:
+ 	case FTS_DP:
+ 	  continue; break;
+ 
+ 	  /* errors */
+ 	case FTS_DNR:
+ 	  error(filename, errno);
+ 	  continue; break;
+ 
+ 	case FTS_ERR:
+ 	case FTS_NS:
+ 	  error(filename, ftsent->fts_errno);
+ 	  continue; break;
+ 
+ 	  /* dead symlink */
+ 	case FTS_SLNONE:
+ 	  continue; break;
+ 
+ 	  /* symlink, don't skip */
+ 	case FTS_SL:
+ 	  break;
+ 
+ 	default:
+ 	  /* 
+ 	  if (!suppress_errors)
+ 	    fprintf(stderr, "%s: ignored\n", filename);
+ 	  continue; break;
+ 	  */
+ 
+ 	}
+ 
+ 	if ((desc = open(filename, O_RDONLY)) == -1) {
+ 	  error(filename, errno);
+ 	  continue;
+ 	}
+ 
+ 	count = grep(desc);
+ 	if (count_matches)
+ 	  {
+ 	    if (out_file)
+ 	      printf("%s:", filename);
+ 	    printf("%d\n", count);
+ 	  }
+ 	if (count)
+ 	  {
+ 	    status = 0;
+ 	    if (list_files == 1)
+ 	      printf("%s\n", filename);
+ 	  }
+ 	else if (list_files == -1)
+ 	  printf("%s\n", filename);
+ 
+ 	if (desc != STDIN_FILENO)
+ 	  close(desc);
+       }
+ 
+       if (fts_close(ftsp) == -1)
+ 	error("fts_close", errno);
+     }
+ 
+   /* ! Rflag */
+   } else
+ 
+ #endif /* HAVE_FTS */
+ 
+   /* search in file names from arguments, not from stdin */
    if (optind < argc)
+ 
      while (optind < argc)
        {
  	desc = strcmp(argv[optind], "-") ? open(argv[optind], O_RDONLY) : 0;
***************
*** 813,818 ****
--- 1034,1041 ----
  	  close(desc);
  	++optind;
        }
+ 
+   /* read input from stdin */
    else
      {
        filename = "(standard input)";
>Audit-Trail:
>Unformatted:
no
Patch that adds recursive file traversal features to grep(1)
non-critical
low
change-request