Subject: Re: find(1) with extended regexps
To: Luke Mewburn <lukem@NetBSD.org>
From: Daniel de Kok <danieldk@pobox.com>
List: tech-userlevel
Date: 07/18/2007 13:16:23
--4Ckj6UjgE2iN1+kY
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

On Wed, Jul 18, 2007 at 12:17:15PM +1000, Luke Mewburn wrote:
> On Tue, Jul 17, 2007 at 03:07:09PM +0200, Daniel de Kok wrote:
>   | Any objections to committing the attached patch for adding the '-E'
>   | option to find(1) to interpret regexps as extended regexps like grep
>   | and sed do? FreeBSD also seems to provide this functionality, and
>   | it is pretty handy, without having many downsides.
> 
> The idea in principle sounds good to me.
> 
> You should remove REG_BASIC from the argument list given to c_regex_common()
> by various functions that call it.

Sorry for missing that one. Since the only remaining choice is between
case-sensitive and case-insensitive matching, I guess a bool parameter
is more appropriate than a flags argument.

If no one objects, I'll commit the attached patch.

-- Daniel

--4Ckj6UjgE2iN1+kY
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="find-extended-regex.diff"

Index: extern.h
===================================================================
RCS file: /cvsroot/src/usr.bin/find/extern.h,v
retrieving revision 1.27
diff -b -u -r1.27 extern.h
--- extern.h	6 Feb 2007 13:25:01 -0000	1.27
+++ extern.h	18 Jul 2007 11:08:38 -0000
@@ -95,4 +95,5 @@
 PLAN	*c_or(char ***, int);
 PLAN	*c_null(char ***, int);
 
-extern int ftsoptions, isdeprecated, isdepth, isoutput, issort, isxargs;
+extern int ftsoptions, isdeprecated, isdepth, isoutput, issort, isxargs,
+	regcomp_flags;
Index: find.1
===================================================================
RCS file: /cvsroot/src/usr.bin/find/find.1,v
retrieving revision 1.65
diff -b -u -r1.65 find.1
--- find.1	8 Mar 2007 21:23:00 -0000	1.65
+++ find.1	18 Jul 2007 11:08:38 -0000
@@ -32,7 +32,7 @@
 .\"
 .\"	from: @(#)find.1	8.7 (Berkeley) 5/9/95
 .\"
-.Dd February 8, 2007
+.Dd July 17, 2007
 .Dt FIND 1
 .Os
 .Sh NAME
@@ -41,13 +41,13 @@
 .Sh SYNOPSIS
 .Nm
 .Op Fl H | Fl L | Fl P
-.Op Fl dhsXx
+.Op Fl dEhsXx
 .Ar file
 .Op Ar file ...
 .Op Ar expression
 .Nm
 .Op Fl H | Fl L | Fl P
-.Op Fl dhsXx
+.Op Fl dEhsXx
 .Fl f Ar file
 .Op Ar file ...
 .Op Ar expression
@@ -107,6 +107,14 @@
 Note, the default is
 .Ar not
 a breadth-first traversal.
+.It Fl E
+The
+.Fl E
+option causes
+.Ar regexp
+arguments to primaries to be interpreted as extended regular
+expressions (see
+.Xr re_format 7 ) .
 .It Fl f
 The
 .Fl f
Index: function.c
===================================================================
RCS file: /cvsroot/src/usr.bin/find/function.c,v
retrieving revision 1.62
diff -b -u -r1.62 function.c
--- function.c	6 Feb 2007 13:25:01 -0000	1.62
+++ function.c	18 Jul 2007 11:08:38 -0000
@@ -55,6 +55,7 @@
 #include <inttypes.h>
 #include <limits.h>
 #include <pwd.h>
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -119,7 +120,7 @@
 	int	f_user(PLAN *, FTSENT *);
 	int	f_not(PLAN *, FTSENT *);
 	int	f_or(PLAN *, FTSENT *);
-static	PLAN   *c_regex_common(char ***, int, enum ntype, int);
+static	PLAN   *c_regex_common(char ***, int, enum ntype, bool);
 static	PLAN   *palloc(enum ntype, int (*)(PLAN *, FTSENT *));
 
 extern int dotfd;
@@ -1559,7 +1560,7 @@
 }
 
 static PLAN *
-c_regex_common(char ***argvp, int isok, enum ntype type, int regcomp_flags)
+c_regex_common(char ***argvp, int isok, enum ntype type, bool icase)
 {
 	char errbuf[LINE_MAX];
 	regex_t reg;
@@ -1578,7 +1579,8 @@
 	snprintf(lineregexp, len, "^%s(%s%s)$",
 	    (regcomp_flags & REG_EXTENDED) ? "" : "\\", regexp,
 	    (regcomp_flags & REG_EXTENDED) ? "" : "\\");
-	rv = regcomp(&reg, lineregexp, REG_NOSUB|regcomp_flags);
+	rv = regcomp(&reg, lineregexp, REG_NOSUB|regcomp_flags|
+	    (icase ? REG_ICASE : 0));
 	free(lineregexp);
 	if (rv != 0) {
 		regerror(rv, &reg, errbuf, sizeof errbuf);
@@ -1594,14 +1596,14 @@
 c_regex(char ***argvp, int isok)
 {
 
-	return (c_regex_common(argvp, isok, N_REGEX, REG_BASIC));
+	return (c_regex_common(argvp, isok, N_REGEX, false));
 }
 
 PLAN *
 c_iregex(char ***argvp, int isok)
 {
 
-	return (c_regex_common(argvp, isok, N_IREGEX, REG_BASIC|REG_ICASE));
+	return (c_regex_common(argvp, isok, N_IREGEX, true));
 }
 
 /*
Index: main.c
===================================================================
RCS file: /cvsroot/src/usr.bin/find/main.c,v
retrieving revision 1.26
diff -b -u -r1.26 main.c
--- main.c	9 Nov 2006 20:50:53 -0000	1.26
+++ main.c	18 Jul 2007 11:08:38 -0000
@@ -68,6 +68,7 @@
 int isoutput;			/* user specified output operator */
 int issort;			/* sort directory entries */
 int isxargs;			/* don't permit xargs delimiting chars */
+int regcomp_flags = REG_BASIC;	/* regex compilation flags */
 
 int main(int, char **);
 static void usage(void);
@@ -93,7 +94,7 @@
 		err(1, NULL);
 
 	ftsoptions = FTS_NOSTAT | FTS_PHYSICAL;
-	while ((ch = getopt(argc, argv, "HLPXdf:hsx")) != -1)
+	while ((ch = getopt(argc, argv, "HLPdEf:hsXx")) != -1)
 		switch (ch) {
 		case 'H':
 			ftsoptions &= ~FTS_LOGICAL;
@@ -107,12 +108,12 @@
 			ftsoptions &= ~(FTS_COMFOLLOW|FTS_LOGICAL);
 			ftsoptions |= FTS_PHYSICAL;
 			break;
-		case 'X':
-			isxargs = 1;
-			break;
 		case 'd':
 			isdepth = 1;
 			break;
+		case 'E':
+			regcomp_flags = REG_EXTENDED;
+			break;
 		case 'f':
 			*p++ = optarg;
 			break;
@@ -123,6 +124,9 @@
 		case 's':
 			issort = 1;
 			break;
+		case 'X':
+			isxargs = 1;
+			break;
 		case 'x':
 			ftsoptions |= FTS_XDEV;
 			break;
@@ -164,6 +168,6 @@
 {
 
 	(void)fprintf(stderr,
-"usage: find [-H | -L | -P] [-Xdhsx] [-f file] file [file ...] [expression]\n");
+"usage: find [-H | -L | -P] [-dEhsXx] [-f file] file [file ...] [expression]\n");
 	exit(1);
 }

--4Ckj6UjgE2iN1+kY--