Subject: Re: tar --exclude
To: None <tech-userlevel@netbsd.org>
From: Christos Zoulas <christos@zoulas.com>
List: tech-userlevel
Date: 12/05/2002 18:46:09
In article <87k7ioh3bf.wl@orinoco.my.domain>,
 <hiramatu@boreas.dti.ne.jp> wrote:
>-=-=-=-=-=-
>
>Hi,
>
>From man page of GNU tar,
>
> --exclude pattern       Exclude files matching the pattern (don't extract
>                         them, don't add them, don't list them).
>
>I think this option is useful, and here is my attempt to add --exclude
>to our tar.
>
>Thanks in advance for comments.

Thanks a lot for the patch. I've made some minor adjustments to it, to
simplify the code and minimize the diffs.

christos

Index: ar_subs.c
===================================================================
RCS file: /cvsroot/basesrc/bin/pax/ar_subs.c,v
retrieving revision 1.20
diff -u -u -r1.20 ar_subs.c
--- ar_subs.c	2002/10/18 11:54:22	1.20
+++ ar_subs.c	2002/12/05 18:44:24
@@ -50,6 +50,7 @@
 #include <sys/time.h>
 #include <sys/stat.h>
 #include <sys/param.h>
+#include <assert.h>
 #include <signal.h>
 #include <string.h>
 #include <stdio.h>
@@ -62,7 +63,8 @@
 #include "pax.h"
 #include "extern.h"
 
-static void wr_archive(ARCHD *, int is_app);
+static void wr_archive(ARCHD *, int);
+static int match(ARCHD *);
 static int get_arc(void);
 static int next_head(ARCHD *);
 extern sigset_t s_mask;
@@ -76,6 +78,23 @@
 u_long flcnt;				/* number of files processed */
 ARCHD archd;
 
+static int
+match(arcn)
+	ARCHD *arcn;
+{
+	int res;
+	switch (res = pat_match(arcn, EXCLUDE)) {
+	case DONE:
+	case SKIP:
+		return res;
+	case PROCEED:
+		return pat_match(arcn, INCLUDE);
+	default:
+		assert(res == DONE);
+		return DONE;
+	}
+}
+
 /*
  * list()
  *	list the contents of an archive which match user supplied pattern(s)
@@ -125,10 +144,10 @@
 		 * check for pattern, and user specified options match.
 		 * When all patterns are matched we are done.
 		 */
-		if ((res = pat_match(arcn)) < 0)
+		if ((res = match(arcn)) == DONE)
 			break;
 
-		if ((res == 0) && (sel_chk(arcn) == 0)) {
+		if ((res == PROCEED) && (sel_chk(arcn) == 0)) {
 			/*
 			 * pattern resulted in a selected file
 			 */
@@ -219,10 +238,10 @@
 		 * check for pattern, and user specified options match. When
 		 * all the patterns are matched we are done
 		 */
-		if ((res = pat_match(arcn)) < 0)
+		if ((res = match(arcn)) == DONE)
 			break;
 
-		if ((res > 0) || (sel_chk(arcn) != 0)) {
+		if ((res == SKIP) || (sel_chk(arcn) != 0)) {
 			/*
 			 * file is not selected. skip past any file
 			 * data and padding and go back for the next
Index: extern.h
===================================================================
RCS file: /cvsroot/basesrc/bin/pax/extern.h,v
retrieving revision 1.35
diff -u -u -r1.35 extern.h
--- extern.h	2002/10/17 00:32:36	1.35
+++ extern.h	2002/12/05 18:44:24
@@ -209,10 +209,18 @@
  * pat_rep.c
  */
 int rep_add(char *);
-int pat_add(char *, char *);
+#define INCLUDE 0
+#define EXCLUDE 1
+int pat_add(char *, char *, int);
+void invert_patterns(void);
 void pat_chk(void);
+#define SELECT_OK 0
+#define SELECT_NG 1
 int pat_sel(ARCHD *);
-int pat_match(ARCHD *);
+#define SKIP 0
+#define PROCEED 1
+#define DONE 2
+int pat_match(ARCHD *, int);
 int mod_name(ARCHD *);
 int set_dest(ARCHD *, char *, int);
 
Index: options.c
===================================================================
RCS file: /cvsroot/basesrc/bin/pax/options.c,v
retrieving revision 1.55
diff -u -u -r1.55 options.c
--- options.c	2002/10/18 13:45:05	1.55
+++ options.c	2002/12/05 18:44:24
@@ -639,7 +639,7 @@
 	case LIST:
 	case EXTRACT:
 		for (; optind < argc; optind++)
-			if (pat_add(argv[optind], NULL) < 0)
+			if (pat_add(argv[optind], NULL, INCLUDE) < 0)
 				pax_usage();
 		break;
 	case COPY:
@@ -663,6 +663,9 @@
 		maxflt = 0;
 		break;
 	}
+	if (cflag) {
+		invert_patterns();
+	}
 }
 
 
@@ -701,6 +704,8 @@
 	{ "absolute-paths",	no_argument,		0,	'P' },
 	{ "files-from",		required_argument,	0,	'T' },
 	{ "exclude-from",	required_argument,	0,	'X' },
+	{ "exclude",		required_argument,	0,
+						OPT_EXCLUDE },
 	{ "compress",		no_argument,		0,	'Z' },
 	{ "uncompress",		no_argument,		0,	'Z' },
 	{ "strict",		no_argument,		0,
@@ -754,8 +759,6 @@
 	{ "version",		no_argument,		0,
 						OPT_VERSION },
 	{ "verify",		no_argument,		0,	'W' },
-	{ "exclude",		required_argument,	0,
-						OPT_EXCLUDE },
 	{ "block-compress",	no_argument,		0,
 						OPT_BLOCK_COMPRESS },
 	{ "norecurse",		no_argument,		0,
@@ -772,11 +775,18 @@
 	int Oflag = 0;
 	int nincfiles = 0;
 	int incfiles_max = 0;
+	int nexfiles = 0;
+	int exfiles_max = 0;
 	struct incfile {
 		char *file;
 		char *dir;
 	};
+	struct exfile {
+		char *file;
+		char *dir;
+	};
 	struct incfile *incfiles = NULL;
+	struct exfile *exfiles = NULL;
 
 	/*
 	 * Set default values.
@@ -977,11 +987,18 @@
 			Aflag = 1;
 			break;
 		case 'X':
-			/*
-			 * GNU tar compat: exclude the files listed in optarg
-			 */
-			if (tar_gnutar_X_compat(optarg) != 0)
-				tar_usage();
+			if (++nexfiles > exfiles_max) {
+				exfiles_max = nexfiles + 3;
+				exfiles = realloc(exfiles,
+				    sizeof(*exfiles) * exfiles_max);
+				if (exfiles == NULL) {
+					tty_warn(0, "Unable to allocate space "
+					    "for option list");
+					exit(1);
+				}
+			}
+			exfiles[nexfiles - 1].file = optarg;
+			exfiles[nexfiles - 1].dir = chdname;
 			break;
 		case 'Z':
 			/*
@@ -1008,6 +1025,9 @@
 		case '8':
 			arcname = DEV_8;
 			break;
+		case OPT_EXCLUDE:
+			pat_add(optarg, NULL, EXCLUDE);
+			break;
 		case OPT_ATIME_PRESERVE:
 			patime = 1;
 			break;
@@ -1070,9 +1090,10 @@
 		{
 			int sawpat = 0;
 			int dirisnext = 0;
+			int pat_flg;
 			char *file, *dir;
 
-			while (nincfiles || *argv != NULL) {
+			while (nincfiles || nexfiles || *argv != NULL) {
 				/*
 				 * If we queued up any include files,
 				 * pull them in now.  Otherwise, check
@@ -1085,6 +1106,14 @@
 					dir = incfiles->dir;
 					incfiles++;
 					nincfiles--;
+					pat_flg = INCLUDE;
+				} else if (nexfiles) {
+					file = exfiles->file;
+					dir = exfiles->dir;
+					exfiles++;
+					nexfiles--;
+					pat_flg = EXCLUDE;
 				} else if (strcmp(*argv, "-I") == 0) {
+					pat_flg = INCLUDE;	/* never reuse a stale EXCLUDE */
 					if (*++argv == NULL)
 						break;
@@ -1112,7 +1140,7 @@
 							dirisnext = 1;
 							continue;
 						}
-						if (pat_add(str, dir) < 0)
+						if (pat_add(str, dir, pat_flg) < 0)
 							tar_usage();
 						sawpat = 1;
 					}
@@ -1129,7 +1157,7 @@
 					if (*++argv == NULL)
  						break;
 					chdname = *argv++;
-				} else if (pat_add(*argv++, chdname) < 0)
+				} else if (pat_add(*argv++, chdname, INCLUDE) < 0)
 					tar_usage();
 				else
 					sawpat = 1;
@@ -1498,7 +1526,7 @@
 				cpio_usage();
 			}
 			while ((str = getline(fp)) != NULL) {
-				pat_add(str, NULL);
+				pat_add(str, NULL, INCLUDE);
 			}
 			fclose(fp);
 			if (getline_error) {
@@ -1626,7 +1654,7 @@
 	case LIST:
 	case EXTRACT:
 		for (; optind < argc; optind++)
-			if (pat_add(argv[optind], 0) < 0)
+			if (pat_add(argv[optind], 0, INCLUDE) < 0)
 				cpio_usage();
 		break;
 	case COPY:
Index: pat_rep.c
===================================================================
RCS file: /cvsroot/basesrc/bin/pax/pat_rep.c,v
retrieving revision 1.16
diff -u -u -r1.16 pat_rep.c
--- pat_rep.c	2002/10/23 19:39:42	1.16
+++ pat_rep.c	2002/12/05 18:44:25
@@ -55,6 +55,7 @@
 #include <string.h>
 #include <unistd.h>
 #include <stdlib.h>
+#include <assert.h>
 #ifdef NET2_REGEX
 #include <regexp.h>
 #else
@@ -72,10 +73,10 @@
  */
 
 #define MAXSUBEXP	10		/* max subexpressions, DO NOT CHANGE */
-static PATTERN *pathead = NULL;		/* file pattern match list head */
-static PATTERN *pattail = NULL;		/* file pattern match list tail */
-static REPLACE *rephead = NULL;		/* replacement string list head */
-static REPLACE *reptail = NULL;		/* replacement string list tail */
+static PATTERN *pathead[2];		/* file pattern match list head */
+static PATTERN *pattail[2];		/* file pattern match list tail */
+static REPLACE *rephead;		/* replacement string list head */
+static REPLACE *reptail;		/* replacement string list tail */
 
 static int rep_name(char *, size_t, int *, int);
 static int tty_rename(ARCHD *);
@@ -247,7 +248,7 @@
  */
 
 int
-pat_add(char *str, char *chdn)
+pat_add(char *str, char *chdn, int pat_type)
 {
 	PATTERN *pt;
 
@@ -275,15 +276,27 @@
 	pt->fow = NULL;
 	pt->flgs = 0;
 	pt->chdname = chdn;
-	if (pathead == NULL) {
-		pattail = pathead = pt;
+	if (pathead[pat_type] == NULL) {
+		pattail[pat_type] = pathead[pat_type] = pt;
 		return(0);
 	}
-	pattail->fow = pt;
-	pattail = pt;
+	pattail[pat_type]->fow = pt;
+	pattail[pat_type] = pt;
 	return(0);
 }
 
+void
+invert_patterns(void)
+{
+	PATTERN *pt;
+	pt = pathead[INCLUDE];
+	pathead[INCLUDE] = pathead[EXCLUDE];
+	pathead[EXCLUDE] = pt;
+	pt = pattail[INCLUDE];
+	pattail[INCLUDE] = pattail[EXCLUDE];
+	pattail[EXCLUDE] = pt;
+}
+
 /*
  * pat_chk()
  *	complain if any the user supplied pattern did not result in a match to
@@ -300,7 +313,7 @@
 	 * walk down the list checking the flags to make sure MTCH was set,
 	 * if not complain
 	 */
-	for (pt = pathead; pt != NULL; pt = pt->fow) {
+	for (pt = pathead[INCLUDE]; pt != NULL; pt = pt->fow) {
 		if (pt->flgs & MTCH)
 			continue;
 		if (!wban) {
@@ -337,7 +350,7 @@
 	/*
 	 * if no patterns just return
 	 */
-	if ((pathead == NULL) || ((pt = arcn->pat) == NULL))
+	if ((pathead[INCLUDE] == NULL) || ((pt = arcn->pat) == NULL))
 		return(0);
 
 	/*
@@ -414,8 +427,8 @@
 	 * and the pattern rejects a member (i.e. it matched it) it is done.
 	 * In effect we place the order of the flags as having -c last.
 	 */
-	pt = pathead;
-	ppt = &pathead;
+	pt = pathead[INCLUDE];
+	ppt = &pathead[INCLUDE];
 	while ((pt != NULL) && (pt != arcn->pat)) {
 		ppt = &(pt->fow);
 		pt = pt->fow;
@@ -444,29 +457,43 @@
  *	0 if this archive member should be processed, 1 if it should be
  *	skipped and -1 if we are done with all patterns (and pax should quit
  *	looking for more members)
+ *
+ *	when pat_type == INCLUDE
+ *	   - MATCH means proceed
+ *	   - NOT_MATCH means skip
+ *	when pat_type == EXCLUDE
+ *	   - MATCH means skip
+ *	   - NOT_MATCH means proceed
+ *	when there are no patterns of the given type
+ *	   - pat_type == INCLUDE
+ *	       -> proceed (or done, if -n was given)
+ *	   - pat_type == EXCLUDE
+ *	       -> proceed
  */
 
 int
-pat_match(ARCHD *arcn)
+pat_match(ARCHD *arcn, int pat_type)
 {
 	PATTERN *pt;
 
+	assert((pat_type == INCLUDE) || (pat_type == EXCLUDE));
+
 	arcn->pat = NULL;
 
 	/*
 	 * if there are no more patterns and we have -n (and not -c) we are
 	 * done. otherwise with no patterns to match, matches all
 	 */
-	if (pathead == NULL) {
-		if (nflag && !cflag)
-			return(-1);
-		return(0);
+	if (pathead[pat_type] == NULL) {
+		if (nflag && pat_type == INCLUDE)
+			return(DONE);
+		return(PROCEED);
 	}
 
 	/*
 	 * have to search down the list one at a time looking for a match.
 	 */
-	pt = pathead;
+	pt = pathead[pat_type];
 	while (pt != NULL) {
 		/*
 		 * check for a file name match unless we have DIR_MTCH set in
@@ -487,26 +514,12 @@
 		pt = pt->fow;
 	}
 
-	/*
-	 * return the result, remember that cflag (-c) inverts the sense of a
-	 * match
-	 */
-	if (pt == NULL)
-		return(cflag ? 0 : 1);
-
-	/*
-	 * we had a match, now when we invert the sense (-c) we reject this
-	 * member. However we have to tag the pattern a being successful, (in a
-	 * match, not in selecting a archive member) so we call pat_sel() here.
-	 */
 	arcn->pat = pt;
-	if (!cflag)
-		return(0);
-
-	if (pat_sel(arcn) < 0)
-		return(-1);
-	arcn->pat = NULL;
-	return(1);
+	if (pt != NULL) {	/* some pattern on this list matched */
+		return pat_type == INCLUDE ? PROCEED : SKIP;
+	} else {		/* nothing on this list matched */
+		return pat_type == INCLUDE ? SKIP : PROCEED;
+	}
 }
 
 /*
Index: tar.1
===================================================================
RCS file: /cvsroot/basesrc/bin/pax/tar.1,v
retrieving revision 1.6
diff -u -u -r1.6 tar.1
--- tar.1	2002/11/06 14:56:48	1.6
+++ tar.1	2002/12/05 18:44:25
@@ -231,6 +231,8 @@
 .It Fl X Ar file , Fl -exclude-from Ar file
 Exclude files listed in the given file.
 Do not cross mount points in the file system.
+.It Fl -exclude Ar pattern
+Exclude files matching the pattern.
 .It Fl Z , -compress , -uncompress
 Compress archive using compress.
 .It Fl -strict