Subject: tar --exclude
To: None <tech-userlevel@netbsd.org>
From: None <hiramatu@boreas.dti.ne.jp>
List: tech-userlevel
Date: 12/06/2002 00:51:16
--Multipart_Fri_Dec__6_00:51:16_2002-1
Content-Type: text/plain; charset=US-ASCII
Content-Transfer-Encoding: quoted-printable

Hi,

From the GNU tar man page:

 --exclude pattern       Exclude files matching the pattern (don't extract
                         them, don't add them, don't list them).

I think this option is useful, and here is my attempt to add --exclude
to our tar.

Thanks in advance for comments.

-----
// Hiramatsu Yoshifumi
// hiramatu@boreas.dti.ne.jp


--Multipart_Fri_Dec__6_00:51:16_2002-1
Content-Type: application/octet-stream; type=patch
Content-Disposition: attachment; filename="pax.diff"
Content-Transfer-Encoding: 7bit

diff -ur pax.old/Makefile pax/Makefile
--- pax.old/Makefile	Mon Dec  2 23:28:08 2002
+++ pax/Makefile	Mon Dec  2 23:47:43 2002
@@ -59,7 +59,7 @@
 .if !defined(HOSTPROG) && !defined(SMALLPROG)
 CPPFLAGS+=	-DSUPPORT_RMT
 
-LDADD+=	-lrmt
+LDADD+=	-lrmt -lutil
 DPADD+=	${LIBRMT}
 .endif
 
diff -ur pax.old/ar_subs.c pax/ar_subs.c
--- pax.old/ar_subs.c	Mon Dec  2 23:28:08 2002
+++ pax/ar_subs.c	Thu Dec  5 00:33:08 2002
@@ -125,15 +125,22 @@
 		 * check for pattern, and user specified options match.
 		 * When all patterns are matched we are done.
 		 */
-		if ((res = pat_match(arcn)) < 0)
+		res = pat_match(arcn, EXCLUDE);
+		if (res == DONE) {
 			break;
+		}
+		if (res != SKIP) {
+			res = pat_match(arcn, INCLUDE);
+			if (res == DONE)
+				break;
+		}
 
-		if ((res == 0) && (sel_chk(arcn) == 0)) {
+		if ((res == PROCEED) && (sel_chk(arcn) == 0)) {
 			/*
 			 * pattern resulted in a selected file
 			 */
-			if (pat_sel(arcn) < 0)
-				break;
+			//if (pat_sel(arcn) < 0)
+				//break;
 
 			/*
 			 * modify the name as requested by the user if name
@@ -219,10 +226,18 @@
 		 * check for pattern, and user specified options match. When
 		 * all the patterns are matched we are done
 		 */
-		if ((res = pat_match(arcn)) < 0)
+		res = pat_match(arcn, EXCLUDE);
+		if (res == DONE) {
 			break;
+		}
+		if (res != SKIP) {
+			res = pat_match(arcn, INCLUDE);
+			if (res == DONE)
+				break;
+		}
+
 
-		if ((res > 0) || (sel_chk(arcn) != 0)) {
+		if ((res == SKIP) || (sel_chk(arcn) != 0)) {
 			/*
 			 * file is not selected. skip past any file
 			 * data and padding and go back for the next
diff -ur pax.old/extern.h pax/extern.h
--- pax.old/extern.h	Mon Dec  2 23:28:08 2002
+++ pax/extern.h	Wed Dec  4 00:36:27 2002
@@ -209,10 +209,18 @@
  * pat_rep.c
  */
 int rep_add(char *);
-int pat_add(char *, char *);
+#define INCLUDE 0
+#define EXCLUDE 1
+int pat_add(char *, char *, int);
+void invert_patterns(void);
 void pat_chk(void);
+#define SELECT_OK 0
+#define SELECT_NG 1
 int pat_sel(ARCHD *);
-int pat_match(ARCHD *);
+#define SKIP 0
+#define PROCEED 1
+#define DONE 2
+int pat_match(ARCHD *, int);
 int mod_name(ARCHD *);
 int set_dest(ARCHD *, char *, int);
 
diff -ur pax.old/options.c pax/options.c
--- pax.old/options.c	Mon Dec  2 23:28:08 2002
+++ pax/options.c	Thu Dec  5 23:49:19 2002
@@ -639,7 +639,7 @@
 	case LIST:
 	case EXTRACT:
 		for (; optind < argc; optind++)
-			if (pat_add(argv[optind], NULL) < 0)
+			if (pat_add(argv[optind], NULL, INCLUDE) < 0)
 				pax_usage();
 		break;
 	case COPY:
@@ -663,6 +663,9 @@
 		maxflt = 0;
 		break;
 	}
+	if (cflag) {
+		invert_patterns();
+	}
 }
 
 
@@ -701,6 +704,8 @@
 	{ "absolute-paths",	no_argument,		0,	'P' },
 	{ "files-from",		required_argument,	0,	'T' },
 	{ "exclude-from",	required_argument,	0,	'X' },
+	{ "exclude",		required_argument,	0,
+						OPT_EXCLUDE },
 	{ "compress",		no_argument,		0,	'Z' },
 	{ "uncompress",		no_argument,		0,	'Z' },
 	{ "strict",		no_argument,		0,
@@ -754,8 +759,6 @@
 	{ "version",		no_argument,		0,
 						OPT_VERSION },
 	{ "verify",		no_argument,		0,	'W' },
-	{ "exclude",		required_argument,	0,
-						OPT_EXCLUDE },
 	{ "block-compress",	no_argument,		0,
 						OPT_BLOCK_COMPRESS },
 	{ "norecurse",		no_argument,		0,
@@ -772,11 +775,18 @@
 	int Oflag = 0;
 	int nincfiles = 0;
 	int incfiles_max = 0;
+	int nexfiles = 0;
+	int exfiles_max = 0;
 	struct incfile {
 		char *file;
 		char *dir;
 	};
+	struct exfile {
+		char *file;
+		char *dir;
+	};
 	struct incfile *incfiles = NULL;
+	struct exfile *exfiles = NULL;
 
 	/*
 	 * Set default values.
@@ -977,11 +987,18 @@
 			Aflag = 1;
 			break;
 		case 'X':
-			/*
-			 * GNU tar compat: exclude the files listed in optarg
-			 */
-			if (tar_gnutar_X_compat(optarg) != 0)
-				tar_usage();
+			if (++nexfiles > exfiles_max) {
+				exfiles_max = nexfiles + 3;
+				exfiles = realloc(exfiles,
+				    sizeof(*exfiles) * exfiles_max);
+				if (exfiles == NULL) {
+					tty_warn(0, "Unable to allocate space "
+					    "for option list");
+					exit(1);
+				}
+			}
+			exfiles[nexfiles - 1].file = optarg;
+			exfiles[nexfiles - 1].dir = chdname;
 			break;
 		case 'Z':
 			/*
@@ -1008,6 +1025,9 @@
 		case '8':
 			arcname = DEV_8;
 			break;
+		case OPT_EXCLUDE:
+			pat_add(optarg, NULL, EXCLUDE);
+			break;
 		case OPT_ATIME_PRESERVE:
 			patime = 1;
 			break;
@@ -1070,9 +1090,10 @@
 		{
 			int sawpat = 0;
 			int dirisnext = 0;
+			int pat_flg;
 			char *file, *dir;
 
-			while (nincfiles || *argv != NULL) {
+			while (nincfiles || nexfiles || *argv != NULL) {
 				/*
 				 * If we queued up any include files,
 				 * pull them in now.  Otherwise, check
@@ -1085,6 +1106,13 @@
 					dir = incfiles->dir;
 					incfiles++;
 					nincfiles--;
+					pat_flg = INCLUDE;
+				} else if (nexfiles) {
+					file = exfiles->file;
+					dir = exfiles->dir;
+					exfiles++;
+					nexfiles--;
+					pat_flg = EXCLUDE;
 				} else if (strcmp(*argv, "-I") == 0) {
 					if (*++argv == NULL)
 						break;
@@ -1112,7 +1140,7 @@
 							dirisnext = 1;
 							continue;
 						}
-						if (pat_add(str, dir) < 0)
+						if (pat_add(str, dir, pat_flg) < 0)
 							tar_usage();
 						sawpat = 1;
 					}
@@ -1129,7 +1157,7 @@
 					if (*++argv == NULL)
  						break;
 					chdname = *argv++;
-				} else if (pat_add(*argv++, chdname) < 0)
+				} else if (pat_add(*argv++, chdname, INCLUDE) < 0)
 					tar_usage();
 				else
 					sawpat = 1;
@@ -1498,7 +1526,7 @@
 				cpio_usage();
 			}
 			while ((str = getline(fp)) != NULL) {
-				pat_add(str, NULL);
+				pat_add(str, NULL, INCLUDE);
 			}
 			fclose(fp);
 			if (getline_error) {
@@ -1626,7 +1654,7 @@
 	case LIST:
 	case EXTRACT:
 		for (; optind < argc; optind++)
-			if (pat_add(argv[optind], 0) < 0)
+			if (pat_add(argv[optind], 0, INCLUDE) < 0)
 				cpio_usage();
 		break;
 	case COPY:
diff -ur pax.old/pat_rep.c pax/pat_rep.c
--- pax.old/pat_rep.c	Mon Dec  2 23:28:08 2002
+++ pax/pat_rep.c	Wed Dec  4 22:40:12 2002
@@ -55,6 +55,7 @@
 #include <string.h>
 #include <unistd.h>
 #include <stdlib.h>
+#include <assert.h>
 #ifdef NET2_REGEX
 #include <regexp.h>
 #else
@@ -74,6 +75,8 @@
 #define MAXSUBEXP	10		/* max subexpressions, DO NOT CHANGE */
 static PATTERN *pathead = NULL;		/* file pattern match list head */
 static PATTERN *pattail = NULL;		/* file pattern match list tail */
+static PATTERN *ex_pathead = NULL;	/* exclude pattern match list head */
+static PATTERN *ex_pattail = NULL;	/* exclude pattern match list tail */
 static REPLACE *rephead = NULL;		/* replacement string list head */
 static REPLACE *reptail = NULL;		/* replacement string list tail */
 
@@ -259,7 +262,7 @@
  */
 
 int
-pat_add(char *str, char *chdn)
+pat_add(char *str, char *chdn, int pat_type)
 {
 	PATTERN *pt;
 
@@ -287,15 +290,33 @@
 	pt->fow = NULL;
 	pt->flgs = 0;
 	pt->chdname = chdn;
-	if (pathead == NULL) {
-		pattail = pathead = pt;
-		return(0);
+	if (pat_type == INCLUDE) {
+		if (pathead == NULL) {
+			pattail = pathead = pt;
+			return(0);
+		}
+		pattail->fow = pt;
+		pattail = pt;
+	} else if (pat_type == EXCLUDE) {
+		if (ex_pathead == NULL) {
+			ex_pathead = ex_pathead = pt;
+			return(0);
+		}
+		ex_pattail->fow = pt;
+		ex_pattail = pt;
 	}
-	pattail->fow = pt;
-	pattail = pt;
 	return(0);
 }
 
+void
+invert_patterns(void)
+{	/* for -c (cflag): include patterns become the exclude list */
+	ex_pathead = pathead;	/* overwrites any existing exclude list */
+	pathead = NULL;		/* include list is left empty */
+	ex_pattail = pattail;
+	pattail = NULL;
+}
+
 /*
  * pat_chk()
  *	complain if any the user supplied pattern did not result in a match to
@@ -457,28 +478,53 @@
  *	skipped and -1 if we are done with all patterns (and pax should quit
  *	looking for more members)
  */
+/*
+ * With this change pat_match() returns SKIP, PROCEED or DONE:
+ *
+ * pat_type == INCLUDE
+ *	- a match means PROCEED
+ *	- no match means SKIP
+ * pat_type == EXCLUDE
+ *	- a match means SKIP
+ *	- no match means PROCEED
+ *
+ * When the selected list is empty, both pattern types PROCEED
+ * (INCLUDE returns DONE instead if -n is in effect).
+ */
 
 int
-pat_match(ARCHD *arcn)
+pat_match(ARCHD *arcn, int pat_type)
 {
 	PATTERN *pt;
 
+	assert((pat_type == INCLUDE) || (pat_type == EXCLUDE));
+
 	arcn->pat = NULL;
 
 	/*
 	 * if there are no more patterns and we have -n (and not -c) we are
 	 * done. otherwise with no patterns to match, matches all
 	 */
-	if (pathead == NULL) {
-		if (nflag && !cflag)
-			return(-1);
-		return(0);
+	if (pat_type == INCLUDE) {
+		if (pathead == NULL) {
+			if (nflag)
+				return(DONE);
+			return(PROCEED);
+		}
+	} else if(pat_type == EXCLUDE) {
+		if (ex_pathead == NULL) {
+			return(PROCEED);
+		}
 	}
 
 	/*
 	 * have to search down the list one at a time looking for a match.
 	 */
-	pt = pathead;
+	if (pat_type == INCLUDE) {
+		pt = pathead;
+	} else if(pat_type == EXCLUDE) {
+		pt = ex_pathead;
+	}
 	while (pt != NULL) {
 		/*
 		 * check for a file name match unless we have DIR_MTCH set in
@@ -503,8 +549,13 @@
 	 * return the result, remember that cflag (-c) inverts the sense of a
 	 * match
 	 */
-	if (pt == NULL)
-		return(cflag ? 0 : 1);
+	if (pat_type == INCLUDE) {
+		if (pt == NULL)
+			return(SKIP);
+	} else if(pat_type == EXCLUDE) {
+		if (pt == NULL)
+			return(PROCEED);
+	}
 
 	/*
 	 * we had a match, now when we invert the sense (-c) we reject this
@@ -512,12 +563,17 @@
 	 * match, not in selecting a archive member) so we call pat_sel() here.
 	 */
 	arcn->pat = pt;
-	if (!cflag)
-		return(0);
-
-	if (pat_sel(arcn) < 0)
-		return(-1);
-	arcn->pat = NULL;
+	//if (!cflag)
+		//return(0);
+	if (pat_type == INCLUDE) {
+		if (pat_sel(arcn) < 0)
+			return(DONE);
+		return(PROCEED);
+	} else { /* pat_type == EXCLUDE */
+		if (pat_sel(arcn) < 0)
+			return(DONE);
+		return(SKIP);
+	}
 	return(1);
 }
 
diff -ur pax.old/tar.1 pax/tar.1
--- pax.old/tar.1	Mon Dec  2 23:28:08 2002
+++ pax/tar.1	Thu Dec  5 21:01:18 2002
@@ -231,6 +231,8 @@
 .It Fl X Ar file , Fl -exclude-from Ar file
 Exclude files listed in the given file.
 Do not cross mount points in the file system.
+.It Fl -exclude Ar pattern
+Exclude files matching the pattern.
 .It Fl Z , -compress , -uncompress
 Compress archive using compress.
 .It Fl -strict

--Multipart_Fri_Dec__6_00:51:16_2002-1--