Subject: dump and nodump flag
To: None <tech-kern@netbsd.org, tech-userlevel@netbsd.org>
From: Manuel Bouyer <bouyer@antioche.lip6.fr>
List: tech-userlevel
Date: 03/01/1999 20:12:21
[ I don't know if this is for tech-kern or tech-userlevel. It's a userland
  tool, but the subject is more the filesystem than userland. So I cross
  post this, with reply-to: tech-kern ]

Hi,
Appended below is a patch against the 1.3.3 dump, which changes the handling
of nodump flags. For now, a file is ignored only if nodump is set on that
file itself, which is not really handy if you don't want to dump a large, moving
tree (like a /tmp, or ~ftp/pub/NetBSD). You can still do a chflags -R
before dump, but I don't find it really elegant. I'd prefer to
have this flag handled recursively by dump: if a directory has nodump set,
don't archive any file or directory below it.
There is a solution for this in PR 6705 which adds another pass to
dump to build a map of nodump inodes. In the patch I propose below,
this is handled in pass 2 (mapping directories). In this pass, a file
or directory which is marked to be dumped will have its parent marked for
dump too, recursively to the root. We can, at the same time,
mark content of a nodump directory as nodump too, recursively.
For this, I use the usedinomap map to hold the nodump attribute:
if a directory is in the dumpdirmap but not in the usedinomap, then it
has the nodump attribute, and it needs to be propagated to files and
directories below it (note that we need to add directories back to dumpdirmap
here, as they may have got the dump attr and been removed in previous loops).
If files were already scheduled for dump, dump size estimation is adjusted.

I've tested these changes on 2 machines here; they seem to do the right thing.
These machines will be backed up with amanda every night, which should allow
me to catch problems.
If I don't get objections I'll port this to -current and commit next week-end
or so.

--
Manuel Bouyer, LIP6, Universite Paris VI.           Manuel.Bouyer@lip6.fr
--

Index: traverse.c
===================================================================
RCS file: /archive/cvs/cvsroot/NetBSD/src/sbin/dump/traverse.c,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 traverse.c
--- traverse.c	1997/12/15 16:38:51	1.1.1.1
+++ traverse.c	1999/03/01 18:47:17
@@ -79,9 +79,11 @@
 typedef	int32_t fsizeT;
 #endif
 
-static	int dirindir __P((ino_t ino, daddr_t blkno, int level, long *size));
+static	int dirindir __P((ino_t ino, daddr_t blkno, int level, long *size,
+    long *tapesize, int nodump));
 static	void dmpindir __P((ino_t ino, daddr_t blk, int level, fsizeT *size));
-static	int searchdir __P((ino_t ino, daddr_t blkno, long size, long filesize));
+static	int searchdir __P((ino_t ino, daddr_t blkno, long size, long filesize,
+    long *tapesize, int nodump));
 
 /*
  * This is an estimation of the number of TP_BSIZE blocks in the file.
@@ -151,10 +153,14 @@
 	dp = getino(ino);
 	if ((mode = (dp->di_mode & IFMT)) == 0)
 		return;
-	SETINO(ino, usedinomap);
+	/*
+	 * Put all dirs in dumpdirmap, but only inodes that are to be dumped
+	 * in the used and dump maps.
+	 */
 	if (mode == IFDIR)
 		SETINO(ino, dumpdirmap);
 	if (WANTTODUMP(dp)) {
+		SETINO(ino, usedinomap);
 		SETINO(ino, dumpinomap);
 		if (mode != IFREG && mode != IFDIR && mode != IFLNK)
 			*tapesize += 1;
@@ -281,8 +287,8 @@
 	ino_t maxino;
 	long *tapesize;
 {
-	struct	dinode *dp;
-	int i, isdir;
+	struct	dinode *dp, di;
+	int i, isdir, nodump;
 	char *map;
 	ino_t ino;
 	long filesize;
@@ -294,24 +300,36 @@
 			isdir = *map++;
 		else
 			isdir >>= 1;
-		if ((isdir & 1) == 0 || TSTINO(ino, dumpinomap))
+		/*
+		 * If dir has been removed from the used map, it's either
+		 * because it had the nodump flag, or it inherited it from
+		 * its parent. A directory can't be in dumpinomap if
+		 * not in usedinomap, but we have to go through it anyway
+		 * to propagate the nodump attribute.
+		 */
+		nodump = (TSTINO(ino, usedinomap) == 0);
+		if ((isdir & 1) == 0 ||
+		    (TSTINO(ino, dumpinomap) && nodump == 0))
 			continue;
+
 		dp = getino(ino);
-		filesize = dp->di_size;
+		di = *dp; /* inode buf may be changed in searchdir */
+		filesize = di.di_size;
 		for (ret = 0, i = 0; filesize > 0 && i < NDADDR; i++) {
-			if (dp->di_db[i] != 0)
-				ret |= searchdir(ino, dp->di_db[i],
+			if (di.di_db[i] != 0)
+				ret |= searchdir(ino, di.di_db[i],
 					(long)dblksize(sblock, dp, i),
-					filesize);
+					filesize, tapesize, nodump);
 			if (ret & HASDUMPEDFILE)
 				filesize = 0;
 			else
 				filesize -= sblock->fs_bsize;
 		}
 		for (i = 0; filesize > 0 && i < NIADDR; i++) {
-			if (dp->di_ib[i] == 0)
+			if (di.di_ib[i] == 0)
 				continue;
-			ret |= dirindir(ino, dp->di_ib[i], i, &filesize);
+			ret |= dirindir(ino, di.di_ib[i], i, &filesize,
+			    tapesize, nodump);
 		}
 		if (ret & HASDUMPEDFILE) {
 			SETINO(ino, dumpinomap);
@@ -319,7 +337,11 @@
 			change = 1;
 			continue;
 		}
-		if ((ret & HASSUBDIRS) == 0) {
+		if (nodump) {
+			if (ret & HASSUBDIRS)
+				change = 1; /* subdirs have inherited nodump */
+			CLRINO(ino, dumpdirmap);
+		} else if ((ret & HASSUBDIRS) == 0) {
 			if (!TSTINO(ino, dumpinomap)) {
 				CLRINO(ino, dumpdirmap);
 				change = 1;
@@ -335,11 +357,13 @@
  * require the directory to be dumped.
  */
 static int
-dirindir(ino, blkno, ind_level, filesize)
+dirindir(ino, blkno, ind_level, filesize, tapesize, nodump)
 	ino_t ino;
 	daddr_t blkno;
 	int ind_level;
 	long *filesize;
+	long *tapesize;
+	int nodump;
 {
 	int ret = 0;
 	int i;
@@ -351,7 +375,7 @@
 			blkno = idblk[i];
 			if (blkno != 0)
 				ret |= searchdir(ino, blkno, sblock->fs_bsize,
-					*filesize);
+					*filesize, tapesize, nodump);
 			if (ret & HASDUMPEDFILE)
 				*filesize = 0;
 			else
@@ -363,7 +387,8 @@
 	for (i = 0; *filesize > 0 && i < NINDIR(sblock); i++) {
 		blkno = idblk[i];
 		if (blkno != 0)
-			ret |= dirindir(ino, blkno, ind_level, filesize);
+			ret |= dirindir(ino, blkno, ind_level, filesize,
+			    tapesize, nodump);
 	}
 	return (ret);
 }
@@ -374,13 +399,16 @@
  * contains any subdirectories.
  */
 static int
-searchdir(ino, blkno, size, filesize)
+searchdir(ino, blkno, size, filesize, tapesize, nodump)
 	ino_t ino;
 	daddr_t blkno;
 	long size;
 	long filesize;
+	long *tapesize;
+	int nodump;
 {
 	struct direct *dp;
+	struct dinode *ip;
 	long loc, ret = 0;
 	char dblk[MAXBSIZE];
 
@@ -402,15 +430,29 @@
 			if (dp->d_name[1] == '.' && dp->d_name[2] == '\0')
 				continue;
 		}
-		if (TSTINO(dp->d_ino, dumpinomap)) {
-			ret |= HASDUMPEDFILE;
-			if (ret & HASSUBDIRS)
-				break;
-		}
-		if (TSTINO(dp->d_ino, dumpdirmap)) {
-			ret |= HASSUBDIRS;
-			if (ret & HASDUMPEDFILE)
-				break;
+		if (nodump) {
+			ip = getino(dp->d_ino);
+			if (TSTINO(dp->d_ino, dumpinomap)) {
+				CLRINO(dp->d_ino, dumpinomap);
+				CLRINO(dp->d_ino, usedinomap);
+				*tapesize -= blockest(ip);
+			}
+			/* Add dir back to the dir map, to propagate nodump */
+			if ((ip->di_mode & IFMT) == IFDIR) {
+				SETINO(dp->d_ino, dumpdirmap);
+				ret |= HASSUBDIRS;
+			}
+		} else {
+			if (TSTINO(dp->d_ino, dumpinomap)) {
+				ret |= HASDUMPEDFILE;
+				if (ret & HASSUBDIRS)
+					break;
+			}
+			if (TSTINO(dp->d_ino, dumpdirmap)) {
+				ret |= HASSUBDIRS;
+				if (ret & HASDUMPEDFILE)
+					break;
+			}
 		}
 	}
 	return (ret);