Subject: bin/6705: improved nodump dir handling
To: None <gnats-bugs@gnats.netbsd.org>
From: None <bgrayson@ece.utexas.edu>
List: netbsd-bugs
Date: 01/01/1999 23:52:28
>Number:         6705
>Category:       bin
>Synopsis:       improved nodump dir handling
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    bin-bug-people (Utility Bug People)
>State:          open
>Class:          change-request
>Submitter-Id:   net
>Arrival-Date:   Fri Jan  1 22:05:01 1999
>Last-Modified:
>Originator:     Brian Grayson
>Organization:
	Parallel and Distributed Systems
	Electrical and Computer Engineering
	The University of Texas at Austin
>Release:        Dec 31 1998
>Environment:

>Description:
	Currently, if the nodump flag (see chflags(1)) is set for
	a directory, dump will go ahead and dump that directory,
	and its contents.  It would be nice if nodump on a
	directory meant, "Don't dump this dir or any of its
	children."  This would allow one to not bother dumping, say,
	/usr/src, or /var, or /tmp, or /scratch, or anything else
	that is just as easy to re-download/rebuild as it is to
	restore from tape.  Such actions also save tape space,
	without requiring one to do a nightly chflags -R nodump
	on the associated trees.

	The included patches provide support for honoring nodump
	on a directory.  I considered several implementation
	methods, but decided the simplest would be to add another
	map (dumpable), and simply walk the entire FS in an
	added pass 0, to propagate nodump bits.  This solves the
	slightly-complicated case of "we were asked to only dump
	/tmp/foo/bar, but /tmp is marked nodump, and the honor
	level was not set otherwise, so we shouldn't bother
	dumping anything."

	Note that pass 0 checks for nonodump, and short-circuits
	if nonodump is set, thus for level-0 default dumps (no
	-h0 option), this new functionality does not slow things down.

	I would suggest someone in the know (hi Luke!) look over
	these patches to make sure that they cover all the corner
	cases -- I'm new at parsing dinodes directly, and could
	have missed or flubbed something.

	By the way, I wrote up some stuff to shove into
	/usr/src/regress/sbin/dump that sets up a 1000-file
	directory tree, and then does some dump estimates
	involving the nodump flag and the -h flag, and
	checks the results.  I'll be submitting that stuff as a
	separate PR after a few minor mods, in a few minutes.
>How-To-Repeat:
>Fix:
	Patches for dump.h, main.c, and traverse.c in
	/usr/src/sbin/dump:
--- dump.h.dist	Fri Jan  1 00:11:40 1999
+++ dump.h	Fri Jan  1 23:33:40 1999
@@ -45,6 +45,8 @@
 char	*usedinomap;	/* map of allocated inodes */
 char	*dumpdirmap;	/* map of directories to be dumped */
 char	*dumpinomap;	/* map of files to be dumped */
+char	*dumpablemap;	/* map for whether parent directory/ies
+			   are marked 'nodump' or not. */
 /*
  * Map manipulation macros.
  */
@@ -137,6 +139,7 @@
 /* mapping routines */
 struct	dinode;
 long	blockest __P((struct dinode *dp));
+void	setupdumpablemap __P((ino_t maxino, char *const toplevel));
 void	mapfileino __P((ino_t, long *, int *));
 int	mapfiles __P((ino_t maxino, long *tapesize, char *disk,
 		    char * const *dirv));

--- main.c.dist	Fri Jan  1 00:11:21 1999
+++ main.c	Fri Jan  1 23:37:08 1999
@@ -408,11 +408,17 @@
 	usedinomap = (char *)calloc((unsigned) mapsize, sizeof(char));
 	dumpdirmap = (char *)calloc((unsigned) mapsize, sizeof(char));
 	dumpinomap = (char *)calloc((unsigned) mapsize, sizeof(char));
+	dumpablemap = (char *)calloc((unsigned) mapsize, sizeof(char));
 	tapesize = 3 * (howmany(mapsize * sizeof(char), TP_BSIZE) + 1);
 
 	nonodump = iswap32(spcl.c_level) < honorlevel;
 
 	(void)signal(SIGINFO, statussig);
+
+	msg("propogating nodump flags (Pass 0)\n");
+	/* XXX  Currently, even if we are dumping only a subset,
+	 * setupdumpablemap() looks at the entire FS.  */
+	setupdumpablemap(maxino, toplevel);
 
 	msg("mapping (Pass I) [regular files]\n");
 	anydirskipped = mapfiles(maxino, &tapesize, toplevel,

--- traverse.c.dist	Fri Jan  1 00:11:08 1999
+++ traverse.c	Fri Jan  1 23:08:54 1999
@@ -133,10 +133,195 @@
 #define	WANTTODUMP(dp) \
 	(CHANGEDSINCE(dp, iswap32(spcl.c_ddate)) && \
 	 (nonodump || ((dp)->di_flags & UF_NODUMP) != UF_NODUMP))
+#define NODUMP_SET(dp) \
+	(!nonodump && (((dp)->di_flags & UF_NODUMP) == UF_NODUMP))
 #else
 #define	WANTTODUMP(dp) CHANGEDSINCE(dp, iswap32(spcl.c_ddate))
+#define NODUMP_SET(dp) 0
 #endif
 
+/* Macros to tell if an inode/direct block/indirect block is unused or not. */
+#define FREE_INODE(dp) (((dp)->di_mode & IFMT) == 0)
+#define VALID_DIRECT_BLK(dp,i)		((dp)->di_db[(i)] != 0)
+#define VALID_INDIRECT_BLK(dp,i)	((dp)->di_ib[(i)] != 0)
+
+/*
+ * Apply func to the inode number of every live entry (except "." and "..")
+ * in directory block blkno; parses at most size bytes and returns that count.
+ */
+int
+walkdirblock(blkno, func, size)
+	daddr_t blkno;
+	void (*func)(ino_t);
+	long size;
+{
+	char dblk[MAXBSIZE];
+	long loc, blksize = sblock->fs_bsize;
+
+	if (size > blksize) size = blksize;
+	bread(fsbtodb(sblock, blkno), dblk, (int)blksize);
+	for (loc=0; loc<size; ) {
+		struct direct *dp = (struct direct *)(dblk + loc);
+		if (dp->d_reclen == 0) {
+			msg("corrupted directory, block number %d, for ino %d\n", blkno, iswap32(dp->d_ino));
+			break;
+		}
+		loc += iswap16(dp->d_reclen);
+		/* Skip free entries, . and .. */
+		if (dp->d_ino == 0) continue;
+		if (dp->d_name[0] == '.' && (dp->d_name[1] == '\0' ||
+		    (dp->d_name[1] == '.' && dp->d_name[2] == '\0')))
+			continue;
+		/* d_ino is an on-disk field like d_reclen; convert before use. */
+		(*func) (iswap32(dp->d_ino));
+	}
+	return size;
+}
+
+/*
+ * Walk the indirect block at blkno, ind_level levels above the directory
+ * data, applying func via walkdirblock(); *size tracks bytes left to scan.
+ */
+void
+walkindirblock(blkno, func, ind_level, size)
+	daddr_t blkno;
+	void (*func)(ino_t);
+	int ind_level;
+	long *size;
+{
+	daddr_t indirblk[MAXNINDIR];
+	long i;
+
+	bread(fsbtodb(sblock, blkno), (char*)indirblk, (int)sblock->fs_bsize);
+	for (i=0; *size>0 && i<NINDIR(sblock); i++) {
+		/* Block pointers are read raw from disk; convert before use. */
+		blkno = iswap32(indirblk[i]);
+		if (blkno == 0)
+			continue;
+		/* Level 0 points at directory data; deeper levels recurse. */
+		if (ind_level <= 0)
+			*size -= walkdirblock(blkno, func, *size);
+		else
+			walkindirblock(blkno, func, ind_level - 1, size);
+	}
+}
+
+/*
+ * A utility function that takes a directory inode
+ * and applies the given function to the inode of each
+ * file/dir in the directory, including indirect blocks.
+ */
+void
+walkdir (ino, func)
+	ino_t ino;
+	void (*func)(ino_t);
+{
+	struct dinode *dp;
+	long filesize, size, i;
+	/* filesize counts the directory bytes still to be scanned;
+	 * size is the byte count to scan in the current block. */
+	dp = getino(ino);
+	size = sblock->fs_bsize;
+	filesize = dp->di_size;
+	/* First, check all direct blocks in the dir. */
+	for (i=0; filesize > 0 && i<NDADDR; i++) {
+		daddr_t blkno;
+		/* Need to make sure we still have the block, as the
+		 * recursive calls may have called getino(). */
+		dp = getino(ino);
+		blkno = iswap32(dp->di_db[i]);
+		if (filesize < size)
+			size = filesize;
+		if (VALID_DIRECT_BLK(dp,i))
+			filesize -= walkdirblock(blkno, func, size);
+		else
+			filesize -= size;
+	}
+	/* Now check indirect blocks. */
+	for (i=0; filesize>0 && i<NIADDR; i++) {
+		/* Re-fetch the inode for the same reason as above; di_ib[]
+		 * is converted with iswap32() just as di_db[] is. */
+		dp = getino(ino);
+		if (VALID_INDIRECT_BLK(dp,i))
+			walkindirblock(iswap32(dp->di_ib[i]), func, i, &filesize);
+		else
+			filesize -= size;
+	}
+}
+
+
+/*
+ * Utility function to recursively clear the dumpable bit for inode
+ * ino and, if ino is a directory, for every entry found beneath it.
+ */
+void
+cleardumpable(ino)
+	ino_t ino;
+{
+	struct dinode *dp;
+
+	/* Short-circuit: a bit that is already clear means this inode
+	 * was handled earlier, so there is nothing left to do. */
+	if (! TSTINO(ino, dumpablemap)) return;
+
+	/* Clear and recur. */
+	CLRINO(ino, dumpablemap);
+#ifdef DEBUG
+	msg("Clearing %d\n", ino);
+#endif
+	dp = getino(ino);
+	/* Only recur for directories. */
+	if ((dp->di_mode & IFMT) != IFDIR) {
+		return;
+	}
+	walkdir(ino, cleardumpable);
+}
+
+/*
+ * Start off with every file dumpable, but then propagate
+ * the effect of nodump for directories.
+ *
+ * ino is the exclusive upper bound on inode numbers to consider
+ * (main() passes maxino).  toplevel is currently unused: the
+ * whole FS is always scanned.
+ */
+void
+setupdumpablemap(ino, toplevel)
+	ino_t ino;
+	char * const toplevel;
+{
+	ino_t i;
+
+	/* First of all, start with all files dumpable. */
+	for (i=1; i<ino; i++)
+		SETINO(i, dumpablemap);
+
+	/* If nonodump is set (i.e., we are not honoring the nodump
+	 * flag), then bail out. */
+	if (nonodump) return;
+
+	/* For the subset-case, we could simply walk the subset trees,
+	 * and then check the parent directories for nodump flag, but
+	 * the logic there gets a bit tricky.  So for both cases, do a
+	 * full propagation of the nodump bit for the entire FS. */
+
+	/*
+	 * Scan every inode; wherever nodump is set, clear the
+	 * dumpable bit for that inode and everything below it.
+	 */
+	for (i=1; i<ino; i++) {
+		struct dinode *dp;
+		/* Short circuit if we've already cleared the bit. */
+		if (!TSTINO(i, dumpablemap)) continue;
+		dp = getino(i);
+		/* Also short-circuit if the inode is unused. */
+		if (FREE_INODE(dp)) continue;
+		if (NODUMP_SET(dp)) {
+			cleardumpable(i);
+		}
+	}
+}
+
 /*
  * Determine if given inode should be dumped
  */
@@ -160,7 +345,7 @@
 	SETINO(ino, usedinomap);
 	if (mode == IFDIR)
 		SETINO(ino, dumpdirmap);
-	if (WANTTODUMP(dp)) {
+	if (WANTTODUMP(dp) && TSTINO(ino, dumpablemap)) {
 		SETINO(ino, dumpinomap);
 		if (mode != IFREG && mode != IFDIR && mode != IFLNK)
 			*tapesize += 1;
@@ -305,7 +490,7 @@
 		dp = getino(ino);
 		filesize = dp->di_size;
 		for (ret = 0, i = 0; filesize > 0 && i < NDADDR; i++) {
-			if (dp->di_db[i] != 0)
+			if (VALID_DIRECT_BLK(dp,i))
 				ret |= searchdir(ino, iswap32(dp->di_db[i]),
 					(long)dblksize(sblock, dp, i),
 					filesize);
>Audit-Trail:
>Unformatted: