Subject: patches to make fsck only do unclean disks.
To: None <gnats-bugs@netbsd.org, current-users@netbsd.org>
From: John Kohl <jtk@kolvir.blrc.ma.us>
List: current-users
Date: 09/15/1994 00:35:06
>Submitter-Id:	net
>Originator:	John Kohl
>Organization:
>Confidential:	no
>Synopsis:	fsck doesn't skip cleanly unmounted disks
>Severity:	non-critical
>Priority:	low
>Category:	kern
>Class:		change-request
>Release:	1.0_BETA
>Environment:
NetBSD 1.0_BETA (Sep 15th sup)
i486/33, 16MB main memory, SCSI and IDE disk drives
System: NetBSD kolvir 1.0_BETA NetBSD 1.0_BETA (KOLVIR) #15: Thu Sep 15 00:16:07 EDT 1994 jtk@kolvir:/u1/NetBSD-1.0/src/sys/arch/i386/compile/KOLVIR i386

>Description:
	fsck doesn't know how to skip disks that were dismounted
cleanly, and the kernel doesn't try to unmount disks at reboot time.
>How-To-Repeat:
	reboot your machine, wait forever for an FSCK to finish.
>Fix:

Apply these patches.  I agree the fs_clean_gen stuff is a bit of a
kludge--the idea is that every 256 mounts without an FSCK will make the
filesystem "dirty" again, just in case something is slowly going wrong
unnoticed.  Feel free to trash the generational stuff if you don't like
it (I do like it, and it was much less tricky than I thought it would be).

The UPDATE_SBONLY flag maybe should have gone into <sys/mount.h>, but I
did that part of the code before I decided I needed to open up
<sys/mount.h> anyway (to insert a vfs_unmountall() prototype).

Oh, and I just noticed the lack of error checking when calling
ffs_sbupdate() at umount time.  Not much productive can be done there, I
suspect.

Ports other than i386 will need to hack their own machdep.c files to put
the vfs_unmountall() calls back in.

===================================================================
RCS file: sys/ufs/ffs/RCS/ffs_vfsops.c,v
retrieving revision 1.1
diff -c -r1.1 sys/ufs/ffs/ffs_vfsops.c
*** 1.1	1994/09/15 00:33:26
--- sys/ufs/ffs/ffs_vfsops.c	1994/09/15 04:00:27
***************
*** 63,68 ****
--- 63,71 ----
  
  int ffs_sbupdate __P((struct ufsmount *, int));
  
+ #define UPDATE_SBONLY	-1		/* see also mount.h's MNT_WAIT and
+ 					   MNT_NOWAIT flags */
+ 
  struct vfsops ufs_vfsops = {
  	MOUNT_UFS,
  	ffs_mount,
***************
*** 177,184 ****
  			error = ffs_reload(mp, ndp->ni_cnd.cn_cred, p);
  		if (error)
  			return (error);
! 		if (fs->fs_ronly && (mp->mnt_flag & MNT_WANTRDWR))
  			fs->fs_ronly = 0;
  		if (args.fspec == 0) {
  			/*
  			 * Process export requests.
--- 180,191 ----
  			error = ffs_reload(mp, ndp->ni_cnd.cn_cred, p);
  		if (error)
  			return (error);
! 		if (fs->fs_ronly && (mp->mnt_flag & MNT_WANTRDWR)) {
  			fs->fs_ronly = 0;
+ 			fs->fs_clean_gen = ++fs->fs_clean; /* save for writing
+ 							      at umount time */
+ 			fs->fs_clean = 0;
+ 		}
  		if (args.fspec == 0) {
  			/*
  			 * Process export requests.
***************
*** 225,230 ****
--- 232,240 ----
  	    &size);
  	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
  	(void)ffs_statfs(mp, &mp->mnt_stat, p);
+ 	if (fs->fs_ronly == 0)
+ 	    /* push out clean/dirty marker */
+ 	    (void) ffs_sbupdate(ump, UPDATE_SBONLY);
  	return (0);
  }
  
***************
*** 405,412 ****
  	bp = NULL;
  	fs = ump->um_fs;
  	fs->fs_ronly = ronly;
! 	if (ronly == 0)
  		fs->fs_fmod = 1;
  	blks = howmany(fs->fs_cssize, fs->fs_fsize);
  	base = space = malloc((u_long)fs->fs_cssize, M_UFSMNT,
  	    M_WAITOK);
--- 415,434 ----
  	bp = NULL;
  	fs = ump->um_fs;
  	fs->fs_ronly = ronly;
! 	if (fs->fs_clean == 0) {
! 	    /* XXX we'd like to mention the device or filesystem
! 	       that was mounted dirty, but we don't have a pathname
! 	       and we can't use VOP_GETATTR() since mfs_getattr is really
! 	       mfs_badop. */
! 	    if (devvp != rootvp)
! 		printf("warning: mounting dirty filesystem\n");
! 	}
! 	if (ronly == 0) {
  		fs->fs_fmod = 1;
+ 		fs->fs_clean_gen = ++fs->fs_clean; /* save for writing
+ 						      at umount time */
+ 		fs->fs_clean = 0;
+ 	}
  	blks = howmany(fs->fs_cssize, fs->fs_fsize);
  	base = space = malloc((u_long)fs->fs_cssize, M_UFSMNT,
  	    M_WAITOK);
***************
*** 506,511 ****
--- 528,542 ----
  	ump = VFSTOUFS(mp);
  	fs = ump->um_fs;
  	ronly = !fs->fs_ronly;
+ 	/* ffs_sbupdate writes out some other cg stuff *after* the
+ 	 * superblock.  So we write once, to force it all to disk (with
+ 	 * fs_clean left at zero as it was at mount time),
+ 	 * set the clean bit, and then write just the superblock.
+ 	 *
+ 	 */
+ 	error = ffs_sbupdate(ump, MNT_WAIT);
+ 	fs->fs_clean = fs->fs_clean_gen;
+ 	error = ffs_sbupdate(ump, UPDATE_SBONLY);
  	ump->um_devvp->v_specflags &= ~SI_MOUNTEDON;
  	error = VOP_CLOSE(ump->um_devvp, ronly ? FREAD : FREAD|FWRITE,
  		NOCRED, p);
***************
*** 831,837 ****
  	register struct buf *bp;
  	int blks;
  	caddr_t space;
! 	int i, size, error = 0;
  
  	bp = getblk(mp->um_devvp, SBOFF >> (fs->fs_fshift - fs->fs_fsbtodb),
  	    (int)fs->fs_sbsize, 0, 0);
--- 862,868 ----
  	register struct buf *bp;
  	int blks;
  	caddr_t space;
! 	int i, size, error = 0, allerror = 0;
  
  	bp = getblk(mp->um_devvp, SBOFF >> (fs->fs_fshift - fs->fs_fsbtodb),
  	    (int)fs->fs_sbsize, 0, 0);
***************
*** 848,857 ****
  			lp[i] = lp[i-1];			/* XXX */
  		lp[0] = tmp;					/* XXX */
  	}							/* XXX */
! 	if (waitfor == MNT_WAIT)
! 		error = bwrite(bp);
  	else
  		bawrite(bp);
  	blks = howmany(fs->fs_cssize, fs->fs_fsize);
  	space = (caddr_t)fs->fs_csp[0];
  	for (i = 0; i < blks; i += fs->fs_frag) {
--- 879,891 ----
  			lp[i] = lp[i-1];			/* XXX */
  		lp[0] = tmp;					/* XXX */
  	}							/* XXX */
! 	if (waitfor == MNT_WAIT || waitfor == UPDATE_SBONLY)
! 		allerror = bwrite(bp);
  	else
  		bawrite(bp);
+ 	if (waitfor == UPDATE_SBONLY)
+ 	    return allerror;
+ 
  	blks = howmany(fs->fs_cssize, fs->fs_fsize);
  	space = (caddr_t)fs->fs_csp[0];
  	for (i = 0; i < blks; i += fs->fs_frag) {
***************
*** 867,871 ****
  		else
  			bawrite(bp);
  	}
! 	return (error);
  }
--- 901,907 ----
  		else
  			bawrite(bp);
  	}
! 	if (!allerror && error)
! 	    allerror = error;
! 	return (allerror);
  }
===================================================================
RCS file: sys/arch/i386/i386/RCS/machdep.c,v
retrieving revision 1.1
diff -c -r1.1 sys/arch/i386/i386/machdep.c
*** 1.1	1994/09/15 03:03:44
--- sys/arch/i386/i386/machdep.c	1994/09/15 03:58:09
***************
*** 675,687 ****
  		if (panicstr == 0)
  			vnode_pager_umount(NULL);
  		sync(&proc0, (void *)0, (int *)0);
- #if 0
  		/*
  		 * Unmount filesystems
  		 */
  		if (panicstr == 0)
  			vfs_unmountall();
- #endif
  		for (iter = 0; iter < 20; iter++) {
  			nbusy = 0;
  			for (bp = &buf[nbuf]; --bp >= buf; )
--- 675,685 ----
===================================================================
RCS file: sys/kern/RCS/vfs_syscalls.c,v
retrieving revision 1.1
diff -c -r1.1 sys/kern/vfs_syscalls.c
*** 1.1	1994/09/15 03:21:12
--- sys/kern/vfs_syscalls.c	1994/09/15 03:48:51
***************
*** 198,208 ****
  		return (error);
  	}
  	/*
! 	 * Put the new filesystem on the mount list after root.
  	 */
  	cache_purge(vp);
  	if (!error) {
! 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
  		VOP_UNLOCK(vp);
  		vfs_unlock(mp);
  		error = VFS_START(mp, 0, p);
--- 198,211 ----
  		return (error);
  	}
  	/*
! 	 * Put the new filesystem on the mount list just after root.
! 	 * This is convenient so vfs_unmountall() can unmount in inverse
! 	 * order (more likely to succeed that way!).
  	 */
  	cache_purge(vp);
  	if (!error) {
! 		TAILQ_INSERT_AFTER(&mountlist, mountlist.tqh_first, mp,
! 				   mnt_list);
  		VOP_UNLOCK(vp);
  		vfs_unlock(mp);
  		error = VFS_START(mp, 0, p);
===================================================================
RCS file: sys/kern/RCS/vfs_subr.c,v
retrieving revision 1.1
diff -c -r1.1 sys/kern/vfs_subr.c
*** 1.1	1994/09/15 03:21:29
--- sys/kern/vfs_subr.c	1994/09/15 03:37:37
***************
*** 1421,1423 ****
--- 1421,1443 ----
  	}
  	return (np);
  }
+ 
+ void
+ vfs_unmountall()
+ {
+     /*
+      * Walk the mount list from its head and attempt to unmount every
+      * file system except the one flagged MNT_ROOTFS.
+      *
+      * XXX should we just do ones with M_LOCAL set? can non-local things hang,
+      * because they might require some user process assistance? (e.g. amd)
+      */
+     register struct mount *mp, *nmp;
+ 
+     for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
+ 	nmp = mp->mnt_list.tqe_next;	/* saved before dounmount(mp) */
+ 	if (mp->mnt_flag & MNT_ROOTFS)
+ 	    continue;			/* don't bother, we won't succeed. */
+ 	(void) dounmount(mp, 0, curproc); /* just try it; result ignored. */
+     }
+ }
===================================================================
RCS file: sys/ufs/ffs/RCS/fs.h,v
retrieving revision 1.1
diff -c -r1.1 sys/ufs/ffs/fs.h
*** 1.1	1994/09/15 01:05:56
--- sys/ufs/ffs/fs.h	1994/09/15 01:06:34
***************
*** 214,220 ****
  	char   	fs_fmod;    		/* super block modified flag */
  	char   	fs_clean;    		/* file system is clean flag */
  	char   	fs_ronly;   		/* mounted read-only flag */
! 	char   	fs_flags;   		/* currently unused flag */
  	char	fs_fsmnt[MAXMNTLEN];	/* name mounted on */
  /* these fields retain the current block allocation info */
  	long	fs_cgrotor;		/* last cg searched */
--- 214,220 ----
  	char   	fs_fmod;    		/* super block modified flag */
  	char   	fs_clean;    		/* file system is clean flag */
  	char   	fs_ronly;   		/* mounted read-only flag */
! 	char   	fs_clean_gen;		/* in-core copy of clean byte for generational checking */
  	char	fs_fsmnt[MAXMNTLEN];	/* name mounted on */
  /* these fields retain the current block allocation info */
  	long	fs_cgrotor;		/* last cg searched */
===================================================================
RCS file: sys/sys/RCS/mount.h,v
retrieving revision 1.1
diff -c -r1.1 sys/sys/mount.h
*** 1.1	1994/09/15 03:04:15
--- sys/sys/mount.h	1994/09/15 03:05:00
***************
*** 397,402 ****
--- 397,403 ----
  int	vfs_lock __P((struct mount *));	    /* lock a vfs */
  int	vfs_mountedon __P((struct vnode *));/* is a vfs mounted on vp */
  void	vfs_unlock __P((struct mount *));   /* unlock a vfs */
+ void	vfs_unmountall __P((void));
  extern	TAILQ_HEAD(mntlist, mount) mountlist;	/* mounted filesystem list */
  extern	struct vfsops *vfssw[];		    /* filesystem type table */
  extern	int nvfssw;
===================================================================
RCS file: sbin/fsck/RCS/main.c,v
retrieving revision 1.13
diff -c -r1.13 sbin/fsck/main.c
*** 1.13	1994/06/08 19:00:24
--- sbin/fsck/main.c	1994/09/15 02:53:17
***************
*** 68,85 ****
  	extern int optind;
  
  	sync();
  	while ((ch = getopt(argc, argv, "dpnNyYb:c:l:m:")) != EOF) {
  		switch (ch) {
  		case 'p':
  			preen++;
  			break;
- 
  		case 'b':
  			bflag = argtoi('b', "number", optarg, 10);
  			printf("Alternate super block location: %d\n", bflag);
  			break;
  
  		case 'c':
  			cvtlevel = argtoi('c', "conversion level", optarg, 10);
  			break;
  		
--- 68,87 ----
  	extern int optind;
  
  	sync();
+ 	docleancheck = TRUE;
  	while ((ch = getopt(argc, argv, "dpnNyYb:c:l:m:")) != EOF) {
  		switch (ch) {
  		case 'p':
  			preen++;
  			break;
  		case 'b':
  			bflag = argtoi('b', "number", optarg, 10);
+ 			docleancheck = FALSE;
  			printf("Alternate super block location: %d\n", bflag);
  			break;
  
  		case 'c':
+ 			docleancheck = FALSE;
  			cvtlevel = argtoi('c', "conversion level", optarg, 10);
  			break;
  		
***************
*** 172,187 ****
  	struct dups *dp;
  	struct zlncnt *zlnp;
  	int cylno;
  
  	if (preen && child)
  		(void)signal(SIGQUIT, voidquit);
  	cdevname = filesys;
  	if (debug && preen)
  		pwarn("starting\n");
! 	if (setup(filesys) == 0) {
! 		if (preen)
! 			pfatal("CAN'T CHECK FILE SYSTEM.");
! 		return (0);
  	}
  	/*
  	 * 1: scan inodes tallying blocks used
--- 174,198 ----
  	struct dups *dp;
  	struct zlncnt *zlnp;
  	int cylno;
+ 	int setupval;
  
  	if (preen && child)
  		(void)signal(SIGQUIT, voidquit);
  	cdevname = filesys;
  	if (debug && preen)
  		pwarn("starting\n");
! 	setupval = setup(filesys);
! 	switch (setupval) {
! 	case 0:
! 	    if (preen)
! 		pfatal("CAN'T CHECK FILE SYSTEM.");
! 	    return (0);
! 	case -1:
! 	    if (!hotroot) {
! 		/* always check the root */
! 		pwarn("File system unmounted cleanly.\n");
! 		return(0);
! 	    }
  	}
  	/*
  	 * 1: scan inodes tallying blocks used
===================================================================
RCS file: sbin/fsck/RCS/utilities.c,v
retrieving revision 1.9
diff -c -r1.9 sbin/fsck/utilities.c
*** 1.9	1994/06/08 19:00:33
--- sbin/fsck/utilities.c	1994/09/15 02:05:44
***************
*** 230,235 ****
--- 230,236 ----
  {
  	register struct bufarea *bp, *nbp;
  	int cnt = 0;
+ 	int modified;
  
  	if (fswritefd < 0) {
  		(void)close(fsreadfd);
***************
*** 254,259 ****
--- 255,270 ----
  	if (bufhead.b_size != cnt)
  		errexit("Panic: lost %d buffers\n", bufhead.b_size - cnt);
  	pbp = pdirbp = (struct bufarea *)0;
+ 	if (sblk.b_markclean == MARK_AS_CLEAN) {
+ 	    /* mark it now as cleaned */
+ 	    modified = fsmodified;
+ 	    sblock.fs_clean = 1;
+ 	    sblk.b_dirty = DIRTIED;
+ 	    flush(fswritefd, &sblk);
+ 	    /* don't claim modified if all we did was mark it clean.
+                Just keep previous state */
+ 	    fsmodified = modified;	
+ 	}
  	if (debug)
  		printf("cache missed %ld of %ld (%d%%)\n", diskreads,
  		    totalreads, (int)(diskreads * 100 / totalreads));
===================================================================
RCS file: sbin/fsck/RCS/setup.c,v
retrieving revision 1.11
diff -c -r1.11 sbin/fsck/setup.c
*** 1.11	1994/06/29 11:01:37
--- sbin/fsck/setup.c	1994/09/15 02:05:38
***************
*** 66,71 ****
--- 66,72 ----
  	off_t sizepb;
  	struct stat statb;
  	struct fs proto;
+ 	int checkclean = docleancheck;
  
  	havesb = 0;
  	fswritefd = -1;
***************
*** 108,113 ****
--- 109,115 ----
  	 * Read in the superblock, looking for alternates if necessary
  	 */
  	if (readsb(1) == 0) {
+ 	    	checkclean = FALSE;  /* something wrong---do the full check */
  		if (bflag || preen || calcsb(dev, fsreadfd, &proto) == 0)
  			return(0);
  		if (reply("LOOK FOR ALTERNATE SUPERBLOCKS") == 0)
***************
*** 128,133 ****
--- 130,144 ----
  			return(0);
  		}
  		pwarn("USING ALTERNATE SUPERBLOCK AT %d\n", bflag);
+ 	}
+ 	if (debug)
+ 	    pwarn("fs_clean is %d\n", sblock.fs_clean);
+ 	if (preen && checkclean && sblock.fs_clean)
+ 	    return -1;
+ 	if (!preen && sblock.fs_clean)
+ 	    pwarn("(This filesystem unmounted cleanly or was previously marked\nclean by fsck.)\n");
+ 	if (!sblock.fs_clean) {
+ 	    sbmarkasclean();
  	}
  	maxfsblock = sblock.fs_size;
  	maxino = sblock.fs_ncg * sblock.fs_ipg;
===================================================================
RCS file: sbin/fsck/RCS/fsck.h,v
retrieving revision 1.6
diff -c -r1.6 sbin/fsck/fsck.h
*** 1.6	1994/06/08 19:00:21
--- sbin/fsck/fsck.h	1994/09/15 02:05:29
***************
*** 68,73 ****
--- 68,74 ----
  		struct	dinode *b_dinode;	/* inode block */
  	} b_un;
  	char	b_dirty;
+ 	char	b_markclean;
  };
  
  #define	B_INUSE 1
***************
*** 80,92 ****
  struct bufarea *pbp;		/* current inode block */
  struct bufarea *getdatablk();
  
! #define	dirty(bp)	(bp)->b_dirty = 1
  #define	initbarea(bp) \
  	(bp)->b_dirty = 0; \
  	(bp)->b_bno = (daddr_t)-1; \
  	(bp)->b_flags = 0;
  
! #define	sbdirty()	sblk.b_dirty = 1
  #define	cgdirty()	cgblk.b_dirty = 1
  #define	sblock		(*sblk.b_un.b_fs)
  #define	cgrp		(*cgblk.b_un.b_cg)
--- 81,99 ----
  struct bufarea *pbp;		/* current inode block */
  struct bufarea *getdatablk();
  
! #define DIRTIED 0x1
! #define MARK_AS_CLEAN 0x2
! 
! #define	dirty(bp)	(bp)->b_dirty = DIRTIED
  #define	initbarea(bp) \
  	(bp)->b_dirty = 0; \
+ 	(bp)->b_markclean = 0; \
  	(bp)->b_bno = (daddr_t)-1; \
  	(bp)->b_flags = 0;
  
! #define	sbdirty()	sblk.b_dirty = DIRTIED
! #define	sbmarkasclean()	sblk.b_markclean = MARK_AS_CLEAN
! 
  #define	cgdirty()	cgblk.b_dirty = 1
  #define	sblock		(*sblk.b_un.b_fs)
  #define	cgrp		(*cgblk.b_un.b_cg)
***************
*** 176,181 ****
--- 183,189 ----
  char	preen;			/* just fix normal inconsistencies */
  char	hotroot;		/* checking root device */
  char	havesb;			/* superblock has been read */
+ char	docleancheck;		/* check clean byte in filesystem */
  int	fsmodified;		/* 1 => write done to file system */
  int	fsreadfd;		/* file descriptor for reading file system */
  int	fswritefd;		/* file descriptor for writing file system */
***************
*** 207,212 ****
--- 215,223 ----
  #define	KEEPON	0x04
  #define	ALTERED	0x08
  #define	FOUND	0x10
+ 
+ #define TRUE	1
+ #define	FALSE	0
  
  time_t time();
  struct dinode *ginode();
===================================================================
RCS file: sbin/dumpfs/RCS/dumpfs.c,v
retrieving revision 1.1
diff -c -r1.1 sbin/dumpfs/dumpfs.c
*** 1.1	1994/09/15 00:35:31
--- sbin/dumpfs/dumpfs.c	1994/09/15 01:34:35
***************
*** 155,162 ****
  	    afs.fs_sbsize, afs.fs_cgsize, afs.fs_cgoffset, afs.fs_cgmask);
  	printf("csaddr\t%d\tcssize\t%d\tshift\t%d\tmask\t0x%08x\n",
  	    afs.fs_csaddr, afs.fs_cssize, afs.fs_csshift, afs.fs_csmask);
! 	printf("cgrotor\t%d\tfmod\t%d\tronly\t%d\n",
! 	    afs.fs_cgrotor, afs.fs_fmod, afs.fs_ronly);
  	if (afs.fs_cpc != 0)
  		printf("blocks available in each of %d rotational positions",
  		     afs.fs_nrpos);
--- 155,162 ----
  	    afs.fs_sbsize, afs.fs_cgsize, afs.fs_cgoffset, afs.fs_cgmask);
  	printf("csaddr\t%d\tcssize\t%d\tshift\t%d\tmask\t0x%08x\n",
  	    afs.fs_csaddr, afs.fs_cssize, afs.fs_csshift, afs.fs_csmask);
! 	printf("cgrotor\t%d\tfmod\t%d\tronly\t%d\tclean\t%d\n",
! 	    afs.fs_cgrotor, afs.fs_fmod, afs.fs_ronly, afs.fs_clean);
  	if (afs.fs_cpc != 0)
  		printf("blocks available in each of %d rotational positions",
  		     afs.fs_nrpos);
===================================================================
RCS file: sbin/newfs/RCS/mkfs.c,v
retrieving revision 1.1
diff -c -r1.1 sbin/newfs/mkfs.c
*** 1.1	1994/09/15 02:33:06
--- sbin/newfs/mkfs.c	1994/09/15 02:35:19
***************
*** 562,567 ****
--- 562,569 ----
  	sblock.fs_cstotal.cs_nifree = 0;
  	sblock.fs_cstotal.cs_nffree = 0;
  	sblock.fs_fmod = 0;
+ 	sblock.fs_clean = 1;
+ 	sblock.fs_clean_gen = 0;
  	sblock.fs_ronly = 0;
  	/*
  	 * Dump out summary information about file system.