Subject: mountroot opens the disk device too many times
To: None <tech-kern@netbsd.org>
From: Charles M. Hannum <abuse@spamalicious.com>
List: tech-kern
Date: 08/13/2004 01:41:55
--Boundary-00=_jxBHBauYs3LzYPi
Content-Type: text/plain;
  charset="us-ascii"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline

When booting a system that uses CompactFlash or some other removable media for 
the root file system, it is often irritating that the mountroot path causes 
the device to be opened once for each file system.  For a GENERIC hpcmips 
kernel, for example, it does 3 probes before it identifies the root file 
system as FFS.

There is, unfortunately, no trivial fix for this.  There is no reference 
counting done anywhere in the path used to open and close the root device.  
(This is probably a bug, but I'm not going there right now.)  It seems that 
the only way to fix it is to hoist the VOP_OPEN() out of the foo_mountfs() 
routines and into one central place -- namely vfs_mountroot().

I've done a "rigged demo" version of this, and it does cut the boot time on my 
WorkPad considerable by removing 2 more power cycles of the CF card.  I 
enclose a patch below; it is obviously not finished, though, as the code that 
is currently "if"ed should be moved into the foo_mount() routines.  (The 
"already in use" checks should probably also be abstracted into a generic 
vfs_foo() function as well.)

--Boundary-00=_jxBHBauYs3LzYPi
Content-Type: text/x-diff;
  charset="us-ascii";
  name="mountroot.diff"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
	filename="mountroot.diff"

Index: ./fs/cd9660/cd9660_vfsops.c
===================================================================
RCS file: /cvsroot/src/sys/fs/cd9660/cd9660_vfsops.c,v
retrieving revision 1.16
diff -u -r1.16 cd9660_vfsops.c
--- ./fs/cd9660/cd9660_vfsops.c	5 Jul 2004 07:28:45 -0000	1.16
+++ ./fs/cd9660/cd9660_vfsops.c	13 Aug 2004 01:32:18 -0000
@@ -131,12 +131,6 @@
 	if (root_device->dv_class != DV_DISK)
 		return (ENODEV);
 	
-	/*
-	 * Get vnodes for swapdev and rootdev.
-	 */
-	if (bdevvp(rootdev, &rootvp))
-		panic("cd9660_mountroot: can't setup rootvp");
-
 	if ((error = vfs_rootmountalloc(MOUNT_CD9660, "root_device", &mp))
 			!= 0) {
 		vrele(rootvp);
@@ -148,13 +142,12 @@
 		mp->mnt_op->vfs_refcount--;
 		vfs_unbusy(mp);
 		free(mp, M_MOUNT);
-		vrele(rootvp);
 		return (error);
 	}
+	(void)cd9660_statvfs(mp, &mp->mnt_stat, p);
 	simple_lock(&mountlist_slock);
 	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
 	simple_unlock(&mountlist_slock);
-	(void)cd9660_statvfs(mp, &mp->mnt_stat, p);
 	vfs_unbusy(mp);
 	return (0);
 }
@@ -318,6 +311,7 @@
 	if (!ronly)
 		return EROFS;
 	
+if ((mp->mnt_flag & MNT_ROOTFS) == 0) {
 	/*
 	 * Disallow multiple mounts of the same device.
 	 * Disallow mounting of a device that is currently in use
@@ -328,13 +322,15 @@
 		return error;
 	if (vcount(devvp) > 1 && devvp != rootvp)
 		return EBUSY;
-	if ((error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0)) != 0)
-		return (error);
 
 	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
 	if (error)
 		return error;
 	needclose = 1;
+}
+
+	if ((error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0)) != 0)
+		goto out;
 	
 	/* This is the "logical sector size".  The standard says this
 	 * should be 2048 or the physical sector size on the device,
@@ -507,11 +503,13 @@
 		brelse(pribp);
 	if (supbp)
 		brelse(supbp);
+if ((mp->mnt_flag & MNT_ROOTFS) == 0) {
 	if (needclose) {
 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 		(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p);
 		VOP_UNLOCK(devvp, 0);
 	}
+}
 	if (isomp) {
 		free(isomp, M_ISOFSMNT);
 		mp->mnt_data = NULL;
Index: ./fs/msdosfs/msdosfs_vfsops.c
===================================================================
RCS file: /cvsroot/src/sys/fs/msdosfs/msdosfs_vfsops.c,v
retrieving revision 1.19
diff -u -r1.19 msdosfs_vfsops.c
--- ./fs/msdosfs/msdosfs_vfsops.c	27 Jun 2004 06:55:12 -0000	1.19
+++ ./fs/msdosfs/msdosfs_vfsops.c	13 Aug 2004 01:32:18 -0000
@@ -190,12 +190,6 @@
 	if (root_device->dv_class != DV_DISK)
 		return (ENODEV);
 
-	/*
-	 * Get vnodes for swapdev and rootdev.
-	 */
-	if (bdevvp(rootdev, &rootvp))
-		panic("msdosfs_mountroot: can't setup rootvp");
-
 	if ((error = vfs_rootmountalloc(MOUNT_MSDOS, "root_device", &mp))) {
 		vrele(rootvp);
 		return (error);
@@ -212,7 +206,6 @@
 		mp->mnt_op->vfs_refcount--;
 		vfs_unbusy(mp);
 		free(mp, M_MOUNT);
-		vrele(rootvp);
 		return (error);
 	}
 
@@ -224,10 +217,10 @@
 		return (error);
 	}
 
+	(void)msdosfs_statvfs(mp, &mp->mnt_stat, p);
 	simple_lock(&mountlist_slock);
 	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
 	simple_unlock(&mountlist_slock);
-	(void)msdosfs_statvfs(mp, &mp->mnt_stat, p);
 	vfs_unbusy(mp);
 	return (0);
 }
@@ -416,6 +409,12 @@
 	int	bsize = 0, dtype = 0, tmp;
 	u_long	dirsperblk;
 
+	bp  = NULL; /* both used in error_exit */
+	pmp = NULL;
+
+	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
+
+if ((mp->mnt_flag & MNT_ROOTFS) == 0) {
 	/*
 	 * Disallow multiple mounts of the same device.
 	 * Disallow mounting of a device that is currently in use
@@ -426,16 +425,14 @@
 		return (error);
 	if (vcount(devvp) > 1 && devvp != rootvp)
 		return (EBUSY);
-	if ((error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0)) != 0)
-		return (error);
 
-	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
 	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
 	if (error)
 		return (error);
+}
 
-	bp  = NULL; /* both used in error_exit */
-	pmp = NULL;
+	if ((error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0)) != 0)
+		goto error_exit;
 
 	if (argp->flags & MSDOSFSMNT_GEMDOSFS) {
 		/*
@@ -762,9 +759,11 @@
 error_exit:;
 	if (bp)
 		brelse(bp);
+if ((mp->mnt_flag & MNT_ROOTFS) == 0) {
 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 	(void) VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p);
 	VOP_UNLOCK(devvp, 0);
+}
 	if (pmp) {
 		if (pmp->pm_inusemap)
 			free(pmp->pm_inusemap, M_MSDOSFSFAT);
Index: ./fs/ntfs/ntfs_vfsops.c
===================================================================
RCS file: /cvsroot/src/sys/fs/ntfs/ntfs_vfsops.c,v
retrieving revision 1.23
diff -u -r1.23 ntfs_vfsops.c
--- ./fs/ntfs/ntfs_vfsops.c	30 May 2004 20:46:25 -0000	1.23
+++ ./fs/ntfs/ntfs_vfsops.c	13 Aug 2004 01:32:18 -0000
@@ -174,12 +174,6 @@
 	if (root_device->dv_class != DV_DISK)
 		return (ENODEV);
 
-	/*
-	 * Get vnodes for rootdev.
-	 */
-	if (bdevvp(rootdev, &rootvp))
-		panic("ntfs_mountroot: can't setup rootvp");
-
 	if ((error = vfs_rootmountalloc(MOUNT_NTFS, "root_device", &mp))) {
 		vrele(rootvp);
 		return (error);
@@ -194,14 +188,13 @@
 		mp->mnt_op->vfs_refcount--;
 		vfs_unbusy(mp);
 		free(mp, M_MOUNT);
-		vrele(rootvp);
 		return (error);
 	}
 
+	(void)ntfs_statvfs(mp, &mp->mnt_stat, p);
 	simple_lock(&mountlist_slock);
 	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
 	simple_unlock(&mountlist_slock);
-	(void)ntfs_statvfs(mp, &mp->mnt_stat, p);
 	vfs_unbusy(mp);
 	return (0);
 }
Index: ./kern/init_main.c
===================================================================
RCS file: /cvsroot/src/sys/kern/init_main.c,v
retrieving revision 1.239
diff -u -r1.239 init_main.c
--- ./kern/init_main.c	5 Jul 2004 07:28:45 -0000	1.239
+++ ./kern/init_main.c	13 Aug 2004 01:32:18 -0000
@@ -537,8 +537,9 @@
 	 * Get the vnode for '/'.  Set filedesc0.fd_fd.fd_cdir to
 	 * reference it.
 	 */
-	if (VFS_ROOT(CIRCLEQ_FIRST(&mountlist), &rootvnode))
-		panic("cannot find root vnode");
+	error = VFS_ROOT(CIRCLEQ_FIRST(&mountlist), &rootvnode);
+	if (error)
+		panic("cannot find root vnode, error=%d", error);
 	cwdi0.cwdi_cdir = rootvnode;
 	VREF(cwdi0.cwdi_cdir);
 	VOP_UNLOCK(rootvnode, 0);
Index: ./kern/vfs_subr.c
===================================================================
RCS file: /cvsroot/src/sys/kern/vfs_subr.c,v
retrieving revision 1.230
diff -u -r1.230 vfs_subr.c
--- ./kern/vfs_subr.c	1 Jul 2004 10:03:29 -0000	1.230
+++ ./kern/vfs_subr.c	13 Aug 2004 01:32:19 -0000
@@ -381,7 +381,7 @@
 	(void)vfs_busy(mp, LK_NOWAIT, 0);
 	LIST_INIT(&mp->mnt_vnodelist);
 	mp->mnt_op = vfsp;
-	mp->mnt_flag = MNT_RDONLY;
+	mp->mnt_flag = MNT_RDONLY|MNT_ROOTFS;
 	mp->mnt_vnodecovered = NULLVP;
 	mp->mnt_leaf = mp;
 	vfsp->vfs_refcount++;
@@ -2739,6 +2739,7 @@
 vfs_mountroot()
 {
 	struct vfsops *v;
+	int error = ENODEV;
 
 	if (root_device == NULL)
 		panic("vfs_mountroot: root device unknown");
@@ -2754,6 +2755,13 @@
 	case DV_DISK:
 		if (rootdev == NODEV)
 			panic("vfs_mountroot: rootdev not set for DV_DISK");
+	        if (bdevvp(rootdev, &rootvp))
+	                panic("vfs_mountroot: can't get vnode for rootdev");
+		error = VOP_OPEN(rootvp, FREAD, FSCRED, curproc);
+		if (error) {
+			printf("vfs_mountroot: can't open root device\n");
+			return (error);
+		}
 		break;
 
 	default:
@@ -2765,8 +2773,10 @@
 	/*
 	 * If user specified a file system, use it.
 	 */
-	if (mountroot != NULL)
-		return ((*mountroot)());
+	if (mountroot != NULL) {
+		error = (*mountroot)();
+		goto done;
+	}
 
 	/*
 	 * Try each file system currently configured into the kernel.
@@ -2777,7 +2787,8 @@
 #ifdef DEBUG
 		aprint_normal("mountroot: trying %s...\n", v->vfs_name);
 #endif
-		if ((*v->vfs_mountroot)() == 0) {
+		error = (*v->vfs_mountroot)();
+		if (!error) {
 			aprint_normal("root file system type: %s\n",
 			    v->vfs_name);
 			break;
@@ -2789,9 +2800,15 @@
 		if (root_device->dv_class == DV_DISK)
 			printf(" (dev 0x%x)", rootdev);
 		printf("\n");
-		return (EFTYPE);
+		error = EFTYPE;
 	}
-	return (0);
+
+done:
+	if (error && root_device->dv_class == DV_DISK) {
+		VOP_CLOSE(rootvp, FREAD, FSCRED, curproc);
+		vrele(rootvp);
+	}
+	return (error);
 }
 
 /*
Index: ./ufs/ext2fs/ext2fs_vfsops.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/ext2fs/ext2fs_vfsops.c,v
retrieving revision 1.73
diff -u -r1.73 ext2fs_vfsops.c
--- ./ufs/ext2fs/ext2fs_vfsops.c	5 Jul 2004 07:28:45 -0000	1.73
+++ ./ufs/ext2fs/ext2fs_vfsops.c	13 Aug 2004 01:32:19 -0000
@@ -205,12 +205,6 @@
 	if (root_device->dv_class != DV_DISK)
 		return (ENODEV);
 	
-	/*
-	 * Get vnodes for rootdev.
-	 */
-	if (bdevvp(rootdev, &rootvp))
-		panic("ext2fs_mountroot: can't setup bdevvp's");
-
 	if ((error = vfs_rootmountalloc(MOUNT_EXT2FS, "root_device", &mp))) {
 		vrele(rootvp);
 		return (error);
@@ -220,12 +214,8 @@
 		mp->mnt_op->vfs_refcount--;
 		vfs_unbusy(mp);
 		free(mp, M_MOUNT);
-		vrele(rootvp);
 		return (error);
 	}
-	simple_lock(&mountlist_slock);
-	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
-	simple_unlock(&mountlist_slock);
 	ump = VFSTOUFS(mp);
 	fs = ump->um_e2fs;
 	memset(fs->e2fs_fsmnt, 0, sizeof(fs->e2fs_fsmnt));
@@ -237,6 +227,9 @@
 		    sizeof(fs->e2fs.e2fs_fsmnt) - 1, 0);
 	}
 	(void)ext2fs_statvfs(mp, &mp->mnt_stat, p);
+	simple_lock(&mountlist_slock);
+	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+	simple_unlock(&mountlist_slock);
 	vfs_unbusy(mp);
 	setrootfstime((time_t)fs->e2fs.e2fs_wtime);
 	return (0);
Index: ./ufs/ffs/ffs_vfsops.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/ffs/ffs_vfsops.c,v
retrieving revision 1.151
diff -u -r1.151 ffs_vfsops.c
--- ./ufs/ffs/ffs_vfsops.c	5 Jul 2004 07:28:46 -0000	1.151
+++ ./ufs/ffs/ffs_vfsops.c	13 Aug 2004 01:32:20 -0000
@@ -144,12 +144,6 @@
 	if (root_device->dv_class != DV_DISK)
 		return (ENODEV);
 
-	/*
-	 * Get vnodes for rootdev.
-	 */
-	if (bdevvp(rootdev, &rootvp))
-		panic("ffs_mountroot: can't setup bdevvp's");
-
 	if ((error = vfs_rootmountalloc(MOUNT_FFS, "root_device", &mp))) {
 		vrele(rootvp);
 		return (error);
@@ -158,17 +152,16 @@
 		mp->mnt_op->vfs_refcount--;
 		vfs_unbusy(mp);
 		free(mp, M_MOUNT);
-		vrele(rootvp);
 		return (error);
 	}
-	simple_lock(&mountlist_slock);
-	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
-	simple_unlock(&mountlist_slock);
 	ump = VFSTOUFS(mp);
 	fs = ump->um_fs;
 	memset(fs->fs_fsmnt, 0, sizeof(fs->fs_fsmnt));
 	(void)copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0);
 	(void)ffs_statvfs(mp, &mp->mnt_stat, p);
+	simple_lock(&mountlist_slock);
+	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+	simple_unlock(&mountlist_slock);
 	vfs_unbusy(mp);
 	setrootfstime((time_t)fs->fs_time);
 	return (0);
@@ -685,8 +678,17 @@
 	struct ucred *cred;
 	u_int32_t sbsize = 8192;	/* keep gcc happy*/
 
+	bp = NULL;
+	ump = NULL;
+	fs = NULL;
+	sblockloc = 0;
+	fstype = 0;
+
 	dev = devvp->v_rdev;
 	cred = p ? p->p_ucred : NOCRED;
+	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
+
+if ((mp->mnt_flag & MNT_ROOTFS) == 0) {
 	/*
 	 * Disallow multiple mounts of the same device.
 	 * Disallow mounting of a device that is currently in use
@@ -697,27 +699,23 @@
 		return (error);
 	if (vcount(devvp) > 1 && devvp != rootvp)
 		return (EBUSY);
+
+	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
+	if (error)
+		return (error);
+}
+
 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 	error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0);
 	VOP_UNLOCK(devvp, 0);
 	if (error)
-		return (error);
+		goto out;
 
-	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
-	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
-	if (error)
-		return (error);
 	if (VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, cred, p) != 0)
 		size = DEV_BSIZE;
 	else
 		size = dpart.disklab->d_secsize;
 
-	bp = NULL;
-	ump = NULL;
-	fs = NULL;
-	sblockloc = 0;
-	fstype = 0;
-
 	/*
 	 * Try reading the superblock in each of its possible locations.		 */
 	for (i = 0; ; i++) {
@@ -965,9 +963,11 @@
 	devvp->v_specmountpoint = NULL;
 	if (bp)
 		brelse(bp);
+if ((mp->mnt_flag & MNT_ROOTFS) == 0) {
 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p);
 	VOP_UNLOCK(devvp, 0);
+}
 	if (ump) {
 		if (ump->um_oldfscompat)
 			free(ump->um_oldfscompat, M_UFSMNT);
Index: ./ufs/lfs/lfs_vfsops.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/lfs/lfs_vfsops.c,v
retrieving revision 1.154
diff -u -r1.154 lfs_vfsops.c
--- ./ufs/lfs/lfs_vfsops.c	5 Jul 2004 07:28:46 -0000	1.154
+++ ./ufs/lfs/lfs_vfsops.c	13 Aug 2004 01:32:23 -0000
@@ -307,13 +307,6 @@
 
 	if (rootdev == NODEV)
 		return (ENODEV);
-	/*
-	 * Get vnodes for swapdev and rootdev.
-	 */
-	if ((error = bdevvp(rootdev, &rootvp))) {
-		printf("lfs_mountroot: can't setup bdevvp's");
-		return (error);
-	}
 	if ((error = vfs_rootmountalloc(MOUNT_LFS, "root_device", &mp))) {
 		vrele(rootvp);
 		return (error);
@@ -322,13 +315,12 @@
 		mp->mnt_op->vfs_refcount--;
 		vfs_unbusy(mp);
 		free(mp, M_MOUNT);
-		vrele(rootvp);
 		return (error);
 	}
+	(void)lfs_statvfs(mp, &mp->mnt_stat, p);
 	simple_lock(&mountlist_slock);
 	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
 	simple_unlock(&mountlist_slock);
-	(void)lfs_statvfs(mp, &mp->mnt_stat, p);
 	vfs_unbusy(mp);
 	setrootfstime((time_t)(VFSTOUFS(mp)->um_lfs->lfs_tstamp));
 	return (0);
Index: ./ufs/mfs/mfs_vfsops.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/mfs/mfs_vfsops.c,v
retrieving revision 1.61
diff -u -r1.61 mfs_vfsops.c
--- ./ufs/mfs/mfs_vfsops.c	5 Jul 2004 07:28:46 -0000	1.61
+++ ./ufs/mfs/mfs_vfsops.c	13 Aug 2004 01:32:23 -0000
@@ -176,14 +176,6 @@
 	struct mfsnode *mfsp;
 	int error = 0;
 
-	/*
-	 * Get vnodes for rootdev.
-	 */
-	if (bdevvp(rootdev, &rootvp)) {
-		printf("mfs_mountroot: can't setup bdevvp's");
-		return (error);
-	}
-
 	if ((error = vfs_rootmountalloc(MOUNT_MFS, "mfs_root", &mp))) {
 		vrele(rootvp);
 		return (error);
@@ -205,17 +197,16 @@
 		bufq_free(&mfsp->mfs_buflist);
 		free(mp, M_MOUNT);
 		free(mfsp, M_MFSNODE);
-		vrele(rootvp);
 		return (error);
 	}	
-	simple_lock(&mountlist_slock);
-	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
-	simple_unlock(&mountlist_slock);
 	mp->mnt_vnodecovered = NULLVP;
 	ump = VFSTOUFS(mp);
 	fs = ump->um_fs;
 	(void) copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0);
 	(void)ffs_statvfs(mp, &mp->mnt_stat, p);
+	simple_lock(&mountlist_slock);
+	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+	simple_unlock(&mountlist_slock);
 	vfs_unbusy(mp);
 	return (0);
 }

--Boundary-00=_jxBHBauYs3LzYPi--