Subject: kern/16136: Bogus entry in /etc/fstab can panic kernel
To: None <gnats-bugs@gnats.netbsd.org>
From: Chris Jepeway <jepeway@blasted-heath.com>
List: netbsd-bugs
Date: 03/30/2002 17:16:51
>Number:         16136
>Category:       kern
>Synopsis:       Bogus entry in /etc/fstab can panic kernel
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    kern-bug-people
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Sat Mar 30 14:17:00 PST 2002
>Closed-Date:
>Last-Modified:
>Originator:     Chris Jepeway
>Release:        NetBSD 1.5Y
>Organization:
	Blasted Heath Consulting, LLC
>Environment:
System: NetBSD pos 1.5Y NetBSD 1.5Y (POS) #9: Mon Mar 11 14:01:26 EST 2002 jepe
way@the-morrigan:/src/sys/arch/i386/compile/POS i386
Architecture: i386
Machine: i386
>Description:
	Suppose that you've got an entry in /etc/fstab for /mnt that's
	entirely bogus: it references a non-existant partition.  If
	you tried a "mount /mnt" you'd get a

	    mount_ffs: /dev/wd1b on /mnt: Device not configured

	Now suppose you were to mount a real FFS partition on /mnt.
	If you then did this:

		touch /mnt/whee
		mount -u -r /mnt

	you'd panic the next time the syncer runs:

		panic: update: rofs mod

	The problem is that ffs_mount() uses some of its args before
	it checks them for validity.  In particular, it changes the
	mount from r/w to r/o before it checks that the inode named
	in its path arg is the same as the device of the existing mount.
	The result is a r/w mount point where its private FFS data thinks
	it's r/o.

	I know that 1.5.2 has this problem, too.

>How-To-Repeat:
	First, we need a bogus entry in /etc/fstab for a mount point.
	If partition sd0a is a real FFS partition, but sd0b isn't even
	on the disk label, make an entry like this:

		/dev/sd0b       /mnt    ffs rw 1 1

	I suppose something like

		/dev/sd0p       /mnt    ffs rw 1 1

	would work if partition b exists on sd0.

	Then, issue these commands:

        #
        # Mount the real parition read-only
        #
        mount -r /dev/sd0a /mnt
        #
        # Using back-to-back commands, re-mount the
        # partition read-write, force a write to the f/s;
        # and remount the partition read-only in a way
        # that uses the bogus /etc/fstab entry
        #
        mount -u -o rw /dev/sd0a /mnt ; touch /mnt/wheee ; \
                mount -u -r /mnt
        #
        # That last mount failed; however, some part of the f/s
        # thinks /mnt is now read-write; others think it's still
        # read-only
        #

        #
        # Do another write
        #
        touch /mnt/bleah
        #
        # This succeeds
        #

        #
        # Wait for the syncer daemon to run and watch the panic
        #


>Fix:
	Here's a patch to verison 1.87 of ffs_vfsops.c.  It's
	largish, but it mostly just adds more argument checking
	and drags the checks that were there to where they occur
	before the args are used.

	As always, I tender this patch as more of an illustration
	of *some* potential fix than as *the* definitive fix.  It's
	been running for weeks on more than one i386, though, so I
	think it's a pretty good illustration.

	The patch is against a private CVS repository forked from
	NetBSD 1.5Y.

Index: ffs_vfsops.c
===================================================================
RCS file: /cvsroot/src/src/sys/ufs/ffs/ffs_vfsops.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- ffs_vfsops.c	2001/10/25 21:49:08	1.1
+++ ffs_vfsops.c	2002/02/21 19:04:06	1.2
@@ -183,7 +183,7 @@
 	struct ufsmount *ump = NULL;
 	struct fs *fs;
 	size_t size;
-	int error, flags;
+	int error, flags, update;
 	mode_t accessmode;
 
 	error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args));
@@ -194,14 +194,115 @@
 	mp->mnt_flag &= ~MNT_SOFTDEP;
 #endif
 
+	update = mp->mnt_flag & MNT_UPDATE;
+
+	/* Check arguments */
+	if (update) {
+		/* Use the extant mount */
+		ump = VFSTOUFS(mp);
+		devvp = ump->um_devvp;
+	} else {
+		/* New mounts must have a filename for the device */
+		if (args.fspec == NULL)
+			return EINVAL;
+
+		/* Check for update-only flags */
+		if (mp->mnt_flag &
+		    (MNT_WANTRDWR |	/*
+					 * Upgrading from read-only to
+					 * read-write can only occur after
+					 * the initial mount
+					 */
+		     MNT_EXRDONLY |
+		     MNT_DEFEXPORTED |
+		     MNT_EXPORTANON |
+		     MNT_EXKERB |	/* Only update mounts are allowed */
+		     MNT_EXNORESPORT |  /* to affect exporting		  */
+		     MNT_EXPUBLIC |
+		     MNT_DELEXPORT))
+		return EINVAL;
+	}
+
+	error = 0;
+
+	if (args.fspec) {
+		/*
+		 * Look up the name and verify that it's sane.
+		 */
+		NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
+		if ((error = namei(ndp)) != 0)
+			return (error);
+		devvp = ndp->ni_vp;
+
+		if (!update) {
+			/*
+			 * Be sure this is a valid block device
+			 */
+			if (devvp->v_type != VBLK)
+				error = ENOTBLK;
+			else if (major(devvp->v_rdev) >= nblkdev)
+				error = ENXIO;
+		} else {
+			/*
+			 * Be sure we're still naming the same device
+			 * used for our initial mount
+			 */
+			if (devvp != ump->um_devvp)
+				error = EINVAL;
+			else
+				/*
+				 * The initial mount got a reference on this
+				 * device, so drop the one obtained via
+				 * namei(), above
+				 */
+				vrele(devvp);
+		}
+	}
+
 	/*
-	 * If updating, check whether changing from read-only to
-	 * read/write; if there is no device name, that's all we do.
+	 * If mount by non-root, then verify that user has necessary
+	 * permissions on the device.
 	 */
-	if (mp->mnt_flag & MNT_UPDATE) {
+	if (error == 0 && p->p_ucred->cr_uid != 0) {
+		accessmode = VREAD;
+		if ((!update && (mp->mnt_flag & MNT_RDONLY) == 0) ||
+		     (update && (mp->mnt_flag & MNT_WANTRDWR)))
+			accessmode |= VWRITE;
+		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
+		error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p);
+		VOP_UNLOCK(devvp, 0);
+	}
+
+	if (error) {
+		vrele(devvp);
+		return (error);
+	}
+
+	if (!update) {
+		error = ffs_mountfs(devvp, mp, p);
+		if (error) {
+			vrele(devvp);
+			return (error);
+		}
+
 		ump = VFSTOUFS(mp);
 		fs = ump->um_fs;
+		if ((mp->mnt_flag & (MNT_SOFTDEP | MNT_ASYNC)) ==
+		    (MNT_SOFTDEP | MNT_ASYNC)) {
+			printf("%s fs uses soft updates, "
+			       "ignoring async mode\n",
+			       fs->fs_fsmnt);
+			mp->mnt_flag &= ~MNT_ASYNC;
+		}
+	} else {
+		/*
+		 * Update the mount
+		 */
+		fs = ump->um_fs;
 		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
+			/*
+			 * Changing from r/w to r/o
+			 */
 			flags = WRITECLOSE;
 			if (mp->mnt_flag & MNT_FORCE)
 				flags |= FORCECLOSE;
@@ -264,20 +365,11 @@
 			if (error)
 				return (error);
 		}
+
 		if (fs->fs_ronly && (mp->mnt_flag & MNT_WANTRDWR)) {
 			/*
-			 * If upgrade to read-write by non-root, then verify
-			 * that user has necessary permissions on the device.
+			 * Changing from read-only to read/write
 			 */
-			devvp = ump->um_devvp;
-			if (p->p_ucred->cr_uid != 0) {
-				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
-				error = VOP_ACCESS(devvp, VREAD | VWRITE,
-						   p->p_ucred, p);
-				VOP_UNLOCK(devvp, 0);
-				if (error)
-					return (error);
-			}
 			fs->fs_ronly = 0;
 			fs->fs_clean <<= 1;
 			fs->fs_fmod = 1;
@@ -301,63 +393,7 @@
 			mp->mnt_flag &= ~MNT_ASYNC;
 		}
 	}
-	/*
-	 * Not an update, or updating the name: look up the name
-	 * and verify that it refers to a sensible block device.
-	 */
-	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
-	if ((error = namei(ndp)) != 0)
-		return (error);
-	devvp = ndp->ni_vp;
 
-	if (devvp->v_type != VBLK) {
-		vrele(devvp);
-		return (ENOTBLK);
-	}
-	if (major(devvp->v_rdev) >= nblkdev) {
-		vrele(devvp);
-		return (ENXIO);
-	}
-	/*
-	 * If mount by non-root, then verify that user has necessary
-	 * permissions on the device.
-	 */
-	if (p->p_ucred->cr_uid != 0) {
-		accessmode = VREAD;
-		if ((mp->mnt_flag & MNT_RDONLY) == 0)
-			accessmode |= VWRITE;
-		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
-		error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p);
-		VOP_UNLOCK(devvp, 0);
-		if (error) {
-			vrele(devvp);
-			return (error);
-		}
-	}
-	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
-		error = ffs_mountfs(devvp, mp, p);
-		if (!error) {
-			ump = VFSTOUFS(mp);
-			fs = ump->um_fs;
-			if ((mp->mnt_flag & (MNT_SOFTDEP | MNT_ASYNC)) ==
-			    (MNT_SOFTDEP | MNT_ASYNC)) {
-				printf("%s fs uses soft updates, "
-				       "ignoring async mode\n",
-				    fs->fs_fsmnt);
-				mp->mnt_flag &= ~MNT_ASYNC;
-			}
-		}
-	}
-	else {
-		if (devvp != ump->um_devvp)
-			error = EINVAL;	/* needs translation */
-		else
-			vrele(devvp);
-	}
-	if (error) {
-		vrele(devvp);
-		return (error);
-	}
 	(void) copyinstr(path, fs->fs_fsmnt, sizeof(fs->fs_fsmnt) - 1, &size);
 	memset(fs->fs_fsmnt + size, 0, sizeof(fs->fs_fsmnt) - size);
 	memcpy(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN);
>Release-Note:
>Audit-Trail:
>Unformatted: