Subject: new kpi proposal, sysdisk(9)
To: None <tech-kern@NetBSD.org>
From: Elad Efrat <elad@NetBSD.org>
List: tech-kern
Date: 12/27/2006 23:08:26
This is a multi-part message in MIME format.
--------------080009040705060704090605
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: 7bit

hi,

attached is an initial implementation of a new kpi, sysdisk(9), along
with demo usage and output.

what it does is very simple: it keeps track of disks used by the system.
kind of like a generic 'mountlist', only that it'd include devices
covered by raidframe and swap devices, for example.

original motivation is raw disk access policy enforcement in
securelevel. currently, only disks that are mounted are denied raw
disk access when the system is 'secure'. devices used for swap, for
example, are not considered mounted even though they are just as
important.

the interface is simple:

  sysdisk_add(devvp) - mark device for 'devvp' as in use,
  sysdisk_remove(devvp) - mark device for 'devvp' as unused,
  sysdisk_lookup(devvp) - check if device for 'devvp' is used

reference counting is done so that multiple subsystems can just do
sysdisk_add() and sysdisk_remove() and sysdisk(9) will only consider
the device unused once the reference count is zero.

code (for example, secmodel implementing securelevel, or anything
else that cares about raw disk access to disks used by the system) can
just use sysdisk_lookup() to know if the disk is in use, and make their
decision appropriately.

notes:
  - no locking is done yet.
  - the interface is subject to change. I had thought about making it
    take 'dev_t' and 'enum vtype', but for now I think 'struct vnode *'
    is enough.
  - the implementation 'normalizes' devices to char device. if the
    passed 'devvp' is VBLK, it'll devsw_blk2chr() it.
  - we can add another argument to sysdisk_add(), indicating what
    subsystem makes use of this device, among other expansion options.
    this is just initial implementation. :)
  - man-page, etc. will be added if committed.
  - sysdisk_add() and sysdisk_remove() hooks should really be
    implemented on top of something like kauth(9)'s fileop scope...

attached files:
  - kern_sysdisk.c and sysdisk.h, code for sysdisk(9)
  - secmodel.diff, sample secmodel integration: bsd44::securelevel
  - sysdisk-sample.diff, sysdisk(9) integration in ffs, cd9660, and uvm

demo output:

phyre:test {4} ./open /dev/cd0a
open(/dev/cd0a) ro: success
open(/dev/cd0a) rw: success
phyre:test {5} mount -t cd9660 /dev/cd0a /mnt
phyre:test {6} ./open /dev/cd0a
open(/dev/cd0a) ro: success
secmodel: active disk.   <-- uprintf() in secmodel_bsd44_securelevel.c
open(/dev/cd0a) rw: success
phyre:test {7} umount /mnt
phyre:test {8} ./open /dev/cd0a
open(/dev/cd0a) ro: success
open(/dev/cd0a) rw: success
phyre:test {9}

comments?

-e.

--------------080009040705060704090605
Content-Type: text/plain;
 name="kern_sysdisk.c"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="kern_sysdisk.c"

/* $NetBSD$ */

/*-
 * Copyright (c) 2006 Elad Efrat <elad@NetBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Elad Efrat.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD$");

#include <sys/types.h>
#include <sys/param.h>
#include <sys/vnode.h>
#include <sys/queue.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/once.h>
#include <sys/disklabel.h>
#include <miscfs/specfs/specdev.h>

#include <sys/sysdisk.h>

/*
 * One tracked disk.  An entry is created by sysdisk_add() the first time
 * a device is registered and released by sysdisk_remove() once its
 * reference count drops back to zero.
 */
struct sysdisk {
	/* Number of outstanding sysdisk_add() calls for this disk. */
	u_int sysdisk_refcnt;
	/* Device number as produced by sysdisk_normalize() (VBLK is mapped
	 * through devsw_blk2chr()). */
	dev_t sysdisk_dev;
	/* Linkage on the global sysdisklist. */
	TAILQ_ENTRY(sysdisk) sysdisk_list;
};

/* Global list of all disks currently tracked; set up by sysdisk_init(). */
TAILQ_HEAD(, sysdisk) sysdisklist;

/*
 * One-time setup for sysdisk(9): put the global list of tracked disks
 * into its empty state.  Run through RUN_ONCE() from sysdisk_add().
 * Always returns 0.
 */
static int
sysdisk_init(void)
{

	TAILQ_INIT(&sysdisklist);
	return 0;
}

/*
 * Allocate storage for a single tracking entry (M_TEMP, M_WAITOK).
 * The fields are left unset here; the caller fills them in.
 */
static sysdisk_t
sysdisk_alloc(void)
{

	return (malloc(sizeof(struct sysdisk), M_TEMP, M_WAITOK));
}

/*
 * Map a device vnode to the device number used as the tracking key.
 *
 * Character devices yield their own v_rdev, block devices are translated
 * with devsw_blk2chr(), and any other vnode type yields NODEV.
 */
static dev_t
sysdisk_normalize(struct vnode *vp)
{

	switch (vp->v_type) {
	case VCHR:
		/* Already a character device: use it as-is. */
		return (vp->v_rdev);

	case VBLK:
		/* Block device: convert to the character device number. */
		return (devsw_blk2chr(vp->v_rdev));

	default:
		/* Not a device vnode. */
		return (NODEV);
	}
}

int
sysdisk_add(struct vnode *vp)
{
	sysdisk_t sd;
	static ONCE_DECL(control);
	int error;

	error = RUN_ONCE(&control, sysdisk_init);
	if (error)
		return (EINVAL);

	sd = sysdisk_lookup(vp);
	if (sd == NULL) {
		dev_t dev;

		dev = sysdisk_normalize(vp);
		if (dev == NODEV) {
			return (ENODEV);
		}

		sd = sysdisk_alloc();

		sd->sysdisk_dev = dev;
		sd->sysdisk_refcnt = 1;
		TAILQ_INSERT_TAIL(&sysdisklist, sd, sysdisk_list);
	} else
		sd->sysdisk_refcnt++;

	return (0);
}

/*
 * Find the tracking entry for the disk behind 'vp'.
 *
 * The vnode is first reduced to a device number with sysdisk_normalize().
 * An entry matches when both the major number and the disk unit agree, so
 * every partition of a tracked disk resolves to the same entry.
 *
 * Returns the matching entry, or NULL if 'vp' is not a device vnode or
 * the disk is not currently tracked.
 */
sysdisk_t
sysdisk_lookup(struct vnode *vp)
{
	struct sysdisk *sd;
	dev_t dev;

	dev = sysdisk_normalize(vp);
	if (dev == NODEV)
		return (NULL);

	TAILQ_FOREACH(sd, &sysdisklist, sysdisk_list) {
		/*
		 * Compare the unit against the list entry's unit.  Comparing
		 * DISKUNIT(dev) with itself would be always true and make
		 * every disk sharing a major number look tracked.
		 */
		if (major(dev) == major(sd->sysdisk_dev) &&
		    DISKUNIT(dev) == DISKUNIT(sd->sysdisk_dev))
			return ((sysdisk_t)sd);
	}

	return (NULL);
}

/*
 * Drop one reference on the disk behind 'vp'.
 *
 * Does nothing if the disk is not tracked.  When the last reference goes
 * away the entry is unlinked from the list and freed.
 */
void
sysdisk_remove(struct vnode *vp)
{
	sysdisk_t entry = sysdisk_lookup(vp);

	if (entry == NULL)
		return;

	/* Other users remain: keep the entry on the list. */
	entry->sysdisk_refcnt--;
	if (entry->sysdisk_refcnt != 0)
		return;

	TAILQ_REMOVE(&sysdisklist, entry, sysdisk_list);
	free(entry, M_TEMP);
}

--------------080009040705060704090605
Content-Type: text/plain;
 name="sysdisk.h"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="sysdisk.h"

#ifndef	_SYS_SYSDISK_H_
#define	_SYS_SYSDISK_H_

#include <sys/vnode.h>

/* Opaque handle to a tracked-disk entry; the layout is private to
 * kern_sysdisk.c. */
typedef struct sysdisk *sysdisk_t;

/*
 * Mark the disk behind the given device vnode as in use (reference
 * counted).  Returns 0 on success, ENODEV if the vnode is neither VCHR
 * nor VBLK, or EINVAL if one-time initialization fails.
 */
int sysdisk_add(struct vnode *);

/*
 * Return the entry for the disk behind the given device vnode, or NULL
 * if the vnode is not a device or the disk is not tracked.
 */
sysdisk_t sysdisk_lookup(struct vnode *);

/*
 * Drop one reference taken by sysdisk_add(); the disk stops being
 * tracked once its reference count reaches zero.
 */
void sysdisk_remove(struct vnode *);

#endif /* _SYS_SYSDISK_H_ */

--------------080009040705060704090605
Content-Type: text/plain;
 name="secmodel.diff"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="secmodel.diff"

Index: secmodel_bsd44_securelevel.c
===================================================================
RCS file: /usr/cvs/src/sys/secmodel/bsd44/secmodel_bsd44_securelevel.c,v
retrieving revision 1.20
diff -u -p -r1.20 secmodel_bsd44_securelevel.c
--- secmodel_bsd44_securelevel.c	26 Dec 2006 10:43:44 -0000	1.20
+++ secmodel_bsd44_securelevel.c	26 Dec 2006 19:43:43 -0000
@@ -52,6 +52,7 @@ __KERNEL_RCSID(0, "$NetBSD: secmodel_bsd
 #include <sys/mount.h>
 #include <sys/sysctl.h>
 #include <sys/vnode.h>
+#include <sys/sysdisk.h>
 
 #include <miscfs/specfs/specdev.h>
 
@@ -464,13 +465,11 @@ secmodel_bsd44_securelevel_device_cb(kau
 				break;
 			}
 
-			/*
-			 * XXX: This is bogus. We should be failing the request
-			 * XXX: not only if this specific slice is mounted, but
-			 * XXX: if it's on a disk with any other mounted slice.
-			 */
-			if (vfs_mountedon(bvp) && (securelevel > 0))
+			if ((sysdisk_lookup(vp) != NULL) && (securelevel > 0)) {
+				uprintf("secmodel: active disk.\n");
+				result = KAUTH_RESULT_ALLOW; /* XXX for demo. */
 				break;
+			}
 
 			if (securelevel < 2)
 				result = KAUTH_RESULT_ALLOW;

--------------080009040705060704090605
Content-Type: text/plain;
 name="sysdisk-sample.diff"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="sysdisk-sample.diff"

Index: fs/cd9660/cd9660_vfsops.c
===================================================================
RCS file: /usr/cvs/src/sys/fs/cd9660/cd9660_vfsops.c,v
retrieving revision 1.38
diff -u -p -r1.38 cd9660_vfsops.c
--- fs/cd9660/cd9660_vfsops.c	16 Nov 2006 01:33:35 -0000	1.38
+++ fs/cd9660/cd9660_vfsops.c	26 Dec 2006 19:24:37 -0000
@@ -65,6 +65,7 @@ __KERNEL_RCSID(0, "$NetBSD: cd9660_vfsop
 #include <sys/conf.h>
 #include <sys/dirent.h>
 #include <sys/kauth.h>
+#include <sys/sysdisk.h>
 
 #include <fs/cd9660/iso.h>
 #include <fs/cd9660/cd9660_extern.h>
@@ -245,6 +246,7 @@ cd9660_mount(mp, path, data, ndp, l)
 			VOP_UNLOCK(devvp, 0);
 			goto fail;
 		}
+		sysdisk_add(devvp);
 	} else {
 		vrele(devvp);
 		if (devvp != imp->im_devvp)
@@ -553,6 +555,7 @@ cd9660_unmount(mp, mntflags, l)
 		isomp->im_devvp->v_specmountpoint = NULL;
 
 	vn_lock(isomp->im_devvp, LK_EXCLUSIVE | LK_RETRY);
+	sysdisk_remove(isomp->im_devvp);
 	error = VOP_CLOSE(isomp->im_devvp, FREAD, NOCRED, l);
 	vput(isomp->im_devvp);
 	free(isomp, M_ISOFSMNT);
Index: ufs/ffs/ffs_vfsops.c
===================================================================
RCS file: /usr/cvs/src/sys/ufs/ffs/ffs_vfsops.c,v
retrieving revision 1.190
diff -u -p -r1.190 ffs_vfsops.c
--- ufs/ffs/ffs_vfsops.c	16 Nov 2006 01:33:53 -0000	1.190
+++ ufs/ffs/ffs_vfsops.c	26 Dec 2006 19:26:15 -0000
@@ -74,6 +74,8 @@ __KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c
 #include <ufs/ffs/fs.h>
 #include <ufs/ffs/ffs_extern.h>
 
+#include <sys/sysdisk.h>
+
 /* how many times ffs_init() was called */
 int ffs_initcount = 0;
 
@@ -1012,6 +1014,7 @@ ffs_mountfs(struct vnode *devvp, struct 
 #endif
 	}
 #endif /* UFS_EXTATTR */
+	sysdisk_add(devvp);
 	return (0);
 out:
 	if (fs)
@@ -1201,6 +1204,7 @@ ffs_unmount(struct mount *mp, int mntfla
 	if (ump->um_devvp->v_type != VBAD)
 		ump->um_devvp->v_specmountpoint = NULL;
 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
+	sysdisk_remove(ump->um_devvp);
 	(void)VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
 		NOCRED, l);
 	vput(ump->um_devvp);
Index: uvm/uvm_swap.c
===================================================================
RCS file: /usr/cvs/src/sys/uvm/uvm_swap.c,v
retrieving revision 1.115
diff -u -p -r1.115 uvm_swap.c
--- uvm/uvm_swap.c	7 Dec 2006 14:06:51 -0000	1.115
+++ uvm/uvm_swap.c	26 Dec 2006 19:26:30 -0000
@@ -60,6 +60,7 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_swap.c,v
 #include <sys/syscallargs.h>
 #include <sys/swap.h>
 #include <sys/kauth.h>
+#include <sys/sysdisk.h>
 
 #include <uvm/uvm.h>
 
@@ -338,6 +339,7 @@ swaplist_insert(struct swapdev *sdp, str
 	sdp->swd_priority = priority;
 	CIRCLEQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next);
 	uvmexp.nswapdev++;
+	sysdisk_add(sdp->swd_vp);
 }
 
 /*
@@ -364,6 +366,7 @@ swaplist_find(struct vnode *vp, boolean_
 					CIRCLEQ_REMOVE(&spp->spi_swapdev,
 					    sdp, swd_next);
 					uvmexp.nswapdev--;
+					sysdisk_remove(sdp->swd_vp);
 				}
 				return(sdp);
 			}

--------------080009040705060704090605--