Subject: Re: turn bpf device into a cloner
To: None <tech-net@netbsd.org>
From: Christos Zoulas <christos@zoulas.com>
List: tech-net
Date: 11/29/2004 08:09:39
In article <I7vE9A.F9I@tac.nyc.ny.us>,
Christos Zoulas <christos@tac.gw.com> wrote:
>In article <200411271904.04736.abuse@spamalicious.com>,
>Charles M. Hannum <abuse@spamalicious.com> wrote:
>>On Saturday 27 November 2004 18:52, Christos Zoulas wrote:
>>> The following patch turns /dev/bpfX into cloners. Only one is really
>>> needed, /dev/bpf, but for now we can leave the stray /dev/bpfX's until we
>>> fix the source code. Comments?
>>
>>Thanks (well, not really) to ptyfs, there are already four methods of creating 
>>cloning devices in the kernel.  This adds a fifth.
>>
>>This has already gotten way out of hand.  Before you add more one-off hacks, it is 
>>time to sit down and actually DESIGN how cloning devices are going to work 
>>globally.
>

So I went through and cleaned up the cloner interface a bit:
1. made fileops const
2. added 2 new negative errnos to `officially' support the cloning hack:
	EDUPFD (replaces the overloaded use of ENODEV)
	EMOVEFD (replaces the overloaded use of ENXIO)
3. created an fdclone() function to encapsulate the operations needed for
   EMOVEFD, and made all cloners use it.
4. Centralized the local noop/badop fileops functions to:
   fnullop_fcntl, fnullop_poll, fnullop_kqfilter, fbadop_stat
5. included the bpf cloning change with this patch.
6. there were 2 cases where ENXIO was returned where ENODEV should have
   been returned. I fixed them.

If I don't hear any complaints, I'll commit that soon.

christos


Index: compat/svr4/svr4_net.c
===================================================================
RCS file: /cvsroot/src/sys/compat/svr4/svr4_net.c,v
retrieving revision 1.36
diff -u -u -r1.36 svr4_net.c
--- compat/svr4/svr4_net.c	22 May 2004 22:52:13 -0000	1.36
+++ compat/svr4/svr4_net.c	29 Nov 2004 07:59:45 -0000
@@ -109,7 +109,7 @@
 int svr4_soo_close __P((struct file *, struct proc *));
 int svr4_ptm_alloc __P((struct proc *));
 
-static struct fileops svr4_netops = {
+static const struct fileops svr4_netops = {
 	soo_read, soo_write, soo_ioctl, soo_fcntl, soo_poll,
 	soo_stat, svr4_soo_close, soo_kqfilter
 };
@@ -211,19 +211,12 @@
 		return error;
 	}
 
-	fp->f_flag = FREAD|FWRITE;
+	error = fdclone(p, fp, fd, &svr4_netops, so);
 	fp->f_type = DTYPE_SOCKET;
-	fp->f_ops = &svr4_netops;
-
-	fp->f_data = (caddr_t)so;
-	(void) svr4_stream_get(fp);
+	(void)svr4_stream_get(fp);
 
 	DPRINTF(("ok);\n"));
-
-	curlwp->l_dupfd = fd;	/* XXX */
-	FILE_SET_MATURE(fp);
-	FILE_UNUSE(fp, p);
-	return ENXIO;
+	return error;
 }
 
 
@@ -283,8 +276,8 @@
 		case ENXIO:
 			return error;
 		case 0:
-			curlwp->l_dupfd = fd;	/* XXX */
-			return ENXIO;
+			curlwp->l_dupfd = fd;
+			return EMOVEFD;
 		default:
 			if (ttynumbers[++n] == '\0') {
 				if (ttyletters[++l] == '\0')
Index: compat/svr4_32/svr4_32_net.c
===================================================================
RCS file: /cvsroot/src/sys/compat/svr4_32/svr4_32_net.c,v
retrieving revision 1.10
diff -u -u -r1.10 svr4_32_net.c
--- compat/svr4_32/svr4_32_net.c	13 Sep 2003 08:22:51 -0000	1.10
+++ compat/svr4_32/svr4_32_net.c	29 Nov 2004 07:59:45 -0000
@@ -102,7 +102,7 @@
 int svr4_soo_close __P((struct file *, struct proc *));
 int svr4_ptm_alloc __P((struct proc *));
 
-static struct fileops svr4_32_netops = {
+static const struct fileops svr4_32_netops = {
 	soo_read, soo_write, soo_ioctl, soo_fcntl, soo_poll,
 	soo_stat, svr4_soo_close
 };
Index: conf/files
===================================================================
RCS file: /cvsroot/src/sys/conf/files,v
retrieving revision 1.699
diff -u -u -r1.699 files
--- conf/files	25 Nov 2004 06:52:14 -0000	1.699
+++ conf/files	29 Nov 2004 07:59:45 -0000
@@ -1279,7 +1279,7 @@
 file	miscfs/specfs/spec_vnops.c
 file	miscfs/syncfs/sync_subr.c
 file	miscfs/syncfs/sync_vnops.c
-file	net/bpf.c			bpfilter		needs-count
+file	net/bpf.c			bpfilter		needs-flag
 file	net/bpf_filter.c		bpf_filter
 file	net/bsd-comp.c			ppp & ppp_bsdcomp
 file	net/if.c
Index: dev/dmover/dmover_io.c
===================================================================
RCS file: /cvsroot/src/sys/dev/dmover/dmover_io.c,v
retrieving revision 1.14
diff -u -u -r1.14 dmover_io.c
--- dev/dmover/dmover_io.c	28 Nov 2004 07:44:05 -0000	1.14
+++ dev/dmover/dmover_io.c	29 Nov 2004 07:59:45 -0000
@@ -580,21 +580,6 @@
 }
 
 /*
- * dmio_fcntl:
- *
- *	Fcntl file op.
- */
-static int
-dmio_fcntl(struct file *fp, u_int cmd, void *data, struct proc *p)
-{
-
-	if (cmd == F_SETFL)
-		return (0);
-
-	return (EOPNOTSUPP);
-}
-
-/*
  * dmio_poll:
  *
  *	Poll file op.
@@ -644,18 +629,6 @@
 }
 
 /*
- * dmio_stat:
- *
- *	Stat file op.
- */
-static int
-dmio_stat(struct file *fp, struct stat *sb, struct proc *p)
-{
-
-	return (EOPNOTSUPP);
-}
-
-/*
  * dmio_close:
  *
  *	Close file op.
@@ -704,14 +677,15 @@
 	return (0);
 }
 
-static struct fileops dmio_fileops = {
+static const struct fileops dmio_fileops = {
 	dmio_read,
 	dmio_write,
 	dmio_ioctl,
-	dmio_fcntl,
+	fnullop_fcntl,
 	dmio_poll,
-	dmio_stat,
+	fbadop_stat,
 	dmio_close,
+	fnullop_kqfilter
 };
 
 /*
@@ -738,14 +712,5 @@
 	TAILQ_INIT(&ds->ds_pending);
 	TAILQ_INIT(&ds->ds_complete);
 
-	fp->f_flag = FREAD | FWRITE;
-	fp->f_type = DTYPE_MISC;
-	fp->f_ops = &dmio_fileops;
-	fp->f_data = (caddr_t) ds;
-
-	curlwp->l_dupfd = fd;	/* XXX */
-	FILE_SET_MATURE(fp);
-	FILE_UNUSE(fp, p);
-
-	return (ENXIO);
+	return fdclone(p, fp, fd, &dmio_fileops, ds);
 }
Index: kern/kern_descrip.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_descrip.c,v
retrieving revision 1.126
diff -u -u -r1.126 kern_descrip.c
--- kern/kern_descrip.c	31 May 2004 15:30:55 -0000	1.126
+++ kern/kern_descrip.c	29 Nov 2004 07:59:45 -0000
@@ -1515,7 +1515,7 @@
 	 * will simply report the error.
 	 */
 	curlwp->l_dupfd = minor(dev);	/* XXX */
-	return (ENODEV);
+	return EDUPFD;
 }
 
 /*
@@ -1551,17 +1551,17 @@
 	/*
 	 * There are two cases of interest here.
 	 *
-	 * For ENODEV simply dup (dfd) to file descriptor
+	 * For EDUPFD simply dup (dfd) to file descriptor
 	 * (indx) and return.
 	 *
-	 * For ENXIO steal away the file structure from (dfd) and
+	 * For EMOVEFD steal away the file structure from (dfd) and
 	 * store it in (indx).  (dfd) is effectively closed by
 	 * this operation.
 	 *
 	 * Any other error code is just returned.
 	 */
 	switch (error) {
-	case ENODEV:
+	case EDUPFD:
 		/*
 		 * Check that the mode the file is being opened for is a
 		 * subset of the mode of the existing descriptor.
@@ -1580,7 +1580,7 @@
 		FILE_UNUSE_HAVELOCK(wfp, p);
 		return (0);
 
-	case ENXIO:
+	case EMOVEFD:
 		/*
 		 * Steal away the file pointer from dfd, and stuff it into indx.
 		 */
@@ -1838,3 +1838,52 @@
 	else if (pgid < 0)
 		kgsignal(-pgid, &ksi, fdescdata);
 }
+
+int
+fdclone(struct proc *p, struct file *fp, int fd, const struct fileops *fops,
+    void *data)
+{
+	fp->f_flag = FREAD | FWRITE;
+	fp->f_type = DTYPE_MISC;
+	fp->f_ops = fops;
+	fp->f_data = data;
+
+	curlwp->l_dupfd = fd;
+
+	FILE_SET_MATURE(fp);
+	FILE_UNUSE(fp, p);
+	return EMOVEFD;
+}
+
+/* ARGSUSED */
+int
+fnullop_fcntl(struct file *fp, u_int cmd, void *data, struct proc *p)
+{
+	if (cmd == F_SETFL)
+		return 0;
+
+	return EOPNOTSUPP;
+}
+
+/* ARGSUSED */
+int
+fnullop_poll(struct file *fp, int which, struct proc *p)
+{
+	return 0;
+}
+
+
+/* ARGSUSED */
+int
+fnullop_kqfilter(struct file *fp, struct knote *kn)
+{
+
+	return 0;
+}
+
+/* ARGSUSED */
+int
+fbadop_stat(struct file *fp, struct stat *sb, struct proc *p)
+{
+	return EOPNOTSUPP;
+}
Index: kern/kern_event.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_event.c,v
retrieving revision 1.20
diff -u -u -r1.20 kern_event.c
--- kern/kern_event.c	25 Apr 2004 16:42:41 -0000	1.20
+++ kern/kern_event.c	29 Nov 2004 07:59:46 -0000
@@ -72,7 +72,7 @@
 static int	kqueue_stat(struct file *fp, struct stat *sp, struct proc *p);
 static int	kqueue_close(struct file *fp, struct proc *p);
 
-static struct fileops kqueueops = {
+static const struct fileops kqueueops = {
 	kqueue_read, kqueue_write, kqueue_ioctl, kqueue_fcntl, kqueue_poll,
 	kqueue_stat, kqueue_close, kqueue_kqfilter
 };
Index: kern/kern_systrace.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_systrace.c,v
retrieving revision 1.41
diff -u -u -r1.41 kern_systrace.c
--- kern/kern_systrace.c	28 Nov 2004 07:44:05 -0000	1.41
+++ kern/kern_systrace.c	29 Nov 2004 07:59:46 -0000
@@ -84,16 +84,13 @@
 		int);
 int	systracef_write(struct file *, off_t *, struct uio *, struct ucred *,
 		int);
-int	systracef_fcntl(struct file *, u_int, void *, struct proc *);
 int	systracef_poll(struct file *, int, struct proc *);
 #else
 int	systracef_read(struct file *, off_t *, struct uio *, struct ucred *);
 int	systracef_write(struct file *, off_t *, struct uio *, struct ucred *);
 int	systracef_select(struct file *, int, struct proc *);
 #endif
-int	systracef_kqfilter(struct file *, struct knote *);
 int	systracef_ioctl(struct file *, u_long, void *, struct proc *);
-int	systracef_stat(struct file *, struct stat *, struct proc *);
 int	systracef_close(struct file *, struct proc *);
 
 struct str_policy {
@@ -182,22 +179,15 @@
 int	systrace_msg_ugid(struct fsystrace *, struct str_process *);
 int	systrace_make_msg(struct str_process *, int, struct str_message *);
 
-static struct fileops systracefops = {
+static const struct fileops systracefops = {
 	systracef_read,
 	systracef_write,
 	systracef_ioctl,
-#ifdef __NetBSD__
-	systracef_fcntl,
+	fnullop_fcntl,
 	systracef_poll,
-#else
-	systracef_select,
-	systracef_kqfilter,
-#endif
-	systracef_stat,
-	systracef_close
-#ifdef __NetBSD__
-	, systracef_kqfilter
-#endif
+	fbadop_stat,
+	systracef_close,
+	fnullop_kqfilter
 };
 
 #ifdef __NetBSD__
@@ -433,19 +423,6 @@
 }
 
 #ifdef __NetBSD__
-/* ARGSUSED */
-int
-systracef_fcntl(struct file *fp, u_int cmd, void *data, struct proc *p)
-{
-
-	if (cmd == F_SETFL)
-		return 0;
-
-	return (EOPNOTSUPP);
-}
-#endif
-
-#ifdef __NetBSD__
 int
 systracef_poll(struct file *fp, int events, struct proc *p)
 {
@@ -490,21 +467,6 @@
 
 /* ARGSUSED */
 int
-systracef_kqfilter(struct file *fp, struct knote *kn)
-{
-	return (1);
-}
-
-
-/* ARGSUSED */
-int
-systracef_stat(struct file *fp, struct stat *sb, struct proc *p)
-{
-	return (EOPNOTSUPP);
-}
-
-/* ARGSUSED */
-int
 systracef_close(struct file *fp, struct proc *p)
 {
 	struct fsystrace *fst = (struct fsystrace *)fp->f_data;
@@ -610,16 +572,7 @@
 	fst->p_ruid = p->p_cred->p_ruid;
 	fst->p_rgid = p->p_cred->p_rgid;
 
-	fp->f_flag = FREAD | FWRITE;
-	fp->f_type = DTYPE_MISC;
-	fp->f_ops = &systracefops;
-	fp->f_data = (caddr_t) fst;
-
-	curlwp->l_dupfd = fd;	/* XXX */
-	FILE_SET_MATURE(fp);
-	FILE_UNUSE(fp, p);
-
-	return (ENXIO);
+	return fdclone(p, fp, fd, &systracefops, fst);
 }
 
 void
Index: kern/sys_pipe.c
===================================================================
RCS file: /cvsroot/src/sys/kern/sys_pipe.c,v
retrieving revision 1.61
diff -u -u -r1.61 sys_pipe.c
--- kern/sys_pipe.c	21 Nov 2004 04:30:33 -0000	1.61
+++ kern/sys_pipe.c	29 Nov 2004 07:59:46 -0000
@@ -135,15 +135,13 @@
 		struct ucred *cred, int flags);
 static int pipe_close(struct file *fp, struct proc *p);
 static int pipe_poll(struct file *fp, int events, struct proc *p);
-static int pipe_fcntl(struct file *fp, u_int com, void *data,
-		struct proc *p);
 static int pipe_kqfilter(struct file *fp, struct knote *kn);
 static int pipe_stat(struct file *fp, struct stat *sb, struct proc *p);
 static int pipe_ioctl(struct file *fp, u_long cmd, void *data,
 		struct proc *p);
 
-static struct fileops pipeops = {
-	pipe_read, pipe_write, pipe_ioctl, pipe_fcntl, pipe_poll,
+static const struct fileops pipeops = {
+	pipe_read, pipe_write, pipe_ioctl, fnullop_fcntl, pipe_poll,
 	pipe_stat, pipe_close, pipe_kqfilter
 };
 
@@ -1484,19 +1482,6 @@
 	return (0);
 }
 
-static int
-pipe_fcntl(fp, cmd, data, p)
-	struct file *fp;
-	u_int cmd;
-	void *data;
-	struct proc *p;
-{
-	if (cmd == F_SETFL)
-		return (0);
-	else
-		return (EOPNOTSUPP);
-}
-
 /*
  * Handle pipe sysctls.
  */
Index: kern/tty_ptm.c
===================================================================
RCS file: /cvsroot/src/sys/kern/tty_ptm.c,v
retrieving revision 1.3
diff -u -u -r1.3 tty_ptm.c
--- kern/tty_ptm.c	24 Nov 2004 22:19:27 -0000	1.3
+++ kern/tty_ptm.c	29 Nov 2004 07:59:46 -0000
@@ -340,7 +340,7 @@
 		if ((error = pty_alloc_master(p, &fd, &dev)) != 0)
 			return error;
 		curlwp->l_dupfd = fd;
-		return ENXIO;
+		return EMOVEFD;
 	case 1:		/* /dev/ptm */
 		return 0;
 	default:
Index: kern/uipc_syscalls.c
===================================================================
RCS file: /cvsroot/src/sys/kern/uipc_syscalls.c,v
retrieving revision 1.88
diff -u -u -r1.88 uipc_syscalls.c
--- kern/uipc_syscalls.c	22 May 2004 22:52:13 -0000	1.88
+++ kern/uipc_syscalls.c	29 Nov 2004 07:59:46 -0000
@@ -64,7 +64,7 @@
 /*
  * System call interface to the socket abstraction.
  */
-extern	struct fileops socketops;
+extern const struct fileops socketops;
 
 int
 sys_socket(struct lwp *l, void *v, register_t *retval)
Index: kern/vfs_syscalls.c
===================================================================
RCS file: /cvsroot/src/sys/kern/vfs_syscalls.c,v
retrieving revision 1.212
diff -u -u -r1.212 vfs_syscalls.c
--- kern/vfs_syscalls.c	1 Oct 2004 16:30:56 -0000	1.212
+++ kern/vfs_syscalls.c	29 Nov 2004 07:59:46 -0000
@@ -1142,7 +1142,7 @@
 		FILE_UNUSE(fp, p);
 		fdp->fd_ofiles[indx] = NULL;
 		ffree(fp);
-		if ((error == ENODEV || error == ENXIO) &&
+		if ((error == EDUPFD || error == EMOVEFD) &&
 		    l->l_dupfd >= 0 &&			/* XXX from fdopen */
 		    (error =
 			dupfdopen(p, indx, l->l_dupfd, flags, error)) == 0) {
Index: kern/vfs_vnops.c
===================================================================
RCS file: /cvsroot/src/sys/kern/vfs_vnops.c,v
retrieving revision 1.82
diff -u -u -r1.82 vfs_vnops.c
--- kern/vfs_vnops.c	6 Nov 2004 07:34:53 -0000	1.82
+++ kern/vfs_vnops.c	29 Nov 2004 07:59:47 -0000
@@ -85,7 +85,7 @@
 static int vn_statfile(struct file *fp, struct stat *sb, struct proc *p);
 static int vn_ioctl(struct file *fp, u_long com, void *data, struct proc *p);
 
-struct 	fileops vnops = {
+const struct fileops vnops = {
 	vn_read, vn_write, vn_ioctl, vn_fcntl, vn_poll,
 	vn_statfile, vn_closefile, vn_kqfilter
 };
Index: miscfs/fdesc/fdesc_vnops.c
===================================================================
RCS file: /cvsroot/src/sys/miscfs/fdesc/fdesc_vnops.c,v
retrieving revision 1.81
diff -u -u -r1.81 fdesc_vnops.c
--- miscfs/fdesc/fdesc_vnops.c	27 Apr 2004 17:37:31 -0000	1.81
+++ miscfs/fdesc/fdesc_vnops.c	29 Nov 2004 07:59:47 -0000
@@ -450,7 +450,7 @@
 		 * VOP_OPEN will simply report the error.
 		 */
 		curlwp->l_dupfd = VTOFDESC(vp)->fd_fd;	/* XXX */
-		return (ENODEV);
+		return EDUPFD;
 
 	case Fctty:
 		return ((*ctty_cdevsw.d_open)(devctty, ap->a_mode, 0, ap->a_p));
Index: miscfs/portal/portal_vnops.c
===================================================================
RCS file: /cvsroot/src/sys/miscfs/portal/portal_vnops.c,v
retrieving revision 1.57
diff -u -u -r1.57 portal_vnops.c
--- miscfs/portal/portal_vnops.c	12 Nov 2004 04:15:29 -0000	1.57
+++ miscfs/portal/portal_vnops.c	29 Nov 2004 07:59:47 -0000
@@ -337,7 +337,7 @@
 	 * by testing whether the dupfd has been set.
 	 */
 	if (curlwp->l_dupfd >= 0)	/* XXX */
-		return (ENODEV);
+		return EDUPFD;
 
 	pt = VTOPORTAL(vp);
 	fmp = VFSTOPORTAL(vp->v_mount);
@@ -510,11 +510,11 @@
 
 	/*
 	 * Save the dup fd in the proc structure then return the
-	 * special error code (ENXIO) which causes magic things to
+	 * special error code (EMOVEFD) which causes magic things to
 	 * happen in vn_open.  The whole concept is, well, hmmm.
 	 */
-	curlwp->l_dupfd = fd;	/* XXX */
-	error = ENXIO;
+	curlwp->l_dupfd = fd;
+	error = EMOVEFD;
 
 bad:;
 	/*
Index: net/bpf.c
===================================================================
RCS file: /cvsroot/src/sys/net/bpf.c,v
retrieving revision 1.104
diff -u -u -r1.104 bpf.c
--- net/bpf.c	19 Aug 2004 20:58:23 -0000	1.104
+++ net/bpf.c	29 Nov 2004 07:59:47 -0000
@@ -41,8 +41,6 @@
 #include <sys/cdefs.h>
 __KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.104 2004/08/19 20:58:23 christos Exp $");
 
-#include "bpfilter.h"
-
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/mbuf.h>
@@ -53,8 +51,10 @@
 #include <sys/ioctl.h>
 #include <sys/conf.h>
 #include <sys/vnode.h>
+#include <sys/queue.h>
 
 #include <sys/file.h>
+#include <sys/filedesc.h>
 #include <sys/tty.h>
 #include <sys/uio.h>
 
@@ -106,7 +106,7 @@
  *  bpf_dtab holds the descriptors, indexed by minor device #
  */
 struct bpf_if	*bpf_iflist;
-struct bpf_d	bpf_dtab[NBPFILTER];
+LIST_HEAD(, bpf_d) bpf_list;
 
 static int	bpf_allocbufs(struct bpf_d *);
 static void	bpf_deliver(struct bpf_if *,
@@ -129,17 +129,31 @@
 static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
 static int	bpf_setdlt(struct bpf_d *, u_int);
 
+static int	bpf_read(struct file *, off_t *, struct uio *, struct ucred *,
+    int);
+static int	bpf_write(struct file *, off_t *, struct uio *, struct ucred *,
+    int);
+static int	bpf_ioctl(struct file *, u_long, void *, struct proc *);
+static int	bpf_poll(struct file *, int, struct proc *);
+static int	bpf_close(struct file *, struct proc *);
+static int	bpf_kqfilter(struct file *, struct knote *);
+
+static const struct fileops bpf_fileops = {
+	bpf_read,
+	bpf_write,
+	bpf_ioctl,
+	fnullop_fcntl,
+	bpf_poll,
+	fbadop_stat,
+	bpf_close,
+	bpf_kqfilter,
+};
+
 dev_type_open(bpfopen);
-dev_type_close(bpfclose);
-dev_type_read(bpfread);
-dev_type_write(bpfwrite);
-dev_type_ioctl(bpfioctl);
-dev_type_poll(bpfpoll);
-dev_type_kqfilter(bpfkqfilter);
 
 const struct cdevsw bpf_cdevsw = {
-	bpfopen, bpfclose, bpfread, bpfwrite, bpfioctl,
-	nostop, notty, bpfpoll, nommap, bpfkqfilter,
+	bpfopen, noclose, noread, nowrite, noioctl,
+	nostop, notty, nopoll, nommap, nokqfilter,
 };
 
 static int
@@ -339,9 +353,6 @@
  * This is probably cheaper than marking with a constant since
  * the address should be in a register anyway.
  */
-#define D_ISFREE(d) ((d) == (d)->bd_next)
-#define D_MARKFREE(d) ((d)->bd_next = (d))
-#define D_MARKUSED(d) ((d)->bd_next = 0)
 
 /*
  * bpfilterattach() is called at boot time.
@@ -351,18 +362,11 @@
 bpfilterattach(n)
 	int n;
 {
-	int i;
-	/*
-	 * Mark all the descriptors free.
-	 */
-	for (i = 0; i < NBPFILTER; ++i)
-		D_MARKFREE(&bpf_dtab[i]);
-
+	LIST_INIT(&bpf_list);
 }
 
 /*
- * Open ethernet device.  Returns ENXIO for illegal minor device number,
- * EBUSY if file is open by another process.
+ * Open ethernet device. Clones.
  */
 /* ARGSUSED */
 int
@@ -373,24 +377,22 @@
 	struct proc *p;
 {
 	struct bpf_d *d;
+	struct file *fp;
+	int error, fd;
 
-	if (minor(dev) >= NBPFILTER)
-		return (ENXIO);
-	/*
-	 * Each minor can be opened by only one process.  If the requested
-	 * minor is in use, return EBUSY.
-	 */
-	d = &bpf_dtab[minor(dev)];
-	if (!D_ISFREE(d))
-		return (EBUSY);
+	/* falloc() will use the descriptor for us. */
+	if ((error = falloc(p, &fp, &fd)) != 0)
+		return error;
 
-	/* Mark "free" and do most initialization. */
-	memset((char *)d, 0, sizeof(*d));
+	d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK);
+	(void)memset(d, 0, sizeof(*d));
 	d->bd_bufsize = bpf_bufsize;
 	d->bd_seesent = 1;
 	callout_init(&d->bd_callout);
 
-	return (0);
+	LIST_INSERT_HEAD(&bpf_list, d, bd_list);
+
+	return fdclone(p, fp, fd, &bpf_fileops, d);
 }
 
 /*
@@ -398,14 +400,10 @@
  * deallocating its buffers, and marking it free.
  */
 /* ARGSUSED */
-int
-bpfclose(dev, flag, mode, p)
-	dev_t dev;
-	int flag;
-	int mode;
-	struct proc *p;
+static int
+bpf_close(struct file *fp, struct proc *p)
 {
-	struct bpf_d *d = &bpf_dtab[minor(dev)];
+	struct bpf_d *d = fp->f_data;
 	int s;
 
 	s = splnet();
@@ -416,6 +414,9 @@
 		bpf_detachd(d);
 	splx(s);
 	bpf_freed(d);
+	LIST_REMOVE(d, bd_list);
+	free(d, M_DEVBUF);
+	fp->f_data = NULL;
 
 	return (0);
 }
@@ -434,13 +435,11 @@
 /*
  *  bpfread - read next chunk of packets from buffers
  */
-int
-bpfread(dev, uio, ioflag)
-	dev_t dev;
-	struct uio *uio;
-	int ioflag;
+static int
+bpf_read(struct file *fp, off_t *offp, struct uio *uio,
+    struct ucred *cred, int flags)
 {
-	struct bpf_d *d = &bpf_dtab[minor(dev)];
+	struct bpf_d *d = fp->f_data;
 	int timed_out;
 	int error;
 	int s;
@@ -463,7 +462,7 @@
 	 * have arrived to fill the store buffer.
 	 */
 	while (d->bd_hbuf == 0) {
-		if (ioflag & IO_NDELAY) {
+		if (fp->f_flag & FNONBLOCK) {
 			if (d->bd_slen == 0) {
 				splx(s);
 				return (EWOULDBLOCK);
@@ -567,13 +566,11 @@
 }
 
 
-int
-bpfwrite(dev, uio, ioflag)
-	dev_t dev;
-	struct uio *uio;
-	int ioflag;
+static int
+bpf_write(struct file *fp, off_t *offp, struct uio *uio,
+    struct ucred *cred, int flags)
 {
-	struct bpf_d *d = &bpf_dtab[minor(dev)];
+	struct bpf_d *d = fp->f_data;
 	struct ifnet *ifp;
 	struct mbuf *m;
 	int error, s;
@@ -650,15 +647,14 @@
  *  BIOSHDRCMPLT	Set "header already complete" flag.
  */
 /* ARGSUSED */
-int
-bpfioctl(dev_t dev, u_long cmd, caddr_t arg, int flag, struct proc *p)
+static int
+bpf_ioctl(struct file *fp, u_long cmd, void *data, struct proc *p)
 {
-	struct bpf_d *d = &bpf_dtab[minor(dev)];
+	struct bpf_d *d = fp->f_data;
 	int s, error = 0;
 #ifdef BPF_KERN_FILTER
 	struct bpf_insn **p;
 #endif
-	void *addr = arg;
 
 	s = splnet();
 	if (d->bd_state == BPF_WAITING)
@@ -685,7 +681,7 @@
 				n += d->bd_hlen;
 			splx(s);
 
-			*(int *)addr = n;
+			*(int *)data = n;
 			break;
 		}
 
@@ -693,7 +689,7 @@
 	 * Get buffer len [for read()].
 	 */
 	case BIOCGBLEN:
-		*(u_int *)addr = d->bd_bufsize;
+		*(u_int *)data = d->bd_bufsize;
 		break;
 
 	/*
@@ -703,12 +699,12 @@
 		if (d->bd_bif != 0)
 			error = EINVAL;
 		else {
-			u_int size = *(u_int *)addr;
+			u_int size = *(u_int *)data;
 
 			if (size > bpf_maxbufsize)
-				*(u_int *)addr = size = bpf_maxbufsize;
+				*(u_int *)data = size = bpf_maxbufsize;
 			else if (size < BPF_MINBUFSIZE)
-				*(u_int *)addr = size = BPF_MINBUFSIZE;
+				*(u_int *)data = size = BPF_MINBUFSIZE;
 			d->bd_bufsize = size;
 		}
 		break;
@@ -717,7 +713,7 @@
 	 * Set link layer read filter.
 	 */
 	case BIOCSETF:
-		error = bpf_setf(d, addr);
+		error = bpf_setf(d, data);
 		break;
 
 #ifdef BPF_KERN_FILTER
@@ -732,7 +728,7 @@
 		}
 
 		/* Validate and store filter */
-		error = bpf_setf(d, addr);
+		error = bpf_setf(d, data);
 
 		/* Free possible old filter */
 		if (cmd == BIOCSTCPF)
@@ -786,7 +782,7 @@
 		if (d->bd_bif == 0)
 			error = EINVAL;
 		else
-			*(u_int *)addr = d->bd_bif->bif_dlt;
+			*(u_int *)data = d->bd_bif->bif_dlt;
 		break;
 
 	/*
@@ -796,7 +792,7 @@
 		if (d->bd_bif == 0)
 			error = EINVAL;
 		else
-			error = bpf_getdltlist(d, addr);
+			error = bpf_getdltlist(d, data);
 		break;
 
 	/*
@@ -806,7 +802,7 @@
 		if (d->bd_bif == 0)
 			error = EINVAL;
 		else
-			error = bpf_setdlt(d, *(u_int *)addr);
+			error = bpf_setdlt(d, *(u_int *)data);
 		break;
 
 	/*
@@ -816,14 +812,14 @@
 		if (d->bd_bif == 0)
 			error = EINVAL;
 		else
-			bpf_ifname(d->bd_bif->bif_ifp, addr);
+			bpf_ifname(d->bd_bif->bif_ifp, data);
 		break;
 
 	/*
 	 * Set interface.
 	 */
 	case BIOCSETIF:
-		error = bpf_setif(d, addr);
+		error = bpf_setif(d, data);
 		break;
 
 	/*
@@ -831,7 +827,7 @@
 	 */
 	case BIOCSRTIMEOUT:
 		{
-			struct timeval *tv = addr;
+			struct timeval *tv = data;
 
 			/* Compute number of ticks. */
 			d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick;
@@ -845,7 +841,7 @@
 	 */
 	case BIOCGRTIMEOUT:
 		{
-			struct timeval *tv = addr;
+			struct timeval *tv = data;
 
 			tv->tv_sec = d->bd_rtout / hz;
 			tv->tv_usec = (d->bd_rtout % hz) * tick;
@@ -857,7 +853,7 @@
 	 */
 	case BIOCGSTATS:
 		{
-			struct bpf_stat *bs = addr;
+			struct bpf_stat *bs = data;
 
 			bs->bs_recv = d->bd_rcount;
 			bs->bs_drop = d->bd_dcount;
@@ -867,7 +863,7 @@
 
 	case BIOCGSTATSOLD:
 		{
-			struct bpf_stat_old *bs = addr;
+			struct bpf_stat_old *bs = data;
 
 			bs->bs_recv = d->bd_rcount;
 			bs->bs_drop = d->bd_dcount;
@@ -878,12 +874,12 @@
 	 * Set immediate mode.
 	 */
 	case BIOCIMMEDIATE:
-		d->bd_immediate = *(u_int *)addr;
+		d->bd_immediate = *(u_int *)data;
 		break;
 
 	case BIOCVERSION:
 		{
-			struct bpf_version *bv = addr;
+			struct bpf_version *bv = data;
 
 			bv->bv_major = BPF_MAJOR_VERSION;
 			bv->bv_minor = BPF_MINOR_VERSION;
@@ -891,25 +887,25 @@
 		}
 
 	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
-		*(u_int *)addr = d->bd_hdrcmplt;
+		*(u_int *)data = d->bd_hdrcmplt;
 		break;
 
 	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
-		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
+		d->bd_hdrcmplt = *(u_int *)data ? 1 : 0;
 		break;
 
 	/*
 	 * Get "see sent packets" flag
 	 */
 	case BIOCGSEESENT:
-		*(u_int *)addr = d->bd_seesent;
+		*(u_int *)data = d->bd_seesent;
 		break;
 
 	/*
 	 * Set "see sent" packets flag
 	 */
 	case BIOCSSEESENT:
-		d->bd_seesent = *(u_int *)addr;
+		d->bd_seesent = *(u_int *)data;
 		break;
 
 	case FIONBIO:		/* Non-blocking I/O */
@@ -921,17 +917,17 @@
 		break;
 
 	case FIOASYNC:		/* Send signal on receive packets */
-		d->bd_async = *(int *)addr;
+		d->bd_async = *(int *)data;
 		break;
 
 	case TIOCSPGRP:		/* Process or group to send signals to */
 	case FIOSETOWN:
-		error = fsetown(p, &d->bd_pgid, cmd, addr);
+		error = fsetown(p, &d->bd_pgid, cmd, data);
 		break;
 
 	case TIOCGPGRP:
 	case FIOGETOWN:
-		error = fgetown(p, d->bd_pgid, cmd, addr);
+		error = fgetown(p, d->bd_pgid, cmd, data);
 		break;
 	}
 	return (error);
@@ -1074,10 +1070,10 @@
  * ability to write to the BPF device.
  * Otherwise, return false but make a note that a selwakeup() must be done.
  */
-int
-bpfpoll(dev_t dev, int events, struct proc *p)
+static int
+bpf_poll(struct file *fp, int events, struct proc *p)
 {
-	struct bpf_d *d = &bpf_dtab[minor(dev)];
+	struct bpf_d *d = fp->f_data;
 	int s = splnet();
 	int revents;
 
@@ -1134,10 +1130,10 @@
 static const struct filterops bpfread_filtops =
 	{ 1, NULL, filt_bpfrdetach, filt_bpfread };
 
-int
-bpfkqfilter(dev_t dev, struct knote *kn)
+static int
+bpf_kqfilter(struct file *fp, struct knote *kn)
 {
-	struct bpf_d *d = &bpf_dtab[minor(dev)];
+	struct bpf_d *d = fp->f_data;
 	struct klist *klist;
 	int s;
 
@@ -1501,8 +1497,6 @@
 	}
 	if (d->bd_filter)
 		free(d->bd_filter, M_DEVBUF);
-
-	D_MARKFREE(d);
 }
 
 /*
@@ -1560,16 +1554,11 @@
 {
 	struct bpf_if *bp, **pbp;
 	struct bpf_d *d;
-	int i, s, cmaj;
-
-	/* locate the major number */
-	cmaj = cdevsw_lookup_major(&bpf_cdevsw);
+	int s;
 
 	/* Nuke the vnodes for any open instances */
-	for (i = 0; i < NBPFILTER; ++i) {
-		d = &bpf_dtab[i];
-		if (!D_ISFREE(d) && d->bd_bif != NULL &&
-		    d->bd_bif->bif_ifp == ifp) {
+	for (d = LIST_FIRST(&bpf_list); d != NULL; d = LIST_NEXT(d, bd_list)) {
+		if (d->bd_bif != NULL && d->bd_bif->bif_ifp == ifp) {
 			/*
 			 * Detach the descriptor from an interface now.
 			 * It will be free'ed later by close routine.
@@ -1578,7 +1567,6 @@
 			d->bd_promisc = 0;	/* we can't touch device. */
 			bpf_detachd(d);
 			splx(s);
-			vdevgone(cmaj, i, i, VCHR);
 		}
 	}
 
Index: net/bpfdesc.h
===================================================================
RCS file: /cvsroot/src/sys/net/bpfdesc.h,v
retrieving revision 1.20
diff -u -u -r1.20 bpfdesc.h
--- net/bpfdesc.h	29 May 2004 14:18:33 -0000	1.20
+++ net/bpfdesc.h	29 Nov 2004 07:59:47 -0000
@@ -89,6 +89,7 @@
 	struct selinfo	bd_sel;		/* bsd select info */
 #endif
 	struct callout	bd_callout;	/* for BPF timeouts with select */
+	LIST_ENTRY(bpf_d) bd_list;	/* list of all BPF's */
 };
 
 
Index: opencrypto/cryptodev.c
===================================================================
RCS file: /cvsroot/src/sys/opencrypto/cryptodev.c,v
retrieving revision 1.11
diff -u -u -r1.11 cryptodev.c
--- opencrypto/cryptodev.c	17 Sep 2004 14:11:27 -0000	1.11
+++ opencrypto/cryptodev.c	29 Nov 2004 07:59:47 -0000
@@ -97,21 +97,17 @@
 static int	cryptof_read(struct file *, off_t *, struct uio *, struct ucred *, int);
 static int	cryptof_write(struct file *, off_t *, struct uio *, struct ucred *, int);
 static int	cryptof_ioctl(struct file *, u_long, void*, struct proc *p);
-static int	cryptof_fcntl(struct file *, u_int, void*, struct proc *p);
-static int	cryptof_poll(struct file *, int, struct proc *);
-static int	cryptof_kqfilter(struct file *, struct knote *);
-static int	cryptof_stat(struct file *, struct stat *, struct proc *);
 static int	cryptof_close(struct file *, struct proc *);
 
-static struct fileops cryptofops = {
+static const struct fileops cryptofops = {
     cryptof_read,
     cryptof_write,
     cryptof_ioctl,
-    cryptof_fcntl,
-    cryptof_poll,
-    cryptof_stat,
+    fnullop_fcntl,
+    fnullop_poll,
+    fbadop_stat,
     cryptof_close,
-    cryptof_kqfilter
+    fnullop_kqfilter
 };
 
 static struct	csession *csefind(struct fcrypt *, u_int);
@@ -327,13 +323,6 @@
 	return (error);
 }
 
-/* ARGSUSED */
-int
-cryptof_fcntl(struct file *fp, u_int cmd, void *data, struct proc *p)
-{
-  return (0);
-}
-
 static int
 cryptodev_op(struct csession *cse, struct crypt_op *cop, struct proc *p)
 {
@@ -598,29 +587,6 @@
 
 /* ARGSUSED */
 static int
-cryptof_poll(struct file *fp, int which, struct proc *p)
-{
-	return (0);
-}
-
-
-/* ARGSUSED */
-static int
-cryptof_kqfilter(struct file *fp, struct knote *kn)
-{
-
-	return (0);
-}
-
-/* ARGSUSED */
-static int
-cryptof_stat(struct file *fp, struct stat *sb, struct proc *p)
-{
-	return (EOPNOTSUPP);
-}
-
-/* ARGSUSED */
-static int
 cryptof_close(struct file *fp, struct proc *p)
 {
 	struct fcrypt *fcr = (struct fcrypt *)fp->f_data;
Index: sys/errno.h
===================================================================
RCS file: /cvsroot/src/sys/sys/errno.h,v
retrieving revision 1.34
diff -u -u -r1.34 errno.h
--- sys/errno.h	7 Aug 2003 16:34:03 -0000	1.34
+++ sys/errno.h	29 Nov 2004 07:59:47 -0000
@@ -171,6 +171,8 @@
 #define	EJUSTRETURN	-2		/* don't modify regs, just return */
 #define	ERESTART	-3		/* restart syscall */
 #define	EPASSTHROUGH	-4		/* ioctl not handled by this layer */
+#define	EDUPFD		-5		/* Dup given fd */
+#define	EMOVEFD		-6		/* Move given fd */
 #endif
 
 #endif /* !_SYS_ERRNO_H_ */
Index: sys/file.h
===================================================================
RCS file: /cvsroot/src/sys/sys/file.h,v
retrieving revision 1.50
diff -u -u -r1.50 file.h
--- sys/file.h	16 May 2004 17:48:18 -0000	1.50
+++ sys/file.h	29 Nov 2004 07:59:47 -0000
@@ -70,7 +70,7 @@
 	u_int		f_msgcount;	/* references from message queue */
 	int		f_usecount;	/* number active users */
 	struct ucred	*f_cred;	/* creds associated with descriptor */
-	struct fileops {
+	const struct fileops {
 		int	(*fo_read)	(struct file *, off_t *, struct uio *,
 					    struct ucred *, int);
 		int	(*fo_write)	(struct file *, off_t *, struct uio *,
@@ -151,7 +151,7 @@
 extern int		maxfiles;	/* kernel limit on # of open files */
 extern int		nfiles;		/* actual number of open files */
 
-extern struct fileops	vnops;		/* vnode operations for files */
+extern const struct fileops vnops;	/* vnode operations for files */
 
 int	dofileread(struct proc *, int, struct file *, void *, size_t,
 	    off_t *, int, register_t *);
@@ -167,6 +167,15 @@
 int	fgetown(struct proc *, pid_t, int, void *);
 void	fownsignal(pid_t, int, int, int, void *);
 
+int	fdclone(struct proc *, struct file *, int, const struct fileops *,
+    void *);
+
+/* Commonly used fileops */
+int	fnullop_fcntl(struct file *, u_int, void *, struct proc *);
+int	fnullop_poll(struct file *, int, struct proc *);
+int	fnullop_kqfilter(struct file *, struct knote *);
+int	fbadop_stat(struct file *, struct stat *, struct proc *);
+
 #endif /* _KERNEL */
 
 #endif /* _SYS_FILE_H_ */