Subject: turn bpf device into a cloner
To: None <tech-net@netbsd.org>
From: Christos Zoulas <christos@zoulas.com>
List: tech-net
Date: 11/27/2004 13:52:06
The following patch turns the /dev/bpfX devices into cloning devices. Only
one node, /dev/bpf, is really needed, but for now we can leave the stray
/dev/bpfX nodes in place until we fix the source code. Comments?

christos

Index: bpf.c
===================================================================
RCS file: /cvsroot/src/sys/net/bpf.c,v
retrieving revision 1.104
diff -u -u -r1.104 bpf.c
--- bpf.c	19 Aug 2004 20:58:23 -0000	1.104
+++ bpf.c	27 Nov 2004 18:46:26 -0000
@@ -41,8 +41,6 @@
 #include <sys/cdefs.h>
 __KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.104 2004/08/19 20:58:23 christos Exp $");
 
-#include "bpfilter.h"
-
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/mbuf.h>
@@ -53,8 +51,10 @@
 #include <sys/ioctl.h>
 #include <sys/conf.h>
 #include <sys/vnode.h>
+#include <sys/queue.h>
 
 #include <sys/file.h>
+#include <sys/filedesc.h>
 #include <sys/tty.h>
 #include <sys/uio.h>
 
@@ -106,7 +106,7 @@
  *  bpf_dtab holds the descriptors, indexed by minor device #
  */
 struct bpf_if	*bpf_iflist;
-struct bpf_d	bpf_dtab[NBPFILTER];
+LIST_HEAD(, bpf_d) bpf_list;
 
 static int	bpf_allocbufs(struct bpf_d *);
 static void	bpf_deliver(struct bpf_if *,
@@ -129,17 +129,33 @@
 static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
 static int	bpf_setdlt(struct bpf_d *, u_int);
 
+static int	bpf_read(struct file *, off_t *, struct uio *, struct ucred *,
+    int);
+static int	bpf_write(struct file *, off_t *, struct uio *, struct ucred *,
+    int);
+static int	bpf_ioctl(struct file *, u_long, void *, struct proc *);
+static int	bpf_fcntl(struct file *, u_int, void *, struct proc *);
+static int	bpf_poll(struct file *, int, struct proc *);
+static int	bpf_stat(struct file *, struct stat *, struct proc *);
+static int	bpf_close(struct file *, struct proc *);
+static int	bpf_kqfilter(struct file *, struct knote *);
+
+static struct fileops bpf_fileops = {
+	bpf_read,
+	bpf_write,
+	bpf_ioctl,
+	bpf_fcntl,
+	bpf_poll,
+	bpf_stat,
+	bpf_close,
+	bpf_kqfilter,
+};
+
 dev_type_open(bpfopen);
-dev_type_close(bpfclose);
-dev_type_read(bpfread);
-dev_type_write(bpfwrite);
-dev_type_ioctl(bpfioctl);
-dev_type_poll(bpfpoll);
-dev_type_kqfilter(bpfkqfilter);
 
 const struct cdevsw bpf_cdevsw = {
-	bpfopen, bpfclose, bpfread, bpfwrite, bpfioctl,
-	nostop, notty, bpfpoll, nommap, bpfkqfilter,
+	bpfopen, noclose, noread, nowrite, noioctl,
+	nostop, notty, nopoll, nommap, nokqfilter,
 };
 
 static int
@@ -339,9 +355,6 @@
  * This is probably cheaper than marking with a constant since
  * the address should be in a register anyway.
  */
-#define D_ISFREE(d) ((d) == (d)->bd_next)
-#define D_MARKFREE(d) ((d)->bd_next = (d))
-#define D_MARKUSED(d) ((d)->bd_next = 0)
 
 /*
  * bpfilterattach() is called at boot time.
@@ -351,18 +364,11 @@
 bpfilterattach(n)
 	int n;
 {
-	int i;
-	/*
-	 * Mark all the descriptors free.
-	 */
-	for (i = 0; i < NBPFILTER; ++i)
-		D_MARKFREE(&bpf_dtab[i]);
-
+	LIST_INIT(&bpf_list);
 }
 
 /*
- * Open ethernet device.  Returns ENXIO for illegal minor device number,
- * EBUSY if file is open by another process.
+ * Open ethernet device.  Each open clones a new descriptor.
  */
 /* ARGSUSED */
 int
@@ -373,24 +379,31 @@
 	struct proc *p;
 {
 	struct bpf_d *d;
+	struct file *fp;
+	int error, fd;
 
-	if (minor(dev) >= NBPFILTER)
-		return (ENXIO);
-	/*
-	 * Each minor can be opened by only one process.  If the requested
-	 * minor is in use, return EBUSY.
-	 */
-	d = &bpf_dtab[minor(dev)];
-	if (!D_ISFREE(d))
-		return (EBUSY);
+	/* falloc() will use the descriptor for us. */
+	if ((error = falloc(p, &fp, &fd)) != 0)
+		return error;
 
-	/* Mark "free" and do most initialization. */
-	memset((char *)d, 0, sizeof(*d));
+	d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK);
+	(void)memset(d, 0, sizeof(*d));
 	d->bd_bufsize = bpf_bufsize;
 	d->bd_seesent = 1;
 	callout_init(&d->bd_callout);
 
-	return (0);
+	LIST_INSERT_HEAD(&bpf_list, d, bd_list);
+
+	fp->f_flag = flag;
+	fp->f_type = DTYPE_MISC;
+	fp->f_ops = &bpf_fileops;
+	fp->f_data = d;
+
+	curlwp->l_dupfd = fd;
+	FILE_SET_MATURE(fp);
+	FILE_UNUSE(fp, p);
+
+	return ENXIO;
 }
 
 /*
@@ -398,14 +411,10 @@
  * deallocating its buffers, and marking it free.
  */
 /* ARGSUSED */
-int
-bpfclose(dev, flag, mode, p)
-	dev_t dev;
-	int flag;
-	int mode;
-	struct proc *p;
+static int
+bpf_close(struct file *fp, struct proc *p)
 {
-	struct bpf_d *d = &bpf_dtab[minor(dev)];
+	struct bpf_d *d = fp->f_data;
 	int s;
 
 	s = splnet();
@@ -416,6 +425,9 @@
 		bpf_detachd(d);
 	splx(s);
 	bpf_freed(d);
+	LIST_REMOVE(d, bd_list);
+	free(d, M_DEVBUF);
+	fp->f_data = NULL;
 
 	return (0);
 }
@@ -434,13 +446,11 @@
 /*
  *  bpfread - read next chunk of packets from buffers
  */
-int
-bpfread(dev, uio, ioflag)
-	dev_t dev;
-	struct uio *uio;
-	int ioflag;
+static int
+bpf_read(struct file *fp, off_t *offp, struct uio *uio,
+    struct ucred *cred, int flags)
 {
-	struct bpf_d *d = &bpf_dtab[minor(dev)];
+	struct bpf_d *d = fp->f_data;
 	int timed_out;
 	int error;
 	int s;
@@ -463,7 +473,7 @@
 	 * have arrived to fill the store buffer.
 	 */
 	while (d->bd_hbuf == 0) {
-		if (ioflag & IO_NDELAY) {
+		if (fp->f_flag & FNONBLOCK) {
 			if (d->bd_slen == 0) {
 				splx(s);
 				return (EWOULDBLOCK);
@@ -567,13 +577,11 @@
 }
 
 
-int
-bpfwrite(dev, uio, ioflag)
-	dev_t dev;
-	struct uio *uio;
-	int ioflag;
+static int
+bpf_write(struct file *fp, off_t *offp, struct uio *uio,
+    struct ucred *cred, int flags)
 {
-	struct bpf_d *d = &bpf_dtab[minor(dev)];
+	struct bpf_d *d = fp->f_data;
 	struct ifnet *ifp;
 	struct mbuf *m;
 	int error, s;
@@ -607,6 +615,20 @@
 	return (error);
 }
 
+static int
+bpf_stat(struct file *fp, struct stat *sb, struct proc *p)
+{
+	return EOPNOTSUPP;
+}       
+
+static int
+bpf_fcntl(struct file *fp, u_int cmd, void *data, struct proc *p)
+{ 
+	if (cmd == F_SETFL)
+		return 0;
+	return EOPNOTSUPP;
+}
+
 /*
  * Reset a descriptor by flushing its packet buffer and clearing the
  * receive and drop counts.  Should be called at splnet.
@@ -650,15 +672,14 @@
  *  BIOSHDRCMPLT	Set "header already complete" flag.
  */
 /* ARGSUSED */
-int
-bpfioctl(dev_t dev, u_long cmd, caddr_t arg, int flag, struct proc *p)
+static int
+bpf_ioctl(struct file *fp, u_long cmd, void *data, struct proc *p)
 {
-	struct bpf_d *d = &bpf_dtab[minor(dev)];
+	struct bpf_d *d = fp->f_data;
 	int s, error = 0;
 #ifdef BPF_KERN_FILTER
 	struct bpf_insn **p;
 #endif
-	void *addr = arg;
 
 	s = splnet();
 	if (d->bd_state == BPF_WAITING)
@@ -685,7 +706,7 @@
 				n += d->bd_hlen;
 			splx(s);
 
-			*(int *)addr = n;
+			*(int *)data = n;
 			break;
 		}
 
@@ -693,7 +714,7 @@
 	 * Get buffer len [for read()].
 	 */
 	case BIOCGBLEN:
-		*(u_int *)addr = d->bd_bufsize;
+		*(u_int *)data = d->bd_bufsize;
 		break;
 
 	/*
@@ -703,12 +724,12 @@
 		if (d->bd_bif != 0)
 			error = EINVAL;
 		else {
-			u_int size = *(u_int *)addr;
+			u_int size = *(u_int *)data;
 
 			if (size > bpf_maxbufsize)
-				*(u_int *)addr = size = bpf_maxbufsize;
+				*(u_int *)data = size = bpf_maxbufsize;
 			else if (size < BPF_MINBUFSIZE)
-				*(u_int *)addr = size = BPF_MINBUFSIZE;
+				*(u_int *)data = size = BPF_MINBUFSIZE;
 			d->bd_bufsize = size;
 		}
 		break;
@@ -717,7 +738,7 @@
 	 * Set link layer read filter.
 	 */
 	case BIOCSETF:
-		error = bpf_setf(d, addr);
+		error = bpf_setf(d, data);
 		break;
 
 #ifdef BPF_KERN_FILTER
@@ -732,7 +753,7 @@
 		}
 
 		/* Validate and store filter */
-		error = bpf_setf(d, addr);
+		error = bpf_setf(d, data);
 
 		/* Free possible old filter */
 		if (cmd == BIOCSTCPF)
@@ -786,7 +807,7 @@
 		if (d->bd_bif == 0)
 			error = EINVAL;
 		else
-			*(u_int *)addr = d->bd_bif->bif_dlt;
+			*(u_int *)data = d->bd_bif->bif_dlt;
 		break;
 
 	/*
@@ -796,7 +817,7 @@
 		if (d->bd_bif == 0)
 			error = EINVAL;
 		else
-			error = bpf_getdltlist(d, addr);
+			error = bpf_getdltlist(d, data);
 		break;
 
 	/*
@@ -806,7 +827,7 @@
 		if (d->bd_bif == 0)
 			error = EINVAL;
 		else
-			error = bpf_setdlt(d, *(u_int *)addr);
+			error = bpf_setdlt(d, *(u_int *)data);
 		break;
 
 	/*
@@ -816,14 +837,14 @@
 		if (d->bd_bif == 0)
 			error = EINVAL;
 		else
-			bpf_ifname(d->bd_bif->bif_ifp, addr);
+			bpf_ifname(d->bd_bif->bif_ifp, data);
 		break;
 
 	/*
 	 * Set interface.
 	 */
 	case BIOCSETIF:
-		error = bpf_setif(d, addr);
+		error = bpf_setif(d, data);
 		break;
 
 	/*
@@ -831,7 +852,7 @@
 	 */
 	case BIOCSRTIMEOUT:
 		{
-			struct timeval *tv = addr;
+			struct timeval *tv = data;
 
 			/* Compute number of ticks. */
 			d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick;
@@ -845,7 +866,7 @@
 	 */
 	case BIOCGRTIMEOUT:
 		{
-			struct timeval *tv = addr;
+			struct timeval *tv = data;
 
 			tv->tv_sec = d->bd_rtout / hz;
 			tv->tv_usec = (d->bd_rtout % hz) * tick;
@@ -857,7 +878,7 @@
 	 */
 	case BIOCGSTATS:
 		{
-			struct bpf_stat *bs = addr;
+			struct bpf_stat *bs = data;
 
 			bs->bs_recv = d->bd_rcount;
 			bs->bs_drop = d->bd_dcount;
@@ -867,7 +888,7 @@
 
 	case BIOCGSTATSOLD:
 		{
-			struct bpf_stat_old *bs = addr;
+			struct bpf_stat_old *bs = data;
 
 			bs->bs_recv = d->bd_rcount;
 			bs->bs_drop = d->bd_dcount;
@@ -878,12 +899,12 @@
 	 * Set immediate mode.
 	 */
 	case BIOCIMMEDIATE:
-		d->bd_immediate = *(u_int *)addr;
+		d->bd_immediate = *(u_int *)data;
 		break;
 
 	case BIOCVERSION:
 		{
-			struct bpf_version *bv = addr;
+			struct bpf_version *bv = data;
 
 			bv->bv_major = BPF_MAJOR_VERSION;
 			bv->bv_minor = BPF_MINOR_VERSION;
@@ -891,25 +912,25 @@
 		}
 
 	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
-		*(u_int *)addr = d->bd_hdrcmplt;
+		*(u_int *)data = d->bd_hdrcmplt;
 		break;
 
 	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
-		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
+		d->bd_hdrcmplt = *(u_int *)data ? 1 : 0;
 		break;
 
 	/*
 	 * Get "see sent packets" flag
 	 */
 	case BIOCGSEESENT:
-		*(u_int *)addr = d->bd_seesent;
+		*(u_int *)data = d->bd_seesent;
 		break;
 
 	/*
 	 * Set "see sent" packets flag
 	 */
 	case BIOCSSEESENT:
-		d->bd_seesent = *(u_int *)addr;
+		d->bd_seesent = *(u_int *)data;
 		break;
 
 	case FIONBIO:		/* Non-blocking I/O */
@@ -921,17 +942,17 @@
 		break;
 
 	case FIOASYNC:		/* Send signal on receive packets */
-		d->bd_async = *(int *)addr;
+		d->bd_async = *(int *)data;
 		break;
 
 	case TIOCSPGRP:		/* Process or group to send signals to */
 	case FIOSETOWN:
-		error = fsetown(p, &d->bd_pgid, cmd, addr);
+		error = fsetown(p, &d->bd_pgid, cmd, data);
 		break;
 
 	case TIOCGPGRP:
 	case FIOGETOWN:
-		error = fgetown(p, d->bd_pgid, cmd, addr);
+		error = fgetown(p, d->bd_pgid, cmd, data);
 		break;
 	}
 	return (error);
@@ -1074,10 +1095,10 @@
  * ability to write to the BPF device.
  * Otherwise, return false but make a note that a selwakeup() must be done.
  */
-int
-bpfpoll(dev_t dev, int events, struct proc *p)
+static int
+bpf_poll(struct file *fp, int events, struct proc *p)
 {
-	struct bpf_d *d = &bpf_dtab[minor(dev)];
+	struct bpf_d *d = fp->f_data;
 	int s = splnet();
 	int revents;
 
@@ -1134,10 +1155,10 @@
 static const struct filterops bpfread_filtops =
 	{ 1, NULL, filt_bpfrdetach, filt_bpfread };
 
-int
-bpfkqfilter(dev_t dev, struct knote *kn)
+static int
+bpf_kqfilter(struct file *fp, struct knote *kn)
 {
-	struct bpf_d *d = &bpf_dtab[minor(dev)];
+	struct bpf_d *d = fp->f_data;
 	struct klist *klist;
 	int s;
 
@@ -1501,8 +1522,6 @@
 	}
 	if (d->bd_filter)
 		free(d->bd_filter, M_DEVBUF);
-
-	D_MARKFREE(d);
 }
 
 /*
@@ -1560,16 +1579,11 @@
 {
 	struct bpf_if *bp, **pbp;
 	struct bpf_d *d;
-	int i, s, cmaj;
-
-	/* locate the major number */
-	cmaj = cdevsw_lookup_major(&bpf_cdevsw);
+	int s;
 
 	/* Nuke the vnodes for any open instances */
-	for (i = 0; i < NBPFILTER; ++i) {
-		d = &bpf_dtab[i];
-		if (!D_ISFREE(d) && d->bd_bif != NULL &&
-		    d->bd_bif->bif_ifp == ifp) {
+	for (d = LIST_FIRST(&bpf_list); d != NULL; d = LIST_NEXT(d, bd_list)) {
+		if (d->bd_bif != NULL && d->bd_bif->bif_ifp == ifp) {
 			/*
 			 * Detach the descriptor from an interface now.
 			 * It will be free'ed later by close routine.
@@ -1578,7 +1592,6 @@
 			d->bd_promisc = 0;	/* we can't touch device. */
 			bpf_detachd(d);
 			splx(s);
-			vdevgone(cmaj, i, i, VCHR);
 		}
 	}
 
Index: bpfdesc.h
===================================================================
RCS file: /cvsroot/src/sys/net/bpfdesc.h,v
retrieving revision 1.20
diff -u -u -r1.20 bpfdesc.h
--- bpfdesc.h	29 May 2004 14:18:33 -0000	1.20
+++ bpfdesc.h	27 Nov 2004 18:46:26 -0000
@@ -89,6 +89,7 @@
 	struct selinfo	bd_sel;		/* bsd select info */
 #endif
 	struct callout	bd_callout;	/* for BPF timeouts with select */
+	LIST_ENTRY(bpf_d) bd_list;	/* list of all BPF's */
 };