Subject: tap(4): virtual Ethernet device
To: None <tech-net@NetBSD.org>
From: Quentin Garnier <cube@cubidou.net>
List: tech-net
Date: 01/03/2005 06:14:37
--Z8pjODCYYz+3zRe3
Content-Type: multipart/mixed; boundary="NAuYj0K7Umiq33rm"
Content-Disposition: inline


--NAuYj0K7Umiq33rm
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable

I know what Nathan said about peer review, and yet I wouldn't
feel comfortable committing this without getting feedback before.

NAME
     tap - virtual Ethernet device

SYNOPSIS
     pseudo-device tap

DESCRIPTION
     The tap driver allows the creation and use of virtual Ethernet devices.
     Those interfaces appear just as any real Ethernet NIC to the kernel, b=
ut
     can also be accessed by userland through a character device node in or=
der
     to read frames being sent by the system or to inject frames.

     In that respect it is very similar to what tun(4) provides, but the ad=
ded
     Ethernet layer allows easy integration with machine emulators or virtu=
al
     Ethernet networks through the use of bridge(4) with tunneling.

--=20
Quentin Garnier - cube@cubidou.net - cube@NetBSD.org
"Commala-come-five! / Even when the shadows rise!
To see the world and walk the world / Makes ya glad to be alive."
Susannah's Song, The Dark Tower VI, Stephen King, 2004.

--NAuYj0K7Umiq33rm
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="tap.diff"
Content-Transfer-Encoding: quoted-printable

Index: etc/MAKEDEV.tmpl
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
RCS file: /cvsroot/src/etc/MAKEDEV.tmpl,v
retrieving revision 1.34
diff -u -r1.34 MAKEDEV.tmpl
--- etc/MAKEDEV.tmpl	29 Dec 2004 06:36:46 -0000	1.34
+++ etc/MAKEDEV.tmpl	1 Jan 2005 13:44:22 -0000
@@ -266,6 +266,7 @@
 #	stic*	PixelStamp interface chip
 #	sysmon	System Monitoring hardware
 #	systrace syscall tracer
+#	tap*	virtual Ethernet device
 #	tun*	network tunnel driver
 #	twe	3ware Escalade control interface
 #	uk*	unknown SCSI device
@@ -435,6 +436,7 @@
 	makedev lkm clockctl
 	makedev local
 	makedev atabus0 atabus1 atabus2 atabus3
+	makedev tap tap0 tap1 tap2 tap3
 	;;
=20
 init)
@@ -1465,6 +1467,18 @@
 	mkdev drvctl c %drvctl_chr% 0 644
 	;;
=20
+tap*)
+	unit=3D${i#tap}
+	case "$unit" in
+	[0-9]*)
+		mkdev tap${unit} c %tap_chr% ${unit} 600
+		;;
+	*)
+		mkdev tap c %tap_chr% 0xfffff 600
+		;;
+	esac
+	;;
+
 midevend)
 %MI_DEVICES_END%
 local)
Index: distrib/sets/lists/comp/mi
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
RCS file: /cvsroot/src/distrib/sets/lists/comp/mi,v
retrieving revision 1.748
diff -u -r1.748 mi
--- distrib/sets/lists/comp/mi	29 Dec 2004 00:12:00 -0000	1.748
+++ distrib/sets/lists/comp/mi	1 Jan 2005 13:44:26 -0000
@@ -879,6 +879,7 @@
 ./usr/include/net/if_sppp.h			comp-c-include
 ./usr/include/net/if_stf.h			comp-c-include
 ./usr/include/net/if_stripvar.h			comp-c-include
+./usr/include/net/if_tap.h			comp-c-include
 ./usr/include/net/if_token.h			comp-c-include
 ./usr/include/net/if_tun.h			comp-c-include
 ./usr/include/net/if_types.h			comp-c-include
Index: distrib/sets/lists/man/mi
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
RCS file: /cvsroot/src/distrib/sets/lists/man/mi,v
retrieving revision 1.741
diff -u -r1.741 mi
--- distrib/sets/lists/man/mi	29 Dec 2004 00:12:02 -0000	1.741
+++ distrib/sets/lists/man/mi	1 Jan 2005 13:44:27 -0000
@@ -1225,6 +1225,7 @@
 ./usr/share/man/cat4/sw.0			man-sys-catman		.cat
 ./usr/share/man/cat4/sysbeep.0			man-sys-catman		.cat
 ./usr/share/man/cat4/systrace.0			man-sys-catman		.cat
+./usr/share/man/cat4/tap.0			man-sys-catman		.cat
 ./usr/share/man/cat4/tb.0			man-sys-catman		.cat
 ./usr/share/man/cat4/tc.0			man-sys-catman		.cat
 ./usr/share/man/cat4/tcds.0			man-sys-catman		.cat
@@ -3401,6 +3402,7 @@
 ./usr/share/man/man4/sw.4			man-sys-man		.man
 ./usr/share/man/man4/sysbeep.4			man-sys-man		.man
 ./usr/share/man/man4/systrace.4			man-sys-man		.man
+./usr/share/man/man4/tap.4			man-sys-man		.man
 ./usr/share/man/man4/tb.4			man-sys-man		.man
 ./usr/share/man/man4/tc.4			man-sys-man		.man
 ./usr/share/man/man4/tcds.4			man-sys-man		.man
Index: share/man/man4/Makefile
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
RCS file: /cvsroot/src/share/man/man4/Makefile,v
retrieving revision 1.342
diff -u -r1.342 Makefile
--- share/man/man4/Makefile	23 Dec 2004 23:13:14 -0000	1.342
+++ share/man/man4/Makefile	1 Jan 2005 13:44:27 -0000
@@ -35,7 +35,7 @@
 	siop.4 sip.4 siside.4 sk.4 sl.4 slide.4 \
 	sm.4 spc.4 speaker.4 spif.4 spp.4 sqphy.4 \
 	ss.4 st.4 ste.4 stge.4 sti.4 stpcide.4 sv.4 strip.4 systrace.4 \
-	tb.4 tc.4 tcds.4 tcp.4 termios.4 tfb.4 ti.4 \
+	tap.4 tb.4 tc.4 tcds.4 tcp.4 termios.4 tfb.4 ti.4 \
 	tl.4 tlp.4 tlphy.4 tp.4 tr.4 trm.4 tty.4 tun.4 \
 	tqphy.4 twe.4 txp.4 ubsec.4 udp.4 uep.4 uha.4 uk.4 ukphy.4 \
 	unix.4 userconf.4 verifiedexec.4 vga.4 viaide.4 \
Index: sys/conf/files
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
RCS file: /cvsroot/src/sys/conf/files,v
retrieving revision 1.703
diff -u -r1.703 files
--- sys/conf/files	21 Dec 2004 16:36:53 -0000	1.703
+++ sys/conf/files	1 Jan 2005 13:44:28 -0000
@@ -1069,6 +1069,7 @@
 defpseudo gif:		ifnet
 defpseudo faith:	ifnet
 defpseudo stf:		ifnet
+defpseudo tap {[unit =3D -1]}:		ifnet, ether, bpf_filter
=20
 defpseudo sequencer
 defpseudo clockctl
@@ -1307,6 +1308,7 @@
 file	net/if_spppsubr.c		sppp
 file	net/if_strip.c			strip			needs-flag
 file	net/if_tokensubr.c		token			needs-flag
+file	net/if_tap.c			tap
 file	net/if_tun.c			tun			needs-flag
 file	net/if_vlan.c			vlan			needs-flag
 file	net/if_pppoe.c			pppoe			needs-flag
Index: sys/conf/majors
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
RCS file: /cvsroot/src/sys/conf/majors,v
retrieving revision 1.12
diff -u -r1.12 majors
--- sys/conf/majors	25 Sep 2004 03:30:44 -0000	1.12
+++ sys/conf/majors	1 Jan 2005 13:44:29 -0000
@@ -18,3 +18,4 @@
 device-major	atabus		char 166		atabus
 device-major	drvctl		char 167		drvctl
 device-major	dk		char 168 block 168
+device-major	tap		char 169		tap
Index: sys/net/Makefile
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
RCS file: /cvsroot/src/sys/net/Makefile,v
retrieving revision 1.16
diff -u -r1.16 Makefile
--- sys/net/Makefile	22 Jun 2004 14:29:45 -0000	1.16
+++ sys/net/Makefile	1 Jan 2005 13:44:29 -0000
@@ -6,7 +6,7 @@
 	if_atm.h if_bridgevar.h if_dl.h if_ether.h if_fddi.h if_gif.h \
 	if_gre.h if_hippi.h if_ieee1394.h if_llc.h if_media.h \
 	if_ppp.h if_pppvar.h if_pppoe.h if_slvar.h if_sppp.h if_stf.h \
-	if_stripvar.h if_token.h if_tun.h if_types.h if_vlanvar.h \
+	if_stripvar.h if_tap.h if_token.h if_tun.h if_types.h if_vlanvar.h \
 	netisr.h pfil.h pfkeyv2.h pfvar.h ppp-comp.h ppp_defs.h radix.h \
 	raw_cb.h route.h slcompress.h slip.h zlib.h
=20
--- /dev/null	Sat Jan  1 04:38:12 2005
+++ share/man/man4/tap.4	Sat Jan  1 14:47:42 2005
@@ -0,0 +1,172 @@
+.\" $NetBSD$
+.\"
+.\"  Copyright (c) 2004 The NetBSD Foundation.
+.\"  All rights reserved.
+.\"
+.\"  This code is derived from software contributed to the NetBSD Foundati=
on
+.\"   by Quentin Garnier.
+.\"=20
+.\"  Redistribution and use in source and binary forms, with or without
+.\"  modification, are permitted provided that the following conditions
+.\"  are met:
+.\"  1. Redistributions of source code must retain the above copyright
+.\"     notice, this list of conditions and the following disclaimer.
+.\"  2. Redistributions in binary form must reproduce the above copyright
+.\"     notice, this list of conditions and the following disclaimer in the
+.\"     documentation and/or other materials provided with the distributio=
n.
+.\"  3. All advertising materials mentioning features or use of this softw=
are
+.\"     must display the following acknowledgement:
+.\"         This product includes software developed by the NetBSD
+.\"         Foundation, Inc. and its contributors.
+.\"  4. Neither the name of The NetBSD Foundation nor the names of its
+.\"     contributors may be used to endorse or promote products derived
+.\"     from this software without specific prior written permission.
+.\"=20
+.\"  THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBU=
TORS
+.\"  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT L=
IMITED
+.\"  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTI=
CULAR
+.\"  PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBU=
TORS
+.\"  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\"  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\"  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSIN=
ESS
+.\"  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER =
IN
+.\"  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWIS=
E)
+.\"  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED O=
F THE
+.\"  POSSIBILITY OF SUCH DAMAGE.
+.\"/
+.Dd December 21, 2004
+.Dt TAP 4
+.Os
+.Sh NAME
+.Nm tap
+.Nd virtual Ethernet device
+.Sh SYNOPSIS
+.Cd pseudo-device tap
+.Sh DESCRIPTION
+The
+.Nm
+driver allows the creation and use of virtual Ethernet devices.
+Those interfaces appear just as any real Ethernet NIC to the kernel,
+but can also be accessed by userland through a character device node in or=
der
+to read frames being sent by the system or to inject frames.
+.Pp
+In that respect it is very similar to what
+.Xr tun 4
+provides, but the added Ethernet layer allows easy integration with machine
+emulators or virtual Ethernet networks through the use of
+.Xr bridge 4
+with tunneling.
+.Sh INTERFACE CREATION
+Interfaces may be created in two different ways:
+using the
+.Xr ifconfig 8
+.Cm create
+command with a specified device number,
+or its ioctl equivalent,
+.Dv SIOCIFCREATE ,
+or using the special cloning device
+.Pa /dev/tap .
+.Pp
+The former works the same as any other cloning network interface:
+the administrator can create and destroy interfaces at any time,
+notably at boot time.
+This is the easiest way of combining
+.Nm
+and
+.Xr bridge 4 .
+Later, userland will actually access the interfaces through the specific
+device nodes
+.Pa /dev/tapN .
+.Pp
+The latter is aimed at applications that need a virtual Ethernet device for
+the duration of their execution.
+A new interface is created at the opening of
+.Pa /dev/tap ,
+and is later destroyed when the last process using the file descriptor clo=
ses
+it.
+.Sh CHARACTER DEVICES
+Whether the
+.Nm
+devices are accessed through the special cloning device
+.Pa /dev/tap
+or through the specific devices
+.Pa /dev/tapN ,
+the possible actions to control the matching interface are the same.
+.Pp
+When using
+.Pa /dev/tap
+though, as the interface is created on-the-fly, its name is not known
+immediately by the application.
+Therefore the
+.Dv TAPGIFNAME
+ioctl is provided.
+It should be the first action an application using the special cloning dev=
ice
+will do.
+It takes a pointer to a
+.Ft struct ifreq
+as an argument.
+.Pp
+Ethernet frames sent out by the kernel on a
+.Nm
+interface can be obtained by the controlling application with
+.Xr read 2 .
+It can also inject frames in the kernel with
+.Xr write 2 .
+There is absolutely no validation of the content of the injected frame,
+it can be any data, of any length.
+.Pp
+One call of
+.Xr write 2
+will inject a single frame in the kernel, as one call of
+.Xr read 2
+will retrieve a single frame from the queue, to the extent of the provided
+buffer.
+If the buffer is not large enough, the frame will be truncated.
+.Pp
+.Nm
+character devices support the
+.Dv FIONREAD
+ioctl which returns the size of the next available frame,
+or 0 if there is no available frame in the queue.
+.Pp
+They also support non-blocking I/O through the
+.Dv FIONBIO
+ioctl.
+In that mode,
+.Er EWOULDBLOCK
+is returned by
+.Xr read 2
+when no data is available.
+.Pp
+Asynchronous I/O is supported through the
+.Dv FIOASYNC ,
+.Dv FIOSETOWN
+and
+.Dv FIOGETOWN
+ioctls.
+The first will enable
+.Dv SIGIO
+generation, while the other two configure the process group that
+will receive the signal when data is ready.
+.Pp
+Synchronisation may also be achieved through the use of
+.Xr select 2 ,
+.Xr poll 2
+or
+.Xr kevent 2 .
+.Sh FILES
+.Bl -tag -compact -width /dev/tap[0-9]*
+.It Pa /dev/tap
+cloning device
+.It Pa /dev/tap[0-9]*
+individual character device nodes
+.El
+.Sh SEE ALSO
+.Xr bridge 4 ,
+.Xr ifconfig 8 ,
+.Xr tun 4
+.Sh HISTORY
+The
+.Nm
+driver first appeared in
+.Nx 3.0 .
--- /dev/null	Sat Jan  1 04:38:12 2005
+++ sys/net/if_tap.c	Sat Jan  1 14:39:58 2005
@@ -0,0 +1,1372 @@
+/*	$NetBSD$	*/
+
+/*
+ *  Copyright (c) 2003, 2004 The NetBSD Foundation.
+ *  All rights reserved.
+ *
+ *  This code is derived from software contributed to the NetBSD Foundation
+ *   by Quentin Garnier.
+ *=20
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. All advertising materials mentioning features or use of this softwa=
re
+ *     must display the following acknowledgement:
+ *         This product includes software developed by the NetBSD
+ *         Foundation, Inc. and its contributors.
+ *  4. Neither the name of The NetBSD Foundation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *=20
+ *  THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUT=
ORS
+ *  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LI=
MITED
+ *  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTIC=
ULAR
+ *  PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUT=
ORS
+ *  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINE=
SS
+ *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF=
 THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * tap(4) is a virtual Ethernet interface.  It appears as a real Ethernet
+ * device to the system, but can also be accessed by userland through a
+ * character device interface, which allows reading and injecting frames.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include "bpfilter.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/conf.h>
+#include <sys/device.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/ksyms.h>
+#include <sys/poll.h>
+#include <sys/select.h>
+#include <sys/sockio.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_ether.h>
+#include <net/if_media.h>
+#include <net/if_tap.h>
+#if NBPFILTER > 0
+#include <net/bpf.h>
+#endif
+
+/*
+ * sysctl node management
+ *
+ * It's not really possible to use a SYSCTL_SETUP block with
+ * current LKM implementation, so it is easier to just define
+ * our own function.
+ *
+ * The handler function is a "helper" in Andrew Brown's sysctl
+ * framework terminology.  It is used as a gateway for sysctl
+ * requests over the nodes.
+ *
+ * tap_log allows the module to log creations of nodes and
+ * destroy them all at once using sysctl_teardown.
+ */
+static int tap_node;
+static int	tap_sysctl_handler(SYSCTLFN_PROTO);
+
+/*
+ * Since we're an Ethernet device, we need the 3 following
+ * components: a leading struct device, a struct ethercom,
+ * and also a struct ifmedia since we don't attach a PHY to
+ * ourselves. We could emulate one, but there's no real
+ * point.
+ */
+
+struct tap_softc {
+	struct device	sc_dev;
+	struct ifmedia	sc_im;
+	struct ethercom	sc_ec;
+	int		sc_flags;
+#define	TAP_INUSE	0x00000001	/* tap device can only be opened once */
+#define TAP_ASYNCIO	0x00000002	/* user is using async I/O (SIGIO) on the d=
evice */
+#define TAP_NBIO	0x00000004	/* user wants calls to avoid blocking */
+#define TAP_GOING	0x00000008	/* interface is being destroyed */
+	struct selinfo	sc_rsel;
+	pid_t		sc_pgid; /* For async. IO */
+	struct lock	sc_rdlock;
+	struct simplelock	sc_kqlock;
+};
+
+/* autoconf(9) glue */
+
+void	tapattach(int);
+
+static int	tap_match(struct device *, struct cfdata *, void *);
+static void	tap_attach(struct device *, struct device *, void *);
+static int	tap_detach(struct device*, int);
+
+/* Ethernet address helper functions */
+
+static char	*tap_ether_sprintf(char *, const u_char *);
+static int	tap_ether_aton(u_char *, char *);
+
+CFATTACH_DECL(tap, sizeof(struct tap_softc),
+    tap_match, tap_attach, tap_detach, NULL);
+extern struct cfdriver tap_cd;
+
+/* Real device access routines */
+static int	tap_dev_close(struct tap_softc *);
+static int	tap_dev_read(int, struct uio *, int);
+static int	tap_dev_write(int, struct uio *, int);
+static int	tap_dev_ioctl(int, u_long, caddr_t, struct proc *);
+static int	tap_dev_poll(int, int, struct proc *);
+static int	tap_dev_kqfilter(int, struct knote *);
+
+/* Fileops access routines */
+static int	tap_fops_close(struct file *, struct proc *);
+static int	tap_fops_read(struct file *, off_t *, struct uio *,
+    struct ucred *, int);
+static int	tap_fops_write(struct file *, off_t *, struct uio *,
+    struct ucred *, int);
+static int	tap_fops_ioctl(struct file *, u_long, void *,
+    struct proc *);
+static int	tap_fops_poll(struct file *, int, struct proc *);
+static int	tap_fops_kqfilter(struct file *, struct knote *);
+
+static const struct fileops tap_fileops =3D {
+	tap_fops_read,
+	tap_fops_write,
+	tap_fops_ioctl,
+	fnullop_fcntl,
+	tap_fops_poll,
+	fbadop_stat,
+	tap_fops_close,
+	tap_fops_kqfilter,
+};
+
+/* Helper for cloning open() */
+static int	tap_dev_cloner(struct proc *);
+
+/* Character device routines */
+static int	tap_cdev_open(dev_t, int, int, struct proc *);
+static int	tap_cdev_close(dev_t, int, int, struct proc *);
+static int	tap_cdev_read(dev_t, struct uio *, int);
+static int	tap_cdev_write(dev_t, struct uio *, int);
+static int	tap_cdev_ioctl(dev_t, u_long, caddr_t, int, struct proc *);
+static int	tap_cdev_poll(dev_t, int, struct proc *);
+static int	tap_cdev_kqfilter(dev_t, struct knote *);
+
+const struct cdevsw tap_cdevsw =3D {
+	tap_cdev_open, tap_cdev_close,
+	tap_cdev_read, tap_cdev_write,
+	tap_cdev_ioctl, nostop, notty,
+	tap_cdev_poll, nommap,
+	tap_cdev_kqfilter,
+};
+
+#define TAP_CLONER	0xfffff		/* Maximal minor value */
+
+/* kqueue-related routines */
+static void	tap_kqdetach(struct knote *);
+static int	tap_kqread(struct knote *, long);
+
+/*
+ * Those are needed by the if_media interface.
+ */
+
+static int	tap_mediachange(struct ifnet *);
+static void	tap_mediastatus(struct ifnet *, struct ifmediareq *);
+
+/*
+ * Those are needed by the ifnet interface, and would typically be
+ * there for any network interface driver.
+ * Some other routines are optional: watchdog and drain.
+ */
+
+static void	tap_start(struct ifnet *);
+static void	tap_stop(struct ifnet *, int);
+static int	tap_init(struct ifnet *);
+static int	tap_ioctl(struct ifnet *, u_long, caddr_t);
+
+/* This is an internal function to keep tap_ioctl readable */
+static int	tap_lifaddr(struct ifnet *, u_long, struct ifaliasreq *);
+
+/*
+ * tap is a clonable interface, although it is highly unrealistic for
+ * an Ethernet device.
+ *
+ * Here are the bits needed for a clonable interface.
+ */
+static int	tap_clone_create(struct if_clone *, int);
+static int	tap_clone_destroy(struct ifnet *);
+
+struct if_clone tap_cloners =3D IF_CLONE_INITIALIZER("tap",
+					tap_clone_create,
+					tap_clone_destroy);
+
+/* Helper functions shared by the two cloning code paths */
+static struct tap_softc *	tap_clone_creator(int);
+static int	tap_clone_destroyer(struct device *);
+
+void
+tapattach(int n)
+{
+	int error;
+
+	error =3D config_cfattach_attach(tap_cd.cd_name, &tap_ca);
+	if (error) {
+		aprint_error("%s: unable to register cfattach\n",
+		    tap_cd.cd_name);
+		(void)config_cfdriver_detach(&tap_cd);
+		return;
+	}
+
+	if_clone_attach(&tap_cloners);
+}
+
+/* Pretty much useless for a pseudo-device */
+static int
+tap_match(struct device *self, struct cfdata *cfdata, void *arg)
+{
+	return (1);
+}
+
+void
+tap_attach(struct device *parent, struct device *self, void *aux)
+{
+	struct tap_softc *sc =3D (struct tap_softc *)self;
+	struct ifnet *ifp;
+	u_int8_t enaddr[ETHER_ADDR_LEN] =3D
+	    { 0xf0, 0x0b, 0xa4, 0xff, 0xff, 0xff };
+	char enaddrstr[18];
+	uint32_t ui;
+	int error;
+	struct sysctlnode *node;
+
+	aprint_normal("%s: faking Ethernet device\n",
+	    self->dv_xname);
+
+	/*
+	 * In order to obtain unique initial Ethernet address on a host,
+	 * do some randomisation using mono_time.  It's not meant for anything
+	 * but avoiding hard-coding an address.
+	 */
+	ui =3D (mono_time.tv_sec ^ mono_time.tv_usec) & 0xffffff;
+	memcpy(enaddr+3, (u_int8_t *)&ui, 3);
+
+	aprint_normal("%s: Ethernet address %s\n", sc->sc_dev.dv_xname,
+	    tap_ether_sprintf(enaddrstr, enaddr));
+
+	/*
+	 * Why 1000baseT? Why not? You can add more.
+	 *
+	 * Note that there are 3 steps: init, one or several additions to
+	 * list of supported media, and in the end, the selection of one
+	 * of them.
+	 */
+	ifmedia_init(&sc->sc_im, 0, tap_mediachange, tap_mediastatus);
+	ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_1000_T, 0, NULL);
+	ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_1000_T|IFM_FDX, 0, NULL);
+	ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_100_TX, 0, NULL);
+	ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_100_TX|IFM_FDX, 0, NULL);
+	ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_10_T, 0, NULL);
+	ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_10_T|IFM_FDX, 0, NULL);
+	ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_AUTO, 0, NULL);
+	ifmedia_set(&sc->sc_im, IFM_ETHER|IFM_AUTO);
+
+	/*
+	 * One should note that an interface must do multicast in order
+	 * to support IPv6.
+	 */
+	ifp =3D &sc->sc_ec.ec_if;
+	strcpy(ifp->if_xname, sc->sc_dev.dv_xname);
+	ifp->if_softc	=3D sc;
+	ifp->if_flags	=3D IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+	ifp->if_ioctl	=3D tap_ioctl;
+	ifp->if_start	=3D tap_start;
+	ifp->if_stop	=3D tap_stop;
+	ifp->if_init	=3D tap_init;
+	IFQ_SET_READY(&ifp->if_snd);
+
+	sc->sc_ec.ec_capabilities =3D ETHERCAP_VLAN_MTU | ETHERCAP_JUMBO_MTU;
+
+	/* Those steps are mandatory for an Ethernet driver, the first call
+	 * being common to all network interface drivers. */
+	if_attach(ifp);
+	ether_ifattach(ifp, enaddr);
+
+	sc->sc_flags =3D 0;
+
+	/*
+	 * Add a sysctl node for that interface.
+	 *
+	 * The pointer transmitted is not a string, but instead a pointer to
+	 * the softc structure, which we can use to build the string value on
+	 * the fly in the helper function of the node.  See the comments for
+	 * tap_sysctl_handler for details.
+	 */
+	if ((error =3D sysctl_createv(NULL, 0, NULL,
+	    &node, CTLFLAG_READWRITE,
+	    CTLTYPE_STRING, sc->sc_dev.dv_xname, NULL,
+	    tap_sysctl_handler, 0, sc, 18,
+	    CTL_NET, PF_LINK, tap_node, sc->sc_dev.dv_unit, CTL_EOL)) !=3D 0)
+		aprint_error("%s: sysctl_createv returned %d, ignoring\n",
+		    sc->sc_dev.dv_xname, error);
+
+	/*
+	 * Initialize the two locks for the device.
+	 *
+	 * We need a lock here because even though the tap device can be
+	 * opened only once, the file descriptor might be passed to another
+	 * process, say a fork(2)ed child.
+	 *
+	 * The Giant saves us from most of the hassle, but since the read
+	 * operation can sleep, we don't want two processes to wake up at
+	 * the same moment and both try and dequeue a single packet.
+	 *
+	 * The queue for event listeners (used by kqueue(9), see below) has
+	 * to be protected, too, but we don't need the same level of
+	 * complexity for that lock, so a simple spinning lock is fine.
+	 */
+	lockinit(&sc->sc_rdlock, PSOCK|PCATCH, "tapl", 0, LK_SLEEPFAIL);
+	simple_lock_init(&sc->sc_kqlock);
+}
+
+/*
+ * When detaching, we do the inverse of what is done in the attach
+ * routine, in reversed order.
+ */
+static int
+tap_detach(struct device* self, int flags)
+{
+	struct tap_softc *sc =3D (struct tap_softc *)self;
+	struct ifnet *ifp =3D &sc->sc_ec.ec_if;
+	int error, s;
+
+	/*
+	 * Some processes might be sleeping on "tap", so we have to make
+	 * them release their hold on the device.
+	 *
+	 * The LK_DRAIN operation will wait for every locked process to
+	 * release their hold.
+	 */
+	sc->sc_flags |=3D TAP_GOING;
+	s =3D splnet();
+	tap_stop(ifp, 1);
+	if_down(ifp);
+	splx(s);
+	lockmgr(&sc->sc_rdlock, LK_DRAIN, NULL);
+
+	/*
+	 * Destroying a single leaf is a very straightforward operation using
+	 * sysctl_destroyv.  One should be sure to always end the path with
+	 * CTL_EOL.
+	 */
+	if ((error =3D sysctl_destroyv(NULL, CTL_NET, PF_LINK, tap_node,
+	    sc->sc_dev.dv_unit, CTL_EOL)) !=3D 0)
+		aprint_error("%s: sysctl_destroyv returned %d, ignoring\n",
+		    sc->sc_dev.dv_xname, error);
+	ether_ifdetach(ifp);
+	if_detach(ifp);
+	ifmedia_delete_instance(&sc->sc_im, IFM_INST_ANY);
+
+	return (0);
+}
+
+/*
+ * This function is called by the ifmedia layer to notify the driver
+ * that the user requested a media change.  A real driver would
+ * reconfigure the hardware.
+ */
+static int
+tap_mediachange(struct ifnet *ifp)
+{
+	return (0);
+}
+
+/*
+ * Here the user asks for the currently used media.
+ */
+static void
+tap_mediastatus(struct ifnet *ifp, struct ifmediareq *imr)
+{
+	struct tap_softc *sc =3D (struct tap_softc *)ifp->if_softc;
+	imr->ifm_active =3D sc->sc_im.ifm_cur->ifm_media;
+}
+
+/*
+ * This is the function where we SEND packets.
+ *
+ * There is no 'receive' equivalent.  A typical driver will get
+ * interrupts from the hardware, and from there will inject new packets
+ * into the network stack.
+ *
+ * Once handled, a packet must be freed.  A real driver might not be able
+ * to fit all the pending packets into the hardware, and is allowed to
+ * return before having sent all the packets.  It should then use the
+ * if_flags flag IFF_OACTIVE to notify the upper layer.
+ *
+ * There are also other flags one should check, such as IFF_PAUSE.
+ *
+ * It is our duty to make packets available to BPF listeners.
+ *
+ * You should be aware that this function is called by the Ethernet layer
+ * at splnet().
+ *
+ * When the device is opened, we have to pass the packet(s) to the
+ * userland.  For that we stay in OACTIVE mode while the userland gets
+ * the packets, and we send a signal to the processes waiting to read.
+ *
+ * wakeup(sc) is the counterpart to the tsleep call in
+ * tap_dev_read, while selnotify() is used for kevent(2) and
+ * poll(2) (which includes select(2)) listeners.
+ */
+static void
+tap_start(struct ifnet *ifp)
+{
+	struct tap_softc *sc =3D (struct tap_softc *)ifp->if_softc;
+	struct mbuf *m0;
+
+	if ((sc->sc_flags & TAP_INUSE) =3D=3D 0) {
+		/* Simply drop packets */
+		for(;;) {
+			IFQ_DEQUEUE(&ifp->if_snd, m0);
+			if (m0 =3D=3D NULL)
+				return;
+
+			ifp->if_opackets++;
+#if NBPFILTER > 0
+			if (ifp->if_bpf)
+				bpf_mtap(ifp->if_bpf, m0);
+#endif
+
+			m_freem(m0);
+		}
+	} else if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
+		ifp->if_flags |=3D IFF_OACTIVE;
+		wakeup(sc);
+		selnotify(&sc->sc_rsel, 1);
+		if (sc->sc_flags & TAP_ASYNCIO)
+			fownsignal(sc->sc_pgid, SIGIO, POLL_IN,
+			    POLLIN|POLLRDNORM, NULL);
+	}
+}
+
+/*
+ * A typical driver will only contain the following handlers for
+ * ioctl calls, except SIOCSIFPHYADDR.
+ * The latter is a hack I used to set the Ethernet address of the
+ * faked device.
+ *
+ * Note that both ifmedia_ioctl() and ether_ioctl() have to be
+ * called under splnet().
+ */
+static int
+tap_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+	struct tap_softc *sc =3D (struct tap_softc *)ifp->if_softc;
+	struct ifreq *ifr =3D (struct ifreq *)data;
+	int s, error;
+
+	s =3D splnet();
+
+	switch (cmd) {
+	case SIOCSIFMEDIA:
+	case SIOCGIFMEDIA:
+		error =3D ifmedia_ioctl(ifp, ifr, &sc->sc_im, cmd);
+		break;
+	case SIOCSIFPHYADDR:
+		error =3D tap_lifaddr(ifp, cmd, (struct ifaliasreq *)data);
+		break;
+	default:
+		error =3D ether_ioctl(ifp, cmd, data);
+		if (error =3D=3D ENETRESET)
+			error =3D 0;
+		break;
+	}
+
+	splx(s);
+
+	return (error);
+}
+
+/*
+ * Helper function to set Ethernet address.  This shouldn't be done there,
+ * and should actually be available to all Ethernet drivers, real or not.
+ */
+static int
+tap_lifaddr(struct ifnet *ifp, u_long cmd, struct ifaliasreq *ifra)
+{
+	struct sockaddr *sa =3D (struct sockaddr *)&ifra->ifra_addr;
+
+	if (sa->sa_family !=3D AF_LINK)
+		return (EINVAL);
+
+	memcpy(LLADDR(ifp->if_sadl), sa->sa_data, ETHER_ADDR_LEN);
+
+	return (0);
+}
+
+/*
+ * _init() would typically be called when an interface goes up,
+ * meaning it should configure itself into the state in which it
+ * can send packets.
+ */
+static int
+tap_init(struct ifnet *ifp)
+{
+	ifp->if_flags |=3D IFF_RUNNING;
+
+	tap_start(ifp);
+
+	return (0);
+}
+
+/*
+ * _stop() is called when an interface goes down.  It is our
+ * responsibility to validate that state by clearing the
+ * IFF_RUNNING flag.
+ *
+ * We have to wake up all the sleeping processes to have the pending
+ * read requests cancelled.
+ */
+static void
+tap_stop(struct ifnet *ifp, int disable)
+{
+	struct tap_softc *sc =3D (struct tap_softc *)ifp->if_softc;
+
+	ifp->if_flags &=3D ~IFF_RUNNING;
+	wakeup(sc);
+	selnotify(&sc->sc_rsel, 1);
+	if (sc->sc_flags & TAP_ASYNCIO)
+		fownsignal(sc->sc_pgid, SIGIO, POLL_HUP, 0, NULL);
+}
+
+/*
+ * The 'create' command of ifconfig can be used to create
+ * any numbered instance of a given device.  Thus we have to
+ * make sure we have enough room in cd_devs to create the
+ * user-specified instance.  config_attach_pseudo will do this
+ * for us.
+ */
+static int
+tap_clone_create(struct if_clone *ifc, int unit)
+{
+	if (tap_clone_creator(unit) =3D=3D NULL) {
+		aprint_error("%s%d: unable to attach an instance\n",
+                    tap_cd.cd_name, unit);
+		return (ENXIO);
+	}
+
+	return (0);
+}
+
+/*
+ * tap(4) can be cloned in two ways:
+ *   using 'ifconfig tap0 create', which will use the network
+ *     interface cloning API, and call tap_clone_create above.
+ *   opening the cloning device node, whose minor number is TAP_CLONER.
+ *     See below for an explanation of how this part works.
+ * config_attach_pseudo can be called with unit =3D DVUNIT_ANY to have
+ * autoconf(9) choose a unit number for us.  This is what happens when
+ * the cloner is opened, while the ifcloner interface creates a device
+ * with a specific unit number.  NULL is returned if the attach fails.
+ */
+static struct tap_softc *
+tap_clone_creator(int unit)
+{
+	struct cfdata *cf =3D malloc(sizeof(*cf), M_DEVBUF, M_WAITOK);
+	struct tap_softc *sc;
+
+	cf->cf_name =3D tap_cd.cd_name;
+	cf->cf_atname =3D tap_ca.ca_name;
+	cf->cf_unit =3D unit;
+	cf->cf_fstate =3D FSTATE_STAR;
+	if ((sc =3D (struct tap_softc *)config_attach_pseudo(cf)) =3D=3D NULL)
+		free(cf, M_DEVBUF);	/* attach failed: cf would leak */
+	return (sc);
+}
+
+/*
+ * The clean design of if_clone and autoconf(9) makes that part
+ * really straightforward.  The second argument of config_detach
+ * means neither QUIET nor FORCED.
+ */
+static int
+tap_clone_destroy(struct ifnet *ifp)
+{
+	return tap_clone_destroyer((struct device *)ifp->if_softc);
+}
+
+static int
+tap_clone_destroyer(struct device *dev)
+{
+	struct cfdata *cf =3D dev->dv_cfdata;
+	int error;
+
+	if ((error =3D config_detach(dev, 0)) !=3D 0)
+		aprint_error("%s: unable to detach instance\n",
+		    dev->dv_xname);
+	free(cf, M_DEVBUF);
+
+	return (error);
+}
+
+/*
+ * tap(4) is a bit of a hybrid device.  It can be used in two different
+ * ways:
+ *  1. ifconfig tapN create, then use /dev/tapN to read/write off it.
+ *  2. open /dev/tap, get a new interface created and read/write off it.
+ *     That interface is destroyed when the process that had it created ex=
its.
+ *
+ * The first way is managed by the cdevsw structure, and you access interf=
aces
+ * through a (major, minor) mapping:  tap4 is obtained by the minor number
+ * 4.  The entry points for the cdevsw interface are prefixed by tap_cdev_.
+ *
+ * The second way is the so-called "cloning" device.  It's a special minor
+ * number (chosen as the maximal number, to allow as many tap devices as
+ * possible).  The user first opens the cloner (e.g., /dev/tap), and that
+ * call ends in tap_cdev_open.  The actual place where it is handled is
+ * tap_dev_cloner.
+ *
+ * A tap device cannot be opened more than once at a time, so the cdevsw
+ * part of open() does nothing but note that the interface is being used
+ * and hence ready to actually handle packets.
+ */
+
+static int
+tap_cdev_open(dev_t dev, int flags, int fmt, struct proc *p)
+{
+	struct tap_softc *sc;
+	int unit =3D minor(dev);
+
+	/* The cloner minor is handled on its own */
+	if (unit =3D=3D TAP_CLONER)
+		return tap_dev_cloner(p);
+
+	sc =3D (struct tap_softc *)device_lookup(&tap_cd, unit);
+	if (sc =3D=3D NULL)
+		return (ENXIO);
+
+	/* The device can only be opened once */
+	if ((sc->sc_flags & TAP_INUSE) !=3D 0)
+		return (EBUSY);
+
+	sc->sc_flags |=3D TAP_INUSE;
+	return (0);
+}
+
+/*
+ * There are several kinds of cloning devices, and the simplest is the one
+ * tap(4) uses.  What it does is replace the file descriptor with a new
+ * one, with its own fileops structure (which maps to the various read,
+ * write, ioctl functions).  It starts by allocating a new file
+ * descriptor with falloc, then actually creates the new tap device.
+ *
+ * Once those two steps are successful, we can re-wire the existing file
+ * descriptor to its new self.  This is done with fdclone():  it fills the
+ * fp structure as needed (notably f_data gets filled with the fifth
+ * parameter passed, the unit of the tap device, which will allow us to
+ * identify the device later), and returns EMOVEFD.
+ *
+ * That magic value is interpreted by sys_open() which then replaces the
+ * current file descriptor by the new one (through a magic member of struct
+ * proc, p_dupfd).
+ *
+ * The tap device is flagged as being busy since it otherwise could be
+ * externally accessed through the corresponding device node with the cdev=
sw
+ * interface.
+ */
+
+static int
+tap_dev_cloner(struct proc *p)
+{
+	struct tap_softc *sc;
+	struct file *fp;
+	int error, fd;
+
+	if ((error =3D falloc(p, &fp, &fd)) !=3D 0)
+		return (error);
+
+	if ((sc =3D tap_clone_creator(DVUNIT_ANY)) =3D=3D NULL) {
+		FILE_UNUSE(fp, p);
+		ffree(fp);
+		return (ENXIO);
+	}
+
+	sc->sc_flags |=3D TAP_INUSE;
+
+	return fdclone(p, fp, fd, &tap_fileops, (void *)(intptr_t)sc->sc_dev.dv_u=
nit);
+}
+
+/*
+ * While all other operations (read, write, ioctl, poll and kqfilter) are
+ * really the same whether we are in cdevsw or fileops mode, the close()
+ * function is slightly different in the two cases.
+ *
+ * As for the others, the core of it is shared in tap_dev_close.  What
+ * it does is sufficient for the cdevsw interface, but the cloning
+ * interface needs another thing:  the interface is destroyed when the
+ * process that created it closes it.
+ */
+static int
+tap_cdev_close(dev_t dev, int flags, int fmt, struct proc *p)
+{
+	struct tap_softc *sc;
+
+	/* cdevsw flavour:  the minor number is the unit */
+	sc =3D (struct tap_softc *)device_lookup(&tap_cd, minor(dev));
+	if (sc !=3D NULL)
+		return tap_dev_close(sc);
+
+	return (ENXIO);
+}
+
+/*
+ * It might happen that the administrator used ifconfig to externally dest=
roy
+ * the interface.  In that case, tap_fops_close will be called while
+ * tap_detach is already happening.  If we called it again from here, we
+ * would deadlock.  TAP_GOING ensures that this situation doesn't happen.
+ */
+static int
+tap_fops_close(struct file *fp, struct proc *p)
+{
+	int unit =3D (intptr_t)fp->f_data;
+	struct tap_softc *sc;
+	int error;
+
+	sc =3D (struct tap_softc *)device_lookup(&tap_cd, unit);
+	if (sc =3D=3D NULL)
+		return (ENXIO);
+
+	/* tap_dev_close currently always succeeds, but it might not
+	 * always be the case. */
+	if ((error =3D tap_dev_close(sc)) !=3D 0)
+		return (error);
+
+	/* Destroy the device now that it is no longer useful,
+	 * unless it's already being destroyed. */
+	if ((sc->sc_flags & TAP_GOING) !=3D 0)
+		return (0);
+
+	return tap_clone_destroyer((struct device *)sc);
+}
+
+/*
+ * Part of close() shared by the cdevsw and fileops entry points.
+ *
+ * Clears IFF_OACTIVE, purges the output queue of the interface
+ * (each purged packet is counted as sent, shown to bpf and then
+ * freed) and drops the TAP_INUSE and TAP_ASYNCIO flags so that the
+ * device can be opened again.  Always returns 0.
+ */
+static int
+tap_dev_close(struct tap_softc *sc)
+{
+	struct ifnet *ifp;
+	struct mbuf *m;
+	int s;
+
+	s =3D splnet();
+	/* Let tap_start handle packets again */
+	ifp =3D &sc->sc_ec.ec_if;
+	ifp->if_flags &=3D ~IFF_OACTIVE;
+
+	/* Purge output queue */
+	for (;;) {
+		IFQ_DEQUEUE(&ifp->if_snd, m);
+		if (m =3D=3D NULL)
+			break;
+
+		ifp->if_opackets++;
+#if NBPFILTER > 0
+		if (ifp->if_bpf)
+			bpf_mtap(ifp->if_bpf, m);
+#endif
+		/* Off the queue and never read:  it is ours to free */
+		m_freem(m);
+	}
+	splx(s);
+
+	sc->sc_flags &=3D ~(TAP_INUSE | TAP_ASYNCIO);
+
+	return (0);
+}
+
+static int
+tap_cdev_read(dev_t dev, struct uio *uio, int flags)
+{
+	return tap_dev_read(minor(dev), uio, flags);
+}
+
+static int
+tap_fops_read(struct file *fp, off_t *offp, struct uio *uio,
+    struct ucred *cred, int flags)
+{
+	return tap_dev_read((intptr_t)fp->f_data, uio, flags);
+}
+
+/*
+ * Read backend shared by the cdevsw and fileops entry points:  hands
+ * at most one packet of the interface's output queue to userland.
+ *
+ * Returns ENXIO when the unit does not exist, EHOSTDOWN when the
+ * interface is not IFF_UP, EWOULDBLOCK when TAP_NBIO is set and the
+ * call would have to wait, or whatever error lockmgr, tsleep or
+ * uiomove reported.  Returns 0 without moving any data when the
+ * queue turns out to be empty after a wakeup.
+ */
+static int
+tap_dev_read(int unit, struct uio *uio, int flags)
+{
+	struct tap_softc *sc =3D
+	    (struct tap_softc *)device_lookup(&tap_cd, unit);
+	struct ifnet *ifp;
+	struct mbuf *m, *n;
+	int error =3D 0, s;
+
+	if (sc =3D=3D NULL)
+		return (ENXIO);
+
+	ifp =3D &sc->sc_ec.ec_if;
+	if ((ifp->if_flags & IFF_UP) =3D=3D 0)
+		return (EHOSTDOWN);
+
+	/*
+	 * In the TAP_NBIO case, we have to make sure we won't be sleeping
+	 */
+	if ((sc->sc_flags & TAP_NBIO) &&
+	    lockstatus(&sc->sc_rdlock) =3D=3D LK_EXCLUSIVE)
+		return (EWOULDBLOCK);
+	error =3D lockmgr(&sc->sc_rdlock, LK_EXCLUSIVE, NULL);
+	if (error !=3D 0)
+		return (error);
+
+	s =3D splnet();
+	if (IFQ_IS_EMPTY(&ifp->if_snd)) {
+		ifp->if_flags &=3D ~IFF_OACTIVE;
+		splx(s);
+		/*
+		 * We must release the lock before sleeping, and re-acquire it
+		 * after.
+		 */
+		(void)lockmgr(&sc->sc_rdlock, LK_RELEASE, NULL);
+		if (sc->sc_flags & TAP_NBIO)
+			error =3D EWOULDBLOCK;
+		else
+			error =3D tsleep(sc, PSOCK|PCATCH, "tap", 0);
+
+		/* The lock is not held here, so a plain return is fine */
+		if (error !=3D 0)
+			return (error);
+		/* The device might have been downed */
+		if ((ifp->if_flags & IFF_UP) =3D=3D 0)
+			return (EHOSTDOWN);
+		if ((sc->sc_flags & TAP_NBIO) &&
+		    lockstatus(&sc->sc_rdlock) =3D=3D LK_EXCLUSIVE)
+			return (EWOULDBLOCK);
+		error =3D lockmgr(&sc->sc_rdlock, LK_EXCLUSIVE, NULL);
+		if (error !=3D 0)
+			return (error);
+		s =3D splnet();
+	}
+
+	/* From here on the read lock is held:  leave through 'out' */
+	IFQ_DEQUEUE(&ifp->if_snd, m);
+	ifp->if_flags &=3D ~IFF_OACTIVE;
+	splx(s);
+	if (m =3D=3D NULL) {
+		/* Nothing to hand out after all */
+		error =3D 0;
+		goto out;
+	}
+
+	ifp->if_opackets++;
+#if NBPFILTER > 0
+	if (ifp->if_bpf)
+		bpf_mtap(ifp->if_bpf, m);
+#endif
+
+	/*
+	 * One read is one packet.  The chain is freed mbuf by mbuf as it
+	 * is copied out.
+	 */
+	do {
+		error =3D uiomove(mtod(m, caddr_t),
+		    min(m->m_len, uio->uio_resid), uio);
+		MFREE(m, n);
+		m =3D n;
+	} while (m !=3D NULL && uio->uio_resid > 0 && error =3D=3D 0);
+
+	/* What did not fit in the user buffer (or follows an error) is lost */
+	if (m !=3D NULL)
+		m_freem(m);
+
+out:
+	(void)lockmgr(&sc->sc_rdlock, LK_RELEASE, NULL);
+	return (error);
+}
+
+static int
+tap_cdev_write(dev_t dev, struct uio *uio, int flags)
+{
+	return tap_dev_write(minor(dev), uio, flags);
+}
+
+static int
+tap_fops_write(struct file *fp, off_t *offp, struct uio *uio,
+    struct ucred *cred, int flags)
+{
+	return tap_dev_write((intptr_t)fp->f_data, uio, flags);
+}
+
+/*
+ * Write backend shared by the cdevsw and fileops entry points:  the
+ * data of one write() is copied into an mbuf chain and injected as
+ * one received packet through the interface's if_input.
+ *
+ * Returns ENXIO when the unit does not exist, ENOBUFS when an mbuf
+ * could not be allocated, or the error reported by uiomove.  Input
+ * errors are accounted in if_ierrors.
+ */
+static int
+tap_dev_write(int unit, struct uio *uio, int flags)
+{
+	struct tap_softc *sc =3D
+	    (struct tap_softc *)device_lookup(&tap_cd, unit);
+	struct ifnet *ifp;
+	struct mbuf *m, **mp;
+	int error =3D 0;
+
+	if (sc =3D=3D NULL)
+		return (ENXIO);
+
+	ifp =3D &sc->sc_ec.ec_if;
+
+	/* One write, one packet, that's the rule */
+	MGETHDR(m, M_DONTWAIT, MT_DATA);
+	if (m =3D=3D NULL) {
+		ifp->if_ierrors++;
+		return (ENOBUFS);
+	}
+	/* The whole user buffer makes up the packet */
+	m->m_pkthdr.len =3D uio->uio_resid;
+
+	/*
+	 * mp points to the link where the next mbuf goes:  first to m
+	 * itself (already allocated), then to the m_next of the last mbuf.
+	 */
+	mp =3D &m;
+	while (error =3D=3D 0 && uio->uio_resid > 0) {
+		if (*mp !=3D m) {
+			MGET(*mp, M_DONTWAIT, MT_DATA);
+			if (*mp =3D=3D NULL) {
+				error =3D ENOBUFS;
+				break;
+			}
+		}
+		/* Every mbuf, header or not, receives at most MHLEN bytes */
+		(*mp)->m_len =3D min(MHLEN, uio->uio_resid);
+		error =3D uiomove(mtod(*mp, caddr_t), (*mp)->m_len, uio);
+		mp =3D &(*mp)->m_next;
+	}
+	if (error) {
+		/* Drop whatever part of the chain was built */
+		ifp->if_ierrors++;
+		m_freem(m);
+		return (error);
+	}
+
+	ifp->if_ipackets++;
+	m->m_pkthdr.rcvif =3D ifp;
+
+#if NBPFILTER > 0
+	if (ifp->if_bpf)
+		bpf_mtap(ifp->if_bpf, m);
+#endif
+	/* The chain is handed over to the input routine */
+	(*ifp->if_input)(ifp, m);
+
+	return (0);
+}
+
+static int
+tap_cdev_ioctl(dev_t dev, u_long cmd, caddr_t data, int flags,
+    struct proc *p)
+{
+	return tap_dev_ioctl(minor(dev), cmd, data, p);
+}
+
+static int
+tap_fops_ioctl(struct file *fp, u_long cmd, void *data, struct proc *p)
+{
+	return tap_dev_ioctl((intptr_t)fp->f_data, cmd, (caddr_t)data, p);
+}
+
+/*
+ * ioctl backend shared by the cdevsw and fileops entry points.
+ *
+ *   FIONREAD             length of the first queued packet, or 0
+ *   TIOCSPGRP/FIOSETOWN  set the owner that receives signals
+ *   TIOCGPGRP/FIOGETOWN  get that owner
+ *   FIOASYNC             toggle TAP_ASYNCIO
+ *   FIONBIO              toggle TAP_NBIO
+ *   TAPGIFNAME           copy the interface name into a struct ifreq
+ *
+ * Returns 0 on success, ENXIO when the unit does not exist, ENOTTY
+ * for an unknown command, or the error from fsetown/fgetown.
+ */
+static int
+tap_dev_ioctl(int unit, u_long cmd, caddr_t data, struct proc *p)
+{
+	struct tap_softc *sc =3D
+	    (struct tap_softc *)device_lookup(&tap_cd, unit);
+	int error =3D 0;
+
+	if (sc =3D=3D NULL)
+		return (ENXIO);
+
+	switch (cmd) {
+	case FIONREAD:
+		{
+			struct ifnet *ifp =3D &sc->sc_ec.ec_if;
+			struct mbuf *m;
+			int s;
+
+			/* Peek at the queue, the packet stays on it */
+			s =3D splnet();
+			IFQ_POLL(&ifp->if_snd, m);
+
+			if (m =3D=3D NULL)
+				*(int *)data =3D 0;
+			else
+				*(int *)data =3D m->m_pkthdr.len;
+			splx(s);
+		} break;
+	case TIOCSPGRP:
+	case FIOSETOWN:
+		error =3D fsetown(p, &sc->sc_pgid, cmd, data);
+		break;
+	case TIOCGPGRP:
+	case FIOGETOWN:
+		error =3D fgetown(p, sc->sc_pgid, cmd, data);
+		break;
+	case FIOASYNC:
+		if (*(int *)data)
+			sc->sc_flags |=3D TAP_ASYNCIO;
+		else
+			sc->sc_flags &=3D ~TAP_ASYNCIO;
+		break;
+	case FIONBIO:
+		if (*(int *)data)
+			sc->sc_flags |=3D TAP_NBIO;
+		else
+			sc->sc_flags &=3D ~TAP_NBIO;
+		break;
+	case TAPGIFNAME:
+		{
+			struct ifreq *ifr =3D (struct ifreq *)data;
+			struct ifnet *ifp =3D &sc->sc_ec.ec_if;
+
+			strlcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ);
+		} break;
+	default:
+		error =3D ENOTTY;
+		break;
+	}
+
+	/* Failures must reach the caller, do not flatten them to 0 */
+	return (error);
+}
+
+static int
+tap_cdev_poll(dev_t dev, int events, struct proc *p)
+{
+	return tap_dev_poll(minor(dev), events, p);
+}
+
+static int
+tap_fops_poll(struct file *fp, int events, struct proc *p)
+{
+	return tap_dev_poll((intptr_t)fp->f_data, events, p);
+}
+
+/*
+ * poll backend shared by the cdevsw and fileops entry points.
+ *
+ * The device is readable when a packet is waiting in the output
+ * queue of the interface; otherwise the caller is recorded in
+ * sc_rsel.  It is always reported writable.
+ *
+ * The return value is a mask of poll events, not an errno:  a unit
+ * that does not exist is therefore reported as POLLERR.
+ */
+static int
+tap_dev_poll(int unit, int events, struct proc *p)
+{
+	struct tap_softc *sc =3D
+	    (struct tap_softc *)device_lookup(&tap_cd, unit);
+	int revents =3D 0;
+
+	if (sc =3D=3D NULL)
+		return (POLLERR);
+
+	if (events & (POLLIN|POLLRDNORM)) {
+		struct ifnet *ifp =3D &sc->sc_ec.ec_if;
+		struct mbuf *m;
+		int s;
+
+		/* Peek at the queue, the packet stays on it */
+		s =3D splnet();
+		IFQ_POLL(&ifp->if_snd, m);
+		splx(s);
+
+		if (m !=3D NULL)
+			revents |=3D events & (POLLIN|POLLRDNORM);
+		else {
+			(void)simple_lock(&sc->sc_kqlock);
+			selrecord(p, &sc->sc_rsel);
+			simple_unlock(&sc->sc_kqlock);
+		}
+	}
+	/* Writing never blocks */
+	revents |=3D events & (POLLOUT|POLLWRNORM);
+
+	return (revents);
+}
+
+/*
+ * kqueue filter operations, installed by tap_dev_kqfilter:  the read
+ * filter reports events through tap_kqread, the write filter through
+ * filt_seltrue.  Both are detached by tap_kqdetach.
+ * NOTE(review): the leading 1 and the NULL are presumably the "is a
+ * file descriptor" flag and the unused attach hook -- confirm against
+ * the definition of struct filterops.
+ */
+static struct filterops tap_read_filterops =3D { 1, NULL, tap_kqdetach,
+	tap_kqread };
+static struct filterops tap_seltrue_filterops =3D { 1, NULL, tap_kqdetach,
+	filt_seltrue };
+
+static int
+tap_cdev_kqfilter(dev_t dev, struct knote *kn)
+{
+	return tap_dev_kqfilter(minor(dev), kn);
+}
+
+static int
+tap_fops_kqfilter(struct file *fp, struct knote *kn)
+{
+	return tap_dev_kqfilter((intptr_t)fp->f_data, kn);
+}
+
+static int
+tap_dev_kqfilter(int unit, struct knote *kn)
+{
+	struct tap_softc *sc;
+
+	sc =3D (struct tap_softc *)device_lookup(&tap_cd, unit);
+	if (sc =3D=3D NULL)
+		return (ENXIO);
+
+	/* Only read and write filters are supported */
+	if (kn->kn_filter =3D=3D EVFILT_READ)
+		kn->kn_fop =3D &tap_read_filterops;
+	else if (kn->kn_filter =3D=3D EVFILT_WRITE)
+		kn->kn_fop =3D &tap_seltrue_filterops;
+	else
+		return (1);
+
+	/* The filter operations find the softc through kn_hook */
+	kn->kn_hook =3D sc;
+
+	(void)simple_lock(&sc->sc_kqlock);
+	SLIST_INSERT_HEAD(&sc->sc_rsel.sel_klist, kn, kn_selnext);
+	simple_unlock(&sc->sc_kqlock);
+	return (0);
+}
+
+static void
+tap_kqdetach(struct knote *kn)
+{
+	struct tap_softc *sc;
+
+	/* kn_hook was set to the softc by tap_dev_kqfilter */
+	sc =3D (struct tap_softc *)kn->kn_hook;
+
+	(void)simple_lock(&sc->sc_kqlock);
+	SLIST_REMOVE(&sc->sc_rsel.sel_klist, kn, knote, kn_selnext);
+	simple_unlock(&sc->sc_kqlock);
+}
+
+static int
+tap_kqread(struct knote *kn, long hint)
+{
+	struct tap_softc *sc =3D (struct tap_softc *)kn->kn_hook;
+	struct ifnet *ifp =3D &sc->sc_ec.ec_if;
+	struct mbuf *head;
+	int s;
+
+	/* kn_data is the length of the first queued packet, if any */
+	s =3D splnet();
+	IFQ_POLL(&ifp->if_snd, head);
+	kn->kn_data =3D (head !=3D NULL) ? head->m_pkthdr.len : 0;
+	splx(s);
+
+	return (kn->kn_data =3D=3D 0 ? 0 : 1);
+}
+
+/*
+ * sysctl management routines
+ * You can set the address of an interface through:
+ * net.link.tap.tap<number>
+ *
+ * Note the consistent use of tap_log in order to use
+ * sysctl_teardown at unload time.
+ *
+ * In the kernel you will find a lot of SYSCTL_SETUP blocks.  Those
+ * blocks register a function in a special section of the kernel
+ * (called a link set) which is used at init_sysctl() time to cycle
+ * through all those functions to create the kernel's sysctl tree.
+ *
+ * It is not (currently) possible to use link sets in a LKM, so the
+ * easiest is to simply call our own setup routine at load time.
+ *
+ * In the SYSCTL_SETUP blocks you find in the kernel, nodes have the
+ * CTLFLAG_PERMANENT flag, meaning they cannot be removed.  Once the
+ * whole kernel sysctl tree is built, it is not possible to add any
+ * permanent node.
+ *
+ * It should be noted that we're not saving the sysctlnode pointer
+ * we are returned when creating the "tap" node.  That structure
+ * cannot be trusted once out of the calling function, as it might
+ * get reused.  So we just save the MIB number, and always give the
+ * full path starting from the root for later calls to sysctl_createv
+ * and sysctl_destroyv.
+ */
+SYSCTL_SETUP(sysctl_tap_setup, "sysctl net.link.tap subtree setup")
+{
+	struct sysctlnode *node;
+	int error =3D 0;
+
+	if ((error =3D sysctl_createv(clog, 0, NULL, NULL,
+	    CTLFLAG_PERMANENT,
+	    CTLTYPE_NODE, "net", NULL,
+	    NULL, 0, NULL, 0,
+	    CTL_NET, CTL_EOL)) !=3D 0)
+		return;
+
+	if ((error =3D sysctl_createv(clog, 0, NULL, NULL,
+	    CTLFLAG_PERMANENT,
+	    CTLTYPE_NODE, "link", NULL,
+	    NULL, 0, NULL, 0,
+	    CTL_NET, PF_LINK, CTL_EOL)) !=3D 0)
+		return;
+
+	/*
+	 * The first four parameters of sysctl_createv are for management.
+	 *
+	 * The four that follows, here starting with a '0' for the flags,
+	 * describe the node.
+	 *
+	 * The next series of four set its value, through various possible
+	 * means.
+	 *
+	 * Last but not least, the path to the node is described.  That path
+	 * is relative to the given root (third argument).  Here we're
+	 * starting from the root.
+	 */
+	if ((error =3D sysctl_createv(clog, 0, NULL, &node,
+	    CTLFLAG_PERMANENT,
+	    CTLTYPE_NODE, "tap", NULL,
+	    NULL, 0, NULL, 0,
+	    CTL_NET, PF_LINK, CTL_CREATE, CTL_EOL)) !=3D 0)
+		return;
+	tap_node =3D node->sysctl_num;
+}
+
+/*
+ * The helper functions make Andrew Brown's interface really
+ * shine.  They make it possible to create a value on the fly,
+ * whether the sysctl value is read or written.
+ *
+ * As shown as an example in the man page, the first step is to
+ * create a copy of the node to have sysctl_lookup work on it.
+ *
+ * Here, we have more work to do than just a copy, since we have
+ * to create the string.  The first step is to collect the actual
+ * value of the node, which is a convenient pointer to the softc
+ * of the interface.  From there we create the string and use it
+ * as the value, but only for the *copy* of the node.
+ *
+ * Then we let sysctl_lookup do the magic, which consists in
+ * setting oldp and newp as required by the operation.  When the
+ * value is read, that means that the string will be copied to
+ * the user, and when it is written, the new value will be copied
+ * over in the addr array.
+ *
+ * If newp is NULL, the user was reading the value, so we don't
+ * have anything else to do.  If a new value was written, we
+ * have to check it.
+ *
+ * If it is incorrect, we can return an error and leave 'node' as
+ * it is:  since it is a copy of the actual node, the change will
+ * be forgotten.
+ *
+ * Upon a correct input, we commit the change to the ifnet
+ * structure of our interface.
+ */
+static int
+tap_sysctl_handler(SYSCTLFN_ARGS)
+{
+	struct sysctlnode node;
+	struct tap_softc *sc;
+	struct ifnet *ifp;
+	int error;
+	size_t len;
+	/* "xx:xx:xx:xx:xx:xx" and its terminating NUL */
+	char addr[18];
+
+	/* Work on a copy:  the real node keeps pointing to the softc */
+	node =3D *rnode;
+	sc =3D node.sysctl_data;
+	ifp =3D &sc->sc_ec.ec_if;
+	/* The copy gets the printable form of the current address */
+	(void)tap_ether_sprintf(addr, LLADDR(ifp->if_sadl));
+	node.sysctl_data =3D addr;
+	error =3D sysctl_lookup(SYSCTLFN_CALL(&node));
+	/* A plain read (newp is NULL) or a failure ends here */
+	if (error || newp =3D=3D NULL)
+		return (error);
+
+	/* From "x:x:x:x:x:x" (11) to "xx:xx:xx:xx:xx:xx" (17) */
+	len =3D strlen(addr);
+	if (len < 11 || len > 17)
+		return (EINVAL);
+
+	/* Commit change */
+	if (tap_ether_aton(LLADDR(ifp->if_sadl), addr) !=3D 0)
+		return (EINVAL);
+	return (error);
+}
+
+/*
+ * ether_aton implementation, not using a static buffer.
+ *
+ * str must be made of six groups of one or two hexadecimal digits,
+ * separated by ':' and followed by the end of the string.  On
+ * success the six bytes are stored in dest and 0 is returned.  On a
+ * malformed string 1 is returned and dest is left untouched.  str
+ * itself is only read, never modified.
+ */
+static int
+tap_ether_aton(u_char *dest, char *str)
+{
+	const char *cp =3D str;
+	u_char val[6];
+	int i, j, digit;
+
+	for (i =3D 0; i < 6; i++) {
+		/* One or two hexadecimal digits per byte */
+		val[i] =3D 0;
+		for (j =3D 0; j < 2 && isxdigit((u_char)*cp); j++, cp++) {
+			if (*cp >=3D '0' && *cp <=3D '9')
+				digit =3D *cp - '0';
+			else if (*cp >=3D 'a')
+				digit =3D *cp - 'a' + 10;
+			else
+				digit =3D *cp - 'A' + 10;
+			val[i] =3D val[i] * 16 + digit;
+		}
+		if (j =3D=3D 0)
+			return (1);
+
+		/* ':' between bytes, end of string after the last one */
+		if (*cp++ !=3D (i < 5 ? ':' : '\0'))
+			return (1);
+	}
+	memcpy(dest, val, 6);
+	return (0);
+}
+
+/*
+ * ether_sprintf made thread-safer.
+ *
+ * Copied over from sys/net/if_ethersubr.c, with a change to avoid the use
+ * of a static buffer.
+ */
+
+/*
+ * Copyright (c) 1982, 1989, 1993
+ *      The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURP=
OSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENT=
IAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STR=
ICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY W=
AY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *      @(#)if_ethersubr.c      8.2 (Berkeley) 4/4/96
+ */
+
+static char digits[] =3D "0123456789abcdef";
+
+/*
+ * Write the six bytes at ap to dest as "xx:xx:xx:xx:xx:xx", in
+ * lowercase and NUL-terminated (18 bytes in total).  Returns dest.
+ */
+static char *
+tap_ether_sprintf(char *dest, const u_char *ap)
+{
+	char *out =3D dest;
+	int n;
+
+	for (n =3D 0; n < 6; n++, ap++) {
+		if (n > 0)
+			*out++ =3D ':';
+		*out++ =3D digits[*ap >> 4];
+		*out++ =3D digits[*ap & 0xf];
+	}
+	*out =3D '\0';
+	return (dest);
+}
--- /dev/null	Sat Jan  1 04:38:12 2005
+++ sys/net/if_tap.h	Mon Dec 20 18:37:55 2004
@@ -0,0 +1,40 @@
+/*	$NetBSD$	*/
+
+/*
+ *  Copyright (c) 2004 The NetBSD Foundation.
+ *  All rights reserved.
+ *
+ *  This code is derived from software contributed to the NetBSD Foundation
+ *   by Quentin Garnier.
+ *=20
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. All advertising materials mentioning features or use of this softwa=
re
+ *     must display the following acknowledgement:
+ *         This product includes software developed by the NetBSD
+ *         Foundation, Inc. and its contributors.
+ *  4. Neither the name of The NetBSD Foundation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *=20
+ *  THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUT=
ORS
+ *  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LI=
MITED
+ *  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTIC=
ULAR
+ *  PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUT=
ORS
+ *  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINE=
SS
+ *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF=
 THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Get the interface name; 'e' comes from the former name 'ethfoo' */
+#define TAPGIFNAME	_IOR('e', 0, struct ifreq)

--NAuYj0K7Umiq33rm--

--Z8pjODCYYz+3zRe3
Content-Type: application/pgp-signature
Content-Disposition: inline

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.2.6 (NetBSD)

iQEVAwUBQdjUvdgoQloHrPnoAQJC/Af/VNOUXwscjzAxheEPZ3B7qADnIP5cNmcp
bYWvUleR5XI19zfC+ZxWwoXHdqPwFbewHL5vg/zmntA4mD1My9yTlpuW46E3BzoN
lo2HjdWWOQBPiyCQOy4JyIoYHttf/dHZjXGZ/zsyR+2uKm96UHgbDWJG0QhKxa1r
AIS7Pqx5WWCndO6p5/YhSdNnMCR58IEObai6ABwDN2Ky6rIFEnzaa6ttE3/1BTuI
/eC5XK1MYum4zZF2bnOtM/U9o7FKYwZLIhIo4KL5Yqr1znyJW8MJK9t8TailjbIL
52wuFV6A4J6t6/L9+95CeLOl7+Hg/HZmoE6UHR8nPnYWA3+uK7thqw==
=9QkH
-----END PGP SIGNATURE-----

--Z8pjODCYYz+3zRe3--