Subject: Re: /dev/tap and tcpdump don't go together very well? [conclusion & diff]
To: None <current-users@NetBSD.org>
From: Rhialto <rhialto@falu.nl>
List: current-users
Date: 04/08/2007 22:08:47
Possible conclusions from this thead:

- It may make sense to allow opening /dev/tap write-only. All packets
  written from the kernel side should be dropped (and sent to bpf).
  See attached diff. That plus opening /dev/tap O_WRONLY keeps the
  OACTIVE off and tcpdump sees some icmp6 packets that apparently get
  sent.

- Maybe tap should silently never set the IFF_PROMISC flag since it
  already is, and setting it has unexpected side effects.
  See diff, but untested.

- Other (hardware) interfaces should be able to change their MAC address
  too.

Index: if_tap.c
===================================================================
RCS file: /cvsroot/src/sys/net/if_tap.c,v
retrieving revision 1.24
diff -u -u -r1.24 if_tap.c
--- if_tap.c	24 Nov 2006 01:04:30 -0000	1.24
+++ if_tap.c	8 Apr 2007 19:42:42 -0000
@@ -108,6 +108,7 @@
 #define TAP_ASYNCIO	0x00000002	/* user is using async I/O (SIGIO) on the device */
 #define TAP_NBIO	0x00000004	/* user wants calls to avoid blocking */
 #define TAP_GOING	0x00000008	/* interface is being destroyed */
+#define TAP_WRITEONLY	0x00000010	/* no packets from kernel side */
 	struct selinfo	sc_rsel;
 	pid_t		sc_pgid; /* For async. IO */
 	struct lock	sc_rdlock;
@@ -157,7 +158,7 @@
 };
 
 /* Helper for cloning open() */
-static int	tap_dev_cloner(struct lwp *);
+static int	tap_dev_cloner(struct lwp *, int);
 
 /* Character device routines */
 static int	tap_cdev_open(dev_t, int, int, struct lwp *);
@@ -451,7 +452,8 @@
 	struct tap_softc *sc = (struct tap_softc *)ifp->if_softc;
 	struct mbuf *m0;
 
-	if ((sc->sc_flags & TAP_INUSE) == 0) {
+	if ((sc->sc_flags & TAP_INUSE) == 0 ||
+	    (sc->sc_flags & TAP_WRITEONLY) != 0) {
 		/* Simply drop packets */
 		for(;;) {
 			IFQ_DEQUEUE(&ifp->if_snd, m0);
@@ -502,6 +504,10 @@
 	case SIOCSIFPHYADDR:
 		error = tap_lifaddr(ifp, cmd, (struct ifaliasreq *)data);
 		break;
+	case SIOCSIFFLAGS:
+		ifp->if_flags &= ~IFF_PROMISC;
+		error = 0;
+		break;
 	default:
 		error = ether_ioctl(ifp, cmd, data);
 		if (error == ENETRESET)
@@ -541,7 +547,7 @@
 {
 	ifp->if_flags |= IFF_RUNNING;
 
-	tap_start(ifp);
+	/*tap_start(ifp);*/
 
 	return (0);
 }
@@ -664,7 +670,7 @@
 	struct tap_softc *sc;
 
 	if (minor(dev) == TAP_CLONER)
-		return tap_dev_cloner(l);
+		return tap_dev_cloner(l, flags);
 
 	sc = (struct tap_softc *)device_lookup(&tap_cd, minor(dev));
 	if (sc == NULL)
@@ -674,6 +680,8 @@
 	if (sc->sc_flags & TAP_INUSE)
 		return (EBUSY);
 	sc->sc_flags |= TAP_INUSE;
+	if ((flags & FREAD) == 0)
+	    sc->sc_flags |= TAP_WRITEONLY;
 	return (0);
 }
 
@@ -700,7 +708,7 @@
  */
 
 static int
-tap_dev_cloner(struct lwp *l)
+tap_dev_cloner(struct lwp *l, int flags)
 {
 	struct tap_softc *sc;
 	struct file *fp;
@@ -716,8 +724,10 @@
 	}
 
 	sc->sc_flags |= TAP_INUSE;
+	if ((flags & FREAD) == 0)
+	    sc->sc_flags |= TAP_WRITEONLY;
 
-	return fdclone(l, fp, fd, FREAD|FWRITE, &tap_fileops,
+	return fdclone(l, fp, fd, flags & (FREAD|FWRITE), &tap_fileops,
 	    (void *)(intptr_t)device_unit(&sc->sc_dev));
 }


Index: tap.4
===================================================================
RCS file: /cvsroot/src/share/man/man4/tap.4,v
retrieving revision 1.6
diff -u -u -r1.6 tap.4
--- tap.4	18 Dec 2006 00:16:10 -0000	1.6
+++ tap.4	8 Apr 2007 19:59:16 -0000
@@ -125,6 +125,21 @@
 buffer.
 If the buffer is not large enough, the frame will be truncated.
 .Pp
+If you don't intend to read frames from the
+.Nm
+interface, open the special file with access mode O_WRONLY.
+In that case, frames sent by the kernel are immediately discarded
+rather than queued indefinitely.
+.Pp
+If you use
+.Xr bpf 4 ,
+for instance through
+.Xr tcpdump 8 ,
+frames from the kernel side to the userland side are produced when
+they are received, not when they are sent.
+They are also produced when discarded from the interface queue,
+either because the device is destroyed or because it was opened O_WRONLY.
+.Pp
 .Nm
 character devices support the
 .Dv FIONREAD
 
-Olaf.
-- 
___ Olaf 'Rhialto' Seibert      -- You author it, and I'll reader it.
\X/ rhialto/at/xs4all.nl        -- Cetero censeo "authored" delendum esse.