Subject: Re: /dev/tap and tcpdump don't go together very well? [conclusion & diff]
To: None <current-users@NetBSD.org>
From: Rhialto <rhialto@falu.nl>
List: current-users
Date: 04/08/2007 22:08:47
Possible conclusions from this thead:
- It may make sense to allow opening /dev/tap write-only. All packets
written from the kernel side should be dropped (and sent to bpf).
See attached diff. That plus opening /dev/tap O_WRONLY keeps the
OACTIVE off and tcpdump sees some icmp6 packets that apparently get
sent.
- Maybe tap should silently never set the IFF_PROMISC flag since it
already is, and setting it has unexpected side effects.
See diff, but untested.
- Other (hardware) interfaces should be able to change their MAC address
too.
Index: if_tap.c
===================================================================
RCS file: /cvsroot/src/sys/net/if_tap.c,v
retrieving revision 1.24
diff -u -u -r1.24 if_tap.c
--- if_tap.c 24 Nov 2006 01:04:30 -0000 1.24
+++ if_tap.c 8 Apr 2007 19:42:42 -0000
@@ -108,6 +108,7 @@
#define TAP_ASYNCIO 0x00000002 /* user is using async I/O (SIGIO) on the device */
#define TAP_NBIO 0x00000004 /* user wants calls to avoid blocking */
#define TAP_GOING 0x00000008 /* interface is being destroyed */
+#define TAP_WRITEONLY 0x00000010 /* no packets from kernel side */
struct selinfo sc_rsel;
pid_t sc_pgid; /* For async. IO */
struct lock sc_rdlock;
@@ -157,7 +158,7 @@
};
/* Helper for cloning open() */
-static int tap_dev_cloner(struct lwp *);
+static int tap_dev_cloner(struct lwp *, int);
/* Character device routines */
static int tap_cdev_open(dev_t, int, int, struct lwp *);
@@ -451,7 +452,8 @@
struct tap_softc *sc = (struct tap_softc *)ifp->if_softc;
struct mbuf *m0;
- if ((sc->sc_flags & TAP_INUSE) == 0) {
+ if ((sc->sc_flags & TAP_INUSE) == 0 ||
+ (sc->sc_flags & TAP_WRITEONLY) != 0) {
/* Simply drop packets */
for(;;) {
IFQ_DEQUEUE(&ifp->if_snd, m0);
@@ -502,6 +504,10 @@
case SIOCSIFPHYADDR:
error = tap_lifaddr(ifp, cmd, (struct ifaliasreq *)data);
break;
+ case SIOCSIFFLAGS:
+ ifp->if_flags &= ~IFF_PROMISC;
+ error = 0;
+ break;
default:
error = ether_ioctl(ifp, cmd, data);
if (error == ENETRESET)
@@ -541,7 +547,7 @@
{
ifp->if_flags |= IFF_RUNNING;
- tap_start(ifp);
+ /*tap_start(ifp);*/
return (0);
}
@@ -664,7 +670,7 @@
struct tap_softc *sc;
if (minor(dev) == TAP_CLONER)
- return tap_dev_cloner(l);
+ return tap_dev_cloner(l, flags);
sc = (struct tap_softc *)device_lookup(&tap_cd, minor(dev));
if (sc == NULL)
@@ -674,6 +680,8 @@
if (sc->sc_flags & TAP_INUSE)
return (EBUSY);
sc->sc_flags |= TAP_INUSE;
+ if ((flags & FREAD) == 0)
+ sc->sc_flags |= TAP_WRITEONLY;
return (0);
}
@@ -700,7 +708,7 @@
*/
static int
-tap_dev_cloner(struct lwp *l)
+tap_dev_cloner(struct lwp *l, int flags)
{
struct tap_softc *sc;
struct file *fp;
@@ -716,8 +724,10 @@
}
sc->sc_flags |= TAP_INUSE;
+ if ((flags & FREAD) == 0)
+ sc->sc_flags |= TAP_WRITEONLY;
- return fdclone(l, fp, fd, FREAD|FWRITE, &tap_fileops,
+ return fdclone(l, fp, fd, flags & (FREAD|FWRITE), &tap_fileops,
(void *)(intptr_t)device_unit(&sc->sc_dev));
}
Index: tap.4
===================================================================
RCS file: /cvsroot/src/share/man/man4/tap.4,v
retrieving revision 1.6
diff -u -u -r1.6 tap.4
--- tap.4 18 Dec 2006 00:16:10 -0000 1.6
+++ tap.4 8 Apr 2007 19:59:16 -0000
@@ -125,6 +125,21 @@
buffer.
If the buffer is not large enough, the frame will be truncated.
.Pp
+If you don't intend to read frames from the
+.Nm
+interface, open the special file with access mode O_WRONLY.
+In that case, frames sent by the kernel are immediately discarded
+rather than queued indefinitely.
+.Pp
+If you use
+.Xr bpf 4 ,
+for instance through
+.Xr tcpdump 8 ,
+frames from the kernel side to the userland side are produced when
+they are received, not when they are sent.
+They are also produced when discarded from the interface queue,
+either because the device is destroyed or because it was opened O_WRONLY.
+.Pp
.Nm
character devices support the
.Dv FIONREAD
-Olaf.
--
___ Olaf 'Rhialto' Seibert -- You author it, and I'll reader it.
\X/ rhialto/at/xs4all.nl -- Cetero censeo "authored" delendum esse.