Subject: proposed fix for stray ifnet pointers from interface deletion
To: None <tech-net@netbsd.org>
From: Greg Troxel <gdt@ir.bbn.com>
List: tech-net
Date: 07/12/2005 14:28:05
I wrote earlier about crashes due to stray ifnet pointers and
interface deletion (pppd), and filed a PR:

  http://www.netbsd.org/cgi-bin/query-pr-single.pl?number=29580

I've cleaned up the patch a bit and would like to commit it and am
soliciting comments first.  I compiled this Monday's current with the
patch, and have run it on a normal i386 box with no coming/going
interfaces, with ifconfig gif0 destroy (with quagga/ospfd running on
gif0), and on a laptop with a wi(4) that I ejected while quagga/ospfd
was running on it.  I've been running with essentially this patch
against 2.99.15 on two machines with a ppp link and they no longer
crash when the line goes down.

Further cleanups are probably in order: adding PRU_PURGEIF on other
protocols that can store ifnet pointers, and perhaps not running the
first purge on interfaces with addresses - since it has to be done
anyway.  But, I'm trying  to make a conservative change, looking
forward to requesting a pullup for netbsd-3 and probably netbsd-2.

Index: sys/sys/protosw.h
===================================================================
RCS file: /cvsroot/src/sys/sys/protosw.h,v
retrieving revision 1.35
diff -u -r1.35 protosw.h
--- sys/sys/protosw.h	22 Apr 2004 01:34:17 -0000	1.35
+++ sys/sys/protosw.h	12 Jul 2005 17:23:03 -0000
@@ -114,6 +114,9 @@
 #define	PR_LASTHDR	0x40		/* enforce ipsec policy; last header */
 #define	PR_ABRTACPTDIS	0x80		/* abort on accept(2) to disconnected
 					   socket */
+#define PR_PURGEIF	0x100		/* might store struct ifnet pointer;
+					   PRU_PURGEIF must be called on ifnet
+					   deletion */
 
 /*
  * The arguments to usrreq are:
Index: sys/net/if.c
===================================================================
RCS file: /cvsroot/src/sys/net/if.c,v
retrieving revision 1.159
diff -u -r1.159 if.c
--- sys/net/if.c	22 Jun 2005 06:16:02 -0000	1.159
+++ sys/net/if.c	12 Jul 2005 17:23:05 -0000
@@ -618,6 +618,13 @@
 			panic("if_detach: no domain for AF %d",
 			    family);
 #endif
+		/*
+		 * XXX These PURGEIF calls are redundant with the
+		 * purge-all-families calls below, but are left in for
+		 * now both to make a smaller change, and to avoid
+		 * unplanned interactions with clearing of
+		 * ifp->if_addrlist.
+		 */
 		purged = 0;
 		for (pr = dp->dom_protosw;
 		     pr < dp->dom_protoswNPROTOSW; pr++) {
@@ -652,6 +659,29 @@
 		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
 			(*dp->dom_ifdetach)(ifp,
 			    ifp->if_afdata[dp->dom_family]);
+
+		/*
+		 * One would expect multicast memberships (INET and
+		 * INET6) on UDP sockets to be purged by the PURGEIF
+		 * calls above, but if all addresses were removed from
+		 * the interface prior to destruction, the calls will
+		 * not be made (e.g. ppp, for which pppd(8) generally
+		 * removes addresses before destroying the interface).
+		 * Because there is no invariant that multicast
+		 * memberships only exist for interfaces with IPv4
+		 * addresses, we must call PURGEIF regardless of
+		 * addresses.  (Protocols which might store ifnet
+		 * pointers are marked with PR_PURGEIF.)
+		 */
+		for (pr = dp->dom_protosw;
+		     pr < dp->dom_protoswNPROTOSW; pr++) {
+			so.so_proto = pr;
+			if (pr->pr_usrreq != NULL &&
+			    pr->pr_flags & PR_PURGEIF)
+				(void) (*pr->pr_usrreq)(&so,
+				    PRU_PURGEIF, NULL, NULL,
+				    (struct mbuf *) ifp, curproc);
+		}
 	}
 
 	/* Announce that the interface is gone. */
Index: sys/netinet/in_proto.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/in_proto.c,v
retrieving revision 1.69
diff -u -r1.69 in_proto.c
--- sys/netinet/in_proto.c	29 Apr 2005 10:39:09 -0000	1.69
+++ sys/netinet/in_proto.c	12 Jul 2005 17:23:06 -0000
@@ -157,17 +157,17 @@
   0,
   ip_init,	0,		ip_slowtimo,	ip_drain,	NULL
 },
-{ SOCK_DGRAM,	&inetdomain,	IPPROTO_UDP,	PR_ATOMIC|PR_ADDR,
+{ SOCK_DGRAM,	&inetdomain,	IPPROTO_UDP,	PR_ATOMIC|PR_ADDR|PR_PURGEIF,
   udp_input,	0,		udp_ctlinput,	udp_ctloutput,
   udp_usrreq,
   udp_init,	0,		0,		0,		NULL
 },
-{ SOCK_STREAM,	&inetdomain,	IPPROTO_TCP,	PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS,
+{ SOCK_STREAM,	&inetdomain,	IPPROTO_TCP,	PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS|PR_PURGEIF,
   tcp_input,	0,		tcp_ctlinput,	tcp_ctloutput,
   tcp_usrreq,
   tcp_init,	0,		tcp_slowtimo,	tcp_drain,	NULL
 },
-{ SOCK_RAW,	&inetdomain,	IPPROTO_RAW,	PR_ATOMIC|PR_ADDR,
+{ SOCK_RAW,	&inetdomain,	IPPROTO_RAW,	PR_ATOMIC|PR_ADDR|PR_PURGEIF,
   rip_input,	rip_output,	rip_ctlinput,	rip_ctloutput,
   rip_usrreq,
   0,		0,		0,		0,
Index: sys/netinet6/in6_proto.c
===================================================================
RCS file: /cvsroot/src/sys/netinet6/in6_proto.c,v
retrieving revision 1.59
diff -u -r1.59 in6_proto.c
--- sys/netinet6/in6_proto.c	29 May 2005 21:43:51 -0000	1.59
+++ sys/netinet6/in6_proto.c	12 Jul 2005 17:23:06 -0000
@@ -136,13 +136,13 @@
   ip6_init,	0,		frag6_slowtimo,	frag6_drain,
   NULL,
 },
-{ SOCK_DGRAM,	&inet6domain,	IPPROTO_UDP,	PR_ATOMIC|PR_ADDR,
+{ SOCK_DGRAM,	&inet6domain,	IPPROTO_UDP,	PR_ATOMIC|PR_ADDR|PR_PURGEIF,
   udp6_input,	0,		udp6_ctlinput,	ip6_ctloutput,
   udp6_usrreq,	udp6_init,
   0,		0,		0,
   NULL,
 },
-{ SOCK_STREAM,	&inet6domain,	IPPROTO_TCP,	PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS,
+{ SOCK_STREAM,	&inet6domain,	IPPROTO_TCP,	PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS|PR_PURGEIF,
   tcp6_input,	0,		tcp6_ctlinput,	tcp_ctloutput,
   tcp_usrreq,
 #ifdef INET	/* don't call initialization and timeout routines twice */
@@ -152,7 +152,7 @@
 #endif
   NULL,
 },
-{ SOCK_RAW,	&inet6domain,	IPPROTO_RAW,	PR_ATOMIC|PR_ADDR,
+{ SOCK_RAW,	&inet6domain,	IPPROTO_RAW,	PR_ATOMIC|PR_ADDR|PR_PURGEIF,
   rip6_input,	rip6_output,	rip6_ctlinput,	rip6_ctloutput,
   rip6_usrreq,
   0,		0,		0,		0,

-- 
        Greg Troxel <gdt@ir.bbn.com>