NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

kern/41114: soft interrupt queue patch for BRIDGE_IPF to prevent lock issues



>Number:         41114
>Category:       kern
>Synopsis:       soft interrupt queue patch for BRIDGE_IPF to prevent lock 
>issues
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    kern-bug-people
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Wed Apr 01 05:00:01 +0000 2009
>Originator:     Peter
>Release:        current 5.99.9
>Organization:
>Environment:
NetBSD 5.99.9 alpha
>Description:
Using BRIDGE_IPF causes nearly immediate lock errors.  I asked on current users 
and apparently others had had this problem and it was resulting from 
pfil_run_hooks on inet_pfil_hooks being unsafe for interrupts.

So I wrote this which fixes my lock issues though it might have its own issues.
Changes:
    bridge_enqueue and bridge_forward are put on a netisr soft interrupt queue 
using IF_QUEUE and stuffing the packet, and bridge information into an mbuf.  I 
didn't like the idea of allocating another mbuf, but didn't see any good 
alternative for using what little I know of the existing kernel infrastructure 
to do this.  If bridge filtering is off, the soft interrupts are not used so 
there's no performance penalty.

   stopping and starting the bridge clears if_bridge pointers in interfaces 
that have been added to the bridge.  Previously the bridge code was called even 
if the bridge was down and then the bridge would either return or route it.  
This seemed strange.  If the bridge is down, the packet should be handled as 
normal by that interface.

   bridge_output previously passed runfilt=0 to bridge_enqueue meaning that 
outgoing local packets were not filtered.  This seemed strange so I put it to 
1.  If bridge filtering is on, then you would want it on for local packets as 
well I would assume.

Caveats:
   As I said, I don't like allocating another mbuf, so it seems like that could 
be an unnecessary performance hit.  But since it didn't work at all before, 
this seems like a step in the right direction.

   I am fairly certain this is unconnected to the bridge code, but it appears 
that, right now in current, if you use "dup-to" and "keep state" on the same 
rule on an interface that has an assigned ip address, it locks the system up.  
I have experienced this with the bridge both down and up and with fair 
consistency but I have yet to compile a kernel without any bridge code to check 
if it is there as well.
>How-To-Repeat:
...
>Fix:
Index: sys/net/if_bridge.c
===================================================================
RCS file: /cvsroot/src/sys/net/if_bridge.c,v
retrieving revision 1.64
diff -u -r1.64 if_bridge.c
--- sys/net/if_bridge.c 18 Jan 2009 10:28:55 -0000      1.64
+++ sys/net/if_bridge.c 1 Apr 2009 04:28:51 -0000
@@ -111,6 +111,7 @@
 
 #if defined(BRIDGE_IPF) && defined(PFIL_HOOKS)
 /* Used for bridge_ip[6]_checkbasic */
+#include <net/netisr.h>
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
@@ -186,6 +187,10 @@
 static void    bridge_start(struct ifnet *);
 
 static void    bridge_forward(struct bridge_softc *, struct mbuf *m);
+void   bridge_forward_soft(struct bridge_softc *, struct ifnet * dst_ifp,
+                       struct mbuf *m);
+void   bridge_enqueue_soft(struct bridge_softc *sc, struct ifnet *dst_ifp, 
+                       struct mbuf *m, int runfilt);
 
 static void    bridge_timer(void *);
 
@@ -242,6 +247,7 @@
 static int     bridge_ioctl_sifprio(struct bridge_softc *, void *);
 static int     bridge_ioctl_sifcost(struct bridge_softc *, void *);
 #if defined(BRIDGE_IPF) && defined(PFIL_HOOKS)
+extern void bridgeipfintr(void);
 static int     bridge_ioctl_gfilt(struct bridge_softc *, void *);
 static int     bridge_ioctl_sfilt(struct bridge_softc *, void *);
 static int     bridge_ipf(void *, struct mbuf **, struct ifnet *, int);
@@ -249,6 +255,24 @@
 # ifdef INET6
 static int     bridge_ip6_checkbasic(struct mbuf **mp);
 # endif /* INET6 */
+#define BRIDGE_FORWARDING              0
+#define BRIDGE_ENQUEUING               1
+#define FILL_BRIDGESC_MBUF(m, sc, dst, chk, f) \
+       m = m_get(M_DONTWAIT, MT_DATA); \
+       if(m) { \
+               bcopy(&sc, mtod(m, char *), sizeof(struct bridge_softc *)); \
+               bcopy(&dst, mtod(m, char *) + sizeof(struct bridge_softc *), 
sizeof(struct ifnet *)); \
+               bcopy(&chk, mtod(m, char *) + sizeof(struct bridge_softc *) + 
sizeof(struct ifnet *), sizeof(struct mbuf *)); \
+               mtod(m, char *)[sizeof(struct bridge_softc *) + sizeof(struct 
ifnet *) + sizeof(struct mbuf *)] = f; } \
+
+
+#define GET_BRIDGEDIR_FROM_MBUF(m) mtod(m, char *)[sizeof(struct bridge_softc 
*) + sizeof(struct ifnet *) + sizeof(struct mbuf *)]
+
+#define GET_AND_FREE_BRIDGESC_MBUF(m, sc, dst, chk) \
+       bcopy(mtod(m, char *), sc, sizeof(struct bridge_softc *)); \
+       bcopy(mtod(m, char *) + sizeof(struct bridge_softc *), dst, 
sizeof(struct ifnet *)); \
+       bcopy(mtod(m, char *) + sizeof(struct bridge_softc *) + sizeof(struct 
ifnet *), chk, sizeof(struct mbuf *)); \
+       m_freem(m);
 #endif /* BRIDGE_IPF && PFIL_HOOKS */
 
 struct bridge_control {
@@ -310,6 +334,10 @@
 static struct if_clone bridge_cloner =
     IF_CLONE_INITIALIZER("bridge", bridge_clone_create, bridge_clone_destroy);
 
+#if defined(BRIDGE_IPF) && defined(PFIL_HOOKS)
+struct ifqueue bridgeipfintrq;
+#endif /* BRIDGE_IPF && PFIL_HOOKS */
+
 /*
  * bridgeattach:
  *
@@ -637,7 +665,8 @@
        bif->bif_priority = BSTP_DEFAULT_PORT_PRIORITY;
        bif->bif_path_cost = BSTP_DEFAULT_PATH_COST;
 
-       ifs->if_bridge = sc;
+       if (sc->sc_if.if_flags & IFF_RUNNING)
+               ifs->if_bridge = sc;
        LIST_INSERT_HEAD(&sc->sc_iflist, bif, bif_next);
 
        if (sc->sc_if.if_flags & IFF_RUNNING)
@@ -1011,10 +1040,16 @@
        oflags = sc->sc_filter_flags;
 
        if ((nflags & IFBF_FILT_USEIPF) && !(oflags & IFBF_FILT_USEIPF)) {
+               bridgeipfintrq.ifq_head = NULL;
+               bridgeipfintrq.ifq_tail = NULL;
+               bridgeipfintrq.ifq_len = 0;
+               bridgeipfintrq.ifq_maxlen = IFQ_MAXLEN;
+               bridgeipfintrq.ifq_drops = 0;
                pfil_add_hook((void *)bridge_ipf, NULL, PFIL_IN|PFIL_OUT,
                        &sc->sc_if.if_pfil);
        }
        if (!(nflags & IFBF_FILT_USEIPF) && (oflags & IFBF_FILT_USEIPF)) {
+               IF_PURGE(&bridgeipfintrq);
                pfil_remove_hook((void *)bridge_ipf, NULL, PFIL_IN|PFIL_OUT,
                        &sc->sc_if.if_pfil);
        }
@@ -1070,15 +1105,22 @@
 bridge_init(struct ifnet *ifp)
 {
        struct bridge_softc *sc = ifp->if_softc;
+       struct bridge_iflist *bif;
+
 
        if (ifp->if_flags & IFF_RUNNING)
                return (0);
 
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               bif->bif_ifp->if_bridge = sc;
+       }
+
        callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz,
            bridge_timer, sc);
 
        ifp->if_flags |= IFF_RUNNING;
        bstp_initialization(sc);
+
        return (0);
 }
 
@@ -1091,14 +1133,22 @@
 bridge_stop(struct ifnet *ifp, int disable)
 {
        struct bridge_softc *sc = ifp->if_softc;
+       struct bridge_iflist *bif;
 
        if ((ifp->if_flags & IFF_RUNNING) == 0)
                return;
 
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               bif->bif_ifp->if_bridge = NULL;
+       }
+
        callout_stop(&sc->sc_brcallout);
        bstp_stop(sc);
 
        IF_PURGE(&ifp->if_snd);
+#if defined(PFIL_HOOKS) && defined(BRIDGE_IPF)
+       IF_PURGE(&bridgeipfintrq);
+#endif /* PFIL_HOOKS && BRIDGE_IPF */
 
        bridge_rtflush(sc, IFBF_FLUSHDYN);
 
@@ -1116,28 +1166,59 @@
 bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m,
     int runfilt)
 {
-       ALTQ_DECL(struct altq_pktattr pktattr;)
-       int len, error;
-       short mflags;
-
+       struct mbuf * sc_mbuf;
+       int s;
        /*
         * Clear any in-bound checksum flags for this packet.
         */
        m->m_pkthdr.csum_flags = 0;
 
 #ifdef PFIL_HOOKS
-       if (runfilt) {
-               if (pfil_run_hooks(&sc->sc_if.if_pfil, &m,
-                   dst_ifp, PFIL_OUT) != 0) {
-                       if (m != NULL)
-                               m_freem(m);
+       if (runfilt && (sc->sc_filter_flags & IFBF_FILT_USEIPF)) {
+#ifdef BRIDGE_IPF
+               if(IF_QFULL(&bridgeipfintrq)) {
+                       IF_DROP(&bridgeipfintrq);
+                       m_freem(m);
+                       return;
+               }
+               FILL_BRIDGESC_MBUF(sc_mbuf, sc, dst_ifp, m, BRIDGE_ENQUEUING);
+               if(sc_mbuf == NULL) {
+                       IF_DROP(&bridgeipfintrq);
+                       m_freem(m);
                        return;
                }
+               s = splnet();
+               schednetisr(NETISR_BRIDGE_IPF);
+               IF_ENQUEUE(&bridgeipfintrq, sc_mbuf);
+               splx(s);
+               return;
+#endif /* BRIDGE_IPF */
+       }
+#endif /* PFIL_HOOKS */
+       bridge_enqueue_soft(sc, dst_ifp, m, runfilt);
+}
+
+void
+bridge_enqueue_soft(struct bridge_softc *sc, struct ifnet *dst_ifp, 
+   struct mbuf *m, int runfilt)
+{
+       ALTQ_DECL(struct altq_pktattr pktattr;)
+       int len, error;
+       short mflags;
+       int s;
+
+#ifdef PFIL_HOOKS
+       if(runfilt) {
+               if (pfil_run_hooks(&sc->sc_if.if_pfil, &m,
+                   dst_ifp, PFIL_OUT) != 0)
+                       return;
                if (m == NULL)
                        return;
        }
 #endif /* PFIL_HOOKS */
 
+       s = splnet();
+
 #ifdef ALTQ
        /*
         * If ALTQ is enabled on the member interface, do
@@ -1158,6 +1239,7 @@
        if (error) {
                /* mbuf is already freed */
                sc->sc_if.if_oerrors++;
+               splx(s);
                return;
        }
 
@@ -1173,6 +1255,7 @@
 
        if ((dst_ifp->if_flags & IFF_OACTIVE) == 0)
                (*dst_ifp->if_start)(dst_ifp);
+       splx(s);
 }
 
 /*
@@ -1206,16 +1289,6 @@
        s = splnet();
 
        /*
-        * If bridge is down, but the original output interface is up,
-        * go ahead and send out that interface.  Otherwise, the packet
-        * is dropped below.
-        */
-       if ((sc->sc_if.if_flags & IFF_RUNNING) == 0) {
-               dst_if = ifp;
-               goto sendunicast;
-       }
-
-       /*
         * If the packet is a multicast, or we don't know a better way to
         * get there, send to all interfaces.
         */
@@ -1260,7 +1333,7 @@
                                }
                        }
 
-                       bridge_enqueue(sc, dst_if, mc, 0);
+                       bridge_enqueue(sc, dst_if, mc, 1);
                }
                if (used == 0)
                        m_freem(m);
@@ -1268,7 +1341,6 @@
                return (0);
        }
 
- sendunicast:
        /*
         * XXX Spanning tree consideration here?
         */
@@ -1279,7 +1351,7 @@
                return (0);
        }
 
-       bridge_enqueue(sc, dst_if, m, 0);
+       bridge_enqueue(sc, dst_if, m, 1);
 
        splx(s);
        return (0);
@@ -1310,6 +1382,7 @@
        struct bridge_iflist *bif;
        struct ifnet *src_if, *dst_if;
        struct ether_header *eh;
+       struct mbuf * sc_mbuf;
 
        src_if = m->m_pkthdr.rcvif;
 
@@ -1382,18 +1455,49 @@
                dst_if = NULL;
        }
 
-#ifdef PFIL_HOOKS
-       if (pfil_run_hooks(&sc->sc_if.if_pfil, &m,
-           m->m_pkthdr.rcvif, PFIL_IN) != 0) {
-               if (m != NULL)
+#if defined(PFIL_HOOKS) && defined(BRIDGE_IPF)
+       if(sc->sc_filter_flags & IFBF_FILT_USEIPF) {
+               int s;
+               if(IF_QFULL(&bridgeipfintrq)) {
+                       IF_DROP(&bridgeipfintrq);
+                       m_freem(m);
+                       return;
+               }
+               FILL_BRIDGESC_MBUF(sc_mbuf, sc, dst_if, m, BRIDGE_FORWARDING);
+               if(sc_mbuf == NULL) {
+                       IF_DROP(&bridgeipfintrq);
                        m_freem(m);
+                       return;
+               }
+               s = splnet();
+               schednetisr(NETISR_BRIDGE_IPF);
+               IF_ENQUEUE(&bridgeipfintrq, sc_mbuf);
+               splx(s);
                return;
        }
+
+#endif /* PFIL_HOOKS && BRIDGE_IPF */
+       bridge_forward_soft(sc, dst_if, m);
+}
+
+
+void
+bridge_forward_soft(struct bridge_softc *sc, struct ifnet * dst_ifp, struct 
mbuf *m)
+{
+       struct bridge_iflist *bif;
+       struct ifnet *src_if;
+
+       src_if = m->m_pkthdr.rcvif;
+
+#ifdef PFIL_HOOKS
+       if (pfil_run_hooks(&sc->sc_if.if_pfil, &m,
+             src_if, PFIL_IN) != 0)
+               return;
        if (m == NULL)
                return;
 #endif /* PFIL_HOOKS */
 
-       if (dst_if == NULL) {
+       if (dst_ifp == NULL) {
                bridge_broadcast(sc, src_if, m);
                return;
        }
@@ -1402,11 +1506,11 @@
         * At this point, we're dealing with a unicast frame
         * going to a different interface.
         */
-       if ((dst_if->if_flags & IFF_RUNNING) == 0) {
+       if ((dst_ifp->if_flags & IFF_RUNNING) == 0) {
                m_freem(m);
                return;
        }
-       bif = bridge_lookup_member_if(sc, dst_if);
+       bif = bridge_lookup_member_if(sc, dst_ifp);
        if (bif == NULL) {
                /* Not a member of the bridge (anymore?) */
                m_freem(m);
@@ -1422,7 +1526,7 @@
                }
        }
 
-       bridge_enqueue(sc, dst_if, m, 1);
+       bridge_enqueue(sc, dst_ifp, m, 1);
 }
 
 /*
@@ -1439,9 +1543,6 @@
        struct ether_header *eh;
        struct mbuf *mc;
 
-       if ((sc->sc_if.if_flags & IFF_RUNNING) == 0)
-               return (m);
-
        bif = bridge_lookup_member_if(sc, ifp);
        if (bif == NULL)
                return (m);
@@ -1495,6 +1596,19 @@
         * Unicast.  Make sure it's not for us.
         */
        LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+               /* We just received a packet that we sent out. */
+               if (memcmp(CLLADDR(bif->bif_ifp->if_sadl), eh->ether_shost,
+                   ETHER_ADDR_LEN) == 0
+#if NCARP > 0
+                   || (bif->bif_ifp->if_carp && 
carp_ourether(bif->bif_ifp->if_carp,
+                       eh, IFT_ETHER, 1) != NULL)
+#endif /* NCARP > 0 */
+                   ) {
+                       m_freem(m);
+                       return (NULL);
+               }
+       }
+       LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
                /* It is destined for us. */
                if (memcmp(CLLADDR(bif->bif_ifp->if_sadl), eh->ether_dhost,
                    ETHER_ADDR_LEN) == 0
@@ -1510,17 +1624,6 @@
                        return (m);
                }
 
-               /* We just received a packet that we sent out. */
-               if (memcmp(CLLADDR(bif->bif_ifp->if_sadl), eh->ether_shost,
-                   ETHER_ADDR_LEN) == 0
-#if NCARP > 0
-                   || (bif->bif_ifp->if_carp && 
carp_ourether(bif->bif_ifp->if_carp,
-                       eh, IFT_ETHER, 1) != NULL)
-#endif /* NCARP > 0 */
-                   ) {
-                       m_freem(m);
-                       return (NULL);
-               }
        }
 
        /* Perform the bridge forwarding function. */
@@ -1942,6 +2045,45 @@
 extern struct pfil_head inet_pfil_hook;                 /* XXX */
 extern struct pfil_head inet6_pfil_hook;                /* XXX */
 
+void bridgeipfintr(void)
+{
+       int s;
+       struct mbuf * sc_mbuf, * m;
+       struct bridge_softc * sc;
+       struct ifnet * dst;
+
+       mutex_enter(softnet_lock);
+       KERNEL_LOCK(1, NULL);
+       while(!IF_IS_EMPTY(&bridgeipfintrq)) {
+               s = splnet();
+               IF_DEQUEUE(&bridgeipfintrq, sc_mbuf);
+               splx(s);
+               if(sc_mbuf == NULL) {
+                       break;
+               }
+               switch(GET_BRIDGEDIR_FROM_MBUF(sc_mbuf)) {
+                       case BRIDGE_FORWARDING:
+                               GET_AND_FREE_BRIDGESC_MBUF(sc_mbuf, &sc, &dst, 
&m);
+                               bridge_forward_soft(sc, dst, m);
+                               break;
+                       case BRIDGE_ENQUEUING:
+                               GET_AND_FREE_BRIDGESC_MBUF(sc_mbuf, &sc, &dst, 
&m);
+                               bridge_enqueue_soft(sc, dst, m, 1);
+                               break;
+                       default:
+                               /* m unreliable here, so not freed */
+                               m_freem(sc_mbuf);
+#ifdef DIAGNOSTIC
+                               printf("WARNING: bridgeipfintr, dropping\n");
+#endif /* DIAGNOSTIC */
+                               /* error */
+                               break;
+               }
+       }
+       KERNEL_UNLOCK_ONE(NULL);
+       mutex_exit(softnet_lock);
+}
+
 /*
  * Send bridge packets through IPF if they are one of the types IPF can deal
  * with, or if they are ARP or REVARP.  (IPF will pass ARP and REVARP without
Index: sys/net/if_bridgevar.h
===================================================================
RCS file: /cvsroot/src/sys/net/if_bridgevar.h,v
retrieving revision 1.13
diff -u -r1.13 if_bridgevar.h
--- sys/net/if_bridgevar.h      18 Jan 2009 10:28:55 -0000      1.13
+++ sys/net/if_bridgevar.h      1 Apr 2009 04:28:51 -0000
@@ -317,6 +317,9 @@
 
 void   bridge_enqueue(struct bridge_softc *, struct ifnet *, struct mbuf *,
            int);
+#if defined(PFIL_HOOKS) && defined(BRIDGE_IPF)
+void bridgeipfintr(void);
+#endif /* PFIL_HOOKS && BRIDGE_IPF */
 
 #endif /* _KERNEL */
 #endif /* !_NET_IF_BRIDGEVAR_H_ */
Index: sys/net/netisr.h
===================================================================
RCS file: /cvsroot/src/sys/net/netisr.h,v
retrieving revision 1.39
diff -u -r1.39 netisr.h
--- sys/net/netisr.h    12 Nov 2008 12:36:28 -0000      1.39
+++ sys/net/netisr.h    1 Apr 2009 04:28:51 -0000
@@ -53,6 +53,10 @@
 #include "opt_atalk.h"
 #include "opt_iso.h"
 #include "opt_natm.h"
+#include "opt_bridge_ipf.h"
+#ifndef PFIL_HOOKS
+# include "opt_pfil_hooks.h"
+#endif /* !PFIL_HOOKS */
 #include "arp.h"
 #endif /* defined(_KERNEL_OPT) */
 
@@ -70,6 +74,9 @@
 #ifdef INET
 #include <netinet/in.h>
 #include <netinet/ip_var.h>
+#if defined(PFIL_HOOKS) && defined(BRIDGE_IPF)
+void bridgeipfintr(void);
+#endif
 #if NARP > 0
 #include <netinet/if_inarp.h>
 #endif
@@ -103,6 +110,9 @@
  * on the lowest level routine of each protocol.
  */
 #define        NETISR_IP       2               /* same as AF_INET */
+#if defined(PFIL_HOOKS) && defined(BRIDGE_IPF) 
+#define NETISR_BRIDGE_IPF      5        
+#endif /* PFIL_HOOKS && BRIDGE_IPF */
 #define        NETISR_NS       6               /* same as AF_NS */
 #define        NETISR_ISO      7               /* same as AF_ISO */
 #define        NETISR_CCITT    10              /* same as AF_CCITT */
Index: sys/net/netisr_dispatch.h
===================================================================
RCS file: /cvsroot/src/sys/net/netisr_dispatch.h,v
retrieving revision 1.14
diff -u -r1.14 netisr_dispatch.h
--- sys/net/netisr_dispatch.h   14 Jul 2007 21:02:42 -0000      1.14
+++ sys/net/netisr_dispatch.h   1 Apr 2009 04:28:51 -0000
@@ -32,6 +32,9 @@
        DONETISR(NETISR_ARP,arpintr);
 #endif
        DONETISR(NETISR_IP,ipintr);
+#if defined(PFIL_HOOKS) && defined(BRIDGE_IPF)
+       DONETISR(NETISR_BRIDGE_IPF,bridgeipfintr);
+#endif
 #endif
 #ifdef INET6
        DONETISR(NETISR_IPV6,ip6intr);



Home | Main Index | Thread Index | Old Index