Port-xen archive


Re: NetBSD/xen goes off the network - reproducible



On Tue, Feb 21, 2012 at 02:28:08PM -0500, Brian Marcotte wrote:
> > the attached patch changes xennet(4) to use a pool for receive
> > buffers, allocating them on demand for the ring. If a receive buffer
> > is stalled in some socket queue, it should no longer be a problem
> > (other than memory consumption).
> 
> I applied the patch to NetBSD-6 and I can no longer provoke the problem
> with my simple test.
> 
> Thanks!
> 
> However, running "tcpdump -i xennet0 -n icmp" reliably causes the panic
> below after 1-15 seconds. There may be one other issue, but I'm going to
> need another day to look into it. I think it may be something I'm doing.
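
For anyone joining the thread late: the earlier patch replaces the
per-request uvm_km_alloc() pages with buffers from a global
pool_cache(9). A minimal sketch of the buffer lifecycle, with locking
and grant handling omitted; all the calls below are taken from the
patch:

    /* attach: one global cache of PAGE_SIZE receive buffers */
    if_xennetrxbuf_cache = pool_cache_init(PAGE_SIZE, 0, 0, 0,
        "xnfrx", NULL, IPL_VM, NULL, NULL, NULL);

    /* rx path: put a fresh buffer into the ring slot... */
    req->rxreq_va = (vaddr_t)pool_cache_get_paddr(
        if_xennetrxbuf_cache, PR_NOWAIT, &req->rxreq_pa);

    /* ...and hand the old one to the mbuf as external storage;
     * xennet_rx_mbuf_free() returns it via pool_cache_put_paddr()
     * when the stack is done with it, so a buffer sitting in a
     * socket queue no longer pins a ring entry. */
    MEXTADD(m, pktp, rx->status, M_DEVBUF, xennet_rx_mbuf_free, NULL);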

I found the problem: when pushing more receive buffers to the ring, we
may accidentally overwrite the response status we're still using. This
leads to zero-length mbufs, which bpf doesn't like. Just free the
receive buffer a few lines later, and all is well :)
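
To make the ordering concrete, here is a stripped-down sketch of the
handler loop, simplified from the patch below (locking and error
handling omitted):

    netif_rx_response_t *rx = RING_GET_RESPONSE(&sc->sc_rx_ring, i);

    /* The netif shared ring overlays requests and responses in the
     * same slots, and xennet_rx_free_req() can push fresh requests
     * through xennet_alloc_rx_buffer(), so freeing the request
     * before reading the response lets the refill clobber *rx: */
    xennet_rx_free_req(req);                  /* too early */
    m->m_len = m->m_pkthdr.len = rx->status;  /* may now read 0 */

    /* The fix: consume *rx first, requeue the request afterwards. */
    m->m_len = m->m_pkthdr.len = rx->status;
    xennet_rx_free_req(req);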

I also fixed the xennet_rx_free_req() logic so it doesn't wait for the
ring to be completely empty before starting to refill it. 4/5 of the
ring size looks like a good compromise (refilling too often, a few
buffers at a time, significantly increases CPU usage).
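
In concrete terms, assuming the usual 4KB page size (for which
NET_RX_RING_SIZE works out to 256 entries), the refill now only kicks
in once 256 * 4 / 5 = 204 requests are free:

    /* from xennet_rx_free_req() in the patch below */
    if (sc->sc_free_rxreql >= (NET_RX_RING_SIZE * 4 / 5) &&
        __predict_true(sc->sc_backend_status == BEST_CONNECTED)) {
            xennet_alloc_rx_buffer(sc);
    }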

-- 
Manuel Bouyer <bouyer@antioche.eu.org>
     NetBSD: 26 years of experience will always make the difference
--
Index: xen/if_xennet_xenbus.c
===================================================================
RCS file: /cvsroot/src/sys/arch/xen/xen/if_xennet_xenbus.c,v
retrieving revision 1.58
diff -u -p -u -r1.58 if_xennet_xenbus.c
--- xen/if_xennet_xenbus.c      2 Feb 2012 19:43:01 -0000       1.58
+++ xen/if_xennet_xenbus.c      21 Feb 2012 21:39:58 -0000
@@ -148,7 +148,6 @@ int xennet_debug = 0xff;
 #endif
 
 #define GRANT_INVALID_REF -1 /* entry is free */
-#define GRANT_STACK_REF   -2 /* entry owned by the network stack */
 
 #define NET_TX_RING_SIZE __CONST_RING_SIZE(netif_tx, PAGE_SIZE)
 #define NET_RX_RING_SIZE __CONST_RING_SIZE(netif_rx, PAGE_SIZE)
@@ -210,6 +209,9 @@ struct xennet_xenbus_softc {
 static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
 static u_long xennet_pages[NET_RX_RING_SIZE];
 
+static pool_cache_t if_xennetrxbuf_cache;
+static int if_xennetrxbuf_cache_inited = 0;
+
 static int  xennet_xenbus_match(device_t, cfdata_t, void *);
 static void xennet_xenbus_attach(device_t, device_t, void *);
 static int  xennet_xenbus_detach(device_t, int);
@@ -219,6 +221,7 @@ static void xennet_alloc_rx_buffer(struc
 static void xennet_free_rx_buffer(struct xennet_xenbus_softc *);
 static void xennet_tx_complete(struct xennet_xenbus_softc *);
 static void xennet_rx_mbuf_free(struct mbuf *, void *, size_t, void *);
+static void xennet_rx_free_req(struct xennet_rxreq *);
 static int  xennet_handler(void *);
 static bool xennet_talk_to_backend(struct xennet_xenbus_softc *);
 #ifdef XENNET_DEBUG_DUMP
@@ -301,6 +304,14 @@ xennet_xenbus_attach(device_t parent, de
        sc->sc_xbusd = xa->xa_xbusd;
        sc->sc_xbusd->xbusd_otherend_changed = xennet_backend_changed;
 
+       /* xenbus ensures 2 devices can't be probed at the same time */
+       if (if_xennetrxbuf_cache_inited == 0) {
+               if_xennetrxbuf_cache = pool_cache_init(PAGE_SIZE, 0, 0, 0,
+                   "xnfrx", NULL, IPL_VM, NULL, NULL, NULL);
+               if_xennetrxbuf_cache_inited = 1;
+       }
+
+
        /* initialize free RX and RX request lists */
        mutex_init(&sc->sc_tx_lock, MUTEX_DEFAULT, IPL_NET);
        SLIST_INIT(&sc->sc_txreq_head);
@@ -316,13 +327,10 @@ xennet_xenbus_attach(device_t parent, de
                struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
                rxreq->rxreq_id = i;
                rxreq->rxreq_sc = sc;
-               rxreq->rxreq_va = uvm_km_alloc(kernel_map,
-                   PAGE_SIZE, PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_ZERO);
+               rxreq->rxreq_va = (vaddr_t)pool_cache_get_paddr(
+                   if_xennetrxbuf_cache, PR_WAITOK, &rxreq->rxreq_pa);
                if (rxreq->rxreq_va == 0)
                        break;
-               if (!pmap_extract(pmap_kernel(), rxreq->rxreq_va,
-                   &rxreq->rxreq_pa))
-                       panic("%s: no pa for mapped va ?", device_xname(self));
                rxreq->rxreq_gntref = GRANT_INVALID_REF;
                SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq, rxreq_next);
        }
@@ -581,7 +589,9 @@ again:
                xenbus_dev_fatal(sc->sc_xbusd, error, "completing transaction");
                return false;
        }
+       mutex_enter(&sc->sc_rx_lock);
        xennet_alloc_rx_buffer(sc);
+       mutex_exit(&sc->sc_rx_lock);
 
        if (sc->sc_backend_status == BEST_SUSPENDED) {
                xenbus_device_resume(sc->sc_xbusd);
@@ -680,7 +690,7 @@ xennet_alloc_rx_buffer(struct xennet_xen
 
        otherend_id = sc->sc_xbusd->xbusd_otherend_id;
 
-       mutex_enter(&sc->sc_rx_lock);
+       KASSERT(mutex_owned(&sc->sc_rx_lock));
        for (i = 0; sc->sc_free_rxreql != 0; i++) {
                req  = SLIST_FIRST(&sc->sc_rxreq_head);
                KASSERT(req != NULL);
@@ -729,7 +739,6 @@ xennet_alloc_rx_buffer(struct xennet_xen
 
 out_loop:
        if (i == 0) {
-               mutex_exit(&sc->sc_rx_lock);
                return;
        }
 
@@ -763,8 +772,11 @@ out_loop:
 
        sc->sc_rx_ring.req_prod_pvt = req_prod + i;
        RING_PUSH_REQUESTS(&sc->sc_rx_ring);
-
-       mutex_exit(&sc->sc_rx_lock);
+#if 0
+       RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_rx_ring, notify);
+       if (notify)
+               hypervisor_notify_via_evtchn(sc->sc_evtchn);
+#endif
        return;
 }
 
@@ -780,7 +792,6 @@ xennet_free_rx_buffer(struct xennet_xenb
        mmu_update_t mmu[1];
        multicall_entry_t mcl[2];
 
-       int s = splbio();
        mutex_enter(&sc->sc_rx_lock);
        
        DPRINTF(("%s: xennet_free_rx_buffer\n", device_xname(sc->sc_dev)));
@@ -788,16 +799,6 @@ xennet_free_rx_buffer(struct xennet_xenb
        for (i = 0; i < NET_RX_RING_SIZE; i++) {
                struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
 
-               /*
-                * if the buffer is in transit in the network stack, wait for
-                * the network stack to free it.
-                */
-               while ((volatile grant_ref_t)rxreq->rxreq_gntref ==
-                   GRANT_STACK_REF)
-                       mutex_exit(&sc->sc_rx_lock);
-                       tsleep(xennet_xenbus_detach, PRIBIO, "xnet_free", hz/2);
-                       mutex_enter(&sc->sc_rx_lock);
-
                if (rxreq->rxreq_gntref != GRANT_INVALID_REF) {
                        /*
                         * this req is still granted. Get back the page or
@@ -861,7 +862,6 @@ xennet_free_rx_buffer(struct xennet_xenb
 
        }
        mutex_exit(&sc->sc_rx_lock);
-       splx(s);
        DPRINTF(("%s: xennet_free_rx_buffer done\n", device_xname(sc->sc_dev)));
 }
 
@@ -871,10 +871,23 @@ xennet_free_rx_buffer(struct xennet_xenb
 static void
 xennet_rx_mbuf_free(struct mbuf *m, void *buf, size_t size, void *arg)
 {
-       struct xennet_rxreq *req = arg;
+       int s = splnet();
+       KASSERT(buf == m->m_ext.ext_buf);
+       KASSERT(arg == NULL);
+       KASSERT(m != NULL);
+       vaddr_t va = (vaddr_t)(buf) & ~((vaddr_t)PAGE_MASK);
+       pool_cache_put_paddr(if_xennetrxbuf_cache,
+           (void *)va, m->m_ext.ext_paddr);
+       pool_cache_put(mb_cache, m);
+       splx(s);
+}
+
+static void
+xennet_rx_free_req(struct xennet_rxreq *req)
+{
        struct xennet_xenbus_softc *sc = req->rxreq_sc;
        
-       mutex_enter(&sc->sc_rx_lock);
+       KASSERT(mutex_owned(&sc->sc_rx_lock));
 
        /* puts back the RX request in the list of free RX requests */
        SLIST_INSERT_HEAD(&sc->sc_rxreq_head, req, rxreq_next);
@@ -886,18 +899,10 @@ xennet_rx_mbuf_free(struct mbuf *m, void
         */
        req->rxreq_gntref = GRANT_INVALID_REF;
        
-       if (sc->sc_free_rxreql >= SC_NLIVEREQ(sc) &&
+       if (sc->sc_free_rxreql >= (NET_RX_RING_SIZE * 4 / 5) &&
            __predict_true(sc->sc_backend_status == BEST_CONNECTED)) {
-               mutex_exit(&sc->sc_rx_lock);
                xennet_alloc_rx_buffer(sc);
        }
-       else {
-               mutex_exit(&sc->sc_rx_lock);
-       }
-       
-       if (m)
-               pool_cache_put(mb_cache, m);
-
 }
 
 /*
@@ -988,16 +993,10 @@ again:
        DPRINTFN(XEDB_EVENT, ("xennet_handler prod %d cons %d\n",
            sc->sc_rx_ring.sring->rsp_prod, sc->sc_rx_ring.rsp_cons));
 
+       mutex_enter(&sc->sc_rx_lock);
        resp_prod = sc->sc_rx_ring.sring->rsp_prod;
        xen_rmb(); /* ensure we see replies up to resp_prod */
 
-       /*
-        * The locking in this loop needs to be done carefully, as
-        * m_free() will take the sc_rx_lock() via
-        * xennet_rx_mbuf_free()
-        */
-       mutex_enter(&sc->sc_rx_lock);
-       
        for (i = sc->sc_rx_ring.rsp_cons; i != resp_prod; i++) {
                netif_rx_response_t *rx = RING_GET_RESPONSE(&sc->sc_rx_ring, i);
                req = &sc->sc_rxreqs[rx->id];
@@ -1034,8 +1033,6 @@ again:
                            __func__, sc->sc_rx_feature);
                }
 
-               req->rxreq_gntref = GRANT_INVALID_REF;
-
                pa = req->rxreq_pa;
                va = req->rxreq_va;
                
@@ -1067,10 +1064,7 @@ again:
                                DPRINTFN(XEDB_EVENT,
                                    ("xennet_handler bad dest\n"));
                                /* packet not for us */
-                               mutex_exit(&sc->sc_rx_lock);
-                               xennet_rx_mbuf_free(NULL, (void *)va, PAGE_SIZE,
-                                   req);
-                               mutex_enter(&sc->sc_rx_lock);
+                               xennet_rx_free_req(req);
                                continue;
                        }
                }
@@ -1078,50 +1072,37 @@ again:
                if (__predict_false(m == NULL)) {
                        printf("%s: rx no mbuf\n", ifp->if_xname);
                        ifp->if_ierrors++;
-                       mutex_exit(&sc->sc_rx_lock);
-                       xennet_rx_mbuf_free(NULL, (void *)va, PAGE_SIZE, req);
-                       mutex_enter(&sc->sc_rx_lock);
+                       xennet_rx_free_req(req);
                        continue;
                }
                MCLAIM(m, &sc->sc_ethercom.ec_rx_mowner);
 
                m->m_pkthdr.rcvif = ifp;
-               if (__predict_true(sc->sc_rx_ring.req_prod_pvt != 
-                   sc->sc_rx_ring.sring->rsp_prod)) {
-                       m->m_len = m->m_pkthdr.len = rx->status;
-                       MEXTADD(m, pktp, rx->status,
-                           M_DEVBUF, xennet_rx_mbuf_free, req);
-                       m->m_flags |= M_EXT_RW; /* we own the buffer */
-                       req->rxreq_gntref = GRANT_STACK_REF;
-                       mutex_exit(&sc->sc_rx_lock);
-               } else {
-                       /*
-                        * This was our last receive buffer, allocate
-                        * memory, copy data and push the receive
-                        * buffer back to the hypervisor.
-                        */
-                       m->m_len = min(MHLEN, rx->status);
-                       m->m_pkthdr.len = 0;
-                       mutex_exit(&sc->sc_rx_lock);
-                       m_copyback(m, 0, rx->status, pktp);
-                       xennet_rx_mbuf_free(NULL, (void *)va, PAGE_SIZE, req);
-                       if (m->m_pkthdr.len < rx->status) {
-                               /* out of memory, just drop packets */
-                               ifp->if_ierrors++;
-                               m_freem(m);
-                               mutex_enter(&sc->sc_rx_lock);
-                               continue;
-                       }
+               req->rxreq_va = (vaddr_t)pool_cache_get_paddr(
+                   if_xennetrxbuf_cache, PR_NOWAIT, &req->rxreq_pa);
+               if (__predict_false(req->rxreq_va == 0)) {
+                       printf("%s: rx no buf\n", ifp->if_xname);
+                       ifp->if_ierrors++;
+                       req->rxreq_va = va;
+                       req->rxreq_pa = pa;
+                       xennet_rx_free_req(req);
+                       m_freem(m);
+                       continue;
                }
-               
+               m->m_len = m->m_pkthdr.len = rx->status;
+               MEXTADD(m, pktp, rx->status,
+                   M_DEVBUF, xennet_rx_mbuf_free, NULL);
+               m->m_flags |= M_EXT_RW; /* we own the buffer */
+               m->m_ext.ext_paddr = pa;
                if ((rx->flags & NETRXF_csum_blank) != 0) {
                        xennet_checksum_fill(&m);
                        if (m == NULL) {
                                ifp->if_ierrors++;
-                               mutex_enter(&sc->sc_rx_lock);
                                continue;
                        }
                }
+               /* freeing the req may overwrite *rx, so do it last */
+               xennet_rx_free_req(req);
                /*
                 * Pass packet to bpf if there is a listener.
                 */
@@ -1131,7 +1112,6 @@ again:
 
                /* Pass the packet up. */
                (*ifp->if_input)(ifp, m);
-               mutex_enter(&sc->sc_rx_lock);
        }
        xen_rmb();
        sc->sc_rx_ring.rsp_cons = i;

