Port-xen archive


Re: NetBSD/xen goes off the network - reproducible



Hello,
the attached patch changes xennet(4) to use a pool for receive
buffers, allocating them on demand for the ring. If a receive buffer
is stalled in some socket queue, it should not be a problem any more
(other than memory consumption).
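
For reference, the buffer pool uses the standard pool_cache(9)
interface; here is a minimal sketch of the calls involved, following
what the patch does at attach, ring-refill and mbuf-free time
(rxbuf_cache stands in for the patch's if_xennetrxbuf_cache):

    #include <sys/pool.h>

    static pool_cache_t rxbuf_cache;

    /* once, at attach time: one PAGE_SIZE object per receive buffer */
    rxbuf_cache = pool_cache_init(PAGE_SIZE, 0, 0, 0,
        "xnfrx", NULL, IPL_VM, NULL, NULL, NULL);

    /* per ring slot: grab a page and its physical address */
    paddr_t pa;
    vaddr_t va = (vaddr_t)pool_cache_get_paddr(rxbuf_cache,
        PR_WAITOK, &pa);

    /*
     * when the network stack frees the mbuf external storage,
     * the page simply goes back to the pool
     */
    pool_cache_put_paddr(rxbuf_cache, (void *)va, pa);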
Additionally, this gives an interesting performance boost.
With ttcp between dom0 and domU, the old code gives:
ttcp-r: 268435456 bytes in 5.98 real seconds = 43827.76 KB/sec +++
ttcp-r: 58827 I/O calls, msec/call = 0.10, calls/sec = 9835.26
ttcp-r: 0.1user 2.0sys 0:05real 35% 0i+0d 0maxrss 0+0pf 0+0csw

ttcp-r: 268435456 bytes in 6.19 real seconds = 42335.30 KB/sec +++
ttcp-r: 60436 I/O calls, msec/call = 0.10, calls/sec = 9760.19
ttcp-r: 0.1user 2.0sys 0:06real 35% 0i+0d 0maxrss 0+0pf 0+0csw

ttcp-r: 268435456 bytes in 5.95 real seconds = 44069.24 KB/sec +++
ttcp-r: 59035 I/O calls, msec/call = 0.10, calls/sec = 9924.42
ttcp-r: 0.1user 1.9sys 0:05real 34% 0i+0d 0maxrss 0+0pf 0+0csw

And with this patch:
ttcp-r: 268435456 bytes in 4.63 real seconds = 56567.77 KB/sec +++
ttcp-r: 40711 I/O calls, msec/call = 0.12, calls/sec = 8784.98
ttcp-r: 0.0user 1.2sys 0:04real 27% 0i+0d 0maxrss 0+0pf 0+0csw

ttcp-r: 268435456 bytes in 4.69 real seconds = 55894.39 KB/sec +++
ttcp-r: 40713 I/O calls, msec/call = 0.12, calls/sec = 8680.83
ttcp-r: 0.0user 1.2sys 0:04real 27% 0i+0d 0maxrss 0+0pf 0+0csw

ttcp-r: 268435456 bytes in 4.64 real seconds = 56546.86 KB/sec +++
ttcp-r: 40738 I/O calls, msec/call = 0.12, calls/sec = 8787.56
ttcp-r: 0.0user 1.2sys 0:04real 28% 0i+0d 0maxrss 0+0pf 0+0csw

faster with less CPU used :) (averaged over the three runs, roughly
56300 vs 43400 KB/sec, i.e. about 30% more throughput, at 27-28% CPU
instead of 34-35%)

I also tried simply copying the receive buffer into an mbuf cluster: we
get the same speed, but with 40% CPU instead of 27% ...
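
For comparison, the zero-copy path in the patch hands the pool page to
the stack as external mbuf storage via MEXTADD(9) with a custom free
callback, whereas the copying variant duplicates the data into the mbuf
chain with m_copyback(9). Roughly (a sketch, simplified from the diff
below):

    /* zero-copy: attach the receive page as external storage */
    m->m_len = m->m_pkthdr.len = rx->status;
    MEXTADD(m, pktp, rx->status, M_DEVBUF, xennet_rx_mbuf_free, NULL);
    m->m_flags |= M_EXT_RW;     /* we own the buffer */

    /* copying alternative: same throughput, ~40% CPU instead of 27% */
    m_copyback(m, 0, rx->status, pktp);

The free callback returns the page to the pool (see
xennet_rx_mbuf_free() in the diff), so the ring no longer has to wait
for the stack to release any specific buffer.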

-- 
Manuel Bouyer <bouyer@antioche.eu.org>
     NetBSD: 26 years of experience will always make the difference
--
Index: xen/if_xennet_xenbus.c
===================================================================
RCS file: /cvsroot/src/sys/arch/xen/xen/if_xennet_xenbus.c,v
retrieving revision 1.58
diff -u -p -u -r1.58 if_xennet_xenbus.c
--- xen/if_xennet_xenbus.c      2 Feb 2012 19:43:01 -0000       1.58
+++ xen/if_xennet_xenbus.c      21 Feb 2012 17:09:04 -0000
@@ -148,7 +148,6 @@ int xennet_debug = 0xff;
 #endif
 
 #define GRANT_INVALID_REF -1 /* entry is free */
-#define GRANT_STACK_REF   -2 /* entry owned by the network stack */
 
 #define NET_TX_RING_SIZE __CONST_RING_SIZE(netif_tx, PAGE_SIZE)
 #define NET_RX_RING_SIZE __CONST_RING_SIZE(netif_rx, PAGE_SIZE)
@@ -210,6 +209,9 @@ struct xennet_xenbus_softc {
 static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
 static u_long xennet_pages[NET_RX_RING_SIZE];
 
+static pool_cache_t if_xennetrxbuf_cache;
+static int if_xennetrxbuf_cache_inited=0;
+
 static int  xennet_xenbus_match(device_t, cfdata_t, void *);
 static void xennet_xenbus_attach(device_t, device_t, void *);
 static int  xennet_xenbus_detach(device_t, int);
@@ -219,6 +221,7 @@ static void xennet_alloc_rx_buffer(struc
 static void xennet_free_rx_buffer(struct xennet_xenbus_softc *);
 static void xennet_tx_complete(struct xennet_xenbus_softc *);
 static void xennet_rx_mbuf_free(struct mbuf *, void *, size_t, void *);
+static void xennet_rx_free_req(struct xennet_rxreq *);
 static int  xennet_handler(void *);
 static bool xennet_talk_to_backend(struct xennet_xenbus_softc *);
 #ifdef XENNET_DEBUG_DUMP
@@ -301,6 +304,14 @@ xennet_xenbus_attach(device_t parent, de
        sc->sc_xbusd = xa->xa_xbusd;
        sc->sc_xbusd->xbusd_otherend_changed = xennet_backend_changed;
 
+       /* xenbus ensure 2 devices can't be probed at the same time */
+       if (if_xennetrxbuf_cache_inited == 0) {
+               if_xennetrxbuf_cache = pool_cache_init(PAGE_SIZE, 0, 0, 0,
+                   "xnfrx", NULL, IPL_VM, NULL, NULL, NULL);
+               if_xennetrxbuf_cache_inited = 1;
+       }
+               
+
        /* initialize free RX and RX request lists */
        mutex_init(&sc->sc_tx_lock, MUTEX_DEFAULT, IPL_NET);
        SLIST_INIT(&sc->sc_txreq_head);
@@ -316,13 +327,10 @@ xennet_xenbus_attach(device_t parent, de
                struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
                rxreq->rxreq_id = i;
                rxreq->rxreq_sc = sc;
-               rxreq->rxreq_va = uvm_km_alloc(kernel_map,
-                   PAGE_SIZE, PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_ZERO);
+               rxreq->rxreq_va = (vaddr_t)pool_cache_get_paddr(
+                   if_xennetrxbuf_cache, PR_WAITOK, &rxreq->rxreq_pa);
                if (rxreq->rxreq_va == 0)
                        break;
-               if (!pmap_extract(pmap_kernel(), rxreq->rxreq_va,
-                   &rxreq->rxreq_pa))
-                       panic("%s: no pa for mapped va ?", device_xname(self));
                rxreq->rxreq_gntref = GRANT_INVALID_REF;
                SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq, rxreq_next);
        }
@@ -581,7 +589,9 @@ again:
                xenbus_dev_fatal(sc->sc_xbusd, error, "completing transaction");
                return false;
        }
+       mutex_enter(&sc->sc_rx_lock);
        xennet_alloc_rx_buffer(sc);
+       mutex_exit(&sc->sc_rx_lock);
 
        if (sc->sc_backend_status == BEST_SUSPENDED) {
                xenbus_device_resume(sc->sc_xbusd);
@@ -680,7 +690,7 @@ xennet_alloc_rx_buffer(struct xennet_xen
 
        otherend_id = sc->sc_xbusd->xbusd_otherend_id;
 
-       mutex_enter(&sc->sc_rx_lock);
+       KASSERT(mutex_owned(&sc->sc_rx_lock));
        for (i = 0; sc->sc_free_rxreql != 0; i++) {
                req  = SLIST_FIRST(&sc->sc_rxreq_head);
                KASSERT(req != NULL);
@@ -729,7 +739,6 @@ xennet_alloc_rx_buffer(struct xennet_xen
 
 out_loop:
        if (i == 0) {
-               mutex_exit(&sc->sc_rx_lock);
                return;
        }
 
@@ -764,7 +773,6 @@ out_loop:
        sc->sc_rx_ring.req_prod_pvt = req_prod + i;
        RING_PUSH_REQUESTS(&sc->sc_rx_ring);
 
-       mutex_exit(&sc->sc_rx_lock);
        return;
 }
 
@@ -780,7 +788,6 @@ xennet_free_rx_buffer(struct xennet_xenb
        mmu_update_t mmu[1];
        multicall_entry_t mcl[2];
 
-       int s = splbio();
        mutex_enter(&sc->sc_rx_lock);
        
        DPRINTF(("%s: xennet_free_rx_buffer\n", device_xname(sc->sc_dev)));
@@ -788,16 +795,6 @@ xennet_free_rx_buffer(struct xennet_xenb
        for (i = 0; i < NET_RX_RING_SIZE; i++) {
                struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
 
-               /*
-                * if the buffer is in transit in the network stack, wait for
-                * the network stack to free it.
-                */
-               while ((volatile grant_ref_t)rxreq->rxreq_gntref ==
-                   GRANT_STACK_REF)
-                       mutex_exit(&sc->sc_rx_lock);
-                       tsleep(xennet_xenbus_detach, PRIBIO, "xnet_free", hz/2);
-                       mutex_enter(&sc->sc_rx_lock);
-
                if (rxreq->rxreq_gntref != GRANT_INVALID_REF) {
                        /*
                         * this req is still granted. Get back the page or
@@ -861,7 +858,6 @@ xennet_free_rx_buffer(struct xennet_xenb
 
        }
        mutex_exit(&sc->sc_rx_lock);
-       splx(s);
        DPRINTF(("%s: xennet_free_rx_buffer done\n", device_xname(sc->sc_dev)));
 }
 
@@ -871,10 +867,21 @@ xennet_free_rx_buffer(struct xennet_xenb
 static void
 xennet_rx_mbuf_free(struct mbuf *m, void *buf, size_t size, void *arg)
 {
-       struct xennet_rxreq *req = arg;
+
+       KASSERT(buf == m->m_ext.ext_buf);
+       KASSERT(arg == NULL);
+       vaddr_t va = (vaddr_t)(m->m_ext.ext_buf) & ~((vaddr_t)PAGE_MASK);
+       pool_cache_put_paddr(if_xennetrxbuf_cache,
+           (void *)va, m->m_ext.ext_paddr);
+       pool_cache_put(mb_cache, m);
+};
+
+static void
+xennet_rx_free_req(struct xennet_rxreq *req)
+{
        struct xennet_xenbus_softc *sc = req->rxreq_sc;
        
-       mutex_enter(&sc->sc_rx_lock);
+       KASSERT(mutex_owned(&sc->sc_rx_lock));
 
        /* puts back the RX request in the list of free RX requests */
        SLIST_INSERT_HEAD(&sc->sc_rxreq_head, req, rxreq_next);
@@ -886,18 +893,10 @@ xennet_rx_mbuf_free(struct mbuf *m, void
         */
        req->rxreq_gntref = GRANT_INVALID_REF;
        
-       if (sc->sc_free_rxreql >= SC_NLIVEREQ(sc) &&
+       if (sc->sc_free_rxreql >= NET_RX_RING_SIZE &&
            __predict_true(sc->sc_backend_status == BEST_CONNECTED)) {
-               mutex_exit(&sc->sc_rx_lock);
                xennet_alloc_rx_buffer(sc);
        }
-       else {
-               mutex_exit(&sc->sc_rx_lock);
-       }
-       
-       if (m)
-               pool_cache_put(mb_cache, m);
-
 }
 
 /*
@@ -988,16 +987,10 @@ again:
        DPRINTFN(XEDB_EVENT, ("xennet_handler prod %d cons %d\n",
            sc->sc_rx_ring.sring->rsp_prod, sc->sc_rx_ring.rsp_cons));
 
+       mutex_enter(&sc->sc_rx_lock);
        resp_prod = sc->sc_rx_ring.sring->rsp_prod;
        xen_rmb(); /* ensure we see replies up to resp_prod */
 
-       /*
-        * The locking in this loop needs to be done carefully, as
-        * m_free() will take the sc_rx_lock() via
-        * xennet_rx_mbuf_free()
-        */
-       mutex_enter(&sc->sc_rx_lock);
-       
        for (i = sc->sc_rx_ring.rsp_cons; i != resp_prod; i++) {
                netif_rx_response_t *rx = RING_GET_RESPONSE(&sc->sc_rx_ring, i);
                req = &sc->sc_rxreqs[rx->id];
@@ -1034,8 +1027,6 @@ again:
                            __func__, sc->sc_rx_feature);
                }
 
-               req->rxreq_gntref = GRANT_INVALID_REF;
-
                pa = req->rxreq_pa;
                va = req->rxreq_va;
                
@@ -1067,10 +1058,7 @@ again:
                                DPRINTFN(XEDB_EVENT,
                                    ("xennet_handler bad dest\n"));
                                /* packet not for us */
-                               mutex_exit(&sc->sc_rx_lock);
-                               xennet_rx_mbuf_free(NULL, (void *)va, PAGE_SIZE,
-                                   req);
-                               mutex_enter(&sc->sc_rx_lock);
+                               xennet_rx_free_req(req);
                                continue;
                        }
                }
@@ -1078,47 +1066,33 @@ again:
                if (__predict_false(m == NULL)) {
                        printf("%s: rx no mbuf\n", ifp->if_xname);
                        ifp->if_ierrors++;
-                       mutex_exit(&sc->sc_rx_lock);
-                       xennet_rx_mbuf_free(NULL, (void *)va, PAGE_SIZE, req);
-                       mutex_enter(&sc->sc_rx_lock);
+                       xennet_rx_free_req(req);
                        continue;
                }
                MCLAIM(m, &sc->sc_ethercom.ec_rx_mowner);
 
                m->m_pkthdr.rcvif = ifp;
-               if (__predict_true(sc->sc_rx_ring.req_prod_pvt != 
-                   sc->sc_rx_ring.sring->rsp_prod)) {
-                       m->m_len = m->m_pkthdr.len = rx->status;
-                       MEXTADD(m, pktp, rx->status,
-                           M_DEVBUF, xennet_rx_mbuf_free, req);
-                       m->m_flags |= M_EXT_RW; /* we own the buffer */
-                       req->rxreq_gntref = GRANT_STACK_REF;
-                       mutex_exit(&sc->sc_rx_lock);
-               } else {
-                       /*
-                        * This was our last receive buffer, allocate
-                        * memory, copy data and push the receive
-                        * buffer back to the hypervisor.
-                        */
-                       m->m_len = min(MHLEN, rx->status);
-                       m->m_pkthdr.len = 0;
-                       mutex_exit(&sc->sc_rx_lock);
-                       m_copyback(m, 0, rx->status, pktp);
-                       xennet_rx_mbuf_free(NULL, (void *)va, PAGE_SIZE, req);
-                       if (m->m_pkthdr.len < rx->status) {
-                               /* out of memory, just drop packets */
-                               ifp->if_ierrors++;
-                               m_freem(m);
-                               mutex_enter(&sc->sc_rx_lock);
-                               continue;
-                       }
+               req->rxreq_va = (vaddr_t)pool_cache_get_paddr(
+                   if_xennetrxbuf_cache, PR_NOWAIT, &req->rxreq_pa);
+               if (__predict_false(req->rxreq_va == 0)) {
+                       printf("%s: rx no buf\n", ifp->if_xname);
+                       ifp->if_ierrors++;
+                       req->rxreq_va = va;
+                       req->rxreq_pa = pa;
+                       xennet_rx_free_req(req);
+                       m_freem(m);
+                       continue;
                }
-               
+               xennet_rx_free_req(req);
+               m->m_len = m->m_pkthdr.len = rx->status;
+               MEXTADD(m, pktp, rx->status,
+                   M_DEVBUF, xennet_rx_mbuf_free, NULL);
+               m->m_flags |= M_EXT_RW; /* we own the buffer */
+               m->m_ext.ext_paddr = pa;
                if ((rx->flags & NETRXF_csum_blank) != 0) {
                        xennet_checksum_fill(&m);
                        if (m == NULL) {
                                ifp->if_ierrors++;
-                               mutex_enter(&sc->sc_rx_lock);
                                continue;
                        }
                }
@@ -1131,7 +1105,6 @@ again:
 
                /* Pass the packet up. */
                (*ifp->if_input)(ifp, m);
-               mutex_enter(&sc->sc_rx_lock);
        }
        xen_rmb();
        sc->sc_rx_ring.rsp_cons = i;

