Source-Changes-HG archive


[src/trunk]: src/sys/arch/xen/xen Allocating a fixed, limited number of buffe...



details:   https://anonhg.NetBSD.org/src/rev/f1a09cfd45d2
branches:  trunk
changeset: 777551:f1a09cfd45d2
user:      bouyer <bouyer%NetBSD.org@localhost>
date:      Wed Feb 22 18:54:51 2012 +0000

description:
Allocating a fixed, limited number of receive buffers and sending them
to the network stack is a bad idea: if all receive buffers sit in
socket queues, reception on the interface stalls for lack of buffers.
Instead, get receive pages from a pool_cache(9). Copying into an mbuf
cluster would work too, but testing shows this has a significant
performance hit. This also simplifies locking.
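
For context, the pool_cache(9) pattern the new code relies on looks
roughly like this (a minimal sketch, not the committed code; the
rxbuf_* names are illustrative):

    #include <sys/param.h>
    #include <sys/errno.h>
    #include <sys/intr.h>
    #include <sys/pool.h>

    static pool_cache_t rxbuf_cache;

    /* One-time setup: a cache of page-sized, page-aligned RX buffers. */
    static void
    rxbuf_cache_setup(void)
    {
            rxbuf_cache = pool_cache_init(PAGE_SIZE, 0, 0, 0,
                "xnfrx", NULL, IPL_VM, NULL, NULL, NULL);
    }

    /* Refill one RX slot; the cache hands back both va and pa. */
    static int
    rxbuf_get(vaddr_t *vap, paddr_t *pap)
    {
            *vap = (vaddr_t)pool_cache_get_paddr(rxbuf_cache,
                PR_NOWAIT, pap);
            return (*vap != 0) ? 0 : ENOMEM;
    }

    /* From the mbuf external-storage free callback: recycle the page. */
    static void
    rxbuf_put(vaddr_t va, paddr_t pa)
    {
            pool_cache_put_paddr(rxbuf_cache, (void *)va, pa);
    }

Because the pages come from a cache rather than a fixed array, a buffer
loaned to the socket layer is simply returned to the pool when its mbuf
is freed, instead of starving the RX ring while it sits in a queue.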

While there, notify the dom0 when we add receive buffers (older Linux
dom0s didn't care, but newer ones do).
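
The notification is the standard Xen shared-ring idiom, as the hunk in
xennet_alloc_rx_buffer() in the diff below shows: push the new
requests, then kick the event channel only if the backend asked to be
woken. Distilled from the diff:

    int notify;

    /* Publish the i new RX requests on the shared ring... */
    sc->sc_rx_ring.req_prod_pvt = req_prod + i;
    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_rx_ring, notify);
    /* ...and signal the backend only when it is actually waiting. */
    if (notify)
            hypervisor_notify_via_evtchn(sc->sc_evtchn);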

Problem reported and fix tested by Brian Marcotte on port-xen.

diffstat:

 sys/arch/xen/xen/if_xennet_xenbus.c |  141 +++++++++++++++--------------------
 1 files changed, 59 insertions(+), 82 deletions(-)

diffs (truncated from 316 to 300 lines):

diff -r 6318f21a0136 -r f1a09cfd45d2 sys/arch/xen/xen/if_xennet_xenbus.c
--- a/sys/arch/xen/xen/if_xennet_xenbus.c       Wed Feb 22 18:35:26 2012 +0000
+++ b/sys/arch/xen/xen/if_xennet_xenbus.c       Wed Feb 22 18:54:51 2012 +0000
@@ -1,4 +1,4 @@
-/*      $NetBSD: if_xennet_xenbus.c,v 1.58 2012/02/02 19:43:01 tls Exp $      */
+/*      $NetBSD: if_xennet_xenbus.c,v 1.59 2012/02/22 18:54:51 bouyer Exp $      */
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -85,7 +85,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: if_xennet_xenbus.c,v 1.58 2012/02/02 19:43:01 tls Exp $");
+__KERNEL_RCSID(0, "$NetBSD: if_xennet_xenbus.c,v 1.59 2012/02/22 18:54:51 bouyer Exp $");
 
 #include "opt_xen.h"
 #include "opt_nfs_boot.h"
@@ -148,7 +148,6 @@
 #endif
 
 #define GRANT_INVALID_REF -1 /* entry is free */
-#define GRANT_STACK_REF   -2 /* entry owned by the network stack */
 
 #define NET_TX_RING_SIZE __CONST_RING_SIZE(netif_tx, PAGE_SIZE)
 #define NET_RX_RING_SIZE __CONST_RING_SIZE(netif_rx, PAGE_SIZE)
@@ -210,6 +209,9 @@
 static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
 static u_long xennet_pages[NET_RX_RING_SIZE];
 
+static pool_cache_t if_xennetrxbuf_cache;
+static int if_xennetrxbuf_cache_inited=0;
+
 static int  xennet_xenbus_match(device_t, cfdata_t, void *);
 static void xennet_xenbus_attach(device_t, device_t, void *);
 static int  xennet_xenbus_detach(device_t, int);
@@ -219,6 +221,7 @@
 static void xennet_free_rx_buffer(struct xennet_xenbus_softc *);
 static void xennet_tx_complete(struct xennet_xenbus_softc *);
 static void xennet_rx_mbuf_free(struct mbuf *, void *, size_t, void *);
+static void xennet_rx_free_req(struct xennet_rxreq *);
 static int  xennet_handler(void *);
 static bool xennet_talk_to_backend(struct xennet_xenbus_softc *);
 #ifdef XENNET_DEBUG_DUMP
@@ -301,6 +304,14 @@
        sc->sc_xbusd = xa->xa_xbusd;
        sc->sc_xbusd->xbusd_otherend_changed = xennet_backend_changed;
 
+       /* xenbus ensure 2 devices can't be probed at the same time */
+       if (if_xennetrxbuf_cache_inited == 0) {
+               if_xennetrxbuf_cache = pool_cache_init(PAGE_SIZE, 0, 0, 0,
+                   "xnfrx", NULL, IPL_VM, NULL, NULL, NULL);
+               if_xennetrxbuf_cache_inited = 1;
+       }
+               
+
        /* initialize free RX and RX request lists */
        mutex_init(&sc->sc_tx_lock, MUTEX_DEFAULT, IPL_NET);
        SLIST_INIT(&sc->sc_txreq_head);
@@ -316,13 +327,10 @@
                struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
                rxreq->rxreq_id = i;
                rxreq->rxreq_sc = sc;
-               rxreq->rxreq_va = uvm_km_alloc(kernel_map,
-                   PAGE_SIZE, PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_ZERO);
+               rxreq->rxreq_va = (vaddr_t)pool_cache_get_paddr(
+                   if_xennetrxbuf_cache, PR_WAITOK, &rxreq->rxreq_pa);
                if (rxreq->rxreq_va == 0)
                        break;
-               if (!pmap_extract(pmap_kernel(), rxreq->rxreq_va,
-                   &rxreq->rxreq_pa))
-                       panic("%s: no pa for mapped va ?", device_xname(self));
                rxreq->rxreq_gntref = GRANT_INVALID_REF;
                SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq, rxreq_next);
        }
@@ -581,7 +589,9 @@
                xenbus_dev_fatal(sc->sc_xbusd, error, "completing transaction");
                return false;
        }
+       mutex_enter(&sc->sc_rx_lock);
        xennet_alloc_rx_buffer(sc);
+       mutex_exit(&sc->sc_rx_lock);
 
        if (sc->sc_backend_status == BEST_SUSPENDED) {
                xenbus_device_resume(sc->sc_xbusd);
@@ -675,12 +685,12 @@
        RING_IDX i;
        struct xennet_rxreq *req;
        struct xen_memory_reservation reservation;
-       int s, otherend_id;
+       int s, otherend_id, notify;
        paddr_t pfn;
 
        otherend_id = sc->sc_xbusd->xbusd_otherend_id;
 
-       mutex_enter(&sc->sc_rx_lock);
+       KASSERT(mutex_owned(&sc->sc_rx_lock));
        for (i = 0; sc->sc_free_rxreql != 0; i++) {
                req  = SLIST_FIRST(&sc->sc_rxreq_head);
                KASSERT(req != NULL);
@@ -729,7 +739,6 @@
 
 out_loop:
        if (i == 0) {
-               mutex_exit(&sc->sc_rx_lock);
                return;
        }
 
@@ -762,9 +771,9 @@
        }
 
        sc->sc_rx_ring.req_prod_pvt = req_prod + i;
-       RING_PUSH_REQUESTS(&sc->sc_rx_ring);
-
-       mutex_exit(&sc->sc_rx_lock);
+       RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_rx_ring, notify);
+       if (notify)
+               hypervisor_notify_via_evtchn(sc->sc_evtchn);
        return;
 }
 
@@ -780,7 +789,6 @@
        mmu_update_t mmu[1];
        multicall_entry_t mcl[2];
 
-       int s = splbio();
        mutex_enter(&sc->sc_rx_lock);
        
        DPRINTF(("%s: xennet_free_rx_buffer\n", device_xname(sc->sc_dev)));
@@ -788,16 +796,6 @@
        for (i = 0; i < NET_RX_RING_SIZE; i++) {
                struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
 
-               /*
-                * if the buffer is in transit in the network stack, wait for
-                * the network stack to free it.
-                */
-               while ((volatile grant_ref_t)rxreq->rxreq_gntref ==
-                   GRANT_STACK_REF)
-                       mutex_exit(&sc->sc_rx_lock);
-                       tsleep(xennet_xenbus_detach, PRIBIO, "xnet_free", hz/2);
-                       mutex_enter(&sc->sc_rx_lock);
-
                if (rxreq->rxreq_gntref != GRANT_INVALID_REF) {
                        /*
                         * this req is still granted. Get back the page or
@@ -861,7 +859,6 @@
 
        }
        mutex_exit(&sc->sc_rx_lock);
-       splx(s);
        DPRINTF(("%s: xennet_free_rx_buffer done\n", device_xname(sc->sc_dev)));
 }
 
@@ -871,10 +868,23 @@
 static void
 xennet_rx_mbuf_free(struct mbuf *m, void *buf, size_t size, void *arg)
 {
-       struct xennet_rxreq *req = arg;
+       int s = splnet();
+       KASSERT(buf == m->m_ext.ext_buf);
+       KASSERT(arg == NULL);
+       KASSERT(m != NULL);
+       vaddr_t va = (vaddr_t)(buf) & ~((vaddr_t)PAGE_MASK);
+       pool_cache_put_paddr(if_xennetrxbuf_cache,
+           (void *)va, m->m_ext.ext_paddr);
+       pool_cache_put(mb_cache, m);
+       splx(s);
+};
+
+static void
+xennet_rx_free_req(struct xennet_rxreq *req)
+{
        struct xennet_xenbus_softc *sc = req->rxreq_sc;
        
-       mutex_enter(&sc->sc_rx_lock);
+       KASSERT(mutex_owned(&sc->sc_rx_lock));
 
        /* puts back the RX request in the list of free RX requests */
        SLIST_INSERT_HEAD(&sc->sc_rxreq_head, req, rxreq_next);
@@ -886,18 +896,10 @@
         */
        req->rxreq_gntref = GRANT_INVALID_REF;
        
-       if (sc->sc_free_rxreql >= SC_NLIVEREQ(sc) &&
+       if (sc->sc_free_rxreql >= (NET_RX_RING_SIZE * 4 / 5) &&
            __predict_true(sc->sc_backend_status == BEST_CONNECTED)) {
-               mutex_exit(&sc->sc_rx_lock);
                xennet_alloc_rx_buffer(sc);
        }
-       else {
-               mutex_exit(&sc->sc_rx_lock);
-       }
-       
-       if (m)
-               pool_cache_put(mb_cache, m);
-
 }
 
 /*
@@ -988,16 +990,10 @@
        DPRINTFN(XEDB_EVENT, ("xennet_handler prod %d cons %d\n",
            sc->sc_rx_ring.sring->rsp_prod, sc->sc_rx_ring.rsp_cons));
 
+       mutex_enter(&sc->sc_rx_lock);
        resp_prod = sc->sc_rx_ring.sring->rsp_prod;
        xen_rmb(); /* ensure we see replies up to resp_prod */
 
-       /*
-        * The locking in this loop needs to be done carefully, as
-        * m_free() will take the sc_rx_lock() via
-        * xennet_rx_mbuf_free()
-        */
-       mutex_enter(&sc->sc_rx_lock);
-       
        for (i = sc->sc_rx_ring.rsp_cons; i != resp_prod; i++) {
                netif_rx_response_t *rx = RING_GET_RESPONSE(&sc->sc_rx_ring, i);
                req = &sc->sc_rxreqs[rx->id];
@@ -1034,8 +1030,6 @@
                            __func__, sc->sc_rx_feature);
                }
 
-               req->rxreq_gntref = GRANT_INVALID_REF;
-
                pa = req->rxreq_pa;
                va = req->rxreq_va;
                
@@ -1067,10 +1061,7 @@
                                DPRINTFN(XEDB_EVENT,
                                    ("xennet_handler bad dest\n"));
                                /* packet not for us */
-                               mutex_exit(&sc->sc_rx_lock);
-                               xennet_rx_mbuf_free(NULL, (void *)va, PAGE_SIZE,
-                                   req);
-                               mutex_enter(&sc->sc_rx_lock);
+                               xennet_rx_free_req(req);
                                continue;
                        }
                }
@@ -1078,50 +1069,37 @@
                if (__predict_false(m == NULL)) {
                        printf("%s: rx no mbuf\n", ifp->if_xname);
                        ifp->if_ierrors++;
-                       mutex_exit(&sc->sc_rx_lock);
-                       xennet_rx_mbuf_free(NULL, (void *)va, PAGE_SIZE, req);
-                       mutex_enter(&sc->sc_rx_lock);
+                       xennet_rx_free_req(req);
                        continue;
                }
                MCLAIM(m, &sc->sc_ethercom.ec_rx_mowner);
 
                m->m_pkthdr.rcvif = ifp;
-               if (__predict_true(sc->sc_rx_ring.req_prod_pvt != 
-                   sc->sc_rx_ring.sring->rsp_prod)) {
-                       m->m_len = m->m_pkthdr.len = rx->status;
-                       MEXTADD(m, pktp, rx->status,
-                           M_DEVBUF, xennet_rx_mbuf_free, req);
-                       m->m_flags |= M_EXT_RW; /* we own the buffer */
-                       req->rxreq_gntref = GRANT_STACK_REF;
-                       mutex_exit(&sc->sc_rx_lock);
-               } else {
-                       /*
-                        * This was our last receive buffer, allocate
-                        * memory, copy data and push the receive
-                        * buffer back to the hypervisor.
-                        */
-                       m->m_len = min(MHLEN, rx->status);
-                       m->m_pkthdr.len = 0;
-                       mutex_exit(&sc->sc_rx_lock);
-                       m_copyback(m, 0, rx->status, pktp);
-                       xennet_rx_mbuf_free(NULL, (void *)va, PAGE_SIZE, req);
-                       if (m->m_pkthdr.len < rx->status) {
-                               /* out of memory, just drop packets */
-                               ifp->if_ierrors++;
-                               m_freem(m);
-                               mutex_enter(&sc->sc_rx_lock);
-                               continue;
-                       }
+               req->rxreq_va = (vaddr_t)pool_cache_get_paddr(
+                   if_xennetrxbuf_cache, PR_NOWAIT, &req->rxreq_pa);
+               if (__predict_false(req->rxreq_va == 0)) {
+                       printf("%s: rx no buf\n", ifp->if_xname);
+                       ifp->if_ierrors++;
+                       req->rxreq_va = va;
+                       req->rxreq_pa = pa;
+                       xennet_rx_free_req(req);
+                       m_freem(m);
+                       continue;
                }
-               
+               m->m_len = m->m_pkthdr.len = rx->status;
+               MEXTADD(m, pktp, rx->status,
+                   M_DEVBUF, xennet_rx_mbuf_free, NULL);
+               m->m_flags |= M_EXT_RW; /* we own the buffer */
+               m->m_ext.ext_paddr = pa;
                if ((rx->flags & NETRXF_csum_blank) != 0) {
                        xennet_checksum_fill(&m);
                        if (m == NULL) {
                                ifp->if_ierrors++;
-                               mutex_enter(&sc->sc_rx_lock);


