Source-Changes-HG archive


[src/trunk]: src/sys/arch/xen/xen support feature-sg



details:   https://anonhg.NetBSD.org/src/rev/539cce5f20af
branches:  trunk
changeset: 971507:539cce5f20af
user:      jdolecek <jdolecek%NetBSD.org@localhost>
date:      Sun Apr 26 12:58:28 2020 +0000

description:
support feature-sg

a practical offshoot of this is that xennet(4) now supports jumbo
frames; it is also necessary for eventual TSO support

always defrag short Tx packets before passing them to the backend -
even with feature-sg it's much faster to process a packet that is
passed as a single fragment
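
for illustration, a minimal sketch of that defrag policy (assumed
helper name and placement, not the committed code - see the diff
below; it assumes m_defrag(9) returns NULL and leaves the original
chain intact on failure):

static struct mbuf *
xennet_tx_linearize(struct xennet_xenbus_softc *sc, struct mbuf *m)
{
	/*
	 * Even with feature-sg, a short packet spread over several
	 * mbufs takes one ring slot per fragment.  Copying it into
	 * a single mbuf lets the backend process it as one fragment,
	 * which is much faster.
	 */
	if (m->m_next != NULL && m->m_pkthdr.len <= MCLBYTES) {
		struct mbuf *md = m_defrag(m, M_DONTWAIT);
		if (md != NULL) {
			sc->sc_cnt_tx_defrag.ev_count++;
			return md;	/* now a single fragment */
		}
		/* defrag failed; fall back to the scatter-gather path */
	}
	return m;
}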

diffstat:

 sys/arch/xen/xen/if_xennet_xenbus.c |  313 +++++++++++++++++++++++++++--------
 1 files changed, 239 insertions(+), 74 deletions(-)

diffs (truncated from 545 to 300 lines):

diff -r e38a402718a2 -r 539cce5f20af sys/arch/xen/xen/if_xennet_xenbus.c
--- a/sys/arch/xen/xen/if_xennet_xenbus.c       Sun Apr 26 12:38:21 2020 +0000
+++ b/sys/arch/xen/xen/if_xennet_xenbus.c       Sun Apr 26 12:58:28 2020 +0000
@@ -1,4 +1,4 @@
-/*      $NetBSD: if_xennet_xenbus.c,v 1.118 2020/04/25 15:26:18 bouyer Exp $      */
+/*      $NetBSD: if_xennet_xenbus.c,v 1.119 2020/04/26 12:58:28 jdolecek Exp $      */
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -81,7 +81,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: if_xennet_xenbus.c,v 1.118 2020/04/25 15:26:18 bouyer Exp $");
+__KERNEL_RCSID(0, "$NetBSD: if_xennet_xenbus.c,v 1.119 2020/04/26 12:58:28 jdolecek Exp $");
 
 #include "opt_xen.h"
 #include "opt_nfs_boot.h"
@@ -186,19 +186,28 @@
        struct xennet_rxreq sc_rxreqs[NET_RX_RING_SIZE];
        SLIST_HEAD(,xennet_txreq) sc_txreq_head; /* list of free TX requests */
        SLIST_HEAD(,xennet_rxreq) sc_rxreq_head; /* list of free RX requests */
-       int sc_free_rxreql; /* number of free receive request struct */
+       int sc_free_txreql; /* number of free transmit request structs */
+       int sc_free_rxreql; /* number of free receive request structs */
 
        int sc_backend_status; /* our status with backend */
 #define BEST_CLOSED            0
 #define BEST_DISCONNECTED      1
 #define BEST_CONNECTED         2
 #define BEST_SUSPENDED         3
-       bool sc_ipv6_csum;      /* whether backend support IPv6 csum offload */
+       int sc_features;
+#define FEATURE_IPV6CSUM       0x01    /* IPv6 checksum offload */
+#define FEATURE_SG             0x02    /* scatter-gather */
+#define FEATURE_BITS           "\20\1IPV6-CSUM\2SG"
        krndsource_t sc_rnd_source;
+       struct evcnt sc_cnt_tx_defrag;
+       struct evcnt sc_cnt_tx_queue_full;
+       struct evcnt sc_cnt_tx_drop;
+       struct evcnt sc_cnt_tx_frag;
+       struct evcnt sc_cnt_rx_frag;
 };
 
 static pool_cache_t if_xennetrxbuf_cache;
-static int if_xennetrxbuf_cache_inited=0;
+static int if_xennetrxbuf_cache_inited = 0;
 
 static int  xennet_xenbus_match(device_t, cfdata_t, void *);
 static void xennet_xenbus_attach(device_t, device_t, void *);
@@ -256,6 +265,9 @@
        unsigned long uval;
        extern int ifqmaxlen; /* XXX */
        char mac[32];
+       char buf[64];
+       bus_size_t maxsz;
+       int nsegs;
 
        aprint_normal(": Xen Virtual Network Interface\n");
        sc->sc_dev = self;
@@ -263,6 +275,18 @@
        sc->sc_xbusd = xa->xa_xbusd;
        sc->sc_xbusd->xbusd_otherend_changed = xennet_backend_changed;
 
+       /* read feature support flags */
+       err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
+           "feature-ipv6-csum-offload", &uval, 10);
+       if (!err && uval == 1)
+               sc->sc_features |= FEATURE_IPV6CSUM;
+       err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
+           "feature-sg", &uval, 10);
+       if (!err && uval == 1)
+               sc->sc_features |= FEATURE_SG;
+       snprintb(buf, sizeof(buf), FEATURE_BITS, sc->sc_features);
+       aprint_normal_dev(sc->sc_dev, "backend features %s\n", buf);
+
        /* xenbus ensure 2 devices can't be probed at the same time */
        if (if_xennetrxbuf_cache_inited == 0) {
                if_xennetrxbuf_cache = pool_cache_init(PAGE_SIZE, 0, 0, 0,
@@ -271,13 +295,26 @@
        }
 
        /* initialize free RX and RX request lists */
+       if (sc->sc_features & FEATURE_SG) {
+               maxsz = ETHER_MAX_LEN_JUMBO;
+               /*
+                * Linux netback drops the packet if the request has more
+                * segments than XEN_NETIF_NR_SLOTS_MIN (== 18). With 2KB
+                * MCLBYTES this means maximum packet size 36KB, in reality
+                * less due to mbuf chain fragmentation.
+                */
+               nsegs = XEN_NETIF_NR_SLOTS_MIN;
+       } else {
+               maxsz = PAGE_SIZE;
+               nsegs = 1;
+       }
        mutex_init(&sc->sc_tx_lock, MUTEX_DEFAULT, IPL_NET);
        SLIST_INIT(&sc->sc_txreq_head);
        for (i = 0; i < NET_TX_RING_SIZE; i++) {
                struct xennet_txreq *txreq = &sc->sc_txreqs[i];
        
                txreq->txreq_id = i;
-               if (bus_dmamap_create(sc->sc_xbusd->xbusd_dmat, PAGE_SIZE, 1,
+               if (bus_dmamap_create(sc->sc_xbusd->xbusd_dmat, maxsz, nsegs,
                    PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
                    &txreq->txreq_dmamap) != 0)
                        break;
@@ -285,13 +322,14 @@
                SLIST_INSERT_HEAD(&sc->sc_txreq_head, &sc->sc_txreqs[i],
                    txreq_next);
        }
+       sc->sc_free_txreql = i;
 
        mutex_init(&sc->sc_rx_lock, MUTEX_DEFAULT, IPL_NET);
        SLIST_INIT(&sc->sc_rxreq_head);
        for (i = 0; i < NET_RX_RING_SIZE; i++) {
                struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
                rxreq->rxreq_id = i;
-               if (bus_dmamap_create(sc->sc_xbusd->xbusd_dmat, PAGE_SIZE, 1,
+               if (bus_dmamap_create(sc->sc_xbusd->xbusd_dmat, maxsz, nsegs,
                    PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
                    &rxreq->rxreq_dmamap) != 0)
                        break;
@@ -323,14 +361,11 @@
        aprint_normal_dev(self, "MAC address %s\n",
            ether_sprintf(sc->sc_enaddr));
 
-       /* read ipv6 csum support flag */
-       err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
-           "feature-ipv6-csum-offload", &uval, 10);
-       sc->sc_ipv6_csum = (!err && uval == 1);
-
        /* Initialize ifnet structure and attach interface */
        strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
        sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
+       if (sc->sc_features & FEATURE_SG)
+               sc->sc_ethercom.ec_capabilities |= ETHERCAP_JUMBO_MTU;
        ifp->if_softc = sc;
        ifp->if_start = xennet_start;
        ifp->if_ioctl = xennet_ioctl;
@@ -349,7 +384,7 @@
                M_CSUM_TCPv4 | M_CSUM_UDPv4 | M_CSUM_IPv4       \
                | M_CSUM_TCPv6 | M_CSUM_UDPv6                   \
        )
-       if (sc->sc_ipv6_csum) {
+       if (sc->sc_features & FEATURE_IPV6CSUM) {
                /*
                 * If backend supports IPv6 csum offloading, we can skip
                 * IPv6 csum for Tx packets. Rx packet validation can
@@ -359,6 +394,7 @@
                    IFCAP_CSUM_UDPv6_Tx | IFCAP_CSUM_TCPv6_Tx;
        }
 
+       IFQ_SET_MAXLEN(&ifp->if_snd, uimax(2 * NET_TX_RING_SIZE, IFQ_MAXLEN));
        IFQ_SET_READY(&ifp->if_snd);
        if_attach(ifp);
        if_deferred_start_init(ifp, NULL);
@@ -387,6 +423,17 @@
        rnd_attach_source(&sc->sc_rnd_source, device_xname(sc->sc_dev),
            RND_TYPE_NET, RND_FLAG_DEFAULT);
 
+       evcnt_attach_dynamic(&sc->sc_cnt_tx_defrag, EVCNT_TYPE_MISC,
+           NULL, device_xname(sc->sc_dev), "Tx packet defrag");
+       evcnt_attach_dynamic(&sc->sc_cnt_tx_frag, EVCNT_TYPE_MISC,
+           NULL, device_xname(sc->sc_dev), "Tx multi-segment packet");
+       evcnt_attach_dynamic(&sc->sc_cnt_tx_drop, EVCNT_TYPE_MISC,
+           NULL, device_xname(sc->sc_dev), "Tx packet dropped");
+       evcnt_attach_dynamic(&sc->sc_cnt_tx_queue_full, EVCNT_TYPE_MISC,
+           NULL, device_xname(sc->sc_dev), "Tx queue full");
+       evcnt_attach_dynamic(&sc->sc_cnt_rx_frag, EVCNT_TYPE_MISC,
+           NULL, device_xname(sc->sc_dev), "Rx multi-segment packet");
+
        if (!pmf_device_register(self, xennet_xenbus_suspend,
            xennet_xenbus_resume))
                aprint_error_dev(self, "couldn't establish power handler\n");
@@ -441,6 +488,12 @@
        ether_ifdetach(ifp);
        if_detach(ifp);
 
+       evcnt_detach(&sc->sc_cnt_tx_defrag);
+       evcnt_detach(&sc->sc_cnt_tx_frag);
+       evcnt_detach(&sc->sc_cnt_tx_drop);
+       evcnt_detach(&sc->sc_cnt_tx_queue_full);
+       evcnt_detach(&sc->sc_cnt_rx_frag);
+
        /* Unhook the entropy source. */
        rnd_detach_source(&sc->sc_rnd_source);
 
@@ -583,6 +636,12 @@
                goto abort_transaction;
        }
        error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
+           "feature-sg", "%u", 1);
+       if (error) {
+               errmsg = "writing feature-sg";
+               goto abort_transaction;
+       }
+       error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
            "event-channel", "%u", sc->sc_evtchn);
        if (error) {
                errmsg = "writing event channel";
@@ -690,6 +749,7 @@
 {
        RING_IDX req_prod = sc->sc_rx_ring.req_prod_pvt;
        RING_IDX i;
+       netif_rx_request_t *rxreq;
        struct xennet_rxreq *req;
        int otherend_id, notify;
        struct mbuf *m;
@@ -751,11 +811,9 @@
 
                req->rxreq_m = m;
 
-               RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i)->id =
-                   req->rxreq_id;
-
-               RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i)->gref =
-                   req->rxreq_gntref;
+               rxreq = RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i);
+               rxreq->id = req->rxreq_id;
+               rxreq->gref = req->rxreq_gntref;
 
                SLIST_REMOVE_HEAD(&sc->sc_rxreq_head, rxreq_next);
                sc->sc_free_rxreql--;
@@ -864,19 +922,25 @@
                KASSERT(req->txreq_id ==
                    RING_GET_RESPONSE(&sc->sc_tx_ring, i)->id);
                KASSERT(xengnt_status(req->txreq_gntref) == 0);
-               KASSERT(req->txreq_m != NULL);
+               xengnt_revoke_access(req->txreq_gntref);
+               req->txreq_gntref = GRANT_INVALID_REF;
 
-               if (__predict_false(
-                   RING_GET_RESPONSE(&sc->sc_tx_ring, i)->status !=
-                   NETIF_RSP_OKAY))
-                       if_statinc(ifp, if_oerrors);
-               else
-                       if_statinc(ifp, if_opackets);
-               xengnt_revoke_access(req->txreq_gntref);
-               bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat, req->txreq_dmamap);
-               m_freem(req->txreq_m);
-               req->txreq_m = NULL;
+               /* Cleanup/statistics if this is the master req of a chain */
+               if (req->txreq_m) {
+                       if (__predict_false(
+                           RING_GET_RESPONSE(&sc->sc_tx_ring, i)->status !=
+                           NETIF_RSP_OKAY))
+                               if_statinc(ifp, if_oerrors);
+                       else
+                               if_statinc(ifp, if_opackets);
+                       bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat,
+                           req->txreq_dmamap);
+                       m_freem(req->txreq_m);
+                       req->txreq_m = NULL;
+               }
+
                SLIST_INSERT_HEAD(&sc->sc_txreq_head, req, txreq_next);
+               sc->sc_free_txreql++;
        }
 
        sc->sc_tx_ring.rsp_cons = resp_prod;
@@ -901,7 +965,8 @@
        struct ifnet *ifp = &sc->sc_ethercom.ec_if;
        RING_IDX resp_prod, i;
        struct xennet_rxreq *req;
-       struct mbuf *m;
+       struct mbuf *m, *m0;
+       int rxflags, m0_rxflags;
        int more_to_do;
 
        if (sc->sc_backend_status != BEST_CONNECTED)
@@ -920,6 +985,7 @@
        resp_prod = sc->sc_rx_ring.sring->rsp_prod;
        xen_rmb(); /* ensure we see replies up to resp_prod */
 
+       m0 = NULL;
        for (i = sc->sc_rx_ring.rsp_cons; i != resp_prod; i++) {
                netif_rx_response_t *rx = RING_GET_RESPONSE(&sc->sc_rx_ring, i);
                req = &sc->sc_rxreqs[rx->id];
@@ -936,19 +1002,54 @@
                bus_dmamap_sync(sc->sc_xbusd->xbusd_dmat, req->rxreq_dmamap, 0,
                     m->m_pkthdr.len, BUS_DMASYNC_PREREAD);
 
-               MCLAIM(m, &sc->sc_ethercom.ec_rx_mowner);
-               m_set_rcvif(m, ifp);
+               if (m0 == NULL) {
+                       MCLAIM(m, &sc->sc_ethercom.ec_rx_mowner);
+                       m_set_rcvif(m, ifp);
+               }
+
+               rxflags = rx->flags;
 
-               if (rx->flags & NETRXF_csum_blank)
+               if (m0 || rxflags & NETRXF_more_data) {
+                       /*
+                        * On Rx, every fragment (even the first one) contains
+                        * just the length of the data in that fragment.
+                        */
+                       if (m0 == NULL) {
+                               m0 = m;



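the diff above is cut off inside the new multi-fragment Rx branch; for
orientation only, a hedged sketch of how such a reassembly loop is
typically completed (the real continuation is in rev. 1.119 of the
file; m_cat(9) and the if_percpuq enqueue are this sketch's choices,
not necessarily the commit's):

		/* sketch: continuing inside the multi-fragment branch */
		if (m0 == NULL) {
			/* first fragment becomes the head of the chain */
			m0 = m;
			m0_rxflags = rxflags;
		} else {
			/* append; m->m_len holds this fragment's length */
			m0->m_pkthdr.len += m->m_len;
			m_cat(m0, m);
		}
		if ((rxflags & NETRXF_more_data) == 0) {
			/* last fragment: count it and pass the chain up */
			sc->sc_cnt_rx_frag.ev_count++;
			if_percpuq_enqueue(ifp->if_percpuq, m0);
			m0 = NULL;
		}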