Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/arch/xen/xen support feature-sg
details: https://anonhg.NetBSD.org/src/rev/d5ea37752810
branches: trunk
changeset: 931315:d5ea37752810
user: jdolecek <jdolecek%NetBSD.org@localhost>
date: Sun Apr 26 12:58:28 2020 +0000
description:
support feature-sg
a practical offshoot of this is that xennet(4) now supports jumbo frames;
it is also necessary for eventual TSO support
always defrag short Tx packets before passing them to the backend - even
with feature-sg it's way faster to process the packet if it's
passed as a single fragment
diffstat:
sys/arch/xen/xen/if_xennet_xenbus.c | 313 +++++++++++++++++++++++++++--------
1 files changed, 239 insertions(+), 74 deletions(-)
diffs (truncated from 545 to 300 lines):
diff -r e0a3a592997f -r d5ea37752810 sys/arch/xen/xen/if_xennet_xenbus.c
--- a/sys/arch/xen/xen/if_xennet_xenbus.c Sun Apr 26 12:38:21 2020 +0000
+++ b/sys/arch/xen/xen/if_xennet_xenbus.c Sun Apr 26 12:58:28 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: if_xennet_xenbus.c,v 1.118 2020/04/25 15:26:18 bouyer Exp $ */
+/* $NetBSD: if_xennet_xenbus.c,v 1.119 2020/04/26 12:58:28 jdolecek Exp $ */
/*
* Copyright (c) 2006 Manuel Bouyer.
@@ -81,7 +81,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: if_xennet_xenbus.c,v 1.118 2020/04/25 15:26:18 bouyer Exp $");
+__KERNEL_RCSID(0, "$NetBSD: if_xennet_xenbus.c,v 1.119 2020/04/26 12:58:28 jdolecek Exp $");
#include "opt_xen.h"
#include "opt_nfs_boot.h"
@@ -186,19 +186,28 @@
struct xennet_rxreq sc_rxreqs[NET_RX_RING_SIZE];
SLIST_HEAD(,xennet_txreq) sc_txreq_head; /* list of free TX requests */
SLIST_HEAD(,xennet_rxreq) sc_rxreq_head; /* list of free RX requests */
- int sc_free_rxreql; /* number of free receive request struct */
+ int sc_free_txreql; /* number of free transmit request structs */
+ int sc_free_rxreql; /* number of free receive request structs */
int sc_backend_status; /* our status with backend */
#define BEST_CLOSED 0
#define BEST_DISCONNECTED 1
#define BEST_CONNECTED 2
#define BEST_SUSPENDED 3
- bool sc_ipv6_csum; /* whether backend support IPv6 csum offload */
+ int sc_features;
+#define FEATURE_IPV6CSUM 0x01 /* IPv6 checksum offload */
+#define FEATURE_SG 0x02 /* scatter-gather */
+#define FEATURE_BITS "\20\1IPV6-CSUM\2SG"
krndsource_t sc_rnd_source;
+ struct evcnt sc_cnt_tx_defrag;
+ struct evcnt sc_cnt_tx_queue_full;
+ struct evcnt sc_cnt_tx_drop;
+ struct evcnt sc_cnt_tx_frag;
+ struct evcnt sc_cnt_rx_frag;
};
static pool_cache_t if_xennetrxbuf_cache;
-static int if_xennetrxbuf_cache_inited=0;
+static int if_xennetrxbuf_cache_inited = 0;
static int xennet_xenbus_match(device_t, cfdata_t, void *);
static void xennet_xenbus_attach(device_t, device_t, void *);
@@ -256,6 +265,9 @@
unsigned long uval;
extern int ifqmaxlen; /* XXX */
char mac[32];
+ char buf[64];
+ bus_size_t maxsz;
+ int nsegs;
aprint_normal(": Xen Virtual Network Interface\n");
sc->sc_dev = self;
@@ -263,6 +275,18 @@
sc->sc_xbusd = xa->xa_xbusd;
sc->sc_xbusd->xbusd_otherend_changed = xennet_backend_changed;
+ /* read feature support flags */
+ err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
+ "feature-ipv6-csum-offload", &uval, 10);
+ if (!err && uval == 1)
+ sc->sc_features |= FEATURE_IPV6CSUM;
+ err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
+ "feature-sg", &uval, 10);
+ if (!err && uval == 1)
+ sc->sc_features |= FEATURE_SG;
+ snprintb(buf, sizeof(buf), FEATURE_BITS, sc->sc_features);
+ aprint_normal_dev(sc->sc_dev, "backend features %s\n", buf);
+
/* xenbus ensure 2 devices can't be probed at the same time */
if (if_xennetrxbuf_cache_inited == 0) {
if_xennetrxbuf_cache = pool_cache_init(PAGE_SIZE, 0, 0, 0,
@@ -271,13 +295,26 @@
}
/* initialize free RX and RX request lists */
+ if (sc->sc_features & FEATURE_SG) {
+ maxsz = ETHER_MAX_LEN_JUMBO;
+ /*
+ * Linux netback drops the packet if the request has more
+ * segments than XEN_NETIF_NR_SLOTS_MIN (== 18). With 2KB
+ * MCLBYTES this means maximum packet size 36KB, in reality
+ * less due to mbuf chain fragmentation.
+ */
+ nsegs = XEN_NETIF_NR_SLOTS_MIN;
+ } else {
+ maxsz = PAGE_SIZE;
+ nsegs = 1;
+ }
mutex_init(&sc->sc_tx_lock, MUTEX_DEFAULT, IPL_NET);
SLIST_INIT(&sc->sc_txreq_head);
for (i = 0; i < NET_TX_RING_SIZE; i++) {
struct xennet_txreq *txreq = &sc->sc_txreqs[i];
txreq->txreq_id = i;
- if (bus_dmamap_create(sc->sc_xbusd->xbusd_dmat, PAGE_SIZE, 1,
+ if (bus_dmamap_create(sc->sc_xbusd->xbusd_dmat, maxsz, nsegs,
PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
&txreq->txreq_dmamap) != 0)
break;
@@ -285,13 +322,14 @@
SLIST_INSERT_HEAD(&sc->sc_txreq_head, &sc->sc_txreqs[i],
txreq_next);
}
+ sc->sc_free_txreql = i;
mutex_init(&sc->sc_rx_lock, MUTEX_DEFAULT, IPL_NET);
SLIST_INIT(&sc->sc_rxreq_head);
for (i = 0; i < NET_RX_RING_SIZE; i++) {
struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
rxreq->rxreq_id = i;
- if (bus_dmamap_create(sc->sc_xbusd->xbusd_dmat, PAGE_SIZE, 1,
+ if (bus_dmamap_create(sc->sc_xbusd->xbusd_dmat, maxsz, nsegs,
PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
&rxreq->rxreq_dmamap) != 0)
break;
@@ -323,14 +361,11 @@
aprint_normal_dev(self, "MAC address %s\n",
ether_sprintf(sc->sc_enaddr));
- /* read ipv6 csum support flag */
- err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
- "feature-ipv6-csum-offload", &uval, 10);
- sc->sc_ipv6_csum = (!err && uval == 1);
-
/* Initialize ifnet structure and attach interface */
strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
+ if (sc->sc_features & FEATURE_SG)
+ sc->sc_ethercom.ec_capabilities |= ETHERCAP_JUMBO_MTU;
ifp->if_softc = sc;
ifp->if_start = xennet_start;
ifp->if_ioctl = xennet_ioctl;
@@ -349,7 +384,7 @@
M_CSUM_TCPv4 | M_CSUM_UDPv4 | M_CSUM_IPv4 \
| M_CSUM_TCPv6 | M_CSUM_UDPv6 \
)
- if (sc->sc_ipv6_csum) {
+ if (sc->sc_features & FEATURE_IPV6CSUM) {
/*
* If backend supports IPv6 csum offloading, we can skip
* IPv6 csum for Tx packets. Rx packet validation can
@@ -359,6 +394,7 @@
IFCAP_CSUM_UDPv6_Tx | IFCAP_CSUM_TCPv6_Tx;
}
+ IFQ_SET_MAXLEN(&ifp->if_snd, uimax(2 * NET_TX_RING_SIZE, IFQ_MAXLEN));
IFQ_SET_READY(&ifp->if_snd);
if_attach(ifp);
if_deferred_start_init(ifp, NULL);
@@ -387,6 +423,17 @@
rnd_attach_source(&sc->sc_rnd_source, device_xname(sc->sc_dev),
RND_TYPE_NET, RND_FLAG_DEFAULT);
+ evcnt_attach_dynamic(&sc->sc_cnt_tx_defrag, EVCNT_TYPE_MISC,
+ NULL, device_xname(sc->sc_dev), "Tx packet defrag");
+ evcnt_attach_dynamic(&sc->sc_cnt_tx_frag, EVCNT_TYPE_MISC,
+ NULL, device_xname(sc->sc_dev), "Tx multi-segment packet");
+ evcnt_attach_dynamic(&sc->sc_cnt_tx_drop, EVCNT_TYPE_MISC,
+ NULL, device_xname(sc->sc_dev), "Tx packet dropped");
+ evcnt_attach_dynamic(&sc->sc_cnt_tx_queue_full, EVCNT_TYPE_MISC,
+ NULL, device_xname(sc->sc_dev), "Tx queue full");
+ evcnt_attach_dynamic(&sc->sc_cnt_rx_frag, EVCNT_TYPE_MISC,
+ NULL, device_xname(sc->sc_dev), "Rx multi-segment packet");
+
if (!pmf_device_register(self, xennet_xenbus_suspend,
xennet_xenbus_resume))
aprint_error_dev(self, "couldn't establish power handler\n");
@@ -441,6 +488,12 @@
ether_ifdetach(ifp);
if_detach(ifp);
+ evcnt_detach(&sc->sc_cnt_tx_defrag);
+ evcnt_detach(&sc->sc_cnt_tx_frag);
+ evcnt_detach(&sc->sc_cnt_tx_drop);
+ evcnt_detach(&sc->sc_cnt_tx_queue_full);
+ evcnt_detach(&sc->sc_cnt_rx_frag);
+
/* Unhook the entropy source. */
rnd_detach_source(&sc->sc_rnd_source);
@@ -583,6 +636,12 @@
goto abort_transaction;
}
error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
+ "feature-sg", "%u", 1);
+ if (error) {
+ errmsg = "writing feature-sg";
+ goto abort_transaction;
+ }
+ error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
"event-channel", "%u", sc->sc_evtchn);
if (error) {
errmsg = "writing event channel";
@@ -690,6 +749,7 @@
{
RING_IDX req_prod = sc->sc_rx_ring.req_prod_pvt;
RING_IDX i;
+ netif_rx_request_t *rxreq;
struct xennet_rxreq *req;
int otherend_id, notify;
struct mbuf *m;
@@ -751,11 +811,9 @@
req->rxreq_m = m;
- RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i)->id =
- req->rxreq_id;
-
- RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i)->gref =
- req->rxreq_gntref;
+ rxreq = RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i);
+ rxreq->id = req->rxreq_id;
+ rxreq->gref = req->rxreq_gntref;
SLIST_REMOVE_HEAD(&sc->sc_rxreq_head, rxreq_next);
sc->sc_free_rxreql--;
@@ -864,19 +922,25 @@
KASSERT(req->txreq_id ==
RING_GET_RESPONSE(&sc->sc_tx_ring, i)->id);
KASSERT(xengnt_status(req->txreq_gntref) == 0);
- KASSERT(req->txreq_m != NULL);
+ xengnt_revoke_access(req->txreq_gntref);
+ req->txreq_gntref = GRANT_INVALID_REF;
- if (__predict_false(
- RING_GET_RESPONSE(&sc->sc_tx_ring, i)->status !=
- NETIF_RSP_OKAY))
- if_statinc(ifp, if_oerrors);
- else
- if_statinc(ifp, if_opackets);
- xengnt_revoke_access(req->txreq_gntref);
- bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat, req->txreq_dmamap);
- m_freem(req->txreq_m);
- req->txreq_m = NULL;
+ /* Cleanup/statistics if this is the master req of a chain */
+ if (req->txreq_m) {
+ if (__predict_false(
+ RING_GET_RESPONSE(&sc->sc_tx_ring, i)->status !=
+ NETIF_RSP_OKAY))
+ if_statinc(ifp, if_oerrors);
+ else
+ if_statinc(ifp, if_opackets);
+ bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat,
+ req->txreq_dmamap);
+ m_freem(req->txreq_m);
+ req->txreq_m = NULL;
+ }
+
SLIST_INSERT_HEAD(&sc->sc_txreq_head, req, txreq_next);
+ sc->sc_free_txreql++;
}
sc->sc_tx_ring.rsp_cons = resp_prod;
@@ -901,7 +965,8 @@
struct ifnet *ifp = &sc->sc_ethercom.ec_if;
RING_IDX resp_prod, i;
struct xennet_rxreq *req;
- struct mbuf *m;
+ struct mbuf *m, *m0;
+ int rxflags, m0_rxflags;
int more_to_do;
if (sc->sc_backend_status != BEST_CONNECTED)
@@ -920,6 +985,7 @@
resp_prod = sc->sc_rx_ring.sring->rsp_prod;
xen_rmb(); /* ensure we see replies up to resp_prod */
+ m0 = NULL;
for (i = sc->sc_rx_ring.rsp_cons; i != resp_prod; i++) {
netif_rx_response_t *rx = RING_GET_RESPONSE(&sc->sc_rx_ring, i);
req = &sc->sc_rxreqs[rx->id];
@@ -936,19 +1002,54 @@
bus_dmamap_sync(sc->sc_xbusd->xbusd_dmat, req->rxreq_dmamap, 0,
m->m_pkthdr.len, BUS_DMASYNC_PREREAD);
- MCLAIM(m, &sc->sc_ethercom.ec_rx_mowner);
- m_set_rcvif(m, ifp);
+ if (m0 == NULL) {
+ MCLAIM(m, &sc->sc_ethercom.ec_rx_mowner);
+ m_set_rcvif(m, ifp);
+ }
+
+ rxflags = rx->flags;
- if (rx->flags & NETRXF_csum_blank)
+ if (m0 || rxflags & NETRXF_more_data) {
+ /*
+ * On Rx, every fragment (even first one) contain
+ * just length of data in the fragment.
+ */
+ if (m0 == NULL) {
+ m0 = m;
Home |
Main Index |
Thread Index |
Old Index