Subject: sk(4) jumbo frames support - patch
To: None <tech-net@netbsd.org>
From: Jeff Rizzo <riz@tastylime.net>
List: tech-net
Date: 11/27/2005 08:58:19
--nVMJ2NtxeReIH9PS
Content-Type: multipart/mixed; boundary="SUOF0GtieIMvvwua"
Content-Disposition: inline


--SUOF0GtieIMvvwua
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

In my quest to bring our sk(4) driver up to snuff, here's another patch
which enables the use of jumbo frames - I'd appreciate feedback.

Please note that this patch will _apply_ without my previous patch,
but the result won't actually compile.  :)  (Please apply the most
recent sysctl int_mod patch before testing.)

This was primarily taken from OpenBSD, which appears to have gotten
it largely from the bge(4) driver.

With this patch applied, I was able to use 9000-byte frames with a D-Link
DGE-530T board, and combined with lowering the interrupt moderation timer,
I was able to improve TCP throughput of a simple test by about 15% over
1500-byte frames.

+j

--SUOF0GtieIMvvwua
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="sk-jumbo.diff"
Content-Transfer-Encoding: quoted-printable

--- if_skvar.h.orig	2005-11-27 08:37:44.000000000 -0800
+++ if_skvar.h	2005-11-27 08:39:18.000000000 -0800
@@ -87,11 +87,6 @@
 #ifndef _DEV_PCI_IF_SKVAR_H_
 #define _DEV_PCI_IF_SKVAR_H_
=20
-struct sk_jslot {
-	caddr_t			sk_buf;
-	int			sk_inuse;
-};
-
 struct sk_jpool_entry {
 	int                             slot;
 	LIST_ENTRY(sk_jpool_entry)	jpool_entries;
@@ -122,6 +117,7 @@
 	struct sk_chain		sk_rx_chain[SK_RX_RING_CNT];
 	struct sk_txmap_entry	*sk_tx_map[SK_TX_RING_CNT];
 	bus_dmamap_t		sk_rx_map[SK_RX_RING_CNT];
+	bus_dmamap_t		sk_rx_jumbo_map;
 	int			sk_tx_prod;
 	int			sk_tx_cons;
 	int			sk_tx_cnt;
@@ -129,7 +125,7 @@
 	int			sk_rx_cons;
 	int			sk_rx_cnt;
 	/* Stick the jumbo mem management stuff here too. */
-	struct sk_jslot		sk_jslots[SK_JSLOTS];
+	caddr_t			sk_jslots[SK_JSLOTS];
 	void			*sk_jumbo_buf;
=20
 };
--- if_sk.c.orig	2005-11-27 08:37:44.000000000 -0800
+++ if_sk.c	2005-11-27 08:43:39.000000000 -0800
@@ -191,6 +191,10 @@
 void sk_ifmedia_sts(struct ifnet *, struct ifmediareq *);
 void sk_reset(struct sk_softc *);
 int sk_newbuf(struct sk_if_softc *, int, struct mbuf *, bus_dmamap_t);
+int sk_alloc_jumbo_mem(struct sk_if_softc *);
+void sk_free_jumbo_mem(struct sk_if_softc *);
+void *sk_jalloc(struct sk_if_softc *);
+void sk_jfree(struct mbuf *, caddr_t, size_t, void *);
 int sk_init_rx_ring(struct sk_if_softc *);
 int sk_init_tx_ring(struct sk_if_softc *);
 u_int8_t sk_vpd_readbyte(struct sk_softc *, int);
@@ -726,7 +730,8 @@
 	}
=20
 	for (i =3D 0; i < SK_RX_RING_CNT; i++) {
-		if (sk_newbuf(sc_if, i, NULL, NULL) =3D=3D ENOBUFS) {
+		if (sk_newbuf(sc_if, i, NULL,=20
+		    sc_if->sk_cdata.sk_rx_jumbo_map) =3D=3D ENOBUFS) {
 			printf("%s: failed alloc of %dth mbuf\n",
 			    sc_if->sk_dev.dv_xname, i);
 			return(ENOBUFS);
@@ -773,26 +778,13 @@
 sk_newbuf(struct sk_if_softc *sc_if, int i, struct mbuf *m,
 	  bus_dmamap_t dmamap)
 {
-	struct sk_softc		*sc =3D sc_if->sk_softc;
 	struct mbuf		*m_new =3D NULL;
 	struct sk_chain		*c;
 	struct sk_rx_desc	*r;
=20
-	if (dmamap =3D=3D NULL) {
-		/* if (m) panic() */
-
-		if (bus_dmamap_create(sc->sc_dmatag, MCLBYTES, 1, MCLBYTES,
-				      0, BUS_DMA_NOWAIT, &dmamap)) {
-			printf("%s: can't create recv map\n",
-			       sc_if->sk_dev.dv_xname);
-			return(ENOMEM);
-		}
-	} else if (m =3D=3D NULL)
-		bus_dmamap_unload(sc->sc_dmatag, dmamap);
-
-	sc_if->sk_cdata.sk_rx_map[i] =3D dmamap;
-
 	if (m =3D=3D NULL) {
+		caddr_t buf =3D NULL;
+
 		MGETHDR(m_new, M_DONTWAIT, MT_DATA);
 		if (m_new =3D=3D NULL) {
 			printf("%s: no memory for rx list -- "
@@ -801,19 +793,18 @@
 		}
=20
 		/* Allocate the jumbo buffer */
-		MCLGET(m_new, M_DONTWAIT);
-		if (!(m_new->m_flags & M_EXT)) {
+		buf =3D sk_jalloc(sc_if);
+		if (buf =3D=3D NULL) {
 			m_freem(m_new);
-			return (ENOBUFS);
+			DPRINTFN(1, ("%s jumbo allocation failed -- packet "
+			    "dropped!\n", sc_if->sk_ethercom.ec_if.if_xname));
+			return(ENOBUFS);
 		}
=20
-		m_new->m_len =3D m_new->m_pkthdr.len =3D MCLBYTES;
-
-		m_adj(m_new, ETHER_ALIGN);
+		/* Attach the buffer to the mbuf */
+		m_new->m_len =3D m_new->m_pkthdr.len =3D SK_JLEN;
+		MEXTADD(m_new, buf, SK_JLEN, 0, sk_jfree, sc_if);
=20
-		if (bus_dmamap_load_mbuf(sc->sc_dmatag, dmamap, m_new,
-					 BUS_DMA_NOWAIT))
-			return(ENOBUFS);
 	} else {
 		/*
 	 	 * We're re-using a previously allocated mbuf;
@@ -821,16 +812,18 @@
 		 * default values.
 		 */
 		m_new =3D m;
-		m_new->m_len =3D m_new->m_pkthdr.len =3D MCLBYTES;
-		m_adj(m_new, ETHER_ALIGN);
+		m_new->m_len =3D m_new->m_pkthdr.len =3D SK_JLEN;
 		m_new->m_data =3D m_new->m_ext.ext_buf;
 	}
+	m_adj(m_new, ETHER_ALIGN);
=20
 	c =3D &sc_if->sk_cdata.sk_rx_chain[i];
 	r =3D c->sk_desc;
 	c->sk_mbuf =3D m_new;
-	r->sk_data_lo =3D dmamap->dm_segs[0].ds_addr;
-	r->sk_ctl =3D dmamap->dm_segs[0].ds_len | SK_RXSTAT;
+	r->sk_data_lo =3D dmamap->dm_segs[0].ds_addr +
+	    (((vaddr_t)m_new->m_data
+		- (vaddr_t)sc_if->sk_cdata.sk_jumbo_buf));
+	r->sk_ctl =3D SK_JLEN | SK_RXSTAT;
=20
 	SK_CDRXSYNC(sc_if, i, BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
=20
@@ -838,6 +831,161 @@
 }
=20
 /*
+ * Memory management for jumbo frames.
+ */
+/* Set up the jumbo RX buffer pool; returns 0, or ENOBUFS on failure. */
+int
+sk_alloc_jumbo_mem(struct sk_if_softc *sc_if)
+{
+	struct sk_softc		*sc =3D sc_if->sk_softc;
+	caddr_t			ptr, kva;
+	bus_dma_segment_t	seg;
+	int		i, rseg, state, error;
+	struct sk_jpool_entry   *entry;
+
+	state =3D error =3D 0;	/* state: setup steps completed */
+
+	/* Grab SK_JMEM bytes of DMA-able memory as a single segment. */
+	if (bus_dmamem_alloc(sc->sc_dmatag, SK_JMEM, PAGE_SIZE, 0,
+			     &seg, 1, &rseg, BUS_DMA_NOWAIT)) {
+		printf("%s: can't alloc rx buffers\n", sc->sk_dev.dv_xname);
+		return (ENOBUFS);
+	}
+
+	state =3D 1;	/* memory allocated */
+	if (bus_dmamem_map(sc->sc_dmatag, &seg, rseg, SK_JMEM, &kva,
+			   BUS_DMA_NOWAIT)) {
+		printf("%s: can't map dma buffers (%d bytes)\n",
+		    sc->sk_dev.dv_xname, SK_JMEM);
+		error =3D ENOBUFS;
+		goto out;
+	}
+
+	state =3D 2;	/* memory mapped at kva */
+	if (bus_dmamap_create(sc->sc_dmatag, SK_JMEM, 1, SK_JMEM, 0,
+	    BUS_DMA_NOWAIT, &sc_if->sk_cdata.sk_rx_jumbo_map)) {
+		printf("%s: can't create dma map\n", sc->sk_dev.dv_xname);
+		error =3D ENOBUFS;
+		goto out;
+	}
+
+	state =3D 3;	/* DMA map created */
+	if (bus_dmamap_load(sc->sc_dmatag, sc_if->sk_cdata.sk_rx_jumbo_map,
+			    kva, SK_JMEM, NULL, BUS_DMA_NOWAIT)) {
+		printf("%s: can't load dma map\n", sc->sk_dev.dv_xname);
+		error =3D ENOBUFS;
+		goto out;
+	}
+
+	state =3D 4;	/* DMA map loaded */
+	sc_if->sk_cdata.sk_jumbo_buf =3D (caddr_t)kva;
+	DPRINTFN(1,("sk_jumbo_buf =3D 0x%08X\n", sc_if->sk_cdata.sk_jumbo_buf));
+
+	LIST_INIT(&sc_if->sk_jfree_listhead);
+	LIST_INIT(&sc_if->sk_jinuse_listhead);
+
+	/*
+	 * Record SK_JSLOTS buffer addresses, SK_JLEN bytes apart, in
+	 * sk_jslots[]. Slot 0 starts on the in-use list, the rest are free.
+	 */
+	ptr =3D sc_if->sk_cdata.sk_jumbo_buf;
+	for (i =3D 0; i < SK_JSLOTS; i++) {
+		sc_if->sk_cdata.sk_jslots[i] =3D ptr;
+		ptr +=3D SK_JLEN;
+		entry =3D malloc(sizeof(struct sk_jpool_entry),
+		    M_DEVBUF, M_NOWAIT);
+		if (entry =3D=3D NULL) {
+			printf("%s: no memory for jumbo buffer queue!\n",
+			    sc->sk_dev.dv_xname);
+			error =3D ENOBUFS;
+			goto out;	/* NOTE(review): earlier entries are not freed */
+		}
+		entry->slot =3D i;
+		if (i)
+		LIST_INSERT_HEAD(&sc_if->sk_jfree_listhead,
+				 entry, jpool_entries);
+		else
+		LIST_INSERT_HEAD(&sc_if->sk_jinuse_listhead,
+				 entry, jpool_entries);
+	}
+out:	/* on error, undo completed steps; cases fall through */
+	if (error !=3D 0) {
+		switch (state) {
+		case 4:
+			bus_dmamap_unload(sc->sc_dmatag,
+			    sc_if->sk_cdata.sk_rx_jumbo_map);
+		case 3:
+			bus_dmamap_destroy(sc->sc_dmatag,
+			    sc_if->sk_cdata.sk_rx_jumbo_map);
+		case 2:
+			bus_dmamem_unmap(sc->sc_dmatag, kva, SK_JMEM);
+		case 1:
+			bus_dmamem_free(sc->sc_dmatag, &seg, rseg);
+			break;
+		default:
+			break;
+		}
+	}
+
+	return (error);
+}
+
+/* Allocate a jumbo buffer: move the first free-list entry to the in-use
+ * list and return the address of its slot, or NULL if no entry is free.
+ */
+void *
+sk_jalloc(struct sk_if_softc *sc_if)
+{
+	struct sk_jpool_entry   *entry;
+
+	entry =3D LIST_FIRST(&sc_if->sk_jfree_listhead);
+
+	if (entry =3D=3D NULL)
+		return (NULL);	/* free list is empty */
+
+	LIST_REMOVE(entry, jpool_entries);
+	LIST_INSERT_HEAD(&sc_if->sk_jinuse_listhead, entry, jpool_entries);
+	return (sc_if->sk_cdata.sk_jslots[entry->slot]);
+}
+
+/* Release a jumbo buffer: put an entry for the slot of buf on the free
+ * list and, if m is non-NULL, return m to mbpool_cache. size is unused.
+ */
+void
+sk_jfree(struct mbuf *m, caddr_t buf, size_t size, void	*arg)
+{
+	struct sk_jpool_entry *entry;
+	struct sk_if_softc *sc;
+	int i, s;
+
+	/* arg carries the struct sk_if_softc pointer; NULL is fatal. */
+	sc =3D (struct sk_if_softc *)arg;
+
+	if (sc =3D=3D NULL)
+		panic("sk_jfree: can't find softc pointer!");
+
+	/* slot index: byte offset of buf in sk_jumbo_buf, over SK_JLEN */
+
+	i =3D ((vaddr_t)buf
+	     - (vaddr_t)sc->sk_cdata.sk_jumbo_buf) / SK_JLEN;
+
+	if ((i < 0) || (i >=3D SK_JSLOTS))
+		panic("sk_jfree: asked to free buffer that we don't manage!");
+
+	s =3D splvm();	/* NOTE(review): sk_jalloc does not raise spl */
+	entry =3D LIST_FIRST(&sc->sk_jinuse_listhead);
+	if (entry =3D=3D NULL)
+		panic("sk_jfree: buffer not in use!");
+	entry->slot =3D i;	/* any in-use entry is relabelled for slot i */
+	LIST_REMOVE(entry, jpool_entries);
+	LIST_INSERT_HEAD(&sc->sk_jfree_listhead, entry, jpool_entries);
+
+	if (__predict_true(m !=3D NULL))
+		pool_cache_put(&mbpool_cache, m);
+	splx(s);
+}
+
+/*
  * Set media options.
  */
 int
@@ -1120,7 +1268,7 @@
 	 * amount of SRAM on it, somewhere between 512K and 2MB. We
 	 * need to divide this up a) between the transmitter and
  	 * receiver and b) between the two XMACs, if this is a
-	 * dual port NIC. Our algotithm is to divide up the memory
+	 * dual port NIC. Our algorithm is to divide up the memory
 	 * evenly so that everyone gets a fair share.
 	 */
 	if (sk_win_read_1(sc, SK_CONFIG) & SK_CONFIG_SINGLEMAC) {
@@ -1214,8 +1362,8 @@
 	for (i =3D 0; i < SK_TX_RING_CNT; i++) {
 		sc_if->sk_cdata.sk_tx_chain[i].sk_mbuf =3D NULL;
=20
-		if (bus_dmamap_create(sc->sc_dmatag, MCLBYTES, SK_NTXSEG,
-		    MCLBYTES, 0, BUS_DMA_NOWAIT, &dmamap)) {
+		if (bus_dmamap_create(sc->sc_dmatag, SK_JLEN, SK_NTXSEG,
+		    SK_JLEN, 0, BUS_DMA_NOWAIT, &dmamap)) {
 			aprint_error("%s: Can't create TX dmamap\n",
 				sc_if->sk_dev.dv_xname);
 			bus_dmamap_unload(sc->sc_dmatag, sc_if->sk_ring_map);
@@ -1245,12 +1393,15 @@
         sc_if->sk_rdata =3D (struct sk_ring_data *)kva;
 	bzero(sc_if->sk_rdata, sizeof(struct sk_ring_data));
=20
-	/* XXX TLS It's not clear what's wrong with the Jumbo MTU
-	   XXX TLS support in this driver, so we don't enable it. */
-
-	sc_if->sk_ethercom.ec_capabilities =3D ETHERCAP_VLAN_MTU;
-
 	ifp =3D &sc_if->sk_ethercom.ec_if;
+	/* Try to allocate memory for jumbo buffers. */
+	if (sk_alloc_jumbo_mem(sc_if)) {
+		printf("%s: jumbo buffer allocation failed\n", ifp->if_xname);
+		goto fail;
+	}
+	sc_if->sk_ethercom.ec_capabilities =3D ETHERCAP_VLAN_MTU=20
+		| ETHERCAP_JUMBO_MTU;
+
 	ifp->if_softc =3D sc_if;
 	ifp->if_flags =3D IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 	ifp->if_ioctl =3D sk_ioctl;
@@ -1889,7 +2040,7 @@
=20
 		cur_rx =3D &sc_if->sk_cdata.sk_rx_chain[cur];
 		cur_desc =3D &sc_if->sk_rdata->sk_rx_ring[cur];
-		dmamap =3D sc_if->sk_cdata.sk_rx_map[cur];
+		dmamap =3D sc_if->sk_cdata.sk_rx_jumbo_map;
=20
 		bus_dmamap_sync(sc_if->sk_softc->sc_dmatag, dmamap, 0,
 		    dmamap->dm_mapsize, BUS_DMASYNC_POSTREAD);
@@ -2482,7 +2633,8 @@
 	/* serial mode register */
 	DPRINTFN(6, ("sk_init_yukon: 9\n"));
 	SK_YU_WRITE_2(sc_if, YUKON_SMR, YU_SMR_DATA_BLIND(0x1c) |
-		      YU_SMR_MFL_VLAN | YU_SMR_IPG_DATA(0x1e));
+		      YU_SMR_MFL_VLAN | YU_SMR_MFL_JUMBO |
+		      YU_SMR_IPG_DATA(0x1e));
=20
 	DPRINTFN(6, ("sk_init_yukon: 10\n"));
 	/* Setup Yukon's address */

--SUOF0GtieIMvvwua--

--nVMJ2NtxeReIH9PS
Content-Type: application/pgp-signature
Content-Disposition: inline

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.1 (NetBSD)

iQCVAwUBQ4nlq7OuUtxCgar5AQLwnAQAmB64oLCx0Dyi/rzHuBCboX2PJupSNGLS
ltmo1OISHdno6QzPlKY0GtSPmCphqqqVkleRc7A1SBoQ1RsQ0Ad+2arYAGCvB3Xl
r5f017pfqQ6yrSk9LzTatjlPuQZOW5vt8joGRXePjuNDyeA96CwMQ4cUiVVr8LgA
HyeYe3lnD3U=
=y9XM
-----END PGP SIGNATURE-----

--nVMJ2NtxeReIH9PS--