Subject: Re: hw ip4csum-tx bug on ex(4)? (Re: RTL8169 hw IP4CSUM_Tx workaround)
To: None <tech-kern@NetBSD.org, tech-net@NetBSD.org>
From: Izumi Tsutsui <tsutsui@ceres.dti.ne.jp>
List: tech-kern
Date: 11/07/2006 23:13:43
I wrote:

> BTW, does anyone see the similar hardware ip4csum-tx problem
> with short IP packets on ex(4)?
 :
> "tcpdump -vn | grep 'bad cksum'"  on the destination host
> during "ping -f -s 1473 desthost" on ex(4) host complains:
 :
> 21:31:23.282784 IP (tos 0x0, ttl 255, id 8113, offset 1480, flags [none], \
> length: 21, bad cksum f2ff (->f1ff)!) 192.168.20.17 > 192.168.20.28: icmp
 :

The attached diff seems to fix this problem, but I wonder if
it's really worth to add such tweaks to just handle rare case bugs.
(I'm not sure if there is any protocol other than ICMP
 which could generate 21 or 22 byte IP fragment packets)

Comments?
---
Izumi Tsutsui


Index: elinkxl.c
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/elinkxl.c,v
retrieving revision 1.92
diff -u -r1.92 elinkxl.c
--- elinkxl.c	5 Nov 2006 07:59:21 -0000	1.92
+++ elinkxl.c	6 Nov 2006 15:09:13 -0000
@@ -279,7 +279,7 @@
 	 * map for them.
 	 */
 	if ((error = bus_dmamem_alloc(sc->sc_dmat,
-	    EX_NDPD * sizeof (struct ex_dpd), PAGE_SIZE, 0, &sc->sc_dseg, 1,
+	    DPDMEM_SIZE + EX_IP4CSUMTX_PADLEN, PAGE_SIZE, 0, &sc->sc_dseg, 1,
 	    &sc->sc_drseg, BUS_DMA_NOWAIT)) != 0) {
 		aprint_error(
 		    "%s: can't allocate download descriptors, error = %d\n",
@@ -290,19 +290,19 @@
 	attach_stage = 5;
 
 	if ((error = bus_dmamem_map(sc->sc_dmat, &sc->sc_dseg, sc->sc_drseg,
-	    EX_NDPD * sizeof (struct ex_dpd), (caddr_t *)&sc->sc_dpd,
+	    DPDMEM_SIZE + EX_IP4CSUMTX_PADLEN, (caddr_t *)&sc->sc_dpd,
 	    BUS_DMA_NOWAIT|BUS_DMA_COHERENT)) != 0) {
 		aprint_error("%s: can't map download descriptors, error = %d\n",
 		    sc->sc_dev.dv_xname, error);
 		goto fail;
 	}
-	memset(sc->sc_dpd, 0, EX_NDPD * sizeof (struct ex_dpd));
+	memset(sc->sc_dpd, 0, DPDMEM_SIZE + EX_IP4CSUMTX_PADLEN);
 
 	attach_stage = 6;
 
 	if ((error = bus_dmamap_create(sc->sc_dmat,
-	    EX_NDPD * sizeof (struct ex_dpd), 1,
-	    EX_NDPD * sizeof (struct ex_dpd), 0, BUS_DMA_NOWAIT,
+	    DPDMEM_SIZE + EX_IP4CSUMTX_PADLEN, 1,
+	    DPDMEM_SIZE + EX_IP4CSUMTX_PADLEN, 0, BUS_DMA_NOWAIT,
 	    &sc->sc_dpd_dmamap)) != 0) {
 		aprint_error(
 		    "%s: can't create download desc. DMA map, error = %d\n",
@@ -313,13 +313,15 @@
 	attach_stage = 7;
 
 	if ((error = bus_dmamap_load(sc->sc_dmat, sc->sc_dpd_dmamap,
-	    sc->sc_dpd, EX_NDPD * sizeof (struct ex_dpd), NULL,
+	    sc->sc_dpd, DPDMEM_SIZE + EX_IP4CSUMTX_PADLEN, NULL,
 	    BUS_DMA_NOWAIT)) != 0) {
 		aprint_error(
 		    "%s: can't load download desc. DMA map, error = %d\n",
 		    sc->sc_dev.dv_xname, error);
 		goto fail;
 	}
+	bus_dmamap_sync(sc->sc_dmat, sc->sc_dpd_dmamap,
+	    DPDMEMPAD_OFF, EX_IP4CSUMTX_PADLEN, BUS_DMASYNC_PREWRITE);
 
 	attach_stage = 8;
 
@@ -1053,7 +1055,7 @@
 	struct ex_txdesc *txp;
 	struct mbuf *mb_head;
 	bus_dmamap_t dmamap;
-	int m_csumflags, offset, totlen, segment, error;
+	int m_csumflags, offset, seglen, totlen, segment, error;
 	u_int32_t csum_flags;
 
 	if (sc->tx_head || sc->tx_free == NULL)
@@ -1153,11 +1155,28 @@
 		totlen = 0;
 		for (segment = 0; segment < dmamap->dm_nsegs; segment++, fr++) {
 			fr->fr_addr = htole32(dmamap->dm_segs[segment].ds_addr);
-			fr->fr_len = htole32(dmamap->dm_segs[segment].ds_len);
-			totlen += dmamap->dm_segs[segment].ds_len;
+			seglen = dmamap->dm_segs[segment].ds_len;
+			fr->fr_len = htole32(seglen);
+			totlen += seglen;
+		}
+		if ((m_csumflags & M_CSUM_IPv4) != 0 &&
+		    totlen <= EX_IP4CSUMTX_PADLEN) {
+			/*
+			 * Pad short packets to avoid ip4csum-tx bug.
+			 *
+			 * XXX Should we still consider if such short
+			 *     (36 bytes or less) packets might already
+			 *     occupy EX_NTFRAG (== 32) fragements here?
+			 */
+			KASSERT(segment <= EX_NTFRAGS);
+			fr->fr_addr = htole32(DPDMEMPAD_DMADDR(sc));
+			seglen = EX_IP4CSUMTX_PADLEN + 1 - totlen;
+			fr->fr_len = htole32(EX_FR_LAST | seglen);
+			totlen += seglen;
+		} else {
+			fr--;
+			fr->fr_len |= htole32(EX_FR_LAST);
 		}
-		fr--;
-		fr->fr_len |= htole32(EX_FR_LAST);
 		txp->tx_mbhead = mb_head;
 
 		bus_dmamap_sync(sc->sc_dmat, dmamap, 0, dmamap->dm_mapsize,
Index: elinkxlreg.h
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/elinkxlreg.h,v
retrieving revision 1.11
diff -u -r1.11 elinkxlreg.h
--- elinkxlreg.h	9 Nov 2002 11:45:19 -0000	1.11
+++ elinkxlreg.h	6 Nov 2006 15:09:13 -0000
@@ -268,6 +268,19 @@
 	struct ex_dpd *tx_dpd;
 };
 
+/*
+ * hardware ip4csum-tx on ex(4) sometimes seems to set wrong IP checksums
+ * if the TX IP packet length is 21 or 22 bytes which requires autopadding.
+ * To avoid this bug, we have to pad such very short packets manually.
+ */
+#define EX_IP4CSUMTX_MINLEN	22
+#define EX_IP4CSUMTX_PADLEN	\
+	(sizeof(struct ether_header) + EX_IP4CSUMTX_MINLEN)
+
+#define DPDMEM_SIZE		(sizeof(struct ex_dpd) * EX_NDPD)
+#define DPDMEMPAD_OFF		DPDMEM_SIZE
+#define DPDMEMPAD_DMADDR(sc)	((sc)->sc_dpddma + DPDMEMPAD_OFF)
+
 #define DPD_DMADDR(s,t) \
 	((s)->sc_dpddma + ((caddr_t)((t)->tx_dpd) - (caddr_t)((s)->sc_dpd)))