Subject: Re: nfe(4) hardware checksum support
To: None <tech-kern@NetBSD.org>
From: Izumi Tsutsui <tsutsui@ceres.dti.ne.jp>
List: tech-kern
Date: 01/07/2007 01:24:51
> > Could anyone test the attached patch which enables
> > hardware checksumming on nfe(4), NVIDIA nForce
> > integrated Ethernet?

After some debugging, I notice that bit definitions of
NFE_RX_UDP_CSUMOK and NFE_RX_TCP_CSUMOK in if_nfereg.h are
swapped (FreeBSD doesn't seem to distinguish them in RX csum_flags).
Updated patch is attached.

> Patch applied to my nForce 250 10/100, and looking for ways to test the
> feature.

As Thomas E. Spanjaard said, ifconfig(8) shows/controls
hardware checksum features on each interface.

I'd like to see:
- there is no negative side effect
- any performance improvements on benchmarks like ttcp
  (though I'm afraid we can't see it on 100baseTX with fast CPU)
- 'vmstat -e' output on kernel with "options TCP_CSUM_COUNTERS"
  and "options UDP_CSUM_COUNTERS"
- packets which have bad checksum data are handled correctly
  (I'm not sure how we can test it without special debugging programs)
etc.
---
Izumi Tsutsui

Index: if_nfe.c
===================================================================
RCS file: /cvsroot/src/sys/dev/pci/if_nfe.c,v
retrieving revision 1.11
diff -u -r1.11 if_nfe.c
--- if_nfe.c	1 Jan 2007 04:13:25 -0000	1.11
+++ if_nfe.c	6 Jan 2007 16:18:23 -0000
@@ -319,12 +319,12 @@
 		sc->sc_ethercom.ec_capabilities |=
 			ETHERCAP_VLAN_HWTAGGING | ETHERCAP_VLAN_MTU;
 #endif
-#ifdef NFE_CSUM
 	if (sc->sc_flags & NFE_HW_CSUM) {
-		ifp->if_capabilities |= IFCAP_CSUM_IPv4 | IFCAP_CSUM_TCPv4 |
-		    IFCAP_CSUM_UDPv4;
+		ifp->if_capabilities |=
+		    IFCAP_CSUM_IPv4_Tx | IFCAP_CSUM_IPv4_Rx |
+		    IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_TCPv4_Rx |
+		    IFCAP_CSUM_UDPv4_Tx | IFCAP_CSUM_UDPv4_Rx;
 	}
-#endif
 
 	sc->sc_mii.mii_ifp = ifp;
 	sc->sc_mii.mii_readreg = nfe_miibus_readreg;
@@ -801,19 +801,31 @@
 		m->m_pkthdr.len = m->m_len = len;
 		m->m_pkthdr.rcvif = ifp;
 
-#ifdef notyet
-		if (sc->sc_flags & NFE_HW_CSUM) {
-			if (flags & NFE_RX_IP_CSUMOK)
-				m->m_pkthdr.csum_flags |= M_IPV4_CSUM_IN_OK;
-			if (flags & NFE_RX_UDP_CSUMOK)
-				m->m_pkthdr.csum_flags |= M_UDP_CSUM_IN_OK;
-			if (flags & NFE_RX_TCP_CSUMOK)
-				m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK;
-		}
-#elif defined(NFE_CSUM)
-		if ((sc->sc_flags & NFE_HW_CSUM) && (flags & NFE_RX_CSUMOK))
-			m->m_pkthdr.csum_flags = M_IPV4_CSUM_IN_OK;
-#endif
+		if ((sc->sc_flags & NFE_HW_CSUM) != 0) {
+			/*
+			 * XXX
+			 * no way to check M_CSUM_IPv4_BAD or non-IPv4 packets?
+			 */
+			if (flags & NFE_RX_IP_CSUMOK) {
+				m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
+				DPRINTFN(3, ("%s: ip4csum-rx ok\n",
+				    sc->sc_dev.dv_xname));
+			}
+			/*
+			 * XXX
+			 * no way to check M_CSUM_TCP_UDP_BAD or
+			 * other protocols?
+			 */
+			if (flags & NFE_RX_UDP_CSUMOK) {
+				m->m_pkthdr.csum_flags |= M_CSUM_UDPv4;
+				DPRINTFN(3, ("%s: udp4csum-rx ok\n",
+				    sc->sc_dev.dv_xname));
+			} else if (flags & NFE_RX_TCP_CSUMOK) {
+				m->m_pkthdr.csum_flags |= M_CSUM_TCPv4;
+				DPRINTFN(3, ("%s: tcp4csum-rx ok\n",
+				    sc->sc_dev.dv_xname));
+			}
+		}
 
 #if NBPFILTER > 0
 		if (ifp->if_bpf)
@@ -929,7 +941,7 @@
 	struct nfe_desc64 *desc64;
 	struct nfe_tx_data *data;
 	bus_dmamap_t map;
-	uint16_t flags;
+	uint16_t flags, csumflags;
 #if NVLAN > 0
 	struct m_tag *mtag;
 	uint32_t vtag = 0;
@@ -941,6 +953,7 @@
 	data = NULL;
 
 	flags = 0;
+	csumflags = 0;
 	first = sc->txq.cur;
 
 	map = sc->txq.data[first].map;
@@ -962,12 +975,12 @@
 	if ((mtag = VLAN_OUTPUT_TAG(&sc->sc_ethercom, m0)) != NULL)
 		vtag = NFE_TX_VTAG | VLAN_TAG_VALUE(mtag);
 #endif
-#ifdef NFE_CSUM
-	if (m0->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT)
-		flags |= NFE_TX_IP_CSUM;
-	if (m0->m_pkthdr.csum_flags & (M_TCPV4_CSUM_OUT | M_UDPV4_CSUM_OUT))
-		flags |= NFE_TX_TCP_CSUM;
-#endif
+	if ((sc->sc_flags & NFE_HW_CSUM) != 0) {
+		if (m0->m_pkthdr.csum_flags & M_CSUM_IPv4)
+			csumflags |= NFE_TX_IP_CSUM;
+		if (m0->m_pkthdr.csum_flags & (M_CSUM_TCPv4 | M_CSUM_UDPv4))
+			csumflags |= NFE_TX_TCP_CSUM;
+	}
 
 	for (i = 0; i < map->dm_nsegs; i++) {
 		data = &sc->txq.data[sc->txq.cur];
@@ -982,9 +995,7 @@
 			    htole32(map->dm_segs[i].ds_addr & 0xffffffff);
 			desc64->length = htole16(map->dm_segs[i].ds_len - 1);
 			desc64->flags = htole16(flags);
-#if NVLAN > 0
-			desc64->vtag = htole32(vtag);
-#endif
+			desc64->vtag = 0;
 		} else {
 			desc32 = &sc->txq.desc32[sc->txq.cur];
 
@@ -993,21 +1004,11 @@
 			desc32->flags = htole16(flags);
 		}
 
-		if (map->dm_nsegs > 1) {
-			/*
-			 * Checksum flags and vtag belong to the first fragment
-			 * only.
-			 */
-			flags &= ~(NFE_TX_IP_CSUM | NFE_TX_TCP_CSUM);
-#if NVLAN > 0
-			vtag = 0;
-#endif
-			/*
-			 * Setting of the valid bit in the first descriptor is
-			 * deferred until the whole chain is fully setup.
-			 */
-			flags |= NFE_TX_VALID;
-		}
+		/*
+		 * Setting of the valid bit in the first descriptor is
+		 * deferred until the whole chain is fully setup.
+		 */
+		flags |= NFE_TX_VALID;
 
 		sc->txq.queued++;
 		sc->txq.cur = (sc->txq.cur + 1) % NFE_TX_RING_COUNT;
@@ -1019,6 +1020,12 @@
 		flags |= NFE_TX_LASTFRAG_V2;
 		desc64->flags = htole16(flags);
 
+		/* Checksum flags and vtag belong to the first fragment only. */
+#if NVLAN > 0
+		sc->txq.desc64[first].vtag = htole32(vtag);
+#endif
+		sc->txq.desc64[first].flags |= htole16(csumflags);
+
 		/* finally, set the valid bit in the first descriptor */
 		sc->txq.desc64[first].flags |= htole16(NFE_TX_VALID);
 	} else {
@@ -1029,6 +1036,9 @@
 			flags |= NFE_TX_LASTFRAG_V1;
 		desc32->flags = htole16(flags);
 
+		/* Checksum flags belong to the first fragment only. */
+		sc->txq.desc32[first].flags |= htole16(csumflags);
+
 		/* finally, set the valid bit in the first descriptor */
 		sc->txq.desc32[first].flags |= htole16(NFE_TX_VALID);
 	}
@@ -1116,10 +1126,8 @@
 		sc->rxtxctl |= NFE_RXTX_V3MAGIC;
 	else if (sc->sc_flags & NFE_JUMBO_SUP)
 		sc->rxtxctl |= NFE_RXTX_V2MAGIC;
-#ifdef NFE_CSUM
 	if (sc->sc_flags & NFE_HW_CSUM)
 		sc->rxtxctl |= NFE_RXTX_RXCSUM;
-#endif
 #if NVLAN > 0
 	/*
 	 * Although the adapter is capable of stripping VLAN tags from received
Index: if_nfereg.h
===================================================================
RCS file: /cvsroot/src/sys/dev/pci/if_nfereg.h,v
retrieving revision 1.1
diff -u -r1.1 if_nfereg.h
--- if_nfereg.h	12 Mar 2006 22:40:42 -0000	1.1
+++ if_nfereg.h	6 Jan 2007 16:18:23 -0000
@@ -163,16 +163,18 @@
 	uint16_t	flags;
 #define NFE_RX_FIXME_V2		0x4300
 #define NFE_RX_VALID_V2		(1 << 13)
+#define NFE_RX_IP_CSUMOK	(1 << 12)
+#define NFE_RX_UDP_CSUMOK	(1 << 11)
+#define NFE_RX_TCP_CSUMOK	(1 << 10)
 #define NFE_TX_ERROR_V2		0x5c04
 #define NFE_TX_LASTFRAG_V2	(1 << 13)
+#define NFE_TX_IP_CSUM		(1 << 11)
+#define NFE_TX_TCP_CSUM		(1 << 10)
 } __packed;
 
 /* flags common to V1/V2 descriptors */
-#define NFE_RX_CSUMOK		0x1c00
 #define NFE_RX_ERROR		(1 << 14)
 #define NFE_RX_READY		(1 << 15)
-#define NFE_TX_TCP_CSUM		(1 << 10)
-#define NFE_TX_IP_CSUM		(1 << 11)
 #define NFE_TX_VALID		(1 << 15)
 
 #define NFE_READ(sc, reg) \