Subject: nfe(4) hardware checksum support
To: None <tech-net@NetBSD.org>
From: Izumi Tsutsui <tsutsui@ceres.dti.ne.jp>
List: tech-kern
Date: 01/01/2007 15:45:15
A happy new year,

Could anyone test the attached patch which enables
hardware checksumming on nfe(4), NVIDIA nForce
integrated Ethernet?

Values in descriptor flags are taken from FreeBSD's driver
and it seems to work on my nForce3 250 100/10M Ethernet,
but I'm not sure how I can confirm that the RX part works properly,
and it would still be better to test it on other nForce chipsets,
including gigabit variants.
---
Izumi Tsutsui


Index: if_nfe.c
===================================================================
RCS file: /cvsroot/src/sys/dev/pci/if_nfe.c,v
retrieving revision 1.11
diff -u -r1.11 if_nfe.c
--- if_nfe.c	1 Jan 2007 04:13:25 -0000	1.11
+++ if_nfe.c	1 Jan 2007 06:15:40 -0000
@@ -319,12 +319,12 @@
 		sc->sc_ethercom.ec_capabilities |=
 			ETHERCAP_VLAN_HWTAGGING | ETHERCAP_VLAN_MTU;
 #endif
-#ifdef NFE_CSUM
 	if (sc->sc_flags & NFE_HW_CSUM) {
-		ifp->if_capabilities |= IFCAP_CSUM_IPv4 | IFCAP_CSUM_TCPv4 |
-		    IFCAP_CSUM_UDPv4;
+		ifp->if_capabilities |=
+		    IFCAP_CSUM_IPv4_Tx | IFCAP_CSUM_IPv4_Rx |
+		    IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_TCPv4_Rx |
+		    IFCAP_CSUM_UDPv4_Tx | IFCAP_CSUM_UDPv4_Rx;
 	}
-#endif
 
 	sc->sc_mii.mii_ifp = ifp;
 	sc->sc_mii.mii_readreg = nfe_miibus_readreg;
@@ -801,19 +801,23 @@
 		m->m_pkthdr.len = m->m_len = len;
 		m->m_pkthdr.rcvif = ifp;
 
-#ifdef notyet
-		if (sc->sc_flags & NFE_HW_CSUM) {
+		if ((sc->sc_flags & NFE_HW_CSUM) != 0) {
+			/*
+			 * XXX
+			 * no way to check M_CSUM_IPv4_BAD or non-IPv4 packets?
+			 */
 			if (flags & NFE_RX_IP_CSUMOK)
-				m->m_pkthdr.csum_flags |= M_IPV4_CSUM_IN_OK;
+				m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
+			/*
+			 * XXX
+			 * no way to check M_CSUM_TCP_UDP_BAD or
+			 * other protocols?
+			 */
 			if (flags & NFE_RX_UDP_CSUMOK)
-				m->m_pkthdr.csum_flags |= M_UDP_CSUM_IN_OK;
-			if (flags & NFE_RX_TCP_CSUMOK)
-				m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK;
-		}
-#elif defined(NFE_CSUM)
-		if ((sc->sc_flags & NFE_HW_CSUM) && (flags & NFE_RX_CSUMOK))
-			m->m_pkthdr.csum_flags = M_IPV4_CSUM_IN_OK;
-#endif
+				m->m_pkthdr.csum_flags |= M_CSUM_UDPv4;
+			else if (flags & NFE_RX_TCP_CSUMOK)
+				m->m_pkthdr.csum_flags |= M_CSUM_TCPv4;
+		}
 
 #if NBPFILTER > 0
 		if (ifp->if_bpf)
@@ -929,7 +933,7 @@
 	struct nfe_desc64 *desc64;
 	struct nfe_tx_data *data;
 	bus_dmamap_t map;
-	uint16_t flags;
+	uint16_t flags, csumflags;
 #if NVLAN > 0
 	struct m_tag *mtag;
 	uint32_t vtag = 0;
@@ -941,6 +945,7 @@
 	data = NULL;
 
 	flags = 0;
+	csumflags = 0;
 	first = sc->txq.cur;
 
 	map = sc->txq.data[first].map;
@@ -962,12 +967,12 @@
 	if ((mtag = VLAN_OUTPUT_TAG(&sc->sc_ethercom, m0)) != NULL)
 		vtag = NFE_TX_VTAG | VLAN_TAG_VALUE(mtag);
 #endif
-#ifdef NFE_CSUM
-	if (m0->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT)
-		flags |= NFE_TX_IP_CSUM;
-	if (m0->m_pkthdr.csum_flags & (M_TCPV4_CSUM_OUT | M_UDPV4_CSUM_OUT))
-		flags |= NFE_TX_TCP_CSUM;
-#endif
+	if ((sc->sc_flags & NFE_HW_CSUM) != 0) {
+		if (m0->m_pkthdr.csum_flags & M_CSUM_IPv4)
+			csumflags |= NFE_TX_IP_CSUM;
+		if (m0->m_pkthdr.csum_flags & (M_CSUM_TCPv4 | M_CSUM_UDPv4))
+			csumflags |= NFE_TX_TCP_CSUM;
+	}
 
 	for (i = 0; i < map->dm_nsegs; i++) {
 		data = &sc->txq.data[sc->txq.cur];
@@ -982,9 +987,7 @@
 			    htole32(map->dm_segs[i].ds_addr & 0xffffffff);
 			desc64->length = htole16(map->dm_segs[i].ds_len - 1);
 			desc64->flags = htole16(flags);
-#if NVLAN > 0
-			desc64->vtag = htole32(vtag);
-#endif
+			desc64->vtag = 0;
 		} else {
 			desc32 = &sc->txq.desc32[sc->txq.cur];
 
@@ -993,21 +996,11 @@
 			desc32->flags = htole16(flags);
 		}
 
-		if (map->dm_nsegs > 1) {
-			/*
-			 * Checksum flags and vtag belong to the first fragment
-			 * only.
-			 */
-			flags &= ~(NFE_TX_IP_CSUM | NFE_TX_TCP_CSUM);
-#if NVLAN > 0
-			vtag = 0;
-#endif
-			/*
-			 * Setting of the valid bit in the first descriptor is
-			 * deferred until the whole chain is fully setup.
-			 */
-			flags |= NFE_TX_VALID;
-		}
+		/*
+		 * Setting of the valid bit in the first descriptor is
+		 * deferred until the whole chain is fully setup.
+		 */
+		flags |= NFE_TX_VALID;
 
 		sc->txq.queued++;
 		sc->txq.cur = (sc->txq.cur + 1) % NFE_TX_RING_COUNT;
@@ -1019,6 +1012,12 @@
 		flags |= NFE_TX_LASTFRAG_V2;
 		desc64->flags = htole16(flags);
 
+		/* Checksum flags and vtag belong to the first fragment only. */
+#if NVLAN > 0
+		sc->txq.desc64[first].vtag = htole32(vtag);
+#endif
+		sc->txq.desc64[first].flags |= htole16(csumflags);
+
 		/* finally, set the valid bit in the first descriptor */
 		sc->txq.desc64[first].flags |= htole16(NFE_TX_VALID);
 	} else {
@@ -1029,6 +1028,9 @@
 			flags |= NFE_TX_LASTFRAG_V1;
 		desc32->flags = htole16(flags);
 
+		/* Checksum flags belong to the first fragment only. */
+		sc->txq.desc32[first].flags |= htole16(csumflags);
+
 		/* finally, set the valid bit in the first descriptor */
 		sc->txq.desc32[first].flags |= htole16(NFE_TX_VALID);
 	}
@@ -1116,10 +1118,8 @@
 		sc->rxtxctl |= NFE_RXTX_V3MAGIC;
 	else if (sc->sc_flags & NFE_JUMBO_SUP)
 		sc->rxtxctl |= NFE_RXTX_V2MAGIC;
-#ifdef NFE_CSUM
 	if (sc->sc_flags & NFE_HW_CSUM)
 		sc->rxtxctl |= NFE_RXTX_RXCSUM;
-#endif
 #if NVLAN > 0
 	/*
 	 * Although the adapter is capable of stripping VLAN tags from received
Index: if_nfereg.h
===================================================================
RCS file: /cvsroot/src/sys/dev/pci/if_nfereg.h,v
retrieving revision 1.1
diff -u -r1.1 if_nfereg.h
--- if_nfereg.h	12 Mar 2006 22:40:42 -0000	1.1
+++ if_nfereg.h	1 Jan 2007 06:15:40 -0000
@@ -163,16 +163,18 @@
 	uint16_t	flags;
 #define NFE_RX_FIXME_V2		0x4300
 #define NFE_RX_VALID_V2		(1 << 13)
+#define NFE_RX_IP_CSUMOK	(1 << 12)
+#define NFE_RX_TCP_CSUMOK	(1 << 11)
+#define NFE_RX_UDP_CSUMOK	(1 << 10)
 #define NFE_TX_ERROR_V2		0x5c04
 #define NFE_TX_LASTFRAG_V2	(1 << 13)
+#define NFE_TX_IP_CSUM		(1 << 11)
+#define NFE_TX_TCP_CSUM		(1 << 10)
 } __packed;
 
 /* flags common to V1/V2 descriptors */
-#define NFE_RX_CSUMOK		0x1c00
 #define NFE_RX_ERROR		(1 << 14)
 #define NFE_RX_READY		(1 << 15)
-#define NFE_TX_TCP_CSUM		(1 << 10)
-#define NFE_TX_IP_CSUM		(1 << 11)
 #define NFE_TX_VALID		(1 << 15)
 
 #define NFE_READ(sc, reg) \