Subject: Re: hardware checksum for hme
To: None <tech-net@netbsd.org>
From: john heasley <heas@shrubbery.net>
List: tech-net
Date: 02/13/2005 10:28:24
--pf9I7BMVVzbSWLtt
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

Tue, Oct 26, 2004 at 05:19:06PM -0700, john heasley:
> If anyone is interested in trying a patch to hme(4) for hardware tcp/udp
> checksum:
> 
> 	http://www.shrubbery.net/~heas/hmecsum.diff

--pf9I7BMVVzbSWLtt
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="hmecsum.diff"

Index: hme.c
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/hme.c,v
retrieving revision 1.42
diff -u -d -u -r1.42 hme.c
--- hme.c	26 Oct 2004 22:52:44 -0000	1.42
+++ hme.c	27 Oct 2004 00:00:14 -0000
@@ -43,7 +43,7 @@
 #include <sys/cdefs.h>
 __KERNEL_RCSID(0, "$NetBSD: hme.c,v 1.42 2004/10/26 22:52:44 heas Exp $");
 
-/* #define HMEDEBUG */
+#define HMEDEBUG 1
 
 #include "opt_inet.h"
 #include "opt_ns.h"
@@ -75,6 +75,8 @@
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
 #endif
 
 #ifdef NS
@@ -115,9 +117,9 @@
 int		hme_mediachange __P((struct ifnet *));
 void		hme_mediastatus __P((struct ifnet *, struct ifmediareq *));
 
-struct mbuf	*hme_get __P((struct hme_softc *, int, int));
+struct mbuf	*hme_get __P((struct hme_softc *, int, u_int32_t));
 int		hme_put __P((struct hme_softc *, int, struct mbuf *));
-void		hme_read __P((struct hme_softc *, int, int));
+void		hme_read __P((struct hme_softc *, int, u_int32_t));
 int		hme_eint __P((struct hme_softc *, u_int));
 int		hme_rint __P((struct hme_softc *));
 int		hme_tint __P((struct hme_softc *));
@@ -129,7 +131,6 @@
 void	hme_copyfrombuf_contig __P((struct hme_softc *, void *, int, int));
 void	hme_zerobuf_contig __P((struct hme_softc *, int, int));
 
-
 void
 hme_config(sc)
 	struct hme_softc *sc;
@@ -186,7 +187,7 @@
 	 * Also, apparently, the buffers must extend to a DMA burst
 	 * boundary beyond the maximum packet size.
 	 */
-#define _HME_NDESC	128
+#define _HME_NDESC	256
 #define _HME_BUFSZ	1600
 
 	/* Note: the # of descriptors must be a multiple of 16 */
@@ -203,7 +204,7 @@
 	size =	2048 +					/* TX descriptors */
 		2048 +					/* RX descriptors */
 		sc->sc_rb.rb_ntbuf * _HME_BUFSZ +	/* TX buffers */
-		sc->sc_rb.rb_nrbuf * _HME_BUFSZ;	/* TX buffers */
+		sc->sc_rb.rb_nrbuf * _HME_BUFSZ;	/* RX buffers */
 
 	/* Allocate DMA buffer */
 	if ((error = bus_dmamem_alloc(dmatag, size,
@@ -255,6 +256,13 @@
 	ifp->if_flags =
 	    IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST;
 	sc->sc_if_flags = ifp->if_flags;
+	/*
+	 * XXX Rx-only h/w cksum causes the chip to hang when a reply comes
+	 *	from tcp_respond(), eg from a connection to an unbound tcp
+	 *	port
+	 */
+	/* ifp->if_capabilities |= IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_UDPv4_Rx */
+	ifp->if_capabilities |= IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4;
 	IFQ_SET_READY(&ifp->if_snd);
 
 	/* Initialize ifmedia structures and MII info */
@@ -603,6 +611,13 @@
 
 	/* Enable DMA */
 	v |= HME_ERX_CFG_DMAENABLE;
+
+	/* set h/w rx checksum start offset (# of half-words) */
+	/* XXX need to test vlan */
+	v |= (((ETHER_HDR_LEN + sizeof(struct ip) + 
+		((sc->sc_ethercom.ec_capenable & ETHERCAP_VLAN_MTU) ?
+		ETHER_VLAN_ENCAP_LEN : 0)) / 2) << HME_ERX_CFG_CSUMSHIFT) &
+		HME_ERX_CFG_CSUMSTART;
 	bus_space_write_4(t, erx, HME_ERXI_CFG, v);
 
 	/* step 11. XIF Configuration */
@@ -612,7 +627,7 @@
 
 	/* step 12. RX_MAC Configuration Register */
 	v = bus_space_read_4(t, mac, HME_MACI_RXCFG);
-	v |= HME_MAC_RXCFG_ENABLE;
+	v |= HME_MAC_RXCFG_ENABLE | HME_MAC_RXCFG_PSTRIP;
 	bus_space_write_4(t, mac, HME_MACI_RXCFG, v);
 
 	/* step 13. TX_MAC Configuration Register */
@@ -692,15 +707,17 @@
  * we copy into clusters.
  */
 struct mbuf *
-hme_get(sc, ri, totlen)
+hme_get(sc, ri, flags)
 	struct hme_softc *sc;
-	int ri, totlen;
+	int ri;
+	u_int32_t flags;
 {
 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
 	struct mbuf *m, *m0, *newm;
 	caddr_t bp;
-	int len;
+	int len, totlen;
 
+	totlen = HME_XD_DECODE_RSIZE(flags);
 	MGETHDR(m0, M_DONTWAIT, MT_DATA);
 	if (m0 == 0)
 		return (0);
@@ -741,6 +758,94 @@
 		}
 	}
 
+	if (ifp->if_csum_flags_rx & (M_CSUM_TCPv4 | M_CSUM_UDPv4)) {
+		struct ether_header *eh;
+		struct ip *ip;
+		struct udphdr *uh;
+		uint16_t *opts;
+		int32_t hlen, pktlen;
+		uint32_t temp;
+
+		if (sc->sc_ethercom.ec_capenable & ETHERCAP_VLAN_MTU) {
+			pktlen = m0->m_pkthdr.len - ETHER_HDR_LEN -
+				ETHER_VLAN_ENCAP_LEN;
+			eh = (struct ether_header *) (mtod(m0, caddr_t) +
+				ETHER_VLAN_ENCAP_LEN);	/* byte offset */
+		} else {
+			pktlen = m0->m_pkthdr.len - ETHER_HDR_LEN;
+			eh = mtod(m0, struct ether_header *);
+		}
+		if (ntohs(eh->ether_type) != ETHERTYPE_IP)
+			goto swcsum;
+		ip = (struct ip *) ((caddr_t) eh + ETHER_HDR_LEN);
+
+		/* IPv4 only; anything else is left to the software checksum */
+		if (ip->ip_v != IPVERSION)
+			goto swcsum;
+
+		hlen = ip->ip_hl << 2;
+		if (hlen < sizeof(struct ip))
+			goto swcsum;
+
+		/* too short, truncated, fragment */
+		if ((ntohs(ip->ip_len) < hlen) || (ntohs(ip->ip_len) > pktlen)
+		    || (ntohs(ip->ip_off) & (IP_MF | IP_OFFMASK)))
+			goto swcsum;
+
+		switch (ip->ip_p) {
+		case IPPROTO_TCP:
+			if (! (ifp->if_csum_flags_rx & M_CSUM_TCPv4))
+				goto swcsum;
+			if (pktlen < (hlen + sizeof(struct tcphdr)))
+				goto swcsum;
+			m0->m_pkthdr.csum_flags = M_CSUM_TCPv4;
+			break;
+		case IPPROTO_UDP:
+			if (! (ifp->if_csum_flags_rx & M_CSUM_UDPv4))
+				goto swcsum;
+			if (pktlen < (hlen + sizeof(struct udphdr)))
+				goto swcsum;
+			uh = (struct udphdr *)((caddr_t)ip + hlen);
+			/* uh_sum == 0: no checksum in the datagram */
+			if (uh->uh_sum == 0)
+				goto swcsum;
+			m0->m_pkthdr.csum_flags = M_CSUM_UDPv4;
+			break;
+		default:
+			goto swcsum;
+		}
+
+		m0->m_pkthdr.csum_data = ~flags & HME_XD_RXCKSUM;
+
+		/* if the pkt had ip options, we have to deduct them */
+		if (hlen > sizeof(struct ip)) {
+			uint32_t m;
+
+			m = 0;
+			temp = hlen - sizeof(struct ip);
+			opts = (uint16_t *) ((caddr_t) ip + sizeof(struct ip));
+
+			while (temp > 1) {
+				m += ntohs(*opts++);
+				temp -= 2;
+			}
+			while (m >> 16)
+				m = (m >> 16) + (m & 0xffff);
+
+			/* deduct ip opts sum from the hwsum (rfc 1624) */
+			m0->m_pkthdr.csum_data = ~((~m0->m_pkthdr.csum_data) +
+						   ~m + 0);
+
+			while (m0->m_pkthdr.csum_data >> 16)
+				m0->m_pkthdr.csum_data =
+					(m0->m_pkthdr.csum_data >> 16) +
+					(m0->m_pkthdr.csum_data & 0xffff);
+		}
+
+		m0->m_pkthdr.csum_flags |= M_CSUM_DATA | M_CSUM_NO_PSEUDOHDR;
+	}
+
+swcsum:
 	return (m0);
 
 bad:
@@ -752,13 +857,16 @@
  * Pass a packet to the higher levels.
  */
 void
-hme_read(sc, ix, len)
+hme_read(sc, ix, flags)
 	struct hme_softc *sc;
-	int ix, len;
+	int ix;
+	u_int32_t flags;
 {
 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
 	struct mbuf *m;
+	int len;
 
+	len = HME_XD_DECODE_RSIZE(flags);
 	if (len <= sizeof(struct ether_header) ||
 	    len > ((sc->sc_ethercom.ec_capenable & ETHERCAP_VLAN_MTU) ?
 	    ETHER_VLAN_ENCAP_LEN + ETHERMTU + sizeof(struct ether_header) :
@@ -772,7 +880,7 @@
 	}
 
 	/* Pull packet off interface. */
-	m = hme_get(sc, ix, len);
+	m = hme_get(sc, ix, flags);
 	if (m == 0) {
 		ifp->if_ierrors++;
 		return;
@@ -800,6 +908,7 @@
 	struct hme_softc *sc = (struct hme_softc *)ifp->if_softc;
 	caddr_t txd = sc->sc_rb.rb_txd;
 	struct mbuf *m;
+	unsigned int offset, txflags;
 	unsigned int ri, len;
 	unsigned int ntbuf = sc->sc_rb.rb_ntbuf;
 
@@ -822,17 +931,67 @@
 			bpf_mtap(ifp->if_bpf, m);
 #endif
 
+		/* hme_put frees the mbuf, collect the necessary bits */
+		if (ifp->if_csum_flags_tx & (M_CSUM_TCPv4 | M_CSUM_UDPv4) &&
+		    m->m_pkthdr.csum_flags & (M_CSUM_TCPv4 | M_CSUM_UDPv4)) {
+			txflags = 1;
+			offset = m->m_pkthdr.csum_data;
+		} else
+			offset = txflags = 0;
+
 		/*
 		 * Copy the mbuf chain into the transmit buffer.
 		 */
 		len = hme_put(sc, ri, m);
 
+		if (txflags) {
+			struct ether_header *eh;
+			struct ip *ip;
+			int32_t hlen;
+			u_int32_t cksum;
+
+			eh = (struct ether_header *) (sc->sc_rb.rb_txbuf +
+			     (ri % sc->sc_rb.rb_ntbuf) * _HME_BUFSZ +
+			     ((sc->sc_ethercom.ec_capenable & ETHERCAP_VLAN_MTU)
+			      ? ETHER_VLAN_ENCAP_LEN : 0));
+			ip = (struct ip *) ((caddr_t) eh + ETHER_HDR_LEN);
+
+			/* XXX can't handle fragmented packet */
+			if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
+				aprint_verbose(
+					"hme_start: can't csum fragment\n");
+				goto skipit;
+			}
+			hlen = ip->ip_hl << 2;
+
+			/* XXX why does it only have the phdr sometimes */
+			if (*(uint16_t *) ((caddr_t) ip + hlen + offset) == 0) {
+				cksum = ntohs(ip->ip_len) - hlen + ip->ip_p;
+				cksum += (ntohl(ip->ip_src.s_addr) >> 16) +
+					 (ntohl(ip->ip_src.s_addr) & 0xffff);
+				cksum += (ntohl(ip->ip_dst.s_addr) >> 16) +
+					 (ntohl(ip->ip_dst.s_addr) & 0xffff);
+				while (cksum >> 16)
+					cksum = (cksum >> 16) +
+						(cksum & 0xffff);
+
+				*(uint16_t *) ((caddr_t) ip + hlen + offset) =
+					htons((uint16_t) cksum);
+			}
+
+			txflags = HME_XD_TXCKSUM | ((offset + hlen +
+				  ETHER_HDR_LEN) << HME_XD_TXCSSTUFFSHIFT) |
+				  ((hlen + ETHER_HDR_LEN) <<
+				  HME_XD_TXCSSTARTSHIFT);
+		} else
+skipit:
+			txflags = 0;
 		/*
 		 * Initialize transmit registers and start transmission
 		 */
 		HME_XD_SETFLAGS(sc->sc_pci, txd, ri,
 			HME_XD_OWN | HME_XD_SOP | HME_XD_EOP |
-			HME_XD_ENCODE_TSIZE(len));
+			HME_XD_ENCODE_TSIZE(len) | txflags);
 
 		/*if (sc->sc_rb.rb_td_nbusy <= 0)*/
 		bus_space_write_4(sc->sc_bustag, sc->sc_etx, HME_ETXI_PENDING,
@@ -920,7 +1079,7 @@
 {
 	caddr_t xdr = sc->sc_rb.rb_rxd;
 	unsigned int nrbuf = sc->sc_rb.rb_nrbuf;
-	unsigned int ri, len;
+	unsigned int ri;
 	u_int32_t flags;
 
 	ri = sc->sc_rb.rb_rdtail;
@@ -936,10 +1095,8 @@
 		if (flags & HME_XD_OFL) {
 			printf("%s: buffer overflow, ri=%d; flags=0x%x\n",
 					sc->sc_dev.dv_xname, ri, flags);
-		} else {
-			len = HME_XD_DECODE_RSIZE(flags);
-			hme_read(sc, ri, len);
-		}
+		} else
+			hme_read(sc, ri, flags);
 
 		/* This buffer can be used by the hardware again */
 		HME_XD_SETFLAGS(sc->sc_pci, xdr, ri,
@@ -1295,7 +1452,6 @@
 	s = splnet();
 
 	switch (cmd) {
-
 	case SIOCSIFADDR:
 		switch (ifa->ifa_addr->sa_family) {
 #ifdef INET
@@ -1339,6 +1495,10 @@
 		break;
 
 	case SIOCSIFFLAGS:
+#ifdef HMEDEBUG
+		sc->sc_debug = (ifp->if_flags & IFF_DEBUG) != 0 ? 1 : 0;
+#endif
+
 		if ((ifp->if_flags & IFF_UP) == 0 &&
 		    (ifp->if_flags & IFF_RUNNING) != 0) {
 			/*
@@ -1370,9 +1530,7 @@
 				hme_init(sc);
 #undef RESETIGN
 		}
-#ifdef HMEDEBUG
-		sc->sc_debug = (ifp->if_flags & IFF_DEBUG) != 0 ? 1 : 0;
-#endif
+
 		break;
 
 	case SIOCADDMULTI:
Index: hmereg.h
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/hmereg.h,v
retrieving revision 1.16
diff -u -d -u -r1.16 hmereg.h
--- hmereg.h	2 Nov 2003 11:07:45 -0000	1.16
+++ hmereg.h	27 Oct 2004 00:00:14 -0000
@@ -52,8 +52,10 @@
 #define HME_SEB_CFG_BURST16	0x00000000	/* 16 byte bursts */
 #define HME_SEB_CFG_BURST32	0x00000001	/* 32 byte bursts */
 #define HME_SEB_CFG_BURST64	0x00000002	/* 64 byte bursts */
-#define HME_SEB_CFG_64BIT	0x00000004	/* ? */
-#define HME_SEB_CFG_PARITY	0x00000008	/* ? */
+#define HME_SEB_CFG_64BIT	0x00000004	/* 64-bit CEI/SBus DVMA (94) */
+#define HME_SEB_CFG_PARITY	0x00000008	/* DVMA & PIO parity check */
+#define HME_SEB_CFG_VERS	0xf0000000	/* ether channel version */
+#define HME_SEB_CFG_VERSSHIFT	28
 
 #define HME_SEB_STAT_GOTFRAME	0x00000001	/* frame received */
 #define HME_SEB_STAT_RCNTEXP	0x00000002	/* rx frame count expired */
@@ -168,14 +170,15 @@
 #define HME_ERXI_FIFO_SRPTR	(6*4)		/* FIFO shadow read pointer */
 #define HME_ERXI_STATEMACHINE	(7*4)		/* State machine */
 
-/* RXI_CFG bits */
+/* ERXI_CFG bits */
 #define HME_ERX_CFG_DMAENABLE	0x00000001	/* Enable RX DMA */
 #define HME_ERX_CFG_BYTEOFFSET	0x00000038	/* RX first byte offset */
 #define HME_ERX_CFG_RINGSIZE32	0x00000000	/* Descriptor ring size: 32 */
 #define HME_ERX_CFG_RINGSIZE64	0x00000200	/* Descriptor ring size: 64 */
 #define HME_ERX_CFG_RINGSIZE128	0x00000400	/* Descriptor ring size: 128 */
 #define HME_ERX_CFG_RINGSIZE256	0x00000600	/* Descriptor ring size: 256 */
-#define HME_ERX_CFG_CSUMSTART	0x007f0000	/* cksum offset */
+#define HME_ERX_CFG_CSUMSTART	0x007f0000	/* cksum offset (half words) */
+#define	HME_ERX_CFG_CSUMSHIFT	16
 
 /*
  * HME MAC-core register offsets
@@ -214,6 +217,9 @@
 #define HME_MAC_XIF_SQETWIN	0x000003e0	/* SQE time window */
 #define HME_MAC_XIF_LANCE	0x00000010	/* Lance mode enable */
 #define HME_MAC_XIF_LIPG0	0x000003e0	/* Lance mode IPG0 */
+#define HME_MAC_XIF_BITS	"\177\020"				\
+				"b\0OE\0b\1XLBACK\0b\2MLBACK\0"		\
+				"b\3MIIENA\0b\4SQEENA\0\0"
 
 /* Transmit config register. */
 #define HME_MAC_TXCFG_ENABLE	0x00000001	/* Enable the transmitter */
@@ -223,6 +229,10 @@
 #define HME_MAC_TXCFG_DBACKOFF	0x00000100	/* Disable backoff */
 #define HME_MAC_TXCFG_FULLDPLX	0x00000200	/* Enable full-duplex */
 #define HME_MAC_TXCFG_DGIVEUP	0x00000400	/* Don't give up on transmits */
+#define HME_MAC_TXCFG_BITS	"\177\020"				\
+				"b\0ENA\0b\5SMODE\0b\6IGNCOLL\0"	\
+				"b\7FCSOFF\0b\10DBACKOFF\0"		\
+				"b\11FULLDPLX\0b\12DGIVEUP\0\0"
 
 /* Receive config register. */
 #define HME_MAC_RXCFG_ENABLE	0x00000001 /* Enable the receiver */
@@ -234,6 +244,10 @@
 #define HME_MAC_RXCFG_PGRP	0x00000400 /* Enable promisc group mode */
 #define HME_MAC_RXCFG_HENABLE	0x00000800 /* Enable the hash filter */
 #define HME_MAC_RXCFG_AENABLE	0x00001000 /* Enable the address filter */
+#define HME_MAC_RXCFG_BITS	"\177\020"				\
+				"b\0ENA\0b\5PSTRIP\0b\6PMISC\0"		\
+				"b\7ERRDIS\0b\10CRCDIS\0b\11ME\0"	\
+				"b\12PGRP\0b\13HASHENA\0b\14ADDRENA\0\0"
 
 /*
  * HME MIF register offsets
@@ -255,6 +269,9 @@
 #define HME_MIF_CFG_MDI0	0x00000100	/* MDI_0 (ro) */
 #define HME_MIF_CFG_MDI1	0x00000200	/* MDI_1 (ro) */
 #define HME_MIF_CFG_PPADDR	0x00007c00	/* Poll phy address */
+#define HME_MIF_CFG_BITS	"\177\020"				\
+				"b\0PHYEXT\0b\1POLLENA\0b\3BBMODE\0"	\
+				"b\x8MDI0\0b\x9MDI1\0\0"
 
 /* MIF Frame/Output register */
 #define HME_MIF_FO_ST		0xc0000000	/* Start of frame */
@@ -295,16 +312,22 @@
 	*((u_int32_t *)HME_XD_ADDR(b,i)) = ((p) ? htole32((a)) : (a));	\
 } while(/* CONSTCOND */ 0)
 
-/* Descriptor flag values */
+/* Descriptor control word flag values */
 #define HME_XD_OWN	0x80000000	/* ownership: 1=hw, 0=sw */
 #define HME_XD_SOP	0x40000000	/* start of packet marker (tx) */
 #define HME_XD_OFL	0x40000000	/* buffer overflow (rx) */
 #define HME_XD_EOP	0x20000000	/* end of packet marker (tx) */
-#define HME_XD_TXCKSUM	0x10000000	/* checksum enable (tx) */
+
+#define	HME_XD_TXCKSUM	0x10000000	/* checksum enable (tx) */
+#define	HME_XD_TXCSSTUFF 0xff00000	/* checksum stuff offset (tx) */
+#define	HME_XD_TXCSSTUFFSHIFT  20
+#define	HME_XD_TXCSSTART 0x000fc000	/* checksum start offset (tx) */
+#define	HME_XD_TXCSSTARTSHIFT  14
+#define	HME_XD_TXLENMSK	0x00003fff	/* packet length mask (tx) */
+
 #define HME_XD_RXLENMSK	0x3fff0000	/* packet length mask (rx) */
 #define HME_XD_RXLENSHIFT	16
-#define HME_XD_TXLENMSK	0x00003fff	/* packet length mask (tx) */
-#define HME_XD_RXCKSUM	0x0000ffff	/* packet checksum (rx) */
+#define HME_XD_RXCKSUM	0x0000ffff	/* packet checksum (rx), complement */
 
 /* Macros to encode/decode the receive buffer size from the flags field */
 #define HME_XD_ENCODE_RSIZE(sz)		\

--pf9I7BMVVzbSWLtt--