Subject: Re: hardware checksum for hme
To: None <tech-net@netbsd.org>
From: john heasley <heas@shrubbery.net>
List: tech-net
Date: 02/13/2005 10:28:24
--pf9I7BMVVzbSWLtt
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Tue, Oct 26, 2004 at 05:19:06PM -0700, john heasley:
> If anyone is interested in trying a patch to hme(4) for hardware tcp/udp
> checksum:
>
> http://www.shrubbery.net/~heas/hmecsum.diff
--pf9I7BMVVzbSWLtt
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="hmecsum.diff"
Index: hme.c
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/hme.c,v
retrieving revision 1.42
diff -u -d -u -r1.42 hme.c
--- hme.c 26 Oct 2004 22:52:44 -0000 1.42
+++ hme.c 27 Oct 2004 00:00:14 -0000
@@ -43,7 +43,7 @@
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: hme.c,v 1.42 2004/10/26 22:52:44 heas Exp $");
-/* #define HMEDEBUG */
+#define HMEDEBUG 1
#include "opt_inet.h"
#include "opt_ns.h"
@@ -75,6 +75,8 @@
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
#endif
#ifdef NS
@@ -115,9 +117,9 @@
int hme_mediachange __P((struct ifnet *));
void hme_mediastatus __P((struct ifnet *, struct ifmediareq *));
-struct mbuf *hme_get __P((struct hme_softc *, int, int));
+struct mbuf *hme_get __P((struct hme_softc *, int, u_int32_t));
int hme_put __P((struct hme_softc *, int, struct mbuf *));
-void hme_read __P((struct hme_softc *, int, int));
+void hme_read __P((struct hme_softc *, int, u_int32_t));
int hme_eint __P((struct hme_softc *, u_int));
int hme_rint __P((struct hme_softc *));
int hme_tint __P((struct hme_softc *));
@@ -129,7 +131,6 @@
void hme_copyfrombuf_contig __P((struct hme_softc *, void *, int, int));
void hme_zerobuf_contig __P((struct hme_softc *, int, int));
-
void
hme_config(sc)
struct hme_softc *sc;
@@ -186,7 +187,7 @@
* Also, apparently, the buffers must extend to a DMA burst
* boundary beyond the maximum packet size.
*/
-#define _HME_NDESC 128
+#define _HME_NDESC 256
#define _HME_BUFSZ 1600
/* Note: the # of descriptors must be a multiple of 16 */
@@ -203,7 +204,7 @@
size = 2048 + /* TX descriptors */
2048 + /* RX descriptors */
sc->sc_rb.rb_ntbuf * _HME_BUFSZ + /* TX buffers */
- sc->sc_rb.rb_nrbuf * _HME_BUFSZ; /* TX buffers */
+ sc->sc_rb.rb_nrbuf * _HME_BUFSZ; /* RX buffers */
/* Allocate DMA buffer */
if ((error = bus_dmamem_alloc(dmatag, size,
@@ -255,6 +256,13 @@
ifp->if_flags =
IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST;
sc->sc_if_flags = ifp->if_flags;
+ /*
+ * XXX Rx-only h/w cksum causes the chip to hang when a reply comes
+ * from tcp_respond(), eg from a connection to an unbound tcp
+ * port
+ */
+ /* ifp->if_capabilities |= IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_UDPv4_Rx */
+ ifp->if_capabilities |= IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4;
IFQ_SET_READY(&ifp->if_snd);
/* Initialize ifmedia structures and MII info */
@@ -603,6 +611,13 @@
/* Enable DMA */
v |= HME_ERX_CFG_DMAENABLE;
+
+ /* set h/w rx checksum start offset (# of half-words) */
+ /* XXX need to test vlan */
+ v |= (((ETHER_HDR_LEN + sizeof(struct ip) +
+ ((sc->sc_ethercom.ec_capenable & ETHERCAP_VLAN_MTU) ?
+ ETHER_VLAN_ENCAP_LEN : 0)) / 2) << HME_ERX_CFG_CSUMSHIFT) &
+ HME_ERX_CFG_CSUMSTART;
bus_space_write_4(t, erx, HME_ERXI_CFG, v);
/* step 11. XIF Configuration */
@@ -612,7 +627,7 @@
/* step 12. RX_MAC Configuration Register */
v = bus_space_read_4(t, mac, HME_MACI_RXCFG);
- v |= HME_MAC_RXCFG_ENABLE;
+ v |= HME_MAC_RXCFG_ENABLE | HME_MAC_RXCFG_PSTRIP;
bus_space_write_4(t, mac, HME_MACI_RXCFG, v);
/* step 13. TX_MAC Configuration Register */
@@ -692,15 +707,17 @@
* we copy into clusters.
*/
struct mbuf *
-hme_get(sc, ri, totlen)
+hme_get(sc, ri, flags)
struct hme_softc *sc;
- int ri, totlen;
+ int ri;
+ u_int32_t flags;
{
struct ifnet *ifp = &sc->sc_ethercom.ec_if;
struct mbuf *m, *m0, *newm;
caddr_t bp;
- int len;
+ int len, totlen;
+ totlen = HME_XD_DECODE_RSIZE(flags);
MGETHDR(m0, M_DONTWAIT, MT_DATA);
if (m0 == 0)
return (0);
@@ -741,6 +758,94 @@
}
}
+ if (ifp->if_csum_flags_rx & (M_CSUM_TCPv4 | M_CSUM_TCPv4)) {
+ struct ether_header *eh;
+ struct ip *ip;
+ struct udphdr *uh;
+ uint16_t *opts;
+ int32_t hlen, pktlen;
+ uint32_t temp;
+
+ if (sc->sc_ethercom.ec_capenable & ETHERCAP_VLAN_MTU) {
+ pktlen = m0->m_pkthdr.len - ETHER_HDR_LEN -
+ ETHER_VLAN_ENCAP_LEN;
+ eh = (struct ether_header *) mtod(m0, caddr_t) +
+ ETHER_VLAN_ENCAP_LEN;
+ } else {
+ pktlen = m0->m_pkthdr.len - ETHER_HDR_LEN;
+ eh = mtod(m0, struct ether_header *);
+ }
+ if (ntohs(eh->ether_type) != ETHERTYPE_IP)
+ goto swcsum;
+ ip = (struct ip *) ((caddr_t) eh + ETHER_HDR_LEN);
+
+ /* IPv4 only */
+ if (ip->ip_v != IPVERSION)
+ goto swcsum;
+
+ hlen = ip->ip_hl << 2;
+ if (hlen < sizeof(struct ip))
+ goto swcsum;
+
+ /* too short, truncated, fragment */
+ if ((ntohs(ip->ip_len) < hlen) || (ntohs(ip->ip_len) > pktlen)
+ || (ntohs(ip->ip_off) & (IP_MF | IP_OFFMASK)))
+ goto swcsum;
+
+ switch (ip->ip_p) {
+ case IPPROTO_TCP:
+ if (! (ifp->if_csum_flags_rx & M_CSUM_TCPv4))
+ goto swcsum;
+ if (pktlen < (hlen + sizeof(struct tcphdr)))
+ goto swcsum;
+ m0->m_pkthdr.csum_flags = M_CSUM_TCPv4;
+ break;
+ case IPPROTO_UDP:
+ if (! (ifp->if_csum_flags_rx & M_CSUM_UDPv4))
+ goto swcsum;
+ if (pktlen < (hlen + sizeof(struct udphdr)))
+ goto swcsum;
+ uh = (struct udphdr *)((caddr_t)ip + hlen);
+ /* no checksum */
+ if (uh->uh_sum == 0)
+ goto swcsum;
+ m0->m_pkthdr.csum_flags = M_CSUM_UDPv4;
+ break;
+ default:
+ goto swcsum;
+ }
+
+ m0->m_pkthdr.csum_data = ~flags & HME_XD_RXCKSUM;
+
+ /* if the pkt had ip options, we have to deduct them */
+ if (hlen > sizeof(struct ip)) {
+ uint32_t m;
+
+ m = 0;
+ temp = hlen - sizeof(struct ip);
+ opts = (uint16_t *) ((caddr_t) ip + sizeof(struct ip));
+
+ while (temp > 1) {
+ m += ntohs(*opts++);
+ temp -= 2;
+ }
+ while (m >> 16)
+ m = (m >> 16) + (m & 0xffff);
+
+ /* deduct ip opts sum from the hwsum (rfc 1624) */
+ m0->m_pkthdr.csum_data = ~((~m0->m_pkthdr.csum_data) +
+ ~m + 0);
+
+ while (m0->m_pkthdr.csum_data >> 16)
+ m0->m_pkthdr.csum_data =
+ (m0->m_pkthdr.csum_data >> 16) +
+ (m0->m_pkthdr.csum_data & 0xffff);
+ }
+
+ m0->m_pkthdr.csum_flags |= M_CSUM_DATA | M_CSUM_NO_PSEUDOHDR;
+ }
+
+swcsum:
return (m0);
bad:
@@ -752,13 +857,16 @@
* Pass a packet to the higher levels.
*/
void
-hme_read(sc, ix, len)
+hme_read(sc, ix, flags)
struct hme_softc *sc;
- int ix, len;
+ int ix;
+ u_int32_t flags;
{
struct ifnet *ifp = &sc->sc_ethercom.ec_if;
struct mbuf *m;
+ int len;
+ len = HME_XD_DECODE_RSIZE(flags);
if (len <= sizeof(struct ether_header) ||
len > ((sc->sc_ethercom.ec_capenable & ETHERCAP_VLAN_MTU) ?
ETHER_VLAN_ENCAP_LEN + ETHERMTU + sizeof(struct ether_header) :
@@ -772,7 +880,7 @@
}
/* Pull packet off interface. */
- m = hme_get(sc, ix, len);
+ m = hme_get(sc, ix, flags);
if (m == 0) {
ifp->if_ierrors++;
return;
@@ -800,6 +908,7 @@
struct hme_softc *sc = (struct hme_softc *)ifp->if_softc;
caddr_t txd = sc->sc_rb.rb_txd;
struct mbuf *m;
+ unsigned int offset, txflags;
unsigned int ri, len;
unsigned int ntbuf = sc->sc_rb.rb_ntbuf;
@@ -822,17 +931,67 @@
bpf_mtap(ifp->if_bpf, m);
#endif
+ /* hme_put frees the mbuf, collect the necessary bits */
+ if (ifp->if_csum_flags_tx & (M_CSUM_TCPv4 | M_CSUM_TCPv4) &&
+ m->m_pkthdr.csum_flags & (M_CSUM_TCPv4 | M_CSUM_UDPv4)) {
+ txflags = 1;
+ offset = m->m_pkthdr.csum_data;
+ } else
+ offset = txflags = 0;
+
/*
* Copy the mbuf chain into the transmit buffer.
*/
len = hme_put(sc, ri, m);
+ if (txflags) {
+ struct ether_header *eh;
+ struct ip *ip;
+ int32_t hlen;
+ u_int32_t cksum;
+
+ eh = (struct ether_header *) (sc->sc_rb.rb_txbuf +
+ (ri % sc->sc_rb.rb_ntbuf) * _HME_BUFSZ +
+ ((sc->sc_ethercom.ec_capenable & ETHERCAP_VLAN_MTU)
+ ? ETHER_VLAN_ENCAP_LEN : 0));
+ ip = (struct ip *) ((caddr_t) eh + ETHER_HDR_LEN);
+
+ /* XXX can't handle fragmented packet */
+ if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
+ aprint_verbose(
+ "hme_start: can't csum fragment\n");
+ goto skipit;
+ }
+ hlen = ip->ip_hl << 2;
+
+ /* XXX why does it only have the phdr sometimes */
+ if (*(uint16_t *) (ip + hlen + offset) == 0) {
+ cksum = ntohs(ip->ip_len) - hlen + ip->ip_p;
+ cksum += (ntohl(ip->ip_src.s_addr) >> 16) +
+ (htonl(ip->ip_src.s_addr) & 0xffff);
+ cksum += (ntohl(ip->ip_dst.s_addr) >> 16) +
+ (htonl(ip->ip_dst.s_addr) & 0xffff);
+ while (cksum >> 16)
+ cksum = (cksum >> 16) +
+ (cksum & 0xffff);
+
+ *(uint16_t *) (ip + hlen + offset) =
+ htons((uint16_t) cksum);
+ }
+
+ txflags = HME_XD_TXCKSUM | ((offset + hlen +
+ ETHER_HDR_LEN) << HME_XD_TXCSSTUFFSHIFT) |
+ ((hlen + ETHER_HDR_LEN) <<
+ HME_XD_TXCSSTARTSHIFT);
+ } else
+skipit:
+ txflags = 0;
/*
* Initialize transmit registers and start transmission
*/
HME_XD_SETFLAGS(sc->sc_pci, txd, ri,
HME_XD_OWN | HME_XD_SOP | HME_XD_EOP |
- HME_XD_ENCODE_TSIZE(len));
+ HME_XD_ENCODE_TSIZE(len) | txflags);
/*if (sc->sc_rb.rb_td_nbusy <= 0)*/
bus_space_write_4(sc->sc_bustag, sc->sc_etx, HME_ETXI_PENDING,
@@ -920,7 +1079,7 @@
{
caddr_t xdr = sc->sc_rb.rb_rxd;
unsigned int nrbuf = sc->sc_rb.rb_nrbuf;
- unsigned int ri, len;
+ unsigned int ri;
u_int32_t flags;
ri = sc->sc_rb.rb_rdtail;
@@ -936,10 +1095,8 @@
if (flags & HME_XD_OFL) {
printf("%s: buffer overflow, ri=%d; flags=0x%x\n",
sc->sc_dev.dv_xname, ri, flags);
- } else {
- len = HME_XD_DECODE_RSIZE(flags);
- hme_read(sc, ri, len);
- }
+ } else
+ hme_read(sc, ri, flags);
/* This buffer can be used by the hardware again */
HME_XD_SETFLAGS(sc->sc_pci, xdr, ri,
@@ -1295,7 +1452,6 @@
s = splnet();
switch (cmd) {
-
case SIOCSIFADDR:
switch (ifa->ifa_addr->sa_family) {
#ifdef INET
@@ -1339,6 +1495,10 @@
break;
case SIOCSIFFLAGS:
+#ifdef HMEDEBUG
+ sc->sc_debug = (ifp->if_flags & IFF_DEBUG) != 0 ? 1 : 0;
+#endif
+
if ((ifp->if_flags & IFF_UP) == 0 &&
(ifp->if_flags & IFF_RUNNING) != 0) {
/*
@@ -1370,9 +1530,7 @@
hme_init(sc);
#undef RESETIGN
}
-#ifdef HMEDEBUG
- sc->sc_debug = (ifp->if_flags & IFF_DEBUG) != 0 ? 1 : 0;
-#endif
+
break;
case SIOCADDMULTI:
Index: hmereg.h
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/hmereg.h,v
retrieving revision 1.16
diff -u -d -u -r1.16 hmereg.h
--- hmereg.h 2 Nov 2003 11:07:45 -0000 1.16
+++ hmereg.h 27 Oct 2004 00:00:14 -0000
@@ -52,8 +52,10 @@
#define HME_SEB_CFG_BURST16 0x00000000 /* 16 byte bursts */
#define HME_SEB_CFG_BURST32 0x00000001 /* 32 byte bursts */
#define HME_SEB_CFG_BURST64 0x00000002 /* 64 byte bursts */
-#define HME_SEB_CFG_64BIT 0x00000004 /* ? */
-#define HME_SEB_CFG_PARITY 0x00000008 /* ? */
+#define HME_SEB_CFG_64BIT 0x00000004 /* 64-bit CEI/SBus DVMA (94) */
+#define HME_SEB_CFG_PARITY 0x00000008 /* DVMA & PIO parity check */
+#define HME_SEB_CFG_VERS 0xf0000000 /* ether channel version */
+#define HME_SEB_CFG_VERSSHIFT 28
#define HME_SEB_STAT_GOTFRAME 0x00000001 /* frame received */
#define HME_SEB_STAT_RCNTEXP 0x00000002 /* rx frame count expired */
@@ -168,14 +170,15 @@
#define HME_ERXI_FIFO_SRPTR (6*4) /* FIFO shadow read pointer */
#define HME_ERXI_STATEMACHINE (7*4) /* State machine */
-/* RXI_CFG bits */
+/* ERXI_CFG bits */
#define HME_ERX_CFG_DMAENABLE 0x00000001 /* Enable RX DMA */
#define HME_ERX_CFG_BYTEOFFSET 0x00000038 /* RX first byte offset */
#define HME_ERX_CFG_RINGSIZE32 0x00000000 /* Descriptor ring size: 32 */
#define HME_ERX_CFG_RINGSIZE64 0x00000200 /* Descriptor ring size: 64 */
#define HME_ERX_CFG_RINGSIZE128 0x00000400 /* Descriptor ring size: 128 */
#define HME_ERX_CFG_RINGSIZE256 0x00000600 /* Descriptor ring size: 256 */
-#define HME_ERX_CFG_CSUMSTART 0x007f0000 /* cksum offset */
+#define HME_ERX_CFG_CSUMSTART 0x007f0000 /* cksum offset (half words) */
+#define HME_ERX_CFG_CSUMSHIFT 16
/*
* HME MAC-core register offsets
@@ -214,6 +217,9 @@
#define HME_MAC_XIF_SQETWIN 0x000003e0 /* SQE time window */
#define HME_MAC_XIF_LANCE 0x00000010 /* Lance mode enable */
#define HME_MAC_XIF_LIPG0 0x000003e0 /* Lance mode IPG0 */
+#define HME_MAC_XIF_BITS "\177\020" /* bit names, \177-prefixed bitmask_snprintf(9) format, base 16 */ \
+ "b\0OE\0b\1XLBACK\0b\2MLBACK\0" \
+ "b\3MIIENA\0b\4SQEENA\0\0" /* was b\4 twice; MIIENA presumably bit 3 (the unused index) - confirm against its mask */
/* Transmit config register. */
#define HME_MAC_TXCFG_ENABLE 0x00000001 /* Enable the transmitter */
@@ -223,6 +229,10 @@
#define HME_MAC_TXCFG_DBACKOFF 0x00000100 /* Disable backoff */
#define HME_MAC_TXCFG_FULLDPLX 0x00000200 /* Enable full-duplex */
#define HME_MAC_TXCFG_DGIVEUP 0x00000400 /* Don't give up on transmits */
+#define HME_MAC_TXCFG_BITS "\177\020" /* bit names, \177-prefixed bitmask_snprintf(9) format, base 16 */ \
+ "b\0ENA\0b\5SMODE\0b\6IGNCOLL\0" /* bits 5-7 assumed contiguous below DBACKOFF - confirm against their masks */ \
+ "b\7FCSOFF\0b\10DBACKOFF\0" /* octal escapes: the old "\x8_" form put a stray '_' in each printed name */ \
+ "b\11FULLDPLX\0b\12DGIVEUP\0\0" /* DBACKOFF/FULLDPLX/DGIVEUP masks 0x100/0x200/0x400 = bits 8/9/10 */
/* Receive config register. */
#define HME_MAC_RXCFG_ENABLE 0x00000001 /* Enable the receiver */
@@ -234,6 +244,10 @@
#define HME_MAC_RXCFG_PGRP 0x00000400 /* Enable promisc group mode */
#define HME_MAC_RXCFG_HENABLE 0x00000800 /* Enable the hash filter */
#define HME_MAC_RXCFG_AENABLE 0x00001000 /* Enable the address filter */
+#define HME_MAC_RXCFG_BITS "\177\020" /* bit names, \177-prefixed bitmask_snprintf(9) format, base 16 */ \
+ "b\0ENA\0b\5PSTRIP\0b\6PMISC\0" /* bits 5-9 assumed contiguous below PGRP - confirm against their masks */ \
+ "b\7ERRDIS\0b\10CRCDIS\0b\11ME\0" /* octal escapes: "\x8E" and "\x9C" were parsed as single hex bytes */ \
+ "b\12PGRP\0b\13HASHENA\0b\14ADDRENA\0\0" /* PGRP/HENABLE/AENABLE masks 0x400/0x800/0x1000 = bits 10/11/12; last entry lacked its 'b' */
/*
* HME MIF register offsets
@@ -255,6 +269,9 @@
#define HME_MIF_CFG_MDI0 0x00000100 /* MDI_0 (ro) */
#define HME_MIF_CFG_MDI1 0x00000200 /* MDI_1 (ro) */
#define HME_MIF_CFG_PPADDR 0x00007c00 /* Poll phy address */
+#define HME_MIF_CFG_BITS "\177\020" /* bit names, \177-prefixed bitmask_snprintf(9) format, base 16 */ \
+ "b\0PHYEXT\0" "b\1POLLENA\0" "b\3BBMODE\0" /* NOTE(review): low-bit masks are outside this hunk - confirm indices */ \
+ "b\10MDI0\0" "b\11MDI1\0" "\0" /* MDI0 0x100 = bit 8, MDI1 0x200 = bit 9 */
/* MIF Frame/Output register */
#define HME_MIF_FO_ST 0xc0000000 /* Start of frame */
@@ -295,16 +312,22 @@
*((u_int32_t *)HME_XD_ADDR(b,i)) = ((p) ? htole32((a)) : (a)); \
} while(/* CONSTCOND */ 0)
-/* Descriptor flag values */
+/* Descriptor control word flag values */
#define HME_XD_OWN 0x80000000 /* ownership: 1=hw, 0=sw */
#define HME_XD_SOP 0x40000000 /* start of packet marker (tx) */
#define HME_XD_OFL 0x40000000 /* buffer overflow (rx) */
#define HME_XD_EOP 0x20000000 /* end of packet marker (tx) */
-#define HME_XD_TXCKSUM 0x10000000 /* checksum enable (tx) */
+
+#define HME_XD_TXCKSUM 0x10000000 /* checksum enable (tx) */
+#define HME_XD_TXCSSTUFF 0xff00000 /* checksum stuff offset (tx) */
+#define HME_XD_TXCSSTUFFSHIFT 20
+#define HME_XD_TXCSSTART 0x000fc000 /* checksum start offset (tx) */
+#define HME_XD_TXCSSTARTSHIFT 14
+#define HME_XD_TXLENMSK 0x00003fff /* packet length mask (tx) */
+
#define HME_XD_RXLENMSK 0x3fff0000 /* packet length mask (rx) */
#define HME_XD_RXLENSHIFT 16
-#define HME_XD_TXLENMSK 0x00003fff /* packet length mask (tx) */
-#define HME_XD_RXCKSUM 0x0000ffff /* packet checksum (rx) */
+#define HME_XD_RXCKSUM 0x0000ffff /* packet checksum (rx), complement */
/* Macros to encode/decode the receive buffer size from the flags field */
#define HME_XD_ENCODE_RSIZE(sz) \
--pf9I7BMVVzbSWLtt--