Subject: HW checksumming, part 2
To: None <tech-net@netbsd.org>
From: Jason R Thorpe <thorpej@zembu.com>
List: tech-net
Date: 05/18/2001 14:38:38
Okay, I have some out-bound HW-assisted checksum processing working,
only for IP header checksums right now. Doing it for TCP and UDP is
going to require some changes to do incremental checksums (because of
the pseudo-header that is used in the computation).
I would like to commit these changes if there are no objections, and
then get on to the incremental checksum stuff for TCP/UDP.
--
-- Jason R. Thorpe <thorpej@zembu.com>
Index: sys/mbuf.h
===================================================================
RCS file: /cvsroot/syssrc/sys/sys/mbuf.h,v
retrieving revision 1.57
diff -c -r1.57 mbuf.h
*** sys/mbuf.h 2001/04/30 01:13:21 1.57
--- sys/mbuf.h 2001/05/18 21:23:42
***************
*** 1,7 ****
/* $NetBSD: mbuf.h,v 1.57 2001/04/30 01:13:21 lukem Exp $ */
/*-
! * Copyright (c) 1996, 1997, 1999 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
--- 1,7 ----
/* $NetBSD: mbuf.h,v 1.57 2001/04/30 01:13:21 lukem Exp $ */
/*-
! * Copyright (c) 1996, 1997, 1999, 2001 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
***************
*** 115,123 ****
--- 115,131 ----
struct pkthdr {
struct ifnet *rcvif; /* rcv interface */
int len; /* total packet length */
+ int csuminfo; /* checksum information */
struct mbuf *aux; /* extra data buffer; ipsec/others */
};
+ #define M_CSUM_IPv4 0x0001 /* IPv4 header */
+ #define M_CSUM_IPv4_BAD 0x0002 /* IPv4 header checksum bad */
+ #define M_CSUM_TCPv4 0x0004 /* TCP header/payload */
+ #define M_CSUM_TCPv4_BAD 0x0008 /* TCP header/payload checksum bad */
+ #define M_CSUM_UDPv4 0x0010 /* UDP header/payload */
+ #define M_CSUM_UDPv4_BAD 0x0020 /* UDP header/payload checksum bad */
+
/* description of external storage mapped into mbuf, valid if M_EXT set */
struct m_ext {
caddr_t ext_buf; /* start of buffer */
***************
*** 246,251 ****
--- 254,260 ----
(m)->m_nextpkt = (struct mbuf *)NULL; \
(m)->m_data = (m)->m_pktdat; \
(m)->m_flags = M_PKTHDR; \
+ (m)->m_pkthdr.csuminfo = 0; \
(m)->m_pkthdr.aux = (struct mbuf *)NULL; \
} else \
(m) = m_retryhdr((how), (type)); \
Index: net/if.h
===================================================================
RCS file: /cvsroot/syssrc/sys/net/if.h,v
retrieving revision 1.68
diff -c -r1.68 if.h
*** net/if.h 2001/04/10 22:58:55 1.68
--- net/if.h 2001/05/18 21:23:42
***************
*** 1,7 ****
/* $NetBSD: if.h,v 1.68 2001/04/10 22:58:55 enami Exp $ */
/*-
! * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
--- 1,7 ----
/* $NetBSD: if.h,v 1.68 2001/04/10 22:58:55 enami Exp $ */
/*-
! * Copyright (c) 1999, 2000, 2001 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
***************
*** 277,282 ****
--- 277,283 ----
void *if_bridge; /* bridge glue */
int if_dlt; /* data link type (<net/dlt.h>) */
struct pfil_head if_pfil; /* filtering point */
+ int if_capabilities; /* interface capabilities */
};
#define if_mtu if_data.ifi_mtu
#define if_type if_data.ifi_type
***************
*** 327,332 ****
--- 328,338 ----
#define IF_Kbps(x) ((x) * 1000) /* kilobits/sec. */
#define IF_Mbps(x) (IF_Kbps((x) * 1000)) /* megabits/sec. */
#define IF_Gbps(x) (IF_Mbps((x) * 1000)) /* gigabits/sec. */
+
+ /* Capabilities that interfaces can advertise. */
+ #define IFCAP_CSUM_IPv4 0x0001 /* can do IPv4 header checksums */
+ #define IFCAP_CSUM_TCPv4 0x0002 /* can do IPv4/TCP checksums */
+ #define IFCAP_CSUM_UDPv4 0x0004 /* can do IPv4/UDP checksums */
/*
* Output queues (ifp->if_snd) and internetwork datagram level (pup level 1)
Index: netinet/ip_fil.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/ip_fil.c,v
retrieving revision 1.66
diff -c -r1.66 ip_fil.c
*** netinet/ip_fil.c 2001/03/26 06:13:12 1.66
--- netinet/ip_fil.c 2001/05/18 21:23:43
***************
*** 1384,1389 ****
--- 1384,1396 ----
struct route iproute;
frentry_t *fr;
+ #if defined(__NetBSD__) && defined(M_CSUM_IPv4)
+ /*
+ * Clear any in-bound checksum flags for this packet.
+ */
+ m0->m_pkthdr.csuminfo = 0;
+ #endif /* __NetBSD__ && M_CSUM_IPv4 */
+
hlen = fin->fin_hlen;
ip = mtod(m0, struct ip *);
***************
*** 1491,1498 ****
--- 1498,1512 ----
# endif
ip->ip_len = htons(ip->ip_len);
ip->ip_off = htons(ip->ip_off);
+ # if defined(__NetBSD__) && defined(M_CSUM_IPv4)
+ if (ifp->if_capabilities & IFCAP_CSUM_IPv4)
+ m->m_pkthdr.csuminfo |= M_CSUM_IPv4;
+ else if (ip->ip_sum == 0)
+ ip->ip_sum = in_cksum(m, hlen);
+ # else
if (!ip->ip_sum)
ip->ip_sum = in_cksum(m, hlen);
+ # endif /* __NetBSD__ && M_CSUM_IPv4 */
# if BSD >= 199306
error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst,
ro->ro_rt);
Index: netinet/ip_flow.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/ip_flow.c,v
retrieving revision 1.17
diff -c -r1.17 ip_flow.c
*** netinet/ip_flow.c 2001/04/13 23:30:22 1.17
--- netinet/ip_flow.c 2001/05/18 21:23:43
***************
*** 179,187 ****
return 0;
/*
! * Veryify the IP header checksum.
*/
! if (in_cksum(m, sizeof(struct ip)) != 0)
return 0;
/*
--- 179,190 ----
return 0;
/*
! * Verify the IP header checksum.
*/
! if (m->m_pkthdr.csuminfo & M_CSUM_IPv4) {
! if (m->m_pkthdr.csuminfo & M_CSUM_IPv4_BAD)
! return 0;
! } else if (in_cksum(m, sizeof(struct ip)) != 0)
return 0;
/*
***************
*** 199,210 ****
--- 202,221 ----
return 0;
/*
+ * Clear any in-bound checksum flags for this packet.
+ */
+ m->m_pkthdr.csuminfo = 0;
+
+ /*
* Everything checks out and so we can forward this packet.
* Modify the TTL and incrementally change the checksum.
*
* This method of adding the checksum works on either endian CPU.
* If htons() is inlined, all the arithmetic is folded; otherwise
* the htons()s are combined by CSE due to the __const__ attribute.
+ *
+ * Don't bother using HW checksumming here -- the incremental
+ * update is pretty fast.
*/
ip->ip_ttl -= IPTTLDEC;
if (ip->ip_sum >= (u_int16_t) ~htons(IPTTLDEC << 8))
Index: netinet/ip_input.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/ip_input.c,v
retrieving revision 1.133
diff -c -r1.133 ip_input.c
*** netinet/ip_input.c 2001/04/16 17:03:33 1.133
--- netinet/ip_input.c 2001/05/18 21:23:43
***************
*** 433,439 ****
}
}
! if (in_cksum(m, hlen) != 0) {
ipstat.ips_badsum++;
goto bad;
}
--- 433,444 ----
}
}
! if (m->m_pkthdr.csuminfo & M_CSUM_IPv4) {
! if (m->m_pkthdr.csuminfo & M_CSUM_IPv4_BAD) {
! ipstat.ips_badsum++;
! goto bad;
! }
! } else if (in_cksum(m, hlen) != 0) {
ipstat.ips_badsum++;
goto bad;
}
***************
*** 1433,1438 ****
--- 1438,1448 ----
#ifdef IPSEC
struct ifnet dummyifp;
#endif
+
+ /*
+ * Clear any in-bound checksum flags for this packet.
+ */
+ m->m_pkthdr.csuminfo = 0;
dest = 0;
#ifdef DIAGNOSTIC
Index: netinet/ip_mroute.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/ip_mroute.c,v
retrieving revision 1.54
diff -c -r1.54 ip_mroute.c
*** netinet/ip_mroute.c 2001/05/08 10:07:15 1.54
--- netinet/ip_mroute.c 2001/05/18 21:23:43
***************
*** 997,1002 ****
--- 997,1007 ----
vifi_t vifi;
#endif /* RSVP_ISI */
+ /*
+ * Clear any in-bound checksum flags for this packet.
+ */
+ m->m_pkthdr.csuminfo = 0;
+
if (mrtdebug & DEBUG_FORWARD)
log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %p\n",
ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp);
Index: netinet/ip_output.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/ip_output.c,v
retrieving revision 1.84
diff -c -r1.84 ip_output.c
*** netinet/ip_output.c 2001/04/13 23:30:24 1.84
--- netinet/ip_output.c 2001/05/18 21:23:44
***************
*** 575,582 ****
if (ia)
ia->ia_ifa.ifa_data.ifad_outbytes += ip_len;
#endif
! ip->ip_sum = 0;
! ip->ip_sum = in_cksum(m, hlen);
#ifdef IPSEC
/* clean ipsec history once it goes out of the node */
ipsec_delaux(m);
--- 575,586 ----
if (ia)
ia->ia_ifa.ifa_data.ifad_outbytes += ip_len;
#endif
! if (ifp->if_capabilities & IFCAP_CSUM_IPv4)
! m->m_pkthdr.csuminfo |= M_CSUM_IPv4;
! else {
! ip->ip_sum = 0;
! ip->ip_sum = in_cksum(m, hlen);
! }
#ifdef IPSEC
/* clean ipsec history once it goes out of the node */
ipsec_delaux(m);
Index: netinet/tcp_input.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/tcp_input.c,v
retrieving revision 1.124
diff -c -r1.124 tcp_input.c
*** netinet/tcp_input.c 2001/05/08 10:15:13 1.124
--- netinet/tcp_input.c 2001/05/18 21:23:44
***************
*** 956,968 ****
bzero(ipov->ih_x1, sizeof ipov->ih_x1);
ipov->ih_len = htons(tlen + off);
! if (in_cksum(m, len) != 0) {
tcpstat.tcps_rcvbadsum++;
goto drop;
}
}
#else
! if (in4_cksum(m, IPPROTO_TCP, toff, tlen + off) != 0) {
tcpstat.tcps_rcvbadsum++;
goto drop;
}
--- 956,978 ----
bzero(ipov->ih_x1, sizeof ipov->ih_x1);
ipov->ih_len = htons(tlen + off);
! if (m->m_pkthdr.csuminfo & M_CSUM_TCPv4) {
! if (m->m_pkthdr.csuminfo & M_CSUM_TCPv4_BAD) {
! tcpstat.tcps_rcvbadsum++;
! goto drop;
! }
! } else if (in_cksum(m, len) != 0) {
tcpstat.tcps_rcvbadsum++;
goto drop;
}
}
#else
! if (m->m_pkthdr.csuminfo & M_CSUM_TCPv4) {
! if (m->m_pkthdr.csuminfo & M_CSUM_TCPv4_BAD) {
! tcpstat.tcps_rcvbadsum++;
! goto drop;
! }
! } else if (in4_cksum(m, IPPROTO_TCP, toff, tlen + off) != 0) {
tcpstat.tcps_rcvbadsum++;
goto drop;
}
Index: netinet/udp_usrreq.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/udp_usrreq.c,v
retrieving revision 1.76
diff -c -r1.76 udp_usrreq.c
*** netinet/udp_usrreq.c 2001/05/08 10:15:14 1.76
--- netinet/udp_usrreq.c 2001/05/18 21:23:45
***************
*** 254,260 ****
* Checksum extended UDP header and data.
*/
if (uh->uh_sum) {
! if (in4_cksum(m, IPPROTO_UDP, iphlen, len) != 0) {
udpstat.udps_badsum++;
m_freem(m);
return;
--- 254,266 ----
* Checksum extended UDP header and data.
*/
if (uh->uh_sum) {
! if (m->m_pkthdr.csuminfo & M_CSUM_UDPv4) {
! if (m->m_pkthdr.csuminfo & M_CSUM_UDPv4_BAD) {
! udpstat.udps_badsum++;
! m_freem(m);
! return;
! }
! } else if (in4_cksum(m, IPPROTO_UDP, iphlen, len) != 0) {
udpstat.udps_badsum++;
m_freem(m);
return;
***************
*** 945,957 ****
* Checksum extended UDP header and data.
*/
if (uh->uh_sum) {
! bzero(((struct ipovly *)ip)->ih_x1,
! sizeof ((struct ipovly *)ip)->ih_x1);
! ((struct ipovly *)ip)->ih_len = uh->uh_ulen;
! if (in_cksum(m, len + sizeof (struct ip)) != 0) {
! udpstat.udps_badsum++;
! m_freem(m);
! return;
}
}
--- 951,971 ----
* Checksum extended UDP header and data.
*/
if (uh->uh_sum) {
! if (m->m_pkthdr.csuminfo & M_CSUM_UDPv4) {
! if (m->m_pkthdr.csuminfo & M_CSUM_UDPv4_BAD) {
! udpstat.udps_badsum++;
! m_freem(m);
! return;
! }
! } else {
! bzero(((struct ipovly *)ip)->ih_x1,
! sizeof ((struct ipovly *)ip)->ih_x1);
! ((struct ipovly *)ip)->ih_len = uh->uh_ulen;
! if (in_cksum(m, len + sizeof (struct ip)) != 0) {
! udpstat.udps_badsum++;
! m_freem(m);
! return;
! }
}
}
Index: dev/pci/if_sip.c
===================================================================
RCS file: /cvsroot/syssrc/sys/dev/pci/if_sip.c,v
retrieving revision 1.30
diff -c -r1.30 if_sip.c
*** dev/pci/if_sip.c 2001/05/18 04:38:30 1.30
--- dev/pci/if_sip.c 2001/05/18 21:23:45
***************
*** 1,3 ****
--- 1,4 ----
+ #define SIP_EVENT_COUNTERS
/* $NetBSD: if_sip.c,v 1.30 2001/05/18 04:38:30 thorpej Exp $ */
/*-
***************
*** 250,255 ****
--- 251,262 ----
struct evcnt sc_ev_txdstall; /* Tx stalled due to no txd */
struct evcnt sc_ev_txintr; /* Tx interrupts */
struct evcnt sc_ev_rxintr; /* Rx interrupts */
+ struct evcnt sc_ev_rxipsum; /* IP checksums checked in-bound */
+ struct evcnt sc_ev_rxtcpsum; /* TCP checksums checked in-bound */
+ struct evcnt sc_ev_rxudpsum; /* UDP checksums checked in-bound */
+ struct evcnt sc_ev_txipsum; /* IP checksums comp. out-bound */
+ struct evcnt sc_ev_txtcpsum; /* TCP checksums comp. out-bound */
+ struct evcnt sc_ev_txudpsum; /* UDP checksums comp. out-bound */
#endif /* SIP_EVENT_COUNTERS */
u_int32_t sc_txcfg; /* prototype TXCFG register */
***************
*** 792,797 ****
--- 799,811 ----
* And the DP83820 can do VLAN tagging in hardware.
*/
sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_HWTAGGING;
+
+ /*
+ * The DP83820 can do IPv4, TCPv4, and UDPv4 checksums
+ * in hardware.
+ */
+ ifp->if_capabilities |= IFCAP_CSUM_IPv4 | IFCAP_CSUM_TCPv4 |
+ IFCAP_CSUM_UDPv4;
#endif /* DP83820 */
/*
***************
*** 812,817 ****
--- 826,843 ----
NULL, sc->sc_dev.dv_xname, "txintr");
evcnt_attach_dynamic(&sc->sc_ev_rxintr, EVCNT_TYPE_INTR,
NULL, sc->sc_dev.dv_xname, "rxintr");
+ evcnt_attach_dynamic(&sc->sc_ev_rxipsum, EVCNT_TYPE_MISC,
+ NULL, sc->sc_dev.dv_xname, "rxipsum");
+ evcnt_attach_dynamic(&sc->sc_ev_rxtcpsum, EVCNT_TYPE_MISC,
+ NULL, sc->sc_dev.dv_xname, "rxtcpsum");
+ evcnt_attach_dynamic(&sc->sc_ev_rxudpsum, EVCNT_TYPE_MISC,
+ NULL, sc->sc_dev.dv_xname, "rxudpsum");
+ evcnt_attach_dynamic(&sc->sc_ev_txipsum, EVCNT_TYPE_MISC,
+ NULL, sc->sc_dev.dv_xname, "txipsum");
+ evcnt_attach_dynamic(&sc->sc_ev_txtcpsum, EVCNT_TYPE_MISC,
+ NULL, sc->sc_dev.dv_xname, "txtcpsum");
+ evcnt_attach_dynamic(&sc->sc_ev_txudpsum, EVCNT_TYPE_MISC,
+ NULL, sc->sc_dev.dv_xname, "txudpsum");
#endif /* SIP_EVENT_COUNTERS */
/*
***************
*** 1019,1024 ****
--- 1045,1053 ----
/*
* If VLANs are enabled and the packet has a VLAN tag, set
* up the descriptor to encapsulate the packet for us.
+ *
+ * This apparently has to be on the last descriptor of
+ * the packet.
*/
if (sc->sc_ethercom.ec_nvlans != 0 &&
(m = m_aux_find(m0, AF_LINK, ETHERTYPE_VLAN)) != NULL) {
***************
*** 1026,1031 ****
--- 1055,1083 ----
htole32(EXTSTS_VPKT |
htons(*mtod(m, int *) & EXTSTS_VTCI));
}
+
+ /*
+ * If the upper-layer has requested IPv4/TCPv4/UDPv4
+ * checksumming, set up the descriptor to do this work
+ * for us.
+ *
+ * This apparently has to be on the first descriptor of
+ * the packet.
+ */
+ if (m0->m_pkthdr.csuminfo & M_CSUM_IPv4) {
+ u_int32_t extsts = EXTSTS_IPPKT;
+
+ SIP_EVCNT_INCR(&sc->sc_ev_txipsum);
+ if (m0->m_pkthdr.csuminfo & M_CSUM_TCPv4) {
+ SIP_EVCNT_INCR(&sc->sc_ev_txtcpsum);
+ extsts |= EXTSTS_TCPPKT;
+ } else if (m0->m_pkthdr.csuminfo & M_CSUM_UDPv4) {
+ SIP_EVCNT_INCR(&sc->sc_ev_txudpsum);
+ extsts |= EXTSTS_UDPPKT;
+ }
+ sc->sc_txdescs[sc->sc_txnext].sipd_extsts |=
+ htole32(extsts);
+ }
#endif /* DP83820 */
/* Sync the descriptors we're using. */
***************
*** 1547,1552 ****
--- 1599,1628 ----
*mtod(vtag, int *) = ntohs(extsts & EXTSTS_VTCI);
vtag->m_len = sizeof(int);
}
+
+ /*
+ * Translate the descriptor's extsts checksum bits into
+ * csuminfo flags (IPv4, then TCPv4 or UDPv4, plus *_BAD).
+ */
+ if (extsts & EXTSTS_IPPKT) {
+ SIP_EVCNT_INCR(&sc->sc_ev_rxipsum);
+ m->m_pkthdr.csuminfo |= M_CSUM_IPv4;
+ if (extsts & EXTSTS_Rx_IPERR)
+ m->m_pkthdr.csuminfo |= M_CSUM_IPv4_BAD;
+ if (extsts & EXTSTS_TCPPKT) {
+ SIP_EVCNT_INCR(&sc->sc_ev_rxtcpsum);
+ m->m_pkthdr.csuminfo |= M_CSUM_TCPv4;
+ if (extsts & EXTSTS_Rx_TCPERR)
+ m->m_pkthdr.csuminfo |=
+ M_CSUM_TCPv4_BAD;
+ } else if (extsts & EXTSTS_UDPPKT) {
+ SIP_EVCNT_INCR(&sc->sc_ev_rxudpsum);
+ m->m_pkthdr.csuminfo |= M_CSUM_UDPv4;
+ if (extsts & EXTSTS_Rx_UDPERR)
+ m->m_pkthdr.csuminfo |=
+ M_CSUM_UDPv4_BAD;
+ }
+ }
#endif /* DP83820 */
/* Pass it on. */
***************
*** 1783,1798 ****
#ifdef DP83820
/*
* Initialize the VLAN/IP receive control register.
*/
! reg = 0;
if (sc->sc_ethercom.ec_nvlans != 0)
reg |= VRCR_VTDEN|VRCR_VTREN;
bus_space_write_4(st, sh, SIP_VRCR, reg);
/*
* Initialize the VLAN/IP transmit control register.
*/
! reg = 0;
if (sc->sc_ethercom.ec_nvlans != 0)
reg |= VTCR_VPPTI;
bus_space_write_4(st, sh, SIP_VTCR, reg);
--- 1859,1878 ----
#ifdef DP83820
/*
* Initialize the VLAN/IP receive control register.
+ * We enable checksum computation on all incoming
+ * packets, and do not reject packets w/ bad checksums.
*/
! reg = VRCR_IPEN;
if (sc->sc_ethercom.ec_nvlans != 0)
reg |= VRCR_VTDEN|VRCR_VTREN;
bus_space_write_4(st, sh, SIP_VRCR, reg);
/*
* Initialize the VLAN/IP transmit control register.
+ * We enable outgoing checksum computation on a
+ * per-packet basis.
*/
! reg = VTCR_PPCHK;
if (sc->sc_ethercom.ec_nvlans != 0)
reg |= VTCR_VPPTI;
bus_space_write_4(st, sh, SIP_VTCR, reg);