Subject: Re: Patch to make upper-layer network protocols align as needed
To: Jonathan Stone <jonathan@DSG.Stanford.EDU>
From: Jason R Thorpe <thorpej@wasabisystems.com>
List: tech-net
Date: 06/30/2002 15:22:46
--azLHFNyN32YCQGCU
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
On Mon, May 13, 2002 at 12:13:23PM -0700, Jonathan Stone wrote:
> The idea is excellent. The name, however, er, sucks :).
> It doesn't fit the style of the existing mbuf code at all.
> (to me sounds rather Linux-like).
> One alternative that (imho) fits a lot better is m_copyup().
Ok, I have renamed it to m_copyup().
I am committing the following diff. The new code path will not be
used unless individual device drivers stop doing the alignment fixup
themselves, so the change is effectively a noop until those device
drivers are changed.
This change was good for an additional 30Mb/s or so on a DP83820 Gig-E
interface in a 533MHz Alpha.
--
-- Jason R. Thorpe <thorpej@wasabisystems.com>
--azLHFNyN32YCQGCU
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=copyup-patch
Index: kern/uipc_mbuf.c
===================================================================
RCS file: /cvsroot/syssrc/sys/kern/uipc_mbuf.c,v
retrieving revision 1.59
diff -c -r1.59 uipc_mbuf.c
*** kern/uipc_mbuf.c 2002/03/09 01:46:33 1.59
--- kern/uipc_mbuf.c 2002/06/30 22:13:28
***************
*** 729,734 ****
--- 729,784 ----
}
/*
+ * Like m_pullup(), except a new mbuf is always allocated, and we allow
+ * the amount of empty space before the data in the new mbuf to be specified
+ * (in the event that the caller expects to prepend later).
+ */
+ int MSFail;
+
+ struct mbuf *
+ m_copyup(struct mbuf *n, int len, int dstoff)
+ {
+ struct mbuf *m;
+ int count, space;
+
+ if (len > (MHLEN - dstoff))
+ goto bad;
+ MGET(m, M_DONTWAIT, n->m_type);
+ if (m == NULL)
+ goto bad;
+ m->m_len = 0;
+ if (n->m_flags & M_PKTHDR) {
+ M_COPY_PKTHDR(m, n);
+ n->m_flags &= ~M_PKTHDR;
+ }
+ m->m_data += dstoff;
+ space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
+ do {
+ count = min(min(max(len, max_protohdr), space), n->m_len);
+ memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
+ (unsigned)count);
+ len -= count;
+ m->m_len += count;
+ n->m_len -= count;
+ space -= count;
+ if (n->m_len)
+ n->m_data += count;
+ else
+ n = m_free(n);
+ } while (len > 0 && n);
+ if (len > 0) {
+ (void) m_free(m);
+ goto bad;
+ }
+ m->m_next = n;
+ return (m);
+ bad:
+ m_freem(n);
+ MSFail++;
+ return (NULL);
+ }
+
+ /*
* Partition an mbuf chain in two pieces, returning the tail --
* all but the first len0 bytes. In case of failure, it returns NULL and
* attempts to restore the chain to its original state.
Index: netinet/icmp_var.h
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/icmp_var.h,v
retrieving revision 1.20
diff -c -r1.20 icmp_var.h
*** netinet/icmp_var.h 2002/06/09 16:33:37 1.20
--- netinet/icmp_var.h 2002/06/30 22:13:28
***************
*** 83,88 ****
--- 83,94 ----
#ifdef _KERNEL
struct icmpstat icmpstat;
+
+ #ifdef __NO_STRICT_ALIGNMENT
+ #define ICMP_HDR_ALIGNED_P(ic) 1
+ #else
+ #define ICMP_HDR_ALIGNED_P(ic) ((((vaddr_t) (ic)) & 3) == 0)
#endif
+ #endif /* _KERNEL_ */
#endif /* _NETINET_ICMP_VAR_H_ */
Index: netinet/igmp.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/igmp.c,v
retrieving revision 1.29
diff -c -r1.29 igmp.c
*** netinet/igmp.c 2002/06/09 16:33:37 1.29
--- netinet/igmp.c 2002/06/30 22:13:29
***************
*** 180,185 ****
--- 180,186 ----
m->m_data += iphlen;
m->m_len -= iphlen;
igmp = mtod(m, struct igmp *);
+ /* No need to assert alignment here. */
if (in_cksum(m, ip->ip_len - iphlen)) {
++igmpstat.igps_rcv_badsum;
m_freem(m);
Index: netinet/igmp_var.h
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/igmp_var.h,v
retrieving revision 1.13
diff -c -r1.13 igmp_var.h
*** netinet/igmp_var.h 2002/05/29 01:33:45 1.13
--- netinet/igmp_var.h 2002/06/30 22:13:29
***************
*** 75,80 ****
--- 75,86 ----
*/
#define IGMP_RANDOM_DELAY(X) (arc4random() % (X) + 1)
+ #ifdef __NO_STRICT_ALIGNMENT
+ #define IGMP_HDR_ALIGNED_P(ig) 1
+ #else
+ #define IGMP_HDR_ALIGNED_P(ig) ((((vaddr_t) (ig)) & 3) == 0)
+ #endif
+
void igmp_init __P((void));
void igmp_input __P((struct mbuf *, ...));
void igmp_joingroup __P((struct in_multi *));
Index: netinet/ip_flow.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/ip_flow.c,v
retrieving revision 1.24
diff -c -r1.24 ip_flow.c
*** netinet/ip_flow.c 2002/06/09 16:33:40 1.24
--- netinet/ip_flow.c 2002/06/30 22:13:30
***************
*** 143,149 ****
ipflow_fastforward(
struct mbuf *m)
{
! struct ip *ip;
struct ipflow *ipf;
struct rtentry *rt;
struct sockaddr *dst;
--- 143,149 ----
ipflow_fastforward(
struct mbuf *m)
{
! struct ip *ip, ip_store;
struct ipflow *ipf;
struct rtentry *rt;
struct sockaddr *dst;
***************
*** 166,172 ****
/*
* IP header with no option and valid version and length
*/
! ip = mtod(m, struct ip *);
iplen = ntohs(ip->ip_len);
if (ip->ip_v != IPVERSION || ip->ip_hl != (sizeof(struct ip) >> 2) ||
iplen < sizeof(struct ip) || iplen > m->m_pkthdr.len)
--- 166,177 ----
/*
* IP header with no option and valid version and length
*/
! if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)))
! ip = mtod(m, struct ip *);
! else {
! memcpy(&ip_store, mtod(m, caddr_t), sizeof(ip_store));
! ip = &ip_store;
! }
iplen = ntohs(ip->ip_len);
if (ip->ip_v != IPVERSION || ip->ip_hl != (sizeof(struct ip) >> 2) ||
iplen < sizeof(struct ip) || iplen > m->m_pkthdr.len)
***************
*** 232,237 ****
--- 237,248 ----
ip->ip_sum -= ~htons(IPTTLDEC << 8);
else
ip->ip_sum += htons(IPTTLDEC << 8);
+
+ /*
+ * Done modifying the header; copy it back, if necessary.
+ */
+ if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0)
+ memcpy(mtod(m, caddr_t), &ip_store, sizeof(ip_store));
/*
* Trim the packet in case it's too long..
Index: netinet/ip_icmp.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/ip_icmp.c,v
retrieving revision 1.68
diff -c -r1.68 ip_icmp.c
*** netinet/ip_icmp.c 2002/06/13 16:25:54 1.68
--- netinet/ip_icmp.c 2002/06/30 22:13:30
***************
*** 419,424 ****
--- 419,425 ----
m->m_len -= hlen;
m->m_data += hlen;
icp = mtod(m, struct icmp *);
+ /* Don't need to assert alignment, here. */
if (in_cksum(m, icmplen)) {
icmpstat.icps_checksum++;
goto freeit;
Index: netinet/ip_input.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/ip_input.c,v
retrieving revision 1.153
diff -c -r1.153 ip_input.c
*** netinet/ip_input.c 2002/06/13 16:25:54 1.153
--- netinet/ip_input.c 2002/06/30 22:13:32
***************
*** 420,429 ****
if (TAILQ_FIRST(&in_ifaddr) == 0)
goto bad;
ipstat.ips_total++;
! if (m->m_len < sizeof (struct ip) &&
! (m = m_pullup(m, sizeof (struct ip))) == 0) {
! ipstat.ips_toosmall++;
! return;
}
ip = mtod(m, struct ip *);
if (ip->ip_v != IPVERSION) {
--- 420,443 ----
if (TAILQ_FIRST(&in_ifaddr) == 0)
goto bad;
ipstat.ips_total++;
! /*
! * If the IP header is not aligned, slurp it up into a new
! * mbuf with space for link headers, in the event we forward
! * it. Otherwise, if it is aligned, make sure the entire
! * base IP header is in the first mbuf of the chain.
! */
! if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
! if ((m = m_copyup(m, sizeof(struct ip),
! (max_linkhdr + 3) & ~3)) == NULL) {
! /* XXXJRT new stat, please */
! ipstat.ips_toosmall++;
! return;
! }
! } else if (__predict_false(m->m_len < sizeof (struct ip))) {
! if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
! ipstat.ips_toosmall++;
! return;
! }
}
ip = mtod(m, struct ip *);
if (ip->ip_v != IPVERSION) {
Index: netinet/ip_var.h
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/ip_var.h,v
retrieving revision 1.47
diff -c -r1.47 ip_var.h
*** netinet/ip_var.h 2002/05/07 02:59:38 1.47
--- netinet/ip_var.h 2002/06/30 22:13:32
***************
*** 183,188 ****
--- 183,194 ----
#define IP_ALLOWBROADCAST SO_BROADCAST /* can send broadcast packets */
#define IP_MTUDISC 0x0400 /* Path MTU Discovery; set DF */
+ #ifdef __NO_STRICT_ALIGNMENT
+ #define IP_HDR_ALIGNED_P(ip) 1
+ #else
+ #define IP_HDR_ALIGNED_P(ip) ((((vaddr_t) (ip)) & 3) == 0)
+ #endif
+
extern struct ipstat ipstat; /* ip statistics */
extern LIST_HEAD(ipqhead, ipq) ipq; /* ip reass. queue */
extern u_int16_t ip_id; /* ip packet ctr, for ids */
Index: netinet/tcp_input.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/tcp_input.c,v
retrieving revision 1.145
diff -c -r1.145 tcp_input.c
*** netinet/tcp_input.c 2002/06/29 04:13:21 1.145
--- netinet/tcp_input.c 2002/06/30 22:13:36
***************
*** 921,926 ****
--- 921,928 ----
return;
}
+ KASSERT(TCP_HDR_ALIGNED_P(th));
+
/*
* Check that TCP offset makes sense,
* pull out TCP options and adjust length. XXX
***************
*** 971,976 ****
--- 973,979 ----
* (as they're before toff) and we don't need to update those.
*/
#endif
+ KASSERT(TCP_HDR_ALIGNED_P(th));
optlen = off - sizeof (struct tcphdr);
optp = ((caddr_t)th) + sizeof(struct tcphdr);
/*
Index: netinet/tcp_var.h
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/tcp_var.h,v
retrieving revision 1.92
diff -c -r1.92 tcp_var.h
*** netinet/tcp_var.h 2002/06/09 16:33:45 1.92
--- netinet/tcp_var.h 2002/06/30 22:13:40
***************
*** 679,684 ****
--- 679,690 ----
{ 1, 0, &tcp_delack_ticks }, \
}
+ #ifdef __NO_STRICT_ALIGNMENT
+ #define TCP_HDR_ALIGNED_P(th) 1
+ #else
+ #define TCP_HDR_ALIGNED_P(th) ((((vaddr_t) (th)) & 3) == 0)
+ #endif
+
int tcp_attach __P((struct socket *));
void tcp_canceltimers __P((struct tcpcb *));
struct tcpcb *
Index: netinet/udp_usrreq.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/udp_usrreq.c,v
retrieving revision 1.94
diff -c -r1.94 udp_usrreq.c
*** netinet/udp_usrreq.c 2002/06/09 16:33:45 1.94
--- netinet/udp_usrreq.c 2002/06/30 22:13:42
***************
*** 262,267 ****
--- 262,268 ----
return;
}
#endif
+ KASSERT(UDP_HDR_ALIGNED_P(uh));
/* destination port of 0 is illegal, based on RFC768. */
if (uh->uh_dport == 0)
***************
*** 418,423 ****
--- 419,425 ----
return IPPROTO_DONE;
}
#endif
+ KASSERT(UDP_HDR_ALIGNED_P(uh));
ulen = ntohs((u_short)uh->uh_ulen);
/*
* RFC2675 section 4: jumbograms will have 0 in the UDP header field,
Index: netinet/udp_var.h
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/udp_var.h,v
retrieving revision 1.18
diff -c -r1.18 udp_var.h
*** netinet/udp_var.h 2002/05/12 20:33:51 1.18
--- netinet/udp_var.h 2002/06/30 22:13:42
***************
*** 88,93 ****
--- 88,99 ----
extern struct inpcbtable udbtable;
extern struct udpstat udpstat;
+ #ifdef __NO_STRICT_ALIGNMENT
+ #define UDP_HDR_ALIGNED_P(uh) 1
+ #else
+ #define UDP_HDR_ALIGNED_P(uh) ((((vaddr_t) (uh)) & 3) == 0)
+ #endif
+
void *udp_ctlinput __P((int, struct sockaddr *, void *));
void udp_init __P((void));
void udp_input __P((struct mbuf *, ...));
Index: netinet6/icmp6.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet6/icmp6.c,v
retrieving revision 1.82
diff -c -r1.82 icmp6.c
*** netinet6/icmp6.c 2002/06/09 14:43:11 1.82
--- netinet6/icmp6.c 2002/06/30 22:13:45
***************
*** 469,474 ****
--- 469,475 ----
return IPPROTO_DONE;
}
#endif
+ KASSERT(IP6_HDR_ALIGNED_P(icmp6));
code = icmp6->icmp6_code;
if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) {
Index: netinet6/ip6_input.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet6/ip6_input.c,v
retrieving revision 1.56
diff -c -r1.56 ip6_input.c
*** netinet6/ip6_input.c 2002/06/09 14:43:12 1.56
--- netinet6/ip6_input.c 2002/06/30 22:13:47
***************
*** 269,277 ****
IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), /*nothing*/);
#endif
! if (m->m_len < sizeof(struct ip6_hdr)) {
! struct ifnet *inifp;
! inifp = m->m_pkthdr.rcvif;
if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
ip6stat.ip6s_toosmall++;
in6_ifstat_inc(inifp, ifs6_in_hdrerr);
--- 269,291 ----
IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), /*nothing*/);
#endif
! /*
! * If the IPv6 header is not aligned, slurp it up into a new
! * mbuf with space for link headers, in the event we forward
! * it. OTherwise, if it is aligned, make sure the entire base
! * IPv6 header is in the first mbuf of the chain.
! */
! if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
! struct ifnet *inifp = m->m_pkthdr.rcvif;
! if ((m = m_copyup(m, sizeof(struct ip6_hdr),
! (max_linkhdr + 3) & ~3)) == NULL) {
! /* XXXJRT new stat, please */
! ip6stat.ip6s_toosmall++;
! in6_ifstat_inc(inifp, ifs6_in_hdrerr);
! return;
! }
! } else if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
! struct ifnet *inifp = m->m_pkthdr.rcvif;
if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
ip6stat.ip6s_toosmall++;
in6_ifstat_inc(inifp, ifs6_in_hdrerr);
***************
*** 625,630 ****
--- 639,645 ----
return;
}
#endif
+ KASSERT(IP6_HDR_ALIGNED_P(hbh));
nxt = hbh->ip6h_nxt;
/*
***************
*** 789,794 ****
--- 804,810 ----
return -1;
}
#endif
+ KASSERT(IP6_HDR_ALIGNED_P(hbh));
off += hbhlen;
hbhlen -= sizeof(struct ip6_hbh);
opt = (u_int8_t *)hbh + sizeof(struct ip6_hbh);
***************
*** 1150,1155 ****
--- 1166,1172 ----
return;
}
#endif
+ KASSERT(IP6_HDR_ALIGNED_P(ip6e));
switch (nxt) {
case IPPROTO_DSTOPTS:
Index: netinet6/ip6_var.h
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet6/ip6_var.h,v
retrieving revision 1.21
diff -c -r1.21 ip6_var.h
*** netinet6/ip6_var.h 2002/06/08 21:22:33 1.21
--- netinet6/ip6_var.h 2002/06/30 22:13:47
***************
*** 206,211 ****
--- 206,217 ----
#define IPV6_FORWARDING 0x02 /* most of IPv6 header exists */
#define IPV6_MINMTU 0x04 /* use minimum MTU (IPV6_USE_MIN_MTU) */
+ #ifdef __NO_STRICT_ALIGNMENT
+ #define IP6_HDR_ALIGNED_P(ip) 1
+ #else
+ #define IP6_HDR_ALIGNED_P(ip) ((((vaddr_t) (ip)) & 3) == 0)
+ #endif
+
extern struct ip6stat ip6stat; /* statistics */
extern u_int32_t ip6_id; /* fragment identifier */
extern int ip6_defhlim; /* default hop limit */
Index: sys/mbuf.h
===================================================================
RCS file: /cvsroot/syssrc/sys/sys/mbuf.h,v
retrieving revision 1.66
diff -c -r1.66 mbuf.h
*** sys/mbuf.h 2002/06/22 05:37:01 1.66
--- sys/mbuf.h 2002/06/30 22:13:49
***************
*** 651,656 ****
--- 651,657 ----
struct mbuf *m_prepend __P((struct mbuf *,int,int));
struct mbuf *m_pulldown __P((struct mbuf *, int, int, int *));
struct mbuf *m_pullup __P((struct mbuf *, int));
+ struct mbuf *m_copyup __P((struct mbuf *, int, int));
struct mbuf *m_split __P((struct mbuf *,int,int));
void m_adj __P((struct mbuf *, int));
void m_cat __P((struct mbuf *,struct mbuf *));
--azLHFNyN32YCQGCU--