Subject: Re: Wart in tcp_output(), IPv6-related?
To: None <itojun@iijlab.net>
From: Jason R Thorpe <thorpej@wasabisystems.com>
List: tech-net
Date: 04/26/2002 18:40:09
--d9ADC0YsG2v16Js0
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

On Sat, Apr 27, 2002 at 09:07:30AM +0900, itojun@iijlab.net wrote:

 > 	i am happy to remove "max_linkhdr + hdrlen + len <= MCLBYTES" line.
 > 	if the line gets removed the "always-copy" behavior will go away.

Attached is the patch I will commit.

 > 	thanks for analysis.

Also thank Matt Thomas :-)

-- 
        -- Jason R. Thorpe <thorpej@wasabisystems.com>

--d9ADC0YsG2v16Js0
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=tcp-patch

Index: tcp_output.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/tcp_output.c,v
retrieving revision 1.78
diff -u -r1.78 tcp_output.c
--- tcp_output.c	2002/03/01 22:54:09	1.78
+++ tcp_output.c	2002/04/27 01:36:53
@@ -1,3 +1,4 @@
+#define TCP_OUTPUT_COUNTERS
 /*	$NetBSD: tcp_output.c,v 1.78 2002/03/01 22:54:09 thorpej Exp $	*/
 
 /*
@@ -200,6 +201,21 @@
 int	tcp_cwm = 1;
 int	tcp_cwm_burstsize = 4;
 
+#ifdef TCP_OUTPUT_COUNTERS
+#include <sys/device.h>
+
+extern struct evcnt tcp_output_bigheader;
+extern struct evcnt tcp_output_copysmall;
+extern struct evcnt tcp_output_copybig;
+extern struct evcnt tcp_output_refbig;
+
+#define	TCP_OUTPUT_COUNTER_INCR(ev)	(ev)->ev_count++
+#else
+
+#define	TCP_OUTPUT_COUNTER_INCR(ev)	/* nothing */
+
+#endif /* TCP_OUTPUT_COUNTERS */
+
 static
 #ifndef GPROF
 __inline
@@ -384,29 +400,43 @@
 	m->m_data -= hdrlen;
 #else
 	MGETHDR(m, M_DONTWAIT, MT_HEADER);
-	if (m != NULL &&
-	    (max_linkhdr + hdrlen > MHLEN ||
-	     max_linkhdr + hdrlen + len <= MCLBYTES)) {
+	if (__predict_false(m == NULL))
+		return (ENOBUFS);
+
+	/*
+	 * XXX Because other code assumes headers will fit in
+	 * XXX one header mbuf.
+	 *
+	 * (This code should almost *never* be run.)
+	 */
+	if (__predict_false((max_linkhdr + hdrlen) > MHLEN)) {
+		TCP_OUTPUT_COUNTER_INCR(&tcp_output_bigheader);
 		MCLGET(m, M_DONTWAIT);
 		if ((m->m_flags & M_EXT) == 0) {
 			m_freem(m);
-			m = NULL;
+			return (ENOBUFS);
 		}
 	}
-	if (m == NULL)
-		return (ENOBUFS);
+
 	m->m_data += max_linkhdr;
 	m->m_len = hdrlen;
 	if (len <= M_TRAILINGSPACE(m)) {
 		m_copydata(so->so_snd.sb_mb, off, (int) len,
 		    mtod(m, caddr_t) + hdrlen);
 		m->m_len += len;
+		TCP_OUTPUT_COUNTER_INCR(&tcp_output_copysmall);
 	} else {
 		m->m_next = m_copy(so->so_snd.sb_mb, off, (int) len);
 		if (m->m_next == NULL) {
 			m_freem(m);
 			return (ENOBUFS);
 		}
+#ifdef TCP_OUTPUT_COUNTERS
+		if (m->m_next->m_flags & M_EXT)
+			TCP_OUTPUT_COUNTER_INCR(&tcp_output_refbig);
+		else
+			TCP_OUTPUT_COUNTER_INCR(&tcp_output_copybig);
+#endif /* TCP_OUTPUT_COUNTERS */
 	}
 #endif
 
Index: tcp_subr.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/tcp_subr.c,v
retrieving revision 1.124
diff -u -r1.124 tcp_subr.c
--- tcp_subr.c	2002/03/15 09:25:41	1.124
+++ tcp_subr.c	2002/04/27 01:36:54
@@ -1,3 +1,4 @@
+#define TCP_OUTPUT_COUNTERS
 /*	$NetBSD: tcp_subr.c,v 1.124 2002/03/15 09:25:41 itojun Exp $	*/
 
 /*
@@ -227,6 +228,19 @@
     NULL, "tcp", "swcsum");
 #endif /* TCP_CSUM_COUNTERS */
 
+#ifdef TCP_OUTPUT_COUNTERS
+#include <sys/device.h>
+
+struct evcnt tcp_output_bigheader = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
+    NULL, "tcp", "output big header");
+struct evcnt tcp_output_copysmall = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
+    NULL, "tcp", "output copy small");
+struct evcnt tcp_output_copybig = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
+    NULL, "tcp", "output copy big");
+struct evcnt tcp_output_refbig = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
+    NULL, "tcp", "output reference big");
+#endif /* TCP_OUTPUT_COUNTERS */
+
 /*
  * Tcp initialization
  */
@@ -271,6 +285,13 @@
 	evcnt_attach_static(&tcp_hwcsum_data);
 	evcnt_attach_static(&tcp_swcsum);
 #endif /* TCP_CSUM_COUNTERS */
+
+#ifdef TCP_OUTPUT_COUNTERS
+	evcnt_attach_static(&tcp_output_bigheader);
+	evcnt_attach_static(&tcp_output_copysmall);
+	evcnt_attach_static(&tcp_output_copybig);
+	evcnt_attach_static(&tcp_output_refbig);
+#endif /* TCP_OUTPUT_COUNTERS */
 }
 
 /*

--d9ADC0YsG2v16Js0--