Subject: Socket buffer optimization tweak
To: None <tech-net@netbsd.org>
From: Jason R Thorpe <thorpej@wasabisystems.com>
List: tech-net
Date: 10/18/2001 09:22:59
--IpbVkmxF4tDyP/Kb
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

Folks...

One thing that has turned up in my profiling of our network stack
is that an awful lot of time is spend in sbappend().  The reason
for this is obvious when you take a look at what that routine does --
it searches for the last mbuf in the socket buffer *each time it is
called* and then appends the new mbuf chain to it by calling sbcompress().

The obvious solution here is to add a tail pointer to the socket buffer
and use it when you can.

I have done this, and added an sbappend_simple() that can be used for
the simple case of stream data (e.g. TCP).  I have not yet tried to
use the tail pointer in the pre-existing sbappend*() functions, since
I need to get my head wrapped around the intracacies of how each one
works (they're full of special cases for end-of-record and whatnot,
and I'd prefer not to break them because I missed something subtle :-)

The patch appended does indeed work for TCP -- I have not yet done a
profile with the change, but will be doing so shortly.  I would like
to get some feedback on this now, however, and would like to have
the changes reviewed for mistakes.

-- 
        -- Jason R. Thorpe <thorpej@wasabisystems.com>

--IpbVkmxF4tDyP/Kb
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=foo

Index: kern/uipc_socket.c
===================================================================
RCS file: /cvsroot/syssrc/sys/kern/uipc_socket.c,v
retrieving revision 1.58
diff -c -r1.58 uipc_socket.c
*** kern/uipc_socket.c	2001/09/29 14:16:19	1.58
--- kern/uipc_socket.c	2001/10/18 16:04:55
***************
*** 664,674 ****
  			sbfree(&so->so_rcv, m);
  			if (paddr) {
  				*paddr = m;
! 				so->so_rcv.sb_mb = m->m_next;
  				m->m_next = 0;
  				m = so->so_rcv.sb_mb;
  			} else {
  				MFREE(m, so->so_rcv.sb_mb);
  				m = so->so_rcv.sb_mb;
  			}
  		}
--- 664,677 ----
  			sbfree(&so->so_rcv, m);
  			if (paddr) {
  				*paddr = m;
! 				if ((so->so_rcv.sb_mb = m->m_next) == NULL)
! 					so->so_rcv.sb_mbtail = NULL;
  				m->m_next = 0;
  				m = so->so_rcv.sb_mb;
  			} else {
  				MFREE(m, so->so_rcv.sb_mb);
+ 				if (so->so_rcv.sb_mb == NULL)
+ 					so->so_rcv.sb_mbtail = NULL;
  				m = so->so_rcv.sb_mb;
  			}
  		}
***************
*** 686,696 ****
  				    SCM_RIGHTS)
  					error = (*pr->pr_domain->dom_externalize)(m);
  				*controlp = m;
! 				so->so_rcv.sb_mb = m->m_next;
  				m->m_next = 0;
  				m = so->so_rcv.sb_mb;
  			} else {
  				MFREE(m, so->so_rcv.sb_mb);
  				m = so->so_rcv.sb_mb;
  			}
  		}
--- 689,702 ----
  				    SCM_RIGHTS)
  					error = (*pr->pr_domain->dom_externalize)(m);
  				*controlp = m;
! 				if ((so->so_rcv.sb_mb = m->m_next) == NULL)
! 					so->so_rcv.sb_mbtail = NULL;
  				m->m_next = 0;
  				m = so->so_rcv.sb_mb;
  			} else {
  				MFREE(m, so->so_rcv.sb_mb);
+ 				if (so->so_rcv.sb_mb == NULL)
+ 					so->so_rcv.sb_mbtail = NULL;
  				m = so->so_rcv.sb_mb;
  			}
  		}
***************
*** 752,761 ****
  				if (mp) {
  					*mp = m;
  					mp = &m->m_next;
! 					so->so_rcv.sb_mb = m = m->m_next;
  					*mp = (struct mbuf *)0;
  				} else {
  					MFREE(m, so->so_rcv.sb_mb);
  					m = so->so_rcv.sb_mb;
  				}
  				if (m)
--- 758,771 ----
  				if (mp) {
  					*mp = m;
  					mp = &m->m_next;
! 					if ((so->so_rcv.sb_mb = m = m->m_next)
! 					    == NULL)
! 						so->so_rcv.sb_mbtail = NULL;
  					*mp = (struct mbuf *)0;
  				} else {
  					MFREE(m, so->so_rcv.sb_mb);
+ 					if (so->so_rcv.sb_mb == NULL)
+ 						so->so_rcv.sb_mbtail = NULL;
  					m = so->so_rcv.sb_mb;
  				}
  				if (m)
***************
*** 815,822 ****
  			(void) sbdroprecord(&so->so_rcv);
  	}
  	if ((flags & MSG_PEEK) == 0) {
! 		if (m == 0)
! 			so->so_rcv.sb_mb = nextrecord;
  		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
  			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
  			    (struct mbuf *)(long)flags, (struct mbuf *)0,
--- 825,834 ----
  			(void) sbdroprecord(&so->so_rcv);
  	}
  	if ((flags & MSG_PEEK) == 0) {
! 		if (m == 0) {
! 			if ((so->so_rcv.sb_mb = nextrecord) == NULL)
! 				so->so_rcv.sb_mbtail = NULL;
! 		}
  		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
  			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
  			    (struct mbuf *)(long)flags, (struct mbuf *)0,
Index: kern/uipc_socket2.c
===================================================================
RCS file: /cvsroot/syssrc/sys/kern/uipc_socket2.c,v
retrieving revision 1.41
diff -c -r1.41 uipc_socket2.c
*** kern/uipc_socket2.c	2001/08/05 08:25:39	1.41
--- kern/uipc_socket2.c	2001/10/18 16:04:56
***************
*** 448,453 ****
--- 448,460 ----
  	sbcompress(sb, m, n);
  }
  
+ void
+ sbappend_simple(struct sockbuf *sb, struct mbuf *m)
+ {
+ 
+ 	sbcompress(sb, m, sb->sb_mbtail);
+ }
+ 
  #ifdef SOCKBUF_DEBUG
  void
  sbcheck(struct sockbuf *sb)
***************
*** 597,602 ****
--- 604,610 ----
  		n->m_nextpkt = m;
  	} else
  		sb->sb_mb = m;
+ 	sb->sb_mbtail = m;
  	return (1);
  }
  
***************
*** 628,633 ****
--- 636,642 ----
  		n->m_nextpkt = control;
  	} else
  		sb->sb_mb = control;
+ 	sb->sb_mbtail = control;
  	return (1);
  }
  
***************
*** 668,673 ****
--- 677,683 ----
  			n->m_next = m;
  		else
  			sb->sb_mb = m;
+ 		sb->sb_mbtail = m;
  		sballoc(sb, m);
  		n = m;
  		m->m_flags &= ~M_EOR;
***************
*** 736,741 ****
--- 746,753 ----
  		m->m_nextpkt = next;
  	} else
  		sb->sb_mb = next;
+ 	if (sb->sb_mb == NULL)
+ 		sb->sb_mbtail = NULL;
  }
  
  /*
***************
*** 755,760 ****
--- 767,774 ----
  			MFREE(m, mn);
  		} while ((m = mn) != NULL);
  	}
+ 	if (sb->sb_mb == NULL)
+ 		sb->sb_mbtail = NULL;
  }
  
  /*
Index: netccitt/if_x25subr.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netccitt/if_x25subr.c,v
retrieving revision 1.25
diff -c -r1.25 if_x25subr.c
*** netccitt/if_x25subr.c	2001/06/14 05:44:26	1.25
--- netccitt/if_x25subr.c	2001/10/18 16:05:04
***************
*** 767,773 ****
  	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
  #define transfer_sockbuf(s, f, l) \
  	while ((m = (s)->sb_mb) != NULL) \
! 		{(s)->sb_mb = m->m_act; m->m_act = 0; sbfree((s), m); f;}
  
  	if (rt)
  		rt->rt_refcnt--;
--- 767,779 ----
  	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
  #define transfer_sockbuf(s, f, l) \
  	while ((m = (s)->sb_mb) != NULL) \
! 		{ \
! 			if (((s)->sb_mb = m->m_act) == NULL) \
! 				(s)->sb_mbtail = NULL; \
! 			m->m_act = 0; \
! 			sbfree((s), m); \
! 			f; \
! 		}
  
  	if (rt)
  		rt->rt_refcnt--;
Index: netccitt/pk_output.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netccitt/pk_output.c,v
retrieving revision 1.13
diff -c -r1.13 pk_output.c
*** netccitt/pk_output.c	2000/03/30 13:53:36	1.13
--- netccitt/pk_output.c	2001/10/18 16:05:04
***************
*** 210,216 ****
  		if ((m = sb->sb_mb) == 0)
  			return (NULL);
  
! 		sb->sb_mb = m->m_nextpkt;
  		m->m_act = 0;
  		for (n = m; n; n = n->m_next)
  			sbfree(sb, n);
--- 210,217 ----
  		if ((m = sb->sb_mb) == 0)
  			return (NULL);
  
! 		if ((sb->sb_mb = m->m_nextpkt) == NULL)
! 			sb->sb_mbtail = NULL;
  		m->m_act = 0;
  		for (n = m; n; n = n->m_next)
  			sbfree(sb, n);
Index: netccitt/pk_usrreq.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netccitt/pk_usrreq.c,v
retrieving revision 1.20
diff -c -r1.20 pk_usrreq.c
*** netccitt/pk_usrreq.c	2001/04/13 23:30:21	1.20
--- netccitt/pk_usrreq.c	2001/10/18 16:05:05
***************
*** 264,270 ****
  			struct mbuf *n = so->so_rcv.sb_mb;
  			if (n && n->m_type == MT_OOBDATA) {
  				unsigned        len = n->m_pkthdr.len;
! 				so->so_rcv.sb_mb = n->m_nextpkt;
  				if (len != n->m_len &&
  				    (n = m_pullup(n, len)) == 0)
  					break;
--- 264,271 ----
  			struct mbuf *n = so->so_rcv.sb_mb;
  			if (n && n->m_type == MT_OOBDATA) {
  				unsigned        len = n->m_pkthdr.len;
! 				if ((so->so_rcv.sb_mb = n->m_nextpkt) == NULL)
! 					so->so_rcv.sb_mbtail = NULL;
  				if (len != n->m_len &&
  				    (n = m_pullup(n, len)) == 0)
  					break;
Index: netinet/tcp_input.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/tcp_input.c,v
retrieving revision 1.131
diff -c -r1.131 tcp_input.c
*** netinet/tcp_input.c	2001/09/17 17:27:00	1.131
--- netinet/tcp_input.c	2001/10/18 16:05:07
***************
*** 525,531 ****
  	if (so->so_state & SS_CANTRCVMORE)
  		m_freem(q->ipqe_m);
  	else
! 		sbappend(&so->so_rcv, q->ipqe_m);
  	pool_put(&ipqent_pool, q);
  	sorwakeup(so);
  	return (pkt_flags);
--- 525,531 ----
  	if (so->so_state & SS_CANTRCVMORE)
  		m_freem(q->ipqe_m);
  	else
! 		sbappend_simple(&so->so_rcv, q->ipqe_m);
  	pool_put(&ipqent_pool, q);
  	sorwakeup(so);
  	return (pkt_flags);
***************
*** 1365,1371 ****
  			 * to socket buffer.
  			 */
  			m_adj(m, toff + off);
! 			sbappend(&so->so_rcv, m);
  			sorwakeup(so);
  			TCP_SETUP_ACK(tp, th);
  			if (tp->t_flags & TF_ACKNOW)
--- 1365,1371 ----
  			 * to socket buffer.
  			 */
  			m_adj(m, toff + off);
! 			sbappend_simple(&so->so_rcv, m);
  			sorwakeup(so);
  			TCP_SETUP_ACK(tp, th);
  			if (tp->t_flags & TF_ACKNOW)
***************
*** 2084,2090 ****
  			tcpstat.tcps_rcvbyte += tlen;
  			ND6_HINT(tp);
  			m_adj(m, hdroptlen);
! 			sbappend(&(so)->so_rcv, m);
  			sorwakeup(so);
  		} else {
  			m_adj(m, hdroptlen);
--- 2084,2090 ----
  			tcpstat.tcps_rcvbyte += tlen;
  			ND6_HINT(tp);
  			m_adj(m, hdroptlen);
! 			sbappend_simple(&(so)->so_rcv, m);
  			sorwakeup(so);
  		} else {
  			m_adj(m, hdroptlen);
Index: netinet/tcp_usrreq.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.65
diff -c -r1.65 tcp_usrreq.c
*** netinet/tcp_usrreq.c	2001/09/10 20:15:14	1.65
--- netinet/tcp_usrreq.c	2001/10/18 16:05:08
***************
*** 509,515 ****
  			error = EINVAL;
  			break;
  		}
! 		sbappend(&so->so_snd, m);
  		error = tcp_output(tp);
  		break;
  
--- 509,515 ----
  			error = EINVAL;
  			break;
  		}
! 		sbappend_simple(&so->so_snd, m);
  		error = tcp_output(tp);
  		break;
  
***************
*** 565,571 ****
  		 * of data past the urgent section.
  		 * Otherwise, snd_up should be one lower.
  		 */
! 		sbappend(&so->so_snd, m);
  		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
  		tp->t_force = 1;
  		error = tcp_output(tp);
--- 565,571 ----
  		 * of data past the urgent section.
  		 * Otherwise, snd_up should be one lower.
  		 */
! 		sbappend_simple(&so->so_snd, m);
  		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
  		tp->t_force = 1;
  		error = tcp_output(tp);
Index: sys/socketvar.h
===================================================================
RCS file: /cvsroot/syssrc/sys/sys/socketvar.h,v
retrieving revision 1.50
diff -c -r1.50 socketvar.h
*** sys/socketvar.h	2001/09/28 18:02:45	1.50
--- sys/socketvar.h	2001/10/18 16:05:09
***************
*** 93,98 ****
--- 93,99 ----
  		u_long	sb_mbmax;	/* max chars of mbufs to use */
  		long	sb_lowat;	/* low water mark */
  		struct mbuf *sb_mb;	/* the mbuf chain */
+ 		struct mbuf *sb_mbtail;	/* the last mbuf in the chain */
  		struct selinfo sb_sel;	/* process selecting read/write */
  		short	sb_flags;	/* flags, see below */
  		short	sb_timeo;	/* timeout for read/write */
***************
*** 265,270 ****
--- 266,272 ----
  	    struct mbuf *, struct mbuf *, struct proc *);
  int	uipc_ctloutput(int, struct socket *, int, int, struct mbuf **);
  void	sbappend(struct sockbuf *sb, struct mbuf *m);
+ void	sbappend_simple(struct sockbuf *sb, struct mbuf *m);
  int	sbappendaddr(struct sockbuf *sb, struct sockaddr *asa,
  	    struct mbuf *m0, struct mbuf *control);
  int	sbappendcontrol(struct sockbuf *sb, struct mbuf *m0,

--IpbVkmxF4tDyP/Kb--