Subject: Socket buffer optimization tweak
To: None <tech-net@netbsd.org>
From: Jason R Thorpe <thorpej@wasabisystems.com>
List: tech-net
Date: 10/18/2001 09:22:59
--IpbVkmxF4tDyP/Kb
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Folks...
One thing that has turned up in my profiling of our network stack
is that an awful lot of time is spend in sbappend(). The reason
for this is obvious when you take a look at what that routine does --
it searches for the last mbuf in the socket buffer *each time it is
called* and then appends the new mbuf chain to it by calling sbcompress().
The obvious solution here is to add a tail pointer to the socket buffer
and use it when you can.
I have done this, and added an sbappend_simple() that can be used for
the simple case of stream data (e.g. TCP). I have not yet tried to
use the tail pointer in the pre-existing sbappend*() functions, since
I need to get my head wrapped around the intracacies of how each one
works (they're full of special cases for end-of-record and whatnot,
and I'd prefer not to break them because I missed something subtle :-)
The patch appended does indeed work for TCP -- I have not yet done a
profile with the change, but will be doing so shortly. I would like
to get some feedback on this now, however, and would like to have
the changes reviewed for mistakes.
--
-- Jason R. Thorpe <thorpej@wasabisystems.com>
--IpbVkmxF4tDyP/Kb
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=foo
Index: kern/uipc_socket.c
===================================================================
RCS file: /cvsroot/syssrc/sys/kern/uipc_socket.c,v
retrieving revision 1.58
diff -c -r1.58 uipc_socket.c
*** kern/uipc_socket.c 2001/09/29 14:16:19 1.58
--- kern/uipc_socket.c 2001/10/18 16:04:55
***************
*** 664,674 ****
sbfree(&so->so_rcv, m);
if (paddr) {
*paddr = m;
! so->so_rcv.sb_mb = m->m_next;
m->m_next = 0;
m = so->so_rcv.sb_mb;
} else {
MFREE(m, so->so_rcv.sb_mb);
m = so->so_rcv.sb_mb;
}
}
--- 664,677 ----
sbfree(&so->so_rcv, m);
if (paddr) {
*paddr = m;
! if ((so->so_rcv.sb_mb = m->m_next) == NULL)
! so->so_rcv.sb_mbtail = NULL;
m->m_next = 0;
m = so->so_rcv.sb_mb;
} else {
MFREE(m, so->so_rcv.sb_mb);
+ if (so->so_rcv.sb_mb == NULL)
+ so->so_rcv.sb_mbtail = NULL;
m = so->so_rcv.sb_mb;
}
}
***************
*** 686,696 ****
SCM_RIGHTS)
error = (*pr->pr_domain->dom_externalize)(m);
*controlp = m;
! so->so_rcv.sb_mb = m->m_next;
m->m_next = 0;
m = so->so_rcv.sb_mb;
} else {
MFREE(m, so->so_rcv.sb_mb);
m = so->so_rcv.sb_mb;
}
}
--- 689,702 ----
SCM_RIGHTS)
error = (*pr->pr_domain->dom_externalize)(m);
*controlp = m;
! if ((so->so_rcv.sb_mb = m->m_next) == NULL)
! so->so_rcv.sb_mbtail = NULL;
m->m_next = 0;
m = so->so_rcv.sb_mb;
} else {
MFREE(m, so->so_rcv.sb_mb);
+ if (so->so_rcv.sb_mb == NULL)
+ so->so_rcv.sb_mbtail = NULL;
m = so->so_rcv.sb_mb;
}
}
***************
*** 752,761 ****
if (mp) {
*mp = m;
mp = &m->m_next;
! so->so_rcv.sb_mb = m = m->m_next;
*mp = (struct mbuf *)0;
} else {
MFREE(m, so->so_rcv.sb_mb);
m = so->so_rcv.sb_mb;
}
if (m)
--- 758,771 ----
if (mp) {
*mp = m;
mp = &m->m_next;
! if ((so->so_rcv.sb_mb = m = m->m_next)
! == NULL)
! so->so_rcv.sb_mbtail = NULL;
*mp = (struct mbuf *)0;
} else {
MFREE(m, so->so_rcv.sb_mb);
+ if (so->so_rcv.sb_mb == NULL)
+ so->so_rcv.sb_mbtail = NULL;
m = so->so_rcv.sb_mb;
}
if (m)
***************
*** 815,822 ****
(void) sbdroprecord(&so->so_rcv);
}
if ((flags & MSG_PEEK) == 0) {
! if (m == 0)
! so->so_rcv.sb_mb = nextrecord;
if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
(struct mbuf *)(long)flags, (struct mbuf *)0,
--- 825,834 ----
(void) sbdroprecord(&so->so_rcv);
}
if ((flags & MSG_PEEK) == 0) {
! if (m == 0) {
! if ((so->so_rcv.sb_mb = nextrecord) == NULL)
! so->so_rcv.sb_mbtail = NULL;
! }
if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
(struct mbuf *)(long)flags, (struct mbuf *)0,
Index: kern/uipc_socket2.c
===================================================================
RCS file: /cvsroot/syssrc/sys/kern/uipc_socket2.c,v
retrieving revision 1.41
diff -c -r1.41 uipc_socket2.c
*** kern/uipc_socket2.c 2001/08/05 08:25:39 1.41
--- kern/uipc_socket2.c 2001/10/18 16:04:56
***************
*** 448,453 ****
--- 448,460 ----
sbcompress(sb, m, n);
}
+ void
+ sbappend_simple(struct sockbuf *sb, struct mbuf *m)
+ {
+
+ sbcompress(sb, m, sb->sb_mbtail);
+ }
+
#ifdef SOCKBUF_DEBUG
void
sbcheck(struct sockbuf *sb)
***************
*** 597,602 ****
--- 604,610 ----
n->m_nextpkt = m;
} else
sb->sb_mb = m;
+ sb->sb_mbtail = m;
return (1);
}
***************
*** 628,633 ****
--- 636,642 ----
n->m_nextpkt = control;
} else
sb->sb_mb = control;
+ sb->sb_mbtail = control;
return (1);
}
***************
*** 668,673 ****
--- 677,683 ----
n->m_next = m;
else
sb->sb_mb = m;
+ sb->sb_mbtail = m;
sballoc(sb, m);
n = m;
m->m_flags &= ~M_EOR;
***************
*** 736,741 ****
--- 746,753 ----
m->m_nextpkt = next;
} else
sb->sb_mb = next;
+ if (sb->sb_mb == NULL)
+ sb->sb_mbtail = NULL;
}
/*
***************
*** 755,760 ****
--- 767,774 ----
MFREE(m, mn);
} while ((m = mn) != NULL);
}
+ if (sb->sb_mb == NULL)
+ sb->sb_mbtail = NULL;
}
/*
Index: netccitt/if_x25subr.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netccitt/if_x25subr.c,v
retrieving revision 1.25
diff -c -r1.25 if_x25subr.c
*** netccitt/if_x25subr.c 2001/06/14 05:44:26 1.25
--- netccitt/if_x25subr.c 2001/10/18 16:05:04
***************
*** 767,773 ****
((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
#define transfer_sockbuf(s, f, l) \
while ((m = (s)->sb_mb) != NULL) \
! {(s)->sb_mb = m->m_act; m->m_act = 0; sbfree((s), m); f;}
if (rt)
rt->rt_refcnt--;
--- 767,779 ----
((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
#define transfer_sockbuf(s, f, l) \
while ((m = (s)->sb_mb) != NULL) \
! { \
! if (((s)->sb_mb = m->m_act) == NULL) \
! (s)->sb_mbtail = NULL; \
! m->m_act = 0; \
! sbfree((s), m); \
! f; \
! }
if (rt)
rt->rt_refcnt--;
Index: netccitt/pk_output.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netccitt/pk_output.c,v
retrieving revision 1.13
diff -c -r1.13 pk_output.c
*** netccitt/pk_output.c 2000/03/30 13:53:36 1.13
--- netccitt/pk_output.c 2001/10/18 16:05:04
***************
*** 210,216 ****
if ((m = sb->sb_mb) == 0)
return (NULL);
! sb->sb_mb = m->m_nextpkt;
m->m_act = 0;
for (n = m; n; n = n->m_next)
sbfree(sb, n);
--- 210,217 ----
if ((m = sb->sb_mb) == 0)
return (NULL);
! if ((sb->sb_mb = m->m_nextpkt) == NULL)
! sb->sb_mbtail = NULL;
m->m_act = 0;
for (n = m; n; n = n->m_next)
sbfree(sb, n);
Index: netccitt/pk_usrreq.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netccitt/pk_usrreq.c,v
retrieving revision 1.20
diff -c -r1.20 pk_usrreq.c
*** netccitt/pk_usrreq.c 2001/04/13 23:30:21 1.20
--- netccitt/pk_usrreq.c 2001/10/18 16:05:05
***************
*** 264,270 ****
struct mbuf *n = so->so_rcv.sb_mb;
if (n && n->m_type == MT_OOBDATA) {
unsigned len = n->m_pkthdr.len;
! so->so_rcv.sb_mb = n->m_nextpkt;
if (len != n->m_len &&
(n = m_pullup(n, len)) == 0)
break;
--- 264,271 ----
struct mbuf *n = so->so_rcv.sb_mb;
if (n && n->m_type == MT_OOBDATA) {
unsigned len = n->m_pkthdr.len;
! if ((so->so_rcv.sb_mb = n->m_nextpkt) == NULL)
! so->so_rcv.sb_mbtail = NULL;
if (len != n->m_len &&
(n = m_pullup(n, len)) == 0)
break;
Index: netinet/tcp_input.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/tcp_input.c,v
retrieving revision 1.131
diff -c -r1.131 tcp_input.c
*** netinet/tcp_input.c 2001/09/17 17:27:00 1.131
--- netinet/tcp_input.c 2001/10/18 16:05:07
***************
*** 525,531 ****
if (so->so_state & SS_CANTRCVMORE)
m_freem(q->ipqe_m);
else
! sbappend(&so->so_rcv, q->ipqe_m);
pool_put(&ipqent_pool, q);
sorwakeup(so);
return (pkt_flags);
--- 525,531 ----
if (so->so_state & SS_CANTRCVMORE)
m_freem(q->ipqe_m);
else
! sbappend_simple(&so->so_rcv, q->ipqe_m);
pool_put(&ipqent_pool, q);
sorwakeup(so);
return (pkt_flags);
***************
*** 1365,1371 ****
* to socket buffer.
*/
m_adj(m, toff + off);
! sbappend(&so->so_rcv, m);
sorwakeup(so);
TCP_SETUP_ACK(tp, th);
if (tp->t_flags & TF_ACKNOW)
--- 1365,1371 ----
* to socket buffer.
*/
m_adj(m, toff + off);
! sbappend_simple(&so->so_rcv, m);
sorwakeup(so);
TCP_SETUP_ACK(tp, th);
if (tp->t_flags & TF_ACKNOW)
***************
*** 2084,2090 ****
tcpstat.tcps_rcvbyte += tlen;
ND6_HINT(tp);
m_adj(m, hdroptlen);
! sbappend(&(so)->so_rcv, m);
sorwakeup(so);
} else {
m_adj(m, hdroptlen);
--- 2084,2090 ----
tcpstat.tcps_rcvbyte += tlen;
ND6_HINT(tp);
m_adj(m, hdroptlen);
! sbappend_simple(&(so)->so_rcv, m);
sorwakeup(so);
} else {
m_adj(m, hdroptlen);
Index: netinet/tcp_usrreq.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.65
diff -c -r1.65 tcp_usrreq.c
*** netinet/tcp_usrreq.c 2001/09/10 20:15:14 1.65
--- netinet/tcp_usrreq.c 2001/10/18 16:05:08
***************
*** 509,515 ****
error = EINVAL;
break;
}
! sbappend(&so->so_snd, m);
error = tcp_output(tp);
break;
--- 509,515 ----
error = EINVAL;
break;
}
! sbappend_simple(&so->so_snd, m);
error = tcp_output(tp);
break;
***************
*** 565,571 ****
* of data past the urgent section.
* Otherwise, snd_up should be one lower.
*/
! sbappend(&so->so_snd, m);
tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
tp->t_force = 1;
error = tcp_output(tp);
--- 565,571 ----
* of data past the urgent section.
* Otherwise, snd_up should be one lower.
*/
! sbappend_simple(&so->so_snd, m);
tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
tp->t_force = 1;
error = tcp_output(tp);
Index: sys/socketvar.h
===================================================================
RCS file: /cvsroot/syssrc/sys/sys/socketvar.h,v
retrieving revision 1.50
diff -c -r1.50 socketvar.h
*** sys/socketvar.h 2001/09/28 18:02:45 1.50
--- sys/socketvar.h 2001/10/18 16:05:09
***************
*** 93,98 ****
--- 93,99 ----
u_long sb_mbmax; /* max chars of mbufs to use */
long sb_lowat; /* low water mark */
struct mbuf *sb_mb; /* the mbuf chain */
+ struct mbuf *sb_mbtail; /* the last mbuf in the chain */
struct selinfo sb_sel; /* process selecting read/write */
short sb_flags; /* flags, see below */
short sb_timeo; /* timeout for read/write */
***************
*** 265,270 ****
--- 266,272 ----
struct mbuf *, struct mbuf *, struct proc *);
int uipc_ctloutput(int, struct socket *, int, int, struct mbuf **);
void sbappend(struct sockbuf *sb, struct mbuf *m);
+ void sbappend_simple(struct sockbuf *sb, struct mbuf *m);
int sbappendaddr(struct sockbuf *sb, struct sockaddr *asa,
struct mbuf *m0, struct mbuf *control);
int sbappendcontrol(struct sockbuf *sb, struct mbuf *m0,
--IpbVkmxF4tDyP/Kb--