Subject: Re: Add "last record" and "last mbuf" pointers to sockbuf
To: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
From: Jason R Thorpe <thorpej@wasabisystems.com>
List: tech-kern
Date: 07/03/2002 10:33:33
--ey/N+yb7u/X9mFhi
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

On Wed, Jul 03, 2002 at 06:30:58PM +0900, YAMAMOTO Takashi wrote:

 > i made a patch. (attached)
 > both tcp and udp seems to work with it.

Thank you very much for your debugging assistance.  I've verified that
everything is working for me as well.

Here is the updated patch, which I plan to commit.  Note that I don't
really have a good way to test the changes to netccitt, but I doubt
that code currently works at all.

-- 
        -- Jason R. Thorpe <thorpej@wasabisystems.com>

--ey/N+yb7u/X9mFhi
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=sbappend-5

Index: dev/kttcp.c
===================================================================
RCS file: /cvsroot/syssrc/sys/dev/kttcp.c,v
retrieving revision 1.1
diff -c -r1.1 kttcp.c
*** dev/kttcp.c	2002/06/28 23:27:14	1.1
--- dev/kttcp.c	2002/07/03 17:26:07
***************
*** 268,273 ****
--- 268,277 ----
  		if (space < resid && (atomic || space < so->so_snd.sb_lowat)) {
  			if (so->so_state & SS_NBIO)
  				snderr(EWOULDBLOCK);
+ 			SBLASTRECORDCHK(&so->so_rcv,
+ 			    "kttcp_soreceive sbwait 1");
+ 			SBLASTMBUFCHK(&so->so_rcv,
+ 			    "kttcp_soreceive sbwait 1");
  			sbunlock(&so->so_snd);
  			error = sbwait(&so->so_snd);
  			splx(s);
***************
*** 470,479 ****
--- 474,491 ----
  		goto restart;
  	}
   dontblock:
+ 	/*
+ 	 * On entry here, m points to the first record of the socket buffer.
+ 	 * While we process the initial mbufs containing address and control
+ 	 * info, we save a copy of m->m_nextpkt into nextrecord.
+ 	 */
  #ifdef notyet /* XXXX */
  	if (uio->uio_procp)
  		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
  #endif
+ 	KASSERT(m == so->so_rcv.sb_mb);
+ 	SBLASTRECORDCHK(&so->so_rcv, "kttcp_soreceive 1");
+ 	SBLASTMBUFCHK(&so->so_rcv, "kttcp_soreceive 1");
  	nextrecord = m->m_nextpkt;
  	if (pr->pr_flags & PR_ADDR) {
  #ifdef DIAGNOSTIC
***************
*** 498,510 ****
  			m = so->so_rcv.sb_mb;
  		}
  	}
  	if (m) {
! 		if ((flags & MSG_PEEK) == 0)
  			m->m_nextpkt = nextrecord;
  		type = m->m_type;
  		if (type == MT_OOBDATA)
  			flags |= MSG_OOB;
  	}
  	moff = 0;
  	offset = 0;
  	while (m && resid > 0 && error == 0) {
--- 510,548 ----
  			m = so->so_rcv.sb_mb;
  		}
  	}
+ 
+ 	/*
+ 	 * If m is non-NULL, we have some data to read.  From now on,
+ 	 * make sure to keep sb_lastrecord consistent when working on
+ 	 * the last packet on the chain (nextrecord == NULL) and we
+ 	 * change m->m_nextpkt.
+ 	 */
  	if (m) {
! 		if ((flags & MSG_PEEK) == 0) {
  			m->m_nextpkt = nextrecord;
+ 			/*
+ 			 * If nextrecord == NULL (this is a single chain),
+ 			 * then sb_lastrecord may not be valid here if m
+ 			 * was changed earlier.
+ 			 */
+ 			if (nextrecord == NULL) {
+ 				KASSERT(so->so_rcv.sb_mb == m);
+ 				so->so_rcv.sb_lastrecord = m;
+ 			}
+ 		}
  		type = m->m_type;
  		if (type == MT_OOBDATA)
  			flags |= MSG_OOB;
+ 	} else {
+ 		if ((flags & MSG_PEEK) == 0) {
+ 			KASSERT(so->so_rcv.sb_mb == m);
+ 			so->so_rcv.sb_mb = nextrecord;
+ 			SB_UPDATE_TAIL(&so->so_rcv);
+ 		}
  	}
+ 	SBLASTRECORDCHK(&so->so_rcv, "kttcp_soreceive 2");
+ 	SBLASTMBUFCHK(&so->so_rcv, "kttcp_soreceive 2");
+ 
  	moff = 0;
  	offset = 0;
  	while (m && resid > 0 && error == 0) {
***************
*** 550,557 ****
  					MFREE(m, so->so_rcv.sb_mb);
  					m = so->so_rcv.sb_mb;
  				}
! 				if (m)
  					m->m_nextpkt = nextrecord;
  			}
  		} else {
  			if (flags & MSG_PEEK)
--- 588,610 ----
  					MFREE(m, so->so_rcv.sb_mb);
  					m = so->so_rcv.sb_mb;
  				}
! 				/*
! 				 * If m != NULL, we also know that
! 				 * so->so_rcv.sb_mb != NULL.
! 				 */
! 				KASSERT(so->so_rcv.sb_mb == m);
! 				if (m) {
  					m->m_nextpkt = nextrecord;
+ 					if (nextrecord == NULL)
+ 						so->so_rcv.sb_lastrecord = m;
+ 				} else {
+ 					so->so_rcv.sb_mb = nextrecord;
+ 					SB_UPDATE_TAIL(&so->so_rcv);
+ 				}
+ 				SBLASTRECORDCHK(&so->so_rcv,
+ 				    "kttcp_soreceive 3");
+ 				SBLASTMBUFCHK(&so->so_rcv,
+ 				    "kttcp_soreceive 3");
  			}
  		} else {
  			if (flags & MSG_PEEK)
***************
*** 590,595 ****
--- 643,652 ----
  		    !sosendallatonce(so) && !nextrecord) {
  			if (so->so_error || so->so_state & SS_CANTRCVMORE)
  				break;
+ 			SBLASTRECORDCHK(&so->so_rcv,
+ 			    "kttcp_soreceive sbwait 2");
+ 			SBLASTMBUFCHK(&so->so_rcv,
+ 			    "kttcp_soreceive sbwait 2");
  			error = sbwait(&so->so_rcv);
  			if (error) {
  				sbunlock(&so->so_rcv);
***************
*** 607,614 ****
  			(void) sbdroprecord(&so->so_rcv);
  	}
  	if ((flags & MSG_PEEK) == 0) {
! 		if (m == 0)
  			so->so_rcv.sb_mb = nextrecord;
  		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
  			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
  			    (struct mbuf *)(long)flags, (struct mbuf *)0,
--- 664,684 ----
  			(void) sbdroprecord(&so->so_rcv);
  	}
  	if ((flags & MSG_PEEK) == 0) {
! 		if (m == 0) {
! 			/*
! 			 * First part is an SB_UPDATE_TAIL().  Second part
! 			 * makes sure sb_lastrecord is up-to-date if
! 			 * there is still data in the socket buffer.
! 			 */
  			so->so_rcv.sb_mb = nextrecord;
+ 			if (so->so_rcv.sb_mb == NULL) {
+ 				so->so_rcv.sb_mbtail = NULL;
+ 				so->so_rcv.sb_lastrecord = NULL;
+ 			} else if (nextrecord->m_nextpkt == NULL)
+ 				so->so_rcv.sb_lastrecord = nextrecord;
+ 		}
+ 		SBLASTRECORDCHK(&so->so_rcv, "kttcp_soreceive 4");
+ 		SBLASTMBUFCHK(&so->so_rcv, "kttcp_soreceive 4");
  		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
  			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
  			    (struct mbuf *)(long)flags, (struct mbuf *)0,
Index: kern/uipc_socket.c
===================================================================
RCS file: /cvsroot/syssrc/sys/kern/uipc_socket.c,v
retrieving revision 1.68
diff -c -r1.68 uipc_socket.c
*** kern/uipc_socket.c	2002/06/11 00:21:33	1.68
--- kern/uipc_socket.c	2002/07/03 17:26:08
***************
*** 894,899 ****
--- 894,901 ----
  			error = EWOULDBLOCK;
  			goto release;
  		}
+ 		SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
+ 		SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
  		sbunlock(&so->so_rcv);
  		error = sbwait(&so->so_rcv);
  		splx(s);
***************
*** 902,911 ****
--- 904,921 ----
  		goto restart;
  	}
   dontblock:
+ 	/*
+ 	 * On entry here, m points to the first record of the socket buffer.
+ 	 * While we process the initial mbufs containing address and control
+ 	 * info, we save a copy of m->m_nextpkt into nextrecord.
+ 	 */
  #ifdef notyet /* XXXX */
  	if (uio->uio_procp)
  		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
  #endif
+ 	KASSERT(m == so->so_rcv.sb_mb);
+ 	SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
+ 	SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
  	nextrecord = m->m_nextpkt;
  	if (pr->pr_flags & PR_ADDR) {
  #ifdef DIAGNOSTIC
***************
*** 958,970 ****
  			controlp = &(*controlp)->m_next;
  		}
  	}
  	if (m) {
! 		if ((flags & MSG_PEEK) == 0)
  			m->m_nextpkt = nextrecord;
  		type = m->m_type;
  		if (type == MT_OOBDATA)
  			flags |= MSG_OOB;
  	}
  	moff = 0;
  	offset = 0;
  	while (m && uio->uio_resid > 0 && error == 0) {
--- 968,1006 ----
  			controlp = &(*controlp)->m_next;
  		}
  	}
+ 
+ 	/*
+ 	 * If m is non-NULL, we have some data to read.  From now on,
+ 	 * make sure to keep sb_lastrecord consistent when working on
+ 	 * the last packet on the chain (nextrecord == NULL) and we
+ 	 * change m->m_nextpkt.
+ 	 */
  	if (m) {
! 		if ((flags & MSG_PEEK) == 0) {
  			m->m_nextpkt = nextrecord;
+ 			/*
+ 			 * If nextrecord == NULL (this is a single chain),
+ 			 * then sb_lastrecord may not be valid here if m
+ 			 * was changed earlier.
+ 			 */
+ 			if (nextrecord == NULL) {
+ 				KASSERT(so->so_rcv.sb_mb == m);
+ 				so->so_rcv.sb_lastrecord = m;
+ 			}
+ 		}
  		type = m->m_type;
  		if (type == MT_OOBDATA)
  			flags |= MSG_OOB;
+ 	} else {
+ 		if ((flags & MSG_PEEK) == 0) {
+ 			KASSERT(so->so_rcv.sb_mb == m);
+ 			so->so_rcv.sb_mb = nextrecord;
+ 			SB_UPDATE_TAIL(&so->so_rcv);
+ 		}
  	}
+ 	SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
+ 	SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");
+ 
  	moff = 0;
  	offset = 0;
  	while (m && uio->uio_resid > 0 && error == 0) {
***************
*** 992,997 ****
--- 1028,1035 ----
  		 * block interrupts again.
  		 */
  		if (mp == 0) {
+ 			SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
+ 			SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
  			splx(s);
  			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
  			s = splsoftnet();
***************
*** 1033,1040 ****
  					MFREE(m, so->so_rcv.sb_mb);
  					m = so->so_rcv.sb_mb;
  				}
! 				if (m)
  					m->m_nextpkt = nextrecord;
  			}
  		} else {
  			if (flags & MSG_PEEK)
--- 1071,1091 ----
  					MFREE(m, so->so_rcv.sb_mb);
  					m = so->so_rcv.sb_mb;
  				}
! 				/*
! 				 * If m != NULL, we also know that
! 				 * so->so_rcv.sb_mb != NULL.
! 				 */
! 				KASSERT(so->so_rcv.sb_mb == m);
! 				if (m) {
  					m->m_nextpkt = nextrecord;
+ 					if (nextrecord == NULL)
+ 						so->so_rcv.sb_lastrecord = m;
+ 				} else {
+ 					so->so_rcv.sb_mb = nextrecord;
+ 					SB_UPDATE_TAIL(&so->so_rcv);
+ 				}
+ 				SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
+ 				SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
  			}
  		} else {
  			if (flags & MSG_PEEK)
***************
*** 1090,1095 ****
--- 1141,1148 ----
  				    (struct mbuf *)(long)flags,
  				    (struct mbuf *)0,
  				    (struct proc *)0);
+ 			SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
+ 			SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
  			error = sbwait(&so->so_rcv);
  			if (error) {
  				sbunlock(&so->so_rcv);
***************
*** 1107,1114 ****
  			(void) sbdroprecord(&so->so_rcv);
  	}
  	if ((flags & MSG_PEEK) == 0) {
! 		if (m == 0)
  			so->so_rcv.sb_mb = nextrecord;
  		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
  			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
  			    (struct mbuf *)(long)flags, (struct mbuf *)0,
--- 1160,1180 ----
  			(void) sbdroprecord(&so->so_rcv);
  	}
  	if ((flags & MSG_PEEK) == 0) {
! 		if (m == 0) {
! 			/*
! 			 * First part is an inline SB_UPDATE_TAIL().  Second
! 			 * part makes sure sb_lastrecord is up-to-date if
! 			 * there is still data in the socket buffer.
! 			 */
  			so->so_rcv.sb_mb = nextrecord;
+ 			if (so->so_rcv.sb_mb == NULL) {
+ 				so->so_rcv.sb_mbtail = NULL;
+ 				so->so_rcv.sb_lastrecord = NULL;
+ 			} else if (nextrecord->m_nextpkt == NULL)
+ 				so->so_rcv.sb_lastrecord = nextrecord;
+ 		}
+ 		SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
+ 		SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
  		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
  			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
  			    (struct mbuf *)(long)flags, (struct mbuf *)0,
Index: kern/uipc_socket2.c
===================================================================
RCS file: /cvsroot/syssrc/sys/kern/uipc_socket2.c,v
retrieving revision 1.42
diff -c -r1.42 uipc_socket2.c
*** kern/uipc_socket2.c	2001/11/12 15:25:33	1.42
--- kern/uipc_socket2.c	2002/07/03 17:26:09
***************
*** 425,430 ****
--- 425,485 ----
   * or sbdroprecord() when the data is acknowledged by the peer.
   */
  
+ #ifdef SOCKBUF_DEBUG
+ void
+ sblastrecordchk(struct sockbuf *sb, const char *where)
+ {
+ 	struct mbuf *m = sb->sb_mb;
+ 
+ 	while (m && m->m_nextpkt)
+ 		m = m->m_nextpkt;
+ 
+ 	if (m != sb->sb_lastrecord) {
+ 		printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
+ 		    sb->sb_mb, sb->sb_lastrecord, m);
+ 		printf("packet chain:\n");
+ 		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
+ 			printf("\t%p\n", m);
+ 		panic("sblastrecordchk from %s\n", where);
+ 	}
+ }
+ 
+ void
+ sblastmbufchk(struct sockbuf *sb, const char *where)
+ {
+ 	struct mbuf *m = sb->sb_mb;
+ 	struct mbuf *n;
+ 
+ 	while (m && m->m_nextpkt)
+ 		m = m->m_nextpkt;
+ 
+ 	while (m && m->m_next)
+ 		m = m->m_next;
+ 
+ 	if (m != sb->sb_mbtail) {
+ 		printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
+ 		    sb->sb_mb, sb->sb_mbtail, m);
+ 		printf("packet tree:\n");
+ 		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
+ 			printf("\t");
+ 			for (n = m; n != NULL; n = n->m_next)
+ 				printf("%p ", n);
+ 			printf("\n");
+ 		}
+ 		panic("sblastmbufchk from %s", where);
+ 	}
+ }
+ #endif /* SOCKBUF_DEBUG */
+ 
+ #define	SBLINKRECORD(sb, m0)						\
+ do {									\
+ 	if ((sb)->sb_lastrecord != NULL)				\
+ 		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
+ 	else								\
+ 		(sb)->sb_mb = (m0);					\
+ 	(sb)->sb_lastrecord = (m0);					\
+ } while (/*CONSTCOND*/0)
+ 
  /*
   * Append mbuf chain m to the last record in the
   * socket buffer sb.  The additional space associated
***************
*** 438,454 ****
  
  	if (m == 0)
  		return;
! 	if ((n = sb->sb_mb) != NULL) {
! 		while (n->m_nextpkt)
! 			n = n->m_nextpkt;
  		do {
  			if (n->m_flags & M_EOR) {
  				sbappendrecord(sb, m); /* XXXXXX!!!! */
  				return;
  			}
  		} while (n->m_next && (n = n->m_next));
  	}
  	sbcompress(sb, m, n);
  }
  
  #ifdef SOCKBUF_DEBUG
--- 493,542 ----
  
  	if (m == 0)
  		return;
! 
! 	SBLASTRECORDCHK(sb, "sbappend 1");
! 
! 	if ((n = sb->sb_lastrecord) != NULL) {
! 		/*
! 		 * XXX Would like to simply use sb_mbtail here, but
! 		 * XXX I need to verify that I won't miss an EOR that
! 		 * XXX way.
! 		 */
  		do {
  			if (n->m_flags & M_EOR) {
  				sbappendrecord(sb, m); /* XXXXXX!!!! */
  				return;
  			}
  		} while (n->m_next && (n = n->m_next));
+ 	} else {
+ 		/*
+ 		 * If this is the first record in the socket buffer, it's
+ 		 * also the last record.
+ 		 */
+ 		sb->sb_lastrecord = m;
  	}
  	sbcompress(sb, m, n);
+ 	SBLASTRECORDCHK(sb, "sbappend 2");
+ }
+ 
+ /*
+  * This version of sbappend() should only be used when the caller
+  * absolutely knows that there will never be more than one record
+  * in the socket buffer, that is, a stream protocol (such as TCP).
+  */
+ void
+ sbappend_stream(struct sockbuf *sb, struct mbuf *m)
+ {
+ 
+ 	KDASSERT(m->m_nextpkt == NULL);
+ 	KASSERT(sb->sb_mb == sb->sb_lastrecord);
+ 
+ 	SBLASTMBUFCHK(sb, __func__);
+ 
+ 	sbcompress(sb, m, sb->sb_mbtail);
+ 
+ 	sb->sb_lastrecord = sb->sb_mb;
+ 	SBLASTRECORDCHK(sb, __func__);
  }
  
  #ifdef SOCKBUF_DEBUG
***************
*** 456,462 ****
  sbcheck(struct sockbuf *sb)
  {
  	struct mbuf	*m;
! 	int		len, mbcnt;
  
  	len = 0;
  	mbcnt = 0;
--- 544,550 ----
  sbcheck(struct sockbuf *sb)
  {
  	struct mbuf	*m;
! 	u_long		len, mbcnt;
  
  	len = 0;
  	mbcnt = 0;
***************
*** 469,475 ****
  			panic("sbcheck nextpkt");
  	}
  	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
! 		printf("cc %d != %d || mbcnt %d != %d\n", len, sb->sb_cc,
  		    mbcnt, sb->sb_mbcnt);
  		panic("sbcheck");
  	}
--- 557,563 ----
  			panic("sbcheck nextpkt");
  	}
  	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
! 		printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
  		    mbcnt, sb->sb_mbcnt);
  		panic("sbcheck");
  	}
***************
*** 487,504 ****
  
  	if (m0 == 0)
  		return;
! 	if ((m = sb->sb_mb) != NULL)
! 		while (m->m_nextpkt)
! 			m = m->m_nextpkt;
  	/*
  	 * Put the first mbuf on the queue.
  	 * Note this permits zero length records.
  	 */
  	sballoc(sb, m0);
! 	if (m)
! 		m->m_nextpkt = m0;
! 	else
! 		sb->sb_mb = m0;
  	m = m0->m_next;
  	m0->m_next = 0;
  	if (m && (m0->m_flags & M_EOR)) {
--- 575,588 ----
  
  	if (m0 == 0)
  		return;
! 
  	/*
  	 * Put the first mbuf on the queue.
  	 * Note this permits zero length records.
  	 */
  	sballoc(sb, m0);
! 	SBLASTRECORDCHK(sb, "sbappendrecord 1");
! 	SBLINKRECORD(sb, m0);
  	m = m0->m_next;
  	m0->m_next = 0;
  	if (m && (m0->m_flags & M_EOR)) {
***************
*** 506,511 ****
--- 590,596 ----
  		m->m_flags |= M_EOR;
  	}
  	sbcompress(sb, m, m0);
+ 	SBLASTRECORDCHK(sb, "sbappendrecord 2");
  }
  
  /*
***************
*** 520,525 ****
--- 605,613 ----
  
  	if (m0 == 0)
  		return;
+ 
+ 	SBLASTRECORDCHK(sb, "sbinsertoob 1");
+ 
  	for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
  	    again:
  		switch (m->m_type) {
***************
*** 539,544 ****
--- 627,636 ----
  	 */
  	sballoc(sb, m0);
  	m0->m_nextpkt = *mp;
+ 	if (*mp == NULL) {
+ 		/* m0 is actually the new tail */
+ 		sb->sb_lastrecord = m0;
+ 	}
  	*mp = m0;
  	m = m0->m_next;
  	m0->m_next = 0;
***************
*** 547,552 ****
--- 639,645 ----
  		m->m_flags |= M_EOR;
  	}
  	sbcompress(sb, m, m0);
+ 	SBLASTRECORDCHK(sb, "sbinsertoob 2");
  }
  
  /*
***************
*** 559,565 ****
  sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0,
  	struct mbuf *control)
  {
! 	struct mbuf	*m, *n;
  	int		space;
  
  	space = asa->sa_len;
--- 652,658 ----
  sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0,
  	struct mbuf *control)
  {
! 	struct mbuf	*m, *n, *nlast;
  	int		space;
  
  	space = asa->sa_len;
***************
*** 592,612 ****
  	else
  		control = m0;
  	m->m_next = control;
! 	for (n = m; n; n = n->m_next)
  		sballoc(sb, n);
! 	if ((n = sb->sb_mb) != NULL) {
! 		while (n->m_nextpkt)
! 			n = n->m_nextpkt;
! 		n->m_nextpkt = m;
! 	} else
! 		sb->sb_mb = m;
  	return (1);
  }
  
  int
  sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
  {
! 	struct mbuf	*m, *n;
  	int		space;
  
  	space = 0;
--- 685,711 ----
  	else
  		control = m0;
  	m->m_next = control;
! 
! 	SBLASTRECORDCHK(sb, "sbappendaddr 1");
! 
! 	for (n = m; n->m_next != NULL; n = n->m_next)
  		sballoc(sb, n);
! 	sballoc(sb, n);
! 	nlast = n;
! 	SBLINKRECORD(sb, m);
! 
! 	sb->sb_mbtail = nlast;
! 	SBLASTMBUFCHK(sb, "sbappendaddr");
! 
! 	SBLASTRECORDCHK(sb, "sbappendaddr 2");
! 
  	return (1);
  }
  
  int
  sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
  {
! 	struct mbuf	*m, *mlast, *n;
  	int		space;
  
  	space = 0;
***************
*** 623,636 ****
  	if (space > sbspace(sb))
  		return (0);
  	n->m_next = m0;			/* concatenate data to control */
! 	for (m = control; m; m = m->m_next)
  		sballoc(sb, m);
! 	if ((n = sb->sb_mb) != NULL) {
! 		while (n->m_nextpkt)
! 			n = n->m_nextpkt;
! 		n->m_nextpkt = control;
! 	} else
! 		sb->sb_mb = control;
  	return (1);
  }
  
--- 722,741 ----
  	if (space > sbspace(sb))
  		return (0);
  	n->m_next = m0;			/* concatenate data to control */
! 
! 	SBLASTRECORDCHK(sb, "sbappendcontrol 1");
! 
! 	for (m = control; m->m_next != NULL; m = m->m_next)
  		sballoc(sb, m);
! 	sballoc(sb, m);
! 	mlast = m;
! 	SBLINKRECORD(sb, control);
! 
! 	sb->sb_mbtail = mlast;
! 	SBLASTMBUFCHK(sb, "sbappendcontrol");
! 
! 	SBLASTRECORDCHK(sb, "sbappendcontrol 2");
! 
  	return (1);
  }
  
***************
*** 671,676 ****
--- 776,782 ----
  			n->m_next = m;
  		else
  			sb->sb_mb = m;
+ 		sb->sb_mbtail = m;
  		sballoc(sb, m);
  		n = m;
  		m->m_flags &= ~M_EOR;
***************
*** 683,688 ****
--- 789,795 ----
  		else
  			printf("semi-panic: sbcompress\n");
  	}
+ 	SBLASTMBUFCHK(sb, __func__);
  }
  
  /*
***************
*** 692,704 ****
  void
  sbflush(struct sockbuf *sb)
  {
  
- 	if (sb->sb_flags & SB_LOCK)
- 		panic("sbflush");
  	while (sb->sb_mbcnt)
  		sbdrop(sb, (int)sb->sb_cc);
! 	if (sb->sb_cc || sb->sb_mb)
! 		panic("sbflush 2");
  }
  
  /*
--- 799,814 ----
  void
  sbflush(struct sockbuf *sb)
  {
+ 
+ 	KASSERT((sb->sb_flags & SB_LOCK) == 0);
  
  	while (sb->sb_mbcnt)
  		sbdrop(sb, (int)sb->sb_cc);
! 
! 	KASSERT(sb->sb_cc == 0);
! 	KASSERT(sb->sb_mb == NULL);
! 	KASSERT(sb->sb_mbtail == NULL);
! 	KASSERT(sb->sb_lastrecord == NULL);
  }
  
  /*
***************
*** 739,744 ****
--- 849,865 ----
  		m->m_nextpkt = next;
  	} else
  		sb->sb_mb = next;
+ 	/*
+ 	 * First part is an inline SB_UPDATE_TAIL().  Second part
+ 	 * makes sure sb_lastrecord is up-to-date if we dropped
+ 	 * part of the last record.
+ 	 */
+ 	m = sb->sb_mb;
+ 	if (m == NULL) {
+ 		sb->sb_mbtail = NULL;
+ 		sb->sb_lastrecord = NULL;
+ 	} else if (m->m_nextpkt == NULL)
+ 		sb->sb_lastrecord = m;
  }
  
  /*
***************
*** 758,763 ****
--- 879,885 ----
  			MFREE(m, mn);
  		} while ((m = mn) != NULL);
  	}
+ 	SB_UPDATE_TAIL(sb);
  }
  
  /*
Index: netinet/tcp_input.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/tcp_input.c,v
retrieving revision 1.146
diff -c -r1.146 tcp_input.c
*** netinet/tcp_input.c	2002/06/30 22:40:35	1.146
--- netinet/tcp_input.c	2002/07/03 17:26:13
***************
*** 664,670 ****
  	if (so->so_state & SS_CANTRCVMORE)
  		m_freem(q->ipqe_m);
  	else
! 		sbappend(&so->so_rcv, q->ipqe_m);
  	pool_put(&ipqent_pool, q);
  	sorwakeup(so);
  	return (pkt_flags);
--- 664,670 ----
  	if (so->so_state & SS_CANTRCVMORE)
  		m_freem(q->ipqe_m);
  	else
! 		sbappend_stream(&so->so_rcv, q->ipqe_m);
  	pool_put(&ipqent_pool, q);
  	sorwakeup(so);
  	return (pkt_flags);
***************
*** 1524,1530 ****
  			 * to socket buffer.
  			 */
  			m_adj(m, toff + off);
! 			sbappend(&so->so_rcv, m);
  			sorwakeup(so);
  			TCP_SETUP_ACK(tp, th);
  			if (tp->t_flags & TF_ACKNOW)
--- 1524,1530 ----
  			 * to socket buffer.
  			 */
  			m_adj(m, toff + off);
! 			sbappend_stream(&so->so_rcv, m);
  			sorwakeup(so);
  			TCP_SETUP_ACK(tp, th);
  			if (tp->t_flags & TF_ACKNOW)
***************
*** 2263,2269 ****
  			tcpstat.tcps_rcvbyte += tlen;
  			ND6_HINT(tp);
  			m_adj(m, hdroptlen);
! 			sbappend(&(so)->so_rcv, m);
  			sorwakeup(so);
  		} else {
  			m_adj(m, hdroptlen);
--- 2263,2269 ----
  			tcpstat.tcps_rcvbyte += tlen;
  			ND6_HINT(tp);
  			m_adj(m, hdroptlen);
! 			sbappend_stream(&(so)->so_rcv, m);
  			sorwakeup(so);
  		} else {
  			m_adj(m, hdroptlen);
Index: netinet/tcp_usrreq.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.71
diff -c -r1.71 tcp_usrreq.c
*** netinet/tcp_usrreq.c	2002/06/09 16:33:44	1.71
--- netinet/tcp_usrreq.c	2002/07/03 17:26:17
***************
*** 508,514 ****
  			error = EINVAL;
  			break;
  		}
! 		sbappend(&so->so_snd, m);
  		error = tcp_output(tp);
  		break;
  
--- 508,514 ----
  			error = EINVAL;
  			break;
  		}
! 		sbappend_stream(&so->so_snd, m);
  		error = tcp_output(tp);
  		break;
  
***************
*** 564,570 ****
  		 * of data past the urgent section.
  		 * Otherwise, snd_up should be one lower.
  		 */
! 		sbappend(&so->so_snd, m);
  		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
  		tp->t_force = 1;
  		error = tcp_output(tp);
--- 564,570 ----
  		 * of data past the urgent section.
  		 * Otherwise, snd_up should be one lower.
  		 */
! 		sbappend_stream(&so->so_snd, m);
  		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
  		tp->t_force = 1;
  		error = tcp_output(tp);
Index: netccitt/if_x25subr.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netccitt/if_x25subr.c,v
retrieving revision 1.28
diff -c -r1.28 if_x25subr.c
*** netccitt/if_x25subr.c	2002/05/12 21:30:35	1.28
--- netccitt/if_x25subr.c	2002/07/03 17:26:17
***************
*** 770,776 ****
  	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
  #define transfer_sockbuf(s, f, l) \
  	while ((m = (s)->sb_mb) != NULL) \
! 		{(s)->sb_mb = m->m_nextpkt; m->m_nextpkt = 0; sbfree((s), m); f;}
  
  	if (rt)
  		rt->rt_refcnt--;
--- 770,782 ----
  	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
  #define transfer_sockbuf(s, f, l) \
  	while ((m = (s)->sb_mb) != NULL) \
! 		{ \
! 			(s)->sb_mb = m->m_nextpkt; \
! 			SB_UPDATE_TAIL((s)); \
! 			m->m_nextpkt = 0; \
! 			sbfree((s), m); \
! 			f; \
! 		}
  
  	if (rt)
  		rt->rt_refcnt--;
Index: netccitt/pk_output.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netccitt/pk_output.c,v
retrieving revision 1.16
diff -c -r1.16 pk_output.c
*** netccitt/pk_output.c	2002/05/12 21:43:57	1.16
--- netccitt/pk_output.c	2002/07/03 17:26:18
***************
*** 214,219 ****
--- 214,220 ----
  			return (NULL);
  
  		sb->sb_mb = m->m_nextpkt;
+ 		SB_UPDATE_TAIL(sb);
  		m->m_nextpkt = 0;
  		for (n = m; n; n = n->m_next)
  			sbfree(sb, n);
Index: netccitt/pk_usrreq.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netccitt/pk_usrreq.c,v
retrieving revision 1.21
diff -c -r1.21 pk_usrreq.c
*** netccitt/pk_usrreq.c	2001/11/13 00:12:59	1.21
--- netccitt/pk_usrreq.c	2002/07/03 17:26:18
***************
*** 268,273 ****
--- 268,274 ----
  			if (n && n->m_type == MT_OOBDATA) {
  				unsigned        len = n->m_pkthdr.len;
  				so->so_rcv.sb_mb = n->m_nextpkt;
+ 				SB_UPDATE_TAIL(&so->so_rcv);
  				if (len != n->m_len &&
  				    (n = m_pullup(n, len)) == 0)
  					break;
Index: sys/socketvar.h
===================================================================
RCS file: /cvsroot/syssrc/sys/sys/socketvar.h,v
retrieving revision 1.51
diff -c -r1.51 socketvar.h
*** sys/socketvar.h	2002/05/02 17:55:52	1.51
--- sys/socketvar.h	2002/07/03 17:26:19
***************
*** 93,98 ****
--- 93,101 ----
  		u_long	sb_mbmax;	/* max chars of mbufs to use */
  		long	sb_lowat;	/* low water mark */
  		struct mbuf *sb_mb;	/* the mbuf chain */
+ 		struct mbuf *sb_mbtail;	/* the last mbuf in the chain */
+ 		struct mbuf *sb_lastrecord;/* first mbuf of last record in
+ 					      socket buffer */
  		struct selinfo sb_sel;	/* process selecting read/write */
  		short	sb_flags;	/* flags, see below */
  		short	sb_timeo;	/* timeout for read/write */
***************
*** 125,130 ****
--- 128,141 ----
  	struct mbuf	*so_pendfree;	/* loaned-page mbufs w/ frees pending */
  };
  
+ #define	SB_UPDATE_TAIL(sb)						\
+ do {									\
+ 	if ((sb)->sb_mb == NULL) {					\
+ 		(sb)->sb_mbtail = NULL;					\
+ 		(sb)->sb_lastrecord = NULL;				\
+ 	}								\
+ } while (/*CONSTCOND*/0)
+ 
  /*
   * Socket state bits.
   */
***************
*** 266,271 ****
--- 277,283 ----
  	    struct mbuf *, struct mbuf *, struct proc *);
  int	uipc_ctloutput(int, struct socket *, int, int, struct mbuf **);
  void	sbappend(struct sockbuf *sb, struct mbuf *m);
+ void	sbappend_stream(struct sockbuf *sb, struct mbuf *m);
  int	sbappendaddr(struct sockbuf *sb, struct sockaddr *asa,
  	    struct mbuf *m0, struct mbuf *control);
  int	sbappendcontrol(struct sockbuf *sb, struct mbuf *m0,
***************
*** 319,324 ****
--- 331,347 ----
  
  int	sendit(struct proc *, int, struct msghdr *, int, register_t *);
  int	recvit(struct proc *, int, struct msghdr *, caddr_t, register_t *);
+ 
+ #ifdef SOCKBUF_DEBUG
+ void	sblastrecordchk(struct sockbuf *, const char *);
+ #define	SBLASTRECORDCHK(sb, where)	sblastrecordchk((sb), (where))
+ 
+ void	sblastmbufchk(struct sockbuf *, const char *);
+ #define	SBLASTMBUFCHK(sb, where)	sblastmbufchk((sb), (where))
+ #else
+ #define	SBLASTRECORDCHK(sb, where)	/* nothing */
+ #define	SBLASTMBUFCHK(sb, where)	/* nothing */
+ #endif /* SOCKBUF_DEBUG */
  
  #endif /* _KERNEL */
  

--ey/N+yb7u/X9mFhi--