Subject: Re: Add "last record" and "last mbuf" pointers to sockbuf
To: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
From: Jason R Thorpe <thorpej@wasabisystems.com>
List: tech-kern
Date: 07/02/2002 21:00:43
--vni90+aGYgRvsTuO
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

On Wed, Jul 03, 2002 at 05:14:46AM +0900, YAMAMOTO Takashi wrote:

 > why not if you want people to debug it? :-)

Ok, you asked for it :-)

Attached is my current diff.  I am stuck on the following panic:

.
.
.
boot device: sd0
root on sd0a dumps on sd0b
root file system type: ffs
Tue Jul  2 20:50:29 PDT 2002
Starting file system checks:
/dev/rsd0a: file system is clean; not checking
Setting tty flags.
Setting sysctl variables:
kern.sbmax: 262144 -> 16777216
Starting network.
Hostname: e7500.fast-100.shagadelic.org
NIS domainname: SHAG-NET
IPv6 mode: autoconfigured host
Configuring network interfaces: fxp0 bge0.
add net default: gateway 192.168.2.1
Adding interface aliases:
Building databases...
Starting syslogd.
Checking for core dump...
savecore: /dev/sd0b: Device not configured
Jul  2 20:50:34 e7500 savecore: /dev/sd0b: Device not configured
Setting date via ntp.
Starting rpcbind.
Starting ypbind.
Mounting all filesystems...
Clearing /tmp.
Starting amd.
Creating a.out runtime link editor directory cache.
Checking quotas: done.
Starting statd.
Starting lockd.
swapctl: adding /swapfile as swap device at priority 0
Starting virecover.
starting local daemons:.
Updating motd.
Starting ntpd.
Starting rtsold.
Starting sshd.
bge0: gigabit link up
Starting inetd.
Starting cron.
Tue Jul  2 20:50:36 PDT 2002
sblastrecordchk: sb_mb 0xc0f8a900 sb_lastrecord 0x0 last 0xc0f8e500
packet chain:
        0xc0f8a900
        0xc0f8a100
        0xc0f8ae00
        0xc0f8e500
panic: sblastrecordchk from soreceive 4

Stopped in pid 192 (rtsold) at  cpu_Debugger+0xc:       movl    %ebp,%esp
db> trace
cpu_Debugger(0,c0faa3e0,c0faa39c,c01af695,c02951f4) at cpu_Debugger+0xc
panic(c0295208,c029515e,0,c0f8e500,0) at panic+0xb2
sblastrecordchk(c0faa3e0,c029515e,e351cee0,c01adbe2,e351cf80,800,e31f98f0,0,4,e3
1d42e8,c0faa3e0,0,ffffffff,0,1,c0f8a900,c02fed18,0,1,0,0,0,0,1c,0,0,e351ce90,c01
a2371,c0faa39c,0,e351cee0,0,0,0,e351ce90,c01a2353,0,30,e351cf00,c019f011,e31f98f
0,e31f9918,e351cee0,c0b90f00,1,c019ef7e,e351cf00,c019ef7e,e351cf80,5,e351f00c,c0
1b1102,e351cf80,e351cf78,e351cf38,e351cf80,bfbfd52c,754,e351ced8,1,e351d02c,5) a
t sblastrecordchk+0x7d
soreceive(c0faa39c,0,e351cee0,0,0) at soreceive+0xc19
soo_read(e31f98f0,e31f9918,e351cee0,c0b90f00,1) at soo_read+0x29
dofileread(e351f00c,5,e31f98f0,bfbfd480,800) at dofileread+0xa1
sys_read(e351f00c,e351cf80,e351cf78,c021d8f2) at sys_read+0x72
syscall_plain(804001f,bfbf001f,bfbf001f,480f001f,8051030) at syscall_plain+0xaf
db> 

It is possible that my assertion checks are wrong, but I really need
a fresh set of eyes to look at this :-)

-- 
        -- Jason R. Thorpe <thorpej@wasabisystems.com>

--vni90+aGYgRvsTuO
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=sbappend-3

Index: kern/uipc_socket.c
===================================================================
RCS file: /cvsroot/syssrc/sys/kern/uipc_socket.c,v
retrieving revision 1.68
diff -c -r1.68 uipc_socket.c
*** kern/uipc_socket.c	2002/06/11 00:21:33	1.68
--- kern/uipc_socket.c	2002/07/03 03:56:01
***************
*** 902,911 ****
--- 902,919 ----
  		goto restart;
  	}
   dontblock:
+ 	/*
+ 	 * On entry here, m points to the first record of the socket buffer.
+ 	 * While we process the initial mbufs containing address and control
+ 	 * info, we save a copy of m->m_nextpkt into nextrecord.
+ 	 */
  #ifdef notyet /* XXXX */
  	if (uio->uio_procp)
  		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
  #endif
+ 	KASSERT(m == so->so_rcv.sb_mb);
+ 	SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
+ 	SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
  	nextrecord = m->m_nextpkt;
  	if (pr->pr_flags & PR_ADDR) {
  #ifdef DIAGNOSTIC
***************
*** 923,932 ****
--- 931,942 ----
  			if (paddr) {
  				*paddr = m;
  				so->so_rcv.sb_mb = m->m_next;
+ 				SB_UPDATE_TAIL(&so->so_rcv);
  				m->m_next = 0;
  				m = so->so_rcv.sb_mb;
  			} else {
  				MFREE(m, so->so_rcv.sb_mb);
+ 				SB_UPDATE_TAIL(&so->so_rcv);
  				m = so->so_rcv.sb_mb;
  			}
  		}
***************
*** 946,955 ****
--- 956,967 ----
  					error = (*pr->pr_domain->dom_externalize)(m);
  				*controlp = m;
  				so->so_rcv.sb_mb = m->m_next;
+ 				SB_UPDATE_TAIL(&so->so_rcv);
  				m->m_next = 0;
  				m = so->so_rcv.sb_mb;
  			} else {
  				MFREE(m, so->so_rcv.sb_mb);
+ 				SB_UPDATE_TAIL(&so->so_rcv);
  				m = so->so_rcv.sb_mb;
  			}
  		}
***************
*** 958,963 ****
--- 970,982 ----
  			controlp = &(*controlp)->m_next;
  		}
  	}
+ 
+ 	/*
+ 	 * If m is non-NULL, we have some data to read.  From now on,
+ 	 * make sure to keep sb_lastrecord consistent when working on
+ 	 * the last packet on the chain (nextrecord == NULL) and we
+ 	 * change m->m_nextpkt.
+ 	 */
  	if (m) {
  		if ((flags & MSG_PEEK) == 0)
  			m->m_nextpkt = nextrecord;
***************
*** 965,970 ****
--- 984,1002 ----
  		if (type == MT_OOBDATA)
  			flags |= MSG_OOB;
  	}
+ 
+ 	/*
+ 	 * If nextrecord == NULL (this is a single chain), then
+ 	 * sb_lastrecord may not be valid here if m was changed
+ 	 * earlier.
+ 	 */
+ 	if (nextrecord == NULL && (flags & MSG_PEEK) == 0) {
+ 		KASSERT(so->so_rcv.sb_mb == m);
+ 		so->so_rcv.sb_lastrecord = m;
+ 	}
+ 	SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
+ 	SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");
+ 
  	moff = 0;
  	offset = 0;
  	while (m && uio->uio_resid > 0 && error == 0) {
***************
*** 1028,1040 ****
  					*mp = m;
  					mp = &m->m_next;
  					so->so_rcv.sb_mb = m = m->m_next;
  					*mp = (struct mbuf *)0;
  				} else {
  					MFREE(m, so->so_rcv.sb_mb);
  					m = so->so_rcv.sb_mb;
  				}
! 				if (m)
  					m->m_nextpkt = nextrecord;
  			}
  		} else {
  			if (flags & MSG_PEEK)
--- 1060,1090 ----
  					*mp = m;
  					mp = &m->m_next;
  					so->so_rcv.sb_mb = m = m->m_next;
+ //					SB_UPDATE_TAIL(&so->so_rcv);
  					*mp = (struct mbuf *)0;
  				} else {
  					MFREE(m, so->so_rcv.sb_mb);
+ //					SB_UPDATE_TAIL(&so->so_rcv);
  					m = so->so_rcv.sb_mb;
  				}
! 				/*
! 				 * If m != NULL, we also know that
! 				 * so->so_rcv.sb_mb != NULL.
! 				 */
! 				if (m) {
  					m->m_nextpkt = nextrecord;
+ 					if (nextrecord == NULL)
+ 						so->so_rcv.sb_lastrecord = m;
+ 				} else {
+ 					/*
+ 					 * This replaces using SB_UPDATE_TAIL()
+ 					 * above.
+ 					 */
+ 					so->so_rcv.sb_lastrecord = NULL;
+ 					so->so_rcv.sb_mbtail = NULL;
+ 				}
+ 				SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
+ 				SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
  			}
  		} else {
  			if (flags & MSG_PEEK)
***************
*** 1107,1114 ****
  			(void) sbdroprecord(&so->so_rcv);
  	}
  	if ((flags & MSG_PEEK) == 0) {
! 		if (m == 0)
  			so->so_rcv.sb_mb = nextrecord;
  		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
  			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
  			    (struct mbuf *)(long)flags, (struct mbuf *)0,
--- 1157,1177 ----
  			(void) sbdroprecord(&so->so_rcv);
  	}
  	if ((flags & MSG_PEEK) == 0) {
! 		if (m == 0) {
! 			/*
! 			 * First part is an inline SB_UPDATE_TAIL().  Second
! 			 * part makes sure sb_lastrecord is up-to-date if
! 			 * there is still data in the socket buffer.
! 			 */
  			so->so_rcv.sb_mb = nextrecord;
+ 			if (so->so_rcv.sb_mb == NULL) {
+ 				so->so_rcv.sb_mbtail = NULL;
+ 				so->so_rcv.sb_lastrecord = NULL;
+ 			} else if (nextrecord->m_nextpkt == NULL)
+ 				so->so_rcv.sb_lastrecord = nextrecord;
+ 		}
+ 		SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
+ 		SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
  		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
  			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
  			    (struct mbuf *)(long)flags, (struct mbuf *)0,
Index: kern/uipc_socket2.c
===================================================================
RCS file: /cvsroot/syssrc/sys/kern/uipc_socket2.c,v
retrieving revision 1.42
diff -c -r1.42 uipc_socket2.c
*** kern/uipc_socket2.c	2001/11/12 15:25:33	1.42
--- kern/uipc_socket2.c	2002/07/03 03:56:01
***************
*** 425,430 ****
--- 425,485 ----
   * or sbdroprecord() when the data is acknowledged by the peer.
   */
  
+ #ifdef SOCKBUF_DEBUG
+ void
+ sblastrecordchk(struct sockbuf *sb, const char *where)
+ {
+ 	struct mbuf *m = sb->sb_mb;
+ 
+ 	while (m && m->m_nextpkt)
+ 		m = m->m_nextpkt;
+ 
+ 	if (m != sb->sb_lastrecord) {
+ 		printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
+ 		    sb->sb_mb, sb->sb_lastrecord, m);
+ 		printf("packet chain:\n");
+ 		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
+ 			printf("\t%p\n", m);
+ 		panic("sblastrecordchk from %s\n", where);
+ 	}
+ }
+ 
+ void
+ sblastmbufchk(struct sockbuf *sb, const char *where)
+ {
+ 	struct mbuf *m = sb->sb_mb;
+ 	struct mbuf *n;
+ 
+ 	while (m && m->m_nextpkt)
+ 		m = m->m_nextpkt;
+ 
+ 	while (m && m->m_next)
+ 		m = m->m_next;
+ 
+ 	if (m != sb->sb_mbtail) {
+ 		printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
+ 		    sb->sb_mb, sb->sb_mbtail, m);
+ 		printf("packet tree:\n");
+ 		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
+ 			printf("\t");
+ 			for (n = m; n != NULL; n = n->m_next)
+ 				printf("%p ", n);
+ 			printf("\n");
+ 		}
+ 		panic("sblastmbufchk from %s", where);
+ 	}
+ }
+ #endif /* SOCKBUF_DEBUG */
+ 
+ #define	SBLINKRECORD(sb, m0)						\
+ do {									\
+ 	if ((sb)->sb_lastrecord != NULL)				\
+ 		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
+ 	else								\
+ 		(sb)->sb_mb = (m0);					\
+ 	(sb)->sb_lastrecord = (m0);					\
+ } while (/*CONSTCOND*/0)
+ 
  /*
   * Append mbuf chain m to the last record in the
   * socket buffer sb.  The additional space associated
***************
*** 438,454 ****
  
  	if (m == 0)
  		return;
! 	if ((n = sb->sb_mb) != NULL) {
! 		while (n->m_nextpkt)
! 			n = n->m_nextpkt;
  		do {
  			if (n->m_flags & M_EOR) {
  				sbappendrecord(sb, m); /* XXXXXX!!!! */
  				return;
  			}
  		} while (n->m_next && (n = n->m_next));
  	}
  	sbcompress(sb, m, n);
  }
  
  #ifdef SOCKBUF_DEBUG
--- 493,546 ----
  
  	if (m == 0)
  		return;
! 
! 	SBLASTRECORDCHK(sb, "sbappend 1");
! 
! 	if ((n = sb->sb_lastrecord) != NULL) {
! 		/*
! 		 * XXX Would like to simply use sb_mbtail here, but
! 		 * XXX I need to verify that I won't miss an EOR that
! 		 * XXX way.
! 		 */
  		do {
  			if (n->m_flags & M_EOR) {
  				sbappendrecord(sb, m); /* XXXXXX!!!! */
  				return;
  			}
  		} while (n->m_next && (n = n->m_next));
+ 	} else {
+ 		/*
+ 		 * If this is the first record in the socket buffer, it's
+ 		 * also the last record.
+ 		 */
+ 		sb->sb_lastrecord = m;
  	}
  	sbcompress(sb, m, n);
+ 	SBLASTRECORDCHK(sb, "sbappend 2");
+ }
+ 
+ /*
+  * This version of sbappend() should only be used when the caller
+  * absolutely knows that there will never be more than one record
+  * in the socket buffer, that is, a stream protocol (such as TCP).
+  */
+ void
+ sbappend_stream(struct sockbuf *sb, struct mbuf *m)
+ {
+ 
+ 	KASSERT(m->m_nextpkt == NULL);	/* XXXJRT KDASSERT */
+ 	KASSERT(sb->sb_mb == sb->sb_lastrecord);
+ 
+ 	/*
+ 	 * If this is the first record in the socket buffer, it's
+ 	 * also the last record.
+ 	 */
+ 	if (sb->sb_mb == NULL)
+ 		sb->sb_lastrecord = m;
+ 
+ 	SBLASTMBUFCHK(sb, __func__);
+ 
+ 	sbcompress(sb, m, sb->sb_mbtail);
  }
  
  #ifdef SOCKBUF_DEBUG
***************
*** 456,462 ****
  sbcheck(struct sockbuf *sb)
  {
  	struct mbuf	*m;
! 	int		len, mbcnt;
  
  	len = 0;
  	mbcnt = 0;
--- 548,554 ----
  sbcheck(struct sockbuf *sb)
  {
  	struct mbuf	*m;
! 	u_long		len, mbcnt;
  
  	len = 0;
  	mbcnt = 0;
***************
*** 469,475 ****
  			panic("sbcheck nextpkt");
  	}
  	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
! 		printf("cc %d != %d || mbcnt %d != %d\n", len, sb->sb_cc,
  		    mbcnt, sb->sb_mbcnt);
  		panic("sbcheck");
  	}
--- 561,567 ----
  			panic("sbcheck nextpkt");
  	}
  	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
! 		printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
  		    mbcnt, sb->sb_mbcnt);
  		panic("sbcheck");
  	}
***************
*** 487,504 ****
  
  	if (m0 == 0)
  		return;
! 	if ((m = sb->sb_mb) != NULL)
! 		while (m->m_nextpkt)
! 			m = m->m_nextpkt;
  	/*
  	 * Put the first mbuf on the queue.
  	 * Note this permits zero length records.
  	 */
  	sballoc(sb, m0);
! 	if (m)
! 		m->m_nextpkt = m0;
! 	else
! 		sb->sb_mb = m0;
  	m = m0->m_next;
  	m0->m_next = 0;
  	if (m && (m0->m_flags & M_EOR)) {
--- 579,592 ----
  
  	if (m0 == 0)
  		return;
! 
  	/*
  	 * Put the first mbuf on the queue.
  	 * Note this permits zero length records.
  	 */
  	sballoc(sb, m0);
! 	SBLASTRECORDCHK(sb, "sbappendrecord 1");
! 	SBLINKRECORD(sb, m0);
  	m = m0->m_next;
  	m0->m_next = 0;
  	if (m && (m0->m_flags & M_EOR)) {
***************
*** 506,511 ****
--- 594,600 ----
  		m->m_flags |= M_EOR;
  	}
  	sbcompress(sb, m, m0);
+ 	SBLASTRECORDCHK(sb, "sbappendrecord 2");
  }
  
  /*
***************
*** 520,525 ****
--- 609,617 ----
  
  	if (m0 == 0)
  		return;
+ 
+ 	SBLASTRECORDCHK(sb, "sbinsertoob 1");
+ 
  	for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
  	    again:
  		switch (m->m_type) {
***************
*** 539,544 ****
--- 631,640 ----
  	 */
  	sballoc(sb, m0);
  	m0->m_nextpkt = *mp;
+ 	if (*mp == NULL) {
+ 		/* m0 is actually the new tail */
+ 		sb->sb_lastrecord = m0;
+ 	}
  	*mp = m0;
  	m = m0->m_next;
  	m0->m_next = 0;
***************
*** 547,552 ****
--- 643,649 ----
  		m->m_flags |= M_EOR;
  	}
  	sbcompress(sb, m, m0);
+ 	SBLASTRECORDCHK(sb, "sbinsertoob 2");
  }
  
  /*
***************
*** 559,565 ****
  sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0,
  	struct mbuf *control)
  {
! 	struct mbuf	*m, *n;
  	int		space;
  
  	space = asa->sa_len;
--- 656,662 ----
  sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0,
  	struct mbuf *control)
  {
! 	struct mbuf	*m, *n, *nlast;
  	int		space;
  
  	space = asa->sa_len;
***************
*** 592,612 ****
  	else
  		control = m0;
  	m->m_next = control;
! 	for (n = m; n; n = n->m_next)
  		sballoc(sb, n);
! 	if ((n = sb->sb_mb) != NULL) {
! 		while (n->m_nextpkt)
! 			n = n->m_nextpkt;
! 		n->m_nextpkt = m;
! 	} else
! 		sb->sb_mb = m;
  	return (1);
  }
  
  int
  sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
  {
! 	struct mbuf	*m, *n;
  	int		space;
  
  	space = 0;
--- 689,715 ----
  	else
  		control = m0;
  	m->m_next = control;
! 
! 	SBLASTRECORDCHK(sb, "sbappendaddr 1");
! 
! 	for (n = m; n->m_next != NULL; n = n->m_next)
  		sballoc(sb, n);
! 	sballoc(sb, n);
! 	nlast = n;
! 	SBLINKRECORD(sb, m);
! 
! 	sb->sb_mbtail = nlast;
! 	SBLASTMBUFCHK(sb, "sbappendaddr");
! 
! 	SBLASTRECORDCHK(sb, "sbappendaddr 2");
! 
  	return (1);
  }
  
  int
  sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
  {
! 	struct mbuf	*m, *mlast, *n;
  	int		space;
  
  	space = 0;
***************
*** 623,636 ****
  	if (space > sbspace(sb))
  		return (0);
  	n->m_next = m0;			/* concatenate data to control */
! 	for (m = control; m; m = m->m_next)
  		sballoc(sb, m);
! 	if ((n = sb->sb_mb) != NULL) {
! 		while (n->m_nextpkt)
! 			n = n->m_nextpkt;
! 		n->m_nextpkt = control;
! 	} else
! 		sb->sb_mb = control;
  	return (1);
  }
  
--- 726,745 ----
  	if (space > sbspace(sb))
  		return (0);
  	n->m_next = m0;			/* concatenate data to control */
! 
! 	SBLASTRECORDCHK(sb, "sbappendcontrol 1");
! 
! 	for (m = control; m->m_next != NULL; m = m->m_next)
  		sballoc(sb, m);
! 	sballoc(sb, m);
! 	mlast = m;
! 	SBLINKRECORD(sb, control);
! 
! 	sb->sb_mbtail = mlast;
! 	SBLASTMBUFCHK(sb, "sbappendcontrol");
! 
! 	SBLASTRECORDCHK(sb, "sbappendcontrol 2");
! 
  	return (1);
  }
  
***************
*** 671,676 ****
--- 780,786 ----
  			n->m_next = m;
  		else
  			sb->sb_mb = m;
+ 		sb->sb_mbtail = m;
  		sballoc(sb, m);
  		n = m;
  		m->m_flags &= ~M_EOR;
***************
*** 683,688 ****
--- 793,799 ----
  		else
  			printf("semi-panic: sbcompress\n");
  	}
+ 	SBLASTMBUFCHK(sb, __func__);
  }
  
  /*
***************
*** 693,704 ****
  sbflush(struct sockbuf *sb)
  {
  
! 	if (sb->sb_flags & SB_LOCK)
! 		panic("sbflush");
  	while (sb->sb_mbcnt)
  		sbdrop(sb, (int)sb->sb_cc);
! 	if (sb->sb_cc || sb->sb_mb)
! 		panic("sbflush 2");
  }
  
  /*
--- 804,818 ----
  sbflush(struct sockbuf *sb)
  {
  
! 	KASSERT((sb->sb_flags & SB_LOCK) == 0);
! 
  	while (sb->sb_mbcnt)
  		sbdrop(sb, (int)sb->sb_cc);
! 
! 	KASSERT(sb->sb_cc == 0);
! 	KASSERT(sb->sb_mb == NULL);
! 	KASSERT(sb->sb_mbtail == NULL);
! 	KASSERT(sb->sb_lastrecord == NULL);
  }
  
  /*
***************
*** 739,744 ****
--- 853,869 ----
  		m->m_nextpkt = next;
  	} else
  		sb->sb_mb = next;
+ 	/*
+ 	 * First part is an inline SB_UPDATE_TAIL().  Second part
+ 	 * makes sure sb_lastrecord is up-to-date if we dropped
+ 	 * part of the last record.
+ 	 */
+ 	m = sb->sb_mb;
+ 	if (m == NULL) {
+ 		sb->sb_mbtail = NULL;
+ 		sb->sb_lastrecord = NULL;
+ 	} else if (m->m_nextpkt == NULL)
+ 		sb->sb_lastrecord = m;
  }
  
  /*
***************
*** 758,763 ****
--- 883,889 ----
  			MFREE(m, mn);
  		} while ((m = mn) != NULL);
  	}
+ 	SB_UPDATE_TAIL(sb);
  }
  
  /*
Index: netinet/tcp_input.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/tcp_input.c,v
retrieving revision 1.146
diff -c -r1.146 tcp_input.c
*** netinet/tcp_input.c	2002/06/30 22:40:35	1.146
--- netinet/tcp_input.c	2002/07/03 03:56:05
***************
*** 664,670 ****
  	if (so->so_state & SS_CANTRCVMORE)
  		m_freem(q->ipqe_m);
  	else
! 		sbappend(&so->so_rcv, q->ipqe_m);
  	pool_put(&ipqent_pool, q);
  	sorwakeup(so);
  	return (pkt_flags);
--- 665,671 ----
  	if (so->so_state & SS_CANTRCVMORE)
  		m_freem(q->ipqe_m);
  	else
! 		sbappend_stream(&so->so_rcv, q->ipqe_m);
  	pool_put(&ipqent_pool, q);
  	sorwakeup(so);
  	return (pkt_flags);
***************
*** 1524,1530 ****
  			 * to socket buffer.
  			 */
  			m_adj(m, toff + off);
! 			sbappend(&so->so_rcv, m);
  			sorwakeup(so);
  			TCP_SETUP_ACK(tp, th);
  			if (tp->t_flags & TF_ACKNOW)
--- 1525,1531 ----
  			 * to socket buffer.
  			 */
  			m_adj(m, toff + off);
! 			sbappend_stream(&so->so_rcv, m);
  			sorwakeup(so);
  			TCP_SETUP_ACK(tp, th);
  			if (tp->t_flags & TF_ACKNOW)
***************
*** 2263,2269 ****
  			tcpstat.tcps_rcvbyte += tlen;
  			ND6_HINT(tp);
  			m_adj(m, hdroptlen);
! 			sbappend(&(so)->so_rcv, m);
  			sorwakeup(so);
  		} else {
  			m_adj(m, hdroptlen);
--- 2264,2270 ----
  			tcpstat.tcps_rcvbyte += tlen;
  			ND6_HINT(tp);
  			m_adj(m, hdroptlen);
! 			sbappend_stream(&(so)->so_rcv, m);
  			sorwakeup(so);
  		} else {
  			m_adj(m, hdroptlen);
Index: netinet/tcp_usrreq.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.71
diff -c -r1.71 tcp_usrreq.c
*** netinet/tcp_usrreq.c	2002/06/09 16:33:44	1.71
--- netinet/tcp_usrreq.c	2002/07/03 03:56:09
***************
*** 508,514 ****
  			error = EINVAL;
  			break;
  		}
! 		sbappend(&so->so_snd, m);
  		error = tcp_output(tp);
  		break;
  
--- 508,514 ----
  			error = EINVAL;
  			break;
  		}
! 		sbappend_stream(&so->so_snd, m);
  		error = tcp_output(tp);
  		break;
  
***************
*** 564,570 ****
  		 * of data past the urgent section.
  		 * Otherwise, snd_up should be one lower.
  		 */
! 		sbappend(&so->so_snd, m);
  		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
  		tp->t_force = 1;
  		error = tcp_output(tp);
--- 564,570 ----
  		 * of data past the urgent section.
  		 * Otherwise, snd_up should be one lower.
  		 */
! 		sbappend_stream(&so->so_snd, m);
  		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
  		tp->t_force = 1;
  		error = tcp_output(tp);
Index: sys/socketvar.h
===================================================================
RCS file: /cvsroot/syssrc/sys/sys/socketvar.h,v
retrieving revision 1.51
diff -c -r1.51 socketvar.h
*** sys/socketvar.h	2002/05/02 17:55:52	1.51
--- sys/socketvar.h	2002/07/03 03:56:10
***************
*** 1,3 ****
--- 1,4 ----
+ #define	SOCKBUF_DEBUG
  /*	$NetBSD: socketvar.h,v 1.51 2002/05/02 17:55:52 thorpej Exp $	*/
  
  /*-
***************
*** 93,98 ****
--- 94,102 ----
  		u_long	sb_mbmax;	/* max chars of mbufs to use */
  		long	sb_lowat;	/* low water mark */
  		struct mbuf *sb_mb;	/* the mbuf chain */
+ 		struct mbuf *sb_mbtail;	/* the last mbuf in the chain */
+ 		struct mbuf *sb_lastrecord;/* first mbuf of last record in
+ 					      socket buffer */
  		struct selinfo sb_sel;	/* process selecting read/write */
  		short	sb_flags;	/* flags, see below */
  		short	sb_timeo;	/* timeout for read/write */
***************
*** 125,130 ****
--- 129,152 ----
  	struct mbuf	*so_pendfree;	/* loaned-page mbufs w/ frees pending */
  };
  
+ #define	SB_UPDATE(sb, newm)						\
+ do {									\
+ 	if ((newm) == NULL) {						\
+ 		(sb)->sb_mbtail = NULL;					\
+ 		(sb)->sb_lastrecord = NULL;				\
+ 	} else if ((sb)->sb_mb == (sb)->sb_lastrecord)			\
+ 		(sb)->sb_lastrecord = (newm);				\
+ 	(sb)->sb_mb = (newm);						\
+ } while (/*CONSTCOND*/0)
+ 
+ #define	SB_UPDATE_TAIL(sb)						\
+ do {									\
+ 	if ((sb)->sb_mb == NULL) {					\
+ 		(sb)->sb_mbtail = NULL;					\
+ 		(sb)->sb_lastrecord = NULL;				\
+ 	}								\
+ } while (/*CONSTCOND*/0)
+ 
  /*
   * Socket state bits.
   */
***************
*** 266,271 ****
--- 288,294 ----
  	    struct mbuf *, struct mbuf *, struct proc *);
  int	uipc_ctloutput(int, struct socket *, int, int, struct mbuf **);
  void	sbappend(struct sockbuf *sb, struct mbuf *m);
+ void	sbappend_stream(struct sockbuf *sb, struct mbuf *m);
  int	sbappendaddr(struct sockbuf *sb, struct sockaddr *asa,
  	    struct mbuf *m0, struct mbuf *control);
  int	sbappendcontrol(struct sockbuf *sb, struct mbuf *m0,
***************
*** 319,324 ****
--- 342,358 ----
  
  int	sendit(struct proc *, int, struct msghdr *, int, register_t *);
  int	recvit(struct proc *, int, struct msghdr *, caddr_t, register_t *);
+ 
+ #ifdef SOCKBUF_DEBUG
+ void	sblastrecordchk(struct sockbuf *, const char *);
+ #define	SBLASTRECORDCHK(sb, where)	sblastrecordchk((sb), (where))
+ 
+ void	sblastmbufchk(struct sockbuf *, const char *);
+ #define	SBLASTMBUFCHK(sb, where)	sblastmbufchk((sb), (where))
+ #else
+ #define	SBLASTRECORDCHK(sb, where)	/* nothing */
+ #define	SBLASTMBUFCHK(sb, where)	/* nothing */
+ #endif /* SOCKBUF_DEBUG */
  
  #endif /* _KERNEL */
  

--vni90+aGYgRvsTuO--