Subject: kern/1301: problem in kern/vfs_bio.c
To: None <gnats-bugs@gnats.netbsd.org>
From: Alasdair Baird <alasdair@wildcat.demon.co.uk>
List: netbsd-bugs
Date: 08/01/1995 00:16:47
>Number:         1301
>Category:       kern
>Synopsis:       feature in vfs_bio.c causes panics
>Confidential:   no
>Severity:       serious
>Priority:       high
>Responsible:    kern-bug-people (Kernel Bug People)
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Mon Jul 31 19:20:01 1995
>Last-Modified:
>Originator:     Alasdair Baird
>Organization:
Absolutely none whatsoever!
>Release:        NetBSD-current-verycurrent
>Environment:
	
System: NetBSD wildcat.demon.co.uk 1.0A NetBSD 1.0A (WILDCAT) #296: Mon Jul 31 23:04:31 BST 1995 root@:/usr/src/sys/arch/i386/compile/WILDCAT i386


>Description:

	A recent change to the getblk() routine in vfs_bio.c
	has introduced a feature that causes a decrease in
	stability on my system, to the point where it is made
	very difficult to do a full build due to the numerous
	crashes, with such errors as "panic: bremfree: lost tail".

	The fix that follows has been in use on my machine for
	sometime and seems to alleviate this problem.

	The cause of the problem can be found in getblk()
	where, after failing to find a block on a buffer hash
	list, the routine calls getnewbuf() to obtain a fresh
	buffer, binshash() to insert it onto the hash list
	and bgetvp() to associate it with the appropriate
	vnode.  However, before bgetvp() is called, a call to
	allocbuf() is made to grow/shrink the buffer; this routine
	can sleep if there is a temporary shortfall in available
	buffer space, which is where the problem lies.  If it
	sleeps, it is possible for another thread to see a
	buffer that is only on one of the two lists it should be
	on, hence later panics.

	The cure for this problem is to switch about the order
	of the bgetvp() and allocbuf() calls.

>How-To-Repeat:

	Abuse the buffer cache heavily.

>Fix:

	The fix below, in addition to reordering the two mis-ordered
	routine calls, eliminates the use of splbio() while traversing
	the hash list as this need only be done to inspect the B_BUSY
	field of candidate buffers.

	This patch also removes a slightly excessive splx()/splbio()
	pair from the mainline of the getnewbuf() routine.


*** vfs_bio.c.ORIG	Fri Jul 14 08:03:18 1995
--- vfs_bio.c	Mon Jul 31 23:02:56 1995
***************
*** 504,515 ****
  	daddr_t blkno;
  	int size, slpflag, slptimeo;
  {
  	struct buf *bp;
  	int s, err;
  
- start:
- 	s = splbio();
- 
  	/*
  	 * XXX
  	 * The following is an inlined version of 'incore()', but with
--- 504,513 ----
  	daddr_t blkno;
  	int size, slpflag, slptimeo;
  {
+ 	struct bufhashhdr *bh;
  	struct buf *bp;
  	int s, err;
  
  	/*
  	 * XXX
  	 * The following is an inlined version of 'incore()', but with
***************
*** 520,531 ****
  	 * case, we can't allow the system to allocate a new buffer for
  	 * the block until the write is finished.
  	 */
! loop:
!         bp = BUFHASH(vp, blkno)->lh_first;
          for (; bp != NULL; bp = bp->b_hash.le_next) {
                  if (bp->b_lblkno != blkno || bp->b_vp != vp)
  			continue;
  
  		if (ISSET(bp->b_flags, B_BUSY)) {
  			SET(bp->b_flags, B_WANTED);
  			err = tsleep(bp, slpflag | (PRIBIO + 1), "getblk",
--- 518,531 ----
  	 * case, we can't allow the system to allocate a new buffer for
  	 * the block until the write is finished.
  	 */
! 	bh = BUFHASH(vp, blkno);
! start:
!         bp = bh->lh_first;
          for (; bp != NULL; bp = bp->b_hash.le_next) {
                  if (bp->b_lblkno != blkno || bp->b_vp != vp)
  			continue;
  
+ 		s = splbio();
  		if (ISSET(bp->b_flags, B_BUSY)) {
  			SET(bp->b_flags, B_WANTED);
  			err = tsleep(bp, slpflag | (PRIBIO + 1), "getblk",
***************
*** 536,561 ****
  			goto start;
  		}
  
! 		if (!ISSET(bp->b_flags, B_INVAL))
  			break;
          }
  
! 	if (bp) {
! 		SET(bp->b_flags, (B_BUSY | B_CACHE));
! 		bremfree(bp);
! 		splx(s);
! 		allocbuf(bp, size);
! 	} else {
! 		splx(s);
  		if ((bp = getnewbuf(slpflag, slptimeo)) == NULL)
  			goto start;
! 		binshash(bp, BUFHASH(vp, blkno));
! 		allocbuf(bp, size);
  		bp->b_blkno = bp->b_lblkno = blkno;
  		s = splbio();
  		bgetvp(vp, bp);
  		splx(s);
  	}
  	return (bp);
  }
  
--- 536,560 ----
  			goto start;
  		}
  
! 		if (!ISSET(bp->b_flags, B_INVAL)) {
! 			SET(bp->b_flags, (B_BUSY | B_CACHE));
! 			bremfree(bp);
! 			splx(s);
  			break;
+ 		}
+ 		splx(s);
          }
  
! 	if (bp == NULL) {
  		if ((bp = getnewbuf(slpflag, slptimeo)) == NULL)
  			goto start;
! 		binshash(bp, bh);
  		bp->b_blkno = bp->b_lblkno = blkno;
  		s = splbio();
  		bgetvp(vp, bp);
  		splx(s);
  	}
+ 	allocbuf(bp, size);
  	return (bp);
  }
  
***************
*** 693,708 ****
  
  	/* Buffer is no longer on free lists. */
  	SET(bp->b_flags, B_BUSY);
- 	splx(s);
  
  	/* If buffer was a delayed write, start it, and go back to the top. */
  	if (ISSET(bp->b_flags, B_DELWRI)) {
  		bawrite (bp);
  		goto start;
  	}
  
  	/* disassociate us from our vnode, if we had one... */
- 	s = splbio();
  	if (bp->b_vp)
  		brelvp(bp);
  	splx(s);
--- 692,706 ----
  
  	/* Buffer is no longer on free lists. */
  	SET(bp->b_flags, B_BUSY);
  
  	/* If buffer was a delayed write, start it, and go back to the top. */
  	if (ISSET(bp->b_flags, B_DELWRI)) {
+ 		splx(s);
  		bawrite (bp);
  		goto start;
  	}
  
  	/* disassociate us from our vnode, if we had one... */
  	if (bp->b_vp)
  		brelvp(bp);
  	splx(s);
>Audit-Trail:
>Unformatted: