Subject: Re: New patch (Re: RFC: addition of B_NESTED to buf.h and vfs_bio.c)
To: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
From: Reinoud Zandijk <reinoud@netbsd.org>
List: tech-kern
Date: 01/07/2006 22:31:28
--nVMJ2NtxeReIH9PS
Content-Type: multipart/mixed; boundary="SUOF0GtieIMvvwua"
Content-Disposition: inline


--SUOF0GtieIMvvwua
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

Dear Takashi,

> In a conversation with Greg we've concluded that, since the struct buf is
> currently in flux and iobufs are just about to be introduced, it's better
> to implement the nested iobufs in a less intrusive way. I'll create a new
> DIFF_BIO patch for that. The other patches don't have to be changed.

Here it is,

Reinoud

--SUOF0GtieIMvvwua
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=DIFF_BIO

Index: sys/sys/buf.h
===================================================================
RCS file: /cvsroot/src/sys/sys/buf.h,v
retrieving revision 1.86
diff -p -r1.86 buf.h
*** sys/sys/buf.h	7 Jan 2006 00:26:58 -0000	1.86
--- sys/sys/buf.h	7 Jan 2006 21:05:41 -0000
*************** struct buf {
*** 146,151 ****
--- 146,156 ----
  	struct  workhead b_dep;		/* List of filesystem dependencies. */
  	void	*b_saveaddr;		/* Original b_addr for physio. */
  
+ 	/* nested buffer support */
+ 	struct buf *b_masterbuf;	/* If nested, points to masterbuf */
+ 	LIST_HEAD(, buf) b_nesters;	/* Keeps track of its nested bufs */
+ 	LIST_ENTRY(buf) b_nestedlist;	/* Link in masterbuf's b_nesters */
+ 
  	/*
  	 * private data for owner.
  	 *  - buffer cache buffers are owned by corresponding filesystem.
*************** do {									\
*** 174,179 ****
--- 179,187 ----
  	LIST_INIT(&(bp)->b_dep);					\
  	simple_lock_init(&(bp)->b_interlock);				\
  	(bp)->b_dev = NODEV;						\
+ 	(bp)->b_vp = NULL;						\
+ 	(bp)->b_masterbuf = NULL;					\
+ 	LIST_INIT(&(bp)->b_nesters);					\
  	BIO_SETPRIO((bp), BPRIO_DEFAULT);				\
  } while (/*CONSTCOND*/0)
  
*************** struct buf *getblk(struct vnode *, daddr
*** 282,287 ****
--- 290,297 ----
  struct buf *geteblk(int);
  struct buf *getnewbuf(int, int, int);
  struct buf *incore(struct vnode *, daddr_t);
+ struct buf *nestiobuf(struct buf *, daddr_t, caddr_t, int);
+ void	nestiobufdone(struct buf *);
  
  void	minphys(struct buf *);
  int	physio(void (*)(struct buf *), struct buf *, dev_t, int,
Index: sys/kern/vfs_bio.c
===================================================================
RCS file: /cvsroot/src/sys/kern/vfs_bio.c,v
retrieving revision 1.151
diff -p -r1.151 vfs_bio.c
*** sys/kern/vfs_bio.c	7 Jan 2006 00:26:58 -0000	1.151
--- sys/kern/vfs_bio.c	7 Jan 2006 21:05:43 -0000
*************** biodone(struct buf *bp)
*** 1395,1400 ****
--- 1395,1403 ----
  	if (!ISSET(bp->b_flags, B_READ))	/* wake up reader */
  		vwakeup(bp);
  
+ 	if (!LIST_EMPTY(&bp->b_nesters))
+ 		panic("biodone: called on master buf with nesters");
+ 
  	/*
  	 * If necessary, call out.  Unlock the buffer before calling
  	 * iodone() as the buffer isn't valid any more when it return.
*************** putiobuf(struct buf *bp)
*** 1772,1774 ****
--- 1775,1884 ----
  	pool_put(&bufiopool, bp);
  	splx(s);
  }
+ 
+ /*
+  * Get a new buffer nested into the specified buffer at the given offset and
+  * length. NO read/write actions ought to be carried out on the master buffer
+  * anymore, only on the nested buffers, as they effectively split up the
+  * master buffer's action.  Returns NULL when no iobuf could be allocated.
+  *
+  * Bug alert: make sure all nested buffers cover the complete mbp->b_resid
+  * space.  If space is to be skipped, mbp->b_resid needs to be accounted for
+  * or the biodone on mbp won't be called!
+  */
+ struct buf *
+ nestiobuf(struct buf *mbp, daddr_t blkno, caddr_t base, int size)
+ {
+ 	struct buf *bp;
+ 	int s;
+ 
+ 	KASSERT(base != NULL);
+ 	KASSERT(size > 0);
+ 	KASSERT(mbp != NULL && mbp->b_bcount >= size);
+ 
+ 	bp = getiobuf_nowait();
+ 	if (bp == NULL)
+ 		return NULL;	/* no iobuf available right now */
+ 
+ 	/* Adjust relevant information from the master buffer. */
+ 	memcpy(bp, mbp, sizeof(struct buf));
+ 	bp->b_blkno = blkno;
+ 	bp->b_data  = base;
+ 	bp->b_bufsize = bp->b_bcount = bp->b_resid = size;
+ 
+ 	/* setup our callback */
+ 	SET(bp->b_flags, B_CALL);
+ 	bp->b_iodone = nestiobufdone;
+ 
+ 	/* avoid confusion for softdep? */
+ 	LIST_INIT(&bp->b_dep);
+ 
+ 	/* link in at splbio, matching the unlink done in nestiobufdone() */
+ 	bp->b_masterbuf = mbp;
+ 	LIST_INIT(&bp->b_nesters);
+ 	s = splbio();
+ 	LIST_INSERT_HEAD(&mbp->b_nesters, bp, b_nestedlist);
+ 	/* for write requests we have to increase mbp's num_output */
+ 	if ((mbp->b_flags & B_READ) == 0) {
+ 		V_INCR_NUMOUTPUT(mbp->b_vp);
+ 	}
+ 	splx(s);
+ 
+ 	return bp;
+ }
+ 
+ /*
+  * b_iodone callback of a nested buffer: propagate its status to the master
+  * buffer, recycle it, and biodone() the master once its b_resid hits zero.
+  */
+ void
+ nestiobufdone(struct buf *bp)
+ {
+ 	struct buf *mbp;
+ 	int s;
+ 
+ 	/* Block disk interrupts. */
+ 	s = splbio();
+ 	simple_lock(&bqueue_slock);
+ 	simple_lock(&bp->b_interlock);
+ 
+ 	KASSERT(ISSET(bp->b_flags, B_BUSY));
+ 	KASSERT(!ISSET(bp->b_flags, B_CALL));
+ 	KASSERT(bp->b_masterbuf);
+ 
+ 	/* keep masterbuf for its possible iodone */
+ 	mbp = bp->b_masterbuf;
+ 
+ 	/* Wake up any processes waiting for _this_ buffer to become free. */
+ 	if (ISSET(bp->b_flags, B_WANTED)) {
+ 		CLR(bp->b_flags, B_WANTED|B_AGE);
+ 		wakeup(bp);
+ 	}
+ 
+ 	/* Propagate error status and progress to the master buffer. */
+ 	if (bp->b_flags & B_ERROR) {
+ 		mbp->b_flags |= B_ERROR;
+ 		mbp->b_error = bp->b_error;
+ 	}
+ 	mbp->b_resid -= bp->b_bcount;
+ 
+ 	/* remove ourselves from the master buffer's administration */
+ 	LIST_REMOVE(bp, b_nestedlist);
+ 	bp->b_masterbuf = NULL;		/* sanity */
+ 	bp->b_bufsize = 0;
+ 
+ 	simple_unlock(&bp->b_interlock);
+ 	simple_unlock(&bqueue_slock);
+ 
+ 	/* recycle, then allow disk interrupts again */
+ 	putiobuf(bp);
+ 	splx(s);
+ 
+ 	/* call biodone on master buffer if it's completed by this op */
+ 	if (mbp->b_resid == 0) {
+ 		if (!LIST_EMPTY(&mbp->b_nesters))
+ 			panic("nestiobufdone: master buf still has nesters");
+ 		biodone(mbp);
+ 	}
+ }
+ 

--SUOF0GtieIMvvwua--

--nVMJ2NtxeReIH9PS
Content-Type: application/pgp-signature
Content-Disposition: inline

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.2.6 (NetBSD)

iQEUAwUBQ8AzKYKcNwBDyKpoAQIzPAf48G1kR9g9gz9EfmobxDCF5OtNjOw+y6+4
eDZnH0BdqkhCj6KdK2x7/ELzJX702kcqndI55oYWr/Y+G4a3vH3eekgXYB07jTEU
FfkM+yN8kVMACGjGXA+DDX9inE/dLKfZQQxK0272cW6EoezbabtQrr2rcvIwzguS
2og4fCzx8jKeEeRnO/GIhWpmm5UxM7Pte+ZjLljQWmdw5i2LLacM5HWjghVpslpz
wjwRAEbMDLivfe4kVu/iuSel2+yorZoVizXzX4G3g9ju38rHgZ198sMSJzqF6QQB
MAn/+zBPQG37IdYH9VIYzZoFuVdSEeRQq2cvmsJtDCNrxML/ZaQi
=1LNj
-----END PGP SIGNATURE-----

--nVMJ2NtxeReIH9PS--