Subject: Re: New patch (Re: RFC: addition of B_NESTED to buf.h and vfs_bio.c)
To: None <reinoud@netbsd.org>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-kern
Date: 01/09/2006 13:04:38
--NextPart-20060109123250-0442600
Content-Type: Text/Plain; charset=us-ascii

> > to me, it seems reverse.
> > your proposed patch changes how to implement nested buffers,
> > from B_CALL and b_private (generic way) to
> > a special-purpose hook and members (less generic way).
> 
> The special-purpose hook (i think you mean b_masterbuf) is to allow the 
> split up code in a routine that creates the nested buffers to use a 
> b_private for its own use. The newest patch does use the B_CALL method to 
> be less intrusive, see my latest patch.

the above comment was after seeing your latest patch.
my question is, what is "its own use".
i don't understand why your "less intrusive" version can't use b_private.

> > do you mean, you want to use the "another set of flags/values/callbacks"
> > for your filesystem?  for what?
> 
> maybe my wording wasn't that clear but i meant to say that implementing it 
> in this way the parts that do split up buffers dont have to know all 
> details on the current buffer implementation's quirks. Especially if we're 
> going to restreamline the buffer code, the less knowledge is needed for a 
> correct operation the better otherwise odd bugs can suddenly show up.
> 
> A simple deligation system might be plausable in the future design to 
> greatly reduce struct buf sizes and provide more customised design without 
> the need for the overloading and/or adding of variables.

what you want is an API to reduce the code to touch bp->b_* directly?
if so, i'd suggest something like the attached patch.

YAMAMOTO Takashi

--NextPart-20060109123250-0442600
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="a.diff"

Index: sys/buf.h
===================================================================
--- sys/buf.h	(revision 1508)
+++ sys/buf.h	(working copy)
@@ -301,6 +301,9 @@ struct buf *getiobuf(void);
 struct buf *getiobuf_nowait(void);
 void putiobuf(struct buf *);
 
+void nestiobuf_setup(struct buf *, struct buf *, int, size_t);
+void nestiobuf_done(struct buf *, int, int);
+
 __END_DECLS
 #endif /* _KERNEL */
 #endif /* !_SYS_BUF_H_ */
Index: kern/vfs_bio.c
===================================================================
--- kern/vfs_bio.c	(revision 1508)
+++ kern/vfs_bio.c	(working copy)
@@ -1783,3 +1783,72 @@ putiobuf(struct buf *bp)
 	pool_put(&bufiopool, bp);
 	splx(s);
 }
+
+static void
+nestiobuf_iodone(struct buf *bp)
+{
+	struct buf *mbp = bp->b_private;
+	int error;
+	int donebytes = bp->b_bcount; /* XXX ignore b_resid */
+
+	KASSERT(bp->b_bufsize == bp->b_bcount);
+	KASSERT(mbp != bp);
+	if ((bp->b_flags & B_ERROR) != 0) {
+		error = bp->b_error;
+	} else {
+		KASSERT(bp->b_resid == 0);
+		error = 0;
+	}
+	putiobuf(bp);
+	nestiobuf_done(mbp, donebytes, error);
+}
+
+void
+nestiobuf_setup(struct buf *mbp, struct buf *bp, int offset, size_t size)
+{
+	const int b_read = mbp->b_flags & B_READ;
+	struct vnode *vp = mbp->b_vp;
+
+	KASSERT(mbp->b_bcount >= offset + size);
+	bp->b_vp = vp;
+	bp->b_flags = B_BUSY | B_CALL | B_ASYNC | b_read;
+	bp->b_iodone = nestiobuf_iodone;
+	bp->b_data = mbp->b_data + offset;
+	bp->b_resid = bp->b_bcount = size;
+#if defined(DIAGNOSTIC)
+	bp->b_bufsize = bp->b_bcount;
+#endif /* defined(DIAGNOSTIC) */
+	bp->b_private = mbp;
+	BIO_COPYPRIO(bp, mbp);
+	if (!b_read && vp != NULL) {
+		int s;
+
+		s = splbio();
+		V_INCR_NUMOUTPUT(vp);
+		splx(s);
+	}
+}
+
+void
+nestiobuf_done(struct buf *mbp, int donebytes, int error)
+{
+	int s;
+
+	if (donebytes == 0) {
+		return;
+	}
+	s = splbio();
+	KASSERT(mbp->b_resid >= donebytes);
+	if (error) {
+		mbp->b_flags |= B_ERROR;
+		mbp->b_error = error;
+	}
+	mbp->b_resid -= donebytes;
+	if (mbp->b_resid == 0) {
+		if ((mbp->b_flags & B_ERROR) != 0) {
+			mbp->b_resid = mbp->b_bcount; /* be conservative */
+		}
+		biodone(mbp);
+	}
+	splx(s);
+}
Index: miscfs/genfs/genfs_vnops.c
===================================================================
--- miscfs/genfs/genfs_vnops.c	(revision 1498)
+++ miscfs/genfs/genfs_vnops.c	(working copy)
@@ -441,7 +441,7 @@ genfs_getpages(void *v)
 	off_t newsize, diskeof, memeof;
 	off_t offset, origoffset, startoffset, endoffset, raoffset;
 	daddr_t lbn, blkno;
-	int s, i, error, npages, orignpages, npgs, run, ridx, pidx, pcount;
+	int i, error, npages, orignpages, npgs, run, ridx, pidx, pcount;
 	int fs_bshift, fs_bsize, dev_bshift;
 	int flags = ap->a_flags;
 	size_t bytes, iobytes, tailbytes, totalbytes, skipbytes;
@@ -672,6 +672,10 @@ genfs_getpages(void *v)
 	mbp->b_flags = B_BUSY|B_READ| (async ? B_CALL|B_ASYNC : 0);
 	mbp->b_iodone = (async ? uvm_aio_biodone : 0);
 	mbp->b_vp = vp;
+	if (async)
+		BIO_SETPRIO(mbp, BPRIO_TIMELIMITED);
+	else
+		BIO_SETPRIO(mbp, BPRIO_TIMECRITICAL);
 
 	/*
 	 * if EOF is in the middle of the range, zero the part past EOF.
@@ -800,15 +804,9 @@ genfs_getpages(void *v)
 			bp = mbp;
 		} else {
 			bp = getiobuf();
-			bp->b_data = (char *)kva + offset - startoffset;
-			bp->b_resid = bp->b_bcount = iobytes;
-			bp->b_flags = B_BUSY|B_READ|B_CALL|B_ASYNC;
-			bp->b_iodone = uvm_aio_biodone1;
-			bp->b_vp = vp;
-			bp->b_proc = NULL;
+			nestiobuf_setup(mbp, bp, offset - startoffset, iobytes);
 		}
 		bp->b_lblkno = 0;
-		bp->b_private = mbp;
 
 		/* adjust physical blkno for partial blocks */
 		bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
@@ -818,30 +816,13 @@ genfs_getpages(void *v)
 		    "bp %p offset 0x%x bcount 0x%x blkno 0x%x",
 		    bp, offset, iobytes, bp->b_blkno);
 
-		if (async)
-			BIO_SETPRIO(bp, BPRIO_TIMELIMITED);
-		else
-			BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
-
 		curproc->p_stats->p_ru.ru_inblock++;
 
 		VOP_STRATEGY(devvp, bp);
 	}
 
 loopdone:
-	if (skipbytes) {
-		s = splbio();
-		if (error) {
-			mbp->b_flags |= B_ERROR;
-			mbp->b_error = error;
-		}
-		mbp->b_resid -= skipbytes;
-		if (mbp->b_resid == 0) {
-			biodone(mbp);
-		}
-		splx(s);
-	}
-
+	nestiobuf_done(mbp, skipbytes, error);
 	if (async) {
 		UVMHIST_LOG(ubchist, "returning 0 (async)",0,0,0,0);
 		lockmgr(&gp->g_glock, LK_RELEASE, NULL);
@@ -1479,6 +1460,12 @@ genfs_gop_write(struct vnode *vp, struct
 	mbp->b_flags = B_BUSY|B_WRITE|B_AGE| (async ? (B_CALL|B_ASYNC) : 0);
 	mbp->b_iodone = uvm_aio_biodone;
 	mbp->b_vp = vp;
+	if (curproc == uvm.pagedaemon_proc)
+		BIO_SETPRIO(mbp, BPRIO_TIMELIMITED);
+	else if (async)
+		BIO_SETPRIO(mbp, BPRIO_TIMENONCRITICAL);
+	else
+		BIO_SETPRIO(mbp, BPRIO_TIMECRITICAL);
 
 	bp = NULL;
 	for (offset = startoffset;
@@ -1504,21 +1491,12 @@ genfs_gop_write(struct vnode *vp, struct
 		if (offset == startoffset && iobytes == bytes) {
 			bp = mbp;
 		} else {
-			s = splbio();
-			V_INCR_NUMOUTPUT(vp);
-			splx(s);
-			bp = getiobuf();
 			UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
 			    vp, bp, vp->v_numoutput, 0);
-			bp->b_data = (char *)kva +
-			    (vaddr_t)(offset - pg->offset);
-			bp->b_resid = bp->b_bcount = iobytes;
-			bp->b_flags = B_BUSY|B_WRITE|B_CALL|B_ASYNC;
-			bp->b_iodone = uvm_aio_biodone1;
-			bp->b_vp = vp;
+			bp = getiobuf();
+			nestiobuf_setup(mbp, bp, offset - pg->offset, iobytes);
 		}
 		bp->b_lblkno = 0;
-		bp->b_private = mbp;
 
 		/* adjust physical blkno for partial blocks */
 		bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
@@ -1526,12 +1504,6 @@ genfs_gop_write(struct vnode *vp, struct
 		UVMHIST_LOG(ubchist,
 		    "vp %p offset 0x%x bcount 0x%x blkno 0x%x",
 		    vp, offset, bp->b_bcount, bp->b_blkno);
-		if (curproc == uvm.pagedaemon_proc)
-			BIO_SETPRIO(bp, BPRIO_TIMELIMITED);
-		else if (async)
-			BIO_SETPRIO(bp, BPRIO_TIMENONCRITICAL);
-		else
-			BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
 
 		if (p) {
 			p->p_stats->p_ru.ru_oublock++;
@@ -1541,17 +1513,8 @@ genfs_gop_write(struct vnode *vp, struct
 	}
 	if (skipbytes) {
 		UVMHIST_LOG(ubchist, "skipbytes %d", skipbytes, 0,0,0);
-		s = splbio();
-		if (error) {
-			mbp->b_flags |= B_ERROR;
-			mbp->b_error = error;
-		}
-		mbp->b_resid -= skipbytes;
-		if (mbp->b_resid == 0) {
-			biodone(mbp);
-		}
-		splx(s);
 	}
+	nestiobuf_done(mbp, skipbytes, error);
 	if (async) {
 		UVMHIST_LOG(ubchist, "returning 0 (async)", 0,0,0,0);
 		return (0);

--NextPart-20060109123250-0442600--