Subject: Re: New patch (Re: RFC: addition of B_NESTED to buf.h and vfs_bio.c)
To: None <reinoud@netbsd.org>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-kern
Date: 01/09/2006 13:04:38
--NextPart-20060109123250-0442600
Content-Type: Text/Plain; charset=us-ascii
> > to me, it seems reverse.
> > your proposed patch changes how to implement nested buffers,
> > from B_CALL and b_private (generic way) to
> > a special-purpose hook and members (less generic way).
>
> The special-purpose hook (i think you mean b_masterbuf) is to allow the
> split up code in a routine that creates the nested buffers to use a
> b_private for its own use. The newest patch does use the B_CALL method to
> be less intrusive, see my latest patch.
the above comment was after seeing your latest patch.
my question is, what is "its own use".
i don't understand why your "less intrusive" version can't use b_private.
> > do you mean, you want to use the "another set of flags/values/callbacks"
> > for your filesystem? for what?
>
> maybe my wording wasn't that clear but i meant to say that implementing it
> in this way the parts that do split up buffers dont have to know all
> details on the current buffer implementation's quirks. Especially if we're
> going to restreamline the buffer code, the less knowledge is needed for a
> correct operation the better otherwise odd bugs can suddenly show up.
>
> A simple deligation system might be plausable in the future design to
> greatly reduce struct buf sizes and provide more customised design without
> the need for the overloading and/or adding of variables.
what you want is an API to reduce the code to touch bp->b_* directly?
if so, i'd suggest something like the attached patch.
YAMAMOTO Takashi
--NextPart-20060109123250-0442600
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="a.diff"
Index: sys/buf.h
===================================================================
--- sys/buf.h (revision 1508)
+++ sys/buf.h (working copy)
@@ -301,6 +301,9 @@ struct buf *getiobuf(void);
struct buf *getiobuf_nowait(void);
void putiobuf(struct buf *);
+void nestiobuf_setup(struct buf *, struct buf *, int, size_t);
+void nestiobuf_done(struct buf *, int, int);
+
__END_DECLS
#endif /* _KERNEL */
#endif /* !_SYS_BUF_H_ */
Index: kern/vfs_bio.c
===================================================================
--- kern/vfs_bio.c (revision 1508)
+++ kern/vfs_bio.c (working copy)
@@ -1783,3 +1783,72 @@ putiobuf(struct buf *bp)
pool_put(&bufiopool, bp);
splx(s);
}
+
+static void
+nestiobuf_iodone(struct buf *bp)
+{
+ struct buf *mbp = bp->b_private;
+ int error;
+ int donebytes = bp->b_bcount; /* XXX ignore b_resid */
+
+ KASSERT(bp->b_bufsize == bp->b_bcount);
+ KASSERT(mbp != bp);
+ if ((bp->b_flags & B_ERROR) != 0) {
+ error = bp->b_error;
+ } else {
+ KASSERT(bp->b_resid == 0);
+ error = 0;
+ }
+ putiobuf(bp);
+ nestiobuf_done(mbp, donebytes, error);
+}
+
+void
+nestiobuf_setup(struct buf *mbp, struct buf *bp, int offset, size_t size)
+{
+ const int b_read = mbp->b_flags & B_READ;
+ struct vnode *vp = mbp->b_vp;
+
+ KASSERT(mbp->b_bcount >= offset + size);
+ bp->b_vp = vp;
+ bp->b_flags = B_BUSY | B_CALL | B_ASYNC | b_read;
+ bp->b_iodone = nestiobuf_iodone;
+ bp->b_data = mbp->b_data + offset;
+ bp->b_resid = bp->b_bcount = size;
+#if defined(DIAGNOSTIC)
+ bp->b_bufsize = bp->b_bcount;
+#endif /* defined(DIAGNOSTIC) */
+ bp->b_private = mbp;
+ BIO_COPYPRIO(bp, mbp);
+ if (!b_read && vp != NULL) {
+ int s;
+
+ s = splbio();
+ V_INCR_NUMOUTPUT(vp);
+ splx(s);
+ }
+}
+
+void
+nestiobuf_done(struct buf *mbp, int donebytes, int error)
+{
+ int s;
+
+ if (donebytes == 0) {
+ return;
+ }
+ s = splbio();
+ KASSERT(mbp->b_resid >= donebytes);
+ if (error) {
+ mbp->b_flags |= B_ERROR;
+ mbp->b_error = error;
+ }
+ mbp->b_resid -= donebytes;
+ if (mbp->b_resid == 0) {
+ if ((mbp->b_flags & B_ERROR) != 0) {
+ mbp->b_resid = mbp->b_bcount; /* be conservative */
+ }
+ biodone(mbp);
+ }
+ splx(s);
+}
Index: miscfs/genfs/genfs_vnops.c
===================================================================
--- miscfs/genfs/genfs_vnops.c (revision 1498)
+++ miscfs/genfs/genfs_vnops.c (working copy)
@@ -441,7 +441,7 @@ genfs_getpages(void *v)
off_t newsize, diskeof, memeof;
off_t offset, origoffset, startoffset, endoffset, raoffset;
daddr_t lbn, blkno;
- int s, i, error, npages, orignpages, npgs, run, ridx, pidx, pcount;
+ int i, error, npages, orignpages, npgs, run, ridx, pidx, pcount;
int fs_bshift, fs_bsize, dev_bshift;
int flags = ap->a_flags;
size_t bytes, iobytes, tailbytes, totalbytes, skipbytes;
@@ -672,6 +672,10 @@ genfs_getpages(void *v)
mbp->b_flags = B_BUSY|B_READ| (async ? B_CALL|B_ASYNC : 0);
mbp->b_iodone = (async ? uvm_aio_biodone : 0);
mbp->b_vp = vp;
+ if (async)
+ BIO_SETPRIO(mbp, BPRIO_TIMELIMITED);
+ else
+ BIO_SETPRIO(mbp, BPRIO_TIMECRITICAL);
/*
* if EOF is in the middle of the range, zero the part past EOF.
@@ -800,15 +804,9 @@ genfs_getpages(void *v)
bp = mbp;
} else {
bp = getiobuf();
- bp->b_data = (char *)kva + offset - startoffset;
- bp->b_resid = bp->b_bcount = iobytes;
- bp->b_flags = B_BUSY|B_READ|B_CALL|B_ASYNC;
- bp->b_iodone = uvm_aio_biodone1;
- bp->b_vp = vp;
- bp->b_proc = NULL;
+ nestiobuf_setup(mbp, bp, offset - startoffset, iobytes);
}
bp->b_lblkno = 0;
- bp->b_private = mbp;
/* adjust physical blkno for partial blocks */
bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
@@ -818,30 +816,13 @@ genfs_getpages(void *v)
"bp %p offset 0x%x bcount 0x%x blkno 0x%x",
bp, offset, iobytes, bp->b_blkno);
- if (async)
- BIO_SETPRIO(bp, BPRIO_TIMELIMITED);
- else
- BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
-
curproc->p_stats->p_ru.ru_inblock++;
VOP_STRATEGY(devvp, bp);
}
loopdone:
- if (skipbytes) {
- s = splbio();
- if (error) {
- mbp->b_flags |= B_ERROR;
- mbp->b_error = error;
- }
- mbp->b_resid -= skipbytes;
- if (mbp->b_resid == 0) {
- biodone(mbp);
- }
- splx(s);
- }
-
+ nestiobuf_done(mbp, skipbytes, error);
if (async) {
UVMHIST_LOG(ubchist, "returning 0 (async)",0,0,0,0);
lockmgr(&gp->g_glock, LK_RELEASE, NULL);
@@ -1479,6 +1460,12 @@ genfs_gop_write(struct vnode *vp, struct
mbp->b_flags = B_BUSY|B_WRITE|B_AGE| (async ? (B_CALL|B_ASYNC) : 0);
mbp->b_iodone = uvm_aio_biodone;
mbp->b_vp = vp;
+ if (curproc == uvm.pagedaemon_proc)
+ BIO_SETPRIO(mbp, BPRIO_TIMELIMITED);
+ else if (async)
+ BIO_SETPRIO(mbp, BPRIO_TIMENONCRITICAL);
+ else
+ BIO_SETPRIO(mbp, BPRIO_TIMECRITICAL);
bp = NULL;
for (offset = startoffset;
@@ -1504,21 +1491,12 @@ genfs_gop_write(struct vnode *vp, struct
if (offset == startoffset && iobytes == bytes) {
bp = mbp;
} else {
- s = splbio();
- V_INCR_NUMOUTPUT(vp);
- splx(s);
- bp = getiobuf();
UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
vp, bp, vp->v_numoutput, 0);
- bp->b_data = (char *)kva +
- (vaddr_t)(offset - pg->offset);
- bp->b_resid = bp->b_bcount = iobytes;
- bp->b_flags = B_BUSY|B_WRITE|B_CALL|B_ASYNC;
- bp->b_iodone = uvm_aio_biodone1;
- bp->b_vp = vp;
+ bp = getiobuf();
+ nestiobuf_setup(mbp, bp, offset - pg->offset, iobytes);
}
bp->b_lblkno = 0;
- bp->b_private = mbp;
/* adjust physical blkno for partial blocks */
bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
@@ -1526,12 +1504,6 @@ genfs_gop_write(struct vnode *vp, struct
UVMHIST_LOG(ubchist,
"vp %p offset 0x%x bcount 0x%x blkno 0x%x",
vp, offset, bp->b_bcount, bp->b_blkno);
- if (curproc == uvm.pagedaemon_proc)
- BIO_SETPRIO(bp, BPRIO_TIMELIMITED);
- else if (async)
- BIO_SETPRIO(bp, BPRIO_TIMENONCRITICAL);
- else
- BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
if (p) {
p->p_stats->p_ru.ru_oublock++;
@@ -1541,17 +1513,8 @@ genfs_gop_write(struct vnode *vp, struct
}
if (skipbytes) {
UVMHIST_LOG(ubchist, "skipbytes %d", skipbytes, 0,0,0);
- s = splbio();
- if (error) {
- mbp->b_flags |= B_ERROR;
- mbp->b_error = error;
- }
- mbp->b_resid -= skipbytes;
- if (mbp->b_resid == 0) {
- biodone(mbp);
- }
- splx(s);
}
+ nestiobuf_done(mbp, skipbytes, error);
if (async) {
UVMHIST_LOG(ubchist, "returning 0 (async)", 0,0,0,0);
return (0);
--NextPart-20060109123250-0442600--