Subject: kernel thread helper for vnd(4)
To: None <tech-kern@netbsd.org>
From: Manuel Bouyer <bouyer@antioche.eu.org>
List: tech-kern
Date: 03/29/2005 22:13:21
--+HP7ph2BbKc20aGI
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Hi,
quoting Jason in a previous mail on this list about vnd problems:
On Wed, Mar 23, 2005 at 04:52:53PM -0800, Jason Thorpe wrote:
>
> This is really a known problem. The vnd driver needs to be rewritten
> badly. It needs to queue up requests and defer everything to a
> kthread that actually does the work.
I've changed vnd to use a kernel thread to perform the I/O.
in the current vnd version, vndstrategy() breaks the request in smaller
chunks, put these chunks in buffers and queue them up. vndstart() will
dequeue the buffers and call VOP_STRATEGY().
With my changes, vndstrategy() just queue the requests in a buffer queue
and wakes up the thread. The thread will breaks the request in smaller chunks
and call VOP_STRATEGY(), taking care to not have more than vnd->sc_maxactive
requests active in parallel. It will sleep waiting either for a buffer
in the queue, or for some request to complete if there are
vnd->sc_maxactive I/O active.
The thread is created when a file is attached to the vnd, and will exit when
the vnd is unconfigured.
Does this look good ?
Note: the attached diff is in diff -ub format, to make it smaller and easier
to read.
--
Manuel Bouyer <bouyer@antioche.eu.org>
NetBSD: 26 ans d'experience feront toujours la difference
--
--+HP7ph2BbKc20aGI
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=diff
Index: vnd.c
===================================================================
RCS file: /cvsroot/src/sys/dev/vnd.c,v
retrieving revision 1.111
diff -u -b -r1.111 vnd.c
--- vnd.c 28 Oct 2004 07:07:39 -0000 1.111
+++ vnd.c 29 Mar 2005 19:11:45 -0000
@@ -143,6 +143,7 @@
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
+#include <sys/kthread.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/bufq.h>
@@ -207,7 +208,6 @@
int vnddetach(void);
static void vndclear(struct vnd_softc *, int);
-static void vndstart(struct vnd_softc *);
static int vndsetcred(struct vnd_softc *, struct ucred *);
static void vndthrottle(struct vnd_softc *, struct vnode *);
static void vndiodone(struct buf *);
@@ -221,6 +221,8 @@
static int vndlock(struct vnd_softc *);
static void vndunlock(struct vnd_softc *);
+void vndthread(void *);
+
static dev_type_open(vndopen);
static dev_type_close(vndclose);
static dev_type_read(vndread);
@@ -393,38 +395,24 @@
}
/*
- * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY.
+ * Qeue the request, and wakeup the kernel thread to handle it.
*/
static void
vndstrategy(struct buf *bp)
{
int unit = vndunit(bp->b_dev);
struct vnd_softc *vnd = &vnd_softc[unit];
- struct vndxfer *vnx;
- struct mount *mp;
- int s, bsize, resid;
- off_t bn;
- caddr_t addr;
- int sz, flags, error, wlabel;
- struct disklabel *lp;
- struct partition *pp;
+ struct disklabel *lp = vnd->sc_dkdev.dk_label;
+ int s = splbio();
+
+ bp->b_resid = bp->b_bcount;
-#ifdef DEBUG
- if (vnddebug & VDB_FOLLOW)
- printf("vndstrategy(%p): unit %d\n", bp, unit);
-#endif
if ((vnd->sc_flags & VNF_INITED) == 0) {
bp->b_error = ENXIO;
bp->b_flags |= B_ERROR;
goto done;
}
- /* If it's a nil transfer, wake up the top half now. */
- if (bp->b_bcount == 0)
- goto done;
-
- lp = vnd->sc_dkdev.dk_label;
-
/*
* The transfer must be a whole number of blocks.
*/
@@ -435,15 +423,6 @@
}
/*
- * Do bounds checking and adjust transfer. If there's an error,
- * the bounds check will flag that for us.
- */
- wlabel = vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING);
- if (DISKPART(bp->b_dev) != RAW_PART)
- if (bounds_check_with_label(&vnd->sc_dkdev, bp, wlabel) <= 0)
- goto done;
-
- /*
* check if we're read-only.
*/
if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) {
@@ -452,6 +431,67 @@
goto done;
}
+ /*
+ * Do bounds checking and adjust transfer. If there's an error,
+ * the bounds check will flag that for us.
+ */
+ if (DISKPART(bp->b_dev) != RAW_PART) {
+ if (bounds_check_with_label(&vnd->sc_dkdev,
+ bp, vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING)) <= 0)
+ goto done;
+ }
+
+ /* If it's a nil transfer, wake up the top half now. */
+ if (bp->b_bcount == 0)
+ goto done;
+#ifdef DEBUG
+ if (vnddebug & VDB_FOLLOW)
+ printf("vndstrategy(%p): unit %d\n", bp, unit);
+#endif
+ BUFQ_PUT(&vnd->sc_tab, bp);
+ wakeup(&vnd->sc_tab);
+ splx(s);
+ return;
+done:
+ biodone(bp);
+ splx(s);
+}
+
+void
+vndthread(void *arg)
+{
+ struct vnd_softc *vnd = arg;
+ struct buf *bp;
+ struct vndxfer *vnx;
+ struct mount *mp;
+ int s, bsize, resid;
+ off_t bn;
+ caddr_t addr;
+ int sz, flags, error;
+ struct disklabel *lp;
+ struct partition *pp;
+
+ s = splbio();
+ vnd->sc_flags |= VNF_KTHREAD;
+ wakeup(&vnd->sc_kthread);
+
+ /*
+ * Dequeue requests, break them into bsize pieces and submit using
+ * VOP_BMAP/VOP_STRATEGY.
+ */
+ while ((vnd->sc_flags & VNF_VUNCONF) == 0) {
+ bp = BUFQ_GET(&vnd->sc_tab);
+ if (bp == NULL) {
+ tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0);
+ continue;
+ };
+ splx(s);
+
+#ifdef DEBUG
+ if (vnddebug & VDB_FOLLOW)
+ printf("vndthread(%p\n", bp);
+#endif
+ lp = vnd->sc_dkdev.dk_label;
bp->b_resid = bp->b_bcount;
/*
@@ -464,7 +504,8 @@
* Translate the partition-relative block number to an absolute.
*/
if (DISKPART(bp->b_dev) != RAW_PART) {
- pp = &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
+ pp = &vnd->sc_dkdev.dk_label->d_partitions[
+ DISKPART(bp->b_dev)];
bn += pp->p_offset;
}
@@ -480,7 +521,10 @@
addr = bp->b_data;
flags = (bp->b_flags & (B_READ|B_ASYNC)) | B_CALL;
- /* Allocate a header for this transfer and link it to the buffer */
+ /*
+ * Allocate a header for this transfer and link it to the
+ * buffer
+ */
s = splbio();
vnx = VND_GETXFER(vnd);
splx(s);
@@ -492,6 +536,12 @@
if ((flags & B_READ) == 0)
vn_start_write(vnd->sc_vp, &mp, V_WAIT);
+ /*
+ * Feed requests sequentially.
+ * We do it this way to keep from flooding NFS servers if we
+ * are connected to an NFS file. This places the burden on
+ * the client rather than the server.
+ */
for (resid = bp->b_resid; resid; resid -= sz) {
struct vndbuf *nbp;
struct vnode *vp;
@@ -540,6 +590,12 @@
#endif
s = splbio();
+ while (vnd->sc_active >= vnd->sc_maxactive) {
+ tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0);
+ if (vnd->sc_flags & VNF_VUNCONF)
+ goto kthread_end;
+ }
+ vnd->sc_active++;
nbp = VND_GETBUF(vnd);
splx(s);
BUF_INIT(&nbp->vb_buf);
@@ -567,9 +623,23 @@
goto out;
}
vnx->vx_pending++;
+#ifdef DEBUG
+ if (vnddebug & VDB_IO)
+ printf("vndstart(%ld): bp %p vp %p blkno "
+ "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n",
+ (long) (vnd-vnd_softc), &nbp->vb_buf,
+ nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno,
+ nbp->vb_buf.b_flags, nbp->vb_buf.b_data,
+ nbp->vb_buf.b_bcount);
+#endif
+
+ /* Instrumentation. */
+ disk_busy(&vnd->sc_dkdev);
+
+ if ((nbp->vb_buf.b_flags & B_READ) == 0)
+ vp->v_numoutput++;
+ VOP_STRATEGY(vp, &nbp->vb_buf);
- BUFQ_PUT(&vnd->sc_tab, &nbp->vb_buf);
- vndstart(vnd);
splx(s);
bn += sz;
addr += sz;
@@ -589,57 +659,21 @@
VND_PUTXFER(vnd, vnx);
biodone(bp);
}
- splx(s);
- return;
-
- done:
+ continue;
+done:
biodone(bp);
-}
-
-/*
- * Feed requests sequentially.
- * We do it this way to keep from flooding NFS servers if we are connected
- * to an NFS file. This places the burden on the client rather than the
- * server.
- */
-static void
-vndstart(struct vnd_softc *vnd)
-{
- struct buf *bp;
-
- /*
- * Dequeue now since lower level strategy routine might
- * queue using same links
- */
-
- if ((vnd->sc_flags & VNF_BUSY) != 0)
- return;
-
- vnd->sc_flags |= VNF_BUSY;
-
- while (vnd->sc_active < vnd->sc_maxactive) {
- bp = BUFQ_GET(&vnd->sc_tab);
- if (bp == NULL)
- break;
- vnd->sc_active++;
-#ifdef DEBUG
- if (vnddebug & VDB_IO)
- printf("vndstart(%ld): bp %p vp %p blkno 0x%" PRIx64
- " flags %x addr %p cnt 0x%x\n",
- (long) (vnd-vnd_softc), bp, bp->b_vp, bp->b_blkno,
- bp->b_flags, bp->b_data, bp->b_bcount);
-#endif
-
- /* Instrumentation. */
- disk_busy(&vnd->sc_dkdev);
-
- if ((bp->b_flags & B_READ) == 0)
- bp->b_vp->v_numoutput++;
- VOP_STRATEGY(bp->b_vp, bp);
+ s = splbio();
+ continue;
}
- vnd->sc_flags &= ~VNF_BUSY;
+
+kthread_end:
+ vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF);
+ wakeup(&vnd->sc_kthread);
+ splx(s);
+ kthread_exit(0);
}
+
static void
vndiodone(struct buf *bp)
{
@@ -710,7 +744,7 @@
}
vnd->sc_active--;
- vndstart(vnd);
+ wakeup(&vnd->sc_tab);
splx(s);
}
@@ -906,9 +940,23 @@
if ((error = vndsetcred(vnd, p->p_ucred)) != 0)
goto close_and_exit;
+
+ memset(vnd->sc_xname, 0, sizeof(vnd->sc_xname)); /* XXX */
+ snprintf(vnd->sc_xname, sizeof(vnd->sc_xname), "vnd%d", unit);
+
+
vndthrottle(vnd, vnd->sc_vp);
vio->vnd_size = dbtob(vnd->sc_size);
vnd->sc_flags |= VNF_INITED;
+
+ /* create the kernel thread, wait for it to be up */
+ error = kthread_create1(vndthread, vnd, &vnd->sc_kthread,
+ vnd->sc_xname);
+ if (error)
+ goto close_and_exit;
+ while ((vnd->sc_flags & VNF_KTHREAD) == 0) {
+ tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0);
+ }
#ifdef DEBUG
if (vnddebug & VDB_INIT)
printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n",
@@ -920,8 +968,6 @@
#endif
/* Attach the disk. */
- memset(vnd->sc_xname, 0, sizeof(vnd->sc_xname)); /* XXX */
- snprintf(vnd->sc_xname, sizeof(vnd->sc_xname), "vnd%d", unit);
vnd->sc_dkdev.dk_name = vnd->sc_xname;
disk_attach(&vnd->sc_dkdev);
@@ -1214,9 +1260,16 @@
if ((vnd->sc_flags & VNF_READONLY) == 0)
fflags |= FWRITE;
- vnd->sc_flags &= ~(VNF_INITED | VNF_READONLY | VNF_VLABEL);
+
+ vnd->sc_flags |= VNF_VUNCONF;
+ wakeup(&vnd->sc_tab);
+ while (vnd->sc_flags & VNF_KTHREAD)
+ tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0);
+
+ vnd->sc_flags &=
+ ~(VNF_INITED | VNF_READONLY | VNF_VLABEL | VNF_VUNCONF);
if (vp == (struct vnode *)0)
- panic("vndioctl: null vp");
+ panic("vndclear: null vp");
(void) vn_close(vp, fflags, vnd->sc_cred, p);
crfree(vnd->sc_cred);
vnd->sc_vp = (struct vnode *)0;
Index: vndvar.h
===================================================================
RCS file: /cvsroot/src/sys/dev/vndvar.h,v
retrieving revision 1.13
diff -u -b -r1.13 vndvar.h
--- vndvar.h 27 Feb 2005 00:26:58 -0000 1.13
+++ vndvar.h 29 Mar 2005 19:11:45 -0000
@@ -162,6 +162,7 @@
struct vndgeom sc_geom; /* virtual geometry */
struct pool sc_vxpool; /* vndxfer pool */
struct pool sc_vbpool; /* vndbuf pool */
+ struct proc *sc_kthread; /* kernel thread */
};
#endif
@@ -171,10 +172,11 @@
#define VNF_LABELLING 0x004 /* unit is currently being labelled */
#define VNF_WANTED 0x008 /* someone is waiting to obtain a lock */
#define VNF_LOCKED 0x010 /* unit is locked */
-#define VNF_BUSY 0x020 /* unit is busy */
-#define VNF_READONLY 0x040 /* unit is read-only */
-#define VNF_KLABEL 0x080 /* keep label on close */
-#define VNF_VLABEL 0x100 /* label is valid */
+#define VNF_READONLY 0x020 /* unit is read-only */
+#define VNF_KLABEL 0x040 /* keep label on close */
+#define VNF_VLABEL 0x080 /* label is valid */
+#define VNF_KTHREAD 0x100 /* thread is running */
+#define VNF_VUNCONF 0x200 /* device is unconfiguring */
/*
* A simple structure for describing which vnd units are in use.
--+HP7ph2BbKc20aGI--