tech-kern archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Re: vnd locking
On Mon, Jun 01, 2015 at 01:11:44PM +0100, Patrick Welche wrote:
> On Fri, May 22, 2015 at 04:56:57PM +0000, Taylor R Campbell wrote:
> > Date: Fri, 22 May 2015 13:09:58 +0100
> > From: Patrick Welche <prlw1%cam.ac.uk@localhost>
> >
> > There are a load of locking PRs involving vnd. I thought I would have a
> > stab at converting vnd to use condvars and mutexes (mutices?) as a first
> > step. I cobbled something together which allows me to
> > [...]
> > with a LOCKDEBUG kernel, so I may have guessed right, but I haven't found
> > any documentation on The Right Way, so please review carefully, assume
> > the worst, and tell me how to do it better!
>
> I may have regressed: with the attached patch, I now get a locking
> error in vndthread at:
>
> /*
> * Allocate a header for this transfer and link it to the
> * buffer
> */
> mutex_enter(&vnd->sc_sclock); <------------ XXX already locked?!
> vnx = VND_GETXFER(vnd); /* may sleep */
> mutex_exit(&vnd->sc_sclock);
Simply VND_GETXFER(vnd) is pool_get(&(vnd)->sc_vxpool, PR_WAITOK), and
pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0,
0, 0, "vndxpl", NULL, IPL_BIO);
so I think the splbio replacement mutex_enter/exit is simply not
necessary. Removing them gets me a successful
mkdir /mnt0 /mnt1 /mnt2 /mnt3
dd if=/dev/zero of=/scratch/disk0.img bs=1m count=1100
dd if=/dev/zero of=/scratch/disk1.img bs=1m count=1100
dd if=/dev/zero of=/scratch2/disk2.img bs=1m count=1100
dd if=/dev/zero of=/scratch3/disk3.img bs=1m count=1100
vnconfig vnd0 /scratch/disk0.img
vnconfig vnd1 /scratch/disk1.img
vnconfig vnd2 /scratch2/disk2.img
vnconfig vnd3 /scratch3/disk3.img
for i in 0 1 2 3; do
disklabel -rw vnd$i vndtest;
newfs /dev/rvnd${i}a
mount /dev/vnd${i}a /mnt${i}
done
for i in 0 1 2 3; do
bonnie -d /mnt${i}&
done
with a LOCKDEBUG kernel on a quad core computer, with scratch, scratch2
and scratch3 being on separate physical disks.
Cheers,
Patrick
Index: vndvar.h
===================================================================
RCS file: /cvsroot/src/sys/dev/vndvar.h,v
retrieving revision 1.34
diff -u -p -r1.34 vndvar.h
--- vndvar.h 25 May 2015 20:57:18 -0000 1.34
+++ vndvar.h 23 Jun 2015 17:16:50 -0000
@@ -107,14 +107,19 @@ struct vnode;
*/
struct vnd_softc {
device_t sc_dev;
+ kmutex_t sc_sclock; /* protect this softc */
int sc_flags; /* flags */
+ kcondvar_t sc_intercv; /* interruptible lock condvar */
uint64_t sc_size; /* size of vnd */
struct vnode *sc_vp; /* vnode */
kauth_cred_t sc_cred; /* credentials */
- int sc_maxactive; /* max # of active requests */
struct bufq_state *sc_tab; /* transfer queue */
+ kcondvar_t sc_bufqcv; /* queue's condvar */
int sc_pending; /* number of pending transfers */
+ kcondvar_t sc_pendingcv; /* pending transfer condvar */
+ int sc_maxactive; /* max # of active requests */
int sc_active; /* number of active transfers */
+ kcondvar_t sc_activitycv; /* activity condvar */
struct disk sc_dkdev; /* generic disk device info */
struct vndgeom sc_geom; /* virtual geometry */
struct pool sc_vxpool; /* vndxfer pool */
@@ -140,7 +145,6 @@ struct vnd_softc {
#define VNF_KLABEL 0x040 /* keep label on close */
#define VNF_VLABEL 0x080 /* label is valid */
#define VNF_KTHREAD 0x100 /* thread is running */
-#define VNF_VUNCONF 0x200 /* device is unconfiguring */
#define VNF_COMP 0x400 /* file is compressed */
#define VNF_CLEARING 0x800 /* unit is being torn down */
#define VNF_USE_VN_RDWR 0x1000 /* have to use vn_rdwr() */
Index: vnd.c
===================================================================
RCS file: /cvsroot/src/sys/dev/vnd.c,v
retrieving revision 1.245
diff -u -p -r1.245 vnd.c
--- vnd.c 25 May 2015 21:02:37 -0000 1.245
+++ vnd.c 23 Jun 2015 17:16:51 -0000
@@ -204,7 +204,7 @@ const struct bdevsw vnd_bdevsw = {
.d_dump = vnddump,
.d_psize = vndsize,
.d_discard = nodiscard,
- .d_flag = D_DISK
+ .d_flag = D_DISK | D_MPSAFE
};
const struct cdevsw vnd_cdevsw = {
@@ -219,7 +219,7 @@ const struct cdevsw vnd_cdevsw = {
.d_mmap = nommap,
.d_kqfilter = nokqfilter,
.d_discard = nodiscard,
- .d_flag = D_DISK
+ .d_flag = D_DISK | D_MPSAFE
};
static int vnd_match(device_t, cfdata_t, void *);
@@ -241,12 +241,9 @@ static struct dkdriver vnddkdriver = {
void
vndattach(int num)
{
- int error;
- error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
- if (error)
- aprint_error("%s: unable to register cfattach\n",
- vnd_cd.cd_name);
+ if (config_cfattach_attach(vnd_cd.cd_name, &vnd_ca))
+ aprint_error("%s: unable to register\n", vnd_cd.cd_name);
}
static int
@@ -265,6 +262,11 @@ vnd_attach(device_t parent, device_t sel
sc->sc_comp_offsets = NULL;
sc->sc_comp_buff = NULL;
sc->sc_comp_decombuf = NULL;
+ mutex_init(&sc->sc_sclock, MUTEX_DEFAULT, IPL_BIO);
+ cv_init(&sc->sc_intercv, "vndlck");
+ cv_init(&sc->sc_activitycv, "vndac");
+ cv_init(&sc->sc_bufqcv, "vndbp");
+ cv_init(&sc->sc_pendingcv, "vndpc");
bufq_alloc(&sc->sc_tab, "disksort", BUFQ_SORT_RAWBLOCK);
disk_init(&sc->sc_dkdev, device_xname(self), &vnddkdriver);
if (!pmf_device_register(self, NULL, NULL))
@@ -284,6 +286,11 @@ vnd_detach(device_t self, int flags)
}
pmf_device_deregister(self);
+ mutex_destroy(&sc->sc_sclock);
+ cv_destroy(&sc->sc_intercv);
+ cv_destroy(&sc->sc_activitycv);
+ cv_destroy(&sc->sc_bufqcv);
+ cv_destroy(&sc->sc_pendingcv);
bufq_free(sc->sc_tab);
disk_destroy(&sc->sc_dkdev);
@@ -470,7 +477,8 @@ vndstrategy(struct buf *bp)
device_lookup_private(&vnd_cd, unit);
struct disklabel *lp;
daddr_t blkno;
- int s = splbio();
+
+ mutex_enter(&vnd->sc_sclock);
if (vnd == NULL) {
bp->b_error = ENXIO;
@@ -545,18 +553,20 @@ vndstrategy(struct buf *bp)
KASSERT(vnd->sc_pending >= 0 &&
vnd->sc_pending <= VND_MAXPENDING(vnd));
while (vnd->sc_pending == VND_MAXPENDING(vnd))
- tsleep(&vnd->sc_pending, PRIBIO, "vndpc", 0);
+ cv_wait(&vnd->sc_pendingcv, &vnd->sc_sclock);
vnd->sc_pending++;
}
+ mutex_exit(&vnd->sc_sclock);
bufq_put(vnd->sc_tab, bp);
- wakeup(&vnd->sc_tab);
- splx(s);
+ mutex_enter(&vnd->sc_sclock);
+ cv_signal(&vnd->sc_bufqcv);
+ mutex_exit(&vnd->sc_sclock);
return;
done:
+ mutex_exit(&vnd->sc_sclock);
bp->b_resid = bp->b_bcount;
biodone(bp);
- splx(s);
}
static bool
@@ -601,7 +611,8 @@ static void
vndthread(void *arg)
{
struct vnd_softc *vnd = arg;
- int s;
+
+ mutex_enter(&vnd->sc_sclock);
/* Determine whether we can *use* VOP_BMAP and VOP_STRATEGY to
* directly access the backing vnode. If we can, use these two
@@ -620,31 +631,27 @@ vndthread(void *arg)
"using read/write operations");
#endif
- s = splbio();
- vnd->sc_flags |= VNF_KTHREAD;
- wakeup(&vnd->sc_kthread);
-
/*
* Dequeue requests and serve them depending on the available
* vnode operations.
*/
- while ((vnd->sc_flags & VNF_VUNCONF) == 0) {
+ while ((vnd->sc_flags & VNF_KTHREAD) != 0) {
struct vndxfer *vnx;
struct buf *obp;
struct buf *bp;
obp = bufq_get(vnd->sc_tab);
if (obp == NULL) {
- tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0);
+ cv_wait(&vnd->sc_bufqcv, &vnd->sc_sclock);
continue;
};
if ((vnd->sc_flags & VNF_USE_VN_RDWR)) {
KASSERT(vnd->sc_pending > 0 &&
vnd->sc_pending <= VND_MAXPENDING(vnd));
if (vnd->sc_pending-- == VND_MAXPENDING(vnd))
- wakeup(&vnd->sc_pending);
+ cv_signal(&vnd->sc_pendingcv);
}
- splx(s);
+ mutex_exit(&vnd->sc_sclock);
#ifdef DEBUG
if (vnddebug & VDB_FOLLOW)
printf("vndthread(%p)\n", obp);
@@ -672,17 +679,17 @@ vndthread(void *arg)
* Allocate a header for this transfer and link it to the
* buffer
*/
- s = splbio();
- vnx = VND_GETXFER(vnd);
- splx(s);
+ /* mutex_enter(&vnd->sc_sclock); */
+ vnx = VND_GETXFER(vnd); /* may sleep */
+ /* mutex_exit(&vnd->sc_sclock); */
vnx->vx_vnd = vnd;
- s = splbio();
+ mutex_enter(&vnd->sc_sclock);
while (vnd->sc_active >= vnd->sc_maxactive) {
- tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0);
+ cv_wait(&vnd->sc_activitycv, &vnd->sc_sclock);
}
vnd->sc_active++;
- splx(s);
+ mutex_exit(&vnd->sc_sclock);
/* Instrumentation. */
disk_busy(&vnd->sc_dkdev);
@@ -706,17 +713,15 @@ vndthread(void *arg)
else
handle_with_rdwr(vnd, obp, bp);
- s = splbio();
+ mutex_enter(&vnd->sc_sclock);
continue;
done:
biodone(obp);
- s = splbio();
+ mutex_enter(&vnd->sc_sclock);
}
- vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF);
- wakeup(&vnd->sc_kthread);
- splx(s);
+ mutex_exit(&vnd->sc_sclock);
kthread_exit(0);
}
@@ -794,7 +799,7 @@ handle_with_rdwr(struct vnd_softc *vnd,
}
/*
- * Handes the read/write request given in 'bp' using the vnode's VOP_BMAP
+ * Handles the read/write request given in 'bp' using the vnode's VOP_BMAP
* and VOP_STRATEGY operations.
*
* 'obp' is a pointer to the original request fed to the vnd device.
@@ -917,7 +922,8 @@ vndiodone(struct buf *bp)
struct vndxfer *vnx = VND_BUFTOXFER(bp);
struct vnd_softc *vnd = vnx->vx_vnd;
struct buf *obp = bp->b_private;
- int s = splbio();
+
+ mutex_enter(&vnd->sc_sclock);
KASSERT(&vnx->vx_buf == bp);
KASSERT(vnd->sc_active > 0);
@@ -931,9 +937,9 @@ vndiodone(struct buf *bp)
(bp->b_flags & B_READ));
vnd->sc_active--;
if (vnd->sc_active == 0) {
- wakeup(&vnd->sc_tab);
+ cv_broadcast(&vnd->sc_activitycv);
}
- splx(s);
+ mutex_exit(&vnd->sc_sclock);
obp->b_error = bp->b_error;
obp->b_resid = bp->b_resid;
buf_destroy(bp);
@@ -1163,7 +1169,7 @@ vndioctl(dev_t dev, u_long cmd, void *da
}
KASSERT(l);
error = VOP_GETATTR(nd.ni_vp, &vattr, l->l_cred);
- if (!error && nd.ni_vp->v_type != VREG)
+ if (!error && vattr.va_type != VREG)
error = EOPNOTSUPP;
if (!error && vattr.va_bytes < vattr.va_size)
/* File is definitely sparse, use vn_rdwr() */
@@ -1359,13 +1365,14 @@ vndioctl(dev_t dev, u_long cmd, void *da
vio->vnd_size = dbtob(vnd->sc_size);
vnd->sc_flags |= VNF_INITED;
- /* create the kernel thread, wait for it to be up */
- error = kthread_create(PRI_NONE, 0, NULL, vndthread, vnd,
- &vnd->sc_kthread, "%s", device_xname(vnd->sc_dev));
- if (error)
+ vnd->sc_flags |= VNF_KTHREAD;
+ error = kthread_create(PRI_BIO,
+ KTHREAD_MPSAFE | KTHREAD_MUSTJOIN, NULL,
+ vndthread, vnd, &vnd->sc_kthread, "%s",
+ device_xname(vnd->sc_dev));
+ if (error) {
+ vnd->sc_flags &= ~VNF_KTHREAD;
goto close_and_exit;
- while ((vnd->sc_flags & VNF_KTHREAD) == 0) {
- tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0);
}
#ifdef DEBUG
if (vnddebug & VDB_INIT)
@@ -1657,10 +1664,10 @@ vndshutdown(void)
static void
vndclear(struct vnd_softc *vnd, int myminor)
{
+ struct lwp *kt;
struct vnode *vp = vnd->sc_vp;
int fflags = FREAD;
int bmaj, cmaj, i, mn;
- int s;
#ifdef DEBUG
if (vnddebug & VDB_FOLLOW)
@@ -1678,17 +1685,25 @@ vndclear(struct vnd_softc *vnd, int mymi
vdevgone(cmaj, mn, mn, VCHR);
}
+ mutex_enter(&vnd->sc_sclock);
+
if ((vnd->sc_flags & VNF_READONLY) == 0)
fflags |= FWRITE;
- s = splbio();
bufq_drain(vnd->sc_tab);
- splx(s);
+ cv_broadcast(&vnd->sc_bufqcv);
+
+ mutex_exit(&vnd->sc_sclock);
- vnd->sc_flags |= VNF_VUNCONF;
- wakeup(&vnd->sc_tab);
- while (vnd->sc_flags & VNF_KTHREAD)
- tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0);
+ if (vnd->sc_flags & VNF_KTHREAD) {
+ mutex_enter(&vnd->sc_sclock);
+ vnd->sc_flags &= ~VNF_KTHREAD;
+ kt = vnd->sc_kthread;
+ cv_signal(&vnd->sc_bufqcv);
+ cv_signal(&vnd->sc_activitycv);
+ mutex_exit(&vnd->sc_sclock);
+ kthread_join(kt);
+ }
#ifdef VND_COMPRESSION
/* free the compressed file buffers */
@@ -1709,7 +1724,7 @@ vndclear(struct vnd_softc *vnd, int mymi
#endif /* VND_COMPRESSION */
vnd->sc_flags &=
~(VNF_INITED | VNF_READONLY | VNF_KLABEL | VNF_VLABEL
- | VNF_VUNCONF | VNF_COMP | VNF_CLEARING);
+ | VNF_COMP | VNF_CLEARING);
if (vp == NULL)
panic("vndclear: null vp");
(void) vn_close(vp, fflags, vnd->sc_cred);
@@ -1802,6 +1817,50 @@ vndgetdefaultlabel(struct vnd_softc *sc,
}
/*
+ * Wait interruptibly for an exclusive lock.
+ *
+ * XXX
+ * Several drivers do this; it should be abstracted and made MP-safe.
+ */
+static int
+vndlock(struct vnd_softc *sc)
+{
+ int error;
+
+ mutex_enter(&sc->sc_sclock);
+
+ error = 0;
+ while ((sc->sc_flags & VNF_LOCKED) != 0) {
+ sc->sc_flags |= VNF_WANTED;
+ error = cv_wait_sig(&sc->sc_intercv, &sc->sc_sclock);
+ if (error != 0)
+ break;
+ }
+ if (error == 0)
+ sc->sc_flags |= VNF_LOCKED;
+
+ mutex_exit(&sc->sc_sclock);
+
+ return error;
+}
+
+/*
+ * Unlock and wake up any waiters.
+ */
+static void
+vndunlock(struct vnd_softc *sc)
+{
+
+ mutex_enter(&sc->sc_sclock);
+ sc->sc_flags &= ~VNF_LOCKED;
+ if ((sc->sc_flags & VNF_WANTED) != 0) {
+ sc->sc_flags &= ~VNF_WANTED;
+ cv_broadcast(&sc->sc_intercv);
+ }
+ mutex_exit(&sc->sc_sclock);
+}
+
+/*
* Read the disklabel from a vnd. If one is not present, create a fake one.
*/
static void
@@ -1853,40 +1912,6 @@ vndgetdisklabel(dev_t dev, struct vnd_so
}
}
-/*
- * Wait interruptibly for an exclusive lock.
- *
- * XXX
- * Several drivers do this; it should be abstracted and made MP-safe.
- */
-static int
-vndlock(struct vnd_softc *sc)
-{
- int error;
-
- while ((sc->sc_flags & VNF_LOCKED) != 0) {
- sc->sc_flags |= VNF_WANTED;
- if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
- return error;
- }
- sc->sc_flags |= VNF_LOCKED;
- return 0;
-}
-
-/*
- * Unlock and wake up any waiters.
- */
-static void
-vndunlock(struct vnd_softc *sc)
-{
-
- sc->sc_flags &= ~VNF_LOCKED;
- if ((sc->sc_flags & VNF_WANTED) != 0) {
- sc->sc_flags &= ~VNF_WANTED;
- wakeup(sc);
- }
-}
-
#ifdef VND_COMPRESSION
/* compressed file read */
static void
@@ -2051,7 +2076,7 @@ vnd_modcmd(modcmd_t cmd, void *arg)
error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
if (error) {
config_cfdriver_detach(&vnd_cd);
- aprint_error("%s: unable to register cfattach\n",
+ aprint_error("%s: unable to register\n",
vnd_cd.cd_name);
break;
}
Home |
Main Index |
Thread Index |
Old Index