Source-Changes-HG archive
[src/trunk]: src/sys/miscfs/specfs specfs: Drain all I/O operations after last .d_close call
details: https://anonhg.NetBSD.org/src/rev/e6bf5a15b610
branches: trunk
changeset: 364513:e6bf5a15b610
user: riastradh <riastradh@NetBSD.org>
date: Mon Mar 28 12:36:51 2022 +0000
description:
specfs: Drain all I/O operations after last .d_close call.
New kind of I/O reference on specdevs, sd_iocnt. This could be done
with psref instead; I chose a reference count for now because we
already have to take a per-object lock anyway, v_interlock, for
vdead_check, so one more atomic is not likely to hurt much. We can
always change the mechanism inside spec_io_enter/exit/drain later
on.
Make sure every access to vp->v_rdev or vp->v_specnode, and every
call to a devsw operation, is protected by one of:
- by the vnode lock (with vdead_check if we unlocked/relocked),
- by positive sd_opencnt,
- by spec_io_enter/exit, or
- by sd_opencnt management in open/close.
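
As a sketch of what the spec_io_enter/exit bracketing looks like at a
call site, condensed from the spec_read change in the diff below
(hypothetical function name; VCHR case only; not a drop-in
replacement):

/*
 * Condensed sketch of the new I/O bracket in spec_read.  The I/O
 * reference taken by spec_io_enter lets a concurrent last-close
 * wait in spec_io_drain for this read to finish.
 */
static int
spec_read_vchr_sketch(struct vnode *vp, struct uio *uio, int ioflag)
{
	struct specnode *sn;
	dev_t dev;
	int error;

	VOP_UNLOCK(vp);		/* cdev_read may sleep indefinitely (ttys) */
	error = spec_io_enter(vp, &sn, &dev);
	if (error)
		goto out;	/* vnode revoked while unlocked => ENXIO */
	error = cdev_read(dev, uio, ioflag);
	spec_io_exit(vp, sn);	/* last reference out wakes spec_io_drain */
out:	vn_lock(vp, LK_SHARED | LK_RETRY);
	return error;
}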
diffstat:
sys/miscfs/specfs/spec_vnops.c | 365 ++++++++++++++++++++++++++++++----------
sys/miscfs/specfs/specdev.h | 3 +-
2 files changed, 273 insertions(+), 95 deletions(-)
diffs (truncated from 586 to 300 lines):
diff -r d24736930cdc -r e6bf5a15b610 sys/miscfs/specfs/spec_vnops.c
--- a/sys/miscfs/specfs/spec_vnops.c Mon Mar 28 12:36:42 2022 +0000
+++ b/sys/miscfs/specfs/spec_vnops.c Mon Mar 28 12:36:51 2022 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: spec_vnops.c,v 1.201 2022/03/28 12:36:42 riastradh Exp $ */
+/* $NetBSD: spec_vnops.c,v 1.202 2022/03/28 12:36:51 riastradh Exp $ */
/*-
* Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -58,7 +58,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: spec_vnops.c,v 1.201 2022/03/28 12:36:42 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: spec_vnops.c,v 1.202 2022/03/28 12:36:51 riastradh Exp $");
#include <sys/param.h>
#include <sys/proc.h>
@@ -81,6 +81,7 @@
#include <sys/kauth.h>
#include <sys/fstrans.h>
#include <sys/module.h>
+#include <sys/atomic.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>
@@ -173,6 +174,7 @@
{ &spec_vnodeop_p, spec_vnodeop_entries };
static kauth_listener_t rawio_listener;
+static struct kcondvar specfs_iocv;
/* Returns true if vnode is /dev/mem or /dev/kmem. */
bool
@@ -218,6 +220,141 @@
rawio_listener = kauth_listen_scope(KAUTH_SCOPE_DEVICE,
rawio_listener_cb, NULL);
+ cv_init(&specfs_iocv, "specio");
+}
+
+/*
+ * spec_io_enter(vp, &sn, &dev)
+ *
+ * Enter an operation that may not hold vp's vnode lock or an
+ * fstrans on vp's mount. Until spec_io_exit, the vnode will not
+ * be revoked.
+ *
+ * On success, set sn to the specnode pointer and dev to the dev_t
+ * number and return zero. Caller must later call spec_io_exit
+ * when done.
+ *
+ * On failure, return ENXIO -- the device has been revoked and no
+ * longer exists.
+ */
+static int
+spec_io_enter(struct vnode *vp, struct specnode **snp, dev_t *devp)
+{
+ dev_t dev;
+ struct specnode *sn;
+ unsigned iocnt;
+ int error = 0;
+
+ mutex_enter(vp->v_interlock);
+
+ /*
+ * Extract all the info we need from the vnode, unless the
+ * vnode has already been reclaimed. This can happen if the
+ * underlying device has been removed and all the device nodes
+ * for it have been revoked. The caller may not hold a vnode
+ * lock or fstrans to prevent this from happening before it has
+ * had an opportunity to notice the vnode is dead.
+ */
+ if (vdead_check(vp, VDEAD_NOWAIT) != 0 ||
+ (sn = vp->v_specnode) == NULL ||
+ (dev = vp->v_rdev) == NODEV) {
+ error = ENXIO;
+ goto out;
+ }
+
+ /*
+ * Notify spec_close that we are doing an I/O operation which
+ * may not be bracketed by fstrans(9) and thus is not
+ * blocked by vfs suspension.
+ *
+ * We could hold this reference with psref(9) instead, but we
+ * already have to take the interlock for vdead_check, so
+ * there's not much more cost here to another atomic operation.
+ */
+ do {
+ iocnt = atomic_load_relaxed(&sn->sn_dev->sd_iocnt);
+ if (__predict_false(iocnt == UINT_MAX)) {
+ /*
+ * The I/O count is limited by the number of
+ * LWPs (which will never overflow this) --
+ * unless one driver uses another driver via
+ * specfs, which is rather unusual, but which
+ * could happen via pud(4) userspace drivers.
+ * We could use a 64-bit count, but can't use
+ * atomics for that on all platforms.
+ * (Probably better to switch to psref or
+ * localcount instead.)
+ */
+ error = EBUSY;
+ goto out;
+ }
+ } while (atomic_cas_uint(&sn->sn_dev->sd_iocnt, iocnt, iocnt + 1)
+ != iocnt);
+
+ /* Success! */
+ *snp = sn;
+ *devp = dev;
+ error = 0;
+
+out: mutex_exit(vp->v_interlock);
+ return error;
+}
+
+/*
+ * spec_io_exit(vp, sn)
+ *
+ * Exit an operation entered with a successful spec_io_enter --
+ * allow concurrent spec_node_revoke to proceed. The argument sn
+ * must match the struct specnode pointer returned by spec_io_enter
+ * for vp.
+ */
+static void
+spec_io_exit(struct vnode *vp, struct specnode *sn)
+{
+ struct specdev *sd = sn->sn_dev;
+ unsigned iocnt;
+
+ KASSERT(vp->v_specnode == sn);
+
+ /*
+ * We are done. Notify spec_close if appropriate. The
+ * transition of 1 -> 0 must happen under device_lock so
+ * spec_close doesn't miss a wakeup.
+ */
+ do {
+ iocnt = atomic_load_relaxed(&sd->sd_iocnt);
+ KASSERT(iocnt > 0);
+ if (iocnt == 1) {
+ mutex_enter(&device_lock);
+ if (atomic_dec_uint_nv(&sd->sd_iocnt) == 0)
+ cv_broadcast(&specfs_iocv);
+ mutex_exit(&device_lock);
+ break;
+ }
+ } while (atomic_cas_uint(&sd->sd_iocnt, iocnt, iocnt - 1) != iocnt);
+}
+
+/*
+ * spec_io_drain(sd)
+ *
+ * Wait for all existing spec_io_enter/exit sections to complete.
+ * Caller must ensure spec_io_enter will fail at this point.
+ */
+static void
+spec_io_drain(struct specdev *sd)
+{
+
+ /*
+ * I/O at the same time as closing is unlikely -- it often
+ * indicates an application bug.
+ */
+ if (__predict_true(atomic_load_relaxed(&sd->sd_iocnt) == 0))
+ return;
+
+ mutex_enter(&device_lock);
+ while (atomic_load_relaxed(&sd->sd_iocnt) > 0)
+ cv_wait(&specfs_iocv, &device_lock);
+ mutex_exit(&device_lock);
}
/*
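
The hunk above is the heart of the change. As a hedged illustration of
the same mechanism outside the kernel, here is a userspace analogue --
C11 atomics and POSIX condition variables standing in for
atomic_ops(3), condvar(9), and device_lock; all names are hypothetical,
and this is a sketch of the pattern, not the kernel code:

#include <limits.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

static atomic_uint iocnt;
static pthread_mutex_t iolock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t iocv = PTHREAD_COND_INITIALIZER;

/* Take an I/O reference, unless the count is saturated (~EBUSY). */
static bool
io_enter(void)
{
	unsigned c = atomic_load_explicit(&iocnt, memory_order_relaxed);

	do {
		if (c == UINT_MAX)
			return false;
	} while (!atomic_compare_exchange_weak(&iocnt, &c, c + 1));
	return true;
}

/* Drop an I/O reference; wake io_drain on the 1 -> 0 transition. */
static void
io_exit(void)
{
	unsigned c = atomic_load_explicit(&iocnt, memory_order_relaxed);

	do {
		if (c == 1) {
			/*
			 * Possibly the final reference: do the decrement
			 * under the lock so io_drain cannot load a nonzero
			 * count and then sleep through the wakeup.
			 */
			pthread_mutex_lock(&iolock);
			if (atomic_fetch_sub(&iocnt, 1) == 1)
				pthread_cond_broadcast(&iocv);
			pthread_mutex_unlock(&iolock);
			return;
		}
	} while (!atomic_compare_exchange_weak(&iocnt, &c, c - 1));
}

/* Wait until all references are gone; caller prevents new io_enter. */
static void
io_drain(void)
{
	pthread_mutex_lock(&iolock);
	while (atomic_load_explicit(&iocnt, memory_order_relaxed) > 0)
		pthread_cond_wait(&iocv, &iolock);
	pthread_mutex_unlock(&iolock);
}

The essential property, in the sketch as in the kernel code, is that
the 1 -> 0 transition and the drain-side re-check happen under the same
lock, so the drainer can never observe a nonzero count and then miss
the final broadcast.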
@@ -258,6 +395,7 @@
sd->sd_refcnt = 1;
sd->sd_opencnt = 0;
sd->sd_bdevvp = NULL;
+ sd->sd_iocnt = 0;
sd->sd_opened = false;
sn->sn_dev = sd;
sd = NULL;
@@ -472,6 +610,7 @@
/* If the device is no longer in use, destroy our record. */
if (refcnt == 1) {
+ KASSERT(sd->sd_iocnt == 0);
KASSERT(sd->sd_opencnt == 0);
KASSERT(sd->sd_bdevvp == NULL);
kmem_free(sd, sizeof(*sd));
@@ -509,29 +648,26 @@
int a_mode;
kauth_cred_t a_cred;
} */ *ap = v;
- struct lwp *l;
- struct vnode *vp;
+ struct lwp *l = curlwp;
+ struct vnode *vp = ap->a_vp;
dev_t dev;
int error;
enum kauth_device_req req;
specnode_t *sn;
specdev_t *sd;
spec_ioctl_t ioctl;
- u_int gen;
- const char *name;
+ u_int gen = 0;
+ const char *name = NULL;
bool needclose = false;
struct partinfo pi;
-
- l = curlwp;
- vp = ap->a_vp;
+
+ KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
+ KASSERTMSG(vp->v_type == VBLK || vp->v_type == VCHR, "type=%d",
+ vp->v_type);
+
dev = vp->v_rdev;
sn = vp->v_specnode;
sd = sn->sn_dev;
- name = NULL;
- gen = 0;
-
- KASSERTMSG(vp->v_type == VBLK || vp->v_type == VCHR, "type=%d",
- vp->v_type);
/*
* Don't allow open if fs is mounted -nodev.
@@ -797,6 +933,8 @@
struct vnode *vp = ap->a_vp;
struct uio *uio = ap->a_uio;
struct lwp *l = curlwp;
+ struct specnode *sn;
+ dev_t dev;
struct buf *bp;
daddr_t bn;
int bsize, bscale;
@@ -819,9 +957,27 @@
switch (vp->v_type) {
case VCHR:
+ /*
+ * Release the lock while we sleep -- possibly
+ * indefinitely, if this is, e.g., a tty -- in
+ * cdev_read, so we don't hold up everything else that
+ * might want access to the vnode.
+ *
+ * But before we issue the read, take an I/O reference
+ * to the specnode so close will know when we're done
+ * reading. Note that the moment we release the lock,
+ * the vnode's identity may change; hence spec_io_enter
+ * may fail, and the caller may have a dead vnode on
+ * their hands, if the file system on which vp lived
+ * has been unmounted.
+ */
VOP_UNLOCK(vp);
- error = cdev_read(vp->v_rdev, uio, ap->a_ioflag);
- vn_lock(vp, LK_SHARED | LK_RETRY);
+ error = spec_io_enter(vp, &sn, &dev);
+ if (error)
+ goto out;
+ error = cdev_read(dev, uio, ap->a_ioflag);
+ spec_io_exit(vp, sn);
+out: vn_lock(vp, LK_SHARED | LK_RETRY);
return (error);
case VBLK:
@@ -898,6 +1054,8 @@
struct vnode *vp = ap->a_vp;
struct uio *uio = ap->a_uio;
struct lwp *l = curlwp;
+ struct specnode *sn;
+ dev_t dev;
struct buf *bp;
daddr_t bn;
int bsize, bscale;
@@ -913,9 +1071,27 @@
switch (vp->v_type) {
case VCHR:
+ /*
+ * Release the lock while we sleep -- possibly
+ * indefinitely, if this is, e.g., a tty -- in
+ * cdev_write, so we don't hold up everything else that
+ * might want access to the vnode.
+ *
+ * But before we issue the write, take an I/O reference
+ * to the specnode so close will know when we're done
+ * writing. Note that the moment we release the lock,
+ * the vnode's identity may change; hence spec_io_enter
+ * may fail, and the caller may have a dead vnode on
+ * their hands, if the file system on which vp lived
+ * has been unmounted.
+ */
VOP_UNLOCK(vp);
- error = cdev_write(vp->v_rdev, uio, ap->a_ioflag);
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
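
The diff is truncated at this point, but the spec_write VCHR path
visibly begins the same unlock/enter/io/exit/relock sequence as
spec_read above, differing only in the devsw call and the relock mode
(LK_EXCLUSIVE rather than LK_SHARED). Purely as an illustration -- this
hypothetical helper is not part of the change -- the shared sequence
could be factored as:

/*
 * Hypothetical factoring of the sequence shared by the VCHR paths
 * of spec_read and spec_write.  `op' would be cdev_read or
 * cdev_write; `lkflags' LK_SHARED or LK_EXCLUSIVE respectively.
 */
static int
spec_cdev_io(struct vnode *vp, struct uio *uio, int ioflag, int lkflags,
    int (*op)(dev_t, struct uio *, int))
{
	struct specnode *sn;
	dev_t dev;
	int error;

	VOP_UNLOCK(vp);		/* don't hold the vnode lock across a sleep */
	error = spec_io_enter(vp, &sn, &dev);
	if (error)
		goto out;	/* vnode revoked while unlocked */
	error = (*op)(dev, uio, ioflag);
	spec_io_exit(vp, sn);
out:	vn_lock(vp, lkflags | LK_RETRY);
	return error;
}

Either way, the caller gets vp back locked, whether or not the device
was revoked while the lock was dropped.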