tech-kern archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: Locking strategy for device deletion (also see PR kern/48536)



   Date: Wed, 08 Jun 2016 04:42:21 +1000
   from: matthew green <mrg%eterna.com.au@localhost>

   that one i think is a much larger issue that affects *all* of our
   drivers and needs a general fix where eg device_lookup_private()
   returns a reference counted value that must be returned, before the
   module can be considered ready to unload (this still leaves a very
   minor race between device_put(d); and return;...)

I have the attached patch in my tree to add reference counting to
device_private.  I haven't committed it yet because I haven't made a
proof-of-concept bug nor adapted any drivers to use it to demonstrate
fixing the bug.

I'm also not sure reference counts are the right way to go: that would
incur interprocessor synchronization for each device read/write/ioctl.
It may be that passive references, or per-CPU counters like dyoung@
mentioned, are a better way to go.

Per-CPU counters have the advantage that they do not require binding
the caller to a CPU, and the disadvantage that they take O(ndevices *
nCPUs) space versus O(ndevices + nCPUs) space.

We would also need devsw_detach to wait for users to drain, which in
turn would require all users to potentially notify it -- again, with
reference counts, passive references, or per-CPU counters.

Then, e.g., cdev_read would call cdevsw_lookup_acquire instead of
cdevsw_lookup, and after d->d_read returns, cdev_read would also call
cdevsw_release.  Finally, devsw_detach would wait for all such callers
to have called cdevsw_release.

int
cdev_read(dev_t dev, struct uio *uio, int flag)
{
	const struct cdevsw *d;
	int rv, mpflag;

	if ((d = cdevsw_lookup_acquire(dev)) == NULL)
		return ENXIO;

	DEV_LOCK(d);
	rv = (*d->d_read)(dev, uio, flag);
	DEV_UNLOCK(d);

	cdevsw_release(d);

	return rv;
}
? sys/sys/param.h.save
Index: sys/kern/subr_autoconf.c
===================================================================
RCS file: /cvsroot/src/sys/kern/subr_autoconf.c,v
retrieving revision 1.241
diff -p -u -r1.241 subr_autoconf.c
--- sys/kern/subr_autoconf.c	28 Mar 2016 09:50:40 -0000	1.241
+++ sys/kern/subr_autoconf.c	7 Jun 2016 22:12:39 -0000
@@ -219,6 +219,7 @@ static int config_finalize_done;
 /* list of all devices */
 static struct devicelist alldevs = TAILQ_HEAD_INITIALIZER(alldevs);
 static kmutex_t alldevs_mtx;
+static kcondvar_t device_refcnt_cv;
 static volatile bool alldevs_garbage = false;
 static volatile devgen_t alldevs_gen = 1;
 static volatile int alldevs_nread = 0;
@@ -342,6 +343,7 @@ config_init(void)
 	KASSERT(config_initialized == false);
 
 	mutex_init(&alldevs_mtx, MUTEX_DEFAULT, IPL_VM);
+	cv_init(&device_refcnt_cv, "dvrefcnt");
 
 	mutex_init(&config_misc_lock, MUTEX_DEFAULT, IPL_NONE);
 	cv_init(&config_misc_cv, "cfgmisc");
@@ -1254,6 +1256,10 @@ config_devunlink(device_t dev, struct de
 
 	KASSERT(mutex_owned(&alldevs_mtx));
 
+	/* Wait for users to drain.  */
+	while (dev->dv_refcnt)
+		cv_wait(&device_refcnt_cv, &alldevs_mtx);
+
  	/* Unlink from device list.  Link to garbage list. */
 	TAILQ_REMOVE(&alldevs, dev, dv_list);
 	TAILQ_INSERT_TAIL(garbage, dev, dv_list);
@@ -2217,6 +2223,74 @@ config_alldevs_exit(struct alldevs_foray
 }
 
 /*
+ * device_hold:
+ *
+ *	Add a reference to a device, or return EBUSY if there are too
+ *	many references.
+ */
+int
+device_hold(device_t dv)
+{
+	unsigned refcnt;
+
+	do {
+		refcnt = dv->dv_refcnt;
+		if (refcnt == UINT_MAX)
+			return EBUSY;
+	} while (atomic_cas_uint(&dv->dv_refcnt, refcnt, (refcnt + 1))
+	    != refcnt);
+
+	return 0;
+}
+
+/*
+ * device_rele:
+ *
+ *	Release a reference to a device.  If it drops to zero, notify
+ *	anyone waiting for the device to be unreferenced.
+ */
+void
+device_rele(device_t dv)
+{
+	unsigned refcnt;
+
+	do {
+		refcnt = dv->dv_refcnt;
+		if (refcnt == 1) {
+			mutex_enter(&alldevs_mtx);
+			KASSERT(mutex_owned(&alldevs_mtx));
+			if (atomic_dec_uint_nv(&dv->dv_refcnt) == 0)
+				cv_broadcast(&device_refcnt_cv);
+			mutex_exit(&alldevs_mtx);
+			return;
+		}
+	} while (atomic_cas_uint(&dv->dv_refcnt, refcnt, (refcnt - 1))
+	    != refcnt);
+}
+
+/*
+ * device_lookup_hold:
+ *
+ *	Look up a device instance for a given driver, and add a
+ *	reference to it.
+ */
+device_t
+device_lookup_hold(cfdriver_t cd, int unit)
+{
+	device_t dv;
+
+	mutex_enter(&alldevs_mtx);
+	if (unit < 0 || unit >= cd->cd_ndevs)
+		dv = NULL;
+	else if ((dv = cd->cd_devs[unit]) != NULL &&
+	    (dv->dv_del_gen != 0 || device_hold(dv) != 0))
+		dv = NULL;
+	mutex_exit(&alldevs_mtx);
+
+	return dv;
+}
+
+/*
  * device_lookup:
  *
  *	Look up a device instance for a given driver.
Index: sys/sys/device.h
===================================================================
RCS file: /cvsroot/src/sys/sys/device.h,v
retrieving revision 1.148
diff -p -u -r1.148 device.h
--- sys/sys/device.h	7 Dec 2015 11:38:46 -0000	1.148
+++ sys/sys/device.h	7 Jun 2016 22:12:40 -0000
@@ -159,6 +159,7 @@ struct device {
 	void		*dv_private;	/* this device's private storage */
 	int		*dv_locators;	/* our actual locators (optional) */
 	prop_dictionary_t dv_properties;/* properties dictionary */
+	unsigned	dv_refcnt;	/* reference count */
 
 	size_t		dv_activity_count;
 	void		(**dv_activity_handlers)(device_t, devactive_t);
@@ -487,7 +488,10 @@ void	config_twiddle_fn(void *);
 
 void	null_childdetached(device_t, device_t);
 
-device_t	device_lookup(cfdriver_t, int);
+int		device_hold(device_t);
+void		device_rele(device_t);
+device_t	device_lookup_hold(cfdriver_t, int);
+device_t	device_lookup(cfdriver_t, int); /* deprecated */
 void		*device_lookup_private(cfdriver_t, int);
 void		device_register(device_t, void *);
 void		device_register_post_config(device_t, void *);


Home | Main Index | Thread Index | Old Index