
CPU-local reference counts



Prompted by the discussion last week about making sure autoconf
devices and bdevsw/cdevsw instances don't go away at inopportune
moments during module unload, I threw together a generic sketch of the
CPU-local reference counts I noted, which I'm calling localcount(9).

This localcount(9) API behaves semantically like typical reference
counts, with different performance characteristics:

- Pro vs atomic reference counts: During normal operation, acquiring
  and releasing a reference needs no interprocessor synchronization.

- Pro vs psref(9): Like reference counts, localcount(9) references can
  be held not only across sleeps but also transferred from CPU to CPU
  or thread to thread.

- Con vs atomic reference counts: Draining references involves
  expensive interprocessor synchronization, like psref(9).

- Con: localcount(9) requires eight bytes of memory per object *per
  CPU*, which is usually much more than psref(9) and practically
  always much more than simple atomic reference counts.

So as a rough heuristic, localcount(9) should be used for classes of
objects of which there are maybe a few dozen instances but not a few
thousand instances -- e.g., autoconf devices, but not network flows --
and on which there may be a mixture of long-term I/O waits, such as
xyzread for a device xyz(4), and short-term fast operations, such as
xyzioctl(IODOSOMETHINGFASTBYREADINGFROMACPUREGISTER).

The files localcount.h and subr_localcount.c attached contain a
not-even-compile-tested draft sketch of an implementation.  The file
localcount_example.c attached contains a usage example.

I don't plan to commit this in the near future -- at the very least,
Someone^TM should try using it for, e.g., autoconf devices or devsw
instances, and make sure it works in practice.  I'm posting this just
for review of the concept.

Thoughts?

localcount.h:

/*	$NetBSD$	*/

/*-
 * Copyright (c) 2016 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Taylor R. Campbell.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef	_SYS_LOCALCOUNT_H
#define	_SYS_LOCALCOUNT_H

#ifndef _KERNEL
#error <sys/localcount.h> is for kernel consumers only.
#endif

#include <sys/types.h>

struct kcondvar;
struct kmutex;
struct percpu;

struct localcount {
	int64_t		*lc_totalp;
	struct percpu	*lc_percpu; /* int64_t */
};

int	localcount_init(struct localcount *);
void	localcount_drain(struct localcount *, struct kcondvar *,
	    struct kmutex *);
void	localcount_fini(struct localcount *);
void	localcount_acquire(struct localcount *);
void	localcount_release(struct localcount *, struct kcondvar *,
	    struct kmutex *);

#endif	/* _SYS_LOCALCOUNT_H */

subr_localcount.c:

/*	$NetBSD$	*/

/*-
 * Copyright (c) 2016 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Taylor R. Campbell.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * CPU-local reference counts
 *
 *	localcount(9) is a reference-counting scheme that involves no
 *	interprocessor synchronization most of the time, at the cost of
 *	eight bytes of memory per CPU per object and at the cost of
 *	expensive interprocessor synchronization to drain references.
 *
 *	localcount(9) references may be held across sleeps, may be
 *	transferred from CPU to CPU or thread to thread: they behave
 *	semantically like typical reference counts, with different
 *	pragmatic performance characteristics.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD$");

#include <sys/param.h>
#include <sys/types.h>
#include <sys/condvar.h>
#include <sys/errno.h>
#include <sys/intr.h>
#include <sys/localcount.h>
#include <sys/mutex.h>
#include <sys/percpu.h>
#include <sys/xcall.h>

/* Forward declaration: localcount_drain uses this before it is defined.  */
static void localcount_xc(void *, void *);

/*
 * localcount_init(lc)
 *
 *	Initialize a localcount object.  Returns 0 on success, error
 *	code on failure.  May fail to allocate memory for percpu(9).
 *
 *	The caller must call localcount_drain and then localcount_fini
 *	when done with lc.
 */
int
localcount_init(struct localcount *lc)
{

	lc->lc_totalp = NULL;
	lc->lc_percpu = percpu_alloc(sizeof(int64_t));
	if (lc->lc_percpu == NULL)
		return ENOMEM;

	return 0;
}

/*
 * localcount_drain(lc, cv, interlock)
 *
 *	Wait for all acquired references to lc to drain.  Caller must
 *	hold interlock; localcount_drain releases it during cross-calls
 *	and waits on cv.  The cv and interlock passed here must be the
 *	same as are passed to localcount_release for this lc.
 *
 *	Caller must guarantee that no new references can be acquired
 *	with localcount_acquire before calling localcount_drain.  For
 *	example, any object that may be found in a list and acquired
 *	must be removed from the list before localcount_drain.
 *
 *	The localcount object lc may be used only with localcount_fini
 *	after this, unless reinitialized after localcount_fini with
 *	localcount_init.
 */
void
localcount_drain(struct localcount *lc, kcondvar_t *cv, kmutex_t *interlock)
{
	int64_t total = 0;

	KASSERT(mutex_owned(interlock));
	KASSERT(lc->lc_totalp == NULL);

	/* Mark it draining.  */
	lc->lc_totalp = &total;

	/*
	 * Count up all references on all CPUs.
	 *
	 * This serves as a global memory barrier: after xc_wait, all
	 * CPUs will have witnessed the nonnull value of lc->lc_totalp,
	 * so that it is safe to wait on the cv for them.
	 */
	mutex_exit(interlock);
	xc_wait(xc_broadcast(0, &localcount_xc, lc, interlock));
	mutex_enter(interlock);

	/* Wait for remaining references to drain.  */
	while (total != 0) {
		/*
		 * At this point, now that we have added up all
		 * references on all CPUs, the total had better be
		 * nonnegative.
		 */
		KASSERTMSG((0 < total),
		    "negatively referenced localcount: %p, %"PRId64,
		    lc, total);
		cv_wait(cv, interlock);
	}

	/* Paranoia: Cause any further use of lc->lc_totalp to crash.  */
	lc->lc_totalp = (void *)(uintptr_t)1;
}

/*
 * localcount_fini(lc)
 *
 *	Finalize a localcount object, releasing any memory allocated
 *	for it.  Caller must have already called localcount_drain.
 */
void
localcount_fini(struct localcount *lc)
{

	KASSERT(lc->lc_totalp == (void *)(uintptr_t)1);
	percpu_free(lc->lc_percpu);
}

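/*
 * localcount_xc(cookie0, cookie1)
 *
 *	Cross-call handler: under the interlock, add this CPU's local
 *	count of references to lc to the total being computed by
 *	localcount_drain.
 */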
static void
localcount_xc(void *cookie0, void *cookie1)
{
	struct localcount *lc = cookie0;
	kmutex_t *interlock = cookie1;
	int64_t *localp;

	mutex_enter(interlock);
	localp = percpu_getref(lc->lc_percpu);
	*lc->lc_totalp += *localp;
	percpu_putref(lc->lc_percpu);
	mutex_exit(interlock);
}

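/*
 * localcount_adjust(lc, delta)
 *
 *	Add delta to this CPU's local count of references to lc.
 */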
static void
localcount_adjust(struct localcount *lc, int delta)
{
	int64_t *localp;

	localp = percpu_getref(lc->lc_percpu);
	*localp += delta;
	percpu_putref(lc->lc_percpu);
}

/*
 * localcount_acquire(lc)
 *
 *	Acquire a reference to lc.
 *
 *	The reference may be held across sleeps and may be migrated
 *	from CPU to CPU, or even thread to thread -- it is only
 *	counted, not associated with a particular concrete owner.
 *
 *	Involves no interprocessor synchronization.  May be used in any
 *	context: while a lock is held, within a pserialize(9) read
 *	section, in hard interrupt context (provided other users block
 *	hard interrupts), in soft interrupt context, in thread context,
 *	&c.
 *
 *	Caller must guarantee that there is no concurrent
 *	localcount_drain.  For example, any object that may be found in
 *	a list and acquired must be removed from the list before
 *	localcount_drain.
 */
void
localcount_acquire(struct localcount *lc)
{

	KASSERT(lc->lc_totalp == NULL);
	localcount_adjust(lc, +1);
}

/*
 * localcount_release(lc, cv, interlock)
 *
 *	Release a reference to lc.  If there is a concurrent
 *	localcount_drain and this may be the last reference, notify
 *	localcount_drain by acquiring interlock, waking cv, and
 *	releasing interlock.  The cv and interlock passed here must be
 *	the same as are passed to localcount_drain for this lc.
 *
 *	Involves no interprocessor synchronization unless there is a
 *	concurrent localcount_drain in progress.
 */
void
localcount_release(struct localcount *lc, kcondvar_t *cv, kmutex_t *interlock)
{
	int64_t *localp;
	int s;

	/*
	 * Block xcall so that if someone begins draining after we see
	 * lc->lc_totalp as null, then they won't start cv_wait until
	 * after they have counted this CPU's contributions.
	 *
	 * Otherwise, localcount_drain may notice an extant reference
	 * from this CPU and cv_wait for it, but having seen
	 * lc->lc_totalp as null, this CPU will not wake
	 * localcount_drain.
	 */
	s = splsoftserial();

	if (__predict_false(lc->lc_totalp != NULL)) {
		/*
		 * Slow path -- wake localcount_drain in case this is
		 * the last reference.
		 */
		mutex_enter(interlock);
		localcount_adjust(lc, -1);
		*lc->lc_totalp -= 1;
		if (*lc->lc_totalp == 0)
			cv_broadcast(cv);
		mutex_exit(interlock);
		goto out;
	}

	localcount_adjust(lc, -1);
out:	splx(s);
}

localcount_example.c:

#include <sys/types.h>
#include <sys/condvar.h>
#include <sys/kmem.h>
#include <sys/localcount.h>
#include <sys/mutex.h>
#include <sys/pserialize.h>
#include <sys/pslist.h>

/*
 * An object that may be acquired with a CPU-local reference should
 * have a struct localcount object embedded within it:
 */
struct obj {
	...
	struct localcount	obj_localcount;
	struct pslist_entry	obj_entry;
	...
};

/*
 * Global pserialized list of objects:
 */
struct {
	kmutex_t		lock;
	struct pslist_head	head;
	pserialize_t		psz;
	kcondvar_t		drain_cv;
} obj_list __cacheline_aligned;

/*
 * The localcount must be initialized with localcount_init:
 */
struct obj *
obj_create(int flags)
{
	struct obj *obj;
	int error;

	obj = kmem_alloc(sizeof(*obj), KM_SLEEP);
	...
	if (localcount_init(&obj->obj_localcount) != 0) {
		kmem_free(obj, sizeof(*obj));
		return NULL;
	}
	...

	/* Publish it in the list.  */
	PSLIST_ENTRY_INIT(obj, obj_entry);
	mutex_enter(&obj_list.lock);
	PSLIST_WRITER_INSERT_HEAD(&obj_list.head, obj, obj_entry);
	mutex_exit(&obj_list.lock);

	return obj;
}

/*
 * References to the object may be acquired with localcount_acquire,
 * guaranteeing the object will not be destroyed until after it is
 * released with localcount_release:
 */
void
process_obj(int key, int op)
{
	struct obj *obj;
	int s;

	s = pserialize_read_enter();
	PSLIST_READER_FOREACH(obj, &obj_list.head, struct obj,
	    obj_entry) {
		if (obj->obj_key == key) {
			localcount_acquire(&obj->obj_localcount);
			break;
		}
	}
	pserialize_read_exit(s);

	if (obj == NULL)
		return;

	perform_op(obj, op);

	localcount_release(&obj->obj_localcount, &obj_list.drain_cv,
	    &obj_list.lock);
}

/*
 * An object may be destroyed after (a) preventing new references from
 * being acquired by removing it from the list, and (b) waiting for all
 * extant references to be released by using localcount_drain:
 */
void
destroy_obj(int key)
{
	struct obj *obj;

	/* (a) Prevent new references.  */
	mutex_enter(&obj_list.lock);
	PSLIST_WRITER_FOREACH(obj, &obj_list.head, struct obj,
	    obj_entry) {
		if (obj->obj_key == key)
			break;
	}
	if (obj == NULL) {
		mutex_exit(&obj_list.lock);
		return;
	}

	/* Unpublish it, then wait for readers to finish with the list.  */
	PSLIST_WRITER_REMOVE(obj, obj_entry);
	pserialize_perform(obj_list.psz);

	/* (b) Wait for extant references to drain.  */
	localcount_drain(&obj->obj_localcount, &obj_list.drain_cv,
	    &obj_list.lock);
	mutex_exit(&obj_list.lock);

	/* Finally, destroy the object and free the memory.  */
	localcount_fini(&obj->obj_localcount);
	kmem_free(obj, sizeof(*obj));
}

