Subject: Preparing callout(9) for a HZ-less kernel
To: None <tech-kern@netbsd.org>
From: Joerg Sonnenberger <joerg@britannica.bec.de>
List: tech-kern
Date: 10/18/2007 21:44:35
--ReaqsoxgOBHFXBhH
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

Hi all,
the attached patch is a first big step towards allowing operation with
dynamic timers. I don't think the hashed timing wheel works very
effectively for the much larger range of possible values without adding
a lot of movement for the timers, so the red-black tree should be just
as efficient. Initial benchmarks from rmind@ have shown a small
regression in the MySQL benchmark, but I hope the move to dynamic
timers will ultimately justify this.
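
To make the ordering explicit: the tree sorts callouts by absolute
expiry time and uses the structure address as a tie-breaker, so two
callouts due in the same nanosecond still compare as distinct and
RB_INSERT keeps both. A small userland mock-up of that ordering (not
part of the patch; "fake_callout" is made up):

#include <stdio.h>
#include <time.h>

struct fake_callout {
	struct timespec c_time;		/* absolute expiry time */
};

static int
fake_callout_cmp(const struct fake_callout *l, const struct fake_callout *r)
{
	if (l->c_time.tv_sec != r->c_time.tv_sec)
		return l->c_time.tv_sec < r->c_time.tv_sec ? -1 : 1;
	if (l->c_time.tv_nsec != r->c_time.tv_nsec)
		return l->c_time.tv_nsec < r->c_time.tv_nsec ? -1 : 1;
	/*
	 * Same deadline: order by address so distinct entries never
	 * compare equal and both can live in the tree.
	 */
	return l < r ? -1 : (l > r ? 1 : 0);
}

int
main(void)
{
	struct fake_callout a = { .c_time = { .tv_sec = 10, .tv_nsec = 500 } };
	struct fake_callout b = { .c_time = { .tv_sec = 10, .tv_nsec = 500 } };

	printf("a vs b: %d\n", fake_callout_cmp(&a, &b));	/* never 0 */
	printf("a vs a: %d\n", fake_callout_cmp(&a, &a));	/* 0 */
	return 0;
}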

By dynamic timers I mean allowing platforms with programmable one-shot
timers to schedule callouts on a more precise basis. Together with the
removal of most catch-all interrupts like the slow/fast timers in the
network code, this will allow a lot more sleeping and therefore power
saving. For the network stack it can also improve performance by
allowing shorter times for retransmits etc.
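
To sketch where this is heading (nothing in this patch does it yet, and
cpu_timer_arm()/cpu_timer_idle() are hypothetical MD hooks, not an
existing interface): a port with a programmable one-shot timer could arm
the hardware for the earliest pending deadline instead of relying on the
hz tick, roughly like this:

/*
 * Hypothetical helper: program the next one-shot interrupt for the
 * earliest pending callout, or let the CPU sleep if there is none.
 * Called with callout_lock held.
 */
static void
callout_program_next(void)
{
	struct timespec now, delta;
	callout_impl_t *c;

	KASSERT(mutex_owned(&callout_lock));

	if ((c = RB_MIN(callout_tree, &callouts)) == NULL) {
		cpu_timer_idle();		/* nothing pending, sleep freely */
		return;
	}

	nanotime(&now);
	if (timespeccmp(&c->c_time, &now, <=)) {
		softint_schedule(callout_si);	/* already due */
		return;
	}

	timespecsub(&c->c_time, &now, &delta);
	cpu_timer_arm(&delta);			/* one-shot interrupt in 'delta' */
}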

The callout_schedule and callout_reset functions should both be
considered deprecated. I'm still going through the code to see how many
callers actually need the tick-based interface on a case-by-case basis,
but it doesn't look like we have many of them.
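
For the callers that can be converted, the idea is to set the function
once and then reschedule in real time units via the new interfaces, e.g.
(sc, sc_tick_ch and my_handler are made-up names, 500ms is arbitrary):

/* Before: tick-based, tied to hz. */
callout_reset(&sc->sc_tick_ch, hz / 2, my_handler, sc);

/* After: set the function once, typically at attach time ... */
callout_setfunc(&sc->sc_tick_ch, my_handler, sc);

/* ... and reschedule in real units wherever the timer is re-armed. */
callout_schedule_msec(&sc->sc_tick_ch, 500);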

Joerg

--ReaqsoxgOBHFXBhH
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="timeout.diff"

Index: kern/kern_timeout.c
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/kern/kern_timeout.c,v
retrieving revision 1.27
diff -u -r1.27 kern_timeout.c
--- kern/kern_timeout.c	8 Oct 2007 16:18:04 -0000	1.27
+++ kern/kern_timeout.c	18 Oct 2007 19:11:33 -0000
@@ -68,26 +68,6 @@
 #include <sys/cdefs.h>
 __KERNEL_RCSID(0, "$NetBSD: kern_timeout.c,v 1.27 2007/10/08 16:18:04 ad Exp $");
 
-/*
- * Timeouts are kept in a hierarchical timing wheel.  The c_time is the
- * value of the global variable "hardclock_ticks" when the timeout should
- * be called.  There are four levels with 256 buckets each. See 'Scheme 7'
- * in "Hashed and Hierarchical Timing Wheels: Efficient Data Structures
- * for Implementing a Timer Facility" by George Varghese and Tony Lauck.
- *
- * Some of the "math" in here is a bit tricky.  We have to beware of
- * wrapping ints.
- *
- * We use the fact that any element added to the queue must be added with
- * a positive time.  That means that any element `to' on the queue cannot
- * be scheduled to timeout further in time than INT_MAX, but c->c_time can
- * be positive or negative so comparing it with anything is dangerous. 
- * The only way we can use the c->c_time value in any predictable way is
- * when we calculate how far in the future `to' will timeout - "c->c_time
- * - hardclock_ticks".  The result will always be positive for future
- * timeouts and 0 or negative for due timeouts.
- */
-
 #define	_CALLOUT_PRIVATE
 
 #include <sys/param.h>
@@ -110,68 +90,32 @@
 #include <ddb/db_output.h>
 #endif
 
-#define BUCKETS		1024
-#define WHEELSIZE	256
-#define WHEELMASK	255
-#define WHEELBITS	8
-
-static struct callout_circq timeout_wheel[BUCKETS];	/* Queues of timeouts */
-static struct callout_circq timeout_todo;		/* Worklist */
-
-#define MASKWHEEL(wheel, time) (((time) >> ((wheel)*WHEELBITS)) & WHEELMASK)
-
-#define BUCKET(rel, abs)						\
-    (((rel) <= (1 << (2*WHEELBITS)))					\
-    	? ((rel) <= (1 << WHEELBITS))					\
-            ? &timeout_wheel[MASKWHEEL(0, (abs))]			\
-            : &timeout_wheel[MASKWHEEL(1, (abs)) + WHEELSIZE]		\
-        : ((rel) <= (1 << (3*WHEELBITS)))				\
-            ? &timeout_wheel[MASKWHEEL(2, (abs)) + 2*WHEELSIZE]		\
-            : &timeout_wheel[MASKWHEEL(3, (abs)) + 3*WHEELSIZE])
-
-#define MOVEBUCKET(wheel, time)						\
-    CIRCQ_APPEND(&timeout_todo,						\
-        &timeout_wheel[MASKWHEEL((wheel), (time)) + (wheel)*WHEELSIZE])
-
-/*
- * Circular queue definitions.
- */
-
-#define CIRCQ_INIT(list)						\
-do {									\
-        (list)->cq_next_l = (list);					\
-        (list)->cq_prev_l = (list);					\
-} while (/*CONSTCOND*/0)
-
-#define CIRCQ_INSERT(elem, list)					\
-do {									\
-        (elem)->cq_prev_e = (list)->cq_prev_e;				\
-        (elem)->cq_next_l = (list);					\
-        (list)->cq_prev_l->cq_next_l = (elem);				\
-        (list)->cq_prev_l = (elem);					\
-} while (/*CONSTCOND*/0)
-
-#define CIRCQ_APPEND(fst, snd)						\
-do {									\
-        if (!CIRCQ_EMPTY(snd)) {					\
-                (fst)->cq_prev_l->cq_next_l = (snd)->cq_next_l;		\
-                (snd)->cq_next_l->cq_prev_l = (fst)->cq_prev_l;		\
-                (snd)->cq_prev_l->cq_next_l = (fst);			\
-                (fst)->cq_prev_l = (snd)->cq_prev_l;			\
-                CIRCQ_INIT(snd);					\
-        }								\
-} while (/*CONSTCOND*/0)
-
-#define CIRCQ_REMOVE(elem)						\
-do {									\
-        (elem)->cq_next_l->cq_prev_e = (elem)->cq_prev_e;		\
-        (elem)->cq_prev_l->cq_next_e = (elem)->cq_next_e;		\
-} while (/*CONSTCOND*/0)
-
-#define CIRCQ_FIRST(list)	((list)->cq_next_e)
-#define CIRCQ_NEXT(elem)	((elem)->cq_next_e)
-#define CIRCQ_LAST(elem,list)	((elem)->cq_next_l == (list))
-#define CIRCQ_EMPTY(list)	((list)->cq_next_l == (list))
+static RB_HEAD(callout_tree, callout_impl) callouts;
+
+static int
+callout_cmp(const struct callout_impl *left, const struct callout_impl *right)
+{
+	if (left->c_time.tv_sec != right->c_time.tv_sec) {
+		if (left->c_time.tv_sec < right->c_time.tv_sec)
+			return -1;
+		else
+			return 1;
+	}
+	if (left->c_time.tv_nsec != right->c_time.tv_nsec) {
+		if (left->c_time.tv_nsec < right->c_time.tv_nsec)
+			return -1;
+		else
+			return 1;
+	}
+	if (left < right)
+		return -1;
+	if (left > right)
+		return 1;
+	return 0;
+}
+
+RB_PROTOTYPE_STATIC(callout_tree, callout_impl, c_link, callout_cmp)
+RB_GENERATE_STATIC(callout_tree, callout_impl, c_link, callout_cmp)
 
 static void	callout_softclock(void *);
 
@@ -183,6 +127,7 @@
 sleepq_t callout_sleepq;
 void	*callout_si;
 
+static struct evcnt callout_ev_idle;
 static struct evcnt callout_ev_late;
 static struct evcnt callout_ev_block;
 
@@ -252,13 +197,9 @@
 void
 callout_startup(void)
 {
-	int b;
-
 	KASSERT(sizeof(callout_impl_t) <= sizeof(callout_t));
 
-	CIRCQ_INIT(&timeout_todo);
-	for (b = 0; b < BUCKETS; b++)
-		CIRCQ_INIT(&timeout_wheel[b]);
+	RB_INIT(&callouts);
 
 	mutex_init(&callout_lock, MUTEX_SPIN, IPL_SCHED);
 	sleepq_init(&callout_sleepq, &callout_lock);
@@ -267,6 +208,8 @@
 	    NULL, "callout", "late");
 	evcnt_attach_dynamic(&callout_ev_block, EVCNT_TYPE_MISC,
 	    NULL, "callout", "block waiting");
+	evcnt_attach_dynamic(&callout_ev_idle, EVCNT_TYPE_MISC,
+	    NULL, "callout", "idle hardclock");
 }
 
 /*
@@ -326,89 +269,122 @@
 	c->c_magic = 0;
 }
 
-
 /*
- * callout_reset:
+ * callout_schedule:
  *
- *	Reset a callout structure with a new function and argument, and
- *	schedule it to run.
+ *	Schedule a callout to run.  The function and argument must
+ *	already be set in the callout structure.
  */
+static void
+callout_schedule_locked(callout_impl_t *c, const struct timespec *ts)
+{
+	struct timespec now;
+
+	KASSERT(c->c_magic == CALLOUT_MAGIC);
+	KASSERT(c->c_func != NULL);
+
+	nanotime(&now);
+
+	timespecadd(ts, &now, &c->c_time);
+
+	if ((c->c_flags & CALLOUT_PENDING) != 0)
+		RB_REMOVE(callout_tree, &callouts, c);
+	RB_INSERT(callout_tree, &callouts, c);
+
+	c->c_flags &= ~CALLOUT_FIRED;
+	c->c_flags |= CALLOUT_PENDING;
+}
+
 void
-callout_reset(callout_t *cs, int to_ticks, void (*func)(void *), void *arg)
+callout_schedule(callout_t *cs, int to_ticks)
 {
 	callout_impl_t *c = (callout_impl_t *)cs;
-	int old_time;
+	struct timespec ts;
 
-	KASSERT(to_ticks >= 0);
-	KASSERT(c->c_magic == CALLOUT_MAGIC);
-	KASSERT(func != NULL);
+	ts.tv_sec = to_ticks / hz;
+	ts.tv_nsec = (1000000000ULL * to_ticks / hz) % 1000000000ULL;
+
+	mutex_spin_enter(&callout_lock);
+	callout_schedule_locked(c, &ts);
+	mutex_spin_exit(&callout_lock);
+}
+
+void
+callout_schedule_sec(callout_t *cs, time_t sec)
+{
+	callout_impl_t *c = (callout_impl_t *)cs;
+	struct timespec ts;
+
+	ts.tv_sec = sec;
+	ts.tv_nsec = 0;
 
 	mutex_spin_enter(&callout_lock);
+	callout_schedule_locked(c, &ts);
+	mutex_spin_exit(&callout_lock);
+}
 
-	/* Initialize the time here, it won't change. */
-	old_time = c->c_time;
-	c->c_time = to_ticks + hardclock_ticks;
-	c->c_flags &= ~CALLOUT_FIRED;
+void
+callout_schedule_msec(callout_t *cs, unsigned long msec)
+{
+	callout_impl_t *c = (callout_impl_t *)cs;
+	struct timespec ts;
+
+	ts.tv_sec = msec / 1000;
+	ts.tv_nsec = msec % 1000 * 1000000;
+
+	mutex_spin_enter(&callout_lock);
+	callout_schedule_locked(c, &ts);
+	mutex_spin_exit(&callout_lock);
+}
+
+void
+callout_schedule_nsec(callout_t *cs, const struct timespec *ts)
+{
+	callout_impl_t *c = (callout_impl_t *)cs;
+
+	mutex_spin_enter(&callout_lock);
+	callout_schedule_locked(c, ts);
+	mutex_spin_exit(&callout_lock);
+}
+
+static void
+callout_setfunc_locked(callout_impl_t *c, void (*func)(void *), void *arg)
+{
+	KASSERT(c->c_magic == CALLOUT_MAGIC);
+	KASSERT(func != NULL);
 
 	c->c_func = func;
 	c->c_arg = arg;
+}
 
-	/*
-	 * If this timeout is already scheduled and now is moved
-	 * earlier, reschedule it now. Otherwise leave it in place
-	 * and let it be rescheduled later.
-	 */
-	if ((c->c_flags & CALLOUT_PENDING) != 0) {
-		if (c->c_time - old_time < 0) {
-			CIRCQ_REMOVE(&c->c_list);
-			CIRCQ_INSERT(&c->c_list, &timeout_todo);
-		}
-	} else {
-		c->c_flags |= CALLOUT_PENDING;
-		CIRCQ_INSERT(&c->c_list, &timeout_todo);
-	}
+void
+callout_setfunc(callout_t *cs, void (*func)(void *), void *arg)
+{
+	callout_impl_t *c = (callout_impl_t *)cs;
 
+	mutex_spin_enter(&callout_lock);
+	callout_setfunc_locked(c, func, arg);
 	mutex_spin_exit(&callout_lock);
 }
 
 /*
- * callout_schedule:
+ * callout_reset:
  *
- *	Schedule a callout to run.  The function and argument must
- *	already be set in the callout structure.
+ *	Reset a callout structure with a new function and argument, and
+ *	schedule it to run.
  */
 void
-callout_schedule(callout_t *cs, int to_ticks)
+callout_reset(callout_t *cs, int to_ticks, void (*func)(void *), void *arg)
 {
 	callout_impl_t *c = (callout_impl_t *)cs;
-	int old_time;
+	struct timespec ts;
 
-	KASSERT(to_ticks >= 0);
-	KASSERT(c->c_magic == CALLOUT_MAGIC);
-	KASSERT(c->c_func != NULL);
+	ts.tv_sec = to_ticks / hz;
+	ts.tv_nsec = (1000000000ULL * to_ticks / hz) % 1000000000ULL;
 
 	mutex_spin_enter(&callout_lock);
-
-	/* Initialize the time here, it won't change. */
-	old_time = c->c_time;
-	c->c_time = to_ticks + hardclock_ticks;
-	c->c_flags &= ~CALLOUT_FIRED;
-
-	/*
-	 * If this timeout is already scheduled and now is moved
-	 * earlier, reschedule it now. Otherwise leave it in place
-	 * and let it be rescheduled later.
-	 */
-	if ((c->c_flags & CALLOUT_PENDING) != 0) {
-		if (c->c_time - old_time < 0) {
-			CIRCQ_REMOVE(&c->c_list);
-			CIRCQ_INSERT(&c->c_list, &timeout_todo);
-		}
-	} else {
-		c->c_flags |= CALLOUT_PENDING;
-		CIRCQ_INSERT(&c->c_list, &timeout_todo);
-	}
-
+	callout_setfunc_locked(c, func, arg);
+	callout_schedule_locked(c, &ts);
 	mutex_spin_exit(&callout_lock);
 }
 
@@ -431,7 +407,7 @@
 		callout_barrier(c);
 
 	if ((c->c_flags & CALLOUT_PENDING) != 0)
-		CIRCQ_REMOVE(&c->c_list);
+		RB_REMOVE(callout_tree, &callouts, c);
 
 	expired = ((c->c_flags & CALLOUT_FIRED) != 0);
 	c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_FIRED);
@@ -441,19 +417,6 @@
 	return expired;
 }
 
-void
-callout_setfunc(callout_t *cs, void (*func)(void *), void *arg)
-{
-	callout_impl_t *c = (callout_impl_t *)cs;
-
-	KASSERT(c->c_magic == CALLOUT_MAGIC);
-
-	mutex_spin_enter(&callout_lock);
-	c->c_func = func;
-	c->c_arg = arg;
-	mutex_spin_exit(&callout_lock);
-}
-
 bool
 callout_expired(callout_t *cs)
 {
@@ -534,25 +497,23 @@
 void
 callout_hardclock(void)
 {
-	int needsoftclock;
+	struct timespec now;
+	bool needsoftclock;
+	callout_impl_t *c;
 
 	mutex_spin_enter(&callout_lock);
 
-	MOVEBUCKET(0, hardclock_ticks);
-	if (MASKWHEEL(0, hardclock_ticks) == 0) {
-		MOVEBUCKET(1, hardclock_ticks);
-		if (MASKWHEEL(1, hardclock_ticks) == 0) {
-			MOVEBUCKET(2, hardclock_ticks);
-			if (MASKWHEEL(2, hardclock_ticks) == 0)
-				MOVEBUCKET(3, hardclock_ticks);
-		}
-	}
-
-	needsoftclock = !CIRCQ_EMPTY(&timeout_todo);
+	if ((c = RB_MIN(callout_tree, &callouts)) != NULL) {
+		nanotime(&now);
+		needsoftclock = timespeccmp(&now, &c->c_time, >=);
+	} else
+		needsoftclock = false;
 	mutex_spin_exit(&callout_lock);
 
 	if (needsoftclock)
 		softint_schedule(callout_si);
+	else
+		callout_ev_idle.ev_count++;
 }
 
 /* ARGSUSED */
@@ -560,6 +521,7 @@
 callout_softclock(void *v)
 {
 	callout_impl_t *c;
+	struct timespec now;
 	struct cpu_info *ci;
 	void (*func)(void *);
 	void *arg;
@@ -571,52 +533,48 @@
 
 	mutex_spin_enter(&callout_lock);
 
-	while (!CIRCQ_EMPTY(&timeout_todo)) {
-		c = CIRCQ_FIRST(&timeout_todo);
+	while ((c = RB_MIN(callout_tree, &callouts)) != NULL) {
 		KASSERT(c->c_magic == CALLOUT_MAGIC);
 		KASSERT(c->c_func != NULL);
 		KASSERT((c->c_flags & CALLOUT_PENDING) != 0);
 		KASSERT((c->c_flags & CALLOUT_FIRED) == 0);
-		CIRCQ_REMOVE(&c->c_list);
 
-		/* If due run it, otherwise insert it into the right bucket. */
-		if (c->c_time - hardclock_ticks > 0) {
-			CIRCQ_INSERT(&c->c_list,
-			    BUCKET((c->c_time - hardclock_ticks), c->c_time));
-		} else {
-			if (c->c_time - hardclock_ticks < 0)
-				callout_ev_late.ev_count++;
-
-			c->c_flags ^= (CALLOUT_PENDING | CALLOUT_FIRED);
-			mpsafe = (c->c_flags & CALLOUT_MPSAFE);
-			func = c->c_func;
-			arg = c->c_arg;
-			c->c_oncpu = ci;
-			c->c_onlwp = l;
-
-			mutex_spin_exit(&callout_lock);
-			if (!mpsafe) {
-				KERNEL_LOCK(1, curlwp);
-				if (ci->ci_data.cpu_callout_cancel != c)
-					(*func)(arg);
-				KERNEL_UNLOCK_ONE(curlwp);
-			} else
-					(*func)(arg);
-			mutex_spin_enter(&callout_lock);
+		nanotime(&now);
+		if (timespeccmp(&now, &c->c_time, <))
+			break;
+		RB_REMOVE(callout_tree, &callouts, c);
 
-			/*
-			 * We can't touch 'c' here because it might be
-			 * freed already.  If LWPs waiting for callout
-			 * to complete, awaken them.
-			 */
-			ci->ci_data.cpu_callout_cancel = NULL;
-			ci->ci_data.cpu_callout = NULL;
-			if ((count = ci->ci_data.cpu_callout_nwait) != 0) {
-				ci->ci_data.cpu_callout_nwait = 0;
-				/* sleepq_wake() drops the lock. */
-				sleepq_wake(&callout_sleepq, ci, count);
-				mutex_spin_enter(&callout_lock);
-			}
+		callout_ev_late.ev_count++;
+
+		c->c_flags ^= (CALLOUT_PENDING | CALLOUT_FIRED);
+		mpsafe = (c->c_flags & CALLOUT_MPSAFE);
+		func = c->c_func;
+		arg = c->c_arg;
+		c->c_oncpu = ci;
+		c->c_onlwp = l;
+
+		mutex_spin_exit(&callout_lock);
+		if (!mpsafe) {
+			KERNEL_LOCK(1, curlwp);
+			if (ci->ci_data.cpu_callout_cancel != c)
+				(*func)(arg);
+			KERNEL_UNLOCK_ONE(curlwp);
+		} else
+			(*func)(arg);
+		mutex_spin_enter(&callout_lock);
+
+		/*
+		 * We can't touch 'c' here because it might be
+		 * freed already.  If LWPs waiting for callout
+		 * to complete, awaken them.
+		 */
+		ci->ci_data.cpu_callout_cancel = NULL;
+		ci->ci_data.cpu_callout = NULL;
+		if ((count = ci->ci_data.cpu_callout_nwait) != 0) {
+			ci->ci_data.cpu_callout_nwait = 0;
+			/* sleepq_wake() drops the lock. */
+			sleepq_wake(&callout_sleepq, ci, count);
+			mutex_spin_enter(&callout_lock);
 		}
 	}
 
@@ -624,56 +582,41 @@
 }
 
 #ifdef DDB
-static void
-db_show_callout_bucket(struct callout_circq *bucket)
+void
+db_show_callout(db_expr_t addr, bool haddr, db_expr_t count, const char *modif)
 {
-	callout_impl_t *c;
-	db_expr_t offset;
+	struct timespec ts, now;
+	struct callout_impl *c;
 	const char *name;
-	static char question[] = "?";
-
-	if (CIRCQ_EMPTY(bucket))
-		return;
+	db_expr_t offset;
+	static const char unknown_name[] = "?";
 
-	for (c = CIRCQ_FIRST(bucket); /*nothing*/; c = CIRCQ_NEXT(&c->c_list)) {
-		db_find_sym_and_offset((db_addr_t)(intptr_t)c->c_func, &name,
-		    &offset);
-		name = name ? name : question;
-#ifdef _LP64
-#define	POINTER_WIDTH	"%16lx"
-#else
-#define	POINTER_WIDTH	"%8lx"
-#endif
-		db_printf("%9d %2d/%-4d " POINTER_WIDTH "  %s\n",
-		    c->c_time - hardclock_ticks,
-		    (int)((bucket - timeout_wheel) / WHEELSIZE),
-		    (int)(bucket - timeout_wheel), (u_long) c->c_arg, name);
+	nanotime(&now);
 
-		if (CIRCQ_LAST(&c->c_list, bucket))
-			break;
+	db_printf("time since boot: %jd.%09ld (%d ticks)\n", (intmax_t)now.tv_sec,
+	    now.tv_nsec, hardclock_ticks);
+	if (RB_EMPTY(&callouts)) {
+		db_printf("no callouts are active\n");
+		return;
 	}
-}
-
-void
-db_show_callout(db_expr_t addr, bool haddr, db_expr_t count, const char *modif)
-{
-	int b;
-
-	db_printf("hardclock_ticks now: %d\n", hardclock_ticks);
 #ifdef _LP64
-	db_printf("    ticks  wheel               arg  func\n");
+	db_printf("       timeout                 arg  func\n");
 #else
-	db_printf("    ticks  wheel       arg  func\n");
+	db_printf("       timeout         arg  func\n");
 #endif
 
 	/*
-	 * Don't lock the callwheel; all the other CPUs are paused
+	 * Don't lock the callout tree; all the other CPUs are paused
 	 * anyhow, and we might be called in a circumstance where
 	 * some other CPU was paused while holding the lock.
 	 */
-
-	db_show_callout_bucket(&timeout_todo);
-	for (b = 0; b < BUCKETS; b++)
-		db_show_callout_bucket(&timeout_wheel[b]);
+	RB_FOREACH(c, callout_tree, &callouts) {
+		db_find_sym_and_offset((db_addr_t)(intptr_t)c->c_func, &name,
+		    &offset);
+		name = name ? name : unknown_name;
+		timespecsub(&c->c_time, &now, &ts);
+		db_printf("%5jd.%09ld %p %s\n",
+		    (intmax_t)ts.tv_sec, ts.tv_nsec, c->c_arg, name);
+	}
 }
 #endif /* DDB */
Index: sys/callout.h
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/sys/callout.h,v
retrieving revision 1.26
diff -u -r1.26 callout.h
--- sys/callout.h	10 Jul 2007 21:12:32 -0000	1.26
+++ sys/callout.h	18 Oct 2007 18:57:16 -0000
@@ -53,49 +53,34 @@
  * callout_impl_t.
  */
 typedef struct callout {
-	void	*_c_store[10];
+	void	*_c_store[16];
 } callout_t;
 
 /* Internal flags. */
-#define	CALLOUT_PENDING		0x0002	/* callout is on the queue */
-#define	CALLOUT_FIRED		0x0004	/* callout has fired */
-#define	CALLOUT_INVOKING	0x0008	/* callout function is being invoked */
+#define	CALLOUT_PENDING		0x0001	/* callout is on the queue */
+#define	CALLOUT_FIRED		0x0002	/* callout has fired */
+#define	CALLOUT_INVOKING	0x0004	/* callout function is being invoked */
 
 /* End-user flags. */
 #define	CALLOUT_MPSAFE		0x0100	/* does not need kernel_lock */
 #define	CALLOUT_FLAGMASK	0xff00
 
 #ifdef _CALLOUT_PRIVATE
-
-/* The following funkyness is to appease gcc3's strict aliasing. */
-struct callout_circq {
-	/* next element */
-	union {
-		struct callout_impl	*elem;
-		struct callout_circq	*list;
-	} cq_next;
-	/* previous element */
-	union {
-		struct callout_impl	*elem;
-		struct callout_circq	*list;
-	} cq_prev;
-};
-#define	cq_next_e	cq_next.elem
-#define	cq_prev_e	cq_prev.elem
-#define	cq_next_l	cq_next.list
-#define	cq_prev_l	cq_prev.list
+#include <sys/tree.h>
+#include <sys/time.h>
 
 typedef struct callout_impl {
-	struct callout_circq c_list;		/* linkage on queue */
+	RB_ENTRY(callout_impl)	c_link;		/* linkage on queue */
+	struct timespec		c_time;		/* when callout fires */
 	void	(*c_func)(void *);		/* function to call */
 	void	*c_arg;				/* function argument */
 	void	*c_oncpu;			/* non-NULL while running */
 	void	*c_onlwp;			/* non-NULL while running */
-	int	c_time;				/* when callout fires */
 	u_int	c_flags;			/* state of this entry */
 	u_int	c_runwait;			/* number of waiters */
 	u_int	c_magic;			/* magic number */
 } callout_impl_t;
+
 #define	CALLOUT_MAGIC		0x11deeba1
 
 #endif	/* _CALLOUT_PRIVATE */
@@ -110,6 +95,9 @@
 void	callout_setfunc(callout_t *, void (*)(void *), void *);
 void	callout_reset(callout_t *, int, void (*)(void *), void *);
 void	callout_schedule(callout_t *, int);
+void	callout_schedule_sec(callout_t *, time_t);
+void	callout_schedule_msec(callout_t *, unsigned long);
+void	callout_schedule_nsec(callout_t *, const struct timespec *);
 bool	callout_stop(callout_t *);
 bool	callout_pending(callout_t *);
 bool	callout_expired(callout_t *);

--ReaqsoxgOBHFXBhH--