Subject: Improved callout() code
To: None <tech-kern@netbsd.org>
From: David Laight <david@l8s.co.uk>
List: tech-kern
Date: 10/24/2002 15:04:13
The implementation of callout() (in kern_clock.c) is approximately
O(time) for each callout (on a moderately busy system).

This is alleviated slightly by the 'hint' (shortest time to expire
in this bucket) for a slight increase in processing cost.
(but won't make much difference for busy systems, especially
since nfs_timer() is called every 5ms or every tick)

Additionally the callwheel_size is based on NPROC for (very)
historic reasons.  This is particularly pointless since the
distribution of expiry times won't depend on the number of
processes.

For a default i386 GENERIC kernel, the callout table has
1024 slots (16kb).  With HZ=100 each slot is processed every 10
seconds, so callouts for >10 seconds are checked every 10 seconds.

The attached patch reduces:
- the time to start or stop a callout
- the number of times each callout is looked at
- the size of the callout table

There are three wheels on my wagon^H^H^H^H^H implementation.

The basic idea is that callouts that expire within the next
second are linked to the slot for their expiry tick (as now).
Callouts that expire between 1 second and 1 minute are linked
to a slot for the second in which they expire.  Every second
all the callouts from one such slot are relinked to the 'tick'
slots.  Callouts for longer than 1 minute are linked to a
per-minute slot in a third wheel.
Callouts for longer than 1 hour sit in the outer wheel and are
checked only once an hour.

Of course the numbers are all powers of 2 of HZ!
The inner wheel has 2^n > HZ entries (i.e. 128 if HZ=100)
The middle and outer wheels have 64 each.
(This is actually quite generous for the non-inner wheels)
For HZ=100 this is 256 slots (1kb).

Caveats:
- The order in which callouts fire is (typically) the reverse of the
  old code.  This shouldn't matter, reversing the lists (XXX comments)
  will 'correct' this most of the time.
- I probably put callouts into the outer wheel when the middle one
  would do.
- Leaving the table as a big array is nasty :-)

	David

Index: kern/kern_clock.c
===================================================================
RCS file: /cvsroot/syssrc/sys/kern/kern_clock.c,v
retrieving revision 1.80
diff -u -r1.80 kern_clock.c
--- kern/kern_clock.c	2002/08/07 05:16:22	1.80
+++ kern/kern_clock.c	2002/10/24 13:24:35
@@ -108,6 +108,8 @@
 #include <sys/gmon.h>
 #endif
 
+#define nelem(x) (sizeof (x) / sizeof *(x))
+
 /*
  * Clock handling routines.
  *
@@ -355,25 +357,56 @@
  * Implementation of a Timer Facility" in the Proceedings of the 11th
  * ACM Annual Symposium on Operating System Principles, Austin, Texas,
  * November 1987.
- */
-struct callout_queue *callwheel;
-int	callwheelsize, callwheelbits, callwheelmask;
+ *
+ * The above design was actually O(time) for each callout (because callouts
+ * that are longer than the table size are revisited every turn of the
+ * wheel).  The 'hint' code alleviated this slightly but the distribution
+ * of timeouts is such that the table size ought to be determined by hz
+ * not maxproc.
+ * Using nested wheels means that timers that expire well into the future
+ * are never looked at.  This means the 'hint' code can be removed
+ * simplifying the common paths and reducing the size of the inner
+ * wheel to around 1 second (was 1024 entries for a default i386 build).
+ */
+struct callout_queue *callwheel;	/* all wheels */
+int callwheelsize;			/* total number of wheel entries */
+
+static int inner_wheel_size;		/* entries in inner wheel */
+static int inner_wheel_mask;		/* mask for inner wheel */
+static int inner_wheel_bits;		/* # bits in mask */
+
+/* Two outer wheels of 64 entries mean we look at 'long expiry'
+   callouts less than once an hour. */
+#define MIDDLE_WHEEL_BITS	6
+#define MIDDLE_WHEEL_SIZE	(1 << MIDDLE_WHEEL_BITS)
+#define MIDDLE_WHEEL_MASK	(MIDDLE_WHEEL_SIZE - 1)
+#define OUTER_WHEEL_BITS	6
+#define OUTER_WHEEL_SIZE	(1 << OUTER_WHEEL_BITS)
+#define OUTER_WHEEL_MASK	(OUTER_WHEEL_SIZE - 1)
+static u_int softmiddle_ticks;
+static u_int softouter_ticks;
 
-static struct callout *nextsoftcheck;	/* next callout to be checked */
+/* Next callout to be checked, (to catch deleted callouts) */
+static struct callout * volatile nextsoftcheck;
 
 #ifdef CALLWHEEL_STATS
-int	     *callwheel_sizes;		/* per-bucket length count */
-struct evcnt callwheel_collisions;	/* number of hash collisions */
-struct evcnt callwheel_maxlength;	/* length of the longest hash chain */
+struct evcnt callwheel_maxlength;	/* length of longest chain requeued */
 struct evcnt callwheel_count;		/* # callouts currently */
 struct evcnt callwheel_established;	/* # callouts established */
 struct evcnt callwheel_fired;		/* # callouts that fired */
 struct evcnt callwheel_disestablished;	/* # callouts disestablished */
 struct evcnt callwheel_changed;		/* # callouts changed */
 struct evcnt callwheel_softclocks;	/* # times softclock() called */
-struct evcnt callwheel_softchecks;	/* # checks per softclock() */
 struct evcnt callwheel_softempty;	/* # empty buckets seen */
-struct evcnt callwheel_hintworked;	/* # times hint saved scan */
+struct evcnt callwheel_relink_inner;	/* # moved to inner wheel */
+struct evcnt callwheel_relink_middle;	/* # moved to middle wheel */
+struct evcnt callwheel_notmoved;	/* # left in outer wheel */
+
+static int count_functions;
+struct func_table {
+	void	(*func)(void *);
+	u_int	count;
+} func_table[128];
 #endif /* CALLWHEEL_STATS */
 
 /*
@@ -385,7 +418,7 @@
 #define	MAX_SOFTCLOCK_STEPS	100
 #endif
 
-struct simplelock callwheel_slock;
+static struct simplelock callwheel_slock;
 
 #define	CALLWHEEL_LOCK(s)						\
 do {									\
@@ -403,7 +436,6 @@
 
 /*
  * These are both protected by callwheel_lock.
- * XXX SHOULD BE STATIC!!
  */
 u_int64_t hardclock_ticks, softclock_ticks;
 
@@ -890,7 +922,8 @@
 	 */
 	simple_lock(&callwheel_slock);	/* already at splclock() */
 	hardclock_ticks++;
-	if (! TAILQ_EMPTY(&callwheel[hardclock_ticks & callwheelmask].cq_q)) {
+	delta = hardclock_ticks & inner_wheel_mask;
+	if (!delta || !LIST_EMPTY(&callwheel[delta].cq_q)) {
 		simple_unlock(&callwheel_slock);
 		if (CLKF_BASEPRI(frame)) {
 			/*
@@ -913,13 +946,94 @@
 #endif
 		}
 		return;
-	} else if (softclock_running == 0 &&
-		   (softclock_ticks + 1) == hardclock_ticks) {
-		softclock_ticks++;
 	}
+	if (softclock_running == 0 && (softclock_ticks + 1) == hardclock_ticks)
+		softclock_ticks++;
 	simple_unlock(&callwheel_slock);
 }
 
+#ifdef CALLWHEEL_STATS
+static void
+count_function(void (*func)(void *))
+{
+	struct func_table *f;
+
+	for (f = func_table; ; f++) {
+		if (!f->func) {
+			if ((size_t)(f - func_table) >= nelem(func_table) - 1)
+				return;
+			f->func = func;
+			break;
+		}
+		if (f->func == func)
+			break;
+	}
+	f->count++;
+}
+#endif
+
+static void
+requeue(struct callout_queue *bucket, int s)
+{
+	struct callout_queue *nb;
+	struct callout *c, *next;
+	u_int idx;
+#ifdef CALLWHEEL_STATS
+	u_int depth = 0;
+#endif
+
+	c = LIST_FIRST(&bucket->cq_q);
+	LIST_INIT(&bucket->cq_q);
+	/* XXX maybe we ought to reverse the list before processing it */
+	for (; c; c = next) {
+#ifdef CALLWHEEL_STATS
+		if (++depth > callwheel_maxlength.ev_count)
+			callwheel_maxlength.ev_count = depth;
+#endif
+		next = LIST_NEXT(c, c_link);
+		idx = c->c_time - softclock_ticks;
+		if (idx >= inner_wheel_size << MIDDLE_WHEEL_BITS) {
+			/* this one isn't cooked yet... */
+#ifdef CALLWHEEL_STATS
+			callwheel_notmoved.ev_count++;
+#endif
+			LIST_INSERT_HEAD(&bucket->cq_q, c, c_link);
+			continue;
+		}
+		/* move to a middle or inner... */
+		if (idx >= inner_wheel_size) {
+			/* middle... */
+			idx = inner_wheel_size + (idx >> inner_wheel_bits);
+#ifdef CALLWHEEL_STATS
+			callwheel_relink_middle.ev_count++;
+#endif
+		}
+#ifdef CALLWHEEL_STATS
+		else
+			callwheel_relink_inner.ev_count++;
+#endif
+		nb = &callwheel[idx];
+		LIST_INSERT_HEAD(&nb->cq_q, c, c_link);
+	}
+}
+
+static void
+outer_wheels(int s)
+{
+	u_int idx;
+
+	/* middle wheel first */
+	idx = ++softmiddle_ticks & MIDDLE_WHEEL_MASK;
+	requeue(&callwheel[inner_wheel_size + idx], s);
+
+	/* and outer one if middle wrapped */
+	if (idx)
+		return;
+
+	idx = ++softouter_ticks & OUTER_WHEEL_MASK;
+	requeue(&callwheel[inner_wheel_size + MIDDLE_WHEEL_SIZE + idx], s);
+}
+
 /*
  * Software (low priority) clock interrupt.
  * Run periodic events from timeout queue.
@@ -933,7 +1047,6 @@
 	void (*func)(void *);
 	void *arg;
 	int s, idx;
-	int steps = 0;
 
 	CALLWHEEL_LOCK(s);
 
@@ -945,61 +1058,39 @@
 
 	while (softclock_ticks != hardclock_ticks) {
 		softclock_ticks++;
-		idx = (int)(softclock_ticks & callwheelmask);
+		idx = (int)softclock_ticks & inner_wheel_mask;
+		if (__predict_false(!idx))
+			outer_wheels(s);
 		bucket = &callwheel[idx];
-		c = TAILQ_FIRST(&bucket->cq_q);
-		if (c == NULL) {
 #ifdef CALLWHEEL_STATS
+		if (LIST_EMPTY(&bucket->cq_q)) {
 			callwheel_softempty.ev_count++;
-#endif
 			continue;
 		}
-		if (softclock_ticks < bucket->cq_hint) {
-#ifdef CALLWHEEL_STATS
-			callwheel_hintworked.ev_count++;
 #endif
-			continue;
-		}
-		bucket->cq_hint = UQUAD_MAX;
-		while (c != NULL) {
+		/* XXX maybe we should reverse the list */
+		for (c = LIST_FIRST(&bucket->cq_q); c; c = nextsoftcheck){
+			nextsoftcheck = LIST_NEXT(c, c_link);
+			/* All these SHOULD have expired... */
+			if (c->c_time != softclock_ticks)
+				panic("softclock: callout %p not expired\n", c);
+			LIST_REMOVE(c, c_link);
+			c->c_link.le_prev = 0;
 #ifdef CALLWHEEL_STATS
-			callwheel_softchecks.ev_count++;
-#endif
-			if (c->c_time != softclock_ticks) {
-				if (c->c_time < bucket->cq_hint)
-					bucket->cq_hint = c->c_time;
-				c = TAILQ_NEXT(c, c_link);
-				if (++steps >= MAX_SOFTCLOCK_STEPS) {
-					nextsoftcheck = c;
-					/* Give interrupts a chance. */
-					CALLWHEEL_UNLOCK(s);
-					CALLWHEEL_LOCK(s);
-					c = nextsoftcheck;
-					steps = 0;
-				}
-			} else {
-				nextsoftcheck = TAILQ_NEXT(c, c_link);
-				TAILQ_REMOVE(&bucket->cq_q, c, c_link);
-#ifdef CALLWHEEL_STATS
-				callwheel_sizes[idx]--;
-				callwheel_fired.ev_count++;
-				callwheel_count.ev_count--;
-#endif
-				func = c->c_func;
-				arg = c->c_arg;
-				c->c_func = NULL;
-				c->c_flags &= ~CALLOUT_PENDING;
-				CALLWHEEL_UNLOCK(s);
-				(*func)(arg);
-				CALLWHEEL_LOCK(s);
-				steps = 0;
-				c = nextsoftcheck;
-			}
+			callwheel_fired.ev_count++;
+			callwheel_count.ev_count--;
+			if (count_functions)
+				count_function(c->c_func);
+#endif
+			func = c->c_func;
+			arg = c->c_arg;
+			c->c_func = 0;
+			CALLWHEEL_UNLOCK(s);
+			(*func)(arg);
+			CALLWHEEL_LOCK(s);
 		}
-		if (TAILQ_EMPTY(&bucket->cq_q))
-			bucket->cq_hint = UQUAD_MAX;
 	}
-	nextsoftcheck = NULL;
+	nextsoftcheck = 0;
 	softclock_running = 0;
 	CALLWHEEL_UNLOCK(s);
 }
@@ -1009,14 +1100,19 @@
  *
  *	Determine how many callwheels are necessary and
  *	set hash mask.  Called from allocsys().
+ *	Set inner wheel to have entries for 1 second
  */
 void
 callout_setsize(void)
 {
+	int sz, bits;
 
-	for (callwheelsize = 1; callwheelsize < ncallout; callwheelsize <<= 1)
-		/* loop */ ;
-	callwheelmask = callwheelsize - 1;
+	for (bits = 4, sz = 1 << bits; sz < hz; sz <<= 1)
+		bits++;
+	inner_wheel_bits = bits;
+	inner_wheel_size = sz;
+	inner_wheel_mask = sz - 1;
+	callwheelsize = sz + MIDDLE_WHEEL_SIZE + OUTER_WHEEL_SIZE;
 }
 
 /*
@@ -1029,20 +1125,14 @@
 {
 	int i;
 
-	for (i = 0; i < callwheelsize; i++) {
-		callwheel[i].cq_hint = UQUAD_MAX;
-		TAILQ_INIT(&callwheel[i].cq_q);
-	}
+	for (i = 0; i < callwheelsize; i++)
+		LIST_INIT(&callwheel[i].cq_q);
 
 	simple_lock_init(&callwheel_slock);
 
 #ifdef CALLWHEEL_STATS
-	evcnt_attach_dynamic(&callwheel_collisions, EVCNT_TYPE_MISC,
-	    NULL, "callwheel", "collisions");
-	evcnt_attach_dynamic(&callwheel_maxlength, EVCNT_TYPE_MISC,
-	    NULL, "callwheel", "maxlength");
 	evcnt_attach_dynamic(&callwheel_count, EVCNT_TYPE_MISC,
-	    NULL, "callwheel", "count");
+	    NULL, "callwheel", "active");
 	evcnt_attach_dynamic(&callwheel_established, EVCNT_TYPE_MISC,
 	    NULL, "callwheel", "established");
 	evcnt_attach_dynamic(&callwheel_fired, EVCNT_TYPE_MISC,
@@ -1055,8 +1145,14 @@
 	    NULL, "callwheel", "softclocks");
 	evcnt_attach_dynamic(&callwheel_softempty, EVCNT_TYPE_MISC,
 	    NULL, "callwheel", "softempty");
-	evcnt_attach_dynamic(&callwheel_hintworked, EVCNT_TYPE_MISC,
-	    NULL, "callwheel", "hintworked");
+	evcnt_attach_dynamic(&callwheel_maxlength, EVCNT_TYPE_MISC,
+	    NULL, "callwheel", "max relink chain");
+	evcnt_attach_dynamic(&callwheel_relink_inner, EVCNT_TYPE_MISC,
+	    NULL, "callwheel", "relink inner");
+	evcnt_attach_dynamic(&callwheel_relink_middle, EVCNT_TYPE_MISC,
+	    NULL, "callwheel", "relink middle");
+	evcnt_attach_dynamic(&callwheel_notmoved, EVCNT_TYPE_MISC,
+	    NULL, "callwheel", "notmoved");
 #endif /* CALLWHEEL_STATS */
 }
 
@@ -1083,6 +1179,8 @@
 {
 	struct callout_queue *bucket;
 	int s;
+	u_int ndx;
+	u_int64_t c_time;
 
 	if (ticks <= 0)
 		ticks = 1;
@@ -1093,7 +1191,7 @@
 	 * If this callout's timer is already running, cancel it
 	 * before we modify it.
 	 */
-	if (c->c_flags & CALLOUT_PENDING) {
+	if (c->c_link.le_prev) {
 		callout_stop_locked(c);	/* Already locked */
 #ifdef CALLWHEEL_STATS
 		callwheel_changed.ev_count++;
@@ -1102,27 +1200,33 @@
 
 	c->c_arg = arg;
 	c->c_func = func;
-	c->c_flags = CALLOUT_ACTIVE | CALLOUT_PENDING;
-	c->c_time = hardclock_ticks + ticks;
-
-	bucket = &callwheel[c->c_time & callwheelmask];
-
-#ifdef CALLWHEEL_STATS
-	if (! TAILQ_EMPTY(&bucket->cq_q))
-		callwheel_collisions.ev_count++;
-#endif
+	c->c_flags = CALLOUT_ACTIVE;
+	c_time = hardclock_ticks + ticks;
+	c->c_time = c_time;
+
+	/* sort out bucket index... */
+	ticks = c_time - softclock_ticks;
+	ndx = c_time;
+	if (ticks < inner_wheel_size)
+		/* callout goes into inner wheel */
+		ndx &= inner_wheel_mask;
+	else {
+		ndx >>= inner_wheel_bits;
+		if (ticks < inner_wheel_size << MIDDLE_WHEEL_BITS)
+			/* middle wheel */
+			ndx = inner_wheel_size + (ndx & MIDDLE_WHEEL_MASK);
+		else
+			/* outer wheel */
+			ndx = inner_wheel_size + MIDDLE_WHEEL_SIZE +
+				((ndx >> MIDDLE_WHEEL_BITS) & OUTER_WHEEL_MASK);
+	}
+	bucket = &callwheel[ndx];
 
-	TAILQ_INSERT_TAIL(&bucket->cq_q, c, c_link);
-	if (c->c_time < bucket->cq_hint)
-		bucket->cq_hint = c->c_time;
+	LIST_INSERT_HEAD(&bucket->cq_q, c, c_link);
 
 #ifdef CALLWHEEL_STATS
 	callwheel_count.ev_count++;
 	callwheel_established.ev_count++;
-	if (++callwheel_sizes[c->c_time & callwheelmask] >
-	    callwheel_maxlength.ev_count)
-		callwheel_maxlength.ev_count =
-		    callwheel_sizes[c->c_time & callwheelmask];
 #endif
 
 	CALLWHEEL_UNLOCK(s);
@@ -1136,29 +1240,29 @@
 static void
 callout_stop_locked(struct callout *c)
 {
-	struct callout_queue *bucket;
+	/* XXX MP - need to worry about us deleting an entry for which
+	   the callout routine is actually active.
+	   One option is to loop here waiting for the callout routine
+	   to return (provided we aren't on the cpu that is executing
+	   the callout - ie called from the callout routine itself). */
 
 	/*
 	 * Don't attempt to delete a callout that's not on the queue.
 	 */
-	if ((c->c_flags & CALLOUT_PENDING) == 0) {
-		c->c_flags &= ~CALLOUT_ACTIVE;
+	c->c_flags &= ~CALLOUT_ACTIVE;
+	if (!c->c_link.le_prev)
 		return;
-	}
-
-	c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
 
+	/* If we are going to delete the one that softclock() was going to
+	   process next then advance the one softclock() will use. */
 	if (nextsoftcheck == c)
-		nextsoftcheck = TAILQ_NEXT(c, c_link);
+		nextsoftcheck = LIST_NEXT(c, c_link);
 
-	bucket = &callwheel[c->c_time & callwheelmask];
-	TAILQ_REMOVE(&bucket->cq_q, c, c_link);
-	if (TAILQ_EMPTY(&bucket->cq_q))
-		bucket->cq_hint = UQUAD_MAX;
+	LIST_REMOVE(c, c_link);
+	c->c_link.le_prev = 0;
 #ifdef CALLWHEEL_STATS
 	callwheel_count.ev_count--;
 	callwheel_disestablished.ev_count++;
-	callwheel_sizes[c->c_time & callwheelmask]--;
 #endif
 
 	c->c_func = NULL;
@@ -1180,49 +1284,6 @@
 	CALLWHEEL_UNLOCK(s);
 }
 
-#ifdef CALLWHEEL_STATS
-/*
- * callout_showstats:
- *
- *	Display callout statistics.  Call it from DDB.
- */
-void
-callout_showstats(void)
-{
-	u_int64_t curticks;
-	int s;
-
-	s = splclock();
-	curticks = softclock_ticks;
-	splx(s);
-
-	printf("Callwheel statistics:\n");
-	printf("\tCallouts currently queued: %llu\n",
-	    (long long) callwheel_count.ev_count);
-	printf("\tCallouts established: %llu\n",
-	    (long long) callwheel_established.ev_count);
-	printf("\tCallouts disestablished: %llu\n",
-	    (long long) callwheel_disestablished.ev_count);
-	if (callwheel_changed.ev_count != 0)
-		printf("\t\tOf those, %llu were changes\n",
-		    (long long) callwheel_changed.ev_count);
-	printf("\tCallouts that fired: %llu\n",
-	    (long long) callwheel_fired.ev_count);
-	printf("\tNumber of buckets: %d\n", callwheelsize);
-	printf("\tNumber of hash collisions: %llu\n",
-	    (long long) callwheel_collisions.ev_count);
-	printf("\tMaximum hash chain length: %llu\n",
-	    (long long) callwheel_maxlength.ev_count);
-	printf("\tSoftclocks: %llu, Softchecks: %llu\n",
-	    (long long) callwheel_softclocks.ev_count,
-	    (long long) callwheel_softchecks.ev_count);
-	printf("\t\tEmpty buckets seen: %llu\n",
-	    (long long) callwheel_softempty.ev_count);
-	printf("\t\tTimes hint saved scan: %llu\n",
-	    (long long) callwheel_hintworked.ev_count);
-}
-#endif
-
 /*
  * Compute number of hz until specified time.  Used to compute second
  * argument to callout_reset() from an absolute time.
@@ -1482,7 +1543,7 @@
  * For uncompensated quartz crystal oscillatores and nominal update
  * intervals less than 1024 s, operation should be in phase-lock mode
  * (STA_FLL = 0), where the loop is disciplined to phase. For update
- * intervals greater than thiss, operation should be in frequency-lock
+ * intervals greater than this, operation should be in frequency-lock
  * mode (STA_FLL = 1), where the loop is disciplined to frequency.
  *
  * Note: splclock() is in effect.
@@ -1811,3 +1872,115 @@
 	clkinfo.stathz = stathz ? stathz : hz;
 	return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo)));
 }
+
+#ifdef DDB
+#include <machine/db_machdep.h>
+#include <ddb/db_interface.h>
+#include <ddb/db_output.h>
+#include <ddb/db_sym.h>
+
+/*
+ * callout_showstats:
+ *
+ *	Display callout statistics.  Call it from DDB.
+ */
+static void
+callout_showstats(void)
+{
+#ifdef CALLWHEEL_STATS
+	u_int64_t curticks;
+	int s;
+
+	s = splclock();
+	curticks = softclock_ticks;
+	splx(s);
+
+	db_printf("Callwheel statistics (at %llx):\n", (long long)curticks);
+	db_printf("\tCallouts currently queued: %llu\n",
+	    (long long) callwheel_count.ev_count);
+	db_printf("\tCallouts established: %llu\n",
+	    (long long) callwheel_established.ev_count);
+	db_printf("\tCallouts disestablished: %llu\n",
+	    (long long) callwheel_disestablished.ev_count);
+	if (callwheel_changed.ev_count != 0)
+		db_printf("\t\tOf those, %llu were changes\n",
+		    (long long) callwheel_changed.ev_count);
+	db_printf("\tCallouts that fired: %llu\n",
+	    (long long) callwheel_fired.ev_count);
+	db_printf("\tNumber of buckets: %d (%d,%d,%d)\n",
+	    callwheelsize, inner_wheel_size,
+	    MIDDLE_WHEEL_SIZE, OUTER_WHEEL_SIZE);
+	db_printf("\tLongest chain relinked: %llu\n",
+	    (long long) callwheel_maxlength.ev_count);
+	db_printf("\tCallouts relinked to inner wheel: %llu\n",
+	    (long long) callwheel_relink_inner.ev_count);
+	db_printf("\tCallouts relinked to middle wheel: %llu\n",
+	    (long long) callwheel_relink_middle.ev_count);
+	db_printf("\tCallouts left in outer wheel: %llu\n",
+	    (long long) callwheel_notmoved.ev_count);
+	db_printf("\tSoftclocks: %llu Empty buckets seen: %llu\n",
+	    (long long) callwheel_softclocks.ev_count,
+	    (long long) callwheel_softempty.ev_count);
+#endif
+}
+
+static void
+callout_showprocs(void)
+{
+#ifdef CALLWHEEL_STATS
+	struct func_table *f;
+
+	if (!count_functions) {
+		db_printf("enabling function counts\n");
+		count_functions = 1;
+		return;
+	}
+
+	db_printf("     count function\n");
+	for (f = func_table; f->func; f++) {
+		db_printf("%10u ", f->count);
+		db_printsym((intptr_t)f->func, DB_STGY_PROC, db_printf);
+		db_printf("\n");
+	}
+#endif
+}
+
+void
+db_show_callout(db_expr_t addr, int haddr, db_expr_t count, char *modif)
+{
+	int i;
+	u_int64_t offset = 0;
+
+	switch (*modif) {
+	case 'r':
+		offset = hardclock_ticks;
+		break;
+	case 0:
+	case 'b':
+		break;
+	case 'p':
+		callout_showprocs();
+		return;
+	case 's':
+		callout_showstats();
+		return;
+	default:
+		return;
+	}
+
+	db_printf("bucket  callout     time fl func [file:line] (arg)\n");
+	for (i = 0; i < callwheelsize; i++) {
+		struct callout_queue *bucket = &callwheel[i];
+		struct callout *c;
+
+		LIST_FOREACH(c, &bucket->cq_q, c_link) {
+			db_printf("%4x %p %8llx %2x ",
+			    i, c, (long long)(c->c_time - offset), c->c_flags);
+			db_printsym((intptr_t)c->c_func,DB_STGY_PROC,db_printf);
+			db_printf(" (");
+			db_printsym((intptr_t)c->c_arg, DB_STGY_ANY, db_printf);
+			db_printf(")\n");
+		}
+	}
+}
+#endif
Index: kern/kern_allocsys.c
===================================================================
RCS file: /cvsroot/syssrc/sys/kern/kern_allocsys.c,v
retrieving revision 1.21
diff -u -r1.21 kern_allocsys.c
--- kern/kern_allocsys.c	2002/09/27 15:37:43	1.21
+++ kern/kern_allocsys.c	2002/10/24 13:25:08
@@ -136,9 +136,6 @@
 		callout_setsize();
 
 	ALLOCSYS(v, callwheel, struct callout_queue, callwheelsize);
-#ifdef CALLWHEEL_STATS
-	ALLOCSYS(v, callwheel_sizes, int, callwheelsize);
-#endif
 #ifdef SYSVSHM
 	ALLOCSYS(v, shmsegs, struct shmid_ds, shminfo.shmmni);
 #endif
Index: ddb/db_xxx.c
===================================================================
RCS file: /cvsroot/syssrc/sys/ddb/db_xxx.c,v
retrieving revision 1.17
diff -u -r1.17 db_xxx.c
--- ddb/db_xxx.c	2002/08/26 11:34:29	1.17
+++ ddb/db_xxx.c	2002/10/24 13:25:28
@@ -157,7 +157,7 @@
 			case 'n':
 				db_printf("%10d %10d %10d %d %#7x %16s %7.7s\n",
 				    pp ? pp->p_pid : -1, p->p_pgrp->pg_id,
-				    p->p_cred->p_ruid, p->p_stat, p->p_flag,
+				    p->p_ucred->cr_ruid, p->p_stat, p->p_flag,
 				    p->p_comm, (p->p_wchan && p->p_wmesg) ?
 					p->p_wmesg : "");
 				break;
@@ -185,36 +185,6 @@
 	}
 }
 
-void
-db_show_callout(db_expr_t addr, int haddr, db_expr_t count, char *modif)
-{
-	uint64_t hint;
-	int i;
-
-	for (i = 0; i < callwheelsize; i++) {
-		struct callout_queue *bucket = &callwheel[i];
-		struct callout *c = TAILQ_FIRST(&bucket->cq_q);
-
-		if (c) {
-			db_printf("bucket %d (hint %llx):\n", i,
-			    (long long) bucket->cq_hint);
-			hint = UQUAD_MAX;
-			while (c) {
-				if (c->c_time < hint)
-					hint = c->c_time;
-				db_printf("%p: time %llx arg %p flags %x "
-				    "func %p: ", c, (long long) c->c_time,
-				    c->c_arg, c->c_flags, c->c_func);
-				db_printsym((u_long)c->c_func, DB_STGY_PROC,
-				    db_printf);
-				db_printf("\n");
-				c = TAILQ_NEXT(c, c_link);
-			}
-			if (bucket->cq_hint < hint)
-				printf("** HINT IS STALE\n");
-		}
-	}
-}
 
 void
 db_dmesg(db_expr_t addr, int haddr, db_expr_t count, char *modif)
Index: sys/callout.h
===================================================================
RCS file: /cvsroot/syssrc/sys/sys/callout.h,v
retrieving revision 1.16
diff -u -r1.16 callout.h
--- sys/callout.h	2001/09/11 04:32:19	1.16
+++ sys/callout.h	2002/10/24 13:28:46
@@ -82,44 +82,33 @@
 #include <sys/queue.h>
 
 struct callout_queue {
-	uint64_t	cq_hint;		/* earliest callout in bkt */
-	TAILQ_HEAD(, callout) cq_q;		/* callouts in this bucket */
+	LIST_HEAD(, callout) cq_q;		/* callouts in this bucket */
 };
 
 struct callout {
-	TAILQ_ENTRY(callout) c_link;
+	LIST_ENTRY(callout) c_link;
 	u_int64_t	c_time;			/* when callout fires */
 	void		*c_arg;			/* function argument */
-	void		(*c_func)(void *);	/* functiuon to call */
+	void		(*c_func)(void *);	/* function to call */
 	int		c_flags;		/* state of this entry */
+#define	CALLOUT_ACTIVE		0x0001		/* callout is active */
 };
 
-#define	CALLOUT_ACTIVE		0x0001	/* callout is active */
-#define	CALLOUT_PENDING		0x0002	/* callout time has not yet arrived */
-
 #define	CALLOUT_INITIALIZER	{ { NULL, NULL }, 0, NULL, NULL, 0 }
 
 #ifdef _KERNEL
 extern struct callout_queue *callwheel;
-extern int callwheelsize, callwheelbits, callwheelmask;
-extern int ncallout;
-
-#ifdef CALLWHEEL_STATS
-extern int *callwheel_sizes;		/* for allocsys() */
-#endif
+extern int callwheelsize;
 
 void	callout_setsize(void);
 void	callout_startup(void);
 void	callout_init(struct callout *);
 void	callout_reset(struct callout *, int, void (*)(void *), void *);
 void	callout_stop(struct callout *);
-#ifdef CALLWHEEL_STATS
-void	callout_showstats(void);
-#endif
 
 #define	callout_active(c)	((c)->c_flags & CALLOUT_ACTIVE)
-#define	callout_pending(c)	((c)->c_flags & CALLOUT_PENDING)
-#define	callout_expired(c)	(callout_pending((c)) == 0)
+#define	callout_pending(c)	((c)->c_link.le_prev)
+#define	callout_expired(c)	(!callout_pending((c)))
 
 #define	callout_deactivate(c)	((c)->c_flags &= ~CALLOUT_ACTIVE)
 #endif /* _KERNEL */
-- 
David Laight: david@l8s.co.uk