Subject: i/o scheduling (was Re: NEW_BUFQ_STRATEGY)
To: None <tls@rek.tjls.com>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-perform
Date: 12/14/2003 11:16:53
--NextPart-20031214111413-0395500
Content-Type: Text/Plain; charset=us-ascii

hi,

> One thing I'm not so sure about is using a FCFS queue for the "time
> sensitive" requests.  I think that the average latency for requests from
> that queue is probably significantly increased by using FCFS rather than
> the increasing-block-number sort, because we lose the benefit of
> readahead, which will be particularly severe if the queues are long.  The
> sort seems particularly likely to be beneficial given the rather large
> number of requests we take from the queues in a "burst" and the consequent
> likelihood that we'd get a lot of track cache hits.

Then what do you think about the attached one?  It keeps a CSCAN queue
per priority and serves the queues round-robin with per-priority burst
limits, so time-sensitive requests still get sorted service (and thus
the readahead/track-cache benefit) rather than FCFS.
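
For reference, a driver would use it roughly like this (just a sketch
against the current bufq interface; "sc" and its sc_bufq member are a
hypothetical softc, not part of the patch):

	/* at attach time: select the method this patch adds */
	bufq_alloc(&sc->sc_bufq, BUFQ_PRIOCSCAN | BUFQ_SORT_RAWBLOCK);

	/* per request: tag a priority, then queue it */
	BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
	BUFQ_PUT(&sc->sc_bufq, bp);

	/* in the start routine: take the next request to issue */
	bp = BUFQ_GET(&sc->sc_bufq);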

YAMAMOTO Takashi

--NextPart-20031214111413-0395500
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="a.diff"

Index: subr_disk.c
===================================================================
--- subr_disk.c	(revision 457)
+++ subr_disk.c	(working copy)
@@ -758,6 +758,297 @@ bufq_prio_get(struct bufq_state *bufq, i
 	return (bp);
 }
 
+
+/*
+ * Cyclical scan (CSCAN)
+ */
+TAILQ_HEAD(bqhead, buf);
+struct cscan_queue {
+	struct bqhead cq_head[2];	/* actual lists of buffers */
+	int cq_idx;			/* current list index */
+	int cq_lastcylinder;		/* b_cylinder of the last request */
+	daddr_t cq_lastrawblkno;	/* b_rawblkno of the last request */
+};
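+
+/*
+ * the two lists implement the cyclic sweep: requests at or beyond the
+ * last-served position are kept sorted on the active list
+ * (cq_head[cq_idx]); requests behind it are sorted onto the other
+ * list, which becomes the active one once the current sweep drains.
+ */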
+
+static __inline int cscan_empty(const struct cscan_queue *);
+static void cscan_put(struct cscan_queue *, struct buf *, int);
+static struct buf *cscan_get(struct cscan_queue *, int);
+static void cscan_init(struct cscan_queue *);
+
+static __inline int
+cscan_empty(const struct cscan_queue *q)
+{
+
+	return TAILQ_EMPTY(&q->cq_head[0]) && TAILQ_EMPTY(&q->cq_head[1]);
+}
+
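+/*
+ * cscan_put: insert a request in sort order.  a request that sorts
+ * before the last-served position goes onto the inactive list, to be
+ * served on the next sweep.
+ */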
+static void
+cscan_put(struct cscan_queue *q, struct buf *bp, int sortby)
+{
+	struct buf tmp;
+	struct buf *it;
+	struct bqhead *bqh;
+	int idx;
+
+	tmp.b_cylinder = q->cq_lastcylinder;
+	tmp.b_rawblkno = q->cq_lastrawblkno;
+
+	if (buf_inorder(bp, &tmp, sortby))
+		idx = 1 - q->cq_idx;
+	else
+		idx = q->cq_idx;
+
+	bqh = &q->cq_head[idx];
+
+	TAILQ_FOREACH(it, bqh, b_actq)
+		if (buf_inorder(it, bp, sortby))
+			break;
+
+	if (it != NULL)
+		TAILQ_INSERT_BEFORE(it, bp, b_actq);
+	else
+		TAILQ_INSERT_TAIL(bqh, bp, b_actq);
+}
+
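+/*
+ * cscan_get: return the first request of the current sweep, wrapping
+ * around to the other list when the current one is empty.  if "remove"
+ * is set, also dequeue the request and record the new head position.
+ */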
+static struct buf *
+cscan_get(struct cscan_queue *q, int remove)
+{
+	int idx = q->cq_idx;
+	struct bqhead *bqh;
+	struct buf *bp;
+
+	bqh = &q->cq_head[idx];
+	bp = TAILQ_FIRST(bqh);
+
+	if (bp == NULL) {
+		/* switch queue */
+		idx = 1 - idx;
+		bqh = &q->cq_head[idx];
+		bp = TAILQ_FIRST(bqh);
+	}
+
+	KDASSERT((bp != NULL && !cscan_empty(q)) ||
+	         (bp == NULL && cscan_empty(q)));
+
+	if (bp != NULL && remove) {
+		q->cq_idx = idx;
+		TAILQ_REMOVE(bqh, bp, b_actq);
+
+		q->cq_lastcylinder = bp->b_cylinder;
+		q->cq_lastrawblkno =
+		    bp->b_rawblkno + (bp->b_bcount >> DEV_BSHIFT);
+	}
+
+	return (bp);
+}
+
+static void
+cscan_init(struct cscan_queue *q)
+{
+
+	TAILQ_INIT(&q->cq_head[0]);
+	TAILQ_INIT(&q->cq_head[1]);
+}
+
+
+/*
+ * Per-priority CSCAN.
+ *
+ * XXX we should probably have a way to raise the priority of
+ * requests that are already on a queue.
+ */
+#define	PRIOCSCAN_NQUEUE	3
+
+struct priocscan_queue {
+	struct cscan_queue q_queue;
+	int q_burst;			/* # of requests left in this burst */
+};
+
+struct bufq_priocscan {
+	struct priocscan_queue bq_queue[PRIOCSCAN_NQUEUE];
+
+#if 0
+	/*
+	 * XXX using a "global" head position could reduce positioning
+	 * time when switching between queues, although it might hurt
+	 * fairness.
+	 */
+	daddr_t bq_lastrawblkno;
+	int bq_lastcylinder;
+#endif
+};
+
+/*
+ * how many requests to serve from a queue while requests are pending
+ * on the other queues.
+ *
+ * XXX tune
+ */
+const int priocscan_burst[] = {
+	64, 16, 4
+};
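+/*
+ * e.g. if all three queues stay busy, requests end up being served in
+ * roughly a 64:16:4 ratio across the priorities.
+ */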
+
+static void bufq_priocscan_put(struct bufq_state *, struct buf *);
+static struct buf *bufq_priocscan_get(struct bufq_state *, int);
+static void bufq_priocscan_init(struct bufq_state *);
+static __inline struct cscan_queue *bufq_priocscan_selectqueue(
+    struct bufq_priocscan *, const struct buf *);
+
+static __inline struct cscan_queue *
+bufq_priocscan_selectqueue(struct bufq_priocscan *q, const struct buf *bp)
+{
+	static const int priocscan_priomap[] = {
+		[BPRIO_TIMENONCRITICAL] = 2,
+		[BPRIO_TIMELIMITED] = 1,
+		[BPRIO_TIMECRITICAL] = 0
+	};
+
+	return &q->bq_queue[priocscan_priomap[BIO_GETPRIO(bp)]].q_queue;
+}
+
+static void
+bufq_priocscan_put(struct bufq_state *bufq, struct buf *bp)
+{
+	struct bufq_priocscan *q = bufq->bq_private;
+	struct cscan_queue *cq;
+	const int sortby = bufq->bq_flags & BUFQ_SORT_MASK;
+
+#if 1
+	/* debug: stamp the enqueue time for the latency statistics */
+	bp->b_bufqpriv = hardclock_ticks;
+#endif
+	cq = bufq_priocscan_selectqueue(q, bp);
+	cscan_put(cq, bp, sortby);
+}
+
+#if 1
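+/*
+ * debug statistics: queueing time in hardclock ticks per priority,
+ * indexed by 2 - BIO_GETPRIO(bp), so that index 0 is time-critical.
+ */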
+int bmaxtime[3];
+int bavgtime[3];
+int btotaltime[3];
+int bnum[3];
+int bcut;
+#endif
+
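+/*
+ * bufq_priocscan_get: scan from the highest priority for the first
+ * non-empty queue whose burst count isn't exhausted yet.  if every
+ * non-empty queue has used up its burst, reset all the counts and
+ * serve the highest-priority non-empty queue.
+ */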
+static struct buf *
+bufq_priocscan_get(struct bufq_state *bufq, int remove)
+{
+	struct bufq_priocscan *q = bufq->bq_private;
+	struct priocscan_queue *pq, *npq;
+	struct priocscan_queue *first; /* first non-empty queue */
+	const struct priocscan_queue *epq;
+	const struct cscan_queue *cq;
+	struct buf *bp;
+	boolean_t single; /* true if there's only one non-empty queue */
+
+	pq = &q->bq_queue[0];
+	epq = pq + PRIOCSCAN_NQUEUE;
+	for (; pq < epq; pq++) {
+		cq = &pq->q_queue;
+		if (!cscan_empty(cq))
+			break;
+	}
+	if (pq == epq) {
+		/* there are no requests */
+		return NULL;
+	}
+
+	first = pq;
+	single = TRUE;
+	for (npq = first + 1; npq < epq; npq++) {
+		cq = &npq->q_queue;
+		if (!cscan_empty(cq)) {
+			single = FALSE;
+			if (pq->q_burst > 0)
+				break;
+			pq = npq;
+		}
+	}
+	if (single) {
+		/*
+		 * there's only one non-empty queue; just serve it.
+		 */
+		pq = first;
+	} else if (pq->q_burst > 0) {
+		/*
+		 * XXX we account only by the number of requests.  is that
+		 * good enough?
+		 */
+		pq->q_burst--;
+	} else {
+		/*
+		 * no queue was selected; every non-empty queue has
+		 * exhausted its burst count.
+		 */
+		int i;
+#ifdef DEBUG
+		for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
+			pq = &q->bq_queue[i];
+			cq = &pq->q_queue;
+			if (!cscan_empty(cq) && pq->q_burst)
+				panic("%s: inconsistent burst count", __func__);
+		}
+#endif /* DEBUG */
+
+		/*
+		 * reset burst counts
+		 */
+		for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
+			pq = &q->bq_queue[i];
+			pq->q_burst = priocscan_burst[i];
+		}
+
+		/*
+		 * serve the first non-empty queue.
+		 */
+		pq = first;
+	}
+
+	KDASSERT(!cscan_empty(&pq->q_queue));
+	bp = cscan_get(&pq->q_queue, remove);
+	KDASSERT(bp != NULL);
+	KDASSERT(&pq->q_queue == bufq_priocscan_selectqueue(q, bp));
+
+#if 1
+	if (remove) {
+		int now = hardclock_ticks;
+		int dif = now - bp->b_bufqpriv;
+		int type = 2 - BIO_GETPRIO(bp);
+
+		if (bmaxtime[type] < dif)
+			bmaxtime[type] = dif;
+		btotaltime[type] += dif;
+		bnum[type]++;
+		bavgtime[type] = btotaltime[type] / bnum[type];
+		if (btotaltime[type] > INT_MAX / 2) {
+			/*
+			 * halve the accumulators to avoid overflow;
+			 * this preserves the average.
+			 */
+			btotaltime[type] /= 2;
+			bnum[type] /= 2;
+			bcut++;
+		}
+	}
+#endif
+
+	return bp;
+}
+
+static void
+bufq_priocscan_init(struct bufq_state *bufq)
+{
+	struct bufq_priocscan *q;
+	int i;
+
+	bufq->bq_get = bufq_priocscan_get;
+	bufq->bq_put = bufq_priocscan_put;
+	bufq->bq_private = malloc(sizeof(struct bufq_priocscan),
+	    M_DEVBUF, M_ZERO);
+
+	q = bufq->bq_private;
+	for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
+		struct cscan_queue *cq = &q->bq_queue[i].q_queue;
+
+		cscan_init(cq);
+	}
+}
+
+
 /*
  * Create a device buffer queue.
  */
@@ -808,6 +1099,9 @@ bufq_alloc(struct bufq_state *bufq, int 
 		TAILQ_INIT(&prio->bq_read);
 		TAILQ_INIT(&prio->bq_write);
 		break;
+	case BUFQ_PRIOCSCAN:
+		bufq_priocscan_init(bufq);
+		break;
 	default:
 		panic("bufq_alloc: method out of range");
 	}

--NextPart-20031214111413-0395500--