Subject: Re: NEW_BUFQ_STRATEGY
To: None <chuq@chuq.com>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-kern
Date: 12/04/2003 10:38:26
--NextPart-20031204103217-0108700
Content-Type: Text/Plain; charset=us-ascii

hi,

> On Sun, Nov 30, 2003 at 10:33:47AM -0800, Jason Thorpe wrote:
> > 
> > On Nov 28, 2003, at 2:10 PM, Chuck Silvers wrote:
> > 
> > >has anyone had any problems with NEW_BUFQ_STRATEGY lately?
> > >is there consensus on whether or not this is ready to become the 
> > >default?
> > 
> > I think the consensus is that it's stable, but I heard that it can have 
> > some pretty horrible performance effects with some file systems if 
> > they're exported by NFS.
> 
> I tried bonnie++ over NFS, and NEW_BUFQ_STRATEGY didn't affect the results
> significantly.  if this effect really does exist, I don't know how to
> trigger it.

because synchronous writes, which nfsd often requests, easily suffer?
i guess explicitly distinguishing sync/async requests like the
attached diff makes it better.

YAMAMOTO Takashi

--NextPart-20031204103217-0108700
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="bprio.diff"

Index: miscfs/genfs/genfs_vnops.c
===================================================================
--- miscfs/genfs/genfs_vnops.c	(revision 444)
+++ miscfs/genfs/genfs_vnops.c	(working copy)
@@ -822,6 +822,10 @@ genfs_getpages(void *v)
 		    "bp %p offset 0x%x bcount 0x%x blkno 0x%x",
 		    bp, offset, iobytes, bp->b_blkno);
 
+		if (async)
+			BIO_SETPRIO(bp, BPRIO_TIMELIMITED);
+		else
+			BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
 		VOP_STRATEGY(bp);
 	}
 
@@ -1489,6 +1493,10 @@ genfs_gop_write(struct vnode *vp, struct
 		UVMHIST_LOG(ubchist,
 		    "vp %p offset 0x%x bcount 0x%x blkno 0x%x",
 		    vp, offset, bp->b_bcount, bp->b_blkno);
+		if (async)
+			BIO_SETPRIO(bp, BPRIO_TIMELIMITED); /* XXXnot exactly */
+		else
+			BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
 		VOP_STRATEGY(bp);
 	}
 	if (skipbytes) {
Index: ufs/ufs/ufs_bmap.c
===================================================================
--- ufs/ufs/ufs_bmap.c	(revision 266)
+++ ufs/ufs/ufs_bmap.c	(working copy)
@@ -243,6 +243,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, run
 			trace(TR_BREADMISS, pack(vp, size), metalbn);
 			bp->b_blkno = blkptrtodb(ump, daddr);
 			bp->b_flags |= B_READ;
+			BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
 			VOP_STRATEGY(bp);
 			curproc->p_stats->p_ru.ru_inblock++;	/* XXX */
 			if ((error = biowait(bp)) != 0) {
Index: ufs/ffs/ffs_inode.c
===================================================================
--- ufs/ffs/ffs_inode.c	(revision 266)
+++ ufs/ffs/ffs_inode.c	(working copy)
@@ -563,6 +563,7 @@ ffs_indirtrunc(ip, lbn, dbn, lastbn, lev
 		if (bp->b_bcount > bp->b_bufsize)
 			panic("ffs_indirtrunc: bad buffer size");
 		bp->b_blkno = dbn;
+		BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
 		VOP_STRATEGY(bp);
 		error = biowait(bp);
 	}
Index: kern/vfs_bio.c
===================================================================
--- kern/vfs_bio.c	(revision 413)
+++ kern/vfs_bio.c	(working copy)
@@ -272,6 +272,10 @@ bio_doread(vp, blkno, size, cred, async)
 	if (!ISSET(bp->b_flags, (B_DONE | B_DELWRI))) {
 		/* Start I/O for the buffer. */
 		SET(bp->b_flags, B_READ | async);
+		if (async)
+			BIO_SETPRIO(bp, BPRIO_TIMELIMITED);
+		else
+			BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
 		VOP_STRATEGY(bp);
 
 		/* Pay for the read. */
@@ -426,6 +430,11 @@ bwrite(bp)
 	simple_unlock(&bp->b_interlock);
 	splx(s);
 
+	if (sync)
+		BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
+	else
+		BIO_SETPRIO(bp, BPRIO_TIMELIMITED);
+
 	VOP_STRATEGY(bp);
 
 	if (sync) {
@@ -757,6 +766,7 @@ start:
 	} else {
 		allocbuf(bp, size);
 	}
+	BIO_SETPRIO(bp, BPRIO_DEFAULT);
 	return (bp);
 }
 
@@ -781,6 +791,7 @@ geteblk(size)
 	simple_unlock(&bp->b_interlock);
 	splx(s);
 	allocbuf(bp, size);
+	BIO_SETPRIO(bp, BPRIO_DEFAULT);
 	return (bp);
 }
 
@@ -1019,6 +1030,7 @@ biodone(bp)
 	if (ISSET(bp->b_flags, B_DONE))
 		panic("biodone already");
 	SET(bp->b_flags, B_DONE);		/* note that it's done */
+	BIO_SETPRIO(bp, BPRIO_DEFAULT);
 
 	if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_complete)
 		(*bioops.io_complete)(bp);
Index: kern/subr_disk.c
===================================================================
--- kern/subr_disk.c	(revision 283)
+++ kern/subr_disk.c	(working copy)
@@ -679,7 +679,7 @@ bufq_disksort_get(struct bufq_state *buf
 	return (bp);
 }
 
-
+int btypes[2][2][2];
 /*
  * Seek sort for disks.
  *
@@ -699,10 +699,16 @@ bufq_prio_put(struct bufq_state *bufq, s
 
 	sortby = bufq->bq_flags & BUFQ_SORT_MASK;
 
+	btypes[bp->b_prio][(bp->b_flags & B_READ) ? 0 : 1]
+	    [(bp->b_flags & B_ASYNC) ? 0 : 1]++;
 	/*
 	 * If it's a read request append it to the list.
 	 */
+#if 0
 	if ((bp->b_flags & B_READ) == B_READ) {
+#else
+	if (bp->b_prio > BPRIO_DEFAULT) {
+#endif
 		TAILQ_INSERT_TAIL(&prio->bq_read, bp, b_actq);
 		return;
 	}
@@ -797,7 +803,11 @@ bufq_prio_get(struct bufq_state *bufq, i
 	bp = prio->bq_next;
 
 	if (bp != NULL && remove) {
+#if 0
 		if ((bp->b_flags & B_READ) == B_READ) {
+#else
+		if (bp->b_prio > BPRIO_DEFAULT) {
+#endif
 			TAILQ_REMOVE(&prio->bq_read, bp, b_actq);
 			bufq_cluster_stat(bp, TAILQ_FIRST(&prio->bq_read));
 		} else {
Index: kern/kern_physio.c
===================================================================
--- kern/kern_physio.c	(revision 283)
+++ kern/kern_physio.c	(working copy)
@@ -208,6 +208,8 @@ physio(strategy, bp, dev, flags, minphys
 			}
 			vmapbuf(bp, todo);
 
+			BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
+
 			/* [call strategy to start the transfer] */
 			(*strategy)(bp);
 
Index: sys/buf.h
===================================================================
--- sys/buf.h	(revision 284)
+++ sys/buf.h	(working copy)
@@ -156,6 +156,7 @@ struct buf {
 	struct  proc *b_proc;		/* Associated proc if B_PHYS set. */
 	volatile long	b_flags;	/* B_* flags. */
 	struct simplelock b_interlock;	/* Lock for b_flags changes */
+	int	b_prio;
 	int	b_error;		/* Errno value. */
 	long	b_bufsize;		/* Allocated buffer size. */
 	long	b_bcount;		/* Valid bytes in buffer. */
@@ -250,6 +251,11 @@ do {									\
 
 #ifdef _KERNEL
 
+#define	BIO_SETPRIO(bp, prio)	(bp)->b_prio = (prio)
+#define	BPRIO_TIMECRITICAL	1
+#define	BPRIO_TIMELIMITED	0
+#define	BPRIO_DEFAULT		0
+
 extern	struct bio_ops bioops;
 extern	u_int nbuf;		/* The number of buffer headers */
 extern	struct buf *buf;	/* The buffer headers. */

--NextPart-20031204103217-0108700--