Subject: Re: poolifying fileassoc
To: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
From: Brett Lymn <blymn@baesystems.com.au>
List: tech-kern
Date: 03/21/2007 22:29:25
--tKW2IUtsqtDRztdT
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

On Fri, Feb 02, 2007 at 08:29:22AM +0900, YAMAMOTO Takashi wrote:
> 
> it takes too much arguments.  it should be more self-contained, IMO.
> eg. it's better to make uvm_io_done acquire locks by itself.
> 

OK - I have redone uvm_io_done(), it takes far fewer arguments, works
out some things from the iobuf and acquires the locks itself.  I have
tried this code with a multiprocessor DIAGNOSTIC kernel, it appears to
work fine.

-- 
Brett Lymn

--tKW2IUtsqtDRztdT
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="iodone.diff"

Index: sys/uvm/uvm_pager.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_pager.c,v
retrieving revision 1.79
diff -u -r1.79 uvm_pager.c
--- sys/uvm/uvm_pager.c	21 Dec 2006 15:55:26 -0000	1.79
+++ sys/uvm/uvm_pager.c	21 Mar 2007 11:33:36 -0000
@@ -43,6 +43,7 @@
 
 #include "opt_uvmhist.h"
 #include "opt_readahead.h"
+#include "veriexec.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -52,6 +53,7 @@
 #include <sys/vnode.h>
 
 #include <uvm/uvm.h>
+#include <sys/verified_exec.h>
 
 struct pool *uvm_aiobuf_pool;
 
@@ -279,41 +281,73 @@
 }
 
 /*
- * uvm_aio_aiodone: do iodone processing for async i/os.
- * this should be called in thread context, not interrupt context.
+ *
+ * uvm_io_done: Set page flag status when an i/o (synch or asynch) has
+ * completed or an async i/o is started.  This function handles the
+ * following situations:
+ *
+ *  - Synchronous i/o has completed
+ *  - Asynchronous i/o has been initiated
+ *  - Asynchronous i/o had completed
+ *  - i/o is associated with swap
+ *
+ * All these cases what needs to be done to the page structures is very
+ * similar with some small variations depending on the calling situation.
+ *
+ * Function arguments are:
+ *
+ * pgs		array of pages to handle
+ * npages	number of pages in request
+ * flags	type of completion, see uvm_pager.h
+ * overwrite	TRUE if overwriting pages
+ * ridx		The io may have been rounded by prepending pages to
+ *		the pgs array, this gives the index into the pgs array
+ *		of the first page prior to rounding  (set to 0 if no
+ *		rounding done)
+ * orignpages	original transaction size before rounding.  If no
+ *		rounding was done then set to npages.
+ * bp		pointer to the io buffer, we need the flags and vnode
+ * 		pointer from this.
+ *
+ * This function must be called with no locks held, the page queue and uvm
+ * object locks will be acquired and released as needed.
+ *
  */
-
-void
-uvm_aio_aiodone(struct buf *bp)
+int
+uvm_io_done(struct vm_page **pgs, int npages, int flags, boolean_t overwrite,
+	    int ridx, int orignpages, struct buf *bp)
 {
-	int npages = bp->b_bufsize >> PAGE_SHIFT;
-	struct vm_page *pg, *pgs[npages];
-	struct uvm_object *uobj;
+	struct vm_page *pg;
+	int i, error, swslot;
 	struct simplelock *slock;
-	int s, i, error, swslot;
+	struct uvm_object *uobj;
 	boolean_t write, swap;
-	UVMHIST_FUNC("uvm_aio_aiodone"); UVMHIST_CALLED(ubchist);
-	UVMHIST_LOG(ubchist, "bp %p", bp, 0,0,0);
 
 	error = (bp->b_flags & B_ERROR) ? (bp->b_error ? bp->b_error : EIO) : 0;
 	write = (bp->b_flags & B_READ) == 0;
-	/* XXXUBC B_NOCACHE is for swap pager, should be done differently */
-	if (write && !(bp->b_flags & B_NOCACHE) && bioops.io_pageiodone) {
-		(*bioops.io_pageiodone)(bp);
-	}
 
-	uobj = NULL;
 	for (i = 0; i < npages; i++) {
-		pgs[i] = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT));
-		UVMHIST_LOG(ubchist, "pgs[%d] = %p", i, pgs[i],0,0);
+		pg = pgs[i];
+		if (pg != NULL)
+			break;
 	}
-	uvm_pagermapout((vaddr_t)bp->b_data, npages);
+
+	if (pg == NULL) /* no pages to do... */
+		return 0;
 
 	swslot = 0;
 	slock = NULL;
-	pg = pgs[0];
+	uobj = NULL;
+
 	swap = (pg->uanon != NULL && pg->uobject == NULL) ||
 		(pg->pqflags & PQ_AOBJ) != 0;
+
+#if NVERIEXEC > 0
+	if (!error && !write && !swap && (flags != UVMPAGER_ASYNCIO_START)) {
+		error = veriexec_block_verify(bp->b_vp, pgs, npages);
+	}
+#endif
+
 	if (!swap) {
 		uobj = pg->uobject;
 		slock = &uobj->vmobjlock;
@@ -335,9 +369,11 @@
 		panic("%s: swap", __func__);
 #endif /* defined(VMSWAP) */
 	}
+
 	for (i = 0; i < npages; i++) {
+		if (pgs[i] == NULL)
+			continue;
 		pg = pgs[i];
-		KASSERT(swap || pg->uobject == uobj);
 		UVMHIST_LOG(ubchist, "pg %p", pg, 0,0,0);
 
 #if defined(VMSWAP)
@@ -347,11 +383,10 @@
 		 */
 
 		if (swap) {
-			if (pg->uobject != NULL) {
+			if (pg->uobject != NULL)
 				slock = &pg->uobject->vmobjlock;
-			} else {
+			else
 				slock = &pg->uanon->an_lock;
-			}
 			simple_lock(slock);
 			uvm_lock_pageq();
 		}
@@ -369,7 +404,9 @@
 		if (error) {
 			int slot;
 			if (!write) {
-				pg->flags |= PG_RELEASED;
+				if ((flags == UVMPAGER_ASYNCIO_DONE) ||
+				    (pg->flags & PG_FAKE))
+					pg->flags |= PG_RELEASED;
 				continue;
 			} else if (error == ENOMEM) {
 				if (pg->flags & PG_PAGEOUT) {
@@ -405,16 +442,46 @@
 		 * still be set from the page's previous identity.
 		 */
 
-		if (pg->flags & PG_FAKE) {
-			KASSERT(!write);
-			pg->flags &= ~PG_FAKE;
+		if (pg->flags & PG_FAKE && !overwrite) {
+			pg->flags &= ~(PG_FAKE);
+			pmap_clear_modify(pgs[i]);
+		}
+
+		if (i < ridx || i >= ridx + orignpages ||
+		    (flags == UVMPAGER_ASYNCIO_START)) {
+			UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
+			    pg, pg->offset,0,0);
+			if (pg->flags & PG_FAKE) {
+				KASSERT(!write);
+				KASSERT(overwrite);
+				KASSERT((pg->flags & PG_CLEAN) != 0);
+				if (flags != UVMPAGER_ASYNCIO_DONE)
+					uvm_pagezero(pg);
+				else {
+					pmap_clear_modify(pg);
 #if defined(READAHEAD_STATS)
-			pg->pqflags |= PQ_READAHEAD;
-			uvm_ra_total.ev_count++;
+					pg->pqflags |= PQ_READAHEAD;
+					uvm_ra_total.ev_count++;
 #endif /* defined(READAHEAD_STATS) */
-			KASSERT((pg->flags & PG_CLEAN) != 0);
+				}
+			}
+
+			if (flags != UVMPAGER_ASYNCIO_DONE) {
+				pg->flags &= ~PG_BUSY;
+			}
+
+			if (pg->flags & PG_WANTED) {
+				wakeup(pg);
+			}
+
+			if ((pg->flags & PG_RELEASED) &&
+			    (flags != UVMPAGER_ASYNCIO_DONE)) {
+				uvm_pagefree(pg);
+				continue;
+			}
 			uvm_pageenqueue(pg);
-			pmap_clear_modify(pg);
+			pg->flags &= ~(PG_WANTED|PG_FAKE);
+			UVM_PAGE_OWN(pg, NULL);
 		}
 
 		/*
@@ -435,6 +502,17 @@
 		 */
 
 		if (swap) {
+			if (i < ridx || i >= ridx + orignpages
+			    || (flags == UVMPAGER_ASYNCIO_START)) {
+				if ((pg->flags & PG_WANTED) &&
+				    ((flags != UVMPAGER_ASYNCIO_DONE))) {
+					wakeup(pg);
+				}
+			}
+			if ((flags != UVMPAGER_ASYNCIO_DONE))
+				pg->flags &= ~(PG_WANTED|PG_BUSY);
+			pg->flags &= ~PG_FAKE;
+
 			if (pg->uobject == NULL && pg->uanon->an_ref == 0 &&
 			    (pg->flags & PG_RELEASED) != 0) {
 				uvm_unlock_pageq();
@@ -448,7 +526,8 @@
 #endif /* defined(VMSWAP) */
 	}
 	if (!swap) {
-		uvm_page_unbusy(pgs, npages);
+		if ((flags == UVMPAGER_ASYNCIO_DONE) || error)
+			uvm_page_unbusy(pgs, npages);
 		uvm_unlock_pageq();
 		simple_unlock(slock);
 	} else {
@@ -470,6 +549,40 @@
 		uvmexp.pdpending--;
 #endif /* defined(VMSWAP) */
 	}
+
+	return error;
+}
+
+/*
+ * uvm_aio_aiodone: do iodone processing for async i/os.
+ * this should be called in thread context, not interrupt context.
+ */
+
+void
+uvm_aio_aiodone(struct buf *bp)
+{
+	int npages = bp->b_bufsize >> PAGE_SHIFT;
+	struct vm_page *pgs[npages];
+	int s, i, error;
+	boolean_t write;
+
+	UVMHIST_FUNC("uvm_aio_aiodone"); UVMHIST_CALLED(ubchist);
+	UVMHIST_LOG(ubchist, "bp %p", bp, 0,0,0);
+
+	write = (bp->b_flags & B_READ) == 0;
+	/* XXXUBC B_NOCACHE is for swap pager, should be done differently */
+	if (write && !(bp->b_flags & B_NOCACHE) && bioops.io_pageiodone) {
+		(*bioops.io_pageiodone)(bp);
+	}
+
+	for (i = 0; i < npages; i++) {
+		pgs[i] = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT));
+		UVMHIST_LOG(ubchist, "pgs[%d] = %p", i, pgs[i],0,0);
+	}
+	uvm_pagermapout((vaddr_t)bp->b_data, npages);
+
+	error = uvm_io_done(pgs, npages, UVMPAGER_ASYNCIO_DONE, FALSE,
+			    0, npages, bp);
 	s = splbio();
 	if (write && (bp->b_flags & B_AGE) != 0) {
 		vwakeup(bp);
Index: sys/uvm/uvm_pager.h
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_pager.h,v
retrieving revision 1.34
diff -u -r1.34 uvm_pager.h
--- sys/uvm/uvm_pager.h	22 Feb 2006 22:28:18 -0000	1.34
+++ sys/uvm/uvm_pager.h	21 Mar 2007 11:33:36 -0000
@@ -176,12 +176,19 @@
 struct vm_page *uvm_pageratop(vaddr_t);
 vaddr_t	uvm_pagermapin(struct vm_page **, int, int);
 void	uvm_pagermapout(vaddr_t, int);
+int	uvm_io_done(struct vm_page **, int, int, boolean_t, int, int,
+		    struct buf *);
 
 /* Flags to uvm_pagermapin() */
 #define	UVMPAGER_MAPIN_WAITOK	0x01	/* it's okay to wait */
 #define	UVMPAGER_MAPIN_READ	0x02	/* device -> host */
 #define	UVMPAGER_MAPIN_WRITE	0x00	/* host -> device (pseudo flag) */
 
+/* Flags to uvm_io_done */
+#define UVMPAGER_SYNCIO_DONE	0x01	/* Synch i/o has completed */
+#define UVMPAGER_ASYNCIO_START	0x02	/* Asynch i/o started */
+#define UVMPAGER_ASYNCIO_DONE	0x03	/* Asynch i/o has completed */
+
 /*
  * XXX
  * this is needed until the device strategy interface
Index: sys/miscfs/genfs/genfs_vnops.c
===================================================================
RCS file: /cvsroot/src/sys/miscfs/genfs/genfs_vnops.c,v
retrieving revision 1.142
diff -u -r1.142 genfs_vnops.c
--- sys/miscfs/genfs/genfs_vnops.c	27 Dec 2006 12:10:09 -0000	1.142
+++ sys/miscfs/genfs/genfs_vnops.c	21 Mar 2007 11:33:36 -0000
@@ -453,6 +453,8 @@
 		panic("genfs_getpages: too many pages");
 	}
 
+	mbp = NULL;
+
 startover:
 	error = 0;
 	origvsize = vp->v_size;
@@ -635,6 +637,7 @@
 		lockmgr(&gp->g_glock, LK_RELEASE, NULL);
 		UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0);
 		npages += ridx;
+		simple_unlock(&uobj->vmobjlock);
 		goto out;
 	}
 
@@ -652,6 +655,7 @@
 			pg1->flags &= ~(PG_RDONLY|PG_CLEAN);
 		}
 		npages += ridx;
+		simple_unlock(&uobj->vmobjlock);
 		goto out;
 	}
 
@@ -861,7 +865,6 @@
 	if (bp != NULL) {
 		error = biowait(mbp);
 	}
-	putiobuf(mbp);
 	uvm_pagermapout(kva, npages);
 
 	/*
@@ -889,79 +892,30 @@
 		}
 	}
 	lockmgr(&gp->g_glock, LK_RELEASE, NULL);
-	simple_lock(&uobj->vmobjlock);
-
-	/*
-	 * we're almost done!  release the pages...
-	 * for errors, we free the pages.
-	 * otherwise we activate them and mark them as valid and clean.
-	 * also, unbusy pages that were not actually requested.
-	 */
-
-	if (error) {
-		for (i = 0; i < npages; i++) {
-			if (pgs[i] == NULL) {
-				continue;
-			}
-			UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
-			    pgs[i], pgs[i]->flags, 0,0);
-			if (pgs[i]->flags & PG_FAKE) {
-				pgs[i]->flags |= PG_RELEASED;
-			}
-		}
-		uvm_lock_pageq();
-		uvm_page_unbusy(pgs, npages);
-		uvm_unlock_pageq();
-		simple_unlock(&uobj->vmobjlock);
-		UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0);
-		if (pgs != pgs_onstack)
-			kmem_free(pgs, pgs_size);
-		return (error);
-	}
 
 out:
-	UVMHIST_LOG(ubchist, "succeeding, npages %d", npages,0,0,0);
-	uvm_lock_pageq();
-	for (i = 0; i < npages; i++) {
-		pg = pgs[i];
-		if (pg == NULL) {
-			continue;
-		}
-		UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
-		    pg, pg->flags, 0,0);
-		if (pg->flags & PG_FAKE && !overwrite) {
-			pg->flags &= ~(PG_FAKE);
-			pmap_clear_modify(pgs[i]);
-		}
-		KASSERT(!write || !blockalloc || (pg->flags & PG_RDONLY) == 0);
-		if (i < ridx || i >= ridx + orignpages || async) {
-			UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
-			    pg, pg->offset,0,0);
-			if (pg->flags & PG_WANTED) {
-				wakeup(pg);
-			}
-			if (pg->flags & PG_FAKE) {
-				KASSERT(overwrite);
-				uvm_pagezero(pg);
-			}
-			if (pg->flags & PG_RELEASED) {
-				uvm_pagefree(pg);
-				continue;
-			}
-			uvm_pageenqueue(pg);
-			pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
-			UVM_PAGE_OWN(pg, NULL);
-		}
+	if (mbp == NULL) {
+		/*
+		 * fake up a io buf to use with uvm_io_done iff the pages
+		 * were already resident, just fill in the bits used.
+		 */
+		mbp = getiobuf();
+		mbp->b_flags = B_READ| (async ? B_ASYNC : 0);
+		mbp->b_vp = vp;
 	}
-	uvm_unlock_pageq();
-	simple_unlock(&uobj->vmobjlock);
-	if (ap->a_m != NULL) {
+
+	error = uvm_io_done(pgs, npages,
+			    ((async)? UVMPAGER_ASYNCIO_START : UVMPAGER_SYNCIO_DONE),
+			    overwrite, ridx, orignpages, mbp);
+
+	putiobuf(mbp);
+	if ((!error) && (ap->a_m != NULL)) {
 		memcpy(ap->a_m, &pgs[ridx],
-		    orignpages * sizeof(struct vm_page *));
+		       orignpages * sizeof(struct vm_page *));
 	}
 	if (pgs != pgs_onstack)
 		kmem_free(pgs, pgs_size);
-	return (0);
+	return (error);
 }
 
 /*

--tKW2IUtsqtDRztdT--