Subject: Re: poolifying fileassoc
To: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
From: Brett Lymn <blymn@baesystems.com.au>
List: tech-kern
Date: 03/21/2007 22:29:25
--tKW2IUtsqtDRztdT
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
On Fri, Feb 02, 2007 at 08:29:22AM +0900, YAMAMOTO Takashi wrote:
>
> it takes too much arguments. it should be more self-contained, IMO.
> eg. it's better to make uvm_io_done acquire locks by itself.
>
OK - I have redone uvm_io_done().  It now takes far fewer arguments,
works out some things from the iobuf and acquires the locks itself.  I
have tried this code with a multiprocessor DIAGNOSTIC kernel and it
appears to work fine.
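
For reference, a call now looks roughly like this (just a sketch, using
the names from the genfs_getpages() change in the attached diff; no
locks are held at the call and mbp is the io buf for the transfer, or a
faked up one when the pages were already resident):

	error = uvm_io_done(pgs, npages,
	    async ? UVMPAGER_ASYNCIO_START : UVMPAGER_SYNCIO_DONE,
	    overwrite, ridx, orignpages, mbp);

The flags argument tells uvm_io_done() whether it is completing a
synchronous i/o, starting an async one or completing an async one, so
it can work out how much of the page bookkeeping needs doing.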
--
Brett Lymn
--tKW2IUtsqtDRztdT
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="iodone.diff"
Index: sys/uvm/uvm_pager.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_pager.c,v
retrieving revision 1.79
diff -u -r1.79 uvm_pager.c
--- sys/uvm/uvm_pager.c 21 Dec 2006 15:55:26 -0000 1.79
+++ sys/uvm/uvm_pager.c 21 Mar 2007 11:33:36 -0000
@@ -43,6 +43,7 @@
#include "opt_uvmhist.h"
#include "opt_readahead.h"
+#include "veriexec.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -52,6 +53,7 @@
#include <sys/vnode.h>
#include <uvm/uvm.h>
+#include <sys/verified_exec.h>
struct pool *uvm_aiobuf_pool;
@@ -279,41 +281,73 @@
}
/*
- * uvm_aio_aiodone: do iodone processing for async i/os.
- * this should be called in thread context, not interrupt context.
+ *
+ * uvm_io_done: Set page flag status when an i/o (synch or asynch) has
+ * completed or an async i/o is started. This function handles the
+ * following situations:
+ *
+ * - Synchronous i/o has completed
+ * - Asynchronous i/o has been initiated
+ * - Asynchronous i/o has completed
+ * - i/o is associated with swap
+ *
+ * In all these cases what needs to be done to the page structures is very
+ * similar, with some small variations depending on the calling situation.
+ *
+ * Function arguments are:
+ *
+ * pgs array of pages to handle
+ * npages number of pages in request
+ * flags type of completion, see uvm_pager.h
+ * overwrite TRUE if overwriting pages
+ * ridx The i/o may have been rounded by prepending pages to
+ * the pgs array; this gives the index into the pgs array
+ * of the first page prior to rounding (set to 0 if no
+ * rounding was done)
+ * orignpages original transaction size before rounding. If no
+ * rounding was done then set to npages.
+ * bp pointer to the i/o buffer; we need the flags and vnode
+ * pointer from it.
+ *
+ * This function must be called with no locks held; the page queue and uvm
+ * object locks will be acquired and released as needed.
+ *
*/
-
-void
-uvm_aio_aiodone(struct buf *bp)
+int
+uvm_io_done(struct vm_page **pgs, int npages, int flags, boolean_t overwrite,
+ int ridx, int orignpages, struct buf *bp)
{
- int npages = bp->b_bufsize >> PAGE_SHIFT;
- struct vm_page *pg, *pgs[npages];
- struct uvm_object *uobj;
+ struct vm_page *pg;
+ int i, error, swslot;
struct simplelock *slock;
- int s, i, error, swslot;
+ struct uvm_object *uobj;
boolean_t write, swap;
- UVMHIST_FUNC("uvm_aio_aiodone"); UVMHIST_CALLED(ubchist);
- UVMHIST_LOG(ubchist, "bp %p", bp, 0,0,0);
error = (bp->b_flags & B_ERROR) ? (bp->b_error ? bp->b_error : EIO) : 0;
write = (bp->b_flags & B_READ) == 0;
- /* XXXUBC B_NOCACHE is for swap pager, should be done differently */
- if (write && !(bp->b_flags & B_NOCACHE) && bioops.io_pageiodone) {
- (*bioops.io_pageiodone)(bp);
- }
- uobj = NULL;
for (i = 0; i < npages; i++) {
- pgs[i] = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT));
- UVMHIST_LOG(ubchist, "pgs[%d] = %p", i, pgs[i],0,0);
+ pg = pgs[i];
+ if (pg != NULL)
+ break;
}
- uvm_pagermapout((vaddr_t)bp->b_data, npages);
+
+ if (pg == NULL) /* no pages to do... */
+ return 0;
swslot = 0;
slock = NULL;
- pg = pgs[0];
+ uobj = NULL;
+
swap = (pg->uanon != NULL && pg->uobject == NULL) ||
(pg->pqflags & PQ_AOBJ) != 0;
+
+#if NVERIEXEC > 0
+ if (!error && !write && !swap && (flags != UVMPAGER_ASYNCIO_START)) {
+ error = veriexec_block_verify(bp->b_vp, pgs, npages);
+ }
+#endif
+
if (!swap) {
uobj = pg->uobject;
slock = &uobj->vmobjlock;
@@ -335,9 +369,11 @@
panic("%s: swap", __func__);
#endif /* defined(VMSWAP) */
}
+
for (i = 0; i < npages; i++) {
+ if (pgs[i] == NULL)
+ continue;
pg = pgs[i];
- KASSERT(swap || pg->uobject == uobj);
UVMHIST_LOG(ubchist, "pg %p", pg, 0,0,0);
#if defined(VMSWAP)
@@ -347,11 +383,10 @@
*/
if (swap) {
- if (pg->uobject != NULL) {
+ if (pg->uobject != NULL)
slock = &pg->uobject->vmobjlock;
- } else {
+ else
slock = &pg->uanon->an_lock;
- }
simple_lock(slock);
uvm_lock_pageq();
}
@@ -369,7 +404,9 @@
if (error) {
int slot;
if (!write) {
- pg->flags |= PG_RELEASED;
+ if ((flags == UVMPAGER_ASYNCIO_DONE) ||
+ (pg->flags & PG_FAKE))
+ pg->flags |= PG_RELEASED;
continue;
} else if (error == ENOMEM) {
if (pg->flags & PG_PAGEOUT) {
@@ -405,16 +442,46 @@
* still be set from the page's previous identity.
*/
- if (pg->flags & PG_FAKE) {
- KASSERT(!write);
- pg->flags &= ~PG_FAKE;
+ if (pg->flags & PG_FAKE && !overwrite) {
+ pg->flags &= ~(PG_FAKE);
+ pmap_clear_modify(pgs[i]);
+ }
+
+ if (i < ridx || i >= ridx + orignpages ||
+ (flags == UVMPAGER_ASYNCIO_START)) {
+ UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
+ pg, pg->offset,0,0);
+ if (pg->flags & PG_FAKE) {
+ KASSERT(!write);
+ KASSERT(overwrite);
+ KASSERT((pg->flags & PG_CLEAN) != 0);
+ if (flags != UVMPAGER_ASYNCIO_DONE)
+ uvm_pagezero(pg);
+ else {
+ pmap_clear_modify(pg);
#if defined(READAHEAD_STATS)
- pg->pqflags |= PQ_READAHEAD;
- uvm_ra_total.ev_count++;
+ pg->pqflags |= PQ_READAHEAD;
+ uvm_ra_total.ev_count++;
#endif /* defined(READAHEAD_STATS) */
- KASSERT((pg->flags & PG_CLEAN) != 0);
+ }
+ }
+
+ if (flags != UVMPAGER_ASYNCIO_DONE) {
+ pg->flags &= ~PG_BUSY;
+ }
+
+ if (pg->flags & PG_WANTED) {
+ wakeup(pg);
+ }
+
+ if ((pg->flags & PG_RELEASED) &&
+ (flags != UVMPAGER_ASYNCIO_DONE)) {
+ uvm_pagefree(pg);
+ continue;
+ }
uvm_pageenqueue(pg);
- pmap_clear_modify(pg);
+ pg->flags &= ~(PG_WANTED|PG_FAKE);
+ UVM_PAGE_OWN(pg, NULL);
}
/*
@@ -435,6 +502,17 @@
*/
if (swap) {
+ if (i < ridx || i >= ridx + orignpages
+ || (flags == UVMPAGER_ASYNCIO_START)) {
+ if ((pg->flags & PG_WANTED) &&
+ ((flags != UVMPAGER_ASYNCIO_DONE))) {
+ wakeup(pg);
+ }
+ }
+ if ((flags != UVMPAGER_ASYNCIO_DONE))
+ pg->flags &= ~(PG_WANTED|PG_BUSY);
+ pg->flags &= ~PG_FAKE;
+
if (pg->uobject == NULL && pg->uanon->an_ref == 0 &&
(pg->flags & PG_RELEASED) != 0) {
uvm_unlock_pageq();
@@ -448,7 +526,8 @@
#endif /* defined(VMSWAP) */
}
if (!swap) {
- uvm_page_unbusy(pgs, npages);
+ if ((flags == UVMPAGER_ASYNCIO_DONE) || error)
+ uvm_page_unbusy(pgs, npages);
uvm_unlock_pageq();
simple_unlock(slock);
} else {
@@ -470,6 +549,40 @@
uvmexp.pdpending--;
#endif /* defined(VMSWAP) */
}
+
+ return error;
+}
+
+/*
+ * uvm_aio_aiodone: do iodone processing for async i/os.
+ * this should be called in thread context, not interrupt context.
+ */
+
+void
+uvm_aio_aiodone(struct buf *bp)
+{
+ int npages = bp->b_bufsize >> PAGE_SHIFT;
+ struct vm_page *pgs[npages];
+ int s, i, error;
+ boolean_t write;
+
+ UVMHIST_FUNC("uvm_aio_aiodone"); UVMHIST_CALLED(ubchist);
+ UVMHIST_LOG(ubchist, "bp %p", bp, 0,0,0);
+
+ write = (bp->b_flags & B_READ) == 0;
+ /* XXXUBC B_NOCACHE is for swap pager, should be done differently */
+ if (write && !(bp->b_flags & B_NOCACHE) && bioops.io_pageiodone) {
+ (*bioops.io_pageiodone)(bp);
+ }
+
+ for (i = 0; i < npages; i++) {
+ pgs[i] = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT));
+ UVMHIST_LOG(ubchist, "pgs[%d] = %p", i, pgs[i],0,0);
+ }
+ uvm_pagermapout((vaddr_t)bp->b_data, npages);
+
+ error = uvm_io_done(pgs, npages, UVMPAGER_ASYNCIO_DONE, FALSE,
+ 0, npages, bp);
s = splbio();
if (write && (bp->b_flags & B_AGE) != 0) {
vwakeup(bp);
Index: sys/uvm/uvm_pager.h
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_pager.h,v
retrieving revision 1.34
diff -u -r1.34 uvm_pager.h
--- sys/uvm/uvm_pager.h 22 Feb 2006 22:28:18 -0000 1.34
+++ sys/uvm/uvm_pager.h 21 Mar 2007 11:33:36 -0000
@@ -176,12 +176,19 @@
struct vm_page *uvm_pageratop(vaddr_t);
vaddr_t uvm_pagermapin(struct vm_page **, int, int);
void uvm_pagermapout(vaddr_t, int);
+int uvm_io_done(struct vm_page **, int, int, boolean_t, int, int,
+ struct buf *);
/* Flags to uvm_pagermapin() */
#define UVMPAGER_MAPIN_WAITOK 0x01 /* it's okay to wait */
#define UVMPAGER_MAPIN_READ 0x02 /* device -> host */
#define UVMPAGER_MAPIN_WRITE 0x00 /* host -> device (pseudo flag) */
+/* Flags to uvm_io_done */
+#define UVMPAGER_SYNCIO_DONE 0x01 /* Synch i/o has completed */
+#define UVMPAGER_ASYNCIO_START 0x02 /* Asynch i/o started */
+#define UVMPAGER_ASYNCIO_DONE 0x03 /* Asynch i/o has completed */
+
/*
* XXX
* this is needed until the device strategy interface
Index: sys/miscfs/genfs/genfs_vnops.c
===================================================================
RCS file: /cvsroot/src/sys/miscfs/genfs/genfs_vnops.c,v
retrieving revision 1.142
diff -u -r1.142 genfs_vnops.c
--- sys/miscfs/genfs/genfs_vnops.c 27 Dec 2006 12:10:09 -0000 1.142
+++ sys/miscfs/genfs/genfs_vnops.c 21 Mar 2007 11:33:36 -0000
@@ -453,6 +453,8 @@
panic("genfs_getpages: too many pages");
}
+ mbp = NULL;
+
startover:
error = 0;
origvsize = vp->v_size;
@@ -635,6 +637,7 @@
lockmgr(&gp->g_glock, LK_RELEASE, NULL);
UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0);
npages += ridx;
+ simple_unlock(&uobj->vmobjlock);
goto out;
}
@@ -652,6 +655,7 @@
pg1->flags &= ~(PG_RDONLY|PG_CLEAN);
}
npages += ridx;
+ simple_unlock(&uobj->vmobjlock);
goto out;
}
@@ -861,7 +865,6 @@
if (bp != NULL) {
error = biowait(mbp);
}
- putiobuf(mbp);
uvm_pagermapout(kva, npages);
/*
@@ -889,79 +892,30 @@
}
}
lockmgr(&gp->g_glock, LK_RELEASE, NULL);
- simple_lock(&uobj->vmobjlock);
-
- /*
- * we're almost done! release the pages...
- * for errors, we free the pages.
- * otherwise we activate them and mark them as valid and clean.
- * also, unbusy pages that were not actually requested.
- */
-
- if (error) {
- for (i = 0; i < npages; i++) {
- if (pgs[i] == NULL) {
- continue;
- }
- UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
- pgs[i], pgs[i]->flags, 0,0);
- if (pgs[i]->flags & PG_FAKE) {
- pgs[i]->flags |= PG_RELEASED;
- }
- }
- uvm_lock_pageq();
- uvm_page_unbusy(pgs, npages);
- uvm_unlock_pageq();
- simple_unlock(&uobj->vmobjlock);
- UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0);
- if (pgs != pgs_onstack)
- kmem_free(pgs, pgs_size);
- return (error);
- }
out:
- UVMHIST_LOG(ubchist, "succeeding, npages %d", npages,0,0,0);
- uvm_lock_pageq();
- for (i = 0; i < npages; i++) {
- pg = pgs[i];
- if (pg == NULL) {
- continue;
- }
- UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
- pg, pg->flags, 0,0);
- if (pg->flags & PG_FAKE && !overwrite) {
- pg->flags &= ~(PG_FAKE);
- pmap_clear_modify(pgs[i]);
- }
- KASSERT(!write || !blockalloc || (pg->flags & PG_RDONLY) == 0);
- if (i < ridx || i >= ridx + orignpages || async) {
- UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
- pg, pg->offset,0,0);
- if (pg->flags & PG_WANTED) {
- wakeup(pg);
- }
- if (pg->flags & PG_FAKE) {
- KASSERT(overwrite);
- uvm_pagezero(pg);
- }
- if (pg->flags & PG_RELEASED) {
- uvm_pagefree(pg);
- continue;
- }
- uvm_pageenqueue(pg);
- pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
- UVM_PAGE_OWN(pg, NULL);
- }
+ if (mbp == NULL) {
+ /*
+ * fake up an i/o buf to use with uvm_io_done iff the pages
+ * were already resident; just fill in the bits used.
+ */
+ mbp = getiobuf();
+ mbp->b_flags = B_READ| (async ? B_ASYNC : 0);
+ mbp->b_vp = vp;
}
- uvm_unlock_pageq();
- simple_unlock(&uobj->vmobjlock);
- if (ap->a_m != NULL) {
+
+ error = uvm_io_done(pgs, npages,
+ ((async)? UVMPAGER_ASYNCIO_START : UVMPAGER_SYNCIO_DONE),
+ overwrite, ridx, orignpages, mbp);
+
+ putiobuf(mbp);
+ if ((!error) && (ap->a_m != NULL)) {
memcpy(ap->a_m, &pgs[ridx],
- orignpages * sizeof(struct vm_page *));
+ orignpages * sizeof(struct vm_page *));
}
if (pgs != pgs_onstack)
kmem_free(pgs, pgs_size);
- return (0);
+ return (error);
}
/*
--tKW2IUtsqtDRztdT--