Subject: Re: poolifying fileassoc
To: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
From: Brett Lymn <blymn@baesystems.com.au>
List: tech-kern
Date: 01/21/2007 20:37:51
--OgqxwSJOaUobr8KG
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
On Sun, Jan 07, 2007 at 10:07:29PM +0900, YAMAMOTO Takashi wrote:
>
> keeping pages locked (PG_BUSY) is enough for your check, isn't it?
>
Yes, it seems so.
>
> i don't think we need such a hack in this case.
>
OK, I have changed things around in uvm_io_done() so that the veriexec
check is done without locks held but before the pages are unlocked.
This seems to work ok for me with a kernel that has both LOCK_DEBUG
and DIAGNOSTIC enabled on a 2 processor machine (well, a 2 processor
qemu machine). The diff is attached (again, sorry for the horrible diff -
I cannot seem to get cvs to produce a better/clearer one)
--
Brett Lymn
--OgqxwSJOaUobr8KG
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="uvm_pager.diff"
Index: uvm_pager.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_pager.c,v
retrieving revision 1.79
diff -u -r1.79 uvm_pager.c
--- uvm_pager.c 21 Dec 2006 15:55:26 -0000 1.79
+++ uvm_pager.c 21 Jan 2007 09:59:59 -0000
@@ -43,6 +43,7 @@
#include "opt_uvmhist.h"
#include "opt_readahead.h"
+#include "veriexec.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -52,6 +53,7 @@
#include <sys/vnode.h>
#include <uvm/uvm.h>
+#include <sys/verified_exec.h>
struct pool *uvm_aiobuf_pool;
@@ -279,65 +281,53 @@
}
/*
- * uvm_aio_aiodone: do iodone processing for async i/os.
- * this should be called in thread context, not interrupt context.
+ *
+ * uvm_io_done: Set page flag status when an i/o (synch or asynch) has
+ * completed or an async i/o is started. This function handles the
+ * following situations:
+ *
+ * - Synchronous i/o has completed
+ * - Asynchronous i/o has been initiated
+ * - Asynchronous i/o has completed
+ * - i/o is associated with swap
+ *
+ * In all these cases, what needs to be done to the page structures is very
+ * similar, with some small variations depending on the calling situation.
+ *
+ * pgs - array of pages to handle
+ * npages - number of pages in request
+ * write - TRUE if a write request
+ * swap - TRUE if a swap request
+ * aio_complete - TRUE if called from aio completion
+ * overwrite - TRUE if overwriting pages
+ * ext_error - error status from caller
+ * ext_slock - ignored if swap is TRUE; otherwise the uobj lock held
+ * swslot - swap slot (used iff swap == TRUE)
+ * ridx - index of the first originally requested page after the
+ * transaction was rounded to the fs block size (npages if no rounding)
+ * orignpages - original transaction size before rounding.
+ * aio_initiate - TRUE if pages are being brought in asynchronously.
+ * vn - pointer to vnode for object.
+ *
+ * pageqlock and object locks must be held iff swap is FALSE. The
+ * locks will be released before return.
*/
-
-void
-uvm_aio_aiodone(struct buf *bp)
+int
+uvm_io_done(struct vm_page **pgs, int npages, boolean_t write, boolean_t swap,
+ boolean_t aio_complete, boolean_t overwrite, int ext_error,
+ struct simplelock *ext_slock, int swslot, int ridx,
+ int orignpages, boolean_t aio_initiate, struct vnode *vp)
{
- int npages = bp->b_bufsize >> PAGE_SHIFT;
- struct vm_page *pg, *pgs[npages];
- struct uvm_object *uobj;
+ struct vm_page *pg;
+ int i, error;
struct simplelock *slock;
- int s, i, error, swslot;
- boolean_t write, swap;
- UVMHIST_FUNC("uvm_aio_aiodone"); UVMHIST_CALLED(ubchist);
- UVMHIST_LOG(ubchist, "bp %p", bp, 0,0,0);
- error = (bp->b_flags & B_ERROR) ? (bp->b_error ? bp->b_error : EIO) : 0;
- write = (bp->b_flags & B_READ) == 0;
- /* XXXUBC B_NOCACHE is for swap pager, should be done differently */
- if (write && !(bp->b_flags & B_NOCACHE) && bioops.io_pageiodone) {
- (*bioops.io_pageiodone)(bp);
- }
-
- uobj = NULL;
- for (i = 0; i < npages; i++) {
- pgs[i] = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT));
- UVMHIST_LOG(ubchist, "pgs[%d] = %p", i, pgs[i],0,0);
- }
- uvm_pagermapout((vaddr_t)bp->b_data, npages);
-
- swslot = 0;
- slock = NULL;
- pg = pgs[0];
- swap = (pg->uanon != NULL && pg->uobject == NULL) ||
- (pg->pqflags & PQ_AOBJ) != 0;
- if (!swap) {
- uobj = pg->uobject;
- slock = &uobj->vmobjlock;
- simple_lock(slock);
- uvm_lock_pageq();
- } else {
-#if defined(VMSWAP)
- if (error) {
- if (pg->uobject != NULL) {
- swslot = uao_find_swslot(pg->uobject,
- pg->offset >> PAGE_SHIFT);
- } else {
- KASSERT(pg->uanon != NULL);
- swslot = pg->uanon->an_swslot;
- }
- KASSERT(swslot);
- }
-#else /* defined(VMSWAP) */
- panic("%s: swap", __func__);
-#endif /* defined(VMSWAP) */
- }
+ error = ext_error;
+ slock = ext_slock;
for (i = 0; i < npages; i++) {
+ if (pgs[i] == NULL)
+ continue;
pg = pgs[i];
- KASSERT(swap || pg->uobject == uobj);
UVMHIST_LOG(ubchist, "pg %p", pg, 0,0,0);
#if defined(VMSWAP)
@@ -347,11 +337,10 @@
*/
if (swap) {
- if (pg->uobject != NULL) {
+ if (pg->uobject != NULL)
slock = &pg->uobject->vmobjlock;
- } else {
+ else
slock = &pg->uanon->an_lock;
- }
simple_lock(slock);
uvm_lock_pageq();
}
@@ -369,7 +358,9 @@
if (error) {
int slot;
if (!write) {
- pg->flags |= PG_RELEASED;
+ if (aio_complete ||
+ (pg->flags & PG_FAKE))
+ pg->flags |= PG_RELEASED;
continue;
} else if (error == ENOMEM) {
if (pg->flags & PG_PAGEOUT) {
@@ -405,16 +396,34 @@
* still be set from the page's previous identity.
*/
- if (pg->flags & PG_FAKE) {
- KASSERT(!write);
- pg->flags &= ~PG_FAKE;
+ if (pg->flags & PG_FAKE && !overwrite) {
+ pg->flags &= ~(PG_FAKE);
+ pmap_clear_modify(pgs[i]);
+ }
+
+ if (i < ridx || i >= ridx + orignpages || aio_initiate) {
+ UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
+ pg, pg->offset,0,0);
+ if (pg->flags & PG_FAKE) {
+ KASSERT(!write);
+ KASSERT(overwrite);
+ KASSERT((pg->flags & PG_CLEAN) != 0);
+ if (!aio_complete)
+ uvm_pagezero(pg);
+ else {
+ pmap_clear_modify(pg);
#if defined(READAHEAD_STATS)
- pg->pqflags |= PQ_READAHEAD;
- uvm_ra_total.ev_count++;
+ pg->pqflags |= PQ_READAHEAD;
+ uvm_ra_total.ev_count++;
#endif /* defined(READAHEAD_STATS) */
- KASSERT((pg->flags & PG_CLEAN) != 0);
+ }
+ }
+ if ((pg->flags & PG_RELEASED) && (!aio_complete)) {
+ uvm_pagefree(pg);
+ continue;
+ }
uvm_pageenqueue(pg);
- pmap_clear_modify(pg);
+ UVM_PAGE_OWN(pg, NULL);
}
/*
@@ -435,6 +444,17 @@
*/
if (swap) {
+ if (i < ridx || i >= ridx + orignpages
+ || aio_initiate) {
+ if ((pg->flags & PG_WANTED) &&
+ (!aio_complete)) {
+ wakeup(pg);
+ }
+ }
+ if (!aio_complete)
+ pg->flags &= ~(PG_WANTED|PG_BUSY);
+ pg->flags &= ~PG_FAKE;
+
if (pg->uobject == NULL && pg->uanon->an_ref == 0 &&
(pg->flags & PG_RELEASED) != 0) {
uvm_unlock_pageq();
@@ -448,7 +468,40 @@
#endif /* defined(VMSWAP) */
}
if (!swap) {
- uvm_page_unbusy(pgs, npages);
+#if NVERIEXEC > 0
+ if (!error && !write && !aio_initiate) {
+ uvm_unlock_pageq();
+ simple_unlock(slock);
+ error = veriexec_block_verify(vp, pgs, npages);
+ simple_lock(slock);
+ uvm_lock_pageq();
+
+ if (error) {
+ for (i = 0; i < npages; i++) {
+ if (pgs[i] == NULL)
+ continue;
+ pgs[i]->flags |= PG_RELEASED;
+ }
+ }
+ }
+#endif
+ if (!aio_complete && !error) {
+ for (i = 0; i < npages; i++) {
+ if (pgs[i] == NULL)
+ continue;
+ pg = pgs[i];
+ if (i < ridx || i >= ridx + orignpages
+ || aio_initiate) {
+ if (pg->flags & PG_WANTED)
+ wakeup(pg);
+ pg->flags &= ~(PG_WANTED|PG_BUSY);
+ pg->flags &= ~PG_FAKE;
+ }
+ }
+ }
+
+ if (aio_complete || error)
+ uvm_page_unbusy(pgs, npages);
uvm_unlock_pageq();
simple_unlock(slock);
} else {
@@ -470,6 +523,70 @@
uvmexp.pdpending--;
#endif /* defined(VMSWAP) */
}
+
+ return error;
+}
+
+/*
+ * uvm_aio_aiodone: do iodone processing for async i/os.
+ * this should be called in thread context, not interrupt context.
+ */
+
+void
+uvm_aio_aiodone(struct buf *bp)
+{
+ int npages = bp->b_bufsize >> PAGE_SHIFT;
+ struct vm_page *pg, *pgs[npages];
+ struct uvm_object *uobj;
+ struct simplelock *slock;
+ int s, i, error, swslot;
+ boolean_t write, swap;
+ UVMHIST_FUNC("uvm_aio_aiodone"); UVMHIST_CALLED(ubchist);
+ UVMHIST_LOG(ubchist, "bp %p", bp, 0,0,0);
+
+ error = (bp->b_flags & B_ERROR) ? (bp->b_error ? bp->b_error : EIO) : 0;
+ write = (bp->b_flags & B_READ) == 0;
+ /* XXXUBC B_NOCACHE is for swap pager, should be done differently */
+ if (write && !(bp->b_flags & B_NOCACHE) && bioops.io_pageiodone) {
+ (*bioops.io_pageiodone)(bp);
+ }
+
+ uobj = NULL;
+ for (i = 0; i < npages; i++) {
+ pgs[i] = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT));
+ UVMHIST_LOG(ubchist, "pgs[%d] = %p", i, pgs[i],0,0);
+ }
+ uvm_pagermapout((vaddr_t)bp->b_data, npages);
+
+ swslot = 0;
+ slock = NULL;
+ pg = pgs[0];
+ swap = (pg->uanon != NULL && pg->uobject == NULL) ||
+ (pg->pqflags & PQ_AOBJ) != 0;
+ if (!swap) {
+ uobj = pg->uobject;
+ slock = &uobj->vmobjlock;
+ simple_lock(slock);
+ uvm_lock_pageq();
+ } else {
+#if defined(VMSWAP)
+ if (error) {
+ if (pg->uobject != NULL) {
+ swslot = uao_find_swslot(pg->uobject,
+ pg->offset >> PAGE_SHIFT);
+ } else {
+ KASSERT(pg->uanon != NULL);
+ swslot = pg->uanon->an_swslot;
+ }
+ KASSERT(swslot);
+ }
+#else /* defined(VMSWAP) */
+ panic("%s: swap", __func__);
+#endif /* defined(VMSWAP) */
+ }
+
+ error = uvm_io_done(pgs, npages, write, swap, TRUE, FALSE, error,
+ slock, swslot, npages, npages, FALSE, bp->b_vp);
s = splbio();
if (write && (bp->b_flags & B_AGE) != 0) {
vwakeup(bp);
--OgqxwSJOaUobr8KG--