Subject: Re: ffs with UBC rewrite performance improvement
To: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
From: Isaku Yamahata <yamahata@private.email.ne.jp>
List: tech-kern
Date: 05/23/2003 00:37:12
--xHFwDpU9dbj6ez1V
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: 7bit
Hello
> because ufs_balloc_range_with_pages keeps pages busy,
> it can deadlock if some of them are mapped to the userspace and
> a page fault occurs on one of them in uiomove.
I solved this deadlock by wiring down the userspace buffer beforehand.
I attach a new patch that is for NetBSD 1.6.1.
Almost all of the rest of this patch is the same as the previous one.
Does this seem OK, or am I missing anything else?
--
Isaku Yamahata <yamahata@private.email.ne.jp>
--xHFwDpU9dbj6ez1V
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="ubc_1.diff"
Content-Transfer-Encoding: 7bit
Index: ufs/ufs/ufs_extern.h
===================================================================
RCS file: /usr/home/cvsroot/NetBSD/1.6/usr/src/sys/ufs/ufs/ufs_extern.h,v
retrieving revision 1.1.1.1
retrieving revision 1.1.1.1.8.1
diff -u -r1.1.1.1 -r1.1.1.1.8.1
--- ufs/ufs/ufs_extern.h 7 Nov 2002 04:54:38 -0000 1.1.1.1
+++ ufs/ufs/ufs_extern.h 22 May 2003 11:43:27 -0000 1.1.1.1.8.1
@@ -115,6 +115,7 @@
/* ufs_inode.c */
int ufs_reclaim __P((struct vnode *, struct proc *));
int ufs_balloc_range __P((struct vnode *, off_t, off_t, struct ucred *, int));
+int ufs_balloc_range_with_pages(struct vnode *vp, off_t off, off_t len, struct ucred *cred, int flags, struct vm_page** pgs, int* npages);
/* ufs_lookup.c */
void ufs_dirbad __P((struct inode *, doff_t, char *));
Index: ufs/ufs/ufs_inode.c
===================================================================
RCS file: /usr/home/cvsroot/NetBSD/1.6/usr/src/sys/ufs/ufs/ufs_inode.c,v
retrieving revision 1.1.1.1
retrieving revision 1.1.1.1.8.1
diff -u -r1.1.1.1 -r1.1.1.1.8.1
--- ufs/ufs/ufs_inode.c 7 Nov 2002 04:54:38 -0000 1.1.1.1
+++ ufs/ufs/ufs_inode.c 22 May 2003 11:43:27 -0000 1.1.1.1.8.1
@@ -172,21 +172,28 @@
* accessible to others.
*/
-int
-ufs_balloc_range(vp, off, len, cred, flags)
+// The pgs array of struct vm_page* must hold at least
+// MAX(block size >> PAGE_SHIFT, 1) entries.
+static int ufs_balloc_range_with_pages_internal(struct vnode *vp, off_t off, off_t len, struct ucred *cred, int flags, struct vm_page** pgs, int* npages, boolean_t partial_unbusy);
+
+
+static int
+ufs_balloc_range_with_pages_internal(vp, off, len, cred, flags, pgs, npages, partial_unbusy)
struct vnode *vp;
off_t off, len;
struct ucred *cred;
int flags;
+ struct vm_page** pgs;
+ int* npages;
+ boolean_t partial_unbusy;
{
off_t oldeof, neweof, oldeob, neweob, pagestart;
struct uvm_object *uobj;
struct genfs_node *gp = VTOG(vp);
- int i, delta, error, npages;
- int bshift = vp->v_mount->mnt_fs_bshift;
- int bsize = 1 << bshift;
- int ppb = MAX(bsize >> PAGE_SHIFT, 1);
- struct vm_page *pgs[ppb];
+ int i, delta, error;
+ const int bshift = vp->v_mount->mnt_fs_bshift;
+ const int bsize = 1 << bshift;
+ const int ppb = MAX(bsize >> PAGE_SHIFT, 1);
UVMHIST_FUNC("ufs_balloc_range"); UVMHIST_CALLED(ubchist);
UVMHIST_LOG(ubchist, "vp %p off 0x%x len 0x%x u_size 0x%x",
vp, off, len, vp->v_size);
@@ -209,17 +216,29 @@
*/
pagestart = trunc_page(off) & ~(bsize - 1);
- npages = MIN(ppb, (round_page(neweob) - pagestart) >> PAGE_SHIFT);
- memset(pgs, 0, npages * sizeof(struct vm_page *));
+ *npages = MIN(ppb, (round_page(neweob) - pagestart) >> PAGE_SHIFT);
+ memset(pgs, 0, (*npages) * sizeof(struct vm_page *));
+#if 0
+ printf("off = 0x%llx, len = 0x%llx, pagestart = 0x%llx, *npages = %d, "
+ "bsize = %d, 0x%llx 0x%llx 0x%llx 0x%llx\n",
+ off, len, pagestart, *npages, bsize,
+ oldeof, oldeob, neweof, neweob);
+#endif
simple_lock(&uobj->vmobjlock);
- error = VOP_GETPAGES(vp, pagestart, pgs, &npages, 0,
- VM_PROT_READ, 0, PGO_SYNCIO|PGO_PASTEOF);
+ error = VOP_GETPAGES(vp, pagestart, pgs, npages, 0,
+ VM_PROT_READ, 0, PGO_SYNCIO|PGO_PASTEOF | (flags & PGO_OVERWRITE));
if (error) {
return error;
}
simple_lock(&uobj->vmobjlock);
+#if 0
+ printf("off = 0x%llx, len = 0x%llx, pagestart = 0x%llx, *npages = %d, "
+ "bsize = %d, 0x%llx 0x%llx 0x%llx 0x%llx\n",
+ off, len, pagestart, *npages, bsize,
+ oldeof, oldeob, neweof, neweob);
+#endif
uvm_lock_pageq();
- for (i = 0; i < npages; i++) {
+ for (i = 0; i < *npages; i++) {
UVMHIST_LOG(ubchist, "got pgs[%d] %p", i, pgs[i],0,0);
KASSERT((pgs[i]->flags & PG_RELEASED) == 0);
pgs[i]->flags &= ~PG_CLEAN;
@@ -250,7 +269,7 @@
*/
simple_lock(&uobj->vmobjlock);
- for (i = 0; i < npages; i++) {
+ for (i = 0; i < *npages; i++) {
pgs[i]->flags &= ~PG_RDONLY;
if (error) {
pgs[i]->flags |= PG_RELEASED;
@@ -258,11 +277,59 @@
}
if (error) {
uvm_lock_pageq();
- uvm_page_unbusy(pgs, npages);
+ uvm_page_unbusy(pgs, *npages);
uvm_unlock_pageq();
} else {
- uvm_page_unbusy(pgs, npages);
+ if (partial_unbusy) {
+ int start_npages = trunc_page(delta) >> PAGE_SHIFT;
+ int end_npages = round_page(len) >> PAGE_SHIFT;
+ if (start_npages > 0){
+ assert(!(flags & PGO_OVERWRITE));
+ uvm_page_unbusy(pgs, start_npages);
+ }
+ if (end_npages < *npages) {
+ assert(!(flags & PGO_OVERWRITE));
+ uvm_page_unbusy(pgs + end_npages,
+ *npages - end_npages);
+ }
+ *npages = end_npages - start_npages;
+ assert(*npages > 0);
+ memmove(pgs, pgs + start_npages,
+ sizeof(pgs[0]) * (*npages));
+ } else {
+ uvm_page_unbusy(pgs, *npages);
+ }
}
simple_unlock(&uobj->vmobjlock);
return error;
+}
+
+int
+ufs_balloc_range_with_pages(vp, off, len, cred, flags, pgs, npages)
+ struct vnode *vp;
+ off_t off, len;
+ struct ucred *cred;
+ int flags;
+ struct vm_page** pgs;
+ int* npages;
+{
+ return ufs_balloc_range_with_pages_internal(vp, off, len ,cred, flags, pgs, npages, TRUE);
+}
+
+
+
+int
+ufs_balloc_range(vp, off, len, cred, flags)
+ struct vnode *vp;
+ off_t off, len;
+ struct ucred *cred;
+ int flags;
+{
+ const int bshift = vp->v_mount->mnt_fs_bshift;
+ const int bsize = 1 << bshift;
+ const int ppb = MAX(bsize >> PAGE_SHIFT, 1);
+ struct vm_page *pgs[ppb];
+ int npages;
+
+ return ufs_balloc_range_with_pages_internal(vp, off, len ,cred, flags, pgs, &npages, FALSE);
}
Index: ufs/ufs/ufs_readwrite.c
===================================================================
RCS file: /usr/home/cvsroot/NetBSD/1.6/usr/src/sys/ufs/ufs/ufs_readwrite.c,v
retrieving revision 1.1.1.2
diff -u -r1.1.1.2 ufs_readwrite.c
--- ufs/ufs/ufs_readwrite.c 7 May 2003 09:04:01 -0000 1.1.1.2
+++ ufs/ufs/ufs_readwrite.c 22 May 2003 14:12:57 -0000
@@ -1,4 +1,4 @@
-/* $NetBSD: ufs_readwrite.c,v 1.42.4.1 2002/10/21 01:54:27 lukem Exp $ */
+/* $NetBSD: ufs_readwrite.c,v 1.42 2002/03/25 02:23:56 chs Exp $ */
/*-
* Copyright (c) 1993
@@ -36,7 +36,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: ufs_readwrite.c,v 1.42.4.1 2002/10/21 01:54:27 lukem Exp $");
+__KERNEL_RCSID(1, "$NetBSD: ufs_readwrite.c,v 1.42 2002/03/25 02:23:56 chs Exp $");
#ifdef LFS_READWRITE
#define BLKSIZE(a, b, c) blksize(a, b, c)
@@ -115,14 +115,17 @@
#endif
if (usepc) {
while (uio->uio_resid > 0) {
+ void* cookie;
+
bytelen = MIN(ip->i_ffs_size - uio->uio_offset,
uio->uio_resid);
if (bytelen == 0)
break;
-
- win = ubc_alloc(&vp->v_uobj, uio->uio_offset,
- &bytelen, UBC_READ);
- error = uiomove(win, bytelen, uio);
+ win = ubc_alloc_with_cookie(&vp->v_uobj, uio->uio_offset, &bytelen, UBC_READ, &cookie);
+ error = ubc_pages_mapin(cookie, uio->uio_offset);
+ if (error == 0) {
+ error = uiomove(win, bytelen, uio);
+ }
ubc_release(win, 0);
if (error)
break;
@@ -211,6 +214,10 @@
boolean_t async;
boolean_t usepc = FALSE;
+ const int bshift = ap->a_vp->v_mount->mnt_fs_bshift;
+ const int ppb = MAX((1 << bshift) >> PAGE_SHIFT, 1);
+ struct vm_page *pgs[ppb];
+
cred = ap->a_cred;
ioflag = ap->a_ioflag;
uio = ap->a_uio;
@@ -312,10 +319,31 @@
boolean_t extending; /* if we're extending a whole block */
off_t newoff;
+ int owrite_flag;
+ int npages;
+ int ubc_npages;
+ void* cookie;
+
+ caddr_t addr;
+ size_t len;
+
oldoff = uio->uio_offset;
blkoffset = blkoff(fs, uio->uio_offset);
- bytelen = MIN(fs->fs_bsize - blkoffset, uio->uio_resid);
-
+ while (uio->uio_iov[0].iov_len == 0) {
+ uio->uio_iov++;
+ uio->uio_iovcnt--;
+ }
+ bytelen = MIN(fs->fs_bsize - blkoffset, uio->uio_iov[0].iov_len);
+ /*
+ * wire down the user buffer so that uiomove cannot fault while pages are busy (avoids deadlock).
+ */
+ addr = uio->uio_iov[0].iov_base;
+ len = bytelen;
+ error = uvm_vslock(p, addr, len, VM_PROT_READ);
+ if (error) {
+ break;
+ }
+
/*
* if we're filling in a hole, allocate the blocks now and
* initialize the pages first. if we're extending the file,
@@ -325,58 +353,68 @@
*/
extending = uio->uio_offset >= preallocoff &&
uio->uio_offset < endallocoff;
-
- if (!extending) {
- error = ufs_balloc_range(vp, uio->uio_offset, bytelen,
- cred, aflag);
- if (error) {
- break;
- }
- ubc_alloc_flags &= ~UBC_FAULTBUSY;
+ if (blkoffset > 0 || bytelen < fs->fs_bsize) {
+ owrite_flag = 0;
} else {
- lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL);
- error = GOP_ALLOC(vp, uio->uio_offset, bytelen,
- aflag, cred);
- lockmgr(&gp->g_glock, LK_RELEASE, NULL);
- if (error) {
- break;
- }
- ubc_alloc_flags |= UBC_FAULTBUSY;
+ owrite_flag = PGO_OVERWRITE;
}
-
- /*
- * copy the data.
- */
-
- win = ubc_alloc(&vp->v_uobj, uio->uio_offset, &bytelen,
- ubc_alloc_flags);
- error = uiomove(win, bytelen, uio);
- if (error && extending) {
- /*
- * if we haven't initialized the pages yet,
- * do it now. it's safe to use memset here
- * because we just mapped the pages above.
- */
- memset(win, 0, bytelen);
+
+ cookie = ubc_reserve(&vp->v_uobj, ubc_alloc_flags);
+ error = ufs_balloc_range_with_pages(vp, uio->uio_offset, bytelen, cred, aflag | owrite_flag, pgs, &npages);
+ if (error) {
+ uvm_vsunlock(p, addr, len);
+ ubc_release_with_cookie(cookie);
+ break;
}
- ubc_release(win, 0);
/*
- * update UVM's notion of the size now that we've
- * copied the data into the vnode's pages.
- *
- * we should update the size even when uiomove failed.
- * otherwise ffs_truncate can't flush soft update states.
+ * copy the data.
*/
-
- newoff = oldoff + bytelen;
- if (vp->v_size < newoff) {
- uvm_vnp_setsize(vp, newoff);
+ ubc_npages = 0;
+ while (bytelen > 0) {
+ int alloc_npages;
+ vsize_t ubc_bytelen = bytelen;
+ off_t ubc_oldoff = uio->uio_offset;
+
+ win = ubc_pages_enter(cookie, uio->uio_offset, &ubc_bytelen, pgs + ubc_npages, npages - ubc_npages, &alloc_npages);
+
+ assert(ubc_bytelen > 0);
+ assert(((round_page(uio->uio_offset + ubc_bytelen) - trunc_page(uio->uio_offset)) >> PAGE_SHIFT) == alloc_npages);
+ assert(alloc_npages == npages ||
+ round_page(uio->uio_offset + ubc_bytelen) ==
+ uio->uio_offset + ubc_bytelen);
+
+ error = uiomove(win, ubc_bytelen, uio);
+ if (error & extending) {
+ /*
+ * if we haven't initialized the pages yet,
+ * do it now. it's safe to use memset here
+ * because we just mapped the pages above.
+ */
+ memset(win, 0, ubc_bytelen);
+ }
+ ubc_pages_remove(cookie, pgs + ubc_npages, alloc_npages);
+
+ ubc_npages += alloc_npages;
+ bytelen -= ubc_bytelen;
+
+ newoff = ubc_oldoff + ubc_bytelen;
+ if (vp->v_size < newoff) {
+ uvm_vnp_setsize(vp, newoff);
+ }
+ if (error) {
+ uvm_page_unbusy(pgs + ubc_npages,
+ npages - ubc_npages);
+ break;
+ }
}
-
+ ubc_release_with_cookie(cookie);
+ uvm_vsunlock(p, addr, len);
+
if (error) {
break;
}
+ assert(npages == ubc_npages);
/*
* flush what we just wrote if necessary.
Index: uvm/uvm_bio.c
===================================================================
RCS file: /usr/home/cvsroot/NetBSD/1.6/usr/src/sys/uvm/uvm_bio.c,v
retrieving revision 1.1.1.1
retrieving revision 1.1.1.1.8.1
diff -u -r1.1.1.1 -r1.1.1.1.8.1
--- uvm/uvm_bio.c 7 Nov 2002 04:54:39 -0000 1.1.1.1
+++ uvm/uvm_bio.c 22 May 2003 11:43:27 -0000 1.1.1.1.8.1
@@ -71,11 +71,19 @@
(&ubc_object.inactive[(((u_long)(offset)) >> ubc_winshift) & \
(UBC_NQUEUES - 1)])
+#define UBC_QUEUE_WITH_INDEX(i) (&ubc_object.inactive[(i) & (UBC_NQUEUES - 1)])
+#define UBC_QUEUE_WITH_UMAP(u) UBC_QUEUE_WITH_INDEX((u) - ubc_object.umap)
+
+
+
#define UBC_UMAP_ADDR(u) \
(vaddr_t)(ubc_object.kva + (((u) - ubc_object.umap) << ubc_winshift))
#define UMAP_PAGES_LOCKED 0x0001
+#define UMAP_PAGES_MAPIN 0x1000
+#define UMAP_PAGES_RESERVED 0x2000
+#define UMAP_PAGES_ENTERED 0x4000
#define UMAP_MAPPING_CACHED 0x0002
struct ubc_map
@@ -102,7 +110,6 @@
TAILQ_HEAD(ubc_inactive_head, ubc_map) *inactive;
/* inactive queues for ubc_map's */
-
} ubc_object;
struct uvm_pagerops ubc_pager =
@@ -190,8 +197,7 @@
}
for (i = 0; i < ubc_nwins; i++) {
umap = &ubc_object.umap[i];
- TAILQ_INSERT_TAIL(&ubc_object.inactive[i & (UBC_NQUEUES - 1)],
- umap, inactive);
+ TAILQ_INSERT_TAIL(UBC_QUEUE_WITH_INDEX(i), umap, inactive);
}
ubc_object.hash = hashinit(ubc_nwins, HASH_LIST, M_TEMP, M_NOWAIT,
@@ -257,6 +263,8 @@
uobj = umap->uobj;
vp = (struct vnode *)uobj;
KASSERT(vp != NULL);
+ assert(!(umap->flags & UMAP_PAGES_RESERVED));
+ assert(!(umap->flags & UMAP_PAGES_MAPIN));
npages = MIN(ubc_winsize - slot_offset,
(round_page(MAX(vp->v_size, umap->offset +
@@ -347,7 +355,8 @@
struct ubc_map *umap;
LIST_FOREACH(umap, &ubc_object.hash[UBC_HASH(uobj, offset)], hash) {
- if (umap->uobj == uobj && umap->offset == offset) {
+ if (umap->uobj == uobj && umap->offset == offset &&
+ !(umap->flags & UMAP_PAGES_RESERVED)) {
return umap;
}
}
@@ -359,30 +368,21 @@
* ubc interface functions
*/
-/*
- * ubc_alloc: allocate a file mapping window
- */
-
-void *
-ubc_alloc(uobj, offset, lenp, flags)
- struct uvm_object *uobj;
- voff_t offset;
- vsize_t *lenp;
- int flags;
+static vaddr_t
+ubc_alloc_internal(struct uvm_object* uobj, voff_t offset, vsize_t *lenp,
+ int flags,
+ struct ubc_map** umap, vaddr_t* slot_offset)
{
- struct vnode *vp = (struct vnode *)uobj;
- vaddr_t slot_offset, va;
- struct ubc_map *umap;
+ vaddr_t va;
voff_t umap_offset;
- int error;
- UVMHIST_FUNC("ubc_alloc"); UVMHIST_CALLED(ubchist);
+ UVMHIST_FUNC(__FUNCTION__); UVMHIST_CALLED(ubchist);
UVMHIST_LOG(ubchist, "uobj %p offset 0x%lx len 0x%lx filesize 0x%x",
- uobj, offset, *lenp, vp->v_size);
+ uobj, offset, *lenp, ((struct vnode*)uobj)->v_size);
umap_offset = (offset & ~((voff_t)ubc_winsize - 1));
- slot_offset = (vaddr_t)(offset & ((voff_t)ubc_winsize - 1));
- *lenp = MIN(*lenp, ubc_winsize - slot_offset);
+ *slot_offset = (vaddr_t)(offset & ((voff_t)ubc_winsize - 1));
+ *lenp = MIN(*lenp, ubc_winsize - *slot_offset);
/*
* the vnode is always locked here, so we don't need to add a ref.
@@ -390,10 +390,10 @@
again:
simple_lock(&ubc_object.uobj.vmobjlock);
- umap = ubc_find_mapping(uobj, umap_offset);
- if (umap == NULL) {
- umap = TAILQ_FIRST(UBC_QUEUE(offset));
- if (umap == NULL) {
+ (*umap) = ubc_find_mapping(uobj, umap_offset);
+ if ((*umap) == NULL) {
+ (*umap) = TAILQ_FIRST(UBC_QUEUE(offset));
+ if ((*umap) == NULL) {
simple_unlock(&ubc_object.uobj.vmobjlock);
tsleep(&lbolt, PVM, "ubc_alloc", 0);
goto again;
@@ -403,44 +403,74 @@
* remove from old hash (if any), add to new hash.
*/
- if (umap->uobj != NULL) {
- LIST_REMOVE(umap, hash);
+ if ((*umap)->uobj != NULL) {
+ LIST_REMOVE(*umap, hash);
}
- umap->uobj = uobj;
- umap->offset = umap_offset;
+ (*umap)->uobj = uobj;
+ (*umap)->offset = umap_offset;
LIST_INSERT_HEAD(&ubc_object.hash[UBC_HASH(uobj, umap_offset)],
- umap, hash);
- va = UBC_UMAP_ADDR(umap);
- if (umap->flags & UMAP_MAPPING_CACHED) {
- umap->flags &= ~UMAP_MAPPING_CACHED;
+ *umap, hash);
+ va = UBC_UMAP_ADDR(*umap);
+ if ((*umap)->flags & UMAP_MAPPING_CACHED) {
+ (*umap)->flags &= ~UMAP_MAPPING_CACHED;
pmap_remove(pmap_kernel(), va, va + ubc_winsize);
pmap_update(pmap_kernel());
}
} else {
- va = UBC_UMAP_ADDR(umap);
+ va = UBC_UMAP_ADDR(*umap);
}
- if (umap->refcount == 0) {
- TAILQ_REMOVE(UBC_QUEUE(offset), umap, inactive);
+ if ((*umap)->refcount == 0) {
+ TAILQ_REMOVE(UBC_QUEUE_WITH_UMAP(*umap), *umap, inactive);
}
#ifdef DIAGNOSTIC
- if ((flags & UBC_WRITE) && (umap->writeoff || umap->writelen)) {
+ if ((flags & UBC_WRITE) && ((*umap)->writeoff || (*umap)->writelen)) {
panic("ubc_fault: concurrent writes vp %p", uobj);
}
#endif
if (flags & UBC_WRITE) {
- umap->writeoff = slot_offset;
- umap->writelen = *lenp;
+ (*umap)->writeoff = *slot_offset;
+ (*umap)->writelen = *lenp;
}
- umap->refcount++;
+ (*umap)->refcount++;
simple_unlock(&ubc_object.uobj.vmobjlock);
UVMHIST_LOG(ubchist, "umap %p refs %d va %p flags 0x%x",
- umap, umap->refcount, va, flags);
+ *umap, (*umap)->refcount, va, flags);
+ assert(!((*umap)->flags & UMAP_PAGES_RESERVED));
+ assert(!((*umap)->flags & UMAP_PAGES_ENTERED));
+
+ return va;
+}
+
+
+/*
+ * ubc_alloc_with_cookie: allocate a file mapping window; the umap is also returned via *cookie when cookie is non-NULL
+ */
+
+void *
+ubc_alloc_with_cookie(uobj, offset, lenp, flags, cookie)
+ struct uvm_object *uobj;
+ voff_t offset;
+ vsize_t *lenp;
+ int flags;
+ void** cookie;
+{
+ struct vnode *vp = (struct vnode *)uobj;
+ vaddr_t slot_offset, va;
+ struct ubc_map *umap;
+ int error;
+ UVMHIST_FUNC(__FUNCTION__); UVMHIST_CALLED(ubchist);
+
+ UVMHIST_LOG(ubchist, "uobj %p offset 0x%lx len 0x%lx filesize 0x%x",
+ uobj, offset, *lenp, vp->v_size);
+
+ va = ubc_alloc_internal(uobj, offset, lenp, flags, &umap,
+ &slot_offset);
if (flags & UBC_FAULTBUSY) {
- int npages = (*lenp + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ int npages = (int)((round_page(*lenp + offset) - trunc_page(offset)) >> PAGE_SHIFT);
struct vm_page *pgs[npages];
int gpflags = PGO_SYNCIO|PGO_OVERWRITE|PGO_PASTEOF;
int i;
@@ -467,31 +497,62 @@
}
out:
+ if (cookie != NULL) {
+ *cookie = umap;
+ }
return (void *)(va + slot_offset);
}
-/*
- * ubc_release: free a file mapping window.
- */
-
-void
-ubc_release(va, flags)
- void *va;
+void *
+ubc_alloc(uobj, offset, lenp, flags)
+ struct uvm_object *uobj;
+ voff_t offset;
+ vsize_t *lenp;
int flags;
{
- struct ubc_map *umap;
- struct uvm_object *uobj;
- vaddr_t umapva;
- boolean_t unmapped;
- UVMHIST_FUNC("ubc_release"); UVMHIST_CALLED(ubchist);
-
- UVMHIST_LOG(ubchist, "va %p", va, 0, 0, 0);
- umap = &ubc_object.umap[((char *)va - ubc_object.kva) >> ubc_winshift];
- umapva = UBC_UMAP_ADDR(umap);
- uobj = umap->uobj;
- KASSERT(uobj != NULL);
+ return ubc_alloc_with_cookie(uobj, offset, lenp, flags, NULL);
+}
- if (umap->flags & UMAP_PAGES_LOCKED) {
+static void
+ubc_release_umap(struct ubc_map* umap)
+{
+ boolean_t unmapped;
+ struct uvm_object *uobj = umap->uobj;
+ const vaddr_t umapva = UBC_UMAP_ADDR(umap);
+ UVMHIST_FUNC(__FUNCTION__); UVMHIST_CALLED(ubchist);
+ UVMHIST_LOG(ubchist, "umap %p offset 0x%lx refcount %d flags 0x%x",
+ umap, umap->offset, umap->refcount, umap->flags);
+
+ if (umap->flags & UMAP_PAGES_RESERVED) {
+ assert(umap->refcount == 1);
+ assert(!(umap->flags & UMAP_PAGES_ENTERED));
+ unmapped = TRUE;
+ umap->flags &= ~UMAP_PAGES_RESERVED;
+ } else if (umap->flags & UMAP_PAGES_MAPIN) {
+ int slot_offset = umap->writeoff;
+ int npages = (int)(round_page(umap->writeoff + umap->writelen)
+ - trunc_page(umap->writeoff)) >> PAGE_SHIFT;
+ struct vm_page *pgs[npages];
+ paddr_t pa;
+ int i;
+ boolean_t rv;
+
+ umap->flags &= ~UMAP_PAGES_MAPIN;
+ uvm_lock_pageq();
+ for (i = 0; i < npages; i++) {
+ rv = pmap_extract(pmap_kernel(),
+ umapva + slot_offset + (i << PAGE_SHIFT), &pa);
+ KASSERT(rv);
+ pgs[i] = PHYS_TO_VM_PAGE(pa);
+ pgs[i]->flags &= ~(PG_FAKE|PG_CLEAN);
+ uvm_pageactivate(pgs[i]);
+ }
+ uvm_unlock_pageq();
+ pmap_kremove(umapva, ubc_winsize);
+ pmap_update(pmap_kernel());
+ uvm_page_unbusy(pgs, npages);
+ unmapped = TRUE;
+ } else if (umap->flags & UMAP_PAGES_LOCKED) {
int slot_offset = umap->writeoff;
int endoff = umap->writeoff + umap->writelen;
int zerolen = round_page(endoff) - endoff;
@@ -524,6 +585,8 @@
unmapped = FALSE;
}
+ assert(!(umap->flags & UMAP_PAGES_RESERVED));
+ assert(!(umap->flags & UMAP_PAGES_ENTERED));
simple_lock(&ubc_object.uobj.vmobjlock);
umap->writeoff = 0;
umap->writelen = 0;
@@ -549,14 +612,18 @@
pmap_update(pmap_kernel());
LIST_REMOVE(umap, hash);
umap->uobj = NULL;
- TAILQ_INSERT_HEAD(UBC_QUEUE(umap->offset), umap,
- inactive);
+ TAILQ_INSERT_HEAD(UBC_QUEUE_WITH_UMAP(umap), umap,
+ inactive);
+
} else {
if (!unmapped) {
umap->flags |= UMAP_MAPPING_CACHED;
+ TAILQ_INSERT_TAIL(UBC_QUEUE_WITH_UMAP(umap),
+ umap, inactive);
+ } else {
+ TAILQ_INSERT_HEAD(UBC_QUEUE_WITH_UMAP(umap),
+ umap, inactive);
}
- TAILQ_INSERT_TAIL(UBC_QUEUE(umap->offset), umap,
- inactive);
}
}
UVMHIST_LOG(ubchist, "umap %p refs %d", umap, umap->refcount, 0, 0);
@@ -565,6 +632,35 @@
/*
+ * ubc_release: free a file mapping window.
+ */
+
+void
+ubc_release(va, flags)
+ void *va;
+ int flags;
+{
+ struct ubc_map *umap;
+ struct uvm_object *uobj;
+ vaddr_t umapva;
+ UVMHIST_FUNC("ubc_release"); UVMHIST_CALLED(ubchist);
+
+ UVMHIST_LOG(ubchist, "va %p", va, 0, 0, 0);
+ umap = &ubc_object.umap[((char *)va - ubc_object.kva) >> ubc_winshift];
+ umapva = UBC_UMAP_ADDR(umap);
+ uobj = umap->uobj;
+ KASSERT(uobj != NULL);
+
+ ubc_release_umap(umap);
+}
+
+void
+ubc_release_with_cookie(void* cookie)
+{
+ ubc_release_umap((struct ubc_map*)cookie);
+}
+
+/*
* removing a range of mappings from the ubc mapping cache.
*/
@@ -602,9 +698,244 @@
LIST_REMOVE(umap, hash);
umap->uobj = NULL;
- TAILQ_REMOVE(UBC_QUEUE(umap->offset), umap, inactive);
- TAILQ_INSERT_HEAD(UBC_QUEUE(umap->offset), umap, inactive);
+ TAILQ_REMOVE(UBC_QUEUE_WITH_UMAP(umap), umap, inactive);
+ TAILQ_INSERT_HEAD(UBC_QUEUE_WITH_UMAP(umap), umap, inactive);
+ }
+ pmap_update(pmap_kernel());
+ simple_unlock(&ubc_object.uobj.vmobjlock);
+}
+
+/*
+ * for read
+ */
+int
+ubc_pages_mapin(void* cookie, voff_t offset)
+{
+ struct uvm_object *uobj;
+ struct vnode *vp;
+ struct ubc_map* const umap = (struct ubc_map*)cookie;
+ vaddr_t va, eva, slot_offset;
+ int i, error, npages;
+ struct vm_page *pgs[ubc_winsize >> PAGE_SHIFT], *pg;
+ vm_prot_t prot;
+ vm_prot_t access_type = VM_PROT_READ;
+ int flags;
+ UVMHIST_FUNC(__FUNCTION__); UVMHIST_CALLED(ubchist);
+
+ KASSERT(umap->refcount != 0);
+ slot_offset = offset & (ubc_winsize - 1);
+
+ /* no umap locking needed since we have a ref on the umap */
+ //printf("mapin:umap = %p, cookie = %p\n", umap, cookie);
+ uobj = umap->uobj;
+ vp = (struct vnode *)uobj;
+ KASSERT(vp != NULL);
+ assert(!(umap->flags & UMAP_PAGES_MAPIN));
+
+ npages = MIN(ubc_winsize - slot_offset,
+ (round_page(MAX(vp->v_size, umap->offset +
+ umap->writeoff + umap->writelen)) -
+ umap->offset)) >> PAGE_SHIFT;
+
+again:
+ memset(pgs, 0, sizeof (pgs));
+ simple_lock(&uobj->vmobjlock);
+
+ UVMHIST_LOG(ubchist, "slot_offset 0x%x writeoff 0x%x writelen 0x%x "
+ "v_size 0x%x", slot_offset, umap->writeoff, umap->writelen,
+ vp->v_size);
+ UVMHIST_LOG(ubchist, "getpages vp %p offset 0x%x npages %d",
+ uobj, umap->offset + slot_offset, npages, 0);
+
+ flags = PGO_SYNCIO;
+ error = VOP_GETPAGES(vp, umap->offset + slot_offset, pgs, &npages, 0,
+ access_type, 0, flags);
+ UVMHIST_LOG(ubchist, "getpages error %d npages %d", error, npages, 0,
+ 0);
+
+ if (error == EAGAIN) {
+ tsleep(&lbolt, PVM, "ubc_fault", 0);
+ goto again;
}
+ if (error) {
+ return error;
+ }
+
+ va = trunc_page(UBC_UMAP_ADDR(umap) + slot_offset);
+ eva = va + (npages << PAGE_SHIFT);
+
+ /*
+ * for virtually-indexed, virtually-tagged caches we should avoid
+ * creating writable mappings when we don't absolutely need them,
+ * since the "compatible alias" trick doesn't work on such caches.
+ * otherwise, we can always map the pages writable.
+ */
+
+#ifdef PMAP_CACHE_VIVT
+ prot = VM_PROT_READ | access_type;
+#else
+ prot = VM_PROT_READ | VM_PROT_WRITE;
+#endif
+ UVMHIST_LOG(ubchist, "va 0x%lx eva 0x%lx", va, eva, 0, 0);
+ simple_lock(&uobj->vmobjlock);
+ uvm_lock_pageq();
+ for (i = 0; va < eva; i++, va += PAGE_SIZE) {
+ UVMHIST_LOG(ubchist, "pgs[%d] = %p", i, pgs[i], 0, 0);
+ pg = pgs[i];
+
+ if (pg == NULL || pg == PGO_DONTCARE) {
+ continue;
+ }
+ if (pg->flags & PG_WANTED) {
+ wakeup(pg);
+ }
+ KASSERT((pg->flags & PG_FAKE) == 0);
+ if (pg->flags & PG_RELEASED) {
+ uvm_pagefree(pg);
+ continue;
+ }
+ KASSERT(access_type == VM_PROT_READ ||
+ (pg->flags & PG_RDONLY) == 0);
+
+ pmap_enter(pmap_kernel(), va, VM_PAGE_TO_PHYS(pg),
+ (pg->flags & PG_RDONLY) ? prot & ~VM_PROT_WRITE : prot,
+ access_type);
+
+ uvm_pageactivate(pg);
+ pg->flags &= ~(PG_BUSY);
+ UVM_PAGE_OWN(pg, NULL);
+ }
+ uvm_unlock_pageq();
+ simple_unlock(&uobj->vmobjlock);
+ pmap_update(pmap_kernel());
+
+ return 0;
+}
+
+/*
+ * for write
+ */
+void*
+ubc_reserve(struct uvm_object* uobj, int flags)
+{
+ struct ubc_map *umap;
+ // these are protected by ubc_object.uobj.vmobjlock
+ static int queue_index = 0;
+ static voff_t umap_offset = 0;
+ vaddr_t va;
+ UVMHIST_FUNC(__FUNCTION__); UVMHIST_CALLED(ubchist);
+
+again:
+ simple_lock(&ubc_object.uobj.vmobjlock);
+ ++queue_index;
+ umap_offset += PAGE_SIZE;
+ umap = TAILQ_FIRST(UBC_QUEUE_WITH_INDEX(queue_index));
+ if (umap == NULL) {
+ simple_unlock(&ubc_object.uobj.vmobjlock);
+ tsleep(&lbolt, PVM, __FUNCTION__, 0);
+ goto again;
+ }
+
+ /*
+ * remove from old hash (if any)
+ */
+
+ if (umap->uobj != NULL) {
+ LIST_REMOVE(umap, hash);
+ }
+ umap->uobj = uobj;
+ umap->offset = umap_offset;
+ LIST_INSERT_HEAD(&ubc_object.hash[UBC_HASH(umap->uobj, umap_offset)],
+ umap, hash);
+ va = UBC_UMAP_ADDR(umap);
+ if (umap->flags & UMAP_MAPPING_CACHED) {
+ umap->flags &= ~UMAP_MAPPING_CACHED;
+ pmap_remove(pmap_kernel(), va, va + ubc_winsize);
+ pmap_update(pmap_kernel());
+ }
+ umap->flags |= UMAP_PAGES_RESERVED;
+ umap->flags &= ~UMAP_PAGES_ENTERED;
+ assert(umap->refcount == 0);
+ TAILQ_REMOVE(UBC_QUEUE_WITH_UMAP(umap), umap, inactive);
+ umap->refcount++;
+ simple_unlock(&ubc_object.uobj.vmobjlock);
+ UVMHIST_LOG(ubchist, "umap %p refs %d flags 0x%x",
+ umap, umap->refcount, flags, 0);
+
+ return (void*)umap;
+}
+
+static void
+ubc_pages_umap_rehash(struct ubc_map* umap, voff_t umap_offset)
+{
+ LIST_REMOVE(umap, hash);
+ umap->offset = umap_offset;
+ assert(umap->uobj != NULL);
+ LIST_INSERT_HEAD(&ubc_object.hash[UBC_HASH(umap->uobj, umap_offset)],
+ umap, hash);
+}
+
+void*
+ubc_pages_enter(void* cookie, voff_t offset, vsize_t* lenp, struct vm_page* pgs[], int npages, int* alloc_npages)
+{
+ struct ubc_map* const umap = (struct ubc_map*)cookie;
+ vaddr_t va = UBC_UMAP_ADDR(umap);
+ int i;
+ voff_t umap_offset = (offset & ~((voff_t)ubc_winsize - 1));
+ vaddr_t slot_offset = (vaddr_t)(offset & ((voff_t)ubc_winsize - 1));
+
+ UVMHIST_FUNC(__FUNCTION__); UVMHIST_CALLED(ubchist);
+ UVMHIST_LOG(ubchist, "cookie 0x%x offset 0x%lx *lenp 0x%lx npages %d",
+ cookie, offset, *lenp, npages);
+
+ *lenp = MIN(*lenp, ubc_winsize - slot_offset);
+
+ simple_lock(&ubc_object.uobj.vmobjlock);
+ umap->writeoff = offset;
+ umap->writelen = *lenp;
+ ubc_pages_umap_rehash(umap, umap_offset);
+
+ *alloc_npages = (int)(round_page(*lenp + offset) - trunc_page(offset)) >> PAGE_SHIFT;
+ assert(*alloc_npages <= npages);
+ for (i = 0; i < *alloc_npages; ++i) {
+ pmap_kenter_pa(va + slot_offset + (i << PAGE_SHIFT),
+ VM_PAGE_TO_PHYS(pgs[i]),
+ VM_PROT_READ | VM_PROT_WRITE);
+ }
+ pmap_update(pmap_kernel());
+ assert(!(umap->flags & UMAP_PAGES_ENTERED));
+ umap->flags |= UMAP_PAGES_ENTERED;
+ simple_unlock(&ubc_object.uobj.vmobjlock);
+
+ UVMHIST_LOG(ubchist, "win 0x%x psg 0x%x *lenp 0x%lx *alloc_npages %d",
+ (void*)(va + slot_offset), pgs, *lenp, *alloc_npages);
+ return (void*)(va + slot_offset);
+}
+
+void
+ubc_pages_remove(void* cookie, struct vm_page* pgs[], int npages)
+{
+ int i;
+ struct ubc_map* const umap = (struct ubc_map*)cookie;
+ const vaddr_t umapva = UBC_UMAP_ADDR(umap);
+ UVMHIST_FUNC(__FUNCTION__); UVMHIST_CALLED(ubchist);
+ UVMHIST_LOG(ubchist, "cookie %p pgs 0x%x npages %d",
+ cookie, pgs, npages, 0);
+
+ simple_lock(&ubc_object.uobj.vmobjlock);
+ assert(umap->flags & UMAP_PAGES_ENTERED);
+ umap->flags &= ~UMAP_PAGES_ENTERED;
+ umap->writeoff = 0;
+ umap->writelen = 0;
+
+ uvm_lock_pageq();
+ for (i = 0; i < npages; i++) {
+ pgs[i]->flags &= ~(PG_FAKE|PG_CLEAN);
+ uvm_pageactivate(pgs[i]);
+ }
+ uvm_unlock_pageq();
+ pmap_kremove(umapva, ubc_winsize);
pmap_update(pmap_kernel());
simple_unlock(&ubc_object.uobj.vmobjlock);
+ uvm_page_unbusy(pgs, npages);
}
Index: uvm/uvm_extern.h
===================================================================
RCS file: /usr/home/cvsroot/NetBSD/1.6/usr/src/sys/uvm/uvm_extern.h,v
retrieving revision 1.1.1.1
retrieving revision 1.1.1.1.8.1
diff -u -r1.1.1.1 -r1.1.1.1.8.1
--- uvm/uvm_extern.h 7 Nov 2002 04:54:39 -0000 1.1.1.1
+++ uvm/uvm_extern.h 22 May 2003 11:43:27 -0000 1.1.1.1.8.1
@@ -545,10 +545,20 @@
/* uvm_bio.c */
void ubc_init __P((void));
+void * ubc_alloc_with_cookie(struct uvm_object *uobj, voff_t offset, vsize_t *lenp, int flags, void** cookie);
+
void * ubc_alloc __P((struct uvm_object *, voff_t, vsize_t *,
int));
+void* ubc_alloc_with_pages __P((struct uvm_object* uobj, voff_t offset, vsize_t* lenp, int flags, struct vm_page* pgs[], int npages, int* alloc_npages));
void ubc_release __P((void *, int));
+void ubc_release_with_cookie(void* cookie);
void ubc_flush __P((struct uvm_object *, voff_t, voff_t));
+
+void* ubc_reserve(struct uvm_object* uobj, int flags);
+int ubc_pages_mapin(void* cookie, voff_t offset);
+void* ubc_pages_enter(void* cookie, voff_t offset, vsize_t* lenp, struct vm_page* pgs[], int npsages, int* alloc_npages);
+void ubc_pages_remove(void* cookie, struct vm_page* pgs[], int npages);
+
/* uvm_fault.c */
int uvm_fault __P((struct vm_map *, vaddr_t, vm_fault_t,
--xHFwDpU9dbj6ez1V--