Subject: anon allocation
To: None <tech-kern@netbsd.org>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-kern
Date: 04/30/2005 18:44:13
hi,
if no one objects, i'll change how anons are allocated.
- allocate anons dynamically.
- use pool(9). for swapoff, traverse anons via amaps.
(see the sketch below and the attached diff for details)
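a stand-alone user-space sketch of the two points above, for illustration only
(it is not the patch itself): struct anon, struct amap, anon_alloc() and
swapoff_range() here are made-up stand-ins for the UVM structures, for
pool_cache_get(9), and for the amap-list walk that swapoff would do.
the real code is in the attached diff.

/*
 * sketch: anons are allocated on demand (no fixed boot-time anon array),
 * and swapoff finds in-swap anons by walking a global list of amaps.
 * build with: cc -o sketch sketch.c
 */
#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

struct anon {
	int swslot;			/* 0 means "not on swap" */
};

struct amap {
	LIST_ENTRY(amap) list;		/* linkage on the global amap list */
	int nused;
	struct anon **anons;
};

static LIST_HEAD(, amap) amap_list = LIST_HEAD_INITIALIZER(amap_list);

/* dynamic allocation; in the kernel this would be pool_cache_get(9). */
static struct anon *
anon_alloc(void)
{

	return calloc(1, sizeof(struct anon));
}

/* walk every amap and "page in" anons whose swap slot is in [start, end). */
static void
swapoff_range(int start, int end)
{
	struct amap *am;
	int i;

	LIST_FOREACH(am, &amap_list, list) {
		for (i = 0; i < am->nused; i++) {
			struct anon *an = am->anons[i];

			if (an->swslot >= start && an->swslot < end) {
				printf("pagein anon at slot %d\n", an->swslot);
				an->swslot = 0;
			}
		}
	}
}

int
main(void)
{
	struct amap am;
	struct anon *a = anon_alloc();

	if (a == NULL)
		return 1;
	a->swslot = 42;
	am.nused = 1;
	am.anons = &a;
	LIST_INSERT_HEAD(&amap_list, &am, list);

	swapoff_range(1, 100);		/* pages in the anon at slot 42 */
	free(a);
	return 0;
}

the real patch additionally has to cope with locking (amap_lock_try, the
AMAP_SWAPOFF flag, and a list marker so the walk can be restarted), which the
sketch leaves out.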
YAMAMOTO Takashi
Content-Disposition: attachment; filename="anon.diff"
Index: uvm/uvm_swap.c
===================================================================
--- uvm/uvm_swap.c (revision 1139)
+++ uvm/uvm_swap.c (working copy)
@@ -934,15 +934,6 @@ swap_on(p, sdp)
printf("leaving %d pages of swap\n", size);
}
- /*
- * try to add anons to reflect the new swap space.
- */
-
- error = uvm_anon_add(size);
- if (error) {
- goto bad;
- }
-
/*
* add a ref to vp to reflect usage as a swap device.
*/
@@ -990,7 +981,8 @@ swap_off(p, sdp)
struct proc *p;
struct swapdev *sdp;
{
- int npages = sdp->swd_npages;
+ int npages = sdp->swd_npages;
+ int error = 0;
UVMHIST_FUNC("swap_off"); UVMHIST_CALLED(pdhist);
UVMHIST_LOG(pdhist, " dev=%x, npages=%d", sdp->swd_dev,npages,0,0);
@@ -1009,16 +1001,21 @@ swap_off(p, sdp)
if (uao_swap_off(sdp->swd_drumoffset,
sdp->swd_drumoffset + sdp->swd_drumsize) ||
- anon_swap_off(sdp->swd_drumoffset,
+ amap_swap_off(sdp->swd_drumoffset,
sdp->swd_drumoffset + sdp->swd_drumsize)) {
+ error = ENOMEM;
+ } else if (sdp->swd_npginuse > sdp->swd_npgbad) {
+ error = EBUSY;
+ }
+ if (error) {
simple_lock(&uvm.swap_data_lock);
sdp->swd_flags |= SWF_ENABLE;
uvmexp.swpgavail += npages;
simple_unlock(&uvm.swap_data_lock);
- return ENOMEM;
+
+ return error;
}
- KASSERT(sdp->swd_npginuse == sdp->swd_npgbad);
/*
* done with the vnode.
@@ -1029,9 +1026,6 @@ swap_off(p, sdp)
if (sdp->swd_vp != rootvp) {
(void) VOP_CLOSE(sdp->swd_vp, FREAD|FWRITE, p->p_ucred, p);
}
-
- /* remove anons from the system */
- uvm_anon_remove(npages);
simple_lock(&uvm.swap_data_lock);
uvmexp.swpages -= npages;
Index: uvm/uvm_fault.c
===================================================================
--- uvm/uvm_fault.c (revision 1156)
+++ uvm/uvm_fault.c (working copy)
@@ -1193,7 +1193,7 @@ ReFault:
uvm_anfree(anon);
}
uvmfault_unlockall(&ufi, amap, uobj, oanon);
- if (anon == NULL || !uvm_reclaimable()) {
+ if (!uvm_reclaimable()) {
UVMHIST_LOG(maphist,
"<- failed. out of VM",0,0,0,0);
uvmexp.fltnoanon++;
@@ -1561,7 +1561,7 @@ Case2:
/* unlock and fail ... */
uvmfault_unlockall(&ufi, amap, uobj, NULL);
- if (anon == NULL || !uvm_reclaimable()) {
+ if (!uvm_reclaimable()) {
UVMHIST_LOG(maphist, " promote: out of VM",
0,0,0,0);
uvmexp.fltnoanon++;
Index: uvm/uvm_amap.c
===================================================================
--- uvm/uvm_amap.c (revision 1139)
+++ uvm/uvm_amap.c (working copy)
@@ -71,12 +71,35 @@ POOL_INIT(uvm_amap_pool, sizeof(struct v
MALLOC_DEFINE(M_UVMAMAP, "UVM amap", "UVM amap and related structures");
+static struct simplelock amap_list_lock = SIMPLELOCK_INITIALIZER;
+static LIST_HEAD(, vm_amap) amap_list;
+
/*
* local functions
*/
static struct vm_amap *amap_alloc1(int, int, int);
+static __inline void amap_list_insert(struct vm_amap *);
+static __inline void amap_list_remove(struct vm_amap *);
+static __inline void
+amap_list_insert(struct vm_amap *amap)
+{
+
+ simple_lock(&amap_list_lock);
+ LIST_INSERT_HEAD(&amap_list, amap, am_list);
+ simple_unlock(&amap_list_lock);
+}
+
+static __inline void
+amap_list_remove(struct vm_amap *amap)
+{
+
+ simple_lock(&amap_list_lock);
+ LIST_REMOVE(amap, am_list);
+ simple_unlock(&amap_list_lock);
+}
+
#ifdef UVM_AMAP_PPREF
/*
* what is ppref? ppref is an _optional_ amap feature which is used
@@ -243,6 +266,8 @@ amap_alloc(sz, padsz, waitf)
memset(amap->am_anon, 0,
amap->am_maxslot * sizeof(struct vm_anon *));
+ amap_list_insert(amap);
+
UVMHIST_LOG(maphist,"<- done, amap = 0x%x, sz=%d", amap, sz, 0, 0);
return(amap);
}
@@ -261,6 +286,7 @@ amap_free(amap)
UVMHIST_FUNC("amap_free"); UVMHIST_CALLED(maphist);
KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
+ KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);
LOCK_ASSERT(!simple_lock_held(&amap->am_l));
free(amap->am_slots, M_UVMAMAP);
free(amap->am_bckptr, M_UVMAMAP);
@@ -653,7 +679,19 @@ amap_wipeout(amap)
UVMHIST_FUNC("amap_wipeout"); UVMHIST_CALLED(maphist);
UVMHIST_LOG(maphist,"(amap=0x%x)", amap, 0,0,0);
+ KASSERT(amap->am_ref == 1);
+ amap->am_ref = 0; /* ... was one */
+
+ if (__predict_false((amap->am_flags & AMAP_SWAPOFF) != 0)) {
+ /*
+ * amap_swap_off will call us again.
+ */
+ amap_unlock(amap);
+ return;
+ }
+ amap_list_remove(amap);
amap_unlock(amap);
+
for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
int refs;
@@ -685,7 +723,6 @@ amap_wipeout(amap)
* now we free the map
*/
- amap->am_ref = 0; /* ... was one */
amap->am_nused = 0;
amap_free(amap); /* will unlock and free amap */
UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
@@ -844,6 +881,8 @@ amap_copy(map, entry, waitf, canchunk, s
amap_unlock(srcamap);
+ amap_list_insert(amap);
+
/*
* install new amap.
*/
@@ -1223,3 +1262,90 @@ amap_wiperange(amap, slotoff, slots)
}
#endif
+
+/*
+ * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
+ *
+ * => called with swap_syscall_lock held.
+ * => note that we don't always traverse all anons.
+ * eg. amaps being wiped out, released anons.
+ * => return TRUE if failed.
+ */
+
+boolean_t
+amap_swap_off(int startslot, int endslot)
+{
+ struct vm_amap *am;
+ struct vm_amap *am_next;
+ struct vm_amap marker;
+ struct lwp *l = curlwp;
+ boolean_t rv = FALSE;
+
+ memset(&marker, 0, sizeof(marker));
+
+ PHOLD(l);
+restart:
+ simple_lock(&amap_list_lock);
+ for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
+ int i;
+
+ if (!amap_lock_try(am)) {
+ simple_unlock(&amap_list_lock);
+ preempt(1);
+ goto restart;
+ }
+ if (am->am_nused <= 0) {
+ am_next = LIST_NEXT(am, am_list);
+ amap_unlock(am);
+ continue;
+ }
+ LIST_INSERT_AFTER(am, &marker, am_list);
+ simple_unlock(&amap_list_lock);
+
+ for (i = 0; i < am->am_nused; i++) {
+ int slot;
+ int swslot;
+ struct vm_anon *anon;
+
+ slot = am->am_slots[i];
+ anon = am->am_anon[slot];
+ simple_lock(&anon->an_lock);
+
+ swslot = anon->an_swslot;
+ if (swslot < startslot || endslot <= swslot) {
+ simple_unlock(&anon->an_lock);
+ continue;
+ }
+
+ am->am_flags |= AMAP_SWAPOFF;
+ amap_unlock(am);
+
+ rv = uvm_anon_pagein(anon);
+
+ amap_lock(am);
+ am->am_flags &= ~AMAP_SWAPOFF;
+ if (amap_refs(am) == 0) {
+ am->am_ref = 1; /* XXX */
+ amap_wipeout(am);
+ am = NULL;
+ break;
+ }
+ if (rv) {
+ break;
+ }
+ i = 0;
+ }
+
+ if (am) {
+ amap_unlock(am);
+ }
+
+ simple_lock(&amap_list_lock);
+ am_next = LIST_NEXT(&marker, am_list);
+ LIST_REMOVE(&marker, am_list);
+ }
+ simple_unlock(&amap_list_lock);
+ PRELE(l);
+
+ return rv;
+}
Index: uvm/uvm_amap.h
===================================================================
--- uvm/uvm_amap.h (revision 679)
+++ uvm/uvm_amap.h (working copy)
@@ -125,6 +125,8 @@ void amap_unref /* drop reference to an
(struct vm_amap *, vaddr_t, vsize_t, int);
void amap_wipeout /* remove all anons from amap */
(struct vm_amap *);
+boolean_t amap_swap_off
+ (int, int);
/*
* amap flag values
@@ -132,6 +134,7 @@ void amap_wipeout /* remove all anons f
#define AMAP_SHARED 0x1 /* amap is shared */
#define AMAP_REFALL 0x2 /* amap_ref: reference entire amap */
+#define AMAP_SWAPOFF 0x4 /* amap_swap_off() is in progress */
/*
* amap_extend flags
@@ -174,6 +177,7 @@ struct vm_amap {
#ifdef UVM_AMAP_PPREF
int *am_ppref; /* per page reference count (if !NULL) */
#endif
+ LIST_ENTRY(vm_amap) am_list;
};
/*
@@ -264,6 +268,7 @@ MALLOC_DECLARE(M_UVMAMAP);
#define amap_flags(AMAP) ((AMAP)->am_flags)
#define amap_lock(AMAP) simple_lock(&(AMAP)->am_l)
+#define amap_lock_try(AMAP) simple_lock_try(&(AMAP)->am_l)
#define amap_refs(AMAP) ((AMAP)->am_ref)
#define amap_unlock(AMAP) simple_unlock(&(AMAP)->am_l)
Index: uvm/uvm.h
===================================================================
--- uvm/uvm.h (revision 1035)
+++ uvm/uvm.h (working copy)
@@ -106,10 +106,6 @@ struct uvm {
int page_hashmask; /* hash mask */
struct simplelock hashlock; /* lock on page_hash array */
- /* anon stuff */
- struct vm_anon *afree; /* anon free list */
- struct simplelock afreelock; /* lock on anon free list */
-
struct simplelock kentry_lock;
/* aio_done is locked by uvm.pagedaemon_lock and splbio! */
Index: uvm/uvm_anon.c
===================================================================
--- uvm/uvm_anon.c (revision 1109)
+++ uvm/uvm_anon.c (working copy)
@@ -51,107 +51,37 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_anon.c,v
#include <uvm/uvm.h>
#include <uvm/uvm_swap.h>
-/*
- * anonblock_list: global list of anon blocks,
- * locked by swap_syscall_lock (since we never remove
- * anything from this list and we only add to it via swapctl(2)).
- */
+static POOL_INIT(uvm_anon_pool, sizeof(struct vm_anon), 0, 0, 0, "anonpl",
+ &pool_allocator_nointr);
+static struct pool_cache uvm_anon_pool_cache;
-struct uvm_anonblock {
- LIST_ENTRY(uvm_anonblock) list;
- int count;
- struct vm_anon *anons;
-};
-static LIST_HEAD(anonlist, uvm_anonblock) anonblock_list;
+static int uvm_anon_ctor(void *, void *, int);
-
-static boolean_t anon_pagein(struct vm_anon *);
-
-
/*
* allocate anons
*/
void
uvm_anon_init()
{
- int nanon = uvmexp.free - (uvmexp.free / 16); /* XXXCDC ??? */
- simple_lock_init(&uvm.afreelock);
- LIST_INIT(&anonblock_list);
-
- /*
- * Allocate the initial anons.
- */
- uvm_anon_add(nanon);
+ pool_cache_init(&uvm_anon_pool_cache, &uvm_anon_pool,
+ uvm_anon_ctor, NULL, NULL);
}
-/*
- * add some more anons to the free pool. called when we add
- * more swap space.
- *
- * => swap_syscall_lock should be held (protects anonblock_list).
- */
-int
-uvm_anon_add(count)
- int count;
+static int
+uvm_anon_ctor(void *arg, void *object, int flags)
{
- struct uvm_anonblock *anonblock;
- struct vm_anon *anon;
- int lcv, needed;
+ struct vm_anon *anon = object;
- simple_lock(&uvm.afreelock);
- uvmexp.nanonneeded += count;
- needed = uvmexp.nanonneeded - uvmexp.nanon;
- simple_unlock(&uvm.afreelock);
+ anon->an_ref = 0;
+ simple_lock_init(&anon->an_lock);
+ anon->u.an_page = NULL;
+ anon->an_swslot = 0;
- if (needed <= 0) {
- return 0;
- }
- anon = (void *)uvm_km_alloc(kernel_map, sizeof(*anon) * needed, 0,
- UVM_KMF_WIRED);
- if (anon == NULL) {
- simple_lock(&uvm.afreelock);
- uvmexp.nanonneeded -= count;
- simple_unlock(&uvm.afreelock);
- return ENOMEM;
- }
- MALLOC(anonblock, void *, sizeof(*anonblock), M_UVMAMAP, M_WAITOK);
-
- anonblock->count = needed;
- anonblock->anons = anon;
- LIST_INSERT_HEAD(&anonblock_list, anonblock, list);
- memset(anon, 0, sizeof(*anon) * needed);
-
- simple_lock(&uvm.afreelock);
- uvmexp.nanon += needed;
- uvmexp.nfreeanon += needed;
- for (lcv = 0; lcv < needed; lcv++) {
- simple_lock_init(&anon[lcv].an_lock);
- anon[lcv].u.an_nxt = uvm.afree;
- uvm.afree = &anon[lcv];
- }
- simple_unlock(&uvm.afreelock);
return 0;
}
/*
- * remove anons from the free pool.
- */
-void
-uvm_anon_remove(count)
- int count;
-{
- /*
- * we never actually free any anons, to avoid allocation overhead.
- * XXX someday we might want to try to free anons.
- */
-
- simple_lock(&uvm.afreelock);
- uvmexp.nanonneeded -= count;
- simple_unlock(&uvm.afreelock);
-}
-
-/*
* allocate an anon
*
* => new anon is returned locked!
@@ -159,21 +89,18 @@ uvm_anon_remove(count)
struct vm_anon *
uvm_analloc()
{
- struct vm_anon *a;
+ struct vm_anon *anon;
- simple_lock(&uvm.afreelock);
- a = uvm.afree;
- if (a) {
- uvm.afree = a->u.an_nxt;
- uvmexp.nfreeanon--;
- a->an_ref = 1;
- a->an_swslot = 0;
- a->u.an_page = NULL; /* so we can free quickly */
- LOCK_ASSERT(simple_lock_held(&a->an_lock) == 0);
- simple_lock(&a->an_lock);
+ anon = pool_cache_get(&uvm_anon_pool_cache, PR_NOWAIT);
+ if (anon) {
+ KASSERT(anon->an_ref == 0);
+ LOCK_ASSERT(simple_lock_held(&anon->an_lock) == 0);
+ KASSERT(anon->u.an_page == NULL);
+ KASSERT(anon->an_swslot == 0);
+ anon->an_ref = 1;
+ simple_lock(&anon->an_lock);
}
- simple_unlock(&uvm.afreelock);
- return(a);
+ return anon;
}
/*
@@ -283,11 +210,7 @@ uvm_anfree(anon)
KASSERT(anon->u.an_page == NULL);
KASSERT(anon->an_swslot == 0);
- simple_lock(&uvm.afreelock);
- anon->u.an_nxt = uvm.afree;
- uvm.afree = anon;
- uvmexp.nfreeanon++;
- simple_unlock(&uvm.afreelock);
+ pool_cache_put(&uvm_anon_pool_cache, anon);
UVMHIST_LOG(maphist,"<- done!",0,0,0,0);
}
@@ -400,80 +323,15 @@ uvm_anon_lockloanpg(anon)
return(pg);
}
-
-
/*
- * page in every anon that is paged out to a range of swslots.
- *
- * swap_syscall_lock should be held (protects anonblock_list).
- */
-
-boolean_t
-anon_swap_off(startslot, endslot)
- int startslot, endslot;
-{
- struct uvm_anonblock *anonblock;
-
- LIST_FOREACH(anonblock, &anonblock_list, list) {
- int i;
-
- /*
- * loop thru all the anons in the anonblock,
- * paging in where needed.
- */
-
- for (i = 0; i < anonblock->count; i++) {
- struct vm_anon *anon = &anonblock->anons[i];
- int slot;
-
- /*
- * lock anon to work on it.
- */
-
- simple_lock(&anon->an_lock);
-
- /*
- * is this anon's swap slot in range?
- */
-
- slot = anon->an_swslot;
- if (slot >= startslot && slot < endslot) {
- boolean_t rv;
-
- /*
- * yup, page it in.
- */
-
- /* locked: anon */
- rv = anon_pagein(anon);
- /* unlocked: anon */
-
- if (rv) {
- return rv;
- }
- } else {
-
- /*
- * nope, unlock and proceed.
- */
-
- simple_unlock(&anon->an_lock);
- }
- }
- }
- return FALSE;
-}
-
-
-/*
* fetch an anon's page.
*
* => anon must be locked, and is unlocked upon return.
* => returns TRUE if pagein was aborted due to lack of memory.
*/
-static boolean_t
-anon_pagein(anon)
+boolean_t
+uvm_anon_pagein(anon)
struct vm_anon *anon;
{
struct vm_page *pg;
Index: uvm/uvm_anon.h
===================================================================
--- uvm/uvm_anon.h (revision 699)
+++ uvm/uvm_anon.h (working copy)
@@ -51,7 +51,6 @@ struct vm_anon {
int an_ref; /* reference count [an_lock] */
struct simplelock an_lock; /* lock for an_ref */
union {
- struct vm_anon *an_nxt; /* if on free list [afreelock] */
struct vm_page *an_page;/* if in RAM [an_lock] */
} u;
int an_swslot; /* drum swap slot # (if != 0)
@@ -60,9 +59,7 @@ struct vm_anon {
};
/*
- * a pool of vm_anon data structures is allocated and put on a global
- * free list at boot time. vm_anon's on the free list use "an_nxt" as
- * a pointer to the next item on the free list. for active vm_anon's
+ * for active vm_anon's
* the data can be in one of the following state: [1] in a vm_page
* with no backing store allocated yet, [2] in a vm_page with backing
* store allocated, or [3] paged out to backing store (no vm_page).
@@ -101,12 +98,10 @@ struct vm_aref {
struct vm_anon *uvm_analloc(void);
void uvm_anfree(struct vm_anon *);
void uvm_anon_init(void);
-int uvm_anon_add(int);
-void uvm_anon_remove(int);
struct vm_page *uvm_anon_lockloanpg(struct vm_anon *);
void uvm_anon_dropswap(struct vm_anon *);
-boolean_t anon_swap_off(int, int);
void uvm_anon_release(struct vm_anon *);
+boolean_t uvm_anon_pagein(struct vm_anon *);
#endif /* _KERNEL */
#endif /* _UVM_UVM_ANON_H_ */
Index: uvm/uvm_init.c
===================================================================
--- uvm/uvm_init.c (revision 1068)
+++ uvm/uvm_init.c (working copy)
@@ -136,15 +136,9 @@ uvm_init()
uvm_pager_init();
/*
- * step 8: init anonymous memory systems
+ * step 8: init the uvm_loan() facility.
*/
- uvm_anon_init(); /* allocate initial anons */
-
- /*
- * step 9: init the uvm_loan() facility.
- */
-
uvm_loan_init();
/*
@@ -163,4 +157,10 @@ uvm_init()
*/
link_pool_init();
+
+ /*
+ * init anonymous memory systems
+ */
+
+ uvm_anon_init();
}