Subject: anon allocation
To: None <tech-kern@netbsd.org>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-kern
Date: 04/30/2005 18:44:13
--NextPart-20050430183334-0125500
Content-Type: Text/Plain; charset=us-ascii

hi,

if no one objects, i'll change how anons are allocated.

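- allocate anons dynamically, on demand, instead of preallocating them
  at boot and whenever swap space is added.
- use pool(9) for the allocation.  for swapoff, traverse anons via a
  global list of amaps rather than via the list of anon blocks.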
(see the attached diff for details)

YAMAMOTO Takashi

--NextPart-20050430183334-0125500
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="anon.diff"

Index: uvm/uvm_swap.c
===================================================================
--- uvm/uvm_swap.c	(revision 1139)
+++ uvm/uvm_swap.c	(working copy)
@@ -934,15 +934,6 @@ swap_on(p, sdp)
 		printf("leaving %d pages of swap\n", size);
 	}
 
-  	/*
-	 * try to add anons to reflect the new swap space.
-	 */
-
-	error = uvm_anon_add(size);
-	if (error) {
-		goto bad;
-	}
-
 	/*
 	 * add a ref to vp to reflect usage as a swap device.
 	 */
@@ -990,7 +981,8 @@ swap_off(p, sdp)
 	struct proc *p;
 	struct swapdev *sdp;
 {
-	int npages =  sdp->swd_npages;
+	int npages = sdp->swd_npages;
+	int error = 0;
 
 	UVMHIST_FUNC("swap_off"); UVMHIST_CALLED(pdhist);
 	UVMHIST_LOG(pdhist, "  dev=%x, npages=%d", sdp->swd_dev,npages,0,0);
@@ -1009,16 +1001,21 @@ swap_off(p, sdp)
 
 	if (uao_swap_off(sdp->swd_drumoffset,
 			 sdp->swd_drumoffset + sdp->swd_drumsize) ||
-	    anon_swap_off(sdp->swd_drumoffset,
+	    amap_swap_off(sdp->swd_drumoffset,
 			  sdp->swd_drumoffset + sdp->swd_drumsize)) {
+		error = ENOMEM;
+	} else if (sdp->swd_npginuse > sdp->swd_npgbad) {
+		error = EBUSY;
+	}
 
+	if (error) {
 		simple_lock(&uvm.swap_data_lock);
 		sdp->swd_flags |= SWF_ENABLE;
 		uvmexp.swpgavail += npages;
 		simple_unlock(&uvm.swap_data_lock);
-		return ENOMEM;
+
+		return error;
 	}
-	KASSERT(sdp->swd_npginuse == sdp->swd_npgbad);
 
 	/*
 	 * done with the vnode.
@@ -1029,9 +1026,6 @@ swap_off(p, sdp)
 	if (sdp->swd_vp != rootvp) {
 		(void) VOP_CLOSE(sdp->swd_vp, FREAD|FWRITE, p->p_ucred, p);
 	}
-
-	/* remove anons from the system */
-	uvm_anon_remove(npages);
 
 	simple_lock(&uvm.swap_data_lock);
 	uvmexp.swpages -= npages;
Index: uvm/uvm_fault.c
===================================================================
--- uvm/uvm_fault.c	(revision 1156)
+++ uvm/uvm_fault.c	(working copy)
@@ -1193,7 +1193,7 @@ ReFault:
 				uvm_anfree(anon);
 			}
 			uvmfault_unlockall(&ufi, amap, uobj, oanon);
-			if (anon == NULL || !uvm_reclaimable()) {
+			if (!uvm_reclaimable()) {
 				UVMHIST_LOG(maphist,
 				    "<- failed.  out of VM",0,0,0,0);
 				uvmexp.fltnoanon++;
@@ -1561,7 +1561,7 @@ Case2:
 
 			/* unlock and fail ... */
 			uvmfault_unlockall(&ufi, amap, uobj, NULL);
-			if (anon == NULL || !uvm_reclaimable()) {
+			if (!uvm_reclaimable()) {
 				UVMHIST_LOG(maphist, "  promote: out of VM",
 				    0,0,0,0);
 				uvmexp.fltnoanon++;
Index: uvm/uvm_amap.c
===================================================================
--- uvm/uvm_amap.c	(revision 1139)
+++ uvm/uvm_amap.c	(working copy)
@@ -71,12 +71,35 @@ POOL_INIT(uvm_amap_pool, sizeof(struct v
 
 MALLOC_DEFINE(M_UVMAMAP, "UVM amap", "UVM amap and related structures");
 
+static struct simplelock amap_list_lock = SIMPLELOCK_INITIALIZER;
+static LIST_HEAD(, vm_amap) amap_list;
+
 /*
  * local functions
  */
 
 static struct vm_amap *amap_alloc1(int, int, int);
+static __inline void amap_list_insert(struct vm_amap *);
+static __inline void amap_list_remove(struct vm_amap *);
 
+static __inline void
+amap_list_insert(struct vm_amap *amap)
+{
+
+	simple_lock(&amap_list_lock);
+	LIST_INSERT_HEAD(&amap_list, amap, am_list);
+	simple_unlock(&amap_list_lock);
+}
+
+static __inline void
+amap_list_remove(struct vm_amap *amap)
+{
+
+	simple_lock(&amap_list_lock);
+	LIST_REMOVE(amap, am_list);
+	simple_unlock(&amap_list_lock);
+}
+
 #ifdef UVM_AMAP_PPREF
 /*
  * what is ppref?   ppref is an _optional_ amap feature which is used
@@ -243,6 +266,8 @@ amap_alloc(sz, padsz, waitf)
 		memset(amap->am_anon, 0,
 		    amap->am_maxslot * sizeof(struct vm_anon *));
 
+	amap_list_insert(amap);
+
 	UVMHIST_LOG(maphist,"<- done, amap = 0x%x, sz=%d", amap, sz, 0, 0);
 	return(amap);
 }
@@ -261,6 +286,7 @@ amap_free(amap)
 	UVMHIST_FUNC("amap_free"); UVMHIST_CALLED(maphist);
 
 	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
+	KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);
 	LOCK_ASSERT(!simple_lock_held(&amap->am_l));
 	free(amap->am_slots, M_UVMAMAP);
 	free(amap->am_bckptr, M_UVMAMAP);
@@ -653,7 +679,19 @@ amap_wipeout(amap)
 	UVMHIST_FUNC("amap_wipeout"); UVMHIST_CALLED(maphist);
 	UVMHIST_LOG(maphist,"(amap=0x%x)", amap, 0,0,0);
 
+	KASSERT(amap->am_ref == 1);
+	amap->am_ref = 0;	/* ... was one */
+
+	if (__predict_false((amap->am_flags & AMAP_SWAPOFF) != 0)) {
+		/*
+		 * amap_swap_off will call us again.
+		 */
+		amap_unlock(amap);
+		return;
+	}
+	amap_list_remove(amap);
 	amap_unlock(amap);
+
 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
 		int refs;
 
@@ -685,7 +723,6 @@ amap_wipeout(amap)
 	 * now we free the map
 	 */
 
-	amap->am_ref = 0;	/* ... was one */
 	amap->am_nused = 0;
 	amap_free(amap);	/* will unlock and free amap */
 	UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
@@ -844,6 +881,8 @@ amap_copy(map, entry, waitf, canchunk, s
 
 	amap_unlock(srcamap);
 
+	amap_list_insert(amap);
+
 	/*
 	 * install new amap.
 	 */
@@ -1223,3 +1262,90 @@ amap_wiperange(amap, slotoff, slots)
 }
 
 #endif
+
+/*
+ * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
+ *
+ * => called with swap_syscall_lock held.
+ * => note that we don't always traverse all anons,
+ *    e.g. those in amaps being wiped out, or released anons.
+ * => return TRUE if failed.
+ */
+
+boolean_t
+amap_swap_off(int startslot, int endslot)
+{
+	struct vm_amap *am;
+	struct vm_amap *am_next;
+	struct vm_amap marker;
+	struct lwp *l = curlwp;
+	boolean_t rv = FALSE;
+
+	memset(&marker, 0, sizeof(marker));
+
+	PHOLD(l);
+restart:
+	simple_lock(&amap_list_lock);
+	for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
+		int i;
+
+		if (!amap_lock_try(am)) {
+			simple_unlock(&amap_list_lock);
+			preempt(1);
+			goto restart;
+		}
+		if (am->am_nused <= 0) {
+			am_next = LIST_NEXT(am, am_list);
+			amap_unlock(am);
+			continue;
+		}
+		LIST_INSERT_AFTER(am, &marker, am_list);
+		simple_unlock(&amap_list_lock);
+
+		for (i = 0; i < am->am_nused; i++) {
+			int slot;
+			int swslot;
+			struct vm_anon *anon;
+
+			slot = am->am_slots[i];
+			anon = am->am_anon[slot];
+			simple_lock(&anon->an_lock);
+
+			swslot = anon->an_swslot;
+			if (swslot < startslot || endslot <= swslot) {
+				simple_unlock(&anon->an_lock);
+				continue;
+			}
+
+			am->am_flags |= AMAP_SWAPOFF;
+			amap_unlock(am);
+
+			rv = uvm_anon_pagein(anon);
+
+			amap_lock(am);
+			am->am_flags &= ~AMAP_SWAPOFF;
+			if (amap_refs(am) == 0) {
+				am->am_ref = 1; /* XXX */
+				amap_wipeout(am);
+				am = NULL;
+				break;
+			}
+			if (rv) {
+				break;
+			}
+			i = 0;
+		}
+
+		if (am) {
+			amap_unlock(am);
+		}
+		
+		simple_lock(&amap_list_lock);
+		am_next = LIST_NEXT(&marker, am_list);
+		LIST_REMOVE(&marker, am_list);
+	}
+	simple_unlock(&amap_list_lock);
+	PRELE(l);
+
+	return rv;
+}
Index: uvm/uvm_amap.h
===================================================================
--- uvm/uvm_amap.h	(revision 679)
+++ uvm/uvm_amap.h	(working copy)
@@ -125,6 +125,8 @@ void		amap_unref	/* drop reference to an
 			(struct vm_amap *, vaddr_t, vsize_t, int);
 void		amap_wipeout	/* remove all anons from amap */
 			(struct vm_amap *);
+boolean_t	amap_swap_off
+			(int, int);
 
 /*
  * amap flag values
@@ -132,6 +134,7 @@ void		amap_wipeout	/* remove all anons f
 
 #define AMAP_SHARED	0x1	/* amap is shared */
 #define AMAP_REFALL	0x2	/* amap_ref: reference entire amap */
+#define AMAP_SWAPOFF	0x4	/* amap_swap_off() is in progress */
 
 /*
  * amap_extend flags
@@ -174,6 +177,7 @@ struct vm_amap {
 #ifdef UVM_AMAP_PPREF
 	int *am_ppref;		/* per page reference count (if !NULL) */
 #endif
+	LIST_ENTRY(vm_amap) am_list;
 };
 
 /*
@@ -264,6 +268,7 @@ MALLOC_DECLARE(M_UVMAMAP);
 
 #define amap_flags(AMAP)	((AMAP)->am_flags)
 #define amap_lock(AMAP)		simple_lock(&(AMAP)->am_l)
+#define amap_lock_try(AMAP)	simple_lock_try(&(AMAP)->am_l)
 #define amap_refs(AMAP)		((AMAP)->am_ref)
 #define amap_unlock(AMAP)	simple_unlock(&(AMAP)->am_l)
 
Index: uvm/uvm.h
===================================================================
--- uvm/uvm.h	(revision 1035)
+++ uvm/uvm.h	(working copy)
@@ -106,10 +106,6 @@ struct uvm {
 	int page_hashmask;		/* hash mask */
 	struct simplelock hashlock;	/* lock on page_hash array */
 
-	/* anon stuff */
-	struct vm_anon *afree;		/* anon free list */
-	struct simplelock afreelock; 	/* lock on anon free list */
-
 	struct simplelock kentry_lock;
 
 	/* aio_done is locked by uvm.pagedaemon_lock and splbio! */
Index: uvm/uvm_anon.c
===================================================================
--- uvm/uvm_anon.c	(revision 1109)
+++ uvm/uvm_anon.c	(working copy)
@@ -51,107 +51,37 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_anon.c,v
 #include <uvm/uvm.h>
 #include <uvm/uvm_swap.h>
 
-/*
- * anonblock_list: global list of anon blocks,
- * locked by swap_syscall_lock (since we never remove
- * anything from this list and we only add to it via swapctl(2)).
- */
+static POOL_INIT(uvm_anon_pool, sizeof(struct vm_anon), 0, 0, 0, "anonpl",
+    &pool_allocator_nointr);
+static struct pool_cache uvm_anon_pool_cache;
 
-struct uvm_anonblock {
-	LIST_ENTRY(uvm_anonblock) list;
-	int count;
-	struct vm_anon *anons;
-};
-static LIST_HEAD(anonlist, uvm_anonblock) anonblock_list;
+static int uvm_anon_ctor(void *, void *, int);
 
-
-static boolean_t anon_pagein(struct vm_anon *);
-
-
 /*
  * allocate anons
  */
 void
 uvm_anon_init()
 {
-	int nanon = uvmexp.free - (uvmexp.free / 16); /* XXXCDC ??? */
 
-	simple_lock_init(&uvm.afreelock);
-	LIST_INIT(&anonblock_list);
-
-	/*
-	 * Allocate the initial anons.
-	 */
-	uvm_anon_add(nanon);
+	pool_cache_init(&uvm_anon_pool_cache, &uvm_anon_pool,
+	    uvm_anon_ctor, NULL, NULL);
 }
 
-/*
- * add some more anons to the free pool.  called when we add
- * more swap space.
- *
- * => swap_syscall_lock should be held (protects anonblock_list).
- */
-int
-uvm_anon_add(count)
-	int	count;
+static int
+uvm_anon_ctor(void *arg, void *object, int flags)
 {
-	struct uvm_anonblock *anonblock;
-	struct vm_anon *anon;
-	int lcv, needed;
+	struct vm_anon *anon = object;
 
-	simple_lock(&uvm.afreelock);
-	uvmexp.nanonneeded += count;
-	needed = uvmexp.nanonneeded - uvmexp.nanon;
-	simple_unlock(&uvm.afreelock);
+	anon->an_ref = 0;
+	simple_lock_init(&anon->an_lock);
+	anon->u.an_page = NULL;
+	anon->an_swslot = 0;
 
-	if (needed <= 0) {
-		return 0;
-	}
-	anon = (void *)uvm_km_alloc(kernel_map, sizeof(*anon) * needed, 0, 
-	    UVM_KMF_WIRED);
-	if (anon == NULL) {
-		simple_lock(&uvm.afreelock);
-		uvmexp.nanonneeded -= count;
-		simple_unlock(&uvm.afreelock);
-		return ENOMEM;
-	}
-	MALLOC(anonblock, void *, sizeof(*anonblock), M_UVMAMAP, M_WAITOK);
-
-	anonblock->count = needed;
-	anonblock->anons = anon;
-	LIST_INSERT_HEAD(&anonblock_list, anonblock, list);
-	memset(anon, 0, sizeof(*anon) * needed);
-
-	simple_lock(&uvm.afreelock);
-	uvmexp.nanon += needed;
-	uvmexp.nfreeanon += needed;
-	for (lcv = 0; lcv < needed; lcv++) {
-		simple_lock_init(&anon[lcv].an_lock);
-		anon[lcv].u.an_nxt = uvm.afree;
-		uvm.afree = &anon[lcv];
-	}
-	simple_unlock(&uvm.afreelock);
 	return 0;
 }
 
 /*
- * remove anons from the free pool.
- */
-void
-uvm_anon_remove(count)
-	int count;
-{
-	/*
-	 * we never actually free any anons, to avoid allocation overhead.
-	 * XXX someday we might want to try to free anons.
-	 */
-
-	simple_lock(&uvm.afreelock);
-	uvmexp.nanonneeded -= count;
-	simple_unlock(&uvm.afreelock);
-}
-
-/*
  * allocate an anon
  *
  * => new anon is returned locked!
@@ -159,21 +89,18 @@ uvm_anon_remove(count)
 struct vm_anon *
 uvm_analloc()
 {
-	struct vm_anon *a;
+	struct vm_anon *anon;
 
-	simple_lock(&uvm.afreelock);
-	a = uvm.afree;
-	if (a) {
-		uvm.afree = a->u.an_nxt;
-		uvmexp.nfreeanon--;
-		a->an_ref = 1;
-		a->an_swslot = 0;
-		a->u.an_page = NULL;		/* so we can free quickly */
-		LOCK_ASSERT(simple_lock_held(&a->an_lock) == 0);
-		simple_lock(&a->an_lock);
+	anon = pool_cache_get(&uvm_anon_pool_cache, PR_NOWAIT);
+	if (anon) {
+		KASSERT(anon->an_ref == 0);
+		LOCK_ASSERT(simple_lock_held(&anon->an_lock) == 0);
+		KASSERT(anon->u.an_page == NULL);
+		KASSERT(anon->an_swslot == 0);
+		anon->an_ref = 1;
+		simple_lock(&anon->an_lock);
 	}
-	simple_unlock(&uvm.afreelock);
-	return(a);
+	return anon;
 }
 
 /*
@@ -283,11 +210,7 @@ uvm_anfree(anon)
 	KASSERT(anon->u.an_page == NULL);
 	KASSERT(anon->an_swslot == 0);
 
-	simple_lock(&uvm.afreelock);
-	anon->u.an_nxt = uvm.afree;
-	uvm.afree = anon;
-	uvmexp.nfreeanon++;
-	simple_unlock(&uvm.afreelock);
+	pool_cache_put(&uvm_anon_pool_cache, anon);
 	UVMHIST_LOG(maphist,"<- done!",0,0,0,0);
 }
 
@@ -400,80 +323,15 @@ uvm_anon_lockloanpg(anon)
 	return(pg);
 }
 
-
-
 /*
- * page in every anon that is paged out to a range of swslots.
- *
- * swap_syscall_lock should be held (protects anonblock_list).
- */
-
-boolean_t
-anon_swap_off(startslot, endslot)
-	int startslot, endslot;
-{
-	struct uvm_anonblock *anonblock;
-
-	LIST_FOREACH(anonblock, &anonblock_list, list) {
-		int i;
-
-		/*
-		 * loop thru all the anons in the anonblock,
-		 * paging in where needed.
-		 */
-
-		for (i = 0; i < anonblock->count; i++) {
-			struct vm_anon *anon = &anonblock->anons[i];
-			int slot;
-
-			/*
-			 * lock anon to work on it.
-			 */
-
-			simple_lock(&anon->an_lock);
-
-			/*
-			 * is this anon's swap slot in range?
-			 */
-
-			slot = anon->an_swslot;
-			if (slot >= startslot && slot < endslot) {
-				boolean_t rv;
-
-				/*
-				 * yup, page it in.
-				 */
-
-				/* locked: anon */
-				rv = anon_pagein(anon);
-				/* unlocked: anon */
-
-				if (rv) {
-					return rv;
-				}
-			} else {
-
-				/*
-				 * nope, unlock and proceed.
-				 */
-
-				simple_unlock(&anon->an_lock);
-			}
-		}
-	}
-	return FALSE;
-}
-
-
-/*
  * fetch an anon's page.
  *
  * => anon must be locked, and is unlocked upon return.
  * => returns TRUE if pagein was aborted due to lack of memory.
  */
 
-static boolean_t
-anon_pagein(anon)
+boolean_t
+uvm_anon_pagein(anon)
 	struct vm_anon *anon;
 {
 	struct vm_page *pg;
Index: uvm/uvm_anon.h
===================================================================
--- uvm/uvm_anon.h	(revision 699)
+++ uvm/uvm_anon.h	(working copy)
@@ -51,7 +51,6 @@ struct vm_anon {
 	int an_ref;			/* reference count [an_lock] */
 	struct simplelock an_lock;	/* lock for an_ref */
 	union {
-		struct vm_anon *an_nxt;	/* if on free list [afreelock] */
 		struct vm_page *an_page;/* if in RAM [an_lock] */
 	} u;
 	int an_swslot;		/* drum swap slot # (if != 0)
@@ -60,9 +59,7 @@ struct vm_anon {
 };
 
 /*
- * a pool of vm_anon data structures is allocated and put on a global
- * free list at boot time.  vm_anon's on the free list use "an_nxt" as
- * a pointer to the next item on the free list.  for active vm_anon's
+ * for active vm_anon's
  * the data can be in one of the following state: [1] in a vm_page
  * with no backing store allocated yet, [2] in a vm_page with backing
  * store allocated, or [3] paged out to backing store (no vm_page).
@@ -101,12 +98,10 @@ struct vm_aref {
 struct vm_anon *uvm_analloc(void);
 void uvm_anfree(struct vm_anon *);
 void uvm_anon_init(void);
-int uvm_anon_add(int);
-void uvm_anon_remove(int);
 struct vm_page *uvm_anon_lockloanpg(struct vm_anon *);
 void uvm_anon_dropswap(struct vm_anon *);
-boolean_t anon_swap_off(int, int);
 void uvm_anon_release(struct vm_anon *);
+boolean_t uvm_anon_pagein(struct vm_anon *);
 #endif /* _KERNEL */
 
 #endif /* _UVM_UVM_ANON_H_ */
Index: uvm/uvm_init.c
===================================================================
--- uvm/uvm_init.c	(revision 1068)
+++ uvm/uvm_init.c	(working copy)
@@ -136,15 +136,9 @@ uvm_init()
 	uvm_pager_init();
 
 	/*
-	 * step 8: init anonymous memory systems
+	 * step 8: init the uvm_loan() facility.
 	 */
 
-	uvm_anon_init();	/* allocate initial anons */
-
-	/*
-	 * step 9: init the uvm_loan() facility.
-	 */
-
 	uvm_loan_init();
 
 	/*
@@ -163,4 +157,10 @@ uvm_init()
 	 */
 
 	link_pool_init();
+
+	/*
+	 * init anonymous memory systems
+	 */
+
+	uvm_anon_init();
 }

--NextPart-20050430183334-0125500--