Subject: UBC usage balancing
To: None <tech-kern@netbsd.org>
From: Chuck Silvers <chuq@chuq.com>
List: tech-kern
Date: 02/06/2001 23:39:16
--PNTmBPCT7hxwcZjr
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

hi folks,

attached is a diff for a prototype of what I'm working on to fix
the problems with unbalanced memory usage in UBC (ie. where
cached file data will push too much anonymous data out of RAM).
the way it works is that we keep counts of three kinds of pages:
anonymous data, VTEXT vnode data, and non-VTEXT vnode data.
there is a minimum percentage of RAM reserved for each type of data,
where the pagedaemon won't reuse a given page if doing so will
reduce the page count for that type of data below the minimum level.

in this prototype, the thresholds are set by some variables in
uvm_pdaemon.c:  anonmin, vnodemin, and vtextmin.  the values
of these variables are interpreted as the percentage of RAM
reserved for the different types of data.  these will be
settable via sysctl in the final version.  the values I've got
in there now are just something to start with; I'm sure there
will be great debate about what the default values should be.
the sum of the three values must be less than 100.

so I'd like people to play around with this and see how well it
works.  try different settings for the thresholds and see what
you like best.  comments welcome.

-Chuck

--PNTmBPCT7hxwcZjr
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="diff.pdcount"

Index: kern/vfs_subr.c
===================================================================
RCS file: /cvsroot/syssrc/sys/kern/vfs_subr.c,v
retrieving revision 1.145
diff -u -r1.145 vfs_subr.c
--- kern/vfs_subr.c	2001/02/06 10:58:55	1.145
+++ kern/vfs_subr.c	2001/02/07 07:16:17
@@ -1217,6 +1217,10 @@
 	else
 		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
 	simple_unlock(&vnode_free_list_slock);
+	if (vp->v_flag & VTEXT) {
+		uvmexp.vtextpages -= vp->v_uvm.u_obj.uo_npages;
+		uvmexp.vnodepages += vp->v_uvm.u_obj.uo_npages;
+	}
 	vp->v_flag &= ~VTEXT;
 	simple_unlock(&vp->v_interlock);
 	VOP_INACTIVE(vp, p);
@@ -1257,6 +1261,10 @@
 	else
 		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
 	simple_unlock(&vnode_free_list_slock);
+	if (vp->v_flag & VTEXT) {
+		uvmexp.vtextpages -= vp->v_uvm.u_obj.uo_npages;
+		uvmexp.vnodepages += vp->v_uvm.u_obj.uo_npages;
+	}
 	vp->v_flag &= ~VTEXT;
 	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
 		VOP_INACTIVE(vp, p);
@@ -1481,6 +1489,10 @@
 	if (vp->v_flag & VXLOCK)
 		panic("vclean: deadlock, vp %p", vp);
 	vp->v_flag |= VXLOCK;
+	if (vp->v_flag & VTEXT) {
+		uvmexp.vtextpages -= vp->v_uvm.u_obj.uo_npages;
+		uvmexp.vnodepages += vp->v_uvm.u_obj.uo_npages;
+	}
 	vp->v_flag &= ~VTEXT;
 
 	/*
Index: kern/vfs_vnops.c
===================================================================
RCS file: /cvsroot/syssrc/sys/kern/vfs_vnops.c,v
retrieving revision 1.45
diff -u -r1.45 vfs_vnops.c
--- kern/vfs_vnops.c	2000/11/27 08:39:44	1.45
+++ kern/vfs_vnops.c	2001/02/07 07:16:17
@@ -195,6 +195,10 @@
 vn_marktext(vp)
 	struct vnode *vp;
 {
+	if ((vp->v_flag & VTEXT) == 0) {
+		uvmexp.vnodepages -= vp->v_uvm.u_obj.uo_npages;
+		uvmexp.vtextpages += vp->v_uvm.u_obj.uo_npages;
+	}
 	vp->v_flag |= VTEXT;
 }
 
Index: uvm/uvm_object.h
===================================================================
RCS file: /cvsroot/syssrc/sys/uvm/uvm_object.h,v
retrieving revision 1.10
diff -u -r1.10 uvm_object.h
--- uvm/uvm_object.h	2001/01/28 22:23:06	1.10
+++ uvm/uvm_object.h	2001/02/07 07:16:17
@@ -89,6 +89,11 @@
 #define	UVM_OBJ_IS_VNODE(uobj)						\
 	((uobj)->pgops == &uvm_vnodeops)
 
+#define	UVM_OBJ_IS_VTEXT(uobj)						\
+	((uobj)->pgops == &uvm_vnodeops &&				\
+	 ((struct vnode *)uobj)->v_flag & VTEXT)
+
+
 #endif /* _KERNEL */
 
 #endif /* _UVM_UVM_OBJECT_H_ */
Index: uvm/uvm_page.c
===================================================================
RCS file: /cvsroot/syssrc/sys/uvm/uvm_page.c,v
retrieving revision 1.50
diff -u -r1.50 uvm_page.c
--- uvm/uvm_page.c	2001/01/28 22:23:04	1.50
+++ uvm/uvm_page.c	2001/02/07 07:16:18
@@ -77,6 +77,7 @@
 #include <sys/malloc.h>
 #include <sys/sched.h>
 #include <sys/kernel.h>
+#include <sys/vnode.h>
 
 #define UVM_PAGE                /* pull in uvm_page.h functions */
 #include <uvm/uvm.h>
@@ -190,8 +191,11 @@
 	simple_unlock(&uvm.hashlock);
 	splx(s);
 
-	if (UVM_OBJ_IS_VNODE(pg->uobject))
+	if (UVM_OBJ_IS_VTEXT(pg->uobject)) {
+		uvmexp.vtextpages--;
+	} else if (UVM_OBJ_IS_VNODE(pg->uobject)) {
 		uvmexp.vnodepages--;
+	}
 
 	/* object should be locked */
 	TAILQ_REMOVE(&pg->uobject->memq, pg, listq);
Index: uvm/uvm_pdaemon.c
===================================================================
RCS file: /cvsroot/syssrc/sys/uvm/uvm_pdaemon.c,v
retrieving revision 1.29
diff -u -r1.29 uvm_pdaemon.c
--- uvm/uvm_pdaemon.c	2001/01/28 23:30:46	1.29
+++ uvm/uvm_pdaemon.c	2001/02/07 07:16:19
@@ -78,6 +78,7 @@
 #include <sys/kernel.h>
 #include <sys/pool.h>
 #include <sys/buf.h>
+#include <sys/vnode.h>
 
 #include <uvm/uvm.h>
 
@@ -100,6 +101,11 @@
 static void		uvmpd_tune __P((void));
 
 
+int anonmin = 10;
+int vnodemin = 10;
+int vtextmin = 5;
+
+
 /*
  * uvm_wait: wait (sleep) for the page daemon to free some pages
  *
@@ -250,10 +256,7 @@
 		 */
 
 		if (uvmexp.free + uvmexp.paging < uvmexp.freetarg ||
-		    uvmexp.inactive < uvmexp.inactarg ||
-		    uvmexp.vnodepages >
-		    (uvmexp.active + uvmexp.inactive + uvmexp.wired +
-		     uvmexp.free) * 13 / 16) {
+		    uvmexp.inactive < uvmexp.inactarg) {
 			uvmpd_scan();
 		}
 
@@ -357,6 +360,8 @@
  * => we return TRUE if we are exiting because we met our target
  */
 
+int chuq_s[3];
+
 static boolean_t
 uvmpd_scan_inactive(pglst)
 	struct pglist *pglst;
@@ -371,9 +376,9 @@
 	int swnpages, swcpages;				/* XXX: see below */
 	int swslot;
 	struct vm_anon *anon;
-	boolean_t swap_backed, vnode_only;
+	boolean_t swap_backed;
 	vaddr_t start;
-	int dirtyreacts, vpgs;
+	int dirtyreacts;
 	UVMHIST_FUNC("uvmpd_scan_inactive"); UVMHIST_CALLED(pdhist);
 
 	/*
@@ -394,7 +399,6 @@
 	swnpages = swcpages = 0;
 	free = 0;
 	dirtyreacts = 0;
-	vnode_only = FALSE;
 
 	for (p = TAILQ_FIRST(pglst); p != NULL || swslot != 0; p = nextpg) {
 
@@ -417,33 +421,24 @@
 			free = uvmexp.free;
 			uvm_unlock_fpageq(s);
 
-			/* XXXUBC */
-			vpgs = uvmexp.vnodepages -
-				(uvmexp.active + uvmexp.inactive +
-				 uvmexp.wired + uvmexp.free) * 13 / 16;
-
 			if (free + uvmexp.paging >= uvmexp.freetarg << 2 ||
-			    vpgs > 0 || dirtyreacts == UVMPD_NUMDIRTYREACTS) {
-				if (vpgs <= 0) {
-					UVMHIST_LOG(pdhist,"  met free target: "
-						    "exit loop", 0, 0, 0, 0);
-					retval = TRUE;
-
-					if (swslot == 0)
-						/* exit now if no
-                                                   swap-i/o pending */
-						break;
-
-					/* set p to null to signal final
-                                           swap i/o */
-					p = NULL;
-				} else {
-					vnode_only = TRUE;
+			    dirtyreacts == UVMPD_NUMDIRTYREACTS) {
+				UVMHIST_LOG(pdhist,"  met free target: "
+					    "exit loop", 0, 0, 0, 0);
+				retval = TRUE;
+
+				if (swslot == 0) {
+					/* exit now if no swap-i/o pending */
+					break;
 				}
+
+				/* set p to null to signal final swap i/o */
+				p = NULL;
 			}
 		}
 
 		if (p) {	/* if (we have a new page to consider) */
+
 			/*
 			 * we are below target and have a new page to consider.
 			 */
@@ -452,16 +447,40 @@
 
 			/*
 			 * move referenced pages back to active queue and
-			 * skip to next page (unlikely to happen since
-			 * inactive pages shouldn't have any valid mappings
-			 * and we cleared reference before deactivating).
+			 * skip to next page.
 			 */
+
 			if (pmap_is_referenced(p)) {
 				uvm_pageactivate(p);
 				uvmexp.pdreact++;
 				continue;
+			}
+
+#if 1
+			{ int tpgs;
+			tpgs = uvmexp.active + uvmexp.inactive + uvmexp.free;
+			if (p->uanon &&
+			    uvmexp.anonpages <= tpgs * anonmin / 100) {
+				uvm_pageactivate(p);
+				chuq_s[0]++;
+				continue;
+			}
+			if (p->uobject && UVM_OBJ_IS_VTEXT(p->uobject) &&
+			    uvmexp.vtextpages <= tpgs * vtextmin / 100) {
+				uvm_pageactivate(p);
+				chuq_s[2]++;
+				continue;
 			}
-			
+			if (p->uobject && UVM_OBJ_IS_VNODE(p->uobject) &&
+			    !UVM_OBJ_IS_VTEXT(p->uobject) &&
+			    uvmexp.vnodepages <= tpgs * vnodemin / 100) {
+				uvm_pageactivate(p);
+				chuq_s[1]++;
+				continue;
+			}
+			}
+#endif
+
 			/*
 			 * first we attempt to lock the object that this page
 			 * belongs to.  if our attempt fails we skip on to
@@ -477,18 +496,15 @@
 			 * case, the anon can "take over" the loaned page
 			 * and make it its own.
 			 */
-		
+
 			/* is page part of an anon or ownerless ? */
 			if ((p->pqflags & PQ_ANON) || p->uobject == NULL) {
-				if (vnode_only) {
-					uvm_pageactivate(p);
-					continue;
-				}
 				anon = p->uanon;
 				KASSERT(anon != NULL);
-				if (!simple_lock_try(&anon->an_lock))
+				if (!simple_lock_try(&anon->an_lock)) {
 					/* lock failed, skip this page */
 					continue;
+				}
 
 				/*
 				 * if the page is ownerless, claim it in the
@@ -511,15 +527,16 @@
 			} else {
 				uobj = p->uobject;
 				KASSERT(uobj != NULL);
-				if (vnode_only &&
-				    UVM_OBJ_IS_VNODE(uobj) == 0) {
+#if 0
+				if (UVM_OBJ_IS_VNODE(uobj) == 0) {
 					uvm_pageactivate(p);
 					continue;
 				}
-				if (!simple_lock_try(&uobj->vmobjlock))
+#endif
+				if (!simple_lock_try(&uobj->vmobjlock)) {
 					/* lock failed, skip this page */
 					continue;
-
+				}
 				if (p->flags & PG_BUSY) {
 					simple_unlock(&uobj->vmobjlock);
 					uvmexp.pdbusy++;
@@ -538,8 +555,9 @@
 			 */
 
 			pmap_page_protect(p, VM_PROT_NONE);
-			if ((p->flags & PG_CLEAN) != 0 && pmap_is_modified(p))
+			if ((p->flags & PG_CLEAN) != 0 && pmap_is_modified(p)) {
 				p->flags &= ~PG_CLEAN;
+			}
 
 			if (p->flags & PG_CLEAN) {
 				if (p->pqflags & PQ_SWAPBACKED) {
@@ -577,8 +595,7 @@
 			 * free target when all the current pageouts complete.
 			 */
 
-			if (free + uvmexp.paging > uvmexp.freetarg << 2 &&
-			    !vnode_only) {
+			if (free + uvmexp.paging > uvmexp.freetarg << 2) {
 				if (anon) {
 					simple_unlock(&anon->an_lock);
 				} else {
@@ -634,7 +651,7 @@
 			 * first mark the page busy so that no one else will
 			 * touch the page.
 			 */
-		
+
 			swap_backed = ((p->pqflags & PQ_SWAPBACKED) != 0);
 			p->flags |= PG_BUSY;		/* now we own it */
 			UVM_PAGE_OWN(p, "scan_inactive");
@@ -936,7 +953,7 @@
 			 */
 
 			nextpg = NULL;
-			
+
 			/*
 			 * lock page queues here just so they're always locked
 			 * at the end of the loop.
Index: uvm/uvm_vnode.c
===================================================================
RCS file: /cvsroot/syssrc/sys/uvm/uvm_vnode.c,v
retrieving revision 1.43
diff -u -r1.43 uvm_vnode.c
--- uvm/uvm_vnode.c	2001/02/06 10:53:23	1.43
+++ uvm/uvm_vnode.c	2001/02/07 07:16:19
@@ -439,13 +439,6 @@
 	} else {
 		start = trunc_page(start);
 		stop = round_page(stop);
-#ifdef DEBUG
-		if (stop > round_page(uvn->u_size)) {
-			printf("uvn_flush: oor vp %p start 0x%x stop 0x%x "
-			       "size 0x%x\n", uvn, (int)start, (int)stop,
-			       (int)round_page(uvn->u_size));
-		}
-#endif
 		all = FALSE;
 		by_list = (uobj->uo_npages <= 
 		    ((stop - start) >> PAGE_SHIFT) * UVN_HASH_PENALTY);
@@ -947,14 +940,8 @@
 			if (flags & UFP_NOALLOC) {
 				UVMHIST_LOG(ubchist, "noalloc", 0,0,0,0);
 				return 0;
-			}
-			if (uvmexp.vnodepages > 
-			    (uvmexp.active + uvmexp.inactive + uvmexp.wired +
-			     uvmexp.free) * 7 / 8) {
-				pg = NULL;
-			} else {
-				pg = uvm_pagealloc(uobj, offset, NULL, 0);
 			}
+			pg = uvm_pagealloc(uobj, offset, NULL, 0);
 			if (pg == NULL) {
 				if (flags & UFP_NOWAIT) {
 					UVMHIST_LOG(ubchist, "nowait",0,0,0,0);
@@ -964,8 +951,12 @@
 				uvm_wait("uvn_fp1");
 				simple_lock(&uobj->vmobjlock);
 				continue;
+			}
+			if (UVM_OBJ_IS_VTEXT(uobj)) {
+				uvmexp.vtextpages++;
+			} else {
+				uvmexp.vnodepages++;
 			}
-			uvmexp.vnodepages++;
 			UVMHIST_LOG(ubchist, "alloced",0,0,0,0);
 			break;
 		} else if (flags & UFP_NOCACHE) {

--PNTmBPCT7hxwcZjr--