Subject: Re: new memory allocation scheme and disk access
To: None <enami@but-b.or.jp>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-kern
Date: 01/10/2004 12:33:34
hi,

> > My system has 1G bytes of RAM, and free memory was about 550M bytes at
> > the time of the second build of the kernel. But the build was very slow
> > because of hard disk accesses. It seems to be related to the new memory
> > allocation scheme for the buffer cache.
> 
> It looks like more directory vnodes stay in hold_list (as each has a valid
> buf) than before, and thus regular file vnodes tend to be reclaimed
> sooner (note that a regular file vnode with page cache but no reference
> is put in free_list).
> 
> enami.

i agree.

i'll commit the following diff if no one objects.

YAMAMOTO Takashi


Index: kern/vfs_subr.c
===================================================================
--- kern/vfs_subr.c	(revision 492)
+++ kern/vfs_subr.c	(revision 493)
@@ -1364,7 +1364,7 @@ vrele(vp)
  * Page or buffer structure gets a reference.
  */
 void
-vhold(vp)
+vholdl(vp)
 	struct vnode *vp;
 {
 
@@ -1381,7 +1381,6 @@ vhold(vp)
 	 * getnewvnode after removing it from a freelist to ensure
 	 * that we do not try to move it here.
 	 */
-  	simple_lock(&vp->v_interlock);
 	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
 	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
 		simple_lock(&vnode_free_list_slock);
@@ -1390,18 +1389,16 @@ vhold(vp)
 		simple_unlock(&vnode_free_list_slock);
 	}
 	vp->v_holdcnt++;
-	simple_unlock(&vp->v_interlock);
 }
 
 /*
  * Page or buffer structure frees a reference.
  */
 void
-holdrele(vp)
+holdrelel(vp)
 	struct vnode *vp;
 {
 
-	simple_lock(&vp->v_interlock);
 	if (vp->v_holdcnt <= 0)
 		panic("holdrele: holdcnt vp %p", vp);
 	vp->v_holdcnt--;
@@ -1427,7 +1424,6 @@ holdrele(vp)
 		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
 		simple_unlock(&vnode_free_list_slock);
 	}
-	simple_unlock(&vp->v_interlock);
 }
 
 /*
Index: uvm/uvm_page.c
===================================================================
--- uvm/uvm_page.c	(revision 492)
+++ uvm/uvm_page.c	(revision 493)
@@ -178,10 +178,17 @@ uvm_pageinsert(pg)
 	TAILQ_INSERT_TAIL(buck, pg, hashq);
 	simple_unlock(&uvm.hashlock);
 
-	if (UVM_OBJ_IS_VTEXT(uobj)) {
-		uvmexp.execpages++;
-	} else if (UVM_OBJ_IS_VNODE(uobj)) {
-		uvmexp.filepages++;
+	if (UVM_OBJ_IS_VNODE(uobj)) {
+		if (uobj->uo_npages == 0) {
+			struct vnode *vp = (struct vnode *)uobj;
+
+			vholdl(vp);
+		}
+		if (UVM_OBJ_IS_VTEXT(uobj)) {
+			uvmexp.execpages++;
+		} else {
+			uvmexp.filepages++;
+		}
 	} else if (UVM_OBJ_IS_AOBJ(uobj)) {
 		uvmexp.anonpages++;
 	}
@@ -211,10 +218,17 @@ uvm_pageremove(pg)
 	TAILQ_REMOVE(buck, pg, hashq);
 	simple_unlock(&uvm.hashlock);
 
-	if (UVM_OBJ_IS_VTEXT(uobj)) {
-		uvmexp.execpages--;
-	} else if (UVM_OBJ_IS_VNODE(uobj)) {
-		uvmexp.filepages--;
+	if (UVM_OBJ_IS_VNODE(uobj)) {
+		if (uobj->uo_npages == 1) {
+			struct vnode *vp = (struct vnode *)uobj;
+
+			holdrelel(vp);
+		}
+		if (UVM_OBJ_IS_VTEXT(uobj)) {
+			uvmexp.execpages--;
+		} else {
+			uvmexp.filepages--;
+		}
 	} else if (UVM_OBJ_IS_AOBJ(uobj)) {
 		uvmexp.anonpages--;
 	}
Index: sys/vnode.h
===================================================================
--- sys/vnode.h	(revision 492)
+++ sys/vnode.h	(revision 493)
@@ -289,10 +289,13 @@ extern struct simplelock vnode_free_list
 #define	ilstatic static
 #endif
 
-ilstatic void holdrele(struct vnode *);
-ilstatic void vhold(struct vnode *);
+ilstatic void holdrelel(struct vnode *);
+ilstatic void vholdl(struct vnode *);
 ilstatic void vref(struct vnode *);
 
+static __inline void holdrele(struct vnode *) __attribute__((__unused__));
+static __inline void vhold(struct vnode *) __attribute__((__unused__));
+
 #ifdef DIAGNOSTIC
 #define	VATTR_NULL(vap)	vattr_null(vap)
 #else
@@ -300,12 +303,13 @@ ilstatic void vref(struct vnode *);
 
 /*
  * decrease buf or page ref
+ *
+ * called with v_interlock held
  */
 static __inline void
-holdrele(struct vnode *vp)
+holdrelel(struct vnode *vp)
 {
 
-	simple_lock(&vp->v_interlock);
 	vp->v_holdcnt--;
 	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
 	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
@@ -314,17 +318,17 @@ holdrele(struct vnode *vp)
 		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
 		simple_unlock(&vnode_free_list_slock);
 	}
-	simple_unlock(&vp->v_interlock);
 }
 
 /*
  * increase buf or page ref
+ *
+ * called with v_interlock held
  */
 static __inline void
-vhold(struct vnode *vp)
+vholdl(struct vnode *vp)
 {
 
-	simple_lock(&vp->v_interlock);
 	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
 	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
 		simple_lock(&vnode_free_list_slock);
@@ -333,7 +337,6 @@ vhold(struct vnode *vp)
 		simple_unlock(&vnode_free_list_slock);
 	}
 	vp->v_holdcnt++;
-	simple_unlock(&vp->v_interlock);
 }
 
 /*
@@ -349,6 +352,30 @@ vref(struct vnode *vp)
 }
 #endif /* DIAGNOSTIC */
 
+/*
+ * decrease buf or page ref
+ */
+static __inline void
+holdrele(struct vnode *vp)
+{
+
+	simple_lock(&vp->v_interlock);
+	holdrelel(vp);
+	simple_unlock(&vp->v_interlock);
+}
+
+/*
+ * increase buf or page ref
+ */
+static __inline void
+vhold(struct vnode *vp)
+{
+
+	simple_lock(&vp->v_interlock);
+	vholdl(vp);
+	simple_unlock(&vp->v_interlock);
+}
+
 #define	NULLVP	((struct vnode *)NULL)
 
 #define	VN_KNOTE(vp, b)		KNOTE(&vp->v_klist, (b))