Subject: Re: more on mysql benchmark
To: None <soda@sra.co.jp>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-kern
Date: 03/10/2005 06:46:05
--NextPart-20050310064325-0029300
Content-Type: Text/Plain; charset=us-ascii

hi,

> At memory shortage condition, sum > 100% makes the page daemon
> abandon page-access-history due to the page-queue-reordering effect.
> That's one of things that I'd like to avoid.

i don't think tweaking sysctl is the right way to avoid it.

it reminds me of an old patch in my local tree.  (attached)
i suspended it because i don't think tuning page balancing etc. makes
much sense without fixing PR/27030.

YAMAMOTO Takashi

--NextPart-20050310064325-0029300
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="fbsdbalance.diff"

Index: uvm_page.c
===================================================================
--- uvm_page.c	(revision 947)
+++ uvm_page.c	(working copy)
@@ -1414,6 +1414,7 @@ uvm_pagefree(pg)
 	    pgfl_queues[iszero ? PGFL_ZEROS : PGFL_UNKNOWN];
 
 	pg->pqflags = PQ_FREE;
+	pg->active_count = 0;
 #ifdef DEBUG
 	pg->uobject = (void *)0xdeadbeef;
 	pg->offset = 0xdeadbeef;
Index: uvm_page.h
===================================================================
--- uvm_page.h	(revision 947)
+++ uvm_page.h	(working copy)
@@ -131,7 +131,9 @@ struct vm_page {
 						 * to read: [O or P]
 						 * to modify: [O _and_ P] */
 	uint16_t		wire_count;	/* wired down map refs [P] */
-	uint16_t		pqflags;	/* page queue flags [P] */
+	uint8_t			pqflags;	/* page queue flags [P] */
+	uint8_t			active_count;	/* logical position in the
+						 * active queue [P] */
 	paddr_t			phys_addr;	/* physical address of page */
 
 #ifdef __HAVE_VM_PAGE_MD
@@ -181,6 +183,22 @@ struct vm_page {
 					   uvm_object */
 #define PQ_SWAPBACKED	(PQ_ANON|PQ_AOBJ)
 
+/* active_count tuning values; the count is kept within [0, UVM_ACT_MAX] */
+#define	UVM_ACT_INIT	3	/* floor applied by uvm_pageactivate() */
+#define	UVM_ACT_DECLINE	1	/* subtracted when found unreferenced */
+#define	UVM_ACT_ADVANCE	5	/* added when found referenced */
+#define	UVM_ACT_MAX	65	/* upper bound enforced by UVM_PAGEACT_ADD */
+
+#define	UVM_PAGEACT_ADD(pg, dif) /* add, saturating at UVM_ACT_MAX */ \
+	do { \
+		(pg)->active_count = \
+		MIN((pg)->active_count + (dif), UVM_ACT_MAX); \
+	} while (/* CONSTCOND */ 0)
+#define	UVM_PAGEACT_SUB(pg, dif) /* subtract, saturating at zero */ \
+	do { \
+		(pg)->active_count -= MIN((pg)->active_count, (dif)); \
+	} while (/* CONSTCOND */ 0)
+
 /*
  * physical memory layout structure
  *
@@ -262,6 +280,7 @@ vaddr_t uvm_pageboot_alloc(vsize_t);
 PAGE_INLINE void uvm_pagecopy(struct vm_page *, struct vm_page *);
 PAGE_INLINE void uvm_pagedeactivate(struct vm_page *);
 PAGE_INLINE void uvm_pagedequeue(struct vm_page *);
+PAGE_INLINE void uvm_pagerequeue(struct vm_page *);
 void uvm_pagefree(struct vm_page *);
 void uvm_page_unbusy(struct vm_page **, int);
 PAGE_INLINE struct vm_page *uvm_pagelookup(struct uvm_object *, voff_t);
Index: uvm_pdaemon.c
===================================================================
--- uvm_pdaemon.c	(revision 920)
+++ uvm_pdaemon.c	(working copy)
@@ -197,7 +197,7 @@ uvmpd_tune(void)
 void
 uvm_pageout(void *arg)
 {
-	int bufcnt, npages = 0;
+	int npages = 0;
 	UVMHIST_FUNC("uvm_pageout"); UVMHIST_CALLED(pdhist);
 
 	UVMHIST_LOG(pdhist,"<starting uvm pagedaemon>", 0, 0, 0, 0);
@@ -217,11 +217,13 @@ uvm_pageout(void *arg)
 	 */
 
 	for (;;) {
+		int free_shortage;
+
 		simple_lock(&uvm.pagedaemon_lock);
 
 		UVMHIST_LOG(pdhist,"  <<SLEEPING>>",0,0,0,0);
 		UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon,
-		    &uvm.pagedaemon_lock, FALSE, "pgdaemon", 0);
+		    &uvm.pagedaemon_lock, FALSE, "pgdaemon", 5 * hz);
 		uvmexp.pdwoke++;
 		UVMHIST_LOG(pdhist,"  <<WOKE UP>>",0,0,0,0);
 
@@ -244,9 +246,7 @@ uvm_pageout(void *arg)
 		 * Estimate a hint.  Note that bufmem are returned to
 		 * system only when entire pool page is empty.
 		 */
-		bufcnt = uvmexp.freetarg - uvmexp.free;
-		if (bufcnt < 0)
-			bufcnt = 0;
+		free_shortage = uvmexp.freetarg - uvmexp.free;
 
 		UVMHIST_LOG(pdhist,"  free/ftarg=%d/%d, inact/itarg=%d/%d",
 		    uvmexp.free, uvmexp.freetarg, uvmexp.inactive,
@@ -277,8 +277,12 @@ uvm_pageout(void *arg)
 
 		uvm_unlock_pageq();
 
-		buf_drain(bufcnt << PAGE_SHIFT);
+		if (free_shortage <= 0) {
+			continue;
+		}
 
+		buf_drain(free_shortage << PAGE_SHIFT);
+
 		/*
 		 * drain pool resources now that we're not holding any locks
 		 */
@@ -454,6 +458,7 @@ uvmpd_scan_inactive(pglst)
 
 			if (pmap_clear_reference(p)) {
 				uvm_pageactivate(p);
+				UVM_PAGEACT_ADD(p, UVM_ACT_ADVANCE);
 				uvmexp.pdreact++;
 				continue;
 			}
@@ -755,10 +760,11 @@ void
 uvmpd_scan(void)
 {
 	int inactive_shortage, swap_shortage, pages_freed;
-	struct vm_page *p, *nextpg;
+	struct vm_page *p;
 	struct uvm_object *uobj;
 	struct vm_anon *anon;
 	struct simplelock *slock;
+	struct vm_page marker;
 	UVMHIST_FUNC("uvmpd_scan"); UVMHIST_CALLED(pdhist);
 
 	uvmexp.pdrevs++;
@@ -793,8 +799,10 @@ uvmpd_scan(void)
 	UVMHIST_LOG(pdhist, "  starting 'free' loop",0,0,0,0);
 
 	pages_freed = uvmexp.pdfreed;
-	uvmpd_scan_inactive(&uvm.page_inactive);
-	pages_freed = uvmexp.pdfreed - pages_freed;
+	if (uvmexp.free < uvmexp.freetarg) { /* XXX */
+		uvmpd_scan_inactive(&uvm.page_inactive);
+		pages_freed = uvmexp.pdfreed - pages_freed;
+	}
 
 	/*
 	 * we have done the scan to get free pages.   now we work on meeting
@@ -818,15 +826,43 @@ uvmpd_scan(void)
 
 	UVMHIST_LOG(pdhist, "  loop 2: inactive_shortage=%d swap_shortage=%d",
 		    inactive_shortage, swap_shortage,0,0);
-	for (p = TAILQ_FIRST(&uvm.page_active);
-	     p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
-	     p = nextpg) {
-		nextpg = TAILQ_NEXT(p, pageq);
+	marker.flags = PG_BUSY;
+
+	KASSERT((curproc->p_flag & P_SYSTEM) != 0); /* otherwise need PHOLD */
+	TAILQ_INSERT_TAIL(&uvm.page_active, &marker, pageq);
+	while ((inactive_shortage > 0 || swap_shortage > 0) &&
+	    (p = TAILQ_FIRST(&uvm.page_active)) != &marker) {
+
+		/* no need to check wire_count as pg is "active" */
+		KASSERT(p->wire_count == 0);
+
+		uvm_pagerequeue(p);
+
 		if (p->flags & PG_BUSY) {
 			continue;
 		}
 
 		/*
+		 * update page activity accounting.
+		 *
+		 * if there's a shortage of inactive pages, deactivate.
+		 */
+
+		if (pmap_clear_reference(p)) {
+			UVM_PAGEACT_ADD(p, UVM_ACT_ADVANCE);
+		} else if (p->active_count > 0) {
+			UVM_PAGEACT_SUB(p, UVM_ACT_DECLINE);
+		} else if (inactive_shortage > 0) {
+			uvm_pagedeactivate(p);
+			uvmexp.pddeact++;
+			inactive_shortage--;
+		}
+
+		if (swap_shortage <= 0) {
+			continue;
+		}
+
+		/*
 		 * lock the page's owner.
 		 */
 
@@ -884,20 +920,45 @@ uvmpd_scan(void)
 		}
 
 		/*
-		 * if there's a shortage of inactive pages, deactivate.
-		 */
-
-		if (inactive_shortage > 0) {
-			/* no need to check wire_count as pg is "active" */
-			uvm_pagedeactivate(p);
-			uvmexp.pddeact++;
-			inactive_shortage--;
-		}
-
-		/*
 		 * we're done with this page.
 		 */
 
 		simple_unlock(slock);
 	}
+	TAILQ_REMOVE(&uvm.page_active, &marker, pageq);
 }
+#if 1
+void uvm_printact(void);
+void uvm_printq(const char *, const struct pglist *);
+
+void
+uvm_printact(void)
+{
+	/* print the active_count histogram of both paging queues */
+	uvm_printq("active", &uvm.page_active);
+	uvm_printq("inactive", &uvm.page_inactive);
+}
+
+/* uvm_printq: print how many pages on 'list' have each active_count value. */
+void
+uvm_printq(const char *name, const struct pglist *list)
+{
+	const struct vm_page *pg;
+	int counts[UVM_ACT_MAX+1];	/* pages seen per active_count value */
+	int i;
+	int inval = 0;			/* pages with active_count > UVM_ACT_MAX */
+
+	memset(&counts, 0, sizeof(counts));
+	printf("%s:\n", name);
+	TAILQ_FOREACH(pg, list, pageq) {
+		if (pg->active_count > UVM_ACT_MAX)
+			inval++;
+		else
+			counts[pg->active_count]++;
+	}
+	for (i = 0; i <= UVM_ACT_MAX; i++)
+		printf(" %06d\n", counts[i]);
+	if (inval)
+		printf(" inval=%d\n", inval);
+}
+#endif
Index: uvm_page_i.h
===================================================================
--- uvm_page_i.h	(revision 703)
+++ uvm_page_i.h	(working copy)
@@ -219,12 +219,17 @@ uvm_pageactivate(pg)
 	struct vm_page *pg;
 {
 	UVM_LOCK_ASSERT_PAGEQ();
-	uvm_pagedequeue(pg);
-	if (pg->wire_count == 0) {
-		TAILQ_INSERT_TAIL(&uvm.page_active, pg, pageq);
-		pg->pqflags |= PQ_ACTIVE;
-		uvmexp.active++;
+	if ((pg->pqflags & PQ_ACTIVE) == 0) {
+		uvm_pagedequeue(pg);
+		if (pg->wire_count == 0) {
+			TAILQ_INSERT_TAIL(&uvm.page_active, pg, pageq);
+			pg->pqflags |= PQ_ACTIVE;
+			uvmexp.active++;
+		}
 	}
+	if (pg->active_count < UVM_ACT_INIT) {
+		pg->active_count = UVM_ACT_INIT;
+	}
 }
 
 /*
@@ -246,6 +251,33 @@ uvm_pagedequeue(pg)
 		pg->pqflags &= ~PQ_INACTIVE;
 		uvmexp.inactive--;
 	}
+}
+
+/*
+ * uvm_pagerequeue: move a page to the tail of the paging queue it is on
+ */
+
+PAGE_INLINE void
+uvm_pagerequeue(pg)
+	struct vm_page *pg;
+{
+	struct pglist *q;
+
+	UVM_LOCK_ASSERT_PAGEQ();
+	KASSERT(pg->wire_count == 0);
+	if (pg->pqflags & PQ_ACTIVE) {
+		q = &uvm.page_active;
+	} else {
+		q = &uvm.page_inactive;	/* not active: must be inactive */
+#if defined(DIAGNOSTIC)
+		if ((pg->pqflags & PQ_INACTIVE) == 0) {
+			panic("uvm_pagerequeue: not on queue");
+		}
+#endif /* defined(DIAGNOSTIC) */
+	}
+
+	TAILQ_REMOVE(q, pg, pageq);
+	TAILQ_INSERT_TAIL(q, pg, pageq);
+}
 
 /*

--NextPart-20050310064325-0029300--