Subject: Re: more on mysql benchmark
To: None <soda@sra.co.jp>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-kern
Date: 03/10/2005 06:46:05
--NextPart-20050310064325-0029300
Content-Type: Text/Plain; charset=us-ascii
hi,
> At memory shortage condition, sum > 100% makes the page daemon
> abandon page-access-history due to the page-queue-reordering effect.
> That's one of things that I'd like to avoid.
i don't think tweaking sysctl is the right way to avoid it.
it reminds me of an old patch in my local tree. (attached)
i suspended it because i don't think tuning page balancing etc makes
much sense without fixing PR/27030.
YAMAMOTO Takashi
--NextPart-20050310064325-0029300
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="fbsdbalance.diff"
Index: uvm_page.c
===================================================================
--- uvm_page.c (revision 947)
+++ uvm_page.c (working copy)
@@ -1414,6 +1414,7 @@ uvm_pagefree(pg)
pgfl_queues[iszero ? PGFL_ZEROS : PGFL_UNKNOWN];
pg->pqflags = PQ_FREE;
+ pg->active_count = 0;
#ifdef DEBUG
pg->uobject = (void *)0xdeadbeef;
pg->offset = 0xdeadbeef;
Index: uvm_page.h
===================================================================
--- uvm_page.h (revision 947)
+++ uvm_page.h (working copy)
@@ -131,7 +131,9 @@ struct vm_page {
* to read: [O or P]
* to modify: [O _and_ P] */
uint16_t wire_count; /* wired down map refs [P] */
- uint16_t pqflags; /* page queue flags [P] */
+ uint8_t pqflags; /* page queue flags [P] */
+ uint8_t active_count; /* logical position in the
+ * active queue [P] */
paddr_t phys_addr; /* physical address of page */
#ifdef __HAVE_VM_PAGE_MD
@@ -181,6 +183,22 @@ struct vm_page {
uvm_object */
#define PQ_SWAPBACKED (PQ_ANON|PQ_AOBJ)
+/*
+ * active_count values
+ *
+ * active_count is a small per-page score kept in struct vm_page.
+ */
+#define	UVM_ACT_INIT	3	/* floor applied by uvm_pageactivate() */
+#define	UVM_ACT_DECLINE	1	/* subtracted by uvmpd_scan() when
+					 * pmap_clear_reference() is false */
+#define	UVM_ACT_ADVANCE	5	/* added when pmap_clear_reference()
+					 * is true */
+#define	UVM_ACT_MAX	65	/* cap enforced by UVM_PAGEACT_ADD */
+
+/*
+ * UVM_PAGEACT_ADD: raise (pg)->active_count by dif, clamping the result
+ * to UVM_ACT_MAX.
+ *
+ * => pg is evaluated more than once; pass an expression without
+ *    side effects.
+ */
+#define	UVM_PAGEACT_ADD(pg, dif)					\
+	do {								\
+		(pg)->active_count =					\
+		    MIN((pg)->active_count + (dif), UVM_ACT_MAX);	\
+	} while (/* CONSTCOND */ 0)
+
+/*
+ * UVM_PAGEACT_SUB: lower (pg)->active_count by dif, never subtracting
+ * more than the current value.
+ *
+ * => pg is evaluated more than once; pass an expression without
+ *    side effects.
+ * => only the macro parameter is referenced; the macro must not depend
+ *    on the caller having a variable named "p".
+ */
+#define	UVM_PAGEACT_SUB(pg, dif)					\
+	do {								\
+		(pg)->active_count -= MIN((pg)->active_count, (dif));	\
+	} while (/* CONSTCOND */ 0)
+
/*
* physical memory layout structure
*
@@ -262,6 +280,7 @@ vaddr_t uvm_pageboot_alloc(vsize_t);
PAGE_INLINE void uvm_pagecopy(struct vm_page *, struct vm_page *);
PAGE_INLINE void uvm_pagedeactivate(struct vm_page *);
PAGE_INLINE void uvm_pagedequeue(struct vm_page *);
+PAGE_INLINE void uvm_pagerequeue(struct vm_page *);
void uvm_pagefree(struct vm_page *);
void uvm_page_unbusy(struct vm_page **, int);
PAGE_INLINE struct vm_page *uvm_pagelookup(struct uvm_object *, voff_t);
Index: uvm_pdaemon.c
===================================================================
--- uvm_pdaemon.c (revision 920)
+++ uvm_pdaemon.c (working copy)
@@ -197,7 +197,7 @@ uvmpd_tune(void)
void
uvm_pageout(void *arg)
{
- int bufcnt, npages = 0;
+ int npages = 0;
UVMHIST_FUNC("uvm_pageout"); UVMHIST_CALLED(pdhist);
UVMHIST_LOG(pdhist,"<starting uvm pagedaemon>", 0, 0, 0, 0);
@@ -217,11 +217,13 @@ uvm_pageout(void *arg)
*/
for (;;) {
+ int free_shortage;
+
simple_lock(&uvm.pagedaemon_lock);
UVMHIST_LOG(pdhist," <<SLEEPING>>",0,0,0,0);
UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon,
- &uvm.pagedaemon_lock, FALSE, "pgdaemon", 0);
+ &uvm.pagedaemon_lock, FALSE, "pgdaemon", 5 * hz);
uvmexp.pdwoke++;
UVMHIST_LOG(pdhist," <<WOKE UP>>",0,0,0,0);
@@ -244,9 +246,7 @@ uvm_pageout(void *arg)
* Estimate a hint. Note that bufmem are returned to
* system only when entire pool page is empty.
*/
- bufcnt = uvmexp.freetarg - uvmexp.free;
- if (bufcnt < 0)
- bufcnt = 0;
+ free_shortage = uvmexp.freetarg - uvmexp.free;
UVMHIST_LOG(pdhist," free/ftarg=%d/%d, inact/itarg=%d/%d",
uvmexp.free, uvmexp.freetarg, uvmexp.inactive,
@@ -277,8 +277,12 @@ uvm_pageout(void *arg)
uvm_unlock_pageq();
- buf_drain(bufcnt << PAGE_SHIFT);
+ if (free_shortage <= 0) {
+ continue;
+ }
+ buf_drain(free_shortage << PAGE_SHIFT);
+
/*
* drain pool resources now that we're not holding any locks
*/
@@ -454,6 +458,7 @@ uvmpd_scan_inactive(pglst)
if (pmap_clear_reference(p)) {
uvm_pageactivate(p);
+ UVM_PAGEACT_ADD(p, UVM_ACT_ADVANCE);
uvmexp.pdreact++;
continue;
}
@@ -755,10 +760,11 @@ void
uvmpd_scan(void)
{
int inactive_shortage, swap_shortage, pages_freed;
- struct vm_page *p, *nextpg;
+ struct vm_page *p;
struct uvm_object *uobj;
struct vm_anon *anon;
struct simplelock *slock;
+ struct vm_page marker;
UVMHIST_FUNC("uvmpd_scan"); UVMHIST_CALLED(pdhist);
uvmexp.pdrevs++;
@@ -793,8 +799,10 @@ uvmpd_scan(void)
UVMHIST_LOG(pdhist, " starting 'free' loop",0,0,0,0);
pages_freed = uvmexp.pdfreed;
- uvmpd_scan_inactive(&uvm.page_inactive);
- pages_freed = uvmexp.pdfreed - pages_freed;
+ if (uvmexp.free < uvmexp.freetarg) { /* XXX */
+ uvmpd_scan_inactive(&uvm.page_inactive);
+ pages_freed = uvmexp.pdfreed - pages_freed;
+ }
/*
* we have done the scan to get free pages. now we work on meeting
@@ -818,15 +826,43 @@ uvmpd_scan(void)
UVMHIST_LOG(pdhist, " loop 2: inactive_shortage=%d swap_shortage=%d",
inactive_shortage, swap_shortage,0,0);
- for (p = TAILQ_FIRST(&uvm.page_active);
- p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
- p = nextpg) {
- nextpg = TAILQ_NEXT(p, pageq);
+ marker.flags = PG_BUSY;
+
+ KASSERT((curproc->p_flag & P_SYSTEM) != 0); /* otherwise need PHOLD */
+ TAILQ_INSERT_TAIL(&uvm.page_active, &marker, pageq);
+ while ((inactive_shortage > 0 || swap_shortage > 0) &&
+ (p = TAILQ_FIRST(&uvm.page_active)) != &marker) {
+
+ /* no need to check wire_count as pg is "active" */
+ KASSERT(p->wire_count == 0);
+
+ uvm_pagerequeue(p);
+
if (p->flags & PG_BUSY) {
continue;
}
/*
+ * update page activity accounting.
+ *
+ * if there's a shortage of inactive pages, deactivate.
+ */
+
+ if (pmap_clear_reference(p)) {
+ UVM_PAGEACT_ADD(p, UVM_ACT_ADVANCE);
+ } else if (p->active_count > 0) {
+ UVM_PAGEACT_SUB(p, UVM_ACT_DECLINE);
+ } else if (inactive_shortage > 0) {
+ uvm_pagedeactivate(p);
+ uvmexp.pddeact++;
+ inactive_shortage--;
+ }
+
+ if (swap_shortage <= 0) {
+ continue;
+ }
+
+ /*
* lock the page's owner.
*/
@@ -884,20 +920,45 @@ uvmpd_scan(void)
}
/*
- * if there's a shortage of inactive pages, deactivate.
- */
-
- if (inactive_shortage > 0) {
- /* no need to check wire_count as pg is "active" */
- uvm_pagedeactivate(p);
- uvmexp.pddeact++;
- inactive_shortage--;
- }
-
- /*
* we're done with this page.
*/
simple_unlock(slock);
}
+ TAILQ_REMOVE(&uvm.page_active, &marker, pageq);
}
+#if 1
+void uvm_printact(void);
+void uvm_printq(const char *, const struct pglist *);
+
+/*
+ * uvm_printact: print the active_count distribution of the active
+ * page queue followed by that of the inactive page queue.
+ */
+void
+uvm_printact(void)
+{
+
+	uvm_printq("active", &uvm.page_active);
+	uvm_printq("inactive", &uvm.page_inactive);
+}
+
+/*
+ * uvm_printq: print a histogram of active_count for one page queue.
+ *
+ * => prints "name:" and then one line per value 0..UVM_ACT_MAX giving
+ *    the number of pages on the list holding that value
+ * => pages whose active_count exceeds UVM_ACT_MAX are tallied
+ *    separately and reported as "inval" only when there are any
+ * => takes no lock itself.  NOTE(review): presumably the caller has to
+ *    keep the list stable while it is walked -- confirm.
+ */
+void
+uvm_printq(const char *name, const struct pglist *list)
+{
+	const struct vm_page *pg;
+	int histogram[UVM_ACT_MAX + 1];
+	int out_of_range = 0;
+	int level;
+
+	memset(histogram, 0, sizeof(histogram));
+	printf("%s:\n", name);
+
+	/* tally the pages by active_count */
+	TAILQ_FOREACH(pg, list, pageq) {
+		if (pg->active_count <= UVM_ACT_MAX) {
+			histogram[pg->active_count]++;
+			continue;
+		}
+		out_of_range++;
+	}
+
+	/* one output line per possible value, lowest first */
+	level = 0;
+	while (level <= UVM_ACT_MAX) {
+		printf(" %06d\n", histogram[level]);
+		level++;
+	}
+
+	if (out_of_range != 0) {
+		printf(" inval=%d\n", out_of_range);
+	}
+}
+#endif
Index: uvm_page_i.h
===================================================================
--- uvm_page_i.h (revision 703)
+++ uvm_page_i.h (working copy)
@@ -219,12 +219,17 @@ uvm_pageactivate(pg)
struct vm_page *pg;
{
UVM_LOCK_ASSERT_PAGEQ();
- uvm_pagedequeue(pg);
- if (pg->wire_count == 0) {
- TAILQ_INSERT_TAIL(&uvm.page_active, pg, pageq);
- pg->pqflags |= PQ_ACTIVE;
- uvmexp.active++;
+ if ((pg->pqflags & PQ_ACTIVE) == 0) {
+ uvm_pagedequeue(pg);
+ if (pg->wire_count == 0) {
+ TAILQ_INSERT_TAIL(&uvm.page_active, pg, pageq);
+ pg->pqflags |= PQ_ACTIVE;
+ uvmexp.active++;
+ }
}
+ if (pg->active_count < UVM_ACT_INIT) {
+ pg->active_count = UVM_ACT_INIT;
+ }
}
/*
@@ -246,6 +251,33 @@ uvm_pagedequeue(pg)
pg->pqflags &= ~PQ_INACTIVE;
uvmexp.inactive--;
}
+}
+
+/*
+ * uvm_pagerequeue: move a page to the tail of the paging queue
+ * (active or inactive) that it is currently on.
+ *
+ * => invokes UVM_LOCK_ASSERT_PAGEQ() and asserts wire_count == 0
+ * => a page without PQ_ACTIVE is taken to be on the inactive queue;
+ *    under DIAGNOSTIC, panics if PQ_INACTIVE is not set either
+ */
+
+PAGE_INLINE void
+uvm_pagerequeue(pg)
+	struct vm_page *pg;
+{
+	struct pglist *pgl;
+	int on_active;
+
+	UVM_LOCK_ASSERT_PAGEQ();
+	KASSERT(pg->wire_count == 0);
+
+	on_active = (pg->pqflags & PQ_ACTIVE) != 0;
+#if defined(DIAGNOSTIC)
+	if (!on_active && (pg->pqflags & PQ_INACTIVE) == 0) {
+		panic("uvm_pagerequeue: not on queue");
+	}
+#endif /* defined(DIAGNOSTIC) */
+	pgl = on_active ? &uvm.page_active : &uvm.page_inactive;
+
+	/* unlink and re-append so the page ends up last on its queue */
+	TAILQ_REMOVE(pgl, pg, pageq);
+	TAILQ_INSERT_TAIL(pgl, pg, pageq);
+}
/*
--NextPart-20050310064325-0029300--