Subject: zeroed page recycling
To: None <tech-kern@netbsd.org>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-kern
Date: 10/29/2003 00:41:46
--NextPart-20031029003206-2364600
Content-Type: Text/Plain; charset=us-ascii

hi,

some subsystems know that the page they are about to free is zero-filled.
however, there is currently no way to tell the uvm page allocator about it.
the attached patches allow subsystems (in this case, the i386 pmap) to
do so by setting the PG_ZERO flag when freeing the page.

comments?

YAMAMOTO Takashi

--NextPart-20031029003206-2364600
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="uvm.pgzero.diff"

Index: /sys/uvm/uvm_page.c
===================================================================
--- /sys/uvm/uvm_page.c	(revision 171)
+++ /sys/uvm/uvm_page.c	(working copy)
@@ -1218,6 +1218,8 @@ uvm_pagefree(pg)
 	struct vm_page *pg;
 {
 	int s;
+	struct pglist *pgfl;
+	boolean_t iszero;
 
 	KASSERT((pg->flags & PG_PAGEOUT) == 0);
 	LOCK_ASSERT(simple_lock_held(&uvm.pageqlock) ||
@@ -1306,19 +1308,23 @@ uvm_pagefree(pg)
 	 * and put on free queue
 	 */
 
-	pg->flags &= ~PG_ZERO;
+	iszero = (pg->flags & PG_ZERO);
+	pgfl = &uvm.page_free[uvm_page_lookup_freelist(pg)].
+	    pgfl_buckets[VM_PGCOLOR_BUCKET(pg)].
+	    pgfl_queues[iszero ? PGFL_ZEROS : PGFL_UNKNOWN];
 
-	s = uvm_lock_fpageq();
-	TAILQ_INSERT_TAIL(&uvm.page_free[
-	    uvm_page_lookup_freelist(pg)].pgfl_buckets[
-	    VM_PGCOLOR_BUCKET(pg)].pgfl_queues[PGFL_UNKNOWN], pg, pageq);
 	pg->pqflags = PQ_FREE;
 #ifdef DEBUG
 	pg->uobject = (void *)0xdeadbeef;
 	pg->offset = 0xdeadbeef;
 	pg->uanon = (void *)0xdeadbeef;
 #endif
+
+	s = uvm_lock_fpageq();
+	TAILQ_INSERT_TAIL(pgfl, pg, pageq);
 	uvmexp.free++;
+	if (iszero)
+		uvmexp.zeropages++;
 
 	if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
 		uvm.page_idle_zero = vm_page_zero_enable;
Index: /sys/uvm/uvm_pglist.c
===================================================================
--- /sys/uvm/uvm_pglist.c	(revision 294)
+++ /sys/uvm/uvm_pglist.c	(working copy)
@@ -468,13 +468,23 @@ uvm_pglistfree(list)
 
 	s = uvm_lock_fpageq();
 	while ((pg = TAILQ_FIRST(list)) != NULL) {
+		boolean_t iszero;
+
 		KASSERT((pg->pqflags & (PQ_ACTIVE|PQ_INACTIVE)) == 0);
 		TAILQ_REMOVE(list, pg, pageq);
+		iszero = (pg->flags & PG_ZERO);
 		pg->pqflags = PQ_FREE;
+#ifdef DEBUG
+		pg->uobject = (void *)0xdeadbeef;
+		pg->offset = 0xdeadbeef;
+		pg->uanon = (void *)0xdeadbeef;
+#endif
 		TAILQ_INSERT_TAIL(&uvm.page_free[uvm_page_lookup_freelist(pg)].
 		    pgfl_buckets[VM_PGCOLOR_BUCKET(pg)].
-		    pgfl_queues[PGFL_UNKNOWN], pg, pageq);
+		    pgfl_queues[iszero ? PGFL_ZEROS : PGFL_UNKNOWN], pg, pageq);
 		uvmexp.free++;
+		if (iszero)
+			uvmexp.zeropages++;
 		if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
 			uvm.page_idle_zero = vm_page_zero_enable;
 		STAT_DECR(uvm_pglistalloc_npages);

--NextPart-20031029003206-2364600
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="i386.pmap.pgzero.diff"

Index: /sys/arch/i386/i386/pmap.c
===================================================================
--- /sys/arch/i386/i386/pmap.c	(revision 374)
+++ /sys/arch/i386/i386/pmap.c	(working copy)
@@ -1738,7 +1738,6 @@ void
 pmap_destroy(pmap)
 	struct pmap *pmap;
 {
-	struct vm_page *pg;
 	int refs;
 
 	/*
@@ -1765,15 +1764,11 @@ pmap_destroy(pmap)
 	simple_unlock(&pmaps_lock);
 
 	/*
-	 * free any remaining PTPs
+	 * destroyed pmap shouldn't have remaining PTPs
 	 */
 
-	while ((pg = TAILQ_FIRST(&pmap->pm_obj.memq)) != NULL) {
-		KASSERT((pg->flags & PG_BUSY) == 0);
-
-		pg->wire_count = 0;
-		uvm_pagefree(pg);
-	}
+	KASSERT(pmap->pm_obj.uo_npages == 0);
+	KASSERT(TAILQ_EMPTY(&pmap->pm_obj.memq));
 
 	/*
 	 * MULTIPROCESSOR -- no need to flush out of other processors'
@@ -2294,8 +2289,30 @@ pmap_remove_pte(pmap, ptp, pte, va, cpum
 		pmap->pm_stats.wired_count--;
 	pmap->pm_stats.resident_count--;
 
-	if (ptp)
+	if (ptp) {
 		ptp->wire_count--;		/* dropping a PTE */
+#ifdef DEBUG
+		if (ptp->wire_count == 1) {
+			/*
+			 * make sure the PTP is zero filled.
+			 *
+			 * XXXyamt
+			 * it's better to do this check
+			 * in uvm rather than here.
+			 */
+			const char *cp =
+			    (const char *)(((vaddr_t)pte) & ~PAGE_MASK);
+			const char *ep = cp + PAGE_SIZE;
+
+			while (cp < ep) {
+				if (*cp != 0)
+					panic("pmap_remove_pte: "
+					    "free ptp not zero-filled");
+				cp++;
+			}
+		}
+#endif /* DEBUG */
+	}
 
 	pmap_tlb_shootdown(pmap, va, opte, cpumaskp);
 
@@ -2440,6 +2457,7 @@ pmap_do_remove(pmap, sva, eva, flags)
 					pmap->pm_ptphint =
 					    TAILQ_FIRST(&pmap->pm_obj.memq);
 				ptp->wire_count = 0;
+				ptp->flags |= PG_ZERO;
 				uvm_pagefree(ptp);
 			}
 		}
@@ -2530,6 +2548,7 @@ pmap_do_remove(pmap, sva, eva, flags)
 			if (pmap->pm_ptphint == ptp)	/* update hint? */
 				pmap->pm_ptphint = pmap->pm_obj.memq.tqh_first;
 			ptp->wire_count = 0;
+			ptp->flags |= PG_ZERO;
 			uvm_pagefree(ptp);
 		}
 	}
@@ -2609,6 +2628,27 @@ pmap_page_remove(pg)
 		if (pve->pv_ptp) {
 			pve->pv_ptp->wire_count--;
 			if (pve->pv_ptp->wire_count <= 1) {
+#ifdef DEBUG
+				/*
+				 * make sure the PTP is zero filled.
+				 *
+				 * XXXyamt
+				 * it's better to do this check
+				 * in uvm rather than here.
+				 */
+				const char *cp =
+				    (const char *)
+				    (((vaddr_t)ptes[x86_btop(pve->pv_va)]) &
+				    ~PAGE_MASK);
+				const char *ep = cp + PAGE_SIZE;
+
+				while (cp < ep) {
+					if (*cp != 0)
+						panic("pmap_page_remove: "
+						    "free ptp not zero-filled");
+					cp++;
+				}
+#endif /* DEBUG */
 				/* zap! */
 				opte = x86_atomic_testset_ul(
 				    &pve->pv_pmap->pm_pdir[pdei(pve->pv_va)],
@@ -2631,6 +2671,7 @@ pmap_page_remove(pg)
 					pve->pv_pmap->pm_ptphint =
 					    pve->pv_pmap->pm_obj.memq.tqh_first;
 				pve->pv_ptp->wire_count = 0;
+				pve->pv_ptp->flags |= PG_ZERO;
 				uvm_pagefree(pve->pv_ptp);
 			}
 		}

--NextPart-20031029003206-2364600--