Subject: Re: port-i386/37193 (x86 pmap concurrency strategy could use
To: None <port-i386-maintainer@netbsd.org, netbsd-bugs@netbsd.org,>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: port-xen
Date: 01/10/2008 20:01:52
> Synopsis: x86 pmap concurrency strategy could use improvement
> 
> Responsible-Changed-From-To: port-i386-maintainer->yamt
> Responsible-Changed-By: yamt@netbsd.org
> Responsible-Changed-When: Wed, 09 Jan 2008 11:45:50 +0000
> Responsible-Changed-Why:
> i'll take a look.

here's a patch.  can anyone test it on xen+amd64?

YAMAMOTO Takashi

Index: i386/include/pmap.h
===================================================================
RCS file: /cvsroot/src/sys/arch/i386/include/pmap.h,v
retrieving revision 1.97
diff -u -p -r1.97 pmap.h
--- i386/include/pmap.h	28 Nov 2007 16:44:46 -0000	1.97
+++ i386/include/pmap.h	10 Jan 2008 10:57:25 -0000
@@ -264,6 +264,7 @@
 #define pmap_pa2pte(a)			(a)
 #define pmap_pte2pa(a)			((a) & PG_FRAME)
 #define pmap_pte_set(p, n)		do { *(p) = (n); } while (0)
+#define pmap_pte_cas(p, o, n)		atomic_cas_32((p), (o), (n))
 #define pmap_pte_testset(p, n)		\
     atomic_swap_ulong((volatile unsigned long *)p, n)
 #define pmap_pte_setbits(p, b)		\
Index: amd64/include/pmap.h
===================================================================
RCS file: /cvsroot/src/sys/arch/amd64/include/pmap.h,v
retrieving revision 1.18
diff -u -p -r1.18 pmap.h
--- amd64/include/pmap.h	3 Jan 2008 19:30:10 -0000	1.18
+++ amd64/include/pmap.h	10 Jan 2008 10:57:25 -0000
@@ -257,6 +257,7 @@
 #define pmap_pa2pte(a)			(a)
 #define pmap_pte2pa(a)			((a) & PG_FRAME)
 #define pmap_pte_set(p, n)		do { *(p) = (n); } while (0)
+#define pmap_pte_cas(p, o, n)		atomic_cas_64((p), (o), (n))
 #define pmap_pte_testset(p, n)		\
     atomic_swap_ulong((volatile unsigned long *)p, n)
 #define pmap_pte_setbits(p, b)		\
@@ -285,6 +286,20 @@ pmap_pte_set(pt_entry_t *pte, pt_entry_t
 }
 
 static __inline pt_entry_t
+pmap_pte_cas(pt_entry_t *ptep, pt_entry_t o, pt_entry_t n)
+{
+	int s = splvm();
+	pt_entry_t opte = *ptep;
+
+	if (opte == o) {
+		xpq_queue_pte_update((pt_entry_t *)xpmap_ptetomach(ptep), n);
+		xpq_flush_queue();
+	}
+	splx(s);
+	return opte;
+}
+
+static __inline pt_entry_t
 pmap_pte_testset(volatile pt_entry_t *pte, pt_entry_t npte)
 {
 	int s = splvm();
Index: x86/x86/pmap.c
===================================================================
RCS file: /cvsroot/src/sys/arch/x86/x86/pmap.c,v
retrieving revision 1.23
diff -u -p -r1.23 pmap.c
--- x86/x86/pmap.c	9 Jan 2008 11:13:16 -0000	1.23
+++ x86/x86/pmap.c	10 Jan 2008 10:57:25 -0000
@@ -346,7 +346,6 @@ pd_entry_t *normal_pdes[] = PDES_INITIAL
 pd_entry_t *alternate_pdes[] = APDES_INITIALIZER;
 
 static kmutex_t pmaps_lock;
-static krwlock_t pmap_main_lock;
 
 static vaddr_t pmap_maxkvaddr;
 
@@ -555,6 +554,32 @@ static bool		 pmap_reactivate(struct pma
  * p m a p   h e l p e r   f u n c t i o n s
  */
 
+static inline void
+pmap_stats_update(struct pmap *pmap, int resid_diff, int wired_diff)
+{
+
+	if (pmap == pmap_kernel()) {
+		atomic_add_long(&pmap->pm_stats.resident_count, resid_diff);
+		atomic_add_long(&pmap->pm_stats.wired_count, wired_diff);
+	} else {
+		KASSERT(mutex_owned(&pmap->pm_lock));
+		pmap->pm_stats.resident_count += resid_diff;
+		pmap->pm_stats.wired_count += wired_diff;
+	}
+}
+
+static inline void
+pmap_stats_update_bypte(struct pmap *pmap, pt_entry_t npte, pt_entry_t opte)
+{
+	int resid_diff = ((npte & PG_V) ? 1 : 0) - ((opte & PG_V) ? 1 : 0);
+	int wired_diff = ((npte & PG_W) ? 1 : 0) - ((opte & PG_W) ? 1 : 0);
+
+	KASSERT((npte & (PG_V | PG_W)) != PG_W);
+	KASSERT((opte & (PG_V | PG_W)) != PG_W);
+
+	pmap_stats_update(pmap, resid_diff, wired_diff);
+}
+
 /*
  * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]?
  *		of course the kernel is always loaded
@@ -1306,7 +1331,6 @@ pmap_bootstrap(vaddr_t kva_start)
 	 *	again is never taken from interrupt context.
 	 */
 
-	rw_init(&pmap_main_lock);
 	mutex_init(&pmaps_lock, MUTEX_DEFAULT, IPL_NONE);
 	LIST_INIT(&pmaps);
 	pmap_cpu_init_early(curcpu());
@@ -1512,6 +1536,8 @@ pmap_enter_pv(struct pv_head *pvh,
 	      vaddr_t va,
 	      struct vm_page *ptp)	/* PTP in pmap that maps this VA */
 {
+
+	KASSERT(mutex_owned(&pvh->pvh_lock));
 	pve->pv_pmap = pmap;
 	pve->pv_va = va;
 	pve->pv_ptp = ptp;			/* NULL for kernel pmap */
@@ -1533,6 +1559,7 @@ pmap_remove_pv(struct pv_head *pvh, stru
 {
 	struct pv_entry tmp, *pve;
 
+	KASSERT(mutex_owned(&pvh->pvh_lock));
 	tmp.pv_pmap = pmap;
 	tmp.pv_va = va;
 	pve = SPLAY_FIND(pvtree, &pvh->pvh_root, &tmp);
@@ -1576,7 +1603,7 @@ pmap_freepage(struct pmap *pmap, struct 
 	lidx = level - 1;
 
 	obj = &pmap->pm_obj[lidx];
-	pmap->pm_stats.resident_count--;
+	pmap_stats_update(pmap, -1, 0);
 	if (lidx != 0)
 		mutex_enter(&obj->vmobjlock);
 	if (pmap->pm_ptphint[lidx] == ptp)
@@ -1706,7 +1733,7 @@ pmap_get_ptp(struct pmap *pmap, vaddr_t 
 		}
 #endif /* XEN */
 		pmap_pte_flush();
-		pmap->pm_stats.resident_count++;
+		pmap_stats_update(pmap, 1, 0);
 		/*
 		 * If we're not in the top level, increase the
 		 * wire count of the parent page.
@@ -2496,6 +2523,7 @@ pmap_extract_ma(pmap, va, pap)
  
 	pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
 	if (!pmap_pdes_valid(va, pdes, &pde)) {
+		pmap_unmap_ptes(pmap, pmap2);
 		return false;
 	}
  
@@ -2644,6 +2672,53 @@ pmap_copy_page(paddr_t srcpa, paddr_t ds
 #endif
 }
 
+static pt_entry_t *
+pmap_map_ptp(struct vm_page *ptp)
+{
+	pt_entry_t *ptppte;
+	void *ptpva;
+	int id;
+
+	id = cpu_number();
+	ptppte = PTESLEW(ptp_pte, id);
+	ptpva = VASLEW(ptpp, id);
+	pmap_pte_set(ptppte, pmap_pa2pte(VM_PAGE_TO_PHYS(ptp)) |
+	    PG_V | PG_RW | PG_M | PG_U | PG_k);
+	pmap_pte_flush();
+	pmap_update_pg((vaddr_t)ptpva);
+
+	return (pt_entry_t *)ptpva;
+}
+
+static void
+pmap_unmap_ptp(void)
+{
+#if defined(DIAGNOSTIC) || defined(XEN)
+	pt_entry_t *pte;
+
+	pte = PTESLEW(ptp_pte, cpu_number());
+	pmap_pte_set(pte, 0);
+	pmap_pte_flush();
+#endif
+}
+
+static pt_entry_t *
+pmap_map_pte(struct vm_page *ptp, vaddr_t va)
+{
+
+	if (ptp == NULL) {
+		return kvtopte(va);
+	}
+	return pmap_map_ptp(ptp) + pl1_pi(va);
+}
+
+static void
+pmap_unmap_pte(void)
+{
+
+	pmap_unmap_ptp();
+}
+
 /*
  * p m a p   r e m o v e   f u n c t i o n s
  *
@@ -2695,9 +2770,7 @@ pmap_remove_ptes(struct pmap *pmap, stru
 		pmap_exec_account(pmap, startva, opte, 0);
 		KASSERT(pmap_valid_entry(opte));
 
-		if (opte & PG_W)
-			pmap->pm_stats.wired_count--;
-		pmap->pm_stats.resident_count--;
+		pmap_stats_update_bypte(pmap, 0, opte);
 		xpte |= opte;
 
 		if (ptp) {
@@ -2778,9 +2851,7 @@ pmap_remove_pte(struct pmap *pmap, struc
 	pmap_exec_account(pmap, va, opte, 0);
 	KASSERT(pmap_valid_entry(opte));
 
-	if (opte & PG_W)
-		pmap->pm_stats.wired_count--;
-	pmap->pm_stats.resident_count--;
+	pmap_stats_update_bypte(pmap, 0, opte);
 
 	if (opte & PG_U)
 		pmap_tlb_shootdown(pmap, va, 0, opte);
@@ -2858,12 +2929,6 @@ pmap_do_remove(struct pmap *pmap, vaddr_
 	struct vm_page *ptp, *empty_ptps = NULL;
 	struct pmap *pmap2;
 
-	/*
-	 * we lock in the pmap => pv_head direction
-	 */
-
-	rw_enter(&pmap_main_lock, RW_READER);
-
 	pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);	/* locks pmap */
 
 	/*
@@ -2965,7 +3030,6 @@ pmap_do_remove(struct pmap *pmap, vaddr_
 	}
 	pmap_tlb_shootwait();
 	pmap_unmap_ptes(pmap, pmap2);		/* unlock pmap */
-	rw_exit(&pmap_main_lock);
 
 	/* Now we can free unused PVs and ptps */
 	if (pv_tofree)
@@ -2987,15 +3051,10 @@ void
 pmap_page_remove(struct vm_page *pg)
 {
 	struct pv_head *pvh;
-	struct pv_entry *pve, *npve, *killlist = NULL;
-	pt_entry_t *ptes, opte;
-	pd_entry_t **pdes;
-#ifdef DIAGNOSTIC
-	pd_entry_t pde;
-#endif
+	struct pv_entry *pve, *killlist = NULL;
 	struct vm_page *empty_ptps = NULL;
 	struct vm_page *ptp;
-	struct pmap *pmap2;
+	pt_entry_t expect;
 
 #ifdef DIAGNOSTIC
 	int bank, off;
@@ -3010,37 +3069,26 @@ pmap_page_remove(struct vm_page *pg)
 		return;
 	}
 
-	/* set pv_head => pmap locking */
-	rw_enter(&pmap_main_lock, RW_WRITER);
-
-	for (pve = SPLAY_MIN(pvtree, &pvh->pvh_root); pve != NULL; pve = npve) {
-		npve = SPLAY_NEXT(pvtree, &pvh->pvh_root, pve);
-
-		/* locks pmap */
-		pmap_map_ptes(pve->pv_pmap, &pmap2, &ptes, &pdes);
-
-#ifdef DIAGNOSTIC
-		if (pve->pv_ptp && pmap_pdes_valid(pve->pv_va, pdes, &pde) &&
-		   pmap_pte2pa(pde) != VM_PAGE_TO_PHYS(pve->pv_ptp)) {
-			printf("pmap_page_remove: pg=%p: va=%lx, pv_ptp=%p\n",
-			       pg, pve->pv_va, pve->pv_ptp);
-			printf("pmap_page_remove: PTP's phys addr: "
-			       "actual=%lx, recorded=%lx\n",
-			       (unsigned long)pmap_pte2pa(pde),
-			       (unsigned long)VM_PAGE_TO_PHYS(pve->pv_ptp));
-			panic("pmap_page_remove: mapped managed page has "
-			      "invalid pv_ptp field");
-		}
-#endif
+	expect = pmap_pa2pte(VM_PAGE_TO_PHYS(pg)) | PG_V;
+retry:
+	mutex_spin_enter(&pvh->pvh_lock);
+	while ((pve = SPLAY_MIN(pvtree, &pvh->pvh_root)) != NULL) {
+		struct pmap *pmap = pve->pv_pmap;
+		pt_entry_t *ptep;
+		pt_entry_t opte;
 
 		/* atomically save the old PTE and zap! it */
-		opte = pmap_pte_testset(&ptes[pl1_i(pve->pv_va)], 0);
-		KASSERT(pmap_valid_entry(opte));
-		KDASSERT(pmap_pte2pa(opte) == VM_PAGE_TO_PHYS(pg));
-
-		if (opte & PG_W)
-			pve->pv_pmap->pm_stats.wired_count--;
-		pve->pv_pmap->pm_stats.resident_count--;
+		ptep = pmap_map_pte(pve->pv_ptp, pve->pv_va);
+		do {
+			opte = *ptep;
+			if ((opte & (PG_FRAME | PG_V)) != expect) {
+				pmap_unmap_pte();
+				mutex_spin_exit(&pvh->pvh_lock);
+				x86_pause();
+				goto retry;
+			}
+		} while (pmap_pte_cas(ptep, opte, 0) != opte);
+		pmap_unmap_pte();
 
 		/* Shootdown only if referenced */
 		if (opte & PG_U)
@@ -3049,21 +3097,37 @@ pmap_page_remove(struct vm_page *pg)
 		/* sync R/M bits */
 		pg->mdpage.mp_attrs |= (opte & (PG_U|PG_M));
 
+		if (pve->pv_ptp) {
+			pmap_reference(pmap);
+		}
+		SPLAY_REMOVE(pvtree, &pvh->pvh_root, pve); /* remove it */
+		mutex_spin_exit(&pvh->pvh_lock);
+
 		/* update the PTP reference count.  free if last reference. */
 		if (pve->pv_ptp) {
+			struct pmap *pmap2;
+			pt_entry_t *ptes;
+			pd_entry_t **pdes;
+
+			KASSERT(pmap != pmap_kernel());
+			pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
+			pmap_stats_update_bypte(pmap, 0, opte);
 			pve->pv_ptp->wire_count--;
 			if (pve->pv_ptp->wire_count <= 1) {
 				pmap_free_ptp(pve->pv_pmap, pve->pv_ptp,
 				    pve->pv_va, ptes, pdes, &empty_ptps);
 			}
+			pmap_unmap_ptes(pmap, pmap2);
+			pmap_destroy(pmap);
+		} else {
+			pmap_stats_update_bypte(pmap, 0, opte);
 		}
-		
-		pmap_unmap_ptes(pve->pv_pmap, pmap2);	/* unlocks pmap */
-		SPLAY_REMOVE(pvtree, &pvh->pvh_root, pve); /* remove it */
+
 		SPLAY_RIGHT(pve, pv_node) = killlist;	/* mark it for death */
 		killlist = pve;
+		mutex_spin_enter(&pvh->pvh_lock);
 	}
-	rw_exit(&pmap_main_lock);
+	mutex_spin_exit(&pvh->pvh_lock);
 
 	crit_enter();
 	pmap_tlb_shootwait();
@@ -3097,8 +3161,8 @@ pmap_test_attrs(struct vm_page *pg, unsi
 	int *myattrs;
 	struct pv_head *pvh;
 	struct pv_entry *pve;
-	struct pmap *pmap2;
 	pt_entry_t pte;
+	pt_entry_t expect;
 
 #if DIAGNOSTIC
 	int bank, off;
@@ -3125,26 +3189,28 @@ pmap_test_attrs(struct vm_page *pg, unsi
 	}
 
 	/* nope, gonna have to do it the hard way */
-	rw_enter(&pmap_main_lock, RW_WRITER);
 
+	expect = pmap_pa2pte(VM_PAGE_TO_PHYS(pg)) | PG_V;
+	mutex_spin_enter(&pvh->pvh_lock);
 	for (pve = SPLAY_MIN(pvtree, &pvh->pvh_root);
 	     pve != NULL && (*myattrs & testbits) == 0;
 	     pve = SPLAY_NEXT(pvtree, &pvh->pvh_root, pve)) {
-		pt_entry_t *ptes;
-		pd_entry_t **pdes;
+		pt_entry_t *ptep;
 
-		pmap_map_ptes(pve->pv_pmap, &pmap2, &ptes, &pdes);
-		pte = ptes[pl1_i(pve->pv_va)];
-		pmap_unmap_ptes(pve->pv_pmap, pmap2);
-		*myattrs |= pte;
+		ptep = pmap_map_pte(pve->pv_ptp, pve->pv_va);
+		pte = *ptep;
+		pmap_unmap_pte();
+		if ((pte & (PG_FRAME | PG_V)) == expect) {
+			*myattrs |= pte;
+		}
 	}
+	mutex_spin_exit(&pvh->pvh_lock);
 
 	/*
 	 * note that we will exit the for loop with a non-null pve if
 	 * we have found the bits we are testing for.
 	 */
 
-	rw_exit(&pmap_main_lock);
 	return((*myattrs & testbits) != 0);
 }
 
@@ -3162,10 +3228,8 @@ pmap_clear_attrs(struct vm_page *pg, uns
 	uint32_t result;
 	struct pv_head *pvh;
 	struct pv_entry *pve;
-	pt_entry_t *ptes, opte;
 	int *myattrs;
-	struct pmap *pmap2;
-
+	pt_entry_t expect;
 #ifdef DIAGNOSTIC
 	int bank, off;
 
@@ -3174,33 +3238,26 @@ pmap_clear_attrs(struct vm_page *pg, uns
 		panic("pmap_change_attrs: unmanaged page?");
 #endif
 	mdpg = &pg->mdpage;
-
-	rw_enter(&pmap_main_lock, RW_WRITER);
 	pvh = &mdpg->mp_pvhead;
-
 	myattrs = &mdpg->mp_attrs;
+	expect = pmap_pa2pte(VM_PAGE_TO_PHYS(pg)) | PG_V;
+
+	mutex_spin_enter(&pvh->pvh_lock);
 	result = *myattrs & clearbits;
 	*myattrs &= ~clearbits;
-
 	SPLAY_FOREACH(pve, pvtree, &pvh->pvh_root) {
 		pt_entry_t *ptep;
-		pd_entry_t **pdes;
+		pt_entry_t opte;
 
-		/* locks pmap */
-		pmap_map_ptes(pve->pv_pmap, &pmap2, &ptes, &pdes);
-#ifdef DIAGNOSTIC
-		if (!pmap_pdes_valid(pve->pv_va, pdes, NULL))
-			panic("pmap_change_attrs: mapping without PTP "
-			      "detected");
-#endif
-		ptep = &ptes[pl1_i(pve->pv_va)];
+		ptep = pmap_map_pte(pve->pv_ptp, pve->pv_va);
+retry:
 		opte = *ptep;
-		KASSERT(pmap_valid_entry(opte));
-		KDASSERT(pmap_pte2pa(opte) == VM_PAGE_TO_PHYS(pg));
+		if ((opte & (PG_FRAME | PG_V)) != expect) {
+			goto no_tlb_shootdown;
+		}
 		if (opte & clearbits) {
 			/* We need to do something */
 			if (clearbits == PG_RW) {
-				result |= PG_RW;
 
 				/*
 				 * On write protect we might not need to flush 
@@ -3208,8 +3265,12 @@ pmap_clear_attrs(struct vm_page *pg, uns
 				 */
 
 				/* First zap the RW bit! */
-				pmap_pte_clearbits(ptep, PG_RW); 
-				opte = *ptep;
+				if (pmap_pte_cas(ptep, opte, opte & ~PG_RW)
+				    != opte) {
+					goto retry;
+				}
+				opte &= ~PG_RW;
+				result |= PG_RW;
 
 				/*
 				 * Then test if it is not cached as RW the TLB
@@ -3224,7 +3285,10 @@ pmap_clear_attrs(struct vm_page *pg, uns
 			 */
 
 			/* zap! */
-			opte = pmap_pte_testset(ptep, (opte & ~(PG_U | PG_M)));
+			if (pmap_pte_cas(ptep, opte, opte & ~(PG_U | PG_M))
+			    != opte) {
+				goto retry;
+			}
 
 			result |= (opte & clearbits);
 			*myattrs |= (opte & ~(clearbits));
@@ -3232,10 +3296,9 @@ pmap_clear_attrs(struct vm_page *pg, uns
 			pmap_tlb_shootdown(pve->pv_pmap, pve->pv_va, 0, opte);
 		}
 no_tlb_shootdown:
-		pmap_unmap_ptes(pve->pv_pmap, pmap2);	/* unlocks pmap */
+		pmap_unmap_pte();
 	}
-
-	rw_exit(&pmap_main_lock);
+	mutex_spin_exit(&pvh->pvh_lock);
 
 	crit_enter();
 	pmap_tlb_shootwait();
@@ -3360,14 +3423,18 @@ pmap_unwire(struct pmap *pmap, vaddr_t v
 	pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);	/* locks pmap */
 
 	if (pmap_pdes_valid(va, pdes, NULL)) {
+		pt_entry_t *ptep = &ptes[pl1_i(va)];
+		pt_entry_t opte = *ptep;
 
 #ifdef DIAGNOSTIC
-		if (!pmap_valid_entry(ptes[pl1_i(va)]))
+		if (!pmap_valid_entry(opte))
 			panic("pmap_unwire: invalid (unmapped) va 0x%lx", va);
 #endif
-		if ((ptes[pl1_i(va)] & PG_W) != 0) {
-			pmap_pte_clearbits(&ptes[pl1_i(va)], PG_W);
-			pmap->pm_stats.wired_count--;
+		if ((opte & PG_W) != 0) {
+			pt_entry_t npte = opte & ~PG_W;
+
+			opte = pmap_pte_testset(ptep, npte);
+			pmap_stats_update_bypte(pmap, npte, opte);
 		}
 #ifdef DIAGNOSTIC
 		else {
@@ -3482,9 +3549,6 @@ pmap_enter(struct pmap *pmap, vaddr_t va
 	/* get a pve. */
 	freepve = pool_cache_get(&pmap_pv_cache, PR_NOWAIT);
 
-	/* get lock */
-	rw_enter(&pmap_main_lock, RW_READER);
-
 	pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);	/* locks pmap */
 	if (pmap == pmap_kernel()) {
 		ptp = NULL;
@@ -3534,8 +3598,7 @@ pmap_enter(struct pmap *pmap, vaddr_t va
 		 * change since we are replacing/changing a valid mapping.
 		 * wired count might change...
 		 */
-		pmap->pm_stats.wired_count +=
-		    ((npte & PG_W) ? 1 : 0 - (opte & PG_W) ? 1 : 0);
+		pmap_stats_update_bypte(pmap, npte, opte);
 
 		/*
 		 * if this is on the PVLIST, sync R/M bit
@@ -3602,8 +3665,7 @@ pmap_enter(struct pmap *pmap, vaddr_t va
 		 * change since we are replacing/changing a valid mapping.
 		 * wired count might change...
 		 */
-		pmap->pm_stats.wired_count +=
-		    ((npte & PG_W) ? 1 : 0 - (opte & PG_W) ? 1 : 0);
+		pmap_stats_update_bypte(pmap, npte, opte);
 
 		if (opte & PG_PVLIST) {
 			pg = PHYS_TO_VM_PAGE(pmap_pte2pa(opte));
@@ -3627,9 +3689,8 @@ pmap_enter(struct pmap *pmap, vaddr_t va
 				    (pt_entry_t *)vtomach((vaddr_t)ptep),
 				    npte, domid);
 				if (error) {
-					pmap->pm_stats.wired_count -=
-					    ((npte & PG_W) ? 1 : 0 -
-					    (opte & PG_W) ? 1 : 0);
+					pmap_stats_update_bypte(pmap, opte,
+					    npte);
 					splx(s);
 					mutex_spin_exit(&old_pvh->pvh_lock);
 					goto out;
@@ -3653,9 +3714,7 @@ pmap_enter(struct pmap *pmap, vaddr_t va
 			goto shootdown_test;
 		}
 	} else {	/* opte not valid */
-		pmap->pm_stats.resident_count++;
-		if (wired) 
-			pmap->pm_stats.wired_count++;
+		pmap_stats_update(pmap, 1, (wired) ? 1 : 0);
 		if (ptp)
 			ptp->wire_count++;
 	}
@@ -3670,13 +3729,11 @@ pmap_enter(struct pmap *pmap, vaddr_t va
 		splx(s);
 		if (error) {
 			if (pmap_valid_entry(opte)) {
-				pmap->pm_stats.wired_count -=
-				    ((npte & PG_W) ? 1 : 0 -
-				    (opte & PG_W) ? 1 : 0);
+				pmap_stats_update(pmap, 0,
+				    -((npte & PG_W) ? 1 : 0 -
+				    (opte & PG_W) ? 1 : 0));
 			} else {	/* opte not valid */
-				pmap->pm_stats.resident_count--;
-				if (wired) 
-					pmap->pm_stats.wired_count--;
+				pmap_stats_update(pmap, -1, wired ? -1 : 0);
 				if (ptp)
 					ptp->wire_count--;
 			}
@@ -3704,7 +3761,6 @@ shootdown_now:
 
 out:
 	pmap_unmap_ptes(pmap, pmap2);
-	rw_exit(&pmap_main_lock);
 
 	if (freepve != NULL) {
 		/* put back the pv, we don't need it. */
@@ -3771,7 +3827,7 @@ pmap_get_physpage(vaddr_t va, int level,
 		ptp->wire_count = 1;
 		*paddrp = VM_PAGE_TO_PHYS(ptp);
 	}
-	kpm->pm_stats.resident_count++;
+	pmap_stats_update(kpm, 1, 0);
 	return true;
 }
 
@@ -3928,7 +3984,6 @@ pmap_dump(struct pmap *pmap, vaddr_t sva
 	 * we lock in the pmap => pv_head direction
 	 */
 
-	rw_enter(&pmap_main_lock, RW_READER);
 	pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);	/* locks pmap */
 
 	/*
@@ -3956,7 +4011,6 @@ pmap_dump(struct pmap *pmap, vaddr_t sva
 		}
 	}
 	pmap_unmap_ptes(pmap, pmap2);
-	rw_exit(&pmap_main_lock);
 }
 #endif