Subject: wired entry count problem on sun3x pmap
To: None <port-sun3@NetBSD.org>
From: Izumi Tsutsui <tsutsui@ceres.dti.ne.jp>
List: port-sun3
Date: 08/31/2006 15:32:24
Today I managed to track down the real problem in the sun3x pmap
which causes the "processes can't get memory" problem and the
"panic: get_a_table: out of A tables." (and also B, C tables)
panics during pkgsrc/lang/perl5 builds etc.

It isn't a simple resource shortage but bad wired entry accounting:

- pmap_enter(9) doesn't adjust wired counts on wired -> unwired changes
- pmap_remove(9) doesn't adjust wired counts even if removed pages
  have wired mappings

So any tables whose pages are unwired by these functions are never
put back into the active queues, and this causes the resource shortage.

These possible problems are already noted in a comment. It says
"This may be ok if pmap_unwire() is the only interface
 used to UNWIRE a page" but I guess that is no longer true these days.

The attached patch seems to fix the panics,
but I'd appreciate comments or suggestions from VM gurus.
---
Izumi Tsutsui


Index: sun3x/pmap.c
===================================================================
RCS file: /cvsroot/src/sys/arch/sun3/sun3x/pmap.c,v
retrieving revision 1.90
diff -u -r1.90 pmap.c
--- sun3x/pmap.c	10 May 2006 06:24:03 -0000	1.90
+++ sun3x/pmap.c	31 Aug 2006 06:04:51 -0000
@@ -1363,6 +1363,7 @@
 			}
 		}
 		a_tbl->at_ecnt = 0;
+		a_tbl->at_wcnt = 0;
 	}
 	if (relink) {
 		a_tbl->at_parent = NULL;
@@ -1398,6 +1399,7 @@
 			}
 		}
 		b_tbl->bt_ecnt = 0;
+		b_tbl->bt_wcnt = 0;
 	}
 
 	if (relink) {
@@ -1431,6 +1433,7 @@
 			}
 		}
 		c_tbl->ct_ecnt = 0;
+		c_tbl->ct_wcnt = 0;
 	}
 
 	if (relink) {
@@ -1821,23 +1824,34 @@
 		 *     change protection of a page
 		 *     change wiring status of a page
 		 *     remove the mapping of a page
-		 *
-		 * XXX - Semi critical: This code should unwire the PTE
-		 * and, possibly, associated parent tables if this is a
-		 * change wiring operation.  Currently it does not.
-		 *
-		 * This may be ok if pmap_unwire() is the only
-		 * interface used to UNWIRE a page.
 		 */
 
 		/* First check if this is a wiring operation. */
-		if (wired && (c_pte->attr.raw & MMU_SHORT_PTE_WIRED)) {
+		if (c_pte->attr.raw & MMU_SHORT_PTE_WIRED) {
 			/*
-			 * The PTE is already wired.  To prevent it from being
-			 * counted as a new wiring operation, reset the 'wired'
-			 * variable.
+			 * The PTE is already wired. To prevent it from being
+			 * counted as a new wiring operation, adjust wired
+			 * entry count here.
 			 */
-			wired = FALSE;
+			c_tbl->ct_wcnt--;
+			if (!wired) {
+				/*
+				 * The mapping of this PTE is being changed
+				 * from wired to unwired.
+				 * Adjust wired entry counts in each table and
+				 * set llevel flag to put unwired tables back
+				 * into the active pool.
+				 */
+				if (c_tbl->ct_wcnt == 0) {
+					llevel = NEWC;
+					if (--b_tbl->bt_wcnt == 0) {
+						llevel = NEWB;
+						if (--a_tbl->at_wcnt == 0) {
+							llevel = NEWA;
+						}
+					}
+				}
+			}
 		}
 
 		/* Is the new address the same as the old? */
@@ -1944,7 +1958,7 @@
 		pv->pv_idx = nidx;
 	}
 
-	/* Move any allocated tables back into the active pool. */
+	/* Move any allocated or unwired tables back into the active pool. */
 	
 	switch (llevel) {
 		case NEWA:
@@ -2902,8 +2916,6 @@
  **
  * Remove the mapping of a range of virtual addresses from the given pmap.
  *
- * If the range contains any wired entries, this function will probably create
- * disaster.
  */
 void 
 pmap_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva)
@@ -2972,6 +2984,7 @@
 	b_tmgr_t *b_tbl;
 	mmu_long_dte_t  *a_dte;
 	mmu_short_dte_t *b_dte;
+	uint8_t at_wcnt, bt_wcnt;
 
 	/*
 	 * The following code works with what I call a 'granularity
@@ -3005,6 +3018,8 @@
 	nstart = MMU_ROUND_UP_A(sva);
 	nend = MMU_ROUND_A(eva);
 
+	at_wcnt = a_tbl->at_wcnt;
+
 	if (sva < nstart) {
 		/*
 		 * This block is executed if the range starts between
@@ -3024,6 +3039,7 @@
 		if (MMU_VALID_DT(*a_dte)) {
 			b_dte = mmu_ptov(a_dte->addr.raw);
 			b_tbl = mmuB2tmgr(b_dte);
+			bt_wcnt = b_tbl->bt_wcnt;
 
 			/*
 			 * The sub range to be removed starts at the start
@@ -3038,6 +3054,10 @@
 			else
 				empty = pmap_remove_b(b_tbl, sva, nstart);
 
+
+			if (bt_wcnt > 0 && b_tbl->bt_wcnt == 0)
+				a_tbl->at_wcnt--;
+
 			/*
 			 * If the removal resulted in an empty B table,
 			 * invalidate the DTE that points to it and decrement
@@ -3076,8 +3096,13 @@
 				 */
 				b_dte = mmu_ptov(a_dte->addr.raw);
 				b_tbl = mmuB2tmgr(b_dte);
+				bt_wcnt = b_tbl->bt_wcnt;
+
 				free_b_table(b_tbl, TRUE);
 
+				if (bt_wcnt > 0 && b_tbl->bt_wcnt == 0)
+					a_tbl->at_wcnt--;
+
 				/*
 				 * Invalidate the DTE that points to the
 				 * B table and decrement the valid entry
@@ -3112,9 +3137,12 @@
 			 */
 			b_dte = mmu_ptov(a_dte->addr.raw);
 			b_tbl = mmuB2tmgr(b_dte);
+			bt_wcnt = b_tbl->bt_wcnt;
 
 			empty = pmap_remove_b(b_tbl, nend, eva);
 
+			if (bt_wcnt > 0 && b_tbl->bt_wcnt == 0)
+				a_tbl->at_wcnt--;
 			/*
 			 * If the removal resulted in an empty B table,
 			 * invalidate the DTE that points to it and decrement
@@ -3127,12 +3155,16 @@
 		}
 	}
 
+	if (at_wcnt > 0 && a_tbl->at_wcnt == 0)
+		TAILQ_INSERT_TAIL(&a_pool, a_tbl, at_link);
+
 	/*
 	 * If there are no more entries in the A table, release it
 	 * back to the available pool and return TRUE.
 	 */
 	if (a_tbl->at_ecnt == 0) {
 		a_tbl->at_parent = NULL;
+		a_tbl->at_wcnt = 0;	/* XXX bogus; should be KASSERT() */
 		TAILQ_REMOVE(&a_pool, a_tbl, at_link);
 		TAILQ_INSERT_HEAD(&a_pool, a_tbl, at_link);
 		empty = TRUE;
@@ -3159,21 +3191,29 @@
 	c_tmgr_t *c_tbl;
 	mmu_short_dte_t  *b_dte;
 	mmu_short_pte_t  *c_dte;
+	uint8_t bt_wcnt, ct_wcnt;
 	
-
 	nstart = MMU_ROUND_UP_B(sva);
 	nend = MMU_ROUND_B(eva);
 
+	bt_wcnt = b_tbl->bt_wcnt;
+
 	if (sva < nstart) {
 		idx = MMU_TIB(sva);
 		b_dte = &b_tbl->bt_dtbl[idx];
 		if (MMU_VALID_DT(*b_dte)) {
 			c_dte = mmu_ptov(MMU_DTE_PA(*b_dte));
 			c_tbl = mmuC2tmgr(c_dte);
+			ct_wcnt = c_tbl->ct_wcnt;
+
 			if (eva < nstart)
 				empty = pmap_remove_c(c_tbl, sva, eva);
 			else
 				empty = pmap_remove_c(c_tbl, sva, nstart);
+
+			if (ct_wcnt > 0 && c_tbl->ct_wcnt == 0)
+				b_tbl->bt_wcnt--;
+
 			if (empty) {
 				b_dte->attr.raw = MMU_DT_INVALID;
 				b_tbl->bt_ecnt--;
@@ -3188,7 +3228,13 @@
 			if (MMU_VALID_DT(*b_dte)) {
 				c_dte = mmu_ptov(MMU_DTE_PA(*b_dte));
 				c_tbl = mmuC2tmgr(c_dte);
+				ct_wcnt = c_tbl->ct_wcnt;
+
 				free_c_table(c_tbl, TRUE);
+
+				if (ct_wcnt > 0 && c_tbl->ct_wcnt == 0)
+					b_tbl->bt_wcnt--;
+
 				b_dte->attr.raw = MMU_DT_INVALID;
 				b_tbl->bt_ecnt--;
 			}
@@ -3202,7 +3248,12 @@
 		if (MMU_VALID_DT(*b_dte)) {
 			c_dte = mmu_ptov(MMU_DTE_PA(*b_dte));
 			c_tbl = mmuC2tmgr(c_dte);
+			ct_wcnt = c_tbl->ct_wcnt;
 			empty = pmap_remove_c(c_tbl, nend, eva);
+
+			if (ct_wcnt > 0 && c_tbl->ct_wcnt == 0)
+				b_tbl->bt_wcnt--;
+
 			if (empty) {
 				b_dte->attr.raw = MMU_DT_INVALID;
 				b_tbl->bt_ecnt--;
@@ -3210,8 +3261,12 @@
 		}
 	}
 
+	if (bt_wcnt > 0 && b_tbl->bt_wcnt == 0)
+		TAILQ_INSERT_TAIL(&b_pool, b_tbl, bt_link);
+
 	if (b_tbl->bt_ecnt == 0) {
 		b_tbl->bt_parent = NULL;
+		b_tbl->bt_wcnt = 0;	/* XXX bogus; should be KASSERT() */
 		TAILQ_REMOVE(&b_pool, b_tbl, bt_link);
 		TAILQ_INSERT_HEAD(&b_pool, b_tbl, bt_link);
 		empty = TRUE;
@@ -3232,18 +3287,28 @@
 	boolean_t empty;
 	int idx;
 	mmu_short_pte_t *c_pte;
+	uint8_t ct_wcnt;
 	
+	ct_wcnt = c_tbl->ct_wcnt;
+
 	idx = MMU_TIC(sva);
 	c_pte = &c_tbl->ct_dtbl[idx];
 	for (;sva < eva; sva += MMU_PAGE_SIZE, c_pte++) {
 		if (MMU_VALID_DT(*c_pte)) {
+			if (c_pte->attr.raw & MMU_SHORT_PTE_WIRED) {
+				c_tbl->ct_wcnt--;
+			}
 			pmap_remove_pte(c_pte);
 			c_tbl->ct_ecnt--;
 		}
 	}
 
+	if (ct_wcnt > 0 && c_tbl->ct_wcnt == 0)
+		TAILQ_INSERT_TAIL(&c_pool, c_tbl, ct_link);
+
 	if (c_tbl->ct_ecnt == 0) {
 		c_tbl->ct_parent = NULL;
+		c_tbl->ct_wcnt = 0;	/* XXX bogus; should be KASSERT() */
 		TAILQ_REMOVE(&c_pool, c_tbl, ct_link);
 		TAILQ_INSERT_HEAD(&c_pool, c_tbl, ct_link);
 		empty = TRUE;

---