Subject: towards MP profiling on powerpc/oea
To: None <port-powerpc@netbsd.org, port-macppc@netbsd.org>
From: Chuck Silvers <chuq@chuq.com>
List: port-powerpc
Date: 10/09/2005 10:14:32
--pWyiEgJYm5f9v55/
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

hi,

I've been trying to fix profiling with MP kernels on macppc.
in addition to the usual problem of the profiling data structures
not being locked, there's also the problem that we can take DSI traps
while accessing the profiling data, and the code handling these traps
will recurse back into mcount.  to deal with this, I wanted to mark
all the functions in the pmap_pte_spill() path with the gcc attribute
that prevents them from calling mcount, but currently that's all under
kernel_lock and I didn't want to disable profiling of kernel_lock
acquisition.  so instead, I tried moving all the code down to
pmap_pte_spill() out from under kernel_lock (using a separate lock to
protect the global hash table).  this seems to work fine for non-profiling
kernels, but a profiling kernel will quickly crash with corruption of the
pmap_pvo_table even if profiling is never enabled.

does anyone see what else might be wrong with kernel profiling after
the attached patch is applied?  anyone object to me checking this in
even though profiling doesn't work yet?

-Chuck

--pWyiEgJYm5f9v55/
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="diff.ppc-hash"

Index: arch/powerpc/include/profile.h
===================================================================
RCS file: /cvsroot/src/sys/arch/powerpc/include/profile.h,v
retrieving revision 1.6
diff -u -p -r1.6 profile.h
--- arch/powerpc/include/profile.h	7 Feb 2002 05:13:35 -0000	1.6
+++ arch/powerpc/include/profile.h	9 Oct 2005 16:24:20 -0000
@@ -26,6 +26,10 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifdef _KERNEL_OPT
+#include "opt_multiprocessor.h"
+#endif
+
 #define	_MCOUNT_DECL	void __mcount
 
 #ifdef PIC
@@ -71,13 +75,30 @@ __asm("	.globl	_mcount			\n" \
 "	.size	_mcount,_mcount_end-_mcount");
 
 #ifdef _KERNEL
+
+#ifdef MULTIPROCESSOR
+__cpu_simple_lock_t __mcount_lock;
+
+#define MCOUNT_ENTER_MP						\
+	__cpu_simple_lock(&__mcount_lock);
+#define MCOUNT_EXIT_MP						\
+	__cpu_simple_unlock(&__mcount_lock);
+
+#else
+#define MCOUNT_ENTER_MP
+#define MCOUNT_EXIT_MP
+#endif
+
 #define MCOUNT_ENTER						\
-	__asm volatile("mfmsr %0" : "=r"(s));			\
+	s = mfmsr();						\
 	if ((s & (PSL_IR | PSL_DR)) != (PSL_IR | PSL_DR))	\
 		return;		/* XXX */			\
 	s &= ~PSL_POW;						\
-	__asm volatile("mtmsr %0" :: "r"(s & ~PSL_EE))
+	mtmsr(s & ~PSL_EE);					\
+	MCOUNT_ENTER_MP
 
 #define MCOUNT_EXIT						\
-	__asm volatile("mtmsr %0" :: "r"(s))
+	MCOUNT_EXIT_MP						\
+	mtmsr(s)
+
 #endif
Index: arch/powerpc/include/oea/pmap.h
===================================================================
RCS file: /cvsroot/src/sys/arch/powerpc/include/oea/pmap.h,v
retrieving revision 1.6
diff -u -p -r1.6 pmap.h
--- arch/powerpc/include/oea/pmap.h	21 Nov 2003 22:57:14 -0000	1.6
+++ arch/powerpc/include/oea/pmap.h	9 Oct 2005 16:24:20 -0000
@@ -104,7 +104,8 @@ vaddr_t pmap_unsetusr (void);
 #define PMAP_NEED_PROCWR
 void pmap_procwr(struct proc *, vaddr_t, size_t);
 
-int pmap_pte_spill(struct pmap *, vaddr_t, boolean_t);
+int pmap_pte_spill(struct pmap *, vaddr_t, boolean_t)
+	__attribute__((__no_instrument_function__));
 
 #define	PMAP_NC			0x1000
 
Index: arch/powerpc/oea/pmap.c
===================================================================
RCS file: /cvsroot/src/sys/arch/powerpc/oea/pmap.c,v
retrieving revision 1.33
diff -u -p -r1.33 pmap.c
--- arch/powerpc/oea/pmap.c	27 Sep 2005 08:03:11 -0000	1.33
+++ arch/powerpc/oea/pmap.c	9 Oct 2005 16:24:22 -0000
@@ -72,6 +72,8 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.3
 #include "opt_ppcarch.h"
 #include "opt_altivec.h"
 #include "opt_pmap.h"
+#include "opt_multiprocessor.h"
+
 #include <sys/param.h>
 #include <sys/malloc.h>
 #include <sys/proc.h>
@@ -231,7 +233,8 @@ int pmapcheck = 1;
 int pmapcheck = 0;
 #endif
 void pmap_pvo_verify(void);
-STATIC void pmap_pvo_check(const struct pvo_entry *);
+STATIC void pmap_pvo_check(const struct pvo_entry *)
+	__attribute__((__no_instrument_function__));
 #define	PMAP_PVO_CHECK(pvo)	 		\
 	do {					\
 		if (pmapcheck)			\
@@ -240,7 +243,6 @@ STATIC void pmap_pvo_check(const struct 
 #else
 #define	PMAP_PVO_CHECK(pvo)	do { } while (/*CONSTCOND*/0)
 #endif
-STATIC int pmap_pte_insert(int, struct pte *);
 STATIC int pmap_pvo_enter(pmap_t, struct pool *, struct pvo_head *,
 	vaddr_t, paddr_t, register_t, int);
 STATIC void pmap_pvo_remove(struct pvo_entry *, int, struct pvo_head *);
@@ -267,6 +269,17 @@ static uint32_t pmap_vsid_bitmap[NPMAPS 
 
 static int pmap_initialized;
 
+#ifdef MULTIPROCESSOR
+__cpu_simple_lock_t pmap_hashlock;
+#define PMAP_HASHLOCK() \
+	__cpu_simple_lock(&pmap_hashlock);
+#define PMAP_HASHUNLOCK() \
+	__cpu_simple_unlock(&pmap_hashlock);
+#else
+#define PMAP_HASHLOCK()
+#define PMAP_HASHUNLOCK()
+#endif
+
 #if defined(DEBUG) || defined(PMAPDEBUG)
 #define	PMAPDEBUG_BOOT		0x0001
 #define	PMAPDEBUG_PTE		0x0002
@@ -496,12 +509,15 @@ pmap_interrupts_off(void)
 	register_t msr = MFMSR();
 	if (msr & PSL_EE)
 		MTMSR(msr & ~PSL_EE);
+	PMAP_HASHLOCK();
 	return msr;
 }
 
-static void
+static void __inline
 pmap_interrupts_restore(register_t msr)
 {
+
+	PMAP_HASHUNLOCK();
 	if (msr & PSL_EE)
 		MTMSR(msr);
 }
@@ -717,14 +733,8 @@ pmap_attr_save(struct vm_page *pg, int p
 static __inline int
 pmap_pte_compare(const volatile struct pte *pt, const struct pte *pvo_pt)
 {
-	if (pt->pte_hi == pvo_pt->pte_hi
-#if 0
-	    && ((pt->pte_lo ^ pvo_pt->pte_lo) &
-	        ~(PTE_REF|PTE_CHG)) == 0
-#endif
-	    )
-		return 1;
-	return 0;
+
+	return (pt->pte_hi == pvo_pt->pte_hi);
 }
 
 static __inline void
@@ -830,7 +840,7 @@ pmap_pte_change(volatile struct pte *pt,
  * Note: both the destination and source PTEs must not have PTE_VALID set.
  */
 
-STATIC int
+static __inline int
 pmap_pte_insert(int ptegidx, struct pte *pvo_pt)
 {
 	volatile struct pte *pt;
@@ -2436,9 +2446,9 @@ pmap_procwr(struct proc *p, vaddr_t va, 
 {
 	struct pvo_entry *pvo;
 	size_t offset = va & ADDR_POFF;
-	int s;
+	register_t msr;
 
-	s = splvm();
+	msr = pmap_interrupts_off();
 	while (len > 0) {
 		size_t seglen = PAGE_SIZE - offset;
 		if (seglen > len)
@@ -2453,7 +2463,7 @@ pmap_procwr(struct proc *p, vaddr_t va, 
 		len -= seglen;
 		offset = 0;
 	}
-	splx(s);
+	pmap_interrupts_restore(msr);
 }
 
 #if defined(DEBUG) || defined(PMAPCHECK) || defined(DDB)
Index: arch/powerpc/powerpc/trap.c
===================================================================
RCS file: /cvsroot/src/sys/arch/powerpc/powerpc/trap.c,v
retrieving revision 1.107
diff -u -p -r1.107 trap.c
--- arch/powerpc/powerpc/trap.c	2 Jun 2005 10:16:31 -0000	1.107
+++ arch/powerpc/powerpc/trap.c	9 Oct 2005 16:24:23 -0000
@@ -74,6 +74,8 @@ void trap(struct trapframe *);	/* Called
 int badaddr(void *, size_t);
 int badaddr_read(void *, size_t, int *);
 
+void trap(struct trapframe *) __attribute__((__no_instrument_function__));
+
 void
 trap(struct trapframe *frame)
 {
@@ -126,19 +128,15 @@ trap(struct trapframe *frame)
 		 * Only query UVM if no interrupts are active.
 		 */
 		if (ci->ci_intrdepth < 0) {
-			KERNEL_LOCK(LK_CANRECURSE|LK_EXCLUSIVE);
 			if ((va >> ADDR_SR_SHFT) == pcb->pcb_kmapsr) {
 				va &= ADDR_PIDX | ADDR_POFF;
 				va |= pcb->pcb_umapsr << ADDR_SR_SHFT;
 				map = &p->p_vmspace->vm_map;
-				/* KERNEL_PROC_LOCK(l); */
 #ifdef PPC_OEA64
 				if ((frame->dsisr & DSISR_NOTFOUND) &&
 				    vm_map_pmap(map)->pm_ste_evictions > 0 &&
 				    pmap_ste_spill(vm_map_pmap(map),
 					    trunc_page(va), FALSE)) {
-					/* KERNEL_PROC_UNLOCK(l); */
-					KERNEL_UNLOCK();
 					return;
 				}
 #endif
@@ -147,16 +145,16 @@ trap(struct trapframe *frame)
 				    vm_map_pmap(map)->pm_evictions > 0 &&
 				    pmap_pte_spill(vm_map_pmap(map),
 					    trunc_page(va), FALSE)) {
-					/* KERNEL_PROC_UNLOCK(l); */
-					KERNEL_UNLOCK();
 					return;
 				}
+				KERNEL_LOCK(LK_CANRECURSE|LK_EXCLUSIVE);
 				if (l->l_flag & L_SA) {
 					l->l_savp->savp_faultaddr = va;
 					l->l_flag |= L_SA_PAGEFAULT;
 				}
 #if defined(DIAGNOSTIC) && defined(PPC_OEA)
 			} else if ((va >> ADDR_SR_SHFT) == USER_SR) {
+				KERNEL_LOCK(LK_CANRECURSE|LK_EXCLUSIVE);
 				printf("trap: kernel %s DSI trap @ %#lx by %#lx"
 				    " (DSISR %#x): USER_SR unset\n",
 				    (frame->dsisr & DSISR_STORE)
@@ -166,6 +164,7 @@ trap(struct trapframe *frame)
 #endif
 			} else {
 				map = kernel_map;
+				KERNEL_LOCK(LK_CANRECURSE|LK_EXCLUSIVE);
 			}
 
 			if (frame->dsisr & DSISR_STORE)
@@ -185,7 +184,6 @@ trap(struct trapframe *frame)
 				if (rv == 0)
 					uvm_grow(p, trunc_page(va));
 				l->l_flag &= ~L_SA_PAGEFAULT;
-				/* KERNEL_PROC_UNLOCK(l); */
 			}
 			KERNEL_UNLOCK();
 			if (rv == 0)
@@ -209,13 +207,13 @@ trap(struct trapframe *frame)
 			    sizeof(fb->fb_fixreg));
 			return;
 		}
+		KERNEL_LOCK(LK_CANRECURSE|LK_EXCLUSIVE);
 		printf("trap: kernel %s DSI trap @ %#lx by %#lx (DSISR %#x, err"
 		    "=%d)\n", (frame->dsisr & DSISR_STORE) ? "write" : "read",
 		    va, frame->srr0, frame->dsisr, rv);
 		goto brain_damage2;
 	}
 	case EXC_DSI|EXC_USER:
-		KERNEL_PROC_LOCK(l);
 		ci->ci_ev_udsi.ev_count++;
 		if (frame->dsisr & DSISR_STORE)
 			ftype = VM_PROT_WRITE;
@@ -246,6 +244,7 @@ trap(struct trapframe *frame)
 			break;
 		}
 
+		KERNEL_PROC_LOCK(l);
 		if (l->l_flag & L_SA) {
 			l->l_savp->savp_faultaddr = (vaddr_t)frame->dar;
 			l->l_flag |= L_SA_PAGEFAULT;
@@ -290,12 +289,12 @@ trap(struct trapframe *frame)
 	case EXC_ISI:
 		ci->ci_ev_kisi.ev_count++;
 
+		KERNEL_LOCK(LK_CANRECURSE|LK_EXCLUSIVE);
 		printf("trap: kernel ISI by %#lx (SRR1 %#lx)\n",
 		    frame->srr0, frame->srr1);
 		goto brain_damage2;
 
 	case EXC_ISI|EXC_USER:
-		KERNEL_PROC_LOCK(l);
 		ci->ci_ev_isi.ev_count++;
 
 		/*
@@ -308,7 +307,6 @@ trap(struct trapframe *frame)
 		if (vm_map_pmap(map)->pm_ste_evictions > 0 &&
 		    pmap_ste_spill(vm_map_pmap(map), trunc_page(frame->srr0),
 				   TRUE)) {
-			KERNEL_PROC_UNLOCK(l);
 			break;
 		}
 #endif
@@ -316,10 +314,10 @@ trap(struct trapframe *frame)
 		if (vm_map_pmap(map)->pm_evictions > 0 &&
 		    pmap_pte_spill(vm_map_pmap(map), trunc_page(frame->srr0),
 				   TRUE)) {
-			KERNEL_PROC_UNLOCK(l);
 			break;
 		}
 
+		KERNEL_PROC_LOCK(l);
 		if (l->l_flag & L_SA) {
 			l->l_savp->savp_faultaddr = (vaddr_t)frame->srr0;
 			l->l_flag |= L_SA_PAGEFAULT;
@@ -490,17 +488,20 @@ trap(struct trapframe *frame)
 			    sizeof(fb->fb_fixreg));
 			return;
 		}
+		KERNEL_LOCK(LK_CANRECURSE|LK_EXCLUSIVE);
 		printf("trap: pid %d.%d (%s): kernel MCHK trap @"
 		    " %#lx (SRR1=%#lx)\n", p->p_pid, l->l_lid,
 		    p->p_comm, frame->srr0, frame->srr1);
 		goto brain_damage2;
 	}
 	case EXC_ALI:
+		KERNEL_LOCK(LK_CANRECURSE|LK_EXCLUSIVE);
 		printf("trap: pid %d.%d (%s): kernel ALI trap @ %#lx by %#lx "
 		    "(DSISR %#x)\n", p->p_pid, l->l_lid, p->p_comm,
 		    frame->dar, frame->srr0, frame->dsisr);
 		goto brain_damage2;
 	case EXC_PGM:
+		KERNEL_LOCK(LK_CANRECURSE|LK_EXCLUSIVE);
 		printf("trap: pid %d.%d (%s): kernel PGM trap @"
 		    " %#lx (SRR1=%#lx)\n", p->p_pid, l->l_lid,
 		    p->p_comm, frame->srr0, frame->srr1);

--pWyiEgJYm5f9v55/--