NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: port-arm/52603: arm(v7?) vfp register corruption



Some progress. I added various KASSERTs, especially this one:

Index: subr_pcu.c
===================================================================
RCS file: /cvsroot/src/sys/kern/subr_pcu.c,v
retrieving revision 1.20
diff -u -p -u -r1.20 subr_pcu.c
--- subr_pcu.c	16 Mar 2017 16:13:21 -0000	1.20
+++ subr_pcu.c	15 Oct 2017 07:55:08 -0000
@@ -59,6 +59,8 @@ __KERNEL_RCSID(0, "$NetBSD: subr_pcu.c,v
 #include <sys/lwp.h>
 #include <sys/pcu.h>
 #include <sys/ipi.h>
+#include <arm/pcb.h>
+#include <arm/vfpreg.h>
 
 #if PCU_UNIT_COUNT > 0
 
@@ -92,25 +94,42 @@ extern const pcu_ops_t * const pcu_ops_m
  * pcu_switchpoint: release PCU state if the LWP is being run on another CPU.
  * This routine is called on each context switch by by mi_switch().
  */
+__asm(".fpu\tvfpv4");
 void
 pcu_switchpoint(lwp_t *l)
 {
 	const uint32_t pcu_valid = l->l_pcu_valid;
 	int s;
+	struct pcb * const pcb = lwp_getpcb(l);
 
 	KASSERTMSG(l == curlwp, "l %p != curlwp %p", l, curlwp);
 
 	if (__predict_true(pcu_valid == 0)) {
 		/* PCUs are not in use. */
+		KASSERT((pcb->pcb_vfp.vfp_fpexc & VFP_FPEXC_EN) == 0);
 		return;
 	}
 	s = splpcu();
 	for (u_int id = 0; id < PCU_UNIT_COUNT; id++) {
 		if ((pcu_valid & (1U << id)) == 0) {
+			if (id == PCU_FPU) {
+				KASSERT((pcb->pcb_vfp.vfp_fpexc & VFP_FPEXC_EN) == 0);
+				KASSERTMSG((armreg_fpexc_read() & VFP_FPEXC_EN) == 0,
+				    "fpexc 0x%x/0x%x vfpid 0x%x",
+				    pcb->pcb_vfp.vfp_fpexc, armreg_fpexc_read(), curcpu()->ci_vfp_id);
+			}
 			continue;
 		}
 		struct cpu_info * const pcu_ci = l->l_pcu_cpu[id];
 		if (pcu_ci == NULL || pcu_ci == l->l_cpu) {
+			if (pcu_ci == NULL && id == PCU_FPU) {
+				KASSERT((pcb->pcb_vfp.vfp_fpexc & VFP_FPEXC_EN) == 0);
+				KASSERTMSG((armreg_fpexc_read() & VFP_FPEXC_EN) == 0,
+				    "fpexc 0x%x/0x%x vfpid 0x%x",
+				    pcb->pcb_vfp.vfp_fpexc, armreg_fpexc_read(), curcpu()->ci_vfp_id);
+			} else {
+				KASSERT(pcu_ci->ci_pcu_curlwp[id] == l);
+			}
 			continue;
 		}
 		const pcu_ops_t * const pcu = pcu_ops_md_defs[id];

And it fired:
panic: kernel diagnostic assertion "(armreg_fpexc_read() & VFP_FPEXC_EN) == 0" failed: file "/dsk/l1/misc/bouyer/netbsd-8/src/sys/kern/subr_pcu.c", line 129 fpexc 0x0/0x40000000 vfpid 0x41023074
Stopped in pid 713.1 (fptest) at        netbsd:cpu_Debugger+0x4:        bx      r14
0xbb397eac: netbsd:vpanic+0x10
0xbb397ec4: netbsd:__udivmoddi4
0xbb397f04: netbsd:pcu_switchpoint+0x19c
0xbb397f64: netbsd:mi_switch+0x2a4
0xbb397f84: netbsd:preempt+0x80
0xbb397fac: netbsd:ast+0x58

So the lwp's pcb has the FPU disabled in its fpexc copy, but it's enabled
on the CPU (so the lwp will use the FPU with the wrong state).
This would suggest that cpu_switchto() failed to properly
reload fpexc, or that there is another code path, other than
cpu_switchto(), that can cause an lwp to return to mi_switch(). I haven't
found this other path yet ...

-- 
Manuel Bouyer <bouyer%antioche.eu.org@localhost>
     NetBSD: 26 ans d'experience feront toujours la difference
--


Home | Main Index | Thread Index | Old Index