NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: port-arm/52603: arm(v7?) vfp register corruption



The following reply was made to PR port-arm/52603; it has been noted by GNATS.

From: Manuel Bouyer <bouyer%antioche.eu.org@localhost>
To: port-arm-maintainer%NetBSD.org@localhost, gnats-bugs%NetBSD.org@localhost,
        netbsd-bugs%NetBSD.org@localhost
Cc: 
Subject: Re: port-arm/52603: arm(v7?) vfp register corruption
Date: Sun, 15 Oct 2017 10:02:53 +0200

 Some progress. I added various KASSERTs, especially this one:
 
 Index: subr_pcu.c
 ===================================================================
 RCS file: /cvsroot/src/sys/kern/subr_pcu.c,v
 retrieving revision 1.20
 diff -u -p -u -r1.20 subr_pcu.c
 --- subr_pcu.c	16 Mar 2017 16:13:21 -0000	1.20
 +++ subr_pcu.c	15 Oct 2017 07:55:08 -0000
 @@ -59,6 +59,8 @@ __KERNEL_RCSID(0, "$NetBSD: subr_pcu.c,v
  #include <sys/lwp.h>
  #include <sys/pcu.h>
  #include <sys/ipi.h>
 +#include <arm/pcb.h>
 +#include <arm/vfpreg.h>
  
  #if PCU_UNIT_COUNT > 0
  
 @@ -92,25 +94,42 @@ extern const pcu_ops_t * const pcu_ops_m
   * pcu_switchpoint: release PCU state if the LWP is being run on another CPU.
   * This routine is called on each context switch by by mi_switch().
   */
 +__asm(".fpu\tvfpv4");
  void
  pcu_switchpoint(lwp_t *l)
  {
  	const uint32_t pcu_valid = l->l_pcu_valid;
  	int s;
 +	struct pcb * const pcb = lwp_getpcb(l);
  
  	KASSERTMSG(l == curlwp, "l %p != curlwp %p", l, curlwp);
  
  	if (__predict_true(pcu_valid == 0)) {
  		/* PCUs are not in use. */
 +		KASSERT((pcb->pcb_vfp.vfp_fpexc & VFP_FPEXC_EN) == 0);
  		return;
  	}
  	s = splpcu();
  	for (u_int id = 0; id < PCU_UNIT_COUNT; id++) {
  		if ((pcu_valid & (1U << id)) == 0) {
 +			if (id == PCU_FPU) {
 +				KASSERT((pcb->pcb_vfp.vfp_fpexc & VFP_FPEXC_EN) == 0);
 +				KASSERTMSG((armreg_fpexc_read() & VFP_FPEXC_EN) == 0,
 +				    "fpexc 0x%x/0x%x vfpid 0x%x",
 +				    pcb->pcb_vfp.vfp_fpexc, armreg_fpexc_read(), curcpu()->ci_vfp_id);
 +			}
  			continue;
  		}
  		struct cpu_info * const pcu_ci = l->l_pcu_cpu[id];
  		if (pcu_ci == NULL || pcu_ci == l->l_cpu) {
 +			if (pcu_ci == NULL && id == PCU_FPU) {
 +				KASSERT((pcb->pcb_vfp.vfp_fpexc & VFP_FPEXC_EN) == 0);
 +				KASSERTMSG((armreg_fpexc_read() & VFP_FPEXC_EN) == 0,
 +				    "fpexc 0x%x/0x%x vfpid 0x%x",
 +				    pcb->pcb_vfp.vfp_fpexc, armreg_fpexc_read(), curcpu()->ci_vfp_id);
 +			} else {
 +				KASSERT(pcu_ci->ci_pcu_curlwp[id] == l);
 +			}
  			continue;
  		}
  		const pcu_ops_t * const pcu = pcu_ops_md_defs[id];
 
 And it fired:
 panic: kernel diagnostic assertion "(armreg_fpexc_read() & VFP_FPEXC_EN) == 0" failed: file "/dsk/l1/misc/bouyer/netbsd-8/src/sys/kern/subr_pcu.c", line 129 fpexc 0x0/0x40000000 vfpid 0x41023074
 Stopped in pid 713.1 (fptest) at        netbsd:cpu_Debugger+0x4:        bx      r14
 0xbb397eac: netbsd:vpanic+0x10
 0xbb397ec4: netbsd:__udivmoddi4
 0xbb397f04: netbsd:pcu_switchpoint+0x19c
 0xbb397f64: netbsd:mi_switch+0x2a4
 0xbb397f84: netbsd:preempt+0x80
 0xbb397fac: netbsd:ast+0x58
 
 So the lwp's pcb has the FPU disabled in its fpexc copy, but it's enabled
 on the CPU (so the lwp will use the FPU with the wrong state).
 This would suggest that cpu_switchto() failed to properly
 reload fpexc, or that there is another code path that can cause a lwp
 to return to mi_switch(), which is not cpu_switchto(). I haven't found
 this other path yet ...
 
 -- 
 Manuel Bouyer <bouyer%antioche.eu.org@localhost>
      NetBSD: 26 ans d'experience feront toujours la difference
 --
 


Home | Main Index | Thread Index | Old Index