Port-amd64 archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

amd64 panic in FPU (Xen and bare-metal)



Hello,
I'm tracking down an issue where the daily anita run on Xen panics in
the fpu code (see http://www-soc.lip6.fr/~bouyer/NetBSD-tests/xen/HEAD/)
    traceme_signalmasked_crash_fpe: [ 2047.9968889] panic: kernel diagnostic assertion "curlwp->l_md.md_flags & MDL_FPU_IN_CPU" failed: file "/home/source/ab/HEAD/src/sys/arch/x86/x86/fpu.c", line 524 
[ 2047.9968889] cpu0: Begin traceback...
[ 2047.9968889] vpanic() at netbsd:vpanic+0x146
[ 2047.9968889] kern_assert() at netbsd:kern_assert+0x48
[ 2047.9968889] fputrap() at netbsd:fputrap+0x171
[ 2047.9968889] cpu0: End traceback...

I can easily reproduce it. Trying to track it down, I added a hand-coded
KASSERT() in the syscall entry point that the MDL_FPU_IN_CPU flag is set
(see attached patch), and it fires too.
This would mean that occasionally, the kernel will return to
userland without calling fpu_handle_deferred() (via the HANDLE_DEFERRED_FPU
macro). But I failed to spot where this happens.

Now, what's interesting is that I can reproduce this on bare metal
too, with this script:
#!/bin/sh

cd /usr/tests/lib/libc/sys/
i=1
while true; do
        echo pass $i
        atf-run t_ptrace_wait4 | atf-report
        i=$(($i + 1))
done

so it's not a Xen-specific problem.
The panic happens eventually after several passes, so it looks like
a race condition.

The kernel prints:
[ 157.4185191] ci_flags 0xb002 md_flags 0x0


Did I miss something with my hand-coded assert in the syscall entry ?
Any idea how to debug this further ?

-- 
Manuel Bouyer <bouyer%antioche.eu.org@localhost>
     NetBSD: 26 ans d'experience feront toujours la difference
--
Index: amd64/amd64/locore.S
===================================================================
RCS file: /cvsroot/src/sys/arch/amd64/amd64/locore.S,v
retrieving revision 1.209
diff -u -p -u -r1.209 locore.S
--- amd64/amd64/locore.S	27 May 2020 19:33:40 -0000	1.209
+++ amd64/amd64/locore.S	21 Jun 2020 11:21:10 -0000
@@ -1347,6 +1347,10 @@ ENTRY(handle_syscall)
 	STI(si)
 
 	movq	CPUVAR(CURLWP),%r14
+	testl   $MDL_FPU_IN_CPU,L_MD_FLAGS(%r14)
+	jnz 1f
+	call myfpu_panic_1
+1:
 	incq	CPUVAR(NSYSCALL)	/* count it atomically */
 	movq	%rsp,L_MD_REGS(%r14)	/* save pointer to frame */
 	movq	L_PROC(%r14),%r15
@@ -1541,6 +1545,13 @@ IDTVEC_END(osyscall)
 	TEXT_USER_BEGIN
 	_ALIGN_TEXT
 LABEL(syscall_sysret)
+	pushq   %r14
+	movq    CPUVAR(CURLWP),%r14
+	testl   $MDL_FPU_IN_CPU,L_MD_FLAGS(%r14)
+	jnz 1f
+	call myfpu_panic
+1:
+	popq %r14
 	KMSAN_LEAVE
 	MDS_LEAVE
 	SVS_LEAVE
Index: x86/include/cpu.h
===================================================================
RCS file: /cvsroot/src/sys/arch/x86/include/cpu.h,v
retrieving revision 1.126
diff -u -p -u -r1.126 cpu.h
--- x86/include/cpu.h	19 Jun 2020 16:20:22 -0000	1.126
+++ x86/include/cpu.h	21 Jun 2020 11:21:10 -0000
@@ -358,6 +358,7 @@ struct cpu_info {
 #define	CPUF_RUNNING	0x2000		/* CPU is running */
 #define	CPUF_PAUSE	0x4000		/* CPU is paused in DDB */
 #define	CPUF_GO		0x8000		/* CPU should start running */
+#define	CPUF_FPU	0x10000		/* FPU active */
 
 #endif /* _KERNEL || __KMEMUSER */
 
Index: x86/x86/fpu.c
===================================================================
RCS file: /cvsroot/src/sys/arch/x86/x86/fpu.c,v
retrieving revision 1.65
diff -u -p -u -r1.65 fpu.c
--- x86/x86/fpu.c	14 Jun 2020 16:12:05 -0000	1.65
+++ x86/x86/fpu.c	21 Jun 2020 11:21:10 -0000
@@ -122,8 +122,12 @@ __KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.65
 #include <x86/fpu.h>
 
 #ifdef XENPV
-#define clts() HYPERVISOR_fpu_taskswitch(0)
-#define stts() HYPERVISOR_fpu_taskswitch(1)
+#define ASSERT_NOPR KASSERT(kpreempt_disabled() || curcpu()->ci_vcpu->evtchn_upcall_mask != 0)
+#define fpu_clts() {ASSERT_NOPR ; curcpu()->ci_flags |= CPUF_FPU; HYPERVISOR_fpu_taskswitch(0);}
+#define fpu_stts() {ASSERT_NOPR ; curcpu()->ci_flags &= ~CPUF_FPU; HYPERVISOR_fpu_taskswitch(1);}
+#else
+#define fpu_clts() {curcpu()->ci_flags |= CPUF_FPU; clts();}
+#define fpu_stts() {curcpu()->ci_flags &= ~CPUF_FPU; stts();}
 #endif
 
 void fpu_handle_deferred(void);
@@ -142,6 +146,7 @@ fpu_lwp_area(struct lwp *l)
 		fpu_save();
 	}
 	KASSERT(!(l->l_md.md_flags & MDL_FPU_IN_CPU));
+	KASSERT(!(curcpu()->ci_flags & CPUF_FPU) || l != curlwp);
 
 	return area;
 }
@@ -157,6 +162,7 @@ fpu_save_lwp(struct lwp *l)
 		KASSERT((l->l_flag & LW_SYSTEM) == 0);
 		fpu_area_save(area, x86_xsave_features);
 		l->l_md.md_flags &= ~MDL_FPU_IN_CPU;
+		KASSERT(!(curcpu()->ci_flags & CPUF_FPU));
 	}
 	kpreempt_enable();
 }
@@ -178,9 +184,9 @@ fpuinit(struct cpu_info *ci)
 	 * This might not be strictly necessary since it will be initialized
 	 * for each process. However it does no harm.
 	 */
-	clts();
+	fpu_clts();
 	fninit();
-	stts();
+	fpu_stts();
 }
 
 void
@@ -195,13 +201,13 @@ fpuinit_mxcsr_mask(void)
 	/* Disable interrupts, and enable FPU */
 	psl = x86_read_psl();
 	x86_disable_intr();
-	clts();
+	fpu_clts();
 
 	/* Fill in the FPU area */
 	fxsave(&fpusave);
 
 	/* Restore previous state */
-	stts();
+	fpu_stts();
 	x86_write_psl(psl);
 
 	if (fpusave.sv_xmm.fx_mxcsr_mask == 0) {
@@ -216,6 +222,7 @@ fpuinit_mxcsr_mask(void)
 	 * somewhere, it seems.
 	 */
 	x86_fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
+	fpu_stts();
 #endif
 }
 
@@ -263,13 +270,13 @@ fpu_area_save(void *area, uint64_t xsave
 		break;
 	}
 
-	stts();
+	fpu_stts();
 }
 
 void
 fpu_area_restore(const void *area, uint64_t xsave_features)
 {
-	clts();
+	fpu_clts();
 
 	switch (x86_fpu_save) {
 	case FPU_SAVE_FSAVE:
@@ -287,6 +294,7 @@ fpu_area_restore(const void *area, uint6
 		xrstor(area, xsave_features);
 		break;
 	}
+	KASSERT((curcpu()->ci_flags & CPUF_FPU));
 }
 
 void
@@ -308,6 +316,7 @@ fpu_switch(struct lwp *oldlwp, struct lw
 		oldlwp->l_md.md_flags &= ~MDL_FPU_IN_CPU;
 	}
 	KASSERT(!(newlwp->l_md.md_flags & MDL_FPU_IN_CPU));
+	KASSERT(!(curcpu()->ci_flags & CPUF_FPU));
 }
 
 void
@@ -337,7 +346,8 @@ fpu_lwp_abandon(struct lwp *l)
 	KASSERT(l == curlwp);
 	kpreempt_disable();
 	l->l_md.md_flags &= ~MDL_FPU_IN_CPU;
-	stts();
+	fpu_stts();
+	KASSERT(!(curcpu()->ci_flags & CPUF_FPU));
 	kpreempt_enable();
 }
 
@@ -359,6 +369,8 @@ fpu_kern_enter(void)
 	struct cpu_info *ci;
 	int s;
 
+	printf("fpu_kern_enter\n");
+
 	s = splhigh();
 
 	ci = curcpu();
@@ -381,7 +393,7 @@ fpu_kern_enter(void)
 	 * the false impression that there has been a task switch since
 	 * the last FPU usage requiring that we save the FPU state.
 	 */
-	clts();
+	fpu_clts();
 }
 
 /*
@@ -398,6 +410,7 @@ fpu_kern_leave(void)
 
 	KASSERT(ci->ci_ilevel == IPL_HIGH);
 	KASSERT(ci->ci_kfpu_spl != -1);
+	printf("fpu_kern_leave\n");
 
 	/*
 	 * Zero the fpu registers; otherwise we might leak secrets
@@ -410,7 +423,7 @@ fpu_kern_leave(void)
 	 * Set CR0_TS again so that the kernel can't accidentally use
 	 * the FPU.
 	 */
-	stts();
+	fpu_stts();
 
 	s = ci->ci_kfpu_spl;
 	ci->ci_kfpu_spl = -1;
@@ -521,6 +534,11 @@ fputrap(struct trapframe *frame)
 		panic("fpu trap from kernel, trapframe %p\n", frame);
 	}
 
+#ifdef XENPV
+	KASSERT(curcpu()->ci_vcpu->evtchn_upcall_mask != 0);
+#endif
+	KASSERTMSG(curcpu()->ci_flags & CPUF_FPU, "ci_flags 0x%x md_flags 0x%x",
+	    curcpu()->ci_flags, curlwp->l_md.md_flags);
 	KASSERT(curlwp->l_md.md_flags & MDL_FPU_IN_CPU);
 
 	if (frame->tf_trapno == T_XMM) {
@@ -982,3 +1000,28 @@ process_write_xstate(struct lwp *l, cons
 
 	return 0;
 }
+
+void myfpu_panic(void);
+void
+myfpu_panic(void)
+{
+	panic("!MDL_FPU_IN_CPU");
+}
+
+void myfpu_panic_1(void);
+void
+myfpu_panic_1(void)
+{
+	printf("ci_flags 0x%x md_flags 0x%x\n",
+	    curcpu()->ci_flags, curlwp->l_md.md_flags);
+	panic("!MDL_FPU_IN_CPU 1");
+}
+
+void myfpu_panic_2(void);
+void
+myfpu_panic_2(void)
+{
+	printf("ci_flags 0x%x md_flags 0x%x\n",
+	    curcpu()->ci_flags, curlwp->l_md.md_flags);
+	panic("!MDL_FPU_IN_CPU 2");
+}


Home | Main Index | Thread Index | Old Index