Subject: Re: port-xen/30977: FPU troubles
To: Manuel Bouyer <bouyer@antioche.eu.org>
From: Paul Ripke <stix@stix.id.au>
List: port-xen
Date: 12/19/2005 09:34:01
OK, I've had a closer look at this, and come up with the included patch,
which appears to work fine for me, with threaded and non-threaded 
programs.
I can't repeat the behaviour of the FPU state being not being restored,
restored to the wrong thread, or wiped.

Thoughts behind the patch:
- since clts() is a no-op under xen 2 (in xen 3, 
HYPERVISOR_fpu_taskswitch
   takes an int parameter allowing set/clear), it makes no sense to 
stts()
   inside the DNA handler. We can't clear it again.
- I don't get the npxsave_lwp() calls in the DNA handler. On SMP, it's
   used to dump an LWP's FPU state from a different CPU. On a 
uniprocessor,
   the FPU state has already been dumped by npxsave_cpu(). SMP support 
isn't
   relevant for xen 2.

Anyway, please test the patch and see how it goes. I note that my sound
(SBlive) is still crook, looks like another bug.

Index: sys/arch/xen/i386/machdep.c
===================================================================
RCS file: /export/netbsd/cvsroot/src/sys/arch/xen/i386/machdep.c,v
retrieving revision 1.13.2.3
diff -u -d -b -w -r1.13.2.3 machdep.c
--- sys/arch/xen/i386/machdep.c	25 Aug 2005 20:16:21 -0000	1.13.2.3
+++ sys/arch/xen/i386/machdep.c	18 Dec 2005 08:00:53 -0000
@@ -1,4 +1,4 @@
-/*	$NetBSD$	*/
+/*	$NetBSD: machdep.c,v 1.13.2.3 2005/08/25 20:16:21 tron Exp $	*/
  /*	NetBSD: machdep.c,v 1.559 2004/07/22 15:12:46 mycroft Exp 	*/

  /*-
@@ -73,7 +73,7 @@
   */

  #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD$");
+__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.13.2.3 2005/08/25 20:16:21 
tron Exp $");

  #include "opt_beep.h"
  #include "opt_compat_ibcs2.h"
@@ -1174,8 +1174,10 @@

  #if NNPX > 0
  	/* If we were using the FPU, forget about it. */
-	if (l->l_addr->u_pcb.pcb_fpcpu != NULL)
+	if (l->l_addr->u_pcb.pcb_fpcpu != NULL) {
  		npxsave_lwp(l, 0);
+		HYPERVISOR_fpu_taskswitch();
+	}
  #endif

  #ifdef USER_LDT
@@ -2335,6 +2337,7 @@
  		 */
  		if (l->l_addr->u_pcb.pcb_fpcpu) {
  			npxsave_lwp(l, 1);
+			HYPERVISOR_fpu_taskswitch();
  		}
  #endif
  		if (i386_use_fxsave) {
@@ -2418,8 +2421,10 @@
  		/*
  		 * If we were using the FPU, forget that we were.
  		 */
-		if (l->l_addr->u_pcb.pcb_fpcpu != NULL)
+		if (l->l_addr->u_pcb.pcb_fpcpu != NULL) {
  			npxsave_lwp(l, 0);
+			HYPERVISOR_fpu_taskswitch();
+		}
  #endif
  		if (flags & _UC_FXSAVE) {
  			if (i386_use_fxsave) {
Index: sys/arch/xen/i386/npx.c
===================================================================
RCS file: /export/netbsd/cvsroot/src/sys/arch/xen/i386/npx.c,v
retrieving revision 1.3.14.1
diff -u -d -b -w -r1.3.14.1 npx.c
--- sys/arch/xen/i386/npx.c	20 Mar 2005 14:38:21 -0000	1.3.14.1
+++ sys/arch/xen/i386/npx.c	18 Dec 2005 21:40:44 -0000
@@ -1,4 +1,4 @@
-/*	$NetBSD$	*/
+/*	$NetBSD: npx.c,v 1.3.14.1 2005/03/20 14:38:21 tron Exp $	*/
  /*	NetBSD: npx.c,v 1.103 2004/03/21 10:56:24 simonb Exp 	*/

  /*-
@@ -68,10 +68,11 @@
   */

  #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD$");
+__KERNEL_RCSID(0, "$NetBSD: npx.c,v 1.3.14.1 2005/03/20 14:38:21 tron 
Exp $");

  #if 0
  #define IPRINTF(x)	printf x
+#define	XXXXENDEBUG_LOW
  #else
  #define	IPRINTF(x)
  #endif
@@ -575,12 +576,8 @@
  	KDASSERT(ci->ci_fpcurlwp == NULL);
  #ifndef MULTIPROCESSOR
  	KDASSERT(l->l_addr->u_pcb.pcb_fpcpu == NULL);
-#else
-	if (l->l_addr->u_pcb.pcb_fpcpu != NULL)
-		npxsave_lwp(l, 1);
  #endif
  	l->l_addr->u_pcb.pcb_cr0 &= ~CR0_TS;
-	clts();
  	s = splipi();
  	ci->ci_fpcurlwp = l;
  	l->l_addr->u_pcb.pcb_fpcpu = ci;
@@ -629,11 +626,9 @@
  	if (ci->ci_fpcurlwp != NULL)
  		npxsave_cpu(ci, 1);
  	else {
-		clts();
  		IPRINTF(("%s: fp init\n", ci->ci_dev->dv_xname));
  		fninit();
  		fwait();
-		stts();
  	}
  	splx(s);

@@ -641,12 +636,8 @@
  	KDASSERT(ci->ci_fpcurlwp == NULL);
  #ifndef MULTIPROCESSOR
  	KDASSERT(l->l_addr->u_pcb.pcb_fpcpu == NULL);
-#else
-	if (l->l_addr->u_pcb.pcb_fpcpu != NULL)
-		npxsave_lwp(l, 1);
  #endif
  	l->l_addr->u_pcb.pcb_cr0 &= ~CR0_TS;
-	clts();
  	s = splipi();
  	ci->ci_fpcurlwp = l;
  	l->l_addr->u_pcb.pcb_fpcpu = ci;
@@ -710,7 +701,6 @@
  		  * which report FP failures via traps rather than irq13).
  		  * XXX punting for now..
  		  */
-		clts();
  		ci->ci_fpsaving = 1;
  		fpu_save(&l->l_addr->u_pcb.pcb_savefpu);
  		ci->ci_fpsaving = 0;

--
Paul Ripke