Subject: Re: port-xen/30977: FPU troubles
To: Paul Ripke <stix@stix.id.au>
From: Manuel Bouyer <bouyer@antioche.eu.org>
List: port-xen
Date: 11/23/2005 13:47:04
--T4sUOijqQbZv57TR
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
On Wed, Nov 23, 2005 at 08:15:04AM +1100, Paul Ripke wrote:
> I've finally got around to trying xen (NetBSD-3.0BETA, xen 2.0.7) and
> have run into this PR in DOM0.
>
> First sign was xearth coredumping on login. Next was only "clicking"
> from xmms, and I've had xpdf core, too.
>
> A little debugging shows that the FPU state is not being correctly
> saved or restored during "some" context switches. The following code
> appears to exhibit the behaviour reliably:
Interesting. I modified your test prog to also print a thread id (see attached
file). I get the same result as you with a stock current kernel:
1 33 873373 -2147483648
1 55 2001359 -2147483648
0 62 2770587 -2147483648
1 92 3483395 -2147483648
0 128 5914000 -2147483648
1 132 5228374 -2147483648
1 172 6972647 6098061
0 182 8008230 6098061
0 222 9753133 -2147483648
1 232 9592029 -2147483648
1 272 11335805 -2147483648
0 282 12371751 -2147483648
1 312 13079804 13241628
0 328 14641093 -2147483648
1 352 14824306 14985832
1 412 17443856 -2147483648
0 482 21107482 20059525
But with a kernel with the FPU patch I posted here some times ago (see second
attachement), I get:
1 72 2622678 3646769
0 81 3646769 2622678
1 152 6105605 7113173
0 162 7113173 6105605
0 281 12357157 12217903
1 291 12217903 12357157
0 321 14100004 13958543
1 331 13958543 14970612
0 341 14970612 14100004
1 751 32317194 33249945
0 761 33249945 32317194
Now the values are just swapped between threads. It seems that the fpu context
is properly restored, but sometimes to the wrong thread ...
--
Manuel Bouyer <bouyer@antioche.eu.org>
NetBSD: 26 ans d'experience feront toujours la difference
--
--T4sUOijqQbZv57TR
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="fp2.c"
#include <assert.h>
#include <pthread.h>
#include <stdio.h>
#include <time.h>
void *
runloop(void *p)
{
int i;
double fi;
int *myid = p;
i = 1;
while (1) {
fi = i;
if ((int)fi != i) {
printf("%d %9ld %9d %9d\n", *myid, clock(), i, (int)fi);
}
i++;
}
}
int
main(int argc, char **argv)
{
pthread_t a, b;
int ia = 0, ib = 1;
setlinebuf(stdout);
assert(pthread_create(&a, NULL, &runloop, &ia) == 0);
assert(pthread_create(&b, NULL, &runloop, &ib) == 0);
assert(pthread_join(a, NULL) == 0);
return 0;
}
--T4sUOijqQbZv57TR
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=diff
Index: i386/machdep.c
===================================================================
RCS file: /cvsroot/src/sys/arch/xen/i386/machdep.c,v
retrieving revision 1.18
diff -u -r1.18 machdep.c
--- i386/machdep.c 19 Aug 2005 16:06:12 -0000 1.18
+++ i386/machdep.c 7 Nov 2005 15:55:22 -0000
@@ -2419,6 +2419,7 @@
if (l->l_addr->u_pcb.pcb_fpcpu != NULL)
npxsave_lwp(l, 0);
#endif
+ KASSERT((flags & _UC_FXSAVE) == 0);
if (flags & _UC_FXSAVE) {
if (i386_use_fxsave) {
memcpy(
@@ -2444,6 +2445,11 @@
}
/* If not set already. */
l->l_md.md_flags |= MDL_USEDFPU;
+#define frstor(addr) __asm("frstor %0" : : "m" (*addr))
+ frstor(&l->l_addr->u_pcb.pcb_savefpu.sv_87);
+#undef frstor
+ l->l_addr->u_pcb.pcb_fpcpu = curcpu();
+ curcpu()->ci_fpcurlwp = l;
#if 0
/* Apparently unused. */
l->l_addr->u_pcb.pcb_saveemc = mcp->mc_fp.fp_emcsts;
Index: i386/npx.c
===================================================================
RCS file: /cvsroot/src/sys/arch/xen/i386/npx.c,v
retrieving revision 1.4
diff -u -r1.4 npx.c
--- i386/npx.c 20 Mar 2005 13:12:59 -0000 1.4
+++ i386/npx.c 7 Nov 2005 15:55:22 -0000
@@ -70,7 +70,7 @@
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npx.c,v 1.4 2005/03/20 13:12:59 bouyer Exp $");
-#if 0
+#if 1
#define IPRINTF(x) printf x
#else
#define IPRINTF(x)
@@ -116,7 +116,7 @@
* 387 and 287 Numeric Coprocessor Extension (NPX) Driver.
*
* We do lazy initialization and switching using the TS bit in cr0 and the
- * MDL_USEDFPU bit in mdproc.
+ * MDL_USEDFPU bit in mdlwp.
*
* DNA exceptions are handled like this:
*
@@ -149,7 +149,6 @@
#define stts() HYPERVISOR_fpu_taskswitch()
#endif
-int npxdna(struct cpu_info *);
static int npxdna_notset(struct cpu_info *);
static int npxdna_s87(struct cpu_info *);
#ifdef I686_CPU
@@ -319,8 +318,7 @@
}
#endif
-void npxinit(ci)
- struct cpu_info *ci;
+void npxinit(struct cpu_info *ci)
{
lcr0(rcr0() & ~(CR0_EM|CR0_TS));
fninit();
@@ -534,7 +532,7 @@
{
struct lwp *l;
int s;
-#ifdef XXXXENDEBUG_LOW
+#if 1 /* def XXXXENDEBUG_LOW */
struct trapframe *f = (struct trapframe *)((uint32_t *)(void *)&ci)[1];
#endif
@@ -565,9 +563,11 @@
IPRINTF(("Save"));
npxsave_cpu(ci, 1);
} else {
+ clts();
IPRINTF(("Init"));
fninit();
fwait();
+ stts();
}
splx(s);
@@ -610,8 +610,11 @@
* fpu_save, if we're saving the FPU state not from interrupt
* context (f.i. during fork()). Just return in this case.
*/
- if (ci->ci_fpsaving)
+ if (ci->ci_fpsaving) {
+ printf("recursive npx trap; cr0=%x\n", rcr0());
+ Debugger();
return (1);
+ }
s = splipi(); /* lock out IPI's while we clean house.. */
#ifdef MULTIPROCESSOR
@@ -650,7 +653,7 @@
s = splipi();
ci->ci_fpcurlwp = l;
l->l_addr->u_pcb.pcb_fpcpu = ci;
- ci->ci_fpused = 1;
+ //ci->ci_fpused = 1;
splx(s);
@@ -689,8 +692,8 @@
if (l == NULL)
return;
- IPRINTF(("%s: fp CPU %s lwp %p\n", ci->ci_dev->dv_xname,
- save? "save" : "flush", l));
+ /* IPRINTF(("%s: fp CPU %s lwp %p\n", ci->ci_dev->dv_xname,
+ save? "save" : "flush", l)); */
if (save) {
#ifdef DIAGNOSTIC
@@ -720,12 +723,12 @@
* We set the TS bit in the saved CR0 for this process, so that it
* will get a DNA exception on any FPU instruction and force a reload.
*/
- l->l_addr->u_pcb.pcb_cr0 |= CR0_TS;
+ //l->l_addr->u_pcb.pcb_cr0 |= CR0_TS;
s = splipi();
l->l_addr->u_pcb.pcb_fpcpu = NULL;
ci->ci_fpcurlwp = NULL;
- ci->ci_fpused = 1;
+ //ci->ci_fpused = 1;
splx(s);
}
--T4sUOijqQbZv57TR--