Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/arch Add support for saving the AVX-256 ymm registers du...



details:   https://anonhg.NetBSD.org/src/rev/c9896234e1dc
branches:  trunk
changeset: 793759:c9896234e1dc
user:      dsl <dsl%NetBSD.org@localhost>
date:      Tue Feb 25 22:16:52 2014 +0000

description:
Add support for saving the AVX-256 ymm registers during FPU context switches.
Add support for the forthcoming AVX-512 registers.
Code compiled with -mavx seems to work, but I've not tested context
  switches with live ymm registers.
There is a small cost on fork/exec (a larger area is copied/zeroed),
  but I don't think the ymm registers are read/written unless they
  have been used.
The code uses XSAVE on all CPUs; I'm not brave enough to enable XSAVEOPT.

diffstat:

 sys/arch/i386/i386/cpufunc.S              |  48 ++++++++++++++++++++++-
 sys/arch/x86/include/cpu_extended_state.h |  29 +++++++++----
 sys/arch/x86/include/cpufunc.h            |  31 +++++++-------
 sys/arch/x86/include/fpu.h                |   5 +-
 sys/arch/x86/x86/cpu.c                    |  22 ++++++++--
 sys/arch/x86/x86/fpu.c                    |  64 +++++++++++++++++++++---------
 sys/arch/x86/x86/vm_machdep.c             |   6 +-
 7 files changed, 151 insertions(+), 54 deletions(-)

diffs (truncated from 450 to 300 lines):

diff -r b854f373d590 -r c9896234e1dc sys/arch/i386/i386/cpufunc.S
--- a/sys/arch/i386/i386/cpufunc.S      Tue Feb 25 22:11:11 2014 +0000
+++ b/sys/arch/i386/i386/cpufunc.S      Tue Feb 25 22:16:52 2014 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: cpufunc.S,v 1.18 2014/02/12 23:24:09 dsl Exp $ */
+/*     $NetBSD: cpufunc.S,v 1.19 2014/02/25 22:16:52 dsl Exp $ */
 
 /*-
  * Copyright (c) 1998, 2007 The NetBSD Foundation, Inc.
@@ -38,7 +38,7 @@
 #include <sys/errno.h>
 
 #include <machine/asm.h>
-__KERNEL_RCSID(0, "$NetBSD: cpufunc.S,v 1.18 2014/02/12 23:24:09 dsl Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpufunc.S,v 1.19 2014/02/25 22:16:52 dsl Exp $");
 
 #include "opt_xen.h"
 
@@ -160,6 +160,24 @@
        movl    %eax, PCB_ONFAULT(%ecx)
 
        ret
+END(rdmsr_safe)
+
+/* uint64_t rdxcr(uint32_t) - read an extended control register */
+ENTRY(rdxcr)
+       movl    4(%esp), %ecx   /* extended control reg number */
+       xgetbv                  /* Read to %edx:%eax (the 64bit result) */
+       ret
+END(rdxcr)
+
+/* void wrxcr(uint32_t, uint64_t) - write an extended control register */
+ENTRY(wrxcr)
+       movl    4(%esp), %ecx   /* extended control reg number */
+       movl    8(%esp), %eax   /* feature mask bits (low 32 bits) */
+       movl    12(%esp), %edx  /* feature mask bits (high 32 bits) */
+       xsetbv                  /* Write %edx:%eax to the selected register */
+       ret
+END(wrxcr)
+       
 
 /*
  * MSR operations fault handler
@@ -389,15 +407,41 @@
        ret
 END(fxrstor)
 
+ENTRY(xsave)
+       movl    4(%esp), %ecx   /* address of the save area */
+       movl    8(%esp), %eax   /* feature mask bits */
+       movl    12(%esp), %edx
+       xsave   (%ecx)
+       ret
+END(xsave)
+
+ENTRY(xsaveopt)
+       movl    4(%esp), %ecx   /* address of the save area */
+       movl    8(%esp), %eax   /* feature mask bits */
+       movl    12(%esp), %edx
+       xsaveopt        (%ecx)
+       ret
+END(xsaveopt)
+
+ENTRY(xrstor)
+       movl    4(%esp), %ecx   /* address of the save area */
+       movl    8(%esp), %eax   /* feature mask bits */
+       movl    12(%esp), %edx
+       xrstor  (%ecx)          /* NB: %eax is the mask, not the address */
+       ret
+END(xrstor)
+
 ENTRY(x86_stmxcsr)
        movl    4(%esp), %eax
        stmxcsr (%eax)
        ret
+END(x86_stmxcsr)
 
 ENTRY(x86_ldmxcsr)
        movl    4(%esp), %eax
        ldmxcsr (%eax)
        ret
+END(x86_ldmxcsr)
 
 ENTRY(fldummy)
        ffree   %st(7)
diff -r b854f373d590 -r c9896234e1dc sys/arch/x86/include/cpu_extended_state.h
--- a/sys/arch/x86/include/cpu_extended_state.h Tue Feb 25 22:11:11 2014 +0000
+++ b/sys/arch/x86/include/cpu_extended_state.h Tue Feb 25 22:16:52 2014 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: cpu_extended_state.h,v 1.8 2014/02/18 18:39:10 dsl Exp $       */
+/*     $NetBSD: cpu_extended_state.h,v 1.9 2014/02/25 22:16:52 dsl Exp $       */
 
 #ifndef _X86_CPU_EXTENDED_STATE_H_
 #define _X86_CPU_EXTENDED_STATE_H_
@@ -122,19 +122,13 @@
        uint16_t        fxo_dflt_cw;    /* Control word for signal handlers */
 };
 
-union savefpu {
-       struct save87           sv_87;
-       struct fxsave           sv_xmm;
-       struct fxsave_os        sv_os;
-};
-
 /*
- * For XSAVE a 64byte header follows the above.
+ * For XSAVE a 64byte header follows the fxsave data.
  * Currently it only contains one field of which only 3 bits are defined.
  * Some other parts must be zero - zero it all.
  *
  * The xsh_xstate_bv bits match those of XCR0:
- *   XCR0_X87        0x00000001      x87 FPU/MMX state (always set)
+ *   XCR0_X87        0x00000001      x87 FPU/MMX state
  *   XCR0_SSE        0x00000002      SSE state
  *   XCR0_AVX        0x00000004      AVX state (ymmn registers)
  *
@@ -143,11 +137,12 @@
  */
 
 struct xsave_header {
+       uint64_t        xsh_fxsave[64]; /* to align in the union */
        uint64_t        xsh_xstate_bv;  /* bitmap of saved sub structures */
        uint64_t        xsh_rsrvd[2];   /* must be zero */
        uint64_t        xsh_reserved[5];/* best if zero */
 };
-__CTASSERT(sizeof (struct xsave_header) == 64);
+__CTASSERT(sizeof (struct xsave_header) == 512 + 64);
 
 /*
  * The ymm save area actually follows the xsave_header.
@@ -157,6 +152,20 @@
 };
 __CTASSERT(sizeof (struct xsave_ymm) == 256);
 
+/*
+ * The following union is placed at the end of the pcb.
+ * It is defined this way to separate the definitions and to
+ * minimise the number of union/struct selectors.
+ * NB: Some userspace stuff (eg firefox) uses it to parse ucontext.
+ */
+union savefpu {
+       struct save87           sv_87;          /* fnsave/frstor layout */
+       struct fxsave           sv_xmm;         /* fxsave/fxrstor layout */
+#ifdef _KERNEL
+       struct fxsave_os        sv_os;
+       struct xsave_header     sv_xsave_hdr;   /* fxsave area + xsave header */
+#endif
+};
 
 /*
  * 80387 control and status word bits
diff -r b854f373d590 -r c9896234e1dc sys/arch/x86/include/cpufunc.h
--- a/sys/arch/x86/include/cpufunc.h    Tue Feb 25 22:11:11 2014 +0000
+++ b/sys/arch/x86/include/cpufunc.h    Tue Feb 25 22:16:52 2014 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: cpufunc.h,v 1.17 2014/02/13 19:37:08 dsl Exp $ */
+/*     $NetBSD: cpufunc.h,v 1.18 2014/02/25 22:16:52 dsl Exp $ */
 
 /*-
  * Copyright (c) 1998, 2007 The NetBSD Foundation, Inc.
@@ -77,27 +77,36 @@
 void   x86_stihlt(void);
 u_int  x86_getss(void);
 
-struct save87;
-struct fxsave;
+/* fpu save, restore etc */
+union savefpu;
 void   fldcw(const uint16_t *);
 void   fnclex(void);
 void   fninit(void);
-void   fnsave(struct save87 *);
+void   fnsave(union savefpu *);
 void   fnstcw(uint16_t *);
 uint16_t fngetsw(void);
 void   fnstsw(uint16_t *);
-void   frstor(const struct save87 *);
+void   frstor(const union savefpu *);
 void   fwait(void);
 void   clts(void);
 void   stts(void);
-void   fxsave(struct fxsave *);
-void   fxrstor(const struct fxsave *);
+void   fxsave(union savefpu *);
+void   fxrstor(const union savefpu *);
 void   x86_ldmxcsr(const uint32_t *);
 void   x86_stmxcsr(uint32_t *);
 
 void   fldummy(void);
 void   fp_divide_by_0(void);
 
+/* Extended processor state functions (for AVX registers etc) */
+
+uint64_t rdxcr(uint32_t);              /* xgetbv */
+void   wrxcr(uint32_t, uint64_t);      /* xsetbv */
+
+void   xrstor(const union savefpu *, uint64_t);
+void   xsave(union savefpu *, uint64_t);
+void   xsaveopt(union savefpu *, uint64_t);
+
 void   x86_monitor(const void *, uint32_t, uint32_t);
 void   x86_mwait(uint32_t, uint32_t);
 /* x86_cpuid2() writes four 32bit values, %eax, %ebx, %ecx and %edx */
@@ -134,14 +143,6 @@
 void           setfs(int);
 void           setusergs(int);
 
-/* Extended processor state functions (for AVX registers etc) */
-
-uint64_t       rdxcr(uint32_t);                /* xgetbv */
-void           wrxcr(uint32_t, uint64_t);      /* xsetgv */
-void           xrstor(const void *, uint64_t);
-void           xsave(void *, uint64_t);
-void           xsaveopt(const void *, uint64_t);
-
 #endif /* _KERNEL */
 
 #endif /* !_X86_CPUFUNC_H_ */
diff -r b854f373d590 -r c9896234e1dc sys/arch/x86/include/fpu.h
--- a/sys/arch/x86/include/fpu.h        Tue Feb 25 22:11:11 2014 +0000
+++ b/sys/arch/x86/include/fpu.h        Tue Feb 25 22:16:52 2014 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: fpu.h,v 1.5 2014/02/23 22:35:27 dsl Exp $      */
+/*     $NetBSD: fpu.h,v 1.6 2014/02/25 22:16:52 dsl Exp $      */
 
 #ifndef        _X86_FPU_H_
 #define        _X86_FPU_H_
@@ -28,6 +28,9 @@
 /* Reset control words only - for signal handlers */
 void fpu_save_area_reset(struct lwp *);
 
+/* Copy data outside pcb during fork */
+void fpu_save_area_fork(struct pcb *, const struct pcb *);
+
 /* Load FP registers with user-supplied values */
 void process_write_fpregs_xmm(struct lwp *lwp, const struct fxsave *fpregs);
 void process_write_fpregs_s87(struct lwp *lwp, const struct save87 *fpregs);
diff -r b854f373d590 -r c9896234e1dc sys/arch/x86/x86/cpu.c
--- a/sys/arch/x86/x86/cpu.c    Tue Feb 25 22:11:11 2014 +0000
+++ b/sys/arch/x86/x86/cpu.c    Tue Feb 25 22:16:52 2014 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: cpu.c,v 1.109 2014/02/19 21:23:02 dsl Exp $    */
+/*     $NetBSD: cpu.c,v 1.110 2014/02/25 22:16:52 dsl Exp $    */
 
 /*-
  * Copyright (c) 2000-2012 NetBSD Foundation, Inc.
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.109 2014/02/19 21:23:02 dsl Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.110 2014/02/25 22:16:52 dsl Exp $");
 
 #include "opt_ddb.h"
 #include "opt_mpbios.h"                /* for MPDEBUG */
@@ -552,29 +552,41 @@
 void
 cpu_init(struct cpu_info *ci)
 {
+       uint32_t cr4;
 
        lcr0(rcr0() | CR0_WP);
 
+       cr4 = rcr4();
        /*
         * On a P6 or above, enable global TLB caching if the
         * hardware supports it.
         */
        if (cpu_feature[0] & CPUID_PGE)
-               lcr4(rcr4() | CR4_PGE); /* enable global TLB caching */
+               cr4 |= CR4_PGE; /* enable global TLB caching */
 
        /*
         * If we have FXSAVE/FXRESTOR, use them.
         */
        if (cpu_feature[0] & CPUID_FXSR) {
-               lcr4(rcr4() | CR4_OSFXSR);
+               cr4 |= CR4_OSFXSR;
 
                /*
                 * If we have SSE/SSE2, enable XMM exceptions.
                 */
                if (cpu_feature[0] & (CPUID_SSE|CPUID_SSE2))
-                       lcr4(rcr4() | CR4_OSXMMEXCPT);
+                       cr4 |= CR4_OSXMMEXCPT;
        }
 
+       /* If xsave is supported, enable it */
+       if (cpu_feature[1] & CPUID2_XSAVE)
+               cr4 |= CR4_OSXSAVE;
+
+       lcr4(cr4);
+
+       /* If xsave is enabled, enable all fpu features */
+       if (cr4 & CR4_OSXSAVE)
+               wrxcr(0, x86_xsave_features & XCR0_FPU);
+
 #ifdef MTRR
        /*
         * On a P6 or above, initialize MTRR's if the hardware supports them.



Home | Main Index | Thread Index | Old Index