Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/arch Add support for saving the AVX-256 ymm registers du...
details: https://anonhg.NetBSD.org/src/rev/c9896234e1dc
branches: trunk
changeset: 793759:c9896234e1dc
user: dsl <dsl%NetBSD.org@localhost>
date: Tue Feb 25 22:16:52 2014 +0000
description:
Add support for saving the AVX-256 ymm registers during FPU context switches.
Add support for the forthcoming AVX-512 registers.
Code compiled with -mavx seems to work, but I've not tested context
switches with live ymm registers.
There is a small cost on fork/exec (a larger area is copied/zeroed),
but I don't think the ymm registers are read/written unless they
have been used.
The code uses XSAVE on all cpus, I'm not brave enough to enable XSAVEOPT.
diffstat:
sys/arch/i386/i386/cpufunc.S | 48 ++++++++++++++++++++++-
sys/arch/x86/include/cpu_extended_state.h | 29 +++++++++----
sys/arch/x86/include/cpufunc.h | 31 +++++++-------
sys/arch/x86/include/fpu.h | 5 +-
sys/arch/x86/x86/cpu.c | 22 ++++++++--
sys/arch/x86/x86/fpu.c | 64 +++++++++++++++++++++---------
sys/arch/x86/x86/vm_machdep.c | 6 +-
7 files changed, 151 insertions(+), 54 deletions(-)
diffs (truncated from 450 to 300 lines):
diff -r b854f373d590 -r c9896234e1dc sys/arch/i386/i386/cpufunc.S
--- a/sys/arch/i386/i386/cpufunc.S Tue Feb 25 22:11:11 2014 +0000
+++ b/sys/arch/i386/i386/cpufunc.S Tue Feb 25 22:16:52 2014 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: cpufunc.S,v 1.18 2014/02/12 23:24:09 dsl Exp $ */
+/* $NetBSD: cpufunc.S,v 1.19 2014/02/25 22:16:52 dsl Exp $ */
/*-
* Copyright (c) 1998, 2007 The NetBSD Foundation, Inc.
@@ -38,7 +38,7 @@
#include <sys/errno.h>
#include <machine/asm.h>
-__KERNEL_RCSID(0, "$NetBSD: cpufunc.S,v 1.18 2014/02/12 23:24:09 dsl Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpufunc.S,v 1.19 2014/02/25 22:16:52 dsl Exp $");
#include "opt_xen.h"
@@ -160,6 +160,24 @@
movl %eax, PCB_ONFAULT(%ecx)
ret
+END(rdmsr_safe)
+
+/* uint64_t rdxcr(uint32_t) */
+ENTRY(rdxcr)
+ movl 4(%esp), %ecx /* extended control reg number */
+ xgetbv /* Read to %edx:%eax */
+ ret
+END(rdxcr)
+
+/* void wrxcr(uint32_t, uint64_t) */
+ENTRY(wrxcr)
+ movl 4(%esp), %ecx /* extended control reg number */
+ movl 8(%esp), %eax /* feature mask bits */
+ movl 12(%esp), %edx /* upper 32 bits of the 64-bit argument */
+ xsetbv
+ ret
+END(wrxcr)
+
/*
* MSR operations fault handler
@@ -389,15 +407,41 @@
ret
END(fxrstor)
+ENTRY(xsave) /* void xsave(union savefpu *, uint64_t) */
+ movl 4(%esp), %ecx /* first argument: save area pointer */
+ movl 8(%esp), %eax /* feature mask bits */
+ movl 12(%esp), %edx /* upper 32 bits of the feature mask */
+ xsave (%ecx) /* memory operand is the save area in %ecx */
+ ret
+END(xsave)
+
+ENTRY(xsaveopt) /* void xsaveopt(union savefpu *, uint64_t) */
+ movl 4(%esp), %ecx /* first argument: save area pointer */
+ movl 8(%esp), %eax /* feature mask bits */
+ movl 12(%esp), %edx /* upper 32 bits of the feature mask */
+ xsaveopt (%ecx) /* memory operand is the save area in %ecx */
+ ret
+END(xsaveopt)
+
+ENTRY(xrstor) /* void xrstor(const union savefpu *, uint64_t) */
+ movl 4(%esp), %ecx /* first argument: save area pointer */
+ movl 8(%esp), %eax /* feature mask bits */
+ movl 12(%esp), %edx /* upper 32 bits of the feature mask */
+ xrstor (%ecx) /* restore from the save area; %eax holds mask bits, not an address */
+ ret
+END(xrstor)
+
ENTRY(x86_stmxcsr)
movl 4(%esp), %eax
stmxcsr (%eax)
ret
+END(x86_stmxcsr)
ENTRY(x86_ldmxcsr)
movl 4(%esp), %eax
ldmxcsr (%eax)
ret
+END(x86_ldmxcsr)
ENTRY(fldummy)
ffree %st(7)
diff -r b854f373d590 -r c9896234e1dc sys/arch/x86/include/cpu_extended_state.h
--- a/sys/arch/x86/include/cpu_extended_state.h Tue Feb 25 22:11:11 2014 +0000
+++ b/sys/arch/x86/include/cpu_extended_state.h Tue Feb 25 22:16:52 2014 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu_extended_state.h,v 1.8 2014/02/18 18:39:10 dsl Exp $ */
+/* $NetBSD: cpu_extended_state.h,v 1.9 2014/02/25 22:16:52 dsl Exp $ */
#ifndef _X86_CPU_EXTENDED_STATE_H_
#define _X86_CPU_EXTENDED_STATE_H_
@@ -122,19 +122,13 @@
uint16_t fxo_dflt_cw; /* Control word for signal handlers */
};
-union savefpu {
- struct save87 sv_87;
- struct fxsave sv_xmm;
- struct fxsave_os sv_os;
-};
-
/*
- * For XSAVE a 64byte header follows the above.
+ * For XSAVE a 64byte header follows the fxsave data.
* Currently it only contains one field of which only 3 bits are defined.
* Some other parts must be zero - zero it all.
*
* The xsh_xstate_bv bits match those of XCR0:
- * XCR0_X87 0x00000001 x87 FPU/MMX state (always set)
+ * XCR0_X87 0x00000001 x87 FPU/MMX state
* XCR0_SSE 0x00000002 SSE state
* XCR0_AVX 0x00000004 AVX state (ymmn registers)
*
@@ -143,11 +137,12 @@
*/
struct xsave_header {
+ uint64_t xsh_fxsave[64]; /* to align in the union */
uint64_t xsh_xstate_bv; /* bitmap of saved sub structures */
uint64_t xsh_rsrvd[2]; /* must be zero */
uint64_t xsh_reserved[5];/* best if zero */
};
-__CTASSERT(sizeof (struct xsave_header) == 64);
+__CTASSERT(sizeof (struct xsave_header) == 512 + 64);
/*
* The ymm save area actually follows the xsave_header.
@@ -157,6 +152,20 @@
};
__CTASSERT(sizeof (struct xsave_ymm) == 256);
+/*
+ * The following union is placed at the end of the pcb.
+ * It is defined this way to separate the definitions and to
+ * minimise the number of union/struct selectors.
+ * NB: Some userspace stuff (eg firefox) uses it to parse ucontext.
+ */
+union savefpu {
+ struct save87 sv_87;
+ struct fxsave sv_xmm;
+#ifdef _KERNEL
+ struct fxsave_os sv_os;
+ struct xsave_header sv_xsave_hdr;
+#endif
+};
/*
* 80387 control and status word bits
diff -r b854f373d590 -r c9896234e1dc sys/arch/x86/include/cpufunc.h
--- a/sys/arch/x86/include/cpufunc.h Tue Feb 25 22:11:11 2014 +0000
+++ b/sys/arch/x86/include/cpufunc.h Tue Feb 25 22:16:52 2014 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: cpufunc.h,v 1.17 2014/02/13 19:37:08 dsl Exp $ */
+/* $NetBSD: cpufunc.h,v 1.18 2014/02/25 22:16:52 dsl Exp $ */
/*-
* Copyright (c) 1998, 2007 The NetBSD Foundation, Inc.
@@ -77,27 +77,36 @@
void x86_stihlt(void);
u_int x86_getss(void);
-struct save87;
-struct fxsave;
+/* fpu save, restore etc */
+union savefpu;
void fldcw(const uint16_t *);
void fnclex(void);
void fninit(void);
-void fnsave(struct save87 *);
+void fnsave(union savefpu *);
void fnstcw(uint16_t *);
uint16_t fngetsw(void);
void fnstsw(uint16_t *);
-void frstor(const struct save87 *);
+void frstor(const union savefpu *);
void fwait(void);
void clts(void);
void stts(void);
-void fxsave(struct fxsave *);
-void fxrstor(const struct fxsave *);
+void fxsave(union savefpu *);
+void fxrstor(const union savefpu *);
void x86_ldmxcsr(const uint32_t *);
void x86_stmxcsr(uint32_t *);
void fldummy(void);
void fp_divide_by_0(void);
+/* Extended processor state functions (for AVX registers etc) */
+
+uint64_t rdxcr(uint32_t); /* xgetbv */
+void wrxcr(uint32_t, uint64_t); /* xsetbv */
+
+void xrstor(const union savefpu *, uint64_t);
+void xsave(union savefpu *, uint64_t);
+void xsaveopt(union savefpu *, uint64_t);
+
void x86_monitor(const void *, uint32_t, uint32_t);
void x86_mwait(uint32_t, uint32_t);
/* x86_cpuid2() writes four 32bit values, %eax, %ebx, %ecx and %edx */
@@ -134,14 +143,6 @@
void setfs(int);
void setusergs(int);
-/* Extended processor state functions (for AVX registers etc) */
-
-uint64_t rdxcr(uint32_t); /* xgetbv */
-void wrxcr(uint32_t, uint64_t); /* xsetgv */
-void xrstor(const void *, uint64_t);
-void xsave(void *, uint64_t);
-void xsaveopt(const void *, uint64_t);
-
#endif /* _KERNEL */
#endif /* !_X86_CPUFUNC_H_ */
diff -r b854f373d590 -r c9896234e1dc sys/arch/x86/include/fpu.h
--- a/sys/arch/x86/include/fpu.h Tue Feb 25 22:11:11 2014 +0000
+++ b/sys/arch/x86/include/fpu.h Tue Feb 25 22:16:52 2014 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: fpu.h,v 1.5 2014/02/23 22:35:27 dsl Exp $ */
+/* $NetBSD: fpu.h,v 1.6 2014/02/25 22:16:52 dsl Exp $ */
#ifndef _X86_FPU_H_
#define _X86_FPU_H_
@@ -28,6 +28,9 @@
/* Reset control words only - for signal handlers */
void fpu_save_area_reset(struct lwp *);
+/* Copy data outside pcb during fork */
+void fpu_save_area_fork(struct pcb *, const struct pcb *);
+
/* Load FP registers with user-supplied values */
void process_write_fpregs_xmm(struct lwp *lwp, const struct fxsave *fpregs);
void process_write_fpregs_s87(struct lwp *lwp, const struct save87 *fpregs);
diff -r b854f373d590 -r c9896234e1dc sys/arch/x86/x86/cpu.c
--- a/sys/arch/x86/x86/cpu.c Tue Feb 25 22:11:11 2014 +0000
+++ b/sys/arch/x86/x86/cpu.c Tue Feb 25 22:16:52 2014 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu.c,v 1.109 2014/02/19 21:23:02 dsl Exp $ */
+/* $NetBSD: cpu.c,v 1.110 2014/02/25 22:16:52 dsl Exp $ */
/*-
* Copyright (c) 2000-2012 NetBSD Foundation, Inc.
@@ -62,7 +62,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.109 2014/02/19 21:23:02 dsl Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.110 2014/02/25 22:16:52 dsl Exp $");
#include "opt_ddb.h"
#include "opt_mpbios.h" /* for MPDEBUG */
@@ -552,29 +552,41 @@
void
cpu_init(struct cpu_info *ci)
{
+ uint32_t cr4;
lcr0(rcr0() | CR0_WP);
+ cr4 = rcr4();
/*
* On a P6 or above, enable global TLB caching if the
* hardware supports it.
*/
if (cpu_feature[0] & CPUID_PGE)
- lcr4(rcr4() | CR4_PGE); /* enable global TLB caching */
+ cr4 |= CR4_PGE; /* enable global TLB caching */
/*
* If we have FXSAVE/FXRESTOR, use them.
*/
if (cpu_feature[0] & CPUID_FXSR) {
- lcr4(rcr4() | CR4_OSFXSR);
+ cr4 |= CR4_OSFXSR;
/*
* If we have SSE/SSE2, enable XMM exceptions.
*/
if (cpu_feature[0] & (CPUID_SSE|CPUID_SSE2))
- lcr4(rcr4() | CR4_OSXMMEXCPT);
+ cr4 |= CR4_OSXMMEXCPT;
}
+ /* If xsave is supported, enable it */
+ if (cpu_feature[1] & CPUID2_XSAVE)
+ cr4 |= CR4_OSXSAVE;
+
+ lcr4(cr4);
+
+ /* If xsave is enabled, enable all fpu features */
+ if (cr4 & CR4_OSXSAVE)
+ wrxcr(0, x86_xsave_features & XCR0_FPU);
+
#ifdef MTRR
/*
* On a P6 or above, initialize MTRR's if the hardware supports them.
Home |
Main Index |
Thread Index |
Old Index