Hi, Attached is the next version of the unified patch. I've updated the boolean logic as requested, and fixed the indentation. -- Best regards, Michał Górny
diff --git a/lib/libc/sys/ptrace.2 b/lib/libc/sys/ptrace.2 index 9cd99ac94bd1..2ea13872e421 100644 --- a/lib/libc/sys/ptrace.2 +++ b/lib/libc/sys/ptrace.2 @@ -1,7 +1,7 @@ .\" $NetBSD: ptrace.2,v 1.74 2019/06/12 12:33:42 wiz Exp $ .\" .\" This file is in the public domain. -.Dd June 12, 2019 +.Dd June 22, 2019 .Dt PTRACE 2 .Os .Sh NAME @@ -771,6 +771,69 @@ The argument contains the LWP ID of the thread whose registers are to be written. If zero is supplied, the first thread of the process is written. +.It Dv PT_GETXSTATE +This request reads the traced process' FPU extended state into +the +.Dq Li "struct xstate" +(defined in +.In machine/cpu_extended_state.h ) . +.Fa addr +should be a pointer to +.Dq Li "struct iovec" +(defined in +.In sys/uio.h ) +specifying the pointer to the aforementioned struct as +.Fa iov_base +and its size as +.Fa iov_len . +The +.Fa data +argument contains the LWP ID of the thread whose registers are to +be read. +If zero is supplied, the first thread of the process is read. +The struct will be filled up to the specified +.Fa iov_len . +The caller needs to check +.Fa xs_rfbm +bitmap in order to determine which fields were provided by the CPU, +and may check +.Fa xs_xstate_bv +to determine which component states were changed from the initial state. +.It Dv PT_SETXSTATE +This request is the converse of +.Dv PT_GETXSTATE ; +it loads the traced process' extended FPU state from the +.Dq Li "struct xstate" +(defined in +.In machine/cpu_extended_state.h ) . +.Fa addr +should be a pointer to +.Dq Li "struct iovec" +(defined in +.In sys/uio.h ) +specifying the pointer to the aforementioned struct as +.Fa iov_base +and its size as +.Fa iov_len . +The +.Fa data +argument contains the LWP ID of the thread whose registers are to +be written. +If zero is supplied, the first thread of the process is written. +The +.Fa xs_rfbm +field of the supplied xstate specifies which state components are to +be updated. Other components (fields) will be ignored. 
The +.Fa xs_xstate_bv +specifies whether component state should be set to provided values +(when 1) or reset to uninitialized (when 0). The request +will fail if +.Fa xs_xstate_bv +is not a subset of +.Fa xs_rfbm , +or any of the specified components is not supported by the CPU or kernel +(i.e. not returned by +.Dv PT_GETXSTATE ) . .El .Sh ERRORS Some requests can cause @@ -819,8 +882,10 @@ was neither 0 nor a legal signal number. .Dv PT_GETREGS , .Dv PT_SETREGS , .Dv PT_GETFPREGS , +.Dv PT_SETFPREGS , +.Dv PT_GETXSTATE , or -.Dv PT_SETFPREGS +.Dv PT_SETXSTATE was attempted on a process with no valid register set. (This is normally true only of system processes.) .It @@ -832,6 +897,13 @@ or with .Dv vm.user_va0_disable set to 1. +.It +.Dv PT_SETXSTATE +attempted to set state components not supported by the kernel, +or +.Fa xs_xstate_bv +was not a subset of +.Fa xs_rfbm . .El .It Bq Er EPERM .Bl -bullet -compact diff --git a/sys/arch/amd64/amd64/netbsd32_machdep.c b/sys/arch/amd64/amd64/netbsd32_machdep.c index 81bf78f6ecc4..3e007c79761b 100644 --- a/sys/arch/amd64/amd64/netbsd32_machdep.c +++ b/sys/arch/amd64/amd64/netbsd32_machdep.c @@ -353,6 +353,8 @@ netbsd32_ptrace_translate_request(int req) case PT32_SETDBREGS: return PT_SETDBREGS; case PT32_SETSTEP: return PT_SETSTEP; case PT32_CLEARSTEP: return PT_CLEARSTEP; + case PT32_GETXSTATE: return PT_GETXSTATE; + case PT32_SETXSTATE: return PT_SETXSTATE; default: return -1; } } diff --git a/sys/arch/amd64/amd64/process_machdep.c b/sys/arch/amd64/amd64/process_machdep.c index c204556c9168..d4e2c9a4009e 100644 --- a/sys/arch/amd64/amd64/process_machdep.c +++ b/sys/arch/amd64/amd64/process_machdep.c @@ -84,6 +84,9 @@ __KERNEL_RCSID(0, "$NetBSD: process_machdep.c,v 1.39 2019/02/11 14:59:32 cherry #include <sys/proc.h> #include <sys/ptrace.h> +#include <uvm/uvm_extern.h> + +#include <compat/netbsd32/netbsd32.h> #include <machine/psl.h> #include <machine/reg.h> #include <machine/segments.h> @@ -288,3 +291,131 @@ 
process_set_pc(struct lwp *l, void *addr) return 0; } + +#ifdef __HAVE_PTRACE_MACHDEP +static int +process_machdep_read_xstate(struct lwp *l, struct xstate *regs) +{ + return process_read_xstate(l, regs); +} + +static int +process_machdep_write_xstate(struct lwp *l, const struct xstate *regs) +{ + int error; + + /* + * Check for security violations. + */ + error = process_verify_xstate(regs); + if (error != 0) + return error; + + return process_write_xstate(l, regs); +} + +int +ptrace_machdep_dorequest( + struct lwp *l, + struct lwp *lt, + int req, + void *addr, + int data +) +{ + struct uio uio; + struct iovec iov; + struct vmspace *vm; + int error; + int write = 0; + + switch (req) { + case PT_SETXSTATE: + write = 1; + + /* FALLTHROUGH */ + case PT_GETXSTATE: + /* write = 0 done above. */ + if (!process_machdep_validxstate(lt->l_proc)) + return EINVAL; + if (__predict_false(l->l_proc->p_flag & PK_32)) { + struct netbsd32_iovec *user_iov; + user_iov = (struct netbsd32_iovec*)addr; + iov.iov_base = NETBSD32PTR64(user_iov->iov_base); + iov.iov_len = user_iov->iov_len; + } else { + struct iovec *user_iov; + user_iov = (struct iovec*)addr; + iov.iov_base = user_iov->iov_base; + iov.iov_len = user_iov->iov_len; + } + + error = proc_vmspace_getref(l->l_proc, &vm); + if (error) + return error; + if (iov.iov_len > sizeof(struct xstate)) + iov.iov_len = sizeof(struct xstate); + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_offset = 0; + uio.uio_resid = iov.iov_len; + uio.uio_rw = write ? UIO_WRITE : UIO_READ; + uio.uio_vmspace = vm; + error = process_machdep_doxstate(l, lt, &uio); + uvmspace_free(vm); + return error; + } + +#ifdef DIAGNOSTIC + panic("ptrace_machdep: impossible"); +#endif + + return 0; +} + +/* + * The following functions are used by both ptrace(2) and procfs. 
+ */ + +int +process_machdep_doxstate(struct lwp *curl, struct lwp *l, struct uio *uio) + /* curl: tracer */ + /* l: traced */ +{ + int error; + struct xstate r; + char *kv; + ssize_t kl; + + memset(&r, 0, sizeof(r)); + kl = MIN(uio->uio_iov->iov_len, sizeof(r)); + kv = (char *) &r; + + kv += uio->uio_offset; + kl -= uio->uio_offset; + if (kl > uio->uio_resid) + kl = uio->uio_resid; + + if (kl < 0) + error = EINVAL; + else + error = process_machdep_read_xstate(l, &r); + if (error == 0) + error = uiomove(kv, kl, uio); + if (error == 0 && uio->uio_rw == UIO_WRITE) + error = process_machdep_write_xstate(l, &r); + + uio->uio_offset = 0; + return error; +} + +int +process_machdep_validxstate(struct proc *p) +{ + + if (p->p_flag & PK_SYSTEM) + return 0; + + return 1; +} +#endif /* __HAVE_PTRACE_MACHDEP */ diff --git a/sys/arch/amd64/include/netbsd32_machdep.h b/sys/arch/amd64/include/netbsd32_machdep.h index e7f018708c26..c2efb4f1315d 100644 --- a/sys/arch/amd64/include/netbsd32_machdep.h +++ b/sys/arch/amd64/include/netbsd32_machdep.h @@ -22,6 +22,8 @@ #define PT32_SETDBREGS (PT_FIRSTMACH + 8) #define PT32_SETSTEP (PT_FIRSTMACH + 9) #define PT32_CLEARSTEP (PT_FIRSTMACH + 10) +#define PT32_GETXSTATE (PT_FIRSTMACH + 11) +#define PT32_SETXSTATE (PT_FIRSTMACH + 12) #define NETBSD32_POINTER_TYPE uint32_t typedef struct { NETBSD32_POINTER_TYPE i32; } netbsd32_pointer_t; diff --git a/sys/arch/amd64/include/ptrace.h b/sys/arch/amd64/include/ptrace.h index 4eddffb6d23e..5bc4433b254e 100644 --- a/sys/arch/amd64/include/ptrace.h +++ b/sys/arch/amd64/include/ptrace.h @@ -45,6 +45,11 @@ #define PT_SETDBREGS (PT_FIRSTMACH + 6) #define PT_SETSTEP (PT_FIRSTMACH + 7) #define PT_CLEARSTEP (PT_FIRSTMACH + 8) +#define PT_GETXSTATE (PT_FIRSTMACH + 9) +#define PT_SETXSTATE (PT_FIRSTMACH + 10) + +/* We have machine-dependent process tracing needs. 
*/ +#define __HAVE_PTRACE_MACHDEP #define PT_MACHDEP_STRINGS \ "PT_STEP", \ @@ -55,7 +60,9 @@ "PT_GETDBREGS", \ "PT_SETDBREGS", \ "PT_SETSTEP", \ - "PT_CLEARSTEP", + "PT_CLEARSTEP", \ + "PT_GETXSTATE", \ + "PT_SETXSTATE" #include <machine/reg.h> #define PTRACE_REG_PC(r) (r)->regs[_REG_RIP] @@ -71,6 +78,20 @@ #define PTRACE_BREAKPOINT_SIZE 1 #define PTRACE_BREAKPOINT_ADJ 1 +#ifdef _KERNEL + +/* + * These are used in sys_ptrace() to find good ptrace(2) requests. + */ +#define PTRACE_MACHDEP_REQUEST_CASES \ + case PT_GETXSTATE: \ + case PT_SETXSTATE: + +int process_machdep_doxstate(struct lwp *, struct lwp *, struct uio *); +int process_machdep_validxstate(struct proc *); + +#endif /* _KERNEL */ + #ifdef _KERNEL_OPT #include "opt_compat_netbsd32.h" diff --git a/sys/arch/i386/i386/process_machdep.c b/sys/arch/i386/i386/process_machdep.c index 29216fd6a451..7ed1ceda5846 100644 --- a/sys/arch/i386/i386/process_machdep.c +++ b/sys/arch/i386/i386/process_machdep.c @@ -231,6 +231,12 @@ process_set_pc(struct lwp *l, void *addr) } #ifdef __HAVE_PTRACE_MACHDEP +static int +process_machdep_read_xstate(struct lwp *l, struct xstate *regs) +{ + return process_read_xstate(l, regs); +} + static int process_machdep_read_xmmregs(struct lwp *l, struct xmmregs *regs) { @@ -240,6 +246,21 @@ process_machdep_read_xmmregs(struct lwp *l, struct xmmregs *regs) return 0; } +static int +process_machdep_write_xstate(struct lwp *l, const struct xstate *regs) +{ + int error; + + /* + * Check for security violations. + */ + error = process_verify_xstate(regs); + if (error != 0) + return error; + + return process_write_xstate(l, regs); +} + static int process_machdep_write_xmmregs(struct lwp *l, struct xmmregs *regs) { @@ -260,6 +281,9 @@ ptrace_machdep_dorequest( { struct uio uio; struct iovec iov; + struct iovec *user_iov = (struct iovec*)addr; + struct vmspace *vm; + int error; int write = 0; switch (req) { @@ -271,33 +295,54 @@ ptrace_machdep_dorequest( /* write = 0 done above. 
*/ if (!process_machdep_validxmmregs(lt->l_proc)) return (EINVAL); - else { - struct vmspace *vm; - int error; - - error = proc_vmspace_getref(l->l_proc, &vm); - if (error) { - return error; - } - iov.iov_base = addr; - iov.iov_len = sizeof(struct xmmregs); - uio.uio_iov = &iov; - uio.uio_iovcnt = 1; - uio.uio_offset = 0; - uio.uio_resid = sizeof(struct xmmregs); - uio.uio_rw = write ? UIO_WRITE : UIO_READ; - uio.uio_vmspace = vm; - error = process_machdep_doxmmregs(l, lt, &uio); - uvmspace_free(vm); + error = proc_vmspace_getref(l->l_proc, &vm); + if (error) { + return error; + } + iov.iov_base = addr; + iov.iov_len = sizeof(struct xmmregs); + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_offset = 0; + uio.uio_resid = sizeof(struct xmmregs); + uio.uio_rw = write ? UIO_WRITE : UIO_READ; + uio.uio_vmspace = vm; + error = process_machdep_doxmmregs(l, lt, &uio); + uvmspace_free(vm); + return error; + + case PT_SETXSTATE: + write = 1; + + /* FALLTHROUGH */ + case PT_GETXSTATE: + /* write = 0 done above. */ + if (!process_machdep_validxstate(lt->l_proc)) + return EINVAL; + error = proc_vmspace_getref(l->l_proc, &vm); + if (error) { return error; } + iov.iov_base = user_iov->iov_base; + iov.iov_len = user_iov->iov_len; + if (iov.iov_len > sizeof(struct xstate)) + iov.iov_len = sizeof(struct xstate); + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_offset = 0; + uio.uio_resid = iov.iov_len; + uio.uio_rw = write ? 
UIO_WRITE : UIO_READ; + uio.uio_vmspace = vm; + error = process_machdep_doxstate(l, lt, &uio); + uvmspace_free(vm); + return error; } #ifdef DIAGNOSTIC panic("ptrace_machdep: impossible"); #endif - return (0); + return 0; } /* @@ -348,5 +393,47 @@ process_machdep_validxmmregs(struct proc *p) return (i386_use_fxsave); } + +int +process_machdep_doxstate(struct lwp *curl, struct lwp *l, struct uio *uio) + /* curl: tracer */ + /* l: traced */ +{ + int error; + struct xstate r; + char *kv; + ssize_t kl; + + memset(&r, 0, sizeof(r)); + kl = MIN(uio->uio_iov->iov_len, sizeof(r)); + kv = (char *) &r; + + kv += uio->uio_offset; + kl -= uio->uio_offset; + if (kl > uio->uio_resid) + kl = uio->uio_resid; + + if (kl < 0) + error = EINVAL; + else + error = process_machdep_read_xstate(l, &r); + if (error == 0) + error = uiomove(kv, kl, uio); + if (error == 0 && uio->uio_rw == UIO_WRITE) + error = process_machdep_write_xstate(l, &r); + + uio->uio_offset = 0; + return error; +} + +int +process_machdep_validxstate(struct proc *p) +{ + + if (p->p_flag & PK_SYSTEM) + return 0; + + return 1; +} #endif /* __HAVE_PTRACE_MACHDEP */ #endif /* PTRACE_HOOKS */ diff --git a/sys/arch/i386/include/ptrace.h b/sys/arch/i386/include/ptrace.h index 425651b14559..7e0ec5214135 100644 --- a/sys/arch/i386/include/ptrace.h +++ b/sys/arch/i386/include/ptrace.h @@ -90,6 +90,8 @@ #define PT_SETDBREGS (PT_FIRSTMACH + 8) #define PT_SETSTEP (PT_FIRSTMACH + 9) #define PT_CLEARSTEP (PT_FIRSTMACH + 10) +#define PT_GETXSTATE (PT_FIRSTMACH + 11) +#define PT_SETXSTATE (PT_FIRSTMACH + 12) #define PT_MACHDEP_STRINGS \ "PT_STEP", \ @@ -102,8 +104,9 @@ "PT_GETDBREGS", \ "PT_SETDBREGS", \ "PT_SETSTEP", \ - "PT_CLEARSTEP", - + "PT_CLEARSTEP", \ + "PT_GETXSTATE", \ + "PT_SETXSTATE" #include <machine/reg.h> #define PTRACE_REG_PC(r) (r)->r_eip @@ -126,7 +129,9 @@ */ #define PTRACE_MACHDEP_REQUEST_CASES \ case PT_GETXMMREGS: \ - case PT_SETXMMREGS: + case PT_SETXMMREGS: \ + case PT_GETXSTATE: \ + case PT_SETXSTATE: /* * 
These are used to define machine-dependent procfs node types. @@ -159,6 +164,8 @@ struct xmmregs; /* Functions used by both ptrace(2) and procfs. */ int process_machdep_doxmmregs(struct lwp *, struct lwp *, struct uio *); int process_machdep_validxmmregs(struct proc *); +int process_machdep_doxstate(struct lwp *, struct lwp *, struct uio *); +int process_machdep_validxstate(struct proc *); /* Functions used by procfs. */ struct mount; diff --git a/sys/arch/x86/include/cpu.h b/sys/arch/x86/include/cpu.h index 143ae3c5c5ec..589f179ce758 100644 --- a/sys/arch/x86/include/cpu.h +++ b/sys/arch/x86/include/cpu.h @@ -459,6 +459,8 @@ extern int x86_fpu_save; #define FPU_SAVE_XSAVEOPT 3 extern unsigned int x86_fpu_save_size; extern uint64_t x86_xsave_features; +extern size_t x86_xsave_offsets[]; +extern size_t x86_xsave_sizes[]; extern uint32_t x86_fpu_mxcsr_mask; extern bool x86_fpu_eager; diff --git a/sys/arch/x86/include/cpu_extended_state.h b/sys/arch/x86/include/cpu_extended_state.h index 38cb1d6c3396..8590a6814d6c 100644 --- a/sys/arch/x86/include/cpu_extended_state.h +++ b/sys/arch/x86/include/cpu_extended_state.h @@ -79,6 +79,17 @@ struct ymmreg { uint8_t ymm_bytes[16]; }; +/* The AVX-512 registers are 512 bits but the low bits are in xmmregs + * and ymmregs */ +struct zmmreg { + uint8_t zmm_bytes[32]; +}; + +/* 512-bit ZMM register. */ +struct hi16_zmmreg { + uint8_t zmm_bytes[64]; +}; + /* * Floating point unit registers (FSAVE instruction). * @@ -139,6 +150,77 @@ struct xsave_ymm { }; __CTASSERT(sizeof(struct xsave_ymm) == 256); +/* + * AVX-512: opmask state. + */ +struct xsave_opmask { + uint64_t xs_k[8]; /* k0..k7 registers. */ +}; +__CTASSERT(sizeof(struct xsave_opmask) == 64); + +/* + * AVX-512: ZMM_Hi256 state. + */ +struct xsave_zmm_hi256 { + struct zmmreg xs_zmm[16]; /* High bits of zmm0..zmm15 registers. */ +}; +__CTASSERT(sizeof(struct xsave_zmm_hi256) == 512); + +/* + * AVX-512: Hi16_ZMM state. 
 + */ +struct xsave_hi16_zmm { + struct hi16_zmmreg xs_hi16_zmm[16]; /* zmm16..zmm31 registers. */ +}; +__CTASSERT(sizeof(struct xsave_hi16_zmm) == 1024); + +/* + * Structure used to hold all interesting data from XSAVE, in predictable form. + * Note that this structure can have new members added to the end. + */ +struct xstate { + /* + * The two following fields are bitmaps of XSAVE components. They can be + * matched against XCR0_* constants from <machine/specialreg.h>. + */ + /* + * XSAVE/XRSTOR RFBM parameter. + * + * PT_GETXSTATE: 1 indicates that the respective XSAVE component is + * supported and has been enabled for saving. 0 indicates that it is not + * supported by the platform or kernel. + * + * PT_SETXSTATE: 1 indicates that the respective XSAVE component should + * be updated to the value of respective field (or reset if xs_xstate_bv + * bit is 0). 0 indicates that it should be left intact. It is an error + * to enable bits that are not supported by the platform or kernel. + */ + uint64_t xs_rfbm; + /* + * XSAVE/XRSTOR xstate header. + * + * PT_GETXSTATE: 1 indicates that the respective XSAVE component has been + * saved. 0 indicates that it had been in its CPU-defined initial value + * at the time of saving (i.e. was not used by the program). + * + * PT_SETXSTATE: 1 indicates that the respective XSAVE component (if present + * in xs_rfbm) should be set to the values in respective field. 0 indicates + * that it should be reset to CPU-defined initial value. + */ + uint64_t xs_xstate_bv; + + /* legacy FXSAVE area (used for x87 & SSE state) */ + struct fxsave xs_fxsave; + /* AVX state: high bits of ymm0..ymm15 registers */ + struct xsave_ymm xs_ymm_hi128; + /* AVX-512: opmask */ + struct xsave_opmask xs_opmask; + /* AVX-512: high bits of zmm0..zmm15 registers */ + struct xsave_zmm_hi256 xs_zmm_hi256; + /* AVX-512: whole zmm16..zmm31 registers */ + struct xsave_hi16_zmm xs_hi16_zmm; +}; + /* * The following union is placed at the end of the pcb. 
* It is defined this way to separate the definitions and to diff --git a/sys/arch/x86/include/fpu.h b/sys/arch/x86/include/fpu.h index 1f5ff58570de..334848afc76b 100644 --- a/sys/arch/x86/include/fpu.h +++ b/sys/arch/x86/include/fpu.h @@ -38,6 +38,10 @@ void process_write_fpregs_s87(struct lwp *, const struct save87 *); void process_read_fpregs_xmm(struct lwp *, struct fxsave *); void process_read_fpregs_s87(struct lwp *, struct save87 *); +int process_read_xstate(struct lwp *, struct xstate *); +int process_verify_xstate(const struct xstate *); +int process_write_xstate(struct lwp *, const struct xstate *); + #endif #endif /* _X86_FPU_H_ */ diff --git a/sys/arch/x86/include/specialreg.h b/sys/arch/x86/include/specialreg.h index 4f8c4cca6db7..1c0e8c972b07 100644 --- a/sys/arch/x86/include/specialreg.h +++ b/sys/arch/x86/include/specialreg.h @@ -146,6 +146,26 @@ #define XCR0_FPU (XCR0_X87 | XCR0_SSE | XCR0_YMM_Hi128 | \ XCR0_Opmask | XCR0_ZMM_Hi256 | XCR0_Hi16_ZMM) +/* + * XSAVE component indices. + */ +#define XSAVE_X87 0 +#define XSAVE_SSE 1 +#define XSAVE_YMM_Hi128 2 +#define XSAVE_BNDREGS 3 +#define XSAVE_BNDCSR 4 +#define XSAVE_Opmask 5 +#define XSAVE_ZMM_Hi256 6 +#define XSAVE_Hi16_ZMM 7 +#define XSAVE_PT 8 +#define XSAVE_PKRU 9 +#define XSAVE_HDC 10 + +/* + * Highest XSAVE component enabled by XCR0_FPU. + */ +#define XSAVE_MAX_COMPONENT XSAVE_Hi16_ZMM + /* * CPUID "features" bits */ diff --git a/sys/arch/x86/x86/fpu.c b/sys/arch/x86/x86/fpu.c index fac08d12db22..56782ff8e9f9 100644 --- a/sys/arch/x86/x86/fpu.c +++ b/sys/arch/x86/x86/fpu.c @@ -912,6 +912,165 @@ process_read_fpregs_s87(struct lwp *l, struct save87 *fpregs) } } +int +process_read_xstate(struct lwp *l, struct xstate *xstate) +{ + union savefpu *fpu_save; + + fpusave_lwp(l, true); + fpu_save = lwp_fpuarea(l); + + if (x86_fpu_save == FPU_SAVE_FSAVE) { + /* Convert from legacy FSAVE format. 
*/ + memset(&(xstate->xs_fxsave), 0, sizeof(xstate->xs_fxsave)); + process_s87_to_xmm(&fpu_save->sv_87, &(xstate->xs_fxsave)); + + /* We only got x87 data. */ + xstate->xs_rfbm = XCR0_X87; + xstate->xs_xstate_bv = XCR0_X87; + return 0; + } + + /* Copy the legacy area. */ + memcpy(&(xstate->xs_fxsave), fpu_save->sv_xsave_hdr.xsh_fxsave, + sizeof(xstate->xs_fxsave)); + + if (x86_fpu_save == FPU_SAVE_FXSAVE) { + /* FXSAVE means we've got x87 + SSE data. */ + xstate->xs_rfbm = XCR0_X87 | XCR0_SSE; + xstate->xs_xstate_bv = XCR0_X87 | XCR0_SSE; + return 0; + } + + /* Copy the bitmap indicating which states are available. */ + xstate->xs_rfbm = x86_xsave_features & XCR0_FPU; + xstate->xs_xstate_bv = fpu_save->sv_xsave_hdr.xsh_xstate_bv; + KASSERT(!(xstate->xs_xstate_bv & ~xstate->xs_rfbm)); + +#define COPY_COMPONENT(xcr0_val, xsave_val, field) \ + if (xstate->xs_xstate_bv & xcr0_val) { \ + KASSERT(x86_xsave_offsets[xsave_val] \ + >= sizeof(struct xsave_header)); \ + KASSERT(x86_xsave_sizes[xsave_val] \ + >= sizeof(xstate -> field)); \ + \ + memcpy(&(xstate -> field), \ + (char*)fpu_save + x86_xsave_offsets[xsave_val], \ + sizeof(xstate -> field)); \ + } + + COPY_COMPONENT(XCR0_YMM_Hi128, XSAVE_YMM_Hi128, xs_ymm_hi128); + COPY_COMPONENT(XCR0_Opmask, XSAVE_Opmask, xs_opmask); + COPY_COMPONENT(XCR0_ZMM_Hi256, XSAVE_ZMM_Hi256, xs_zmm_hi256); + COPY_COMPONENT(XCR0_Hi16_ZMM, XSAVE_Hi16_ZMM, xs_hi16_zmm); + +#undef COPY_COMPONENT + + return 0; +} + +int +process_verify_xstate(const struct xstate *xstate) +{ + /* xstate_bv must be a subset of RFBM */ + if (xstate->xs_xstate_bv & ~xstate->xs_rfbm) + return EINVAL; + + switch (x86_fpu_save) { + case FPU_SAVE_FSAVE: + if ((xstate->xs_rfbm & ~XCR0_X87)) + return EINVAL; + break; + case FPU_SAVE_FXSAVE: + if ((xstate->xs_rfbm & ~(XCR0_X87 | XCR0_SSE))) + return EINVAL; + break; + default: + /* Verify whether no unsupported features are enabled */ + if ((xstate->xs_rfbm & ~(x86_xsave_features & XCR0_FPU)) != 0) + return EINVAL; + } + + 
return 0; +} + +int +process_write_xstate(struct lwp *l, const struct xstate *xstate) +{ + union savefpu *fpu_save; + + fpusave_lwp(l, true); + fpu_save = lwp_fpuarea(l); + + /* Convert data into legacy FSAVE format. */ + if (x86_fpu_save == FPU_SAVE_FSAVE) { + if (xstate->xs_xstate_bv & XCR0_X87) + process_xmm_to_s87(&xstate->xs_fxsave, &fpu_save->sv_87); + return 0; + } + + /* If XSAVE is supported, make sure that xstate_bv is set correctly. */ + if (x86_fpu_save >= FPU_SAVE_XSAVE) { + /* + * Bit-wise xstate->xs_rfbm ? xstate->xs_xstate_bv + * : fpu_save->sv_xsave_hdr.xsh_xstate_bv + */ + fpu_save->sv_xsave_hdr.xsh_xstate_bv = + (fpu_save->sv_xsave_hdr.xsh_xstate_bv & ~xstate->xs_rfbm) | + xstate->xs_xstate_bv; + } + + if (xstate->xs_xstate_bv & XCR0_X87) { + /* + * X87 state is split into two areas, interspersed with SSE + * data. + */ + memcpy(&fpu_save->sv_xmm, &xstate->xs_fxsave, 24); + memcpy(fpu_save->sv_xmm.fx_87_ac, xstate->xs_fxsave.fx_87_ac, + sizeof(xstate->xs_fxsave.fx_87_ac)); + } + + /* + * Copy MXCSR if either SSE or AVX state is requested, to match the XSAVE + * behavior for those flags. + */ + if (xstate->xs_xstate_bv & (XCR0_SSE|XCR0_YMM_Hi128)) { + /* + * Invalid bits in mxcsr or mxcsr_mask will cause faults. 
+ */ + fpu_save->sv_xmm.fx_mxcsr_mask = xstate->xs_fxsave.fx_mxcsr_mask + & x86_fpu_mxcsr_mask; + fpu_save->sv_xmm.fx_mxcsr = xstate->xs_fxsave.fx_mxcsr & + fpu_save->sv_xmm.fx_mxcsr_mask; + } + + if (xstate->xs_xstate_bv & XCR0_SSE) { + memcpy(&fpu_save->sv_xsave_hdr.xsh_fxsave[160], + xstate->xs_fxsave.fx_xmm, + sizeof(xstate->xs_fxsave.fx_xmm)); + } + +#define COPY_COMPONENT(xcr0_val, xsave_val, field) \ + if (xstate->xs_xstate_bv & xcr0_val) { \ + KASSERT(x86_xsave_offsets[xsave_val] \ + >= sizeof(struct xsave_header)); \ + KASSERT(x86_xsave_sizes[xsave_val] \ + >= sizeof(xstate -> field)); \ + \ + memcpy((char*)fpu_save + x86_xsave_offsets[xsave_val], \ + &(xstate -> field), sizeof(xstate -> field)); \ + } + + COPY_COMPONENT(XCR0_YMM_Hi128, XSAVE_YMM_Hi128, xs_ymm_hi128); + COPY_COMPONENT(XCR0_Opmask, XSAVE_Opmask, xs_opmask); + COPY_COMPONENT(XCR0_ZMM_Hi256, XSAVE_ZMM_Hi256, xs_zmm_hi256); + COPY_COMPONENT(XCR0_Hi16_ZMM, XSAVE_Hi16_ZMM, xs_hi16_zmm); + +#undef COPY_COMPONENT + + return 0; +} + /* -------------------------------------------------------------------------- */ static volatile unsigned long eagerfpu_cpu_barrier1 __cacheline_aligned; diff --git a/sys/arch/x86/x86/identcpu.c b/sys/arch/x86/x86/identcpu.c index 9037fb2673fd..491c99ac06db 100644 --- a/sys/arch/x86/x86/identcpu.c +++ b/sys/arch/x86/x86/identcpu.c @@ -74,6 +74,8 @@ char cpu_brand_string[49]; int x86_fpu_save __read_mostly; unsigned int x86_fpu_save_size __read_mostly = sizeof(struct save87); uint64_t x86_xsave_features __read_mostly = 0; +size_t x86_xsave_offsets[XSAVE_MAX_COMPONENT+1] __read_mostly; +size_t x86_xsave_sizes[XSAVE_MAX_COMPONENT+1] __read_mostly; /* * Note: these are just the ones that may not have a cpuid instruction. 
@@ -755,6 +757,7 @@ static void cpu_probe_fpu(struct cpu_info *ci) { u_int descs[4]; + int i; x86_fpu_eager = true; x86_fpu_save = FPU_SAVE_FSAVE; @@ -816,6 +819,15 @@ cpu_probe_fpu(struct cpu_info *ci) x86_fpu_save_size = descs[2]; x86_xsave_features = (uint64_t)descs[3] << 32 | descs[0]; + + /* Get component offsets and sizes for the save area */ + for (i = XSAVE_YMM_Hi128; i < __arraycount(x86_xsave_offsets); i++) { + if (x86_xsave_features & __BIT(i)) { + x86_cpuid2(0xd, i, descs); + x86_xsave_offsets[i] = descs[1]; + x86_xsave_sizes[i] = descs[0]; + } + } } void diff --git a/tests/lib/libc/sys/t_ptrace_wait.c b/tests/lib/libc/sys/t_ptrace_wait.c index 64885e839c28..9d89c9d20c9c 100644 --- a/tests/lib/libc/sys/t_ptrace_wait.c +++ b/tests/lib/libc/sys/t_ptrace_wait.c @@ -37,6 +37,7 @@ __RCSID("$NetBSD: t_ptrace_wait.c,v 1.128 2019/06/18 21:14:26 kamil Exp $"); #include <sys/stat.h> #include <sys/syscall.h> #include <sys/sysctl.h> +#include <sys/uio.h> #include <sys/wait.h> #include <machine/reg.h> #include <elf.h> @@ -62,6 +63,7 @@ __RCSID("$NetBSD: t_ptrace_wait.c,v 1.128 2019/06/18 21:14:26 kamil Exp $"); #if defined(__i386__) || defined(__x86_64__) #include <cpuid.h> #include <x86/cpu_extended_state.h> +#include <x86/specialreg.h> #endif #include <atf-c.h> diff --git a/tests/lib/libc/sys/t_ptrace_x86_wait.h b/tests/lib/libc/sys/t_ptrace_x86_wait.h index ba3165495ade..b50f8f7530e7 100644 --- a/tests/lib/libc/sys/t_ptrace_x86_wait.h +++ b/tests/lib/libc/sys/t_ptrace_x86_wait.h @@ -2802,6 +2802,890 @@ ATF_TC_BODY(x86_regs_xmm_write, tc) DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME); TWAIT_REQUIRE_FAILURE(ECHILD, wpid = TWAIT_GENERIC(child, &status, 0)); } + +ATF_TC(x86_xstate_mm_read); +ATF_TC_HEAD(x86_xstate_mm_read, tc) +{ + atf_tc_set_md_var(tc, "descr", + "Set MMX (mm0..mm7) reg values from debugged program and read " + "them via PT_GETXSTATE, comparing values against expected."); +} + +ATF_TC_BODY(x86_xstate_mm_read, tc) +{ + const int exitval 
= 5; + pid_t child, wpid; +#if defined(TWAIT_HAVE_STATUS) + const int sigval = SIGTRAP; + int status; +#endif + struct iovec iov; + struct xstate xst; + + const uint64_t mm[] = { + 0x0001020304050607, + 0x1011121314151617, + 0x2021222324252627, + 0x3031323334353637, + 0x4041424344454647, + 0x5051525354555657, + 0x6061626364656667, + 0x7071727374757677, + }; + + /* verify whether MMX is supported here */ + DPRINTF("Before invoking cpuid\n"); + { + unsigned int eax, ebx, ecx, edx; + if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) + atf_tc_skip("CPUID is not supported by the CPU"); + + DPRINTF("cpuid: EDX = %08x\n", edx); + + if (!(edx & bit_MMX)) + atf_tc_skip("MMX is not supported by the CPU"); + } + + DPRINTF("Before forking process PID=%d\n", getpid()); + SYSCALL_REQUIRE((child = fork()) != -1); + if (child == 0) { + DPRINTF("Before calling PT_TRACE_ME from child %d\n", getpid()); + FORKEE_ASSERT(ptrace(PT_TRACE_ME, 0, NULL, 0) != -1); + + DPRINTF("Before running assembly from child\n"); + set_mm_regs(mm); + + DPRINTF("Before exiting of the child process\n"); + _exit(exitval); + } + DPRINTF("Parent process PID=%d, child's PID=%d\n", getpid(), child); + + DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME); + TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child); + + validate_status_stopped(status, sigval); + + iov.iov_base = &xst; + iov.iov_len = sizeof(xst); + + DPRINTF("Call GETXSTATE for the child process\n"); + SYSCALL_REQUIRE(ptrace(PT_GETXSTATE, child, &iov, 0) != -1); + + ATF_REQUIRE(xst.xs_rfbm & XCR0_X87); + ATF_REQUIRE(xst.xs_xstate_bv & XCR0_X87); + + ATF_CHECK_EQ(xst.xs_fxsave.fx_87_ac[0].r.f87_mantissa, mm[0]); + ATF_CHECK_EQ(xst.xs_fxsave.fx_87_ac[1].r.f87_mantissa, mm[1]); + ATF_CHECK_EQ(xst.xs_fxsave.fx_87_ac[2].r.f87_mantissa, mm[2]); + ATF_CHECK_EQ(xst.xs_fxsave.fx_87_ac[3].r.f87_mantissa, mm[3]); + ATF_CHECK_EQ(xst.xs_fxsave.fx_87_ac[4].r.f87_mantissa, mm[4]); + ATF_CHECK_EQ(xst.xs_fxsave.fx_87_ac[5].r.f87_mantissa, mm[5]); 
+ ATF_CHECK_EQ(xst.xs_fxsave.fx_87_ac[6].r.f87_mantissa, mm[6]); + ATF_CHECK_EQ(xst.xs_fxsave.fx_87_ac[7].r.f87_mantissa, mm[7]); + + DPRINTF("Before resuming the child process where it left off and " + "without signal to be sent\n"); + SYSCALL_REQUIRE(ptrace(PT_CONTINUE, child, (void *)1, 0) != -1); + + DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME); + TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child); + + validate_status_exited(status, exitval); + + DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME); + TWAIT_REQUIRE_FAILURE(ECHILD, wpid = TWAIT_GENERIC(child, &status, 0)); +} + +ATF_TC(x86_xstate_mm_write); +ATF_TC_HEAD(x86_xstate_mm_write, tc) +{ + atf_tc_set_md_var(tc, "descr", + "Set mm0..mm7 reg values into a debugged program via " + "PT_SETXSTATE and compare the result against expected."); +} + +ATF_TC_BODY(x86_xstate_mm_write, tc) +{ + const int exitval = 5; + pid_t child, wpid; +#if defined(TWAIT_HAVE_STATUS) + const int sigval = SIGTRAP; + int status; +#endif + struct iovec iov; + struct xstate xst; + + const uint64_t mm[] = { + 0x0001020304050607, + 0x1011121314151617, + 0x2021222324252627, + 0x3031323334353637, + 0x4041424344454647, + 0x5051525354555657, + 0x6061626364656667, + 0x7071727374757677, + }; + + /* verify whether MMX is supported here */ + DPRINTF("Before invoking cpuid\n"); + { + unsigned int eax, ebx, ecx, edx; + if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) + atf_tc_skip("CPUID is not supported by the CPU"); + + DPRINTF("cpuid: EDX = %08x\n", edx); + + if (!(edx & bit_MMX)) + atf_tc_skip("MMX is not supported by the CPU"); + } + + DPRINTF("Before forking process PID=%d\n", getpid()); + SYSCALL_REQUIRE((child = fork()) != -1); + if (child == 0) { + uint64_t v_mm[8]; + + DPRINTF("Before calling PT_TRACE_ME from child %d\n", getpid()); + FORKEE_ASSERT(ptrace(PT_TRACE_ME, 0, NULL, 0) != -1); + + DPRINTF("Before running assembly from child\n"); + get_mm_regs(v_mm); + + DPRINTF("Before comparing 
results\n"); + FORKEE_ASSERT_EQ(v_mm[0], mm[0]); + FORKEE_ASSERT_EQ(v_mm[1], mm[1]); + FORKEE_ASSERT_EQ(v_mm[2], mm[2]); + FORKEE_ASSERT_EQ(v_mm[3], mm[3]); + FORKEE_ASSERT_EQ(v_mm[4], mm[4]); + FORKEE_ASSERT_EQ(v_mm[5], mm[5]); + FORKEE_ASSERT_EQ(v_mm[6], mm[6]); + FORKEE_ASSERT_EQ(v_mm[7], mm[7]); + + DPRINTF("Before exiting of the child process\n"); + _exit(exitval); + } + DPRINTF("Parent process PID=%d, child's PID=%d\n", getpid(), child); + + DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME); + TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child); + + validate_status_stopped(status, sigval); + + iov.iov_base = &xst; + iov.iov_len = sizeof(xst); + + DPRINTF("Call GETXSTATE for the child process\n"); + SYSCALL_REQUIRE(ptrace(PT_GETXSTATE, child, &iov, 0) != -1); + + ATF_REQUIRE(xst.xs_rfbm & XCR0_X87); + + xst.xs_rfbm = XCR0_X87; + xst.xs_xstate_bv = XCR0_X87; + + xst.xs_fxsave.fx_87_ac[0].r.f87_mantissa = mm[0]; + xst.xs_fxsave.fx_87_ac[1].r.f87_mantissa = mm[1]; + xst.xs_fxsave.fx_87_ac[2].r.f87_mantissa = mm[2]; + xst.xs_fxsave.fx_87_ac[3].r.f87_mantissa = mm[3]; + xst.xs_fxsave.fx_87_ac[4].r.f87_mantissa = mm[4]; + xst.xs_fxsave.fx_87_ac[5].r.f87_mantissa = mm[5]; + xst.xs_fxsave.fx_87_ac[6].r.f87_mantissa = mm[6]; + xst.xs_fxsave.fx_87_ac[7].r.f87_mantissa = mm[7]; + + DPRINTF("Call SETXSTATE for the child process\n"); + SYSCALL_REQUIRE(ptrace(PT_SETXSTATE, child, &iov, 0) != -1); + + DPRINTF("Before resuming the child process where it left off and " + "without signal to be sent\n"); + SYSCALL_REQUIRE(ptrace(PT_CONTINUE, child, (void *)1, 0) != -1); + + DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME); + TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child); + + validate_status_exited(status, exitval); + + DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME); + TWAIT_REQUIRE_FAILURE(ECHILD, wpid = TWAIT_GENERIC(child, &status, 0)); +} + +ATF_TC(x86_xstate_xmm_read); 
+ATF_TC_HEAD(x86_xstate_xmm_read, tc)
+{
+	atf_tc_set_md_var(tc, "descr",
+	    "Set xmm0..xmm15 (..xmm7 on i386) reg values from debugged program "
+	    "and read them via PT_GETXSTATE, comparing values against expected.");
+}
+
+/*
+ * The forked child enables tracing and calls set_xmm_regs() with the
+ * pattern table below.  Once the child is reported stopped (SIGTRAP is
+ * expected), the parent fetches the extended state with PT_GETXSTATE,
+ * requires the SSE component to be flagged in both xs_rfbm and
+ * xs_xstate_bv, and compares every fx_xmm[] slot against the table.
+ * The child is then continued and must exit with exitval.
+ */
+ATF_TC_BODY(x86_xstate_xmm_read, tc)
+{
+	const int exitval = 5;
+	pid_t child, wpid;
+#if defined(TWAIT_HAVE_STATUS)
+	const int sigval = SIGTRAP;
+	int status;
+#endif
+	struct xstate xst;
+	struct iovec iov;
+	size_t i;
+
+	/* One 128-bit pattern per XMM register available on this arch. */
+	const struct {
+		uint64_t a, b;
+	} xmm[] __aligned(16) = {
+		{ 0x0706050403020100, 0x0F0E0D0C0B0A0908, },
+		{ 0x0807060504030201, 0x100F0E0D0C0B0A09, },
+		{ 0x0908070605040302, 0x11100F0E0D0C0B0A, },
+		{ 0x0A09080706050403, 0x1211100F0E0D0C0B, },
+		{ 0x0B0A090807060504, 0x131211100F0E0D0C, },
+		{ 0x0C0B0A0908070605, 0x14131211100F0E0D, },
+		{ 0x0D0C0B0A09080706, 0x1514131211100F0E, },
+		{ 0x0E0D0C0B0A090807, 0x161514131211100F, },
+#if defined(__x86_64__)
+		{ 0x0F0E0D0C0B0A0908, 0x1716151413121110, },
+		{ 0x100F0E0D0C0B0A09, 0x1817161514131211, },
+		{ 0x11100F0E0D0C0B0A, 0x1918171615141312, },
+		{ 0x1211100F0E0D0C0B, 0x1A19181716151413, },
+		{ 0x131211100F0E0D0C, 0x1B1A191817161514, },
+		{ 0x14131211100F0E0D, 0x1C1B1A1918171615, },
+		{ 0x1514131211100F0E, 0x1D1C1B1A19181716, },
+		{ 0x161514131211100F, 0x1E1D1C1B1A191817, },
+#endif
+	};
+	/* 8 on i386, 16 on x86_64: follows the table above. */
+	const size_t nregs = sizeof(xmm) / sizeof(xmm[0]);
+
+	/* verify whether SSE is supported here */
+	DPRINTF("Before invoking cpuid\n");
+	{
+		unsigned int eax, ebx, ecx, edx;
+		if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
+			atf_tc_skip("CPUID is not supported by the CPU");
+
+		DPRINTF("cpuid: EDX = %08x\n", edx);
+
+		if (!(edx & bit_SSE))
+			atf_tc_skip("SSE is not supported by the CPU");
+	}
+
+	DPRINTF("Before forking process PID=%d\n", getpid());
+	SYSCALL_REQUIRE((child = fork()) != -1);
+	if (child == 0) {
+		DPRINTF("Before calling PT_TRACE_ME from child %d\n", getpid());
+		FORKEE_ASSERT(ptrace(PT_TRACE_ME, 0, NULL, 0) != -1);
+
+		DPRINTF("Before running assembly from child\n");
+		set_xmm_regs(xmm);
+
+		DPRINTF("Before exiting of the child process\n");
+		_exit(exitval);
+	}
+	DPRINTF("Parent process PID=%d, child's PID=%d\n", getpid(), child);
+
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child);
+
+	validate_status_stopped(status, sigval);
+
+	iov.iov_base = &xst;
+	iov.iov_len = sizeof(xst);
+
+	DPRINTF("Call GETXSTATE for the child process\n");
+	SYSCALL_REQUIRE(ptrace(PT_GETXSTATE, child, &iov, 0) != -1);
+
+	ATF_REQUIRE(xst.xs_rfbm & XCR0_SSE);
+	ATF_REQUIRE(xst.xs_xstate_bv & XCR0_SSE);
+
+	/* Non-fatal checks so that every mismatching register is reported. */
+	for (i = 0; i < nregs; i++) {
+		ATF_CHECK_MSG(!memcmp(&xst.xs_fxsave.fx_xmm[i], &xmm[i],
+		    sizeof(*xmm)), "xmm%zu does not match", i);
+	}
+
+	DPRINTF("Before resuming the child process where it left off and "
+	    "without signal to be sent\n");
+	SYSCALL_REQUIRE(ptrace(PT_CONTINUE, child, (void *)1, 0) != -1);
+
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child);
+
+	validate_status_exited(status, exitval);
+
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_FAILURE(ECHILD, wpid = TWAIT_GENERIC(child, &status, 0));
+}
+
+ATF_TC(x86_xstate_xmm_write);
+ATF_TC_HEAD(x86_xstate_xmm_write, tc)
+{
+	atf_tc_set_md_var(tc, "descr",
+	    "Set xmm0..xmm15 (..xmm7 on i386) reg values into a debugged "
+	    "program via PT_SETXSTATE and compare the result against expected.");
+}
+
+/*
+ * Converse of the read test: the child calls get_xmm_regs() and, after
+ * it returns, asserts that the values it stored match the pattern
+ * table.  While the child is stopped, the parent reads the state with
+ * PT_GETXSTATE, overwrites the fx_xmm[] slots with the table, restricts
+ * xs_rfbm/xs_xstate_bv to the SSE component and writes the state back
+ * with PT_SETXSTATE before continuing the child.
+ */
+ATF_TC_BODY(x86_xstate_xmm_write, tc)
+{
+	const int exitval = 5;
+	pid_t child, wpid;
+#if defined(TWAIT_HAVE_STATUS)
+	const int sigval = SIGTRAP;
+	int status;
+#endif
+	struct xstate xst;
+	struct iovec iov;
+	size_t i;
+
+	/* One 128-bit pattern per XMM register available on this arch. */
+	const struct {
+		uint64_t a, b;
+	} xmm[] __aligned(16) = {
+		{ 0x0706050403020100, 0x0F0E0D0C0B0A0908, },
+		{ 0x0807060504030201, 0x100F0E0D0C0B0A09, },
+		{ 0x0908070605040302, 0x11100F0E0D0C0B0A, },
+		{ 0x0A09080706050403, 0x1211100F0E0D0C0B, },
+		{ 0x0B0A090807060504, 0x131211100F0E0D0C, },
+		{ 0x0C0B0A0908070605, 0x14131211100F0E0D, },
+		{ 0x0D0C0B0A09080706, 0x1514131211100F0E, },
+		{ 0x0E0D0C0B0A090807, 0x161514131211100F, },
+#if defined(__x86_64__)
+		{ 0x0F0E0D0C0B0A0908, 0x1716151413121110, },
+		{ 0x100F0E0D0C0B0A09, 0x1817161514131211, },
+		{ 0x11100F0E0D0C0B0A, 0x1918171615141312, },
+		{ 0x1211100F0E0D0C0B, 0x1A19181716151413, },
+		{ 0x131211100F0E0D0C, 0x1B1A191817161514, },
+		{ 0x14131211100F0E0D, 0x1C1B1A1918171615, },
+		{ 0x1514131211100F0E, 0x1D1C1B1A19181716, },
+		{ 0x161514131211100F, 0x1E1D1C1B1A191817, },
+#endif
+	};
+	/* 8 on i386, 16 on x86_64: follows the table above. */
+	const size_t nregs = sizeof(xmm) / sizeof(xmm[0]);
+
+	/* verify whether SSE is supported here */
+	DPRINTF("Before invoking cpuid\n");
+	{
+		unsigned int eax, ebx, ecx, edx;
+		if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
+			atf_tc_skip("CPUID is not supported by the CPU");
+
+		DPRINTF("cpuid: EDX = %08x\n", edx);
+
+		if (!(edx & bit_SSE))
+			atf_tc_skip("SSE is not supported by the CPU");
+	}
+
+	DPRINTF("Before forking process PID=%d\n", getpid());
+	SYSCALL_REQUIRE((child = fork()) != -1);
+	if (child == 0) {
+		struct {
+			uint64_t a, b;
+		} v_xmm[16] __aligned(16);
+
+		DPRINTF("Before calling PT_TRACE_ME from child %d\n", getpid());
+		FORKEE_ASSERT(ptrace(PT_TRACE_ME, 0, NULL, 0) != -1);
+
+		DPRINTF("Before running assembly from child\n");
+		get_xmm_regs(v_xmm);
+
+		DPRINTF("Before comparing results\n");
+		for (i = 0; i < nregs; i++) {
+			FORKEE_ASSERT(!memcmp(&v_xmm[i], &xmm[i],
+			    sizeof(*xmm)));
+		}
+
+		DPRINTF("Before exiting of the child process\n");
+		_exit(exitval);
+	}
+	DPRINTF("Parent process PID=%d, child's PID=%d\n", getpid(), child);
+
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child);
+
+	validate_status_stopped(status, sigval);
+
+	iov.iov_base = &xst;
+	iov.iov_len = sizeof(xst);
+
+	DPRINTF("Call GETXSTATE for the child process\n");
+	SYSCALL_REQUIRE(ptrace(PT_GETXSTATE, child, &iov, 0) != -1);
+
+	ATF_REQUIRE(xst.xs_rfbm & XCR0_SSE);
+
+	/* Ask PT_SETXSTATE to update the SSE component only. */
+	xst.xs_rfbm = XCR0_SSE;
+	xst.xs_xstate_bv = XCR0_SSE;
+
+	for (i = 0; i < nregs; i++)
+		memcpy(&xst.xs_fxsave.fx_xmm[i], &xmm[i], sizeof(*xmm));
+
+	DPRINTF("Call SETXSTATE for the child process\n");
+	SYSCALL_REQUIRE(ptrace(PT_SETXSTATE, child, &iov, 0) != -1);
+
+	DPRINTF("Before resuming the child process where it left off and "
+	    "without signal to be sent\n");
+	SYSCALL_REQUIRE(ptrace(PT_CONTINUE, child, (void *)1, 0) != -1);
+
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child);
+
+	validate_status_exited(status, exitval);
+
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_FAILURE(ECHILD, wpid = TWAIT_GENERIC(child, &status, 0));
+}
+
+/*
+ * Load ymm0..ymm7 (ymm0..ymm15 on x86_64) from consecutive 32-byte
+ * slots starting at `ymm', then execute int3.  The callers in this
+ * file pass a 32-byte-aligned table.
+ *
+ * The asm reads memory through %0 that is not listed as an operand, so
+ * a "memory" clobber is declared to make the compiler treat the table
+ * as accessed by the asm.
+ */
+__attribute__((target("avx")))
+static __inline void set_ymm_regs(const void* ymm)
+{
+	__asm__ __volatile__(
+		"vmovaps  0x000(%0), %%ymm0\n\t"
+		"vmovaps  0x020(%0), %%ymm1\n\t"
+		"vmovaps  0x040(%0), %%ymm2\n\t"
+		"vmovaps  0x060(%0), %%ymm3\n\t"
+		"vmovaps  0x080(%0), %%ymm4\n\t"
+		"vmovaps  0x0A0(%0), %%ymm5\n\t"
+		"vmovaps  0x0C0(%0), %%ymm6\n\t"
+		"vmovaps  0x0E0(%0), %%ymm7\n\t"
+#if defined(__x86_64__)
+		"vmovaps  0x100(%0), %%ymm8\n\t"
+		"vmovaps  0x120(%0), %%ymm9\n\t"
+		"vmovaps  0x140(%0), %%ymm10\n\t"
+		"vmovaps  0x160(%0), %%ymm11\n\t"
+		"vmovaps  0x180(%0), %%ymm12\n\t"
+		"vmovaps  0x1A0(%0), %%ymm13\n\t"
+		"vmovaps  0x1C0(%0), %%ymm14\n\t"
+		"vmovaps  0x1E0(%0), %%ymm15\n\t"
+#endif
+		"int3\n\t"
+		:
+		: "b"(ymm)
+		: "memory",
+		"%ymm0", "%ymm1", "%ymm2", "%ymm3", "%ymm4", "%ymm5", "%ymm6",
+		"%ymm7"
+#if defined(__x86_64__)
+		, "%ymm8", "%ymm9", "%ymm10", "%ymm11", "%ymm12", "%ymm13",
+		"%ymm14", "%ymm15"
+#endif
+	);
+}
+
+ATF_TC(x86_xstate_ymm_read);
+ATF_TC_HEAD(x86_xstate_ymm_read, tc)
+{
+	atf_tc_set_md_var(tc, "descr",
+	    "Set ymm0..ymm15 (..ymm7 on i386) reg values from debugged program "
+	    "and read them via PT_GETXSTATE, comparing values against expected.");
+}
+
+/*
+ * AVX variant of the XMM read test.  The child loads the pattern table
+ * with set_ymm_regs(); the parent requires both the SSE and YMM_Hi128
+ * components in xs_rfbm and xs_xstate_bv.  The first 16 bytes of each
+ * table entry (members a, b) are compared against fx_xmm[] and the
+ * last 16 bytes (members c, d) against xs_ymm_hi128.xs_ymm[].
+ */
+ATF_TC_BODY(x86_xstate_ymm_read, tc)
+{
+	const int exitval = 5;
+	pid_t child, wpid;
+#if defined(TWAIT_HAVE_STATUS)
+	const int sigval = SIGTRAP;
+	int status;
+#endif
+	struct xstate xst;
+	struct iovec iov;
+	size_t i;
+
+	/* One 256-bit pattern per YMM register available on this arch. */
+	const struct {
+		uint64_t a, b, c, d;
+	} ymm[] __aligned(32) = {
+		{ 0x0706050403020100, 0x0F0E0D0C0B0A0908,
+		  0x1716151413121110, 0x1F1E1D1C1B1A1918, },
+		{ 0x0807060504030201, 0x100F0E0D0C0B0A09,
+		  0x1817161514131211, 0x201F1E1D1C1B1A19, },
+		{ 0x0908070605040302, 0x11100F0E0D0C0B0A,
+		  0x1918171615141312, 0x21201F1E1D1C1B1A, },
+		{ 0x0A09080706050403, 0x1211100F0E0D0C0B,
+		  0x1A19181716151413, 0x2221201F1E1D1C1B, },
+		{ 0x0B0A090807060504, 0x131211100F0E0D0C,
+		  0x1B1A191817161514, 0x232221201F1E1D1C, },
+		{ 0x0C0B0A0908070605, 0x14131211100F0E0D,
+		  0x1C1B1A1918171615, 0x24232221201F1E1D, },
+		{ 0x0D0C0B0A09080706, 0x1514131211100F0E,
+		  0x1D1C1B1A19181716, 0x2524232221201F1E, },
+		{ 0x0E0D0C0B0A090807, 0x161514131211100F,
+		  0x1E1D1C1B1A191817, 0x262524232221201F, },
+#if defined(__x86_64__)
+		{ 0x0F0E0D0C0B0A0908, 0x1716151413121110,
+		  0x1F1E1D1C1B1A1918, 0x2726252423222120, },
+		{ 0x100F0E0D0C0B0A09, 0x1817161514131211,
+		  0x201F1E1D1C1B1A19, 0x2827262524232221, },
+		{ 0x11100F0E0D0C0B0A, 0x1918171615141312,
+		  0x21201F1E1D1C1B1A, 0x2928272625242322, },
+		{ 0x1211100F0E0D0C0B, 0x1A19181716151413,
+		  0x2221201F1E1D1C1B, 0x2A29282726252423, },
+		{ 0x131211100F0E0D0C, 0x1B1A191817161514,
+		  0x232221201F1E1D1C, 0x2B2A292827262524, },
+		{ 0x14131211100F0E0D, 0x1C1B1A1918171615,
+		  0x24232221201F1E1D, 0x2C2B2A2928272625, },
+		{ 0x1514131211100F0E, 0x1D1C1B1A19181716,
+		  0x2524232221201F1E, 0x2D2C2B2A29282726, },
+		{ 0x161514131211100F, 0x1E1D1C1B1A191817,
+		  0x262524232221201F, 0x2E2D2C2B2A292827, },
+#endif
+	};
+	/* 8 on i386, 16 on x86_64: follows the table above. */
+	const size_t nregs = sizeof(ymm) / sizeof(ymm[0]);
+
+	/* verify whether AVX is supported here */
+	DPRINTF("Before invoking cpuid\n");
+	{
+		unsigned int eax, ebx, ecx, edx;
+		if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
+			atf_tc_skip("CPUID is not supported by the CPU");
+
+		DPRINTF("cpuid: ECX = %08x\n", ecx);
+
+		if (!(ecx & bit_AVX))
+			atf_tc_skip("AVX is not supported by the CPU");
+	}
+
+	DPRINTF("Before forking process PID=%d\n", getpid());
+	SYSCALL_REQUIRE((child = fork()) != -1);
+	if (child == 0) {
+		DPRINTF("Before calling PT_TRACE_ME from child %d\n", getpid());
+		FORKEE_ASSERT(ptrace(PT_TRACE_ME, 0, NULL, 0) != -1);
+
+		DPRINTF("Before running assembly from child\n");
+		set_ymm_regs(ymm);
+
+		DPRINTF("Before exiting of the child process\n");
+		_exit(exitval);
+	}
+	DPRINTF("Parent process PID=%d, child's PID=%d\n", getpid(), child);
+
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child);
+
+	validate_status_stopped(status, sigval);
+
+	iov.iov_base = &xst;
+	iov.iov_len = sizeof(xst);
+
+	DPRINTF("Call GETXSTATE for the child process\n");
+	SYSCALL_REQUIRE(ptrace(PT_GETXSTATE, child, &iov, 0) != -1);
+
+	ATF_REQUIRE(xst.xs_rfbm & XCR0_SSE);
+	ATF_REQUIRE(xst.xs_rfbm & XCR0_YMM_Hi128);
+	ATF_REQUIRE(xst.xs_xstate_bv & XCR0_SSE);
+	ATF_REQUIRE(xst.xs_xstate_bv & XCR0_YMM_Hi128);
+
+	/* Non-fatal checks so that every mismatching half is reported. */
+	for (i = 0; i < nregs; i++) {
+		ATF_CHECK_MSG(!memcmp(&xst.xs_fxsave.fx_xmm[i], &ymm[i].a,
+		    sizeof(*ymm)/2), "ymm%zu low half does not match", i);
+		ATF_CHECK_MSG(!memcmp(&xst.xs_ymm_hi128.xs_ymm[i], &ymm[i].c,
+		    sizeof(*ymm)/2), "ymm%zu high half does not match", i);
+	}
+
+	DPRINTF("Before resuming the child process where it left off and "
+	    "without signal to be sent\n");
+	SYSCALL_REQUIRE(ptrace(PT_CONTINUE, child, (void *)1, 0) != -1);
+
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child);
+
+	validate_status_exited(status, exitval);
+
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_FAILURE(ECHILD, wpid = TWAIT_GENERIC(child, &status, 0));
+}
+
+/*
+ * Fill ymm0..ymm7 (ymm0..ymm15 on x86_64) with a 0x0F byte pattern,
+ * execute int3, and afterwards store the registers into consecutive
+ * 32-byte slots starting at `v_ymm'.  The caller in this file passes a
+ * 32-byte-aligned array of 16 slots.
+ *
+ * The asm writes memory through %0 that is not listed as an output
+ * operand, so a "memory" clobber is declared; without it the compiler
+ * may assume the destination buffer is left untouched.
+ */
+__attribute__((target("avx")))
+static __inline void get_ymm_regs(void* v_ymm)
+{
+	const struct {
+		uint64_t a, b, c, d;
+	} fill __aligned(32) = {
+		0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F,
+		0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F
+	};
+
+	__asm__ __volatile__(
+		/* fill registers with clobber pattern */
+		"vmovaps  %1, %%ymm0\n\t"
+		"vmovaps  %1, %%ymm1\n\t"
+		"vmovaps  %1, %%ymm2\n\t"
+		"vmovaps  %1, %%ymm3\n\t"
+		"vmovaps  %1, %%ymm4\n\t"
+		"vmovaps  %1, %%ymm5\n\t"
+		"vmovaps  %1, %%ymm6\n\t"
+		"vmovaps  %1, %%ymm7\n\t"
+#if defined(__x86_64__)
+		"vmovaps  %1, %%ymm8\n\t"
+		"vmovaps  %1, %%ymm9\n\t"
+		"vmovaps  %1, %%ymm10\n\t"
+		"vmovaps  %1, %%ymm11\n\t"
+		"vmovaps  %1, %%ymm12\n\t"
+		"vmovaps  %1, %%ymm13\n\t"
+		"vmovaps  %1, %%ymm14\n\t"
+		"vmovaps  %1, %%ymm15\n\t"
+#endif
+		"\n\t"
+		"int3\n\t"
+		"\n\t"
+		"vmovaps  %%ymm0,  0x000(%0)\n\t"
+		"vmovaps  %%ymm1,  0x020(%0)\n\t"
+		"vmovaps  %%ymm2,  0x040(%0)\n\t"
+		"vmovaps  %%ymm3,  0x060(%0)\n\t"
+		"vmovaps  %%ymm4,  0x080(%0)\n\t"
+		"vmovaps  %%ymm5,  0x0A0(%0)\n\t"
+		"vmovaps  %%ymm6,  0x0C0(%0)\n\t"
+		"vmovaps  %%ymm7,  0x0E0(%0)\n\t"
+#if defined(__x86_64__)
+		"vmovaps  %%ymm8,  0x100(%0)\n\t"
+		"vmovaps  %%ymm9,  0x120(%0)\n\t"
+		"vmovaps  %%ymm10, 0x140(%0)\n\t"
+		"vmovaps  %%ymm11, 0x160(%0)\n\t"
+		"vmovaps  %%ymm12, 0x180(%0)\n\t"
+		"vmovaps  %%ymm13, 0x1A0(%0)\n\t"
+		"vmovaps  %%ymm14, 0x1C0(%0)\n\t"
+		"vmovaps  %%ymm15, 0x1E0(%0)\n\t"
+#endif
+		:
+		: "a"(v_ymm), "m"(fill)
+		: "memory",
+		"%ymm0", "%ymm1", "%ymm2", "%ymm3", "%ymm4", "%ymm5", "%ymm6", "%ymm7"
+#if defined(__x86_64__)
+		, "%ymm8", "%ymm9", "%ymm10", "%ymm11", "%ymm12", "%ymm13", "%ymm14",
+		"%ymm15"
+#endif
+	);
+}
+
+ATF_TC(x86_xstate_ymm_write);
+ATF_TC_HEAD(x86_xstate_ymm_write, tc)
+{
+	atf_tc_set_md_var(tc, "descr",
+	    "Set ymm0..ymm15 (..ymm7 on i386) reg values into a debugged "
+	    "program via PT_SETXSTATE and compare the result against expected.");
+}
+
+/*
+ * AVX variant of the XMM write test.  The child calls get_ymm_regs()
+ * and asserts that the stored values match the pattern table.  While
+ * the child is stopped, the parent copies the first 16 bytes of each
+ * table entry into fx_xmm[] and the last 16 bytes into
+ * xs_ymm_hi128.xs_ymm[], restricts xs_rfbm/xs_xstate_bv to the SSE and
+ * YMM_Hi128 components, and writes the state with PT_SETXSTATE.
+ */
+ATF_TC_BODY(x86_xstate_ymm_write, tc)
+{
+	const int exitval = 5;
+	pid_t child, wpid;
+#if defined(TWAIT_HAVE_STATUS)
+	const int sigval = SIGTRAP;
+	int status;
+#endif
+	struct xstate xst;
+	struct iovec iov;
+	size_t i;
+
+	/* One 256-bit pattern per YMM register available on this arch. */
+	const struct {
+		uint64_t a, b, c, d;
+	} ymm[] __aligned(32) = {
+		{ 0x0706050403020100, 0x0F0E0D0C0B0A0908,
+		  0x1716151413121110, 0x1F1E1D1C1B1A1918, },
+		{ 0x0807060504030201, 0x100F0E0D0C0B0A09,
+		  0x1817161514131211, 0x201F1E1D1C1B1A19, },
+		{ 0x0908070605040302, 0x11100F0E0D0C0B0A,
+		  0x1918171615141312, 0x21201F1E1D1C1B1A, },
+		{ 0x0A09080706050403, 0x1211100F0E0D0C0B,
+		  0x1A19181716151413, 0x2221201F1E1D1C1B, },
+		{ 0x0B0A090807060504, 0x131211100F0E0D0C,
+		  0x1B1A191817161514, 0x232221201F1E1D1C, },
+		{ 0x0C0B0A0908070605, 0x14131211100F0E0D,
+		  0x1C1B1A1918171615, 0x24232221201F1E1D, },
+		{ 0x0D0C0B0A09080706, 0x1514131211100F0E,
+		  0x1D1C1B1A19181716, 0x2524232221201F1E, },
+		{ 0x0E0D0C0B0A090807, 0x161514131211100F,
+		  0x1E1D1C1B1A191817, 0x262524232221201F, },
+#if defined(__x86_64__)
+		{ 0x0F0E0D0C0B0A0908, 0x1716151413121110,
+		  0x1F1E1D1C1B1A1918, 0x2726252423222120, },
+		{ 0x100F0E0D0C0B0A09, 0x1817161514131211,
+		  0x201F1E1D1C1B1A19, 0x2827262524232221, },
+		{ 0x11100F0E0D0C0B0A, 0x1918171615141312,
+		  0x21201F1E1D1C1B1A, 0x2928272625242322, },
+		{ 0x1211100F0E0D0C0B, 0x1A19181716151413,
+		  0x2221201F1E1D1C1B, 0x2A29282726252423, },
+		{ 0x131211100F0E0D0C, 0x1B1A191817161514,
+		  0x232221201F1E1D1C, 0x2B2A292827262524, },
+		{ 0x14131211100F0E0D, 0x1C1B1A1918171615,
+		  0x24232221201F1E1D, 0x2C2B2A2928272625, },
+		{ 0x1514131211100F0E, 0x1D1C1B1A19181716,
+		  0x2524232221201F1E, 0x2D2C2B2A29282726, },
+		{ 0x161514131211100F, 0x1E1D1C1B1A191817,
+		  0x262524232221201F, 0x2E2D2C2B2A292827, },
+#endif
+	};
+	/* 8 on i386, 16 on x86_64: follows the table above. */
+	const size_t nregs = sizeof(ymm) / sizeof(ymm[0]);
+
+	/* verify whether AVX is supported here */
+	DPRINTF("Before invoking cpuid\n");
+	{
+		unsigned int eax, ebx, ecx, edx;
+		if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
+			atf_tc_skip("CPUID is not supported by the CPU");
+
+		DPRINTF("cpuid: ECX = %08x\n", ecx);
+
+		if (!(ecx & bit_AVX))
+			atf_tc_skip("AVX is not supported by the CPU");
+	}
+
+	DPRINTF("Before forking process PID=%d\n", getpid());
+	SYSCALL_REQUIRE((child = fork()) != -1);
+	if (child == 0) {
+		struct {
+			uint64_t a, b, c, d;
+		} v_ymm[16] __aligned(32);
+
+		DPRINTF("Before calling PT_TRACE_ME from child %d\n", getpid());
+		FORKEE_ASSERT(ptrace(PT_TRACE_ME, 0, NULL, 0) != -1);
+
+		DPRINTF("Before running assembly from child\n");
+		get_ymm_regs(v_ymm);
+
+		DPRINTF("Before comparing results\n");
+		for (i = 0; i < nregs; i++) {
+			FORKEE_ASSERT(!memcmp(&v_ymm[i], &ymm[i],
+			    sizeof(*ymm)));
+		}
+
+		DPRINTF("Before exiting of the child process\n");
+		_exit(exitval);
+	}
+	DPRINTF("Parent process PID=%d, child's PID=%d\n", getpid(), child);
+
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child);
+
+	validate_status_stopped(status, sigval);
+
+	iov.iov_base = &xst;
+	iov.iov_len = sizeof(xst);
+
+	DPRINTF("Call GETXSTATE for the child process\n");
+	SYSCALL_REQUIRE(ptrace(PT_GETXSTATE, child, &iov, 0) != -1);
+
+	ATF_REQUIRE(xst.xs_rfbm & XCR0_SSE);
+	ATF_REQUIRE(xst.xs_rfbm & XCR0_YMM_Hi128);
+
+	/* Ask PT_SETXSTATE to update the SSE and YMM_Hi128 components only. */
+	xst.xs_rfbm = XCR0_SSE | XCR0_YMM_Hi128;
+	xst.xs_xstate_bv = XCR0_SSE | XCR0_YMM_Hi128;
+
+	for (i = 0; i < nregs; i++) {
+		memcpy(&xst.xs_fxsave.fx_xmm[i], &ymm[i].a, sizeof(*ymm)/2);
+		memcpy(&xst.xs_ymm_hi128.xs_ymm[i], &ymm[i].c, sizeof(*ymm)/2);
+	}
+
+	DPRINTF("Call SETXSTATE for the child process\n");
+	SYSCALL_REQUIRE(ptrace(PT_SETXSTATE, child, &iov, 0) != -1);
+
+	DPRINTF("Before resuming the child process where it left off and "
+	    "without signal to be sent\n");
+	SYSCALL_REQUIRE(ptrace(PT_CONTINUE, child, (void *)1, 0) != -1);
+
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child);
+
+	validate_status_exited(status, exitval);
+
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_FAILURE(ECHILD, wpid = TWAIT_GENERIC(child, &status, 0));
+}
+
 /// ----------------------------------------------------------------------------
 
 #define ATF_TP_ADD_TCS_PTRACE_WAIT_X86() \
@@ -2870,7 +3754,13 @@ ATF_TC_BODY(x86_regs_xmm_write, tc)
 	ATF_TP_ADD_TC_HAVE_FPREGS(tp, x86_regs_mm_read); \
 	ATF_TP_ADD_TC_HAVE_FPREGS(tp, x86_regs_mm_write); \
 	ATF_TP_ADD_TC_HAVE_FPREGS(tp, x86_regs_xmm_read); \
-	ATF_TP_ADD_TC_HAVE_FPREGS(tp, x86_regs_xmm_write);
+	ATF_TP_ADD_TC_HAVE_FPREGS(tp, x86_regs_xmm_write); \
+	ATF_TP_ADD_TC(tp, x86_xstate_mm_read); \
+	ATF_TP_ADD_TC(tp, x86_xstate_mm_write); \
+	ATF_TP_ADD_TC(tp, x86_xstate_xmm_read); \
+	ATF_TP_ADD_TC(tp, x86_xstate_xmm_write); \
+	ATF_TP_ADD_TC(tp, x86_xstate_ymm_read); \
+	ATF_TP_ADD_TC(tp, x86_xstate_ymm_write);
 #else
 #define ATF_TP_ADD_TCS_PTRACE_WAIT_X86()
 #endif
Attachment:
signature.asc
Description: This is a digitally signed message part