tech-kern archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: [PATCH v3 2/2] Implement PT_GETXSTATE and PT_SETXSTATE



Hi,

Attached is the next version of the unified patch.  I've updated the boolean
logic as requested, and fixed the indentation.

-- 
Best regards,
Michał Górny

diff --git a/lib/libc/sys/ptrace.2 b/lib/libc/sys/ptrace.2
index 9cd99ac94bd1..2ea13872e421 100644
--- a/lib/libc/sys/ptrace.2
+++ b/lib/libc/sys/ptrace.2
@@ -1,7 +1,7 @@
 .\"	$NetBSD: ptrace.2,v 1.74 2019/06/12 12:33:42 wiz Exp $
 .\"
 .\" This file is in the public domain.
-.Dd June 12, 2019
+.Dd June 22, 2019
 .Dt PTRACE 2
 .Os
 .Sh NAME
@@ -771,6 +771,69 @@ The
 argument contains the LWP ID of the thread whose registers are to
 be written.
 If zero is supplied, the first thread of the process is written.
+.It Dv PT_GETXSTATE
+This request reads the traced process' FPU extended state into
+the
+.Dq Li "struct xstate"
+(defined in
+.In machine/cpu_extended_state.h ) .
+.Fa addr
+should be a pointer to
+.Dq Li "struct iovec"
+(defined in
+.In sys/uio.h )
+specifying the pointer to the aforementioned struct as
+.Fa iov_base
+and its size as
+.Fa iov_len .
+The
+.Fa data
+argument contains the LWP ID of the thread whose registers are to
+be read.
+If zero is supplied, the first thread of the process is read.
+The struct will be filled up to the specified
+.Fa iov_len .
+The caller needs to check
+.Fa xs_rfbm
+bitmap in order to determine which fields were provided by the CPU,
+and may check
+.Fa xs_xstate_bv
+to determine which component states were changed from the initial state.
+.It Dv PT_SETXSTATE
+This request is the converse of
+.Dv PT_GETXSTATE ;
+it loads the traced process' extended FPU state from the
+.Dq Li "struct xstate"
+(defined in
+.In machine/cpu_extended_state.h ) .
+.Fa addr
+should be a pointer to
+.Dq Li "struct iovec"
+(defined in
+.In sys/uio.h )
+specifying the pointer to the aforementioned struct as
+.Fa iov_base
+and its size as
+.Fa iov_len .
+The
+.Fa data
+argument contains the LWP ID of the thread whose registers are to
+be written.
+If zero is supplied, the first thread of the process is written.
+The
+.Fa xs_rfbm
+field of the supplied xstate specifies which state components are to
+be updated.  Other components (fields) will be ignored.  The
+.Fa xs_xstate_bv
+specifies whether component state should be set to provided values
+(when 1) or reset to uninitialized (when 0).  The request
+will fail if
+.Fa xs_xstate_bv
+is not a subset of
+.Fa xs_rfbm ,
+or any of the specified components is not supported by the CPU or kernel
+(i.e. not returned by
+.Dv PT_GETXSTATE ) .
 .El
 .Sh ERRORS
 Some requests can cause
@@ -819,8 +882,10 @@ was neither 0 nor a legal signal number.
 .Dv PT_GETREGS ,
 .Dv PT_SETREGS ,
 .Dv PT_GETFPREGS ,
+.Dv PT_SETFPREGS ,
+.Dv PT_GETXSTATE ,
 or
-.Dv PT_SETFPREGS
+.Dv PT_SETXSTATE
 was attempted on a process with no valid register set.
 (This is normally true only of system processes.)
 .It
@@ -832,6 +897,13 @@ or
 with
 .Dv vm.user_va0_disable
 set to 1.
+.It
+.Dv PT_SETXSTATE
+attempted to set state components not supported by the kernel,
+or
+.Fa xs_xstate_bv
+was not a subset of
+.Fa xs_rfbm .
 .El
 .It Bq Er EPERM
 .Bl -bullet -compact
diff --git a/sys/arch/amd64/amd64/netbsd32_machdep.c b/sys/arch/amd64/amd64/netbsd32_machdep.c
index 81bf78f6ecc4..3e007c79761b 100644
--- a/sys/arch/amd64/amd64/netbsd32_machdep.c
+++ b/sys/arch/amd64/amd64/netbsd32_machdep.c
@@ -353,6 +353,8 @@ netbsd32_ptrace_translate_request(int req)
 	case PT32_SETDBREGS:		return PT_SETDBREGS;
 	case PT32_SETSTEP:		return PT_SETSTEP;
 	case PT32_CLEARSTEP:		return PT_CLEARSTEP;
+	case PT32_GETXSTATE:		return PT_GETXSTATE;
+	case PT32_SETXSTATE:		return PT_SETXSTATE;
 	default:			return -1;
 	}
 }
diff --git a/sys/arch/amd64/amd64/process_machdep.c b/sys/arch/amd64/amd64/process_machdep.c
index c204556c9168..d4e2c9a4009e 100644
--- a/sys/arch/amd64/amd64/process_machdep.c
+++ b/sys/arch/amd64/amd64/process_machdep.c
@@ -84,6 +84,9 @@ __KERNEL_RCSID(0, "$NetBSD: process_machdep.c,v 1.39 2019/02/11 14:59:32 cherry
 #include <sys/proc.h>
 #include <sys/ptrace.h>
 
+#include <uvm/uvm_extern.h>
+
+#include <compat/netbsd32/netbsd32.h>
 #include <machine/psl.h>
 #include <machine/reg.h>
 #include <machine/segments.h>
@@ -288,3 +291,131 @@ process_set_pc(struct lwp *l, void *addr)
 
 	return 0;
 }
+
+#ifdef __HAVE_PTRACE_MACHDEP
+static int
+process_machdep_read_xstate(struct lwp *l, struct xstate *regs)
+{
+	return process_read_xstate(l, regs);	/* shared x86 code (fpu.c) */
+}
+
+static int
+process_machdep_write_xstate(struct lwp *l, const struct xstate *regs)
+{
+	int error;
+
+	/*
+	 * Validate the caller's bitmaps; nothing is written if they are rejected.
+	 */
+	error = process_verify_xstate(regs);
+	if (error != 0)
+		return error;
+
+	return process_write_xstate(l, regs);
+}
+
+/*
+ * Handle PT_GETXSTATE/PT_SETXSTATE.  addr is the tracer's user-space
+ * pointer to an iovec (netbsd32_iovec for PK_32 tracers), so it is fetched
+ * with copyin() instead of being dereferenced directly in the kernel.
+ */
+int
+ptrace_machdep_dorequest(struct lwp *l, struct lwp *lt, int req, void *addr,
+    int data)
+{
+	struct uio uio;
+	struct iovec iov;
+	struct vmspace *vm;
+	int error;
+	int write = 0;
+
+	switch (req) {
+	case PT_SETXSTATE:
+		write = 1;
+		/* FALLTHROUGH */
+	case PT_GETXSTATE:
+		/* write = 0 done above. */
+		if (!process_machdep_validxstate(lt->l_proc))
+			return EINVAL;
+		if (__predict_false(l->l_proc->p_flag & PK_32)) {
+			struct netbsd32_iovec iov32;
+			error = copyin(addr, &iov32, sizeof(iov32));
+			if (error)
+				return error;
+			iov.iov_base = NETBSD32PTR64(iov32.iov_base);
+			iov.iov_len = iov32.iov_len;
+		} else {
+			error = copyin(addr, &iov, sizeof(iov));
+			if (error)
+				return error;
+		}
+
+		error = proc_vmspace_getref(l->l_proc, &vm);
+		if (error)
+			return error;
+		if (iov.iov_len > sizeof(struct xstate))
+			iov.iov_len = sizeof(struct xstate);	/* clamp */
+		uio.uio_iov = &iov;
+		uio.uio_iovcnt = 1;
+		uio.uio_offset = 0;
+		uio.uio_resid = iov.iov_len;
+		uio.uio_rw = write ? UIO_WRITE : UIO_READ;
+		uio.uio_vmspace = vm;	/* iov_base lives in the tracer's space */
+		error = process_machdep_doxstate(l, lt, &uio);
+		uvmspace_free(vm);
+		return error;
+	}
+
+#ifdef DIAGNOSTIC
+	panic("ptrace_machdep: impossible");
+#endif
+
+	return 0;
+}
+
+/*
+ * The following functions are used by both ptrace(2) and procfs.
+ */
+
+int
+process_machdep_doxstate(struct lwp *curl, struct lwp *l, struct uio *uio)
+	/* curl:		 tracer (not referenced in this function) */
+	/* l:			 traced LWP whose xstate is read/written */
+{
+	int error;
+	struct xstate r;	/* kernel-side staging copy */
+	char *kv;		/* start of the window into r */
+	ssize_t kl;		/* length of that window */
+
+	memset(&r, 0, sizeof(r));	/* anything not filled in below is 0 */
+	kl = MIN(uio->uio_iov->iov_len, sizeof(r));
+	kv = (char *) &r;
+
+	kv += uio->uio_offset;	/* honour a caller-supplied starting offset */
+	kl -= uio->uio_offset;
+	if (kl > uio->uio_resid)
+		kl = uio->uio_resid;
+
+	if (kl < 0)		/* offset lies beyond the clamped length */
+		error = EINVAL;
+	else			/* always start from the LWP's current state */
+		error = process_machdep_read_xstate(l, &r);
+	if (error == 0)		/* transfer the window in uio_rw's direction */
+		error = uiomove(kv, kl, uio);
+	if (error == 0 && uio->uio_rw == UIO_WRITE)	/* then apply it */
+		error = process_machdep_write_xstate(l, &r);
+
+	uio->uio_offset = 0;
+	return error;
+}
+
+int
+process_machdep_validxstate(struct proc *p)
+{
+	/* Returns 1 if xstate requests may target p, 0 otherwise. */
+	if (p->p_flag & PK_SYSTEM)
+		return 0;	/* PK_SYSTEM processes are refused */
+
+	return 1;
+}
+#endif /* __HAVE_PTRACE_MACHDEP */
diff --git a/sys/arch/amd64/include/netbsd32_machdep.h b/sys/arch/amd64/include/netbsd32_machdep.h
index e7f018708c26..c2efb4f1315d 100644
--- a/sys/arch/amd64/include/netbsd32_machdep.h
+++ b/sys/arch/amd64/include/netbsd32_machdep.h
@@ -22,6 +22,8 @@
 #define	PT32_SETDBREGS		(PT_FIRSTMACH + 8)
 #define	PT32_SETSTEP		(PT_FIRSTMACH + 9)
 #define	PT32_CLEARSTEP		(PT_FIRSTMACH + 10)
+#define	PT32_GETXSTATE		(PT_FIRSTMACH + 11)
+#define	PT32_SETXSTATE		(PT_FIRSTMACH + 12)
 
 #define NETBSD32_POINTER_TYPE uint32_t
 typedef	struct { NETBSD32_POINTER_TYPE i32; } netbsd32_pointer_t;
diff --git a/sys/arch/amd64/include/ptrace.h b/sys/arch/amd64/include/ptrace.h
index 4eddffb6d23e..5bc4433b254e 100644
--- a/sys/arch/amd64/include/ptrace.h
+++ b/sys/arch/amd64/include/ptrace.h
@@ -45,6 +45,11 @@
 #define	PT_SETDBREGS		(PT_FIRSTMACH + 6)
 #define	PT_SETSTEP		(PT_FIRSTMACH + 7)
 #define	PT_CLEARSTEP		(PT_FIRSTMACH + 8)
+#define	PT_GETXSTATE		(PT_FIRSTMACH + 9)
+#define	PT_SETXSTATE		(PT_FIRSTMACH + 10)
+
+/* We have machine-dependent process tracing needs. */
+#define	__HAVE_PTRACE_MACHDEP
 
 #define PT_MACHDEP_STRINGS \
 	"PT_STEP", \
@@ -55,7 +60,9 @@
 	"PT_GETDBREGS", \
 	"PT_SETDBREGS", \
 	"PT_SETSTEP", \
-	"PT_CLEARSTEP",
+	"PT_CLEARSTEP", \
+	"PT_GETXSTATE", \
+	"PT_SETXSTATE"
 
 #include <machine/reg.h>
 #define PTRACE_REG_PC(r)	(r)->regs[_REG_RIP]
@@ -71,6 +78,20 @@
 #define PTRACE_BREAKPOINT_SIZE	1
 #define PTRACE_BREAKPOINT_ADJ	1
 
+#ifdef _KERNEL
+
+/*
+ * These are used in sys_ptrace() to find good ptrace(2) requests.
+ */
+#define	PTRACE_MACHDEP_REQUEST_CASES					\
+	case PT_GETXSTATE:						\
+	case PT_SETXSTATE:
+
+int process_machdep_doxstate(struct lwp *, struct lwp *, struct uio *);
+int process_machdep_validxstate(struct proc *);
+
+#endif /* _KERNEL */
+
 #ifdef _KERNEL_OPT
 #include "opt_compat_netbsd32.h"
 
diff --git a/sys/arch/i386/i386/process_machdep.c b/sys/arch/i386/i386/process_machdep.c
index 29216fd6a451..7ed1ceda5846 100644
--- a/sys/arch/i386/i386/process_machdep.c
+++ b/sys/arch/i386/i386/process_machdep.c
@@ -231,6 +231,12 @@ process_set_pc(struct lwp *l, void *addr)
 }
 
 #ifdef __HAVE_PTRACE_MACHDEP
+static int
+process_machdep_read_xstate(struct lwp *l, struct xstate *regs)
+{
+	return process_read_xstate(l, regs);	/* shared x86 code (fpu.c) */
+}
+
 static int
 process_machdep_read_xmmregs(struct lwp *l, struct xmmregs *regs)
 {
@@ -240,6 +246,21 @@ process_machdep_read_xmmregs(struct lwp *l, struct xmmregs *regs)
 	return 0;
 }
 
+static int
+process_machdep_write_xstate(struct lwp *l, const struct xstate *regs)
+{
+	int error;
+
+	/*
+	 * Validate the caller's bitmaps; nothing is written if they are rejected.
+	 */
+	error = process_verify_xstate(regs);
+	if (error != 0)
+		return error;
+
+	return process_write_xstate(l, regs);
+}
+
 static int
 process_machdep_write_xmmregs(struct lwp *l, struct xmmregs *regs)
 {
@@ -260,6 +281,9 @@ ptrace_machdep_dorequest(
 {
 	struct uio uio;
 	struct iovec iov;
+	struct iovec *user_iov = (struct iovec*)addr;
+	struct vmspace *vm;
+	int error;
 	int write = 0;
 
 	switch (req) {
@@ -271,33 +295,54 @@ ptrace_machdep_dorequest(
 		/* write = 0 done above. */
 		if (!process_machdep_validxmmregs(lt->l_proc))
 			return (EINVAL);
-		else {
-			struct vmspace *vm;
-			int error;
-
-			error = proc_vmspace_getref(l->l_proc, &vm);
-			if (error) {
-				return error;
-			}
-			iov.iov_base = addr;
-			iov.iov_len = sizeof(struct xmmregs);
-			uio.uio_iov = &iov;
-			uio.uio_iovcnt = 1;
-			uio.uio_offset = 0;
-			uio.uio_resid = sizeof(struct xmmregs);
-			uio.uio_rw = write ? UIO_WRITE : UIO_READ;
-			uio.uio_vmspace = vm;
-			error = process_machdep_doxmmregs(l, lt, &uio);
-			uvmspace_free(vm);
+		error = proc_vmspace_getref(l->l_proc, &vm);
+		if (error) {
+			return error;
+		}
+		iov.iov_base = addr;
+		iov.iov_len = sizeof(struct xmmregs);
+		uio.uio_iov = &iov;
+		uio.uio_iovcnt = 1;
+		uio.uio_offset = 0;
+		uio.uio_resid = sizeof(struct xmmregs);
+		uio.uio_rw = write ? UIO_WRITE : UIO_READ;
+		uio.uio_vmspace = vm;
+		error = process_machdep_doxmmregs(l, lt, &uio);
+		uvmspace_free(vm);
+		return error;
+
+	case PT_SETXSTATE:
+		write = 1;
+
+		/* FALLTHROUGH */
+	case PT_GETXSTATE:
+		/* write = 0 done above. */
+		if (!process_machdep_validxstate(lt->l_proc))
+			return EINVAL;
+		error = proc_vmspace_getref(l->l_proc, &vm);
+		if (error) {
 			return error;
 		}
+		iov.iov_base = user_iov->iov_base;
+		iov.iov_len = user_iov->iov_len;
+		if (iov.iov_len > sizeof(struct xstate))
+			iov.iov_len = sizeof(struct xstate);
+		uio.uio_iov = &iov;
+		uio.uio_iovcnt = 1;
+		uio.uio_offset = 0;
+		uio.uio_resid = iov.iov_len;
+		uio.uio_rw = write ? UIO_WRITE : UIO_READ;
+		uio.uio_vmspace = vm;
+		error = process_machdep_doxstate(l, lt, &uio);
+		uvmspace_free(vm);
+		return error;
 	}
 
 #ifdef DIAGNOSTIC
 	panic("ptrace_machdep: impossible");
 #endif
 
-	return (0);
+	return 0;
 }
 
 /*
@@ -348,5 +393,47 @@ process_machdep_validxmmregs(struct proc *p)
 
 	return (i386_use_fxsave);
 }
+
+int
+process_machdep_doxstate(struct lwp *curl, struct lwp *l, struct uio *uio)
+	/* curl:		 tracer (not referenced in this function) */
+	/* l:			 traced LWP whose xstate is read/written */
+{
+	int error;
+	struct xstate r;	/* kernel-side staging copy */
+	char *kv;		/* start of the window into r */
+	ssize_t kl;		/* length of that window */
+
+	memset(&r, 0, sizeof(r));	/* anything not filled in below is 0 */
+	kl = MIN(uio->uio_iov->iov_len, sizeof(r));
+	kv = (char *) &r;
+
+	kv += uio->uio_offset;	/* honour a caller-supplied starting offset */
+	kl -= uio->uio_offset;
+	if (kl > uio->uio_resid)
+		kl = uio->uio_resid;
+
+	if (kl < 0)		/* offset lies beyond the clamped length */
+		error = EINVAL;
+	else			/* always start from the LWP's current state */
+		error = process_machdep_read_xstate(l, &r);
+	if (error == 0)		/* transfer the window in uio_rw's direction */
+		error = uiomove(kv, kl, uio);
+	if (error == 0 && uio->uio_rw == UIO_WRITE)	/* then apply it */
+		error = process_machdep_write_xstate(l, &r);
+
+	uio->uio_offset = 0;
+	return error;
+}
+
+int
+process_machdep_validxstate(struct proc *p)
+{
+	/* Returns 1 if xstate requests may target p, 0 otherwise. */
+	if (p->p_flag & PK_SYSTEM)
+		return 0;	/* PK_SYSTEM processes are refused */
+
+	return 1;
+}
 #endif /* __HAVE_PTRACE_MACHDEP */
 #endif /* PTRACE_HOOKS */
diff --git a/sys/arch/i386/include/ptrace.h b/sys/arch/i386/include/ptrace.h
index 425651b14559..7e0ec5214135 100644
--- a/sys/arch/i386/include/ptrace.h
+++ b/sys/arch/i386/include/ptrace.h
@@ -90,6 +90,8 @@
 #define	PT_SETDBREGS		(PT_FIRSTMACH + 8)
 #define	PT_SETSTEP		(PT_FIRSTMACH + 9)
 #define	PT_CLEARSTEP		(PT_FIRSTMACH + 10)
+#define	PT_GETXSTATE		(PT_FIRSTMACH + 11)
+#define	PT_SETXSTATE		(PT_FIRSTMACH + 12)
 
 #define PT_MACHDEP_STRINGS \
 	"PT_STEP", \
@@ -102,8 +104,9 @@
 	"PT_GETDBREGS", \
 	"PT_SETDBREGS", \
 	"PT_SETSTEP", \
-	"PT_CLEARSTEP",
-
+	"PT_CLEARSTEP", \
+	"PT_GETXSTATE", \
+	"PT_SETXSTATE"
 
 #include <machine/reg.h>
 #define PTRACE_REG_PC(r)	(r)->r_eip
@@ -126,7 +129,9 @@
  */
 #define	PTRACE_MACHDEP_REQUEST_CASES					\
 	case PT_GETXMMREGS:						\
-	case PT_SETXMMREGS:
+	case PT_SETXMMREGS:						\
+	case PT_GETXSTATE:						\
+	case PT_SETXSTATE:
 
 /*
  * These are used to define machine-dependent procfs node types.
@@ -159,6 +164,8 @@ struct xmmregs;
 /* Functions used by both ptrace(2) and procfs. */
 int	process_machdep_doxmmregs(struct lwp *, struct lwp *, struct uio *);
 int	process_machdep_validxmmregs(struct proc *);
+int	process_machdep_doxstate(struct lwp *, struct lwp *, struct uio *);
+int	process_machdep_validxstate(struct proc *);
 
 /* Functions used by procfs. */
 struct mount;
diff --git a/sys/arch/x86/include/cpu.h b/sys/arch/x86/include/cpu.h
index 143ae3c5c5ec..589f179ce758 100644
--- a/sys/arch/x86/include/cpu.h
+++ b/sys/arch/x86/include/cpu.h
@@ -459,6 +459,8 @@ extern int x86_fpu_save;
 #define	FPU_SAVE_XSAVEOPT	3
 extern unsigned int x86_fpu_save_size;
 extern uint64_t x86_xsave_features;
+extern size_t x86_xsave_offsets[];
+extern size_t x86_xsave_sizes[];
 extern uint32_t x86_fpu_mxcsr_mask;
 extern bool x86_fpu_eager;
 
diff --git a/sys/arch/x86/include/cpu_extended_state.h b/sys/arch/x86/include/cpu_extended_state.h
index 38cb1d6c3396..8590a6814d6c 100644
--- a/sys/arch/x86/include/cpu_extended_state.h
+++ b/sys/arch/x86/include/cpu_extended_state.h
@@ -79,6 +79,17 @@ struct ymmreg {
 	uint8_t ymm_bytes[16];
 };
 
+/* Upper 256 bits of a 512-bit AVX-512 register; the low bits are kept
+ * in xmmregs and ymmregs. */
+struct zmmreg {
+	uint8_t zmm_bytes[32];	/* 32 bytes = 256 bits */
+};
+
+/* Whole 512-bit ZMM register, the form used for zmm16..zmm31. */
+struct hi16_zmmreg {
+	uint8_t zmm_bytes[64];	/* 64 bytes = 512 bits */
+};
+
 /*
  * Floating point unit registers (FSAVE instruction).
  *
@@ -139,6 +150,77 @@ struct xsave_ymm {
 };
 __CTASSERT(sizeof(struct xsave_ymm) == 256);
 
+/*
+ * AVX-512: opmask state (eight 64-bit mask registers, 64 bytes in total).
+ */
+struct xsave_opmask {
+	uint64_t xs_k[8];			/* k0..k7 registers. */
+};
+__CTASSERT(sizeof(struct xsave_opmask) == 64);
+
+/*
+ * AVX-512: ZMM_Hi256 state (16 registers x 32 bytes, 512 bytes in total).
+ */
+struct xsave_zmm_hi256 {
+	struct zmmreg xs_zmm[16];	/* High bits of zmm0..zmm15 registers. */
+};
+__CTASSERT(sizeof(struct xsave_zmm_hi256) == 512);
+
+/*
+ * AVX-512: Hi16_ZMM state (16 registers x 64 bytes, 1024 bytes in total).
+ */
+struct xsave_hi16_zmm {
+	struct hi16_zmmreg xs_hi16_zmm[16];	/* zmm16..zmm31 registers. */
+};
+__CTASSERT(sizeof(struct xsave_hi16_zmm) == 1024);
+
+/*
+ * Structure used to hold all interesting data from XSAVE, in predictable form.
+ * Note that this structure can have new members added to the end.
+ */
+struct xstate {
+	/*
+	 * The two following fields are bitmaps of XSAVE components.  They can be
+	 * matched against XCR0_* constants from <machine/specialreg.h>.
+	 */
+	/*
+	 * XSAVE/XRSTOR RFBM parameter.
+	 *
+	 * PT_GETXSTATE: 1 indicates that the respective XSAVE component is
+	 * supported and has been enabled for saving.  0 indicates that it is not
+	 * supported by the platform or kernel.
+	 *
+	 * PT_SETXSTATE: 1 indicates that the respective XSAVE component should
+	 * be updated to the value of its field (or reset if its xs_xstate_bv
+	 * bit is 0).  0 indicates that it should be left intact.  It is an error
+	 * to enable bits that are not supported by the platform or kernel.
+	 */
+	uint64_t xs_rfbm;
+	/*
+	 * XSAVE/XRSTOR xstate header.
+	 *
+	 * PT_GETXSTATE: 1 indicates that the respective XSAVE component has been
+	 * saved.  0 indicates that it had been in its CPU-defined initial value
+	 * at the time of saving (i.e. was not used by the program).
+	 *
+	 * PT_SETXSTATE: 1 indicates that the respective XSAVE component (if present
+	 * in xs_rfbm) should be set to the values in its field.  0 indicates
+	 * that it should be reset to the CPU-defined initial value.
+	 */
+	uint64_t xs_xstate_bv;
+
+	/* legacy FXSAVE area (used for x87 & SSE state) */
+	struct fxsave xs_fxsave;
+	/* AVX state: high bits of ymm0..ymm15 registers */
+	struct xsave_ymm xs_ymm_hi128;
+	/* AVX-512: opmask */
+	struct xsave_opmask xs_opmask;
+	/* AVX-512: high bits of zmm0..zmm15 registers */
+	struct xsave_zmm_hi256 xs_zmm_hi256;
+	/* AVX-512: whole zmm16..zmm31 registers */
+	struct xsave_hi16_zmm xs_hi16_zmm;
+};
+
 /*
  * The following union is placed at the end of the pcb.
  * It is defined this way to separate the definitions and to
diff --git a/sys/arch/x86/include/fpu.h b/sys/arch/x86/include/fpu.h
index 1f5ff58570de..334848afc76b 100644
--- a/sys/arch/x86/include/fpu.h
+++ b/sys/arch/x86/include/fpu.h
@@ -38,6 +38,10 @@ void process_write_fpregs_s87(struct lwp *, const struct save87 *);
 void process_read_fpregs_xmm(struct lwp *, struct fxsave *);
 void process_read_fpregs_s87(struct lwp *, struct save87 *);
 
+int process_read_xstate(struct lwp *, struct xstate *);
+int process_verify_xstate(const struct xstate *);
+int process_write_xstate(struct lwp *, const struct xstate *);
+
 #endif
 
 #endif /* _X86_FPU_H_ */
diff --git a/sys/arch/x86/include/specialreg.h b/sys/arch/x86/include/specialreg.h
index 4f8c4cca6db7..1c0e8c972b07 100644
--- a/sys/arch/x86/include/specialreg.h
+++ b/sys/arch/x86/include/specialreg.h
@@ -146,6 +146,26 @@
 #define XCR0_FPU	(XCR0_X87 | XCR0_SSE | XCR0_YMM_Hi128 | \
 			 XCR0_Opmask | XCR0_ZMM_Hi256 | XCR0_Hi16_ZMM)
 
+/*
+ * XSAVE component indices.
+ */
+#define XSAVE_X87	0
+#define XSAVE_SSE	1
+#define XSAVE_YMM_Hi128	2
+#define XSAVE_BNDREGS	3
+#define XSAVE_BNDCSR	4
+#define XSAVE_Opmask	5
+#define XSAVE_ZMM_Hi256	6
+#define XSAVE_Hi16_ZMM	7
+#define XSAVE_PT	8
+#define XSAVE_PKRU	9
+#define XSAVE_HDC	10
+
+/*
+ * Highest XSAVE component enabled by XCR0_FPU.
+ */
+#define XSAVE_MAX_COMPONENT XSAVE_Hi16_ZMM
+
 /*
  * CPUID "features" bits
  */
diff --git a/sys/arch/x86/x86/fpu.c b/sys/arch/x86/x86/fpu.c
index fac08d12db22..56782ff8e9f9 100644
--- a/sys/arch/x86/x86/fpu.c
+++ b/sys/arch/x86/x86/fpu.c
@@ -912,6 +912,165 @@ process_read_fpregs_s87(struct lwp *l, struct save87 *fpregs)
 	}
 }
 
+int
+process_read_xstate(struct lwp *l, struct xstate *xstate)
+{
+	union savefpu *fpu_save;
+	/* Fill *xstate from l's saved FPU area; every path returns 0. */
+	fpusave_lwp(l, true);
+	fpu_save = lwp_fpuarea(l);
+
+	if (x86_fpu_save == FPU_SAVE_FSAVE) {
+		/* Convert from legacy FSAVE format. */
+		memset(&(xstate->xs_fxsave), 0, sizeof(xstate->xs_fxsave));
+		process_s87_to_xmm(&fpu_save->sv_87, &(xstate->xs_fxsave));
+
+		/* We only got x87 data. */
+		xstate->xs_rfbm = XCR0_X87;
+		xstate->xs_xstate_bv = XCR0_X87;
+		return 0;
+	}
+
+	/* Copy the legacy area. */
+	memcpy(&(xstate->xs_fxsave), fpu_save->sv_xsave_hdr.xsh_fxsave,
+	    sizeof(xstate->xs_fxsave));
+
+	if (x86_fpu_save == FPU_SAVE_FXSAVE) {
+		/* FXSAVE means we've got x87 + SSE data. */
+		xstate->xs_rfbm = XCR0_X87 | XCR0_SSE;
+		xstate->xs_xstate_bv = XCR0_X87 | XCR0_SSE;
+		return 0;
+	}
+
+	/* Copy the bitmap indicating which states are available. */
+	xstate->xs_rfbm = x86_xsave_features & XCR0_FPU;
+	xstate->xs_xstate_bv = fpu_save->sv_xsave_hdr.xsh_xstate_bv;
+	KASSERT(!(xstate->xs_xstate_bv & ~xstate->xs_rfbm));
+
+#define COPY_COMPONENT(xcr0_val, xsave_val, field)				\
+	if (xstate->xs_xstate_bv & xcr0_val) {					\
+		KASSERT(x86_xsave_offsets[xsave_val]				\
+		    >= sizeof(struct xsave_header));				\
+		KASSERT(x86_xsave_sizes[xsave_val]				\
+		    >= sizeof(xstate -> field));				\
+										\
+		memcpy(&(xstate -> field),					\
+		    (char*)fpu_save + x86_xsave_offsets[xsave_val],		\
+		    sizeof(xstate -> field));					\
+	}
+
+	COPY_COMPONENT(XCR0_YMM_Hi128, XSAVE_YMM_Hi128, xs_ymm_hi128);
+	COPY_COMPONENT(XCR0_Opmask, XSAVE_Opmask, xs_opmask);
+	COPY_COMPONENT(XCR0_ZMM_Hi256, XSAVE_ZMM_Hi256, xs_zmm_hi256);
+	COPY_COMPONENT(XCR0_Hi16_ZMM, XSAVE_Hi16_ZMM, xs_hi16_zmm);
+
+#undef COPY_COMPONENT	/* components whose xstate_bv bit is clear are not copied */
+
+	return 0;
+}
+
+int
+process_verify_xstate(const struct xstate *xstate)
+{
+	/* xstate_bv must be a subset of RFBM */
+	if (xstate->xs_xstate_bv & ~xstate->xs_rfbm)
+		return EINVAL;
+
+	switch (x86_fpu_save) {
+	case FPU_SAVE_FSAVE:	/* only XCR0_X87 may be requested */
+		if ((xstate->xs_rfbm & ~XCR0_X87))
+			return EINVAL;
+		break;
+	case FPU_SAVE_FXSAVE:	/* only XCR0_X87 and XCR0_SSE may be requested */
+		if ((xstate->xs_rfbm & ~(XCR0_X87 | XCR0_SSE)))
+			return EINVAL;
+		break;
+	default:
+		/* Verify that no unsupported features are enabled */
+		if ((xstate->xs_rfbm & ~(x86_xsave_features & XCR0_FPU)) != 0)
+			return EINVAL;
+	}
+
+	return 0;	/* both bitmaps are acceptable */
+}
+
+int
+process_write_xstate(struct lwp *l, const struct xstate *xstate)
+{
+	union savefpu *fpu_save;
+	/* Merge *xstate into l's saved FPU area; every path returns 0. */
+	fpusave_lwp(l, true);
+	fpu_save = lwp_fpuarea(l);
+
+	/* Convert data into legacy FSAVE format. */
+	if (x86_fpu_save == FPU_SAVE_FSAVE) {
+		if (xstate->xs_xstate_bv & XCR0_X87)
+			process_xmm_to_s87(&xstate->xs_fxsave, &fpu_save->sv_87);
+		return 0;
+	}
+
+	/* If XSAVE is supported, make sure that xstate_bv is set correctly. */
+	if (x86_fpu_save >= FPU_SAVE_XSAVE) {
+		/*
+		 * Bit-wise xstate->xs_rfbm ? xstate->xs_xstate_bv
+		 *                          : fpu_save->sv_xsave_hdr.xsh_xstate_bv
+		 */
+		fpu_save->sv_xsave_hdr.xsh_xstate_bv =
+		    (fpu_save->sv_xsave_hdr.xsh_xstate_bv & ~xstate->xs_rfbm) |
+		    xstate->xs_xstate_bv;
+	}
+
+	if (xstate->xs_xstate_bv & XCR0_X87) {
+		/*
+		 * X87 state is split into two areas, interspersed with SSE data:
+		 * the first 24 bytes (NOTE(review): confirm layout) and fx_87_ac.
+		 */
+		memcpy(&fpu_save->sv_xmm, &xstate->xs_fxsave, 24);
+		memcpy(fpu_save->sv_xmm.fx_87_ac, xstate->xs_fxsave.fx_87_ac,
+		    sizeof(xstate->xs_fxsave.fx_87_ac));
+	}
+
+	/*
+	 * Copy MXCSR if either SSE or AVX state is requested, to match the XSAVE
+	 * behavior for those flags.
+	 */
+	if (xstate->xs_xstate_bv & (XCR0_SSE|XCR0_YMM_Hi128)) {
+		/*
+		 * Invalid bits in mxcsr or mxcsr_mask will cause faults.
+		 */
+		fpu_save->sv_xmm.fx_mxcsr_mask = xstate->xs_fxsave.fx_mxcsr_mask
+		    & x86_fpu_mxcsr_mask;
+		fpu_save->sv_xmm.fx_mxcsr = xstate->xs_fxsave.fx_mxcsr &
+		    fpu_save->sv_xmm.fx_mxcsr_mask;
+	}
+
+	if (xstate->xs_xstate_bv & XCR0_SSE) {	/* [160]: fx_xmm offset? confirm */
+		memcpy(&fpu_save->sv_xsave_hdr.xsh_fxsave[160],
+		    xstate->xs_fxsave.fx_xmm,
+		    sizeof(xstate->xs_fxsave.fx_xmm));
+	}
+
+#define COPY_COMPONENT(xcr0_val, xsave_val, field)				\
+	if (xstate->xs_xstate_bv & xcr0_val) {					\
+		KASSERT(x86_xsave_offsets[xsave_val]				\
+		    >= sizeof(struct xsave_header));				\
+		KASSERT(x86_xsave_sizes[xsave_val]				\
+		    >= sizeof(xstate -> field));				\
+										\
+		memcpy((char*)fpu_save + x86_xsave_offsets[xsave_val],		\
+		    &(xstate -> field), sizeof(xstate -> field));		\
+	}
+
+	COPY_COMPONENT(XCR0_YMM_Hi128, XSAVE_YMM_Hi128, xs_ymm_hi128);
+	COPY_COMPONENT(XCR0_Opmask, XSAVE_Opmask, xs_opmask);
+	COPY_COMPONENT(XCR0_ZMM_Hi256, XSAVE_ZMM_Hi256, xs_zmm_hi256);
+	COPY_COMPONENT(XCR0_Hi16_ZMM, XSAVE_Hi16_ZMM, xs_hi16_zmm);
+
+#undef COPY_COMPONENT	/* components whose xstate_bv bit is clear are not copied */
+
+	return 0;
+}
+
 /* -------------------------------------------------------------------------- */
 
 static volatile unsigned long eagerfpu_cpu_barrier1 __cacheline_aligned;
diff --git a/sys/arch/x86/x86/identcpu.c b/sys/arch/x86/x86/identcpu.c
index 9037fb2673fd..491c99ac06db 100644
--- a/sys/arch/x86/x86/identcpu.c
+++ b/sys/arch/x86/x86/identcpu.c
@@ -74,6 +74,8 @@ char cpu_brand_string[49];
 int x86_fpu_save __read_mostly;
 unsigned int x86_fpu_save_size __read_mostly = sizeof(struct save87);
 uint64_t x86_xsave_features __read_mostly = 0;
+size_t x86_xsave_offsets[XSAVE_MAX_COMPONENT+1] __read_mostly;
+size_t x86_xsave_sizes[XSAVE_MAX_COMPONENT+1] __read_mostly;
 
 /*
  * Note: these are just the ones that may not have a cpuid instruction.
@@ -755,6 +757,7 @@ static void
 cpu_probe_fpu(struct cpu_info *ci)
 {
 	u_int descs[4];
+	int i;
 
 	x86_fpu_eager = true;
 	x86_fpu_save = FPU_SAVE_FSAVE;
@@ -816,6 +819,15 @@ cpu_probe_fpu(struct cpu_info *ci)
 		x86_fpu_save_size = descs[2];
 
 	x86_xsave_features = (uint64_t)descs[3] << 32 | descs[0];
+
+	/* Get component offsets and sizes for the save area */
+	for (i = XSAVE_YMM_Hi128; i < __arraycount(x86_xsave_offsets); i++) {
+		if (x86_xsave_features & __BIT(i)) {
+			x86_cpuid2(0xd, i, descs);
+			x86_xsave_offsets[i] = descs[1];
+			x86_xsave_sizes[i] = descs[0];
+		}
+	}
 }
 
 void
diff --git a/tests/lib/libc/sys/t_ptrace_wait.c b/tests/lib/libc/sys/t_ptrace_wait.c
index 64885e839c28..9d89c9d20c9c 100644
--- a/tests/lib/libc/sys/t_ptrace_wait.c
+++ b/tests/lib/libc/sys/t_ptrace_wait.c
@@ -37,6 +37,7 @@ __RCSID("$NetBSD: t_ptrace_wait.c,v 1.128 2019/06/18 21:14:26 kamil Exp $");
 #include <sys/stat.h>
 #include <sys/syscall.h>
 #include <sys/sysctl.h>
+#include <sys/uio.h>
 #include <sys/wait.h>
 #include <machine/reg.h>
 #include <elf.h>
@@ -62,6 +63,7 @@ __RCSID("$NetBSD: t_ptrace_wait.c,v 1.128 2019/06/18 21:14:26 kamil Exp $");
 #if defined(__i386__) || defined(__x86_64__)
 #include <cpuid.h>
 #include <x86/cpu_extended_state.h>
+#include <x86/specialreg.h>
 #endif
 
 #include <atf-c.h>
diff --git a/tests/lib/libc/sys/t_ptrace_x86_wait.h b/tests/lib/libc/sys/t_ptrace_x86_wait.h
index ba3165495ade..b50f8f7530e7 100644
--- a/tests/lib/libc/sys/t_ptrace_x86_wait.h
+++ b/tests/lib/libc/sys/t_ptrace_x86_wait.h
@@ -2802,6 +2802,890 @@ ATF_TC_BODY(x86_regs_xmm_write, tc)
 	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
 	TWAIT_REQUIRE_FAILURE(ECHILD, wpid = TWAIT_GENERIC(child, &status, 0));
 }
+
+ATF_TC(x86_xstate_mm_read);
+ATF_TC_HEAD(x86_xstate_mm_read, tc)
+{
+	atf_tc_set_md_var(tc, "descr",
+		"Set MMX (mm0..mm7) reg values from debugged program and read "
+		"them via PT_GETXSTATE, comparing values against expected.");
+}
+/* Child loads mm0..mm7 with set_mm_regs(); parent reads them via xstate. */
+ATF_TC_BODY(x86_xstate_mm_read, tc)
+{
+	const int exitval = 5;	/* exit status expected from the child */
+	pid_t child, wpid;
+#if defined(TWAIT_HAVE_STATUS)
+	const int sigval = SIGTRAP;	/* stop signal expected from the child */
+	int status;
+#endif
+	struct iovec iov;	/* describes xst to PT_GETXSTATE */
+	struct xstate xst;
+
+	const uint64_t mm[] = {	/* one test pattern per mm register */
+		0x0001020304050607,
+		0x1011121314151617,
+		0x2021222324252627,
+		0x3031323334353637,
+		0x4041424344454647,
+		0x5051525354555657,
+		0x6061626364656667,
+		0x7071727374757677,
+	};
+
+	/* verify whether MMX is supported here */
+	DPRINTF("Before invoking cpuid\n");
+	{
+		unsigned int eax, ebx, ecx, edx;
+		if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
+			atf_tc_skip("CPUID is not supported by the CPU");
+
+		DPRINTF("cpuid: EDX = %08x\n", edx);
+
+		if (!(edx & bit_MMX))
+			atf_tc_skip("MMX is not supported by the CPU");
+	}
+
+	DPRINTF("Before forking process PID=%d\n", getpid());
+	SYSCALL_REQUIRE((child = fork()) != -1);
+	if (child == 0) {
+		DPRINTF("Before calling PT_TRACE_ME from child %d\n", getpid());
+		FORKEE_ASSERT(ptrace(PT_TRACE_ME, 0, NULL, 0) != -1);
+
+		DPRINTF("Before running assembly from child\n");
+		set_mm_regs(mm);	/* presumably also raises the SIGTRAP -- confirm */
+
+		DPRINTF("Before exiting of the child process\n");
+		_exit(exitval);
+	}
+	DPRINTF("Parent process PID=%d, child's PID=%d\n", getpid(), child);
+
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child);
+
+	validate_status_stopped(status, sigval);
+
+	iov.iov_base = &xst;	/* buffer PT_GETXSTATE fills */
+	iov.iov_len = sizeof(xst);	/* the struct is filled up to this length */
+
+	DPRINTF("Call GETXSTATE for the child process\n");
+	SYSCALL_REQUIRE(ptrace(PT_GETXSTATE, child, &iov, 0) != -1);
+
+	ATF_REQUIRE(xst.xs_rfbm & XCR0_X87);	/* x87 fields were provided */
+	ATF_REQUIRE(xst.xs_xstate_bv & XCR0_X87);	/* and left initial state */
+	/* the mm values are read back from the x87 register mantissas */
+	ATF_CHECK_EQ(xst.xs_fxsave.fx_87_ac[0].r.f87_mantissa, mm[0]);
+	ATF_CHECK_EQ(xst.xs_fxsave.fx_87_ac[1].r.f87_mantissa, mm[1]);
+	ATF_CHECK_EQ(xst.xs_fxsave.fx_87_ac[2].r.f87_mantissa, mm[2]);
+	ATF_CHECK_EQ(xst.xs_fxsave.fx_87_ac[3].r.f87_mantissa, mm[3]);
+	ATF_CHECK_EQ(xst.xs_fxsave.fx_87_ac[4].r.f87_mantissa, mm[4]);
+	ATF_CHECK_EQ(xst.xs_fxsave.fx_87_ac[5].r.f87_mantissa, mm[5]);
+	ATF_CHECK_EQ(xst.xs_fxsave.fx_87_ac[6].r.f87_mantissa, mm[6]);
+	ATF_CHECK_EQ(xst.xs_fxsave.fx_87_ac[7].r.f87_mantissa, mm[7]);
+
+	DPRINTF("Before resuming the child process where it left off and "
+	    "without signal to be sent\n");
+	SYSCALL_REQUIRE(ptrace(PT_CONTINUE, child, (void *)1, 0) != -1);
+
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child);
+
+	validate_status_exited(status, exitval);
+	/* one more wait is expected to fail with ECHILD */
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_FAILURE(ECHILD, wpid = TWAIT_GENERIC(child, &status, 0));
+}
+
+ATF_TC(x86_xstate_mm_write);
+ATF_TC_HEAD(x86_xstate_mm_write, tc)
+{
+	atf_tc_set_md_var(tc, "descr",
+		"Set mm0..mm7 reg values into a debugged program via "
+		"PT_SETXSTATE and compare the result against expected.");
+}
+/* Parent stores mm0..mm7 via xstate; child reads them with get_mm_regs(). */
+ATF_TC_BODY(x86_xstate_mm_write, tc)
+{
+	const int exitval = 5;	/* exit status expected from the child */
+	pid_t child, wpid;
+#if defined(TWAIT_HAVE_STATUS)
+	const int sigval = SIGTRAP;	/* stop signal expected from the child */
+	int status;
+#endif
+	struct iovec iov;	/* describes xst to PT_GETXSTATE/PT_SETXSTATE */
+	struct xstate xst;
+
+	const uint64_t mm[] = {	/* one test pattern per mm register */
+		0x0001020304050607,
+		0x1011121314151617,
+		0x2021222324252627,
+		0x3031323334353637,
+		0x4041424344454647,
+		0x5051525354555657,
+		0x6061626364656667,
+		0x7071727374757677,
+	};
+
+	/* verify whether MMX is supported here */
+	DPRINTF("Before invoking cpuid\n");
+	{
+		unsigned int eax, ebx, ecx, edx;
+		if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
+			atf_tc_skip("CPUID is not supported by the CPU");
+
+		DPRINTF("cpuid: EDX = %08x\n", edx);
+
+		if (!(edx & bit_MMX))
+			atf_tc_skip("MMX is not supported by the CPU");
+	}
+
+	DPRINTF("Before forking process PID=%d\n", getpid());
+	SYSCALL_REQUIRE((child = fork()) != -1);
+	if (child == 0) {
+		uint64_t v_mm[8];	/* values the child reads from mm0..mm7 */
+
+		DPRINTF("Before calling PT_TRACE_ME from child %d\n", getpid());
+		FORKEE_ASSERT(ptrace(PT_TRACE_ME, 0, NULL, 0) != -1);
+
+		DPRINTF("Before running assembly from child\n");
+		get_mm_regs(v_mm);	/* presumably traps before reading -- confirm */
+
+		DPRINTF("Before comparing results\n");
+		FORKEE_ASSERT_EQ(v_mm[0], mm[0]);
+		FORKEE_ASSERT_EQ(v_mm[1], mm[1]);
+		FORKEE_ASSERT_EQ(v_mm[2], mm[2]);
+		FORKEE_ASSERT_EQ(v_mm[3], mm[3]);
+		FORKEE_ASSERT_EQ(v_mm[4], mm[4]);
+		FORKEE_ASSERT_EQ(v_mm[5], mm[5]);
+		FORKEE_ASSERT_EQ(v_mm[6], mm[6]);
+		FORKEE_ASSERT_EQ(v_mm[7], mm[7]);
+
+		DPRINTF("Before exiting of the child process\n");
+		_exit(exitval);
+	}
+	DPRINTF("Parent process PID=%d, child's PID=%d\n", getpid(), child);
+
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child);
+
+	validate_status_stopped(status, sigval);
+
+	iov.iov_base = &xst;	/* same buffer is used for the get and the set */
+	iov.iov_len = sizeof(xst);
+
+	DPRINTF("Call GETXSTATE for the child process\n");
+	SYSCALL_REQUIRE(ptrace(PT_GETXSTATE, child, &iov, 0) != -1);
+
+	ATF_REQUIRE(xst.xs_rfbm & XCR0_X87);	/* x87 fields were provided */
+	/* presumably restricts the following write to x87 -- TODO confirm */
+	xst.xs_rfbm = XCR0_X87;
+	xst.xs_xstate_bv = XCR0_X87;
+	/* the mm patterns are stored into the x87 register mantissas */
+	xst.xs_fxsave.fx_87_ac[0].r.f87_mantissa = mm[0];
+	xst.xs_fxsave.fx_87_ac[1].r.f87_mantissa = mm[1];
+	xst.xs_fxsave.fx_87_ac[2].r.f87_mantissa = mm[2];
+	xst.xs_fxsave.fx_87_ac[3].r.f87_mantissa = mm[3];
+	xst.xs_fxsave.fx_87_ac[4].r.f87_mantissa = mm[4];
+	xst.xs_fxsave.fx_87_ac[5].r.f87_mantissa = mm[5];
+	xst.xs_fxsave.fx_87_ac[6].r.f87_mantissa = mm[6];
+	xst.xs_fxsave.fx_87_ac[7].r.f87_mantissa = mm[7];
+
+	DPRINTF("Call SETXSTATE for the child process\n");
+	SYSCALL_REQUIRE(ptrace(PT_SETXSTATE, child, &iov, 0) != -1);
+
+	DPRINTF("Before resuming the child process where it left off and "
+	    "without signal to be sent\n");
+	SYSCALL_REQUIRE(ptrace(PT_CONTINUE, child, (void *)1, 0) != -1);
+
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child);
+	/* a failed FORKEE_ASSERT in the child would show up here -- confirm */
+	validate_status_exited(status, exitval);
+	/* one more wait is expected to fail with ECHILD */
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_FAILURE(ECHILD, wpid = TWAIT_GENERIC(child, &status, 0));
+}
+
+ATF_TC(x86_xstate_xmm_read);
+ATF_TC_HEAD(x86_xstate_xmm_read, tc)
+{
+	atf_tc_set_md_var(tc, "descr",
+		"Set xmm0..xmm15 (..xmm7 on i386) reg values from debugged program "
+		"and read them via PT_GETXSTATE, comparing values against expected.");
+}
+/* Child loads the xmm registers; parent reads them back via PT_GETXSTATE. */
+ATF_TC_BODY(x86_xstate_xmm_read, tc)
+{
+	const int exitval = 5;	/* exit status expected from the child */
+	pid_t child, wpid;
+#if defined(TWAIT_HAVE_STATUS)
+	const int sigval = SIGTRAP;	/* stop signal expected from the child */
+	int status;
+#endif
+	struct xstate xst;
+	struct iovec iov;	/* describes xst to PT_GETXSTATE */
+
+	const struct {
+		uint64_t a, b;	/* two 64-bit halves of one 128-bit pattern */
+	} xmm[] __aligned(16) = {
+		{ 0x0706050403020100, 0x0F0E0D0C0B0A0908, },
+		{ 0x0807060504030201, 0x100F0E0D0C0B0A09, },
+		{ 0x0908070605040302, 0x11100F0E0D0C0B0A, },
+		{ 0x0A09080706050403, 0x1211100F0E0D0C0B, },
+		{ 0x0B0A090807060504, 0x131211100F0E0D0C, },
+		{ 0x0C0B0A0908070605, 0x14131211100F0E0D, },
+		{ 0x0D0C0B0A09080706, 0x1514131211100F0E, },
+		{ 0x0E0D0C0B0A090807, 0x161514131211100F, },
+#if defined(__x86_64__)
+		{ 0x0F0E0D0C0B0A0908, 0x1716151413121110, },
+		{ 0x100F0E0D0C0B0A09, 0x1817161514131211, },
+		{ 0x11100F0E0D0C0B0A, 0x1918171615141312, },
+		{ 0x1211100F0E0D0C0B, 0x1A19181716151413, },
+		{ 0x131211100F0E0D0C, 0x1B1A191817161514, },
+		{ 0x14131211100F0E0D, 0x1C1B1A1918171615, },
+		{ 0x1514131211100F0E, 0x1D1C1B1A19181716, },
+		{ 0x161514131211100F, 0x1E1D1C1B1A191817, },
+#endif
+	};
+
+	/* verify whether SSE is supported here */
+	DPRINTF("Before invoking cpuid\n");
+	{
+		unsigned int eax, ebx, ecx, edx;
+		if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
+			atf_tc_skip("CPUID is not supported by the CPU");
+
+		DPRINTF("cpuid: EDX = %08x\n", edx);
+
+		if (!(edx & bit_SSE))
+			atf_tc_skip("SSE is not supported by the CPU");
+	}
+
+	DPRINTF("Before forking process PID=%d\n", getpid());
+	SYSCALL_REQUIRE((child = fork()) != -1);
+	if (child == 0) {
+		DPRINTF("Before calling PT_TRACE_ME from child %d\n", getpid());
+		FORKEE_ASSERT(ptrace(PT_TRACE_ME, 0, NULL, 0) != -1);
+
+		DPRINTF("Before running assembly from child\n");
+		set_xmm_regs(xmm);	/* presumably also raises the SIGTRAP -- confirm */
+
+		DPRINTF("Before exiting of the child process\n");
+		_exit(exitval);
+	}
+	DPRINTF("Parent process PID=%d, child's PID=%d\n", getpid(), child);
+
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child);
+
+	validate_status_stopped(status, sigval);
+
+	iov.iov_base = &xst;	/* buffer PT_GETXSTATE fills */
+	iov.iov_len = sizeof(xst);	/* the struct is filled up to this length */
+
+	DPRINTF("Call GETXSTATE for the child process\n");
+	SYSCALL_REQUIRE(ptrace(PT_GETXSTATE, child, &iov, 0) != -1);
+
+	ATF_REQUIRE(xst.xs_rfbm & XCR0_SSE);	/* SSE fields were provided */
+	ATF_REQUIRE(xst.xs_xstate_bv & XCR0_SSE);	/* and left initial state */
+	/* each fx_xmm[] slot must match the pattern at the same index */
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[0], &xmm[0], sizeof(*xmm)));
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[1], &xmm[1], sizeof(*xmm)));
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[2], &xmm[2], sizeof(*xmm)));
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[3], &xmm[3], sizeof(*xmm)));
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[4], &xmm[4], sizeof(*xmm)));
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[5], &xmm[5], sizeof(*xmm)));
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[6], &xmm[6], sizeof(*xmm)));
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[7], &xmm[7], sizeof(*xmm)));
+#if defined(__x86_64__)
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[8], &xmm[8], sizeof(*xmm)));
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[9], &xmm[9], sizeof(*xmm)));
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[10], &xmm[10], sizeof(*xmm)));
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[11], &xmm[11], sizeof(*xmm)));
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[12], &xmm[12], sizeof(*xmm)));
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[13], &xmm[13], sizeof(*xmm)));
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[14], &xmm[14], sizeof(*xmm)));
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[15], &xmm[15], sizeof(*xmm)));
+#endif
+
+	DPRINTF("Before resuming the child process where it left off and "
+	    "without signal to be sent\n");
+	SYSCALL_REQUIRE(ptrace(PT_CONTINUE, child, (void *)1, 0) != -1);
+
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child);
+
+	validate_status_exited(status, exitval);
+	/* one more wait is expected to fail with ECHILD */
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_FAILURE(ECHILD, wpid = TWAIT_GENERIC(child, &status, 0));
+}
+
+ATF_TC(x86_xstate_xmm_write);
+ATF_TC_HEAD(x86_xstate_xmm_write, tc)
+{
+	atf_tc_set_md_var(tc, "descr",
+		"Set xmm0..xmm15 (..xmm7 on i386) reg values into a debugged "
+		"program via PT_SETXSTATE and compare the result against expected.");
+}
+/* Parent stores the xmm registers via PT_SETXSTATE; child verifies them. */
+ATF_TC_BODY(x86_xstate_xmm_write, tc)
+{
+	const int exitval = 5;	/* exit status expected from the child */
+	pid_t child, wpid;
+#if defined(TWAIT_HAVE_STATUS)
+	const int sigval = SIGTRAP;	/* stop signal expected from the child */
+	int status;
+#endif
+	struct xstate xst;
+	struct iovec iov;	/* describes xst to PT_GETXSTATE/PT_SETXSTATE */
+
+	const struct {
+		uint64_t a, b;	/* two 64-bit halves of one 128-bit pattern */
+	} xmm[] __aligned(16) = {
+		{ 0x0706050403020100, 0x0F0E0D0C0B0A0908, },
+		{ 0x0807060504030201, 0x100F0E0D0C0B0A09, },
+		{ 0x0908070605040302, 0x11100F0E0D0C0B0A, },
+		{ 0x0A09080706050403, 0x1211100F0E0D0C0B, },
+		{ 0x0B0A090807060504, 0x131211100F0E0D0C, },
+		{ 0x0C0B0A0908070605, 0x14131211100F0E0D, },
+		{ 0x0D0C0B0A09080706, 0x1514131211100F0E, },
+		{ 0x0E0D0C0B0A090807, 0x161514131211100F, },
+#if defined(__x86_64__)
+		{ 0x0F0E0D0C0B0A0908, 0x1716151413121110, },
+		{ 0x100F0E0D0C0B0A09, 0x1817161514131211, },
+		{ 0x11100F0E0D0C0B0A, 0x1918171615141312, },
+		{ 0x1211100F0E0D0C0B, 0x1A19181716151413, },
+		{ 0x131211100F0E0D0C, 0x1B1A191817161514, },
+		{ 0x14131211100F0E0D, 0x1C1B1A1918171615, },
+		{ 0x1514131211100F0E, 0x1D1C1B1A19181716, },
+		{ 0x161514131211100F, 0x1E1D1C1B1A191817, },
+#endif
+	};
+
+	/* verify whether SSE is supported here */
+	DPRINTF("Before invoking cpuid\n");
+	{
+		unsigned int eax, ebx, ecx, edx;
+		if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
+			atf_tc_skip("CPUID is not supported by the CPU");
+
+		DPRINTF("cpuid: EDX = %08x\n", edx);
+
+		if (!(edx & bit_SSE))
+			atf_tc_skip("SSE is not supported by the CPU");
+	}
+
+	DPRINTF("Before forking process PID=%d\n", getpid());
+	SYSCALL_REQUIRE((child = fork()) != -1);
+	if (child == 0) {
+		struct {
+			uint64_t a, b;
+		} v_xmm[16] __aligned(16);	/* values the child reads back */
+
+		DPRINTF("Before calling PT_TRACE_ME from child %d\n", getpid());
+		FORKEE_ASSERT(ptrace(PT_TRACE_ME, 0, NULL, 0) != -1);
+
+		DPRINTF("Before running assembly from child\n");
+		get_xmm_regs(v_xmm);	/* presumably traps before reading -- confirm */
+
+		DPRINTF("Before comparing results\n");
+		FORKEE_ASSERT(!memcmp(&v_xmm[0], &xmm[0], sizeof(*xmm)));
+		FORKEE_ASSERT(!memcmp(&v_xmm[1], &xmm[1], sizeof(*xmm)));
+		FORKEE_ASSERT(!memcmp(&v_xmm[2], &xmm[2], sizeof(*xmm)));
+		FORKEE_ASSERT(!memcmp(&v_xmm[3], &xmm[3], sizeof(*xmm)));
+		FORKEE_ASSERT(!memcmp(&v_xmm[4], &xmm[4], sizeof(*xmm)));
+		FORKEE_ASSERT(!memcmp(&v_xmm[5], &xmm[5], sizeof(*xmm)));
+		FORKEE_ASSERT(!memcmp(&v_xmm[6], &xmm[6], sizeof(*xmm)));
+		FORKEE_ASSERT(!memcmp(&v_xmm[7], &xmm[7], sizeof(*xmm)));
+#if defined(__x86_64__)
+		FORKEE_ASSERT(!memcmp(&v_xmm[8], &xmm[8], sizeof(*xmm)));
+		FORKEE_ASSERT(!memcmp(&v_xmm[9], &xmm[9], sizeof(*xmm)));
+		FORKEE_ASSERT(!memcmp(&v_xmm[10], &xmm[10], sizeof(*xmm)));
+		FORKEE_ASSERT(!memcmp(&v_xmm[11], &xmm[11], sizeof(*xmm)));
+		FORKEE_ASSERT(!memcmp(&v_xmm[12], &xmm[12], sizeof(*xmm)));
+		FORKEE_ASSERT(!memcmp(&v_xmm[13], &xmm[13], sizeof(*xmm)));
+		FORKEE_ASSERT(!memcmp(&v_xmm[14], &xmm[14], sizeof(*xmm)));
+		FORKEE_ASSERT(!memcmp(&v_xmm[15], &xmm[15], sizeof(*xmm)));
+#endif
+
+		DPRINTF("Before exiting of the child process\n");
+		_exit(exitval);
+	}
+	DPRINTF("Parent process PID=%d, child's PID=%d\n", getpid(), child);
+
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child);
+
+	validate_status_stopped(status, sigval);
+
+	iov.iov_base = &xst;	/* same buffer is used for the get and the set */
+	iov.iov_len = sizeof(xst);
+
+	DPRINTF("Call GETXSTATE for the child process\n");
+	SYSCALL_REQUIRE(ptrace(PT_GETXSTATE, child, &iov, 0) != -1);
+
+	ATF_REQUIRE(xst.xs_rfbm & XCR0_SSE);	/* SSE fields were provided */
+	/* presumably restricts the following write to SSE -- TODO confirm */
+	xst.xs_rfbm = XCR0_SSE;
+	xst.xs_xstate_bv = XCR0_SSE;
+	/* overwrite the fetched fx_xmm[] slots with the test patterns */
+	memcpy(&xst.xs_fxsave.fx_xmm[0], &xmm[0], sizeof(*xmm));
+	memcpy(&xst.xs_fxsave.fx_xmm[1], &xmm[1], sizeof(*xmm));
+	memcpy(&xst.xs_fxsave.fx_xmm[2], &xmm[2], sizeof(*xmm));
+	memcpy(&xst.xs_fxsave.fx_xmm[3], &xmm[3], sizeof(*xmm));
+	memcpy(&xst.xs_fxsave.fx_xmm[4], &xmm[4], sizeof(*xmm));
+	memcpy(&xst.xs_fxsave.fx_xmm[5], &xmm[5], sizeof(*xmm));
+	memcpy(&xst.xs_fxsave.fx_xmm[6], &xmm[6], sizeof(*xmm));
+	memcpy(&xst.xs_fxsave.fx_xmm[7], &xmm[7], sizeof(*xmm));
+#if defined(__x86_64__)
+	memcpy(&xst.xs_fxsave.fx_xmm[8], &xmm[8], sizeof(*xmm));
+	memcpy(&xst.xs_fxsave.fx_xmm[9], &xmm[9], sizeof(*xmm));
+	memcpy(&xst.xs_fxsave.fx_xmm[10], &xmm[10], sizeof(*xmm));
+	memcpy(&xst.xs_fxsave.fx_xmm[11], &xmm[11], sizeof(*xmm));
+	memcpy(&xst.xs_fxsave.fx_xmm[12], &xmm[12], sizeof(*xmm));
+	memcpy(&xst.xs_fxsave.fx_xmm[13], &xmm[13], sizeof(*xmm));
+	memcpy(&xst.xs_fxsave.fx_xmm[14], &xmm[14], sizeof(*xmm));
+	memcpy(&xst.xs_fxsave.fx_xmm[15], &xmm[15], sizeof(*xmm));
+#endif
+
+	DPRINTF("Call SETXSTATE for the child process\n");
+	SYSCALL_REQUIRE(ptrace(PT_SETXSTATE, child, &iov, 0) != -1);
+
+	DPRINTF("Before resuming the child process where it left off and "
+	    "without signal to be sent\n");
+	SYSCALL_REQUIRE(ptrace(PT_CONTINUE, child, (void *)1, 0) != -1);
+
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child);
+	/* a failed FORKEE_ASSERT in the child would show up here -- confirm */
+	validate_status_exited(status, exitval);
+	/* one more wait is expected to fail with ECHILD */
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_FAILURE(ECHILD, wpid = TWAIT_GENERIC(child, &status, 0));
+}
+
+/* Load ymm0..ymm15 (..ymm7 on i386) from *ymm, then execute int3. */
+__attribute__((target("avx")))
+static __inline void set_ymm_regs(const void* ymm)
+{
+	__asm__ __volatile__(
+		"vmovaps  0x000(%0), %%ymm0\n\t"
+		"vmovaps  0x020(%0), %%ymm1\n\t"
+		"vmovaps  0x040(%0), %%ymm2\n\t"
+		"vmovaps  0x060(%0), %%ymm3\n\t"
+		"vmovaps  0x080(%0), %%ymm4\n\t"
+		"vmovaps  0x0A0(%0), %%ymm5\n\t"
+		"vmovaps  0x0C0(%0), %%ymm6\n\t"
+		"vmovaps  0x0E0(%0), %%ymm7\n\t"
+#if defined(__x86_64__)
+		"vmovaps  0x100(%0), %%ymm8\n\t"
+		"vmovaps  0x120(%0), %%ymm9\n\t"
+		"vmovaps  0x140(%0), %%ymm10\n\t"
+		"vmovaps  0x160(%0), %%ymm11\n\t"
+		"vmovaps  0x180(%0), %%ymm12\n\t"
+		"vmovaps  0x1A0(%0), %%ymm13\n\t"
+		"vmovaps  0x1C0(%0), %%ymm14\n\t"
+		"vmovaps  0x1E0(%0), %%ymm15\n\t"
+#endif
+		"int3\n\t"
+		: /* no outputs */ : "b"(ymm)
+		: "memory",	/* the loads read *ymm, which is not an operand */
+		"%ymm0", "%ymm1", "%ymm2", "%ymm3", "%ymm4", "%ymm5", "%ymm6", "%ymm7"
+#if defined(__x86_64__)
+		, "%ymm8", "%ymm9", "%ymm10", "%ymm11", "%ymm12", "%ymm13",
+		"%ymm14", "%ymm15"
+#endif
+	);
+}
+
+ATF_TC(x86_xstate_ymm_read);
+ATF_TC_HEAD(x86_xstate_ymm_read, tc)
+{
+	atf_tc_set_md_var(tc, "descr",
+		"Set ymm0..ymm15 (..ymm7 on i386) reg values from debugged program "
+		"and read them via PT_GETXSTATE, comparing values against expected.");
+}
+/* Child loads the ymm registers; parent reads them back via PT_GETXSTATE. */
+ATF_TC_BODY(x86_xstate_ymm_read, tc)
+{
+	const int exitval = 5;	/* exit status expected from the child */
+	pid_t child, wpid;
+#if defined(TWAIT_HAVE_STATUS)
+	const int sigval = SIGTRAP;	/* stop signal expected from the child */
+	int status;
+#endif
+	struct xstate xst;
+	struct iovec iov;	/* describes xst to PT_GETXSTATE */
+
+	const struct {
+		uint64_t a, b, c, d;	/* a,b: low 128 bits; c,d: high 128 bits */
+	} ymm[] __aligned(32) = {
+		{ 0x0706050403020100, 0x0F0E0D0C0B0A0908,
+		  0x1716151413121110, 0x1F1E1D1C1B1A1918, },
+		{ 0x0807060504030201, 0x100F0E0D0C0B0A09,
+		  0x1817161514131211, 0x201F1E1D1C1B1A19, },
+		{ 0x0908070605040302, 0x11100F0E0D0C0B0A,
+		  0x1918171615141312, 0x21201F1E1D1C1B1A, },
+		{ 0x0A09080706050403, 0x1211100F0E0D0C0B,
+		  0x1A19181716151413, 0x2221201F1E1D1C1B, },
+		{ 0x0B0A090807060504, 0x131211100F0E0D0C,
+		  0x1B1A191817161514, 0x232221201F1E1D1C, },
+		{ 0x0C0B0A0908070605, 0x14131211100F0E0D,
+		  0x1C1B1A1918171615, 0x24232221201F1E1D, },
+		{ 0x0D0C0B0A09080706, 0x1514131211100F0E,
+		  0x1D1C1B1A19181716, 0x2524232221201F1E, },
+		{ 0x0E0D0C0B0A090807, 0x161514131211100F,
+		  0x1E1D1C1B1A191817, 0x262524232221201F, },
+#if defined(__x86_64__)
+		{ 0x0F0E0D0C0B0A0908, 0x1716151413121110,
+		  0x1F1E1D1C1B1A1918, 0x2726252423222120, },
+		{ 0x100F0E0D0C0B0A09, 0x1817161514131211,
+		  0x201F1E1D1C1B1A19, 0x2827262524232221, },
+		{ 0x11100F0E0D0C0B0A, 0x1918171615141312,
+		  0x21201F1E1D1C1B1A, 0x2928272625242322, },
+		{ 0x1211100F0E0D0C0B, 0x1A19181716151413,
+		  0x2221201F1E1D1C1B, 0x2A29282726252423, },
+		{ 0x131211100F0E0D0C, 0x1B1A191817161514,
+		  0x232221201F1E1D1C, 0x2B2A292827262524, },
+		{ 0x14131211100F0E0D, 0x1C1B1A1918171615,
+		  0x24232221201F1E1D, 0x2C2B2A2928272625, },
+		{ 0x1514131211100F0E, 0x1D1C1B1A19181716,
+		  0x2524232221201F1E, 0x2D2C2B2A29282726, },
+		{ 0x161514131211100F, 0x1E1D1C1B1A191817,
+		  0x262524232221201F, 0x2E2D2C2B2A292827, },
+#endif
+	};
+
+	/* verify whether AVX is supported here */
+	DPRINTF("Before invoking cpuid\n");
+	{
+		unsigned int eax, ebx, ecx, edx;
+		if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
+			atf_tc_skip("CPUID is not supported by the CPU");
+
+		DPRINTF("cpuid: ECX = %08x\n", ecx);
+		/* NOTE(review): only the AVX bit is tested, not OSXSAVE -- confirm */
+		if (!(ecx & bit_AVX))
+			atf_tc_skip("AVX is not supported by the CPU");
+	}
+
+	DPRINTF("Before forking process PID=%d\n", getpid());
+	SYSCALL_REQUIRE((child = fork()) != -1);
+	if (child == 0) {
+		DPRINTF("Before calling PT_TRACE_ME from child %d\n", getpid());
+		FORKEE_ASSERT(ptrace(PT_TRACE_ME, 0, NULL, 0) != -1);
+
+		DPRINTF("Before running assembly from child\n");
+		set_ymm_regs(ymm);	/* loads the patterns, then executes int3 */
+
+		DPRINTF("Before exiting of the child process\n");
+		_exit(exitval);
+	}
+	DPRINTF("Parent process PID=%d, child's PID=%d\n", getpid(), child);
+
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child);
+
+	validate_status_stopped(status, sigval);
+
+	iov.iov_base = &xst;	/* buffer PT_GETXSTATE fills */
+	iov.iov_len = sizeof(xst);	/* the struct is filled up to this length */
+
+	DPRINTF("Call GETXSTATE for the child process\n");
+	SYSCALL_REQUIRE(ptrace(PT_GETXSTATE, child, &iov, 0) != -1);
+
+	ATF_REQUIRE(xst.xs_rfbm & XCR0_SSE);	/* both halves were provided */
+	ATF_REQUIRE(xst.xs_rfbm & XCR0_YMM_Hi128);
+	ATF_REQUIRE(xst.xs_xstate_bv & XCR0_SSE);	/* and left initial state */
+	ATF_REQUIRE(xst.xs_xstate_bv & XCR0_YMM_Hi128);
+	/* low halves are checked in fx_xmm[], high halves in xs_ymm_hi128 */
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[0], &ymm[0].a, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_ymm_hi128.xs_ymm[0], &ymm[0].c, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[1], &ymm[1].a, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_ymm_hi128.xs_ymm[1], &ymm[1].c, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[2], &ymm[2].a, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_ymm_hi128.xs_ymm[2], &ymm[2].c, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[3], &ymm[3].a, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_ymm_hi128.xs_ymm[3], &ymm[3].c, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[4], &ymm[4].a, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_ymm_hi128.xs_ymm[4], &ymm[4].c, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[5], &ymm[5].a, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_ymm_hi128.xs_ymm[5], &ymm[5].c, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[6], &ymm[6].a, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_ymm_hi128.xs_ymm[6], &ymm[6].c, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[7], &ymm[7].a, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_ymm_hi128.xs_ymm[7], &ymm[7].c, sizeof(*ymm)/2));
+#if defined(__x86_64__)
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[8], &ymm[8].a, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_ymm_hi128.xs_ymm[8], &ymm[8].c, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[9], &ymm[9].a, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_ymm_hi128.xs_ymm[9], &ymm[9].c, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[10], &ymm[10].a, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_ymm_hi128.xs_ymm[10], &ymm[10].c, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[11], &ymm[11].a, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_ymm_hi128.xs_ymm[11], &ymm[11].c, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[12], &ymm[12].a, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_ymm_hi128.xs_ymm[12], &ymm[12].c, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[13], &ymm[13].a, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_ymm_hi128.xs_ymm[13], &ymm[13].c, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[14], &ymm[14].a, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_ymm_hi128.xs_ymm[14], &ymm[14].c, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_fxsave.fx_xmm[15], &ymm[15].a, sizeof(*ymm)/2));
+	ATF_CHECK(!memcmp(&xst.xs_ymm_hi128.xs_ymm[15], &ymm[15].c, sizeof(*ymm)/2));
+#endif
+
+	DPRINTF("Before resuming the child process where it left off and "
+	    "without signal to be sent\n");
+	SYSCALL_REQUIRE(ptrace(PT_CONTINUE, child, (void *)1, 0) != -1);
+
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child);
+
+	validate_status_exited(status, exitval);
+	/* one more wait is expected to fail with ECHILD */
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_FAILURE(ECHILD, wpid = TWAIT_GENERIC(child, &status, 0));
+}
+
+/* Fill the ymm registers with a pattern, int3, then dump them to *v_ymm. */
+__attribute__((target("avx")))
+static __inline void get_ymm_regs(void* v_ymm)
+{
+	const struct { uint64_t a, b, c, d; } fill __aligned(32) = {
+		0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F,
+		0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F
+	};
+
+	__asm__ __volatile__(
+		/* fill registers with clobber pattern */
+		"vmovaps  %1, %%ymm0\n\t"
+		"vmovaps  %1, %%ymm1\n\t"
+		"vmovaps  %1, %%ymm2\n\t"
+		"vmovaps  %1, %%ymm3\n\t"
+		"vmovaps  %1, %%ymm4\n\t"
+		"vmovaps  %1, %%ymm5\n\t"
+		"vmovaps  %1, %%ymm6\n\t"
+		"vmovaps  %1, %%ymm7\n\t"
+#if defined(__x86_64__)
+		"vmovaps  %1, %%ymm8\n\t"
+		"vmovaps  %1, %%ymm9\n\t"
+		"vmovaps  %1, %%ymm10\n\t"
+		"vmovaps  %1, %%ymm11\n\t"
+		"vmovaps  %1, %%ymm12\n\t"
+		"vmovaps  %1, %%ymm13\n\t"
+		"vmovaps  %1, %%ymm14\n\t"
+		"vmovaps  %1, %%ymm15\n\t"
+#endif
+		"\n\t"
+		"int3\n\t"
+		"\n\t"
+		"vmovaps %%ymm0,  0x000(%0)\n\t"
+		"vmovaps %%ymm1,  0x020(%0)\n\t"
+		"vmovaps %%ymm2,  0x040(%0)\n\t"
+		"vmovaps %%ymm3,  0x060(%0)\n\t"
+		"vmovaps %%ymm4,  0x080(%0)\n\t"
+		"vmovaps %%ymm5,  0x0A0(%0)\n\t"
+		"vmovaps %%ymm6,  0x0C0(%0)\n\t"
+		"vmovaps %%ymm7,  0x0E0(%0)\n\t"
+#if defined(__x86_64__)
+		"vmovaps %%ymm8,  0x100(%0)\n\t"
+		"vmovaps %%ymm9,  0x120(%0)\n\t"
+		"vmovaps %%ymm10, 0x140(%0)\n\t"
+		"vmovaps %%ymm11, 0x160(%0)\n\t"
+		"vmovaps %%ymm12, 0x180(%0)\n\t"
+		"vmovaps %%ymm13, 0x1A0(%0)\n\t"
+		"vmovaps %%ymm14, 0x1C0(%0)\n\t"
+		"vmovaps %%ymm15, 0x1E0(%0)\n\t"
+#endif
+		: /* no outputs */
+		: "a"(v_ymm), "m"(fill)
+		: "memory",	/* the stores write *v_ymm, which is not an operand */
+		"%ymm0", "%ymm1", "%ymm2", "%ymm3", "%ymm4", "%ymm5", "%ymm6", "%ymm7"
+#if defined(__x86_64__)
+		, "%ymm8", "%ymm9", "%ymm10", "%ymm11", "%ymm12", "%ymm13", "%ymm14",
+		"%ymm15"
+#endif
+	);
+}
+
+ATF_TC(x86_xstate_ymm_write);
+ATF_TC_HEAD(x86_xstate_ymm_write, tc)
+{
+	atf_tc_set_md_var(tc, "descr",
+		"Set ymm0..ymm15 (..ymm7 on i386) reg values into a debugged "
+		"program via PT_SETXSTATE and compare the result against expected.");
+}
+/* Parent stores the ymm registers via PT_SETXSTATE; child verifies them. */
+ATF_TC_BODY(x86_xstate_ymm_write, tc)
+{
+	const int exitval = 5;	/* exit status expected from the child */
+	pid_t child, wpid;
+#if defined(TWAIT_HAVE_STATUS)
+	const int sigval = SIGTRAP;	/* stop signal expected from the child */
+	int status;
+#endif
+	struct xstate xst;
+	struct iovec iov;	/* describes xst to PT_GETXSTATE/PT_SETXSTATE */
+
+	const struct {
+		uint64_t a, b, c, d;	/* a,b: low 128 bits; c,d: high 128 bits */
+	} ymm[] __aligned(32) = {
+		{ 0x0706050403020100, 0x0F0E0D0C0B0A0908,
+		  0x1716151413121110, 0x1F1E1D1C1B1A1918, },
+		{ 0x0807060504030201, 0x100F0E0D0C0B0A09,
+		  0x1817161514131211, 0x201F1E1D1C1B1A19, },
+		{ 0x0908070605040302, 0x11100F0E0D0C0B0A,
+		  0x1918171615141312, 0x21201F1E1D1C1B1A, },
+		{ 0x0A09080706050403, 0x1211100F0E0D0C0B,
+		  0x1A19181716151413, 0x2221201F1E1D1C1B, },
+		{ 0x0B0A090807060504, 0x131211100F0E0D0C,
+		  0x1B1A191817161514, 0x232221201F1E1D1C, },
+		{ 0x0C0B0A0908070605, 0x14131211100F0E0D,
+		  0x1C1B1A1918171615, 0x24232221201F1E1D, },
+		{ 0x0D0C0B0A09080706, 0x1514131211100F0E,
+		  0x1D1C1B1A19181716, 0x2524232221201F1E, },
+		{ 0x0E0D0C0B0A090807, 0x161514131211100F,
+		  0x1E1D1C1B1A191817, 0x262524232221201F, },
+#if defined(__x86_64__)
+		{ 0x0F0E0D0C0B0A0908, 0x1716151413121110,
+		  0x1F1E1D1C1B1A1918, 0x2726252423222120, },
+		{ 0x100F0E0D0C0B0A09, 0x1817161514131211,
+		  0x201F1E1D1C1B1A19, 0x2827262524232221, },
+		{ 0x11100F0E0D0C0B0A, 0x1918171615141312,
+		  0x21201F1E1D1C1B1A, 0x2928272625242322, },
+		{ 0x1211100F0E0D0C0B, 0x1A19181716151413,
+		  0x2221201F1E1D1C1B, 0x2A29282726252423, },
+		{ 0x131211100F0E0D0C, 0x1B1A191817161514,
+		  0x232221201F1E1D1C, 0x2B2A292827262524, },
+		{ 0x14131211100F0E0D, 0x1C1B1A1918171615,
+		  0x24232221201F1E1D, 0x2C2B2A2928272625, },
+		{ 0x1514131211100F0E, 0x1D1C1B1A19181716,
+		  0x2524232221201F1E, 0x2D2C2B2A29282726, },
+		{ 0x161514131211100F, 0x1E1D1C1B1A191817,
+		  0x262524232221201F, 0x2E2D2C2B2A292827, },
+#endif
+	};
+
+	/* verify whether AVX is supported here */
+	DPRINTF("Before invoking cpuid\n");
+	{
+		unsigned int eax, ebx, ecx, edx;
+		if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
+			atf_tc_skip("CPUID is not supported by the CPU");
+
+		DPRINTF("cpuid: ECX = %08x\n", ecx);
+		/* NOTE(review): only the AVX bit is tested, not OSXSAVE -- confirm */
+		if (!(ecx & bit_AVX))
+			atf_tc_skip("AVX is not supported by the CPU");
+	}
+
+	DPRINTF("Before forking process PID=%d\n", getpid());
+	SYSCALL_REQUIRE((child = fork()) != -1);
+	if (child == 0) {
+		struct {
+			uint64_t a, b, c, d;
+		} v_ymm[16] __aligned(32);	/* values the child reads back */
+
+		DPRINTF("Before calling PT_TRACE_ME from child %d\n", getpid());
+		FORKEE_ASSERT(ptrace(PT_TRACE_ME, 0, NULL, 0) != -1);
+
+		DPRINTF("Before running assembly from child\n");
+		get_ymm_regs(v_ymm);	/* executes int3, then stores the registers */
+
+		DPRINTF("Before comparing results\n");
+		FORKEE_ASSERT(!memcmp(&v_ymm[0], &ymm[0], sizeof(*ymm)));
+		FORKEE_ASSERT(!memcmp(&v_ymm[1], &ymm[1], sizeof(*ymm)));
+		FORKEE_ASSERT(!memcmp(&v_ymm[2], &ymm[2], sizeof(*ymm)));
+		FORKEE_ASSERT(!memcmp(&v_ymm[3], &ymm[3], sizeof(*ymm)));
+		FORKEE_ASSERT(!memcmp(&v_ymm[4], &ymm[4], sizeof(*ymm)));
+		FORKEE_ASSERT(!memcmp(&v_ymm[5], &ymm[5], sizeof(*ymm)));
+		FORKEE_ASSERT(!memcmp(&v_ymm[6], &ymm[6], sizeof(*ymm)));
+		FORKEE_ASSERT(!memcmp(&v_ymm[7], &ymm[7], sizeof(*ymm)));
+#if defined(__x86_64__)
+		FORKEE_ASSERT(!memcmp(&v_ymm[8], &ymm[8], sizeof(*ymm)));
+		FORKEE_ASSERT(!memcmp(&v_ymm[9], &ymm[9], sizeof(*ymm)));
+		FORKEE_ASSERT(!memcmp(&v_ymm[10], &ymm[10], sizeof(*ymm)));
+		FORKEE_ASSERT(!memcmp(&v_ymm[11], &ymm[11], sizeof(*ymm)));
+		FORKEE_ASSERT(!memcmp(&v_ymm[12], &ymm[12], sizeof(*ymm)));
+		FORKEE_ASSERT(!memcmp(&v_ymm[13], &ymm[13], sizeof(*ymm)));
+		FORKEE_ASSERT(!memcmp(&v_ymm[14], &ymm[14], sizeof(*ymm)));
+		FORKEE_ASSERT(!memcmp(&v_ymm[15], &ymm[15], sizeof(*ymm)));
+#endif
+
+		DPRINTF("Before exiting of the child process\n");
+		_exit(exitval);
+	}
+	DPRINTF("Parent process PID=%d, child's PID=%d\n", getpid(), child);
+
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child);
+
+	validate_status_stopped(status, sigval);
+
+	iov.iov_base = &xst;	/* same buffer is used for the get and the set */
+	iov.iov_len = sizeof(xst);
+
+	DPRINTF("Call GETXSTATE for the child process\n");
+	SYSCALL_REQUIRE(ptrace(PT_GETXSTATE, child, &iov, 0) != -1);
+
+	ATF_REQUIRE(xst.xs_rfbm & XCR0_SSE);	/* both halves were provided */
+	ATF_REQUIRE(xst.xs_rfbm & XCR0_YMM_Hi128);
+	/* presumably restricts the following write to SSE+YMM -- TODO confirm */
+	xst.xs_rfbm = XCR0_SSE | XCR0_YMM_Hi128;
+	xst.xs_xstate_bv = XCR0_SSE | XCR0_YMM_Hi128;
+	/* low halves are stored in fx_xmm[], high halves in xs_ymm_hi128 */
+	memcpy(&xst.xs_fxsave.fx_xmm[0], &ymm[0].a, sizeof(*ymm)/2);
+	memcpy(&xst.xs_ymm_hi128.xs_ymm[0], &ymm[0].c, sizeof(*ymm)/2);
+	memcpy(&xst.xs_fxsave.fx_xmm[1], &ymm[1].a, sizeof(*ymm)/2);
+	memcpy(&xst.xs_ymm_hi128.xs_ymm[1], &ymm[1].c, sizeof(*ymm)/2);
+	memcpy(&xst.xs_fxsave.fx_xmm[2], &ymm[2].a, sizeof(*ymm)/2);
+	memcpy(&xst.xs_ymm_hi128.xs_ymm[2], &ymm[2].c, sizeof(*ymm)/2);
+	memcpy(&xst.xs_fxsave.fx_xmm[3], &ymm[3].a, sizeof(*ymm)/2);
+	memcpy(&xst.xs_ymm_hi128.xs_ymm[3], &ymm[3].c, sizeof(*ymm)/2);
+	memcpy(&xst.xs_fxsave.fx_xmm[4], &ymm[4].a, sizeof(*ymm)/2);
+	memcpy(&xst.xs_ymm_hi128.xs_ymm[4], &ymm[4].c, sizeof(*ymm)/2);
+	memcpy(&xst.xs_fxsave.fx_xmm[5], &ymm[5].a, sizeof(*ymm)/2);
+	memcpy(&xst.xs_ymm_hi128.xs_ymm[5], &ymm[5].c, sizeof(*ymm)/2);
+	memcpy(&xst.xs_fxsave.fx_xmm[6], &ymm[6].a, sizeof(*ymm)/2);
+	memcpy(&xst.xs_ymm_hi128.xs_ymm[6], &ymm[6].c, sizeof(*ymm)/2);
+	memcpy(&xst.xs_fxsave.fx_xmm[7], &ymm[7].a, sizeof(*ymm)/2);
+	memcpy(&xst.xs_ymm_hi128.xs_ymm[7], &ymm[7].c, sizeof(*ymm)/2);
+#if defined(__x86_64__)
+	memcpy(&xst.xs_fxsave.fx_xmm[8], &ymm[8].a, sizeof(*ymm)/2);
+	memcpy(&xst.xs_ymm_hi128.xs_ymm[8], &ymm[8].c, sizeof(*ymm)/2);
+	memcpy(&xst.xs_fxsave.fx_xmm[9], &ymm[9].a, sizeof(*ymm)/2);
+	memcpy(&xst.xs_ymm_hi128.xs_ymm[9], &ymm[9].c, sizeof(*ymm)/2);
+	memcpy(&xst.xs_fxsave.fx_xmm[10], &ymm[10].a, sizeof(*ymm)/2);
+	memcpy(&xst.xs_ymm_hi128.xs_ymm[10], &ymm[10].c, sizeof(*ymm)/2);
+	memcpy(&xst.xs_fxsave.fx_xmm[11], &ymm[11].a, sizeof(*ymm)/2);
+	memcpy(&xst.xs_ymm_hi128.xs_ymm[11], &ymm[11].c, sizeof(*ymm)/2);
+	memcpy(&xst.xs_fxsave.fx_xmm[12], &ymm[12].a, sizeof(*ymm)/2);
+	memcpy(&xst.xs_ymm_hi128.xs_ymm[12], &ymm[12].c, sizeof(*ymm)/2);
+	memcpy(&xst.xs_fxsave.fx_xmm[13], &ymm[13].a, sizeof(*ymm)/2);
+	memcpy(&xst.xs_ymm_hi128.xs_ymm[13], &ymm[13].c, sizeof(*ymm)/2);
+	memcpy(&xst.xs_fxsave.fx_xmm[14], &ymm[14].a, sizeof(*ymm)/2);
+	memcpy(&xst.xs_ymm_hi128.xs_ymm[14], &ymm[14].c, sizeof(*ymm)/2);
+	memcpy(&xst.xs_fxsave.fx_xmm[15], &ymm[15].a, sizeof(*ymm)/2);
+	memcpy(&xst.xs_ymm_hi128.xs_ymm[15], &ymm[15].c, sizeof(*ymm)/2);
+#endif
+
+	DPRINTF("Call SETXSTATE for the child process\n");
+	SYSCALL_REQUIRE(ptrace(PT_SETXSTATE, child, &iov, 0) != -1);
+
+	DPRINTF("Before resuming the child process where it left off and "
+	    "without signal to be sent\n");
+	SYSCALL_REQUIRE(ptrace(PT_CONTINUE, child, (void *)1, 0) != -1);
+
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child);
+	/* a failed FORKEE_ASSERT in the child would show up here -- confirm */
+	validate_status_exited(status, exitval);
+	/* one more wait is expected to fail with ECHILD */
+	DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
+	TWAIT_REQUIRE_FAILURE(ECHILD, wpid = TWAIT_GENERIC(child, &status, 0));
+}
+
 /// ----------------------------------------------------------------------------
 
 #define ATF_TP_ADD_TCS_PTRACE_WAIT_X86() \
@@ -2870,7 +3754,13 @@ ATF_TC_BODY(x86_regs_xmm_write, tc)
 	ATF_TP_ADD_TC_HAVE_FPREGS(tp, x86_regs_mm_read); \
 	ATF_TP_ADD_TC_HAVE_FPREGS(tp, x86_regs_mm_write); \
 	ATF_TP_ADD_TC_HAVE_FPREGS(tp, x86_regs_xmm_read); \
-	ATF_TP_ADD_TC_HAVE_FPREGS(tp, x86_regs_xmm_write);
+	ATF_TP_ADD_TC_HAVE_FPREGS(tp, x86_regs_xmm_write); \
+	ATF_TP_ADD_TC(tp, x86_xstate_mm_read); \
+	ATF_TP_ADD_TC(tp, x86_xstate_mm_write); \
+	ATF_TP_ADD_TC(tp, x86_xstate_xmm_read); \
+	ATF_TP_ADD_TC(tp, x86_xstate_xmm_write); \
+	ATF_TP_ADD_TC(tp, x86_xstate_ymm_read); \
+	ATF_TP_ADD_TC(tp, x86_xstate_ymm_write);
 #else
 #define ATF_TP_ADD_TCS_PTRACE_WAIT_X86()
 #endif

Attachment: signature.asc
Description: This is a digitally signed message part



Home | Main Index | Thread Index | Old Index