Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/tests/lib/libc/sys Add tests for AVX-512 registers (zmm0..zm...



details:   https://anonhg.NetBSD.org/src/rev/0970f95de8c4
branches:  trunk
changeset: 941699:0970f95de8c4
user:      mgorny <mgorny%NetBSD.org@localhost>
date:      Tue Oct 27 08:32:36 2020 +0000

description:
Add tests for AVX-512 registers (zmm0..zmm31, k0..k7)

Thanks to David Seifert <soap%gentoo.org@localhost> for providing a VM
on AVX-512-capable hardware

Reviewed by kamil

diffstat:

 tests/lib/libc/sys/t_ptrace_x86_wait.h |  785 ++++++++++++++++++++++++++++----
 1 files changed, 669 insertions(+), 116 deletions(-)

diffs (truncated from 987 to 300 lines):

diff -r 0341b207933c -r 0970f95de8c4 tests/lib/libc/sys/t_ptrace_x86_wait.h
--- a/tests/lib/libc/sys/t_ptrace_x86_wait.h    Tue Oct 27 08:05:20 2020 +0000
+++ b/tests/lib/libc/sys/t_ptrace_x86_wait.h    Tue Oct 27 08:32:36 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: t_ptrace_x86_wait.h,v 1.30 2020/10/24 07:14:30 mgorny Exp $    */
+/*     $NetBSD: t_ptrace_x86_wait.h,v 1.31 2020/10/27 08:32:36 mgorny Exp $    */
 
 /*-
  * Copyright (c) 2016, 2017, 2018, 2019 The NetBSD Foundation, Inc.
@@ -2177,6 +2177,9 @@
 
 union x86_test_register {
        struct {
+               uint64_t a, b, c, d, e, f, g, h;
+       } zmm;
+       struct {
                uint64_t a, b, c, d;
        } ymm;
        struct {
@@ -2220,7 +2223,8 @@
        FPREGS_MM,
        FPREGS_XMM,
        /* TEST_XSTATE */
-       FPREGS_YMM
+       FPREGS_YMM,
+       FPREGS_ZMM
 };
 
 enum x86_test_regmode {
@@ -2396,14 +2400,14 @@
                "\n\t"
                "int3\n\t"
                "\n\t"
-               "movq    %%r8, 0x00(%0)\n\t"
-               "movq    %%r9, 0x20(%0)\n\t"
-               "movq    %%r10, 0x40(%0)\n\t"
-               "movq    %%r11, 0x60(%0)\n\t"
-               "movq    %%r12, 0x80(%0)\n\t"
-               "movq    %%r13, 0xA0(%0)\n\t"
-               "movq    %%r14, 0xC0(%0)\n\t"
-               "movq    %%r15, 0xE0(%0)\n\t"
+               "movq    %%r8, 0x000(%0)\n\t"
+               "movq    %%r9, 0x040(%0)\n\t"
+               "movq    %%r10, 0x080(%0)\n\t"
+               "movq    %%r11, 0x0C0(%0)\n\t"
+               "movq    %%r12, 0x100(%0)\n\t"
+               "movq    %%r13, 0x140(%0)\n\t"
+               "movq    %%r14, 0x180(%0)\n\t"
+               "movq    %%r15, 0x1C0(%0)\n\t"
                :
                : "a"(out), "m"(fill)
                : "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
@@ -2417,14 +2421,14 @@
 {
 #if defined(__x86_64__)
        __asm__ __volatile__(
-               "movq    0x00(%0), %%r8\n\t"
-               "movq    0x20(%0), %%r9\n\t"
-               "movq    0x40(%0), %%r10\n\t"
-               "movq    0x60(%0), %%r11\n\t"
-               "movq    0x80(%0), %%r12\n\t"
-               "movq    0xA0(%0), %%r13\n\t"
-               "movq    0xC0(%0), %%r14\n\t"
-               "movq    0xE0(%0), %%r15\n\t"
+               "movq    0x000(%0), %%r8\n\t"
+               "movq    0x040(%0), %%r9\n\t"
+               "movq    0x080(%0), %%r10\n\t"
+               "movq    0x0C0(%0), %%r11\n\t"
+               "movq    0x100(%0), %%r12\n\t"
+               "movq    0x140(%0), %%r13\n\t"
+               "movq    0x180(%0), %%r14\n\t"
+               "movq    0x1C0(%0), %%r15\n\t"
                "int3\n\t"
                :
                : "b"(data)
@@ -2526,14 +2530,14 @@
                "\n\t"
                "int3\n\t"
                "\n\t"
-               "movq    %%mm0, 0x00(%0)\n\t"
-               "movq    %%mm1, 0x20(%0)\n\t"
-               "movq    %%mm2, 0x40(%0)\n\t"
-               "movq    %%mm3, 0x60(%0)\n\t"
-               "movq    %%mm4, 0x80(%0)\n\t"
-               "movq    %%mm5, 0xA0(%0)\n\t"
-               "movq    %%mm6, 0xC0(%0)\n\t"
-               "movq    %%mm7, 0xE0(%0)\n\t"
+               "movq    %%mm0, 0x000(%0)\n\t"
+               "movq    %%mm1, 0x040(%0)\n\t"
+               "movq    %%mm2, 0x080(%0)\n\t"
+               "movq    %%mm3, 0x0C0(%0)\n\t"
+               "movq    %%mm4, 0x100(%0)\n\t"
+               "movq    %%mm5, 0x140(%0)\n\t"
+               "movq    %%mm6, 0x180(%0)\n\t"
+               "movq    %%mm7, 0x1C0(%0)\n\t"
                :
                : "a"(out), "m"(fill)
                : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"
@@ -2544,14 +2548,14 @@
 static __inline void set_mm_regs(const union x86_test_register data[])
 {
        __asm__ __volatile__(
-               "movq    0x00(%0), %%mm0\n\t"
-               "movq    0x20(%0), %%mm1\n\t"
-               "movq    0x40(%0), %%mm2\n\t"
-               "movq    0x60(%0), %%mm3\n\t"
-               "movq    0x80(%0), %%mm4\n\t"
-               "movq    0xA0(%0), %%mm5\n\t"
-               "movq    0xC0(%0), %%mm6\n\t"
-               "movq    0xE0(%0), %%mm7\n\t"
+               "movq    0x000(%0), %%mm0\n\t"
+               "movq    0x040(%0), %%mm1\n\t"
+               "movq    0x080(%0), %%mm2\n\t"
+               "movq    0x0C0(%0), %%mm3\n\t"
+               "movq    0x100(%0), %%mm4\n\t"
+               "movq    0x140(%0), %%mm5\n\t"
+               "movq    0x180(%0), %%mm6\n\t"
+               "movq    0x1C0(%0), %%mm7\n\t"
                "int3\n\t"
                :
                : "b"(data)
@@ -2590,22 +2594,22 @@
                "int3\n\t"
                "\n\t"
                "movaps  %%xmm0, 0x000(%0)\n\t"
-               "movaps  %%xmm1, 0x020(%0)\n\t"
-               "movaps  %%xmm2, 0x040(%0)\n\t"
-               "movaps  %%xmm3, 0x060(%0)\n\t"
-               "movaps  %%xmm4, 0x080(%0)\n\t"
-               "movaps  %%xmm5, 0x0A0(%0)\n\t"
-               "movaps  %%xmm6, 0x0C0(%0)\n\t"
-               "movaps  %%xmm7, 0x0E0(%0)\n\t"
+               "movaps  %%xmm1, 0x040(%0)\n\t"
+               "movaps  %%xmm2, 0x080(%0)\n\t"
+               "movaps  %%xmm3, 0x0C0(%0)\n\t"
+               "movaps  %%xmm4, 0x100(%0)\n\t"
+               "movaps  %%xmm5, 0x140(%0)\n\t"
+               "movaps  %%xmm6, 0x180(%0)\n\t"
+               "movaps  %%xmm7, 0x1C0(%0)\n\t"
 #if defined(__x86_64__)
-               "movaps  %%xmm8, 0x100(%0)\n\t"
-               "movaps  %%xmm9, 0x120(%0)\n\t"
-               "movaps  %%xmm10, 0x140(%0)\n\t"
-               "movaps  %%xmm11, 0x160(%0)\n\t"
-               "movaps  %%xmm12, 0x180(%0)\n\t"
-               "movaps  %%xmm13, 0x1A0(%0)\n\t"
-               "movaps  %%xmm14, 0x1C0(%0)\n\t"
-               "movaps  %%xmm15, 0x1E0(%0)\n\t"
+               "movaps  %%xmm8, 0x200(%0)\n\t"
+               "movaps  %%xmm9, 0x240(%0)\n\t"
+               "movaps  %%xmm10, 0x280(%0)\n\t"
+               "movaps  %%xmm11, 0x2C0(%0)\n\t"
+               "movaps  %%xmm12, 0x300(%0)\n\t"
+               "movaps  %%xmm13, 0x340(%0)\n\t"
+               "movaps  %%xmm14, 0x380(%0)\n\t"
+               "movaps  %%xmm15, 0x3C0(%0)\n\t"
 #endif
                :
                : "a"(out), "m"(fill)
@@ -2622,22 +2626,22 @@
 {
        __asm__ __volatile__(
                "movaps   0x000(%0), %%xmm0\n\t"
-               "movaps   0x020(%0), %%xmm1\n\t"
-               "movaps   0x040(%0), %%xmm2\n\t"
-               "movaps   0x060(%0), %%xmm3\n\t"
-               "movaps   0x080(%0), %%xmm4\n\t"
-               "movaps   0x0A0(%0), %%xmm5\n\t"
-               "movaps   0x0C0(%0), %%xmm6\n\t"
-               "movaps   0x0E0(%0), %%xmm7\n\t"
+               "movaps   0x040(%0), %%xmm1\n\t"
+               "movaps   0x080(%0), %%xmm2\n\t"
+               "movaps   0x0C0(%0), %%xmm3\n\t"
+               "movaps   0x100(%0), %%xmm4\n\t"
+               "movaps   0x140(%0), %%xmm5\n\t"
+               "movaps   0x180(%0), %%xmm6\n\t"
+               "movaps   0x1C0(%0), %%xmm7\n\t"
 #if defined(__x86_64__)
-               "movaps   0x100(%0), %%xmm8\n\t"
-               "movaps   0x120(%0), %%xmm9\n\t"
-               "movaps   0x140(%0), %%xmm10\n\t"
-               "movaps   0x160(%0), %%xmm11\n\t"
-               "movaps   0x180(%0), %%xmm12\n\t"
-               "movaps   0x1A0(%0), %%xmm13\n\t"
-               "movaps   0x1C0(%0), %%xmm14\n\t"
-               "movaps   0x1E0(%0), %%xmm15\n\t"
+               "movaps   0x200(%0), %%xmm8\n\t"
+               "movaps   0x240(%0), %%xmm9\n\t"
+               "movaps   0x280(%0), %%xmm10\n\t"
+               "movaps   0x2C0(%0), %%xmm11\n\t"
+               "movaps   0x300(%0), %%xmm12\n\t"
+               "movaps   0x340(%0), %%xmm13\n\t"
+               "movaps   0x380(%0), %%xmm14\n\t"
+               "movaps   0x3C0(%0), %%xmm15\n\t"
 #endif
                "int3\n\t"
                :
@@ -2655,8 +2659,10 @@
 static __inline void get_ymm_regs(union x86_test_register out[])
 {
        union x86_test_register fill __aligned(32) = {
-               { 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F,
-                 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F }
+               .ymm = {
+                       0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F,
+                       0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F
+               }
        };
 
        __asm__ __volatile__(
@@ -2683,22 +2689,22 @@
                "int3\n\t"
                "\n\t"
                "vmovaps %%ymm0,  0x000(%0)\n\t"
-               "vmovaps %%ymm1,  0x020(%0)\n\t"
-               "vmovaps %%ymm2,  0x040(%0)\n\t"
-               "vmovaps %%ymm3,  0x060(%0)\n\t"
-               "vmovaps %%ymm4,  0x080(%0)\n\t"
-               "vmovaps %%ymm5,  0x0A0(%0)\n\t"
-               "vmovaps %%ymm6,  0x0C0(%0)\n\t"
-               "vmovaps %%ymm7,  0x0E0(%0)\n\t"
+               "vmovaps %%ymm1,  0x040(%0)\n\t"
+               "vmovaps %%ymm2,  0x080(%0)\n\t"
+               "vmovaps %%ymm3,  0x0C0(%0)\n\t"
+               "vmovaps %%ymm4,  0x100(%0)\n\t"
+               "vmovaps %%ymm5,  0x140(%0)\n\t"
+               "vmovaps %%ymm6,  0x180(%0)\n\t"
+               "vmovaps %%ymm7,  0x1C0(%0)\n\t"
 #if defined(__x86_64__)
-               "vmovaps %%ymm8,  0x100(%0)\n\t"
-               "vmovaps %%ymm9,  0x120(%0)\n\t"
-               "vmovaps %%ymm10, 0x140(%0)\n\t"
-               "vmovaps %%ymm11, 0x160(%0)\n\t"
-               "vmovaps %%ymm12, 0x180(%0)\n\t"
-               "vmovaps %%ymm13, 0x1A0(%0)\n\t"
-               "vmovaps %%ymm14, 0x1C0(%0)\n\t"
-               "vmovaps %%ymm15, 0x1E0(%0)\n\t"
+               "vmovaps %%ymm8,  0x200(%0)\n\t"
+               "vmovaps %%ymm9,  0x240(%0)\n\t"
+               "vmovaps %%ymm10, 0x280(%0)\n\t"
+               "vmovaps %%ymm11, 0x2C0(%0)\n\t"
+               "vmovaps %%ymm12, 0x300(%0)\n\t"
+               "vmovaps %%ymm13, 0x340(%0)\n\t"
+               "vmovaps %%ymm14, 0x380(%0)\n\t"
+               "vmovaps %%ymm15, 0x3C0(%0)\n\t"
 #endif
                :
                : "a"(out), "m"(fill)
@@ -2715,22 +2721,22 @@
 {
        __asm__ __volatile__(
                "vmovaps  0x000(%0), %%ymm0\n\t"
-               "vmovaps  0x020(%0), %%ymm1\n\t"
-               "vmovaps  0x040(%0), %%ymm2\n\t"
-               "vmovaps  0x060(%0), %%ymm3\n\t"
-               "vmovaps  0x080(%0), %%ymm4\n\t"
-               "vmovaps  0x0A0(%0), %%ymm5\n\t"
-               "vmovaps  0x0C0(%0), %%ymm6\n\t"
-               "vmovaps  0x0E0(%0), %%ymm7\n\t"
+               "vmovaps  0x040(%0), %%ymm1\n\t"
+               "vmovaps  0x080(%0), %%ymm2\n\t"
+               "vmovaps  0x0C0(%0), %%ymm3\n\t"
+               "vmovaps  0x100(%0), %%ymm4\n\t"
+               "vmovaps  0x140(%0), %%ymm5\n\t"
+               "vmovaps  0x180(%0), %%ymm6\n\t"
+               "vmovaps  0x1C0(%0), %%ymm7\n\t"
 #if defined(__x86_64__)
-               "vmovaps  0x100(%0), %%ymm8\n\t"
-               "vmovaps  0x120(%0), %%ymm9\n\t"
-               "vmovaps  0x140(%0), %%ymm10\n\t"
-               "vmovaps  0x160(%0), %%ymm11\n\t"
-               "vmovaps  0x180(%0), %%ymm12\n\t"
-               "vmovaps  0x1A0(%0), %%ymm13\n\t"
-               "vmovaps  0x1C0(%0), %%ymm14\n\t"
-               "vmovaps  0x1E0(%0), %%ymm15\n\t"
+               "vmovaps  0x200(%0), %%ymm8\n\t"
+               "vmovaps  0x240(%0), %%ymm9\n\t"
+               "vmovaps  0x280(%0), %%ymm10\n\t"
+               "vmovaps  0x2C0(%0), %%ymm11\n\t"
+               "vmovaps  0x300(%0), %%ymm12\n\t"
+               "vmovaps  0x340(%0), %%ymm13\n\t"
+               "vmovaps  0x380(%0), %%ymm14\n\t"
+               "vmovaps  0x3C0(%0), %%ymm15\n\t"
 #endif
                "int3\n\t"
                :
@@ -2744,6 +2750,181 @@
        );
 }
 
+__attribute__((target("avx512f")))
+static __inline void get_zmm_regs(union x86_test_register out[])
+{
+       union x86_test_register fill __aligned(64) = {
+               .zmm = {
+                       0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F,
+                       0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F,
+                       0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F,
+                       0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F
+               }



Home | Main Index | Thread Index | Old Index