Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/arch Add a dynamic detection for SVS.



details:   https://anonhg.NetBSD.org/src/rev/31600211800f
branches:  trunk
changeset: 830019:31600211800f
user:      maxv <maxv%NetBSD.org@localhost>
date:      Thu Feb 22 08:56:51 2018 +0000

description:
Add a dynamic detection for SVS.

The SVS_* macros are now compiled as skip-noopt. When the system boots, if
the cpu is from Intel, they are hotpatched to their real content.
Typically:

                jmp     1f
                int3
                int3
                int3
                ... int3 ...
        1:

gets hotpatched to:

                movq    SVS_UTLS+UTLS_KPDIRPA,%rax
                movq    %rax,%cr3
                movq    CPUVAR(KRSP0),%rsp

These two chunks of code are of the exact same size. We put int3 (0xCC)
in the padding to make sure we never execute there.

In the non-SVS (i.e. non-Intel) case, all it costs is one jump. Given that
the SVS_* macros are small, this jump will likely leave us in the same
icache line, so it's pretty fast.

The syscall entry point is special, because there we use a scratch uint64_t
not in curcpu but in the UTLS page, and it's difficult to hotpatch this
properly. So instead of hotpatching we declare the entry point as an ASM
macro, and define two functions: syscall and syscall_svs, the latter being
the one used in the SVS case.

While here 'syscall' is optimized not to contain an SVS_ENTER - this way
we don't even need to do a jump on the non-SVS case.

When adding pages in the user page tables, make sure we don't have PG_G,
now that it's dynamic.

A read-only sysctl is added, machdep.svs_enabled, that tells whether the
kernel uses SVS or not.

More changes to come, svs_init() is not very clean.

diffstat:

 sys/arch/amd64/amd64/locore.S     |  132 +++++++++++++++++++++++++++++--------
 sys/arch/amd64/include/frameasm.h |   42 ++++++-----
 sys/arch/x86/x86/cpu.c            |   11 ++-
 sys/arch/x86/x86/patch.c          |   36 +++++++++-
 sys/arch/x86/x86/svs.c            |   15 ++-
 sys/arch/x86/x86/x86_machdep.c    |   12 ++-
 6 files changed, 185 insertions(+), 63 deletions(-)

diffs (truncated from 446 to 300 lines):

diff -r d4e6597ce4aa -r 31600211800f sys/arch/amd64/amd64/locore.S
--- a/sys/arch/amd64/amd64/locore.S     Thu Feb 22 08:49:42 2018 +0000
+++ b/sys/arch/amd64/amd64/locore.S     Thu Feb 22 08:56:51 2018 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: locore.S,v 1.151 2018/02/18 14:07:29 maxv Exp $        */
+/*     $NetBSD: locore.S,v 1.152 2018/02/22 08:56:51 maxv Exp $        */
 
 /*
  * Copyright-o-rama!
@@ -1117,17 +1117,27 @@
 
        /* Switch ring0 stack */
 #ifdef SVS
+       movb    _C_LABEL(svs_enabled),%al
+       testb   %al,%al
+       jz      .Lno_svs_switch
+
        movq    CPUVAR(RSP0),%rax
        movq    CPUVAR(TSS),%rdi
        movq    %rax,TSS_RSP0(%rdi)
-#elif !defined(XEN)
+       jmp     .Lring0_switched
+
+.Lno_svs_switch:
+#endif
+
+#if !defined(XEN)
        movq    PCB_RSP0(%r14),%rax
        movq    CPUVAR(TSS),%rdi
        movq    %rax,TSS_RSP0(%rdi)
 #else
        movq    %r14,%rdi
-       callq   _C_LABEL(x86_64_switch_context);
+       callq   _C_LABEL(x86_64_switch_context)
 #endif
+.Lring0_switched:
 
        /* Don't bother with the rest if switching to a system process. */
        testl   $LW_SYSTEM,L_FLAG(%r12)
@@ -1347,9 +1357,10 @@
 /*
  * Entry points of the 'syscall' instruction, 64bit and 32bit mode.
  */
-       TEXT_USER_BEGIN
+
 
-IDTVEC(syscall)
+.macro SYSCALL_ENTRY   name,is_svs
+IDTVEC(\name)
 #ifndef XEN
        /*
         * The user %rip is in %rcx and the user %rflags in %r11. The kernel %cs
@@ -1365,31 +1376,39 @@
         */
        swapgs
 
-#ifdef SVS
-       movq    %rax,SVS_UTLS+UTLS_SCRATCH
-       movq    SVS_UTLS+UTLS_RSP0,%rax
-#define SP(x)  (x)-(TF_SS+8)(%rax)
-#else
-       movq    %r15,CPUVAR(SCRATCH)
-       movq    CPUVAR(CURLWP),%r15
-       movq    L_PCB(%r15),%r15
-       movq    PCB_RSP0(%r15),%r15     /* LWP's kernel stack pointer */
-#define SP(x)  (x)-(TF_SS+8)(%r15)
-#endif
+#define SP(x,reg)      (x)-(TF_SS+8)(reg)
+
+       .if     \is_svs
+               movq    %rax,SVS_UTLS+UTLS_SCRATCH
+               movq    SVS_UTLS+UTLS_RSP0,%rax
+
+               /* Make stack look like an 'int nn' frame */
+               movq    $(LSEL(LUDATA_SEL, SEL_UPL)),SP(TF_SS,%rax)     /* user %ss */
+               movq    %rsp,SP(TF_RSP,%rax)                            /* user %rsp */
+               movq    %r11,SP(TF_RFLAGS,%rax)                         /* user %rflags */
+               movq    $(LSEL(LUCODE_SEL, SEL_UPL)),SP(TF_CS,%rax)     /* user %cs */
+               movq    %rcx,SP(TF_RIP,%rax)                            /* user %rip */
+               leaq    SP(0,%rax),%rsp                 /* %rsp now valid after frame */
 
-       /* Make stack look like an 'int nn' frame */
-       movq    $(LSEL(LUDATA_SEL, SEL_UPL)),SP(TF_SS)  /* user %ss */
-       movq    %rsp,SP(TF_RSP)                         /* user %rsp */
-       movq    %r11,SP(TF_RFLAGS)                      /* user %rflags */
-       movq    $(LSEL(LUCODE_SEL, SEL_UPL)),SP(TF_CS)  /* user %cs */
-       movq    %rcx,SP(TF_RIP)                         /* user %rip */
+               movq    SVS_UTLS+UTLS_SCRATCH,%rax
+       .else
+               movq    %r15,CPUVAR(SCRATCH)
+               movq    CPUVAR(CURLWP),%r15
+               movq    L_PCB(%r15),%r15
+               movq    PCB_RSP0(%r15),%r15     /* LWP's kernel stack pointer */
 
-       leaq    SP(0),%rsp              /* %rsp now valid after frame */
-#ifdef SVS
-       movq    SVS_UTLS+UTLS_SCRATCH,%rax
-#else
-       movq    CPUVAR(SCRATCH),%r15
-#endif
+               /* Make stack look like an 'int nn' frame */
+               movq    $(LSEL(LUDATA_SEL, SEL_UPL)),SP(TF_SS,%r15)     /* user %ss */
+               movq    %rsp,SP(TF_RSP,%r15)                            /* user %rsp */
+               movq    %r11,SP(TF_RFLAGS,%r15)                         /* user %rflags */
+               movq    $(LSEL(LUCODE_SEL, SEL_UPL)),SP(TF_CS,%r15)     /* user %cs */
+               movq    %rcx,SP(TF_RIP,%r15)                            /* user %rip */
+               leaq    SP(0,%r15),%rsp                 /* %rsp now valid after frame */
+
+               movq    CPUVAR(SCRATCH),%r15
+       .endif
+
+#undef SP
 
        movq    $2,TF_ERR(%rsp)         /* syscall instruction size */
        movq    $T_ASTFLT,TF_TRAPNO(%rsp)
@@ -1406,9 +1425,18 @@
        movw    $GSEL(GUDATA_SEL, SEL_UPL),TF_ES(%rsp)
        movw    $0,TF_FS(%rsp)
        movw    $0,TF_GS(%rsp)
-       SVS_ENTER
+       .if     \is_svs
+               SVS_ENTER
+       .endif
        jmp     handle_syscall
-IDTVEC_END(syscall)
+IDTVEC_END(\name)
+.endm
+
+SYSCALL_ENTRY  syscall,is_svs=0
+
+       TEXT_USER_BEGIN
+
+SYSCALL_ENTRY  syscall_svs,is_svs=1
 
 IDTVEC(syscall32)
        sysret          /* go away please */
@@ -1559,3 +1587,47 @@
 do_iret:
        iretq
 END(intrfastexit)
+
+       TEXT_USER_END
+
+#ifdef SVS
+       .globl  svs_enter
+       .globl  svs_enter_end
+       .globl  svs_enter_altstack
+       .globl  svs_enter_altstack_end
+       .globl  svs_leave
+       .globl  svs_leave_end
+       .globl  svs_leave_altstack
+       .globl  svs_leave_altstack_end
+
+LABEL(svs_enter)
+       movq    SVS_UTLS+UTLS_KPDIRPA,%rax
+       movq    %rax,%cr3
+       movq    CPUVAR(KRSP0),%rsp
+LABEL(svs_enter_end)
+
+LABEL(svs_enter_altstack)
+       testb   $SEL_UPL,TF_CS(%rsp)
+       jz      1234f
+       movq    SVS_UTLS+UTLS_KPDIRPA,%rax
+       movq    %rax,%cr3
+1234:
+LABEL(svs_enter_altstack_end)
+
+LABEL(svs_leave)
+       testb   $SEL_UPL,TF_CS(%rsp)
+       jz      1234f
+       movq    CPUVAR(URSP0),%rsp
+       movq    CPUVAR(UPDIRPA),%rax
+       movq    %rax,%cr3
+1234:
+LABEL(svs_leave_end)
+
+LABEL(svs_leave_altstack)
+       testb   $SEL_UPL,TF_CS(%rsp)
+       jz      1234f
+       movq    CPUVAR(UPDIRPA),%rax
+       movq    %rax,%cr3
+1234:
+LABEL(svs_leave_altstack_end)
+#endif
diff -r d4e6597ce4aa -r 31600211800f sys/arch/amd64/include/frameasm.h
--- a/sys/arch/amd64/include/frameasm.h Thu Feb 22 08:49:42 2018 +0000
+++ b/sys/arch/amd64/include/frameasm.h Thu Feb 22 08:56:51 2018 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: frameasm.h,v 1.34 2018/01/27 18:27:08 maxv Exp $       */
+/*     $NetBSD: frameasm.h,v 1.35 2018/02/22 08:56:51 maxv Exp $       */
 
 #ifndef _AMD64_MACHINE_FRAMEASM_H
 #define _AMD64_MACHINE_FRAMEASM_H
@@ -40,6 +40,10 @@
 #define HP_NAME_STAC           2
 #define HP_NAME_NOLOCK         3
 #define HP_NAME_RETFENCE       4
+#define HP_NAME_SVS_ENTER      5
+#define HP_NAME_SVS_LEAVE      6
+#define HP_NAME_SVS_ENTER_ALT  7
+#define HP_NAME_SVS_LEAVE_ALT  8
 
 #define HOTPATCH(name, size) \
 123:                                           ; \
@@ -107,32 +111,30 @@
 #define UTLS_SCRATCH           8
 #define UTLS_RSP0              16
 
+#define SVS_ENTER_BYTES        22
 #define SVS_ENTER \
-       movq    SVS_UTLS+UTLS_KPDIRPA,%rax      ; \
-       movq    %rax,%cr3                       ; \
-       movq    CPUVAR(KRSP0),%rsp
+       HOTPATCH(HP_NAME_SVS_ENTER, SVS_ENTER_BYTES)    ; \
+       .byte 0xEB, (SVS_ENTER_BYTES-2) /* jmp */       ; \
+       .fill   (SVS_ENTER_BYTES-2),1,0xCC
 
+#define SVS_LEAVE_BYTES        31
 #define SVS_LEAVE \
-       testb   $SEL_UPL,TF_CS(%rsp)            ; \
-       jz      1234f                           ; \
-       movq    CPUVAR(URSP0),%rsp              ; \
-       movq    CPUVAR(UPDIRPA),%rax            ; \
-       movq    %rax,%cr3                       ; \
-1234:
+       HOTPATCH(HP_NAME_SVS_LEAVE, SVS_LEAVE_BYTES)    ; \
+       .byte 0xEB, (SVS_LEAVE_BYTES-2) /* jmp */       ; \
+       .fill   (SVS_LEAVE_BYTES-2),1,0xCC
 
+#define SVS_ENTER_ALT_BYTES    23
 #define SVS_ENTER_ALTSTACK \
-       testb   $SEL_UPL,TF_CS(%rsp)            ; \
-       jz      1234f                           ; \
-       movq    SVS_UTLS+UTLS_KPDIRPA,%rax      ; \
-       movq    %rax,%cr3                       ; \
-1234:
+       HOTPATCH(HP_NAME_SVS_ENTER_ALT, SVS_ENTER_ALT_BYTES)    ; \
+       .byte 0xEB, (SVS_ENTER_ALT_BYTES-2)     /* jmp */       ; \
+       .fill   (SVS_ENTER_ALT_BYTES-2),1,0xCC
 
+#define SVS_LEAVE_ALT_BYTES    22
 #define SVS_LEAVE_ALTSTACK \
-       testb   $SEL_UPL,TF_CS(%rsp)            ; \
-       jz      1234f                           ; \
-       movq    CPUVAR(UPDIRPA),%rax            ; \
-       movq    %rax,%cr3                       ; \
-1234:
+       HOTPATCH(HP_NAME_SVS_LEAVE_ALT, SVS_LEAVE_ALT_BYTES)    ; \
+       .byte 0xEB, (SVS_LEAVE_ALT_BYTES-2)     /* jmp */       ; \
+       .fill   (SVS_LEAVE_ALT_BYTES-2),1,0xCC
+
 #else
 #define SVS_ENTER      /* nothing */
 #define SVS_LEAVE      /* nothing */
diff -r d4e6597ce4aa -r 31600211800f sys/arch/x86/x86/cpu.c
--- a/sys/arch/x86/x86/cpu.c    Thu Feb 22 08:49:42 2018 +0000
+++ b/sys/arch/x86/x86/cpu.c    Thu Feb 22 08:56:51 2018 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: cpu.c,v 1.147 2018/01/27 09:33:25 maxv Exp $   */
+/*     $NetBSD: cpu.c,v 1.148 2018/02/22 08:56:52 maxv Exp $   */
 
 /*
  * Copyright (c) 2000-2012 NetBSD Foundation, Inc.
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.147 2018/01/27 09:33:25 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.148 2018/02/22 08:56:52 maxv Exp $");
 
 #include "opt_ddb.h"
 #include "opt_mpbios.h"                /* for MPDEBUG */
@@ -1090,7 +1090,7 @@
 
 #ifdef __x86_64__
 typedef void (vector)(void);
-extern vector Xsyscall, Xsyscall32;
+extern vector Xsyscall, Xsyscall32, Xsyscall_svs;
 #endif
 
 void
@@ -1104,6 +1104,11 @@
        wrmsr(MSR_CSTAR, (uint64_t)Xsyscall32);
        wrmsr(MSR_SFMASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D|PSL_AC);
 
+#ifdef SVS
+       if (svs_enabled)
+               wrmsr(MSR_LSTAR, (uint64_t)Xsyscall_svs);
+#endif
+
        if (full) {
                wrmsr(MSR_FSBASE, 0);
                wrmsr(MSR_GSBASE, (uint64_t)ci);
diff -r d4e6597ce4aa -r 31600211800f sys/arch/x86/x86/patch.c
--- a/sys/arch/x86/x86/patch.c  Thu Feb 22 08:49:42 2018 +0000
+++ b/sys/arch/x86/x86/patch.c  Thu Feb 22 08:56:51 2018 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: patch.c,v 1.31 2018/01/27 09:33:25 maxv Exp $  */
+/*     $NetBSD: patch.c,v 1.32 2018/02/22 08:56:52 maxv Exp $  */
 



Home | Main Index | Thread Index | Old Index