Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/arch/amd64 Improve our segregs model. Pass 1/3.



details:   https://anonhg.NetBSD.org/src/rev/af3a971b062e
branches:  trunk
changeset: 827193:af3a971b062e
user:      maxv <maxv%NetBSD.org@localhost>
date:      Thu Oct 19 18:36:31 2017 +0000

description:
Improve our segregs model. Pass 1/3.

Right now, we are saving and restoring %ds/%es each time we enter/leave the
kernel. However, we let %fs/%gs live in the kernel space, and we rely on
the fact that when switching to an LWP, %fs/%gs are set right away (via
cpu_switchto or setregs).

This model has two drawbacks: we are taking care of %ds/%es even though
they are deprecated (useless) on 64bit LWPs, and we are restricting %fs/%gs
even though they still have a meaning on 32bit LWPs.

Therefore, handle 32bit and 64bit LWPs differently:
 * 64bit LWPs use fixed segregs, which do not need to be saved/restored.
 * 32bit LWPs have dynamic segregs, always saved/restored.

For now, only the handling of %ds and %es is changed; %fs and %gs will be
dealt with in the next passes.

The trapframe is constructed as usual. In INTRFASTEXIT, we restore %ds/%es
depending on the %cs value. If %cs contains one of the two standard 64bit
selectors, don't do anything. Otherwise, restore everything.

When doing a context switch, just restore %ds/%es to their default values.
On a 32bit LWP they will be overwritten by INTRFASTEXIT; on a 64bit LWP
they won't be updated.

In the ACPI wakeup code, restore %ds/%es to the default 64bit user value.

diffstat:

 sys/arch/amd64/acpi/acpi_wakeup_low.S |  11 +++++--
 sys/arch/amd64/amd64/locore.S         |  47 +++++++++++++++++++++++-----------
 sys/arch/amd64/amd64/machdep.c        |  14 ++++++++--
 3 files changed, 50 insertions(+), 22 deletions(-)

diffs (185 lines):

diff -r a8c2469d1ac9 -r af3a971b062e sys/arch/amd64/acpi/acpi_wakeup_low.S
--- a/sys/arch/amd64/acpi/acpi_wakeup_low.S     Thu Oct 19 16:01:58 2017 +0000
+++ b/sys/arch/amd64/acpi/acpi_wakeup_low.S     Thu Oct 19 18:36:31 2017 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: acpi_wakeup_low.S,v 1.6 2017/09/23 10:18:49 maxv Exp $ */
+/*     $NetBSD: acpi_wakeup_low.S,v 1.7 2017/10/19 18:36:31 maxv Exp $ */
 
 /*-
  * Copyright (c) 2007 Joerg Sonnenberger <joerg%netbsd.org@localhost>
@@ -40,12 +40,17 @@
        lgdt    ACPI_SUSPEND_GDT(%r8)
 
        /* Reload fixed descriptors for new GDT */
-       movw    $GSEL(GDATA_SEL, SEL_KPL),%ax
+       movw    $GSEL(GUDATA_SEL, SEL_UPL),%ax
        movw    %ax,%ds
        movw    %ax,%es
+       movw    $GSEL(GDATA_SEL, SEL_KPL),%ax
        movw    %ax,%ss
 
-       /* FS and GS are driven by MSRs, so use NULL for them */
+       /*
+        * FS and GS are driven by MSRs, so use NULL for them.
+        * XXX XXX XXX That's not the case if we're returning to a 32bit
+        * LWP!
+        */
        xorw    %ax,%ax
        movw    %ax,%fs
        movw    %ax,%gs
diff -r a8c2469d1ac9 -r af3a971b062e sys/arch/amd64/amd64/locore.S
--- a/sys/arch/amd64/amd64/locore.S     Thu Oct 19 16:01:58 2017 +0000
+++ b/sys/arch/amd64/amd64/locore.S     Thu Oct 19 18:36:31 2017 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: locore.S,v 1.133 2017/10/17 07:48:10 maxv Exp $        */
+/*     $NetBSD: locore.S,v 1.134 2017/10/19 18:36:31 maxv Exp $        */
 
 /*
  * Copyright-o-rama!
@@ -1159,7 +1159,10 @@
        jnz     lwp_32bit
 
 lwp_64bit:
-       /* Zero out %fs/%gs registers. */
+       /* Set default 64bit values in %ds, %es, %fs and %gs. */
+       movq    $GSEL(GUDATA_SEL, SEL_UPL),%rax
+       movw    %ax,%ds
+       movw    %ax,%es
        xorq    %rax,%rax
        movw    %ax,%fs
        CLI(cx)
@@ -1193,8 +1196,11 @@
        movq    PCB_GS(%r14),%rax
        movq    %rax,(GUGS_SEL*8)(%rcx)
 
-       /* Reload %fs and %gs */
+       /* Set default 32bit values in %ds, %es. %fs and %gs are special. */
        movq    L_MD_REGS(%r12),%rbx
+       movq    $GSEL(GUDATA32_SEL, SEL_UPL),%rax
+       movw    %ax,%ds
+       movw    %ax,%es
        movw    TF_FS(%rbx),%fs
        CLI(ax)
        SWAPGS
@@ -1281,10 +1287,10 @@
        cld
 #endif
        INTR_SAVE_GPRS
-       movw    %es,TF_ES(%rsp)
-       movw    %fs,TF_FS(%rsp)
-       movw    %gs,TF_GS(%rsp)
-       movw    $(GSEL(GUDATA_SEL, SEL_UPL)),TF_DS(%rsp)
+       movw    $GSEL(GUDATA_SEL, SEL_UPL),TF_DS(%rsp)
+       movw    $GSEL(GUDATA_SEL, SEL_UPL),TF_ES(%rsp)
+       movw    $0,TF_FS(%rsp)
+       movw    $0,TF_GS(%rsp)
        STI(si)
 
 do_syscall:
@@ -1313,18 +1319,18 @@
 #endif
 
        /*
-        * If the syscall might have modified some registers, or we are a 32bit
-        * process we must return to user with an 'iret' instruction.
-        * If the iret faults in kernel (assumed due to illegal register values)
-        * then a SIGSEGV will be signalled.
+        * Decide if we need to take a slow path. That's the case when we
+        * want to reload %cs and %ss on a 64bit LWP (MDL_IRET set), or when
+        * we're returning to a 32bit LWP (MDL_COMPAT32 set).
+        *
+        * In either case, we jump into intrfastexit and return to userland
+        * with the iret instruction.
         */
        testl   $(MDL_IRET|MDL_COMPAT32),L_MD_FLAGS(%r14)
+       jnz     intrfastexit
+
        INTR_RESTORE_GPRS
-       movw    TF_ES(%rsp),%es
-       movw    TF_DS(%rsp),%ds
        SWAPGS
-       jnz     .Lkexit
-
 #ifndef XEN
        movq    TF_RIP(%rsp),%rcx       /* %rip for sysret */
        movq    TF_RFLAGS(%rsp),%r11    /* %flags for sysret */
@@ -1469,12 +1475,21 @@
        INTR_RESTORE_GPRS
        testq   $SEL_UPL,TF_CS(%rsp)    /* interrupted %cs */
        jz      .Lkexit
+       cmpq    $LSEL(LUCODE_SEL, SEL_UPL),TF_CS(%rsp)
+       je      .Luexit64
+       cmpq    $GSEL(GUCODE_SEL, SEL_UPL),TF_CS(%rsp)
+       je      .Luexit64
 
-       /* Disable interrupts until the 'iret', user registers loaded. */
+.Luexit32:
        NOT_XEN(cli;)
        movw    TF_ES(%rsp),%es
        movw    TF_DS(%rsp),%ds
        SWAPGS
+       jmp     .Lkexit
+
+.Luexit64:
+       NOT_XEN(cli;)
+       SWAPGS
 
 .Lkexit:
        addq    $TF_REGSIZE+16,%rsp     /* + T_xxx and error code */
diff -r a8c2469d1ac9 -r af3a971b062e sys/arch/amd64/amd64/machdep.c
--- a/sys/arch/amd64/amd64/machdep.c    Thu Oct 19 16:01:58 2017 +0000
+++ b/sys/arch/amd64/amd64/machdep.c    Thu Oct 19 18:36:31 2017 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: machdep.c,v 1.269 2017/10/19 10:01:09 maxv Exp $       */
+/*     $NetBSD: machdep.c,v 1.270 2017/10/19 18:36:31 maxv Exp $       */
 
 /*
  * Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008, 2011
@@ -110,7 +110,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.269 2017/10/19 10:01:09 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.270 2017/10/19 18:36:31 maxv Exp $");
 
 /* #define XENDEBUG_LOW  */
 
@@ -447,15 +447,19 @@
                HYPERVISOR_fpu_taskswitch(1);
        }
 
-       /* Update TLS segment pointers */
+       /* Update segment registers */
        if (pcb->pcb_flags & PCB_COMPAT32) {
                update_descriptor(&curcpu()->ci_gdt[GUFS_SEL], &pcb->pcb_fs);
                update_descriptor(&curcpu()->ci_gdt[GUGS_SEL], &pcb->pcb_gs);
+               setds(GSEL(GUDATA32_SEL, SEL_UPL));
+               setes(GSEL(GUDATA32_SEL, SEL_UPL));
                setfs(tf->tf_fs);
                HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, tf->tf_gs);
        } else {
                update_descriptor(&curcpu()->ci_gdt[GUFS_SEL], &zero);
                update_descriptor(&curcpu()->ci_gdt[GUGS_SEL], &zero);
+               setds(GSEL(GUDATA_SEL, SEL_UPL));
+               setes(GSEL(GUDATA_SEL, SEL_UPL));
                setfs(0);
                HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, 0);
                HYPERVISOR_set_segment_base(SEGBASE_FS, pcb->pcb_fs);
@@ -2063,6 +2067,8 @@
        kpreempt_disable();
        tf->tf_fs = 0;
        tf->tf_gs = 0;
+       setds(GSEL(GUDATA_SEL, SEL_UPL));
+       setes(GSEL(GUDATA_SEL, SEL_UPL));
        setfs(0);
        setusergs(0);
 
@@ -2100,6 +2106,8 @@
        kpreempt_disable();
        tf->tf_fs = 0;
        tf->tf_gs = 0;
+       setds(GSEL(GUDATA32_SEL, SEL_UPL));
+       setes(GSEL(GUDATA32_SEL, SEL_UPL));
        setfs(0);
        setusergs(0);
        pcb->pcb_fs = 0;



Home | Main Index | Thread Index | Old Index