Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/arch Add a new option, SVS (for Separate Virtual Space), ...



details:   https://anonhg.NetBSD.org/src/rev/1c19e44e063d
branches:  trunk
changeset: 828854:1c19e44e063d
user:      maxv <maxv%NetBSD.org@localhost>
date:      Sun Jan 07 16:10:16 2018 +0000

description:
Add a new option, SVS (for Separate Virtual Space), that unmaps kernel
pages when running in userland. For now, only the PTE area is unmapped.

Sent on tech-kern@.

diffstat:

 sys/arch/amd64/amd64/amd64_trap.S |    7 +-
 sys/arch/amd64/amd64/genassym.cf  |    4 +-
 sys/arch/amd64/amd64/locore.S     |   13 +++-
 sys/arch/amd64/amd64/machdep.c    |  140 +++++++++++++++++++++++++++++++++++++-
 sys/arch/amd64/conf/GENERIC       |    5 +-
 sys/arch/amd64/conf/files.amd64   |    3 +-
 sys/arch/amd64/include/frameasm.h |   20 +++++-
 sys/arch/amd64/include/pmap.h     |    6 +-
 sys/arch/i386/conf/files.i386     |    3 +-
 sys/arch/x86/include/cpu.h        |   11 ++-
 sys/arch/x86/x86/cpu.c            |   13 ++-
 sys/arch/x86/x86/pmap.c           |   33 ++++++--
 12 files changed, 234 insertions(+), 24 deletions(-)

diffs (truncated from 614 to 300 lines):

diff -r f7693e1fcb3b -r 1c19e44e063d sys/arch/amd64/amd64/amd64_trap.S
--- a/sys/arch/amd64/amd64/amd64_trap.S Sun Jan 07 16:08:12 2018 +0000
+++ b/sys/arch/amd64/amd64/amd64_trap.S Sun Jan 07 16:10:16 2018 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: amd64_trap.S,v 1.16 2018/01/07 12:42:46 maxv Exp $     */
+/*     $NetBSD: amd64_trap.S,v 1.17 2018/01/07 16:10:16 maxv Exp $     */
 
 /*
  * Copyright (c) 1998, 2007, 2008, 2017 The NetBSD Foundation, Inc.
@@ -66,7 +66,7 @@
 
 #if 0
 #include <machine/asm.h>
-__KERNEL_RCSID(0, "$NetBSD: amd64_trap.S,v 1.16 2018/01/07 12:42:46 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: amd64_trap.S,v 1.17 2018/01/07 16:10:16 maxv Exp $");
 #endif
 
 /*
@@ -120,6 +120,7 @@
 #else
        ZTRAP_NJ(T_NMI)
        subq    $TF_REGSIZE,%rsp
+       SVS_ENTER
        INTR_SAVE_GPRS
        cld
        SMAP_ENABLE
@@ -137,6 +138,7 @@
        movq    %rsp,%rdi
        incq    CPUVAR(NTRAP)
        call    _C_LABEL(nmitrap)
+       SVS_LEAVE
        swapgs
        jmp     .Lnmileave
 
@@ -144,6 +146,7 @@
        movq    %rsp,%rdi
        incq    CPUVAR(NTRAP)
        call    _C_LABEL(nmitrap)
+       SVS_LEAVE
 
 .Lnmileave:
        movw    TF_ES(%rsp),%es
diff -r f7693e1fcb3b -r 1c19e44e063d sys/arch/amd64/amd64/genassym.cf
--- a/sys/arch/amd64/amd64/genassym.cf  Sun Jan 07 16:08:12 2018 +0000
+++ b/sys/arch/amd64/amd64/genassym.cf  Sun Jan 07 16:10:16 2018 +0000
@@ -1,4 +1,4 @@
-#      $NetBSD: genassym.cf,v 1.64 2018/01/04 13:36:30 maxv Exp $
+#      $NetBSD: genassym.cf,v 1.65 2018/01/07 16:10:16 maxv Exp $
 
 #
 # Copyright (c) 1998, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@@ -236,6 +236,8 @@
 define CPU_INFO_IDLELWP        offsetof(struct cpu_info, ci_data.cpu_idlelwp)
 define CPU_INFO_PMAP           offsetof(struct cpu_info, ci_pmap)
 define CPU_INFO_TSS            offsetof(struct cpu_info, ci_tss)
+define CPU_INFO_UPDIRPA        offsetof(struct cpu_info, ci_svs_updirpa)
+define CPU_INFO_KPDIRPA        offsetof(struct cpu_info, ci_svs_kpdirpa)
 define CPU_INFO_NSYSCALL       offsetof(struct cpu_info, ci_data.cpu_nsyscall)
 define CPU_INFO_NTRAP          offsetof(struct cpu_info, ci_data.cpu_ntrap)
 define CPU_INFO_NINTR          offsetof(struct cpu_info, ci_data.cpu_nintr)
diff -r f7693e1fcb3b -r 1c19e44e063d sys/arch/amd64/amd64/locore.S
--- a/sys/arch/amd64/amd64/locore.S     Sun Jan 07 16:08:12 2018 +0000
+++ b/sys/arch/amd64/amd64/locore.S     Sun Jan 07 16:10:16 2018 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: locore.S,v 1.144 2018/01/04 13:36:30 maxv Exp $        */
+/*     $NetBSD: locore.S,v 1.145 2018/01/07 16:10:16 maxv Exp $        */
 
 /*
  * Copyright-o-rama!
@@ -159,6 +159,7 @@
 #include "opt_compat_netbsd.h"
 #include "opt_compat_netbsd32.h"
 #include "opt_xen.h"
+#include "opt_svs.h"
 
 #include "assym.h"
 #include "lapic.h"
@@ -1088,6 +1089,12 @@
        movq    %rbp,PCB_RBP(%rax)
 .Lskip_save:
 
+#ifdef SVS
+       pushq   %rdx
+       callq   _C_LABEL(svs_lwp_switch)
+       popq    %rdx
+#endif
+
        /* Switch to newlwp's stack. */
        movq    L_PCB(%r12),%r14
        movq    PCB_RSP(%r14),%rsp
@@ -1288,6 +1295,7 @@
        subq    $TF_REGSIZE,%rsp
        cld
 #endif
+       SVS_ENTER
        INTR_SAVE_GPRS
        movw    $GSEL(GUDATA_SEL, SEL_UPL),TF_DS(%rsp)
        movw    $GSEL(GUDATA_SEL, SEL_UPL),TF_ES(%rsp)
@@ -1332,6 +1340,7 @@
        jnz     intrfastexit
 
        INTR_RESTORE_GPRS
+       SVS_LEAVE
        SWAPGS
 #ifndef XEN
        movq    TF_RIP(%rsp),%rcx       /* %rip for sysret */
@@ -1494,6 +1503,7 @@
        movw    TF_DS(%rsp),%ds
 do_mov_fs:
        movw    TF_FS(%rsp),%fs
+       SVS_LEAVE
        SWAPGS
 #ifndef XEN
 do_mov_gs:
@@ -1503,6 +1513,7 @@
 
 .Luexit64:
        NOT_XEN(cli;)
+       SVS_LEAVE
        SWAPGS
 
 .Lkexit:
diff -r f7693e1fcb3b -r 1c19e44e063d sys/arch/amd64/amd64/machdep.c
--- a/sys/arch/amd64/amd64/machdep.c    Sun Jan 07 16:08:12 2018 +0000
+++ b/sys/arch/amd64/amd64/machdep.c    Sun Jan 07 16:10:16 2018 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: machdep.c,v 1.284 2018/01/05 08:04:20 maxv Exp $       */
+/*     $NetBSD: machdep.c,v 1.285 2018/01/07 16:10:16 maxv Exp $       */
 
 /*
  * Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008, 2011
@@ -110,7 +110,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.284 2018/01/05 08:04:20 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.285 2018/01/07 16:10:16 maxv Exp $");
 
 /* #define XENDEBUG_LOW  */
 
@@ -123,6 +123,7 @@
 #include "opt_realmem.h"
 #include "opt_xen.h"
 #include "opt_kaslr.h"
+#include "opt_svs.h"
 #ifndef XEN
 #include "opt_physmem.h"
 #endif
@@ -2228,3 +2229,138 @@
        return true;
 }
 #endif
+
+/* -------------------------------------------------------------------------- */
+
+#ifdef SVS
+/*
+ * Separate Virtual Space
+ *
+ * A per-cpu L4 page is maintained in ci_svs_updirpa. During each context
+ * switch to a user pmap, updirpa is populated with the entries of the new
+ * pmap, minus what we don't want to have mapped in userland.
+ *
+ * Note on locking/synchronization here:
+ *
+ * (a) Touching ci_svs_updir without holding ci_svs_mtx first is *not*
+ *     allowed.
+ *
+ * (b) pm_kernel_cpus contains the set of CPUs that have the pmap loaded
+ *     in their CR3 register. It must *not* be replaced by pm_cpus.
+ *
+ * (c) When a context switch on the current CPU is made from a user LWP
+ *     towards a kernel LWP, CR3 is not updated. Therefore, the pmap's
+ *     pm_kernel_cpus still contains the current CPU. It implies that the
+ *     remote CPUs that execute other threads of the user process we just
+ *     left will keep synchronizing us against their changes.
+ *
+ * TODO: for now, only PMAP_SLOT_PTE is unmapped.
+ */
+
+void
+cpu_svs_init(struct cpu_info *ci)
+{
+       struct vm_page *pg;
+
+       KASSERT(ci != NULL);
+       /* Set up ci's SVS state; running out of memory here is fatal. */
+       pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
+       if (pg == NULL)
+               panic("%s: failed to allocate L4 PA for CPU %d\n",
+                       __func__, cpu_index(ci));
+       ci->ci_svs_updirpa = VM_PAGE_TO_PHYS(pg);
+       /* Reserve one page of kernel VA (VAONLY: no backing page attached). */
+       ci->ci_svs_updir = (pt_entry_t *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
+               UVM_KMF_VAONLY | UVM_KMF_NOWAIT);
+       if (ci->ci_svs_updir == NULL)
+               panic("%s: failed to allocate L4 VA for CPU %d\n",
+                       __func__, cpu_index(ci));
+       /* Map the zeroed page read/write at that VA so it can be edited. */
+       pmap_kenter_pa((vaddr_t)ci->ci_svs_updir, ci->ci_svs_updirpa,
+               VM_PROT_READ | VM_PROT_WRITE, 0);
+
+       pmap_update(pmap_kernel());
+       /* Initially ci_svs_kpdirpa is that of the kernel pmap. */
+       ci->ci_svs_kpdirpa = pmap_pdirpa(pmap_kernel(), 0);
+       /* ci_svs_updir must only be touched with this mutex held. */
+       mutex_init(&ci->ci_svs_mtx, MUTEX_DEFAULT, IPL_VM);
+}
+
+void
+svs_pmap_sync(struct pmap *pmap, int index)
+{
+       CPU_INFO_ITERATOR cii;
+       struct cpu_info *ci;
+       cpuid_t cid;
+
+       KASSERT(pmap != NULL);
+       KASSERT(pmap != pmap_kernel());
+       KASSERT(mutex_owned(pmap->pm_lock));
+       KASSERT(kpreempt_disabled());
+       KASSERT(index >= 0 && index <= 255);
+       /* Copy pmap's L4 slot 'index' to each CPU listed in pm_kernel_cpus. */
+       for (CPU_INFO_FOREACH(cii, ci)) {
+               cid = cpu_index(ci);
+               /* Unlocked pre-check: skip CPUs that don't have pmap loaded. */
+               if (!kcpuset_isset(pmap->pm_kernel_cpus, cid)) {
+                       continue;
+               }
+               /* Take the lock and check again.  NOTE(review): confirm that
+                * index is never PDIR_SLOT_PTE, which svs_pdir_switch hides. */
+               mutex_enter(&ci->ci_svs_mtx);
+               if (kcpuset_isset(pmap->pm_kernel_cpus, cid)) {
+                       ci->ci_svs_updir[index] = pmap->pm_pdir[index];
+               }
+               mutex_exit(&ci->ci_svs_mtx);
+       }
+}
+
+void
+svs_lwp_switch(struct lwp *oldlwp, struct lwp *newlwp)
+{
+       /* Switch rsp0 (placeholder: nothing is done yet, arguments unused) */
+}
+
+static inline pt_entry_t
+svs_pte_atomic_read(struct pmap *pmap, size_t idx)
+{
+       /* XXX: We don't have a basic atomic_fetch_64 function? Use a CAS that
+        * swaps 666 for 666: it can never change the entry, and it returns
+        * the value it found there. */
+       return atomic_cas_64(&pmap->pm_pdir[idx], 666, 666);
+}
+
+/*
+ * We may come here with the pmap unlocked. So read its PTEs atomically. If
+ * a remote CPU is updating them at the same time, it's not that bad: the
+ * remote CPU will call svs_pmap_sync afterwards, and our updirpa will be
+ * synchronized properly.
+ */
+void
+svs_pdir_switch(struct pmap *pmap)
+{
+       struct cpu_info *ci = curcpu();
+       pt_entry_t pte;
+       size_t i;
+
+       KASSERT(kpreempt_disabled());
+       KASSERT(pmap != pmap_kernel());
+       /* Record pmap_pdirpa() of the new pmap; set before taking the mutex. */
+       ci->ci_svs_kpdirpa = pmap_pdirpa(pmap, 0);
+       /* ci_svs_updir may only be touched with ci_svs_mtx held. */
+       mutex_enter(&ci->ci_svs_mtx);
+       /* Rebuild all 512 slots of this CPU's user L4 from pmap. */
+       for (i = 0; i < 512; i++) {
+               if (i == PDIR_SLOT_PTE) {
+                       /* We don't want to have this mapped. */
+                       ci->ci_svs_updir[i] = 0;
+               } else {
+                       pte = svs_pte_atomic_read(pmap, i);
+                       ci->ci_svs_updir[i] = pte;
+               }
+       }
+
+       mutex_exit(&ci->ci_svs_mtx);
+}
+#endif
+
diff -r f7693e1fcb3b -r 1c19e44e063d sys/arch/amd64/conf/GENERIC
--- a/sys/arch/amd64/conf/GENERIC       Sun Jan 07 16:08:12 2018 +0000
+++ b/sys/arch/amd64/conf/GENERIC       Sun Jan 07 16:10:16 2018 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: GENERIC,v 1.476 2017/12/31 03:38:06 christos Exp $
+# $NetBSD: GENERIC,v 1.477 2018/01/07 16:10:16 maxv Exp $
 #
 # GENERIC machine description file
 #
@@ -22,7 +22,7 @@
 
 options        INCLUDE_CONFIG_FILE     # embed config file in kernel binary
 



Home | Main Index | Thread Index | Old Index