Source-Changes-HG archive


[src/trunk]: src/sys Another locking issue in NVMM: the {svm, vmx}_tlb_flush f...



details:   https://anonhg.NetBSD.org/src/rev/a626767ad19a
branches:  trunk
changeset: 449080:a626767ad19a
user:      maxv <maxv%NetBSD.org@localhost>
date:      Thu Feb 21 12:17:52 2019 +0000

description:
Another locking issue in NVMM: the {svm,vmx}_tlb_flush functions take VCPU
mutexes, which can sleep, but they run in a context that does not allow
sleeping.

Rewrite the TLB handling code to fix that. It becomes a bit complex. In
short, we use a per-VM generation number, which we increase on each TLB
flush before sending a broadcast IPI to everybody. The IPIs cause a
#VMEXIT of each VCPU, and each VCPU loop then synchronizes the per-VM
generation with a per-VCPU copy and applies the flushes lazily, as needed.
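
For illustration, the generation-number dance boils down to the following
stand-alone user-space C analogue. It is not part of the commit: mach_gen
and vcpu_gen abbreviate the real mach_htlb_gen/vcpu_htlb_gen fields, and
the IPI broadcast and the hardware flush are reduced to a comment and a
printf().

#include <inttypes.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Per-VM generation, bumped by the flusher. Starts at 1 so that a
 * freshly created VCPU (vcpu_gen == 0) flushes on its first run.
 */
static _Atomic uint64_t mach_gen = 1;

struct vcpu {
        uint64_t vcpu_gen;      /* per-VCPU copy, may lag behind */
};

/* Flusher side: publish a new generation, then interrupt everybody. */
static void
tlb_flush(void)
{
        atomic_fetch_add(&mach_gen, 1);
        /* real code: pmap_tlb_shootdown() -> IPIs -> #VMEXITs */
}

/* VCPU-loop side: compare generations, flush lazily if stale. */
static uint64_t
htlb_flush(struct vcpu *v)
{
        uint64_t machgen = atomic_load(&mach_gen);

        if (machgen != v->vcpu_gen)
                printf("flush hTLB (gen %" PRIu64 " -> %" PRIu64 ")\n",
                    v->vcpu_gen, machgen);
        return machgen;
}

/* Ack only once the guest actually ran, as *_htlb_flush_ack() does. */
static void
htlb_flush_ack(struct vcpu *v, uint64_t machgen)
{
        v->vcpu_gen = machgen;
}

int
main(void)
{
        struct vcpu v = { .vcpu_gen = 0 };
        uint64_t g;

        g = htlb_flush(&v);     /* stale: flushes */
        htlb_flush_ack(&v, g);
        g = htlb_flush(&v);     /* up to date: no flush */
        htlb_flush_ack(&v, g);
        tlb_flush();            /* flusher bumps the generation */
        g = htlb_flush(&v);     /* stale again: flushes */
        htlb_flush_ack(&v, g);
        return 0;
}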

The behavior differs between AMD and Intel; in short, on Intel we don't
flush the hTLB (EPT cache) when a context switch of a VCPU occurs, so we
now need to maintain a kcpuset to know which VCPUs' hTLBs are active on
which hCPUs. This creates some redundancy on Intel, i.e. there are cases
where we flush the hTLB several times unnecessarily; but hTLB flushes are
very rare, so there is no real performance regression.
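
That Intel-side bookkeeping can be modeled with the stand-alone C sketch
below. It is illustrative only: a plain bitmask stands in for kcpuset_t,
printf() stands in for the INVEPT instruction, and the names want_flush,
running_mask and NHCPU are invented here, not taken from the commit.

#include <stdint.h>
#include <stdio.h>

#define NHCPU 4                 /* pretend there are 4 host CPUs */

/* All hCPUs are "running"; analogue of kcpuset_running. */
static const uint32_t running_mask = (1U << NHCPU) - 1;

struct vcpu {
        uint32_t want_flush;    /* analogue of htlb_want_flush */
};

/*
 * Stale generation detected before entering the guest: mark every
 * running hCPU as needing a flush, and flush the current one now.
 */
static void
htlb_flush(struct vcpu *v, int hcpu)
{
        v->want_flush = running_mask;           /* kcpuset_copy() */
        printf("invept on hcpu %d\n", hcpu);    /* vmx_invept() */
}

/* After a successful guest run, the current hCPU's EPT cache is clean. */
static void
htlb_flush_ack(struct vcpu *v, int hcpu)
{
        v->want_flush &= ~(1U << hcpu);         /* kcpuset_clear() */
}

/*
 * On (re)entry to the VCPU loop: if this hCPU is still marked, its
 * EPT cache may hold stale entries for this VCPU, so flush and clear.
 */
static void
htlb_catchup(struct vcpu *v, int hcpu)
{
        if ((v->want_flush & (1U << hcpu)) == 0)
                return;
        printf("catch-up invept on hcpu %d\n", hcpu);
        v->want_flush &= ~(1U << hcpu);
}

int
main(void)
{
        struct vcpu v = { .want_flush = 0 };

        htlb_flush(&v, 0);      /* stale gen: mark all, flush hCPU 0 */
        htlb_flush_ack(&v, 0);  /* guest ran on hCPU 0: clear its bit */
        htlb_catchup(&v, 2);    /* VCPU migrated to hCPU 2: catch up */
        htlb_catchup(&v, 2);    /* bit already cleared: nothing to do */
        return 0;
}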

The new scheme is lock-less and non-blocking, so it solves our problem.

diffstat:

 sys/arch/x86/x86/x86_tlb.c      |   5 +-
 sys/dev/nvmm/x86/nvmm_x86_svm.c |  75 ++++++++++++++++++++++++------
 sys/dev/nvmm/x86/nvmm_x86_vmx.c |  99 +++++++++++++++++++++++++++-------------
 3 files changed, 128 insertions(+), 51 deletions(-)

diffs (truncated from 398 to 300 lines):

diff -r 16f17060507c -r a626767ad19a sys/arch/x86/x86/x86_tlb.c
--- a/sys/arch/x86/x86/x86_tlb.c        Thu Feb 21 11:58:04 2019 +0000
+++ b/sys/arch/x86/x86/x86_tlb.c        Thu Feb 21 12:17:52 2019 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: x86_tlb.c,v 1.5 2019/02/11 14:59:33 cherry Exp $       */
+/*     $NetBSD: x86_tlb.c,v 1.6 2019/02/21 12:17:52 maxv Exp $ */
 
 /*-
  * Copyright (c) 2008-2012 The NetBSD Foundation, Inc.
@@ -40,7 +40,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: x86_tlb.c,v 1.5 2019/02/11 14:59:33 cherry Exp $");
+__KERNEL_RCSID(0, "$NetBSD: x86_tlb.c,v 1.6 2019/02/21 12:17:52 maxv Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -229,6 +229,7 @@
 
        if (__predict_false(pm->pm_tlb_flush != NULL)) {
                (*pm->pm_tlb_flush)(pm);
+               return;
        }
 
        /*
diff -r 16f17060507c -r a626767ad19a sys/dev/nvmm/x86/nvmm_x86_svm.c
--- a/sys/dev/nvmm/x86/nvmm_x86_svm.c   Thu Feb 21 11:58:04 2019 +0000
+++ b/sys/dev/nvmm/x86/nvmm_x86_svm.c   Thu Feb 21 12:17:52 2019 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: nvmm_x86_svm.c,v 1.28 2019/02/21 11:58:04 maxv Exp $   */
+/*     $NetBSD: nvmm_x86_svm.c,v 1.29 2019/02/21 12:17:52 maxv Exp $   */
 
 /*
  * Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nvmm_x86_svm.c,v 1.28 2019/02/21 11:58:04 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nvmm_x86_svm.c,v 1.29 2019/02/21 12:17:52 maxv Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -493,6 +493,7 @@
 struct svm_machdata {
        bool cpuidpresent[SVM_NCPUIDS];
        struct nvmm_x86_conf_cpuid cpuid[SVM_NCPUIDS];
+       volatile uint64_t mach_htlb_gen;
 };
 
 static const size_t svm_conf_sizes[NVMM_X86_NCONF] = {
@@ -503,6 +504,7 @@
        /* General */
        bool shared_asid;
        bool gtlb_want_flush;
+       uint64_t vcpu_htlb_gen;
 
        /* VMCB */
        struct vmcb *vmcb;
@@ -1101,6 +1103,8 @@
        svm_inject_gp(mach, vcpu);
 }
 
+/* -------------------------------------------------------------------------- */
+
 static void
 svm_vcpu_guest_fpu_enter(struct nvmm_cpu *vcpu)
 {
@@ -1197,18 +1201,57 @@
        }
 }
 
+static inline void
+svm_htlb_catchup(struct nvmm_cpu *vcpu, int hcpu)
+{
+       /*
+        * Nothing to do. If an hTLB flush was needed, either the VCPU was
+        * executing on this hCPU and the hTLB already got flushed, or it
+        * was executing on another hCPU in which case the catchup is done
+        * in svm_gtlb_catchup().
+        */
+}
+
+static inline uint64_t
+svm_htlb_flush(struct svm_machdata *machdata, struct svm_cpudata *cpudata)
+{
+       struct vmcb *vmcb = cpudata->vmcb;
+       uint64_t machgen;
+
+       machgen = machdata->mach_htlb_gen;
+       if (__predict_true(machgen == cpudata->vcpu_htlb_gen)) {
+               return machgen;
+       }
+
+       vmcb->ctrl.tlb_ctrl = svm_ctrl_tlb_flush;
+       return machgen;
+}
+
+static inline void
+svm_htlb_flush_ack(struct svm_cpudata *cpudata, uint64_t machgen)
+{
+       struct vmcb *vmcb = cpudata->vmcb;
+
+       if (__predict_true(vmcb->ctrl.exitcode != VMCB_EXITCODE_INVALID)) {
+               cpudata->vcpu_htlb_gen = machgen;
+       }
+}
+
 static int
 svm_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
     struct nvmm_exit *exit)
 {
+       struct svm_machdata *machdata = mach->machdata;
        struct svm_cpudata *cpudata = vcpu->cpudata;
        struct vmcb *vmcb = cpudata->vmcb;
+       uint64_t machgen;
        int hcpu, s;
 
        kpreempt_disable();
        hcpu = cpu_number();
 
        svm_gtlb_catchup(vcpu, hcpu);
+       svm_htlb_catchup(vcpu, hcpu);
 
        if (vcpu->hcpu_last != hcpu) {
                vmcb->ctrl.tsc_offset = cpudata->tsc_offset +
@@ -1227,9 +1270,11 @@
                }
 
                s = splhigh();
+               machgen = svm_htlb_flush(machdata, cpudata);
                svm_vcpu_guest_fpu_enter(vcpu);
                svm_vmrun(cpudata->vmcb_pa, cpudata->gprs);
                svm_vcpu_guest_fpu_leave(vcpu);
+               svm_htlb_flush_ack(cpudata, machgen);
                splx(s);
 
                svm_vmcb_cache_default(vmcb);
@@ -1982,30 +2027,28 @@
 svm_tlb_flush(struct pmap *pm)
 {
        struct nvmm_machine *mach = pm->pm_data;
-       struct svm_cpudata *cpudata;
-       struct nvmm_cpu *vcpu;
-       int error;
-       size_t i;
+       struct svm_machdata *machdata = mach->machdata;
 
-       /* Request TLB flushes. */
-       for (i = 0; i < NVMM_MAX_VCPUS; i++) {
-               error = nvmm_vcpu_get(mach, i, &vcpu);
-               if (error)
-                       continue;
-               cpudata = vcpu->cpudata;
-               cpudata->gtlb_want_flush = true;
-               nvmm_vcpu_put(vcpu);
-       }
+       atomic_inc_64(&machdata->mach_htlb_gen);
+
+       /* Generates IPIs, which cause #VMEXITs. */
+       pmap_tlb_shootdown(pmap_kernel(), -1, PG_G, TLBSHOOT_UPDATE);
 }
 
 static void
 svm_machine_create(struct nvmm_machine *mach)
 {
+       struct svm_machdata *machdata;
+
        /* Fill in pmap info. */
        mach->vm->vm_map.pmap->pm_data = (void *)mach;
        mach->vm->vm_map.pmap->pm_tlb_flush = svm_tlb_flush;
 
-       mach->machdata = kmem_zalloc(sizeof(struct svm_machdata), KM_SLEEP);
+       machdata = kmem_zalloc(sizeof(struct svm_machdata), KM_SLEEP);
+       mach->machdata = machdata;
+
+       /* Start with an hTLB flush everywhere. */
+       machdata->mach_htlb_gen = 1;
 }
 
 static void
diff -r 16f17060507c -r a626767ad19a sys/dev/nvmm/x86/nvmm_x86_vmx.c
--- a/sys/dev/nvmm/x86/nvmm_x86_vmx.c   Thu Feb 21 11:58:04 2019 +0000
+++ b/sys/dev/nvmm/x86/nvmm_x86_vmx.c   Thu Feb 21 12:17:52 2019 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: nvmm_x86_vmx.c,v 1.8 2019/02/21 11:58:04 maxv Exp $    */
+/*     $NetBSD: nvmm_x86_vmx.c,v 1.9 2019/02/21 12:17:52 maxv Exp $    */
 
 /*
  * Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nvmm_x86_vmx.c,v 1.8 2019/02/21 11:58:04 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nvmm_x86_vmx.c,v 1.9 2019/02/21 12:17:52 maxv Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -627,7 +627,7 @@
 struct vmx_machdata {
        bool cpuidpresent[VMX_NCPUIDS];
        struct nvmm_x86_conf_cpuid cpuid[VMX_NCPUIDS];
-       kcpuset_t *ept_want_flush;
+       volatile uint64_t mach_htlb_gen;
 };
 
 static const size_t vmx_conf_sizes[NVMM_X86_NCONF] = {
@@ -638,6 +638,8 @@
        /* General */
        uint64_t asid;
        bool gtlb_want_flush;
+       uint64_t vcpu_htlb_gen;
+       kcpuset_t *htlb_want_flush;
 
        /* VMCS */
        struct vmcs *vmcs;
@@ -1510,6 +1512,8 @@
        exit->u.mem.inst_len = 0;
 }
 
+/* -------------------------------------------------------------------------- */
+
 static void
 vmx_vcpu_guest_fpu_enter(struct nvmm_cpu *vcpu)
 {
@@ -1601,7 +1605,7 @@
        wrmsr(MSR_KERNELGSBASE, cpudata->kernelgsbase);
 }
 
-/* --------------------------------------------------------------------- */
+/* -------------------------------------------------------------------------- */
 
 #define VMX_INVVPID_ADDRESS            0
 #define VMX_INVVPID_CONTEXT            1
@@ -1621,6 +1625,49 @@
        }
 }
 
+static inline void
+vmx_htlb_catchup(struct nvmm_cpu *vcpu, int hcpu)
+{
+       struct vmx_cpudata *cpudata = vcpu->cpudata;
+       struct ept_desc ept_desc;
+
+       if (__predict_true(!kcpuset_isset(cpudata->htlb_want_flush, hcpu))) {
+               return;
+       }
+
+       vmx_vmread(VMCS_EPTP, &ept_desc.eptp);
+       ept_desc.mbz = 0;
+       vmx_invept(vmx_ept_flush_op, &ept_desc);
+       kcpuset_clear(cpudata->htlb_want_flush, hcpu);
+}
+
+static inline uint64_t
+vmx_htlb_flush(struct vmx_machdata *machdata, struct vmx_cpudata *cpudata)
+{
+       struct ept_desc ept_desc;
+       uint64_t machgen;
+
+       machgen = machdata->mach_htlb_gen;
+       if (__predict_true(machgen == cpudata->vcpu_htlb_gen)) {
+               return machgen;
+       }
+
+       kcpuset_copy(cpudata->htlb_want_flush, kcpuset_running);
+
+       vmx_vmread(VMCS_EPTP, &ept_desc.eptp);
+       ept_desc.mbz = 0;
+       vmx_invept(vmx_ept_flush_op, &ept_desc);
+
+       return machgen;
+}
+
+static inline void
+vmx_htlb_flush_ack(struct vmx_cpudata *cpudata, uint64_t machgen)
+{
+       cpudata->vcpu_htlb_gen = machgen;
+       kcpuset_clear(cpudata->htlb_want_flush, cpu_number());
+}
+
 static int
 vmx_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
     struct nvmm_exit *exit)
@@ -1628,10 +1675,10 @@
        struct vmx_machdata *machdata = mach->machdata;
        struct vmx_cpudata *cpudata = vcpu->cpudata;
        struct vpid_desc vpid_desc;
-       struct ept_desc ept_desc;
        struct cpu_info *ci;
        uint64_t exitcode;
        uint64_t intstate;
+       uint64_t machgen;
        int hcpu, s, ret;
        bool launched = false;
 
@@ -1640,13 +1687,7 @@
        hcpu = cpu_number();


