Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src Modify the communication layer between the kernel NVMM drive...



details:   https://anonhg.NetBSD.org/src/rev/7662533bca8f
branches:  trunk
changeset: 450896:7662533bca8f
user:      maxv <maxv%NetBSD.org@localhost>
date:      Sun Apr 28 14:22:13 2019 +0000

description:
Modify the communication layer between the kernel NVMM driver and libnvmm:
introduce a bidirectional "comm page", a page of memory shared between
the kernel and userland, and used to transfer data in and out in a more
performant manner than ioctls.

The comm page contains the VCPU state, plus three flags:

 - "wanted": the states the kernel must get/set when requested via ioctls
 - "cached": the states that are in the comm page
 - "commit": the states the kernel must set in vcpu_run

The idea is to avoid performing expensive syscalls, by using the VCPU
state cached, either explicitly or speculatively, in the comm page. For
example, if the state is cached we do a direct 1->5 with no syscall:

          +---------------------------------------------+
          |                    Qemu                     |
          +---------------------------------------------+
               |                                   ^
               | (0) nvmm_vcpu_getstate            | (6) Done
               |                                   |
               V                                   |
             +---------------------------------------+
             |                libnvmm                |
             +---------------------------------------+
                  |   ^          |               ^
        (1) State |   | (2) No   | (3) Ioctl:    | (5) Ok, state
        cached?   |   |          | "please cache | fetched
                  |   |          |  the state"   |
                  V   |          |               |
              +-----------+      |               |
              | Comm Page |------+---------------+
              +-----------+      |
                       ^         |
          (4) "Alright |         V
               babe"   |     +--------+
                       +-----| Kernel |
                             +--------+

The main changes in behavior are:

 - nvmm_vcpu_getstate(): won't emit a syscall if the state is already
   cached in the comm page, will just fetch from the comm page directly
 - nvmm_vcpu_setstate(): won't emit a syscall at all, will just cache
   the wanted state in the comm page
 - nvmm_vcpu_run(): will commit the to-be-set state in the comm page,
   as previously requested by nvmm_vcpu_setstate()

In addition to this, the kernel NVMM driver is changed to speculatively
cache certain states known to be of interest, so that the subsequent
nvmm_vcpu_getstate() calls made by libnvmm or the emulator use the comm
page rather than expensive syscalls. For example, if an I/O VMEXIT
occurs, the I/O Assist in libnvmm will want GPRS+SEGS+CRS+MSRS, and now
the kernel caches all of that in the comm page before returning to
userland.

Overall, in a normal run of Windows 10, this saves several million
syscalls. E.g., on a 4-CPU Intel machine with 4 VCPUs, booting the Win10
install ISO goes from taking 1min35 to taking 1min16.

The libnvmm API is not changed, but the ABI is. If we changed the API it
would be possible to save expensive memcpys on libnvmm's side. These
memcpys will be avoided in a future version. The comm page can also be
extended to implement future services.

diffstat:

 lib/libnvmm/Makefile            |   4 +-
 lib/libnvmm/libnvmm.c           |  74 +++++++++++++++++++++++++++++-------
 lib/libnvmm/libnvmm_x86.c       |  35 +++++++++++++++--
 lib/libnvmm/nvmm.h              |   4 +-
 sys/dev/nvmm/nvmm.c             |  82 ++++++++++++++++++++++++++++++++++-------
 sys/dev/nvmm/nvmm.h             |  15 +++++++-
 sys/dev/nvmm/nvmm_internal.h    |  13 ++++--
 sys/dev/nvmm/nvmm_ioctl.h       |   6 +--
 sys/dev/nvmm/x86/nvmm_x86.h     |   9 ++++-
 sys/dev/nvmm/x86/nvmm_x86_svm.c |  64 ++++++++++++++++++++++++++++---
 sys/dev/nvmm/x86/nvmm_x86_vmx.c |  66 +++++++++++++++++++++++++++++----
 11 files changed, 309 insertions(+), 63 deletions(-)

diffs (truncated from 869 to 300 lines):

diff -r d133e96eaf17 -r 7662533bca8f lib/libnvmm/Makefile
--- a/lib/libnvmm/Makefile      Sun Apr 28 07:33:02 2019 +0000
+++ b/lib/libnvmm/Makefile      Sun Apr 28 14:22:13 2019 +0000
@@ -1,11 +1,11 @@
-# $NetBSD: Makefile,v 1.5 2018/11/13 09:24:37 martin Exp $
+# $NetBSD: Makefile,v 1.6 2019/04/28 14:22:13 maxv Exp $
 
 .include <bsd.own.mk>
 
 LIB=           nvmm
 MAN=           libnvmm.3
 
-SRCS=          libnvmm.c libnvmm_x86.c
+SRCS=          libnvmm.c
 
 INCS=          nvmm.h
 INCSDIR=       /usr/include
diff -r d133e96eaf17 -r 7662533bca8f lib/libnvmm/libnvmm.c
--- a/lib/libnvmm/libnvmm.c     Sun Apr 28 07:33:02 2019 +0000
+++ b/lib/libnvmm/libnvmm.c     Sun Apr 28 14:22:13 2019 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: libnvmm.c,v 1.9 2019/04/10 18:49:04 maxv Exp $ */
+/*     $NetBSD: libnvmm.c,v 1.10 2019/04/28 14:22:13 maxv Exp $        */
 
 /*
  * Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -40,10 +40,16 @@
 #include <sys/ioctl.h>
 #include <sys/mman.h>
 #include <sys/queue.h>
+#include <machine/vmparam.h>
 
 #include "nvmm.h"
 
-struct nvmm_callbacks __callbacks;
+static struct nvmm_callbacks __callbacks;
+static struct nvmm_capability __capability;
+
+#ifdef __x86_64__
+#include "libnvmm_x86.c"
+#endif
 
 typedef struct __area {
        LIST_ENTRY(__area) list;
@@ -159,6 +165,11 @@
        nvmm_fd = open("/dev/nvmm", O_RDWR);
        if (nvmm_fd == -1)
                return -1;
+       if (nvmm_capability(&__capability) == -1) {
+               close(nvmm_fd);
+               nvmm_fd = -1;
+               return -1;
+       }
        return 0;
 }
 
@@ -185,6 +196,7 @@
 nvmm_machine_create(struct nvmm_machine *mach)
 {
        struct nvmm_ioc_machine_create args;
+       struct nvmm_comm_page **pages;
        area_list_t *areas;
        int ret;
 
@@ -196,16 +208,25 @@
        if (areas == NULL)
                return -1;
 
+       pages = calloc(__capability.max_vcpus, sizeof(*pages));
+       if (pages == NULL) {
+               free(areas);
+               return -1;
+       }
+
        ret = ioctl(nvmm_fd, NVMM_IOC_MACHINE_CREATE, &args);
        if (ret == -1) {
                free(areas);
                return -1;
        }
 
+       LIST_INIT(areas);
+
        memset(mach, 0, sizeof(*mach));
-       LIST_INIT(areas);
+       mach->machid = args.machid;
+       mach->pages = pages;
+       mach->npages = __capability.max_vcpus;
        mach->areas = areas;
-       mach->machid = args.machid;
 
        return 0;
 }
@@ -227,6 +248,7 @@
                return -1;
 
        __area_remove_all(mach);
+       free(mach->pages);
 
        return 0;
 }
@@ -256,6 +278,7 @@
 nvmm_vcpu_create(struct nvmm_machine *mach, nvmm_cpuid_t cpuid)
 {
        struct nvmm_ioc_vcpu_create args;
+       struct nvmm_comm_page *comm;
        int ret;
 
        if (nvmm_init() == -1) {
@@ -269,6 +292,13 @@
        if (ret == -1)
                return -1;
 
+       comm = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FILE,
+           nvmm_fd, NVMM_COMM_OFF(mach->machid, cpuid));
+       if (comm == MAP_FAILED)
+               return -1;
+
+       mach->pages[cpuid] = comm;
+
        return 0;
 }
 
@@ -276,6 +306,7 @@
 nvmm_vcpu_destroy(struct nvmm_machine *mach, nvmm_cpuid_t cpuid)
 {
        struct nvmm_ioc_vcpu_destroy args;
+       struct nvmm_comm_page *comm;
        int ret;
 
        if (nvmm_init() == -1) {
@@ -289,6 +320,9 @@
        if (ret == -1)
                return -1;
 
+       comm = mach->pages[cpuid];
+       munmap(comm, PAGE_SIZE);
+
        return 0;
 }
 
@@ -296,21 +330,20 @@
 nvmm_vcpu_setstate(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
     void *state, uint64_t flags)
 {
-       struct nvmm_ioc_vcpu_setstate args;
-       int ret;
+       struct nvmm_comm_page *comm;
 
        if (nvmm_init() == -1) {
                return -1;
        }
 
-       args.machid = mach->machid;
-       args.cpuid = cpuid;
-       args.state = state;
-       args.flags = flags;
+       if (__predict_false(cpuid >= mach->npages)) {
+               return -1;
+       }
+       comm = mach->pages[cpuid];
 
-       ret = ioctl(nvmm_fd, NVMM_IOC_VCPU_SETSTATE, &args);
-       if (ret == -1)
-               return -1;
+       nvmm_arch_copystate(&comm->state, state, flags);
+       comm->state_commit |= flags;
+       comm->state_cached |= flags;
 
        return 0;
 }
@@ -320,21 +353,32 @@
     void *state, uint64_t flags)
 {
        struct nvmm_ioc_vcpu_getstate args;
+       struct nvmm_comm_page *comm;
        int ret;
 
        if (nvmm_init() == -1) {
                return -1;
        }
 
+       if (__predict_false(cpuid >= mach->npages)) {
+               return -1;
+       }
+       comm = mach->pages[cpuid];
+
+       if (__predict_true((flags & ~comm->state_cached) == 0)) {
+               goto out;
+       }
+       comm->state_wanted = flags & ~comm->state_cached;
+
        args.machid = mach->machid;
        args.cpuid = cpuid;
-       args.state = state;
-       args.flags = flags;
 
        ret = ioctl(nvmm_fd, NVMM_IOC_VCPU_GETSTATE, &args);
        if (ret == -1)
                return -1;
 
+out:
+       nvmm_arch_copystate(state, &comm->state, flags);
        return 0;
 }
 
diff -r d133e96eaf17 -r 7662533bca8f lib/libnvmm/libnvmm_x86.c
--- a/lib/libnvmm/libnvmm_x86.c Sun Apr 28 07:33:02 2019 +0000
+++ b/lib/libnvmm/libnvmm_x86.c Sun Apr 28 14:22:13 2019 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: libnvmm_x86.c,v 1.28 2019/04/04 17:33:47 maxv Exp $    */
+/*     $NetBSD: libnvmm_x86.c,v 1.29 2019/04/28 14:22:13 maxv Exp $    */
 
 /*
  * Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -43,14 +43,41 @@
 #include <machine/pte.h>
 #include <machine/psl.h>
 
-#include "nvmm.h"
-
 #define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
 #define __cacheline_aligned __attribute__((__aligned__(64)))
 
 #include <x86/specialreg.h>
 
-extern struct nvmm_callbacks __callbacks;
+/* -------------------------------------------------------------------------- */
+
+static void
+nvmm_arch_copystate(void *_dst, void *_src, uint64_t flags)
+{
+       struct nvmm_x64_state *src = _src;
+       struct nvmm_x64_state *dst = _dst;
+
+       if (flags & NVMM_X64_STATE_GPRS) {
+               memcpy(dst->gprs, src->gprs, sizeof(dst->gprs));
+       }
+       if (flags & NVMM_X64_STATE_SEGS) {
+               memcpy(dst->segs, src->segs, sizeof(dst->segs));
+       }
+       if (flags & NVMM_X64_STATE_CRS) {
+               memcpy(dst->crs, src->crs, sizeof(dst->crs));
+       }
+       if (flags & NVMM_X64_STATE_DRS) {
+               memcpy(dst->drs, src->drs, sizeof(dst->drs));
+       }
+       if (flags & NVMM_X64_STATE_MSRS) {
+               memcpy(dst->msrs, src->msrs, sizeof(dst->msrs));
+       }
+       if (flags & NVMM_X64_STATE_INTR) {
+               memcpy(&dst->intr, &src->intr, sizeof(dst->intr));
+       }
+       if (flags & NVMM_X64_STATE_FPU) {
+               memcpy(&dst->fpu, &src->fpu, sizeof(dst->fpu));
+       }
+}
 
 /* -------------------------------------------------------------------------- */
 
diff -r d133e96eaf17 -r 7662533bca8f lib/libnvmm/nvmm.h
--- a/lib/libnvmm/nvmm.h        Sun Apr 28 07:33:02 2019 +0000
+++ b/lib/libnvmm/nvmm.h        Sun Apr 28 14:22:13 2019 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: nvmm.h,v 1.9 2019/04/27 15:45:21 maxv Exp $    */
+/*     $NetBSD: nvmm.h,v 1.10 2019/04/28 14:22:13 maxv Exp $   */
 
 /*
  * Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -40,6 +40,8 @@
 
 struct nvmm_machine {
        nvmm_machid_t machid;
+       struct nvmm_comm_page **pages;
+       size_t npages;
        void *areas; /* opaque */
 };
 
diff -r d133e96eaf17 -r 7662533bca8f sys/dev/nvmm/nvmm.c
--- a/sys/dev/nvmm/nvmm.c       Sun Apr 28 07:33:02 2019 +0000
+++ b/sys/dev/nvmm/nvmm.c       Sun Apr 28 14:22:13 2019 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: nvmm.c,v 1.18 2019/04/27 17:30:38 maxv Exp $   */
+/*     $NetBSD: nvmm.c,v 1.19 2019/04/28 14:22:13 maxv Exp $   */
 
 /*
  * Copyright (c) 2018-2019 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.18 2019/04/27 17:30:38 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.19 2019/04/28 14:22:13 maxv Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -155,7 +155,7 @@
        }
 
        vcpu->present = true;
-       vcpu->state = kmem_zalloc(nvmm_impl->state_size, KM_SLEEP);



Home | Main Index | Thread Index | Old Index