pkgsrc-WIP-changes archive


qemu-nvmm: improvements



Module Name:	pkgsrc-wip
Committed By:	Maxime Villard <max%m00nbsd.net@localhost>
Pushed By:	maxv
Date:		Sun Jan 13 08:43:16 2019 +0100
Changeset:	03b138476b596c99faa185e0e852aa5bf91c8dab

Modified Files:
	qemu-nvmm/distinfo
	qemu-nvmm/patches/patch-nvmm-support

Log Message:
qemu-nvmm: improvements

Sync with libnvmm, inject events only when they can be taken immediately,
fetch the window-exiting and interrupt-shadow status on each exit, handle
HLT correctly, and hide MCEs from the guest.

With that, QEMU+NVMM works, and I can reliably run NetBSD, OpenBSD and
FreeBSD guests with multiple VCPUs.
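
For context while reading the diff below, here is a small standalone sketch of
the gating idea: an event is injected only when the guest can take it
immediately; otherwise window-exiting is requested and the injection is retried
on a later exit. The types and names in the sketch are simplified stand-ins,
not the real QEMU/libnvmm structures; the actual code is
nvmm_can_take_int()/nvmm_can_take_nmi() in the patched target/i386/nvmm-all.c.

/*
 * Standalone sketch of the injection gating added by this commit.  The
 * struct below only models the per-VCPU flags; see nvmm-all.c in the
 * patch for the real nvmm_vcpu and the libnvmm calls.
 */
#include <stdbool.h>
#include <stdio.h>

#define IF_MASK 0x00000200UL		/* EFLAGS.IF */

struct vcpu_model {
	bool int_window_exit;	/* waiting for an interrupt-window exit */
	bool nmi_window_exit;	/* set while NMIs are blocked */
	bool int_shadow;	/* guest is in an interrupt shadow (POP SS, ...) */
	unsigned long eflags;
};

/* Mirrors nvmm_can_take_int(): true if an INT can be injected right now. */
static bool
can_take_int(struct vcpu_model *v)
{
	if (v->int_window_exit)
		return false;
	if (v->int_shadow || !(v->eflags & IF_MASK)) {
		/*
		 * The real code asks the kernel for interrupt-window
		 * exiting here (NVMM_X64_MISC_INT_WINDOW_EXIT), so the
		 * VCPU exits as soon as injection becomes possible.
		 */
		v->int_window_exit = true;
		return false;
	}
	return true;
}

/* Mirrors nvmm_can_take_nmi(): NMIs always schedule an exit on completion. */
static bool
can_take_nmi(struct vcpu_model *v)
{
	return !v->nmi_window_exit;
}

int
main(void)
{
	struct vcpu_model v = { .eflags = IF_MASK, .int_shadow = true };

	/* IF=1 but in an interrupt shadow: defer and arm window-exiting. */
	printf("INT now: %d\n", can_take_int(&v));	/* 0 */

	/* Window exit delivered: shadow gone, window-exiting cleared. */
	v.int_shadow = false;
	v.int_window_exit = false;
	printf("INT now: %d\n", can_take_int(&v));	/* 1 */

	printf("NMI now: %d\n", can_take_nmi(&v));	/* 1 */
	return 0;
}

The flags in the sketch correspond to what the patch now refreshes from the
exit state after every run (the new NVMM_X64_EXITSTATE_INT_SHADOW and
window-exit reads), so the decision can usually be made without an extra
state fetch.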

To see a diff of this commit:
https://wip.pkgsrc.org/cgi-bin/gitweb.cgi?p=pkgsrc-wip.git;a=commitdiff;h=03b138476b596c99faa185e0e852aa5bf91c8dab

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

diffstat:
 qemu-nvmm/distinfo                   |   2 +-
 qemu-nvmm/patches/patch-nvmm-support | 204 ++++++++++++++++++++++++++---------
 2 files changed, 157 insertions(+), 49 deletions(-)

diffs:
diff --git a/qemu-nvmm/distinfo b/qemu-nvmm/distinfo
index 0a3fea5218..5ad290cb00 100644
--- a/qemu-nvmm/distinfo
+++ b/qemu-nvmm/distinfo
@@ -13,5 +13,5 @@ SHA1 (patch-hw_display_omap__dss.c) = 6b13242f28e32346bc70548c216c578d98fd3420
 SHA1 (patch-hw_net_etraxfs__eth.c) = e5dd1661d60dbcd27b332403e0843500ba9544bc
 SHA1 (patch-hw_net_xilinx__axienet.c) = ebcd2676d64ce6f31e4a8c976d4fdf530ad5e8b7
 SHA1 (patch-hw_usb_dev-mtp.c) = 66543b5559d92f8e2fa9a6eb85e5dfe7c1ad3339
-SHA1 (patch-nvmm-support) = efb5cf245a9ea19bd392156979a009e3eca2b5af
+SHA1 (patch-nvmm-support) = 9bd211a8cf557a10df859bf9c73d865ad4c67d94
 SHA1 (patch-tests_Makefile.include) = 42345d697cb2e324dccf1d68bd8d61e8001c6162
diff --git a/qemu-nvmm/patches/patch-nvmm-support b/qemu-nvmm/patches/patch-nvmm-support
index e37050ae8f..74f8c3ee1b 100644
--- a/qemu-nvmm/patches/patch-nvmm-support
+++ b/qemu-nvmm/patches/patch-nvmm-support
@@ -421,8 +421,8 @@ Add NVMM support.
  obj-$(CONFIG_WHPX) += whpx-all.o
 +obj-$(CONFIG_NVMM) += nvmm-all.o
 --- target/i386/nvmm-all.c	1970-01-01 01:00:00.000000000 +0100
-+++ target/i386/nvmm-all.c	2019-01-03 09:42:03.800979637 +0100
-@@ -0,0 +1,1122 @@
++++ target/i386/nvmm-all.c	2019-01-13 08:38:19.209949716 +0100
+@@ -0,0 +1,1230 @@
 +/*
 + * Copyright (c) 2018 The NetBSD Foundation, Inc.
 + * All rights reserved.
@@ -479,10 +479,15 @@ Add NVMM support.
 +
 +struct nvmm_vcpu {
 +	nvmm_cpuid_t cpuid;
-+	bool int_waiting;
-+	bool nmi_waiting;
 +	uint8_t tpr;
 +	bool stop;
++
++	/* Window-exiting for INTs/NMIs. */
++	bool int_window_exit;
++	bool nmi_window_exit;
++
++	/* The guest is an interrupt shadow (POP SS, etc). */
++	bool int_shadow;
 +};
 +
 +static struct {
@@ -772,6 +777,50 @@ Add NVMM support.
 +	x86_update_hflags(env);
 +}
 +
++static bool
++nvmm_can_take_int(CPUState *cpu)
++{
++	struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
++	struct nvmm_vcpu *vcpu = get_nvmm_vcpu(cpu);
++	struct nvmm_machine *mach = get_nvmm_mach();
++
++	if (vcpu->int_window_exit) {
++		return false;
++	}
++
++	if (vcpu->int_shadow || (!(env->eflags & IF_MASK))) {
++		struct nvmm_x64_state state;
++
++		/* Exit on interrupt window. */
++		nvmm_vcpu_getstate(mach, vcpu->cpuid, &state,
++		    NVMM_X64_STATE_MISC);
++		state.misc[NVMM_X64_MISC_INT_WINDOW_EXIT] = 1;
++		nvmm_vcpu_setstate(mach, vcpu->cpuid, &state,
++		    NVMM_X64_STATE_MISC);
++
++		return false;
++	}
++
++	return true;
++}
++
++static bool
++nvmm_can_take_nmi(CPUState *cpu)
++{
++	struct nvmm_vcpu *vcpu = get_nvmm_vcpu(cpu);
++
++	/*
++	 * Contrary to INTs, NMIs always schedule an exit when they are
++	 * completed. Therefore, if window-exiting is enabled, it means
++	 * NMIs are blocked.
++	 */
++	if (vcpu->nmi_window_exit) {
++		return false;
++	}
++
++	return true;
++}
++
 +/*
 + * Called before the VCPU is run. We inject events generated by the I/O
 + * thread, and synchronize the guest TPR.
@@ -794,6 +843,12 @@ Add NVMM support.
 +
 +	qemu_mutex_lock_iothread();
 +
++	tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
++	if (tpr != vcpu->tpr) {
++		vcpu->tpr = tpr;
++		sync_tpr = true;
++	}
++
 +	/*
 +	 * Force the VCPU out of its inner loop to process any INIT requests
 +	 * or commit pending TPR access.
@@ -802,22 +857,22 @@ Add NVMM support.
 +		cpu->exit_request = 1;
 +	}
 +
-+	/* Inject NMI, if any. */
-+	if (!has_event && !vcpu->nmi_waiting &&
-+	    (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
-+		cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
-+		event.type = NVMM_EVENT_INTERRUPT_HW;
-+		event.vector = 2;
-+		has_event = true;
++	if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
++		if (nvmm_can_take_nmi(cpu)) {
++			cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
++			event.type = NVMM_EVENT_INTERRUPT_HW;
++			event.vector = 2;
++			has_event = true;
++		}
 +	}
 +
-+	/* Inject INT, if any. */
-+	if (!has_event && !vcpu->int_waiting &&
-+	    (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
-+		cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
-+		event.type = NVMM_EVENT_INTERRUPT_HW;
-+		event.vector = cpu_get_pic_interrupt(env);
-+		has_event = true;
++	if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
++		if (nvmm_can_take_int(cpu)) {
++			cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
++			event.type = NVMM_EVENT_INTERRUPT_HW;
++			event.vector = cpu_get_pic_interrupt(env);
++			has_event = true;
++		}
 +	}
 +
 +	/* Don't want SMIs. */
@@ -825,15 +880,6 @@ Add NVMM support.
 +		cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
 +	}
 +
-+	/* Sync the TPR. */
-+	tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
-+	if (tpr != vcpu->tpr) {
-+		vcpu->tpr = tpr;
-+		sync_tpr = true;
-+	}
-+
-+	qemu_mutex_unlock_iothread();
-+
 +	if (sync_tpr) {
 +		ret = nvmm_vcpu_getstate(mach, vcpu->cpuid, &state,
 +		    NVMM_X64_STATE_CRS);
@@ -855,17 +901,12 @@ Add NVMM support.
 +	if (has_event) {
 +		ret = nvmm_vcpu_inject(mach, vcpu->cpuid, &event);
 +		if (ret == -1) {
-+			if (errno == EAGAIN) {
-+				if (event.vector == 2)
-+					vcpu->nmi_waiting = true;
-+				else
-+					vcpu->int_waiting = true;
-+			} else {
-+				error_report("NVMM: Failed to inject event,"
-+				    " error=%d", errno);
-+			}
++			error_report("NVMM: Failed to inject event,"
++			    " error=%d", errno);
 +		}
 +	}
++
++	qemu_mutex_unlock_iothread();
 +}
 +
 +/*
@@ -881,8 +922,15 @@ Add NVMM support.
 +	uint64_t tpr;
 +
 +	env->eflags = exit->exitstate[NVMM_X64_EXITSTATE_RFLAGS];
-+	tpr = exit->exitstate[NVMM_X64_EXITSTATE_CR8];
 +
++	vcpu->int_shadow =
++	    exit->exitstate[NVMM_X64_EXITSTATE_INT_SHADOW];
++	vcpu->int_window_exit =
++	    exit->exitstate[NVMM_X64_EXITSTATE_INT_WINDOW_EXIT];
++	vcpu->nmi_window_exit =
++	    exit->exitstate[NVMM_X64_EXITSTATE_NMI_WINDOW_EXIT];
++
++	tpr = exit->exitstate[NVMM_X64_EXITSTATE_CR8];
 +	if (vcpu->tpr != tpr) {
 +		vcpu->tpr = tpr;
 +		qemu_mutex_lock_iothread();
@@ -931,14 +979,30 @@ Add NVMM support.
 +nvmm_handle_mem(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
 +    struct nvmm_exit *exit)
 +{
-+	return nvmm_assist_mem(mach, vcpu->cpuid, exit);
++	int ret;
++
++	ret = nvmm_assist_mem(mach, vcpu->cpuid, exit);
++	if (ret == -1) {
++		error_report("NVMM: Mem Assist Failed [gpa=%p]",
++		    (void *)exit->u.mem.gpa);
++	}
++
++	return ret;
 +}
 +
 +static int
 +nvmm_handle_io(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
 +    struct nvmm_exit *exit)
 +{
-+	return nvmm_assist_io(mach, vcpu->cpuid, exit);
++	int ret;
++
++	ret = nvmm_assist_io(mach, vcpu->cpuid, exit);
++	if (ret == -1) {
++		error_report("NVMM: I/O Assist Failed [port=%d]",
++		    (int)exit->u.io.port);
++	}
++
++	return ret;
 +}
 +
 +static int
@@ -963,8 +1027,12 @@ Add NVMM support.
 +		break;
 +	default:
 +		// TODO: more MSRs to add?
-+		error_report("NVMM: Unexpected MSR 0x%lx", exit->u.msr.msr);
-+		return -1;
++		error_report("NVMM: Unexpected MSR 0x%lx, ignored",
++		    exit->u.msr.msr);
++		if (exit->u.msr.type == NVMM_EXIT_MSR_RDMSR) {
++			val = 0;
++		}
++		break;
 +	}
 +
 +	ret = nvmm_vcpu_getstate(mach, vcpu->cpuid, &state,
@@ -990,12 +1058,50 @@ Add NVMM support.
 +}
 +
 +static int
-+nvmm_handle_hlt(CPUState *cpu)
++nvmm_handle_hlt(struct nvmm_machine *mach, CPUState *cpu,
++    struct nvmm_exit *exit)
 +{
 +	struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
++	struct nvmm_vcpu *vcpu = get_nvmm_vcpu(cpu);
++	struct nvmm_x64_state state;
 +	int ret = 0;
 +
++	/*
++	 * Disable the INT shadow, and the INT window-exiting, to have
++	 * nvmm_can_take_int() process pending interrupts and take us
++	 * out of HLT.
++	 *
++	 * XXX: This mechanism is not really optimized, maybe it should
++	 * be done in the kernel.
++	 */
++
++	ret = nvmm_vcpu_getstate(mach, vcpu->cpuid, &state,
++	    NVMM_X64_STATE_GPRS | NVMM_X64_STATE_MISC);
++	if (ret == -1) {
++		return -1;
++	}
++
++	/* Advance RIP. */
++	state.gprs[NVMM_X64_GPR_RIP] = exit->u.hlt.npc;
++
++	/* Disable the INT shadow, if any. */
++	state.misc[NVMM_X64_MISC_INT_SHADOW] = 0;
++	vcpu->int_shadow = false;
++
++	if (env->eflags & IF_MASK) {
++		/* Disable the INT window-exiting, if any. */
++		state.misc[NVMM_X64_MISC_INT_WINDOW_EXIT] = 0;
++		vcpu->int_window_exit = false;
++	}
++
++	ret = nvmm_vcpu_setstate(mach, vcpu->cpuid, &state,
++	    NVMM_X64_STATE_GPRS | NVMM_X64_STATE_MISC);
++	if (ret == -1) {
++		return -1;
++	}
++
 +	qemu_mutex_lock_iothread();
++
 +	if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
 +	      (env->eflags & IF_MASK)) &&
 +	    !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
@@ -1003,6 +1109,7 @@ Add NVMM support.
 +		cpu->halted = true;
 +		ret = 1;
 +	}
++
 +	qemu_mutex_unlock_iothread();
 +
 +	return ret;
@@ -1037,8 +1144,7 @@ Add NVMM support.
 +	if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
 +		nvmm_cpu_synchronize_state(cpu);
 +		do_cpu_init(x86_cpu);
-+		vcpu->int_waiting = false;
-+		vcpu->nmi_waiting = false;
++		/* XXX: reset the INT/NMI windows */
 +	}
 +	if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
 +		cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
@@ -1113,13 +1219,11 @@ Add NVMM support.
 +			ret = nvmm_handle_msr(mach, cpu, &exit);
 +			break;
 +		case NVMM_EXIT_INT_READY:
-+			vcpu->int_waiting = false;
 +			break;
 +		case NVMM_EXIT_NMI_READY:
-+			vcpu->nmi_waiting = false;
 +			break;
 +		case NVMM_EXIT_HLT:
-+			ret = nvmm_handle_hlt(cpu);
++			ret = nvmm_handle_hlt(mach, cpu, &exit);
 +			break;
 +		case NVMM_EXIT_MONITOR:
 +		case NVMM_EXIT_MWAIT:
@@ -1458,7 +1562,7 @@ Add NVMM support.
 +	memset(&cpuid, 0, sizeof(cpuid));
 +	cpuid.leaf = 0x00000001;
 +	cpuid.del.ecx = CPUID_EXT_MONITOR;
-+	cpuid.del.edx = CPUID_MTRR;
++	cpuid.del.edx = CPUID_MCE | CPUID_MTRR;
 +	cpuid.set.ecx = CPUID_EXT_HYPERVISOR;
 +
 +	ret = nvmm_machine_configure(mach, NVMM_X86_CONF_CPUID, &cpuid);
@@ -1492,6 +1596,10 @@ Add NVMM support.
 +		error_report("NVMM: Unsupported version %lu", cap.version);
 +		return -ENOSPC;
 +	}
++	if (cap.state_size != sizeof(struct nvmm_x64_state)) {
++		error_report("NVMM: Wrong state size %zu", cap.state_size);
++		return -ENOSPC;
++	}
 +
 +	ret = nvmm_machine_create(&nvmm_global.mach);
 +	if (ret == -1) {

