tech-kern archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: NVMM missing opcode REPE CMPS implementation, version 3



Dear folks,

On Thu, Oct 29, 2020 at 09:54:02PM +0100, Reinoud Zandijk wrote:
> i stumbled on an instruction that NVMM couldn't code and thus couldn't
> emulate either. It was as stated the REPE CMPS (0xF3A7) instruction as stated
> in https://c9x.me/x86/html/file_module_x86_id_279.html and confirmed by
> disassembly by ndisasm (from nasm).

I've created a new version of the patch that should fix this missing
instruction, and it adds support for REPE too. It's not triggered often, but the
test case now works and the ATF test confirms it's doing the right thing.

With regards,
Reinoud


Index: lib/libnvmm/libnvmm_x86.c
===================================================================
RCS file: /cvsroot/src/lib/libnvmm/libnvmm_x86.c,v
retrieving revision 1.42
diff -u -p -r1.42 libnvmm_x86.c
--- lib/libnvmm/libnvmm_x86.c	31 Oct 2020 15:44:01 -0000	1.42
+++ lib/libnvmm/libnvmm_x86.c	15 Nov 2020 15:26:58 -0000
@@ -1,4 +1,4 @@
-/*	$NetBSD: libnvmm_x86.c,v 1.42 2020/10/31 15:44:01 reinoud Exp $	*/
+/*	$NetBSD: libnvmm_x86.c,v 1.41 2020/10/30 21:06:13 reinoud Exp $	*/
 
 /*
  * Copyright (c) 2018-2020 Maxime Villard, m00nbsd.net
@@ -49,6 +49,8 @@
 
 /* -------------------------------------------------------------------------- */
 
+//#define NVMM_DEBUG
+
 /*
  * Undocumented debugging function. Helpful.
  */
@@ -869,7 +871,6 @@ static void x86_func_test(struct nvmm_vc
 static void x86_func_mov(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
 static void x86_func_stos(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
 static void x86_func_lods(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
-static void x86_func_movs(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
 
 static const struct x86_emul x86_emul_or = {
 	.readreg = true,
@@ -919,10 +920,6 @@ static const struct x86_emul x86_emul_lo
 	.func = x86_func_lods
 };
 
-static const struct x86_emul x86_emul_movs = {
-	.func = x86_func_movs
-};
-
 /* Legacy prefixes. */
 #define LEG_LOCK	0xF0
 #define LEG_REPN	0xF2
@@ -941,6 +938,7 @@ struct x86_legpref {
 	bool adr_ovr:1;
 	bool rep:1;
 	bool repn:1;
+	bool repe:1;
 	int8_t seg;
 };
 
@@ -1049,6 +1047,7 @@ struct x86_opcode {
 	bool dmo:1;
 	bool todmo:1;
 	bool movs:1;
+	bool cmps:1;
 	bool stos:1;
 	bool lods:1;
 	bool szoverride:1;
@@ -1451,7 +1450,7 @@ static const struct x86_opcode primary_o
 		.movs = true,
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
-		.emul = &x86_emul_movs
+		.emul = NULL
 	},
 	[0xA5] = {
 		/* Yv, Xv */
@@ -1459,7 +1458,27 @@ static const struct x86_opcode primary_o
 		.movs = true,
 		.szoverride = true,
 		.defsize = -1,
-		.emul = &x86_emul_movs
+		.emul = NULL
+	},
+
+	/*
+	 * CMPS
+	 */
+	[0xA6] = {
+		/* Yb, Xb */
+		.valid = true,
+		.cmps = true,
+		.szoverride = false,
+		.defsize = OPSIZE_BYTE,
+		.emul = NULL
+	},
+	[0xA7] = {
+		/* Yv, Xv */
+		.valid = true,
+		.cmps = true,
+		.szoverride = true,
+		.defsize = -1,
+		.emul = NULL
 	},
 
 	/*
@@ -1871,6 +1890,35 @@ node_movs(struct x86_decode_fsm *fsm, st
 }
 
 /*
+ * Special node, for CMPS. Fake two displacements of zero on the source and
+ * destination registers.
+ * XXX coded as clone of movs as its similar in register usage
+ * XXX might be merged with node_movs()
+ */
+static int
+node_cmps(struct x86_decode_fsm *fsm, struct x86_instr *instr)
+{
+	size_t adrsize;
+
+	adrsize = instr->address_size;
+
+	/* DS:RSI */
+	instr->src.type = STORE_REG;
+	instr->src.u.reg = &gpr_map__special[1][2][adrsize-1];
+	instr->src.disp.type = DISP_0;
+
+	/* ES:RDI, force ES */
+	instr->dst.type = STORE_REG;
+	instr->dst.u.reg = &gpr_map__special[1][3][adrsize-1];
+	instr->dst.disp.type = DISP_0;
+	instr->dst.hardseg = NVMM_X64_SEG_ES;
+
+	fsm_advance(fsm, 0, NULL);
+
+	return 0;
+}
+
+/*
  * Special node, for STOS and LODS. Fake a displacement of zero on the
  * destination register.
  */
@@ -2470,6 +2518,8 @@ node_primary_opcode(struct x86_decode_fs
 		fsm_advance(fsm, 1, node_stlo);
 	} else if (opcode->movs) {
 		fsm_advance(fsm, 1, node_movs);
+	} else if (opcode->cmps) {
+		fsm_advance(fsm, 1, node_cmps);
 	} else {
 		return -1;
 	}
@@ -2646,8 +2696,17 @@ x86_decode(uint8_t *inst_bytes, size_t i
 
 	while (fsm.fn != NULL) {
 		ret = (*fsm.fn)(&fsm, instr);
-		if (ret == -1)
+		if (ret == -1) {
+#ifdef NVMM_DEBUG
+			printf("\n%s debug: unrecognized instruction found " \
+			       "with max length %ld : [ ", __func__, inst_len);
+			for (uint i = 0; i < inst_len; i++)
+			      printf("%02x ", inst_bytes[i]);
+			printf("]\n");
+			fflush(stdout);
+#endif
 			return -1;
+		}
 	}
 
 	instr->len = fsm.buf - inst_bytes;
@@ -2966,23 +3025,6 @@ x86_func_lods(struct nvmm_vcpu *vcpu, st
 	}
 }
 
-static void
-x86_func_movs(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs)
-{
-	/*
-	 * Special instruction: double memory operand. Don't call the cb,
-	 * because the storage has already been performed earlier.
-	 */
-
-	if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
-		gprs[NVMM_X64_GPR_RSI] -= mem->size;
-		gprs[NVMM_X64_GPR_RDI] -= mem->size;
-	} else {
-		gprs[NVMM_X64_GPR_RSI] += mem->size;
-		gprs[NVMM_X64_GPR_RDI] += mem->size;
-	}
-}
-
 /* -------------------------------------------------------------------------- */
 
 static inline uint64_t
@@ -3145,17 +3187,18 @@ fetch_instruction(struct nvmm_machine *m
 }
 
 static int
-assist_mem_double(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
+assist_mem_movs(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
     struct x86_instr *instr)
 {
 	struct nvmm_x64_state *state = vcpu->state;
-	struct nvmm_mem mem;
+	uint64_t *gprs;
 	uint8_t data[8];
 	gvaddr_t gva;
 	size_t size;
 	int ret;
 
 	size = instr->operand_size;
+	gprs = state->gprs;
 
 	/* Source. */
 	ret = store_to_gva(state, instr, &instr->src, &gva, size);
@@ -3173,8 +3216,72 @@ assist_mem_double(struct nvmm_machine *m
 	if (ret == -1)
 		return -1;
 
-	mem.size = size;
-	(*instr->emul->func)(vcpu, &mem, state->gprs);
+	/*
+	 * Inlined x86_func_movs() call
+	 * 	(*instr->emul->func)(vcpu, &mem, state->gprs);
+	 */
+
+	if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
+		gprs[NVMM_X64_GPR_RSI] -= size;
+		gprs[NVMM_X64_GPR_RDI] -= size;
+	} else {
+		gprs[NVMM_X64_GPR_RSI] += size;
+		gprs[NVMM_X64_GPR_RDI] += size;
+	}
+
+	return 0;
+}
+
+static int
+assist_mem_cmps(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
+    struct x86_instr *instr)
+{
+	struct nvmm_x64_state *state = vcpu->state;
+	uint64_t *gprs, op1, op2, fl;
+	uint8_t data1[8], data2[8];
+	gvaddr_t gva;
+	size_t size;
+	int ret;
+
+	size = instr->operand_size;
+	gprs = state->gprs;
+
+	/* Source 1. */
+	ret = store_to_gva(state, instr, &instr->src, &gva, size);
+	if (ret == -1)
+		return -1;
+	ret = read_guest_memory(mach, vcpu, gva, data1, size);
+	if (ret == -1)
+		return -1;
+
+	/* Source 2. */
+	ret = store_to_gva(state, instr, &instr->dst, &gva, size);
+	if (ret == -1)
+		return -1;
+	ret = read_guest_memory(mach, vcpu, gva, data2, size);
+	if (ret == -1)
+		return -1;
+
+	/*
+	 * Inlined x86_func_cmps() call
+	 * 	(*instr->emul->func)(vcpu, &mem, state->gprs);
+	 */
+
+	/* Perform the CMP. */
+	op1 = *((uint64_t *) data1);
+	op2 = *((uint64_t *) data2);
+	exec_sub(op1, op2, &fl, size);
+
+	gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK;
+	gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK);
+
+	if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
+		gprs[NVMM_X64_GPR_RSI] -= size;
+		gprs[NVMM_X64_GPR_RDI] -= size;
+	} else {
+		gprs[NVMM_X64_GPR_RSI] += size;
+		gprs[NVMM_X64_GPR_RDI] += size;
+	}
 
 	return 0;
 }
@@ -3349,7 +3456,10 @@ nvmm_assist_mem(struct nvmm_machine *mac
 	}
 
 	if (instr.opcode->movs) {
-		ret = assist_mem_double(mach, vcpu, &instr);
+		ret = assist_mem_movs(mach, vcpu, &instr);
+	} else if (instr.opcode->cmps) {
+		instr.legpref.repe = !instr.legpref.repn;
+		ret = assist_mem_cmps(mach, vcpu, &instr);
 	} else {
 		ret = assist_mem_single(mach, vcpu, &instr);
 	}
@@ -3364,9 +3474,15 @@ nvmm_assist_mem(struct nvmm_machine *mac
 		if (cnt == 0) {
 			state->gprs[NVMM_X64_GPR_RIP] += instr.len;
 		} else if (__predict_false(instr.legpref.repn)) {
+			/* repn */
 			if (state->gprs[NVMM_X64_GPR_RFLAGS] & PSL_Z) {
 				state->gprs[NVMM_X64_GPR_RIP] += instr.len;
 			}
+		} else if (__predict_false(instr.legpref.repe)) {
+			/* repe */
+			if ((state->gprs[NVMM_X64_GPR_RFLAGS] & PSL_Z) == 0) {
+				state->gprs[NVMM_X64_GPR_RIP] += instr.len;
+			}
 		}
 	} else {
 		state->gprs[NVMM_X64_GPR_RIP] += instr.len;
Index: tests/lib/libnvmm/h_mem_assist.c
===================================================================
RCS file: /cvsroot/src/tests/lib/libnvmm/h_mem_assist.c,v
retrieving revision 1.19
diff -u -p -r1.19 h_mem_assist.c
--- tests/lib/libnvmm/h_mem_assist.c	5 Sep 2020 07:22:26 -0000	1.19
+++ tests/lib/libnvmm/h_mem_assist.c	15 Nov 2020 15:27:03 -0000
@@ -174,6 +174,7 @@ extern uint8_t test13_begin, test13_end;
 extern uint8_t test14_begin, test14_end;
 extern uint8_t test_64bit_15_begin, test_64bit_15_end;
 extern uint8_t test_64bit_16_begin, test_64bit_16_end;
+extern uint8_t test17_begin, test17_end;
 
 static const struct test tests64[] = {
 	{ "64bit test1 - MOV", &test1_begin, &test1_end, 0x3004, 0 },
@@ -193,6 +194,7 @@ static const struct test tests64[] = {
 	{ "64bit test15 - XCHG", &test_64bit_15_begin, &test_64bit_15_end, 0x123456, 0 },
 	{ "64bit test16 - XCHG", &test_64bit_16_begin, &test_64bit_16_end,
 	  0x123456, 0 },
+	{ "64bit test17 - CMPS", &test17_begin, &test17_end, 0x00001, 0 },
 	{ NULL, NULL, NULL, -1, 0 }
 };
 
Index: tests/lib/libnvmm/h_mem_assist_asm.S
===================================================================
RCS file: /cvsroot/src/tests/lib/libnvmm/h_mem_assist_asm.S,v
retrieving revision 1.9
diff -u -p -r1.9 h_mem_assist_asm.S
--- tests/lib/libnvmm/h_mem_assist_asm.S	5 Sep 2020 07:22:26 -0000	1.9
+++ tests/lib/libnvmm/h_mem_assist_asm.S	15 Nov 2020 15:27:03 -0000
@@ -44,6 +44,7 @@
 	.globl	test14_begin, test14_end
 	.globl	test_64bit_15_begin, test_64bit_15_end
 	.globl	test_64bit_16_begin, test_64bit_16_end
+	.globl	test17_begin, test17_end
 	.text
 	.code64
 
@@ -324,6 +325,28 @@ test_64bit_16_begin:
 	TEST_END
 test_64bit_16_end:
 
+	.align	64
+test17_begin:
+	movq	$0x1000,%rax
+	movq	$0xdeadbeefcafe, %rbx
+	movq	%rbx,0x00(%rax)
+	movq	%rbx,0x08(%rax)
+
+	movq	$0xdeadbeefcafe, %rbx
+	movq	%rbx,0x20(%rax)
+	movq	$0, %rbx
+	movq	%rbx,0x28(%rax)
+
+	movq	$0x1000,%rsi
+	movq	$0x1020,%rdi
+
+	movq	$3,%rcx
+	repe cmpsq
+
+	movq	%rcx,(%rax)
+	TEST_END
+test17_end:
+
 /* -------------------------------------------------------------------------- */
 
 	.globl	test_16bit_1_begin, test_16bit_1_end

Attachment: signature.asc
Description: PGP signature



Home | Main Index | Thread Index | Old Index