NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: port-amd64/57661: Crash when booting on Xeon Silver 4416+ in KVM/Qemu



The attached patch bumps UPAGES and USPACE to make room for the extra
FPU save size.  Can you please try it out on the affected machines?
And, if it boots, can you run the attached program xfeat2.c and share
the output?

The patch isn't great as is -- it unconditionally increases the memory
usage of every thread by two pages, even on machines where they'll
never be used.  But if this works, we can look into sizing it (mostly)
dynamically based on the cpuid results.
diff -r 1cb0546d18b6 sys/arch/amd64/amd64/machdep.c
--- a/sys/arch/amd64/amd64/machdep.c	Thu Apr 10 18:53:29 2025 +0000
+++ b/sys/arch/amd64/amd64/machdep.c	Fri Apr 11 17:01:24 2025 +0000
@@ -1784,6 +1784,26 @@ init_x86_64(paddr_t first_avail)
 	consinit();	/* XXX SHOULD NOT BE DONE HERE */
 
 	/*
+	 * Validate x86_fpu_save_size (determined by cpu_probe above)
+	 * against the space reserved for it.  We do this now, rather than
+	 * earlier, so that if we have to panic, the console has been
+	 * initialized.  We don't allow arbitrary x86_fpu_save_size
+	 * because we statically preallocate the largest possible size
+	 * early at start in locore.S, before we know the save size.
+	 *
+	 * cpu_uarea_alloc in x86/vm_machdep.c relies on this
+	 * validation.
+	 */
+	__CTASSERT(offsetof(struct pcb, pcb_savefpu) < PAGE_SIZE);
+	if (x86_fpu_save_size > PAGE_SIZE - offsetof(struct pcb, pcb_savefpu) +
+	    UPAGES_FPU*PAGE_SIZE) {
+		panic("x86_fpu_save_size too large: %u > %zu",
+		    x86_fpu_save_size,
+		    (PAGE_SIZE - offsetof(struct pcb, pcb_savefpu) +
+			UPAGES_FPU*PAGE_SIZE));
+	}
+
+	/*
 	 * Initialize PAGE_SIZE-dependent variables.
 	 */
 	uvm_md_init();
diff -r 1cb0546d18b6 sys/arch/amd64/include/param.h
--- a/sys/arch/amd64/include/param.h	Thu Apr 10 18:53:29 2025 +0000
+++ b/sys/arch/amd64/include/param.h	Fri Apr 11 17:01:24 2025 +0000
@@ -69,11 +69,29 @@
 #define	SINCR		1		/* increment of stack/NBPG */
 
 #if defined(KASAN) || defined(KMSAN)
-#define	UPAGES		8
+#define UPAGES_KxSAN	2
+#else
+#define	UPAGES_KxSAN	0
+#endif
+#if defined(SVS)
+#define	UPAGES_SVS	1
+#else
+#define	UPAGES_SVS	0
+#endif
+#define	UPAGES_PCB	1	/* one page for the PCB */
+#define	UPAGES_FPU	2	/* two extra pages for fpusave */
+#define	UPAGES_RED	1	/* one page for red zone between pcb/stack */
+#define	UPAGES_STACK	3	/* three pages (12 KiB) of stack space */
+#define	UPAGES		\
+	(UPAGES_PCB + UPAGES_FPU + UPAGES_RED + UPAGES_STACK + UPAGES_SVS +   \
+	    UPAGES_KxSAN)
+
+#if defined(KASAN) || defined(KMSAN)
+__CTASSERT(UPAGES == 10);
 #elif defined(SVS)
-#define	UPAGES		6		/* 1 page used internally by SVS */
+__CTASSERT(UPAGES == 8);
 #else
-#define	UPAGES		5		/* pages of u-area (1 for redzone) */
+__CTASSERT(UPAGES == 7);
 #endif
 #define	USPACE		(UPAGES * NBPG)	/* total size of u-area */
 
diff -r 1cb0546d18b6 sys/arch/x86/x86/vm_machdep.c
--- a/sys/arch/x86/x86/vm_machdep.c	Thu Apr 10 18:53:29 2025 +0000
+++ b/sys/arch/x86/x86/vm_machdep.c	Fri Apr 11 17:01:24 2025 +0000
@@ -351,9 +351,11 @@ vunmapbuf(struct buf *bp, vsize_t len)
 #ifdef __HAVE_CPU_UAREA_ROUTINES
 /*
  * Layout of the uarea:
- *    Page[0]        = PCB
- *    Page[1]        = RedZone
- *    Page[2]        = Stack
+ *    Page[0]        = PCB and start of FPU save area
+ *    Page[1]        = FPU save extension page 1
+ *    Page[2]        = FPU save extension page 2
+ *    Page[3]        = RedZone
+ *    Page[4]        = Stack
  *    Page[...]      = Stack
  *    Page[UPAGES-1] = Stack
  *    Page[UPAGES]   = RedZone
@@ -370,8 +372,8 @@ cpu_uarea_alloc(bool system)
 	base = uvm_km_alloc(kernel_map, USPACE + PAGE_SIZE, 0,
 	    UVM_KMF_WIRED|UVM_KMF_WAITVA);
 
-	/* Page[1] = RedZone */
-	va = base + PAGE_SIZE;
+	/* Page[UPAGES_PCB + UPAGES_FPU] = RedZone */
+	va = base + (UPAGES_PCB + UPAGES_FPU)*PAGE_SIZE;
 	if (!pmap_extract(pmap_kernel(), va, &pa)) {
 		panic("%s: impossible, Page[1] unmapped", __func__);
 	}
#include <stdint.h>
#include <stdio.h>
#include <util.h>

#define	XCR0_FMT	"\177\020"					      \
	"b\000"		"x87 FPU/MMX\0"					      \
	"b\001"		"SSE\0"						      \
	"b\002"		"AVX\0"						      \
	"b\003"		"BNDREG\0"					      \
	"b\004"		"BNDCSR\0"					      \
	"b\005"		"Opmask\0"					      \
	"b\006"		"ZMM_Hi256\0"					      \
	"b\007"		"Hi16_ZMM\0"					      \
	"b\011"		"PKRU\0"					      \
	/* end of XCR0_FMT */

/*
 * Print the XSAVE feature information reported by CPUID leaf 0x0d
 * (sub-leaf 0) and the current value of XCR0.
 *
 * Output goes to stdout, diagnostics to stderr.  Returns 0 on success
 * and 1 if leaf 0x0d is not available or stdout reported a write
 * error.
 */
int
main(void)
{
	uint32_t eax, ebx, ecx, edx, xcr0lo, xcr0hi;
	uint64_t xcr0;
	char buf[128];

	/* CPUID leaf 0: EAX holds the highest supported basic leaf. */
	__asm__ volatile("cpuid"
	    : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
	    : "a"(0x00), "c"(0x00));
	if (eax < 0x0d) {
		fprintf(stderr,
		    "CPUID leaf 0x0d not supported (max basic leaf 0x%x)\n",
		    (unsigned int)eax);
		return 1;
	}

	/*
	 * CPUID leaf 0x0d, sub-leaf 0:
	 *   EAX/EDX = low/high halves of the supported XCR0 bit mask
	 *   EBX     = save area size in bytes for the enabled features
	 *   ECX     = save area size in bytes for all supported features
	 */
	__asm__ volatile("cpuid"
	    : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
	    : "a"(0x0d), "c"(0x00));
	snprintb(buf, sizeof(buf), XCR0_FMT, eax);
	printf("XFeatureSupportedMask[0:31]\t= %s\n", buf);
	printf("XFeatureEnabledSizeMax\t\t= 0x%x\n", (unsigned int)ebx);
	printf("XFeatureSupportedSizeMax\t= 0x%x\n", (unsigned int)ecx);
	snprintb(buf, sizeof(buf), XCR0_FMT, (uint64_t)edx << 32);
	printf("XFeatureSupportedMask[32:63]\t= %s\n", buf);
	/* Push the CPUID results out before touching XCR0. */
	fflush(stdout);

	/*
	 * xgetbv raises #UD unless the OS has enabled XSAVE, which is
	 * reported in CPUID leaf 1, ECX bit 27 (OSXSAVE).
	 */
	__asm__ volatile("cpuid"
	    : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
	    : "a"(0x01), "c"(0x00));
	if ((ecx & (UINT32_C(1) << 27)) != 0) {
		__asm__ volatile("xgetbv"
		    : "=a"(xcr0lo), "=d"(xcr0hi)
		    : "c"(0));
		xcr0 = (uint64_t)xcr0hi << 32 | xcr0lo;
		snprintb(buf, sizeof(buf), XCR0_FMT, xcr0);
		printf("xcr0\t\t\t\t= %s\n", buf);
	} else {
		printf("xcr0\t\t\t\t= unavailable (OSXSAVE not enabled)\n");
	}

	fflush(stdout);
	/* Normalize to 0/1 so the exit status cannot be truncated to 0. */
	return ferror(stdout) != 0;
}


Home | Main Index | Thread Index | Old Index