NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: kern/59235: efi(8) panics



I don't know what's going wrong here -- I suspect the firmware on this
machine might be buggy.  I reviewed arguments and units and EFI spec,
and I haven't seen anything wrong in what NetBSD is doing, either in
userland or in the kernel, except for allocating a buffer that's twice
as large as can ever be used in userland (which is now fixed).

The stack frame gdb shows is obviously bogus; there are no calls to
efi_runtime_nextvar with null `namesize'.  `name' pointing to all NUL
bytes is to be expected: to start iterating over all variables, you
first call GetNextVariableName (a.k.a. struct efi_rt::rt_scanvar) with
a variable name that starts with NUL.

The attached patch sets up an onfault handler while executing the EFI
runtime services call in an attempt to recover gracefully from certain
classes of buggy firmware rather than crash the system.  Of course,
this might suppress diagnostic information -- but the diagnostic
information we have doesn't seem to be very useful anyway.
# HG changeset patch
# User Taylor R Campbell <riastradh%NetBSD.org@localhost>
# Date 1743344228 0
#      Sun Mar 30 14:17:08 2025 +0000
# Branch trunk
# Node ID ccfb5337d46228a1ecee5217824307ac1f2ed5ff
# Parent  dc0a5778375a753d3e8b04e840cacb7f95896328
# EXP-Topic riastradh-pr59235-efifault
WIP: Set up a pcb_onfault handler during efirt access to abort.

PR kern/59235: efi(8) panics

diff -r dc0a5778375a -r ccfb5337d462 sys/arch/amd64/amd64/cpufunc.S
--- a/sys/arch/amd64/amd64/cpufunc.S	Sun Mar 30 00:07:51 2025 +0000
+++ b/sys/arch/amd64/amd64/cpufunc.S	Sun Mar 30 14:17:08 2025 +0000
@@ -503,3 +503,12 @@ ENTRY(svs_quad_copy)
 	movsq
 	ret
 END(svs_quad_copy)
+
+/*
+ * pcb_onfault routine for EFI runtime services.  Error code is in %rax.
+ * All other registers are undefined.  See efi_machdep.c for usage.
+ */
+ENTRY(efi_runtime_pcb_onfault)
+	movq	%rax,%rdi
+	jmp	_C_LABEL(efi_runtime_fault_abort)
+END(efi_runtime_pcb_onfault)
diff -r dc0a5778375a -r ccfb5337d462 sys/arch/x86/x86/efi_machdep.c
--- a/sys/arch/x86/x86/efi_machdep.c	Sun Mar 30 00:07:51 2025 +0000
+++ b/sys/arch/x86/x86/efi_machdep.c	Sun Mar 30 14:17:08 2025 +0000
@@ -805,9 +805,44 @@ fail:	/*
 
 struct efi_runtime_cookie {
 	void	*erc_pmap_cookie;
+	label_t	*erc_label;
 };
 
 /*
+ * efi_runtime_fault_cookie, efi_runtime_fault_error
+ *
+ *	State for longjmp on fault.  Access serialized by
+ *	efi_runtime_lock.
+ */
+static struct efi_runtime_cookie *efi_runtime_fault_cookie;
+static int efi_runtime_fault_error;
+
+/*
+ * efi_runtime_pcb_onfault
+ *
+ *	Return address for pcb_onfault during EFI runtime services
+ *	call.  This takes an error code in %rax as if it were a return
+ *	value -- it is not a normal function to call.
+ */
+void efi_runtime_pcb_onfault(void);
+
+/*
+ * efi_runtime_fault_abort(error)
+ *
+ *	Standard procedure call triggered by pcb_onfault.  Invoked by
+ *	efi_runtime_pcb_onfault.  Makes 1 come flying out of the setjmp
+ *	that began the EFI runtime services call.
+ */
+void efi_runtime_fault_abort(int);
+void
+efi_runtime_fault_abort(int error)
+{
+
+	efi_runtime_fault_error = error;
+	longjmp(efi_runtime_fault_cookie->erc_label);
+}
+
+/*
  * efi_runtime_enter(cookie)
  *
  *	Prepare to call an EFI runtime service, storing state for the
@@ -815,8 +850,9 @@ struct efi_runtime_cookie {
  *	done.
  */
 static void
-efi_runtime_enter(struct efi_runtime_cookie *cookie)
+efi_runtime_enter(struct efi_runtime_cookie *cookie, label_t *label)
 {
+	struct pcb * const pcb = lwp_getpcb(curlwp);
 
 	KASSERT(efi_runtime_pmap != NULL);
 
@@ -847,6 +883,15 @@ efi_runtime_enter(struct efi_runtime_coo
 	 * run privileged, which they need in order to do I/O anyway.
 	 */
 	cookie->erc_pmap_cookie = pmap_activate_sync(efi_runtime_pmap);
+
+	/*
+	 * If the EFI runtime services code is broken, try to recover
+	 * gracefully.
+	 */
+	cookie->erc_label = label;
+	efi_runtime_fault_cookie = cookie;
+	KASSERT(pcb->pcb_onfault == NULL);
+	pcb->pcb_onfault = &efi_runtime_pcb_onfault;
 }
 
 /*
@@ -858,13 +903,38 @@ efi_runtime_enter(struct efi_runtime_coo
 static void
 efi_runtime_exit(struct efi_runtime_cookie *cookie)
 {
+	struct pcb * const pcb = lwp_getpcb(curlwp);
 
+	KASSERT(pcb->pcb_onfault == &efi_runtime_pcb_onfault);
+	pcb->pcb_onfault = NULL;
 	pmap_deactivate_sync(efi_runtime_pmap, cookie->erc_pmap_cookie);
 	fpu_kern_leave();
 	mutex_exit(&efi_runtime_lock);
 }
 
 /*
+ * efi_runtime_faulted()
+ *
+ *	To be called by an EFI runtime services wrapper function when 1
+ *	comes flying out of setjmp, meaning the actual call had
+ *	faulted.
+ */
+static efi_status
+efi_runtime_faulted(void)
+{
+	struct efi_runtime_cookie *cookie = efi_runtime_fault_cookie;
+	int error = efi_runtime_fault_error;
+
+	KASSERT(mutex_owned(&efi_runtime_lock));
+	KASSERT(cookie != NULL);
+
+	efi_runtime_exit(cookie);
+
+	/* XXX */
+	return (error == EFAULT ? EFI_DEVICE_ERROR : EFI_INVALID_PARAMETER);
+}
+
+/*
  * efi_runtime_gettime(tm, tmcap)
  *
  *	Call RT->GetTime, or return EFI_UNSUPPORTED if unsupported.
@@ -874,11 +944,15 @@ efi_runtime_gettime(struct efi_tm *tm, s
 {
 	efi_status status;
 	struct efi_runtime_cookie cookie;
+	label_t label;
 
 	if (efi_rt.rt_gettime == NULL)
 		return EFI_UNSUPPORTED;
 
-	efi_runtime_enter(&cookie);
+	if (setjmp(&label))
+		return efi_runtime_faulted();
+
+	efi_runtime_enter(&cookie, &label);
 	status = efi_rt.rt_gettime(tm, tmcap);
 	efi_runtime_exit(&cookie);
 
@@ -896,11 +970,15 @@ efi_runtime_settime(struct efi_tm *tm)
 {
 	efi_status status;
 	struct efi_runtime_cookie cookie;
+	label_t label;
 
 	if (efi_rt.rt_settime == NULL)
 		return EFI_UNSUPPORTED;
 
-	efi_runtime_enter(&cookie);
+	if (setjmp(&label))
+		return efi_runtime_faulted();
+
+	efi_runtime_enter(&cookie, &label);
 	status = efi_rt.rt_settime(tm);
 	efi_runtime_exit(&cookie);
 
@@ -918,11 +996,15 @@ efi_runtime_getvar(efi_char *name, struc
 {
 	efi_status status;
 	struct efi_runtime_cookie cookie;
+	label_t label;
 
 	if (efi_rt.rt_getvar == NULL)
 		return EFI_UNSUPPORTED;
 
-	efi_runtime_enter(&cookie);
+	if (setjmp(&label))
+		return efi_runtime_faulted();
+
+	efi_runtime_enter(&cookie, &label);
 	status = efi_rt.rt_getvar(name, vendor, attrib, datasize, data);
 	efi_runtime_exit(&cookie);
 
@@ -940,11 +1022,15 @@ efi_runtime_nextvar(unsigned long *names
 {
 	efi_status status;
 	struct efi_runtime_cookie cookie;
+	label_t label;
 
 	if (efi_rt.rt_scanvar == NULL)
 		return EFI_UNSUPPORTED;
 
-	efi_runtime_enter(&cookie);
+	if (setjmp(&label))
+		return efi_runtime_faulted();
+
+	efi_runtime_enter(&cookie, &label);
 	status = efi_rt.rt_scanvar(namesize, name, vendor);
 	efi_runtime_exit(&cookie);
 
@@ -962,11 +1048,15 @@ efi_runtime_setvar(efi_char *name, struc
 {
 	efi_status status;
 	struct efi_runtime_cookie cookie;
+	label_t label;
 
 	if (efi_rt.rt_setvar == NULL)
 		return EFI_UNSUPPORTED;
 
-	efi_runtime_enter(&cookie);
+	if (setjmp(&label))
+		return efi_runtime_faulted();
+
+	efi_runtime_enter(&cookie, &label);
 	status = efi_rt.rt_setvar(name, vendor, attrib, datasize, data);
 	efi_runtime_exit(&cookie);
 


Home | Main Index | Thread Index | Old Index