NetBSD-Bugs archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Re: kern/58775 (apei(4) spamming console)
The following reply was made to PR kern/58775; it has been noted by GNATS.
From: Taylor R Campbell <riastradh%NetBSD.org@localhost>
To: "Hauke Fath (SPG)" <hf%spg.tu-darmstadt.de@localhost>
Cc: gnats-bugs%netbsd.org@localhost, gnats-admin%netbsd.org@localhost
Subject: Re: kern/58775 (apei(4) spamming console)
Date: Thu, 24 Oct 2024 20:27:07 +0000
This is a multi-part message in MIME format.
--=_UO/DuVWULAgwAF6z8mwyycn6j85jhood
Thanks, can you please try the attached patch, with apei(4) enabled
again in your kernel config?
(By the way, FYI: You can disable apei(4) at boot-time, without
building a new kernel, by putting `userconf=disable apei' on its own
line in boot.cfg, or by adding `userconf disable apei' to the
semicolon-separated list of commands in one of the `menu=...' lines in
boot.cfg.)
--=_UO/DuVWULAgwAF6z8mwyycn6j85jhood
Content-Type: text/plain; charset="ISO-8859-1"; name="pr58775-apeipcieerror"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: attachment; filename="pr58775-apeipcieerror.patch"
diff -r b4e17a9d10b4 -r d7eb1dff835c sys/dev/acpi/apei.c
--- a/sys/dev/acpi/apei.c Mon Oct 21 15:57:45 2024 +0000
+++ b/sys/dev/acpi/apei.c Thu Oct 24 20:08:59 2024 +0000
@@ -58,6 +58,7 @@
#include <dev/acpi/apei_hestvar.h>
#include <dev/acpi/apei_interp.h>
#include <dev/acpi/apeivar.h>
+#include <dev/pci/pci_error.h>
=20
#define _COMPONENT ACPI_RESOURCE_COMPONENT
ACPI_MODULE_NAME ("apei")
@@ -313,10 +314,10 @@ apei_format_guid(const struct uuid *uuid
{
=20
snprintf(guidstr, 69, "{0x%08x,0x%04x,0x%04x,"
- "0x%02x%02x,"
- "{0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x}}",
+ "{0x%02x,0x%02x,"
+ "0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x}}",
uuid->time_low, uuid->time_mid, uuid->time_hi_and_version,
- uuid->clock_seq_hi_and_reserved, uuid->clock_seq_hi_and_reserved,
+ uuid->clock_seq_hi_and_reserved, uuid->clock_seq_low,
uuid->node[0], uuid->node[1], uuid->node[2],
uuid->node[3], uuid->node[4], uuid->node[5]);
}
@@ -356,6 +357,8 @@ static const char *const apei_gede_sever
};
=20
/*
+ * N.2.5. Memory Error Section
+ *
* https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.htm=
l#memory-error-section
*/
static const struct uuid CPER_MEMORY_ERROR_SECTION =3D
@@ -475,6 +478,98 @@ apei_cper_memory_error_report(struct ape
}
=20
/*
+ * N.2.7. PCI Express Error Section
+ *
+ * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.htm=
l#pci-express-error-section
+ */
+static const struct uuid CPER_PCIE_ERROR_SECTION =3D
+ {0xd995e954,0xbbc1,0x430f,0xad,0x91,{0xb4,0x4d,0xcb,0x3c,0x6f,0x35}};
+
+static const char *const cper_pcie_error_port_type[] =3D {
+#define F(LN, SN, V) [LN] =3D #SN,
+ CPER_PCIE_ERROR_PORT_TYPES(F)
+#undef F
+};
+
+static void
+apei_cper_pcie_error_report(struct apei_softc *sc, const void *buf, size_t=
len,
+ const char *ctx)
+{
+ const struct cper_pcie_error *PE =3D buf;
+ char bitbuf[1024];
+
+ snprintb(bitbuf, sizeof(bitbuf),
+ CPER_PCIE_ERROR_VALIDATION_BITS_FMT, PE->ValidationBits);
+ aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=3D%s\n", ctx, bitbuf);
+ if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_PORT_TYPE) {
+ const uint32_t t =3D PE->PortType;
+ const char *n =3D t < __arraycount(cper_pcie_error_port_type)
+ ? cper_pcie_error_port_type[t] : NULL;
+
+ if (n) {
+ device_printf(sc->sc_dev, "%s: PortType=3D%"PRIu32
+ " (%s)\n", ctx, t, n);
+ } else {
+ device_printf(sc->sc_dev, "%s: PortType=3D%"PRIu32"\n",
+ ctx, t);
+ }
+ }
+ if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_VERSION) {
+ /* XXX BCD */
+ device_printf(sc->sc_dev, "%s: Version=3D0x%"PRIx32"\n",
+ ctx, PE->Version);
+ }
+ if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_COMMAND_STATUS) {
+ device_printf(sc->sc_dev, "%s: CommandStatus=3D0x04%"PRIx32"\n",
+ ctx, PE->CommandStatus);
+ }
+ if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_DEVICE_ID) {
+ /* XXX decode vendor/product/class/fun/dev/seg/bus */
+ char hex[2*sizeof(PE->DeviceID) + 1];
+ const unsigned char *p =3D (const void *)&PE->DeviceID;
+ unsigned i;
+
+ for (i =3D 0; i < sizeof(PE->DeviceID); i++)
+ snprintf(hex + 2*i, sizeof(hex) - 2*i, "%02hhx", p[i]);
+ device_printf(sc->sc_dev, "%s: DeviceID=3D{%s}\n", ctx, hex);
+ }
+ if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_DEVICE_SERIAL) {
+ device_printf(sc->sc_dev, "%s: DeviceSerial=3D{%016"PRIx64"}\n",
+ ctx, PE->DeviceSerial);
+ }
+ if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_BRIDGE_CONTROL_STATUS) {
+ device_printf(sc->sc_dev, "%s: BridgeControlStatus=3D%"PRIx32
+ "\n", ctx, PE->BridgeControlStatus);
+ }
+ if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_CAPABILITY_STRUCTURE) {
+ char hex[2*sizeof(PE->CapabilityStructure) + 1];
+ unsigned i;
+
+ for (i =3D 0; i < sizeof(PE->CapabilityStructure); i++) {
+ snprintf(hex + 2*i, sizeof(hex) - 2*i, "%02hhx",
+ PE->CapabilityStructure[i]);
+ }
+ device_printf(sc->sc_dev, "%s: CapabilityStructure=3D{%s}\n",
+ ctx, hex);
+ }
+ if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_AER_INFO) {
+ char hex[2*sizeof(PE->AERInfo) + 1];
+ unsigned i;
+
+ for (i =3D 0; i < sizeof(PE->AERInfo); i++) {
+ snprintf(hex + 2*i, sizeof(hex) - 2*i, "%02hhx",
+ PE->AERInfo[i]);
+ }
+ device_printf(sc->sc_dev, "%s: AERInfo=3D{%s}\n", ctx, hex);
+ }
+
+ /*
+ * Let the PCI subsystem handle it.
+ */
+ pci_cper_error(PE);
+}
+
+/*
* apei_cper_reports
*
* Table of known Common Platform Error Record types, symbolic
@@ -494,6 +589,9 @@ static const struct apei_cper_report {
{ "memory", &CPER_MEMORY_ERROR_SECTION,
sizeof(struct cper_memory_error),
apei_cper_memory_error_report },
+ { "PCIe", &CPER_PCIE_ERROR_SECTION,
+ sizeof(struct cper_pcie_error),
+ apei_cper_pcie_error_report },
};
=20
/*
diff -r b4e17a9d10b4 -r d7eb1dff835c sys/dev/acpi/apei_cper.h
--- a/sys/dev/acpi/apei_cper.h Mon Oct 21 15:57:45 2024 +0000
+++ b/sys/dev/acpi/apei_cper.h Thu Oct 24 20:08:59 2024 +0000
@@ -62,14 +62,14 @@ struct cper_header {
} __packed;
__CTASSERT(sizeof(struct cper_header) =3D=3D 128);
=20
-enum { /* struct cper_header::error_severity */
+enum { /* struct cper_header::ErrorSeverity */
CPER_ERROR_SEVERITY_RECOVERABLE =3D 0,
CPER_ERROR_SEVERITY_FATAL =3D 1,
CPER_ERROR_SEVERITY_CORRECTED =3D 2,
CPER_ERROR_SEVERITY_INFORMATIONAL =3D 3,
};
=20
-enum { /* struct cper_header::validation_bits */
+enum { /* struct cper_header::ValidationBits */
CPER_VALID_PLATFORM_ID =3D __BIT(0),
CPER_VALID_TIMESTAMP =3D __BIT(1),
CPER_VALID_PARTITION_ID =3D __BIT(2),
@@ -78,7 +78,7 @@ enum { /* struct cper_header::validat
/*
* https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.htm=
l#error-record-header-flags
*/
-enum { /* struct cper_header::flags */
+enum { /* struct cper_header::Flags */
CPER_HW_ERROR_FLAG_RECOVERED =3D __BIT(0),
CPER_HW_ERROR_FLAG_PREVERR =3D __BIT(1),
CPER_HW_ERROR_FLAG_SIMULATED =3D __BIT(2),
@@ -110,6 +110,8 @@ enum {
"\0"
=20
/*
+ * N.2.5. Memory Error Section
+ *
* https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.htm=
l#memory-error-section
*
* Type: {0xa5bc1114,0x6f64,0x4ede,{0xb8,0x63,0x3e,0x83,0xed,0x7c,0x83,0xb=
1}}
@@ -144,7 +146,7 @@ struct cper_memory_error_ext {
} __packed;
__CTASSERT(sizeof(struct cper_memory_error_ext) =3D=3D 80);
=20
-enum { /* struct cper_memory_error::validation_bits */
+enum { /* struct cper_memory_error::ValidationBits */
CPER_MEMORY_ERROR_VALID_ERROR_STATUS =3D __BIT(0),
CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS =3D __BIT(1),
CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS_MASK =3D __BIT(2),
@@ -194,7 +196,7 @@ enum { /* struct cper_memory_error::v
"b\025" "CHIP_ID\0" \
"\0"
=20
-enum { /* struct cper_memory_error::bank */
+enum { /* struct cper_memory_error::Bank */
CPER_MEMORY_ERROR_BANK_ADDRESS =3D __BITS(7,0),
CPER_MEMORY_ERROR_BANK_GROUP =3D __BITS(15,8),
};
@@ -219,16 +221,92 @@ enum { /* struct cper_memory_error::b
F(CPER_MEMORY_ERROR_PHYSMEM_MAPOUT_EVENT, PHYSMEM_MAPOUT_EVENT, 15) \
/* end of CPER_MEMORY_ERROR_TYPES */
=20
-enum cper_memory_error_type { /* struct cper_memory_error::memory_error_ty=
pe */
+enum cper_memory_error_type { /* struct cper_memory_error::MemoryErrorType=
*/
#define CPER_MEMORY_ERROR_TYPE_DEF(LN, SN, V) LN =3D V,
CPER_MEMORY_ERROR_TYPES(CPER_MEMORY_ERROR_TYPE_DEF)
#undef CPER_MEMORY_ERROR_TYPE_DEF
};
=20
-enum { /* struct cper_memory_error_ext::extended */
+enum { /* struct cper_memory_error_ext::Extended */
CPER_MEMORY_ERROR_EXTENDED_ROWBIT16 =3D __BIT(0),
CPER_MEMORY_ERROR_EXTENDED_ROWBIT17 =3D __BIT(1),
CPER_MEMORY_ERROR_EXTENDED_CHIPID =3D __BITS(7,5),
};
=20
+/*
+ * N.2.7. PCI Express Error Section
+ *
+ * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.htm=
l#pci-express-error-section
+ *
+ * Type: {0xd995e954,0xbbc1,0x430f,{0xad,0x91,0xb4,0x4d,0xcb,0x3c,0x6f,0x3=
5}}
+ */
+
+struct cper_pcie_error {
+ uint64_t ValidationBits;
+ uint32_t PortType;
+ uint32_t Version;
+ uint32_t CommandStatus;
+ uint32_t Reserved0;
+ struct {
+ uint8_t VendorID[2];
+ uint8_t DeviceID[2]; /* product */
+ uint8_t ClassCode[3];
+ uint8_t Function;
+ uint8_t Device;
+ uint8_t Segment[2];
+ uint8_t PrimaryBus;
+ uint8_t SecondaryBus;
+ uint8_t Slot[2]; /* bits 0:2 resv, bits 3:15 slot */
+ uint8_t Reserved0;
+ } DeviceID;
+ uint64_t DeviceSerial;
+ uint32_t BridgeControlStatus;
+ uint8_t CapabilityStructure[60];
+ uint8_t AERInfo[96];
+} __packed; /* firmware record layout; buffer alignment not guaranteed */
+__CTASSERT(sizeof(struct cper_pcie_error) =3D=3D 208);
+
+enum { /* struct cper_pcie_error::ValidationBits */
+ CPER_PCIE_ERROR_VALID_PORT_TYPE =3D __BIT(0),
+ CPER_PCIE_ERROR_VALID_VERSION =3D __BIT(1),
+ CPER_PCIE_ERROR_VALID_COMMAND_STATUS =3D __BIT(2),
+ CPER_PCIE_ERROR_VALID_DEVICE_ID =3D __BIT(3),
+ CPER_PCIE_ERROR_VALID_DEVICE_SERIAL =3D __BIT(4),
+ CPER_PCIE_ERROR_VALID_BRIDGE_CONTROL_STATUS =3D __BIT(5),
+ CPER_PCIE_ERROR_VALID_CAPABILITY_STRUCTURE =3D __BIT(6),
+ CPER_PCIE_ERROR_VALID_AER_INFO =3D __BIT(7),
+};
+
+#define CPER_PCIE_ERROR_VALIDATION_BITS_FMT "\177\020" \
+ "b\000" "PORT_TYPE\0" \
+ "b\001" "VERSION\0" \
+ "b\002" "COMMAND_STATUS\0" \
+ "b\003" "DEVICE_ID\0" \
+ "b\004" "DEVICE_SERIAL\0" \
+ "b\005" "BRIDGE_CONTROL_STATUS\0" \
+ "b\006" "CAPABILITY_STRUCTURE\0" \
+ "b\007" "AER_INFO\0" \
+ "\0"
+
+#define CPER_PCIE_ERROR_PORT_TYPES(F) \
+ F(CPER_PCIE_ERROR_PORT_TYPE_PCIE_ENDPOINT, PCIE_ENDPOINT, 0) \
+ F(CPER_PCIE_ERROR_PORT_TYPE_LEGACY_PCI_ENDPOINT, LEGACY_PCI_ENDPOINT, \
+ 1) \
+ F(CPER_PCIE_ERROR_PORT_TYPE_ROOTPORT5_UPSTREAMSWITCH, \
+ ROOTPORT5_UPSTREAMSWITCH, 4) \
+ F(CPER_PCIE_ERROR_PORT_TYPE_DOWNSTREAMSWITCH, DOWNSTREAMSWITCH, 6) \
+ F(CPER_PCIE_ERROR_PORT_TYPE_PCIE_PCI_BRIDGE, PCIE_PCI_BRIDGE, 7) \
+ F(CPER_PCIE_ERROR_PORT_TYPE_PCI_PCIE_BRIDGE, PCI_PCIE_BRIDGE, 8) \
+ F(CPER_PCIE_ERROR_PORT_TYPE_RCIEP_DEV, RCIEP_DEV, 9) \
+ /* Root Complex Integrated Endpoint Device */ \
+ F(CPER_PCIE_ERROR_PORT_TYPE_RCEC, RCEC, 10) \
+ /* Root Complex Event Collector */ \
+ /* end of CPER_PCIE_ERROR_PORT_TYPES */
+
+enum cper_pcie_error_port_type { /* struct cper_pcie_error::PortType */
+#define CPER_PCIE_ERROR_PORT_TYPE_DEF(LN, SN, V) LN =3D V,
+ CPER_PCIE_ERROR_PORT_TYPES(CPER_PCIE_ERROR_PORT_TYPE_DEF)
+#undef CPER_PCIE_ERROR_PORT_TYPE_DEF
+};
+
#endif /* _SYS_DEV_ACPI_APEI_CPER_H_ */
diff -r b4e17a9d10b4 -r d7eb1dff835c sys/dev/acpi/apei_hest.c
--- a/sys/dev/acpi/apei_hest.c Mon Oct 21 15:57:45 2024 +0000
+++ b/sys/dev/acpi/apei_hest.c Thu Oct 24 20:08:59 2024 +0000
@@ -400,6 +400,8 @@ apei_hest_attach_ghes(struct apei_softc=20
*/
switch (ghes->Notify.Type) {
case ACPI_HEST_NOTIFY_POLLED:
+ if (ghes->Notify.PollInterval =3D=3D 0) /* paranoia */
+ break;
callout_init(&src->as_ch, CALLOUT_MPSAFE);
callout_setfunc(&src->as_ch, &apei_hest_ghes_poll, src);
callout_schedule(&src->as_ch, 0);
@@ -451,6 +453,8 @@ apei_hest_detach_ghes(struct apei_softc=20
*/
switch (ghes->Notify.Type) {
case ACPI_HEST_NOTIFY_POLLED:
+ if (ghes->Notify.PollInterval =3D=3D 0) /* paranoia */
+ break;
callout_halt(&src->as_ch, NULL);
callout_destroy(&src->as_ch);
break;
@@ -583,6 +587,8 @@ apei_hest_attach_ghes_v2(struct apei_sof
*/
switch (ghes_v2->Notify.Type) {
case ACPI_HEST_NOTIFY_POLLED:
+ if (ghes_v2->Notify.PollInterval =3D=3D 0) /* paranoia */
+ break;
callout_init(&src->as_ch, CALLOUT_MPSAFE);
callout_setfunc(&src->as_ch, &apei_hest_ghes_v2_poll, src);
callout_schedule(&src->as_ch, 0);
@@ -634,6 +640,8 @@ apei_hest_detach_ghes_v2(struct apei_sof
*/
switch (ghes_v2->Notify.Type) {
case ACPI_HEST_NOTIFY_POLLED:
+ if (ghes_v2->Notify.PollInterval =3D=3D 0) /* paranoia */
+ break;
callout_halt(&src->as_ch, NULL);
callout_destroy(&src->as_ch);
break;
diff -r b4e17a9d10b4 -r d7eb1dff835c sys/dev/pci/files.pci
--- a/sys/dev/pci/files.pci Mon Oct 21 15:57:45 2024 +0000
+++ b/sys/dev/pci/files.pci Thu Oct 24 20:08:59 2024 +0000
@@ -19,6 +19,7 @@ defflag opt_pciide.h PCIIDE_CMD064x_DISA
device pci {[dev =3D -1], [function =3D -1]}
attach pci at pcibus
file dev/pci/pci.c pci needs-flag
+file dev/pci/pci_error.c pci
file dev/pci/pci_map.c pci
file dev/pci/pci_quirks.c pci
file dev/pci/pci_resource.c pci & pci_resource
diff -r b4e17a9d10b4 -r d7eb1dff835c sys/dev/pci/pci_error.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/dev/pci/pci_error.c Thu Oct 24 20:08:59 2024 +0000
@@ -0,0 +1,257 @@
+/* $NetBSD$ */
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTO=
RS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIM=
ITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICU=
LAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTO=
RS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF =
THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * PCI error reporting
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include <dev/acpi/apei_cper.h> /* XXX not APEI- or even ACPI-specific */
+#include <dev/pci/pci_error.h>
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+
+static int
+pci_cper_match(void *cookie, const struct pci_attach_args *pa)
+{
+ const struct cper_pcie_error *PE =3D cookie; /* CPER PCIe section */
+
+ if (le16dec(PE->DeviceID.Segment) !=3D pci_get_segment(pa->pa_pc))
+ return 0; /* different PCI segment */
+ if (PE->DeviceID.PrimaryBus !=3D pa->pa_bus)
+ return 0; /* different bus */
+ if (PE->DeviceID.Device !=3D pa->pa_device)
+ return 0; /* different device number */
+ if (PE->DeviceID.Function !=3D pa->pa_function)
+ return 0; /* different function number */
+
+ return 1; /* segment, bus, device and function all match */
+}
+
+/*
+ * pci_cper_error(PE)
+ *
+ * Find the PCI device named by PE->DeviceID and call pci_error() on
+ * it. Returns early if the ID is not valid or no device matches.
+ */
+void
+pci_cper_error(const struct cper_pcie_error *PE)
+{
+ struct pci_attach_args pa;
+
+ /*
+ * If there's no device ID, nothing for us to do.
+ *
+ * XXX Report this back to the caller?
+ */
+ if ((PE->ValidationBits & CPER_PCIE_ERROR_VALID_DEVICE_ID) =3D=3D 0)
+ return;
+
+ /*
+ * Find a matching device. If none, do nothing -- we can't do
+ * anything to acknowledge this.
+ */
+ if (!pci_find_device1(&pa, pci_cper_match, __UNCONST(PE))) {
+ char devbuf[sizeof "PCI 0000:00:00.000"]; /* worst case + NUL */
+
+ snprintf(devbuf, sizeof(devbuf), "PCI %04x:%02x:%02x.%u",
+ le16dec(PE->DeviceID.Segment),
+ PE->DeviceID.PrimaryBus,
+ PE->DeviceID.Device,
+ PE->DeviceID.Function);
+ aprint_debug("%s: hardware error in unknown device\n", devbuf);
+ return;
+ }
+
+ /*
+ * Handle via the pci_attach_args that we now have.
+ */
+ pci_error(&pa);
+}
+
+/*
+ * pci_error(pa)
+ *
+ * Check for, report, and acknowledge any errors in the PCI device
+ * described by pa.
+ */
+void
+pci_error(const struct pci_attach_args *pa)
+{
+ char devbuf[sizeof "PCI 0000:00:00.000"]; /* worst case + NUL */
+ const pci_chipset_tag_t pc =3D pa->pa_pc;
+ const pcitag_t tag =3D pa->pa_tag;
+ pcireg_t aer, pcie;
+ char bitbuf[1024];
+
+ snprintf(devbuf, sizeof(devbuf), "PCI %04x:%02x:%02x.%u",
+ pci_get_segment(pa->pa_pc),
+ pa->pa_bus, pa->pa_device, pa->pa_function);
+
+ /*
+ * If we have Advanced Error Reporting capability, read and
+ * write back any uncorrectable or corrected error status.
+ */
+ if (pci_get_ext_capability(pc, tag, PCI_EXTCAP_AER, &aer, NULL)) {
+ pcireg_t uc_status, uc_mask, uc_sev;
+ pcireg_t control;
+ pcireg_t cor_status, cor_mask;
+
+ /*
+ * Read the status, mask, severity, and control (which
+ * has the number of the first error bit).
+ */
+ uc_status =3D pci_conf_read(pc, tag, aer + PCI_AER_UC_STATUS);
+ uc_mask =3D pci_conf_read(pc, tag, aer + PCI_AER_UC_MASK);
+ uc_sev =3D pci_conf_read(pc, tag, aer + PCI_AER_UC_SEVERITY);
+
+ cor_status =3D pci_conf_read(pc, tag, aer + PCI_AER_COR_STATUS);
+ cor_mask =3D pci_conf_read(pc, tag, aer + PCI_AER_COR_MASK);
+
+ control =3D pci_conf_read(pc, tag, aer + PCI_AER_CAP_CONTROL);
+
+ /*
+ * Acknowledge error status bits.
+ */
+ pci_conf_write(pc, tag, aer + PCI_AER_UC_STATUS, uc_status);
+ pci_conf_write(pc, tag, aer + PCI_AER_COR_STATUS, cor_status);
+
+ /* XXX move me to pcireg.h */
+#define PCI_AER_UC_STATUS_FMT "\177\020" \
+ "b\000" "UNDEFINED\0" \
+ "b\004" "DL_PROTOCOL_ERROR\0" \
+ "b\005" "SURPRISE_DOWN_ERROR\0" \
+ "b\014" "POISONED_TLP\0" \
+ "b\015" "FC_PROTOCOL_ERROR\0" \
+ "b\016" "COMPLETION_TIMEOUT\0" \
+ "b\017" "COMPLETION_ABORT\0" \
+ "b\020" "UNEXPECTED_COMPLETION\0" \
+ "b\021" "RECEIVER_OVERFLOW\0" \
+ "b\022" "MALFORMED_TLP\0" \
+ "b\023" "ECRC_ERROR\0" \
+ "b\024" "UNSUPPORTED_REQUEST_ERROR\0" \
+ "b\025" "ACS_VIOLATION\0" \
+ "b\026" "INTERNAL_ERROR\0" \
+ "b\027" "MC_BLOCKED_TLP\0" \
+ "b\030" "ATOMIC_OP_EGRESS_BLOCKED\0" \
+ "b\031" "TLP_PREFIX_BLOCKED_ERROR\0" \
+ "b\032" "POISONTLP_EGRESS_BLOCKED\0" \
+ "\0"
+
+ /*
+ * Report uncorrectable fatal errors.
+ */
+ if ((uc_status & uc_sev) !=3D 0) {
+ snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT,
+ uc_status & uc_sev);
+ aprint_error("%s: hardware fatal uncorrectable error:"
+ " %s (mask=3D0x%"PRIx32")\n",
+ devbuf, bitbuf,
+ (uint32_t)uc_mask);
+ }
+
+ /*
+ * Report uncorrectable non-fatal errors.
+ */
+ if ((uc_status & ~uc_sev) !=3D 0) {
+ snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT,
+ uc_status & ~uc_sev);
+ aprint_error("%s: hardware uncorrectable error: %s"
+ " (mask=3D0x%"PRIx32")\n",
+ devbuf, bitbuf,
+ (uint32_t)uc_mask);
+ }
+
+ /*
+ * Show the first error, if any.
+ */
+ if (uc_status !=3D 0) {
+ pcireg_t first =3D __SHIFTOUT(control,
+ PCI_AER_FIRST_ERROR_PTR);
+ snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT,
+ (uint32_t)1 << first);
+ aprint_error("%s: hardware first uncorrectable error:"
+ " %s\n",
+ devbuf, bitbuf);
+ }
+
+ /*
+ * Report corrected errors.
+ *
+ * XXX sysctl knob to suppress this
+ */
+ if (cor_status !=3D 0) {
+ /* XXX move me to pcireg.h */
+ snprintb(bitbuf, sizeof(bitbuf), "\177\020"
+ "b\000" "RECEIVER_ERROR\0"
+ "b\006" "BAD_TLP\0"
+ "b\007" "BAD_DLLP\0"
+ "b\010" "REPLAY_NUM_ROLLOVER\0"
+ "b\014" "REPLAY_TIMER_TIMEOUT\0"
+ "b\015" "ADVISORY_NF_ERROR\0"
+ "b\016" "INTERNAL_ERROR\0"
+ "b\017" "HEADER_LOG_OVERFLOW\0"
+ "\0", cor_status);
+ aprint_error("%s: hardware corrected error: %s"
+ " (mask=3D0x%"PRIx32")\n",
+ devbuf, bitbuf, (uint32_t)cor_mask);
+ }
+ }
+
+ /*
+ * If we have PCIe at all, read and write back any error
+ * status.
+ */
+ if (pci_get_capability(pc, tag, PCI_CAP_PCIEXPRESS, &pcie, NULL)) {
+ pcireg_t dcsr =3D pci_conf_read(pc, tag, pcie + PCIE_DCSR);
+ uint16_t dsr =3D __SHIFTOUT(dcsr, __BITS(31,16));
+
+ /*
+ * If any status bits are set, acknowledge all status
+ * bits, write back control bits unchanged, and print
+ * the status.
+ */
+ if (dsr !=3D 0) {
+ pci_conf_write(pc, tag, pcie + PCIE_DCSR, dcsr);
+
+ /* XXX move me to pcireg.h; note: high half of DCSR */
+ snprintb(bitbuf, sizeof(bitbuf), "\177\020"
+ "b\000" "CORRECTABLE_ERROR\0"
+ "b\001" "NONFATAL_UNCORRECTABLE_ERROR\0"
+ "b\002" "FATAL_ERROR\0"
+ "b\003" "UNSUPPORTED_REQUEST\0"
+ "b\004" "AUX_POWER\0"
+ "b\005" "TRANSACTIONS_PENDING\0"
+ "\0", dsr);
+ aprint_error("%s: hardware error: DSR=3D%s\n",
+ devbuf, bitbuf);
+ }
+ }
+}
diff -r b4e17a9d10b4 -r d7eb1dff835c sys/dev/pci/pci_error.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/dev/pci/pci_error.h Thu Oct 24 20:08:59 2024 +0000
@@ -0,0 +1,38 @@
+/* $NetBSD$ */
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTO=
RS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIM=
ITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICU=
LAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTO=
RS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF =
THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _DEV_PCI_PCI_ERROR_H_
+#define _DEV_PCI_PCI_ERROR_H_
+
+struct cper_pcie_error;
+struct pci_attach_args;
+
+void pci_cper_error(const struct cper_pcie_error *);
+void pci_error(const struct pci_attach_args *);
+
+#endif /* _DEV_PCI_PCI_ERROR_H_ */
--=_UO/DuVWULAgwAF6z8mwyycn6j85jhood--
Home |
Main Index |
Thread Index |
Old Index