NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: kern/58775 (apei(4) spamming console)



The following reply was made to PR kern/58775; it has been noted by GNATS.

From: Taylor R Campbell <riastradh%NetBSD.org@localhost>
To: "Hauke Fath (SPG)" <hf%spg.tu-darmstadt.de@localhost>
Cc: gnats-bugs%netbsd.org@localhost, gnats-admin%netbsd.org@localhost
Subject: Re: kern/58775 (apei(4) spamming console)
Date: Thu, 24 Oct 2024 20:27:07 +0000

 This is a multi-part message in MIME format.
 --=_UO/DuVWULAgwAF6z8mwyycn6j85jhood
 
 Thanks, can you please try the attached patch, with apei(4) enabled
 again in your kernel config?
 
 (By the way, FYI: You can disable apei(4) at boot-time, without
 building a new kernel, by putting `userconf=disable apei' on its own
 line in boot.cfg, or by adding `userconf disable apei' to the
 semicolon-separated list of commands in one of the `menu=...' lines in
 boot.cfg.)
 
 --=_UO/DuVWULAgwAF6z8mwyycn6j85jhood
 Content-Type: text/plain; charset="ISO-8859-1"; name="pr58775-apeipcieerror"
 Content-Transfer-Encoding: quoted-printable
 Content-Disposition: attachment; filename="pr58775-apeipcieerror.patch"
 
 diff -r b4e17a9d10b4 -r d7eb1dff835c sys/dev/acpi/apei.c
 --- a/sys/dev/acpi/apei.c	Mon Oct 21 15:57:45 2024 +0000
 +++ b/sys/dev/acpi/apei.c	Thu Oct 24 20:08:59 2024 +0000
 @@ -58,6 +58,7 @@
  #include <dev/acpi/apei_hestvar.h>
  #include <dev/acpi/apei_interp.h>
  #include <dev/acpi/apeivar.h>
 +#include <dev/pci/pci_error.h>
 =20
  #define	_COMPONENT	ACPI_RESOURCE_COMPONENT
  ACPI_MODULE_NAME	("apei")
 @@ -313,10 +314,10 @@ apei_format_guid(const struct uuid *uuid
  {
 =20
  	snprintf(guidstr, 69, "{0x%08x,0x%04x,0x%04x,"
 -	    "0x%02x%02x,"
 -	    "{0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x}}",
 +	    "{0x%02x,0x%02x,"
 +	    "0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x}}",
  	    uuid->time_low, uuid->time_mid, uuid->time_hi_and_version,
 -	    uuid->clock_seq_hi_and_reserved, uuid->clock_seq_hi_and_reserved,
 +	    uuid->clock_seq_hi_and_reserved, uuid->clock_seq_low,
  	    uuid->node[0], uuid->node[1], uuid->node[2],
  	    uuid->node[3], uuid->node[4], uuid->node[5]);
  }
 @@ -356,6 +357,8 @@ static const char *const apei_gede_sever
  };
 =20
  /*
 + * N.2.5. Memory Error Section
 + *
   * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.htm=
 l#memory-error-section
   */
  static const struct uuid CPER_MEMORY_ERROR_SECTION =3D
 @@ -475,6 +478,98 @@ apei_cper_memory_error_report(struct ape
  }
 =20
  /*
 + * N.2.7. PCI Express Error Section
 + *
 + * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.htm=
 l#pci-express-error-section
 + */
 +static const struct uuid CPER_PCIE_ERROR_SECTION =3D
 +    {0xd995e954,0xbbc1,0x430f,0xad,0x91,{0xb4,0x4d,0xcb,0x3c,0x6f,0x35}};
 +
 +static const char *const cper_pcie_error_port_type[] =3D {
 +#define	F(LN, SN, V)	[LN] =3D #SN,
 +	CPER_PCIE_ERROR_PORT_TYPES(F)
 +#undef	F
 +};
 +
 +static void
 +apei_cper_pcie_error_report(struct apei_softc *sc, const void *buf, size_t=
  len,
 +    const char *ctx)
 +{
 +	const struct cper_pcie_error *PE =3D buf;
 +	char bitbuf[1024];
 +
 +	snprintb(bitbuf, sizeof(bitbuf),
 +	    CPER_PCIE_ERROR_VALIDATION_BITS_FMT, PE->ValidationBits);
 +	aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=3D%s\n", ctx, bitbuf);
 +	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_PORT_TYPE) {
 +		const uint32_t t =3D PE->PortType;
 +		const char *n =3D t < __arraycount(cper_pcie_error_port_type)
 +		    ? cper_pcie_error_port_type[t] : NULL;
 +
 +		if (n) {
 +			device_printf(sc->sc_dev, "%s: PortType=3D%"PRIu32
 +			    " (%s)\n", ctx, t, n);
 +		} else {
 +			device_printf(sc->sc_dev, "%s: PortType=3D%"PRIu32"\n",
 +			    ctx, t);
 +		}
 +	}
 +	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_VERSION) {
 +		/* XXX BCD */
 +		device_printf(sc->sc_dev, "%s: Version=3D0x%"PRIx32"\n",
 +		    ctx, PE->Version);
 +	}
 +	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_COMMAND_STATUS) {
 +		device_printf(sc->sc_dev, "%s: CommandStatus=3D0x%04"PRIx32"\n",
 +		    ctx, PE->CommandStatus);
 +	}
 +	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_DEVICE_ID) {
 +		/* XXX decode vendor/product/class/fun/dev/seg/bus */
 +		char hex[2*sizeof(PE->DeviceID) + 1];
 +		const unsigned char *p =3D (const void *)&PE->DeviceID;
 +		unsigned i;
 +
 +		for (i =3D 0; i < sizeof(PE->DeviceID); i++)
 +			snprintf(hex + 2*i, sizeof(hex) - 2*i, "%02hhx", p[i]);
 +		device_printf(sc->sc_dev, "%s: DeviceID=3D{%s}\n", ctx, hex);
 +	}
 +	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_DEVICE_SERIAL) {
 +		device_printf(sc->sc_dev, "%s: DeviceSerial=3D{%016"PRIx64"}\n",
 +		    ctx, PE->DeviceSerial);
 +	}
 +	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_BRIDGE_CONTROL_STATUS) {
 +		device_printf(sc->sc_dev, "%s: BridgeControlStatus=3D%"PRIx32
 +		    "\n", ctx, PE->BridgeControlStatus);
 +	}
 +	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_CAPABILITY_STRUCTURE) {
 +		char hex[2*sizeof(PE->CapabilityStructure) + 1];
 +		unsigned i;
 +
 +		for (i =3D 0; i < sizeof(PE->CapabilityStructure); i++) {
 +			snprintf(hex + 2*i, sizeof(hex) - 2*i, "%02hhx",
 +			    PE->CapabilityStructure[i]);
 +		}
 +		device_printf(sc->sc_dev, "%s: CapabilityStructure=3D{%s}\n",
 +		    ctx, hex);
 +	}
 +	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_AER_INFO) {
 +		char hex[2*sizeof(PE->AERInfo) + 1];
 +		unsigned i;
 +
 +		for (i =3D 0; i < sizeof(PE->AERInfo); i++) {
 +			snprintf(hex + 2*i, sizeof(hex) - 2*i, "%02hhx",
 +			    PE->AERInfo[i]);
 +		}
 +		device_printf(sc->sc_dev, "%s: AERInfo=3D{%s}\n", ctx, hex);
 +	}
 +
 +	/*
 +	 * Let the PCI subsystem handle it.
 +	 */
 +	pci_cper_error(PE);
 +}
 +
 +/*
   * apei_cper_reports
   *
   *	Table of known Common Platform Error Record types, symbolic
 @@ -494,6 +589,9 @@ static const struct apei_cper_report {
  	{ "memory", &CPER_MEMORY_ERROR_SECTION,
  	  sizeof(struct cper_memory_error),
  	  apei_cper_memory_error_report },
 +	{ "PCIe", &CPER_PCIE_ERROR_SECTION,
 +	  sizeof(struct cper_pcie_error),
 +	  apei_cper_pcie_error_report },
  };
 =20
  /*
 diff -r b4e17a9d10b4 -r d7eb1dff835c sys/dev/acpi/apei_cper.h
 --- a/sys/dev/acpi/apei_cper.h	Mon Oct 21 15:57:45 2024 +0000
 +++ b/sys/dev/acpi/apei_cper.h	Thu Oct 24 20:08:59 2024 +0000
 @@ -62,14 +62,14 @@ struct cper_header {
  } __packed;
  __CTASSERT(sizeof(struct cper_header) =3D=3D 128);
 =20
 -enum {				/* struct cper_header::error_severity */
 +enum {				/* struct cper_header::ErrorSeverity */
  	CPER_ERROR_SEVERITY_RECOVERABLE		=3D 0,
  	CPER_ERROR_SEVERITY_FATAL		=3D 1,
  	CPER_ERROR_SEVERITY_CORRECTED		=3D 2,
  	CPER_ERROR_SEVERITY_INFORMATIONAL	=3D 3,
  };
 =20
 -enum {				/* struct cper_header::validation_bits */
 +enum {				/* struct cper_header::ValidationBits */
  	CPER_VALID_PLATFORM_ID		=3D __BIT(0),
  	CPER_VALID_TIMESTAMP		=3D __BIT(1),
  	CPER_VALID_PARTITION_ID		=3D __BIT(2),
 @@ -78,7 +78,7 @@ enum {				/* struct cper_header::validat
  /*
   * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.htm=
 l#error-record-header-flags
   */
 -enum {				/* struct cper_header::flags */
 +enum {				/* struct cper_header::Flags */
  	CPER_HW_ERROR_FLAG_RECOVERED	=3D __BIT(0),
  	CPER_HW_ERROR_FLAG_PREVERR	=3D __BIT(1),
  	CPER_HW_ERROR_FLAG_SIMULATED	=3D __BIT(2),
 @@ -110,6 +110,8 @@ enum {
  	"\0"
 =20
  /*
 + * N.2.5. Memory Error Section
 + *
   * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.htm=
 l#memory-error-section
   *
   * Type: {0xa5bc1114,0x6f64,0x4ede,{0xb8,0x63,0x3e,0x83,0xed,0x7c,0x83,0xb=
 1}}
 @@ -144,7 +146,7 @@ struct cper_memory_error_ext {
  } __packed;
  __CTASSERT(sizeof(struct cper_memory_error_ext) =3D=3D 80);
 =20
 -enum {				/* struct cper_memory_error::validation_bits */
 +enum {				/* struct cper_memory_error::ValidationBits */
  	CPER_MEMORY_ERROR_VALID_ERROR_STATUS		=3D __BIT(0),
  	CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS	=3D __BIT(1),
  	CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS_MASK	=3D __BIT(2),
 @@ -194,7 +196,7 @@ enum {				/* struct cper_memory_error::v
  	"b\025"	"CHIP_ID\0"						      \
  	"\0"
 =20
 -enum {				/* struct cper_memory_error::bank */
 +enum {				/* struct cper_memory_error::Bank */
  	CPER_MEMORY_ERROR_BANK_ADDRESS	=3D __BITS(7,0),
  	CPER_MEMORY_ERROR_BANK_GROUP	=3D __BITS(15,8),
  };
 @@ -219,16 +221,92 @@ enum {				/* struct cper_memory_error::b
  	F(CPER_MEMORY_ERROR_PHYSMEM_MAPOUT_EVENT, PHYSMEM_MAPOUT_EVENT, 15)   \
  	/* end of CPER_MEMORY_ERROR_TYPES */
 =20
 -enum cper_memory_error_type { /* struct cper_memory_error::memory_error_ty=
 pe */
 +enum cper_memory_error_type { /* struct cper_memory_error::MemoryErrorType=
  */
  #define	CPER_MEMORY_ERROR_TYPE_DEF(LN, SN, V)	LN =3D V,
  	CPER_MEMORY_ERROR_TYPES(CPER_MEMORY_ERROR_TYPE_DEF)
  #undef	CPER_MEMORY_ERROR_TYPE_DEF
  };
 =20
 -enum {				/* struct cper_memory_error_ext::extended */
 +enum {				/* struct cper_memory_error_ext::Extended */
  	CPER_MEMORY_ERROR_EXTENDED_ROWBIT16		=3D __BIT(0),
  	CPER_MEMORY_ERROR_EXTENDED_ROWBIT17		=3D __BIT(1),
  	CPER_MEMORY_ERROR_EXTENDED_CHIPID		=3D __BITS(7,5),
  };
 =20
 +/*
 + * N.2.7. PCI Express Error Section
 + *
 + * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.htm=
 l#pci-express-error-section
 + *
 + * Type: {0xd995e954,0xbbc1,0x430f,{0xad,0x91,0xb4,0x4d,0xcb,0x3c,0x6f,0x3=
 5}}
 + */
 +
 +struct cper_pcie_error {
 +	uint64_t	ValidationBits;
 +	uint32_t	PortType;
 +	uint32_t	Version;
 +	uint32_t	CommandStatus;
 +	uint32_t	Reserved0;
 +	struct {
 +		uint8_t		VendorID[2];
 +		uint8_t		DeviceID[2]; /* product */
 +		uint8_t		ClassCode[3];
 +		uint8_t		Function;
 +		uint8_t		Device;
 +		uint8_t		Segment[2];
 +		uint8_t		PrimaryBus;
 +		uint8_t		SecondaryBus;
 +		uint8_t		Slot[2]; /* bits 0:2 resv, bits 3:15 slot */
 +		uint8_t		Reserved0;
 +	}		DeviceID;
 +	uint64_t	DeviceSerial;
 +	uint32_t	BridgeControlStatus;
 +	uint8_t		CapabilityStructure[60];
 +	uint8_t		AERInfo[96];
 +};
 +__CTASSERT(sizeof(struct cper_pcie_error) =3D=3D 208);
 +
 +enum {				/* struct cper_pcie_error::ValidationBits */
 +	CPER_PCIE_ERROR_VALID_PORT_TYPE			=3D __BIT(0),
 +	CPER_PCIE_ERROR_VALID_VERSION			=3D __BIT(1),
 +	CPER_PCIE_ERROR_VALID_COMMAND_STATUS		=3D __BIT(2),
 +	CPER_PCIE_ERROR_VALID_DEVICE_ID			=3D __BIT(3),
 +	CPER_PCIE_ERROR_VALID_DEVICE_SERIAL		=3D __BIT(4),
 +	CPER_PCIE_ERROR_VALID_BRIDGE_CONTROL_STATUS	=3D __BIT(5),
 +	CPER_PCIE_ERROR_VALID_CAPABILITY_STRUCTURE	=3D __BIT(6),
 +	CPER_PCIE_ERROR_VALID_AER_INFO			=3D __BIT(7),
 +};
 +
 +#define	CPER_PCIE_ERROR_VALIDATION_BITS_FMT	"\177\020"		      \
 +	"b\000"	"PORT_TYPE\0"						      \
 +	"b\001"	"VERSION\0"						      \
 +	"b\002"	"COMMAND_STATUS\0"					      \
 +	"b\003"	"DEVICE_ID\0"						      \
 +	"b\004"	"DEVICE_SERIAL\0"					      \
 +	"b\005"	"BRIDGE_CONTROL_STATUS\0"				      \
 +	"b\006"	"CAPABILITY_STRUCTURE\0"				      \
 +	"b\007"	"AER_INFO\0"						      \
 +	"\0"
 +
 +#define	CPER_PCIE_ERROR_PORT_TYPES(F)					      \
 +	F(CPER_PCIE_ERROR_PORT_TYPE_PCIE_ENDPOINT, PCIE_ENDPOINT, 0)	      \
 +	F(CPER_PCIE_ERROR_PORT_TYPE_LEGACY_PCI_ENDPOINT, LEGACY_PCI_ENDPOINT, \
 +	    1)								      \
 +	F(CPER_PCIE_ERROR_PORT_TYPE_ROOTPORT5_UPSTREAMSWITCH,		      \
 +	    ROOTPORT5_UPSTREAMSWITCH, 4)				      \
 +	F(CPER_PCIE_ERROR_PORT_TYPE_DOWNSTREAMSWITCH, DOWNSTREAMSWITCH, 6)    \
 +	F(CPER_PCIE_ERROR_PORT_TYPE_PCIE_PCI_BRIDGE, PCIE_PCI_BRIDGE, 7)      \
 +	F(CPER_PCIE_ERROR_PORT_TYPE_PCI_PCIE_BRIDGE, PCI_PCIE_BRIDGE, 8)      \
 +	F(CPER_PCIE_ERROR_PORT_TYPE_RCIEP_DEV, RCIEP_DEV, 9)		      \
 +		/* Root Complex Integrated Endpoint Device */		      \
 +	F(CPER_PCIE_ERROR_PORT_TYPE_RCEC, RCEC, 10)			      \
 +		/* Root Complex Event Collector */			      \
 +	/* end of CPER_PCIE_ERROR_PORT_TYPES */
 +
 +enum cper_pcie_error_port_type { /* struct cper_pcie_error::PortType */
 +#define	CPER_PCIE_ERROR_PORT_TYPE_DEF(LN, SN, V)	LN =3D V,
 +	CPER_PCIE_ERROR_PORT_TYPES(CPER_PCIE_ERROR_PORT_TYPE_DEF)
 +#undef	CPER_PCIE_ERROR_PORT_TYPE_DEF
 +};
 +
  #endif	/* _SYS_DEV_ACPI_APEI_CPER_H_ */
 diff -r b4e17a9d10b4 -r d7eb1dff835c sys/dev/acpi/apei_hest.c
 --- a/sys/dev/acpi/apei_hest.c	Mon Oct 21 15:57:45 2024 +0000
 +++ b/sys/dev/acpi/apei_hest.c	Thu Oct 24 20:08:59 2024 +0000
 @@ -400,6 +400,8 @@ apei_hest_attach_ghes(struct apei_softc=20
  	 */
  	switch (ghes->Notify.Type) {
  	case ACPI_HEST_NOTIFY_POLLED:
 +		if (ghes->Notify.PollInterval =3D=3D 0) /* paranoia */
 +			break;
  		callout_init(&src->as_ch, CALLOUT_MPSAFE);
  		callout_setfunc(&src->as_ch, &apei_hest_ghes_poll, src);
  		callout_schedule(&src->as_ch, 0);
 @@ -451,6 +453,8 @@ apei_hest_detach_ghes(struct apei_softc=20
  	 */
  	switch (ghes->Notify.Type) {
  	case ACPI_HEST_NOTIFY_POLLED:
 +		if (ghes->Notify.PollInterval =3D=3D 0) /* paranoia */
 +			break;
  		callout_halt(&src->as_ch, NULL);
  		callout_destroy(&src->as_ch);
  		break;
 @@ -583,6 +587,8 @@ apei_hest_attach_ghes_v2(struct apei_sof
  	 */
  	switch (ghes_v2->Notify.Type) {
  	case ACPI_HEST_NOTIFY_POLLED:
 +		if (ghes_v2->Notify.PollInterval =3D=3D 0) /* paranoia */
 +			break;
  		callout_init(&src->as_ch, CALLOUT_MPSAFE);
  		callout_setfunc(&src->as_ch, &apei_hest_ghes_v2_poll, src);
  		callout_schedule(&src->as_ch, 0);
 @@ -634,6 +640,8 @@ apei_hest_detach_ghes_v2(struct apei_sof
  	 */
  	switch (ghes_v2->Notify.Type) {
  	case ACPI_HEST_NOTIFY_POLLED:
 +		if (ghes_v2->Notify.PollInterval =3D=3D 0) /* paranoia */
 +			break;
  		callout_halt(&src->as_ch, NULL);
  		callout_destroy(&src->as_ch);
  		break;
 diff -r b4e17a9d10b4 -r d7eb1dff835c sys/dev/pci/files.pci
 --- a/sys/dev/pci/files.pci	Mon Oct 21 15:57:45 2024 +0000
 +++ b/sys/dev/pci/files.pci	Thu Oct 24 20:08:59 2024 +0000
 @@ -19,6 +19,7 @@ defflag	opt_pciide.h	PCIIDE_CMD064x_DISA
  device	pci {[dev =3D -1], [function =3D -1]}
  attach	pci at pcibus
  file	dev/pci/pci.c			pci			needs-flag
 +file	dev/pci/pci_error.c		pci
  file	dev/pci/pci_map.c		pci
  file	dev/pci/pci_quirks.c		pci
  file	dev/pci/pci_resource.c		pci & pci_resource
 diff -r b4e17a9d10b4 -r d7eb1dff835c sys/dev/pci/pci_error.c
 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
 +++ b/sys/dev/pci/pci_error.c	Thu Oct 24 20:08:59 2024 +0000
 @@ -0,0 +1,257 @@
 +/*	$NetBSD$	*/
 +
 +/*-
 + * Copyright (c) 2024 The NetBSD Foundation, Inc.
 + * All rights reserved.
 + *
 + * Redistribution and use in source and binary forms, with or without
 + * modification, are permitted provided that the following conditions
 + * are met:
 + * 1. Redistributions of source code must retain the above copyright
 + *    notice, this list of conditions and the following disclaimer.
 + * 2. Redistributions in binary form must reproduce the above copyright
 + *    notice, this list of conditions and the following disclaimer in the
 + *    documentation and/or other materials provided with the distribution.
 + *
 + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTO=
 RS
 + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIM=
 ITED
 + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICU=
 LAR
 + * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTO=
 RS
 + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF =
 THE
 + * POSSIBILITY OF SUCH DAMAGE.
 + */
 +
 +/*
 + * PCI error reporting
 + */
 +
 +#include <sys/cdefs.h>
 +__KERNEL_RCSID(0, "$NetBSD$");
 +
 +#include <dev/acpi/apei_cper.h>	/* XXX not APEI- or even ACPI-specific */
 +#include <dev/pci/pci_error.h>
 +#include <dev/pci/pcireg.h>
 +#include <dev/pci/pcivar.h>
 +
 +static int
 +pci_cper_match(void *cookie, const struct pci_attach_args *pa)
 +{
 +	const struct cper_pcie_error *PE =3D cookie;
 +
 +	if (le16dec(PE->DeviceID.Segment) !=3D pci_get_segment(pa->pa_pc))
 +		return 0;
 +	if (PE->DeviceID.PrimaryBus !=3D pa->pa_bus)
 +		return 0;
 +	if (PE->DeviceID.Device !=3D pa->pa_device)
 +		return 0;
 +	if (PE->DeviceID.Function !=3D pa->pa_function)
 +		return 0;
 +
 +	return 1;
 +}
 +
 +/*
 + * pci_cper_error(PE)
 + *
 + *	Act on notification of a PCI error report via Common Platform
 + *	Error Record.
 + */
 +void
 +pci_cper_error(const struct cper_pcie_error *PE)
 +{
 +	struct pci_attach_args pa;
 +
 +	/*
 +	 * If there's no device ID, nothing for us to do.
 +	 *
 +	 * XXX Report this back to the caller?
 +	 */
 +	if ((PE->ValidationBits & CPER_PCIE_ERROR_VALID_DEVICE_ID) =3D=3D 0)
 +		return;
 +
 +	/*
 +	 * Find a matching device.  If none, do nothing -- we can't do
 +	 * anything to acknowledge this.
 +	 */
 +	if (!pci_find_device1(&pa, pci_cper_match, __UNCONST(PE))) {
 +		char devbuf[sizeof "PCI 0000:00:00.000"];
 +
 +		snprintf(devbuf, sizeof(devbuf), "PCI %04x:%02x:%02x.%u",
 +		    le16dec(PE->DeviceID.Segment),
 +		    PE->DeviceID.PrimaryBus,
 +		    PE->DeviceID.Device,
 +		    PE->DeviceID.Function);
 +		aprint_debug("%s: hardware error in unknown device\n", devbuf);
 +		return;
 +	}
 +
 +	/*
 +	 * Handle via the pci_attach_args that we now have.
 +	 */
 +	pci_error(&pa);
 +}
 +
 +/*
 + * pci_error(pa)
 + *
 + *	Check for, report, and acknowledge any errors in the PCI device
 + *	described by pa.
 + */
 +void
 +pci_error(const struct pci_attach_args *pa)
 +{
 +	char devbuf[sizeof "PCI 0000:00:00.000"];
 +	const pci_chipset_tag_t pc =3D pa->pa_pc;
 +	const pcitag_t tag =3D pa->pa_tag;
 +	pcireg_t aer, pcie;
 +	char bitbuf[1024];
 +
 +	snprintf(devbuf, sizeof(devbuf), "PCI %04x:%02x:%02x.%u",
 +	    pci_get_segment(pa->pa_pc),
 +	    pa->pa_bus, pa->pa_device, pa->pa_function);
 +
 +	/*
 +	 * If we have Advanced Error Reporting capability, read and
 +	 * write back any uncorrectable or corrected error status.
 +	 */
 +	if (pci_get_ext_capability(pc, tag, PCI_EXTCAP_AER, &aer, NULL)) {
 +		pcireg_t uc_status, uc_mask, uc_sev;
 +		pcireg_t control;
 +		pcireg_t cor_status, cor_mask;
 +
 +		/*
 +		 * Read the status, mask, severity, and control (which
 +		 * has the number of the first error bit).
 +		 */
 +		uc_status =3D pci_conf_read(pc, tag, aer + PCI_AER_UC_STATUS);
 +		uc_mask =3D pci_conf_read(pc, tag, aer + PCI_AER_UC_MASK);
 +		uc_sev =3D pci_conf_read(pc, tag, aer + PCI_AER_UC_SEVERITY);
 +
 +		cor_status =3D pci_conf_read(pc, tag, aer + PCI_AER_COR_STATUS);
 +		cor_mask =3D pci_conf_read(pc, tag, aer + PCI_AER_COR_MASK);
 +
 +		control =3D pci_conf_read(pc, tag, aer + PCI_AER_CAP_CONTROL);
 +
 +		/*
 +		 * Acknowledge error status bits.
 +		 */
 +		pci_conf_write(pc, tag, aer + PCI_AER_UC_STATUS, uc_status);
 +		pci_conf_write(pc, tag, aer + PCI_AER_COR_STATUS, cor_status);
 +
 +			/* XXX move me to pcireg.h */
 +#define	PCI_AER_UC_STATUS_FMT	"\177\020"				      \
 +	"b\000"	"UNDEFINED\0"						      \
 +	"b\004"	"DL_PROTOCOL_ERROR\0"					      \
 +	"b\005"	"SURPRISE_DOWN_ERROR\0"					      \
 +	"b\014"	"POISONED_TLP\0"					      \
 +	"b\015"	"FC_PROTOCOL_ERROR\0"					      \
 +	"b\016"	"COMPLETION_TIMEOUT\0"					      \
 +	"b\017"	"COMPLETION_ABORT\0"					      \
 +	"b\020"	"UNEXPECTED_COMPLETION\0"				      \
 +	"b\021"	"RECEIVER_OVERFLOW\0"					      \
 +	"b\022"	"MALFORMED_TLP\0"					      \
 +	"b\023"	"ECRC_ERROR\0"						      \
 +	"b\024"	"UNSUPPORTED_REQUEST_ERROR\0"				      \
 +	"b\025"	"ACS_VIOLATION\0"					      \
 +	"b\026"	"INTERNAL_ERROR\0"					      \
 +	"b\027"	"MC_BLOCKED_TLP\0"					      \
 +	"b\030"	"ATOMIC_OP_EGRESS_BLOCKED\0"				      \
 +	"b\031"	"TLP_PREFIX_BLOCKED_ERROR\0"				      \
 +	"b\032"	"POISONTLP_EGRESS_BLOCKED\0"				      \
 +	"\0"
 +
 +		/*
 +		 * Report uncorrectable fatal errors.
 +		 */
 +		if ((uc_status & uc_sev) !=3D 0) {
 +			snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT,
 +			    uc_status & uc_sev);
 +			aprint_error("%s: hardware fatal uncorrectable error:"
 +			    " %s (mask=3D0x%"PRIx32")\n",
 +			    devbuf, bitbuf,
 +			    (uint32_t)uc_mask);
 +		}
 +
 +		/*
 +		 * Report uncorrectable non-fatal errors.
 +		 */
 +		if ((uc_status & ~uc_sev) !=3D 0) {
 +			snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT,
 +			    uc_status & ~uc_sev);
 +			aprint_error("%s: hardware uncorrectable error: %s"
 +			    " (mask=3D0x%"PRIx32")\n",
 +			    devbuf, bitbuf,
 +			    (uint32_t)uc_mask);
 +		}
 +
 +		/*
 +		 * Show the first error, if any.
 +		 */
 +		if (uc_status !=3D 0) {
 +			pcireg_t first =3D __SHIFTOUT(control,
 +			    PCI_AER_FIRST_ERROR_PTR);
 +			snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT,
 +			    (uint32_t)1 << first);
 +			aprint_error("%s: hardware first uncorrectable error:"
 +			    " %s\n",
 +			    devbuf, bitbuf);
 +		}
 +
 +		/*
 +		 * Report corrected errors.
 +		 *
 +		 * XXX sysctl knob to suppress this
 +		 */
 +		if (cor_status !=3D 0) {
 +			/* XXX move me to pcireg.h */
 +			snprintb(bitbuf, sizeof(bitbuf), "\177\020"
 +			    "b\000"	"RECEIVER_ERROR\0"
 +			    "b\006"	"BAD_TLP\0"
 +			    "b\007"	"BAD_DLLP\0"
 +			    "b\010"	"REPLAY_NUM_ROLLOVER\0"
 +			    "b\014"	"REPLAY_TIMER_TIMEOUT\0"
 +			    "b\015"	"ADVISORY_NF_ERROR\0"
 +			    "b\016"	"INTERNAL_ERROR\0"
 +			    "b\017"	"HEADER_LOG_OVERFLOW\0"
 +			    "\0", cor_status);
 +			aprint_error("%s: hardware corrected error: %s"
 +			    " (mask=3D0x%"PRIx32")\n",
 +			    devbuf, bitbuf, (uint32_t)cor_mask);
 +		}
 +	}
 +
 +	/*
 +	 * If we have PCIe at all, read and write back any error
 +	 * status.
 +	 */
 +	if (pci_get_capability(pc, tag, PCI_CAP_PCIEXPRESS, &pcie, NULL)) {
 +		pcireg_t dcsr =3D pci_conf_read(pc, tag, pcie + PCIE_DCSR);
 +		uint16_t dsr =3D __SHIFTOUT(dcsr, __BITS(31,16));
 +
 +		/*
 +		 * If any status bits are set, acknowledge all status
 +		 * bits, write back control bits unchanged, and print
 +		 * the status.
 +		 */
 +		if (dsr !=3D 0) {
 +			pci_conf_write(pc, tag, pcie + PCIE_DCSR, dcsr);
 +
 +			/* XXX move me to pcireg.h; note: high half of DCSR */
 +			snprintb(bitbuf, sizeof(bitbuf), "\177\020"
 +			    "b\000"	"CORRECTABLE_ERROR\0"
 +			    "b\001"	"NONFATAL_UNCORRECTABLE_ERROR\0"
 +			    "b\002"	"FATAL_ERROR\0"
 +			    "b\003"	"UNSUPPORTED_REQUEST\0"
 +			    "b\004"	"AUX_POWER\0"
 +			    "b\005"	"TRANSACTIONS_PENDING\0"
 +			    "\0", dsr);
 +			aprint_error("%s: hardware error: DSR=3D%s\n",
 +			    devbuf, bitbuf);
 +		}
 +	}
 +}
 diff -r b4e17a9d10b4 -r d7eb1dff835c sys/dev/pci/pci_error.h
 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
 +++ b/sys/dev/pci/pci_error.h	Thu Oct 24 20:08:59 2024 +0000
 @@ -0,0 +1,38 @@
 +/*	$NetBSD$	*/
 +
 +/*-
 + * Copyright (c) 2024 The NetBSD Foundation, Inc.
 + * All rights reserved.
 + *
 + * Redistribution and use in source and binary forms, with or without
 + * modification, are permitted provided that the following conditions
 + * are met:
 + * 1. Redistributions of source code must retain the above copyright
 + *    notice, this list of conditions and the following disclaimer.
 + * 2. Redistributions in binary form must reproduce the above copyright
 + *    notice, this list of conditions and the following disclaimer in the
 + *    documentation and/or other materials provided with the distribution.
 + *
 + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTO=
 RS
 + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIM=
 ITED
 + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICU=
 LAR
 + * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTO=
 RS
 + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF =
 THE
 + * POSSIBILITY OF SUCH DAMAGE.
 + */
 +
 +#ifndef	_DEV_PCI_PCI_ERROR_H_
 +#define	_DEV_PCI_PCI_ERROR_H_
 +
 +struct cper_pcie_error;
 +struct pci_attach_args;
 +
 +void pci_cper_error(const struct cper_pcie_error *);
 +void pci_error(const struct pci_attach_args *);
 +
 +#endif	/* _DEV_PCI_PCI_ERROR_H_ */
 
 --=_UO/DuVWULAgwAF6z8mwyycn6j85jhood--
 


Home | Main Index | Thread Index | Old Index