Port-amd64 archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: WDCTL_RST



On Fri, Aug 24, 2018 at 04:59:16PM +0100, Patrick Welche wrote:
> I updated the BIOS for an Asus Prime X370-PRO from version 3803 to 4012,
> and now all of the real disks fail with "clearing WDCTL_RST failed".
...
> ahcisata0 channel 3: timeout sending FIS
> autoconfiguration error: ahcisata0 channel 3: clearing WDCTL_RST failed for drive 15
...

I connected a serial console and ran a kernel with all the debugging switched
on. I now see:

[   3.5938151] Kernel lock error: _kernel_lock,220: spinout

[   3.6038214] lock address : 0xffffffff81897100 type     :               spin
...

ddb shows:

[Locks tracked through CPUs]
Locks held on CPU 7:
Lock 0 (initialized at main)
lock address : 0xffffffff81897100 type     :               spin
initialized  : 0xffffffff80e326fc
shared holds :                  0 exclusive:                  1
shares wanted:                  0 exclusive:                  7
current cpu  :                 12 last held:                  7
current lwp  : 0xffffe5e85e955860 last held: 0xffffe5e85ebff980
last locked* : 0xffffffff809cc1ed unlocked : 0xffffffff809cf198
[   3.7877692] curcpu holds :                  0 wanted by: 0xffffe5e85e955860
trace: pid 0 lid 116 at 0xffffbb02595dddf0
sleepq_block() at netbsd:sleepq_block+0x91
kpause() at netbsd:kpause+0xed
ahci_exec_fis() at netbsd:ahci_exec_fis+0x126
ahci_do_reset_drive() at netbsd:ahci_do_reset_drive+0x266
ahci_probe_drive() at netbsd:ahci_probe_drive+0x172
atabusconfig() at netbsd:atabusconfig+0x1ff
atabus_thread() at netbsd:atabus_thread+0x83
           
Lock 1 (initialized at ata_channel_init)
lock address : 0xffffbb00413ca6e0 type     :               spin
initialized  : 0xffffffff80281613
shared holds :                  0 exclusive:                  1
shares wanted:                  0 exclusive:                  0
current cpu  :                 12 last held:                  7
current lwp  : 0xffffe5e85e955860 last held: 0xffffe5e85ebff980
last locked* : 0xffffffff809cf1ca unlocked : 0xffffffff809cf12b
[   3.7877692] owner field  : 0x0000000000000600 wait/spin:                0/1
trace: pid 0 lid 116 at 0xffffbb02595dddf0
sleepq_block() at netbsd:sleepq_block+0x91
kpause() at netbsd:kpause+0xed
ahci_exec_fis() at netbsd:ahci_exec_fis+0x126
ahci_do_reset_drive() at netbsd:ahci_do_reset_drive+0x266
ahci_probe_drive() at netbsd:ahci_probe_drive+0x172
atabusconfig() at netbsd:atabusconfig+0x1ff
atabus_thread() at netbsd:atabus_thread+0x83
           
Lock 2 (initialized at kprintf_init)
lock address : 0xffffffff81e2f3c8 type     :               spin
initialized  : 0xffffffff809f8846
shared holds :                  0 exclusive:                  1
shares wanted:                  0 exclusive:                  0
current cpu  :                 12 last held:                  7
current lwp  : 0xffffe5e85e955860 last held: 0xffffe5e85ebff980
last locked* : 0xffffffff809fa69f unlocked : 0xffffffff809fa72d
[   3.7877692] owner field  : 0x0000000000000800 wait/spin:                0/1
trace: pid 0 lid 116 at 0xffffbb02595dddf0
sleepq_block() at netbsd:sleepq_block+0x91
kpause() at netbsd:kpause+0xed
ahci_exec_fis() at netbsd:ahci_exec_fis+0x126
ahci_do_reset_drive() at netbsd:ahci_do_reset_drive+0x266
ahci_probe_drive() at netbsd:ahci_probe_drive+0x172
atabusconfig() at netbsd:atabusconfig+0x1ff
atabus_thread() at netbsd:atabus_thread+0x83
...

Attached is the patch I applied to ahcisata_core.c in case it is my
aprint_debug()s which broke it...

Cheers,

Patrick
Index: ahcisata_core.c
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/ahcisata_core.c,v
retrieving revision 1.62
diff -u -r1.62 ahcisata_core.c
--- ahcisata_core.c	9 Jul 2018 10:44:44 -0000	1.62
+++ ahcisata_core.c	9 Sep 2018 20:51:13 -0000
@@ -51,7 +51,7 @@
 #include "atapibus.h"
 
 #ifdef AHCI_DEBUG
-int ahcidebug_mask = 0;
+int ahcidebug_mask = 0x0fffffff; /* 0; */
 #endif
 
 static void ahci_probe_drive(struct ata_channel *);
@@ -715,16 +715,23 @@
 	 * If we are allowed to sleep, wait a tick each round.
 	 * Otherwise delay for 10ms on each round.
 	 */
-	if (flags & AT_WAIT)
+	if (flags & AT_WAIT) {
 		timeout = MAX(1, mstohz(timeout));
-	else
+		aprint_debug("%s channel %d: AT_WAIT timeout %d\n",
+			    AHCINAME(sc), chp->ch_channel, timeout);
+	} else {
 		timeout = timeout / 10;
+		aprint_debug("%s channel %d: timeout %d\n",
+			    AHCINAME(sc), chp->ch_channel, timeout);
+	}
 
 	AHCI_CMDH_SYNC(sc, achp, slot,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	/* start command */
 	AHCI_WRITE(sc, AHCI_P_CI(chp->ch_channel), 1U << slot);
 	for (i = 0; i < timeout; i++) {
+		aprint_debug("%s channel %d: timeout loop %d\n",
+			    AHCINAME(sc), chp->ch_channel, i);
 		if ((AHCI_READ(sc, AHCI_P_CI(chp->ch_channel)) & (1U << slot)) ==
 		    0)
 			return 0;
@@ -817,7 +824,7 @@
 	cmd_tbl->cmdt_cfis[fis_type] = RHD_FISTYPE;
 	cmd_tbl->cmdt_cfis[rhd_c] = drive;
 	cmd_tbl->cmdt_cfis[rhd_control] = 0;
-	switch(ahci_exec_fis(chp, 310, flags, xfer->c_slot)) {
+	switch(ahci_exec_fis(chp, 620, flags, xfer->c_slot)) {
 	case ERR_DF:
 	case TIMEOUT:
 		if ((sc->sc_ahci_quirks & AHCI_QUIRK_BADPMPRESET) != 0 &&


Home | Main Index | Thread Index | Old Index