Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/jdolecek-ncq]: src/sys/dev/ic update error handling:



details:   https://anonhg.NetBSD.org/src/rev/ade954f5b86d
branches:  jdolecek-ncq
changeset: 822963:ade954f5b86d
user:      jdolecek <jdolecek%NetBSD.org@localhost>
date:      Wed Jul 19 20:21:42 2017 +0000

description:
update error handling:
- switch to ata_timeout()
- stop using ch_status/ch_error for passing state/error, stop setting
  ATACH_IRQ_WAIT in ch_flags; pass the state via the last parameter
  to c_intr() routine
- add NCQ recovery and KILL_REQUEUE
- only call atastart() in c_intr() if there was no error

ahcisata-specific tweaks:
- add some handling for PM in the error recovery using FBS register,
  according to spec it should be independant of actual FBSS feature; untested
  as my hw doesn't support PM

diffstat:

 sys/dev/ic/ahcisata_core.c |  307 +++++++++++++++++++++++++++++---------------
 sys/dev/ic/ahcisatareg.h   |   17 ++-
 sys/dev/ic/ahcisatavar.h   |    6 +-
 3 files changed, 223 insertions(+), 107 deletions(-)

diffs (truncated from 707 to 300 lines):

diff -r 36074f73c2f0 -r ade954f5b86d sys/dev/ic/ahcisata_core.c
--- a/sys/dev/ic/ahcisata_core.c        Wed Jul 19 20:03:29 2017 +0000
+++ b/sys/dev/ic/ahcisata_core.c        Wed Jul 19 20:21:42 2017 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: ahcisata_core.c,v 1.57.6.17 2017/06/27 18:36:03 jdolecek Exp $ */
+/*     $NetBSD: ahcisata_core.c,v 1.57.6.18 2017/07/19 20:21:42 jdolecek Exp $ */
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ahcisata_core.c,v 1.57.6.17 2017/06/27 18:36:03 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ahcisata_core.c,v 1.57.6.18 2017/07/19 20:21:42 jdolecek Exp $");
 
 #include <sys/types.h>
 #include <sys/malloc.h>
@@ -68,7 +68,7 @@
 
 static void ahci_cmd_start(struct ata_channel *, struct ata_xfer *);
 static int  ahci_cmd_complete(struct ata_channel *, struct ata_xfer *, int);
-static void ahci_cmd_done(struct ata_channel *, struct ata_xfer *);
+static void ahci_cmd_done(struct ata_channel *, struct ata_xfer *, int);
 static void ahci_cmd_done_end(struct ata_channel *, struct ata_xfer *);
 static void ahci_cmd_kill_xfer(struct ata_channel *, struct ata_xfer *, int);
 static void ahci_bio_start(struct ata_channel *, struct ata_xfer *);
@@ -77,7 +77,7 @@
 static void ahci_channel_stop(struct ahci_softc *, struct ata_channel *, int);
 static void ahci_channel_start(struct ahci_softc *, struct ata_channel *,
                                int, int);
-static void ahci_timeout(void *);
+static void ahci_channel_recover(struct ahci_softc *, struct ata_channel *);
 static int  ahci_dma_setup(struct ata_channel *, int, void *, size_t, int);
 
 #if NATAPIBUS > 0
@@ -566,8 +566,11 @@
 
        is = AHCI_READ(sc, AHCI_P_IS(chp->ch_channel));
        AHCI_WRITE(sc, AHCI_P_IS(chp->ch_channel), is);
-       AHCIDEBUG_PRINT(("ahci_intr_port %s port %d is 0x%x CI 0x%x\n", AHCINAME(sc),
-           chp->ch_channel, is, AHCI_READ(sc, AHCI_P_CI(chp->ch_channel))),
+       AHCIDEBUG_PRINT(("ahci_intr_port %s port %d is 0x%x CI 0x%x TFD 0x%x\n",
+           AHCINAME(sc),
+           chp->ch_channel, is,
+           AHCI_READ(sc, AHCI_P_CI(chp->ch_channel)),
+           AHCI_READ(sc, AHCI_P_TFD(chp->ch_channel))),
            DEBUG_INTR);
 
        active = AHCI_READ(sc, AHCI_P_CI(chp->ch_channel))
@@ -584,38 +587,50 @@
        }
 
        /* Handle errors */
-       if (is & (AHCI_P_IX_TFES | AHCI_P_IX_HBFS | AHCI_P_IX_IFS |
-           AHCI_P_IX_OFS | AHCI_P_IX_UFS)) {
-               /* stop channel */
-               ahci_channel_stop(sc, chp, 0);
+       if (is & (AHCI_P_IX_TFES | AHCI_P_IX_HBFS | AHCI_P_IX_HBDS |
+           AHCI_P_IX_IFS | AHCI_P_IX_OFS | AHCI_P_IX_UFS)) {
                if (is & AHCI_P_IX_TFES) {
                        tfd = AHCI_READ(sc, AHCI_P_TFD(chp->ch_channel));
-                       chp->ch_error =
-                           (tfd & AHCI_P_TFD_ERR_MASK) >> AHCI_P_TFD_ERR_SHIFT;
-                       chp->ch_status = (tfd & 0xff);
+
+                       if ((AHCI_TFD_ST(tfd) & (WDCS_DRQ|WDCS_BSY)) == 0 &&
+                           !achp->ahcic_recovering)
+                               goto recover;
+
+                       aprint_error("%s port %d: active %x is 0x%x tfd 0x%x\n",
+                           AHCINAME(sc), chp->ch_channel, active, is, tfd);
                } else {
                        /* emulate a CRC error */
-                       chp->ch_error = WDCE_CRC;
-                       chp->ch_status = WDCS_ERR;
+                       tfd = (WDCE_CRC << AHCI_P_TFD_ERR_SHIFT) | WDCS_ERR;
                }
+
                if (is & AHCI_P_IX_IFS) {
                        aprint_error("%s port %d: SERR 0x%x\n",
                            AHCINAME(sc), chp->ch_channel,
                            AHCI_READ(sc, AHCI_P_SERR(chp->ch_channel)));
                }
-               /* complete all pending commands with error statuses */
-               for (slot=0; slot < sc->sc_ncmds; slot++) {
-                       if ((achp->ahcic_cmds_active & (1 << slot)) == 0)
-                               continue;
-                       if ((active & (1 << slot)) == 1) {
-                               xfer = ata_queue_hwslot_to_xfer(chp, slot);
-                               xfer->c_intr(chp, xfer, is);
-                       }
+
+               /* Request channel reset */
+               ata_reset_channel(chp, 0);
+               return;
+       } else if (is & (AHCI_P_IX_DHRS|AHCI_P_IX_SDBS)) {
+               /* D2H Register FIS or Set Device Bits */
+               tfd = AHCI_READ(sc, AHCI_P_TFD(chp->ch_channel));
+               if ((tfd & WDCS_ERR) != 0 && !achp->ahcic_recovering) {
+recover:
+                       aprint_error("%s port %d: transfer aborted 0x%x\n",
+                           AHCINAME(sc), chp->ch_channel, tfd);
+
+                       /*
+                        * Device indicated transfer aborted without any fatal
+                        * error. Transfers can be reissued after resetting
+                        * CMD.ST. Need to execute READ LOG EXT for NCQ
+                        * commands.
+                        */
+                       ahci_channel_stop(sc, chp, AT_POLL);
+                       ahci_channel_start(sc, chp, AT_POLL,
+                           (sc->sc_ahci_cap & AHCI_CAP_CLO) ? 1 : 0);
+                       ahci_channel_recover(sc, chp);
                }
-               /* if channel has not been restarted, do it now */
-               if ((AHCI_READ(sc, AHCI_P_CMD(chp->ch_channel)) & AHCI_P_CMD_CR)
-                   == 0)
-                       ahci_channel_start(sc, chp, 0, 0);
        }
 }
 
@@ -659,7 +674,8 @@
                if ((AHCI_READ(sc, AHCI_P_CI(chp->ch_channel)) & 1 << 0) == 0)
                        return 0;
                is = AHCI_READ(sc, AHCI_P_IS(chp->ch_channel));
-               if (is & (AHCI_P_IX_TFES | AHCI_P_IX_HBFS | AHCI_P_IX_IFS |
+               if (is & (AHCI_P_IX_TFES | AHCI_P_IX_HBFS | AHCI_P_IX_HBDS |
+                   AHCI_P_IX_IFS |
                    AHCI_P_IX_OFS | AHCI_P_IX_UFS)) {
                        if ((is & (AHCI_P_IX_DHRS|AHCI_P_IX_TFES)) ==
                            (AHCI_P_IX_DHRS|AHCI_P_IX_TFES)) {
@@ -827,8 +843,7 @@
        /* wait 31s for BSY to clear */
        for (i = 0; i <AHCI_RST_WAIT; i++) {
                tfd = AHCI_READ(sc, AHCI_P_TFD(chp->ch_channel));
-               if ((((tfd & AHCI_P_TFD_ST) >> AHCI_P_TFD_ST_SHIFT)
-                   & WDCS_BSY) == 0)
+               if ((AHCI_TFD_ST(tfd) & WDCS_BSY) == 0)
                        break;
                ata_delay(10, "ahcid2h", flags);
        }
@@ -895,7 +910,8 @@
                
                /* and enable interrupts */
                AHCI_WRITE(sc, AHCI_P_IE(chp->ch_channel),
-                   AHCI_P_IX_TFES | AHCI_P_IX_HBFS | AHCI_P_IX_IFS |
+                   AHCI_P_IX_TFES | AHCI_P_IX_HBFS | AHCI_P_IX_HBDS |
+                   AHCI_P_IX_IFS |
                    AHCI_P_IX_OFS | AHCI_P_IX_DPS | AHCI_P_IX_UFS |
                    AHCI_P_IX_PSS | AHCI_P_IX_DHRS | AHCI_P_IX_SDBS);
                /* wait 500ms before actually starting operations */
@@ -992,7 +1008,7 @@
            ata_c->bcount,
            (ata_c->flags & AT_READ) ? BUS_DMA_READ : BUS_DMA_WRITE)) {
                ata_c->flags |= AT_DF;
-               ahci_cmd_complete(chp, xfer, slot);
+               xfer->c_intr(chp, xfer, 0);
                return;
        }
        cmd_h->cmdh_flags = htole16(
@@ -1007,17 +1023,14 @@
                AHCI_WRITE(sc, AHCI_GHC,
                    AHCI_READ(sc, AHCI_GHC) & ~AHCI_GHC_IE);
        }
-       chp->ch_flags |= ATACH_IRQ_WAIT;
-       chp->ch_status = 0;
        /* start command */
        AHCI_WRITE(sc, AHCI_P_CI(chp->ch_channel), 1 << slot);
        /* and says we started this command */
        achp->ahcic_cmds_active |= 1 << slot;
 
        if ((ata_c->flags & AT_POLL) == 0) {
-               chp->ch_flags |= ATACH_IRQ_WAIT; /* wait for interrupt */
                callout_reset(&xfer->c_timo_callout, mstohz(ata_c->timeout),
-                   ahci_timeout, xfer);
+                   ata_timeout, xfer);
                return;
        }
        /*
@@ -1037,7 +1050,7 @@
            DEBUG_XFERS);
        if ((ata_c->flags & AT_DONE) == 0) {
                ata_c->flags |= AT_TIMEOU;
-               ahci_cmd_complete(chp, xfer, slot);
+               xfer->c_intr(chp, xfer, 0);
        }
        /* reenable interrupts */
        AHCI_WRITE(sc, AHCI_GHC, AHCI_READ(sc, AHCI_GHC) | AHCI_GHC_IE);
@@ -1063,12 +1076,16 @@
        case KILL_RESET:
                ata_c->flags |= AT_RESET;
                break;
+       case KILL_REQUEUE:
+               panic("%s: not supposed to be requeued\n", __func__);
+               break;
        default:
                printf("ahci_cmd_kill_xfer: unknown reason %d\n", reason);
                panic("ahci_cmd_kill_xfer");
        }
 
        if (deactivate) {
+               KASSERT((achp->ahcic_cmds_active & (1 << xfer->c_slot)) != 0);
                achp->ahcic_cmds_active &= ~(1 << xfer->c_slot);
                ata_deactivate_xfer(chp, xfer);
        }
@@ -1077,7 +1094,7 @@
 }
 
 static int
-ahci_cmd_complete(struct ata_channel *chp, struct ata_xfer *xfer, int is)
+ahci_cmd_complete(struct ata_channel *chp, struct ata_xfer *xfer, int tfd)
 {
        struct ata_command *ata_c = &xfer->c_ata_c;
        struct ahci_softc *sc = (struct ahci_softc *)chp->ch_atac;
@@ -1091,30 +1108,30 @@
        if (ata_waitdrain_xfer_check(chp, xfer))
                return 0;
 
+       KASSERT((achp->ahcic_cmds_active & (1 << xfer->c_slot)) != 0);
        achp->ahcic_cmds_active &= ~(1 << xfer->c_slot);
        ata_deactivate_xfer(chp, xfer);
 
-       chp->ch_flags &= ~ATACH_IRQ_WAIT;
        if (xfer->c_flags & C_TIMEOU) {
                ata_c->flags |= AT_TIMEOU;
        }
 
-       if (chp->ch_status & WDCS_BSY) {
+       if (AHCI_TFD_ERR(tfd) & WDCS_BSY) {
                ata_c->flags |= AT_TIMEOU;
-       } else if (chp->ch_status & WDCS_ERR) {
-               ata_c->r_error = chp->ch_error;
+       } else if (AHCI_TFD_ST(tfd) & WDCS_ERR) {
+               ata_c->r_error = AHCI_TFD_ERR(tfd);
                ata_c->flags |= AT_ERROR;
        }
 
        if (ata_c->flags & AT_READREG)
                satafis_rdh_cmd_readreg(ata_c, achp->ahcic_rfis->rfis_rfis);
 
-       ahci_cmd_done(chp, xfer);
+       ahci_cmd_done(chp, xfer, tfd);
        return 0;
 }
 
 static void
-ahci_cmd_done(struct ata_channel *chp, struct ata_xfer *xfer)
+ahci_cmd_done(struct ata_channel *chp, struct ata_xfer *xfer, int tfd)
 {
        struct ahci_softc *sc = (struct ahci_softc *)chp->ch_atac;
        struct ahci_channel *achp = (struct ahci_channel *)chp;
@@ -1122,8 +1139,8 @@
        uint16_t *idwordbuf;
        int i;
 
-       AHCIDEBUG_PRINT(("ahci_cmd_done channel %d (status %#x) flags %#x/%#x\n",
-           chp->ch_channel, chp->ch_status, xfer->c_flags, ata_c->flags), DEBUG_FUNCS);
+       AHCIDEBUG_PRINT(("ahci_cmd_done channel %d flags %#x/%#x\n",
+           chp->ch_channel, xfer->c_flags, ata_c->flags), DEBUG_FUNCS);
 
        if (ata_c->flags & (AT_READ|AT_WRITE) && ata_c->bcount > 0) {
                bus_dmamap_t map = achp->ahcic_datad[xfer->c_slot];
@@ -1147,9 +1164,9 @@
 
        if (achp->ahcic_cmdh[xfer->c_slot].cmdh_prdbc)
                ata_c->flags |= AT_XFDONE;
-
        ahci_cmd_done_end(chp, xfer);
-       atastart(chp);
+       if ((AHCI_TFD_ST(tfd) & WDCS_ERR) == 0)
+               atastart(chp);
 }
 
 static void
@@ -1214,7 +1231,7 @@
            (ata_bio->flags & ATA_READ) ? BUS_DMA_READ : BUS_DMA_WRITE)) {
                ata_bio->error = ERR_DMA;
                ata_bio->r_error = 0;
-               ahci_bio_complete(chp, xfer, 0);
+               xfer->c_intr(chp, xfer, 0);
                return;
        }
        cmd_h->cmdh_flags = htole16(
@@ -1229,8 +1246,6 @@
                AHCI_WRITE(sc, AHCI_GHC,
                    AHCI_READ(sc, AHCI_GHC) & ~AHCI_GHC_IE);
        }
-       chp->ch_flags |= ATACH_IRQ_WAIT;
-       chp->ch_status = 0;
        if (xfer->c_flags & C_NCQ)
                AHCI_WRITE(sc, AHCI_P_SACT(chp->ch_channel), 1 << xfer->c_slot);
        /* start command */
@@ -1239,9 +1254,8 @@
        achp->ahcic_cmds_active |= 1 << xfer->c_slot;
 
        if ((xfer->c_flags & C_POLL) == 0) {
-               chp->ch_flags |= ATACH_IRQ_WAIT; /* wait for interrupt */
                callout_reset(&xfer->c_timo_callout, mstohz(ATA_DELAY),
-                   ahci_timeout, xfer);



Home | Main Index | Thread Index | Old Index