Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/jdolecek-ncq]: src/sys/dev/ic fix logic bug in processing of finished co...



details:   https://anonhg.NetBSD.org/src/rev/197cfa63a18e
branches:  jdolecek-ncq
changeset: 822989:197cfa63a18e
user:      jdolecek <jdolecek%NetBSD.org@localhost>
date:      Tue Aug 01 22:02:32 2017 +0000

description:
fix logic bug in processing of finished commands - mask of active
commands can change during the loop as c_intr() callback can queue
new commands, so the interrupt routine should only mark as finished
those which were actually active before the loop started; otherwise
the code marked as finished commands which were just started, and
being executed by HBA, leading to all sorts of data corruption

while here mark the active mask volatile, as it is modified from
interrupt context

this fixes for good the random crashes, short reads, and fatal command
errors which I've been tracing down for the past couple of weeks

thanks to Jonathan (jakllsch@) for testing, and for a script which easily
triggered the condition and led to this bug being finally found and squashed

diffstat:

 sys/dev/ic/ahcisata_core.c |  25 +++++++++++++++----------
 sys/dev/ic/ahcisatavar.h   |   4 ++--
 sys/dev/ic/mvsatavar.h     |   4 ++--
 sys/dev/ic/siisata.c       |  11 ++++++++---
 sys/dev/ic/siisatavar.h    |   4 ++--
 5 files changed, 29 insertions(+), 19 deletions(-)

diffs (168 lines):

diff -r 311ed0ed3fef -r 197cfa63a18e sys/dev/ic/ahcisata_core.c
--- a/sys/dev/ic/ahcisata_core.c        Tue Aug 01 21:43:49 2017 +0000
+++ b/sys/dev/ic/ahcisata_core.c        Tue Aug 01 22:02:32 2017 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: ahcisata_core.c,v 1.57.6.24 2017/07/29 16:50:32 jdolecek Exp $ */
+/*     $NetBSD: ahcisata_core.c,v 1.57.6.25 2017/08/01 22:02:32 jdolecek Exp $ */
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ahcisata_core.c,v 1.57.6.24 2017/07/29 16:50:32 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ahcisata_core.c,v 1.57.6.25 2017/08/01 22:02:32 jdolecek Exp $");
 
 #include <sys/types.h>
 #include <sys/malloc.h>
@@ -559,7 +559,7 @@
 static void
 ahci_intr_port(struct ahci_softc *sc, struct ahci_channel *achp)
 {
-       uint32_t is, tfd, active;
+       uint32_t is, tfd, sact;
        struct ata_channel *chp = &achp->ata_channel;
        struct ata_xfer *xfer;
        int slot;
@@ -578,12 +578,12 @@
 
        if ((chp->ch_flags & ATACH_NCQ) == 0) {
                /* Non-NCQ operation */
-               active = AHCI_READ(sc, AHCI_P_CI(chp->ch_channel));
+               sact = AHCI_READ(sc, AHCI_P_CI(chp->ch_channel));
                slot = (AHCI_READ(sc, AHCI_P_CMD(chp->ch_channel))
                        & AHCI_P_CMD_CCS_MASK) >> AHCI_P_CMD_CCS_SHIFT;
        } else {
                /* NCQ operation */
-               active = AHCI_READ(sc, AHCI_P_SACT(chp->ch_channel));
+               sact = AHCI_READ(sc, AHCI_P_SACT(chp->ch_channel));
                slot = -1;
        }
 
@@ -595,7 +595,7 @@
                        tfd = AHCI_READ(sc, AHCI_P_TFD(chp->ch_channel));
 
                        aprint_error("%s port %d: active %x is 0x%x tfd 0x%x\n",
-                           AHCINAME(sc), chp->ch_channel, active, is, tfd);
+                           AHCINAME(sc), chp->ch_channel, sact, is, tfd);
                } else {
                        /* mark an error, and set BSY */
                        tfd = (WDCE_ABRT << AHCI_P_TFD_ERR_SHIFT) |
@@ -630,8 +630,8 @@
                ata_channel_freeze(chp);
 
        if (slot >= 0) {
-               if ((achp->ahcic_cmds_active & (1 << slot)) != 0 &&
-                   (active & (1 << slot)) == 0) {
+               if ((achp->ahcic_cmds_active & __BIT(slot)) != 0 &&
+                   (sact & __BIT(slot)) == 0) {
                        xfer = ata_queue_hwslot_to_xfer(chp, slot);
                        xfer->c_intr(chp, xfer, tfd);
                }
@@ -641,10 +641,15 @@
                 * and any further D2H FISes are ignored until the error
                 * condition is cleared. Hence if a command is inactive,
                 * it means it actually already finished successfully.
+                * Note: active slots can change as c_intr() callback
+                * can activate another command(s), so must only process
+                * commands active before we start processing.
                 */
+               uint32_t aslots = achp->ahcic_cmds_active;
+
                for (slot=0; slot < sc->sc_ncmds; slot++) {
-                       if ((achp->ahcic_cmds_active & (1 << slot)) != 0 &&
-                           (active & (1 << slot)) == 0) {
+                       if ((aslots & __BIT(slot)) != 0 &&
+                           (sact & __BIT(slot)) == 0) {
                                xfer = ata_queue_hwslot_to_xfer(chp, slot);
                                xfer->c_intr(chp, xfer, 0);
                        }
diff -r 311ed0ed3fef -r 197cfa63a18e sys/dev/ic/ahcisatavar.h
--- a/sys/dev/ic/ahcisatavar.h  Tue Aug 01 21:43:49 2017 +0000
+++ b/sys/dev/ic/ahcisatavar.h  Tue Aug 01 22:02:32 2017 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: ahcisatavar.h,v 1.17.6.2 2017/07/29 15:07:46 jdolecek Exp $    */
+/*     $NetBSD: ahcisatavar.h,v 1.17.6.3 2017/08/01 22:02:32 jdolecek Exp $    */
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -82,7 +82,7 @@
                struct ahci_cmd_tbl *ahcic_cmd_tbl[AHCI_MAX_CMDS];
                bus_addr_t ahcic_bus_cmd_tbl[AHCI_MAX_CMDS];
                bus_dmamap_t ahcic_datad[AHCI_MAX_CMDS];
-               uint32_t  ahcic_cmds_active;    /* active commands */
+               volatile uint32_t  ahcic_cmds_active;   /* active commands */
                uint32_t  ahcic_cmds_hold;      /* held commands */
                bool ahcic_recovering;
        } sc_channels[AHCI_MAX_PORTS];
diff -r 311ed0ed3fef -r 197cfa63a18e sys/dev/ic/mvsatavar.h
--- a/sys/dev/ic/mvsatavar.h    Tue Aug 01 21:43:49 2017 +0000
+++ b/sys/dev/ic/mvsatavar.h    Tue Aug 01 22:02:32 2017 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: mvsatavar.h,v 1.2.48.2 2017/06/24 14:33:06 jdolecek Exp $      */
+/*     $NetBSD: mvsatavar.h,v 1.2.48.3 2017/08/01 22:02:32 jdolecek Exp $      */
 /*
  * Copyright (c) 2008 KIYOHARA Takashi
  * All rights reserved.
@@ -73,7 +73,7 @@
        enum mvsata_edmamode port_edmamode_negotiated;
        enum mvsata_edmamode port_edmamode_curr;
 
-       uint32_t port_quetagidx;        /* Host Queue Tag valiable */
+       volatile uint32_t port_quetagidx;       /* Host Queue Tag valid */
 
        int port_prev_erqqop;           /* previous Req Queue Out-Pointer */
        bus_dma_tag_t port_dmat;
diff -r 311ed0ed3fef -r 197cfa63a18e sys/dev/ic/siisata.c
--- a/sys/dev/ic/siisata.c      Tue Aug 01 21:43:49 2017 +0000
+++ b/sys/dev/ic/siisata.c      Tue Aug 01 22:02:32 2017 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: siisata.c,v 1.30.4.30 2017/08/01 21:43:49 jdolecek Exp $ */
+/* $NetBSD: siisata.c,v 1.30.4.31 2017/08/01 22:02:32 jdolecek Exp $ */
 
 /* from ahcisata_core.c */
 
@@ -79,7 +79,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: siisata.c,v 1.30.4.30 2017/08/01 21:43:49 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: siisata.c,v 1.30.4.31 2017/08/01 22:02:32 jdolecek Exp $");
 
 #include <sys/types.h>
 #include <sys/param.h>
@@ -560,9 +560,14 @@
                 * and any further D2H FISes are ignored until the error
                 * condition is cleared. Hence if a command is inactive,
                 * it means it actually already finished successfully.
+                * Note: active slots can change as c_intr() callback
+                * can activate another command(s), so must only process
+                * commands active before we start processing.
                 */
+               uint32_t aslots = schp->sch_active_slots;
+
                for (int slot=0; slot < SIISATA_MAX_SLOTS; slot++) {
-                       if ((schp->sch_active_slots & __BIT(slot)) != 0 &&
+                       if ((aslots & __BIT(slot)) != 0 &&
                            (pss & PR_PXSS(slot)) == 0) {
                                xfer = ata_queue_hwslot_to_xfer(chp, slot);
                                xfer->c_intr(chp, xfer, 0);
diff -r 311ed0ed3fef -r 197cfa63a18e sys/dev/ic/siisatavar.h
--- a/sys/dev/ic/siisatavar.h   Tue Aug 01 21:43:49 2017 +0000
+++ b/sys/dev/ic/siisatavar.h   Tue Aug 01 22:02:32 2017 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: siisatavar.h,v 1.6.48.2 2017/07/19 20:03:29 jdolecek Exp $ */
+/* $NetBSD: siisatavar.h,v 1.6.48.3 2017/08/01 22:02:32 jdolecek Exp $ */
 
 /* from ahcisatavar.h */
 
@@ -100,7 +100,7 @@
 
                bus_dmamap_t sch_datad[SIISATA_MAX_SLOTS];
 
-               uint32_t sch_active_slots;
+               volatile uint32_t sch_active_slots;
                uint32_t sch_hold_slots;
                bool sch_recovering;
        } sc_channels[SIISATA_MAX_PORTS];



Home | Main Index | Thread Index | Old Index