Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/dev/ata fix race in wd_lastclose() on systems with two i...



details:   https://anonhg.NetBSD.org/src/rev/58b15911847d
branches:  trunk
changeset: 834368:58b15911847d
user:      jdolecek <jdolecek%NetBSD.org@localhost>
date:      Fri Aug 10 22:43:22 2018 +0000

description:
fix race in wd_lastclose() on systems with two ide disks on same
channel, which happened when one disk had pending I/O while the other
disk executed the final disk flush - need to restart bufq processing
once xfer is freed in this case

it could happen e.g. on boot when system executes fsck on different
partitions on the two drives in parallell and hence open and closes
the disk devices repeatedly

add KASSERT() for empty bufq on wd_lastclose(), and fix similar issue
also on suspend/standby path

this was introduced by the NCQ merge and not dksubr - before the merge
each drive had their own xfer, so they could not block each other

fixes PR kern/52783 by Onno van der Linden; many thanks for extensive
help with tracking this down

diffstat:

 sys/dev/ata/ata_subr.c |   9 +++++----
 sys/dev/ata/atavar.h   |   4 ++--
 sys/dev/ata/wd.c       |  29 ++++++++++++++++++-----------
 3 files changed, 25 insertions(+), 17 deletions(-)

diffs (151 lines):

diff -r abdef7232499 -r 58b15911847d sys/dev/ata/ata_subr.c
--- a/sys/dev/ata/ata_subr.c    Fri Aug 10 22:34:36 2018 +0000
+++ b/sys/dev/ata/ata_subr.c    Fri Aug 10 22:43:22 2018 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: ata_subr.c,v 1.5 2018/08/06 20:07:05 jdolecek Exp $    */
+/*     $NetBSD: ata_subr.c,v 1.6 2018/08/10 22:43:22 jdolecek Exp $    */
 
 /*
  * Copyright (c) 1998, 2001 Manuel Bouyer.  All rights reserved.
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ata_subr.c,v 1.5 2018/08/06 20:07:05 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ata_subr.c,v 1.6 2018/08/10 22:43:22 jdolecek Exp $");
 
 #include "opt_ata.h"
 
@@ -377,7 +377,7 @@
  * released.
  */ 
 void
-ata_channel_start(struct ata_channel *chp, int drive)
+ata_channel_start(struct ata_channel *chp, int drive, bool start_self)
 {
        int i, s;
        struct ata_drive_datas *drvp;
@@ -413,7 +413,8 @@
        }
 
        /* Now try to kick off xfers on the current drive */
-       ATA_DRIVE_START(chp, drive);
+       if (start_self)
+               ATA_DRIVE_START(chp, drive);
 
        splx(s);
 #undef ATA_DRIVE_START
diff -r abdef7232499 -r 58b15911847d sys/dev/ata/atavar.h
--- a/sys/dev/ata/atavar.h      Fri Aug 10 22:34:36 2018 +0000
+++ b/sys/dev/ata/atavar.h      Fri Aug 10 22:43:22 2018 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: atavar.h,v 1.98 2018/08/06 20:07:05 jdolecek Exp $     */
+/*     $NetBSD: atavar.h,v 1.99 2018/08/10 22:43:22 jdolecek Exp $     */
 
 /*
  * Copyright (c) 1998, 2001 Manuel Bouyer.
@@ -538,7 +538,7 @@
 void   ata_reset_channel(struct ata_channel *, int);
 void   ata_channel_freeze(struct ata_channel *);
 void   ata_channel_thaw(struct ata_channel *);
-void   ata_channel_start(struct ata_channel *, int);
+void   ata_channel_start(struct ata_channel *, int, bool);
 void   ata_channel_lock(struct ata_channel *);
 void   ata_channel_unlock(struct ata_channel *);
 void   ata_channel_lock_owned(struct ata_channel *);
diff -r abdef7232499 -r 58b15911847d sys/dev/ata/wd.c
--- a/sys/dev/ata/wd.c  Fri Aug 10 22:34:36 2018 +0000
+++ b/sys/dev/ata/wd.c  Fri Aug 10 22:43:22 2018 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: wd.c,v 1.440 2018/08/06 20:07:05 jdolecek Exp $ */
+/*     $NetBSD: wd.c,v 1.441 2018/08/10 22:43:22 jdolecek Exp $ */
 
 /*
  * Copyright (c) 1998, 2001 Manuel Bouyer.  All rights reserved.
@@ -54,7 +54,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: wd.c,v 1.440 2018/08/06 20:07:05 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: wd.c,v 1.441 2018/08/10 22:43:22 jdolecek Exp $");
 
 #include "opt_ata.h"
 #include "opt_wd.h"
@@ -917,7 +917,7 @@
        ata_free_xfer(wd->drvp->chnl_softc, xfer);
 
        dk_done(dksc, bp);
-       ata_channel_start(wd->drvp->chnl_softc, wd->drvp->drive);
+       ata_channel_start(wd->drvp->chnl_softc, wd->drvp->drive, true);
 }
 
 static void
@@ -1072,6 +1072,8 @@
 {
        struct wd_softc *wd = device_private(self);
 
+       KASSERTMSG(bufq_peek(wd->sc_dksc.sc_bufq) == NULL, "bufq not empty");
+
        wd_flushcache(wd, AT_WAIT, false);
 
        wd->atabus->ata_delref(wd->drvp);
@@ -1667,7 +1669,7 @@
 
 out:
        ata_free_xfer(wd->drvp->chnl_softc, xfer);
-       ata_channel_start(wd->drvp->chnl_softc, wd->drvp->drive);
+       ata_channel_start(wd->drvp->chnl_softc, wd->drvp->drive, true);
        return error;
 }
 
@@ -1711,12 +1713,18 @@
 
 out:
        ata_free_xfer(wd->drvp->chnl_softc, xfer);
-       /* drive is supposed to go idle, do not call ata_channel_start() */
+
+       /*
+        * Drive is supposed to go idle, start only other drives.
+        * bufq might be actually already freed at this moment.
+        */
+       ata_channel_start(wd->drvp->chnl_softc, wd->drvp->drive, false);
+
        return error;
 }
 
 int
-wd_flushcache(struct wd_softc *wd, int flags, bool start)
+wd_flushcache(struct wd_softc *wd, int flags, bool start_self)
 {
        struct dk_softc *dksc = &wd->sc_dksc;
        struct ata_xfer *xfer;
@@ -1783,9 +1791,8 @@
        ata_free_xfer(wd->drvp->chnl_softc, xfer);
 
 out:
-       /* kick queue processing blocked while waiting for flush xfer */
-       if (start)
-               ata_channel_start(wd->drvp->chnl_softc, wd->drvp->drive);
+       /* start again I/O processing possibly stopped due to no xfer */
+       ata_channel_start(wd->drvp->chnl_softc, wd->drvp->drive, start_self);
 
        return error;
 }
@@ -1848,7 +1855,7 @@
 
 out:
        ata_free_xfer(wd->drvp->chnl_softc, xfer);
-       ata_channel_start(wd->drvp->chnl_softc, wd->drvp->drive);
+       ata_channel_start(wd->drvp->chnl_softc, wd->drvp->drive, true);
        return error;
 }
 
@@ -2060,7 +2067,7 @@
 out:
        ata_free_xfer(wi->wi_softc->drvp->chnl_softc, xfer);
        ata_channel_start(wi->wi_softc->drvp->chnl_softc,
-           wi->wi_softc->drvp->drive);
+           wi->wi_softc->drvp->drive, true);
 out2:
        bp->b_error = error;
        if (error)



Home | Main Index | Thread Index | Old Index