Port-i386 archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: ongoing major problems with NetBSD-5 and LOCKDEBUG on multi-core system (mfi(4) related?)



At Fri, 20 Jan 2012 12:48:32 +0100, Manuel Bouyer 
<bouyer%antioche.eu.org@localhost> wrote:
Subject: Re: ongoing major problems with NetBSD-5 and LOCKDEBUG on multi-core 
system (mfi(4) related?)
> 
> On Wed, Jan 18, 2012 at 08:59:47PM -0800, Greg A. Woods wrote:
> > [...]
> > So if I understand correctly these calls from other subsystems are not
> > in an interrupt context and if I remember correctly tsleep() should be
> > fine in any normal driver ioctl() function (for a non-MPSAFE driver, if
> > indeed its ioctl() function was in the device switch table), but clearly
> > grabbing the KERNEL_LOCK() is the wrong thing to do for this case, at
> > least without releasing it properly before the tsleep() call.
> 
> It's no problem calling tsleep() with the KERNEL_LOCK held: sleeping
> functions will release the lock before sleep, and reaquire it after.

Thanks, I finally got around to re-adding the KERNEL_LOCK() hooks
to mfi.c, but I did it quite differently than the original two
patches which did it, and instead I moved the splbio()/splx() calls into
the function where they're really needed, as was done in OpenBSD, since
that code can, I think, be called by other than bio(4) and
sysmon_envstat(9).  I'll attach my current changes, which include the
missing bug fixes from OpenBSD, as well as my changes to splbio()/splx()


> BTW, I run several dell PE2950 with this code and I don't have
> problems.

I'm more or less certain now that mfi(4) is not the cause of the
problems I have been seeing (the ones first reported in PR# kern/45827)


Are you running kernels from the netbsd-5 branch on any of them?

Kernels with LOCKDEBUG (and DIAGNOSTIC and DEBUG)?


I'm still stuck on how to use GDB to look at the different CPU stack
backtraces.

-- 
                                                Greg A. Woods
                                                Planix, Inc.

<woods%planix.com@localhost>       +1 250 762-7675        http://www.planix.com/

--- mfi.c       12 Apr 2010 10:30:24 -0700      1.19.4.4
+++ mfi.c       19 Jan 2012 18:19:46 -0800      
@@ -23,6 +23,7 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/mutex.h>
 #include <sys/buf.h>
 #include <sys/ioctl.h>
 #include <sys/device.h>
@@ -91,7 +92,7 @@
                            uint32_t, uint32_t, uint32_t, void *, uint8_t *);
 static void            mfi_mgmt_done(struct mfi_ccb *);
 
-#if NBIO > 0
+#if NBIO > 0 /* XXX this is bogus -- should allow for /dev entries and normal 
ioctl(2) as well!  */
 static int             mfi_ioctl(device_t, u_long, void *);
 static int             mfi_ioctl_inq(struct mfi_softc *, struct bioc_inq *);
 static int             mfi_ioctl_vol(struct mfi_softc *, struct bioc_vol *);
@@ -103,6 +104,7 @@
 static int             mfi_ioctl_setstate(struct mfi_softc *,
                                struct bioc_setstate *);
 static int             mfi_bio_hs(struct mfi_softc *, int, int, void *);
+/* XXX these next only depend on NBIO because without NBIO we have no ioctl() 
functions */
 static int             mfi_create_sensors(struct mfi_softc *);
 static void            mfi_sensor_refresh(struct sysmon_envsys *,
                                envsys_data_t *);
@@ -153,15 +155,14 @@
 mfi_get_ccb(struct mfi_softc *sc)
 {
        struct mfi_ccb          *ccb;
-       int                     s;
 
-       s = splbio();
+       mutex_enter(&sc->sc_ccb_mtx);   /* OpenBSD 1.97 */
        ccb = TAILQ_FIRST(&sc->sc_ccb_freeq);
        if (ccb) {
                TAILQ_REMOVE(&sc->sc_ccb_freeq, ccb, ccb_link);
                ccb->ccb_state = MFI_CCB_READY;
        }
-       splx(s);
+       mutex_exit(&sc->sc_ccb_mtx);    /* OpenBSD 1.97 */
 
        DNPRINTF(MFI_D_CCB, "%s: mfi_get_ccb: %p\n", DEVNAME(sc), ccb);
 
@@ -172,11 +173,9 @@
 mfi_put_ccb(struct mfi_ccb *ccb)
 {
        struct mfi_softc        *sc = ccb->ccb_sc;
-       int                     s;
 
        DNPRINTF(MFI_D_CCB, "%s: mfi_put_ccb: %p\n", DEVNAME(sc), ccb);
 
-       s = splbio();
        ccb->ccb_state = MFI_CCB_FREE;
        ccb->ccb_xs = NULL;
        ccb->ccb_flags = 0;
@@ -187,8 +186,10 @@
        ccb->ccb_sgl = NULL;
        ccb->ccb_data = NULL;
        ccb->ccb_len = 0;
+
+       mutex_enter(&sc->sc_ccb_mtx);   /* OpenBSD 1.97 */
        TAILQ_INSERT_TAIL(&sc->sc_ccb_freeq, ccb, ccb_link);
-       splx(s);
+       mutex_exit(&sc->sc_ccb_mtx);    /* OpenBSD 1.97 */
 }
 
 static int
@@ -640,6 +641,7 @@
                return 1;
 
        TAILQ_INIT(&sc->sc_ccb_freeq);
+       mutex_init(&sc->sc_ccb_mtx, MUTEX_DEFAULT, IPL_BIO); /* OpenBSD 1.97 */
 
        status = mfi_fw_state(sc);
        sc->sc_max_cmds = status & MFI_STATE_MAXCMD_MASK;
@@ -1237,6 +1239,16 @@
                if (mfi_poll(ccb))
                        goto done;
        } else {
+               /*
+                * revision 1.90
+                * date: 2009/03/29 01:02:35;  author: dlg;  state: Exp;  
lines: +3 -0
+                * fix a small race in mfi_mgmt between the checking of a ccbs 
completion and
+                * the sleep waiting for the completion. it is possible to get 
the interrupt
+                * completing the command just before the tsleep, which will 
never get a
+                * wakeup because the interrupt with the wakeup has already 
happened.
+                */
+               int s = splbio();
+
                mfi_post(sc, ccb);
 
                DNPRINTF(MFI_D_MISC, "%s: mfi_mgmt_internal sleeping\n",
@@ -1244,6 +1256,8 @@
                while (ccb->ccb_state != MFI_CCB_DONE)
                        tsleep(ccb, PRIBIO, "mfi_mgmt", 0);
 
+               splx(s);
+
                if (ccb->ccb_flags & MFI_CCB_F_ERR)
                        goto done;
        }
@@ -1336,10 +1350,8 @@
 {
        struct mfi_softc *sc = device_private(dev);
        int error = 0;
-       int s;
 
        KERNEL_LOCK(1, curlwp);
-       s = splbio();
 
        DNPRINTF(MFI_D_IOCTL, "%s: mfi_ioctl ", DEVNAME(sc));
 
@@ -1378,7 +1390,7 @@
                DNPRINTF(MFI_D_IOCTL, " invalid ioctl\n");
                error = EINVAL;
        }
-       splx(s);
+
        KERNEL_UNLOCK_ONE(curlwp);
 
        DNPRINTF(MFI_D_IOCTL, "%s: mfi_ioctl return %x\n", DEVNAME(sc), error);
@@ -1428,6 +1440,13 @@
            sizeof(sc->sc_ld_list), &sc->sc_ld_list, NULL))
                goto done;
 
+       /* OpenBSD 1.86 */
+       if (bv->bv_volid >= sc->sc_ld_list.mll_no_ld) {
+               /* go do hotspares */
+               rv = mfi_bio_hs(sc, bv->bv_volid, MFI_MGMT_VD, bv);
+               goto done;
+       }
+
        i = bv->bv_volid;
        mbox[0] = sc->sc_ld_list.mll_list[i].mll_ld.mld_target;
        DNPRINTF(MFI_D_IOCTL, "%s: mfi_ioctl_vol target %#x\n",
@@ -1437,12 +1456,6 @@
            sizeof(sc->sc_ld_details), &sc->sc_ld_details, mbox))
                goto done;
 
-       if (bv->bv_volid >= sc->sc_ld_list.mll_no_ld) {
-               /* go do hotspares */
-               rv = mfi_bio_hs(sc, bv->bv_volid, MFI_MGMT_VD, bv);
-               goto done;
-       }
-
        strlcpy(bv->bv_dev, sc->sc_ld[i].ld_dev, sizeof(bv->bv_dev));
 
        switch(sc->sc_ld_list.mll_list[i].mll_state) {
@@ -1513,8 +1526,8 @@
        struct mfi_pd_details   *pd;
        struct scsipi_inquiry_data *inqbuf;
        char                    vend[8+16+4+1];
-       int                     i, rv = EINVAL;
-       int                     arr, vol, disk;
+       int                     rv = EINVAL;
+       int                     arr, vol, disk, span;
        uint32_t                size;
        uint8_t                 mbox[MFI_MBOX_SIZE];
 
@@ -1540,11 +1553,6 @@
 
        ar = cfg->mfc_array;
 
-       /* calculate offset to ld structure */
-       ld = (struct mfi_ld_cfg *)(
-           ((uint8_t *)cfg) + offsetof(struct mfi_conf, mfc_array) +
-           cfg->mfc_array_size * cfg->mfc_no_array);
-
        vol = bd->bd_volid;
 
        if (vol >= cfg->mfc_no_ld) {
@@ -1553,20 +1561,29 @@
                goto freeme;
        }
 
-       /* find corresponding array for ld */
-       for (i = 0, arr = 0; i < vol; i++)
-               arr += ld[i].mlc_parm.mpa_span_depth;
+       /* OpenBSD 1.85 */
+       /* calculate offset to ld structure */
+       ld = (struct mfi_ld_cfg *)(
+           ((uint8_t *)cfg) + offsetof(struct mfi_conf, mfc_array) +
+           cfg->mfc_array_size * cfg->mfc_no_array);
 
+       /* use span 0 only when raid group is not spanned */
+       if (ld[vol].mlc_parm.mpa_span_depth > 1)
+               span = bd->bd_diskid / ld[vol].mlc_parm.mpa_no_drv_per_span;
+       else
+               span = 0;
+       arr = ld[vol].mlc_span[span].mls_index;
+
        /* offset disk into pd list */
        disk = bd->bd_diskid % ld[vol].mlc_parm.mpa_no_drv_per_span;
 
-       /* offset array index into the next spans */
-       arr += bd->bd_diskid / ld[vol].mlc_parm.mpa_no_drv_per_span;
-
        bd->bd_target = ar[arr].pd[disk].mar_enc_slot;
+
+       /* get status */
        switch (ar[arr].pd[disk].mar_pd_state){
        case MFI_PD_UNCONFIG_GOOD:
-               bd->bd_status = BIOC_SDUNUSED;
+       case MFI_PD_FAILED:             /* OpenBSD 1.82, OpenBSD PR#5645 */
+               bd->bd_status = BIOC_SDFAILED; /* OpenBSD 1.82, OpenBSD PR#5645 
*/
                break;
 
        case MFI_PD_HOTSPARE: /* XXX dedicated hotspare part of array? */
@@ -1577,10 +1594,11 @@
                bd->bd_status = BIOC_SDOFFLINE;
                break;
 
+#if 0 /* OpenBSD 1.82, OpenBSD PR#5645 */
        case MFI_PD_FAILED:
                bd->bd_status = BIOC_SDFAILED;
                break;
-
+#endif
        case MFI_PD_REBUILD:
                bd->bd_status = BIOC_SDREBUILD;
                break;
@@ -1600,8 +1618,11 @@
        *((uint16_t *)&mbox) = ar[arr].pd[disk].mar_pd.mfp_id;
        memset(pd, 0, sizeof(*pd));
        if (mfi_mgmt_internal(sc, MR_DCMD_PD_GET_INFO, MFI_DATA_IN,
-           sizeof *pd, pd, mbox))
+           sizeof *pd, pd, mbox)) {
+               /* disk is missing but succeed command */
+               rv = 0; /* OpenBSD 1.82, OpenBSD PR#5645 */
                goto freeme;
+       }
 
        bd->bd_size = pd->mpd_size * 512; /* bytes per block */
 
@@ -1948,7 +1969,6 @@
 {
        struct mfi_softc        *sc = sme->sme_cookie;
        struct bioc_vol         bv;
-       int s;
        int error;
 
        if (edata->sensor >= sc->sc_ld_cnt)
@@ -1956,11 +1976,11 @@
 
        bzero(&bv, sizeof(bv));
        bv.bv_volid = edata->sensor;
+
        KERNEL_LOCK(1, curlwp);
-       s = splbio();
        error = mfi_ioctl_vol(sc, &bv);
-       splx(s);
        KERNEL_UNLOCK_ONE(curlwp);
+
        if (error)
                return;
 

Attachment: pgpwNnMWw9lXz.pgp
Description: PGP signature



Home | Main Index | Thread Index | Old Index