At Fri, 20 Jan 2012 12:48:32 +0100, Manuel Bouyer
<bouyer%antioche.eu.org@localhost> wrote:
Subject: Re: ongoing major problems with NetBSD-5 and LOCKDEBUG on multi-core
system (mfi(4) related?)
>
> On Wed, Jan 18, 2012 at 08:59:47PM -0800, Greg A. Woods wrote:
> > [...]
> > So if I understand correctly these calls from other subsystems are not
> > in an interrupt context and if I remember correctly tsleep() should be
> > fine in any normal driver ioctl() function (for a non-MPSAFE driver, if
> > indeed its ioctl() function was in the device switch table), but clearly
> > grabbing the KERNEL_LOCK() is the wrong thing to do for this case, at
> > least without releasing it properly before the tsleep() call.
>
> It's no problem calling tsleep() with the KERNEL_LOCK held: sleeping
> functions will release the lock before sleep, and reaquire it after.
Thanks, I finally got around to re-adding the KERNEL_LOCK() hooks
to mfi.c, but I did it quite differently than the original two
patches which did it, and instead I moved the splbio()/splx() calls into
the function where they're really needed, as was done in OpenBSD, since
that code can, I think, be called by other than bio(4) and
sysmon_envstat(9). I'll attach my current changes, which include the
missing bug fixes from OpenBSD, as well as my changes to splbio()/splx()
> BTW, I run several dell PE2950 with this code and I don't have
> problems.
I'm more or less certain now that mfi(4) is not the cause of the
problems I have been seeing (the ones first reported in PR# kern/45827)
Are you running kernels from the netbsd-5 branch on any of them?
Kernels with LOCKDEBUG (and DIAGNOSTIC and DEBUG)?
I'm still stuck on how to use GDB to look at the different CPU stack
backtraces.
--
Greg A. Woods
Planix, Inc.
<woods%planix.com@localhost> +1 250 762-7675 http://www.planix.com/
--- mfi.c 12 Apr 2010 10:30:24 -0700 1.19.4.4
+++ mfi.c 19 Jan 2012 18:19:46 -0800
@@ -23,6 +23,7 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/mutex.h>
#include <sys/buf.h>
#include <sys/ioctl.h>
#include <sys/device.h>
@@ -91,7 +92,7 @@
uint32_t, uint32_t, uint32_t, void *, uint8_t *);
static void mfi_mgmt_done(struct mfi_ccb *);
-#if NBIO > 0
+#if NBIO > 0 /* XXX this is bogus -- should allow for /dev entries and normal
ioctl(2) as well! */
static int mfi_ioctl(device_t, u_long, void *);
static int mfi_ioctl_inq(struct mfi_softc *, struct bioc_inq *);
static int mfi_ioctl_vol(struct mfi_softc *, struct bioc_vol *);
@@ -103,6 +104,7 @@
static int mfi_ioctl_setstate(struct mfi_softc *,
struct bioc_setstate *);
static int mfi_bio_hs(struct mfi_softc *, int, int, void *);
+/* XXX these next only depend on NBIO because without NBIO we have no ioctl()
functions */
static int mfi_create_sensors(struct mfi_softc *);
static void mfi_sensor_refresh(struct sysmon_envsys *,
envsys_data_t *);
@@ -153,15 +155,14 @@
mfi_get_ccb(struct mfi_softc *sc)
{
struct mfi_ccb *ccb;
- int s;
- s = splbio();
+ mutex_enter(&sc->sc_ccb_mtx); /* OpenBSD 1.97 */
ccb = TAILQ_FIRST(&sc->sc_ccb_freeq);
if (ccb) {
TAILQ_REMOVE(&sc->sc_ccb_freeq, ccb, ccb_link);
ccb->ccb_state = MFI_CCB_READY;
}
- splx(s);
+ mutex_exit(&sc->sc_ccb_mtx); /* OpenBSD 1.97 */
DNPRINTF(MFI_D_CCB, "%s: mfi_get_ccb: %p\n", DEVNAME(sc), ccb);
@@ -172,11 +173,9 @@
mfi_put_ccb(struct mfi_ccb *ccb)
{
struct mfi_softc *sc = ccb->ccb_sc;
- int s;
DNPRINTF(MFI_D_CCB, "%s: mfi_put_ccb: %p\n", DEVNAME(sc), ccb);
- s = splbio();
ccb->ccb_state = MFI_CCB_FREE;
ccb->ccb_xs = NULL;
ccb->ccb_flags = 0;
@@ -187,8 +186,10 @@
ccb->ccb_sgl = NULL;
ccb->ccb_data = NULL;
ccb->ccb_len = 0;
+
+ mutex_enter(&sc->sc_ccb_mtx); /* OpenBSD 1.97 */
TAILQ_INSERT_TAIL(&sc->sc_ccb_freeq, ccb, ccb_link);
- splx(s);
+ mutex_exit(&sc->sc_ccb_mtx); /* OpenBSD 1.97 */
}
static int
@@ -640,6 +641,7 @@
return 1;
TAILQ_INIT(&sc->sc_ccb_freeq);
+ mutex_init(&sc->sc_ccb_mtx, MUTEX_DEFAULT, IPL_BIO); /* OpenBSD 1.97 */
status = mfi_fw_state(sc);
sc->sc_max_cmds = status & MFI_STATE_MAXCMD_MASK;
@@ -1237,6 +1239,16 @@
if (mfi_poll(ccb))
goto done;
} else {
+ /*
+ * revision 1.90
+ * date: 2009/03/29 01:02:35; author: dlg; state: Exp;
lines: +3 -0
+ * fix a small race in mfi_mgmt between the checking of a ccbs
completion and
+ * the sleep waiting for the completion. it is possible to get
the interrupt
+ * completing the command just before the tsleep, which will
never get a
+ * wakeup because the interrupt with the wakeup has already
happened.
+ */
+ int s = splbio();
+
mfi_post(sc, ccb);
DNPRINTF(MFI_D_MISC, "%s: mfi_mgmt_internal sleeping\n",
@@ -1244,6 +1256,8 @@
while (ccb->ccb_state != MFI_CCB_DONE)
tsleep(ccb, PRIBIO, "mfi_mgmt", 0);
+ splx(s);
+
if (ccb->ccb_flags & MFI_CCB_F_ERR)
goto done;
}
@@ -1336,10 +1350,8 @@
{
struct mfi_softc *sc = device_private(dev);
int error = 0;
- int s;
KERNEL_LOCK(1, curlwp);
- s = splbio();
DNPRINTF(MFI_D_IOCTL, "%s: mfi_ioctl ", DEVNAME(sc));
@@ -1378,7 +1390,7 @@
DNPRINTF(MFI_D_IOCTL, " invalid ioctl\n");
error = EINVAL;
}
- splx(s);
+
KERNEL_UNLOCK_ONE(curlwp);
DNPRINTF(MFI_D_IOCTL, "%s: mfi_ioctl return %x\n", DEVNAME(sc), error);
@@ -1428,6 +1440,13 @@
sizeof(sc->sc_ld_list), &sc->sc_ld_list, NULL))
goto done;
+ /* OpenBSD 1.86 */
+ if (bv->bv_volid >= sc->sc_ld_list.mll_no_ld) {
+ /* go do hotspares */
+ rv = mfi_bio_hs(sc, bv->bv_volid, MFI_MGMT_VD, bv);
+ goto done;
+ }
+
i = bv->bv_volid;
mbox[0] = sc->sc_ld_list.mll_list[i].mll_ld.mld_target;
DNPRINTF(MFI_D_IOCTL, "%s: mfi_ioctl_vol target %#x\n",
@@ -1437,12 +1456,6 @@
sizeof(sc->sc_ld_details), &sc->sc_ld_details, mbox))
goto done;
- if (bv->bv_volid >= sc->sc_ld_list.mll_no_ld) {
- /* go do hotspares */
- rv = mfi_bio_hs(sc, bv->bv_volid, MFI_MGMT_VD, bv);
- goto done;
- }
-
strlcpy(bv->bv_dev, sc->sc_ld[i].ld_dev, sizeof(bv->bv_dev));
switch(sc->sc_ld_list.mll_list[i].mll_state) {
@@ -1513,8 +1526,8 @@
struct mfi_pd_details *pd;
struct scsipi_inquiry_data *inqbuf;
char vend[8+16+4+1];
- int i, rv = EINVAL;
- int arr, vol, disk;
+ int rv = EINVAL;
+ int arr, vol, disk, span;
uint32_t size;
uint8_t mbox[MFI_MBOX_SIZE];
@@ -1540,11 +1553,6 @@
ar = cfg->mfc_array;
- /* calculate offset to ld structure */
- ld = (struct mfi_ld_cfg *)(
- ((uint8_t *)cfg) + offsetof(struct mfi_conf, mfc_array) +
- cfg->mfc_array_size * cfg->mfc_no_array);
-
vol = bd->bd_volid;
if (vol >= cfg->mfc_no_ld) {
@@ -1553,20 +1561,29 @@
goto freeme;
}
- /* find corresponding array for ld */
- for (i = 0, arr = 0; i < vol; i++)
- arr += ld[i].mlc_parm.mpa_span_depth;
+ /* OpenBSD 1.85 */
+ /* calculate offset to ld structure */
+ ld = (struct mfi_ld_cfg *)(
+ ((uint8_t *)cfg) + offsetof(struct mfi_conf, mfc_array) +
+ cfg->mfc_array_size * cfg->mfc_no_array);
+ /* use span 0 only when raid group is not spanned */
+ if (ld[vol].mlc_parm.mpa_span_depth > 1)
+ span = bd->bd_diskid / ld[vol].mlc_parm.mpa_no_drv_per_span;
+ else
+ span = 0;
+ arr = ld[vol].mlc_span[span].mls_index;
+
/* offset disk into pd list */
disk = bd->bd_diskid % ld[vol].mlc_parm.mpa_no_drv_per_span;
- /* offset array index into the next spans */
- arr += bd->bd_diskid / ld[vol].mlc_parm.mpa_no_drv_per_span;
-
bd->bd_target = ar[arr].pd[disk].mar_enc_slot;
+
+ /* get status */
switch (ar[arr].pd[disk].mar_pd_state){
case MFI_PD_UNCONFIG_GOOD:
- bd->bd_status = BIOC_SDUNUSED;
+ case MFI_PD_FAILED: /* OpenBSD 1.82, OpenBSD PR#5645 */
+ bd->bd_status = BIOC_SDFAILED; /* OpenBSD 1.82, OpenBSD PR#5645
*/
break;
case MFI_PD_HOTSPARE: /* XXX dedicated hotspare part of array? */
@@ -1577,10 +1594,11 @@
bd->bd_status = BIOC_SDOFFLINE;
break;
+#if 0 /* OpenBSD 1.82, OpenBSD PR#5645 */
case MFI_PD_FAILED:
bd->bd_status = BIOC_SDFAILED;
break;
-
+#endif
case MFI_PD_REBUILD:
bd->bd_status = BIOC_SDREBUILD;
break;
@@ -1600,8 +1618,11 @@
*((uint16_t *)&mbox) = ar[arr].pd[disk].mar_pd.mfp_id;
memset(pd, 0, sizeof(*pd));
if (mfi_mgmt_internal(sc, MR_DCMD_PD_GET_INFO, MFI_DATA_IN,
- sizeof *pd, pd, mbox))
+ sizeof *pd, pd, mbox)) {
+ /* disk is missing but succeed command */
+ rv = 0; /* OpenBSD 1.82, OpenBSD PR#5645 */
goto freeme;
+ }
bd->bd_size = pd->mpd_size * 512; /* bytes per block */
@@ -1948,7 +1969,6 @@
{
struct mfi_softc *sc = sme->sme_cookie;
struct bioc_vol bv;
- int s;
int error;
if (edata->sensor >= sc->sc_ld_cnt)
@@ -1956,11 +1976,11 @@
bzero(&bv, sizeof(bv));
bv.bv_volid = edata->sensor;
+
KERNEL_LOCK(1, curlwp);
- s = splbio();
error = mfi_ioctl_vol(sc, &bv);
- splx(s);
KERNEL_UNLOCK_ONE(curlwp);
+
if (error)
return;
Attachment:
pgp39wLQ_9nKq.pgp
Description: PGP signature