Subject: Re: kern/35071: panic: mpt_get_request: corrupted request free list (xfer)
To: None <gnats-bugs@NetBSD.org>
From: Manuel Bouyer <bouyer@antioche.eu.org>
List: netbsd-bugs
Date: 12/02/2006 19:55:01
--7AUc2qLy4jB3hD7Z
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

On Fri, Dec 01, 2006 at 03:20:02PM +0000, Tracy Di Marco White wrote:
> The following reply was made to PR kern/35071; it has been noted by GNATS.
> 
> From: Tracy Di Marco White <tjd-nb-pr@menelos.com>
> To: gnats-bugs@NetBSD.org
> Cc: 
> Subject: Re: kern/35071: panic: mpt_get_request: corrupted request free list (xfer) 
> Date: Fri, 01 Dec 2006 09:15:43 -0600
> 
>  I seem to be getting this every day, or every other day.  So, more messages.
>  
>  st2: already open
>  st0(mpt3:0:1:0): command timeout
>  mpt3: timeout on request index = 0xfe, seq = 0x0133d791
>  mpt3: Status 0x00000000, Mask 0x00000001, Doorbell 0x24000000
>  mpt3: request state: On Chip
>  mpt3: mpt_done: no scsipi_xfer, index = 0xfe, seq = 0x00000000
>  mpt3: request state: Free

OK, the command times out, and later the chip reports it as complete after
we've already freed it. I think we should just issue a bus reset
(or bus_device_reset, but that's harder to do) in case of a timeout, and
let the controller complete the commands.

Attached is a patch that attempts to implement a bus_reset function for
mpt(4). You can easily test it by starting some I/O (e.g. dd if=/dev/rsdxd
of=/dev/null bs=1m) and, while it's running, issuing several
"scsictl scsibusx reset" commands.

I expect to see "IOC Bus Reset Port %d" or "External Bus Reset" on the console.

-- 
Manuel Bouyer <bouyer@antioche.eu.org>
     NetBSD: 26 ans d'experience feront toujours la difference
--

--7AUc2qLy4jB3hD7Z
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=diff

Index: mpt_netbsd.c
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/mpt_netbsd.c,v
retrieving revision 1.10
diff -u -r1.10 mpt_netbsd.c
--- mpt_netbsd.c	11 Dec 2005 12:21:28 -0000	1.10
+++ mpt_netbsd.c	2 Dec 2006 18:50:50 -0000
@@ -78,6 +78,7 @@
 __KERNEL_RCSID(0, "$NetBSD: mpt_netbsd.c,v 1.10 2005/12/11 12:21:28 christos Exp $");
 
 #include <dev/ic/mpt.h>			/* pulls in all headers */
+#include <sys/scsiio.h>
 
 #include <machine/stdarg.h>		/* for mpt_prt() */
 
@@ -89,10 +90,13 @@
 static void	mpt_get_xfer_mode(mpt_softc_t *, struct scsipi_periph *);
 static void	mpt_ctlop(mpt_softc_t *, void *vmsg, uint32_t);
 static void	mpt_event_notify_reply(mpt_softc_t *, MSG_EVENT_NOTIFY_REPLY *);
+static void	mpt_bus_reset(mpt_softc_t *);
 
 static void	mpt_scsipi_request(struct scsipi_channel *,
 		    scsipi_adapter_req_t, void *);
 static void	mpt_minphys(struct buf *);
+static int 	mpt_ioctl(struct scsipi_channel *, u_long, caddr_t, int,
+			struct proc *);
 
 void
 mpt_scsipi_attach(mpt_softc_t *mpt)
@@ -110,10 +114,11 @@
 	memset(adapt, 0, sizeof(*adapt));
 	adapt->adapt_dev = &mpt->sc_dev;
 	adapt->adapt_nchannels = 1;
-	adapt->adapt_openings = maxq;
-	adapt->adapt_max_periph = maxq;
+	adapt->adapt_openings = maxq - 1; /* keep one for mngt reqs */
+	adapt->adapt_max_periph = maxq - 1;
 	adapt->adapt_request = mpt_scsipi_request;
 	adapt->adapt_minphys = mpt_minphys;
+	adapt->adapt_ioctl = mpt_ioctl;
 
 	/* Fill in the scsipi_channel. */
 	memset(chan, 0, sizeof(*chan));
@@ -382,14 +387,15 @@
 	mpt_prt(mpt, "request state: %s", mpt_req_state(req->debug));
 	if (mpt->verbose > 1)
 		mpt_print_scsi_io_request((MSG_SCSI_IO_REQUEST *)req->req_vbuf);
-
+#if 0
 	/* XXX WHAT IF THE IOC IS STILL USING IT?? */
 	req->xfer = NULL;
 	mpt_free_request(mpt, req);
 
 	xs->error = XS_TIMEOUT;
 	scsipi_done(xs);
-
+#endif
+	mpt_bus_reset(mpt);
 	splx(s);
 }
 
@@ -461,6 +467,8 @@
 	if (__predict_false(mpt_req->Function == MPI_FUNCTION_SCSI_TASK_MGMT)) {
 		if (mpt->verbose > 1)
 			mpt_prt(mpt, "mpt_done: TASK MGMT");
+		KASSERT(req == mpt->mngt_req);
+		mpt->mngt_req = NULL;
 		goto done;
 	}
 
@@ -1280,7 +1288,43 @@
 	}
 }
 
-/* XXXJRT mpt_bus_reset() */
+static void
+mpt_bus_reset(mpt_softc_t *mpt)
+{
+	request_t *req;
+	MSG_SCSI_TASK_MGMT *mngt_req;
+	int s;
+
+	s = splbio();
+	if (mpt->mngt_req) {
+		/* request already queued; can't do more */
+		splx(s);
+		return;
+	}
+	req = mpt_get_request(mpt);
+	if (__predict_false(req == NULL)) {
+		printf("%s: no mngt request\n", mpt->sc_dev.dv_xname);
+		splx(s);
+		return;
+	}
+	mpt->mngt_req = req;
+	splx(s);
+	mngt_req = req->req_vbuf;
+	memset(mngt_req, 0, sizeof(*mngt_req));
+	mngt_req->Function = MPI_FUNCTION_SCSI_TASK_MGMT;
+	mngt_req->Bus = mpt->bus;
+	mngt_req->TargetID = 0;
+	mngt_req->ChainOffset = 0;
+	mngt_req->TaskType = MPI_SCSITASKMGMT_TASKTYPE_RESET_BUS;
+	mngt_req->Reserved1 = 0;
+	mngt_req->MsgFlags =
+	    mpt->is_fc ? MPI_SCSITASKMGMT_MSGFLAGS_LIP_RESET_OPTION : 0;
+	mngt_req->MsgContext = req->index;
+	mngt_req->TaskMsgContext = 0;
+	s = splbio();
+	mpt_send_cmd(mpt, req);
+	splx(s);
+}
 
 /*****************************************************************************
  * SCSI interface routines
@@ -1322,3 +1366,19 @@
 		bp->b_bcount = MPT_MAX_XFER;
 	minphys(bp);
 }
+
+static int
+mpt_ioctl(struct scsipi_channel *chan, u_long cmd, caddr_t arg,
+    int flag, struct proc *p)
+{
+	struct scsipi_adapter *adapt = chan->chan_adapter;
+	mpt_softc_t *mpt = (void *) adapt->adapt_dev;
+
+	switch (cmd) {
+	case SCBUSIORESET:
+		mpt_bus_reset(mpt);
+		return(0);
+	default:
+		return (ENOTTY);
+	}
+}
Index: mpt_netbsd.h
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/mpt_netbsd.h,v
retrieving revision 1.4
diff -u -r1.4 mpt_netbsd.h
--- mpt_netbsd.h	11 Dec 2005 12:21:28 -0000	1.4
+++ mpt_netbsd.h	2 Dec 2006 18:50:50 -0000
@@ -227,6 +227,7 @@
 	/* SCSIPI and software management */
 	request_t		*request_pool;
 	SLIST_HEAD(req_queue, req_entry) request_free_list;
+	request_t		*mngt_req;
 
 	struct scsipi_adapter	sc_adapter;
 	struct scsipi_channel	sc_channel;

--7AUc2qLy4jB3hD7Z--