Port-vax archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

re: my simh-vax crashed: panic: pmap_enter on PG_SREF page



Jason Thorpe writes:
> 	https://www.netbsd.org/~thorpej/vax-pmap-tbi-ptps-patch.txt

i'm running with the first two chunks of that patch added, as well as
my patch below, which allows LOCKDEBUG to work for me (together with
the recent pmap.c LOCKDEBUG fix.)

we'll see what happens next.


.mrg.


mscp(4): add ability to run the rronline() callback in a workqueue

when an ra(4) disk comes online the hardware interrupt ends up calling
disk_set_info(), which takes a sleep lock / allocates in this path, and
that is caught by LOCKDEBUG.

piggy-back on the existing workqueue for autoconfiguration to handle
this path and run the online completion in the work queue.

this is a little ugly, in that it puts two different types of work
into the one queue, but seems less ugly than creating a second
workqueue for what is likely another one-time event (in fact, the other
user may be better handled via config_defer() -- i did not look too
closely.)

tested in simh.


Index: mscp.c
===================================================================
RCS file: /cvsroot/src/sys/dev/mscp/mscp.c,v
retrieving revision 1.38
diff -p -u -r1.38 mscp.c
--- mscp.c	7 Aug 2021 16:19:13 -0000	1.38
+++ mscp.c	21 Dec 2023 21:09:16 -0000
@@ -314,6 +314,7 @@ loop:
 
 				mw->mw_mi = mi;
 				mw->mw_mp = *mp;
+				mw->mw_online = false;
 				workqueue_enqueue(mi->mi_wq,
 				    (struct work *)mw, NULL);
 			}
@@ -483,15 +484,29 @@ mscp_requeue(struct mscp_softc *mi)
 	panic("mscp_requeue");
 }
 
+/*
+ * mscp_worker: Complete configuration and online events.
+ *
+ * If the new work mw_online is false, this is an autoconfiguration
+ * event, otherwise it is an online event that needs to be handled
+ * in a thread context.
+ */
 void
-mscp_worker(struct work *wk, void *dummy)
+mscp_worker(struct work *wk, void *arg)
 {
-	struct mscp_softc *mi;
-	struct mscp_work *mw;
-	struct	drive_attach_args da;
-
-	mw = (struct mscp_work *)wk;
-	mi = mw->mw_mi;
+	struct mscp_work *mw = (struct mscp_work *)wk;
+	struct mscp_softc *mi = mw->mw_mi;
+	struct drive_attach_args da;
+
+	/* This is an online event. */
+	if (mw->mw_online) {
+		struct mscp_device *me = mi->mi_me;
+
+		if (me->me_online_cb)
+			return (*me->me_online_cb)(wk);
+		/* Must be cb for this type. */
+		panic("mscp_worker");
+	}
 
 	da.da_mp = &mw->mw_mp;
 	da.da_typ = mi->mi_type;
Index: mscp_disk.c
===================================================================
RCS file: /cvsroot/src/sys/dev/mscp/mscp_disk.c,v
retrieving revision 1.90
diff -p -u -r1.90 mscp_disk.c
--- mscp_disk.c	9 Aug 2021 19:24:33 -0000	1.90
+++ mscp_disk.c	21 Dec 2023 21:09:16 -0000
@@ -112,6 +112,12 @@ __KERNEL_RCSID(0, "$NetBSD: mscp_disk.c,
 #include "ioconf.h"
 #include "ra.h"
 
+/* Embed mscp_work here, kinda ugly. */
+struct ra_work {
+	struct mscp_work ra_mw;
+	device_t ra_usc;
+};
+
 /*
  * Drive status, per drive
  */
@@ -123,6 +129,7 @@ struct ra_softc {
 	int	ra_hwunit;	/* Hardware unit number */
 	int	ra_havelabel;	/* true if we have a label */
 	int	ra_wlabel;	/* label sector is currently writable */
+	struct	ra_work ra_work;/* online callback handling */
 };
 
 #define rx_softc ra_softc
@@ -910,6 +917,7 @@ rxsize(dev_t dev)
 void	rrdgram(device_t, struct mscp *, struct mscp_softc *);
 void	rriodone(device_t, struct buf *);
 int	rronline(device_t, struct mscp *);
+void	rronline_cb(struct work *);
 int	rrgotstatus(device_t, struct mscp *);
 void	rrreplace(device_t, struct mscp *);
 int	rrioerror(device_t, struct mscp *, struct buf *);
@@ -921,6 +929,7 @@ struct	mscp_device ra_device = {
 	rrdgram,
 	rriodone,
 	rronline,
+	rronline_cb,
 	rrgotstatus,
 	rrreplace,
 	rrioerror,
@@ -962,19 +971,44 @@ rriodone(device_t usc, struct buf *bp)
 /*
  * A drive came on line.  Check its type and size.  Return DONE if
  * we think the drive is truly on line.	 In any case, awaken anyone
- * sleeping on the drive on-line-ness.
+ * sleeping on the drive on-line-ness.  We do most of this in a
+ * workqueue callback as the call to disk_set_info() will trigger a
+ * sleep lock while handling a hardware interrupt.
  */
 int
 rronline(device_t usc, struct mscp *mp)
 {
 	struct ra_softc *ra = device_private(usc);
+	device_t parent = device_parent(usc);
+	struct mscp_softc *mi;
+
+	if (!device_is_a(parent, "mscpbus"))
+		return (MSCP_FAILED);
+
+	mi = device_private(parent);
+	ra->ra_work.ra_usc = usc;
+	ra->ra_work.ra_mw.mw_mi = mi;
+	ra->ra_work.ra_mw.mw_mp = *mp;
+	ra->ra_work.ra_mw.mw_online = true;
+	workqueue_enqueue(mi->mi_wq, (struct work *)&ra->ra_work, NULL);
+
+	return (MSCP_DONE);
+}
+
+void
+rronline_cb(struct work *wk)
+{
+	struct ra_work *ra_work = (struct ra_work *)wk;
+	struct mscp *mp = &ra_work->ra_mw.mw_mp;
+	device_t usc = ra_work->ra_usc;
+	struct ra_softc *ra = device_private(usc);
 	struct disklabel *dl;
 
 	wakeup((void *)&ra->ra_state);
 	if ((mp->mscp_status & M_ST_MASK) != M_ST_SUCCESS) {
 		aprint_error_dev(usc, "attempt to bring on line failed: ");
 		mscp_printevent(mp);
-		return (MSCP_FAILED);
+		return;
 	}
 
 	ra->ra_state = DK_OPEN;
@@ -992,8 +1026,6 @@ rronline(device_t usc, struct mscp *mp)
 	}
 	rrmakelabel(dl, ra->ra_mediaid);
 	ra_set_geometry(ra);
-
-	return (MSCP_DONE);
 }
 
 void
Index: mscp_tape.c
===================================================================
RCS file: /cvsroot/src/sys/dev/mscp/mscp_tape.c,v
retrieving revision 1.43
diff -p -u -r1.43 mscp_tape.c
--- mscp_tape.c	25 Jul 2014 08:10:37 -0000	1.43
+++ mscp_tape.c	21 Dec 2023 21:09:16 -0000
@@ -97,6 +97,7 @@ struct	mscp_device mt_device = {
 	mtdgram,
 	mtiodone,
 	mtonline,
+	NULL,
 	mtgotstatus,
 	0,
 	mtioerror,
Index: mscpvar.h
===================================================================
RCS file: /cvsroot/src/sys/dev/mscp/mscpvar.h,v
retrieving revision 1.18
diff -p -u -r1.18 mscpvar.h
--- mscpvar.h	27 Oct 2012 17:18:27 -0000	1.18
+++ mscpvar.h	21 Dec 2023 21:09:16 -0000
@@ -129,6 +129,8 @@ struct	mscp_device {
 	   (device_t, struct buf *);
 	int	(*me_online)	/* drive on line */
 	   (device_t, struct mscp *);
+	void	(*me_online_cb)	/* drive on line, thread context */
+	   (struct work *wk);
 	int	(*me_gotstatus) /* got unit status */
 	   (device_t, struct mscp *);
 	void	(*me_replace)	/* replace done */
@@ -188,6 +190,7 @@ struct mscp_work {
 	struct work mw_work;
 	struct mscp_softc *mw_mi;
 	struct mscp mw_mp;
+	bool mw_online;
 	SLIST_ENTRY(mscp_work) mw_list;
 };
 


Home | Main Index | Thread Index | Old Index