tech-net archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[PATCH] Defer if_slowtimo to wq; make mii_down wait for mii_phy_auto



The attached patch series makes mii_down wait for mii_phy_auto.  This
means that if a network interface driver's if_stop routine

(a) ensures that any driver-triggered calls to mii_tick have stopped
    (e.g., callout_halt(&sc->sc_tick_ch)), and
(b) calls mii_down,

then after mii_down returns, the mii callbacks are guaranteed not to
be called again, until if_init starts it all up again.

I suspect drivers were historically written under this assumption, and
this lore was lost along the way in MPification.

In order to make this work, mii_down has to be called in sleepable
context (except possibly for holding the driver's ifmedia lock).  That
means, in turn, the if_stop routines that call mii_down must be called
in thread context.  They are usually called from one of two contexts:

(a) ioctl(SIOCSIFFLAGS), which is always sleepable and run with
    the thread-only IFNET_LOCK held; or
(b) if_watchdog (or driver-specific watchdog logic), which is
    currently called from softint context via callout and is never
    sleepable.

So the first patch defers if_watchdog to workqueue.  This way, drivers
that call if_stop from if_watchdog do so in thread context, so that
mii_down can safely block in the second patch.

The few MP-safe drivers that have been adapted to use their own
watchdog timers can be arranged to ensure those defer to workqueue
context directly -- and some like wm(4), bge(4), ixg(4), ixv(4), and iavf(4)
already have.  The only driver I see from a cursory skim that still
needs to defer to workqueue is vmx(4).

The third patch adds a sysctl net.interfaces.ifN.watchdog.trigger to
trigger the watchdog for testing, and the last patch makes the
vioif(4) watchdog use vioif_init just to exercise that path -- not to
be committed.

Comments?  Review?  Testing?
From b95794b89e46c8c5d51166cc00f9fd2bf134e2b6 Mon Sep 17 00:00:00 2001
From: Taylor R Campbell <riastradh%NetBSD.org@localhost>
Date: Mon, 15 Aug 2022 09:49:08 +0000
Subject: [PATCH 1/4] ifnet(9): Defer if_watchdog (a.k.a. if_slowtimo) to
 workqueue.

This is necessary so that mii_down, and the *_init/stop routines that
call it, can sleep waiting for MII callouts on other CPUs.
---
 sys/net/if.c | 95 +++++++++++++++++++++++++++++++++++++++-------------
 sys/net/if.h |  2 +-
 2 files changed, 72 insertions(+), 25 deletions(-)

diff --git a/sys/net/if.c b/sys/net/if.c
index b94cbebe3c82..f471cebcbd73 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -197,6 +197,8 @@ static struct psref_class	*ifnet_psref_class __read_mostly;
 static pserialize_t		ifnet_psz;
 static struct workqueue		*ifnet_link_state_wq __read_mostly;
 
+static struct workqueue		*if_slowtimo_wq __read_mostly;
+
 static kmutex_t			if_clone_mtx;
 
 struct ifnet *lo0ifp;
@@ -221,7 +223,8 @@ static int doifioctl(struct socket *, u_long, void *, struct lwp *);
 static void if_detach_queues(struct ifnet *, struct ifqueue *);
 static void sysctl_sndq_setup(struct sysctllog **, const char *,
     struct ifaltq *);
-static void if_slowtimo(void *);
+static void if_slowtimo_intr(void *);
+static void if_slowtimo_work(struct work *, void *);
 static void if_attachdomain1(struct ifnet *);
 static int ifconf(u_long, void *);
 static int if_transmit(struct ifnet *, struct mbuf *);
@@ -255,6 +258,15 @@ static void if_deferred_start_softint(void *);
 static void if_deferred_start_common(struct ifnet *);
 static void if_deferred_start_destroy(struct ifnet *);
 
+struct if_slowtimo_data {
+	kmutex_t		isd_lock;
+	struct callout		isd_ch;
+	struct work		isd_work;
+	struct ifnet		*isd_ifp;
+	bool			isd_queued;
+	bool			isd_dying;
+};
+
 #if defined(INET) || defined(INET6)
 static void sysctl_net_pktq_setup(struct sysctllog **, int);
 #endif
@@ -333,6 +345,10 @@ ifinit1(void)
 	KASSERT(error == 0);
 	PSLIST_INIT(&ifnet_pslist);
 
+	error = workqueue_create(&if_slowtimo_wq, "ifwdog",
+	    if_slowtimo_work, NULL, PRI_SOFTNET, IPL_SOFTCLOCK, 0);
+	KASSERTMSG(error == 0, "error=%d", error);
+
 	if_indexlim = 8;
 
 	if_pfil = pfil_head_create(PFIL_TYPE_IFNET, NULL);
@@ -779,11 +795,17 @@ if_register(ifnet_t *ifp)
 	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
 
 	if (ifp->if_slowtimo != NULL) {
-		ifp->if_slowtimo_ch =
-		    kmem_zalloc(sizeof(*ifp->if_slowtimo_ch), KM_SLEEP);
-		callout_init(ifp->if_slowtimo_ch, 0);
-		callout_setfunc(ifp->if_slowtimo_ch, if_slowtimo, ifp);
-		if_slowtimo(ifp);
+		struct if_slowtimo_data *isd;
+
+		isd = kmem_zalloc(sizeof(*isd), KM_SLEEP);
+		mutex_init(&isd->isd_lock, MUTEX_DEFAULT, IPL_SOFTCLOCK);
+		callout_init(&isd->isd_ch, 0);
+		callout_setfunc(&isd->isd_ch, if_slowtimo_intr, ifp);
+		isd->isd_ifp = ifp;
+
+		ifp->if_slowtimo_data = isd;
+
+		if_slowtimo_intr(ifp);
 	}
 
 	if (ifp->if_transmit == NULL || ifp->if_transmit == if_nulltransmit)
@@ -1350,11 +1372,20 @@ if_detach(struct ifnet *ifp)
 	pserialize_perform(ifnet_psz);
 	IFNET_GLOBAL_UNLOCK();
 
-	if (ifp->if_slowtimo != NULL && ifp->if_slowtimo_ch != NULL) {
-		ifp->if_slowtimo = NULL;
-		callout_halt(ifp->if_slowtimo_ch, NULL);
-		callout_destroy(ifp->if_slowtimo_ch);
-		kmem_free(ifp->if_slowtimo_ch, sizeof(*ifp->if_slowtimo_ch));
+	if (ifp->if_slowtimo != NULL) {
+		struct if_slowtimo_data *isd = ifp->if_slowtimo_data;
+
+		mutex_enter(&isd->isd_lock);
+		isd->isd_dying = true;
+		mutex_exit(&isd->isd_lock);
+		callout_halt(&isd->isd_ch, NULL);
+		workqueue_wait(if_slowtimo_wq, &isd->isd_work);
+		callout_destroy(&isd->isd_ch);
+		mutex_destroy(&isd->isd_lock);
+		kmem_free(isd, sizeof(*isd));
+
+		ifp->if_slowtimo_data = NULL; /* paranoia */
+		ifp->if_slowtimo = NULL;      /* paranoia */
 	}
 	if_deferred_start_destroy(ifp);
 
@@ -2651,24 +2682,40 @@ if_up_locked(struct ifnet *ifp)
  * call the appropriate interface routine on expiration.
  */
 static void
-if_slowtimo(void *arg)
+if_slowtimo_intr(void *arg)
 {
-	void (*slowtimo)(struct ifnet *);
 	struct ifnet *ifp = arg;
-	int s;
-
-	slowtimo = ifp->if_slowtimo;
-	if (__predict_false(slowtimo == NULL))
-		return;
+	struct if_slowtimo_data *isd = ifp->if_slowtimo_data;
+
+	mutex_enter(&isd->isd_lock);
+	if (!isd->isd_dying) {
+		if (ifp->if_timer != 0 &&
+		    --ifp->if_timer == 0 &&
+		    !isd->isd_queued) {
+			isd->isd_queued = true;
+			workqueue_enqueue(if_slowtimo_wq, &isd->isd_work,
+			    NULL);
+		} else {
+			callout_schedule(&isd->isd_ch, hz / IFNET_SLOWHZ);
+		}
+	}
+	mutex_exit(&isd->isd_lock);
+}
 
-	s = splnet();
-	if (ifp->if_timer != 0 && --ifp->if_timer == 0)
-		(*slowtimo)(ifp);
+static void
+if_slowtimo_work(struct work *work, void *arg)
+{
+	struct if_slowtimo_data *isd =
+	    container_of(work, struct if_slowtimo_data, isd_work);
+	struct ifnet *ifp = isd->isd_ifp;
 
-	splx(s);
+	(*ifp->if_slowtimo)(ifp);
 
-	if (__predict_true(ifp->if_slowtimo != NULL))
-		callout_schedule(ifp->if_slowtimo_ch, hz / IFNET_SLOWHZ);
+	mutex_enter(&isd->isd_lock);
+	isd->isd_queued = false;
+	if (!isd->isd_dying)
+		callout_schedule(&isd->isd_ch, hz / IFNET_SLOWHZ);
+	mutex_exit(&isd->isd_lock);
 }
 
 /*
diff --git a/sys/net/if.h b/sys/net/if.h
index 5ff50b49949b..0b86ec1fd895 100644
--- a/sys/net/if.h
+++ b/sys/net/if.h
@@ -414,7 +414,7 @@ typedef struct ifnet {
 	kmutex_t	*if_ioctl_lock;	/* :: */
 	char		*if_description;	/* i: interface description */
 #ifdef _KERNEL /* XXX kvm(3) */
-	struct callout	*if_slowtimo_ch;/* :: */
+	struct if_slowtimo_data *if_slowtimo_data; /* :: */
 	struct krwlock	*if_afdata_lock;/* :: */
 	struct if_percpuq
 			*if_percpuq;	/* :: we should remove it in the future */

From e20632f9923a21d8da4fbb0633095bad574f6839 Mon Sep 17 00:00:00 2001
From: Taylor R Campbell <riastradh%NetBSD.org@localhost>
Date: Sun, 14 Aug 2022 22:03:06 +0000
Subject: [PATCH 2/4] mii(4): Make mii_down wait for concurrent mii_phy_auto to
 finish.

This is important for callers in if_stop routines to guarantee that
all concurrent access to the mii registers has ended.

Caveat: It won't work for drivers that call mii_down from softint
context, which usually happens via callout -> if_watchdog -> if_init
-> if_stop -> mii_down.  But the watchdog if_stop/init dance needs to
be deferred to thread context anyway -- TBD.
---
 sys/dev/mii/mii_physubr.c | 42 ++++++++++++---------------------------
 1 file changed, 13 insertions(+), 29 deletions(-)

diff --git a/sys/dev/mii/mii_physubr.c b/sys/dev/mii/mii_physubr.c
index 0f45b5ddd426..213195b3b008 100644
--- a/sys/dev/mii/mii_physubr.c
+++ b/sys/dev/mii/mii_physubr.c
@@ -435,23 +435,18 @@ mii_phy_down(struct mii_softc *sc)
 
 	KASSERT(mii_locked(sc->mii_pdata));
 
-	if ((sc->mii_flags & (MIIF_DOINGAUTO|MIIF_AUTOTSLEEP)) ==
-	    (MIIF_DOINGAUTO|MIIF_AUTOTSLEEP)) {
-		/*
-		 * Try to stop it.
-		 *
-		 * - If we stopped it before it expired, callout_stop
-		 *   returns 0, and it is our responsibility to clear
-		 *   MIIF_DOINGAUTO.
-		 *
-		 * - Otherwise, we're too late -- the callout has
-		 *   already begun, and we must leave MIIF_DOINGAUTO
-		 *   set so mii_phy_detach will wait for it to
-		 *   complete.
-		 */
-		if (!callout_stop(&sc->mii_nway_ch))
-			sc->mii_flags &= ~MIIF_DOINGAUTO;
+	if (sc->mii_flags & MIIF_AUTOTSLEEP) {
+		while (sc->mii_flags & MIIF_DOINGAUTO) {
+			cv_wait(&sc->mii_nway_cv,
+			    sc->mii_pdata->mii_media.ifm_lock);
+		}
+	} else {
+		if (sc->mii_flags & MIIF_DOINGAUTO) {
+			callout_halt(&sc->mii_nway_ch,
+			    sc->mii_pdata->mii_media.ifm_lock);
+		}
 	}
+	KASSERT((sc->mii_flags & MIIF_DOINGAUTO) == 0);
 }
 
 void
@@ -691,19 +686,8 @@ mii_phy_detach(device_t self, int flags)
 {
 	struct mii_softc *sc = device_private(self);
 
-	mii_lock(sc->mii_pdata);
-	if (sc->mii_flags & MIIF_AUTOTSLEEP) {
-		while (sc->mii_flags & MIIF_DOINGAUTO) {
-			cv_wait(&sc->mii_nway_cv,
-			    sc->mii_pdata->mii_media.ifm_lock);
-		}
-	} else {
-		if (sc->mii_flags & MIIF_DOINGAUTO) {
-			callout_halt(&sc->mii_nway_ch,
-			    sc->mii_pdata->mii_media.ifm_lock);
-		}
-	}
-	mii_unlock(sc->mii_pdata);
+	/* No mii_lock because mii_flags should be stable by now.  */
+	KASSERT((sc->mii_flags & MIIF_DOINGAUTO) == 0);
 
 	if (sc->mii_flags & MIIF_AUTOTSLEEP)
 		cv_destroy(&sc->mii_nway_cv);

From b5ce3456daae1d37452ffc0fea1a47a13c5b754e Mon Sep 17 00:00:00 2001
From: Taylor R Campbell <riastradh%NetBSD.org@localhost>
Date: Mon, 15 Aug 2022 11:33:56 +0000
Subject: [PATCH 3/4] ifnet(9): Add sysctl net.interfaces.ifN.watchdog.trigger.

For interfaces that use if_watchdog, this forces it to be called at
the next tick.
---
 sys/net/if.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 77 insertions(+), 6 deletions(-)

diff --git a/sys/net/if.c b/sys/net/if.c
index f471cebcbd73..a362b7a4d5e0 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -225,6 +225,8 @@ static void sysctl_sndq_setup(struct sysctllog **, const char *,
     struct ifaltq *);
 static void if_slowtimo_intr(void *);
 static void if_slowtimo_work(struct work *, void *);
+static int sysctl_if_watchdog(SYSCTLFN_PROTO);
+static void sysctl_watchdog_setup(struct ifnet *);
 static void if_attachdomain1(struct ifnet *);
 static int ifconf(u_long, void *);
 static int if_transmit(struct ifnet *, struct mbuf *);
@@ -265,6 +267,7 @@ struct if_slowtimo_data {
 	struct ifnet		*isd_ifp;
 	bool			isd_queued;
 	bool			isd_dying;
+	bool			isd_trigger;
 };
 
 #if defined(INET) || defined(INET6)
@@ -806,6 +809,8 @@ if_register(ifnet_t *ifp)
 		ifp->if_slowtimo_data = isd;
 
 		if_slowtimo_intr(ifp);
+
+		sysctl_watchdog_setup(ifp);
 	}
 
 	if (ifp->if_transmit == NULL || ifp->if_transmit == if_nulltransmit)
@@ -2689,12 +2694,13 @@ if_slowtimo_intr(void *arg)
 
 	mutex_enter(&isd->isd_lock);
 	if (!isd->isd_dying) {
-		if (ifp->if_timer != 0 &&
-		    --ifp->if_timer == 0 &&
-		    !isd->isd_queued) {
-			isd->isd_queued = true;
-			workqueue_enqueue(if_slowtimo_wq, &isd->isd_work,
-			    NULL);
+		if (isd->isd_trigger ||
+		    (ifp->if_timer != 0 && --ifp->if_timer == 0)) {
+			if (!isd->isd_queued) {
+				isd->isd_queued = true;
+				workqueue_enqueue(if_slowtimo_wq,
+				    &isd->isd_work, NULL);
+			}
 		} else {
 			callout_schedule(&isd->isd_ch, hz / IFNET_SLOWHZ);
 		}
@@ -2712,12 +2718,77 @@ if_slowtimo_work(struct work *work, void *arg)
 	(*ifp->if_slowtimo)(ifp);
 
 	mutex_enter(&isd->isd_lock);
+	if (isd->isd_trigger) {
+		isd->isd_trigger = false;
+		printf("%s: watchdog triggered\n", ifp->if_xname);
+	}
 	isd->isd_queued = false;
 	if (!isd->isd_dying)
 		callout_schedule(&isd->isd_ch, hz / IFNET_SLOWHZ);
 	mutex_exit(&isd->isd_lock);
 }
 
+static int
+sysctl_if_watchdog(SYSCTLFN_ARGS)
+{
+	struct sysctlnode node = *rnode;
+	struct ifnet *ifp = node.sysctl_data;
+	struct if_slowtimo_data *isd = ifp->if_slowtimo_data;
+	int arg = 0;
+	int error;
+
+	node.sysctl_data = &arg;
+	error = sysctl_lookup(SYSCTLFN_CALL(&node));
+	if (error || newp == NULL)
+		return error;
+	if (arg) {
+		mutex_enter(&isd->isd_lock);
+		KASSERT(!isd->isd_dying);
+		isd->isd_trigger = true;
+		callout_schedule(&isd->isd_ch, 0);
+		mutex_exit(&isd->isd_lock);
+	}
+
+	return 0;
+}
+
+static void
+sysctl_watchdog_setup(struct ifnet *ifp)
+{
+	struct sysctllog **clog = &ifp->if_sysctl_log;
+	const struct sysctlnode *rnode;
+
+	if (sysctl_createv(clog, 0, NULL, &rnode,
+		CTLFLAG_PERMANENT, CTLTYPE_NODE, "interfaces",
+		SYSCTL_DESCR("Per-interface controls"),
+		NULL, 0, NULL, 0,
+		CTL_NET, CTL_CREATE, CTL_EOL) != 0)
+		goto bad;
+	if (sysctl_createv(clog, 0, &rnode, &rnode,
+		CTLFLAG_PERMANENT, CTLTYPE_NODE, ifp->if_xname,
+		SYSCTL_DESCR("Interface controls"),
+		NULL, 0, NULL, 0,
+		CTL_CREATE, CTL_EOL) != 0)
+		goto bad;
+	if (sysctl_createv(clog, 0, &rnode, &rnode,
+		CTLFLAG_PERMANENT, CTLTYPE_NODE, "watchdog",
+		SYSCTL_DESCR("Interface watchdog controls"),
+		NULL, 0, NULL, 0,
+		CTL_CREATE, CTL_EOL) != 0)
+		goto bad;
+	if (sysctl_createv(clog, 0, &rnode, NULL,
+		CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "trigger",
+		SYSCTL_DESCR("Trigger watchdog timeout"),
+		sysctl_if_watchdog, 0, (int *)ifp, 0,
+		CTL_CREATE, CTL_EOL) != 0)
+		goto bad;
+
+	return;
+
+bad:
+	printf("%s: could not attach sysctl watchdog nodes\n", ifp->if_xname);
+}
+
 /*
  * Mark an interface up and notify protocols of
  * the transition.

From 06cc50ed2c982bd83fd990fc10158056acd63986 Mon Sep 17 00:00:00 2001
From: Taylor R Campbell <riastradh%NetBSD.org@localhost>
Date: Mon, 15 Aug 2022 11:35:30 +0000
Subject: [PATCH 4/4] WIP: vioif(4): On watchdog timeout, do vioif_init.

This exercises a heavier-weight blocking path.
---
 sys/dev/pci/if_vioif.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/sys/dev/pci/if_vioif.c b/sys/dev/pci/if_vioif.c
index 1b9098e001fc..7dc12d737b8c 100644
--- a/sys/dev/pci/if_vioif.c
+++ b/sys/dev/pci/if_vioif.c
@@ -1483,6 +1483,11 @@ vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
 void
 vioif_watchdog(struct ifnet *ifp)
 {
+#if 1
+	IFNET_LOCK(ifp);
+	vioif_init(ifp);
+	IFNET_UNLOCK(ifp);
+#else
 	struct vioif_softc *sc = ifp->if_softc;
 	int i;
 
@@ -1491,6 +1496,7 @@ vioif_watchdog(struct ifnet *ifp)
 			vioif_tx_queue_clear(&sc->sc_txq[i]);
 		}
 	}
+#endif
 }
 
 /*


Home | Main Index | Thread Index | Old Index