tech-net archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[PATCH] Defer if_slowtimo to wq; make mii_down wait for mii_phy_auto
The attached patch series makes mii_down wait for mii_phy_auto. This
means that if a network interface driver's if_stop routine
(a) ensures that any driver-triggered calls to mii_tick have stopped
(e.g., callout_halt(&sc->sc_tick_ch)), and
(b) calls mii_down,
then after mii_down returns, the mii callbacks are guaranteed not to
be called again, until if_init starts it all up again.
I suspect drivers were historically written under this assumption, and
this lore was lost along the way in MPification.
In order to make this work, mii_down has to be called in sleepable
context (except possibly for holding the driver's ifmedia lock). That
means, in turn, the if_stop routines that call mii_down must be called
in thread context. They are usually called from one of two contexts:
(a) ioctl(SIOCSIFFLAGS), which is always sleepable and run with
the thread-only IFNET_LOCK held; or
(b) if_watchdog (or driver-specific watchdog logic), which is
currently called from softint context via callout and is never
sleepable.
So the first patch defers if_watchdog to workqueue. This way, drivers
that call if_stop from if_watchdog do so in thread context, so that
mii_down can safely block in the second patch.
The few MP-safe drivers that have been adapted to their own watchdog
timers can be arranged to ensure those defer to workqueue context
directly -- and some like wm(4), bge(4), ixg(4), ixv(4), and iavf(4)
already have. The only driver I see from a cursory skim that still
needs to defer to workqueue is vmx(4).
The third patch adds a sysctl net.interfaces.ifN.watchdog.trigger to
trigger the watchdog for testing, and the last patch makes the
vioif(4) watchdog use vioif_init just to exercise that path -- not to
be committed.
Comments? Review? Testing?
From b95794b89e46c8c5d51166cc00f9fd2bf134e2b6 Mon Sep 17 00:00:00 2001
From: Taylor R Campbell <riastradh%NetBSD.org@localhost>
Date: Mon, 15 Aug 2022 09:49:08 +0000
Subject: [PATCH 1/4] ifnet(9): Defer if_watchdog (a.k.a. if_slowtimo) to
workqueue.
This is necessary so that mii_down, and the *_init/stop routines that
call it, can sleep waiting for MII callouts on other CPUs.
---
sys/net/if.c | 95 +++++++++++++++++++++++++++++++++++++++-------------
sys/net/if.h | 2 +-
2 files changed, 72 insertions(+), 25 deletions(-)
diff --git a/sys/net/if.c b/sys/net/if.c
index b94cbebe3c82..f471cebcbd73 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -197,6 +197,8 @@ static struct psref_class *ifnet_psref_class __read_mostly;
static pserialize_t ifnet_psz;
static struct workqueue *ifnet_link_state_wq __read_mostly;
+static struct workqueue *if_slowtimo_wq __read_mostly;
+
static kmutex_t if_clone_mtx;
struct ifnet *lo0ifp;
@@ -221,7 +223,8 @@ static int doifioctl(struct socket *, u_long, void *, struct lwp *);
static void if_detach_queues(struct ifnet *, struct ifqueue *);
static void sysctl_sndq_setup(struct sysctllog **, const char *,
struct ifaltq *);
-static void if_slowtimo(void *);
+static void if_slowtimo_intr(void *);
+static void if_slowtimo_work(struct work *, void *);
static void if_attachdomain1(struct ifnet *);
static int ifconf(u_long, void *);
static int if_transmit(struct ifnet *, struct mbuf *);
@@ -255,6 +258,15 @@ static void if_deferred_start_softint(void *);
static void if_deferred_start_common(struct ifnet *);
static void if_deferred_start_destroy(struct ifnet *);
+struct if_slowtimo_data {
+ kmutex_t isd_lock;
+ struct callout isd_ch;
+ struct work isd_work;
+ struct ifnet *isd_ifp;
+ bool isd_queued;
+ bool isd_dying;
+};
+
#if defined(INET) || defined(INET6)
static void sysctl_net_pktq_setup(struct sysctllog **, int);
#endif
@@ -333,6 +345,10 @@ ifinit1(void)
KASSERT(error == 0);
PSLIST_INIT(&ifnet_pslist);
+ error = workqueue_create(&if_slowtimo_wq, "ifwdog",
+ if_slowtimo_work, NULL, PRI_SOFTNET, IPL_SOFTCLOCK, 0);
+ KASSERTMSG(error == 0, "error=%d", error);
+
if_indexlim = 8;
if_pfil = pfil_head_create(PFIL_TYPE_IFNET, NULL);
@@ -779,11 +795,17 @@ if_register(ifnet_t *ifp)
rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
if (ifp->if_slowtimo != NULL) {
- ifp->if_slowtimo_ch =
- kmem_zalloc(sizeof(*ifp->if_slowtimo_ch), KM_SLEEP);
- callout_init(ifp->if_slowtimo_ch, 0);
- callout_setfunc(ifp->if_slowtimo_ch, if_slowtimo, ifp);
- if_slowtimo(ifp);
+ struct if_slowtimo_data *isd;
+
+ isd = kmem_zalloc(sizeof(*isd), KM_SLEEP);
+ mutex_init(&isd->isd_lock, MUTEX_DEFAULT, IPL_SOFTCLOCK);
+ callout_init(&isd->isd_ch, 0);
+ callout_setfunc(&isd->isd_ch, if_slowtimo_intr, ifp);
+ isd->isd_ifp = ifp;
+
+ ifp->if_slowtimo_data = isd;
+
+ if_slowtimo_intr(ifp);
}
if (ifp->if_transmit == NULL || ifp->if_transmit == if_nulltransmit)
@@ -1350,11 +1372,20 @@ if_detach(struct ifnet *ifp)
pserialize_perform(ifnet_psz);
IFNET_GLOBAL_UNLOCK();
- if (ifp->if_slowtimo != NULL && ifp->if_slowtimo_ch != NULL) {
- ifp->if_slowtimo = NULL;
- callout_halt(ifp->if_slowtimo_ch, NULL);
- callout_destroy(ifp->if_slowtimo_ch);
- kmem_free(ifp->if_slowtimo_ch, sizeof(*ifp->if_slowtimo_ch));
+ if (ifp->if_slowtimo != NULL) {
+ struct if_slowtimo_data *isd = ifp->if_slowtimo_data;
+
+ mutex_enter(&isd->isd_lock);
+ isd->isd_dying = true;
+ mutex_exit(&isd->isd_lock);
+ callout_halt(&isd->isd_ch, NULL);
+ workqueue_wait(if_slowtimo_wq, &isd->isd_work);
+ callout_destroy(&isd->isd_ch);
+ mutex_destroy(&isd->isd_lock);
+ kmem_free(isd, sizeof(*isd));
+
+ ifp->if_slowtimo_data = NULL; /* paranoia */
+ ifp->if_slowtimo = NULL; /* paranoia */
}
if_deferred_start_destroy(ifp);
@@ -2651,24 +2682,40 @@ if_up_locked(struct ifnet *ifp)
* call the appropriate interface routine on expiration.
*/
static void
-if_slowtimo(void *arg)
+if_slowtimo_intr(void *arg)
{
- void (*slowtimo)(struct ifnet *);
struct ifnet *ifp = arg;
- int s;
-
- slowtimo = ifp->if_slowtimo;
- if (__predict_false(slowtimo == NULL))
- return;
+ struct if_slowtimo_data *isd = ifp->if_slowtimo_data;
+
+ mutex_enter(&isd->isd_lock);
+ if (!isd->isd_dying) {
+ if (ifp->if_timer != 0 &&
+ --ifp->if_timer == 0 &&
+ !isd->isd_queued) {
+ isd->isd_queued = true;
+ workqueue_enqueue(if_slowtimo_wq, &isd->isd_work,
+ NULL);
+ } else {
+ callout_schedule(&isd->isd_ch, hz / IFNET_SLOWHZ);
+ }
+ }
+ mutex_exit(&isd->isd_lock);
+}
- s = splnet();
- if (ifp->if_timer != 0 && --ifp->if_timer == 0)
- (*slowtimo)(ifp);
+static void
+if_slowtimo_work(struct work *work, void *arg)
+{
+ struct if_slowtimo_data *isd =
+ container_of(work, struct if_slowtimo_data, isd_work);
+ struct ifnet *ifp = isd->isd_ifp;
- splx(s);
+ (*ifp->if_slowtimo)(ifp);
- if (__predict_true(ifp->if_slowtimo != NULL))
- callout_schedule(ifp->if_slowtimo_ch, hz / IFNET_SLOWHZ);
+ mutex_enter(&isd->isd_lock);
+ isd->isd_queued = false;
+ if (!isd->isd_dying)
+ callout_schedule(&isd->isd_ch, hz / IFNET_SLOWHZ);
+ mutex_exit(&isd->isd_lock);
}
/*
diff --git a/sys/net/if.h b/sys/net/if.h
index 5ff50b49949b..0b86ec1fd895 100644
--- a/sys/net/if.h
+++ b/sys/net/if.h
@@ -414,7 +414,7 @@ typedef struct ifnet {
kmutex_t *if_ioctl_lock; /* :: */
char *if_description; /* i: interface description */
#ifdef _KERNEL /* XXX kvm(3) */
- struct callout *if_slowtimo_ch;/* :: */
+ struct if_slowtimo_data *if_slowtimo_data; /* :: */
struct krwlock *if_afdata_lock;/* :: */
struct if_percpuq
*if_percpuq; /* :: we should remove it in the future */
From e20632f9923a21d8da4fbb0633095bad574f6839 Mon Sep 17 00:00:00 2001
From: Taylor R Campbell <riastradh%NetBSD.org@localhost>
Date: Sun, 14 Aug 2022 22:03:06 +0000
Subject: [PATCH 2/4] mii(4): Make mii_down wait for concurrent mii_phy_auto to
finish.
This is important for callers in if_stop routines to guarantee that
all concurrent access to the mii registers has ended.
Caveat: It won't work for drivers that call mii_down from softint
context, which usually happens via callout -> if_watchdog -> if_init
-> if_stop -> mii_down. But the watchdog if_stop/init dance needs to
be deferred to thread context anyway -- TBD.
---
sys/dev/mii/mii_physubr.c | 42 ++++++++++++---------------------------
1 file changed, 13 insertions(+), 29 deletions(-)
diff --git a/sys/dev/mii/mii_physubr.c b/sys/dev/mii/mii_physubr.c
index 0f45b5ddd426..213195b3b008 100644
--- a/sys/dev/mii/mii_physubr.c
+++ b/sys/dev/mii/mii_physubr.c
@@ -435,23 +435,18 @@ mii_phy_down(struct mii_softc *sc)
KASSERT(mii_locked(sc->mii_pdata));
- if ((sc->mii_flags & (MIIF_DOINGAUTO|MIIF_AUTOTSLEEP)) ==
- (MIIF_DOINGAUTO|MIIF_AUTOTSLEEP)) {
- /*
- * Try to stop it.
- *
- * - If we stopped it before it expired, callout_stop
- * returns 0, and it is our responsibility to clear
- * MIIF_DOINGAUTO.
- *
- * - Otherwise, we're too late -- the callout has
- * already begun, and we must leave MIIF_DOINGAUTO
- * set so mii_phy_detach will wait for it to
- * complete.
- */
- if (!callout_stop(&sc->mii_nway_ch))
- sc->mii_flags &= ~MIIF_DOINGAUTO;
+ if (sc->mii_flags & MIIF_AUTOTSLEEP) {
+ while (sc->mii_flags & MIIF_DOINGAUTO) {
+ cv_wait(&sc->mii_nway_cv,
+ sc->mii_pdata->mii_media.ifm_lock);
+ }
+ } else {
+ if (sc->mii_flags & MIIF_DOINGAUTO) {
+ callout_halt(&sc->mii_nway_ch,
+ sc->mii_pdata->mii_media.ifm_lock);
+ }
}
+ KASSERT((sc->mii_flags & MIIF_DOINGAUTO) == 0);
}
void
@@ -691,19 +686,8 @@ mii_phy_detach(device_t self, int flags)
{
struct mii_softc *sc = device_private(self);
- mii_lock(sc->mii_pdata);
- if (sc->mii_flags & MIIF_AUTOTSLEEP) {
- while (sc->mii_flags & MIIF_DOINGAUTO) {
- cv_wait(&sc->mii_nway_cv,
- sc->mii_pdata->mii_media.ifm_lock);
- }
- } else {
- if (sc->mii_flags & MIIF_DOINGAUTO) {
- callout_halt(&sc->mii_nway_ch,
- sc->mii_pdata->mii_media.ifm_lock);
- }
- }
- mii_unlock(sc->mii_pdata);
+ /* No mii_lock because mii_flags should be stable by now. */
+ KASSERT((sc->mii_flags & MIIF_DOINGAUTO) == 0);
if (sc->mii_flags & MIIF_AUTOTSLEEP)
cv_destroy(&sc->mii_nway_cv);
From b5ce3456daae1d37452ffc0fea1a47a13c5b754e Mon Sep 17 00:00:00 2001
From: Taylor R Campbell <riastradh%NetBSD.org@localhost>
Date: Mon, 15 Aug 2022 11:33:56 +0000
Subject: [PATCH 3/4] ifnet(9): Add sysctl net.interfaces.ifN.watchdog.trigger.
For interfaces that use if_watchdog, this forces it to be called at
the next tick.
---
sys/net/if.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 77 insertions(+), 6 deletions(-)
diff --git a/sys/net/if.c b/sys/net/if.c
index f471cebcbd73..a362b7a4d5e0 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -225,6 +225,8 @@ static void sysctl_sndq_setup(struct sysctllog **, const char *,
struct ifaltq *);
static void if_slowtimo_intr(void *);
static void if_slowtimo_work(struct work *, void *);
+static int sysctl_if_watchdog(SYSCTLFN_PROTO);
+static void sysctl_watchdog_setup(struct ifnet *);
static void if_attachdomain1(struct ifnet *);
static int ifconf(u_long, void *);
static int if_transmit(struct ifnet *, struct mbuf *);
@@ -265,6 +267,7 @@ struct if_slowtimo_data {
struct ifnet *isd_ifp;
bool isd_queued;
bool isd_dying;
+ bool isd_trigger;
};
#if defined(INET) || defined(INET6)
@@ -806,6 +809,8 @@ if_register(ifnet_t *ifp)
ifp->if_slowtimo_data = isd;
if_slowtimo_intr(ifp);
+
+ sysctl_watchdog_setup(ifp);
}
if (ifp->if_transmit == NULL || ifp->if_transmit == if_nulltransmit)
@@ -2689,12 +2694,13 @@ if_slowtimo_intr(void *arg)
mutex_enter(&isd->isd_lock);
if (!isd->isd_dying) {
- if (ifp->if_timer != 0 &&
- --ifp->if_timer == 0 &&
- !isd->isd_queued) {
- isd->isd_queued = true;
- workqueue_enqueue(if_slowtimo_wq, &isd->isd_work,
- NULL);
+ if (isd->isd_trigger ||
+ (ifp->if_timer != 0 && --ifp->if_timer == 0)) {
+ if (!isd->isd_queued) {
+ isd->isd_queued = true;
+ workqueue_enqueue(if_slowtimo_wq,
+ &isd->isd_work, NULL);
+ }
} else {
callout_schedule(&isd->isd_ch, hz / IFNET_SLOWHZ);
}
@@ -2712,12 +2718,77 @@ if_slowtimo_work(struct work *work, void *arg)
(*ifp->if_slowtimo)(ifp);
mutex_enter(&isd->isd_lock);
+ if (isd->isd_trigger) {
+ isd->isd_trigger = false;
+ printf("%s: watchdog triggered\n", ifp->if_xname);
+ }
isd->isd_queued = false;
if (!isd->isd_dying)
callout_schedule(&isd->isd_ch, hz / IFNET_SLOWHZ);
mutex_exit(&isd->isd_lock);
}
+static int
+sysctl_if_watchdog(SYSCTLFN_ARGS)
+{
+ struct sysctlnode node = *rnode;
+ struct ifnet *ifp = node.sysctl_data;
+ struct if_slowtimo_data *isd = ifp->if_slowtimo_data;
+ int arg = 0;
+ int error;
+
+ node.sysctl_data = &arg;
+ error = sysctl_lookup(SYSCTLFN_CALL(&node));
+ if (error || newp == NULL)
+ return error;
+ if (arg) {
+ mutex_enter(&isd->isd_lock);
+ KASSERT(!isd->isd_dying);
+ isd->isd_trigger = true;
+ callout_schedule(&isd->isd_ch, 0);
+ mutex_exit(&isd->isd_lock);
+ }
+
+ return 0;
+}
+
+static void
+sysctl_watchdog_setup(struct ifnet *ifp)
+{
+ struct sysctllog **clog = &ifp->if_sysctl_log;
+ const struct sysctlnode *rnode;
+
+ if (sysctl_createv(clog, 0, NULL, &rnode,
+ CTLFLAG_PERMANENT, CTLTYPE_NODE, "interfaces",
+ SYSCTL_DESCR("Per-interface controls"),
+ NULL, 0, NULL, 0,
+ CTL_NET, CTL_CREATE, CTL_EOL) != 0)
+ goto bad;
+ if (sysctl_createv(clog, 0, &rnode, &rnode,
+ CTLFLAG_PERMANENT, CTLTYPE_NODE, ifp->if_xname,
+ SYSCTL_DESCR("Interface controls"),
+ NULL, 0, NULL, 0,
+ CTL_CREATE, CTL_EOL) != 0)
+ goto bad;
+ if (sysctl_createv(clog, 0, &rnode, &rnode,
+ CTLFLAG_PERMANENT, CTLTYPE_NODE, "watchdog",
+ SYSCTL_DESCR("Interface watchdog controls"),
+ NULL, 0, NULL, 0,
+ CTL_CREATE, CTL_EOL) != 0)
+ goto bad;
+ if (sysctl_createv(clog, 0, &rnode, NULL,
+ CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "trigger",
+ SYSCTL_DESCR("Trigger watchdog timeout"),
+ sysctl_if_watchdog, 0, (int *)ifp, 0,
+ CTL_CREATE, CTL_EOL) != 0)
+ goto bad;
+
+ return;
+
+bad:
+ printf("%s: could not attach sysctl watchdog nodes\n", ifp->if_xname);
+}
+
/*
* Mark an interface up and notify protocols of
* the transition.
From 06cc50ed2c982bd83fd990fc10158056acd63986 Mon Sep 17 00:00:00 2001
From: Taylor R Campbell <riastradh%NetBSD.org@localhost>
Date: Mon, 15 Aug 2022 11:35:30 +0000
Subject: [PATCH 4/4] WIP: vioif(4): On watchdog timeout, do vioif_init.
This exercises a heavier-weight blocking path.
---
sys/dev/pci/if_vioif.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/sys/dev/pci/if_vioif.c b/sys/dev/pci/if_vioif.c
index 1b9098e001fc..7dc12d737b8c 100644
--- a/sys/dev/pci/if_vioif.c
+++ b/sys/dev/pci/if_vioif.c
@@ -1483,6 +1483,11 @@ vioif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
void
vioif_watchdog(struct ifnet *ifp)
{
+#if 1
+ IFNET_LOCK(ifp);
+ vioif_init(ifp);
+ IFNET_UNLOCK(ifp);
+#else
struct vioif_softc *sc = ifp->if_softc;
int i;
@@ -1491,6 +1496,7 @@ vioif_watchdog(struct ifnet *ifp)
vioif_tx_queue_clear(&sc->sc_txq[i]);
}
}
+#endif
}
/*
Home |
Main Index |
Thread Index |
Old Index