tech-net archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[PATCH] vmx(4) MP bug fixes



The attached patch attempts to fix various MP bugs I found by code
inspection in vmx(4).

There remains a potentially serious deadlock -- because the core lock
is held across things like callout_halt and workqueue_wait, and yet is
also taken in softint context where callout_halt and workqueue_wait
are absolutely forbidden -- but this bug is not new and requires a
little more work to disentangle the core lock into separate purposes.
TBD in a later change.

I'm not sure how to test it.  Could anyone who is familiar with using
or developing vmx(4) take a look?
From 8399f65ec3c0e8e1ee7a40115a4ea24196e4e410 Mon Sep 17 00:00:00 2001
From: Taylor R Campbell <riastradh%NetBSD.org@localhost>
Date: Mon, 15 Aug 2022 12:37:58 +0000
Subject: [PATCH] vmx(4): Fix various MP bugs.

- Defer reset to workqueue.
  => vmxnet3_stop_locked is forbidden in softint.
  => XXX Problem: We still take the core lock in softint, and we
     still take the core lock around vmxnet3_stop_locked.  TBD.
- Touch if_flags only under IFNET_LOCK.
  => Cache ifp->if_flags & IFF_PROMISC in vmxnet3_ifflags_cb.
  => Don't call vmxnet3_set_rxfilter unless up and running; cache
     this as vmx_mcastactive.  Use ENETRESET in vmxnet3_ifflags_cb
     instead of calling vmxnet3_set_rxfilter directly.
     . (The cache is currently serialized by the core lock, but it
       might reasonably be serialized by an independent lock like in
       usbnet(9).)
- Fix vmxnet3_stop_rendezvous so it actually does something.
  => New vxtxq_stopping, vxrxq_stopping variables synchronize with
     Rx/Tx interrupt handlers.
- Sprinkle IFNET_LOCK and core lock assertions.
---
 sys/dev/pci/if_vmx.c | 130 ++++++++++++++++++++++++++++++++++++-------
 1 file changed, 110 insertions(+), 20 deletions(-)

diff --git a/sys/dev/pci/if_vmx.c b/sys/dev/pci/if_vmx.c
index d3850d143dd6..6291bc633c92 100644
--- a/sys/dev/pci/if_vmx.c
+++ b/sys/dev/pci/if_vmx.c
@@ -213,6 +213,8 @@ struct vmxnet3_txqueue {
 	struct evcnt vxtxq_watchdogto;
 	struct evcnt vxtxq_defragged;
 	struct evcnt vxtxq_defrag_failed;
+
+	bool vxtxq_stopping;
 };
 
 #if 0
@@ -242,6 +244,8 @@ struct vmxnet3_rxqueue {
 	struct evcnt vxrxq_deferreq;
 	struct evcnt vxrxq_mgetcl_failed;
 	struct evcnt vxrxq_mbuf_load_failed;
+
+	bool vxrxq_stopping;
 };
 
 struct vmxnet3_queue {
@@ -303,6 +307,9 @@ struct vmxnet3_softc {
 
 	kmutex_t *vmx_mtx;
 
+	int vmx_if_flags;
+	bool vmx_promisc;
+	bool vmx_mcastactive;
 	uint8_t *vmx_mcast;
 	void *vmx_qs;
 	struct vmxnet3_rss_shared *vmx_rss;
@@ -323,6 +330,10 @@ struct vmxnet3_softc {
 
 	bool vmx_txrx_workqueue;
 	struct workqueue *vmx_queue_wq;
+
+	struct workqueue *vmx_reset_wq;
+	struct work vmx_reset_work;
+	bool vmx_reset_pending;
 };
 
 #define VMXNET3_STAT
@@ -447,6 +458,7 @@ static int vmxnet3_ifflags_cb(struct ethercom *);
 static int vmxnet3_watchdog(struct vmxnet3_txqueue *);
 static void vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
 static void vmxnet3_tick(void *);
+static void vmxnet3_reset_work(struct work *, void *);
 static void vmxnet3_if_link_status(struct vmxnet3_softc *);
 static bool vmxnet3_cmd_link_status(struct ifnet *);
 static void vmxnet3_ifmedia_status(struct ifnet *, struct ifmediareq *);
@@ -645,6 +657,18 @@ vmxnet3_attach(device_t parent, device_t self, void *aux)
 	if (error)
 		return;
 
+	char buf[128];
+	snprintf(buf, sizeof(buf), "%s_reset", device_xname(sc->vmx_dev));
+	error = workqueue_create(&sc->vmx_reset_wq, buf,
+	    vmxnet3_reset_work, sc, VMXNET3_WORKQUEUE_PRI, IPL_SOFTCLOCK,
+	    WQ_MPSAFE);
+	if (error) {
+		aprint_error_dev(sc->vmx_dev,
+		    "failed to create reset workqueue: %d\n",
+		    error);
+		return;
+	}
+
 	sc->vmx_flags |= VMXNET3_FLAG_ATTACHED;
 }
 
@@ -1136,6 +1160,8 @@ vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q)
 		rxq->vxrxq_comp_ring.vxcr_ndesc += sc->vmx_nrxdescs;
 	}
 
+	rxq->vxrxq_stopping = true;
+
 	return (0);
 }
 
@@ -1171,6 +1197,8 @@ vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
 
 	txq->vxtxq_interq = pcq_create(sc->vmx_ntxdescs, KM_SLEEP);
 
+	txq->vxtxq_stopping = true;
+
 	return (0);
 }
 
@@ -2348,7 +2376,7 @@ vmxnet3_rxq_eof(struct vmxnet3_rxqueue *rxq, u_int limit)
 
 	VMXNET3_RXQ_LOCK_ASSERT(rxq);
 
-	if ((ifp->if_flags & IFF_RUNNING) == 0)
+	if (rxq->vxrxq_stopping)
 		return more;
 
 	m_head = rxq->vxrxq_mhead;
@@ -2454,7 +2482,7 @@ vmxnet3_rxq_eof(struct vmxnet3_rxqueue *rxq, u_int limit)
 			m_head = m_tail = NULL;
 
 			/* Must recheck after dropping the Rx lock. */
-			if ((ifp->if_flags & IFF_RUNNING) == 0)
+			if (rxq->vxrxq_stopping)
 				break;
 		}
 
@@ -2723,11 +2751,13 @@ vmxnet3_stop_rendezvous(struct vmxnet3_softc *sc)
 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
 		rxq = &sc->vmx_queue[i].vxq_rxqueue;
 		VMXNET3_RXQ_LOCK(rxq);
+		rxq->vxrxq_stopping = true;
 		VMXNET3_RXQ_UNLOCK(rxq);
 	}
 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
 		txq = &sc->vmx_queue[i].vxq_txqueue;
 		VMXNET3_TXQ_LOCK(txq);
+		txq->vxtxq_stopping = true;
 		VMXNET3_TXQ_UNLOCK(txq);
 	}
 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
@@ -2739,22 +2769,24 @@ vmxnet3_stop_rendezvous(struct vmxnet3_softc *sc)
 static void
 vmxnet3_stop_locked(struct vmxnet3_softc *sc)
 {
-	struct ifnet *ifp;
+	struct ifnet *ifp = &sc->vmx_ethercom.ec_if;
 	int q;
 
-	ifp = &sc->vmx_ethercom.ec_if;
 	VMXNET3_CORE_LOCK_ASSERT(sc);
+	KASSERT(IFNET_LOCKED(ifp));
 
-	ifp->if_flags &= ~IFF_RUNNING;
+	vmxnet3_stop_rendezvous(sc);
+
+	sc->vmx_mcastactive = false;
 	sc->vmx_link_active = 0;
-	callout_stop(&sc->vmx_tick);
+	callout_halt(&sc->vmx_tick, sc->vmx_mtx);
+
+	ifp->if_flags &= ~IFF_RUNNING;
 
 	/* Disable interrupts. */
 	vmxnet3_disable_all_intrs(sc);
 	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
 
-	vmxnet3_stop_rendezvous(sc);
-
 	for (q = 0; q < sc->vmx_ntxqueues; q++)
 		vmxnet3_txstop(sc, &sc->vmx_queue[q].vxq_txqueue);
 	for (q = 0; q < sc->vmx_nrxqueues; q++)
@@ -2768,6 +2800,8 @@ vmxnet3_stop(struct ifnet *ifp, int disable)
 {
 	struct vmxnet3_softc *sc = ifp->if_softc;
 
+	KASSERT(IFNET_LOCKED(ifp));
+
 	VMXNET3_CORE_LOCK(sc);
 	vmxnet3_stop_locked(sc);
 	VMXNET3_CORE_UNLOCK(sc);
@@ -2889,6 +2923,8 @@ static int
 vmxnet3_reinit(struct vmxnet3_softc *sc)
 {
 
+	VMXNET3_CORE_LOCK_ASSERT(sc);
+
 	vmxnet3_set_lladdr(sc);
 	vmxnet3_reinit_shared_data(sc);
 
@@ -2907,8 +2943,12 @@ static int
 vmxnet3_init_locked(struct vmxnet3_softc *sc)
 {
 	struct ifnet *ifp = &sc->vmx_ethercom.ec_if;
+	int q;
 	int error;
 
+	KASSERT(IFNET_LOCKED(ifp));
+	VMXNET3_CORE_LOCK_ASSERT(sc);
+
 	vmxnet3_stop_locked(sc);
 
 	error = vmxnet3_reinit(sc);
@@ -2919,10 +2959,22 @@ vmxnet3_init_locked(struct vmxnet3_softc *sc)
 
 	ifp->if_flags |= IFF_RUNNING;
 	vmxnet3_if_link_status(sc);
+	sc->vmx_mcastactive = true;
 
 	vmxnet3_enable_all_intrs(sc);
 	callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
 
+	for (q = 0; q < sc->vmx_ntxqueues; q++) {
+		VMXNET3_TXQ_LOCK(&sc->vmx_queue[q].vxq_txqueue);
+		sc->vmx_queue[q].vxq_txqueue.vxtxq_stopping = false;
+		VMXNET3_TXQ_UNLOCK(&sc->vmx_queue[q].vxq_txqueue);
+	}
+	for (q = 0; q < sc->vmx_nrxqueues; q++) {
+		VMXNET3_RXQ_LOCK(&sc->vmx_queue[q].vxq_rxqueue);
+		sc->vmx_queue[q].vxq_rxqueue.vxrxq_stopping = false;
+		VMXNET3_RXQ_UNLOCK(&sc->vmx_queue[q].vxq_rxqueue);
+	}
+
 	return (0);
 }
 
@@ -2932,6 +2984,8 @@ vmxnet3_init(struct ifnet *ifp)
 	struct vmxnet3_softc *sc = ifp->if_softc;
 	int error;
 
+	KASSERT(IFNET_LOCKED(ifp));
+
 	VMXNET3_CORE_LOCK(sc);
 	error = vmxnet3_init_locked(sc);
 	VMXNET3_CORE_UNLOCK(sc);
@@ -3176,8 +3230,7 @@ vmxnet3_tx_common_locked(struct ifnet *ifp, struct vmxnet3_txqueue *txq, int txt
 
 	VMXNET3_TXQ_LOCK_ASSERT(txq);
 
-	if ((ifp->if_flags & IFF_RUNNING) == 0 ||
-	    sc->vmx_link_active == 0)
+	if (txq->vxtxq_stopping || sc->vmx_link_active == 0)
 		return;
 
 	for (;;) {
@@ -3315,7 +3368,6 @@ vmxnet3_deferred_transmit(void *arg)
 static void
 vmxnet3_set_rxfilter(struct vmxnet3_softc *sc)
 {
-	struct ifnet *ifp = &sc->vmx_ethercom.ec_if;
 	struct ethercom *ec = &sc->vmx_ethercom;
 	struct vmxnet3_driver_shared *ds = sc->vmx_ds;
 	struct ether_multi *enm;
@@ -3323,6 +3375,8 @@ vmxnet3_set_rxfilter(struct vmxnet3_softc *sc)
 	u_int mode;
 	uint8_t *p;
 
+	VMXNET3_CORE_LOCK_ASSERT(sc);
+
 	ds->mcast_tablelen = 0;
 	ETHER_LOCK(ec);
 	CLR(ec->ec_flags, ETHER_F_ALLMULTI);
@@ -3335,7 +3389,7 @@ vmxnet3_set_rxfilter(struct vmxnet3_softc *sc)
 	mode = VMXNET3_RXMODE_BCAST | VMXNET3_RXMODE_UCAST;
 
 	ETHER_LOCK(ec);
-	if (ISSET(ifp->if_flags, IFF_PROMISC) ||
+	if (sc->vmx_promisc ||
 	    ec->ec_multicnt > VMXNET3_MULTICAST_MAX)
 		goto allmulti;
 
@@ -3372,7 +3426,7 @@ allmulti:
 	SET(ec->ec_flags, ETHER_F_ALLMULTI);
 	ETHER_UNLOCK(ec);
 	SET(mode, (VMXNET3_RXMODE_ALLMULTI | VMXNET3_RXMODE_MCAST));
-	if (ifp->if_flags & IFF_PROMISC)
+	if (sc->vmx_promisc)
 		SET(mode, VMXNET3_RXMODE_PROMISC);
 
 setit:
@@ -3388,6 +3442,14 @@ vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, void *data)
 	struct ifreq *ifr = (struct ifreq *)data;
 	int s, error = 0;
 
+	switch (cmd) {
+	case SIOCADDMULTI:
+	case SIOCDELMULTI:
+		break;
+	default:
+		KASSERT(IFNET_LOCKED(ifp));
+	}
+
 	switch (cmd) {
 	case SIOCSIFMTU: {
 		int nmtu = ifr->ifr_mtu;
@@ -3414,7 +3476,7 @@ vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, void *data)
 
 	if (error == ENETRESET) {
 		VMXNET3_CORE_LOCK(sc);
-		if (ifp->if_flags & IFF_RUNNING)
+		if (sc->vmx_mcastactive)
 			vmxnet3_set_rxfilter(sc);
 		VMXNET3_CORE_UNLOCK(sc);
 		error = 0;
@@ -3426,17 +3488,29 @@ vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, void *data)
 static int
 vmxnet3_ifflags_cb(struct ethercom *ec)
 {
-	struct vmxnet3_softc *sc;
+	struct ifnet *ifp = &ec->ec_if;
+	struct vmxnet3_softc *sc = ifp->if_softc;
+	int error = 0;
 
-	sc = ec->ec_if.if_softc;
+	KASSERT(IFNET_LOCKED(ifp));
 
 	VMXNET3_CORE_LOCK(sc);
+	const unsigned short changed = ifp->if_flags ^ sc->vmx_if_flags;
+	if ((changed & ~(IFF_CANTCHANGE | IFF_DEBUG)) == 0) {
+		sc->vmx_if_flags = ifp->if_flags;
+		if ((changed & IFF_PROMISC) != 0) {
+			sc->vmx_promisc = ifp->if_flags & IFF_PROMISC;
+			error = ENETRESET;
+		}
+	} else {
+		error = ENETRESET;
+	}
 	vmxnet3_set_rxfilter(sc);
 	VMXNET3_CORE_UNLOCK(sc);
 
 	vmxnet3_if_link_status(sc);
 
-	return 0;
+	return error;
 }
 
 static int
@@ -3484,14 +3558,35 @@ vmxnet3_tick(void *xsc)
 	for (i = 0; i < sc->vmx_ntxqueues; i++)
 		timedout |= vmxnet3_watchdog(&sc->vmx_queue[i].vxq_txqueue);
 
-	if (timedout != 0)
-		vmxnet3_init_locked(sc);
-	else
+	if (timedout != 0) {
+		if (!sc->vmx_reset_pending) {
+			sc->vmx_reset_pending = true;
+			workqueue_enqueue(sc->vmx_reset_wq,
+			    &sc->vmx_reset_work, NULL);
+		}
+	} else {
 		callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
+	}
 
 	VMXNET3_CORE_UNLOCK(sc);
 }
 
+static void
+vmxnet3_reset_work(struct work *work, void *arg)
+{
+	struct vmxnet3_softc *sc = arg;
+	struct ifnet *ifp = &sc->vmx_ethercom.ec_if;
+
+	/* Clear first so a timeout during the reset can schedule another. */
+	VMXNET3_CORE_LOCK(sc);
+	sc->vmx_reset_pending = false;
+	VMXNET3_CORE_UNLOCK(sc);
+
+	IFNET_LOCK(ifp);
+	(void)vmxnet3_init(ifp);
+	IFNET_UNLOCK(ifp);
+}
+
 /*
  * update link state of ifnet and softc
  */


Home | Main Index | Thread Index | Old Index