NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: kern/52211: vioif stops on dmamap load error



The driver fails to lock against interrupts without NET_MPSAFE, as it
doesn't do splnet()/splx() as it IMO should. Maybe the biglock
actually is enough, just looks unsafe. IMO it should just always use
the mutexes.

However, the wedge seems to be caused by something else.

If the dmamap load fails while there is no TX active, code still sets
IFF_OACTIVE. The flag is only reset when TX request finishes. Since
nothing ever resets it back, and the start function ignores any
further requests while the flag is set, it would stop sending anything
until interface reset. So minimal fix would be to set the IFF_OACTIVE
only when virtio_enqueue_prep() fails, and leave it alone for the
other errors.

The m_defrag() call anyway looks like good idea - after all it's
better to try to send the data then just dropping it on the floor. I
think it's perfectly possible to have mbufs very fragmented.

Also, the code for trying to dequeue finished requests when we run out
of resources looks somewhat not necessary, IMO it would be simplier to
remove it as it only obfuscates the logic.

What you think about attached patch, based on Masanobu SAITOH's diff?

Besides the things above, I've also changed it so that interface queue
limit is setup according to virtio queue size, i.e. upper layer
wouldn't ever queue more than interface can handle.

Jaromir

2017-05-03 10:40 GMT+02:00  <hannken%eis.cs.tu-bs.de@localhost>:
>>Number:         52211
>>Category:       kern
>>Synopsis:       vioif stops on dmamap load error
>>Confidential:   no
>>Severity:       serious
>>Priority:       medium
>>Responsible:    kern-bug-people
>>State:          open
>>Class:          sw-bug
>>Submitter-Id:   net
>>Arrival-Date:   Wed May 03 08:40:00 +0000 2017
>>Originator:     Juergen Hannken-Illjes
>>Release:        NetBSD 7.1
>>Organization:
>
>>Environment:
>
>
> System: NetBSD vpnserv.isf.cs.tu-bs.de 7.1 NetBSD 7.1 (gateway.i386) #0: Mon Mar 13 16:40:12 MET 2017  build%builder.isf.cs.tu-bs.de@localhost:/build/nbsd7/obj/obj.i386/sys/arch/i386/compile/gateway.i386 i386
> Architecture: i386
> Machine: i386
>>Description:
>
> From time to time the machine prints
>
>         vioif0: tx dmamap load failed, error code 27
>
> and most times the interface seems to stop as the machine is no longer
> accessible from the network.
>
>      $NetBSD: if_vioif.c,v 1.7.2.3 2016/12/23 05:57:40 snj Exp $
>>How-To-Repeat:
>
>>Fix:
>
>
>>Unformatted:
>
>
Index: if_vioif.c
===================================================================
RCS file: /cvsroot/src/sys/dev/pci/if_vioif.c,v
retrieving revision 1.34
diff -u -p -r1.34 if_vioif.c
--- if_vioif.c	28 Mar 2017 04:10:33 -0000	1.34
+++ if_vioif.c	16 May 2017 22:22:32 -0000
@@ -226,8 +226,8 @@ struct vioif_softc {
 	}			sc_ctrl_inuse;
 	kcondvar_t		sc_ctrl_wait;
 	kmutex_t		sc_ctrl_wait_lock;
-	kmutex_t		*sc_tx_lock;
-	kmutex_t		*sc_rx_lock;
+	kmutex_t		sc_tx_lock;
+	kmutex_t		sc_rx_lock;
 	bool			sc_stopping;
 
 	bool			sc_has_ctrl;
@@ -235,12 +235,12 @@ struct vioif_softc {
 #define VIRTIO_NET_TX_MAXNSEGS		(16) /* XXX */
 #define VIRTIO_NET_CTRL_MAC_MAXENTRIES	(64) /* XXX */
 
-#define VIOIF_TX_LOCK(_sc)	if ((_sc)->sc_tx_lock) mutex_enter((_sc)->sc_tx_lock)
-#define VIOIF_TX_UNLOCK(_sc)	if ((_sc)->sc_tx_lock) mutex_exit((_sc)->sc_tx_lock)
-#define VIOIF_TX_LOCKED(_sc)	(!(_sc)->sc_tx_lock || mutex_owned((_sc)->sc_tx_lock))
-#define VIOIF_RX_LOCK(_sc)	if ((_sc)->sc_rx_lock) mutex_enter((_sc)->sc_rx_lock)
-#define VIOIF_RX_UNLOCK(_sc)	if ((_sc)->sc_rx_lock) mutex_exit((_sc)->sc_rx_lock)
-#define VIOIF_RX_LOCKED(_sc)	(!(_sc)->sc_rx_lock || mutex_owned((_sc)->sc_rx_lock))
+#define VIOIF_TX_LOCK(_sc)	mutex_enter(&(_sc)->sc_tx_lock)
+#define VIOIF_TX_UNLOCK(_sc)	mutex_exit(&(_sc)->sc_tx_lock)
+#define VIOIF_TX_LOCKED(_sc)	mutex_owned(&(_sc)->sc_tx_lock)
+#define VIOIF_RX_LOCK(_sc)	mutex_enter(&(_sc)->sc_rx_lock)
+#define VIOIF_RX_UNLOCK(_sc)	mutex_exit(&(_sc)->sc_rx_lock)
+#define VIOIF_RX_LOCKED(_sc)	mutex_owned(&(_sc)->sc_rx_lock)
 
 /* cfattach interface functions */
 static int	vioif_match(device_t, cfdata_t, void *);
@@ -439,7 +439,7 @@ vioif_alloc_mems(struct vioif_softc *sc)
 		C_L1(txhdr_dmamaps[i], tx_hdrs[i],
 		    sizeof(struct virtio_net_hdr), 1,
 		    WRITE, "tx header");
-		C(tx_dmamaps[i], NULL, ETHER_MAX_LEN, 16 /* XXX */, 0,
+		C(tx_dmamaps[i], NULL, ETHER_MAX_LEN, VIRTIO_NET_TX_MAXNSEGS, 0,
 		  "tx payload");
 	}
 
@@ -592,13 +592,8 @@ vioif_attach(device_t parent, device_t s
 
 	aprint_normal_dev(self, "Ethernet address %s\n", ether_sprintf(sc->sc_mac));
 
-#ifdef VIOIF_MPSAFE
-	sc->sc_tx_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
-	sc->sc_rx_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
-#else
-	sc->sc_tx_lock = NULL;
-	sc->sc_rx_lock = NULL;
-#endif
+	mutex_init(&sc->sc_tx_lock, MUTEX_DEFAULT, IPL_NET);
+	mutex_init(&sc->sc_rx_lock, MUTEX_DEFAULT, IPL_NET);
 	sc->sc_stopping = false;
 
 	/*
@@ -680,6 +675,8 @@ skip:
 	ifp->if_stop = vioif_stop;
 	ifp->if_capabilities = 0;
 	ifp->if_watchdog = vioif_watchdog;
+	IFQ_SET_MAXLEN(&ifp->if_snd, MAX(sc->sc_vq[VQ_TX].vq_num, IFQ_MAXLEN));
+	IFQ_SET_READY(&ifp->if_snd);
 
 	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
 
@@ -690,10 +687,8 @@ skip:
 	return;
 
 err:
-	if (sc->sc_tx_lock)
-		mutex_obj_free(sc->sc_tx_lock);
-	if (sc->sc_rx_lock)
-		mutex_obj_free(sc->sc_rx_lock);
+	mutex_destroy(&sc->sc_tx_lock);
+	mutex_destroy(&sc->sc_rx_lock);
 
 	if (sc->sc_has_ctrl) {
 		cv_destroy(&sc->sc_ctrl_wait);
@@ -799,7 +794,7 @@ vioif_start(struct ifnet *ifp)
 	struct virtio_softc *vsc = sc->sc_virtio;
 	struct virtqueue *vq = &sc->sc_vq[VQ_TX];
 	struct mbuf *m;
-	int queued = 0, retry = 0;
+	int queued = 0;
 
 	VIOIF_TX_LOCK(sc);
 
@@ -814,42 +809,57 @@ vioif_start(struct ifnet *ifp)
 		int slot, r;
 
 		IFQ_DEQUEUE(&ifp->if_snd, m);
-
 		if (m == NULL)
 			break;
 
-retry:
 		r = virtio_enqueue_prep(vsc, vq, &slot);
 		if (r == EAGAIN) {
 			ifp->if_flags |= IFF_OACTIVE;
-			vioif_tx_vq_done_locked(vq);
-			if (retry++ == 0)
-				goto retry;
-			else
-				break;
+			break;
 		}
 		if (r != 0)
 			panic("enqueue_prep for a tx buffer");
+
 		r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
 					 sc->sc_tx_dmamaps[slot],
 					 m, BUS_DMA_WRITE|BUS_DMA_NOWAIT);
 		if (r != 0) {
-			virtio_enqueue_abort(vsc, vq, slot);
-			aprint_error_dev(sc->sc_dev,
-			    "tx dmamap load failed, error code %d\n", r);
-			break;
+			/* maybe just too fragmented */
+			struct mbuf *newm;
+
+			newm = m_defrag(m, M_NOWAIT);
+			if (newm == NULL) {
+				aprint_error_dev(sc->sc_dev,
+				    "m_defrag() failed\n");
+				m_freem(m);
+				goto skip;
+			}
+
+			r = bus_dmamap_load_mbuf(virtio_dmat(vsc),
+					 sc->sc_tx_dmamaps[slot],
+					 newm, BUS_DMA_WRITE|BUS_DMA_NOWAIT);
+			if (r != 0) {
+				aprint_error_dev(sc->sc_dev,
+	   			    "tx dmamap load failed, error code %d\n",
+				    r);
+				m_freem(newm);
+skip:
+				virtio_enqueue_abort(vsc, vq, slot);
+				continue;
+			}
 		}
+
+		/* This should actually never fail */
 		r = virtio_enqueue_reserve(vsc, vq, slot,
 					sc->sc_tx_dmamaps[slot]->dm_nsegs + 1);
 		if (r != 0) {
+			aprint_error_dev(sc->sc_dev,
+	   		    "virtio_enqueue_reserve failed, error code %d\n",
+			    r);
 			bus_dmamap_unload(virtio_dmat(vsc),
 					  sc->sc_tx_dmamaps[slot]);
-			ifp->if_flags |= IFF_OACTIVE;
-			vioif_tx_vq_done_locked(vq);
-			if (retry++ == 0)
-				goto retry;
-			else
-				break;
+			/* slot already freed by virtio_enqueue_reserve */
+			continue;
 		}
 
 		sc->sc_tx_mbufs[slot] = m;
@@ -864,14 +874,13 @@ retry:
 		virtio_enqueue(vsc, vq, slot, sc->sc_txhdr_dmamaps[slot], true);
 		virtio_enqueue(vsc, vq, slot, sc->sc_tx_dmamaps[slot], true);
 		virtio_enqueue_commit(vsc, vq, slot, false);
+
 		queued++;
 		bpf_mtap(ifp, m);
 	}
 
-	if (m != NULL) {
-		ifp->if_flags |= IFF_OACTIVE;
+	if (m != NULL)
 		m_freem(m);
-	}
 
 	if (queued > 0) {
 		virtio_enqueue_commit(vsc, vq, -1, true);


Home | Main Index | Thread Index | Old Index