NetBSD-Bugs archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Re: port-sparc64/46260: gem0 driver fails to recover after RX overflow
The following reply was made to PR port-sparc64/46260; it has been noted by
GNATS.
From: Havard Eidnes <he%NetBSD.org@localhost>
To: gnats-bugs%NetBSD.org@localhost, jdc%coris.org.uk@localhost
Cc: port-sparc64-maintainer%netbsd.org@localhost
Subject: Re: port-sparc64/46260: gem0 driver fails to recover after RX
overflow
Date: Wed, 11 Apr 2012 11:15:53 +0200 (CEST)
----Next_Part(Wed_Apr_11_11_15_53_2012_993)--
Content-Type: Text/Plain; charset=iso-8859-1
Content-Transfer-Encoding: quoted-printable
Hi,
I've taken a look at the OpenBSD driver, and copied their method
of detection & reset. I'm currently testing this, but so far it
has not yet triggered. Diff attached below.
- Håvard
----Next_Part(Wed_Apr_11_11_15_53_2012_993)--
Content-Type: Text/Plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename=diff
Index: gem.c
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/gem.c,v
retrieving revision 1.98
diff -u -r1.98 gem.c
--- gem.c 2 Feb 2012 19:43:03 -0000 1.98
+++ gem.c 11 Apr 2012 09:12:46 -0000
@@ -89,6 +89,7 @@
int gem_ioctl(struct ifnet *, u_long, void *);
void gem_tick(void *);
void gem_watchdog(struct ifnet *);
+void gem_rx_watchdog(void *);
void gem_pcs_start(struct gem_softc *sc);
void gem_pcs_stop(struct gem_softc *sc, int);
int gem_init(struct ifnet *);
@@ -177,6 +178,7 @@
ifmedia_delete_instance(&sc->sc_mii.mii_media, IFM_INST_ANY);
callout_destroy(&sc->sc_tick_ch);
+ callout_destroy(&sc->sc_rx_watchdog);
/*FALLTHROUGH*/
case GEM_ATT_MII:
@@ -613,6 +615,8 @@
#endif
callout_init(&sc->sc_tick_ch, 0);
+ callout_init(&sc->sc_rx_watchdog, 0);
+ callout_setfunc(&sc->sc_rx_watchdog, gem_rx_watchdog, sc);
sc->sc_att_stage = GEM_ATT_FINISHED;
@@ -1824,6 +1828,8 @@
if (gem_add_rxbuf(sc, i) != 0) {
GEM_COUNTER_INCR(sc, sc_ev_rxnobuf);
ifp->if_ierrors++;
+ aprint_error_dev(sc->sc_dev,
+ "receive error: RX no buffer space\n");
GEM_INIT_RXDESC(sc, i);
bus_dmamap_sync(sc->sc_dmatag, rxs->rxs_dmamap, 0,
rxs->rxs_dmamap->dm_mapsize, BUS_DMASYNC_PREREAD);
@@ -1965,12 +1971,34 @@
DPRINTF(sc, ("gem_rint: done sc->rxptr %d, complete %d\n",
sc->sc_rxptr, bus_space_read_4(t, h, GEM_RX_COMPLETION)));
+ if ((i = bus_space_read_4(t, h, GEM_MAC_RX_LEN_ERR_CNT)) != 0) {
+ aprint_error_dev(sc->sc_dev,
+ "receive error: RX length error\n");
+ ifp->if_ierrors += i;
+ }
+ if ((i = bus_space_read_4(t, h, GEM_MAC_RX_ALIGN_ERR)) != 0) {
+ aprint_error_dev(sc->sc_dev,
+ "receive error: RX alignment error\n");
+ ifp->if_ierrors += i;
+ }
+ if ((i = bus_space_read_4(t, h, GEM_MAC_RX_CRC_ERR_CNT)) != 0) {
+ aprint_error_dev(sc->sc_dev,
+ "receive error: RX CRC error\n");
+ ifp->if_ierrors += i;
+ }
+ if ((i = bus_space_read_4(t, h, GEM_MAC_RX_CODE_VIOL)) != 0) {
+ aprint_error_dev(sc->sc_dev,
+ "receive error: RX code violation\n");
+ ifp->if_ierrors += i;
+ }
+#if 0
/* Read error counters ... */
ifp->if_ierrors +=
bus_space_read_4(t, h, GEM_MAC_RX_LEN_ERR_CNT) +
bus_space_read_4(t, h, GEM_MAC_RX_ALIGN_ERR) +
bus_space_read_4(t, h, GEM_MAC_RX_CRC_ERR_CNT) +
bus_space_read_4(t, h, GEM_MAC_RX_CODE_VIOL);
+#endif
/* ... then clear the hardware counters. */
bus_space_write_4(t, h, GEM_MAC_RX_LEN_ERR_CNT, 0);
@@ -2209,7 +2237,21 @@
*/
if (rxstat & GEM_MAC_RX_OVERFLOW) {
ifp->if_ierrors++;
+ aprint_error_dev(sc->sc_dev,
+ "receive error: RX overflow\n");
gem_reset_rxdma(sc);
+ /*
+ * Apparently a silicon bug causes ERI to hang from
+ * time to time. So if we detect an RX FIFO overflow,
+ * we fire off a timer, and check whether we're still
+ * making progress by looking at the RX FIFO write
+ * and read pointers.
+ */
+ sc->sc_rx_fifo_wr_ptr =
+ bus_space_read_4(t, h, GEM_RX_FIFO_WR_PTR);
+ sc->sc_rx_fifo_rd_ptr =
+ bus_space_read_4(t, h, GEM_RX_FIFO_RD_PTR);
+ callout_schedule(&sc->sc_rx_watchdog, 400);
} else if (rxstat & ~(GEM_MAC_RX_DONE | GEM_MAC_RX_FRAME_CNT))
printf("%s: MAC rx fault, status 0x%02x\n",
device_xname(sc->sc_dev), rxstat);
@@ -2236,6 +2278,40 @@
return (r);
}
+void
+gem_rx_watchdog(void *arg)
+{
+ struct gem_softc *sc = arg;
+ struct ifnet *ifp = &sc->sc_ethercom.ec_if;
+ bus_space_tag_t t = sc->sc_bustag;
+ bus_space_handle_t h = sc->sc_h1;
+ u_int32_t rx_fifo_wr_ptr;
+ u_int32_t rx_fifo_rd_ptr;
+ u_int32_t state;
+
+ if ((ifp->if_flags & IFF_RUNNING) == 0)
+ return;
+
+ rx_fifo_wr_ptr = bus_space_read_4(t, h, GEM_RX_FIFO_WR_PTR);
+ rx_fifo_rd_ptr = bus_space_read_4(t, h, GEM_RX_FIFO_RD_PTR);
+ state = bus_space_read_4(t, h, GEM_MAC_MAC_STATE);
+ if ((state & GEM_MAC_RX_OVERFLOW) == GEM_MAC_RX_OVERFLOW &&
+ ((rx_fifo_wr_ptr == rx_fifo_rd_ptr) ||
+ ((sc->sc_rx_fifo_wr_ptr == rx_fifo_wr_ptr) &&
+ (sc->sc_rx_fifo_rd_ptr == rx_fifo_rd_ptr))))
+ {
+ /*
+ * The RX state machine is still in overflow state and
+ * the RX FIFO write and read pointers seem to be
+ * stuck. Whack the chip over the head to get things
+ * going again.
+ */
+ aprint_error_dev(sc->sc_dev,
+ "receiver stuck in overflow, resetting\n");
+ gem_init(ifp);
+ }
+
+}
void
gem_watchdog(struct ifnet *ifp)
Index: gemvar.h
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/gemvar.h,v
retrieving revision 1.23
diff -u -r1.23 gemvar.h
--- gemvar.h 2 Feb 2012 19:43:03 -0000 1.23
+++ gemvar.h 11 Apr 2012 09:12:46 -0000
@@ -130,6 +130,7 @@
struct ethercom sc_ethercom; /* ethernet common data */
struct mii_data sc_mii; /* MII media control */
struct callout sc_tick_ch; /* tick callout */
+ struct callout sc_rx_watchdog; /* RX watchdog callout */
/* The following bus handles are to be provided by the bus front-end */
bus_space_tag_t sc_bustag; /* bus tag */
@@ -223,6 +224,10 @@
struct evcnt sc_ev_rxhist[9];
#endif
+ /* For use by the RX watchdog */
+ u_int32_t sc_rx_fifo_wr_ptr;
+ u_int32_t sc_rx_fifo_rd_ptr;
+
enum gem_attach_stage sc_att_stage;
};
----Next_Part(Wed_Apr_11_11_15_53_2012_993)----
Home |
Main Index |
Thread Index |
Old Index