NetBSD-Bugs archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Re: port-sparc64/46260: gem0 driver fails to recover after RX overflow
The following reply was made to PR port-sparc64/46260; it has been noted by
GNATS.
From: Havard Eidnes <he%NetBSD.org@localhost>
To: gnats-bugs%NetBSD.org@localhost, jdc%coris.org.uk@localhost
Cc: port-sparc64-maintainer%netbsd.org@localhost
Subject: Re: port-sparc64/46260: gem0 driver fails to recover after RX
overflow
Date: Wed, 11 Apr 2012 16:25:57 +0200 (CEST)
----Next_Part(Wed_Apr_11_16_25_57_2012_393)--
Content-Type: Text/Plain; charset=iso-8859-1
Content-Transfer-Encoding: quoted-printable
> I've taken a look at the OpenBSD driver, and copied their method
> of detection & reset. I'm currently testing this, but so far it
> has not yet triggered. Diff attached below.
Scratch that diff, here is one which works, but which ends up
resetting the interface Quite Often, despite the state register
indicating it's not in overflow mode -- it prints
gem0: rx_watchdog: not in overflow state: 0x10400
Only once in my testing did I see
gem0: rx_watchdog: rd pointer != saved
occur, but it *did* occur.
Regards,
- Håvard
----Next_Part(Wed_Apr_11_16_25_57_2012_393)--
Content-Type: Text/Plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename=diff
Index: gem.c
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/gem.c,v
retrieving revision 1.98
diff -u -r1.98 gem.c
--- gem.c 2 Feb 2012 19:43:03 -0000 1.98
+++ gem.c 11 Apr 2012 14:19:52 -0000
@@ -89,6 +89,7 @@
int gem_ioctl(struct ifnet *, u_long, void *);
void gem_tick(void *);
void gem_watchdog(struct ifnet *);
+void gem_rx_watchdog(void *);
void gem_pcs_start(struct gem_softc *sc);
void gem_pcs_stop(struct gem_softc *sc, int);
int gem_init(struct ifnet *);
@@ -177,6 +178,7 @@
ifmedia_delete_instance(&sc->sc_mii.mii_media, IFM_INST_ANY);
callout_destroy(&sc->sc_tick_ch);
+ callout_destroy(&sc->sc_rx_watchdog);
/*FALLTHROUGH*/
case GEM_ATT_MII:
@@ -613,6 +615,8 @@
#endif
callout_init(&sc->sc_tick_ch, 0);
+ callout_init(&sc->sc_rx_watchdog, 0);
+ callout_setfunc(&sc->sc_rx_watchdog, gem_rx_watchdog, sc);
sc->sc_att_stage = GEM_ATT_FINISHED;
@@ -764,6 +768,8 @@
/* Wait till it finishes */
if (!gem_bitwait(sc, h, GEM_RX_CONFIG, 1, 0))
aprint_error_dev(sc->sc_dev, "cannot disable read dma\n");
+ /* Wait 5ms extra. */
+ delay(5000);
/* Finally, reset the ERX */
bus_space_write_4(t, h2, GEM_RESET, GEM_RESET_RX);
@@ -1824,6 +1830,8 @@
if (gem_add_rxbuf(sc, i) != 0) {
GEM_COUNTER_INCR(sc, sc_ev_rxnobuf);
ifp->if_ierrors++;
+ aprint_error_dev(sc->sc_dev,
+ "receive error: RX no buffer space\n");
GEM_INIT_RXDESC(sc, i);
bus_dmamap_sync(sc->sc_dmatag, rxs->rxs_dmamap, 0,
rxs->rxs_dmamap->dm_mapsize, BUS_DMASYNC_PREREAD);
@@ -1965,12 +1973,34 @@
DPRINTF(sc, ("gem_rint: done sc->rxptr %d, complete %d\n",
sc->sc_rxptr, bus_space_read_4(t, h, GEM_RX_COMPLETION)));
+ if ((i = bus_space_read_4(t, h, GEM_MAC_RX_LEN_ERR_CNT)) != 0) {
+ aprint_error_dev(sc->sc_dev,
+ "receive error: RX length error\n");
+ ifp->if_ierrors += i;
+ }
+ if ((i = bus_space_read_4(t, h, GEM_MAC_RX_ALIGN_ERR)) != 0) {
+ aprint_error_dev(sc->sc_dev,
+ "receive error: RX alignment error\n");
+ ifp->if_ierrors += i;
+ }
+ if ((i = bus_space_read_4(t, h, GEM_MAC_RX_CRC_ERR_CNT)) != 0) {
+ aprint_error_dev(sc->sc_dev,
+ "receive error: RX CRC error\n");
+ ifp->if_ierrors += i;
+ }
+ if ((i = bus_space_read_4(t, h, GEM_MAC_RX_CODE_VIOL)) != 0) {
+ aprint_error_dev(sc->sc_dev,
+ "receive error: RX code violationn");
+ ifp->if_ierrors += i;
+ }
+#if 0
/* Read error counters ... */
ifp->if_ierrors +=
bus_space_read_4(t, h, GEM_MAC_RX_LEN_ERR_CNT) +
bus_space_read_4(t, h, GEM_MAC_RX_ALIGN_ERR) +
bus_space_read_4(t, h, GEM_MAC_RX_CRC_ERR_CNT) +
bus_space_read_4(t, h, GEM_MAC_RX_CODE_VIOL);
+#endif
/* ... then clear the hardware counters. */
bus_space_write_4(t, h, GEM_MAC_RX_LEN_ERR_CNT, 0);
@@ -2209,7 +2239,21 @@
*/
if (rxstat & GEM_MAC_RX_OVERFLOW) {
ifp->if_ierrors++;
+ aprint_error_dev(sc->sc_dev,
+ "receive error: RX overflow\n");
gem_reset_rxdma(sc);
+ /*
+ * Apparently a silicon bug causes ERI to hang from
+ * time to time. So if we detect an RX FIFO overflow,
+ * we fire off a timer, and check whether we're still
+ * making progress by looking at the RX FIFO write
+ * and read pointers.
+ */
+ sc->sc_rx_fifo_wr_ptr =
+ bus_space_read_4(t, h, GEM_RX_FIFO_WR_PTR);
+ sc->sc_rx_fifo_rd_ptr =
+ bus_space_read_4(t, h, GEM_RX_FIFO_RD_PTR);
+ callout_schedule(&sc->sc_rx_watchdog, 400);
} else if (rxstat & ~(GEM_MAC_RX_DONE | GEM_MAC_RX_FRAME_CNT))
printf("%s: MAC rx fault, status 0x%02x\n",
device_xname(sc->sc_dev), rxstat);
@@ -2236,6 +2280,61 @@
return (r);
}
+void
+gem_rx_watchdog(void *arg)
+{
+ struct gem_softc *sc = arg;
+ struct ifnet *ifp = &sc->sc_ethercom.ec_if;
+ bus_space_tag_t t = sc->sc_bustag;
+ bus_space_handle_t h = sc->sc_h1;
+ u_int32_t rx_fifo_wr_ptr;
+ u_int32_t rx_fifo_rd_ptr;
+ u_int32_t state;
+
+ if ((ifp->if_flags & IFF_RUNNING) == 0) {
+ aprint_error_dev(sc->sc_dev, "receiver not running\n");
+ return;
+ }
+
+ rx_fifo_wr_ptr = bus_space_read_4(t, h, GEM_RX_FIFO_WR_PTR);
+ rx_fifo_rd_ptr = bus_space_read_4(t, h, GEM_RX_FIFO_RD_PTR);
+ state = bus_space_read_4(t, h, GEM_MAC_MAC_STATE);
+ if ((state & GEM_MAC_STATE_OVERFLOW) == GEM_MAC_STATE_OVERFLOW &&
+ ((rx_fifo_wr_ptr == rx_fifo_rd_ptr) ||
+ ((sc->sc_rx_fifo_wr_ptr == rx_fifo_wr_ptr) &&
+ (sc->sc_rx_fifo_rd_ptr == rx_fifo_rd_ptr))))
+ {
+ /*
+ * The RX state machine is still in overflow state and
+ * the RX FIFO write and read pointers seem to be
+ * stuck. Whack the chip over the head to get things
+ * going again.
+ */
+ aprint_error_dev(sc->sc_dev,
+ "receiver stuck in overflow, resetting\n");
+ gem_init(ifp);
+ } else {
+ if ((state & GEM_MAC_STATE_OVERFLOW) != GEM_MAC_STATE_OVERFLOW) {
+ aprint_error_dev(sc->sc_dev,
+ "rx_watchdog: not in overflow state: 0x%x\n",
+ state);
+ }
+ if (rx_fifo_wr_ptr != rx_fifo_rd_ptr) {
+ aprint_error_dev(sc->sc_dev,
+ "rx_watchdog: wr & rd ptr different\n");
+ }
+ if (sc->sc_rx_fifo_wr_ptr != rx_fifo_wr_ptr) {
+ aprint_error_dev(sc->sc_dev,
+ "rx_watchdog: wr pointer != saved\n");
+ }
+ if (sc->sc_rx_fifo_rd_ptr != rx_fifo_rd_ptr) {
+ aprint_error_dev(sc->sc_dev,
+ "rx_watchdog: rd pointer != saved\n");
+ }
+ aprint_error_dev(sc->sc_dev, "resetting anyway\n");
+ gem_init(ifp);
+ }
+}
void
gem_watchdog(struct ifnet *ifp)
Index: gemreg.h
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/gemreg.h,v
retrieving revision 1.14
diff -u -r1.14 gemreg.h
--- gemreg.h 15 Sep 2008 19:43:24 -0000 1.14
+++ gemreg.h 11 Apr 2012 14:19:52 -0000
@@ -516,6 +516,8 @@
#define GEM_MAC_CC_PASS_PAUSE 0x00000004 /* pass pause up */
#define GEM_MAC_CC_BITS "\177\020b\0TXPAUSE\0b\1RXPAUSE\0b\2NOPAUSE\0\0"
+/* GEM_MAC_MAC_STATE register bits */
+#define GEM_MAC_STATE_OVERFLOW 0x03800000
/*
* Bits in GEM_MAC_SLOT_TIME register
Index: gemvar.h
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/gemvar.h,v
retrieving revision 1.23
diff -u -r1.23 gemvar.h
--- gemvar.h 2 Feb 2012 19:43:03 -0000 1.23
+++ gemvar.h 11 Apr 2012 14:19:52 -0000
@@ -130,6 +130,7 @@
struct ethercom sc_ethercom; /* ethernet common data */
struct mii_data sc_mii; /* MII media control */
struct callout sc_tick_ch; /* tick callout */
+ struct callout sc_rx_watchdog; /* RX watchdog callout */
/* The following bus handles are to be provided by the bus front-end */
bus_space_tag_t sc_bustag; /* bus tag */
@@ -223,6 +224,10 @@
struct evcnt sc_ev_rxhist[9];
#endif
+ /* For use by the RX watchdog */
+ u_int32_t sc_rx_fifo_wr_ptr;
+ u_int32_t sc_rx_fifo_rd_ptr;
+
enum gem_attach_stage sc_att_stage;
};
----Next_Part(Wed_Apr_11_16_25_57_2012_393)----
Home |
Main Index |
Thread Index |
Old Index