Current-Users archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: Call for testing: Attansic L1E



Some comments with a quick glance:

> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ sys/dev/pci/if_ale.c      7 Mar 2009 00:03:24 -0000
> @@ -0,0 +1,2033 @@
> +/*   $OpenBSD: if_ale.c,v 1.3 2009/02/27 03:40:25 kevlo Exp $        */

$NetBSD$

> +#include "vlan.h"

OpenBSD often uses #if VLAN > 0 (to shrink install kernel?)
but I'm not sure if it's worth for us.

> +#include <machine/bus.h>

<sys/bus.h> in these days?

> +     ifp->if_baudrate = IF_Gbps(1);

I wonder if we need this line if the driver has mii(4).

> +#ifdef ALE_CHECKSUM
> +     ifp->if_capabilities |= IFCAP_CSUM_IPv4_Tx | IFCAP_CSUM_IPv4_Rx |
> +                             IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_TCPv4_Rx |
> +                             IFCAP_CSUM_UDPv4_Tx | IFCAP_CSUM_TCPv4_Rx;
> +#endif

According to comments in ale_encap(), at least
we should not have IFCAP_CSUM_IPv4_Tx, even if #ifdef'ed out.

> +static int
> +ale_detach(device_t self, int flags)
> +{
 :
> +     mii_detach(&sc->sc_miibus, MII_PHY_ANY, MII_OFFSET_ANY);
> +
> +     /* Delete all remaining media. */
> +     ifmedia_delete_instance(&sc->sc_miibus.mii_media, IFM_INST_ANY);
> +
> +     ether_ifdetach(ifp);
> +     if_detach(ifp);
> +     ale_dma_free(sc);
> +
> +     if (sc->sc_irq_handle != NULL) {
> +             pci_intr_disestablish(sc->sc_pct, sc->sc_irq_handle);
> +             sc->sc_irq_handle = NULL;
> +     }
> +
> +     return 0;

Should we also call bus_space_unmap(9) on detach?

> +ale_dma_alloc(struct ale_softc *sc)
> +{
> +     error = bus_dmamem_alloc(sc->sc_dmat, ALE_TX_RING_SZ, 
> +         ETHER_ALIGN, 0, &sc->ale_cdata.ale_tx_ring_seg, 1,
> +         &nsegs, BUS_DMA_WAITOK);

This "ETHER_ALIGN" arg for bus_dmamem_alloc(9) looks
a major FreeBSD's copy-and-pasted bug in NIC drivers.

> +     if (error) {
> +             printf("%s: could not load DMA'able memory for Tx ring.\n",
> +                 device_xname(sc->sc_dev));
> +             bus_dmamem_free(sc->sc_dmat, 
> +                 (bus_dma_segment_t *)&sc->ale_cdata.ale_tx_ring, 1);

Wrong arg with bogus cast. (also in other places)
bus_dmamem_free(9) takes bus_dma_segment_t,
i.e. it should be ale_cdata.ale_tx_ring_seg.

> +static int
> +ale_encap(struct ale_softc *sc, struct mbuf **m_head)
> +{
 :
> +     error = bus_dmamap_load_mbuf(sc->sc_dmat, map, *m_head, BUS_DMA_NOWAIT);
> +
> +     if (error != 0) {
> +             bus_dmamap_unload(sc->sc_dmat, map);
> +             error = EFBIG;
> +     }
> +     if (error == EFBIG) {

This error check looks very suspicious.

> +
> +             MGETHDR(m, M_DONTWAIT, MT_DATA);
> +             if (m == NULL) {
> +                     printf("%s: can't defrag TX mbuf\n",
> +                         device_xname(sc->sc_dev));
> +                     m_freem(*m_head);
> +                     *m_head = NULL;
> +                     return ENOBUFS;
> +             }
> +
> +             M_COPY_PKTHDR(m, *m_head);

Why M_COPY_PKTHDR() first?

> +     nsegs = map->dm_nsegs;
> +
> +     if (nsegs == 0) {
> +             m_freem(*m_head);
> +             *m_head = NULL;
> +             return EIO;
> +     }

I'm not sure if dm_nsegs could be zero even if bus_dmamap_load_mbuf(9)
returns successfully.

> +     /* Configure Tx checksum offload. */
> +     if ((m->m_pkthdr.csum_flags & ALE_CSUM_FEATURES) != 0) {
> +             /*
> +              * AR81xx supports Tx custom checksum offload feature
> +              * that offloads single 16bit checksum computation.
> +              * So you can choose one among IP, TCP and UDP.
> +              * Normally driver sets checksum start/insertion
> +              * position from the information of TCP/UDP frame as
> +              * TCP/UDP checksum takes more time than that of IP.
> +              * However it seems that custom checksum offload
> +              * requires 4 bytes aligned Tx buffers due to hardware
> +              * bug.
> +              * AR81xx also supports explicit Tx checksum computation
> +              * if it is told that the size of IP header and TCP
> +              * header(for UDP, the header size does not matter
> +              * because it's fixed length). However with this scheme
> +              * TSO does not work so you have to choose one either
> +              * TSO or explicit Tx checksum offload. I chosen TSO
> +              * plus custom checksum offload with work-around which
> +              * will cover most common usage for this consumer
> +              * ethernet controller. The work-around takes a lot of
> +              * CPU cycles if Tx buffer is not aligned on 4 bytes
> +              * boundary, though.
> +              */
> +             cflags |= ALE_TD_CXSUM;
> +             /* Set checksum start offset. */
> +             cflags |= (poff << ALE_TD_CSUM_PLOADOFFSET_SHIFT);
> +     }

It looks poff should have some offset value (sizeof (struct ip)?)
but I guess hwcsum support code is quite incomplete.
If so, these code block should explicitly #if 0'ed out.

> +
> +#if NVLAN > 0
> +     /* Configure VLAN hardware tag insertion. */
> +     if ((mtag = VLAN_OUTPUT_TAG(&sc->sc_ec, m))) {
> +             vtag = ALE_TX_VLAN_TAG(htons(VLAN_TAG_VALUE(mtag)));

I'm not sure if htons() is appropriate here since we use htole32() later.
(I guess it should be bswap16(), though maybe no BE machine with ale(9))

> +     /* Sync descriptors. */
> +     bus_dmamap_sync(sc->sc_dmat, sc->ale_cdata.ale_tx_ring_map, 0,
> +         sc->ale_cdata.ale_tx_ring_map->dm_mapsize, BUS_DMASYNC_PREWRITE);

It might be better to sync only updated descriptor, not all ones.
(also in all sync ops against DMA descriptors)

> +static void
> +ale_start(struct ifnet *ifp)
> +{
> +        struct ale_softc *sc = ifp->if_softc;
> +     struct mbuf *m_head;
> +     int enq;
> +
> +     if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)
> +             return;
> +
> +     /* Reclaim transmitted frames. */
> +     if (sc->ale_cdata.ale_tx_cnt >= ALE_TX_DESC_HIWAT)
> +             ale_txeof(sc);
> +
> +     enq = 0;
> +     for (;;) {
> +             IFQ_DEQUEUE(&ifp->if_snd, m_head);

Shouldn't we do IFQ_POLL() first and then IFQ_DEQUEUE()
after TX mbuf is successfully set up in ale_encap()?

> +static void
> +ale_stats_update(struct ale_softc *sc)
> +{
> +     struct ifnet *ifp = &sc->sc_ec.ec_if;
> +     struct ale_hw_stats *stat;
> +     struct smb sb, *smb;
> +     uint32_t *reg;
> +     int i;
> +
> +     stat = &sc->ale_stats;
> +     smb = &sb;
> +
> +     /* Read Rx statistics. */
> +     for (reg = &sb.rx_frames, i = 0; reg <= &sb.rx_pkts_filtered; reg++) {
> +             *reg = CSR_READ_4(sc, ALE_RX_MIB_BASE + i);
> +             i += sizeof(uint32_t);
> +     }
> +     /* Read Tx statistics. */
> +     for (reg = &sb.tx_frames, i = 0; reg <= &sb.tx_mcast_bytes; reg++) {
> +             *reg = CSR_READ_4(sc, ALE_TX_MIB_BASE + i);
> +             i += sizeof(uint32_t);
> +     }
> +
> +     /* Rx stats. */
> +     stat->rx_frames += smb->rx_frames;
 :

IMO, struct smb should be a simple uint32_t array
if we use uint32_t pointers to fill it...

> +static void
> +ale_rxcsum(struct ale_softc *sc, struct mbuf *m, uint32_t status)
> +{
> +     if (status & ALE_RD_IPCSUM_NOK)
> +             m->m_pkthdr.csum_flags |= M_CSUM_IPv4_BAD;

We should also have M_CSUM_IPv4 to indicate results of
RX csum_flags are valid.
(we should note about M_CSUM flags in mbuf(9) man page..)

> +
> +     if ((sc->ale_flags & ALE_FLAG_RXCSUM_BUG) == 0) {
> +             if (((status & ALE_RD_IPV4_FRAG) == 0) &&
> +                 ((status & (ALE_RD_TCP | ALE_RD_UDP)) != 0) &&
> +                 (status & ALE_RD_TCP_UDPCSUM_NOK))
> +             {
> +                     m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
> +             }
> +     } else {
> +             if ((status & (ALE_RD_TCP | ALE_RD_UDP)) != 0) {
> +                     if (status & ALE_RD_TCP_UDPCSUM_NOK) {
> +                             m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
> +                     }
> +             }

Ditto for TCP and UDP.

> +static int
> +ale_rxeof(struct ale_softc *sc)
> +{
> +     struct ifnet *ifp = &sc->sc_ec.ec_if;
> +     struct ale_rx_page *rx_page;
> +     struct rx_rs *rs;
> +     struct mbuf *m;
> +     uint32_t length, prod, seqno, status;
> +     int prog;
 :
> +             /*
> +              * m_devget(9) is major bottle-neck of ale(4)(It comes
> +              * from hardware limitation). For jumbo frames we could
> +              * get a slightly better performance if driver use
> +              * m_getjcl(9) with proper buffer size argument. However
> +              * that would make code more complicated and I don't
> +              * think users would expect good Rx performance numbers
> +              * on these low-end consumer ethernet controller.
> +              */
> +             m = m_devget((char *)(rs + 1), length - ETHER_CRC_LEN,
> +                 ETHER_ALIGN, ifp, NULL);

I wonder if we can use MGETHDR(9)+MCLGET(9)+memcpy(9) like our rtk(4)..

---
Izumi Tsutsui


Home | Main Index | Thread Index | Old Index