Current-Users archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Re: Call for testing: Attansic L1E
Some comments with a quick glance:
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ sys/dev/pci/if_ale.c 7 Mar 2009 00:03:24 -0000
> @@ -0,0 +1,2033 @@
> +/* $OpenBSD: if_ale.c,v 1.3 2009/02/27 03:40:25 kevlo Exp $ */
$NetBSD$
> +#include "vlan.h"
OpenBSD often uses #if VLAN > 0 (to shrink install kernel?)
but I'm not sure if it's worth for us.
> +#include <machine/bus.h>
<sys/bus.h> in these days?
> + ifp->if_baudrate = IF_Gbps(1);
I wonder if we need this line if the driver has mii(4).
> +#ifdef ALE_CHECKSUM
> + ifp->if_capabilities |= IFCAP_CSUM_IPv4_Tx | IFCAP_CSUM_IPv4_Rx |
> + IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_TCPv4_Rx |
> + IFCAP_CSUM_UDPv4_Tx | IFCAP_CSUM_TCPv4_Rx;
> +#endif
According to comments in ale_encap(), at least
we should not have IFCAP_CSUM_IPv4_Tx, even if #ifdef'ed out.
> +static int
> +ale_detach(device_t self, int flags)
> +{
:
> + mii_detach(&sc->sc_miibus, MII_PHY_ANY, MII_OFFSET_ANY);
> +
> + /* Delete all remaining media. */
> + ifmedia_delete_instance(&sc->sc_miibus.mii_media, IFM_INST_ANY);
> +
> + ether_ifdetach(ifp);
> + if_detach(ifp);
> + ale_dma_free(sc);
> +
> + if (sc->sc_irq_handle != NULL) {
> + pci_intr_disestablish(sc->sc_pct, sc->sc_irq_handle);
> + sc->sc_irq_handle = NULL;
> + }
> +
> + return 0;
Should we also call bus_space_unmap(9) on detach?
> +ale_dma_alloc(struct ale_softc *sc)
> +{
> + error = bus_dmamem_alloc(sc->sc_dmat, ALE_TX_RING_SZ,
> + ETHER_ALIGN, 0, &sc->ale_cdata.ale_tx_ring_seg, 1,
> + &nsegs, BUS_DMA_WAITOK);
This "ETHER_ALIGN" arg for bus_dmamem_alloc(9) looks
a major FreeBSD's copy-and-pasted bug in NIC drivers.
> + if (error) {
> + printf("%s: could not load DMA'able memory for Tx ring.\n",
> + device_xname(sc->sc_dev));
> + bus_dmamem_free(sc->sc_dmat,
> + (bus_dma_segment_t *)&sc->ale_cdata.ale_tx_ring, 1);
Wrong arg with bogus cast. (also in other places)
bus_dmamem_free(9) takes bus_dma_segment_t,
i.e. it should be ale_cdata.ale_tx_ring_seg.
> +static int
> +ale_encap(struct ale_softc *sc, struct mbuf **m_head)
> +{
:
> + error = bus_dmamap_load_mbuf(sc->sc_dmat, map, *m_head, BUS_DMA_NOWAIT);
> +
> + if (error != 0) {
> + bus_dmamap_unload(sc->sc_dmat, map);
> + error = EFBIG;
> + }
> + if (error == EFBIG) {
This error check looks very suspicious.
> +
> + MGETHDR(m, M_DONTWAIT, MT_DATA);
> + if (m == NULL) {
> + printf("%s: can't defrag TX mbuf\n",
> + device_xname(sc->sc_dev));
> + m_freem(*m_head);
> + *m_head = NULL;
> + return ENOBUFS;
> + }
> +
> + M_COPY_PKTHDR(m, *m_head);
Why M_COPY_PKTHDR() first?
> + nsegs = map->dm_nsegs;
> +
> + if (nsegs == 0) {
> + m_freem(*m_head);
> + *m_head = NULL;
> + return EIO;
> + }
I'm not sure if dm_nsegs could be zero even if bus_dmamap_load_mbuf(9)
returns successfully.
> + /* Configure Tx checksum offload. */
> + if ((m->m_pkthdr.csum_flags & ALE_CSUM_FEATURES) != 0) {
> + /*
> + * AR81xx supports Tx custom checksum offload feature
> + * that offloads single 16bit checksum computation.
> + * So you can choose one among IP, TCP and UDP.
> + * Normally driver sets checksum start/insertion
> + * position from the information of TCP/UDP frame as
> + * TCP/UDP checksum takes more time than that of IP.
> + * However it seems that custom checksum offload
> + * requires 4 bytes aligned Tx buffers due to hardware
> + * bug.
> + * AR81xx also supports explicit Tx checksum computation
> + * if it is told that the size of IP header and TCP
> + * header(for UDP, the header size does not matter
> + * because it's fixed length). However with this scheme
> + * TSO does not work so you have to choose one either
> + * TSO or explicit Tx checksum offload. I chosen TSO
> + * plus custom checksum offload with work-around which
> + * will cover most common usage for this consumer
> + * ethernet controller. The work-around takes a lot of
> + * CPU cycles if Tx buffer is not aligned on 4 bytes
> + * boundary, though.
> + */
> + cflags |= ALE_TD_CXSUM;
> + /* Set checksum start offset. */
> + cflags |= (poff << ALE_TD_CSUM_PLOADOFFSET_SHIFT);
> + }
It looks poff should have some offset value (sizeof (struct ip)?)
but I guess hwcsum support code is quite incomplete.
If so, these code block should explicitly #if 0'ed out.
> +
> +#if NVLAN > 0
> + /* Configure VLAN hardware tag insertion. */
> + if ((mtag = VLAN_OUTPUT_TAG(&sc->sc_ec, m))) {
> + vtag = ALE_TX_VLAN_TAG(htons(VLAN_TAG_VALUE(mtag)));
I'm not sure if htons() is appropriate here since we use htole32() later.
(I guess it should be bswap16(), though maybe no BE machine with ale(9))
> + /* Sync descriptors. */
> + bus_dmamap_sync(sc->sc_dmat, sc->ale_cdata.ale_tx_ring_map, 0,
> + sc->ale_cdata.ale_tx_ring_map->dm_mapsize, BUS_DMASYNC_PREWRITE);
It might be better to sync only updated descriptor, not all ones.
(also in all sync ops against DMA descriptors)
> +static void
> +ale_start(struct ifnet *ifp)
> +{
> + struct ale_softc *sc = ifp->if_softc;
> + struct mbuf *m_head;
> + int enq;
> +
> + if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)
> + return;
> +
> + /* Reclaim transmitted frames. */
> + if (sc->ale_cdata.ale_tx_cnt >= ALE_TX_DESC_HIWAT)
> + ale_txeof(sc);
> +
> + enq = 0;
> + for (;;) {
> + IFQ_DEQUEUE(&ifp->if_snd, m_head);
Shouldn't we do IFQ_POLL() first and then IFQ_DEQUEUE()
after TX mbuf is successfully set up in ale_encap()?
> +static void
> +ale_stats_update(struct ale_softc *sc)
> +{
> + struct ifnet *ifp = &sc->sc_ec.ec_if;
> + struct ale_hw_stats *stat;
> + struct smb sb, *smb;
> + uint32_t *reg;
> + int i;
> +
> + stat = &sc->ale_stats;
> + smb = &sb;
> +
> + /* Read Rx statistics. */
> + for (reg = &sb.rx_frames, i = 0; reg <= &sb.rx_pkts_filtered; reg++) {
> + *reg = CSR_READ_4(sc, ALE_RX_MIB_BASE + i);
> + i += sizeof(uint32_t);
> + }
> + /* Read Tx statistics. */
> + for (reg = &sb.tx_frames, i = 0; reg <= &sb.tx_mcast_bytes; reg++) {
> + *reg = CSR_READ_4(sc, ALE_TX_MIB_BASE + i);
> + i += sizeof(uint32_t);
> + }
> +
> + /* Rx stats. */
> + stat->rx_frames += smb->rx_frames;
:
IMO, struct smb should be a simple uint32_t array
if we use uint32_t pointers to fill it...
> +static void
> +ale_rxcsum(struct ale_softc *sc, struct mbuf *m, uint32_t status)
> +{
> + if (status & ALE_RD_IPCSUM_NOK)
> + m->m_pkthdr.csum_flags |= M_CSUM_IPv4_BAD;
We should also have M_CSUM_IPv4 to indicate results of
RX csum_flags are valid.
(we should note about M_CSUM flags in mbuf(9) man page..)
> +
> + if ((sc->ale_flags & ALE_FLAG_RXCSUM_BUG) == 0) {
> + if (((status & ALE_RD_IPV4_FRAG) == 0) &&
> + ((status & (ALE_RD_TCP | ALE_RD_UDP)) != 0) &&
> + (status & ALE_RD_TCP_UDPCSUM_NOK))
> + {
> + m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
> + }
> + } else {
> + if ((status & (ALE_RD_TCP | ALE_RD_UDP)) != 0) {
> + if (status & ALE_RD_TCP_UDPCSUM_NOK) {
> + m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
> + }
> + }
Ditto for TCP and UDP.
> +static int
> +ale_rxeof(struct ale_softc *sc)
> +{
> + struct ifnet *ifp = &sc->sc_ec.ec_if;
> + struct ale_rx_page *rx_page;
> + struct rx_rs *rs;
> + struct mbuf *m;
> + uint32_t length, prod, seqno, status;
> + int prog;
:
> + /*
> + * m_devget(9) is major bottle-neck of ale(4)(It comes
> + * from hardware limitation). For jumbo frames we could
> + * get a slightly better performance if driver use
> + * m_getjcl(9) with proper buffer size argument. However
> + * that would make code more complicated and I don't
> + * think users would expect good Rx performance numbers
> + * on these low-end consumer ethernet controller.
> + */
> + m = m_devget((char *)(rs + 1), length - ETHER_CRC_LEN,
> + ETHER_ALIGN, ifp, NULL);
I wonder if we can use MGETHDR(9)+MCLGET(9)+memcpy(9) like our rtk(4)..
---
Izumi Tsutsui
Home |
Main Index |
Thread Index |
Old Index