Subject: Problems with "bus_dmamap_sync()" on sparc64
To: None <tech-kern@netbsd.org>
From: Hans Petter Selasky <hselasky@c2i.net>
List: tech-kern
Date: 01/23/2007 14:22:17
Hi,

I'm trying to get my new USB driver working on Sparc64 on NetBSD 3.0.

See the following link for the source code:
http://www.turbocat.net/~hselasky/isdn4bsd/sources/src/sys/dev/usb/

I use the following piece of code to allocate and synchronize memory:

bus_dma_tag_t=20
usbd_dma_tag_alloc(bus_dma_tag_t parent, u_int32_t size,=20
                   u_int32_t alignment)
{
        /* FreeBSD specific */
        return parent;
}

void
usbd_dma_tag_free(bus_dma_tag_t tag)
{
        return;
}

void *
usbd_mem_alloc_sub(bus_dma_tag_t tag, struct usbd_page *page,
                   u_int32_t size, u_int32_t alignment)
{
        caddr_t ptr =3D NULL;

        page->tag =3D tag;
        page->seg_count =3D 1;

        if(bus_dmamem_alloc(page->tag, size, alignment, 0,
                            &page->seg, 1,
                            &page->seg_count, BUS_DMA_WAITOK))
        {
                goto done_4;
        }

        if(bus_dmamem_map(page->tag, &page->seg, page->seg_count, size,
                          &ptr, BUS_DMA_WAITOK|BUS_DMA_COHERENT))
        {
                goto done_3;
        }

        if(bus_dmamap_create(page->tag, size, 1, size,
                             0, BUS_DMA_WAITOK, &page->map))
        {
                goto done_2;
        }

        if(bus_dmamap_load(page->tag, page->map, ptr, size, NULL,=20
                           BUS_DMA_WAITOK))
        {
                goto done_1;
        }

        page->physaddr =3D page->map->dm_segs[0].ds_addr;
        page->buffer =3D ptr;
        page->length =3D size;
        page->exit_level =3D 0;
        page->intr_temp =3D 0;

        bzero(ptr, size);

        bus_dmamap_sync(page->tag, page->map, 0, page->length,=20
                        BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);

#ifdef USB_DEBUG
        if(usbdebug > 14)
        {
            printf("%s: %p, %d bytes, phys=3D%p\n",=20
                   __FUNCTION__, ptr, size,=20
                   ((char *)0) + page->physaddr);
        }
#endif
        return ptr;

 done_1:
        bus_dmamap_destroy(page->tag, page->map);

 done_2:
        bus_dmamem_unmap(page->tag, ptr, size);

 done_3:
        bus_dmamem_free(page->tag, &page->seg, page->seg_count);

 done_4:
        return NULL;
}

void
usbd_mem_free_sub(struct usbd_page *page)
{
        /* NOTE: make a copy of "tag", "map",=20
         * and "buffer" in case "page" is part=20
         * of the allocated memory:
         */
        struct usbd_page temp =3D *page;

        if (temp.exit_level =3D=3D 0) {
            bus_dmamap_sync(temp.tag, temp.map, 0, temp.length,=20
                            BUS_DMASYNC_POSTWRITE|BUS_DMASYNC_POSTREAD);
        } else {
            panic("%s:%d: exit_level is not zero!\n",
                  __FUNCTION__, __LINE__);
        }

        bus_dmamap_unload(temp.tag, temp.map);
        bus_dmamap_destroy(temp.tag, temp.map);
        bus_dmamem_unmap(temp.tag, temp.buffer, temp.length);
        bus_dmamem_free(temp.tag, &temp.seg, temp.seg_count);

#ifdef USB_DEBUG
        if(usbdebug > 14)
        {
            printf("%s: %p\n",=20
                   __FUNCTION__, temp.buffer);
        }
#endif
        return;
}

void
usbd_page_dma_exit(struct usbd_page *page)
{
        if ((page->exit_level)++ =3D=3D 0) {
                /*
                 * Disable interrupts so that the
                 * PCI controller can continue
                 * using the memory as quick as
                 * possible:
                 */
                page->intr_temp =3D intr_disable();
                bus_dmamap_sync(page->tag, page->map, 0, page->length,
                                BUS_DMASYNC_POSTWRITE|BUS_DMASYNC_POSTREAD);
        }
        return;
}

void
usbd_page_dma_enter(struct usbd_page *page)
{
        if (--(page->exit_level) =3D=3D 0) {
                bus_dmamap_sync(page->tag, page->map, 0, page->length,
                                BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
                intr_restore(page->intr_temp);
        }
        return;
}

When I initialize the EHCI chip I do like this:

1) Allocate memory using "usbd_mem_alloc_sub()".

2) Call "usbd_page_dma_exit(&(sc->sc_hw_page))"

3) Initialize hardware memory

4) Call "usbd_page_dma_enter(&(sc->sc_hw_page))"

5) Start the EHCI controller.

But the EHCI PCI controller immediately halts. If I insert a "tsleep(1*hz)"=
=20
before 5) then at least the EHCI PCI controller starts without halting.

I suspect that the following function does not do what it promises in the=20
manpage:
                bus_dmamap_sync(page->tag, page->map, 0, page->length,
                                BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);

=46rom the dmesg:

psycho0 at mainbus0 addr 0xfffc0000                                        =
    =20
SUNW,sabre: impl 0, version 0: ign 7c0 bus range 0 to 3; PCI bus 0         =
    =20
extent `psycho mem' (0x0 - 0xffffffff), flags =3D 0x0                      =
      =20
     0x0 - 0x1fffff                                                        =
    =20
extent `psycho io' (0x0 - 0xffffff), flags =3D 0x0                         =
      =20
     0x0 - 0x1fff                                                          =
    =20
DVMA map: c0000000 to e0000000                                             =
    =20
IOTSB: 53a000 to 5ba000                                                    =
    =20
pci0 at psycho0                                                            =
    =20
pci0: i/o space, memory space enabled                                      =
    =20
ppb0 at pci0 dev 1 function 1: Sun Microsystems Simba PCI bridge (rev. 0x13=
)   =20
pci1 at ppb0 bus 1                                                         =
    =20
pci1: i/o space, memory space enabled                                      =
    =20

[...]

VIA Technologies VT83C572 USB Controller (USB serial bus, revision 0x61) at=
=20
pci3 dev 15 fun
ction 0 not configured
VIA Technologies VT83C572 USB Controller (USB serial bus, revision 0x61) at=
=20
pci3 dev 15 fun
ction 1 not configured
VIA Technologies VT8237 EHCI USB Controller (USB serial bus, interface 0x20=
,=20
revision 0x63)
 at pci3 dev 15 function 2 not configured

What I understand is that the psycho0 controller interfaces to the memory o=
n=20
the sparc64. Looking into the source code I find:

=2E/dev/iommu.c:  if (ops & (BUS_DMASYNC_PREREAD | BUS_DMASYNC_POSTWRITE)) {
=2E/dev/iommu.c:  if (ops & (BUS_DMASYNC_POSTREAD | BUS_DMASYNC_PREWRITE)) {
=2E/dev/psycho.c: if (ops & (BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE)) {
=2E/dev/psycho.c: if (ops & (BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE)) {

void
psycho_dmamap_sync(t, map, offset, len, ops)
        bus_dma_tag_t t;
        bus_dmamap_t map;
        bus_addr_t offset;
        bus_size_t len;
        int ops;
{
        struct psycho_pbm *pp =3D (struct psycho_pbm *)t->_cookie;

        if (ops & (BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE)) {
                /* Flush the CPU then the IOMMU */
                bus_dmamap_sync(t->_parent, map, offset, len, ops);
                iommu_dvmamap_sync(t, &pp->pp_sb, map, offset, len, ops);
        }
        if (ops & (BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE)) {
                /* Flush the IOMMU then the CPU */
                iommu_dvmamap_sync(t, &pp->pp_sb, map, offset, len, ops);
                bus_dmamap_sync(t->_parent, map, offset, len, ops);
        }

}

void
iommu_dvmamap_sync(t, sb, map, offset, len, ops)
        bus_dma_tag_t t;
        struct strbuf_ctl *sb;
        bus_dmamap_t map;
        bus_addr_t offset;
        bus_size_t len;
        int ops;
{
        bus_size_t count;
        int i, needsflush =3D 0;

        if (!sb->sb_flush)
                return;

        for (i =3D 0; i < map->dm_nsegs; i++) {
                if (offset < map->dm_segs[i].ds_len)
                        break;
                offset -=3D map->dm_segs[i].ds_len;
        }

        if (i =3D=3D map->dm_nsegs)
                panic("iommu_dvmamap_sync: segment too short %llu",=20
                    (unsigned long long)offset);

        if (ops & (BUS_DMASYNC_PREREAD | BUS_DMASYNC_POSTWRITE)) {
                /* Nothing to do */;
        }

        if (ops & (BUS_DMASYNC_POSTREAD | BUS_DMASYNC_PREWRITE)) {

                for (; len > 0 && i < map->dm_nsegs; i++) {
                        count =3D MIN(map->dm_segs[i].ds_len - offset, len);
                        if (count > 0 &&=20
                            iommu_dvmamap_sync_range(sb,
                                map->dm_segs[i].ds_addr + offset, count))
                                needsflush =3D 1;
                        offset =3D 0;
                        len -=3D count;
                }
#ifdef DIAGNOSTIC
                if (i =3D=3D map->dm_nsegs && len > 0)
                        panic("iommu_dvmamap_sync: leftover %llu",
                            (unsigned long long)len);
#endif

                if (needsflush)
                        iommu_strbuf_flush_done(sb);
        }
}

iommu_dvmamap_sync_range() calls iommu_strbuf_flush()

#define iommu_strbuf_flush(i, v) do {                                   \
        if ((i)->sb_flush)                                              \
                bus_space_write_8((i)->sb_is->is_bustag, (i)->sb_sb,    \
                        STRBUFREG(strbuf_pgflush), (v));                \
        } while (0)


What I see is that "bus_dmamap_sync()" ends up calling "bus_space_write_8()=
"=20
to synchronize the memory. But nowhere I see that the kernel is waiting for=
=20
the sync to complete? And also there are no "membars" along the sync path.

Is bus_dmamap_sync() an asynchronous function then?

Thanks for any insight,
=2D-HPS