Subject: Patch to use mbuf dma optimizations in ARM bus_dma back-end
To: tech-kern@netbsd.org
From: Jason R Thorpe <thorpej@wasabisystems.com>
List: tech-kern
Date: 03/29/2003 13:57:10
Here is the fourth and final patch in the series of simple patches,
contributed by Wasabi Systems, to improve network performance.

This one makes the ARM bus_dma back-end use the physical addresses
cached in mbufs and clusters when loading DMA maps, and use the
read-only-mapping indication to avoid redundant cache cleans in
bus_dmamap_sync().
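
In case it isn't obvious from the diff below: for the two common cases
(clusters and small mbufs) the map is now filled in with simple pointer
arithmetic on the physical addresses cached in the mbuf, rather than a
per-segment virtual-to-physical lookup.  Roughly, as a sketch only (not
part of the patch; it assumes the paddr fields were populated by the
earlier patches in this series):

	paddr_t pa;

	if ((m->m_flags & (M_EXT|M_CLUSTER)) == (M_EXT|M_CLUSTER)) {
		/* Cluster: ext_paddr is the PA of the cluster buffer. */
		pa = m->m_ext.ext_paddr + (m->m_data - m->m_ext.ext_buf);
	} else if ((m->m_flags & (M_EXT|M_CLUSTER)) == 0) {
		/* Small mbuf: m_paddr is the PA of the mbuf storage. */
		pa = m->m_paddr + M_BUFOFFSET(m) +
		    (m->m_data - M_BUFADDR(m));
	} else {
		/* Anything else falls back to _bus_dmamap_load_buffer(). */
	}

The physical address is then translated into the bus's DMA window if the
tag has DMA ranges, exactly as the patch shows.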
--
-- Jason R. Thorpe <thorpej@wasabisystems.com>
Content-Disposition: attachment; filename=arm32-bus_dma-patch
Index: bus_dma.c
===================================================================
RCS file: /cvsroot/src/sys/arch/arm/arm32/bus_dma.c,v
retrieving revision 1.26
diff -c -r1.26 bus_dma.c
*** bus_dma.c	2003/03/27 19:42:30	1.26
--- bus_dma.c	2003/03/29 21:51:00
***************
*** 213,218 ****
--- 213,219 ----
  _bus_dmamap_load_mbuf(bus_dma_tag_t t, bus_dmamap_t map, struct mbuf *m0,
      int flags)
  {
+ 	struct arm32_dma_range *dr;
  	paddr_t lastaddr;
  	int seg, error, first;
  	struct mbuf *m;
***************
*** 236,250 ****
  	if (m0->m_pkthdr.len > map->_dm_size)
  		return (EINVAL);
  
! 	/* _bus_dmamap_load_buffer() clears this if we're not... */
! 	map->_dm_flags |= ARM32_DMAMAP_COHERENT;
  
  	first = 1;
  	seg = 0;
  	error = 0;
  	for (m = m0; m != NULL && error == 0; m = m->m_next) {
! 		error = _bus_dmamap_load_buffer(t, map, m->m_data, m->m_len,
! 		    NULL, flags, &lastaddr, &seg, first);
  		first = 0;
  	}
  	if (error == 0) {
--- 237,302 ----
  	if (m0->m_pkthdr.len > map->_dm_size)
  		return (EINVAL);
  
! 	/*
! 	 * Mbuf chains should almost never have coherent (i.e.
! 	 * un-cached) mappings, so clear that flag now.
! 	 */
! 	map->_dm_flags &= ~ARM32_DMAMAP_COHERENT;
  
  	first = 1;
  	seg = 0;
  	error = 0;
  	for (m = m0; m != NULL && error == 0; m = m->m_next) {
! 		if (m->m_len == 0)
! 			continue;
! 		/* XXX Could be better about coalescing. */
! 		/* XXX Doesn't check boundaries. */
! 		switch (m->m_flags & (M_EXT|M_CLUSTER)) {
! 		case M_EXT|M_CLUSTER:
! 			/* XXX KDASSERT */
! 			KASSERT(m->m_ext.ext_paddr != M_PADDR_INVALID);
! 			lastaddr = m->m_ext.ext_paddr +
! 			    (m->m_data - m->m_ext.ext_buf);
!  have_addr:
! 			if (first == 0 &&
! 			    ++seg >= map->_dm_segcnt) {
! 				error = EFBIG;
! 				break;
! 			}
! 			/*
! 			 * Make sure we're in an allowed DMA range.
! 			 */
! 			if (t->_ranges != NULL) {
! 				/* XXX cache last result? */
! 				dr = _bus_dma_inrange(t->_ranges, t->_nranges,
! 				    lastaddr);
! 				if (dr == NULL) {
! 					error = EINVAL;
! 					break;
! 				}
!
! 				/*
! 				 * In a valid DMA range.  Translate the
! 				 * physical memory address to an address
! 				 * in the DMA window.
! 				 */
! 				lastaddr = (lastaddr - dr->dr_sysbase) +
! 				    dr->dr_busbase;
! 			}
! 			map->dm_segs[seg].ds_addr = lastaddr;
! 			map->dm_segs[seg].ds_len = m->m_len;
! 			lastaddr += m->m_len;
! 			break;
!
! 		case 0:
! 			lastaddr = m->m_paddr + M_BUFOFFSET(m) +
! 			    (m->m_data - M_BUFADDR(m));
! 			goto have_addr;
!
! 		default:
! 			error = _bus_dmamap_load_buffer(t, map, m->m_data,
! 			    m->m_len, NULL, flags, &lastaddr, &seg, first);
! 		}
  		first = 0;
  	}
  	if (error == 0) {
***************
*** 409,418 ****
  		maddr = mtod(m, vaddr_t);
  		maddr += moff;
  
  		switch (ops) {
  		case BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE:
! 			cpu_dcache_wbinv_range(maddr, minlen);
! 			break;
  
  		case BUS_DMASYNC_PREREAD:
  			if (((maddr | minlen) & arm_dcache_align_mask) == 0)
--- 461,489 ----
  		maddr = mtod(m, vaddr_t);
  		maddr += moff;
  
+ 		/*
+ 		 * We can save a lot of work here if we know the mapping
+ 		 * is read-only at the MMU:
+ 		 *
+ 		 * If a mapping is read-only, no dirty cache blocks will
+ 		 * exist for it.  If a writable mapping was made read-only,
+ 		 * we know any dirty cache lines for the range will have
+ 		 * been cleaned for us already.  Therefore, if the upper
+ 		 * layer can tell us we have a read-only mapping, we can
+ 		 * skip all cache cleaning.
+ 		 *
+ 		 * NOTE: This only works if we know the pmap cleans pages
+ 		 * before making a read-write -> read-only transition.  If
+ 		 * this ever becomes non-true (e.g. Physically Indexed
+ 		 * cache), this will have to be revisited.
+ 		 */
  		switch (ops) {
  		case BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE:
! 			if (! M_ROMAP(m)) {
! 				cpu_dcache_wbinv_range(maddr, minlen);
! 				break;
! 			}
! 			/* else FALLTHROUGH */
  
  		case BUS_DMASYNC_PREREAD:
  			if (((maddr | minlen) & arm_dcache_align_mask) == 0)
***************
*** 422,428 ****
  				break;
  
  		case BUS_DMASYNC_PREWRITE:
! 			cpu_dcache_wb_range(maddr, minlen);
  			break;
  		}
  		moff = 0;
--- 493,500 ----
  				break;
  
  		case BUS_DMASYNC_PREWRITE:
! 			if (! M_ROMAP(m))
! 				cpu_dcache_wb_range(maddr, minlen);
  			break;
  		}
  		moff = 0;
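
For reference, the _bus_dma_inrange() lookup used above just selects the
arm32_dma_range that covers a given physical address, so the loader can
rebase that address into the bus's DMA window.  It amounts to something
like the following sketch (illustrative only, under a hypothetical name
so it isn't mistaken for the real helper; dr_sysbase and dr_busbase are
the field names used in the patch, dr_len is my assumption):

	struct arm32_dma_range *
	range_lookup(struct arm32_dma_range *ranges, int nranges, paddr_t pa)
	{
		int i;

		/* Return the range containing pa, or NULL if none does. */
		for (i = 0; i < nranges; i++) {
			if (pa >= ranges[i].dr_sysbase &&
			    pa < ranges[i].dr_sysbase + ranges[i].dr_len)
				return (&ranges[i]);
		}
		return (NULL);
	}

If a matching range is found, the segment address stored in the map is
(pa - dr_sysbase) + dr_busbase, exactly as in the load_mbuf hunk above.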