tech-net archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Network drivers and memory allocation in interrupt context
Hi,
I now have a semi-working pool_cache based memory allocator for network
drivers (tested using the iwn driver). I am uncertain, however, about how to
get it fully functioning. The issue is memory allocation in interrupt context.
My first attempt was to use the palloc argument to pool_cache_init (see the
mbuf_pool_poolpage_alloc method, below). This works for a while but eventually
panics in kmem_alloc because it gets called in interrupt context (without the
error return for interrupt context that is in the code now).
The kmem_alloc manpage suggests using pool_cache instead, so I implemented
ctor/dtor methods to use in place of mbuf_pool_poolpage_alloc. This version
panics, however, almost immediately in bus_dmamem_map because the pool "pvpl"
(created in arch/x86/x86/pmap.c) is initialized with IPL_NONE. This impacts,
of course, the mbuf_pool_poolpage_alloc version too, but happens more quickly
here because the ctor method gets called via pool_cache_get (i.e., there is no
preallocation of the dma buffers).
Adding the current error returns in mbuf_pool_poolpage_alloc and
mbuf_pool_ctor avoids the panics but results in a nonworking network driver.
I'm unsure how to proceed on this. Some thoughts:
1) Initialize the "pvpl" pool with IPL_VM
2) Preallocate a large number of buffers in mbuf_pool_cache_init.
3) Rewrite the network drivers so that they do not request memory in interrupt
context.
4) Other.
Comments?
Thanks,
Sverre
Here's the current code:
/*
Mbuf_pool_cache_init sets up a DMA safe pool_cache for the
specified bus and size. The pool_cache will use bus_dmamem_alloc
as its memory allocator. Mbuf_pool_cache_init may be called
multiple times for a given bus and size. Subsequent calls
return the original pool_cache and increment its reference count.
Mbuf_pool_cache_init should be called from bus or device attach
methods as needed.
Mbuf_pool_cache_destroy should similarly be called from a bus or
device detach method. The reference counter is used to destroy
the pool_cache when appropriate.
*/
#include <sys/types.h>
#include <sys/kmem.h>
#include <sys/pool.h>
#include <sys/bus.h>
#include <sys/cpu.h>
#define MBUF_POOL_POOLPAGE_ALLOC
/* The mbuf_pool_item list */
static TAILQ_HEAD(, mbuf_pool_item) mbuf_pool_head =
TAILQ_HEAD_INITIALIZER(mbuf_pool_head);
/*
 * One entry per (bus tag, size) pool cache.  Entries live on
 * mbuf_pool_head and are reference counted so that repeated
 * mbuf_pool_cache_init() calls for the same bus/size share one cache.
 */
struct mbuf_pool_item {
	TAILQ_ENTRY(mbuf_pool_item) mbuf_pool_list;	/* mbuf_pool_head linkage */
	bus_dma_tag_t mpi_bus_tag;	/* DMA tag buffers are allocated on */
	unsigned int mpi_size;		/* buffer size served by this cache */
	char *mpi_name;			/* heap-allocated pool-cache name */
	pool_cache_t mpi_pc;		/* the pool cache itself */
	unsigned int mpi_refcnt;	/* outstanding init references */
};
/*
 * Per-object descriptor handed out by the pool cache: one DMA segment
 * plus the kernel virtual mapping obtained from bus_dmamem_map().
 */
struct mbuf_pool_extmem {
	bus_size_t em_size;		/* size of the DMA buffer */
	bus_dma_segment_t em_seg;	/* the (single) DMA segment */
	void *em_vaddr;			/* KVA of the mapped segment */
};
typedef struct mbuf_pool_extmem mbuf_pool_extmem_t;
static bool mbuf_pool_initialized = 0;
static kmutex_t mbuf_pool_lock;
#ifdef MBUF_POOL_POOLPAGE_ALLOC
static struct pool_allocator mbuf_pool_allocator;
#endif
/*
 * Map a struct pool back to its mbuf_pool_item.  The pointer is stashed
 * in pr_qcache by mbuf_pool_cache_init().  The macro argument is
 * parenthesized so that non-trivial expressions expand safely.
 */
#define MBUF_POOL_TO_MPI(pool) ((struct mbuf_pool_item *)((pool)->pr_qcache))

/* Forward declarations. */
struct mbuf_pool_item *
mbuf_pool_get_pool_item(pool_cache_t pc, bus_dma_tag_t tag, unsigned int
size);
char *
mbuf_pool_get_pool_name(bus_dma_tag_t tag, unsigned int size);
pool_cache_t
mbuf_pool_cache_init(bus_dma_tag_t tag, unsigned int size);
void
mbuf_pool_cache_destroy(pool_cache_t pc);
void *
mbuf_pool_cache_get_paddr(pool_cache_t pc, int flags, paddr_t *pap);
void
mbuf_pool_cache_put_paddr(pool_cache_t pc, void *object, paddr_t pa);
#ifdef MBUF_POOL_POOLPAGE_ALLOC
/*
* Custom pool alloc and free methods.
*/
static void *
mbuf_pool_poolpage_alloc(struct pool *pool, int prflags)
{
void *poolpage;
unsigned int poolpage_size = pool->pr_alloc->pa_pagesz;
struct mbuf_pool_item *mpi;
mbuf_pool_extmem_t *em;
unsigned int size, em_count, i, nsegs;
int error;
/* kmem_alloc cannot be used in interrupt context */
if (cpu_intr_p() || cpu_softintr_p())
return NULL;
/* Verify assumptions that are made in the code, below */
if (poolpage_size < sizeof(mbuf_pool_extmem_t) ||
poolpage_size % sizeof(mbuf_pool_extmem_t) != 0)
panic("mbuf_pool_poolpage_alloc: invalid struct
mbuf_pool_extmem
size");
/* XXX Should this be KM_NOSLEEP? */
/* cannot use in interrupt context.
says use pool_cache instead. how? */
poolpage = kmem_alloc(poolpage_size, KM_SLEEP);
if (poolpage == NULL)
goto fail1;
mpi = MBUF_POOL_TO_MPI(pool);
em_count = poolpage_size / sizeof(mbuf_pool_extmem_t);
size = mpi->mpi_size;
em = poolpage;
for (i = 0; i < em_count; i++) {
em->em_size = size;
/* XXX verify alignment arg (size) */
error = bus_dmamem_alloc(mpi->mpi_bus_tag, size,
size, 0, &em->em_seg, 1, &nsegs, BUS_DMA_NOWAIT);
if (error != 0 || nsegs != 1)
goto fail2;
error = bus_dmamem_map(mpi->mpi_bus_tag, &em->em_seg, 1,
size, &em->em_vaddr, BUS_DMA_WAITOK);
if (error != 0 || em->em_vaddr == NULL)
goto fail3;
em++;
}
return poolpage;
fail3:
bus_dmamem_free(mpi->mpi_bus_tag, &em->em_seg, 1);
fail2:
while (--em >= (mbuf_pool_extmem_t *) poolpage)
bus_dmamem_free(mpi->mpi_bus_tag, &em->em_seg, 1);
kmem_free(poolpage, poolpage_size);
fail1:
printf("iwn: mbuf_pool_poolpage_alloc failure\n");
return NULL;
}
static void
mbuf_pool_poolpage_free(struct pool *pool, void *poolpage)
{
unsigned int poolpage_size = pool->pr_alloc->pa_pagesz;
struct mbuf_pool_item *mpi;
mbuf_pool_extmem_t *em;
unsigned int em_count, i;
mpi = MBUF_POOL_TO_MPI(pool);
em_count = poolpage_size / sizeof(mbuf_pool_extmem_t);
em = poolpage;
for (i = 0; i < em_count; i++) {
bus_dmamem_free(mpi->mpi_bus_tag, &em->em_seg, 1);
em++;
}
kmem_free(poolpage, poolpage_size);
}
#else
/*
 * Pool cache constructor: back one mbuf_pool_extmem with a single DMA
 * segment and map it into kernel VA.
 *
 * Returns 0 on success or an errno on failure.  Fails with EBUSY in
 * (soft) interrupt context because bus_dmamem_map eventually allocates
 * from an IPL_NONE pool ("pvpl") and cannot be used there.
 */
static int
mbuf_pool_ctor(void *arg, void *object, int flags)
{
	struct mbuf_pool_item *mpi = arg;
	mbuf_pool_extmem_t *em = object;
	unsigned int size;
	int error, nsegs;	/* bus_dmamem_alloc takes int *, not unsigned int * */

	/* bus_dmamem_map fails in interrupt context. */
	if (cpu_intr_p() || cpu_softintr_p())
		return EBUSY;

	size = mpi->mpi_size;
	em->em_size = size;
	/* XXX verify alignment arg (size) */
	error = bus_dmamem_alloc(mpi->mpi_bus_tag, size,
	    size, 0, &em->em_seg, 1, &nsegs, BUS_DMA_NOWAIT);
	if (error != 0 || nsegs != 1)
		goto fail1;
	/* XXX next call fails with ipl error in pool_get (pool pvpl) */
	error = bus_dmamem_map(mpi->mpi_bus_tag, &em->em_seg, 1,
	    size, &em->em_vaddr, BUS_DMA_WAITOK);
	if (error != 0 || em->em_vaddr == NULL)
		goto fail2;
	return 0;

fail2:
	bus_dmamem_free(mpi->mpi_bus_tag, &em->em_seg, 1);
fail1:
	printf("iwn: mbuf_pool_ctor failure\n");
	/* Report a nonzero errno even when nsegs != 1 with error == 0. */
	return (error != 0) ? error : ENOMEM;
}
static void
mbuf_pool_dtor(void *arg, void *object)
{
struct mbuf_pool_item *mpi = arg;
mbuf_pool_extmem_t *em = object;
bus_dmamem_free(mpi->mpi_bus_tag, &em->em_seg, 1);
}
#endif
/*
* Return the mbuf_pool_item struct that matches pc or tag and size.
* Must be called with mutex held.
*/
/*
 * Look up the mbuf_pool_item matching either the given pool cache or
 * the (tag, size) pair.  Returns NULL if no entry matches.
 * Must be called with mbuf_pool_lock held.
 */
struct mbuf_pool_item *
mbuf_pool_get_pool_item(pool_cache_t pc, bus_dma_tag_t tag, unsigned int size)
{
	struct mbuf_pool_item *cur;

	TAILQ_FOREACH(cur, &mbuf_pool_head, mbuf_pool_list) {
		if (cur->mpi_pc != pc &&
		    (cur->mpi_size != size || cur->mpi_bus_tag != tag))
			continue;
		return cur;
	}
	return NULL;
}
/*
 * Build a heap-allocated name for a pool cache.
 *
 * The allocation and the snprintf bound now agree.  The original code
 * allocated 16 bytes but printed with a bound of 22, and the format
 * ("iwn: test_cache_" plus up to 10 digits and a NUL) needs up to 27
 * bytes — a heap overflow.
 *
 * XXX Nothing frees this name yet (mbuf_pool_cache_destroy leaks it);
 * a caller would need MBUF_POOL_NAMELEN to kmem_free it.
 */
char *
mbuf_pool_get_pool_name(bus_dma_tag_t tag, unsigned int size)
{
	enum { MBUF_POOL_NAMELEN = 32 };
	char *name;

	name = kmem_alloc(MBUF_POOL_NAMELEN, KM_SLEEP);
	/* %u: size is unsigned int. */
	snprintf(name, MBUF_POOL_NAMELEN, "iwn: test_cache_%u", size);
	return name;
}
/*
 * Create (or find and reference) a DMA-safe pool cache for the given
 * bus tag and size.  Repeated calls with the same tag/size return the
 * same pool_cache_t with its reference count incremented.  Intended to
 * be called from bus or device attach methods.
 *
 * Returns the pool cache, or NULL on failure.
 */
pool_cache_t
mbuf_pool_cache_init(bus_dma_tag_t tag, unsigned int size)
{
	pool_cache_t pc = NULL;
	char *name;
	struct mbuf_pool_item *mpi;
	int pc_size = sizeof(struct mbuf_pool_extmem);
	int nmbclusters;

	/* Lazy module initialization. */
	if (! mbuf_pool_initialized) {
		/* XXX Racy code. Need a proper constructor? */
		/* XXX IPL_NONE implies: cannot use in
		an interrupt handler. Verify! */
		mutex_init(&mbuf_pool_lock, MUTEX_DEFAULT, IPL_NONE);
		mbuf_pool_initialized = true;
	}
	mutex_enter(&mbuf_pool_lock);
	/* Protect by mutex in order to avoid race
	with mbuf_pool_cache_destroy */
	/* Existing mbuf_pool_cache? */
	mpi = mbuf_pool_get_pool_item(NULL, tag, size);
	if (mpi == NULL) {
		/* Create a new pool cache */
		/* NOTE(review): KM_SLEEP never returns NULL, so the
		 * check below is dead code. */
		mpi = kmem_alloc(sizeof(struct mbuf_pool_item), KM_SLEEP);
		if (mpi == NULL)
			goto fail;
		mpi->mpi_bus_tag = tag;
		mpi->mpi_size = size;
		/* Pool caches must be named - make up a name. */
		name = mbuf_pool_get_pool_name(tag, size);
		mpi->mpi_name = name;
		/* Should we use IPL_NET instead of IPL_VM? */
#ifdef MBUF_POOL_POOLPAGE_ALLOC
		/* Page-allocator flavour: pa_alloc/pa_free supply whole
		 * pages of pre-built extmem descriptors.  NOTE(review):
		 * pa_pagesz is never set and defaults to 0. */
		mbuf_pool_allocator.pa_alloc = &mbuf_pool_poolpage_alloc;
		mbuf_pool_allocator.pa_free = &mbuf_pool_poolpage_free;
		pc = pool_cache_init(pc_size, 0, 0, PR_NOALIGN|PR_NOTOUCH,
		    name, &mbuf_pool_allocator, IPL_VM, NULL, NULL, NULL);
#else
		/* Ctor/dtor flavour: each object is built on demand. */
		pc = pool_cache_init(pc_size, 0, 0, PR_NOALIGN|PR_NOTOUCH,
		    name, NULL, IPL_VM, &mbuf_pool_ctor, &mbuf_pool_dtor, mpi);
#endif
		/* NOTE(review): "%px" is not a standard format -- this
		 * prints the pointer followed by a literal 'x'; probably
		 * "%p" was meant. */
		printf("mbuf_pool_cache_init (%px): %d / %s (%d)\n", pc, size, name, pc_size);
		if (pc == NULL) {
			/* NOTE(review): name is leaked on this path;
			 * freeing it needs the allocation size from
			 * mbuf_pool_get_pool_name -- TODO fix. */
			kmem_free(mpi, sizeof(struct mbuf_pool_item));
			goto fail;
		}
		/*
		 * Set the hard limit on the pool to the number of
		 * mbuf clusters the kernel is to support. Log the limit
		 * reached message max once a minute.
		 * XXX Sizing is wrong. Fix.
		 */
		nmbclusters = physmem * PAGE_SIZE / (4 * size);
#ifdef NMBCLUSTERS_MAX
		nmbclusters = MIN(nmbclusters, NMBCLUSTERS_MAX);
#endif
#ifdef NMBCLUSTERS
		nmbclusters = MIN(nmbclusters, NMBCLUSTERS);
#endif
		pool_cache_sethardlimit(pc, nmbclusters,
		    "WARNING: mbuf_pool_cache limit reached", 60);
		/* mpi is needed in mbuf_pool_poolpage_alloc/free */
		/* XXX is this OK? */
		pc->pc_pool.pr_qcache = mpi;
		mpi->mpi_pc = pc;
		mpi->mpi_refcnt = 1;
		/* Add the mbuf_pool_item to the mbuf pool item list. */
		TAILQ_INSERT_TAIL(&mbuf_pool_head, mpi, mbuf_pool_list);
	}
	else {
		/* Increment refcount and return the existing pool cache */
		mpi->mpi_refcnt++;
		pc = mpi->mpi_pc;
	}
fail:	mutex_exit(&mbuf_pool_lock);
	if (pc != NULL) {
		/* pool_cache_set_drain_hook(pc, m_reclaim, NULL); */
		/* pool_cache_sethardlimit(pc, nmbclusters, mclpool_4k_warnmsg,
		60);
		*/
	}
	return pc;
}
/*
 * Drop a reference on a pool cache created by mbuf_pool_cache_init()
 * and destroy it when the last reference goes away.  Intended to be
 * called from bus or device detach methods.
 *
 * The original teardown destroyed mbuf_pool_lock while it was still
 * held (undefined behaviour; NetBSD asserts the mutex is unheld) and
 * raced with re-initialization.  The mutex is a static object, so we
 * simply keep it initialized for the lifetime of the module and always
 * release it on exit.
 */
void
mbuf_pool_cache_destroy(pool_cache_t pc)
{
	struct mbuf_pool_item *mpi;

	mutex_enter(&mbuf_pool_lock);
	mpi = mbuf_pool_get_pool_item(pc, NULL, 0);
	KASSERT(mpi != NULL);
	if (mpi->mpi_refcnt <= 1) {
		/* Last reference: tear the pool cache down. */
		pool_cache_destroy(mpi->mpi_pc);
		TAILQ_REMOVE(&mbuf_pool_head, mpi, mbuf_pool_list);
		/* NOTE(review): mpi->mpi_name is leaked here; freeing it
		 * requires the allocation size used in
		 * mbuf_pool_get_pool_name -- TODO fix. */
		kmem_free(mpi, sizeof(struct mbuf_pool_item));
	} else {
		mpi->mpi_refcnt--;
	}
	mutex_exit(&mbuf_pool_lock);
}
/* XXX These methods may not be needed. Why not call
the pool_cache methods instead? */
/* Perhaps implement OpenBSD's livelock solution? */
/*
 * Thin wrapper around pool_cache_get_paddr(9), kept so drivers can stay
 * on the mbuf_pool_* namespace.
 */
void *
mbuf_pool_cache_get_paddr(pool_cache_t pc, int flags, paddr_t *pap)
{
	void *object;

	object = pool_cache_get_paddr(pc, flags, pap);
	return object;
}
/*
 * Thin wrapper around pool_cache_put_paddr(9).
 *
 * The original wrote "return pool_cache_put_paddr(...);" — returning a
 * void expression from a void function is a C constraint violation
 * (C99 6.8.6.4); the plain call is used instead.
 */
void
mbuf_pool_cache_put_paddr(pool_cache_t pc, void *object, paddr_t pa)
{
	pool_cache_put_paddr(pc, object, pa);
}
/*
Implement these as needed:
mbuf_pool_cache_get
mbuf_pool_cache_put
mbuf_pool_cache_destruct_object
mbuf_pool_cache_invalidate
mbuf_pool_cache_sethiwat
mbuf_pool_cache_setlowat
mbuf_pool_cache_sethardlimit
*/
Home |
Main Index |
Thread Index |
Old Index