tech-kern archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: pool_cache interface and memory constraints

On 2011-11-28 at 15:14 David Young wrote:
> On Sat, Nov 26, 2011 at 11:04:56AM -0700, Sverre Froyen wrote:
> > Ideally, we would need separate pool caches for PCI, ISA, etc., each with
> > its own set of constraints. These constraints are part of the DMA tag
> > structures. It would therefore be nice to be able to access the pool
> > cache by the DMA tag and the size rather than a pool cache name string.
> > Should pool cache (and pool) be extended to support this?
> It may help to have a look at the way that I handle this in ixgbe(4).
> Start at ixgbe_jcl_reinit() and then have a look at ixgbe_getjcl().
> Many improvements to ixgbe(4)'s scheme can be made.  For example,
> instead of a freelist protected by a mutex (ixgbe_extmem_head_t), a
> pool_cache of ixgbe_extmem_t's could be used.  Also, the system should
> arbitrate between drivers for access to DMA buffers---that would be a
> big improvement over the current mbuf-cluster scheme where a few driver
> instances can starve all others of clusters.

Thanks for the comments! I decided to try to write some code (attached)
to illustrate my ideas. The code is incomplete and untested but I think
that it can provide an idea of what I'm thinking about. I would appreciate 
comments about its feasibility. In particular, will pool_cache and bus_dma 
work together the way I have it?

Individual drivers would request the pools they need in their attach methods
and pass the pool handles around as needed.



Mbuf_pool_cache_init sets up a DMA-safe pool_cache for the
specified bus and size. The pool_cache will use bus_dmamem_alloc
as its memory allocator. Mbuf_pool_cache_init may be called
multiple times for a given bus and size. Subsequent calls
return the original pool_cache and increment a reference count.
Mbuf_pool_cache_init should be called from bus or device attach
methods as needed.

Mbuf_pool_cache_destroy should similarly be called from a bus or
device detach method.  The reference counter is used to destroy
the pool_cache when appropriate.


#include <sys/types.h>
#include <sys/kmem.h>
#include <sys/pool.h>
#include <sys/bus.h>

/* The mbuf_pool_item list */
static TAILQ_HEAD(, mbuf_pool_item) mbuf_pool_head =

struct mbuf_pool_item {
        TAILQ_ENTRY(mbuf_pool_item) mbuf_pool_list;
        bus_dma_tag_t mpi_bus_tag;
        unsigned int mpi_size;
        char *mpi_name;
        pool_cache_t mpi_pc;
        unsigned int mpi_refcnt;

static bool mbuf_pool_initialized = 0;
static kmutex_t mbuf_pool_lock;

static struct pool_allocator mbuf_pool_allocator;

#define MBUF_POOL_TO_MPI(pool) ((struct mbuf_pool_item *)(pool->pr_qcache))

struct mbuf_pool_item *
mbuf_pool_get_pool_item(pool_cache_t pc, bus_dma_tag_t tag, unsigned int 

char *
mbuf_pool_get_pool_name(bus_dma_tag_t tag, unsigned int size);

mbuf_pool_cache_init(bus_dma_tag_t tag, unsigned int size);

mbuf_pool_cache_destroy(pool_cache_t pc);

void *
mbuf_pool_cache_get_paddr(pool_cache_t pc, int flags, paddr_t *pap);

mbuf_pool_cache_put_paddr(pool_cache_t pc, void *object, paddr_t pa);

 * Custom pool alloc and free methods.

static void *
mbuf_pool_poolpage_alloc(struct pool *pool, int prflags)
        struct mbuf_pool_item *mpi;
        int error, nsegs;
        bus_dma_segment_t seg;

        mpi = MBUF_POOL_TO_MPI(pool);

        /* XXX verify alignment arg (mpi->mpi_size) */
        error = bus_dmamem_alloc(mpi->mpi_bus_tag, pool->pr_alloc->pa_pagesz,
                mpi->mpi_size, 0, &seg, 1, &nsegs, BUS_DMA_NOWAIT);

        return (error == 0 && nsegs == 1) ? (void *)(&seg) : NULL;

/*
 * Page free hook for the pool: release a page previously returned by
 * mbuf_pool_poolpage_alloc().  Still a stub in this sketch.
 */
static void
mbuf_pool_poolpage_free(struct pool *pool, void *addr)
{
        struct mbuf_pool_item *mpi;

        mpi = MBUF_POOL_TO_MPI(pool);

        /*
         * TODO: bus_dmamem_unmap() the KVA and bus_dmamem_free() the
         * backing segment.  This requires recovering the
         * bus_dma_segment_t for 'addr', e.g. via a lookup table keyed
         * on the mapped address, since bus_dma does not provide a
         * reverse mapping.
         */
}

 * Return the mbuf_pool_item struct that matches pc or tag and size.
 * Must be called with mutex held.

struct mbuf_pool_item *
mbuf_pool_get_pool_item(pool_cache_t pc, bus_dma_tag_t tag, unsigned int size)
        struct mbuf_pool_item *mpi = NULL, *mpi1;

        TAILQ_FOREACH(mpi1, &mbuf_pool_head, mbuf_pool_list) {
                if (mpi1->mpi_pc == pc ||
                    (mpi1->mpi_size == size && mpi1->mpi_bus_tag == tag)) {
                        mpi = mpi1;

        return mpi;

/*
 * Build a unique pool_cache name from the DMA tag and object size.
 * Returns a kmem-allocated string owned by the caller's mbuf_pool_item.
 * (Was a stub returning NULL, which pool_cache_init() cannot accept --
 * pool caches must be named.)
 */
char *
mbuf_pool_get_pool_name(bus_dma_tag_t tag, unsigned int size)
{
        /* "mbpl" + tag pointer + size comfortably fits in 32 bytes. */
        const size_t namelen = 32;
        char *name;

        name = kmem_alloc(namelen, KM_SLEEP);
        snprintf(name, namelen, "mbpl%p/%u", tag, size);
        return name;
}

mbuf_pool_cache_init(bus_dma_tag_t tag, unsigned int size)
        pool_cache_t pc = NULL;
        char *name;
        struct mbuf_pool_item *mpi;

        if (! mbuf_pool_initialized) {
                /* XXX Racy code. Need a proper constructor? */
                /* XXX IPL_NONE implies: cannot use in
                   an interrupt handler. Verify! */
                mutex_init(&mbuf_pool_lock, MUTEX_DEFAULT, IPL_NONE);
                mbuf_pool_initialized = true;


        /* Protect by mutex in order to avoid race
           with mbuf_pool_cache_destroy */

        /* Existing mbuf_pool_cache? */
        mpi = mbuf_pool_get_pool_item(NULL, tag, size);

        if (mpi == NULL) {

                /* Create a new pool cache */

                mpi = kmem_alloc(sizeof(struct mbuf_pool_item), KM_SLEEP);
                if (mpi == NULL)
                        goto fail;

                mpi->mpi_bus_tag = tag;
                mpi->mpi_size = size;

                /* Pool caches must be named - make up a name. */
                name = mbuf_pool_get_pool_name(tag, size);
                mpi->mpi_name = name;

                mbuf_pool_allocator.pa_alloc = &mbuf_pool_poolpage_alloc;
                mbuf_pool_allocator.pa_free = &mbuf_pool_poolpage_free;

                pc = pool_cache_init(size, 0, 0, 0, name,
                    &mbuf_pool_allocator, IPL_VM, NULL, NULL, NULL);

                if (pc == NULL) {
                        kmem_free(mpi, sizeof(struct mbuf_pool_item));
                        goto fail;

                /* mpi is needed in mbuf_pool_poolpage_alloc/free */
                /* XXX is this OK? */
                pc->pc_pool.pr_qcache = mpi;

                mpi->mpi_pc = pc;
                mpi->mpi_refcnt = 1;

                /* Add the mbuf_pool_item to the mbuf pool item list. */
                TAILQ_INSERT_TAIL(&mbuf_pool_head, mpi, mbuf_pool_list);
        else {
                /* Increment refcount and return the existing pool cache */
                pc = mpi->mpi_pc;

fail:   mutex_exit(&mbuf_pool_lock);

        return pc;

/* XXX should this method use a pool_cache_t pc argument instead? */

/*
 * Drop one reference to the pool cache; tear it down when the last
 * reference goes away.  Called from bus/device detach methods.
 */
void
mbuf_pool_cache_destroy(pool_cache_t pc)
{
        struct mbuf_pool_item *mpi;

        mutex_enter(&mbuf_pool_lock);

        mpi = mbuf_pool_get_pool_item(pc, NULL, 0);
        KASSERT(mpi != NULL);

        if (mpi->mpi_refcnt <= 1) {
                /* Last reference: pool cache is no longer needed. */
                TAILQ_REMOVE(&mbuf_pool_head, mpi, mbuf_pool_list);
                pool_cache_destroy(mpi->mpi_pc);
                /* TODO: also free mpi->mpi_name (track its length). */
                kmem_free(mpi, sizeof(*mpi));
        } else {
                mpi->mpi_refcnt--;
        }

        if (TAILQ_EMPTY(&mbuf_pool_head)) {
                /* XXX Add code here that un-initializes
                   this object when appropriate. */
                /* XXX OK to destroy a held mutex? */
                /* XXX Racy code. */
                mbuf_pool_initialized = false;
        }

        if (mbuf_pool_initialized)
                mutex_exit(&mbuf_pool_lock);
        /* else: mutex was logically torn down above -- see XXX notes. */
}

/* XXX These methods may not be needed. Why not call
   the pool_cache methods instead? */
/* Perhaps implement OpenBSD's livelock solution? */

/* Thin wrapper around pool_cache_get_paddr(9). */
void *
mbuf_pool_cache_get_paddr(pool_cache_t pc, int flags, paddr_t *pap)
{
        return pool_cache_get_paddr(pc, flags, pap);
}

/* Thin wrapper around pool_cache_put_paddr(9), which returns void. */
void
mbuf_pool_cache_put_paddr(pool_cache_t pc, void *object, paddr_t pa)
{
        pool_cache_put_paddr(pc, object, pa);
}


Implement these as needed:



Home | Main Index | Thread Index | Old Index