Subject: pool(9) revisited
To: None <tech-kern@NetBSD.ORG>
From: Paul Kranenburg <pk@cs.few.eur.nl>
List: tech-kern
Date: 07/01/1998 17:53:44
And here's a draft implementation.

Some notes:

	- probably need to keep pages sorted by the number of free
	  items in them

	- finding the page a returned item belongs to is a linear
	  search; resolve with hashing? (a sketch follows
	  pr_find_pagehead() below)

	- a single threshold for the canonical size of a pool will
	  likely make it behave suboptimally under heavy load.

Suggestions welcome.
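
To illustrate the intended use of the interface, here's a sketch
(`struct foo' and the M_FOO malloc type are made-up names):

	struct pool *foo_pool;
	struct foo *f;

	/*
	 * Create a pool of `struct foo' items, primed with 32 of them;
	 * passing 0/NULL selects the default page size and page
	 * allocator.
	 */
	foo_pool = pool_create(sizeof(struct foo), 32, "foopl", 0,
			       NULL, NULL, M_FOO);

	f = pool_get(foo_pool, PR_WAITOK);	/* may tsleep() */
	/* ... use the item ... */
	pool_put(foo_pool, f);			/* hand it back */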



------------------------------
/*	$NetBSD: pool.h,v 1.3 1998/02/19 23:51:48 pk Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef _SYS_POOL_H_
#define _SYS_POOL_H_

#include <sys/queue.h>


typedef struct pool {
	TAILQ_HEAD(,pool_item)	pr_pagelist;	/* Allocated pages */
	struct pool_item	*pr_curpage;
	unsigned int		pr_npages;	/* # of pages allocated */
	unsigned int		pr_hiwat;	/* nominal # of pages in pool */
	unsigned int		pr_pagesz;	/* page size, must be 2^n */
	unsigned long		pr_pagemask;	/* abbrev. of above */
	unsigned int		pr_itemsperpage;/* # items that fit in a page */
	unsigned int		pr_size;	/* Size of item */
	void			*(*pr_alloc) __P((unsigned long, int, int));
	void			(*pr_free) __P((void *, unsigned long, int));
	int			pr_mtype;	/* memory allocator tag */
	char			*pr_wchan;	/* tsleep(9) identifier */
	unsigned int		pr_flags;
#define PR_MALLOCOK	1
#define PR_WAITOK	2
#define PR_WANTED	4
#define PR_STATIC	8
#define PR_FREEHEADER	16
#define PR_URGENT	32
	struct simplelock	pr_lock;

	/*
	 * Instrumentation
	 */
	unsigned long		pr_nalloc;
	unsigned long		pr_nfree;
	unsigned long		pr_npagealloc;
	unsigned long		pr_npagefree;
} *pool_handle_t;

pool_handle_t	pool_create __P((size_t, int, char *, size_t,
				 void *(*)__P((unsigned long, int, int)),
				 void  (*)__P((void *, unsigned long, int)),
				 int));
void		pool_init __P((struct pool *, size_t, int, char *, size_t,
				 void *(*)__P((unsigned long, int, int)),
				 void  (*)__P((void *, unsigned long, int)),
				 int));
void		pool_destroy __P((pool_handle_t));
void		*pool_get __P((pool_handle_t, int));
void		pool_put __P((pool_handle_t, void *));
int		pool_prime __P((pool_handle_t, int, caddr_t));
void		pool_sethiwat __P((pool_handle_t, int));
void		pool_print __P((pool_handle_t, char *));

#define POOL_ITEM_STORAGE_SIZE(size, nitems) (ALIGN(size) * (nitems))
#define POOL_STORAGE_SIZE(size, nitems) \
	(ALIGN(sizeof(struct pool)) + POOL_ITEM_STORAGE_SIZE(size, nitems))
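
/*
 * The above are sizing help for the backing store of a static
 * (PR_STATIC) pool: the pool structure itself plus `nitems' aligned
 * items.  Note that pool_prime() consumes static storage in whole
 * pr_pagesz-sized pages.
 */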

#endif /* _SYS_POOL_H_ */

------------------------------
/*	$NetBSD: subr_pool.c,v 1.2 1998/02/19 23:52:14 pk Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/pool.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>

#if defined(UVM)
#include <uvm/uvm.h>
#endif  

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according
 * to the pool item size. Each page is kept on a list headed by `pr_pagelist'
 * in the pool structure and the individual pool items are on a linked list
 * headed by `pi_buckethead' in each page header. The memory for building
 * the page list is taken from the allocated pages themselves.
 * 
 */
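
/*
 * Rough picture of a page whose header spans the first chunk(s)
 * (PI_KEEPHEAD set, i.e. the item size is smaller than the header):
 *
 *	+--------------------+------+------+-- ... --+------+
 *	| page header        | item | item |         | item |
 *	| (struct pool_item) |      |      |         |      |
 *	+--------------------+------+------+-- ... --+------+
 *	  pi_buckethead ----> free items linked via pi_bucketlist
 *
 * If the item size is at least sizeof(struct pool_item), the header
 * occupies a single chunk and is itself handed out as the last item
 * of the page (PI_KEEPHEAD clear).
 */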

struct pool_item {
	/* Page headers */
	TAILQ_ENTRY(pool_item)	pi_pagelist;	/* pool page list */
	TAILQ_HEAD(,pool_item)	pi_buckethead;	/* chunk list for this page */
	int			pi_nmissing;	/* # of chunks in use */
	int			pi_flag;
#define				PI_KEEPHEAD	1
/* XXX - flag could move to pool header.. */
	caddr_t			pi_page;	/* this page's address */

	/* Other entries use only this list entry */
	TAILQ_ENTRY(pool_item)	pi_bucketlist;
};
#define PH_PAGE_DEPLETED(ph)	\
	(((ph)->pi_flag & PI_KEEPHEAD) && (ph)->pi_buckethead.tqh_first == NULL)


static struct pool_item	*pr_find_pagehead __P((struct pool *, caddr_t));
static void		pool_rm_page __P((struct pool *, struct pool_item *));
static int		pool_prime_page __P((struct pool *, caddr_t));
static void		*pool_page_alloc __P((unsigned long, int, int));
static void		pool_page_free __P((void *, unsigned long, int));

static __inline__ struct pool_item *
pr_find_pagehead(pp, page)
	struct pool *pp;
	caddr_t page;
{
	struct pool_item *ph;

	/* XXX - linear search... */
	for (ph = pp->pr_pagelist.tqh_first; ph != NULL;
	     ph = ph->pi_pagelist.tqe_next) {
		if (ph->pi_page == page)
			return (ph);
	}
	return (NULL);
}
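
/*
 * A possible answer to the linear search above: hash the page address
 * into a small table of page header lists.  Only a sketch; the
 * `pr_hashtab' pool member, the `pi_hashlist' entry and PR_HASHTABSIZE
 * are made up here, and pages would have to be inserted into and
 * removed from the hash chains wherever `pi_pagelist' is maintained.
 */
#ifdef notyet
#define	PR_HASHTABSIZE	8		/* must be a power of two */
#define	PR_HASH(pp, page) \
	((((u_long)(page)) / (pp)->pr_pagesz) & (PR_HASHTABSIZE - 1))

static __inline__ struct pool_item *
pr_find_pagehead_hash(pp, page)
	struct pool *pp;
	caddr_t page;
{
	struct pool_item *ph;

	for (ph = pp->pr_hashtab[PR_HASH(pp, page)].lh_first; ph != NULL;
	     ph = ph->pi_hashlist.le_next) {
		if (ph->pi_page == page)
			return (ph);
	}
	return (NULL);
}
#endif /* notyet */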

static __inline__ void
pool_rm_page(pp, ph)
	struct pool *pp;
	struct pool_item *ph;
{

	/*
	 * Unlink a page from the pool and release it.
	 */
	pp->pr_npagefree++;
	TAILQ_REMOVE(&pp->pr_pagelist, ph, pi_pagelist);
	(*pp->pr_free)(ph->pi_page, pp->pr_pagesz, pp->pr_mtype);
	pp->pr_npages--;

	if (pp->pr_curpage == ph) {
		/*
		 * Find a new non-empty page header, if any.
		 * Start search from the page head, to increase the
		 * chance for "high water" pages to be freed.
		 */
		for (ph = pp->pr_pagelist.tqh_first; ph != NULL;
		     ph = ph->pi_pagelist.tqe_next)
			if (!PH_PAGE_DEPLETED(ph))
				break;

		pp->pr_curpage = ph;
	}
}

struct pool *
pool_create(size, nitems, wchan, pagesz, alloc, release, mtype)
	size_t	size;
	int	nitems;
	char	*wchan;
	size_t	pagesz;
	void	*(*alloc) __P((unsigned long, int, int));
	void	(*release) __P((void *, unsigned long, int));
	int	mtype;
{
	struct pool *pp;

	pp = (struct pool *)malloc(sizeof(*pp), mtype, M_NOWAIT);
	if (pp == NULL)
		return (NULL);

	pool_init(pp, size, PR_FREEHEADER, wchan, pagesz,
		  alloc, release, mtype);

	if (nitems != 0) {
		if (pool_prime(pp, nitems, NULL) != 0) {
			pool_destroy(pp);
			return (NULL);
		}
	}

	return (pp);
}

void
pool_init(pp, size, flags, wchan, pagesz, alloc, release, mtype)
	struct pool	*pp;
	size_t		size;
	int		flags;
	char		*wchan;
	size_t		pagesz;
	void		*(*alloc) __P((unsigned long, int, int));
	void		(*release) __P((void *, unsigned long, int));
	int		mtype;
{

	if (pagesz == 0)
		pagesz = NBPG;

	if (!powerof2(pagesz))
		panic("pool_init: page size invalid (%lx)\n", (u_long)pagesz);

	if (alloc == NULL)
		alloc = pool_page_alloc;

	if (release == NULL)
		release = pool_page_free;

	/*
	 * Initialize the pool structure.
	 */
	TAILQ_INIT(&pp->pr_pagelist);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_hiwat = 0;
	pp->pr_flags = flags;
	pp->pr_size = ALIGN(size);
	pp->pr_wchan = wchan;
	pp->pr_mtype = mtype;
	pp->pr_alloc = alloc;
	pp->pr_free = release;
	pp->pr_pagesz = pagesz;
	pp->pr_pagemask = ~(pagesz - 1);
	pp->pr_itemsperpage = pagesz / pp->pr_size;
	pp->pr_nalloc = 0;
	pp->pr_nfree = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;

	simple_lock_init(&pp->pr_lock);

	return;
}

/*
 * De-commission a pool resource.
 */
void
pool_destroy(pp)
	struct pool *pp;
{
	struct pool_item *ph;

#ifdef DIAGNOSTIC
	if (pp->pr_nalloc - pp->pr_nfree != 0)
		panic("pool_destroy: pool busy: still out: %lu\n",
		      pp->pr_nalloc - pp->pr_nfree);
#endif
	if ((pp->pr_flags & PR_STATIC) == 0)
		while ((ph = pp->pr_pagelist.tqh_first) != NULL)
			pool_rm_page(pp, ph);

	if (pp->pr_flags & PR_FREEHEADER)
		free(pp, pp->pr_mtype);
}


/*
 * Grab an item from the pool; must be called at appropriate spl level
 */
void *
pool_get(pp, flags)
	struct pool *pp;
	int flags;
{
	struct pool_item *pi, *ph;

#ifdef DIAGNOSTIC
	if ((pp->pr_flags & PR_STATIC) && (flags & PR_MALLOCOK))
		panic("pool_get: static");
#endif

	simple_lock(&pp->pr_lock);

	/*
	 * The convention we use is that if `curpage' is not NULL, then
	 * it points at a page header from which an item can still be
	 * taken. In particular, `curpage' never points at a page header
	 * which has PI_KEEPHEAD set and has no items left in its bucket.
	 */
again:
	if ((ph = pp->pr_curpage) == NULL) {
		void *v = (*pp->pr_alloc)(pp->pr_pagesz, flags, pp->pr_mtype);
		if (v == NULL) {
			if (flags & PR_URGENT)
				panic("pool_get: urgent");
			if ((flags & PR_WAITOK) == 0) {
				simple_unlock(&pp->pr_lock);
				return (NULL);
			}
			pp->pr_flags |= PR_WANTED;
			simple_unlock(&pp->pr_lock);
			tsleep((caddr_t)pp, PSWP, pp->pr_wchan, 0);
			simple_lock(&pp->pr_lock);
		} else {
			pp->pr_npagealloc++;
			pool_prime_page(pp, v);
		}

		goto again;
	}

	if ((pi = ph->pi_buckethead.tqh_first) == NULL) {
#ifdef DIAGNOSTIC
		if (pp->pr_size < sizeof(struct pool_item))
			panic("pool_get: must keep page header "
			      "for item size %u", pp->pr_size);
#endif
		/*
		 * Last item in bucket is gone; remove from page list
		 */
		TAILQ_REMOVE(&pp->pr_pagelist, ph, pi_pagelist);
		pi = ph;
		ph = NULL;
	} else {
		/*
		 * Remove from bucket list.
		 */
		TAILQ_REMOVE(&ph->pi_buckethead, pi, pi_bucketlist);
		ph->pi_nmissing++;
		if (PH_PAGE_DEPLETED(ph))
			ph = NULL;
	}

	if (ph == NULL) {
		/*
		 * Find a new non-empty page header, if any.
		 * Start search from the page head, to increase the
		 * chance for "high water" pages to be freed.
		 */
		for (ph = pp->pr_pagelist.tqh_first; ph != NULL;
		     ph = ph->pi_pagelist.tqe_next)
			if (!PH_PAGE_DEPLETED(ph))
				break;

		pp->pr_curpage = ph;
	}

	pp->pr_nalloc++;
	simple_unlock(&pp->pr_lock);
	return ((void *)pi);
}

/*
 * Return resource to the pool; must be called at appropriate spl level
 */
void
pool_put(pp, v)
	struct pool *pp;
	void *v;
{
	struct pool_item *pi = v;
	struct pool_item *ph;
	caddr_t page;

	page = (caddr_t)((u_long)v & pp->pr_pagemask);
	simple_lock(&pp->pr_lock);
	pp->pr_nfree++;

	if ((ph = pr_find_pagehead(pp, page)) == NULL) {
		/*
		 * Make this returned item the bucket header.
		 */
#ifdef DIAGNOSTIC
		if (pp->pr_size < sizeof(struct pool_item))
			panic("pool_put: should have page header "
			      "for item size %u", pp->pr_size);
#endif
		ph = pi;
		TAILQ_INSERT_HEAD(&pp->pr_pagelist, ph, pi_pagelist);
		TAILQ_INIT(&ph->pi_buckethead);
		ph->pi_nmissing = pp->pr_itemsperpage - 1;
		ph->pi_page = page;
		ph->pi_flag = 0;
	} else {
		/*
		 * Return to bucket.
		 */
		TAILQ_INSERT_TAIL(&ph->pi_buckethead, pi, pi_bucketlist);
		ph->pi_nmissing--;
	}

	/* Cancel "pool empty" condition if it exists */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (pp->pr_flags & PR_WANTED) {
		pp->pr_flags &= ~PR_WANTED;
		wakeup((caddr_t)pp);
		simple_unlock(&pp->pr_lock);
		return;
	}

	if (ph->pi_nmissing == 0 && pp->pr_npages > pp->pr_hiwat) {
		/*
		 * The pool is over the high water mark and this page
		 * is now complete, so release it.
		 */
#ifdef DIAGNOSTIC
		if (pp->pr_flags & PR_STATIC) {
			/* can't happen because hiwat > freecount */
			panic("pool_put: static");
		}
#endif
		pool_rm_page(pp, ph);
	}

	simple_unlock(&pp->pr_lock);
}

/*
 * Add N items to the pool.
 */
int
pool_prime(pp, n, storage)
	struct pool *pp;
	int n;
	caddr_t storage;
{
	caddr_t cp;

#ifdef DIAGNOSTIC
	if (storage && !(pp->pr_flags & PR_STATIC))
		panic("pool_prime: storage for non-static pool");
	/* !storage && static caught below */
#endif

	simple_lock(&pp->pr_lock);
	while (n > 0) {

		if (pp->pr_flags & PR_STATIC) {
			cp = storage;
			storage += pp->pr_pagesz;
		} else {
			cp = (*pp->pr_alloc)(pp->pr_pagesz, 0, pp->pr_mtype);
		}

		if (cp == NULL) {
			simple_unlock(&pp->pr_lock);
			return (ENOMEM);
		}

		pool_prime_page(pp, cp);
		pp->pr_hiwat++;
		n -= pp->pr_itemsperpage;
	}
	simple_unlock(&pp->pr_lock);
	return (0);
}

/*
 * Add a page worth of items to the pool
 */
static int
pool_prime_page(pp, storage)
	struct pool *pp;
	caddr_t storage;
{
	struct pool_item *pi, *ph;
	caddr_t cp = storage;
	int n, m;

	ph = (struct pool_item *)cp;

	/*
	 * First, determine how many chunks we need for our
	 * page header. If that is more than one, we need
	 * to hold on to the list head.
	 */
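	/*
	 * (E.g. with a header of, say, 36 bytes and pr_size 16,
	 * m = howmany(36, 16) = 3 and the header must be kept;
	 * with pr_size 64, m = 1 and the header chunk doubles as
	 * the page's last item.)
	 */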
	m = howmany(sizeof(struct pool_item), pp->pr_size);
	ph->pi_flag = (m > 1) ? PI_KEEPHEAD : 0;

	/*
	 * Insert page header.
	 */
	TAILQ_INSERT_HEAD(&pp->pr_pagelist, ph, pi_pagelist);
	TAILQ_INIT(&ph->pi_buckethead);
	ph->pi_page = storage;
	ph->pi_nmissing = 0;

	/*
	 * Insert remaining chunks on the bucket list.
	 */
	cp = (caddr_t)(cp + m * pp->pr_size);
	n = pp->pr_itemsperpage - m;
	while (n--) {
		pi = (struct pool_item *)cp;
		cp = (caddr_t)(cp + pp->pr_size);

		/* Insert on page list */
		TAILQ_INSERT_TAIL(&ph->pi_buckethead, pi, pi_bucketlist);
	}

	/*
	 * If the pool was depleted, point at the new page.
	 */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	pp->pr_npages++;
	return (0);
}

/*
 * Set the high water mark: the number of whole pages needed to
 * hold at least `n' items.
 */
void
pool_sethiwat(pp, n)
	pool_handle_t	pp;
	int n;
{
	pp->pr_hiwat = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
}


/*
 * Default page allocator
 */
static void *
pool_page_alloc(sz, flags, mtype)
	unsigned long sz;
	int flags;
	int mtype;
{
	vm_offset_t va;
#if defined(UVM)
	va = uvm_km_kmemalloc(kmem_map, uvmexp.kmem_object,
			      (vm_size_t)sz, UVM_KMF_NOWAIT);
#else   
	va = kmem_malloc(kmem_map, (vm_size_t)sz, 0);
#endif                       
	return ((void *)va);
}

static void
pool_page_free(v, sz, mtype)
	void *v;
	unsigned long sz;
	int mtype;
{
#if defined(UVM)
	uvm_km_free(kmem_map, (vm_offset_t)v, sz);
#else
	kmem_free(kmem_map, (vm_offset_t)v, sz);
#endif  
}


void
pool_print(pp, label)
	struct pool *pp;
	char *label;
{

	if (label != NULL)
		printf("%s: ", label);

	printf("pool %s: nalloc %lu nfree %lu npagealloc %lu npagefree %lu\n"
	       "         npages %u hiwat %u itemsperpage %u\n",
		pp->pr_wchan,
		pp->pr_nalloc,
		pp->pr_nfree,
		pp->pr_npagealloc,
		pp->pr_npagefree,
		pp->pr_npages,
		pp->pr_hiwat,
		pp->pr_itemsperpage);
}