Subject: Improving the pool allocator on large-paged machines
To: None <tech-kern@netbsd.org>
From: Ben Harris <bjh21@netbsd.org>
List: tech-kern
Date: 08/11/2001 17:49:18
At present, our pool allocator allocates memory to each pool in page-sized
units.  On systems where the hardware page size is unreasonably large
(e.g. 32K on arm26), this can lead to substantial amounts of wastage (an
arm26 system booted single user has 832K allocated to pools, of which only
69K is actually in use).

My plan for dealing with this is to have the pool allocator work in
smaller units than hardware pages.  pool_init() defaults to using a new
pair of functions for allocating memory, pool_subpage_alloc and
pool_subpage_free, which in turn use pool_get and pool_put on a pool
(psppool) of sub-pages.  This all seems to work, though it confuses vmstat
a little, since its sums don't compensate for pools-within-pools.

The main fly in the ointment is that a lot of pools explicitly ask to use
pool_page_alloc_nointr(), so at the moment I make that equivalent to
pool_subpage_alloc() and special-case it in pool_init() to get the page
size right.  This is grotty and there should be a better way.

Anyway, here's the current state of the code.  Obviously the #define at
the top will have to be made a little more sensible, either coming from
<machine/param.h> or being determined automatically based on the page size.

Index: subr_pool.c
===================================================================
RCS file: /cvsroot/syssrc/sys/kern/subr_pool.c,v
retrieving revision 1.60
diff -u -r1.60 subr_pool.c
--- subr_pool.c	2001/07/01 06:12:20	1.60
+++ subr_pool.c	2001/08/11 16:49:56
@@ -64,12 +64,19 @@
  * small pool items) or taken from an internal pool of page headers (`phpool').
  */

+#define POOL_SUBPAGE 8192
+
 /* List of all pools */
 TAILQ_HEAD(,pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head);

 /* Private pool for page header structures */
 static struct pool phpool;

+#ifdef POOL_SUBPAGE
+/* Pool of subpages for use by normal pools. */
+static struct pool psppool;
+#endif
+
 /* # of seconds to retain page after last use */
 int pool_inactive_time = 10;

@@ -152,6 +159,10 @@
 		    struct pool_item_header *);
 static void	*pool_page_alloc(unsigned long, int, int);
 static void	pool_page_free(void *, unsigned long, int);
+#ifdef POOL_SUBPAGE
+static void	*pool_subpage_alloc(unsigned long, int, int);
+static void	pool_subpage_free(void *, unsigned long, int);
+#endif

 static void pool_print1(struct pool *, const char *,
 	void (*)(const char *, ...));
@@ -377,13 +388,21 @@
 		panic("pool_init: page size invalid (%lx)\n", (u_long)pagesz);

 	if (alloc == NULL && release == NULL) {
+#ifdef POOL_SUBPAGE
+		alloc = pool_subpage_alloc;
+		release = pool_subpage_free;
+		pagesz = POOL_SUBPAGE;
+#else
 		alloc = pool_page_alloc;
 		release = pool_page_free;
 		pagesz = PAGE_SIZE;	/* Rounds to PAGE_SIZE anyhow. */
+#endif
 	} else if ((alloc != NULL && release != NULL) == 0) {
 		/* If you specifiy one, must specify both. */
 		panic("pool_init: must specify alloc and release together");
-	}
+	} else if (alloc == pool_page_alloc_nointr &&
+	    release == pool_page_free_nointr)
+		pagesz = POOL_SUBPAGE;

 	if (pagesz == 0)
 		pagesz = PAGE_SIZE;
@@ -499,8 +518,15 @@
 	 * XXX LOCKING.
 	 */
 	if (phpool.pr_size == 0) {
+#ifdef POOL_SUBPAGE
+		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0, 0,
+		    "phpool", PAGE_SIZE, pool_page_alloc, pool_page_free, 0);
+		pool_init(&psppool, POOL_SUBPAGE, POOL_SUBPAGE, 0, 0,
+		    "psppool", PAGE_SIZE, pool_page_alloc, pool_page_free, 0);
+#else
 		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
 		    0, "phpool", 0, 0, 0, 0);
+#endif
 		pool_init(&pcgpool, sizeof(struct pool_cache_group), 0, 0,
 		    0, "pcgpool", 0, 0, 0, 0);
 	}
@@ -1224,7 +1250,42 @@
 	uvm_km_free_poolpage((vaddr_t)v);
 }

+#ifdef POOL_SUBPAGE
 /*
+ * Sub-page allocator, for machines with large hardware pages.
+ */
+static void *
+pool_subpage_alloc(unsigned long sz, int flags, int mtype)
+{
+
+	return pool_get(&psppool, flags);
+}
+
+static void
+pool_subpage_free(void *v, unsigned long sz, int mtype)
+{
+
+	pool_put(&psppool, v);
+}
+#endif
+
+#ifdef POOL_SUBPAGE
+/* We don't provide a real nointr allocator.  Maybe later. */
+void *
+pool_page_alloc_nointr(unsigned long sz, int flags, int mtype)
+{
+
+	return pool_subpage_alloc(sz, flags, mtype);
+}
+
+void
+pool_page_free_nointr(void *v, unsigned long sz, int mtype)
+{
+
+	pool_subpage_free(v, sz, mtype);
+}
+#else
+/*
  * Alternate pool page allocator for pools that know they will
  * never be accessed in interrupt context.
  */
@@ -1243,6 +1304,7 @@

 	uvm_km_free_poolpage1(kernel_map, (vaddr_t)v);
 }
+#endif


 /*

-- 
Ben Harris                                                   <bjh21@netbsd.org>
Portmaster, NetBSD/arm26               <URL:http://www.netbsd.org/Ports/arm26/>