Source-Changes-HG archive


[src/trunk]: src/sys two changes to improve scalability:



details:   https://anonhg.NetBSD.org/src/rev/390e3ef48f7e
branches:  trunk
changeset: 555269:390e3ef48f7e
user:      chs <chs%NetBSD.org@localhost>
date:      Thu Nov 13 02:44:01 2003 +0000

description:
two changes to improve scalability:

 (1) split the single list of pages allocated to a pool into three lists:
     completely full, partially full, and completely empty.
     there is no longer any need to traverse any list looking for a
     certain type of page.

 (2) replace the 8-element hash table for out-of-page page headers
     with a splay tree.

these two changes (together with the recent enhancements to the wait code)
give us linear scaling for a fork+exit microbenchmark.
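
The matching sys/sys/pool.h hunk falls outside the truncated diff below,
but the identifiers used in the subr_pool.c hunks pin down its shape. What
follows is a hedged sketch of the new struct pool fields, not the verbatim
header diff: the list-head and tree-head names (pr_emptypages, pr_fullpages,
pr_partpages, pr_phtree, phtree) all appear in the code below, while the
field order and comments are guesses, and moving the
LIST_HEAD(pool_pagelist, ...) declaration into pool.h is inferred from the
removal of the old TAILQ_HEAD declaration in subr_pool.c while
pool_print_pagelist() still takes a struct pool_pagelist pointer.

#include <sys/queue.h>
#include <sys/tree.h>

LIST_HEAD(pool_pagelist, pool_item_header);

struct pool {
	/* ... fields before the page lists, unchanged ... */

	/*
	 * The old single pr_pagelist TAILQ becomes three LISTs, so a
	 * page of a given fullness is found at a list head instead of
	 * by traversal.
	 */
	struct pool_pagelist	pr_emptypages;	/* fully idle pages */
	struct pool_pagelist	pr_fullpages;	/* pages with no free items */
	struct pool_pagelist	pr_partpages;	/* pages with free items */

	/*
	 * The old 8-bucket hash table of off-page page headers becomes
	 * a splay tree keyed on ph_page (see phtree_compare below).
	 */
	SPLAY_HEAD(phtree, pool_item_header)
				pr_phtree;	/* off-page page headers */

	/* ... remaining fields unchanged ... */
};

Segregating pages by state is what makes the curpage update constant-time:
a partially-full or empty page, if one exists, is always at a list head.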

diffstat:

 sys/kern/subr_pool.c |  410 ++++++++++++++++++++++++++------------------------
 sys/sys/pool.h       |   17 +-
 sys/uvm/uvm_map.c    |    8 +-
 3 files changed, 232 insertions(+), 203 deletions(-)

diffs (truncated from 791 to 300 lines):

diff -r a76015bb29d0 -r 390e3ef48f7e sys/kern/subr_pool.c
--- a/sys/kern/subr_pool.c      Thu Nov 13 02:34:24 2003 +0000
+++ b/sys/kern/subr_pool.c      Thu Nov 13 02:44:01 2003 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: subr_pool.c,v 1.87 2003/04/09 18:22:13 thorpej Exp $   */
+/*     $NetBSD: subr_pool.c,v 1.88 2003/11/13 02:44:01 chs Exp $       */
 
 /*-
  * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
@@ -38,7 +38,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.87 2003/04/09 18:22:13 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.88 2003/11/13 02:44:01 chs Exp $");
 
 #include "opt_pool.h"
 #include "opt_poollog.h"
@@ -59,12 +59,14 @@
 /*
  * Pool resource management utility.
  *
- * Memory is allocated in pages which are split into pieces according
- * to the pool item size. Each page is kept on a list headed by `pr_pagelist'
- * in the pool structure and the individual pool items are on a linked list
- * headed by `ph_itemlist' in each page header. The memory for building
- * the page list is either taken from the allocated pages themselves (for
- * small pool items) or taken from an internal pool of page headers (`phpool').
+ * Memory is allocated in pages which are split into pieces according to
+ * the pool item size. Each page is kept on one of three lists in the
+ * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
+ * for empty, full and partially-full pages respectively. The individual
+ * pool items are on a linked list headed by `ph_itemlist' in each page
+ * header. The memory for building the page list is either taken from
+ * the allocated pages themselves (for small pool items) or taken from
+ * an internal pool of page headers (`phpool').
  */
 
 /* List of all pools */
@@ -89,16 +91,15 @@
 
 struct pool_item_header {
        /* Page headers */
-       TAILQ_ENTRY(pool_item_header)
+       LIST_ENTRY(pool_item_header)
                                ph_pagelist;    /* pool page list */
        TAILQ_HEAD(,pool_item)  ph_itemlist;    /* chunk list for this page */
-       LIST_ENTRY(pool_item_header)
-                               ph_hashlist;    /* Off-page page headers */
+       SPLAY_ENTRY(pool_item_header)
+                               ph_node;        /* Off-page page headers */
        unsigned int            ph_nmissing;    /* # of chunks in use */
        caddr_t                 ph_page;        /* this page's address */
        struct timeval          ph_time;        /* last referenced */
 };
-TAILQ_HEAD(pool_pagelist,pool_item_header);
 
 struct pool_item {
 #ifdef DIAGNOSTIC
@@ -109,10 +110,6 @@
        TAILQ_ENTRY(pool_item)  pi_list;
 };
 
-#define        PR_HASH_INDEX(pp,addr) \
-       (((u_long)(addr) >> (pp)->pr_alloc->pa_pageshift) & \
-        (PR_HASHTABSIZE - 1))
-
 #define        POOL_NEEDS_CATCHUP(pp)                                          \
        ((pp)->pr_nitems < (pp)->pr_minitems)
 
@@ -150,13 +147,19 @@
 static int     pool_catchup(struct pool *);
 static void    pool_prime_page(struct pool *, caddr_t,
                    struct pool_item_header *);
+static void    pool_update_curpage(struct pool *);
 
 void           *pool_allocator_alloc(struct pool *, int);
 void           pool_allocator_free(struct pool *, void *);
 
+static void pool_print_pagelist(struct pool_pagelist *,
+       void (*)(const char *, ...));
 static void pool_print1(struct pool *, const char *,
        void (*)(const char *, ...));
 
+static int pool_chk_page(struct pool *, const char *,
+                        struct pool_item_header *);
+
 /*
  * Pool log entry. An array of these is allocated in pool_init().
  */
@@ -275,24 +278,34 @@
 #define        pr_enter_check(pp, pr)
 #endif /* POOL_DIAGNOSTIC */
 
+static __inline int
+phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
+{
+       if (a->ph_page < b->ph_page)
+               return (-1);
+       else if (a->ph_page > b->ph_page)
+               return (1);
+       else
+               return (0);
+}
+
+SPLAY_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
+SPLAY_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);
+
 /*
  * Return the pool page header based on page address.
  */
 static __inline struct pool_item_header *
 pr_find_pagehead(struct pool *pp, caddr_t page)
 {
-       struct pool_item_header *ph;
+       struct pool_item_header *ph, tmp;
 
        if ((pp->pr_roflags & PR_PHINPAGE) != 0)
                return ((struct pool_item_header *)(page + pp->pr_phoffset));
 
-       for (ph = LIST_FIRST(&pp->pr_hashtab[PR_HASH_INDEX(pp, page)]);
-            ph != NULL;
-            ph = LIST_NEXT(ph, ph_hashlist)) {
-               if (ph->ph_page == page)
-                       return (ph);
-       }
-       return (NULL);
+       tmp.ph_page = page;
+       ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp);
+       return ph;
 }
 
 /*
@@ -322,13 +335,13 @@
        /*
         * Unlink a page from the pool and release it (or queue it for release).
         */
-       TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
+       LIST_REMOVE(ph, ph_pagelist);
        if (pq) {
-               TAILQ_INSERT_HEAD(pq, ph, ph_pagelist);
+               LIST_INSERT_HEAD(pq, ph, ph_pagelist);
        } else {
                pool_allocator_free(pp, ph->ph_page);
                if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
-                       LIST_REMOVE(ph, ph_hashlist);
+                       SPLAY_REMOVE(phtree, &pp->pr_phtree, ph);
                        s = splvm();
                        pool_put(&phpool, ph);
                        splx(s);
@@ -337,18 +350,7 @@
        pp->pr_npages--;
        pp->pr_npagefree++;
 
-       if (pp->pr_curpage == ph) {
-               /*
-                * Find a new non-empty page header, if any.
-                * Start search from the page head, to increase the
-                * chance for "high water" pages to be freed.
-                */
-               TAILQ_FOREACH(ph, &pp->pr_pagelist, ph_pagelist)
-                       if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
-                               break;
-
-               pp->pr_curpage = ph;
-       }
+       pool_update_curpage(pp);
 }
 
 /*
@@ -361,7 +363,7 @@
 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
     const char *wchan, struct pool_allocator *palloc)
 {
-       int off, slack, i;
+       int off, slack;
 
 #ifdef POOL_DIAGNOSTIC
        /*
@@ -425,7 +427,9 @@
        /*
         * Initialize the pool structure.
         */
-       TAILQ_INIT(&pp->pr_pagelist);
+       LIST_INIT(&pp->pr_emptypages);
+       LIST_INIT(&pp->pr_fullpages);
+       LIST_INIT(&pp->pr_partpages);
        TAILQ_INIT(&pp->pr_cachelist);
        pp->pr_curpage = NULL;
        pp->pr_npages = 0;
@@ -465,9 +469,7 @@
                /* The page header will be taken from our page header pool */
                pp->pr_phoffset = 0;
                off = palloc->pa_pagesz;
-               for (i = 0; i < PR_HASHTABSIZE; i++) {
-                       LIST_INIT(&pp->pr_hashtab[i]);
-               }
+               SPLAY_INIT(&pp->pr_phtree);
        }
 
        /*
@@ -570,8 +572,10 @@
 #endif
 
        /* Remove all pages */
-       while ((ph = TAILQ_FIRST(&pp->pr_pagelist)) != NULL)
+       while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
                pr_rmpage(pp, ph, NULL);
+       KASSERT(LIST_EMPTY(&pp->pr_fullpages));
+       KASSERT(LIST_EMPTY(&pp->pr_partpages));
 
        /* Remove from global pool list */
        simple_lock(&pool_head_slock);
@@ -600,7 +604,7 @@
        pp->pr_drain_hook_arg = arg;
 }
 
-static __inline struct pool_item_header *
+static struct pool_item_header *
 pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags)
 {
        struct pool_item_header *ph;
@@ -773,7 +777,6 @@
                /* Start the allocation process over. */
                goto startover;
        }
-
        if (__predict_false((v = pi = TAILQ_FIRST(&ph->ph_itemlist)) == NULL)) {
                pr_leave(pp);
                simple_unlock(&pp->pr_slock);
@@ -814,9 +817,16 @@
                        panic("pool_get: nidle inconsistent");
 #endif
                pp->pr_nidle--;
+
+               /*
+                * This page was previously empty.  Move it to the list of
+                * partially-full pages.  This page is already curpage.
+                */
+               LIST_REMOVE(ph, ph_pagelist);
+               LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
        }
        ph->ph_nmissing++;
-       if (TAILQ_FIRST(&ph->ph_itemlist) == NULL) {
+       if (TAILQ_EMPTY(&ph->ph_itemlist)) {
 #ifdef DIAGNOSTIC
                if (__predict_false(ph->ph_nmissing != pp->pr_itemsperpage)) {
                        pr_leave(pp);
@@ -826,23 +836,12 @@
                }
 #endif
                /*
-                * Find a new non-empty page header, if any.
-                * Start search from the page head, to increase
-                * the chance for "high water" pages to be freed.
-                *
-                * Migrate empty pages to the end of the list.  This
-                * will speed the update of curpage as pages become
-                * idle.  Empty pages intermingled with idle pages
-                * is no big deal.  As soon as a page becomes un-empty,
-                * it will move back to the head of the list.
+                * This page is now full.  Move it to the full list
+                * and select a new current page.
                 */
-               TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
-               TAILQ_INSERT_TAIL(&pp->pr_pagelist, ph, ph_pagelist);
-               TAILQ_FOREACH(ph, &pp->pr_pagelist, ph_pagelist)
-                       if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
-                               break;
-
-               pp->pr_curpage = ph;
+               LIST_REMOVE(ph, ph_pagelist);
+               LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist);
+               pool_update_curpage(pp);
        }
 
        pp->pr_nget++;
@@ -935,18 +934,15 @@
        }
 
        /*
-        * If this page is now complete, do one of two things:
+        * If this page is now empty, do one of two things:
         *
-        *      (1) If we have more pages than the page high water
-        *          mark, free the page back to the system.
+        *      (1) If we have more pages than the page high water mark,
+        *          free the page back to the system.
         *
-        *      (2) Move it to the end of the page list, so that
-        *          we minimize our chances of fragmenting the
-        *          pool.  Idle pages migrate to the end (along with
-        *          completely empty pages, so that we find un-empty
-        *          pages more quickly when we update curpage) of the
-        *          list so they can be more easily swept up by
-        *          the pagedaemon when pages are scarce.
+        *      (2) Otherwise, move the page to the empty page list.
+        *
+        * Either way, select a new current page (so we use a partially-full
+        * page if one is available).
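
The body of pool_update_curpage() lands in the truncated portion of the
diff, but the declaration above and the comments in pool_get() and
pool_put() fix its contract: prefer a partially-full page, fall back to an
empty one. A minimal sketch consistent with that contract, assuming the
field names from the hunks above (not necessarily the committed code):

static void
pool_update_curpage(struct pool *pp)
{

	/* Prefer a partially-full page so allocations keep filling it. */
	pp->pr_curpage = LIST_FIRST(&pp->pr_partpages);
	if (pp->pr_curpage == NULL) {
		/* Otherwise take an empty page, if any; else NULL. */
		pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages);
	}
}

Either way, this replaces the old TAILQ_FOREACH scans in pr_rmpage() and
pool_get() with two O(1) list-head reads, which is the heart of change (1).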


