Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys Fix several bugs/deficiencies in the pool allocator:



details:   https://anonhg.NetBSD.org/src/rev/4b70706e6cc8
branches:  trunk
changeset: 467911:4b70706e6cc8
user:      thorpej <thorpej%NetBSD.org@localhost>
date:      Wed Mar 31 01:14:06 1999 +0000

description:
Fix several bugs/deficiencies in the pool allocator:

- Add support for hard limits, with optional rate-limited logging of
a warning message when the pool limit is reached.  (This will be used
to fix a bug in mbuf cluster allocation on the MIPS and Alpha ports.)

- Fix some locking protocol errors.  This required splitting pr_flags
into pr_flags (which is protected by the spin lock) and pr_roflags (which
are `read only' flags, set when the pool is initialized, and never changed
again; these do not need to be protected by a mutex).

- Make the low water support actually mean something.  When a low water
mark is set, add free items to the pool until the low water mark is
reached.  When an item allocation causes the number of free items to
drop below the low water mark, make the pool catch up to it.  This can
make the pool allocator more useful for several applications (e.g.
pmap `pv entry' management) and more robust for others (e.g. mbuf
and mbuf cluster allocation, so that the pagedaemon can use NFS to clean
pages on diskless systems without completely running dry on buffers to
receive packets in during extreme memory shortages).

- Add a comment where we sleep waiting for more pages for the back-end
page allocator.  Specifically, instead of sleeping potentially forever,
perhaps we should just wake up once a second to try allocating a page
again.  XXX Revisit this soon.

diffstat:

 sys/kern/subr_pool.c |  272 +++++++++++++++++++++++++++++++++++++++++++++-----
 sys/sys/pool.h       |   18 +++-
 2 files changed, 259 insertions(+), 31 deletions(-)

diffs (truncated from 582 to 300 lines):

diff -r a65937e4e483 -r 4b70706e6cc8 sys/kern/subr_pool.c
--- a/sys/kern/subr_pool.c      Wed Mar 31 00:44:48 1999 +0000
+++ b/sys/kern/subr_pool.c      Wed Mar 31 01:14:06 1999 +0000
@@ -1,11 +1,12 @@
-/*     $NetBSD: subr_pool.c,v 1.19 1999/03/24 05:51:25 mrg Exp $       */
+/*     $NetBSD: subr_pool.c,v 1.20 1999/03/31 01:14:06 thorpej Exp $   */
 
 /*-
- * Copyright (c) 1997 The NetBSD Foundation, Inc.
+ * Copyright (c) 1997, 1999 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
- * by Paul Kranenburg.
+ * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
+ * Simulation Facility, NASA Ames Research Center.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -44,6 +45,7 @@
 #include <sys/malloc.h>
 #include <sys/lock.h>
 #include <sys/pool.h>
+#include <sys/syslog.h>
 
 #include <vm/vm.h>
 #include <vm/vm_kern.h>
@@ -104,6 +106,7 @@
 static struct pool_item_header
                *pr_find_pagehead __P((struct pool *, caddr_t));
 static void    pr_rmpage __P((struct pool *, struct pool_item_header *));
+static int     pool_catchup __P((struct pool *));
 static int     pool_prime_page __P((struct pool *, caddr_t));
 static void    *pool_page_alloc __P((unsigned long, int, int));
 static void    pool_page_free __P((void *, unsigned long, int));
@@ -143,7 +146,7 @@
        int n = pp->pr_curlogentry;
        struct pool_log *pl;
 
-       if ((pp->pr_flags & PR_LOGGING) == 0)
+       if ((pp->pr_roflags & PR_LOGGING) == 0)
                return;
 
        /*
@@ -167,7 +170,7 @@
        int i = pp->pr_logsize;
        int n = pp->pr_curlogentry;
 
-       if ((pp->pr_flags & PR_LOGGING) == 0)
+       if ((pp->pr_roflags & PR_LOGGING) == 0)
                return;
 
        pool_print(pp, "printlog");
@@ -205,7 +208,7 @@
 {
        struct pool_item_header *ph;
 
-       if ((pp->pr_flags & PR_PHINPAGE) != 0)
+       if ((pp->pr_roflags & PR_PHINPAGE) != 0)
                return ((struct pool_item_header *)(page + pp->pr_phoffset));
 
        for (ph = LIST_FIRST(&pp->pr_hashtab[PR_HASH_INDEX(pp, page)]);
@@ -233,10 +236,14 @@
 #ifdef DIAGNOSTIC
                if (pp->pr_nidle == 0)
                        panic("pr_rmpage: nidle inconsistent");
+               if (pp->pr_nitems < pp->pr_itemsperpage)
+                       panic("pr_rmpage: nitems inconsistent");
 #endif
                pp->pr_nidle--;
        }
 
+       pp->pr_nitems -= pp->pr_itemsperpage;
+
        /*
         * Unlink a page from the pool and release it.
         */
@@ -245,7 +252,7 @@
        pp->pr_npages--;
        pp->pr_npagefree++;
 
-       if ((pp->pr_flags & PR_PHINPAGE) == 0) {
+       if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
                LIST_REMOVE(ph, ph_hashlist);
                pool_put(&phpool, ph);
        }
@@ -361,7 +368,8 @@
        pp->pr_minitems = 0;
        pp->pr_minpages = 0;
        pp->pr_maxpages = UINT_MAX;
-       pp->pr_flags = flags;
+       pp->pr_roflags = flags;
+       pp->pr_flags = 0;
        pp->pr_size = ALIGN(size);
        pp->pr_align = align;
        pp->pr_wchan = wchan;
@@ -371,6 +379,13 @@
        pp->pr_pagesz = pagesz;
        pp->pr_pagemask = ~(pagesz - 1);
        pp->pr_pageshift = ffs(pagesz) - 1;
+       pp->pr_nitems = 0;
+       pp->pr_nout = 0;
+       pp->pr_hardlimit = UINT_MAX;
+       pp->pr_hardlimit_warning = NULL;
+       pp->pr_hardlimit_ratecap = 0;
+       memset(&pp->pr_hardlimit_warning_last, 0,
+           sizeof(pp->pr_hardlimit_warning_last));
 
        /*
         * Decide whether to put the page header off page to avoid
@@ -381,7 +396,7 @@
         */
        if (pp->pr_size < pagesz/16) {
                /* Use the end of the page for the page header */
-               pp->pr_flags |= PR_PHINPAGE;
+               pp->pr_roflags |= PR_PHINPAGE;
                pp->pr_phoffset = off =
                        pagesz - ALIGN(sizeof(struct pool_item_header));
        } else {
@@ -424,7 +439,7 @@
                pp->pr_log = malloc(pool_logsize * sizeof(struct pool_log),
                                    M_TEMP, M_NOWAIT);
                if (pp->pr_log == NULL)
-                       pp->pr_flags &= ~PR_LOGGING;
+                       pp->pr_roflags &= ~PR_LOGGING;
                pp->pr_curlogentry = 0;
                pp->pr_logsize = pool_logsize;
        }
@@ -454,15 +469,15 @@
        struct pool_item_header *ph;
 
 #ifdef DIAGNOSTIC
-       if (pp->pr_nget - pp->pr_nput != 0) {
+       if (pp->pr_nout != 0) {
                pr_printlog(pp);
-               panic("pool_destroy: pool busy: still out: %lu\n",
-                     pp->pr_nget - pp->pr_nput);
+               panic("pool_destroy: pool busy: still out: %u\n",
+                   pp->pr_nout);
        }
 #endif
 
        /* Remove all pages */
-       if ((pp->pr_flags & PR_STATIC) == 0)
+       if ((pp->pr_roflags & PR_STATIC) == 0)
                while ((ph = pp->pr_pagelist.tqh_first) != NULL)
                        pr_rmpage(pp, ph);
 
@@ -471,11 +486,11 @@
        drainpp = NULL;
 
 #ifdef POOL_DIAGNOSTIC
-       if ((pp->pr_flags & PR_LOGGING) != 0)
+       if ((pp->pr_roflags & PR_LOGGING) != 0)
                free(pp->pr_log, M_TEMP);
 #endif
 
-       if (pp->pr_flags & PR_FREEHEADER)
+       if (pp->pr_roflags & PR_FREEHEADER)
                free(pp, M_POOL);
 }
 
@@ -502,27 +517,80 @@
        struct pool_item_header *ph;
 
 #ifdef DIAGNOSTIC
-       if ((pp->pr_flags & PR_STATIC) && (flags & PR_MALLOCOK)) {
+       if ((pp->pr_roflags & PR_STATIC) && (flags & PR_MALLOCOK)) {
                pr_printlog(pp);
                panic("pool_get: static");
        }
 #endif
 
-       simple_lock(&pp->pr_lock);
        if (curproc == NULL && (flags & PR_WAITOK) != 0)
                panic("pool_get: must have NOWAIT");
 
+       simple_lock(&pp->pr_lock);
+
+ startover:
+       /*
+        * Check to see if we've reached the hard limit.  If we have,
+        * and we can wait, then wait until an item has been returned to
+        * the pool.
+        */
+#ifdef DIAGNOSTIC
+       if (pp->pr_nout > pp->pr_hardlimit) {
+               simple_unlock(&pp->pr_lock);
+               panic("pool_get: %s: crossed hard limit", pp->pr_wchan);
+       }
+#endif
+       if (pp->pr_nout == pp->pr_hardlimit) {
+               if (flags & PR_WAITOK) {
+                       /*
+                        * XXX: A warning isn't logged in this case.  Should
+                        * it be?
+                        */
+                       pp->pr_flags |= PR_WANTED;
+                       simple_unlock(&pp->pr_lock);
+                       tsleep((caddr_t)pp, PSWP, pp->pr_wchan, 0);
+                       simple_lock(&pp->pr_lock);
+                       goto startover;
+               }
+               if (pp->pr_hardlimit_warning != NULL) {
+                       /*
+                        * Log a message that the hard limit has been hit.
+                        */
+                       struct timeval curtime, logdiff;
+                       int s = splclock();
+                       curtime = mono_time;
+                       splx(s);
+                       timersub(&curtime, &pp->pr_hardlimit_warning_last,
+                           &logdiff);
+                       if (logdiff.tv_sec >= pp->pr_hardlimit_ratecap) {
+                               pp->pr_hardlimit_warning_last = curtime;
+                               log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);
+                       }
+               }
+               simple_unlock(&pp->pr_lock);
+               return (NULL);
+       }
+
        /*
         * The convention we use is that if `curpage' is not NULL, then
         * it points at a non-empty bucket. In particular, `curpage'
         * never points at a page header which has PR_PHINPAGE set and
         * has no items in its bucket.
         */
-       while ((ph = pp->pr_curpage) == NULL) {
+       if ((ph = pp->pr_curpage) == NULL) {
                void *v;
                int lkflags = LK_EXCLUSIVE | LK_INTERLOCK |
                              ((flags & PR_WAITOK) == 0 ? LK_NOWAIT : 0);
 
+#ifdef DIAGNOSTIC
+               if (pp->pr_nitems != 0) {
+                       simple_unlock(&pp->pr_lock);
+                       printf("pool_get: %s: curpage NULL, nitems %u\n",
+                           pp->pr_wchan, pp->pr_nitems);
+                       panic("pool_get: nitems inconsistent\n");
+               }
+#endif
+
                /* Get long-term lock on pool */
                if (lockmgr(&pp->pr_resourcelock, lkflags, &pp->pr_lock) != 0)
                        return (NULL);
@@ -548,27 +616,42 @@
                         * the page allocator has memory again. Depending
                         * on this pool's usage, we might get stuck here
                         * for a long time.
+                        *
+                        * XXX: maybe we should wake up once a second and
+                        * try again?
                         */
+                       simple_lock(&pp->pr_lock);
+                       (void) lockmgr(&pp->pr_resourcelock, LK_RELEASE, NULL);
                        pp->pr_flags |= PR_WANTED;
-                       lockmgr(&pp->pr_resourcelock, LK_RELEASE, NULL);
+                       simple_unlock(&pp->pr_lock);
                        tsleep((caddr_t)pp, PSWP, pp->pr_wchan, 0);
                        simple_lock(&pp->pr_lock);
-                       continue;
+                       goto startover;
                }
 
                /* We have more memory; add it to the pool */
                pp->pr_npagealloc++;
                pool_prime_page(pp, v);
 
-again:
+ again:
                /* Re-acquire pool interlock */
                simple_lock(&pp->pr_lock);
                lockmgr(&pp->pr_resourcelock, LK_RELEASE, NULL);
+               
+               /* Start the allocation process over. */
+               goto startover;
        }
 
        if ((v = pi = TAILQ_FIRST(&ph->ph_itemlist)) == NULL)
                panic("pool_get: %s: page empty", pp->pr_wchan);
-
+#ifdef DIAGNOSTIC
+       if (pp->pr_nitems == 0) {
+               simple_unlock(&pp->pr_lock);
+               printf("pool_get: %s: items on itemlist, nitems %u\n",
+                   pp->pr_wchan, pp->pr_nitems);
+               panic("pool_get: nitems inconsistent\n");
+       }
+#endif
        pr_log(pp, v, PRLOG_GET, file, line);
 
 #ifdef DIAGNOSTIC
@@ -584,6 +667,8 @@
         * Remove from item list.
         */
        TAILQ_REMOVE(&ph->ph_itemlist, pi, pi_list);
+       pp->pr_nitems--;
+       pp->pr_nout++;
        if (ph->ph_nmissing == 0) {



Home | Main Index | Thread Index | Old Index