Subject: port-amd64/34618: converted amd64 pmap to use a pool for pv_entry
To: None <port-amd64-maintainer@netbsd.org, gnats-admin@netbsd.org,>
From: None <murray@river-styx.org>
List: netbsd-bugs
Date: 09/25/2006 23:45:00
>Number:         34618
>Category:       port-amd64
>Synopsis:       converted amd64 pmap to use a pool for pv_entry
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    port-amd64-maintainer
>State:          open
>Class:          support
>Submitter-Id:   net
>Arrival-Date:   Mon Sep 25 23:45:00 +0000 2006
>Originator:     murray armfield
>Release:        NetBSD-current
>Organization:
N/A
>Environment:
NetBSD zeus.river-styx.org 4.99.3 NetBSD 4.99.3 (ZEUS) #0: Mon Sep 25 21:51:52 EST 2006  admin@zeus.river-styx.org:/usr/NetBSD/obj/sys/arch/amd64/compile/ZEUS amd64
>Description:
This send-pr is being filed at Christos' request, following my original email to port-amd64. This is NOT a change request; I am just lodging some code for others.

Several archs in NetBSD use a pool for pv_entry structures (the per-mapping entries kept on each physical page's pv list) in their pmap code. amd64 doesn't, but OpenBSD's amd64 pmap does. The following patch is a port of OpenBSD's amd64 pmap code to NetBSD, porting only the pv_entry pool code, not any of OpenBSD's other changes.

I am not sure which is the better implementation, the current amd64 pmap or the new one using a pool for pv_entry. I haven't seen any speed difference between the two, and the new pmap seems to be running OK. (More testing is needed with this type of change, though.)

There are also one or two changes removing the __ prefix from __inline, which were missed a while ago.
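
For context, the conversion boils down to replacing the hand-rolled pv_page allocator with the standard pool(9) interface. Below is a minimal sketch of the calls involved, using the same pool name, flags, and struct pv_entry layout as the patch; the wrapper functions themselves are illustrative only and do not appear in the diff.

/*
 * Sketch of the pool(9) pattern the patch switches to.  pmap_pv_pool,
 * "pvpl", and struct pv_entry match the diff below; the wrapper
 * functions are illustrative, not part of the patch.
 */
#include <sys/types.h>
#include <sys/pool.h>

struct pmap;
struct vm_page;

struct pv_entry {			/* as declared in include/pmap.h below */
	struct pv_entry *pv_next;	/* next entry on the pv_head list */
	struct pmap *pv_pmap;		/* the pmap */
	vaddr_t pv_va;			/* the virtual address */
	struct vm_page *pv_ptp;		/* the vm_page of the PTP */
};

static struct pool pmap_pv_pool;

/* one-time setup; the patch does this in pmap_bootstrap() */
static void
pmap_pv_pool_init(void)
{
	pool_init(&pmap_pv_pool, sizeof(struct pv_entry), 0, 0, 0,
	    "pvpl", &pool_allocator_nointr);
}

/* replaces pmap_alloc_pv(); may return NULL since PR_NOWAIT is used */
static struct pv_entry *
pmap_pv_alloc(void)
{
	return pool_get(&pmap_pv_pool, PR_NOWAIT);
}

/* replaces pmap_free_pv()/pmap_free_pvs() */
static void
pmap_pv_free(struct pv_entry *pve)
{
	pool_put(&pmap_pv_pool, pve);
}

In the actual patch the pool_init() call sits in pmap_bootstrap(), and pool_get()/pool_put() are called directly at the former pmap_alloc_pv()/pmap_free_pv() sites; callers of pool_get() must handle a NULL return (e.g. via PMAP_CANFAIL in pmap_enter()).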
>How-To-Repeat:
not a problem...
>Fix:
--- include/vmparam.h.orig      2006-09-26 09:31:25.000000000 +1000
+++ include/vmparam.h
@@ -37,10 +37,8 @@
 #ifndef _VMPARAM_H_
 #define _VMPARAM_H_

-#include <sys/tree.h>
-
 /*
- * Machine dependent constants for 386.
+ * Machine dependent constants for amd64.
  */

 /*
@@ -153,30 +151,11 @@
 #define        VM_FREELIST_DEFAULT     0
 #define        VM_FREELIST_FIRST16     1

-#define __HAVE_PMAP_PHYSSEG
-
-#define __HAVE_VM_PAGE_MD
-#define VM_MDPAGE_INIT(pg)                                      \
-        memset(&(pg)->mdpage, 0, sizeof((pg)->mdpage));         \
-        simple_lock_init(&(pg)->mdpage.mp_pvhead.pvh_lock);     \
-        SPLAY_INIT(&(pg)->mdpage.mp_pvhead.pvh_root);
-
-struct pv_entry;
-
-struct pv_head {
-        struct simplelock pvh_lock;     /* locks every pv in this tree */
-        SPLAY_HEAD(pvtree, pv_entry) pvh_root;
-                                        /* head of tree (locked by pvh_lock) */
-};
-
-struct vm_page_md {
-        struct pv_head mp_pvhead;
-        int mp_attrs;
-};
-
 /*
  * pmap specific data stored in the vm_physmem[] array
  */
+#define __HAVE_PMAP_PHYSSEG
+
 struct pmap_physseg {
        struct pv_head *pvhead;         /* pv_head array */
        unsigned char *attrs;           /* attrs array */


--- include/pmap.h.orig 2006-09-26 09:31:25.000000000 +1000
+++ include/pmap.h
@@ -334,41 +334,18 @@ struct pmap {
  * describes one mapping).
  */

-struct pv_entry {                       /* locked by its list's pvh_lock */
-        SPLAY_ENTRY(pv_entry) pv_node;  /* splay-tree node */
-        struct pmap *pv_pmap;           /* the pmap */
-        vaddr_t pv_va;                  /* the virtual address */
-        struct vm_page *pv_ptp;         /* the vm_page of the PTP */
-};
-
-/*
- * pv_entrys are dynamically allocated in chunks from a single page.
- * we keep track of how many pv_entrys are in use for each page and
- * we can free pv_entry pages if needed.  there is one lock for the
- * entire allocation system.
- */
-
-struct pv_page_info {
-       TAILQ_ENTRY(pv_page) pvpi_list;
-       struct pv_entry *pvpi_pvfree;
-       int pvpi_nfree;
-};
-
-/*
- * number of pv_entry's in a pv_page
- * (note: won't work on systems where NPBG isn't a constant)
- */
+struct pv_entry;

-#define PVE_PER_PVPAGE ((PAGE_SIZE - sizeof(struct pv_page_info)) / \
-                       sizeof(struct pv_entry))
-
-/*
- * a pv_page: where pv_entrys are allocated from
- */
+struct pv_head {
+       struct simplelock pvh_lock;     /* locks every pv on this list */
+       struct pv_entry *pvh_list;      /* head of list (locked by pvh_lock) */
+};

-struct pv_page {
-       struct pv_page_info pvinfo;
-       struct pv_entry pvents[PVE_PER_PVPAGE];
+struct pv_entry {      /* locked by its list's pvh_lock */
+       struct pv_entry *pv_next;       /* next entry */
+       struct pmap *pv_pmap;           /* the pmap */
+       vaddr_t pv_va;                  /* the virtual address */
+       struct vm_page *pv_ptp; /* the vm_page of the PTP */
 };

 /*
@@ -455,7 +432,7 @@ boolean_t   pmap_pageidlezero __P((paddr_t
  * inline functions
  */

-static __inline void
+static inline void
 pmap_remove_all(struct pmap *pmap)
 {
        /* Nothing. */
@@ -466,7 +443,7 @@ pmap_remove_all(struct pmap *pmap)
  *     if hardware doesn't support one-page flushing)
  */

-__inline static void
+inline static void
 pmap_update_pg(va)
        vaddr_t va;
 {
@@ -477,7 +454,7 @@ pmap_update_pg(va)
  * pmap_update_2pg: flush two pages from the TLB
  */

-__inline static void
+inline static void
 pmap_update_2pg(va, vb)
        vaddr_t va, vb;
 {
@@ -494,7 +471,7 @@ pmap_update_2pg(va, vb)
  *     unprotecting a page is done on-demand at fault time.
  */

-__inline static void
+inline static void
 pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
 {
        if ((prot & VM_PROT_WRITE) == 0) {
@@ -514,7 +491,7 @@ pmap_page_protect(struct vm_page *pg, vm
  *     unprotecting a page is done on-demand at fault time.
  */

-__inline static void
+inline static void
 pmap_protect(pmap, sva, eva, prot)
        struct pmap *pmap;
        vaddr_t sva, eva;
@@ -540,7 +517,7 @@ pmap_protect(pmap, sva, eva, prot)

 #include <lib/libkern/libkern.h>

-static __inline pt_entry_t *
+static inline pt_entry_t *
 vtopte(vaddr_t va)
 {

@@ -549,7 +526,7 @@ vtopte(vaddr_t va)
        return (PTE_BASE + pl1_i(va));
 }

-static __inline pt_entry_t *
+static inline pt_entry_t *
 kvtopte(vaddr_t va)
 {



--- amd64/pmap.c.orig   2006-09-26 09:31:52.000000000 +1000
+++ amd64/pmap.c
@@ -271,10 +271,6 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.2
  *   when traversing the list (e.g. adding/removing mappings,
  *   syncing R/M bits, etc.)
  *
- * - pvalloc_lock
- *   this lock protects the data structures which are used to manage
- *   the free list of pv_entry structures.
- *
  * - pmaps_lock
  *   this lock protects the list of active pmaps (headed by "pmaps").
  *   we lock it when adding or removing pmaps from this list.
@@ -296,7 +292,6 @@ pd_entry_t *alternate_pdes[] = APDES_INI

 /* int nkpde = NKPTP; */

-static struct simplelock pvalloc_lock;
 static struct simplelock pmaps_lock;

 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
@@ -422,35 +417,10 @@ static vaddr_t virtual_end;       /* VA of las


 /*
- * pv_page management structures: locked by pvalloc_lock
+ * pv management structures
  */

-TAILQ_HEAD(pv_pagelist, pv_page);
-static struct pv_pagelist pv_freepages;        /* list of pv_pages with free entrys */
-static struct pv_pagelist pv_unusedpgs; /* list of unused pv_pages */
-static int pv_nfpvents;                        /* # of free pv entries */
-
-#define PVE_LOWAT (PVE_PER_PVPAGE / 2) /* free pv_entry low water mark */
-#define PVE_HIWAT (PVE_LOWAT + (PVE_PER_PVPAGE * 2))
-                                       /* high water mark */
-
-static inline int
-pv_compare(struct pv_entry *a, struct pv_entry *b)
-{
-       if (a->pv_pmap < b->pv_pmap)
-               return (-1);
-       else if (a->pv_pmap > b->pv_pmap)
-               return (1);
-       else if (a->pv_va < b->pv_va)
-               return (-1);
-       else if (a->pv_va > b->pv_va)
-               return (1);
-       else
-               return (0);
-}
-
-SPLAY_PROTOTYPE(pvtree, pv_entry, pv_node, pv_compare);
-SPLAY_GENERATE(pvtree, pv_entry, pv_node, pv_compare);
+struct pool pmap_pv_pool;

 /*
  * linked list of all non-kernel pmaps
@@ -510,28 +480,13 @@ extern vaddr_t lo32_paddr;

 extern int end;

-#if defined(I586_CPU)
-/* stuff to fix the pentium f00f bug */
-extern vaddr_t pentium_idt_vaddr;
-#endif
-
 /*
  * local prototypes
  */

-static struct pv_entry *pmap_add_pvpage __P((struct pv_page *, boolean_t));
-static struct pv_entry *pmap_alloc_pv __P((struct pmap *, int)); /* see codes below */
-#define ALLOCPV_NEED   0       /* need PV now */
-#define ALLOCPV_TRY    1       /* just try to allocate, don't steal */
-#define ALLOCPV_NONEED 2       /* don't need PV, just growing cache */
-static struct pv_entry *pmap_alloc_pvpage __P((struct pmap *, int));
 static void             pmap_enter_pv __P((struct pv_head *,
                                            struct pv_entry *, struct pmap *,
                                            vaddr_t, struct vm_page *));
-static void             pmap_free_pv __P((struct pmap *, struct pv_entry *));
-static void             pmap_free_pvs __P((struct pmap *, struct pv_entry *));
-static void             pmap_free_pv_doit __P((struct pv_entry *));
-static void             pmap_free_pvpage __P((void));
 static struct vm_page  *pmap_get_ptp __P((struct pmap *, vaddr_t,
                                           pd_entry_t **));
 static struct vm_page  *pmap_find_ptp __P((struct pmap *, vaddr_t, paddr_t,
@@ -1046,12 +1001,6 @@ pmap_bootstrap(kva_start)
        idt_paddr = avail_start;                        /* steal a page */
        avail_start += 2 * PAGE_SIZE;

-#if defined(I586_CPU)
-       /* pentium f00f bug stuff */
-       pentium_idt_vaddr = virtual_avail;              /* don't need pte */
-       virtual_avail += PAGE_SIZE; pte++;
-#endif
-
 #ifdef _LP64
        /*
         * Grab a page below 4G for things that need it (i.e.
@@ -1076,11 +1025,8 @@ pmap_bootstrap(kva_start)
 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
        spinlockinit(&pmap_main_lock, "pmaplk", 0);
 #endif
-       simple_lock_init(&pvalloc_lock);
        simple_lock_init(&pmaps_lock);
        LIST_INIT(&pmaps);
-       TAILQ_INIT(&pv_freepages);
-       TAILQ_INIT(&pv_unusedpgs);

        /*
         * initialize the pmap pool.
@@ -1088,6 +1034,8 @@ pmap_bootstrap(kva_start)

        pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappl",
            &pool_allocator_nointr);
+       pool_init(&pmap_pv_pool, sizeof(struct pv_entry), 0, 0, 0, "pvpl",
+           &pool_allocator_nointr);

        /*
         * Initialize the TLB shootdown queues.
@@ -1214,8 +1162,6 @@ pmap_init()
        }
 #endif

-       pv_nfpvents = 0;
-
        pj_page = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0, UVM_KMF_WIRED);
        if (pj_page == NULL)
                panic("pmap_init: pj_page");
@@ -1239,284 +1185,12 @@ pmap_init()
  */

 /*
- * pv_entry allocation functions:
- *   the main pv_entry allocation functions are:
- *     pmap_alloc_pv: allocate a pv_entry structure
- *     pmap_free_pv: free one pv_entry
- *     pmap_free_pvs: free a list of pv_entrys
- *
- * the rest are helper functions
- */
-
-/*
- * pmap_alloc_pv: inline function to allocate a pv_entry structure
- * => we lock pvalloc_lock
- * => if we fail, we call out to pmap_alloc_pvpage
- * => 3 modes:
- *    ALLOCPV_NEED   = we really need a pv_entry, even if we have to steal it
- *    ALLOCPV_TRY    = we want a pv_entry, but not enough to steal
- *    ALLOCPV_NONEED = we are trying to grow our free list, don't really need
- *                     one now
- *
- * "try" is for optional functions like pmap_copy().
- */
-
-inline static struct pv_entry *
-pmap_alloc_pv(pmap, mode)
-       struct pmap *pmap;
-       int mode;
-{
-       struct pv_page *pvpage;
-       struct pv_entry *pv;
-
-       simple_lock(&pvalloc_lock);
-
-       pvpage = TAILQ_FIRST(&pv_freepages);
-       if (pvpage != NULL) {
-               pvpage->pvinfo.pvpi_nfree--;
-               if (pvpage->pvinfo.pvpi_nfree == 0) {
-                       /* nothing left in this one? */
-                       TAILQ_REMOVE(&pv_freepages, pvpage, pvinfo.pvpi_list);
-               }
-               pv = pvpage->pvinfo.pvpi_pvfree;
-               KASSERT(pv);
-               pvpage->pvinfo.pvpi_pvfree = SPLAY_RIGHT(pv, pv_node);
-               pv_nfpvents--;  /* took one from pool */
-       } else {
-               pv = NULL;              /* need more of them */
-       }
-
-       /*
-        * if below low water mark or we didn't get a pv_entry we try and
-        * create more pv_entrys ...
-        */
-
-       if (pv_nfpvents < PVE_LOWAT || pv == NULL) {
-               if (pv == NULL)
-                       pv = pmap_alloc_pvpage(pmap, (mode == ALLOCPV_TRY) ?
-                                              mode : ALLOCPV_NEED);
-               else
-                       (void) pmap_alloc_pvpage(pmap, ALLOCPV_NONEED);
-       }
-
-       simple_unlock(&pvalloc_lock);
-       return(pv);
-}
-
-/*
- * pmap_alloc_pvpage: maybe allocate a new pvpage
- *
- * if need_entry is false: try and allocate a new pv_page
- * if need_entry is true: try and allocate a new pv_page and return a
- *     new pv_entry from it.   if we are unable to allocate a pv_page
- *     we make a last ditch effort to steal a pv_page from some other
- *     mapping.    if that fails, we panic...
- *
- * => we assume that the caller holds pvalloc_lock
- */
-
-static struct pv_entry *
-pmap_alloc_pvpage(pmap, mode)
-       struct pmap *pmap;
-       int mode;
-{
-       struct pv_page *pvpage;
-       struct pv_entry *pv;
-       int s;
-
-       /*
-        * if we need_entry and we've got unused pv_pages, allocate from there
-        */
-
-       pvpage = TAILQ_FIRST(&pv_unusedpgs);
-       if (mode != ALLOCPV_NONEED && pvpage != NULL) {
-
-               /* move it to pv_freepages list */
-               TAILQ_REMOVE(&pv_unusedpgs, pvpage, pvinfo.pvpi_list);
-               TAILQ_INSERT_HEAD(&pv_freepages, pvpage, pvinfo.pvpi_list);
-
-               /* allocate a pv_entry */
-               pvpage->pvinfo.pvpi_nfree--;    /* can't go to zero */
-               pv = pvpage->pvinfo.pvpi_pvfree;
-               KASSERT(pv);
-               pvpage->pvinfo.pvpi_pvfree = SPLAY_RIGHT(pv, pv_node);
-               pv_nfpvents--;  /* took one from pool */
-               return(pv);
-       }
-
-       /*
-        * NOTE: If we are allocating a PV page for the kernel pmap, the
-        * pmap is already locked!  (...but entering the mapping is safe...)
-        */
-
-       s = splvm();   /* must protect kmem_map with splvm! */
-       pvpage = (struct pv_page *)uvm_km_alloc(kmem_map, PAGE_SIZE, 0,
-           UVM_KMF_TRYLOCK|UVM_KMF_NOWAIT|UVM_KMF_WIRED);
-       splx(s);
-       if (pvpage == NULL)
-               return NULL;
-
-       return (pmap_add_pvpage(pvpage, mode != ALLOCPV_NONEED));
-}
-
-/*
- * pmap_add_pvpage: add a pv_page's pv_entrys to the free list
- *
- * => caller must hold pvalloc_lock
- * => if need_entry is true, we allocate and return one pv_entry
- */
-
-static struct pv_entry *
-pmap_add_pvpage(pvp, need_entry)
-       struct pv_page *pvp;
-       boolean_t need_entry;
-{
-       int tofree, lcv;
-
-       /* do we need to return one? */
-       tofree = (need_entry) ? PVE_PER_PVPAGE - 1 : PVE_PER_PVPAGE;
-
-       pvp->pvinfo.pvpi_pvfree = NULL;
-       pvp->pvinfo.pvpi_nfree = tofree;
-       for (lcv = 0 ; lcv < tofree ; lcv++) {
-               SPLAY_RIGHT(&pvp->pvents[lcv], pv_node) =
-                       pvp->pvinfo.pvpi_pvfree;
-               pvp->pvinfo.pvpi_pvfree = &pvp->pvents[lcv];
-       }
-       if (need_entry)
-               TAILQ_INSERT_TAIL(&pv_freepages, pvp, pvinfo.pvpi_list);
-       else
-               TAILQ_INSERT_TAIL(&pv_unusedpgs, pvp, pvinfo.pvpi_list);
-       pv_nfpvents += tofree;
-       return((need_entry) ? &pvp->pvents[lcv] : NULL);
-}
-
-/*
- * pmap_free_pv_doit: actually free a pv_entry
- *
- * => do not call this directly!  instead use either
- *    1. pmap_free_pv ==> free a single pv_entry
- *    2. pmap_free_pvs => free a list of pv_entrys
- * => we must be holding pvalloc_lock
- */
-
-inline static void
-pmap_free_pv_doit(pv)
-       struct pv_entry *pv;
-{
-       struct pv_page *pvp;
-
-       pvp = (struct pv_page *) x86_trunc_page(pv);
-       pv_nfpvents++;
-       pvp->pvinfo.pvpi_nfree++;
-
-       /* nfree == 1 => fully allocated page just became partly allocated */
-       if (pvp->pvinfo.pvpi_nfree == 1) {
-               TAILQ_INSERT_HEAD(&pv_freepages, pvp, pvinfo.pvpi_list);
-       }
-
-       /* free it */
-       SPLAY_RIGHT(pv, pv_node) = pvp->pvinfo.pvpi_pvfree;
-       pvp->pvinfo.pvpi_pvfree = pv;
-
-       /*
-        * are all pv_page's pv_entry's free?  move it to unused queue.
-        */
-
-       if (pvp->pvinfo.pvpi_nfree == PVE_PER_PVPAGE) {
-               TAILQ_REMOVE(&pv_freepages, pvp, pvinfo.pvpi_list);
-               TAILQ_INSERT_HEAD(&pv_unusedpgs, pvp, pvinfo.pvpi_list);
-       }
-}
-
-/*
- * pmap_free_pv: free a single pv_entry
- *
- * => we gain the pvalloc_lock
- */
-
-inline static void
-pmap_free_pv(pmap, pv)
-       struct pmap *pmap;
-       struct pv_entry *pv;
-{
-       simple_lock(&pvalloc_lock);
-       pmap_free_pv_doit(pv);
-
-       /*
-        * Can't free the PV page if the PV entries were associated with
-        * the kernel pmap; the pmap is already locked.
-        */
-       if (pv_nfpvents > PVE_HIWAT && TAILQ_FIRST(&pv_unusedpgs) != NULL &&
-           pmap != pmap_kernel())
-               pmap_free_pvpage();
-
-       simple_unlock(&pvalloc_lock);
-}
-
-/*
- * pmap_free_pvs: free a list of pv_entrys
- *
- * => we gain the pvalloc_lock
- */
-
-inline static void
-pmap_free_pvs(pmap, pvs)
-       struct pmap *pmap;
-       struct pv_entry *pvs;
-{
-       struct pv_entry *nextpv;
-
-       simple_lock(&pvalloc_lock);
-
-       for ( /* null */ ; pvs != NULL ; pvs = nextpv) {
-               nextpv = SPLAY_RIGHT(pvs, pv_node);
-               pmap_free_pv_doit(pvs);
-       }
-
-       /*
-        * Can't free the PV page if the PV entries were associated with
-        * the kernel pmap; the pmap is already locked.
-        */
-       if (pv_nfpvents > PVE_HIWAT && TAILQ_FIRST(&pv_unusedpgs) != NULL &&
-           pmap != pmap_kernel())
-               pmap_free_pvpage();
-
-       simple_unlock(&pvalloc_lock);
-}
-
-
-/*
- * pmap_free_pvpage: try and free an unused pv_page structure
- *
- * => assume caller is holding the pvalloc_lock and that
- *     there is a page on the pv_unusedpgs list
- */
-
-static void
-pmap_free_pvpage()
-{
-       int s;
-       struct pv_page *pvp;
-
-       pvp = TAILQ_FIRST(&pv_unusedpgs);
-       /* remove pvp from pv_unusedpgs */
-       TAILQ_REMOVE(&pv_unusedpgs, pvp, pvinfo.pvpi_list);
-
-       s = splvm();
-       uvm_km_free(kmem_map, (vaddr_t)pvp, PAGE_SIZE, UVM_KMF_WIRED);
-       splx(s);
-
-       pv_nfpvents -= PVE_PER_PVPAGE;  /* update free count */
-}
-
-/*
  * main pv_entry manipulation functions:
  *   pmap_enter_pv: enter a mapping onto a pv_head list
  *   pmap_remove_pv: remove a mappiing from a pv_head list
  *
  * NOTE: pmap_enter_pv expects to lock the pvh itself
- *       pmap_remove_pv expects te caller to lock the pvh before calling
+ *       pmap_remove_pv expects the caller to lock the pvh before calling
  */

 /*
@@ -1538,8 +1212,11 @@ pmap_enter_pv(pvh, pve, pmap, va, ptp)
 {
        pve->pv_pmap = pmap;
        pve->pv_va = va;
-       pve->pv_ptp = ptp;                      /* NULL for kernel pmap */
-       SPLAY_INSERT(pvtree, &pvh->pvh_root, pve); /* add to locked list */
+       pve->pv_ptp = ptp;              /* NULL for kernel pmap */
+       simple_lock(&pvh->pvh_lock);    /* lock pv_head */
+       pve->pv_next = pvh->pvh_list;   /* add to ... */
+       pvh->pvh_list = pve;            /* ... locked list */
+       simple_unlock(&pvh->pvh_lock);  /* unlock, done! */
 }

 /*
@@ -1558,15 +1235,19 @@ pmap_remove_pv(pvh, pmap, va)
        struct pmap *pmap;
        vaddr_t va;
 {
-       struct pv_entry tmp, *pve;
+       struct pv_entry *pve, **prevptr;

-       tmp.pv_pmap = pmap;
-       tmp.pv_va = va;
-       pve = SPLAY_FIND(pvtree, &pvh->pvh_root, &tmp);
-       if (pve == NULL)
-               return (NULL);
-       SPLAY_REMOVE(pvtree, &pvh->pvh_root, pve);
-       return(pve);                            /* return removed pve */
+       prevptr = &pvh->pvh_list;
+       pve = *prevptr;
+       while (pve) {
+               if (pve->pv_pmap == pmap && pve->pv_va == va) { /* match? */
+                       *prevptr = pve->pv_next;                /* remove it! */
+                       break;
+               }
+               prevptr = &pve->pv_next;                /* previous pointer */
+               pve = pve->pv_next;                     /* advance */
+       }
+       return(pve);                            /* return removed pve */
 }

 /*
@@ -1768,7 +1449,7 @@ pmap_pdp_ctor(void *arg, void *object, i
        /* zero init area */
        memset(pdir, 0, PDIR_SLOT_PTE * sizeof(pd_entry_t));

-       /* put in recursibve PDE to map the PTEs */
+       /* put in recursive PDE to map the PTEs */
        pdir[PDIR_SLOT_PTE] = pdirpa | PG_V | PG_KW;

        npde = nkptp[PTP_LEVELS - 1];
@@ -2107,17 +1788,18 @@ pmap_extract(pmap, va, pap)
                pmap_unmap_ptes(pmap);
                return FALSE;
        }
-       pte = ptes[pl1_i(va)];
-       pmap_unmap_ptes(pmap);

 #ifdef LARGEPAGES
        if (pde & PG_PS) {
                if (pap != NULL)
                        *pap = (pde & PG_LGFRAME) | (va & 0x1fffff);
+               pmap_unmap_ptes(pmap);
                return (TRUE);
        }
 #endif

+       pte = ptes[pl1_i(va)];
+       pmap_unmap_ptes(pmap);

        if (__predict_true((pte & PG_V) != 0)) {
                if (pap != NULL)
@@ -2320,7 +2002,6 @@ pmap_remove_ptes(pmap, ptp, ptpva, start
        int32_t *cpumaskp;
        int flags;
 {
-       struct pv_entry *pv_tofree = NULL;      /* list of pv_entrys to free */
        struct pv_entry *pve;
        pt_entry_t *pte = (pt_entry_t *) ptpva;
        pt_entry_t opte;
@@ -2388,14 +2069,11 @@ pmap_remove_ptes(pmap, ptp, ptpva, start
                simple_unlock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock);

                if (pve) {
-                       SPLAY_RIGHT(pve, pv_node) = pv_tofree;
-                       pv_tofree = pve;
+                       pool_put(&pmap_pv_pool, pve);
                }

                /* end of "for" loop: time for next pte */
        }
-       if (pv_tofree)
-               pmap_free_pvs(pmap, pv_tofree);
 }


@@ -2470,7 +2148,7 @@ pmap_remove_pte(pmap, ptp, pte, va, cpum
        simple_unlock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock);

        if (pve)
-               pmap_free_pv(pmap, pve);
+               pool_put(&pmap_pv_pool, pve);
        return(TRUE);
 }

@@ -2632,7 +2310,7 @@ pmap_page_remove(pg)
 {
        int bank, off;
        struct pv_head *pvh;
-       struct pv_entry *pve, *npve, *killlist = NULL;
+       struct pv_entry *pve, *npve, **prevptr;
        pt_entry_t *ptes, opte;
        pd_entry_t **pdes;
 #ifdef DIAGNOSTIC
@@ -2646,7 +2324,7 @@ pmap_page_remove(pg)
                panic("pmap_page_remove: unmanaged page?");

        pvh = &vm_physmem[bank].pmseg.pvhead[off];
-       if (SPLAY_ROOT(&pvh->pvh_root) == NULL) {
+       if (pvh->pvh_list == NULL) {
                return;
        }

@@ -2656,8 +2334,9 @@ pmap_page_remove(pg)
        /* XXX: needed if we hold head->map lock? */
        simple_lock(&pvh->pvh_lock);

-       for (pve = SPLAY_MIN(pvtree, &pvh->pvh_root); pve != NULL; pve = npve) {
-               npve = SPLAY_NEXT(pvtree, &pvh->pvh_root, pve);
+       for (prevptr = &pvh->pvh_list, pve = pvh->pvh_list;
+           pve != NULL; pve = npve) {
+               npve = pve->pv_next;
                pmap_map_ptes(pve->pv_pmap, &ptes, &pdes);      /* locks pmap */

 #ifdef DIAGNOSTIC
@@ -2695,11 +2374,11 @@ pmap_page_remove(pg)
                        }
                }
                pmap_unmap_ptes(pve->pv_pmap);          /* unlocks pmap */
-               SPLAY_REMOVE(pvtree, &pvh->pvh_root, pve); /* remove it */
-               SPLAY_RIGHT(pve, pv_node) = killlist;   /* mark it for death */
-               killlist = pve;
+               *prevptr = npve;                        /* remove it */
+               pool_put(&pmap_pv_pool, pve);
        }
-       pmap_free_pvs(NULL, killlist);
+
+       pvh->pvh_list = NULL;
        simple_unlock(&pvh->pvh_lock);
        PMAP_HEAD_TO_MAP_UNLOCK();
        pmap_tlb_shootnow(cpumask);
@@ -2746,7 +2425,7 @@ pmap_test_attrs(pg, testbits)

        /* test to see if there is a list before bothering to lock */
        pvh = &vm_physmem[bank].pmseg.pvhead[off];
-       if (SPLAY_ROOT(&pvh->pvh_root) == NULL) {
+       if (pvh->pvh_list == NULL) {
                return(FALSE);
        }

@@ -2755,9 +2434,8 @@ pmap_test_attrs(pg, testbits)
        /* XXX: needed if we hold head->map lock? */
        simple_lock(&pvh->pvh_lock);

-       for (pve = SPLAY_MIN(pvtree, &pvh->pvh_root);
-            pve != NULL && (*myattrs & testbits) == 0;
-            pve = SPLAY_NEXT(pvtree, &pvh->pvh_root, pve)) {
+       for (pve = pvh->pvh_list; pve != NULL && (*myattrs & testbits) == 0;
+           pve = pve->pv_next) {
                pmap_map_ptes(pve->pv_pmap, &ptes, &pdes);
                pte = ptes[pl1_i(pve->pv_va)];
                pmap_unmap_ptes(pve->pv_pmap);
@@ -2809,7 +2487,7 @@ pmap_clear_attrs(pg, clearbits)
        result = *myattrs & clearbits;
        *myattrs &= ~clearbits;

-       SPLAY_FOREACH(pve, pvtree, &pvh->pvh_root) {
+       for (pve = pvh->pvh_list; pve != NULL; pve = pve->pv_next) {
                pmap_map_ptes(pve->pv_pmap, &ptes, &pdes);      /* locks pmap */
 #ifdef DIAGNOSTIC
                if (!pmap_pdes_valid(pve->pv_va, pdes, NULL))
@@ -3161,7 +2839,7 @@ pmap_enter(pmap, va, pa, prot, flags)
        if (bank != -1) {
                pvh = &vm_physmem[bank].pmseg.pvhead[off];
                if (pve == NULL) {
-                       pve = pmap_alloc_pv(pmap, ALLOCPV_NEED);
+                       pve = pool_get(&pmap_pv_pool, PR_NOWAIT);
                        if (pve == NULL) {
                                if (flags & PMAP_CANFAIL) {
                                        error = ENOMEM;
@@ -3177,7 +2855,7 @@ pmap_enter(pmap, va, pa, prot, flags)
                /* new mapping is not PG_PVLIST.   free pve if we've got one */
                pvh = NULL;             /* ensure !PG_PVLIST */
                if (pve)
-                       pmap_free_pv(pmap, pve);
+                       pool_put(&pmap_pv_pool, pve);
        }

 enter_now:
@@ -3561,23 +3239,6 @@ pmap_tlb_shootdown(pmap, va, pte, cpumas
                        continue;
                }

-#ifdef I386_CPU
-               /*
-                * i386 CPUs can't invalidate a single VA, only
-                * flush the entire TLB, so don't bother allocating
-                * jobs for them -- just queue a `flushu'.
-                *
-                * XXX note that this can be executed for non-i386
-                * when called * early (before identifycpu() has set
-                * cpu_class)
-                */
-               if (cpu_class == CPUCLASS_386) {
-                       pq->pq_flushu++;
-                       *cpumaskp |= 1U << ci->ci_cpuid;
-                       continue;
-               }
-#endif
-
                pj = pmap_tlb_shootdown_job_get(pq);
                pq->pq_pte |= pte;
                if (pj == NULL) {