Source-Changes-HG archive


[src/trunk]: src/sys x86 pmap:



details:   https://anonhg.NetBSD.org/src/rev/61e6b4b247d6
branches:  trunk
changeset: 746101:61e6b4b247d6
user:      ad <ad@NetBSD.org>
date:      Sun Mar 22 00:16:16 2020 +0000

description:
x86 pmap:

- Give pmap_remove_all() its own version of pmap_remove_ptes() that on native
  x86 does the bare minimum needed to clear out PTPs.  Cuts ~4% sys time on
  'build.sh release' for me.  (A toy model of the idea follows this list.)

- pmap_sync_pv(): there's no need to issue a redundant TLB shootdown.  The
  caller waits for the competing operation to finish.

- Bring 'options TLBSTATS' up to date.
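
For context on the first two items: when a pmap is being torn down, every
mapping in it is about to disappear, so invalidating each VA individually
is wasted work; the new code patches up PV entries and stats only, then
issues a single whole-pmap shootdown (TLBSHOOT_REMOVE_ALL) at the end.
Below is a minimal userland sketch of that idea.  It is a toy model with
invented names and a fake cost counter, not kernel code:

	#include <stdio.h>

	#define NPTES 512                  /* PTEs per x86 page table page */

	static unsigned shootdowns;        /* simulated shootdown requests */

	/* General-purpose removal: every VA cleared must be invalidated. */
	static void
	remove_pte(unsigned long *pte)
	{

		*pte = 0;
		shootdowns++;              /* one per-VA TLB shootdown */
	}

	/*
	 * Teardown-only removal: fix up bookkeeping, write no PTEs, and
	 * let the caller do a single full shootdown once everything is
	 * gone.  The loop exits as soon as all live entries have been
	 * seen, like the wire_count-driven scan in pmap_zap_ptp().
	 */
	static void
	zap_ptp(const unsigned long *ptes, unsigned live)
	{
		unsigned i;

		for (i = 0; live > 0 && i < NPTES; i++) {
			if (ptes[i] == 0)
				continue;  /* skip holes in the PTP */
			live--;            /* PV/stats work would go here */
		}
	}

	int
	main(void)
	{
		unsigned long ptes[NPTES];
		unsigned i;

		for (i = 0; i < NPTES; i++)
			ptes[i] = 1;
		shootdowns = 0;
		for (i = 0; i < NPTES; i++)
			remove_pte(&ptes[i]);
		printf("per-PTE path: %u shootdowns\n", shootdowns);

		for (i = 0; i < NPTES; i++)
			ptes[i] = 1;
		shootdowns = 0;
		zap_ptp(ptes, NPTES);
		shootdowns++;              /* one for the whole pmap */
		printf("bulk path:    %u shootdown\n", shootdowns);
		return 0;
	}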

diffstat:

 sys/arch/x86/include/pmap.h     |   13 +-
 sys/arch/x86/x86/pmap.c         |  217 ++++++++++++++++++++++++++++++---------
 sys/arch/x86/x86/x86_tlb.c      |   17 +-
 sys/dev/nvmm/x86/nvmm_x86_svm.c |    6 +-
 sys/dev/nvmm/x86/nvmm_x86_vmx.c |    6 +-
 5 files changed, 184 insertions(+), 75 deletions(-)

diffs (truncated from 449 to 300 lines):

diff -r 2302961bfc40 -r 61e6b4b247d6 sys/arch/x86/include/pmap.h
--- a/sys/arch/x86/include/pmap.h       Sun Mar 22 00:14:16 2020 +0000
+++ b/sys/arch/x86/include/pmap.h       Sun Mar 22 00:16:16 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: pmap.h,v 1.115 2020/03/17 22:29:19 ad Exp $    */
+/*     $NetBSD: pmap.h,v 1.116 2020/03/22 00:16:16 ad Exp $    */
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -399,18 +399,15 @@
 vaddr_t reserve_dumppages(vaddr_t); /* XXX: not a pmap fn */
 
 typedef enum tlbwhy {
-       TLBSHOOT_APTE,
+       TLBSHOOT_REMOVE_ALL,
        TLBSHOOT_KENTER,
        TLBSHOOT_KREMOVE,
-       TLBSHOOT_FREE_PTP1,
-       TLBSHOOT_FREE_PTP2,
+       TLBSHOOT_FREE_PTP,
        TLBSHOOT_REMOVE_PTE,
-       TLBSHOOT_REMOVE_PTES,
-       TLBSHOOT_SYNC_PV1,
-       TLBSHOOT_SYNC_PV2,
+       TLBSHOOT_SYNC_PV,
        TLBSHOOT_WRITE_PROTECT,
        TLBSHOOT_ENTER,
-       TLBSHOOT_UPDATE,
+       TLBSHOOT_NVMM,
        TLBSHOOT_BUS_DMA,
        TLBSHOOT_BUS_SPACE,
        TLBSHOOT__MAX,
diff -r 2302961bfc40 -r 61e6b4b247d6 sys/arch/x86/x86/pmap.c
--- a/sys/arch/x86/x86/pmap.c   Sun Mar 22 00:14:16 2020 +0000
+++ b/sys/arch/x86/x86/pmap.c   Sun Mar 22 00:16:16 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: pmap.c,v 1.379 2020/03/20 19:06:14 ad Exp $    */
+/*     $NetBSD: pmap.c,v 1.380 2020/03/22 00:16:16 ad Exp $    */
 
 /*
  * Copyright (c) 2008, 2010, 2016, 2017, 2019, 2020 The NetBSD Foundation, Inc.
@@ -130,7 +130,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.379 2020/03/20 19:06:14 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.380 2020/03/22 00:16:16 ad Exp $");
 
 #include "opt_user_ldt.h"
 #include "opt_lockdebug.h"
@@ -579,6 +579,32 @@
        return pve_to_pvpte(LIST_NEXT(pvpte_to_pve(pvpte), pve_list));
 }
 
+static inline uint8_t
+pmap_pte_to_pp_attrs(pt_entry_t pte)
+{
+       uint8_t ret = 0;
+       if (pte & PTE_D)
+               ret |= PP_ATTRS_D;
+       if (pte & PTE_A)
+               ret |= PP_ATTRS_A;
+       if (pte & PTE_W)
+               ret |= PP_ATTRS_W;
+       return ret;
+}
+
+static inline pt_entry_t
+pmap_pp_attrs_to_pte(uint8_t attrs)
+{
+       pt_entry_t pte = 0;
+       if (attrs & PP_ATTRS_D)
+               pte |= PTE_D;
+       if (attrs & PP_ATTRS_A)
+               pte |= PTE_A;
+       if (attrs & PP_ATTRS_W)
+               pte |= PTE_W;
+       return pte;
+}
+
 /*
  * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]?
  * of course the kernel is always loaded
@@ -2033,7 +2059,7 @@
         * [This mostly deals with shared mappings, for example shared libs
         * and executables.]
         *
-        * Optimise for pmap_remove_all() which works by ascending scan:
+        * Optimise for pmap_remove_ptes() which works by ascending scan:
         * look at the lowest numbered node in the tree first.  The tree is
         * known non-empty because of the check above.  For short lived
         * processes where pmap_remove() isn't used much this gets close to
@@ -2287,7 +2313,7 @@
                invaladdr = level == 1 ? (vaddr_t)ptes :
                    (vaddr_t)pdes[level - 2];
                pmap_tlb_shootdown(pmap, invaladdr + index * PAGE_SIZE,
-                   opde, TLBSHOOT_FREE_PTP1);
+                   opde, TLBSHOOT_FREE_PTP);
 
 #if defined(XENPV)
                pmap_tlb_shootnow();
@@ -2858,6 +2884,134 @@
 }
 
 /*
+ * pmap_zap_ptp: clear out an entire PTP without modifying PTEs
+ *
+ * => caller must hold pmap's lock
+ * => PTP must be mapped into KVA
+ * => must be called with kernel preemption disabled
+ * => does as little work as possible
+ */
+static void
+pmap_zap_ptp(struct pmap *pmap, struct vm_page *ptp, pt_entry_t *pte,
+    vaddr_t startva, vaddr_t blkendva, struct pv_entry **pv_tofree)
+{
+#ifndef XEN
+       struct pv_entry *pve;
+       struct vm_page *pg;
+       struct pmap_page *pp;
+       pt_entry_t opte;
+       rb_tree_t *tree;
+       vaddr_t va;
+       int wired;
+       uint8_t oattrs;
+       u_int cnt;
+
+       KASSERT(mutex_owned(&pmap->pm_lock));
+       KASSERT(kpreempt_disabled());
+       KASSERT(pmap != pmap_kernel());
+       KASSERT(ptp->wire_count > 1);
+       KASSERT(ptp->wire_count - 1 <= PAGE_SIZE / sizeof(pt_entry_t));
+
+       /*
+        * Start at the lowest entered VA, and scan until there are no more
+        * PTEs in the PTP.  The goal is to disconnect PV entries and patch
+        * up the pmap's stats.  No PTEs will be modified.
+        */
+       tree = &VM_PAGE_TO_PP(ptp)->pp_rb;
+       pve = RB_TREE_MIN(tree);
+       wired = 0;
+       va = (vaddr_t)ptp->uanon;
+       pte += ((va - startva) >> PAGE_SHIFT);
+
+       for (cnt = ptp->wire_count; cnt > 1; pte++, va += PAGE_SIZE) {
+               opte = *pte;
+               if (!pmap_valid_entry(opte)) {
+                       continue;
+               }
+
+               /*
+                * Count the PTE.  If it's not for a managed mapping
+                * there's nothing more to do.
+                */
+               cnt--;
+               wired -= (opte & PTE_WIRED);
+               if ((opte & PTE_PVLIST) == 0) {
+#ifndef DOM0OPS
+                       KASSERTMSG((PHYS_TO_VM_PAGE(pmap_pte2pa(opte)) == NULL),
+                           "managed page without PTE_PVLIST for %#"
+                           PRIxVADDR, va);
+                       KASSERTMSG((pmap_pv_tracked(pmap_pte2pa(opte)) == NULL),
+                           "pv-tracked page without PTE_PVLIST for %#"
+                           PRIxVADDR, va);
+#endif
+                       KASSERT(pmap_treelookup_pv(pmap, ptp, (ptp != NULL ?
+                           &VM_PAGE_TO_PP(ptp)->pp_rb : &pmap_kernel_rb),
+                           va) == NULL);
+                       continue;
+               }
+
+               /*
+                * "pve" now points to the lowest (by VA) dynamic PV entry
+                * in the PTP.  If it's for this VA, take advantage of it to
+                * avoid calling PHYS_TO_VM_PAGE().  Avoid modifying the RB
+                * tree by skipping to the next VA in the tree whenever
+                * there is a match here.  The tree will be cleared out in
+                * one pass before return to pmap_remove_all().
+                */ 
+               oattrs = pmap_pte_to_pp_attrs(opte);
+               if (pve != NULL && pve->pve_pte.pte_va == va) {
+                       pp = pve->pve_pp;
+                       KASSERT(pve->pve_pte.pte_ptp == ptp);
+                       KASSERT(pp->pp_pte.pte_ptp != ptp ||
+                           pp->pp_pte.pte_va != va);
+                       mutex_spin_enter(&pp->pp_lock);
+                       pp->pp_attrs |= oattrs;
+                       LIST_REMOVE(pve, pve_list);
+                       mutex_spin_exit(&pp->pp_lock);
+                       pve->pve_next = *pv_tofree;
+                       *pv_tofree = pve;
+                       pve = RB_TREE_NEXT(tree, pve);
+                       continue;
+               }
+
+               /*
+                * No entry in the tree so it must be embedded.  Look up the
+                * page and cancel the embedded entry.
+                */
+               if ((pg = PHYS_TO_VM_PAGE(pmap_pte2pa(opte))) != NULL) {
+                       pp = VM_PAGE_TO_PP(pg);
+               } else if ((pp = pmap_pv_tracked(pmap_pte2pa(opte))) == NULL) {
+                       paddr_t pa = pmap_pte2pa(opte);
+                       panic("%s: PTE_PVLIST with pv-untracked page"
+                           " va = %#"PRIxVADDR" pa = %#"PRIxPADDR
+                           " (%#"PRIxPADDR")", __func__, va, pa, atop(pa));
+               }
+               mutex_spin_enter(&pp->pp_lock);
+               KASSERT(pp->pp_pte.pte_ptp == ptp);
+               KASSERT(pp->pp_pte.pte_va == va);
+               pp->pp_attrs |= oattrs;
+               pp->pp_pte.pte_ptp = NULL;
+               pp->pp_pte.pte_va = 0;
+               mutex_spin_exit(&pp->pp_lock);
+       }
+
+       /* PTP now empty - adjust the tree & stats to match. */
+       pmap_stats_update(pmap, -(ptp->wire_count - 1), wired / PTE_WIRED);
+       ptp->wire_count = 1;
+#ifdef DIAGNOSTIC
+       rb_tree_init(tree, &pmap_rbtree_ops);
+#endif
+#else  /* !XEN */
+       /*
+        * XXXAD For XEN, it's not clear to me that we can do this, because
+        * I guess the hypervisor keeps track of PTEs too.
+        */
+       pmap_remove_ptes(pmap, ptp, (vaddr_t)pte, startva, blkendva,
+           pv_tofree);
+#endif /* !XEN */
+}
+
+/*
  * pmap_remove_all: remove all mappings from pmap in bulk.
  *
  * Ordinarily when removing mappings it's important to hold the UVM object's
@@ -2912,8 +3066,7 @@
                        KASSERT(pmap_find_ptp(pmap, va, 1) == ptps[i]);
 
                        /* Zap! */
-                       pmap_remove_ptes(pmap, ptps[i],
-                           (vaddr_t)&ptes[pl1_i(va)], va,
+                       pmap_zap_ptp(pmap, ptps[i], &ptes[pl1_i(va)], va,
                            blkendva, &pv_tofree);
 
                        /* PTP should now be unused - free it. */
@@ -2922,6 +3075,7 @@
                }
                pmap_unmap_ptes(pmap, pmap2);
                pmap_free_pvs(pmap, pv_tofree);
+               pmap_tlb_shootdown(pmap, -1L, 0, TLBSHOOT_REMOVE_ALL);
                mutex_exit(&pmap->pm_lock);
 
                /* Process deferred frees. */
@@ -3767,32 +3921,6 @@
        }
 }
 
-static inline uint8_t
-pmap_pte_to_pp_attrs(pt_entry_t pte)
-{
-       uint8_t ret = 0;
-       if (pte & PTE_D)
-               ret |= PP_ATTRS_D;
-       if (pte & PTE_A)
-               ret |= PP_ATTRS_A;
-       if (pte & PTE_W)
-               ret |= PP_ATTRS_W;
-       return ret;
-}
-
-static inline pt_entry_t
-pmap_pp_attrs_to_pte(uint8_t attrs)
-{
-       pt_entry_t pte = 0;
-       if (attrs & PP_ATTRS_D)
-               pte |= PTE_D;
-       if (attrs & PP_ATTRS_A)
-               pte |= PTE_A;
-       if (attrs & PP_ATTRS_W)
-               pte |= PTE_W;
-       return pte;
-}
-
 /*
  * pmap_remove_pte: remove a single PTE from a PTP.
  *
@@ -4024,16 +4152,8 @@
                         * We lost a race with a V->P operation like
                         * pmap_remove().  Wait for the competitor
                         * reflecting pte bits into mp_attrs.
-                        *
-                        * Issue a redundant TLB shootdown so that
-                        * we can wait for its completion.
                         */
                        pmap_unmap_pte();
-                       if (clearbits != 0) {
-                               pmap_tlb_shootdown(pmap, va,
-                                   (pmap == pmap_kernel() ? PTE_G : 0),
-                                   TLBSHOOT_SYNC_PV1);
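
The diff is truncated above at 300 of 449 lines.  For reference, the two
small helpers the change hoists toward the top of pmap.c (so that
pmap_zap_ptp() can use them) are easy to try in isolation.  The conversion
logic below is taken from the diff; the PTE_* and PP_ATTRS_* bit values
are illustrative stand-ins, not the definitions from the real x86 headers:

	#include <assert.h>
	#include <stdint.h>

	typedef uint64_t pt_entry_t;

	#define PTE_W           0x0002     /* stand-in: writable */
	#define PTE_A           0x0020     /* stand-in: accessed */
	#define PTE_D           0x0040     /* stand-in: dirty */

	#define PP_ATTRS_D      0x01       /* stand-in: page modified */
	#define PP_ATTRS_A      0x02       /* stand-in: page referenced */
	#define PP_ATTRS_W      0x04       /* stand-in: page writable */

	/* Fold the interesting PTE bits into a per-page attribute byte. */
	static inline uint8_t
	pmap_pte_to_pp_attrs(pt_entry_t pte)
	{
		uint8_t ret = 0;

		if (pte & PTE_D)
			ret |= PP_ATTRS_D;
		if (pte & PTE_A)
			ret |= PP_ATTRS_A;
		if (pte & PTE_W)
			ret |= PP_ATTRS_W;
		return ret;
	}

	/* The inverse: expand the attribute byte back into PTE bits. */
	static inline pt_entry_t
	pmap_pp_attrs_to_pte(uint8_t attrs)
	{
		pt_entry_t pte = 0;

		if (attrs & PP_ATTRS_D)
			pte |= PTE_D;
		if (attrs & PP_ATTRS_A)
			pte |= PTE_A;
		if (attrs & PP_ATTRS_W)
			pte |= PTE_W;
		return pte;
	}

	int
	main(void)
	{
		pt_entry_t pte = PTE_D | PTE_W;

		/* Round-tripping D/A/W through the byte is lossless. */
		assert(pmap_pp_attrs_to_pte(pmap_pte_to_pp_attrs(pte)) == pte);
		return 0;
	}

In the diff, pmap_zap_ptp() uses the first helper to accumulate oattrs
into pp_attrs as it disconnects each PV entry.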


