Source-Changes-HG archive


[src/trunk]: src/sys/arch/aarch64 Use sys/uvm/pmap/pmap_tlb.c on Aarch64 in t...



details:   https://anonhg.NetBSD.org/src/rev/c3bf62a72dc1
branches:  trunk
changeset: 989021:c3bf62a72dc1
user:      skrll <skrll%NetBSD.org@localhost>
date:      Sun Oct 10 07:15:25 2021 +0000

description:
Use sys/uvm/pmap/pmap_tlb.c on AArch64 in the same way that some Arm, MIPS,
and some PPC kernels do.  This removes the limitation of 256 processes on
CPUs with an 8-bit ASID field, e.g. the Apple M1.
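
The 256-process figure follows directly from the ASID width.  As an
illustrative sketch (not part of this commit): ID_AA64MMFR0_EL1.ASIDBits
advertises whether the CPU implements 8-bit or 16-bit ASIDs, and an 8-bit
field gives only 256 distinct values to hand out before the common
pmap_tlb code has to start recycling them:

#include <stdint.h>

/*
 * Hypothetical helper: decode ID_AA64MMFR0_EL1.ASIDBits (bits [7:4]).
 * 0b0000 means 8-bit ASIDs (256 values, as on the Apple M1);
 * 0b0010 means 16-bit ASIDs (65536 values).
 */
static inline unsigned
asid_count(uint64_t id_aa64mmfr0)
{
	const unsigned asidbits = (id_aa64mmfr0 >> 4) & 0xf;

	return (asidbits == 2) ? 65536 : 256;
}

With only 256 ASIDs and no recycling, each process needs its own ASID for
the lifetime of its pmap, hence the old hard cap; sys/uvm/pmap/pmap_tlb.c
instead reassigns ASIDs on demand and invalidates stale TLB entries.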

Additionally, the following changes have been made:

- removed a couple of unnecessary aarch64_tlbi_all calls
- removed the TLB invalidation performed after _pmap_sweep_pdp frees page
  tables; as far as I can tell it was never necessary.
- all kernel mappings are now marked global and all userland mappings
  not-global (see the sketch after this list).
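
A minimal sketch of what the global/not-global distinction means at the
PTE level (illustrative, not the committed code; NetBSD's pte.h names
this bit LX_BLKPAG_NG): on AArch64, bit 11 of a block/page descriptor is
nG.  Entries with nG clear match in the TLB under any ASID, while nG-set
entries match only under the ASID they were tagged with, so kernel
mappings survive an ASID switch while userland mappings do not:

#include <stdbool.h>
#include <stdint.h>

#define LX_BLKPAG_NG	((uint64_t)1 << 11)	/* nG: not-global */

/* Mark a PTE global (kernel) or ASID-tagged (userland). */
static inline uint64_t
pte_set_scope(uint64_t pte, bool kernel)
{
	if (kernel)
		return pte & ~LX_BLKPAG_NG;	/* matches under every ASID */
	return pte | LX_BLKPAG_NG;		/* matches only under its ASID */
}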

Performance testing hasn't shown a significant difference (the t-test
below gives a p-value of about 0.30).  The data here is from building a
kernel on an lx2k system with NVMe storage.

before
1489.6u 400.4s 2:40.65 1176.5% 228+224k 0+32289io 57pf+0w
1482.6u 403.2s 2:38.49 1189.9% 228+222k 0+32274io 46pf+0w
1485.4u 402.2s 2:37.27 1200.2% 228+222k 0+32275io 12pf+0w

after
1493.9u 404.6s 2:37.50 1205.4% 227+221k 0+32265io 48pf+0w
1485.0u 408.0s 2:38.54 1194.0% 227+222k 0+32272io 36pf+0w
1484.3u 407.0s 2:35.88 1213.3% 228+224k 0+32268io 14pf+0w

>>> stats.ttest_ind([160.65,158.49,157.27], [157.5,158.54,155.88])
Ttest_indResult(statistic=1.1923622711296888, pvalue=0.2990182944606766)
>>>
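
For readers without scipy to hand, the same equal-variance two-sample
t statistic can be reproduced by hand; a small standalone sketch (an
assumed C translation of the computation above, using the elapsed times
in seconds):

#include <math.h>
#include <stdio.h>

int
main(void)
{
	const double before[] = { 160.65, 158.49, 157.27 };
	const double after[]  = { 157.50, 158.54, 155.88 };
	const int n = 3;
	double mb = 0, ma = 0, vb = 0, va = 0;

	/* sample means of the two groups */
	for (int i = 0; i < n; i++) {
		mb += before[i] / n;
		ma += after[i] / n;
	}
	/* sums of squared deviations */
	for (int i = 0; i < n; i++) {
		vb += (before[i] - mb) * (before[i] - mb);
		va += (after[i] - ma) * (after[i] - ma);
	}

	/* pooled variance, n1 + n2 - 2 degrees of freedom */
	const double sp2 = (vb + va) / (2 * n - 2);
	const double t = (mb - ma) / sqrt(sp2 * (2.0 / n));

	printf("t = %.4f\n", t);	/* prints t = 1.1924 */
	return 0;
}

A p-value of about 0.30 at 4 degrees of freedom is far above any
conventional significance threshold, which is why the change is judged
performance-neutral.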

diffstat:

 sys/arch/aarch64/aarch64/aarch64_tlb.c |  118 ++++++++++++++
 sys/arch/aarch64/aarch64/pmap.c        |  265 ++++++++++++++++++++++----------
 sys/arch/aarch64/conf/files.aarch64    |    4 +-
 sys/arch/aarch64/include/cpu.h         |   20 +--
 sys/arch/aarch64/include/pmap.h        |   67 +++++++-
 sys/arch/aarch64/include/pte.h         |    8 +-
 sys/arch/aarch64/include/types.h       |    3 +-
 7 files changed, 373 insertions(+), 112 deletions(-)

diffs (truncated from 959 to 300 lines):

diff -r c7a3ccd608b0 -r c3bf62a72dc1 sys/arch/aarch64/aarch64/aarch64_tlb.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/aarch64/aarch64/aarch64_tlb.c    Sun Oct 10 07:15:25 2021 +0000
@@ -0,0 +1,118 @@
+/*-
+ * Copyright (c) 2021 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Nick Hudson
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "opt_cputypes.h"
+#include "opt_multiprocessor.h"
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(1, "$NetBSD: aarch64_tlb.c,v 1.1 2021/10/10 07:15:25 skrll Exp $");
+
+#include <sys/param.h>
+#include <sys/types.h>
+
+#include <uvm/uvm.h>
+
+#include <arm/cpufunc.h>
+
+#include <aarch64/armreg.h>
+
+tlb_asid_t
+tlb_get_asid(void)
+{
+
+       return __SHIFTOUT(reg_ttbr0_el1_read(), TTBR_ASID);
+}
+
+void
+tlb_set_asid(tlb_asid_t asid, pmap_t pm)
+{
+       const uint64_t ttbr =
+           __SHIFTIN(asid, TTBR_ASID) |
+           __SHIFTIN(pmap_l0pa(pm), TTBR_BADDR);
+
+       cpu_set_ttbr0(ttbr);
+}
+
+void
+tlb_invalidate_all(void)
+{
+
+       aarch64_tlbi_all();
+}
+
+void
+tlb_invalidate_globals(void)
+{
+       tlb_invalidate_all();
+}
+
+void
+tlb_invalidate_asids(tlb_asid_t lo, tlb_asid_t hi)
+{
+       for (; lo <= hi; lo++) {
+               aarch64_tlbi_by_asid(lo);
+       }
+}
+
+void
+tlb_invalidate_addr(vaddr_t va, tlb_asid_t asid)
+{
+       KASSERT((va & PAGE_MASK) == 0);
+
+       aarch64_tlbi_by_asid_va(asid, va);
+}
+
+bool
+tlb_update_addr(vaddr_t va, tlb_asid_t asid, pt_entry_t pte, bool insert_p)
+{
+       KASSERT((va & PAGE_MASK) == 0);
+
+       tlb_invalidate_addr(va, asid);
+
+       return true;
+}
+
+u_int
+tlb_record_asids(u_long *mapp, tlb_asid_t asid_max)
+{
+       KASSERT(asid_max == pmap_md_tlb_asid_max());
+
+#if DIAGNOSTIC
+       memset(mapp, 0xff, (asid_max + 1) / (NBBY * sizeof(u_long)));
+       mapp[0] ^= __BITS(0, KERNEL_PID);
+#endif
+       return asid_max;
+}
+
+void
+tlb_walk(void *ctx, bool (*func)(void *, vaddr_t, tlb_asid_t, pt_entry_t))
+{
+
+       /* no way to view the TLB */
+}
diff -r c7a3ccd608b0 -r c3bf62a72dc1 sys/arch/aarch64/aarch64/pmap.c
--- a/sys/arch/aarch64/aarch64/pmap.c   Sun Oct 10 07:09:20 2021 +0000
+++ b/sys/arch/aarch64/aarch64/pmap.c   Sun Oct 10 07:15:25 2021 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: pmap.c,v 1.116 2021/09/30 21:19:16 skrll Exp $ */
+/*     $NetBSD: pmap.c,v 1.117 2021/10/10 07:15:25 skrll Exp $ */
 
 /*
  * Copyright (c) 2017 Ryo Shimizu <ryo%nerv.org@localhost>
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.116 2021/09/30 21:19:16 skrll Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.117 2021/10/10 07:15:25 skrll Exp $");
 
 #include "opt_arm_debug.h"
 #include "opt_ddb.h"
@@ -41,6 +41,7 @@
 
 #include <sys/asan.h>
 #include <sys/atomic.h>
+#include <sys/cpu.h>
 #include <sys/kmem.h>
 #include <sys/vmem.h>
 
@@ -181,10 +182,10 @@
  * change the pte to accessible temporarly before cpu_icache_sync_range().
  * this macro modifies PTE (*ptep). need to update PTE after this.
  */
-#define PTE_ICACHE_SYNC_PAGE(pte, ptep, pm, va, ll)                    \
+#define PTE_ICACHE_SYNC_PAGE(pte, ptep, asid, va, ll)                  \
        do {                                                            \
                atomic_swap_64((ptep), (pte) | LX_BLKPAG_AF);           \
-               AARCH64_TLBI_BY_ASID_VA((pm)->pm_asid, (va), (ll));     \
+               AARCH64_TLBI_BY_ASID_VA((asid), (va), (ll));            \
                cpu_icache_sync_range((va), PAGE_SIZE);                 \
        } while (0/*CONSTCOND*/)
 
@@ -336,7 +337,6 @@
        attr = _pmap_pte_adjust_prot(0, prot, VM_PROT_ALL, false);
        attr = _pmap_pte_adjust_cacheflags(attr, flags);
        pmapboot_enter_range(va, pa, resid, attr, printf);
-       aarch64_tlbi_all();
 
        return resid;
 }
@@ -472,18 +472,23 @@
        virtual_end = vend;
        pmap_maxkvaddr = vstart;
 
-       aarch64_tlbi_all();
-
        l0pa = reg_ttbr1_el1_read();
        l0 = (void *)AARCH64_PA_TO_KVA(l0pa);
 
+       pmap_tlb_info_init(&pmap_tlb0_info);
+
        memset(&kernel_pmap, 0, sizeof(kernel_pmap));
+
        kpm = pmap_kernel();
-       kpm->pm_asid = 0;
+       struct pmap_asid_info * const pai = PMAP_PAI(kpm, cpu_tlb_info(ci));
+
+       pai->pai_asid = KERNEL_PID;
        kpm->pm_refcnt = 1;
        kpm->pm_idlepdp = 0;
        kpm->pm_l0table = l0;
        kpm->pm_l0table_pa = l0pa;
+       kpm->pm_onproc = kcpuset_running;
+       kpm->pm_active = kcpuset_running;
        kpm->pm_activated = true;
        LIST_INIT(&kpm->pm_vmlist);
        LIST_INIT(&kpm->pm_pvlist);     /* not used for kernel pmap */
@@ -493,6 +498,12 @@
        CTASSERT(sizeof(kpm->pm_stats.resident_count) == sizeof(long));
 }
 
+void
+pmap_md_tlb_info_attach(struct pmap_tlb_info *ti, struct cpu_info *ci)
+{
+       /* nothing */
+}
+
 static inline void
 _pmap_adj_wired_count(struct pmap *pm, int adj)
 {
@@ -556,9 +567,7 @@
            32, 0, PR_LARGECACHE, "pvpl", NULL, IPL_NONE, _pmap_pv_ctor,
            NULL, NULL);
 
-       int nmaxproc = cpu_maxproc();
-       if (maxproc > nmaxproc)
-               maxproc = nmaxproc;
+       pmap_tlb_info_evcnt_attach(&pmap_tlb0_info);
 }
 
 void
@@ -674,18 +683,16 @@
 }
 
 /* free empty page table pages */
-static int
+static void
 _pmap_sweep_pdp(struct pmap *pm)
 {
        struct vm_page *pg, *tmp;
        pd_entry_t *ptep_in_parent, opte __diagused;
        paddr_t pa, pdppa;
-       int nsweep;
        uint16_t wirecount __diagused;
 
        KASSERT(mutex_owned(&pm->pm_lock) || pm->pm_refcnt == 0);
 
-       nsweep = 0;
        LIST_FOREACH_SAFE(pg, &pm->pm_vmlist, pageq.list, tmp) {
                if (pg->wire_count != 1)
                        continue;
@@ -698,7 +705,6 @@
                if (ptep_in_parent == NULL) {
                        /* no parent */
                        pmap_free_pdp(pm, pg);
-                       nsweep++;
                        continue;
                }
 
@@ -708,7 +714,6 @@
                wirecount = --pg->wire_count; /* 1 -> 0 */
                KASSERT(wirecount == 0);
                pmap_free_pdp(pm, pg);
-               nsweep++;
 
                /* L3->L2->L1. no need for L0 */
                pdppa = AARCH64_KVA_TO_PA(trunc_page((vaddr_t)ptep_in_parent));
@@ -722,12 +727,10 @@
                /* decrement wire_count of parent */
                wirecount = --pg->wire_count;
                KASSERTMSG(pg->wire_count <= (Ln_ENTRIES + 1),
-                   "pm=%p[%d], pg=%p, wire_count=%d",
-                   pm, pm->pm_asid, pg, pg->wire_count);
+                   "pm=%p, pg=%p, wire_count=%d",
+                   pm, pg, pg->wire_count);
        }
        pm->pm_idlepdp = 0;
-
-       return nsweep;
 }
 
 static void
@@ -977,11 +980,14 @@
                         * change to accessible temporally
                         * to do cpu_icache_sync_range()
                         */
+                       struct pmap_asid_info * const pai = PMAP_PAI(pm,
+                           cpu_tlb_info(ci));
+
                        atomic_swap_64(ptep, pte | LX_BLKPAG_AF);
-                       AARCH64_TLBI_BY_ASID_VA(pm->pm_asid, va, true);
+                       AARCH64_TLBI_BY_ASID_VA(pai->pai_asid, va, true);
                        cpu_icache_sync_range(va, len);
                        atomic_swap_64(ptep, pte);
-                       AARCH64_TLBI_BY_ASID_VA(pm->pm_asid, va, true);
+                       AARCH64_TLBI_BY_ASID_VA(pai->pai_asid, va, true);
                }
        }
 
@@ -1179,14 +1185,15 @@
                        KASSERT(pv == &pp->pp_pv);
                        continue;
                }
-               pr("  pv[%d] pv=%p\n",
-                   i, pv);
-               pr("    pv[%d].pv_pmap = %p (asid=%d)\n",
-                   i, pv->pv_pmap, pv->pv_pmap->pm_asid);
-               pr("    pv[%d].pv_va   = %016lx (color=%d)\n",
-                   i, trunc_page(pv->pv_va), _pmap_color(pv->pv_va));
-               pr("    pv[%d].pv_ptep = %p\n",
-                   i, pv->pv_ptep);


