Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/arch/x86/x86 use __builtin_assume_aligned() on places wh...
details: https://anonhg.NetBSD.org/src/rev/cd9e397b67ba
branches: trunk
changeset: 951586:cd9e397b67ba
user: jdolecek <jdolecek%NetBSD.org@localhost>
date: Sat Feb 06 21:24:19 2021 +0000
description:
use __builtin_assume_aligned() on places where the memset() or memcpy()
parameters are known to be PAGE_SIZE-aligned, so compiler doesn't need
to emit atrocious alignment check code when inlining them
This particularly improves pmap_zero_page() and pmap_copy_page(),
which are now reduced to only 'rep stosq', and close to what hand-written
assembly would do.
Note: on CPUs supporting ERMS, 'rep stosb' would still be slightly faster, but
this is a solid stop-gap improvement
suggested by Mateusz Guzik in:
http://mail-index.netbsd.org/tech-kern/2020/07/19/msg026620.html
diffstat:
sys/arch/x86/x86/pmap.c | 39 +++++++++++++++++++++------------------
1 file changed, 21 insertions(+), 18 deletions(-)
diffs (170 lines):
diff -r 72cccf3307a4 -r cd9e397b67ba sys/arch/x86/x86/pmap.c
--- a/sys/arch/x86/x86/pmap.c Sat Feb 06 21:08:51 2021 +0000
+++ b/sys/arch/x86/x86/pmap.c Sat Feb 06 21:24:19 2021 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: pmap.c,v 1.408 2020/11/30 17:06:02 bouyer Exp $ */
+/* $NetBSD: pmap.c,v 1.409 2021/02/06 21:24:19 jdolecek Exp $ */
/*
* Copyright (c) 2008, 2010, 2016, 2017, 2019, 2020 The NetBSD Foundation, Inc.
@@ -130,7 +130,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.408 2020/11/30 17:06:02 bouyer Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.409 2021/02/06 21:24:19 jdolecek Exp $");
#include "opt_user_ldt.h"
#include "opt_lockdebug.h"
@@ -339,6 +339,9 @@
#define PMAP_CHECK_PP(pp) \
KASSERTMSG((pp)->pp_lock.mtx_ipl._ipl == IPL_VM, "bad pmap_page %p", pp)
+#define PAGE_ALIGNED(pp) \
+ __builtin_assume_aligned((void *)(pp), PAGE_SIZE)
+
/*
* Other data structures
*/
@@ -1297,7 +1300,7 @@
xen_dummy_user_pgd = xen_dummy_page - KERNBASE;
/* Zero fill it, the less checks in Xen it requires the better */
- memset((void *)(xen_dummy_user_pgd + KERNBASE), 0, PAGE_SIZE);
+ memset(PAGE_ALIGNED(xen_dummy_user_pgd + KERNBASE), 0, PAGE_SIZE);
/* Mark read-only */
HYPERVISOR_update_va_mapping(xen_dummy_user_pgd + KERNBASE,
pmap_pa2pte(xen_dummy_user_pgd) | PTE_P | pmap_pg_nx,
@@ -1542,7 +1545,7 @@
pa = pmap_bootstrap_palloc(1);
*pte = (pa & PTE_FRAME) | pteflags;
pmap_update_pg(tmpva);
- memset((void *)tmpva, 0, PAGE_SIZE);
+ memset(PAGE_ALIGNED(tmpva), 0, PAGE_SIZE);
L4_BASE[L4e_idx+i] = pa | pteflags | PTE_A;
}
@@ -1556,7 +1559,7 @@
pa = pmap_bootstrap_palloc(1);
*pte = (pa & PTE_FRAME) | pteflags;
pmap_update_pg(tmpva);
- memset((void *)tmpva, 0, PAGE_SIZE);
+ memset(PAGE_ALIGNED(tmpva), 0, PAGE_SIZE);
L3_BASE[L3e_idx+i] = pa | pteflags | PTE_A;
}
@@ -1571,7 +1574,7 @@
pa = pmap_bootstrap_palloc(1);
*pte = (pa & PTE_FRAME) | pteflags;
pmap_update_pg(tmpva);
- memset((void *)tmpva, 0, PAGE_SIZE);
+ memset(PAGE_ALIGNED(tmpva), 0, PAGE_SIZE);
L2_BASE[L2e_idx+i] = pa | pteflags | PTE_A;
}
@@ -1663,7 +1666,7 @@
pa = pmap_bootstrap_palloc(1);
*pte = (pa & PTE_FRAME) | pteflags;
pmap_update_pg(tmpva);
- memset((void *)tmpva, 0, PAGE_SIZE);
+ memset(PAGE_ALIGNED(tmpva), 0, PAGE_SIZE);
L4_BASE[L4e_idx+i] = pa | pteflags | PTE_A;
}
@@ -1677,7 +1680,7 @@
pa = pmap_bootstrap_palloc(1);
*pte = (pa & PTE_FRAME) | pteflags;
pmap_update_pg(tmpva);
- memset((void *)tmpva, 0, PAGE_SIZE);
+ memset(PAGE_ALIGNED(tmpva), 0, PAGE_SIZE);
L3_BASE[L3e_idx+i] = pa | pteflags | PTE_A;
}
@@ -2627,7 +2630,7 @@
int s;
#endif
- memset(pdir, 0, PDP_SIZE * PAGE_SIZE);
+ memset(PAGE_ALIGNED(pdir), 0, PDP_SIZE * PAGE_SIZE);
/*
* NOTE: This is all done unlocked, but we will check afterwards
@@ -3805,7 +3808,7 @@
pmap_zero_page(paddr_t pa)
{
#if defined(__HAVE_DIRECT_MAP)
- memset((void *)PMAP_DIRECT_MAP(pa), 0, PAGE_SIZE);
+ memset(PAGE_ALIGNED(PMAP_DIRECT_MAP(pa)), 0, PAGE_SIZE);
#else
#if defined(XENPV)
if (XEN_VERSION_SUPPORTED(3, 4))
@@ -3829,7 +3832,7 @@
pmap_pte_flush();
pmap_update_pg(zerova); /* flush TLB */
- memset((void *)zerova, 0, PAGE_SIZE);
+ memset(PAGE_ALIGNED(zerova), 0, PAGE_SIZE);
#if defined(DIAGNOSTIC) || defined(XENPV)
pmap_pte_set(zpte, 0); /* zap ! */
@@ -3847,7 +3850,7 @@
vaddr_t srcva = PMAP_DIRECT_MAP(srcpa);
vaddr_t dstva = PMAP_DIRECT_MAP(dstpa);
- memcpy((void *)dstva, (void *)srcva, PAGE_SIZE);
+ memcpy(PAGE_ALIGNED(dstva), PAGE_ALIGNED(srcva), PAGE_SIZE);
#else
#if defined(XENPV)
if (XEN_VERSION_SUPPORTED(3, 4)) {
@@ -3877,7 +3880,7 @@
pmap_update_pg(srcva);
pmap_update_pg(dstva);
- memcpy((void *)dstva, (void *)srcva, PAGE_SIZE);
+ memcpy(PAGE_ALIGNED(dstva), PAGE_ALIGNED(srcva), PAGE_SIZE);
#if defined(DIAGNOSTIC) || defined(XENPV)
pmap_pte_set(srcpte, 0);
@@ -5403,7 +5406,7 @@
if (!uvm_page_physget(&pa))
panic("%s: out of memory", __func__);
#if defined(__HAVE_DIRECT_MAP)
- memset((void *)PMAP_DIRECT_MAP(pa), 0, PAGE_SIZE);
+ memset(PAGE_ALIGNED(PMAP_DIRECT_MAP(pa)), 0, PAGE_SIZE);
#else
#if defined(XENPV)
if (XEN_VERSION_SUPPORTED(3, 4)) {
@@ -5416,7 +5419,7 @@
PTE_W | pmap_pg_nx);
pmap_pte_flush();
pmap_update_pg((vaddr_t)early_zerop);
- memset(early_zerop, 0, PAGE_SIZE);
+ memset(PAGE_ALIGNED(early_zerop), 0, PAGE_SIZE);
#if defined(DIAGNOSTIC) || defined(XENPV)
pmap_pte_set(early_zero_pte, 0);
pmap_pte_flush();
@@ -5782,13 +5785,13 @@
/* Zero levels 1-3 */
for (level = 0; level < PTP_LEVELS - 1; ++level) {
tmp_pml = (void *)x86_tmp_pml_vaddr[level];
- memset(tmp_pml, 0, PAGE_SIZE);
+ memset(PAGE_ALIGNED(tmp_pml), 0, PAGE_SIZE);
}
/* Copy PML4 */
kernel_pml = pmap_kernel()->pm_pdir;
tmp_pml = (void *)x86_tmp_pml_vaddr[PTP_LEVELS - 1];
- memcpy(tmp_pml, kernel_pml, PAGE_SIZE);
+ memcpy(PAGE_ALIGNED(tmp_pml), PAGE_ALIGNED(kernel_pml), PAGE_SIZE);
#ifdef PAE
/*
@@ -6701,7 +6704,7 @@
pmap->pm_write_protect = pmap_ept_write_protect;
pmap->pm_unwire = pmap_ept_unwire;
- memset(pmap->pm_pdir, 0, PAGE_SIZE);
+ memset(PAGE_ALIGNED(pmap->pm_pdir), 0, PAGE_SIZE);
}
#endif /* __HAVE_DIRECT_MAP && __x86_64__ && !XENPV */
Home |
Main Index |
Thread Index |
Old Index