pkgsrc-Changes archive


CVS commit: pkgsrc/sysutils/xenkernel48



Module Name:    pkgsrc
Committed By:   bouyer
Date:           Thu Jan 18 10:28:13 UTC 2018

Modified Files:
        pkgsrc/sysutils/xenkernel48: Makefile distinfo
Added Files:
        pkgsrc/sysutils/xenkernel48/patches: patch-XSA254-1 patch-XSA254-2
            patch-XSA254-3 patch-XSA254-4

Log Message:
Add patches from upstream, from xsa254/README.pti:
> This README gives references for one of three mitigation strategies
> for Meltdown.

> This series is a first-class mitigation: a pagetable isolation series for
> Xen.  It is available for Xen 4.6 to Xen 4.10 and later.

bump PKGREVISION
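
The core of the mitigation in these patches (patch-XSA254-3 below) is a
per-CPU root (L4) page table that carries only Xen's own mappings plus
clones of the per-CPU stack, IDT and TSS; on each entry into a 64-bit PV
guest the guest-controlled L4 slots are copied into that table before CR3
is switched, limiting what a Meltdown-style read from guest context can
reach. Below is a rough, stand-alone C sketch of just that slot copy; the
slot-range constants mirror Xen 4.8's x86_64 layout and are stated here as
an assumption for illustration, not as the committed code (which does this
in assembly in restore_all_guest):

    /*
     * Illustration only: user-space sketch of the L4 slot copy performed
     * on entry to a 64-bit PV guest.  Slot numbers (Xen-private slots
     * 256..271 out of 512) are assumed from Xen 4.8's memory layout.
     */
    #include <stdint.h>
    #include <string.h>

    #define ROOT_PAGETABLE_ENTRIES        512
    #define ROOT_PAGETABLE_FIRST_XEN_SLOT 256
    #define ROOT_PAGETABLE_LAST_XEN_SLOT  271

    /* Copy the guest-controlled L4 slots into the per-CPU root page
     * table, leaving the hypervisor-private slot range untouched. */
    static void sync_guest_slots(uint64_t *percpu_root,
                                 const uint64_t *guest_l4)
    {
        /* Guest slots below the Xen-private range: 0 .. 255. */
        memcpy(percpu_root, guest_l4,
               ROOT_PAGETABLE_FIRST_XEN_SLOT * sizeof(*guest_l4));
        /* Guest slots above the Xen-private range: 272 .. 511. */
        memcpy(percpu_root + ROOT_PAGETABLE_LAST_XEN_SLOT + 1,
               guest_l4 + ROOT_PAGETABLE_LAST_XEN_SLOT + 1,
               (ROOT_PAGETABLE_ENTRIES - ROOT_PAGETABLE_LAST_XEN_SLOT - 1) *
                   sizeof(*guest_l4));
    }

    int main(void)
    {
        static uint64_t guest_l4[ROOT_PAGETABLE_ENTRIES];
        static uint64_t percpu_root[ROOT_PAGETABLE_ENTRIES];

        /* The Xen-private slots would already have been filled in once at
         * CPU bring-up; the guest-controlled ones are refreshed on entry. */
        sync_guest_slots(percpu_root, guest_l4);
        return 0;
    }

In the actual patch the Xen-private slots are populated once per CPU in
setup_cpu_root_pgt(), the PERDOMAIN slot is refreshed at context switch,
and patch-XSA254-4 adds the "xpti" boot option to disable the whole
mechanism (off by default on AMD hardware).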


To generate a diff of this commit:
cvs rdiff -u -r1.10 -r1.11 pkgsrc/sysutils/xenkernel48/Makefile
cvs rdiff -u -r1.4 -r1.5 pkgsrc/sysutils/xenkernel48/distinfo
cvs rdiff -u -r0 -r1.1 pkgsrc/sysutils/xenkernel48/patches/patch-XSA254-1 \
    pkgsrc/sysutils/xenkernel48/patches/patch-XSA254-2 \
    pkgsrc/sysutils/xenkernel48/patches/patch-XSA254-3 \
    pkgsrc/sysutils/xenkernel48/patches/patch-XSA254-4

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: pkgsrc/sysutils/xenkernel48/Makefile
diff -u pkgsrc/sysutils/xenkernel48/Makefile:1.10 pkgsrc/sysutils/xenkernel48/Makefile:1.11
--- pkgsrc/sysutils/xenkernel48/Makefile:1.10   Mon Jan 15 09:47:54 2018
+++ pkgsrc/sysutils/xenkernel48/Makefile        Thu Jan 18 10:28:13 2018
@@ -1,9 +1,9 @@
-# $NetBSD: Makefile,v 1.10 2018/01/15 09:47:54 jperkin Exp $
+# $NetBSD: Makefile,v 1.11 2018/01/18 10:28:13 bouyer Exp $
 
 VERSION=       4.8.2
 DISTNAME=      xen-${VERSION}
 PKGNAME=       xenkernel48-${VERSION}
-PKGREVISION=   2
+PKGREVISION=   3
 CATEGORIES=    sysutils
 MASTER_SITES=  https://downloads.xenproject.org/release/xen/${VERSION}/
 DIST_SUBDIR=   xen48

Index: pkgsrc/sysutils/xenkernel48/distinfo
diff -u pkgsrc/sysutils/xenkernel48/distinfo:1.4 pkgsrc/sysutils/xenkernel48/distinfo:1.5
--- pkgsrc/sysutils/xenkernel48/distinfo:1.4    Fri Dec 15 14:02:15 2017
+++ pkgsrc/sysutils/xenkernel48/distinfo        Thu Jan 18 10:28:13 2018
@@ -1,4 +1,4 @@
-$NetBSD: distinfo,v 1.4 2017/12/15 14:02:15 bouyer Exp $
+$NetBSD: distinfo,v 1.5 2018/01/18 10:28:13 bouyer Exp $
 
 SHA1 (xen48/xen-4.8.2.tar.gz) = 184c57ce9e71e34b3cbdd318524021f44946efbe
 RMD160 (xen48/xen-4.8.2.tar.gz) = f4126cb0f7ff427ed7d20ce399dcd1077c599343
@@ -22,6 +22,10 @@ SHA1 (patch-XSA248) = d5787fa7fc48449ca9
 SHA1 (patch-XSA249) = 7037a35f37eb866f16fe90482e66d0eca95944c4
 SHA1 (patch-XSA250) = 25ab2e8c67ebe2b40cf073197c17f1625f5581f6
 SHA1 (patch-XSA251) = dc0786c85bcfbdd3f7a1c97a3af32c10deea8276
+SHA1 (patch-XSA254-1) = a2e1573bebd2f5e873da85d1f29a6cb5cfa2fb31
+SHA1 (patch-XSA254-2) = fddc172293fcd8cfbaaf61155bb16738fb6fdcf5
+SHA1 (patch-XSA254-3) = eaded260831b8146c7943ed5c9138d8bde256213
+SHA1 (patch-XSA254-4) = 9766e14d3e48d41d8bce969f07c9f3a7b22d9120
 SHA1 (patch-xen_Makefile) = be3f4577a205b23187b91319f91c50720919f70b
 SHA1 (patch-xen_Rules.mk) = 5f33a667bae67c85d997a968c0f8b014b707d13c
 SHA1 (patch-xen_arch_x86_Rules.mk) = e2d148fb308c37c047ca41a678471217b6166977

Added files:

Index: pkgsrc/sysutils/xenkernel48/patches/patch-XSA254-1
diff -u /dev/null pkgsrc/sysutils/xenkernel48/patches/patch-XSA254-1:1.1
--- /dev/null   Thu Jan 18 10:28:13 2018
+++ pkgsrc/sysutils/xenkernel48/patches/patch-XSA254-1  Thu Jan 18 10:28:13 2018
@@ -0,0 +1,389 @@
+$NetBSD: patch-XSA254-1,v 1.1 2018/01/18 10:28:13 bouyer Exp $
+
+From: Andrew Cooper <andrew.cooper3%citrix.com@localhost>
+Date: Wed, 17 Jan 2018 16:14:16 +0000 (+0100)
+Subject: x86/entry: Remove support for partial cpu_user_regs frames
+X-Git-Url: http://xenbits.xen.org/gitweb/?p=xen.git;a=commitdiff_plain;h=a7cf0a3b818377a8a49baed3606bfa2f214cd645;hp=40c02dd27a3e350197ef438b1ea6ad21f275c1c5
+
+x86/entry: Remove support for partial cpu_user_regs frames
+
+Save all GPRs on entry to Xen.
+
+The entry_int82() path is via a DPL1 gate, only usable by 32bit PV guests, so
+can get away with only saving the 32bit registers.  All other entrypoints can
+be reached from 32 or 64bit contexts.
+
+This is part of XSA-254.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3%citrix.com@localhost>
+Reviewed-by: Wei Liu <wei.liu2%citrix.com@localhost>
+Acked-by: Jan Beulich <jbeulich%suse.com@localhost>
+master commit: f9eb74789af77e985ae653193f3622263499f674
+master date: 2018-01-05 19:57:07 +0000
+---
+
+diff --git a/tools/tests/x86_emulator/x86_emulate.c b/tools/tests/x86_emulator/x86_emulate.c
+index 19d8385..127a926 100644
+--- tools/tests/x86_emulator/x86_emulate.c.orig
++++ tools/tests/x86_emulator/x86_emulate.c
+@@ -33,7 +33,6 @@ typedef bool bool_t;
+ #define MASK_INSR(v, m) (((v) * ((m) & -(m))) & (m))
+ 
+ #define cpu_has_amd_erratum(nr) 0
+-#define mark_regs_dirty(r) ((void)(r))
+ 
+ /* For generic assembly code: use macros to define operation/operand sizes. */
+ #ifdef __i386__
+diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
+index c8a303d..747cf65 100644
+--- xen/arch/x86/domain.c.orig
++++ xen/arch/x86/domain.c
+@@ -148,7 +148,6 @@ static void noreturn continue_idle_domain(struct vcpu *v)
+ static void noreturn continue_nonidle_domain(struct vcpu *v)
+ {
+     check_wakeup_from_wait();
+-    mark_regs_dirty(guest_cpu_user_regs());
+     reset_stack_and_jump(ret_from_intr);
+ }
+ 
+diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
+index 249932a..f4bf8b5 100644
+--- xen/arch/x86/traps.c.orig
++++ xen/arch/x86/traps.c
+@@ -3049,7 +3049,6 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
+             goto fail;
+         if ( admin_io_okay(port, op_bytes, currd) )
+         {
+-            mark_regs_dirty(regs);
+             io_emul(regs);            
+         }
+         else
+@@ -3079,7 +3078,6 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
+             goto fail;
+         if ( admin_io_okay(port, op_bytes, currd) )
+         {
+-            mark_regs_dirty(regs);
+             io_emul(regs);            
+             if ( (op_bytes == 1) && pv_post_outb_hook )
+                 pv_post_outb_hook(port, regs->eax);
+diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
+index 474ffbc..df693c2 100644
+--- xen/arch/x86/x86_64/compat/entry.S.orig
++++ xen/arch/x86/x86_64/compat/entry.S
+@@ -15,7 +15,8 @@
+ ENTRY(compat_hypercall)
+         ASM_CLAC
+         pushq $0
+-        SAVE_VOLATILE type=TRAP_syscall compat=1
++        movl  $TRAP_syscall, 4(%rsp)
++        SAVE_ALL compat=1 /* DPL1 gate, restricted to 32bit PV guests only. */
+         CR4_PV32_RESTORE
+ 
+         cmpb  $0,untrusted_msi(%rip)
+@@ -66,7 +67,6 @@ compat_test_guest_events:
+ /* %rbx: struct vcpu */
+ compat_process_softirqs:
+         sti
+-        andl  $~TRAP_regs_partial,UREGS_entry_vector(%rsp)
+         call  do_softirq
+         jmp   compat_test_all_events
+ 
+@@ -203,7 +203,8 @@ ENTRY(cstar_enter)
+         pushq $FLAT_USER_CS32
+         pushq %rcx
+         pushq $0
+-        SAVE_VOLATILE TRAP_syscall
++        movl  $TRAP_syscall, 4(%rsp)
++        SAVE_ALL
+         GET_CURRENT(bx)
+         movq  VCPU_domain(%rbx),%rcx
+         cmpb  $0,DOMAIN_is_32bit_pv(%rcx)
+diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
+index 85f1a4b..ac9ab4c 100644
+--- xen/arch/x86/x86_64/entry.S.orig
++++ xen/arch/x86/x86_64/entry.S
+@@ -97,7 +97,8 @@ ENTRY(lstar_enter)
+         pushq $FLAT_KERNEL_CS64
+         pushq %rcx
+         pushq $0
+-        SAVE_VOLATILE TRAP_syscall
++        movl  $TRAP_syscall, 4(%rsp)
++        SAVE_ALL
+         GET_CURRENT(bx)
+         testb $TF_kernel_mode,VCPU_thread_flags(%rbx)
+         jz    switch_to_kernel
+@@ -139,7 +140,6 @@ test_guest_events:
+ /* %rbx: struct vcpu */
+ process_softirqs:
+         sti       
+-        SAVE_PRESERVED
+         call do_softirq
+         jmp  test_all_events
+ 
+@@ -189,7 +189,8 @@ GLOBAL(sysenter_eflags_saved)
+         pushq $3 /* ring 3 null cs */
+         pushq $0 /* null rip */
+         pushq $0
+-        SAVE_VOLATILE TRAP_syscall
++        movl  $TRAP_syscall, 4(%rsp)
++        SAVE_ALL
+         GET_CURRENT(bx)
+         cmpb  $0,VCPU_sysenter_disables_events(%rbx)
+         movq  VCPU_sysenter_addr(%rbx),%rax
+@@ -206,7 +207,6 @@ UNLIKELY_END(sysenter_nt_set)
+         leal  (,%rcx,TBF_INTERRUPT),%ecx
+ UNLIKELY_START(z, sysenter_gpf)
+         movq  VCPU_trap_ctxt(%rbx),%rsi
+-        SAVE_PRESERVED
+         movl  $TRAP_gp_fault,UREGS_entry_vector(%rsp)
+         movl  %eax,TRAPBOUNCE_error_code(%rdx)
+         movq  TRAP_gp_fault * TRAPINFO_sizeof + TRAPINFO_eip(%rsi),%rax
+@@ -224,7 +224,8 @@ UNLIKELY_END(sysenter_gpf)
+ ENTRY(int80_direct_trap)
+         ASM_CLAC
+         pushq $0
+-        SAVE_VOLATILE 0x80
++        movl  $0x80, 4(%rsp)
++        SAVE_ALL
+ 
+         cmpb  $0,untrusted_msi(%rip)
+ UNLIKELY_START(ne, msi_check)
+@@ -252,7 +253,6 @@ int80_slow_path:
+          * IDT entry with DPL==0.
+          */
+         movl  $((0x80 << 3) | X86_XEC_IDT),UREGS_error_code(%rsp)
+-        SAVE_PRESERVED
+         movl  $TRAP_gp_fault,UREGS_entry_vector(%rsp)
+         /* A GPF wouldn't have incremented the instruction pointer. */
+         subq  $2,UREGS_rip(%rsp)
+diff --git a/xen/arch/x86/x86_64/traps.c b/xen/arch/x86/x86_64/traps.c
+index a9b0282..df4ac81 100644
+diff --git a/xen/arch/x86/x86_emulate.c b/xen/arch/x86/x86_emulate.c
+--- xen/arch/x86/x86_64/traps.c.orig   2017-09-06 12:26:35.000000000 +0200
++++ xen/arch/x86/x86_64/traps.c        2018-01-17 20:50:17.000000000 +0100
+@@ -66,15 +66,10 @@
+            regs->rbp, regs->rsp, regs->r8);
+     printk("r9:  %016lx   r10: %016lx   r11: %016lx\n",
+            regs->r9,  regs->r10, regs->r11);
+-    if ( !(regs->entry_vector & TRAP_regs_partial) )
+-    {
+-        printk("r12: %016lx   r13: %016lx   r14: %016lx\n",
+-               regs->r12, regs->r13, regs->r14);
+-        printk("r15: %016lx   cr0: %016lx   cr4: %016lx\n",
+-               regs->r15, crs[0], crs[4]);
+-    }
+-    else
+-        printk("cr0: %016lx   cr4: %016lx\n", crs[0], crs[4]);
++    printk("r12: %016lx   r13: %016lx   r14: %016lx\n",
++           regs->r12, regs->r13, regs->r14);
++    printk("r15: %016lx   cr0: %016lx   cr4: %016lx\n",
++           regs->r15, crs[0], crs[4]);
+     printk("cr3: %016lx   cr2: %016lx\n", crs[3], crs[2]);
+     printk("ds: %04x   es: %04x   fs: %04x   gs: %04x   "
+            "ss: %04x   cs: %04x\n",
+index f52f543..c1e2d54 100644
+--- xen/arch/x86/x86_emulate.c.orig
++++ xen/arch/x86/x86_emulate.c
+@@ -11,7 +11,6 @@
+ 
+ #include <xen/domain_page.h>
+ #include <asm/x86_emulate.h>
+-#include <asm/asm_defns.h> /* mark_regs_dirty() */
+ #include <asm/processor.h> /* current_cpu_info */
+ #include <asm/xstate.h>
+ #include <asm/amd.h> /* cpu_has_amd_erratum() */
+diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
+index c4d282a..9851416 100644
+--- xen/arch/x86/x86_emulate/x86_emulate.c.orig
++++ xen/arch/x86/x86_emulate/x86_emulate.c
+@@ -1559,10 +1559,10 @@ decode_register(
+     case  9: p = &regs->r9;  break;
+     case 10: p = &regs->r10; break;
+     case 11: p = &regs->r11; break;
+-    case 12: mark_regs_dirty(regs); p = &regs->r12; break;
+-    case 13: mark_regs_dirty(regs); p = &regs->r13; break;
+-    case 14: mark_regs_dirty(regs); p = &regs->r14; break;
+-    case 15: mark_regs_dirty(regs); p = &regs->r15; break;
++    case 12: p = &regs->r12; break;
++    case 13: p = &regs->r13; break;
++    case 14: p = &regs->r14; break;
++    case 15: p = &regs->r15; break;
+ #endif
+     default: BUG(); p = NULL; break;
+     }
+diff --git a/xen/common/wait.c b/xen/common/wait.c
+index 4ac98c0..398f653 100644
+--- xen/common/wait.c.orig
++++ xen/common/wait.c
+@@ -128,7 +128,6 @@ static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
+     unsigned long dummy;
+     u32 entry_vector = cpu_info->guest_cpu_user_regs.entry_vector;
+ 
+-    cpu_info->guest_cpu_user_regs.entry_vector &= ~TRAP_regs_partial;
+     ASSERT(wqv->esp == 0);
+ 
+     /* Save current VCPU affinity; force wakeup on *this* CPU only. */
+diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h
+index f1c6fa1..99cb337 100644
+--- xen/include/asm-x86/asm_defns.h.orig
++++ xen/include/asm-x86/asm_defns.h
+@@ -17,15 +17,6 @@
+ void ret_from_intr(void);
+ #endif
+ 
+-#ifdef CONFIG_FRAME_POINTER
+-/* Indicate special exception stack frame by inverting the frame pointer. */
+-#define SETUP_EXCEPTION_FRAME_POINTER(offs)     \
+-        leaq  offs(%rsp),%rbp;                  \
+-        notq  %rbp
+-#else
+-#define SETUP_EXCEPTION_FRAME_POINTER(offs)
+-#endif
+-
+ #ifndef NDEBUG
+ #define ASSERT_INTERRUPT_STATUS(x, msg)         \
+         pushf;                                  \
+@@ -42,31 +33,6 @@ void ret_from_intr(void);
+ #define ASSERT_INTERRUPTS_DISABLED \
+     ASSERT_INTERRUPT_STATUS(z, "INTERRUPTS DISABLED")
+ 
+-/*
+- * This flag is set in an exception frame when registers R12-R15 did not get
+- * saved.
+- */
+-#define _TRAP_regs_partial 16
+-#define TRAP_regs_partial  (1 << _TRAP_regs_partial)
+-/*
+- * This flag gets set in an exception frame when registers R12-R15 possibly
+- * get modified from their originally saved values and hence need to be
+- * restored even if the normal call flow would restore register values.
+- *
+- * The flag being set implies _TRAP_regs_partial to be unset. Restoring
+- * R12-R15 thus is
+- * - required when this flag is set,
+- * - safe when _TRAP_regs_partial is unset.
+- */
+-#define _TRAP_regs_dirty   17
+-#define TRAP_regs_dirty    (1 << _TRAP_regs_dirty)
+-
+-#define mark_regs_dirty(r) ({ \
+-        struct cpu_user_regs *r__ = (r); \
+-        ASSERT(!((r__)->entry_vector & TRAP_regs_partial)); \
+-        r__->entry_vector |= TRAP_regs_dirty; \
+-})
+-
+ #ifdef __ASSEMBLY__
+ # define _ASM_EX(p) p-.
+ #else
+@@ -236,7 +202,7 @@ static always_inline void stac(void)
+ #endif
+ 
+ #ifdef __ASSEMBLY__
+-.macro SAVE_ALL op
++.macro SAVE_ALL op, compat=0
+ .ifeqs "\op", "CLAC"
+         ASM_CLAC
+ .else
+@@ -255,40 +221,6 @@ static always_inline void stac(void)
+         movq  %rdx,UREGS_rdx(%rsp)
+         movq  %rcx,UREGS_rcx(%rsp)
+         movq  %rax,UREGS_rax(%rsp)
+-        movq  %r8,UREGS_r8(%rsp)
+-        movq  %r9,UREGS_r9(%rsp)
+-        movq  %r10,UREGS_r10(%rsp)
+-        movq  %r11,UREGS_r11(%rsp)
+-        movq  %rbx,UREGS_rbx(%rsp)
+-        movq  %rbp,UREGS_rbp(%rsp)
+-        SETUP_EXCEPTION_FRAME_POINTER(UREGS_rbp)
+-        movq  %r12,UREGS_r12(%rsp)
+-        movq  %r13,UREGS_r13(%rsp)
+-        movq  %r14,UREGS_r14(%rsp)
+-        movq  %r15,UREGS_r15(%rsp)
+-.endm
+-
+-/*
+- * Save all registers not preserved by C code or used in entry/exit code. Mark
+- * the frame as partial.
+- *
+- * @type: exception type
+- * @compat: R8-R15 don't need saving, and the frame nevertheless is complete
+- */
+-.macro SAVE_VOLATILE type compat=0
+-.if \compat
+-        movl  $\type,UREGS_entry_vector-UREGS_error_code(%rsp)
+-.else
+-        movl  $\type|TRAP_regs_partial,\
+-              UREGS_entry_vector-UREGS_error_code(%rsp)
+-.endif
+-        addq  $-(UREGS_error_code-UREGS_r15),%rsp
+-        cld
+-        movq  %rdi,UREGS_rdi(%rsp)
+-        movq  %rsi,UREGS_rsi(%rsp)
+-        movq  %rdx,UREGS_rdx(%rsp)
+-        movq  %rcx,UREGS_rcx(%rsp)
+-        movq  %rax,UREGS_rax(%rsp)
+ .if !\compat
+         movq  %r8,UREGS_r8(%rsp)
+         movq  %r9,UREGS_r9(%rsp)
+@@ -297,20 +229,17 @@ static always_inline void stac(void)
+ .endif
+         movq  %rbx,UREGS_rbx(%rsp)
+         movq  %rbp,UREGS_rbp(%rsp)
+-        SETUP_EXCEPTION_FRAME_POINTER(UREGS_rbp)
+-.endm
+-
+-/*
+- * Complete a frame potentially only partially saved.
+- */
+-.macro SAVE_PRESERVED
+-        btrl  $_TRAP_regs_partial,UREGS_entry_vector(%rsp)
+-        jnc   987f
++#ifdef CONFIG_FRAME_POINTER
++/* Indicate special exception stack frame by inverting the frame pointer. */
++        leaq  UREGS_rbp(%rsp), %rbp
++        notq  %rbp
++#endif
++.if !\compat
+         movq  %r12,UREGS_r12(%rsp)
+         movq  %r13,UREGS_r13(%rsp)
+         movq  %r14,UREGS_r14(%rsp)
+         movq  %r15,UREGS_r15(%rsp)
+-987:
++.endif
+ .endm
+ 
+ #define LOAD_ONE_REG(reg, compat) \
+@@ -351,33 +280,13 @@ static always_inline void stac(void)
+  * @compat: R8-R15 don't need reloading
+  */
+ .macro RESTORE_ALL adj=0 compat=0
+-.if !\compat
+-        testl $TRAP_regs_dirty,UREGS_entry_vector(%rsp)
+-.endif
+         LOAD_C_CLOBBERED \compat
+ .if !\compat
+-        jz    987f
+         movq  UREGS_r15(%rsp),%r15
+         movq  UREGS_r14(%rsp),%r14
+         movq  UREGS_r13(%rsp),%r13
+         movq  UREGS_r12(%rsp),%r12
+-#ifndef NDEBUG
+-        .subsection 1
+-987:    testl $TRAP_regs_partial,UREGS_entry_vector(%rsp)
+-        jnz   987f
+-        cmpq  UREGS_r15(%rsp),%r15
+-        jne   789f
+-        cmpq  UREGS_r14(%rsp),%r14
+-        jne   789f
+-        cmpq  UREGS_r13(%rsp),%r13
+-        jne   789f
+-        cmpq  UREGS_r12(%rsp),%r12
+-        je    987f
+-789:    BUG   /* Corruption of partial register state. */
+-        .subsection 0
+-#endif
+ .endif
+-987:
+         LOAD_ONE_REG(bp, \compat)
+         LOAD_ONE_REG(bx, \compat)
+         subq  $-(UREGS_error_code-UREGS_r15+\adj), %rsp
Index: pkgsrc/sysutils/xenkernel48/patches/patch-XSA254-2
diff -u /dev/null pkgsrc/sysutils/xenkernel48/patches/patch-XSA254-2:1.1
--- /dev/null   Thu Jan 18 10:28:13 2018
+++ pkgsrc/sysutils/xenkernel48/patches/patch-XSA254-2  Thu Jan 18 10:28:13 2018
@@ -0,0 +1,44 @@
+$NetBSD: patch-XSA254-2,v 1.1 2018/01/18 10:28:13 bouyer Exp $
+
+From: Andrew Cooper <andrew.cooper3%citrix.com@localhost>
+Date: Wed, 17 Jan 2018 16:15:25 +0000 (+0100)
+Subject: x86/mm: Always set _PAGE_ACCESSED on L4e updates
+X-Git-Url: http://xenbits.xen.org/gitweb/?p=xen.git;a=commitdiff_plain;h=049e2f45bfa488967494466ec6506c3ecae5fe0e;hp=49a44f089c59c326828b8aed39bee9743f5802fb
+
+x86/mm: Always set _PAGE_ACCESSED on L4e updates
+
+Signed-off-by: Andrew Cooper <andrew.cooper3%citrix.com@localhost>
+Reviewed-by: Jan Beulich <jbeulich%suse.com@localhost>
+master commit: bd61fe94bee0556bc2f64999a4a8315b93f90f21
+master date: 2018-01-15 13:53:16 +0000
+---
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index be51d16..c22455d 100644
+--- xen/arch/x86/mm.c.orig
++++ xen/arch/x86/mm.c
+@@ -1297,11 +1297,23 @@ get_page_from_l4e(
+                                          _PAGE_USER|_PAGE_RW);      \
+     } while ( 0 )
+ 
++/*
++ * When shadowing an L4 behind the guests back (e.g. for per-pcpu
++ * purposes), we cannot efficiently sync access bit updates from hardware
++ * (on the shadow tables) back into the guest view.
++ *
++ * We therefore unconditionally set _PAGE_ACCESSED even in the guests
++ * view.  This will appear to the guest as a CPU which proactively pulls
++ * all valid L4e's into its TLB, which is compatible with the x86 ABI.
++ *
++ * At the time of writing, all PV guests set the access bit anyway, so
++ * this is no actual change in their behaviour.
++ */
+ #define adjust_guest_l4e(pl4e, d)                               \
+     do {                                                        \
+         if ( likely(l4e_get_flags((pl4e)) & _PAGE_PRESENT) &&   \
+              likely(!is_pv_32bit_domain(d)) )                   \
+-            l4e_add_flags((pl4e), _PAGE_USER);                  \
++            l4e_add_flags((pl4e), _PAGE_USER | _PAGE_ACCESSED); \
+     } while ( 0 )
+ 
+ #define unadjust_guest_l3e(pl3e, d)                                         \
Index: pkgsrc/sysutils/xenkernel48/patches/patch-XSA254-3
diff -u /dev/null pkgsrc/sysutils/xenkernel48/patches/patch-XSA254-3:1.1
--- /dev/null   Thu Jan 18 10:28:13 2018
+++ pkgsrc/sysutils/xenkernel48/patches/patch-XSA254-3  Thu Jan 18 10:28:13 2018
@@ -0,0 +1,758 @@
+$NetBSD: patch-XSA254-3,v 1.1 2018/01/18 10:28:13 bouyer Exp $
+
+From 1ba477bde737bf9b28cc455bef1e9a6bc76d66fc Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich%suse.com@localhost>
+Date: Wed, 17 Jan 2018 17:16:28 +0100
+Subject: [PATCH] x86: Meltdown band-aid against malicious 64-bit PV guests
+
+This is a very simplistic change limiting the amount of memory a running
+64-bit PV guest has mapped (and hence available for attacking): Only the
+mappings of stack, IDT, and TSS are being cloned from the direct map
+into per-CPU page tables. Guest controlled parts of the page tables are
+being copied into those per-CPU page tables upon entry into the guest.
+Cross-vCPU synchronization of top level page table entry changes is
+being effected by forcing other active vCPU-s of the guest into the
+hypervisor.
+
+The change to context_switch() isn't strictly necessary, but there's no
+reason to keep switching page tables once a PV guest is being scheduled
+out.
+
+This isn't providing full isolation yet, but it should be covering all
+pieces of information exposure of which would otherwise require an XSA.
+
+There is certainly much room for improvement, especially of performance,
+here - first and foremost suppressing all the negative effects on AMD
+systems. But in the interest of backportability (including to really old
+hypervisors, which may not even have alternative patching) any such is
+being left out here.
+
+Signed-off-by: Jan Beulich <jbeulich%suse.com@localhost>
+Reviewed-by: Andrew Cooper <andrew.cooper3%citrix.com@localhost>
+master commit: 5784de3e2067ed73efc2fe42e62831e8ae7f46c4
+master date: 2018-01-16 17:49:03 +0100
+---
+ xen/arch/x86/domain.c              |   5 +
+ xen/arch/x86/mm.c                  |  17 ++++
+ xen/arch/x86/smpboot.c             | 198 +++++++++++++++++++++++++++++++++++++
+ xen/arch/x86/x86_64/asm-offsets.c  |   2 +
+ xen/arch/x86/x86_64/compat/entry.S |  11 +++
+ xen/arch/x86/x86_64/entry.S        | 149 +++++++++++++++++++++++++++-
+ xen/include/asm-x86/asm_defns.h    |  30 ++++++
+ xen/include/asm-x86/current.h      |  12 +++
+ xen/include/asm-x86/processor.h    |   1 +
+ xen/include/asm-x86/x86_64/page.h  |   5 +-
+ 10 files changed, 424 insertions(+), 6 deletions(-)
+
+diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
+index 747cf65..bf3590d 100644
+--- xen/arch/x86/domain.c.orig
++++ xen/arch/x86/domain.c
+@@ -1951,6 +1951,9 @@ static void paravirt_ctxt_switch_to(struct vcpu *v)
+ 
+     switch_kernel_stack(v);
+ 
++    this_cpu(root_pgt)[root_table_offset(PERDOMAIN_VIRT_START)] =
++        l4e_from_page(v->domain->arch.perdomain_l3_pg, __PAGE_HYPERVISOR_RW);
++
+     cr4 = pv_guest_cr4_to_real_cr4(v);
+     if ( unlikely(cr4 != read_cr4()) )
+         write_cr4(cr4);
+@@ -2119,6 +2122,8 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
+ 
+     ASSERT(local_irq_is_enabled());
+ 
++    get_cpu_info()->xen_cr3 = 0;
++
+     cpumask_copy(&dirty_mask, next->vcpu_dirty_cpumask);
+     /* Allow at most one CPU at a time to be dirty. */
+     ASSERT(cpumask_weight(&dirty_mask) <= 1);
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index c22455d..69e1ab6 100644
+--- xen/arch/x86/mm.c.orig
++++ xen/arch/x86/mm.c
+@@ -3861,6 +3861,7 @@ long do_mmu_update(
+     struct vcpu *curr = current, *v = curr;
+     struct domain *d = v->domain, *pt_owner = d, *pg_owner;
+     struct domain_mmap_cache mapcache;
++    bool sync_guest = false;
+     uint32_t xsm_needed = 0;
+     uint32_t xsm_checked = 0;
+     int rc = put_old_guest_table(curr);
+@@ -3998,6 +3998,8 @@ long do_mmu_update(
+                 case PGT_l4_page_table:
+                     rc = mod_l4_entry(va, l4e_from_intpte(req.val), mfn,
+                                       cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
++                  if ( !rc )
++                      sync_guest = true;
+                 break;
+                 case PGT_writable_page:
+                     perfc_incr(writable_mmu_updates);
+@@ -4111,6 +4114,20 @@ long do_mmu_update(
+ 
+     domain_mmap_cache_destroy(&mapcache);
+ 
++    if ( sync_guest )
++    {
++        /*
++         * Force other vCPU-s of the affected guest to pick up L4 entry
++         * changes (if any). Issue a flush IPI with empty operation mask to
++         * facilitate this (including ourselves waiting for the IPI to
++         * actually have arrived). Utilize the fact that FLUSH_VA_VALID is
++         * meaningless without FLUSH_CACHE, but will allow to pass the no-op
++         * check in flush_area_mask().
++         */
++        flush_area_mask(pt_owner->domain_dirty_cpumask,
++                        ZERO_BLOCK_PTR, FLUSH_VA_VALID);
++    }
++
+     perfc_add(num_page_updates, i);
+ 
+  out:
+diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
+index 144258f..327c744 100644
+--- xen/arch/x86/smpboot.c.orig
++++ xen/arch/x86/smpboot.c
+@@ -319,6 +319,9 @@ void start_secondary(void *unused)
+      */
+     spin_debug_disable();
+ 
++    get_cpu_info()->xen_cr3 = 0;
++    get_cpu_info()->pv_cr3 = __pa(this_cpu(root_pgt));
++
+     load_system_tables();
+ 
+     /* Full exception support from here on in. */
+@@ -628,6 +631,187 @@ void cpu_exit_clear(unsigned int cpu)
+     set_cpu_state(CPU_STATE_DEAD);
+ }
+ 
++static int clone_mapping(const void *ptr, root_pgentry_t *rpt)
++{
++    unsigned long linear = (unsigned long)ptr, pfn;
++    unsigned int flags;
++    l3_pgentry_t *pl3e = l4e_to_l3e(idle_pg_table[root_table_offset(linear)]) +
++                         l3_table_offset(linear);
++    l2_pgentry_t *pl2e;
++    l1_pgentry_t *pl1e;
++
++    if ( linear < DIRECTMAP_VIRT_START )
++        return 0;
++
++    flags = l3e_get_flags(*pl3e);
++    ASSERT(flags & _PAGE_PRESENT);
++    if ( flags & _PAGE_PSE )
++    {
++        pfn = (l3e_get_pfn(*pl3e) & ~((1UL << (2 * PAGETABLE_ORDER)) - 1)) |
++              (PFN_DOWN(linear) & ((1UL << (2 * PAGETABLE_ORDER)) - 1));
++        flags &= ~_PAGE_PSE;
++    }
++    else
++    {
++        pl2e = l3e_to_l2e(*pl3e) + l2_table_offset(linear);
++        flags = l2e_get_flags(*pl2e);
++        ASSERT(flags & _PAGE_PRESENT);
++        if ( flags & _PAGE_PSE )
++        {
++            pfn = (l2e_get_pfn(*pl2e) & ~((1UL << PAGETABLE_ORDER) - 1)) |
++                  (PFN_DOWN(linear) & ((1UL << PAGETABLE_ORDER) - 1));
++            flags &= ~_PAGE_PSE;
++        }
++        else
++        {
++            pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(linear);
++            flags = l1e_get_flags(*pl1e);
++            if ( !(flags & _PAGE_PRESENT) )
++                return 0;
++            pfn = l1e_get_pfn(*pl1e);
++        }
++    }
++
++    if ( !(root_get_flags(rpt[root_table_offset(linear)]) & _PAGE_PRESENT) )
++    {
++        pl3e = alloc_xen_pagetable();
++        if ( !pl3e )
++            return -ENOMEM;
++        clear_page(pl3e);
++        l4e_write(&rpt[root_table_offset(linear)],
++                  l4e_from_paddr(__pa(pl3e), __PAGE_HYPERVISOR));
++    }
++    else
++        pl3e = l4e_to_l3e(rpt[root_table_offset(linear)]);
++
++    pl3e += l3_table_offset(linear);
++
++    if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) )
++    {
++        pl2e = alloc_xen_pagetable();
++        if ( !pl2e )
++            return -ENOMEM;
++        clear_page(pl2e);
++        l3e_write(pl3e, l3e_from_paddr(__pa(pl2e), __PAGE_HYPERVISOR));
++    }
++    else
++    {
++        ASSERT(!(l3e_get_flags(*pl3e) & _PAGE_PSE));
++        pl2e = l3e_to_l2e(*pl3e);
++    }
++
++    pl2e += l2_table_offset(linear);
++
++    if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
++    {
++        pl1e = alloc_xen_pagetable();
++        if ( !pl1e )
++            return -ENOMEM;
++        clear_page(pl1e);
++        l2e_write(pl2e, l2e_from_paddr(__pa(pl1e), __PAGE_HYPERVISOR));
++    }
++    else
++    {
++        ASSERT(!(l2e_get_flags(*pl2e) & _PAGE_PSE));
++        pl1e = l2e_to_l1e(*pl2e);
++    }
++
++    pl1e += l1_table_offset(linear);
++
++    if ( l1e_get_flags(*pl1e) & _PAGE_PRESENT )
++    {
++        ASSERT(l1e_get_pfn(*pl1e) == pfn);
++        ASSERT(l1e_get_flags(*pl1e) == flags);
++    }
++    else
++        l1e_write(pl1e, l1e_from_pfn(pfn, flags));
++
++    return 0;
++}
++
++DEFINE_PER_CPU(root_pgentry_t *, root_pgt);
++
++static int setup_cpu_root_pgt(unsigned int cpu)
++{
++    root_pgentry_t *rpt = alloc_xen_pagetable();
++    unsigned int off;
++    int rc;
++
++    if ( !rpt )
++        return -ENOMEM;
++
++    clear_page(rpt);
++    per_cpu(root_pgt, cpu) = rpt;
++
++    rpt[root_table_offset(RO_MPT_VIRT_START)] =
++        idle_pg_table[root_table_offset(RO_MPT_VIRT_START)];
++    /* SH_LINEAR_PT inserted together with guest mappings. */
++    /* PERDOMAIN inserted during context switch. */
++    rpt[root_table_offset(XEN_VIRT_START)] =
++        idle_pg_table[root_table_offset(XEN_VIRT_START)];
++
++    /* Install direct map page table entries for stack, IDT, and TSS. */
++    for ( off = rc = 0; !rc && off < STACK_SIZE; off += PAGE_SIZE )
++        rc = clone_mapping(__va(__pa(stack_base[cpu])) + off, rpt);
++
++    if ( !rc )
++        rc = clone_mapping(idt_tables[cpu], rpt);
++    if ( !rc )
++        rc = clone_mapping(&per_cpu(init_tss, cpu), rpt);
++
++    return rc;
++}
++
++static void cleanup_cpu_root_pgt(unsigned int cpu)
++{
++    root_pgentry_t *rpt = per_cpu(root_pgt, cpu);
++    unsigned int r;
++
++    if ( !rpt )
++        return;
++
++    per_cpu(root_pgt, cpu) = NULL;
++
++    for ( r = root_table_offset(DIRECTMAP_VIRT_START);
++          r < root_table_offset(HYPERVISOR_VIRT_END); ++r )
++    {
++        l3_pgentry_t *l3t;
++        unsigned int i3;
++
++        if ( !(root_get_flags(rpt[r]) & _PAGE_PRESENT) )
++            continue;
++
++        l3t = l4e_to_l3e(rpt[r]);
++
++        for ( i3 = 0; i3 < L3_PAGETABLE_ENTRIES; ++i3 )
++        {
++            l2_pgentry_t *l2t;
++            unsigned int i2;
++
++            if ( !(l3e_get_flags(l3t[i3]) & _PAGE_PRESENT) )
++                continue;
++
++            ASSERT(!(l3e_get_flags(l3t[i3]) & _PAGE_PSE));
++            l2t = l3e_to_l2e(l3t[i3]);
++
++            for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; ++i2 )
++            {
++                if ( !(l2e_get_flags(l2t[i2]) & _PAGE_PRESENT) )
++                    continue;
++
++                ASSERT(!(l2e_get_flags(l2t[i2]) & _PAGE_PSE));
++                free_xen_pagetable(l2e_to_l1e(l2t[i2]));
++            }
++
++            free_xen_pagetable(l2t);
++        }
++
++        free_xen_pagetable(l3t);
++    }
++
++    free_xen_pagetable(rpt);
++}
++
+ static void cpu_smpboot_free(unsigned int cpu)
+ {
+     unsigned int order, socket = cpu_to_socket(cpu);
+@@ -664,6 +848,8 @@ static void cpu_smpboot_free(unsigned int cpu)
+             free_domheap_page(mfn_to_page(mfn));
+     }
+ 
++    cleanup_cpu_root_pgt(cpu);
++
+     order = get_order_from_pages(NR_RESERVED_GDT_PAGES);
+     free_xenheap_pages(per_cpu(gdt_table, cpu), order);
+ 
+@@ -719,6 +905,9 @@ static int cpu_smpboot_alloc(unsigned int cpu)
+     set_ist(&idt_tables[cpu][TRAP_nmi],           IST_NONE);
+     set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE);
+ 
++    if ( setup_cpu_root_pgt(cpu) )
++        goto oom;
++
+     for ( stub_page = 0, i = cpu & ~(STUBS_PER_PAGE - 1);
+           i < nr_cpu_ids && i <= (cpu | (STUBS_PER_PAGE - 1)); ++i )
+         if ( cpu_online(i) && cpu_to_node(i) == node )
+@@ -773,6 +962,8 @@ static struct notifier_block cpu_smpboot_nfb = {
+ 
+ void __init smp_prepare_cpus(unsigned int max_cpus)
+ {
++    int rc;
++
+     register_cpu_notifier(&cpu_smpboot_nfb);
+ 
+     mtrr_aps_sync_begin();
+@@ -786,6 +977,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
+ 
+     stack_base[0] = stack_start;
+ 
++    rc = setup_cpu_root_pgt(0);
++    if ( rc )
++        panic("Error %d setting up PV root page table\n", rc);
++    get_cpu_info()->pv_cr3 = __pa(per_cpu(root_pgt, 0));
++
+     set_nr_sockets();
+ 
+     socket_cpumask = xzalloc_array(cpumask_t *, nr_sockets);
+@@ -850,6 +1046,8 @@ void __init smp_prepare_boot_cpu(void)
+ {
+     cpumask_set_cpu(smp_processor_id(), &cpu_online_map);
+     cpumask_set_cpu(smp_processor_id(), &cpu_present_map);
++
++    get_cpu_info()->xen_cr3 = 0;
+ }
+ 
+ static void
+diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
+index 64905c6..325abdc 100644
+--- xen/arch/x86/x86_64/asm-offsets.c.orig
++++ xen/arch/x86/x86_64/asm-offsets.c
+@@ -137,6 +137,8 @@ void __dummy__(void)
+     OFFSET(CPUINFO_processor_id, struct cpu_info, processor_id);
+     OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu);
+     OFFSET(CPUINFO_cr4, struct cpu_info, cr4);
++    OFFSET(CPUINFO_xen_cr3, struct cpu_info, xen_cr3);
++    OFFSET(CPUINFO_pv_cr3, struct cpu_info, pv_cr3);
+     DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info));
+     BLANK();
+ 
+diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
+index df693c2..c8f68a0 100644
+--- xen/arch/x86/x86_64/compat/entry.S.orig
++++ xen/arch/x86/x86_64/compat/entry.S
+@@ -205,6 +205,17 @@ ENTRY(cstar_enter)
+         pushq $0
+         movl  $TRAP_syscall, 4(%rsp)
+         SAVE_ALL
++
++        GET_STACK_END(bx)
++        mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
++        neg   %rcx
++        jz    .Lcstar_cr3_okay
++        mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
++        neg   %rcx
++        write_cr3 rcx, rdi, rsi
++        movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
++.Lcstar_cr3_okay:
++
+         GET_CURRENT(bx)
+         movq  VCPU_domain(%rbx),%rcx
+         cmpb  $0,DOMAIN_is_32bit_pv(%rcx)
+diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
+index ac9ab4c..d1afb3c 100644
+--- xen/arch/x86/x86_64/entry.S.orig
++++ xen/arch/x86/x86_64/entry.S
+@@ -36,6 +36,32 @@ ENTRY(switch_to_kernel)
+ /* %rbx: struct vcpu, interrupts disabled */
+ restore_all_guest:
+         ASSERT_INTERRUPTS_DISABLED
++
++        /* Copy guest mappings and switch to per-CPU root page table. */
++        mov   %cr3, %r9
++        GET_STACK_END(dx)
++        mov   STACK_CPUINFO_FIELD(pv_cr3)(%rdx), %rdi
++        movabs $PADDR_MASK & PAGE_MASK, %rsi
++        movabs $DIRECTMAP_VIRT_START, %rcx
++        mov   %rdi, %rax
++        and   %rsi, %rdi
++        and   %r9, %rsi
++        add   %rcx, %rdi
++        add   %rcx, %rsi
++        mov   $ROOT_PAGETABLE_FIRST_XEN_SLOT, %ecx
++        mov   root_table_offset(SH_LINEAR_PT_VIRT_START)*8(%rsi), %r8
++        mov   %r8, root_table_offset(SH_LINEAR_PT_VIRT_START)*8(%rdi)
++        rep movsq
++        mov   $ROOT_PAGETABLE_ENTRIES - \
++               ROOT_PAGETABLE_LAST_XEN_SLOT - 1, %ecx
++        sub   $(ROOT_PAGETABLE_FIRST_XEN_SLOT - \
++                ROOT_PAGETABLE_LAST_XEN_SLOT - 1) * 8, %rsi
++        sub   $(ROOT_PAGETABLE_FIRST_XEN_SLOT - \
++                ROOT_PAGETABLE_LAST_XEN_SLOT - 1) * 8, %rdi
++        rep movsq
++        mov   %r9, STACK_CPUINFO_FIELD(xen_cr3)(%rdx)
++        write_cr3 rax, rdi, rsi
++
+         RESTORE_ALL
+         testw $TRAP_syscall,4(%rsp)
+         jz    iret_exit_to_guest
+@@ -70,6 +96,22 @@ iret_exit_to_guest:
+         ALIGN
+ /* No special register assumptions. */
+ restore_all_xen:
++        /*
++         * Check whether we need to switch to the per-CPU page tables, in
++         * case we return to late PV exit code (from an NMI or #MC).
++         */
++        GET_STACK_END(ax)
++        mov   STACK_CPUINFO_FIELD(xen_cr3)(%rax), %rdx
++        mov   STACK_CPUINFO_FIELD(pv_cr3)(%rax), %rax
++        test  %rdx, %rdx
++        /*
++         * Ideally the condition would be "nsz", but such doesn't exist,
++         * so "g" will have to do.
++         */
++UNLIKELY_START(g, exit_cr3)
++        write_cr3 rax, rdi, rsi
++UNLIKELY_END(exit_cr3)
++
+         RESTORE_ALL adj=8
+         iretq
+ 
+@@ -99,7 +141,18 @@ ENTRY(lstar_enter)
+         pushq $0
+         movl  $TRAP_syscall, 4(%rsp)
+         SAVE_ALL
+-        GET_CURRENT(bx)
++
++        GET_STACK_END(bx)
++        mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
++        neg   %rcx
++        jz    .Llstar_cr3_okay
++        mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
++        neg   %rcx
++        write_cr3 rcx, rdi, rsi
++        movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
++.Llstar_cr3_okay:
++
++        __GET_CURRENT(bx)
+         testb $TF_kernel_mode,VCPU_thread_flags(%rbx)
+         jz    switch_to_kernel
+ 
+@@ -191,7 +244,18 @@ GLOBAL(sysenter_eflags_saved)
+         pushq $0
+         movl  $TRAP_syscall, 4(%rsp)
+         SAVE_ALL
+-        GET_CURRENT(bx)
++
++        GET_STACK_END(bx)
++        mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
++        neg   %rcx
++        jz    .Lsyse_cr3_okay
++        mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
++        neg   %rcx
++        write_cr3 rcx, rdi, rsi
++        movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
++.Lsyse_cr3_okay:
++
++        __GET_CURRENT(bx)
+         cmpb  $0,VCPU_sysenter_disables_events(%rbx)
+         movq  VCPU_sysenter_addr(%rbx),%rax
+         setne %cl
+@@ -227,13 +291,23 @@ ENTRY(int80_direct_trap)
+         movl  $0x80, 4(%rsp)
+         SAVE_ALL
+ 
++        GET_STACK_END(bx)
++        mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
++        neg   %rcx
++        jz    .Lint80_cr3_okay
++        mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
++        neg   %rcx
++        write_cr3 rcx, rdi, rsi
++        movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
++.Lint80_cr3_okay:
++
+         cmpb  $0,untrusted_msi(%rip)
+ UNLIKELY_START(ne, msi_check)
+         movl  $0x80,%edi
+         call  check_for_unexpected_msi
+ UNLIKELY_END(msi_check)
+ 
+-        GET_CURRENT(bx)
++        __GET_CURRENT(bx)
+ 
+         /* Check that the callback is non-null. */
+         leaq  VCPU_int80_bounce(%rbx),%rdx
+@@ -384,9 +458,27 @@ ENTRY(dom_crash_sync_extable)
+ 
+ ENTRY(common_interrupt)
+         SAVE_ALL CLAC
++
++        GET_STACK_END(14)
++        mov   STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
++        mov   %rcx, %r15
++        neg   %rcx
++        jz    .Lintr_cr3_okay
++        jns   .Lintr_cr3_load
++        mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
++        neg   %rcx
++.Lintr_cr3_load:
++        write_cr3 rcx, rdi, rsi
++        xor   %ecx, %ecx
++        mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
++        testb $3, UREGS_cs(%rsp)
++        cmovnz %rcx, %r15
++.Lintr_cr3_okay:
++
+         CR4_PV32_RESTORE
+         movq %rsp,%rdi
+         callq do_IRQ
++        mov   %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+         jmp ret_from_intr
+ 
+ /* No special register assumptions. */
+@@ -404,6 +496,23 @@ ENTRY(page_fault)
+ /* No special register assumptions. */
+ GLOBAL(handle_exception)
+         SAVE_ALL CLAC
++
++        GET_STACK_END(14)
++        mov   STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
++        mov   %rcx, %r15
++        neg   %rcx
++        jz    .Lxcpt_cr3_okay
++        jns   .Lxcpt_cr3_load
++        mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
++        neg   %rcx
++.Lxcpt_cr3_load:
++        write_cr3 rcx, rdi, rsi
++        xor   %ecx, %ecx
++        mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
++        testb $3, UREGS_cs(%rsp)
++        cmovnz %rcx, %r15
++.Lxcpt_cr3_okay:
++
+ handle_exception_saved:
+         GET_CURRENT(bx)
+         testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%rsp)
+@@ -468,6 +577,7 @@ handle_exception_saved:
+         leaq  exception_table(%rip),%rdx
+         PERFC_INCR(exceptions, %rax, %rbx)
+         callq *(%rdx,%rax,8)
++        mov   %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+         testb $3,UREGS_cs(%rsp)
+         jz    restore_all_xen
+         leaq  VCPU_trap_bounce(%rbx),%rdx
+@@ -500,6 +610,7 @@ exception_with_ints_disabled:
+         rep;  movsq                     # make room for ec/ev
+ 1:      movq  UREGS_error_code(%rsp),%rax # ec/ev
+         movq  %rax,UREGS_kernel_sizeof(%rsp)
++        mov   %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+         jmp   restore_all_xen           # return to fixup code
+ 
+ /* No special register assumptions. */
+@@ -578,6 +689,17 @@ ENTRY(double_fault)
+         movl  $TRAP_double_fault,4(%rsp)
+         /* Set AC to reduce chance of further SMAP faults */
+         SAVE_ALL STAC
++
++        GET_STACK_END(bx)
++        mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rbx
++        test  %rbx, %rbx
++        jz    .Ldblf_cr3_okay
++        jns   .Ldblf_cr3_load
++        neg   %rbx
++.Ldblf_cr3_load:
++        write_cr3 rbx, rdi, rsi
++.Ldblf_cr3_okay:
++
+         movq  %rsp,%rdi
+         call  do_double_fault
+         BUG   /* do_double_fault() shouldn't return. */
+@@ -596,10 +718,28 @@ ENTRY(nmi)
+         movl  $TRAP_nmi,4(%rsp)
+ handle_ist_exception:
+         SAVE_ALL CLAC
++
++        GET_STACK_END(14)
++        mov   STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
++        mov   %rcx, %r15
++        neg   %rcx
++        jz    .List_cr3_okay
++        jns   .List_cr3_load
++        mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
++        neg   %rcx
++.List_cr3_load:
++        write_cr3 rcx, rdi, rsi
++        movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
++.List_cr3_okay:
++
+         CR4_PV32_RESTORE
+         testb $3,UREGS_cs(%rsp)
+         jz    1f
+-        /* Interrupted guest context. Copy the context to stack bottom. */
++        /*
++         * Interrupted guest context. Clear the restore value for xen_cr3
++         * and copy the context to stack bottom.
++         */
++        xor   %r15, %r15
+         GET_CPUINFO_FIELD(guest_cpu_user_regs,di)
+         movq  %rsp,%rsi
+         movl  $UREGS_kernel_sizeof/8,%ecx
+@@ -609,6 +749,7 @@ handle_ist_exception:
+         movzbl UREGS_entry_vector(%rsp),%eax
+         leaq  exception_table(%rip),%rdx
+         callq *(%rdx,%rax,8)
++        mov   %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+         cmpb  $TRAP_nmi,UREGS_entry_vector(%rsp)
+         jne   ret_from_intr
+ 
+diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h
+index 99cb337..1c8d66c 100644
+--- xen/include/asm-x86/asm_defns.h.orig
++++ xen/include/asm-x86/asm_defns.h
+@@ -93,9 +93,30 @@ void ret_from_intr(void);
+         UNLIKELY_DONE(mp, tag);   \
+         __UNLIKELY_END(tag)
+ 
++        .equ .Lrax, 0
++        .equ .Lrcx, 1
++        .equ .Lrdx, 2
++        .equ .Lrbx, 3
++        .equ .Lrsp, 4
++        .equ .Lrbp, 5
++        .equ .Lrsi, 6
++        .equ .Lrdi, 7
++        .equ .Lr8,  8
++        .equ .Lr9,  9
++        .equ .Lr10, 10
++        .equ .Lr11, 11
++        .equ .Lr12, 12
++        .equ .Lr13, 13
++        .equ .Lr14, 14
++        .equ .Lr15, 15
++
+ #define STACK_CPUINFO_FIELD(field) (1 - CPUINFO_sizeof + CPUINFO_##field)
+ #define GET_STACK_END(reg)                        \
++        .if .Lr##reg > 8;                         \
++        movq $STACK_SIZE-1, %r##reg;              \
++        .else;                                    \
+         movl $STACK_SIZE-1, %e##reg;              \
++        .endif;                                   \
+         orq  %rsp, %r##reg
+ 
+ #define GET_CPUINFO_FIELD(field, reg)             \
+@@ -177,6 +198,15 @@ void ret_from_intr(void);
+ #define ASM_STAC ASM_AC(STAC)
+ #define ASM_CLAC ASM_AC(CLAC)
+ 
++.macro write_cr3 val:req, tmp1:req, tmp2:req
++        mov   %cr4, %\tmp1
++        mov   %\tmp1, %\tmp2
++        and   $~X86_CR4_PGE, %\tmp1
++        mov   %\tmp1, %cr4
++        mov   %\val, %cr3
++        mov   %\tmp2, %cr4
++.endm
++
+ #define CR4_PV32_RESTORE                                           \
+         667: ASM_NOP5;                                             \
+         .pushsection .altinstr_replacement, "ax";                  \
+diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h
+index e6587e6..397fa4c 100644
+--- xen/include/asm-x86/current.h.orig
++++ xen/include/asm-x86/current.h
+@@ -42,6 +42,18 @@ struct cpu_info {
+     struct vcpu *current_vcpu;
+     unsigned long per_cpu_offset;
+     unsigned long cr4;
++    /*
++     * Of the two following fields the latter is being set to the CR3 value
++     * to be used on the given pCPU for loading whenever 64-bit PV guest
++     * context is being entered. The value never changes once set.
++     * The former is the value to restore when re-entering Xen, if any. IOW
++     * its value being zero means there's nothing to restore. However, its
++     * value can also be negative, indicating to the exit-to-Xen code that
++     * restoring is not necessary, but allowing any nested entry code paths
++     * to still know the value to put back into CR3.
++     */
++    unsigned long xen_cr3;
++    unsigned long pv_cr3;
+     /* get_stack_bottom() must be 16-byte aligned */
+ };
+ 
+diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
+index fb0cd55..a73993c 100644
+--- xen/include/asm-x86/processor.h.orig
++++ xen/include/asm-x86/processor.h
+@@ -524,6 +524,7 @@ extern idt_entry_t idt_table[];
+ extern idt_entry_t *idt_tables[];
+ 
+ DECLARE_PER_CPU(struct tss_struct, init_tss);
++DECLARE_PER_CPU(root_pgentry_t *, root_pgt);
+ 
+ extern void init_int80_direct_trap(struct vcpu *v);
+ 
+diff --git a/xen/include/asm-x86/x86_64/page.h b/xen/include/asm-x86/x86_64/page.h
+index 589f225..afc77c3 100644
+--- xen/include/asm-x86/x86_64/page.h.orig
++++ xen/include/asm-x86/x86_64/page.h
+@@ -25,8 +25,8 @@
+ /* These are architectural limits. Current CPUs support only 40-bit phys. */
+ #define PADDR_BITS              52
+ #define VADDR_BITS              48
+-#define PADDR_MASK              ((1UL << PADDR_BITS)-1)
+-#define VADDR_MASK              ((1UL << VADDR_BITS)-1)
++#define PADDR_MASK              ((_AC(1,UL) << PADDR_BITS) - 1)
++#define VADDR_MASK              ((_AC(1,UL) << VADDR_BITS) - 1)
+ 
+ #define is_canonical_address(x) (((long)(x) >> 47) == ((long)(x) >> 63))
+ 
+@@ -117,6 +117,7 @@ typedef l4_pgentry_t root_pgentry_t;
+       : (((_s) < ROOT_PAGETABLE_FIRST_XEN_SLOT) ||  \
+          ((_s) > ROOT_PAGETABLE_LAST_XEN_SLOT)))
+ 
++#define root_table_offset         l4_table_offset
+ #define root_get_pfn              l4e_get_pfn
+ #define root_get_flags            l4e_get_flags
+ #define root_get_intpte           l4e_get_intpte
+-- 
+2.1.4
+
Index: pkgsrc/sysutils/xenkernel48/patches/patch-XSA254-4
diff -u /dev/null pkgsrc/sysutils/xenkernel48/patches/patch-XSA254-4:1.1
--- /dev/null   Thu Jan 18 10:28:13 2018
+++ pkgsrc/sysutils/xenkernel48/patches/patch-XSA254-4  Thu Jan 18 10:28:13 2018
@@ -0,0 +1,165 @@
+$NetBSD: patch-XSA254-4,v 1.1 2018/01/18 10:28:13 bouyer Exp $
+
+From 31d38d633a306b2b06767b5a5f5a8a00269f3c92 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich%suse.com@localhost>
+Date: Wed, 17 Jan 2018 17:17:26 +0100
+Subject: [PATCH] x86: allow Meltdown band-aid to be disabled
+
+First of all we don't need it on AMD systems. Additionally allow its use
+to be controlled by command line option. For best backportability, this
+intentionally doesn't use alternative instruction patching to achieve
+the intended effect - while we likely want it, this will be later
+follow-up.
+
+Signed-off-by: Jan Beulich <jbeulich%suse.com@localhost>
+Reviewed-by: Andrew Cooper <andrew.cooper3%citrix.com@localhost>
+master commit: e871e80c38547d9faefc6604532ba3e985e65873
+master date: 2018-01-16 17:50:59 +0100
+---
+ docs/misc/xen-command-line.markdown | 12 ++++++++++++
+ xen/arch/x86/domain.c               |  7 +++++--
+ xen/arch/x86/mm.c                   |  2 +-
+ xen/arch/x86/smpboot.c              | 17 ++++++++++++++---
+ xen/arch/x86/x86_64/entry.S         |  2 ++
+ 5 files changed, 34 insertions(+), 6 deletions(-)
+
+diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
+index 0fcdb7d..768d4f5 100644
+--- docs/misc/xen-command-line.markdown.orig
++++ docs/misc/xen-command-line.markdown
+@@ -1687,6 +1687,18 @@ In the case that x2apic is in use, this option switches between physical and
+ clustered mode.  The default, given no hint from the **FADT**, is cluster
+ mode.
+ 
++### xpti
++> `= <boolean>`
++
++> Default: `false` on AMD hardware
++> Default: `true` everywhere else
++
++Override default selection of whether to isolate 64-bit PV guest page
++tables.
++
++** WARNING: Not yet a complete isolation implementation, but better than
++nothing. **
++
+ ### xsave
+ > `= <boolean>`
+ 
+diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
+index bf3590d..8817263 100644
+--- xen/arch/x86/domain.c.orig
++++ xen/arch/x86/domain.c
+@@ -1947,12 +1947,15 @@ static void paravirt_ctxt_switch_from(struct vcpu *v)
+ 
+ static void paravirt_ctxt_switch_to(struct vcpu *v)
+ {
++    root_pgentry_t *root_pgt = this_cpu(root_pgt);
+     unsigned long cr4;
+ 
+     switch_kernel_stack(v);
+ 
+-    this_cpu(root_pgt)[root_table_offset(PERDOMAIN_VIRT_START)] =
+-        l4e_from_page(v->domain->arch.perdomain_l3_pg, __PAGE_HYPERVISOR_RW);
++    if ( root_pgt )
++        root_pgt[root_table_offset(PERDOMAIN_VIRT_START)] =
++            l4e_from_page(v->domain->arch.perdomain_l3_pg,
++                          __PAGE_HYPERVISOR_RW);
+ 
+     cr4 = pv_guest_cr4_to_real_cr4(v);
+     if ( unlikely(cr4 != read_cr4()) )
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 69e1ab6..303c551 100644
+--- xen/arch/x86/mm.c.orig
++++ xen/arch/x86/mm.c
+@@ -3999,7 +3999,7 @@ long do_mmu_update(
+                     rc = mod_l4_entry(va, l4e_from_intpte(req.val), mfn,
+                                       cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
+                   if ( !rc )
+-                      sync_guest = true;
++                        sync_guest = this_cpu(root_pgt);
+                 break;
+                 case PGT_writable_page:
+                     perfc_incr(writable_mmu_updates);
+diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
+index 327c744..c19508f 100644
+--- xen/arch/x86/smpboot.c.orig
++++ xen/arch/x86/smpboot.c
+@@ -320,7 +320,7 @@ void start_secondary(void *unused)
+     spin_debug_disable();
+ 
+     get_cpu_info()->xen_cr3 = 0;
+-    get_cpu_info()->pv_cr3 = __pa(this_cpu(root_pgt));
++    get_cpu_info()->pv_cr3 = this_cpu(root_pgt) ? __pa(this_cpu(root_pgt)) : 0;
+ 
+     load_system_tables();
+ 
+@@ -729,14 +729,20 @@ static int clone_mapping(const void *ptr, root_pgentry_t *rpt)
+     return 0;
+ }
+ 
++static __read_mostly int8_t opt_xpti = -1;
++boolean_param("xpti", opt_xpti);
+ DEFINE_PER_CPU(root_pgentry_t *, root_pgt);
+ 
+ static int setup_cpu_root_pgt(unsigned int cpu)
+ {
+-    root_pgentry_t *rpt = alloc_xen_pagetable();
++    root_pgentry_t *rpt;
+     unsigned int off;
+     int rc;
+ 
++    if ( !opt_xpti )
++        return 0;
++
++    rpt = alloc_xen_pagetable();
+     if ( !rpt )
+         return -ENOMEM;
+ 
+@@ -977,10 +983,14 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
+ 
+     stack_base[0] = stack_start;
+ 
++    if ( opt_xpti < 0 )
++        opt_xpti = boot_cpu_data.x86_vendor != X86_VENDOR_AMD;
++
+     rc = setup_cpu_root_pgt(0);
+     if ( rc )
+         panic("Error %d setting up PV root page table\n", rc);
+-    get_cpu_info()->pv_cr3 = __pa(per_cpu(root_pgt, 0));
++    if ( per_cpu(root_pgt, 0) )
++        get_cpu_info()->pv_cr3 = __pa(per_cpu(root_pgt, 0));
+ 
+     set_nr_sockets();
+ 
+@@ -1048,6 +1058,7 @@ void __init smp_prepare_boot_cpu(void)
+     cpumask_set_cpu(smp_processor_id(), &cpu_present_map);
+ 
+     get_cpu_info()->xen_cr3 = 0;
++    get_cpu_info()->pv_cr3 = 0;
+ }
+ 
+ static void
+diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
+index d1afb3c..505604f 100644
+--- xen/arch/x86/x86_64/entry.S.orig
++++ xen/arch/x86/x86_64/entry.S
+@@ -45,6 +45,7 @@ restore_all_guest:
+         movabs $DIRECTMAP_VIRT_START, %rcx
+         mov   %rdi, %rax
+         and   %rsi, %rdi
++        jz    .Lrag_keep_cr3
+         and   %r9, %rsi
+         add   %rcx, %rdi
+         add   %rcx, %rsi
+@@ -61,6 +62,7 @@ restore_all_guest:
+         rep movsq
+         mov   %r9, STACK_CPUINFO_FIELD(xen_cr3)(%rdx)
+         write_cr3 rax, rdi, rsi
++.Lrag_keep_cr3:
+ 
+         RESTORE_ALL
+         testw $TRAP_syscall,4(%rsp)
+-- 
+2.1.4
+


