pkgsrc-Changes archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

CVS commit: pkgsrc/sysutils/xenkernel411



Module Name:    pkgsrc
Committed By:   bouyer
Date:           Thu Nov 12 11:29:25 UTC 2020

Modified Files:
        pkgsrc/sysutils/xenkernel411: Makefile distinfo
        pkgsrc/sysutils/xenkernel411/patches: patch-XSA286
Added Files:
        pkgsrc/sysutils/xenkernel411/patches: patch-XSA351

Log Message:
Update patch for XSA286 from upstream
Add upstream patch for XSA351
bump PKGREVISION


To generate a diff of this commit:
cvs rdiff -u -r1.17 -r1.18 pkgsrc/sysutils/xenkernel411/Makefile
cvs rdiff -u -r1.15 -r1.16 pkgsrc/sysutils/xenkernel411/distinfo
cvs rdiff -u -r1.1 -r1.2 pkgsrc/sysutils/xenkernel411/patches/patch-XSA286
cvs rdiff -u -r0 -r1.1 pkgsrc/sysutils/xenkernel411/patches/patch-XSA351

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: pkgsrc/sysutils/xenkernel411/Makefile
diff -u pkgsrc/sysutils/xenkernel411/Makefile:1.17 pkgsrc/sysutils/xenkernel411/Makefile:1.18
--- pkgsrc/sysutils/xenkernel411/Makefile:1.17  Wed Oct 21 09:03:05 2020
+++ pkgsrc/sysutils/xenkernel411/Makefile       Thu Nov 12 11:29:25 2020
@@ -1,8 +1,8 @@
-# $NetBSD: Makefile,v 1.17 2020/10/21 09:03:05 bouyer Exp $
+# $NetBSD: Makefile,v 1.18 2020/11/12 11:29:25 bouyer Exp $
 
 VERSION=       4.11.4
 #keep >= 1 if we have security patches
-PKGREVISION=   3
+PKGREVISION=   4
 DISTNAME=      xen-${VERSION}
 PKGNAME=       xenkernel411-${VERSION}
 CATEGORIES=    sysutils

Index: pkgsrc/sysutils/xenkernel411/distinfo
diff -u pkgsrc/sysutils/xenkernel411/distinfo:1.15 pkgsrc/sysutils/xenkernel411/distinfo:1.16
--- pkgsrc/sysutils/xenkernel411/distinfo:1.15  Wed Oct 21 09:03:05 2020
+++ pkgsrc/sysutils/xenkernel411/distinfo       Thu Nov 12 11:29:25 2020
@@ -1,11 +1,11 @@
-$NetBSD: distinfo,v 1.15 2020/10/21 09:03:05 bouyer Exp $
+$NetBSD: distinfo,v 1.16 2020/11/12 11:29:25 bouyer Exp $
 
 SHA1 (xen411/xen-4.11.4.tar.gz) = 6c8cdf441621c14dc5345196b48df6982c060c4f
 RMD160 (xen411/xen-4.11.4.tar.gz) = 49819fcd1de3985d4dea370be962548c862f2933
 SHA512 (xen411/xen-4.11.4.tar.gz) = 8383f0b369fa08c8ecfdd68f902a2aaad140146a183131c50c020fe04c2f1e829c219b9bd9923fa8f1c180e1e7c6e73d0d68b7015fc39fd3b7f59e55c680cedb
 Size (xen411/xen-4.11.4.tar.gz) = 25184564 bytes
 SHA1 (patch-Config.mk) = 9372a09efd05c9fbdbc06f8121e411fcb7c7ba65
-SHA1 (patch-XSA286) = c7c5cc192be821721919cc035515ddf55d2c0658
+SHA1 (patch-XSA286) = de645acb85378b884e280be3dba8c5479334fbf8
 SHA1 (patch-XSA317) = 3a3e7bf8f115bebaf56001afcf68c2bd501c00a5
 SHA1 (patch-XSA319) = 4954bdc849666e1c735c3281256e4850c0594ee8
 SHA1 (patch-XSA320) = 38d84a2ded4ccacee455ba64eb3b369e5661fbfd
@@ -23,6 +23,7 @@ SHA1 (patch-XSA344) = cf7184ac9263b41830
 SHA1 (patch-XSA345) = 14ab754703af1045b2d049de1c6ba1c5baca5d81
 SHA1 (patch-XSA346) = c1962c037c5ab62c2f7e9a558c4565331c981be0
 SHA1 (patch-XSA347) = f3f98a794584d5d4321b95c2b1b9c88821fa567e
+SHA1 (patch-XSA351) = fca8d8c5c77ba8d6007d7643330be7f8835bbc5a
 SHA1 (patch-xen_Makefile) = 465388d80de414ca3bb84faefa0f52d817e423a6
 SHA1 (patch-xen_Rules.mk) = c743dc63f51fc280d529a7d9e08650292c171dac
 SHA1 (patch-xen_arch_x86_Rules.mk) = 0bedfc53a128a87b6a249ae04fbdf6a053bfb70b

Index: pkgsrc/sysutils/xenkernel411/patches/patch-XSA286
diff -u pkgsrc/sysutils/xenkernel411/patches/patch-XSA286:1.1 pkgsrc/sysutils/xenkernel411/patches/patch-XSA286:1.2
--- pkgsrc/sysutils/xenkernel411/patches/patch-XSA286:1.1       Wed Oct 21 09:03:05 2020
+++ pkgsrc/sysutils/xenkernel411/patches/patch-XSA286   Thu Nov 12 11:29:25 2020
@@ -1,4 +1,4 @@
-$NetBSD: patch-XSA286,v 1.1 2020/10/21 09:03:05 bouyer Exp $
+$NetBSD: patch-XSA286,v 1.2 2020/11/12 11:29:25 bouyer Exp $
 
 From: Jan Beulich <jbeulich%suse.com@localhost>
 Subject: x86: don't allow clearing of TF_kernel_mode for other than 64-bit PV
@@ -776,3 +776,227 @@ index c1e92937c0..e72c277b9f 100644
  extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
  extern l2_pgentry_t  *compat_idle_pg_table_l2;
  extern unsigned int   m2p_compat_vstart;
+From 1d021db3c8712d25e25f078833baa160c90f260f Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3%citrix.com@localhost>
+Date: Thu, 22 Oct 2020 11:28:58 +0100
+Subject: [PATCH 1/2] x86/pv: Drop FLUSH_TLB_GLOBAL in do_mmu_update() for XPTI
+
+c/s 9d1d31ad9498 "x86: slightly reduce Meltdown band-aid overhead" removed the
+use of Global TLB flushes on the Xen entry path, but added a FLUSH_TLB_GLOBAL
+to the L4 path in do_mmu_update().
+
+However, this was unnecessary.
+
+It is the guest's responsibility to perform appropriate TLB flushing if the L4
+modification altered an established mapping in a flush-relevant way.  In this
+case, an MMUEXT_OP hypercall will follow.  The case which Xen needs to cover
+is when new mappings are created, and the resync on the exit-to-guest path
+covers this correctly.
+
+There is a corner case with multiple vCPUs in hypercalls at the same time,
+which 9d1d31ad9498 changed, and this patch changes back to its original XPTI
+behaviour.
+
+Architecturally, established TLB entries can continue to be used until the
+broadcast flush has completed.  Therefore, even with concurrent hypercalls,
+the guest cannot depend on older mappings not being used until an MMUEXT_OP
+hypercall completes.  Xen's implementation of guest-initiated flushes will
+take correct effect on top of an in-progress hypercall, picking up new mapping
+setting before the other vCPU's MMUEXT_OP completes.
+
+Note: The correctness of this change is not impacted by whether XPTI uses
+global mappings or not.  Correctness there depends on the behaviour of Xen on
+the entry/exit paths when switching to/from the XPTI "shadow" pagetables.
+
+This is (not really) XSA-286 (but necessary to simplify the logic).
+
+Fixes: 9d1d31ad9498 ("x86: slightly reduce Meltdown band-aid overhead")
+Signed-off-by: Andrew Cooper <andrew.cooper3%citrix.com@localhost>
+Reviewed-by: Jan Beulich <jbeulich%suse.com@localhost>
+(cherry picked from commit 055e1c3a3d95b1e753148369fbc4ba48782dd602)
+---
+ xen/arch/x86/mm.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 5ca5c8c9a2..129da1e648 100644
+--- xen/arch/x86/mm.c.orig
++++ xen/arch/x86/mm.c
+@@ -4279,7 +4279,7 @@ long do_mmu_update(
+ 
+         cpumask_andnot(mask, pt_owner->dirty_cpumask, cpumask_of(cpu));
+         if ( !cpumask_empty(mask) )
+-            flush_mask(mask, FLUSH_TLB_GLOBAL | FLUSH_ROOT_PGTBL);
++            flush_mask(mask, FLUSH_ROOT_PGTBL);
+     }
+ 
+     perfc_add(num_page_updates, i);
+-- 
+2.20.1
+
+From e274c8bdc12eb596e55233040e8b49da27150f31 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3%citrix.com@localhost>
+Date: Mon, 19 Oct 2020 15:51:22 +0100
+Subject: [PATCH 2/2] x86/pv: Flush TLB in response to paging structure changes
+
+With MMU_UPDATE, a PV guest can make changes to higher level pagetables.  This
+is safe from Xen's point of view (as the update only affects guest mappings),
+and the guest is required to flush (if necessary) after making updates.
+
+However, Xen's use of linear pagetables (UPDATE_VA_MAPPING, GNTTABOP_map,
+writeable pagetables, etc.) is an implementation detail outside of the
+API/ABI.
+
+Changes in the paging structure require invalidations in the linear pagetable
+range for subsequent accesses into the linear pagetables to access non-stale
+mappings.  Xen must provide suitable flushing to prevent intermixed guest
+actions from accidentally accessing/modifying the wrong pagetable.
+
+For all L2 and higher modifications, flush the TLB.  PV guests cannot create
+L2 or higher entries with the Global bit set, so no mappings established in
+the linear range can be global.  (This could in principle be an order 39 flush
+starting at LINEAR_PT_VIRT_START, but no such mechanism exists in practice.)
+
+Express the necessary flushes as a set of booleans which accumulate across the
+operation.  Comment the flushing logic extensively.
+
+This is XSA-286.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3%citrix.com@localhost>
+Reviewed-by: Jan Beulich <jbeulich%suse.com@localhost>
+(cherry picked from commit 16a20963b3209788f2c0d3a3eebb7d92f03f5883)
+---
+ xen/arch/x86/mm.c | 69 ++++++++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 59 insertions(+), 10 deletions(-)
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 129da1e648..3528cf6b85 100644
+--- xen/arch/x86/mm.c.orig
++++ xen/arch/x86/mm.c
+@@ -3983,7 +3983,8 @@ long do_mmu_update(
+     struct vcpu *curr = current, *v = curr;
+     struct domain *d = v->domain, *pt_owner = d, *pg_owner;
+     mfn_t map_mfn = INVALID_MFN;
+-    bool sync_guest = false;
++    bool flush_linear_pt = false, flush_root_pt_local = false,
++        flush_root_pt_others = false;
+     uint32_t xsm_needed = 0;
+     uint32_t xsm_checked = 0;
+     int rc = put_old_guest_table(curr);
+@@ -4133,6 +4134,8 @@ long do_mmu_update(
+                         break;
+                     rc = mod_l2_entry(va, l2e_from_intpte(req.val), mfn,
+                                       cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
++                    if ( !rc )
++                        flush_linear_pt = true;
+                     break;
+ 
+                 case PGT_l3_page_table:
+@@ -4140,6 +4143,8 @@ long do_mmu_update(
+                         break;
+                     rc = mod_l3_entry(va, l3e_from_intpte(req.val), mfn,
+                                       cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
++                    if ( !rc )
++                        flush_linear_pt = true;
+                     break;
+ 
+                 case PGT_l4_page_table:
+@@ -4147,6 +4152,8 @@ long do_mmu_update(
+                         break;
+                     rc = mod_l4_entry(va, l4e_from_intpte(req.val), mfn,
+                                       cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
++                    if ( !rc )
++                        flush_linear_pt = true;
+                     if ( !rc && pt_owner->arch.pv_domain.xpti )
+                     {
+                         bool local_in_use = false;
+@@ -4154,7 +4161,7 @@ long do_mmu_update(
+                         if ( pagetable_get_pfn(curr->arch.guest_table) == mfn )
+                         {
+                             local_in_use = true;
+-                            get_cpu_info()->root_pgt_changed = true;
++                            flush_root_pt_local = true;
+                         }
+ 
+                         /*
+@@ -4166,7 +4173,7 @@ long do_mmu_update(
+                              (1 + !!(page->u.inuse.type_info & PGT_pinned) +
+                               (pagetable_get_pfn(curr->arch.guest_table_user) ==
+                                mfn) + local_in_use) )
+-                            sync_guest = true;
++                            flush_root_pt_others = true;
+                     }
+                     break;
+ 
+@@ -4268,19 +4275,61 @@ long do_mmu_update(
+     if ( va )
+         unmap_domain_page(va);
+ 
+-    if ( sync_guest )
++    /*
++     * Perform required TLB maintenance.
++     *
+     * This logic currently depends on flush_linear_pt being a superset of the
++     * flush_root_pt_* conditions.
++     *
++     * pt_owner may not be current->domain.  This may occur during
++     * construction of 32bit PV guests, or debugging of PV guests.  The
++     * behaviour cannot be correct with domain unpaused.  We therefore expect
++     * pt_owner->dirty_cpumask to be empty, but it is a waste of effort to
++     * explicitly check for, and exclude, this corner case.
++     *
++     * flush_linear_pt requires a FLUSH_TLB to all dirty CPUs.  The flush must
++     * be performed now to maintain correct behaviour across a multicall.
++     * i.e. we cannot relax FLUSH_TLB to FLUSH_ROOT_PGTBL, given that the
++     * former is a side effect of the latter, because the resync (which is in
++     * the return-to-guest path) happens too late.
++     *
++     * flush_root_pt_* requires FLUSH_ROOT_PGTBL on either the local CPU
++     * (implies pt_owner == current->domain and current->processor set in
++     * pt_owner->dirty_cpumask), and/or all *other* dirty CPUs as there are
++     * references we can't account for locally.
++     */
++    if ( flush_linear_pt /* || flush_root_pt_local || flush_root_pt_others */ )
+     {
++        unsigned int cpu = smp_processor_id();
++        cpumask_t *mask = pt_owner->dirty_cpumask;
++
+         /*
+-         * Force other vCPU-s of the affected guest to pick up L4 entry
+-         * changes (if any).
++         * Always handle local flushing separately (if applicable), to
++         * separate the flush invocations appropriately for scope of the two
++         * flush_root_pt_* variables.
+          */
+-        unsigned int cpu = smp_processor_id();
+-        cpumask_t *mask = per_cpu(scratch_cpumask, cpu);
++        if ( likely(cpumask_test_cpu(cpu, mask)) )
++        {
++            mask = per_cpu(scratch_cpumask, cpu);
+ 
+-        cpumask_andnot(mask, pt_owner->dirty_cpumask, cpumask_of(cpu));
++            cpumask_copy(mask, pt_owner->dirty_cpumask);
++            __cpumask_clear_cpu(cpu, mask);
++
++            flush_local(FLUSH_TLB |
++                        (flush_root_pt_local ? FLUSH_ROOT_PGTBL : 0));
++        }
++        else
++            /* Sanity check.  flush_root_pt_local implies local cpu is dirty. */
++            ASSERT(!flush_root_pt_local);
++
++        /* Flush the remote dirty CPUs.  Does not include the local CPU. */
+         if ( !cpumask_empty(mask) )
+-            flush_mask(mask, FLUSH_ROOT_PGTBL);
++            flush_mask(mask, FLUSH_TLB |
++                       (flush_root_pt_others ? FLUSH_ROOT_PGTBL : 0));
+     }
++    else
++        /* Sanity check.  flush_root_pt_* implies flush_linear_pt. */
++        ASSERT(!flush_root_pt_local && !flush_root_pt_others);
+ 
+     perfc_add(num_page_updates, i);
+ 
+-- 
+2.20.1
+

Added files:

Index: pkgsrc/sysutils/xenkernel411/patches/patch-XSA351
diff -u /dev/null pkgsrc/sysutils/xenkernel411/patches/patch-XSA351:1.1
--- /dev/null   Thu Nov 12 11:29:25 2020
+++ pkgsrc/sysutils/xenkernel411/patches/patch-XSA351   Thu Nov 12 11:29:25 2020
@@ -0,0 +1,283 @@
+$NetBSD: patch-XSA351,v 1.1 2020/11/12 11:29:25 bouyer Exp $
+
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau%citrix.com@localhost>
+Subject: x86/msr: fix handling of MSR_IA32_PERF_{STATUS/CTL}
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Currently a PV hardware domain can also be given control over the CPU
+frequency, and such guest is allowed to write to MSR_IA32_PERF_CTL.
+However since commit 322ec7c89f6 the default behavior has been changed
+to reject accesses to not explicitly handled MSRs, preventing PV
+guests that manage CPU frequency from reading
+MSR_IA32_PERF_{STATUS/CTL}.
+
+Additionally some HVM guests (Windows at least) will attempt to read
+MSR_IA32_PERF_CTL and will panic if given back a #GP fault:
+
+  vmx.c:3035:d8v0 RDMSR 0x00000199 unimplemented
+  d8v0 VIRIDIAN CRASH: 3b c0000096 fffff806871c1651 ffffda0253683720 0
+
+Move the handling of MSR_IA32_PERF_{STATUS/CTL} to the common MSR
+handling shared between HVM and PV guests, and add an explicit case
+for reads to MSR_IA32_PERF_{STATUS/CTL}.
+
+Restore previous behavior and allow PV guests with the required
+permissions to read the contents of the mentioned MSRs. Non privileged
+guests will get 0 when trying to read those registers, as writes to
+MSR_IA32_PERF_CTL by such guest will already be silently dropped.
+
+Fixes: 322ec7c89f6 ('x86/pv: disallow access to unknown MSRs')
+Fixes: 84e848fd7a1 ('x86/hvm: disallow access to unknown MSRs')
+Signed-off-by: Roger Pau Monné <roger.pau%citrix.com@localhost>
+Signed-off-by: Andrew Cooper <andrew.cooper3%citrix.com@localhost>
+Reviewed-by: Roger Pau Monné <roger.pau%citrix.com@localhost>
+Reviewed-by: Jan Beulich <jbeulich%suse.com@localhost>
+(cherry picked from commit 3059178798a23ba870ff86ff54d442a07e6651fc)
+
+diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
+index 256e58d82b..3495ac9f4a 100644
+--- xen/arch/x86/msr.c.orig
++++ xen/arch/x86/msr.c
+@@ -141,6 +141,7 @@ int init_vcpu_msr_policy(struct vcpu *v)
+ 
+ int guest_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val)
+ {
++    const struct domain *d = v->domain;
+     const struct cpuid_policy *cp = v->domain->arch.cpuid;
+     const struct msr_domain_policy *dp = v->domain->arch.msr;
+     const struct msr_vcpu_policy *vp = v->arch.msr;
+@@ -212,6 +213,25 @@ int guest_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val)
+         break;
+ 
+         /*
++         * These MSRs are not enumerated in CPUID.  They have been around
++         * since the Pentium 4, and implemented by other vendors.
++         *
++         * Some versions of Windows try reading these before setting up a #GP
++         * handler, and Linux has several unguarded reads as well.  Provide
++         * RAZ semantics, in general, but permit a cpufreq controller dom0 to
++         * have full access.
++         */
++    case MSR_IA32_PERF_STATUS:
++    case MSR_IA32_PERF_CTL:
++        if ( !(cp->x86_vendor & (X86_VENDOR_INTEL | X86_VENDOR_CENTAUR)) )
++            goto gp_fault;
++
++        *val = 0;
++        if ( likely(!is_cpufreq_controller(d)) || rdmsr_safe(msr, *val) == 0 )
++            break;
++        goto gp_fault;
++
++        /*
+          * TODO: Implement when we have better topology representation.
+     case MSR_INTEL_CORE_THREAD_COUNT:
+          */
+@@ -241,6 +261,7 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
+     case MSR_INTEL_CORE_THREAD_COUNT:
+     case MSR_INTEL_PLATFORM_INFO:
+     case MSR_ARCH_CAPABILITIES:
++    case MSR_IA32_PERF_STATUS:
+         /* Read-only */
+     case MSR_TSX_FORCE_ABORT:
+     case MSR_TSX_CTRL:
+@@ -345,6 +366,21 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
+         break;
+     }
+ 
++        /*
++         * This MSR is not enumerated in CPUID.  It has been around since the
++         * Pentium 4, and implemented by other vendors.
++         *
++         * To match the RAZ semantics, implement as write-discard, except for
++         * a cpufreq controller dom0 which has full access.
++         */
++    case MSR_IA32_PERF_CTL:
++        if ( !(cp->x86_vendor & (X86_VENDOR_INTEL | X86_VENDOR_CENTAUR)) )
++            goto gp_fault;
++
++        if ( likely(!is_cpufreq_controller(d)) || wrmsr_safe(msr, val) == 0 )
++            break;
++        goto gp_fault;
++
+     default:
+         return X86EMUL_UNHANDLEABLE;
+     }
+diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
+index 8120ded330..755f00db33 100644
+--- xen/arch/x86/pv/emul-priv-op.c.orig
++++ xen/arch/x86/pv/emul-priv-op.c
+@@ -816,12 +816,6 @@ static inline uint64_t guest_misc_enable(uint64_t val)
+     return val;
+ }
+ 
+-static inline bool is_cpufreq_controller(const struct domain *d)
+-{
+-    return ((cpufreq_controller == FREQCTL_dom0_kernel) &&
+-            is_hardware_domain(d));
+-}
+-
+ static int read_msr(unsigned int reg, uint64_t *val,
+                     struct x86_emulate_ctxt *ctxt)
+ {
+@@ -1096,14 +1090,6 @@ static int write_msr(unsigned int reg, uint64_t val,
+             return X86EMUL_OKAY;
+         break;
+ 
+-    case MSR_IA32_PERF_CTL:
+-        if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
+-            break;
+-        if ( likely(!is_cpufreq_controller(currd)) ||
+-             wrmsr_safe(reg, val) == 0 )
+-            return X86EMUL_OKAY;
+-        break;
+-
+     case MSR_IA32_THERM_CONTROL:
+     case MSR_IA32_ENERGY_PERF_BIAS:
+         if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
+diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
+index c0cc5d9336..7e4ad5d51b 100644
+--- xen/include/xen/sched.h.orig
++++ xen/include/xen/sched.h
+@@ -920,6 +920,22 @@ extern enum cpufreq_controller {
+     FREQCTL_none, FREQCTL_dom0_kernel, FREQCTL_xen
+ } cpufreq_controller;
+ 
++static always_inline bool is_cpufreq_controller(const struct domain *d)
++{
++    /*
++     * A PV dom0 can be nominated as the cpufreq controller, instead of using
++     * Xen's cpufreq driver, at which point dom0 gets direct access to certain
++     * MSRs.
++     *
++     * This interface only works when dom0 is identity pinned and has the same
++     * number of vCPUs as pCPUs on the system.
++     *
++     * It would be far better to paravirtualise the interface.
++     */
++    return (is_pv_domain(d) && is_hardware_domain(d) &&
++            cpufreq_controller == FREQCTL_dom0_kernel);
++}
++
+ #define CPUPOOLID_NONE    -1
+ 
+ struct cpupool *cpupool_get_by_id(int poolid);
+From: Andrew Cooper <andrew.cooper3%citrix.com@localhost>
+Subject: x86/msr: Disallow guest access to the RAPL MSRs
+
+Researchers have demonstrated using the RAPL interface to perform a
+differential power analysis attack to recover AES keys used by other cores in
+the system.
+
+Furthermore, even privileged guests cannot use this interface correctly, due
+to MSR scope and vcpu scheduling issues.  The interface would want to be
+paravirtualised to be used sensibly.
+
+Disallow access to the RAPL MSRs completely, as well as other MSRs which
+potentially access fine grain power information.
+
+This is part of XSA-351.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3%citrix.com@localhost>
+Reviewed-by: Jan Beulich <jbeulich%suse.com@localhost>
+
+diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
+index 3495ac9f4a..99c848ff41 100644
+--- xen/arch/x86/msr.c.orig
++++ xen/arch/x86/msr.c
+@@ -156,6 +156,15 @@ int guest_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val)
+     case MSR_TSX_FORCE_ABORT:
+     case MSR_TSX_CTRL:
+     case MSR_MCU_OPT_CTRL:
++    case MSR_RAPL_POWER_UNIT:
++    case MSR_PKG_POWER_LIMIT  ... MSR_PKG_POWER_INFO:
++    case MSR_DRAM_POWER_LIMIT ... MSR_DRAM_POWER_INFO:
++    case MSR_PP0_POWER_LIMIT  ... MSR_PP0_POLICY:
++    case MSR_PP1_POWER_LIMIT  ... MSR_PP1_POLICY:
++    case MSR_PLATFORM_ENERGY_COUNTER:
++    case MSR_PLATFORM_POWER_LIMIT:
++    case MSR_F15H_CU_POWER ... MSR_F15H_CU_MAX_POWER:
++    case MSR_AMD_RAPL_POWER_UNIT ... MSR_AMD_PKG_ENERGY_STATUS:
+         /* Not offered to guests. */
+         goto gp_fault;
+ 
+@@ -266,6 +275,15 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
+     case MSR_TSX_FORCE_ABORT:
+     case MSR_TSX_CTRL:
+     case MSR_MCU_OPT_CTRL:
++    case MSR_RAPL_POWER_UNIT:
++    case MSR_PKG_POWER_LIMIT  ... MSR_PKG_POWER_INFO:
++    case MSR_DRAM_POWER_LIMIT ... MSR_DRAM_POWER_INFO:
++    case MSR_PP0_POWER_LIMIT  ... MSR_PP0_POLICY:
++    case MSR_PP1_POWER_LIMIT  ... MSR_PP1_POLICY:
++    case MSR_PLATFORM_ENERGY_COUNTER:
++    case MSR_PLATFORM_POWER_LIMIT:
++    case MSR_F15H_CU_POWER ... MSR_F15H_CU_MAX_POWER:
++    case MSR_AMD_RAPL_POWER_UNIT ... MSR_AMD_PKG_ENERGY_STATUS:
+         /* Not offered to guests. */
+         goto gp_fault;
+ 
+diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
+index 480d1d8102..a685dcdcca 100644
+--- xen/include/asm-x86/msr-index.h.orig
++++ xen/include/asm-x86/msr-index.h
+@@ -96,6 +96,38 @@
+ /* Lower 6 bits define the format of the address in the LBR stack */
+ #define MSR_IA32_PERF_CAP_LBR_FORMAT  0x3f
+ 
++/*
++ * Intel Runtime Average Power Limiting (RAPL) interface.  Power plane base
++ * addresses (MSR_*_POWER_LIMIT) are model specific, but have so-far been
++ * consistent since their introduction in SandyBridge.
++ *
++ * Offsets of functionality from the power plane base is architectural, but
++ * not all power planes support all functionality.
++ */
++#define MSR_RAPL_POWER_UNIT           0x00000606
++
++#define MSR_PKG_POWER_LIMIT           0x00000610
++#define MSR_PKG_ENERGY_STATUS         0x00000611
++#define MSR_PKG_PERF_STATUS           0x00000613
++#define MSR_PKG_POWER_INFO            0x00000614
++
++#define MSR_DRAM_POWER_LIMIT          0x00000618
++#define MSR_DRAM_ENERGY_STATUS                0x00000619
++#define MSR_DRAM_PERF_STATUS          0x0000061b
++#define MSR_DRAM_POWER_INFO           0x0000061c
++
++#define MSR_PP0_POWER_LIMIT           0x00000638
++#define MSR_PP0_ENERGY_STATUS         0x00000639
++#define MSR_PP0_POLICY                        0x0000063a
++
++#define MSR_PP1_POWER_LIMIT           0x00000640
++#define MSR_PP1_ENERGY_STATUS         0x00000641
++#define MSR_PP1_POLICY                        0x00000642
++
++/* Intel Platform-wide power interface. */
++#define MSR_PLATFORM_ENERGY_COUNTER   0x0000064d
++#define MSR_PLATFORM_POWER_LIMIT      0x0000065c
++
+ #define MSR_IA32_BNDCFGS              0x00000d90
+ #define IA32_BNDCFGS_ENABLE           0x00000001
+ #define IA32_BNDCFGS_PRESERVE         0x00000002
+@@ -218,6 +250,8 @@
+ #define MSR_K8_VM_CR                  0xc0010114
+ #define MSR_K8_VM_HSAVE_PA            0xc0010117
+ 
++#define MSR_F15H_CU_POWER             0xc001007a
++#define MSR_F15H_CU_MAX_POWER         0xc001007b
+ #define MSR_AMD_FAM15H_EVNTSEL0               0xc0010200
+ #define MSR_AMD_FAM15H_PERFCTR0               0xc0010201
+ #define MSR_AMD_FAM15H_EVNTSEL1               0xc0010202
+@@ -231,6 +265,10 @@
+ #define MSR_AMD_FAM15H_EVNTSEL5               0xc001020a
+ #define MSR_AMD_FAM15H_PERFCTR5               0xc001020b
+ 
++#define MSR_AMD_RAPL_POWER_UNIT               0xc0010299
++#define MSR_AMD_CORE_ENERGY_STATUS    0xc001029a
++#define MSR_AMD_PKG_ENERGY_STATUS     0xc001029b
++
+ #define MSR_AMD_L7S0_FEATURE_MASK     0xc0011002
+ #define MSR_AMD_THRM_FEATURE_MASK     0xc0011003
+ #define MSR_K8_FEATURE_MASK           0xc0011004



Home | Main Index | Thread Index | Old Index