pkgsrc-Changes archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
CVS commit: pkgsrc/sysutils
Module Name: pkgsrc
Committed By: bouyer
Date: Fri Dec 13 13:44:21 UTC 2019
Modified Files:
pkgsrc/sysutils/xenkernel411: Makefile distinfo
pkgsrc/sysutils/xentools411: Makefile distinfo
Added Files:
pkgsrc/sysutils/xenkernel411/patches: patch-XSA307 patch-XSA308
patch-XSA309 patch-XSA310 patch-XSA311
Removed Files:
pkgsrc/sysutils/xenkernel411/patches: patch-XSA298 patch-XSA299
patch-XSA302 patch-XSA304 patch-XSA305 patch-XSA306
Log Message:
Update xenkernel411 to 4.11.3nb1, and xentools411 to 4.11.3
(PKGREVISION not reset on xenkernel411 on purpose, to emphasize that it's
not a stock Xen 4.11.3 kernel).
Changes since 4.11.2:
- includes all security patches up to XSA306
- other minor bug fixes, hardware support and performance improvements
In addition, xenkernel411 includes all security patches released since 4.11.3,
up to XSA311
To generate a diff of this commit:
cvs rdiff -u -r1.11 -r1.12 pkgsrc/sysutils/xenkernel411/Makefile
cvs rdiff -u -r1.8 -r1.9 pkgsrc/sysutils/xenkernel411/distinfo
cvs rdiff -u -r1.2 -r0 pkgsrc/sysutils/xenkernel411/patches/patch-XSA298 \
pkgsrc/sysutils/xenkernel411/patches/patch-XSA302 \
pkgsrc/sysutils/xenkernel411/patches/patch-XSA304 \
pkgsrc/sysutils/xenkernel411/patches/patch-XSA305
cvs rdiff -u -r1.1 -r0 pkgsrc/sysutils/xenkernel411/patches/patch-XSA299 \
pkgsrc/sysutils/xenkernel411/patches/patch-XSA306
cvs rdiff -u -r0 -r1.1 pkgsrc/sysutils/xenkernel411/patches/patch-XSA307 \
pkgsrc/sysutils/xenkernel411/patches/patch-XSA308 \
pkgsrc/sysutils/xenkernel411/patches/patch-XSA309 \
pkgsrc/sysutils/xenkernel411/patches/patch-XSA310 \
pkgsrc/sysutils/xenkernel411/patches/patch-XSA311
cvs rdiff -u -r1.11 -r1.12 pkgsrc/sysutils/xentools411/Makefile
cvs rdiff -u -r1.7 -r1.8 pkgsrc/sysutils/xentools411/distinfo
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: pkgsrc/sysutils/xenkernel411/Makefile
diff -u pkgsrc/sysutils/xenkernel411/Makefile:1.11 pkgsrc/sysutils/xenkernel411/Makefile:1.12
--- pkgsrc/sysutils/xenkernel411/Makefile:1.11 Fri Dec 6 17:30:28 2019
+++ pkgsrc/sysutils/xenkernel411/Makefile Fri Dec 13 13:44:21 2019
@@ -1,7 +1,7 @@
-# $NetBSD: Makefile,v 1.11 2019/12/06 17:30:28 bouyer Exp $
+# $NetBSD: Makefile,v 1.12 2019/12/13 13:44:21 bouyer Exp $
-VERSION= 4.11.2
-PKGREVISION= 3
+VERSION= 4.11.3
+PKGREVISION= 1
DISTNAME= xen-${VERSION}
PKGNAME= xenkernel411-${VERSION}
CATEGORIES= sysutils
Index: pkgsrc/sysutils/xenkernel411/distinfo
diff -u pkgsrc/sysutils/xenkernel411/distinfo:1.8 pkgsrc/sysutils/xenkernel411/distinfo:1.9
--- pkgsrc/sysutils/xenkernel411/distinfo:1.8 Fri Dec 6 17:30:28 2019
+++ pkgsrc/sysutils/xenkernel411/distinfo Fri Dec 13 13:44:21 2019
@@ -1,16 +1,15 @@
-$NetBSD: distinfo,v 1.8 2019/12/06 17:30:28 bouyer Exp $
+$NetBSD: distinfo,v 1.9 2019/12/13 13:44:21 bouyer Exp $
-SHA1 (xen411/xen-4.11.2.tar.gz) = 82766db0eca7ce65962732af8a31bb5cce1eb7ce
-RMD160 (xen411/xen-4.11.2.tar.gz) = 6dcb1ac3e72381474912607b30b59fa55d87d38b
-SHA512 (xen411/xen-4.11.2.tar.gz) = 48d3d926d35eb56c79c06d0abc6e6be2564fadb43367cc7f46881c669a75016707672179c2cca1c4cfb14af2cefd46e2e7f99470cddf7df2886d8435a2de814e
-Size (xen411/xen-4.11.2.tar.gz) = 25164925 bytes
+SHA1 (xen411/xen-4.11.3.tar.gz) = 2d77152168d6f9dcea50db9cb8e3e6a0720a4a1b
+RMD160 (xen411/xen-4.11.3.tar.gz) = cfb2e699842867b60d25a01963c564a6c5e580da
+SHA512 (xen411/xen-4.11.3.tar.gz) = 2204e490e9fc357a05983a9bf4e7345e1d364fe00400ce473988dcb9ca7d4e2b921fe10f095cbbc64248130a92d22c6f0d154dcae250a57a7f915df32e3dc436
+Size (xen411/xen-4.11.3.tar.gz) = 25180826 bytes
SHA1 (patch-Config.mk) = 9372a09efd05c9fbdbc06f8121e411fcb7c7ba65
-SHA1 (patch-XSA298) = 63e0f96ce3b945b16b98b51b423bafec14cf2be6
-SHA1 (patch-XSA299) = beb7ba1a8f9e0adda161c0da725ff053e674067e
-SHA1 (patch-XSA302) = 12fbb7dfea27f53c70c8115487a2e30595549c2b
-SHA1 (patch-XSA304) = f2c22732227e11a3e77c630f0264a689eed53399
-SHA1 (patch-XSA305) = eb5e0096cbf501fcbd7a5c5f9d1f932b557636b6
-SHA1 (patch-XSA306) = f57201b2ae5f6435ce6ba3c6aac3e9e10cdba3fb
+SHA1 (patch-XSA307) = afd88b8294b0dbbc32e1d1aa74eb887d2da6695a
+SHA1 (patch-XSA308) = bda9ef732e0b6578ce8f7f0f7aa0a4189da41e86
+SHA1 (patch-XSA309) = 78cf7306e9d1efcbf2ebf425025d46948ae83019
+SHA1 (patch-XSA310) = 77b711f4b75de1d473a6988eb6f2b48e37cc353a
+SHA1 (patch-XSA311) = 4d3e6cc39c2b95cb3339961271df2bc885667927
SHA1 (patch-xen_Makefile) = 465388d80de414ca3bb84faefa0f52d817e423a6
SHA1 (patch-xen_Rules.mk) = c743dc63f51fc280d529a7d9e08650292c171dac
SHA1 (patch-xen_arch_x86_Rules.mk) = 0bedfc53a128a87b6a249ae04fbdf6a053bfb70b
Index: pkgsrc/sysutils/xentools411/Makefile
diff -u pkgsrc/sysutils/xentools411/Makefile:1.11 pkgsrc/sysutils/xentools411/Makefile:1.12
--- pkgsrc/sysutils/xentools411/Makefile:1.11 Mon Nov 4 21:28:58 2019
+++ pkgsrc/sysutils/xentools411/Makefile Fri Dec 13 13:44:21 2019
@@ -1,6 +1,6 @@
-# $NetBSD: Makefile,v 1.11 2019/11/04 21:28:58 rillig Exp $
+# $NetBSD: Makefile,v 1.12 2019/12/13 13:44:21 bouyer Exp $
#
-VERSION= 4.11.2
+VERSION= 4.11.3
VERSION_IPXE= 356f6c1b64d7a97746d1816cef8ca22bdd8d0b5d
DIST_IPXE= ipxe-git-${VERSION_IPXE}.tar.gz
Index: pkgsrc/sysutils/xentools411/distinfo
diff -u pkgsrc/sysutils/xentools411/distinfo:1.7 pkgsrc/sysutils/xentools411/distinfo:1.8
--- pkgsrc/sysutils/xentools411/distinfo:1.7 Sun Nov 3 10:07:16 2019
+++ pkgsrc/sysutils/xentools411/distinfo Fri Dec 13 13:44:21 2019
@@ -1,13 +1,13 @@
-$NetBSD: distinfo,v 1.7 2019/11/03 10:07:16 maya Exp $
+$NetBSD: distinfo,v 1.8 2019/12/13 13:44:21 bouyer Exp $
SHA1 (xen411/ipxe-git-356f6c1b64d7a97746d1816cef8ca22bdd8d0b5d.tar.gz) = 272b8c904dc0127690eca2c5c20c67479e40da34
RMD160 (xen411/ipxe-git-356f6c1b64d7a97746d1816cef8ca22bdd8d0b5d.tar.gz) = cfcb4a314c15da19b36132b27126f3bd9699d0e5
SHA512 (xen411/ipxe-git-356f6c1b64d7a97746d1816cef8ca22bdd8d0b5d.tar.gz) =
bbcce5e55040e7e29adebd4a5253a046016a6e2e7ff34cf801a42d147e1ec1af57e0297318249bfa9c5bbeac969fe4b37c18cbf845a80b2136d65387a4fc31da
Size (xen411/ipxe-git-356f6c1b64d7a97746d1816cef8ca22bdd8d0b5d.tar.gz) = 3732065 bytes
-SHA1 (xen411/xen-4.11.2.tar.gz) = 82766db0eca7ce65962732af8a31bb5cce1eb7ce
-RMD160 (xen411/xen-4.11.2.tar.gz) = 6dcb1ac3e72381474912607b30b59fa55d87d38b
-SHA512 (xen411/xen-4.11.2.tar.gz) = 48d3d926d35eb56c79c06d0abc6e6be2564fadb43367cc7f46881c669a75016707672179c2cca1c4cfb14af2cefd46e2e7f99470cddf7df2886d8435a2de814e
-Size (xen411/xen-4.11.2.tar.gz) = 25164925 bytes
+SHA1 (xen411/xen-4.11.3.tar.gz) = 2d77152168d6f9dcea50db9cb8e3e6a0720a4a1b
+RMD160 (xen411/xen-4.11.3.tar.gz) = cfb2e699842867b60d25a01963c564a6c5e580da
+SHA512 (xen411/xen-4.11.3.tar.gz) = 2204e490e9fc357a05983a9bf4e7345e1d364fe00400ce473988dcb9ca7d4e2b921fe10f095cbbc64248130a92d22c6f0d154dcae250a57a7f915df32e3dc436
+Size (xen411/xen-4.11.3.tar.gz) = 25180826 bytes
SHA1 (patch-.._ipxe_src_core_settings.c) = 1eab2fbd8b22dde2b8aa830ae7701603486f74e4
SHA1 (patch-.._ipxe_src_net_fcels.c) = eda41b25c3d5f5bef33caa9a6af28c40cb91e66b
SHA1 (patch-Config.mk) = c41005a60de2f94a72b0206030eb021c137653d3
Added files:
Index: pkgsrc/sysutils/xenkernel411/patches/patch-XSA307
diff -u /dev/null pkgsrc/sysutils/xenkernel411/patches/patch-XSA307:1.1
--- /dev/null Fri Dec 13 13:44:21 2019
+++ pkgsrc/sysutils/xenkernel411/patches/patch-XSA307 Fri Dec 13 13:44:21 2019
@@ -0,0 +1,101 @@
+$NetBSD: patch-XSA307,v 1.1 2019/12/13 13:44:21 bouyer Exp $
+
+From: Jan Beulich <jbeulich%suse.com@localhost>
+Subject: x86+Arm32: make find_next_{,zero_}bit() have well defined behavior
+
+These functions getting used with the 2nd and 3rd arguments being equal
+wasn't well defined: Arm64 reliably returns the value of the 2nd
+argument in this case, while on x86 for bitmaps up to 64 bits wide the
+return value was undefined (due to the undefined behavior of a shift of
+a value by the number of bits it's wide) when the incoming value was 64.
+On Arm32 an actual out of bounds access would happen when the
+size/offset value is a multiple of 32; if this access doesn't fault, the
+return value would have been sufficiently correct afaict.
+
+Make the functions consistently tolerate the last two arguments being
+equal (and in fact the 3rd argument being greater or equal to the 2nd),
+in favor of finding and fixing all the use sites that violate the
+original more strict assumption.
+
+This is XSA-307.
+
+Signed-off-by: Jan Beulich <jbeulich%suse.com@localhost>
+Acked-by: Julien Grall <julien%xen.org@localhost>
+---
+The most obvious (albeit still indirect) exposure to guests is
+evtchn_check_pollers(), which imo makes this a security issue at least
+for Arm32.
+
+This was originally already discussed between (at least) Andrew and me,
+and I don't really recall who brought up the issue first.
+
+Note that Arm's Linux origin of the code may call for syncing
+publication with them. Then again I don't want to tell them just to see
+them go public ahead of us.
+
+--- xen/arch/arm/arm32/lib/findbit.S.orig
++++ xen/arch/arm/arm32/lib/findbit.S
+@@ -42,8 +42,8 @@ ENDPROC(_find_first_zero_bit_le)
+ * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset)
+ */
+ ENTRY(_find_next_zero_bit_le)
+- teq r1, #0
+- beq 3b
++ cmp r1, r2
++ bls 3b
+ ands ip, r2, #7
+ beq 1b @ If new byte, goto old routine
+ ARM( ldrb r3, [r0, r2, lsr #3] )
+@@ -83,8 +83,8 @@ ENDPROC(_find_first_bit_le)
+ * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset)
+ */
+ ENTRY(_find_next_bit_le)
+- teq r1, #0
+- beq 3b
++ cmp r1, r2
++ bls 3b
+ ands ip, r2, #7
+ beq 1b @ If new byte, goto old routine
+ ARM( ldrb r3, [r0, r2, lsr #3] )
+@@ -117,8 +117,8 @@ ENTRY(_find_first_zero_bit_be)
+ ENDPROC(_find_first_zero_bit_be)
+
+ ENTRY(_find_next_zero_bit_be)
+- teq r1, #0
+- beq 3b
++ cmp r1, r2
++ bls 3b
+ ands ip, r2, #7
+ beq 1b @ If new byte, goto old routine
+ eor r3, r2, #0x18 @ big endian byte ordering
+@@ -151,8 +151,8 @@ ENTRY(_find_first_bit_be)
+ ENDPROC(_find_first_bit_be)
+
+ ENTRY(_find_next_bit_be)
+- teq r1, #0
+- beq 3b
++ cmp r1, r2
++ bls 3b
+ ands ip, r2, #7
+ beq 1b @ If new byte, goto old routine
+ eor r3, r2, #0x18 @ big endian byte ordering
+--- xen/include/asm-x86/bitops.h.orig
++++ xen/include/asm-x86/bitops.h
+@@ -358,7 +358,7 @@ static always_inline unsigned int __scan
+ const unsigned long *a__ = (addr); \
+ unsigned int s__ = (size); \
+ unsigned int o__ = (off); \
+- if ( __builtin_constant_p(size) && !s__ ) \
++ if ( o__ >= s__ ) \
+ r__ = s__; \
+ else if ( __builtin_constant_p(size) && s__ <= BITS_PER_LONG ) \
+ r__ = o__ + __scanbit(*(const unsigned long *)(a__) >> o__, s__); \
+@@ -390,7 +390,7 @@ static always_inline unsigned int __scan
+ const unsigned long *a__ = (addr); \
+ unsigned int s__ = (size); \
+ unsigned int o__ = (off); \
+- if ( __builtin_constant_p(size) && !s__ ) \
++ if ( o__ >= s__ ) \
+ r__ = s__; \
+ else if ( __builtin_constant_p(size) && s__ <= BITS_PER_LONG ) \
+ r__ = o__ + __scanbit(~*(const unsigned long *)(a__) >> o__, s__); \
Index: pkgsrc/sysutils/xenkernel411/patches/patch-XSA308
diff -u /dev/null pkgsrc/sysutils/xenkernel411/patches/patch-XSA308:1.1
--- /dev/null Fri Dec 13 13:44:21 2019
+++ pkgsrc/sysutils/xenkernel411/patches/patch-XSA308 Fri Dec 13 13:44:21 2019
@@ -0,0 +1,76 @@
+$NetBSD: patch-XSA308,v 1.1 2019/12/13 13:44:21 bouyer Exp $
+
+From: Andrew Cooper <andrew.cooper3%citrix.com@localhost>
+Subject: x86/vtx: Work around SingleStep + STI/MovSS VMEntry failures
+
+See patch comment for technical details.
+
+Concerning the timeline, this was first discovered in the aftermath of
+XSA-156 which caused #DB to be intercepted unconditionally, but only in
+its SingleStep + STI form which is restricted to privileged software.
+
+After working with Intel and identifying the problematic vmentry check,
+this workaround was suggested, and the patch was posted in an RFC
+series. Outstanding work for that series (not breaking Introspection)
+is still pending, and this fix from it (which wouldn't have been good
+enough in its original form) wasn't committed.
+
+A vmentry failure was reported to xen-devel, and debugging identified
+this bug in its SingleStep + MovSS form by way of INT1, which does not
+involve the use of any privileged instructions, and proving this to be a
+security issue.
+
+This is XSA-308
+
+Reported-by: Håkon Alstadheim <hakon%alstadheim.priv.no@localhost>
+Signed-off-by: Andrew Cooper <andrew.cooper3%citrix.com@localhost>
+Reviewed-by: Jan Beulich <jbeulich%suse.com@localhost>
+Acked-by: Kevin Tian <kevin.tian%intel.com@localhost>
+
+diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
+index 6a5eeb5c13..59b836f43f 100644
+--- xen/arch/x86/hvm/vmx/vmx.c.orig
++++ xen/arch/x86/hvm/vmx/vmx.c
+@@ -3816,6 +3816,42 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
+ HVMTRACE_1D(TRAP_DEBUG, exit_qualification);
+ __restore_debug_registers(v);
+ write_debugreg(6, exit_qualification | DR_STATUS_RESERVED_ONE);
++
++ /*
++ * Work around SingleStep + STI/MovSS VMEntry failures.
++ *
++ * We intercept #DB unconditionally to work around CVE-2015-8104 /
++ * XSA-156 (guest-kernel induced host DoS).
++ *
++ * STI/MovSS shadows block/defer interrupts/exceptions (exact
++ * details are complicated and poorly documented). Debug
++ * exceptions delayed for any reason are stored in the
++ * PENDING_DBG_EXCEPTIONS field.
++ *
++ * The falling edge of PENDING_DBG causes #DB to be delivered,
++ * resulting in a VMExit, as #DB is intercepted. The VMCS still
++ * reports blocked-by-STI/MovSS.
++ *
++ * The VMEntry checks when EFLAGS.TF is set don't like a VMCS in
++ * this state. Despite a #DB queued in VMENTRY_INTR_INFO, the
++ * state is rejected as DR6.BS isn't pending. Fix this up.
++ */
++ if ( unlikely(regs->eflags & X86_EFLAGS_TF) )
++ {
++ unsigned long int_info;
++
++ __vmread(GUEST_INTERRUPTIBILITY_INFO, &int_info);
++
++ if ( int_info & (VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS) )
++ {
++ unsigned long pending_dbg;
++
++ __vmread(GUEST_PENDING_DBG_EXCEPTIONS, &pending_dbg);
++ __vmwrite(GUEST_PENDING_DBG_EXCEPTIONS,
++ pending_dbg | DR_STEP);
++ }
++ }
++
+ if ( !v->domain->debugger_attached )
+ {
+ unsigned long insn_len = 0;
Index: pkgsrc/sysutils/xenkernel411/patches/patch-XSA309
diff -u /dev/null pkgsrc/sysutils/xenkernel411/patches/patch-XSA309:1.1
--- /dev/null Fri Dec 13 13:44:21 2019
+++ pkgsrc/sysutils/xenkernel411/patches/patch-XSA309 Fri Dec 13 13:44:21 2019
@@ -0,0 +1,60 @@
+$NetBSD: patch-XSA309,v 1.1 2019/12/13 13:44:21 bouyer Exp $
+
+From 523e3974ed2213719a19218f5b246e382ceef18a Mon Sep 17 00:00:00 2001
+From: George Dunlap <george.dunlap%citrix.com@localhost>
+Date: Wed, 30 Oct 2019 17:05:28 +0000
+Subject: [PATCH] x86/mm: Don't reset linear_pt_count on partial validation
+
+"Linear pagetables" is a technique which involves either pointing a
+pagetable at itself, or to another pagetable the same or higher level.
+Xen has limited support for linear pagetables: A page may either point
+to itself, or point to another page of the same level (i.e., L2 to L2,
+L3 to L3, and so on).
+
+XSA-240 introduced an additional restriction that limited the "depth"
+of such chains by allowing pages to either *point to* other pages of
+the same level, or *be pointed to* by other pages of the same level,
+but not both. To implement this, we keep track of the number of
+outstanding times a page points to or is pointed to another page
+table, to prevent both from happening at the same time.
+
+Unfortunately, the original commit introducing this reset this count
+when resuming validation of a partially-validated pagetable, dropping
+some "linear_pt_entry" counts.
+
+On debug builds on systems where guests used this feature, this might
+lead to crashes that look like this:
+
+ Assertion 'oc > 0' failed at mm.c:874
+
+Worse, if an attacker could engineer such a situation to occur, they
+might be able to make loops or other abitrary chains of linear
+pagetables, leading to the denial-of-service situation outlined in
+XSA-240.
+
+This is XSA-309.
+
+Reported-by: Manuel Bouyer <bouyer%antioche.eu.org@localhost>
+Signed-off-by: George Dunlap <george.dunlap%citrix.com@localhost>
+Reviewed-by: Jan Beulich <jbeulich%suse.com@localhost>
+---
+ xen/arch/x86/mm.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 7d4dd80a85..01393fb0da 100644
+--- xen/arch/x86/mm.c.orig
++++ xen/arch/x86/mm.c
+@@ -3059,8 +3059,8 @@ static int _get_page_type(struct page_info *page, unsigned long type,
+ {
+ page->nr_validated_ptes = 0;
+ page->partial_flags = 0;
++ page->linear_pt_count = 0;
+ }
+- page->linear_pt_count = 0;
+ rc = alloc_page_type(page, type, preemptible);
+ }
+
+--
+2.24.0
+
Index: pkgsrc/sysutils/xenkernel411/patches/patch-XSA310
diff -u /dev/null pkgsrc/sysutils/xenkernel411/patches/patch-XSA310:1.1
--- /dev/null Fri Dec 13 13:44:21 2019
+++ pkgsrc/sysutils/xenkernel411/patches/patch-XSA310 Fri Dec 13 13:44:21 2019
@@ -0,0 +1,348 @@
+$NetBSD: patch-XSA310,v 1.1 2019/12/13 13:44:21 bouyer Exp $
+
+From 7c537dc8d28a03064a14171ed5c6fc329531816a Mon Sep 17 00:00:00 2001
+From: George Dunlap <george.dunlap%citrix.com@localhost>
+Date: Tue, 19 Nov 2019 11:40:34 +0000
+Subject: [PATCH 1/3] x86/mm: Set old_guest_table when destroying vcpu
+ pagetables
+
+Changeset 6c4efc1eba ("x86/mm: Don't drop a type ref unless you held a
+ref to begin with"), part of XSA-299, changed the calling discipline
+of put_page_type() such that if put_page_type() returned -ERESTART
+(indicating a partially de-validated page), subsequent calls to
+put_page_type() must be called with PTF_partial_set. If called on a
+partially de-validated page but without PTF_partial_set, Xen will
+BUG(), because to do otherwise would risk opening up the kind of
+privilege escalation bug described in XSA-299.
+
+One place this was missed was in vcpu_destroy_pagetables().
+put_page_and_type_preemptible() is called, but on -ERESTART, the
+entire operation is simply restarted, causing put_page_type() to be
+called on a partially de-validated page without PTF_partial_set. The
+result was that if such an operation were interrupted, Xen would hit a
+BUG().
+
+Fix this by having vcpu_destroy_pagetables() consistently pass off
+interrupted de-validations to put_old_page_type():
+- Unconditionally clear references to the page, even if
+ put_page_and_type failed
+- Set old_guest_table and old_guest_table_partial appropriately
+
+While here, do some refactoring:
+
+ - Move clearing of arch.cr3 to the top of the function
+
+ - Now that clearing is unconditional, move the unmap to the same
+ conditional as the l4tab mapping. This also allows us to reduce
+ the scope of the l4tab variable.
+
+ - Avoid code duplication by looping to drop references on
+ guest_table_user
+
+This is part of XSA-310.
+
+Reported-by: Sarah Newman <srn%prgmr.com@localhost>
+Signed-off-by: George Dunlap <george.dunlap%citrix.com@localhost>
+Reviewed-by: Jan Beulich <jbeulich%suse.com@localhost>
+---
+Added in v2.
+
+Changes in v3:
+- Minor comment / whitespace fixes
+---
+ xen/arch/x86/mm.c | 75 +++++++++++++++++++++++++++++------------------
+ 1 file changed, 47 insertions(+), 28 deletions(-)
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 01393fb0da..a759afc9e3 100644
+--- xen/arch/x86/mm.c.orig
++++ xen/arch/x86/mm.c
+@@ -3142,40 +3142,36 @@ int put_old_guest_table(struct vcpu *v)
+ int vcpu_destroy_pagetables(struct vcpu *v)
+ {
+ unsigned long mfn = pagetable_get_pfn(v->arch.guest_table);
+- struct page_info *page;
+- l4_pgentry_t *l4tab = NULL;
++ struct page_info *page = NULL;
+ int rc = put_old_guest_table(v);
++ bool put_guest_table_user = false;
+
+ if ( rc )
+ return rc;
+
++ v->arch.cr3 = 0;
++
++ /*
++ * Get the top-level guest page; either the guest_table itself, for
++ * 64-bit, or the top-level l4 entry for 32-bit. Either way, remove
++ * the reference to that page.
++ */
+ if ( is_pv_32bit_vcpu(v) )
+ {
+- l4tab = map_domain_page(_mfn(mfn));
+- mfn = l4e_get_pfn(*l4tab);
+- }
++ l4_pgentry_t *l4tab = map_domain_page(_mfn(mfn));
+
+- if ( mfn )
+- {
+- page = mfn_to_page(_mfn(mfn));
+- if ( paging_mode_refcounts(v->domain) )
+- put_page(page);
+- else
+- rc = put_page_and_type_preemptible(page);
+- }
+-
+- if ( l4tab )
+- {
+- if ( !rc )
+- l4e_write(l4tab, l4e_empty());
++ mfn = l4e_get_pfn(*l4tab);
++ l4e_write(l4tab, l4e_empty());
+ unmap_domain_page(l4tab);
+ }
+- else if ( !rc )
++ else
+ {
+ v->arch.guest_table = pagetable_null();
++ put_guest_table_user = true;
++ }
+
+- /* Drop ref to guest_table_user (from MMUEXT_NEW_USER_BASEPTR) */
+- mfn = pagetable_get_pfn(v->arch.guest_table_user);
++ /* Free that page if non-zero */
++ do {
+ if ( mfn )
+ {
+ page = mfn_to_page(_mfn(mfn));
+@@ -3183,18 +3179,41 @@ int vcpu_destroy_pagetables(struct vcpu *v)
+ put_page(page);
+ else
+ rc = put_page_and_type_preemptible(page);
++ mfn = 0;
+ }
+- if ( !rc )
+- v->arch.guest_table_user = pagetable_null();
+- }
+
+- v->arch.cr3 = 0;
++ if ( !rc && put_guest_table_user )
++ {
++ /* Drop ref to guest_table_user (from MMUEXT_NEW_USER_BASEPTR) */
++ mfn = pagetable_get_pfn(v->arch.guest_table_user);
++ v->arch.guest_table_user = pagetable_null();
++ put_guest_table_user = false;
++ }
++ } while ( mfn );
+
+ /*
+- * put_page_and_type_preemptible() is liable to return -EINTR. The
+- * callers of us expect -ERESTART so convert it over.
++ * If a "put" operation was interrupted, finish things off in
++ * put_old_guest_table() when the operation is restarted.
+ */
+- return rc != -EINTR ? rc : -ERESTART;
++ switch ( rc )
++ {
++ case -EINTR:
++ case -ERESTART:
++ v->arch.old_guest_ptpg = NULL;
++ v->arch.old_guest_table = page;
++ v->arch.old_guest_table_partial = (rc == -ERESTART);
++ rc = -ERESTART;
++ break;
++ default:
++ /*
++ * Failure to 'put' a page may cause it to leak, but that's
++ * less bad than a crash.
++ */
++ ASSERT(rc == 0);
++ break;
++ }
++
++ return rc;
+ }
+
+ int new_guest_cr3(mfn_t mfn)
+--
+2.24.0
+
+From 128cb126aee9b4a2855ab898fdfbfe7009fbf1f5 Mon Sep 17 00:00:00 2001
+From: George Dunlap <george.dunlap%citrix.com@localhost>
+Date: Thu, 31 Oct 2019 11:17:38 +0000
+Subject: [PATCH 2/3] x86/mm: alloc/free_lN_table: Retain partial_flags on
+ -EINTR
+
+When validating or de-validating pages (in alloc_lN_table and
+free_lN_table respectively), the `partial_flags` local variable is
+used to keep track of whether the "current" PTE started the entire
+operation in a "may be partial" state.
+
+One of the patches in XSA-299 addressed the fact that it is possible
+for a previously-partially-validated entry to subsequently be found to
+have invalid entries (indicated by returning -EINVAL); in which case
+page->partial_flags needs to be set to indicate that the current PTE
+may have the partial bit set (and thus _put_page_type() should be
+called with PTF_partial_set).
+
+Unfortunately, the patches in XSA-299 assumed that once
+put_page_from_lNe() returned -ERESTART on a page, it was not possible
+for it to return -EINTR. This turns out to be true for
+alloc_lN_table() and free_lN_table, but not for _get_page_type() and
+_put_page_type(): both can return -EINTR when called on pages with
+PGT_partial set. In these cases, the pages PGT_partial will still be
+set; failing to set partial_flags appropriately may allow an attacker
+to do a privilege escalation similar to those described in XSA-299.
+
+Fix this by always copying the local partial_flags variable into
+page->partial_flags when exiting early.
+
+NB that on the "get" side, no adjustment to nr_validated_entries is
+needed: whether pte[i] is partially validated or entirely
+un-validated, we want nr_validated_entries = i. On the "put" side,
+however, we need to adjust nr_validated_entries appropriately: if
+pte[i] is entirely validated, we want nr_validated_entries = i + 1; if
+pte[i] is partially validated, we want nr_validated_entries = i.
+
+This is part of XSA-310.
+
+Reported-by: Sarah Newman <srn%prgmr.com@localhost>
+Signed-off-by: George Dunlap <george.dunlap%citrix.com@localhost>
+Reviewed-by: Jan Beulich <jbeulich%suse.com@localhost>
+---
+ xen/arch/x86/mm.c | 16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index a759afc9e3..97c8d73b7b 100644
+--- xen/arch/x86/mm.c.orig
++++ xen/arch/x86/mm.c
+@@ -1557,7 +1557,7 @@ static int alloc_l2_table(struct page_info *page, unsigned long type)
+ if ( rc == -EINTR && i )
+ {
+ page->nr_validated_ptes = i;
+- page->partial_flags = 0;
++ page->partial_flags = partial_flags;;
+ rc = -ERESTART;
+ }
+ else if ( rc < 0 && rc != -EINTR )
+@@ -1660,7 +1660,7 @@ static int alloc_l3_table(struct page_info *page)
+ else if ( rc == -EINTR && i )
+ {
+ page->nr_validated_ptes = i;
+- page->partial_flags = 0;
++ page->partial_flags = partial_flags;
+ rc = -ERESTART;
+ }
+ if ( rc < 0 )
+@@ -1982,8 +1982,8 @@ static int free_l2_table(struct page_info *page)
+ }
+ else if ( rc == -EINTR && i < L2_PAGETABLE_ENTRIES - 1 )
+ {
+- page->nr_validated_ptes = i + 1;
+- page->partial_flags = 0;
++ page->nr_validated_ptes = i + !(partial_flags & PTF_partial_set);
++ page->partial_flags = partial_flags;
+ rc = -ERESTART;
+ }
+
+@@ -2030,8 +2030,8 @@ static int free_l3_table(struct page_info *page)
+ }
+ else if ( rc == -EINTR && i < L3_PAGETABLE_ENTRIES - 1 )
+ {
+- page->nr_validated_ptes = i + 1;
+- page->partial_flags = 0;
++ page->nr_validated_ptes = i + !(partial_flags & PTF_partial_set);
++ page->partial_flags = partial_flags;
+ rc = -ERESTART;
+ }
+ return rc > 0 ? 0 : rc;
+@@ -2061,8 +2061,8 @@ static int free_l4_table(struct page_info *page)
+ }
+ else if ( rc == -EINTR && i < L4_PAGETABLE_ENTRIES - 1 )
+ {
+- page->nr_validated_ptes = i + 1;
+- page->partial_flags = 0;
++ page->nr_validated_ptes = i + !(partial_flags & PTF_partial_set);
++ page->partial_flags = partial_flags;
+ rc = -ERESTART;
+ }
+
+--
+2.24.0
+
+From e9f835982a726ae16997c566b5eafab74f8b4cb7 Mon Sep 17 00:00:00 2001
+From: George Dunlap <george.dunlap%citrix.com@localhost>
+Date: Mon, 28 Oct 2019 14:33:51 +0000
+Subject: [PATCH 3/3] x86/mm: relinquish_memory: Grab an extra type ref when
+ setting PGT_partial
+
+The PGT_partial bit in page->type_info holds both a type count and a
+general ref count. During domain tear-down, when free_page_type()
+returns -ERESTART, relinquish_memory() correctly handles the general
+ref count, but fails to grab an extra type count when setting
+PGT_partial. When this bit is eventually cleared, type_count underflows
+and triggers the following BUG in page_alloc.c:free_domheap_pages():
+
+ BUG_ON((pg[i].u.inuse.type_info & PGT_count_mask) != 0);
+
+As far as we can tell, this page underflow cannot be exploited any any
+other way: The page can't be used as a pagetable by the dying domain
+because it's dying; it can't be used as a pagetable by any other
+domain since it belongs to the dying domain; and ownership can't
+transfer to any other domain without hitting the BUG_ON() in
+free_domheap_pages().
+
+(steal_page() won't work on a page in this state, since it requires
+PGC_allocated to be set, and PGC_allocated will already have been
+cleared.)
+
+Fix this by grabbing an extra type ref if setting PGT_partial in
+relinquish_memory.
+
+This is part of XSA-310.
+
+Reported-by: Sarah Newman <srn%prgmr.com@localhost>
+Signed-off-by: George Dunlap <george.dunlap%citrix.com@localhost>
+Acked-by: Jan Beulich <jbeulich%suse.com@localhost>
+---
+v2:
+- Move discussion of potential exploits into the commit message
+- Keep PGT_partial and put_page() ordering
+---
+ xen/arch/x86/domain.c | 19 +++++++++++++++++++
+ 1 file changed, 19 insertions(+)
+
+diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
+index f1dd86e12e..51880fc50d 100644
+--- xen/arch/x86/domain.c.orig
++++ xen/arch/x86/domain.c
+@@ -2049,6 +2049,25 @@ static int relinquish_memory(
+ goto out;
+ case -ERESTART:
+ page_list_add(page, list);
++ /*
++ * PGT_partial holds a type ref and a general ref.
++ * If we came in with PGT_partial set, then we 1)
++ * don't need to grab an extra type count, and 2)
++ * do need to drop the extra page ref we grabbed
++ * at the top of the loop. If we didn't come in
++ * with PGT_partial set, we 1) do need to drab an
++ * extra type count, but 2) can transfer the page
++ * ref we grabbed above to it.
++ *
++ * Note that we must increment type_info before
++ * setting PGT_partial. Theoretically it should
++ * be safe to drop the page ref before setting
++ * PGT_partial, but do it afterwards just to be
++ * extra safe.
++ */
++ if ( !(x & PGT_partial) )
++ page->u.inuse.type_info++;
++ smp_wmb();
+ page->u.inuse.type_info |= PGT_partial;
+ if ( x & PGT_partial )
+ put_page(page);
+--
+2.24.0
+
Index: pkgsrc/sysutils/xenkernel411/patches/patch-XSA311
diff -u /dev/null pkgsrc/sysutils/xenkernel411/patches/patch-XSA311:1.1
--- /dev/null Fri Dec 13 13:44:21 2019
+++ pkgsrc/sysutils/xenkernel411/patches/patch-XSA311 Fri Dec 13 13:44:21 2019
@@ -0,0 +1,189 @@
+$NetBSD: patch-XSA311,v 1.1 2019/12/13 13:44:21 bouyer Exp $
+
+From: Andrew Cooper <andrew.cooper3%citrix.com@localhost>
+Subject: AMD/IOMMU: Cease using a dynamic height for the IOMMU pagetables
+
+update_paging_mode() has multiple bugs:
+
+ 1) Booting with iommu=debug will cause it to inform you that that it called
+ without the pdev_list lock held.
+ 2) When growing by more than a single level, it leaks the newly allocated
+ table(s) in the case of a further error.
+
+Furthermore, the choice of default level for a domain has issues:
+
+ 1) All HVM guests grow from 2 to 3 levels during construction because of the
+ position of the VRAM just below the 4G boundary, so defaulting to 2 is a
+ waste of effort.
+ 2) The limit for PV guests doesn't take memory hotplug into account, and
+ isn't dynamic at runtime like HVM guests. This means that a PV guest may
+ get RAM which it can't map in the IOMMU.
+
+The dynamic height is a property unique to AMD, and adds a substantial
+quantity of complexity for what is a marginal performance improvement. Remove
+the complexity by removing the dynamic height.
+
+PV guests now get 3 or 4 levels based on any hotplug regions in the host.
+This only makes a difference for hardware which previously had all RAM below
+the 512G boundary, and a hotplug region above.
+
+HVM guests now get 4 levels (which will be sufficient until 256TB guests
+become a thing), because we don't currently have the information to know when
+3 would be safe to use.
+
+The overhead of this extra level is not expected to be noticeable. It costs
+one page (4k) per domain, and one extra IO-TLB paging structure cache entry
+which is very hot and less likely to be evicted.
+
+This is XSA-311.
+
+Reported-by: XXX PERSON <XXX EMAIL>3
+Signed-off-by: Andrew Cooper <andrew.cooper3%citrix.com@localhost>
+Signed-off-by: Andrew Cooper <andrew.cooper3%citrix.com@localhost>
+Acked-by: Jan Beulich <jbeulich%suse.com@localhost>
+
+--- xen/drivers/passthrough/amd/iommu_map.c.orig
++++ xen/drivers/passthrough/amd/iommu_map.c
+@@ -569,97 +569,6 @@ static int iommu_pde_from_gfn(struct dom
+ return 0;
+ }
+
+-static int update_paging_mode(struct domain *d, unsigned long gfn)
+-{
+- u16 bdf;
+- void *device_entry;
+- unsigned int req_id, level, offset;
+- unsigned long flags;
+- struct pci_dev *pdev;
+- struct amd_iommu *iommu = NULL;
+- struct page_info *new_root = NULL;
+- struct page_info *old_root = NULL;
+- void *new_root_vaddr;
+- unsigned long old_root_mfn;
+- struct domain_iommu *hd = dom_iommu(d);
+-
+- if ( gfn == gfn_x(INVALID_GFN) )
+- return -EADDRNOTAVAIL;
+- ASSERT(!(gfn >> DEFAULT_DOMAIN_ADDRESS_WIDTH));
+-
+- level = hd->arch.paging_mode;
+- old_root = hd->arch.root_table;
+- offset = gfn >> (PTE_PER_TABLE_SHIFT * (level - 1));
+-
+- ASSERT(spin_is_locked(&hd->arch.mapping_lock) && is_hvm_domain(d));
+-
+- while ( offset >= PTE_PER_TABLE_SIZE )
+- {
+- /* Allocate and install a new root table.
+- * Only upper I/O page table grows, no need to fix next level bits */
+- new_root = alloc_amd_iommu_pgtable();
+- if ( new_root == NULL )
+- {
+- AMD_IOMMU_DEBUG("%s Cannot allocate I/O page table\n",
+- __func__);
+- return -ENOMEM;
+- }
+-
+- new_root_vaddr = __map_domain_page(new_root);
+- old_root_mfn = mfn_x(page_to_mfn(old_root));
+- set_iommu_pde_present(new_root_vaddr, old_root_mfn, level,
+- !!IOMMUF_writable, !!IOMMUF_readable);
+- level++;
+- old_root = new_root;
+- offset >>= PTE_PER_TABLE_SHIFT;
+- unmap_domain_page(new_root_vaddr);
+- }
+-
+- if ( new_root != NULL )
+- {
+- hd->arch.paging_mode = level;
+- hd->arch.root_table = new_root;
+-
+- if ( !pcidevs_locked() )
+- AMD_IOMMU_DEBUG("%s Try to access pdev_list "
+- "without aquiring pcidevs_lock.\n", __func__);
+-
+- /* Update device table entries using new root table and paging mode */
+- for_each_pdev( d, pdev )
+- {
+- bdf = PCI_BDF2(pdev->bus, pdev->devfn);
+- iommu = find_iommu_for_device(pdev->seg, bdf);
+- if ( !iommu )
+- {
+- AMD_IOMMU_DEBUG("%s Fail to find iommu.\n", __func__);
+- return -ENODEV;
+- }
+-
+- spin_lock_irqsave(&iommu->lock, flags);
+- do {
+- req_id = get_dma_requestor_id(pdev->seg, bdf);
+- device_entry = iommu->dev_table.buffer +
+- (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
+-
+- /* valid = 0 only works for dom0 passthrough mode */
+- amd_iommu_set_root_page_table((u32 *)device_entry,
+- page_to_maddr(hd->arch.root_table),
+- d->domain_id,
+- hd->arch.paging_mode, 1);
+-
+- amd_iommu_flush_device(iommu, req_id);
+- bdf += pdev->phantom_stride;
+- } while ( PCI_DEVFN2(bdf) != pdev->devfn &&
+- PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
+- spin_unlock_irqrestore(&iommu->lock, flags);
+- }
+-
+- /* For safety, invalidate all entries */
+- amd_iommu_flush_all_pages(d);
+- }
+- return 0;
+-}
+-
+ int amd_iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn,
+ unsigned int flags)
+ {
+@@ -685,19 +594,6 @@ int amd_iommu_map_page(struct domain *d,
+ return rc;
+ }
+
+- /* Since HVM domain is initialized with 2 level IO page table,
+- * we might need a deeper page table for lager gfn now */
+- if ( is_hvm_domain(d) )
+- {
+- if ( update_paging_mode(d, gfn) )
+- {
+- spin_unlock(&hd->arch.mapping_lock);
+- AMD_IOMMU_DEBUG("Update page mode failed gfn = %lx\n", gfn);
+- domain_crash(d);
+- return -EFAULT;
+- }
+- }
+-
+ if ( iommu_pde_from_gfn(d, gfn, pt_mfn, true) || (pt_mfn[1] == 0) )
+ {
+ spin_unlock(&hd->arch.mapping_lock);
+--- xen/drivers/passthrough/amd/pci_amd_iommu.c.orig
++++ xen/drivers/passthrough/amd/pci_amd_iommu.c
+@@ -242,11 +242,17 @@ static int amd_iommu_domain_init(struct
+ {
+ struct domain_iommu *hd = dom_iommu(d);
+
+- /* For pv and dom0, stick with get_paging_mode(max_page)
+- * For HVM dom0, use 2 level page table at first */
+- hd->arch.paging_mode = is_hvm_domain(d) ?
+- IOMMU_PAGING_MODE_LEVEL_2 :
+- get_paging_mode(max_page);
++ /*
++ * Choose the number of levels for the IOMMU page tables.
++ * - PV needs 3 or 4, depending on whether there is RAM (including hotplug
++ * RAM) above the 512G boundary.
++ * - HVM could in principle use 3 or 4 depending on how much guest
++ * physical address space we give it, but this isn't known yet so use 4
++ * unilaterally.
++ */
++ hd->arch.paging_mode = is_hvm_domain(d)
++ ? IOMMU_PAGING_MODE_LEVEL_4 : get_paging_mode(get_upper_mfn_bound());
++
+ return 0;
+ }
+
Home |
Main Index |
Thread Index |
Old Index