tech-kern archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

PAX mprotect and JIT



Hi all,
at the moment, PAX mprotect makes it very expensive to implement any
form of JIT. It is not possible to switch a page from writable to
executable. It is not possible to use anonymous memory for JIT in
multi-threaded applications as you can't have distinct writable and
executable mappings. The only ways JIT can work right now are either
disabling PAX mprotect or creating a temporary file on disk. That's not
only silly, but IMO actively harmful. Considering that some important
components like libffi fall into this category, the answer can't be
"disable PAX mprotect on bin/python*" and various other places.

I've attached three patches to this mail:
(1) Implement a new flag for mremap to allow duplicating a mapping
(MAP_REMAPDUP). This patch is functional by itself.
(2) A hack to allow mprotect to switch between W and X while still
honoring W^X. This is a hack and needs to be carefully rethought,
since I believe the way PAX is currently implemented is wrong. Consider
it a PoC.
(3) A patch for devel/libffi to show how the first two parts can be
implemented to obtain JIT. With this patch the libffi test suite passes
with active PAX mprotect and ASLR.

I find the availability of two separate mappings quite an acceptable
compromise. It would also allow us to easily improve security for most
binaries: the GOT could be mapped read-only as far as possible, with the
write mapping kept separate. But that's a separate topic.

Joerg
Index: sys/uvm/uvm_map.c
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/uvm/uvm_map.c,v
retrieving revision 1.342
diff -u -p -r1.342 uvm_map.c
--- sys/uvm/uvm_map.c	1 Dec 2016 02:09:03 -0000	1.342
+++ sys/uvm/uvm_map.c	24 Feb 2017 01:40:35 -0000
@@ -2988,7 +2988,7 @@ uvm_map_protect(struct vm_map *map, vadd
 			error = EINVAL;
 			goto out;
 		}
-		if ((new_prot & current->max_protection) != new_prot) {
+		if (!set_max && (new_prot & current->max_protection) != new_prot) {
 			error = EACCES;
 			goto out;
 		}
@@ -3021,10 +3021,8 @@ uvm_map_protect(struct vm_map *map, vadd
 		UVM_MAP_CLIP_END(map, current, end);
 		old_prot = current->protection;
 		if (set_max)
-			current->protection =
-			    (current->max_protection = new_prot) & old_prot;
-		else
-			current->protection = new_prot;
+			current->max_protection = new_prot;
+		current->protection = new_prot;
 
 		/*
 		 * update physical map if necessary.  worry about copy-on-write
Index: sys/uvm/uvm_mmap.c
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/uvm/uvm_mmap.c,v
retrieving revision 1.162
diff -u -p -r1.162 uvm_mmap.c
--- sys/uvm/uvm_mmap.c	9 Aug 2016 12:17:04 -0000	1.162
+++ sys/uvm/uvm_mmap.c	24 Feb 2017 00:48:05 -0000
@@ -603,7 +603,7 @@ sys_mprotect(struct lwp *l, const struct
 	struct proc *p = l->l_proc;
 	vaddr_t addr;
 	vsize_t size, pageoff;
-	vm_prot_t prot;
+	vm_prot_t oprot, prot, maxprot;
 	int error;
 
 	/*
@@ -612,7 +612,12 @@ sys_mprotect(struct lwp *l, const struct
 
 	addr = (vaddr_t)SCARG(uap, addr);
 	size = (vsize_t)SCARG(uap, len);
-	prot = SCARG(uap, prot) & VM_PROT_ALL;
+	prot = oprot = SCARG(uap, prot) & VM_PROT_ALL;
+
+	maxprot = prot;
+	PAX_MPROTECT_ADJUST(l, &prot, &maxprot);
+	if (prot != oprot)
+		return EPERM;
 
 	/*
 	 * align the address to a page boundary and adjust the size accordingly.
@@ -628,7 +633,7 @@ sys_mprotect(struct lwp *l, const struct
 		return EINVAL;
 
 	error = uvm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
-				false);
+				true);
 	return error;
 }
 
Index: sys/sys/mman.h
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/sys/mman.h,v
retrieving revision 1.50
diff -u -p -r1.50 mman.h
--- sys/sys/mman.h	1 Jun 2016 00:46:44 -0000	1.50
+++ sys/sys/mman.h	23 Feb 2017 22:52:43 -0000
@@ -82,6 +82,7 @@ typedef	__off_t		off_t;		/* file offset 
 /*
  * Other flags
  */
+#define	MAP_REMAPDUP	 0x0004	/* mremap only: duplicate the mapping */
 #define	MAP_FIXED	 0x0010	/* map addr must be exactly as requested */
 #define	MAP_RENAME	 0x0020	/* Sun: rename private pages to file */
 #define	MAP_NORESERVE	 0x0040	/* Sun: don't reserve needed swap area */
Index: sys/uvm/uvm_mremap.c
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/uvm/uvm_mremap.c,v
retrieving revision 1.18
diff -u -p -r1.18 uvm_mremap.c
--- sys/uvm/uvm_mremap.c	26 Nov 2015 13:15:34 -0000	1.18
+++ sys/uvm/uvm_mremap.c	23 Feb 2017 22:56:00 -0000
@@ -120,6 +120,7 @@ uvm_mremap(struct vm_map *oldmap, vaddr_
 	vaddr_t align = 0;
 	int error = 0;
 	const bool fixed = (flags & MAP_FIXED) != 0;
+	const bool duplicate = (flags & MAP_REMAPDUP) != 0;
 
 	if (fixed) {
 		newva = *newvap;
@@ -165,7 +166,8 @@ uvm_mremap(struct vm_map *oldmap, vaddr_
 	 * check the easy cases first.
 	 */
 
-	if ((!fixed || newva == oldva) && newmap == oldmap &&
+	if (!duplicate &&
+	    (!fixed || newva == oldva) && newmap == oldmap &&
 	    (align == 0 || (oldva & (align - 1)) == 0)) {
 		vaddr_t va;
 
@@ -240,7 +242,7 @@ extend:
 	 * remove original entries unless we did in-place extend.
 	 */
 
-	if (oldva != newva || oldmap != newmap) {
+	if (!duplicate && (oldva != newva || oldmap != newmap)) {
 		uvm_unmap(oldmap, oldva, oldva + oldsize);
 	}
 done:
@@ -278,7 +280,7 @@ sys_mremap(struct lwp *l, const struct s
 	newva = (vaddr_t)SCARG(uap, new_address);
 	newsize = (vsize_t)(SCARG(uap, new_size));
 
-	if ((flags & ~(MAP_FIXED | MAP_ALIGNMENT_MASK)) != 0) {
+	if ((flags & ~(MAP_FIXED | MAP_REMAPDUP | MAP_ALIGNMENT_MASK)) != 0) {
 		error = EINVAL;
 		goto done;
 	}
$NetBSD$

--- src/closures.c.orig	2017-02-24 01:00:47.888238256 +0000
+++ src/closures.c
@@ -33,6 +33,12 @@
 #include <ffi.h>
 #include <ffi_common.h>
 
+#ifdef __NetBSD__
+#include <sys/param.h>
+#endif
+
+#if __NetBSD_Version__ - 799005900 < 0
+
 #if !FFI_MMAP_EXEC_WRIT && !FFI_EXEC_TRAMPOLINE_TABLE
 # if __gnu_linux__ && !defined(__ANDROID__)
 /* This macro indicates it may be forbidden to map anonymous memory
@@ -686,3 +692,57 @@ ffi_closure_free (void *ptr)
 
 # endif /* ! FFI_MMAP_EXEC_WRIT */
 #endif /* FFI_CLOSURES */
+
+#else
+#include <sys/mman.h>
+
+#include <stddef.h>
+#include <unistd.h>
+
+static const size_t overhead =
+  (sizeof(max_align_t) > sizeof(void *) + sizeof(size_t)) ?
+    sizeof(max_align_t)
+    : sizeof(void *) + sizeof(size_t);
+
+void *
+ffi_closure_alloc (size_t size, void **code)
+{
+  if (!code)
+    return NULL;
+
+  static size_t page_size;
+  if (!page_size)
+    page_size = sysconf(_SC_PAGESIZE);
+  size_t rounded_size = (size + overhead + page_size - 1) & ~(page_size - 1);
+  void *dataseg = mmap(NULL, rounded_size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
+  if (dataseg == MAP_FAILED)
+    return NULL;
+  void *codeseg = mremap(dataseg, rounded_size, NULL, rounded_size, MAP_REMAPDUP);
+  if (codeseg == MAP_FAILED) {
+    munmap(dataseg, rounded_size);
+    return NULL;
+  }
+  if (mprotect(codeseg, rounded_size, PROT_READ | PROT_EXEC) == -1) {
+    munmap(codeseg, rounded_size);
+    munmap(dataseg, rounded_size);
+    return NULL;
+  }
+  memcpy(dataseg, &rounded_size, sizeof(rounded_size));
+  memcpy((void *)((uintptr_t)dataseg + sizeof(size_t)), &codeseg, sizeof(void *));
+  *code = (void *)((uintptr_t)codeseg + overhead);
+  return (void *)((uintptr_t)dataseg + overhead);
+}
+
+void
+ffi_closure_free (void *ptr)
+{
+  void *dataseg = (void *)((uintptr_t)ptr - overhead);
+  size_t rounded_size;
+  void *codeseg;
+  memcpy(&rounded_size, dataseg, sizeof(rounded_size));
+  memcpy(&codeseg, (void *)((uintptr_t)dataseg + sizeof(size_t)), sizeof(void *));
+  munmap(dataseg, rounded_size);
+  munmap(codeseg, rounded_size);
+}
+
+#endif


Home | Main Index | Thread Index | Old Index