tech-kern archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Re: PAE support for kvm(3)
On 01.10.2010 10:19, Jean-Yves Migeon wrote:
>> Attached is the patch required to add PAE support in kvm(3). Except for
>> one "major" nit (see below), it seems to be functional: I can sync a
>> kernel, with or without PAE enabled, and all libkvm binaries (vmstat(1),
>> netstat(1), ...) still work on the core files.
>> [snip]
>> However, I have a kind of "chicken-and-egg" situation here: getting the
>> value of i386_use_pae requires going through KREAD/kvm_read, but these
>> functions cannot work properly until the correct kvatop function has
>> been selected. The downside is that this selection itself depends on the
>> value of i386_use_pae...
>>
> Replying to myself:
>
> Getting some sleep seems to help: as pdppaddr is at least 32-byte
> aligned (the smallest alignment guaranteed for both PAE and !PAE),
> its low-order bits are available and are not used by the translation
> code.
>
> So I think I will use one to signify "use PAE."
A new patch is attached. Tested for PAE and !PAE dumps: both work.
The solution was quite easy: I just took the PG_AVAIL1 bit and used it to
pass the PAE information between the dump and libkvm.
There is one exception (but I suspect it is orthogonal to this patch):
dumping a PAE (resp. !PAE) kernel after rebooting with a !PAE (resp.
PAE) kernel fails: savecore complains about version mismatch.
For now, I would like to commit this patch and look at the
"version mismatch" issue afterwards. Should I bump the libkvm minor version here?
Thanks!
--
Jean-Yves Migeon
jeanyves.migeon%free.fr@localhost
Index: lib/libkvm/Makefile
===================================================================
RCS file: /cvsroot/src/lib/libkvm/Makefile,v
retrieving revision 1.45
diff -u -p -r1.45 Makefile
--- lib/libkvm/Makefile 25 Oct 2008 23:59:42 -0000 1.45
+++ lib/libkvm/Makefile 2 Oct 2010 22:56:03 -0000
@@ -7,9 +7,6 @@ USE_SHLIBDIR= yes
LIB= kvm
CPPFLAGS+=-DLIBC_SCCS -I${NETBSDSRCDIR}/sys
-.if ${MACHINE_ARCH} == "i386"
-LINTFLAGS+=-w
-.endif
SRCS= kvm.c kvm_file.c kvm_getloadavg.c kvm_proc.c
@@ -31,6 +28,11 @@ SRCS+= kvm_${MACHINE_CPU}.c
@false
.endif
+.if ${MACHINE_ARCH} == "i386"
+LINTFLAGS+=-w
+SRCS+= kvm_i386pae.c # Hook PAE support in the i386 build
+.endif
+
# Additional modules needed for m68k
.if (${MACHINE_ARCH} == "m68k" || ${MACHINE_CPU} == "m68k")
SRCS+= kvm_m68k_cmn.c kvm_sun2.c kvm_sun3.c kvm_sun3x.c
Index: lib/libkvm/kvm_i386.c
===================================================================
RCS file: /cvsroot/src/lib/libkvm/kvm_i386.c,v
retrieving revision 1.28
diff -u -p -r1.28 kvm_i386.c
--- lib/libkvm/kvm_i386.c 20 Sep 2010 23:23:16 -0000 1.28
+++ lib/libkvm/kvm_i386.c 2 Oct 2010 22:56:03 -0000
@@ -74,6 +74,16 @@ __RCSID("$NetBSD: kvm_i386.c,v 1.28 2010
#define ptob(x) ((caddr_t)((x) << PGSHIFT)) /* XXX */
#endif
+/*
+ * Indicates whether PAE is in use for the kernel image
+ * 0: native i386 virtual memory mappings
+ * 1: i386 PAE mappings
+ */
+static int i386_use_pae;
+
+int _kvm_kvatop_i386(kvm_t *, vaddr_t, paddr_t *);
+int _kvm_kvatop_i386pae(kvm_t *, vaddr_t, paddr_t *);
+
void
_kvm_freevtop(kvm_t *kd)
{
@@ -87,6 +97,11 @@ _kvm_freevtop(kvm_t *kd)
int
_kvm_initvtop(kvm_t *kd)
{
+ cpu_kcore_hdr_t *cpu_kh = kd->cpu_data;
+
+ i386_use_pae = 0; /* default: non PAE mode */
+ if ((cpu_kh->pdppaddr & I386_KCORE_PAE) == I386_KCORE_PAE)
+ i386_use_pae = 1;
return 0;
}
@@ -97,24 +112,44 @@ _kvm_initvtop(kvm_t *kd)
int
_kvm_kvatop(kvm_t *kd, vaddr_t va, paddr_t *pa)
{
- cpu_kcore_hdr_t *cpu_kh;
- u_long page_off;
- pd_entry_t pde;
- pt_entry_t pte;
- paddr_t pde_pa, pte_pa;
if (ISALIVE(kd)) {
_kvm_err(kd, 0, "vatop called in live kernel!");
return 0;
}
+ switch (i386_use_pae) {
+ default:
+ case 0:
+ return _kvm_kvatop_i386(kd, va, pa);
+ case 1:
+ return _kvm_kvatop_i386pae(kd, va, pa);
+ }
+
+}
+
+/*
+ * Used to translate a virtual address to a physical address for systems
+ * with PAE mode disabled. Only two levels of virtual memory pages are
+ * dereferenced (L2 PDEs, then L1 PTEs).
+ */
+int
+_kvm_kvatop_i386(kvm_t *kd, vaddr_t va, paddr_t *pa)
+{
+ cpu_kcore_hdr_t *cpu_kh;
+ u_long page_off;
+ pd_entry_t pde;
+ pt_entry_t pte;
+ paddr_t pde_pa, pte_pa;
+
cpu_kh = kd->cpu_data;
page_off = va & PGOFSET;
/*
* Find and read the page directory entry.
+ * pdppaddr being PAGE_SIZE aligned, we mask the option bits.
*/
- pde_pa = cpu_kh->pdppaddr + (pl2_pi(va) * sizeof(pd_entry_t));
+ pde_pa = (cpu_kh->pdppaddr & PG_FRAME) + (pl2_pi(va) * sizeof(pde));
if (_kvm_pread(kd, kd->pmfd, (void *)&pde, sizeof(pde),
_kvm_pa2off(kd, pde_pa)) != sizeof(pde)) {
_kvm_syserr(kd, 0, "could not read PDE");
Index: sys/arch/i386/i386/dumpsys.c
===================================================================
RCS file: /cvsroot/src/sys/arch/i386/i386/dumpsys.c,v
retrieving revision 1.10
diff -u -p -r1.10 dumpsys.c
--- sys/arch/i386/i386/dumpsys.c 2 Oct 2010 22:54:47 -0000 1.10
+++ sys/arch/i386/i386/dumpsys.c 2 Oct 2010 22:56:09 -0000
@@ -628,6 +628,8 @@ cpu_dump(void)
* Add the machine-dependent header info.
*/
cpuhdr.pdppaddr = PDPpaddr;
+ if (i386_use_pae == 1)
+ cpuhdr.pdppaddr |= I386_KCORE_PAE;
cpuhdr.nmemsegs = dump_nmemsegs;
(void)dump_header_addbytes(&cpuhdr, ALIGN(sizeof(cpuhdr)));
Index: sys/arch/i386/include/kcore.h
===================================================================
RCS file: /cvsroot/src/sys/arch/i386/include/kcore.h,v
retrieving revision 1.4
diff -u -p -r1.4 kcore.h
--- sys/arch/i386/include/kcore.h 12 Jan 2008 20:03:42 -0000 1.4
+++ sys/arch/i386/include/kcore.h 2 Oct 2010 22:56:09 -0000
@@ -43,6 +43,15 @@ typedef struct cpu_kcore_hdr {
#endif
} cpu_kcore_hdr_t;
+/*
+ * Used to indicate that PAE should be used for virtual address
+ * translation. As PDPpaddr is expected to be PAGE_SIZE aligned,
+ * this can be safely OR'ed in pdppaddr.
+ * To avoid any kind of conflict with existing MMU bits, we chose one
+ * ignored by hardware
+ */
+#define I386_KCORE_PAE PG_AVAIL1
+
#ifdef _KERNEL
void dumpsys(void);
--- /dev/null 2010-10-03 00:28:26.000000000 +0200
+++ lib/libkvm/kvm_i386pae.c 2010-10-02 20:14:16.000000000 +0200
@@ -0,0 +1,131 @@
+/* $NetBSD$ */
+
+/*
+ * Copyright (c) 2010 Jean-Yves Migeon.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD$");
+
+/*
+ * This will expose PAE functions, macros, definitions and constants.
+ * Note: this affects all virtual memory related functions. Only their
+ * PAE versions can be used below.
+ */
+#define PAE
+
+#include <sys/param.h>
+#include <sys/user.h>
+#include <sys/stat.h>
+#include <sys/kcore.h>
+#include <sys/types.h>
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <nlist.h>
+#include <kvm.h>
+
+#include <uvm/uvm_extern.h>
+
+#include <limits.h>
+#include <db.h>
+
+#include "kvm_private.h"
+
+#include <i386/kcore.h>
+#include <i386/pmap.h>
+#include <i386/pte.h>
+#include <i386/vmparam.h>
+
+int _kvm_kvatop_i386pae(kvm_t *, vaddr_t, paddr_t *);
+
+/*
+ * Translate the virtual address va to a physical address for a PAE image.
+ * On success, store the PA in *pa and return the bytes left in the page; on
+ * failure, set *pa to ~0 and return 0. Only the L2 PDs and PTs are walked.
+ */
+int
+_kvm_kvatop_i386pae(kvm_t *kd, vaddr_t va, paddr_t *pa)
+{
+ cpu_kcore_hdr_t *cpu_kh;
+ u_long page_off;
+ pd_entry_t pde;
+ pt_entry_t pte;
+ paddr_t pde_pa, pte_pa;
+
+ cpu_kh = kd->cpu_data;
+ page_off = va & PGOFSET;
+
+ /*
+ * Find and read the PDE. The L3 is ignored, as it is only a per-CPU
+ * page, not needed for kernel VA => PA translations.
+ * The 4 L2 pages are contiguous, so the PDE address is obtained by
+ * offsetting pdppaddr with the L2 index of va.
+ * pdppaddr is PAGE_SIZE aligned; PG_FRAME masks off its option bits.
+ */
+ pde_pa = (cpu_kh->pdppaddr & PG_FRAME) + (pl2_pi(va) * sizeof(pde));
+ if (_kvm_pread(kd, kd->pmfd, (void *)&pde, sizeof(pde),
+ _kvm_pa2off(kd, pde_pa)) != sizeof(pde)) {
+ _kvm_syserr(kd, 0, "could not read PDE");
+ goto lose;
+ }
+
+ /*
+ * Validate the PDE, then find and read the page table entry.
+ */
+ if ((pde & PG_V) == 0) {
+ _kvm_err(kd, 0, "invalid translation (invalid PDE)");
+ goto lose;
+ }
+ if ((pde & PG_PS) != 0) {
+ /*
+ * This is a 2MB page: the PDE maps it directly, no PTE is read.
+ */
+ page_off = va & ((vaddr_t)~PG_LGFRAME);
+ *pa = (pde & PG_LGFRAME) + page_off;
+ return (int)(NBPD_L2 - page_off);
+ }
+
+ pte_pa = (pde & PG_FRAME) + (pl1_pi(va) * sizeof(pt_entry_t));
+ if (_kvm_pread(kd, kd->pmfd, (void *) &pte, sizeof(pte),
+ _kvm_pa2off(kd, pte_pa)) != sizeof(pte)) {
+ _kvm_syserr(kd, 0, "could not read PTE");
+ goto lose;
+ }
+
+ /*
+ * Validate the PTE; return the PA and the bytes left in the page.
+ */
+ if ((pte & PG_V) == 0) {
+ _kvm_err(kd, 0, "invalid translation (invalid PTE)");
+ goto lose;
+ }
+ *pa = (pte & PG_FRAME) + page_off;
+ return (int)(NBPG - page_off);
+
+lose:
+ *pa = (paddr_t)~0L;
+ return 0;
+
+}
Home |
Main Index |
Thread Index |
Old Index