tech-kern archive

PAE support for kvm(3)



Hi list,

Attached is the patch required to add PAE support to kvm(3). Except for
one "major" nit (see below), it seems to be functional: I can sync a
kernel (i.e. force a crash dump), with or without PAE enabled, and all
the libkvm binaries (vmstat(1), netstat(1), ...) still work on the
resulting core files.

I am facing an interesting limitation here: some weeks ago, I added a
symbol, i386_use_pae. This variable is also exposed as a sysctl(7) node,
"machdep.pae".

The goal was to provide an easy indication of whether a kernel is
currently using PAE mode or not, and also to allow querying its value
through kvm_nlist(3) (to select the proper kvatop function, depending
on the mode).
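
As an aside, on a live system the mode can be checked directly through
that node. Here is a minimal sketch using sysctlbyname(3), assuming the
node is an int like the kernel variable (illustrative only, not part of
the patch):

#include <sys/param.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
        int pae;
        size_t len = sizeof(pae);

        if (sysctlbyname("machdep.pae", &pae, &len, NULL, 0) == -1) {
                /* older kernel without the node: assume native i386 */
                pae = 0;
        }
        printf("PAE %s\n", pae ? "enabled" : "disabled");
        return 0;
}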

However, I have a "chicken-and-egg" situation here: getting the value
of i386_use_pae requires going through KREAD/kvm_read, but these
functions cannot work properly until the correct kvatop function has
been selected, and that selection in turn depends on the value of
i386_use_pae...

At this late hour, I cannot think of a quick and clean solution. Adding
an element to cpu_kcore_hdr_t would break all core dumps from before
the change (the offset of the memory segments, "memsegs", would
differ), so compat code would be needed to cope with that. Another
possibility is to set the last bit of pdppaddr to indicate that PAE is
active, but this seems like a dirty hack (I don't think that PDPpaddr
will ever go that high, though).
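
To make that last idea concrete, here is a tiny standalone sketch of
the encode/decode it would imply; the paddr_t typedef, the bit chosen
and the sample value are illustrative only, not taken from the i386
kcore headers:

#include <stdio.h>
#include <stdint.h>

typedef uint64_t paddr_t;                       /* illustrative */
#define PAE_FLAG        ((paddr_t)1 << 63)      /* an otherwise unused bit */

int
main(void)
{
        paddr_t pdppaddr = 0x1ff000;    /* real PDP physical address */
        int use_pae = 1;

        /* dump side: fold the PAE indicator into the spare bit */
        paddr_t stored = pdppaddr | (use_pae ? PAE_FLAG : 0);

        /*
         * libkvm side: recover the flag before any KREAD is possible,
         * then strip it to get the real physical address back.
         */
        int decoded_pae = (stored & PAE_FLAG) != 0;
        paddr_t decoded_pa = stored & ~PAE_FLAG;

        printf("pae=%d pdppaddr=%#llx\n", decoded_pae,
            (unsigned long long)decoded_pa);
        return 0;
}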

Any advice for that one? Thanks in advance!

-- 
Jean-Yves Migeon
jeanyves.migeon%free.fr@localhost
Index: lib/libkvm/Makefile
===================================================================
RCS file: /cvsroot/src/lib/libkvm/Makefile,v
retrieving revision 1.45
diff -u -p -r1.45 Makefile
--- lib/libkvm/Makefile 25 Oct 2008 23:59:42 -0000      1.45
+++ lib/libkvm/Makefile 1 Oct 2010 00:03:55 -0000
@@ -7,9 +7,6 @@ USE_SHLIBDIR=   yes
 
 LIB=   kvm
 CPPFLAGS+=-DLIBC_SCCS -I${NETBSDSRCDIR}/sys
-.if ${MACHINE_ARCH} == "i386"
-LINTFLAGS+=-w
-.endif
 
 SRCS=  kvm.c kvm_file.c kvm_getloadavg.c kvm_proc.c
 
@@ -31,6 +28,11 @@ SRCS+=       kvm_${MACHINE_CPU}.c
        @false
 .endif
 
+.if ${MACHINE_ARCH} == "i386"
+LINTFLAGS+=-w
+SRCS+= kvm_i386pae.c # Hook PAE support in the i386 build
+.endif
+
 # Additional modules needed for m68k
 .if (${MACHINE_ARCH} == "m68k" || ${MACHINE_CPU} == "m68k")
 SRCS+= kvm_m68k_cmn.c kvm_sun2.c kvm_sun3.c kvm_sun3x.c
Index: lib/libkvm/kvm_i386.c
===================================================================
RCS file: /cvsroot/src/lib/libkvm/kvm_i386.c,v
retrieving revision 1.28
diff -u -p -r1.28 kvm_i386.c
--- lib/libkvm/kvm_i386.c       20 Sep 2010 23:23:16 -0000      1.28
+++ lib/libkvm/kvm_i386.c       1 Oct 2010 00:03:55 -0000
@@ -74,6 +74,16 @@ __RCSID("$NetBSD: kvm_i386.c,v 1.28 2010
 #define        ptob(x)         ((caddr_t)((x) << PGSHIFT))     /* XXX */
 #endif
 
+/*
+ * Indicates whether PAE is in use for the kernel image
+ * 0: native i386 virtual memory mappings
+ * 1: i386 PAE mappings
+ */
+static int i386_use_pae;
+
+int _kvm_kvatop_i386(kvm_t *, vaddr_t, paddr_t *);
+int _kvm_kvatop_i386pae(kvm_t *, vaddr_t, paddr_t *);
+
 void
 _kvm_freevtop(kvm_t *kd)
 {
@@ -87,6 +97,23 @@ _kvm_freevtop(kvm_t *kd)
 int
 _kvm_initvtop(kvm_t *kd)
 {
+       struct nlist nl[2];
+
+       nl[0].n_name = "i386_use_pae";
+       nl[1].n_name = NULL;
+
+       if (kvm_nlist(kd, nl) != 0) {
+               /* Support kernels from before the PAE era */
+               _kvm_err(kd, 0, "could not read i386 virtual memory mode -- "
+                   "assuming i386 native");
+               i386_use_pae = 0;
+               return 0;
+       }
+       
+       if (KREAD(kd, nl[0].n_value, &i386_use_pae)) {
+               _kvm_err(kd, kd->program, "can't read i386_use_pae");
+               return -1;
+       }
 
        return 0;
 }
@@ -97,17 +124,36 @@ _kvm_initvtop(kvm_t *kd)
 int
 _kvm_kvatop(kvm_t *kd, vaddr_t va, paddr_t *pa)
 {
-       cpu_kcore_hdr_t *cpu_kh;
-       u_long page_off;
-       pd_entry_t pde;
-       pt_entry_t pte;
-       paddr_t pde_pa, pte_pa;
 
        if (ISALIVE(kd)) {
                _kvm_err(kd, 0, "vatop called in live kernel!");
                return 0;
        }
 
+       switch (i386_use_pae) {
+       default:
+       case 0:
+               return _kvm_kvatop_i386(kd, va, pa);
+       case 1:
+               return _kvm_kvatop_i386pae(kd, va, pa);
+       }
+       
+}
+
+/*
+ * Used to translate a virtual address to a physical address for systems
+ * with PAE mode disabled. Only two levels of virtual memory pages are
+ * dereferenced (L2 PDEs, then L1 PTEs).
+ */
+int
+_kvm_kvatop_i386(kvm_t *kd, vaddr_t va, paddr_t *pa)
+{
+       cpu_kcore_hdr_t *cpu_kh;
+       u_long page_off;
+       pd_entry_t pde;
+       pt_entry_t pte;
+       paddr_t pde_pa, pte_pa;
+
        cpu_kh = kd->cpu_data;
        page_off = va & PGOFSET;
 
--- /dev/null   2010-10-01 02:32:35.000000000 +0200
+++ lib/libkvm/kvm_i386pae.c    2010-09-26 16:05:38.000000000 +0200
@@ -0,0 +1,131 @@
+/* $NetBSD$ */
+
+/*
+ * Copyright (c) 2010 Jean-Yves Migeon.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD$");
+
+/*
+ * This will expose PAE functions, macros, definitions and constants.
+ * Note: this affects all virtual memory related functions. Only their
+ * PAE versions can be used below.
+ */
+#define PAE
+
+#include <sys/param.h>
+#include <sys/user.h>
+#include <sys/stat.h>
+#include <sys/kcore.h>
+#include <sys/types.h>
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <nlist.h>
+#include <kvm.h>
+
+#include <uvm/uvm_extern.h>
+
+#include <limits.h>
+#include <db.h>
+
+#include "kvm_private.h"
+
+#include <i386/kcore.h>
+#include <i386/pmap.h>
+#include <i386/pte.h>
+#include <i386/vmparam.h>
+
+int _kvm_kvatop_i386pae(kvm_t *, vaddr_t, paddr_t *);
+
+/*
+ * Used to translate a virtual address to a physical address for systems
+ * running under PAE mode. Three levels of virtual memory pages are handled
+ * here: the per-CPU L3 page, the 4 L2 PDs and the PTs.
+ */
+int
+_kvm_kvatop_i386pae(kvm_t *kd, vaddr_t va, paddr_t *pa)
+{
+       cpu_kcore_hdr_t *cpu_kh;
+       u_long page_off;
+       pd_entry_t pde;
+       pt_entry_t pte;
+       paddr_t pde_pa, pte_pa;
+
+       cpu_kh = kd->cpu_data;
+       page_off = va & PGOFSET;
+       
+       /*
+        * Find and read the PDE. Ignore the L3, as it is only a per-CPU
+        * page, not needed for kernel VA => PA translations.
+        * Remember that the 4 L2 pages are contiguous, so it is safe
+        * to increment pdppaddr to compute the address of the PDE.
+        */
+       pde_pa = cpu_kh->pdppaddr + (pl2_pi(va) * sizeof(pd_entry_t));
+
+       if (_kvm_pread(kd, kd->pmfd, (void *)&pde, sizeof(pde),
+           _kvm_pa2off(kd, pde_pa)) != sizeof(pde)) {
+               _kvm_syserr(kd, 0, "could not read PDE");
+               goto lose;
+       }
+
+       /*
+        * Find and read the page table entry.
+        */
+       if ((pde & PG_V) == 0) {
+               _kvm_err(kd, 0, "invalid translation (invalid PDE)");
+               goto lose;
+       }
+       if ((pde & PG_PS) != 0) {
+               /*
+                * This is a 2MB page.
+                */
+               page_off = va & ((vaddr_t)~PG_LGFRAME);
+               *pa = (pde & PG_LGFRAME) + page_off;
+               return (int)(NBPD_L2 - page_off);
+       }
+
+       pte_pa = (pde & PG_FRAME) + (pl1_pi(va) * sizeof(pt_entry_t));
+       if (_kvm_pread(kd, kd->pmfd, (void *) &pte, sizeof(pte),
+           _kvm_pa2off(kd, pte_pa)) != sizeof(pte)) {
+               _kvm_syserr(kd, 0, "could not read PTE");
+               goto lose;
+       }
+
+       /*
+        * Validate the PTE and return the physical address.
+        */
+       if ((pte & PG_V) == 0) {
+               _kvm_err(kd, 0, "invalid translation (invalid PTE)");
+               goto lose;
+       }
+       *pa = (pte & PG_FRAME) + page_off;
+       return (int)(NBPG - page_off);
+
+lose:
+       *pa = (paddr_t)~0L;
+       return 0;
+
+}

