tech-kern archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: PAE support for kvm(3)



On 01.10.2010 10:19, Jean-Yves Migeon wrote:
>> Attached is the patch required to add PAE support in kvm(3). Except for
>> one "major" nit (see below), it seems to be functional: I can sync a
>> kernel, with or without PAE enabled, and all libkvm binaries (vmstat(1),
>> netstat(1), ...) still work on the core files.
>> [snip]
>> However, I have some kind of "chicken-egg" situation there: getting
>> i386_use_pae value needs a go through KREAD/kvm_read, but these
>> functions cannot work properly until the correct kvatop function has
>> been selected. Downside is, this depends on i386_use_pae value...
>>
> Replying to myself:
> 
> Having some sleep seems to help: as the pdppaddr is at least 32 bits
> aligned (least common multiple between PAE and !PAE),
> that gives me some bits accessible which are not used by the translation
> code.
> 
> So I think I will use one to signify "use PAE."

A new patch is attached. Tested for PAE and !PAE dumps: both work.

The solution was quite easy: I just took the PG_AVAIL1 bit and used it to
pass the PAE information between the dump and libkvm.

There is one exception (but I suspect it is orthogonal to this patch):
dumping a PAE (resp. !PAE) kernel after rebooting with a !PAE (resp.
PAE) kernel fails; savecore complains about a version mismatch.

For now, I would like to import that patch, and have a look at the
"version mismatch" issue afterwards. Should I bump the libkvm minor
version here?

Thanks!

-- 
Jean-Yves Migeon
jeanyves.migeon%free.fr@localhost
Index: lib/libkvm/Makefile
===================================================================
RCS file: /cvsroot/src/lib/libkvm/Makefile,v
retrieving revision 1.45
diff -u -p -r1.45 Makefile
--- lib/libkvm/Makefile 25 Oct 2008 23:59:42 -0000      1.45
+++ lib/libkvm/Makefile 2 Oct 2010 22:56:03 -0000
@@ -7,9 +7,6 @@ USE_SHLIBDIR=   yes
 
 LIB=   kvm
 CPPFLAGS+=-DLIBC_SCCS -I${NETBSDSRCDIR}/sys
-.if ${MACHINE_ARCH} == "i386"
-LINTFLAGS+=-w
-.endif
 
 SRCS=  kvm.c kvm_file.c kvm_getloadavg.c kvm_proc.c
 
@@ -31,6 +28,11 @@ SRCS+=       kvm_${MACHINE_CPU}.c
        @false
 .endif
 
+.if ${MACHINE_ARCH} == "i386"
+LINTFLAGS+=-w
+SRCS+= kvm_i386pae.c # Hook PAE support in the i386 build
+.endif
+
 # Additional modules needed for m68k
 .if (${MACHINE_ARCH} == "m68k" || ${MACHINE_CPU} == "m68k")
 SRCS+= kvm_m68k_cmn.c kvm_sun2.c kvm_sun3.c kvm_sun3x.c
Index: lib/libkvm/kvm_i386.c
===================================================================
RCS file: /cvsroot/src/lib/libkvm/kvm_i386.c,v
retrieving revision 1.28
diff -u -p -r1.28 kvm_i386.c
--- lib/libkvm/kvm_i386.c       20 Sep 2010 23:23:16 -0000      1.28
+++ lib/libkvm/kvm_i386.c       2 Oct 2010 22:56:03 -0000
@@ -74,6 +74,16 @@ __RCSID("$NetBSD: kvm_i386.c,v 1.28 2010
 #define        ptob(x)         ((caddr_t)((x) << PGSHIFT))     /* XXX */
 #endif
 
+/*
+ * Indicates whether PAE is in use for the kernel image
+ * 0: native i386 virtual memory mappings
+ * 1: i386 PAE mappings
+ */
+static int i386_use_pae;
+
+int _kvm_kvatop_i386(kvm_t *, vaddr_t, paddr_t *);
+int _kvm_kvatop_i386pae(kvm_t *, vaddr_t, paddr_t *);
+
 void
 _kvm_freevtop(kvm_t *kd)
 {
@@ -87,6 +97,11 @@ _kvm_freevtop(kvm_t *kd)
 int
 _kvm_initvtop(kvm_t *kd)
 {
+       cpu_kcore_hdr_t *cpu_kh = kd->cpu_data;
+
+       i386_use_pae = 0; /* default: non PAE mode */
+       if ((cpu_kh->pdppaddr & I386_KCORE_PAE) == I386_KCORE_PAE)
+               i386_use_pae = 1;
 
        return 0;
 }
@@ -97,24 +112,44 @@ _kvm_initvtop(kvm_t *kd)
 int
 _kvm_kvatop(kvm_t *kd, vaddr_t va, paddr_t *pa)
 {
-       cpu_kcore_hdr_t *cpu_kh;
-       u_long page_off;
-       pd_entry_t pde;
-       pt_entry_t pte;
-       paddr_t pde_pa, pte_pa;
 
        if (ISALIVE(kd)) {
                _kvm_err(kd, 0, "vatop called in live kernel!");
                return 0;
        }
 
+       switch (i386_use_pae) {
+       default:
+       case 0:
+               return _kvm_kvatop_i386(kd, va, pa);
+       case 1:
+               return _kvm_kvatop_i386pae(kd, va, pa);
+       }
+       
+}
+
+/*
+ * Used to translate a virtual address to a physical address for systems
+ * with PAE mode disabled. Only two levels of virtual memory pages are
+ * dereferenced (L2 PDEs, then L1 PTEs).
+ */
+int
+_kvm_kvatop_i386(kvm_t *kd, vaddr_t va, paddr_t *pa)
+{
+       cpu_kcore_hdr_t *cpu_kh;
+       u_long page_off;
+       pd_entry_t pde;
+       pt_entry_t pte;
+       paddr_t pde_pa, pte_pa;
+
        cpu_kh = kd->cpu_data;
        page_off = va & PGOFSET;
 
        /*
         * Find and read the page directory entry.
+        * pdppaddr being PAGE_SIZE aligned, we mask the option bits.
         */
-       pde_pa = cpu_kh->pdppaddr + (pl2_pi(va) * sizeof(pd_entry_t));
+       pde_pa = (cpu_kh->pdppaddr & PG_FRAME) + (pl2_pi(va) * sizeof(pde));
        if (_kvm_pread(kd, kd->pmfd, (void *)&pde, sizeof(pde),
            _kvm_pa2off(kd, pde_pa)) != sizeof(pde)) {
                _kvm_syserr(kd, 0, "could not read PDE");
Index: sys/arch/i386/i386/dumpsys.c
===================================================================
RCS file: /cvsroot/src/sys/arch/i386/i386/dumpsys.c,v
retrieving revision 1.10
diff -u -p -r1.10 dumpsys.c
--- sys/arch/i386/i386/dumpsys.c        2 Oct 2010 22:54:47 -0000       1.10
+++ sys/arch/i386/i386/dumpsys.c        2 Oct 2010 22:56:09 -0000
@@ -628,6 +628,8 @@ cpu_dump(void)
         * Add the machine-dependent header info.
         */
        cpuhdr.pdppaddr = PDPpaddr;
+       if (i386_use_pae == 1)
+               cpuhdr.pdppaddr |= I386_KCORE_PAE;
        cpuhdr.nmemsegs = dump_nmemsegs;
        (void)dump_header_addbytes(&cpuhdr, ALIGN(sizeof(cpuhdr)));
 
Index: sys/arch/i386/include/kcore.h
===================================================================
RCS file: /cvsroot/src/sys/arch/i386/include/kcore.h,v
retrieving revision 1.4
diff -u -p -r1.4 kcore.h
--- sys/arch/i386/include/kcore.h       12 Jan 2008 20:03:42 -0000      1.4
+++ sys/arch/i386/include/kcore.h       2 Oct 2010 22:56:09 -0000
@@ -43,6 +43,15 @@ typedef struct cpu_kcore_hdr {
 #endif
 } cpu_kcore_hdr_t;
 
+/*
+ * Used to indicate that PAE should be used for virtual address
+ * translation. As PDPpaddr is expected to be PAGE_SIZE aligned,
+ * this can be safely OR'ed in pdppaddr.
+ * To avoid any kind of conflict with existing MMU bits, we chose one
+ * ignored by hardware
+ */
+#define I386_KCORE_PAE PG_AVAIL1
+
 #ifdef _KERNEL
 void   dumpsys(void);
 
--- /dev/null   2010-10-03 00:28:26.000000000 +0200
+++ lib/libkvm/kvm_i386pae.c    2010-10-02 20:14:16.000000000 +0200
@@ -0,0 +1,131 @@
+/* $NetBSD$ */
+
+/*
+ * Copyright (c) 2010 Jean-Yves Migeon.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD$");
+
+/*
+ * This will expose PAE functions, macros, definitions and constants.
+ * Note: this affects all virtual memory related functions. Only their
+ * PAE versions can be used below.
+ */
+#define PAE
+
+#include <sys/param.h>
+#include <sys/user.h>
+#include <sys/stat.h>
+#include <sys/kcore.h>
+#include <sys/types.h>
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <nlist.h>
+#include <kvm.h>
+
+#include <uvm/uvm_extern.h>
+
+#include <limits.h>
+#include <db.h>
+
+#include "kvm_private.h"
+
+#include <i386/kcore.h>
+#include <i386/pmap.h>
+#include <i386/pte.h>
+#include <i386/vmparam.h>
+
+int _kvm_kvatop_i386pae(kvm_t *, vaddr_t, paddr_t *);
+
+/*
+ * Used to translate a virtual address to a physical address for systems
+ * running under PAE mode. Three levels of virtual memory pages are handled
+ * here: the per-CPU L3 page, the 4 L2 PDs and the PTs.
+ */
+int
+_kvm_kvatop_i386pae(kvm_t *kd, vaddr_t va, paddr_t *pa)
+{
+       cpu_kcore_hdr_t *cpu_kh;
+       u_long page_off;
+       pd_entry_t pde;
+       pt_entry_t pte;
+       paddr_t pde_pa, pte_pa;
+
+       cpu_kh = kd->cpu_data;
+       page_off = va & PGOFSET;
+       
+       /*
+        * Find and read the PDE. Ignore the L3, as it is only a per-CPU
+        * page, not needed for kernel VA => PA translations.
+        * Remember that the 4 L2 pages are contiguous, so it is safe
+        * to increment pdppaddr to compute the address of the PDE.
+        * pdppaddr being PAGE_SIZE aligned, we mask the option bits.
+        */
+       pde_pa = (cpu_kh->pdppaddr & PG_FRAME) + (pl2_pi(va) * sizeof(pde));
+       if (_kvm_pread(kd, kd->pmfd, (void *)&pde, sizeof(pde),
+           _kvm_pa2off(kd, pde_pa)) != sizeof(pde)) {
+               _kvm_syserr(kd, 0, "could not read PDE");
+               goto lose;
+       }
+
+       /*
+        * Find and read the page table entry.
+        */
+       if ((pde & PG_V) == 0) {
+               _kvm_err(kd, 0, "invalid translation (invalid PDE)");
+               goto lose;
+       }
+       if ((pde & PG_PS) != 0) {
+               /*
+                * This is a 2MB page.
+                */
+               page_off = va & ((vaddr_t)~PG_LGFRAME);
+               *pa = (pde & PG_LGFRAME) + page_off;
+               return (int)(NBPD_L2 - page_off);
+       }
+
+       pte_pa = (pde & PG_FRAME) + (pl1_pi(va) * sizeof(pt_entry_t));
+       if (_kvm_pread(kd, kd->pmfd, (void *) &pte, sizeof(pte),
+           _kvm_pa2off(kd, pte_pa)) != sizeof(pte)) {
+               _kvm_syserr(kd, 0, "could not read PTE");
+               goto lose;
+       }
+
+       /*
+        * Validate the PTE and return the physical address.
+        */
+       if ((pte & PG_V) == 0) {
+               _kvm_err(kd, 0, "invalid translation (invalid PTE)");
+               goto lose;
+       }
+       *pa = (pte & PG_FRAME) + page_off;
+       return (int)(NBPG - page_off);
+
+lose:
+       *pa = (paddr_t)~0L;
+       return 0;
+
+}


Home | Main Index | Thread Index | Old Index