tech-kern archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: sparse dumps (was: WAPL panic)



On Wed, Nov 07, 2012 at 02:22:49PM +0100, Edgar Fuß wrote:
> > Try to get a sparse dump via machdep.sparse_dump=1
> How long is that supposed to take?
> It said "dump", paused for a few seconds, then counted from 44 down to 38 and 
> then nothing happened for minutes. Until I hit the virtual reset button.

I tried triggering a sparse dump (with "reboot -qd") on amd64
and after a number of tries I did see the hang during the dump,
but even when it doesn't hang, the resulting sparse dump is not valid:

savecore: kvm_read: invalid translation (invalid level 4 PDE)

sparse dumps appear to be a bit too sparse.

after I fixed that (and the problem that causes the kernel to spew
"pmap_kenter_pa: mapping already present"), the next problem was that savecore
generates a useless kernel image file, so you need to ignore the one
from savecore and use the kernel image you actually booted.  this isn't
specific to sparse dumps, it happens with both normal and sparse dumps.

but once I get past all that, sparse dumps work for me on amd64.

... I later tried triggering a dump from ddb with "reboot 0x104"
to make sure that my fix for the "mapping already present" thing
would work in this context as well (since the last attempt to fix that
resulted in a different hang), and I found that rebooting from ddb
currently always hangs.  I traced it as far as cpu_shutdown(),
and it's not surprising that the xcalls from that also cause problems.
I'm inclined to have pmf_system_shutdown() return without doing anything
if panicstr is set, since the context in which this is called could cause
a hang for any driver shutdown hook.  does anyone have any other ideas
on what to do about this?

the attached patch fixes the amd64 kernel problems with sparse dumps for me,
could you give that a try?

(sparse dumps probably still won't work for XEN though.)

-Chuck
Index: sys/arch/amd64/amd64/machdep.c
===================================================================
RCS file: /home/chs/netbsd/cvs/src/sys/arch/amd64/amd64/machdep.c,v
retrieving revision 1.190
diff -u -p -r1.190 machdep.c
--- sys/arch/amd64/amd64/machdep.c      3 Sep 2012 05:01:44 -0000       1.190
+++ sys/arch/amd64/amd64/machdep.c      9 Nov 2012 14:54:51 -0000
@@ -317,7 +317,7 @@ int dump_seg_iter(int (*)(paddr_t, paddr
 
 #ifndef NO_SPARSE_DUMP
 void sparse_dump_reset(void);
-void sparse_dump_mark(vaddr_t, vaddr_t, int);
+void sparse_dump_mark(void);
 void cpu_dump_prep_sparse(void);
 #endif
 
@@ -821,7 +821,7 @@ haltsys:
  * XXXfvdl share dumpcode.
  */
 
- /*
+/*
  * Perform assorted dump-related initialization tasks.  Assumes that
  * the maximum physical memory address will not increase afterwards.
  */
@@ -877,30 +877,33 @@ sparse_dump_reset(void)
  * address interval (which may wrap around the end of the space).
  */
 void
-sparse_dump_mark(vaddr_t vbegin, vaddr_t vend, int includep)
+sparse_dump_mark(void)
 {
-       pmap_t pmap;
-       paddr_t p;
-       vaddr_t v;
+       paddr_t p, pstart, pend;
+       struct vm_page *pg;
+       int i;
 
        /*
-        * If a partial page is called for, the whole page must be included.
+        * Mark all memory pages, then unmark pages that are uninteresting.
         */
-       if (includep) {
-               vbegin = rounddown(vbegin, PAGE_SIZE);
-               vend = roundup(vend, PAGE_SIZE);
-       } else {
-               vbegin = roundup(vbegin, PAGE_SIZE);
-               vend = rounddown(vend, PAGE_SIZE);
+
+       for (i = 0; i < mem_cluster_cnt; ++i) {
+               pstart = mem_clusters[i].start / PAGE_SIZE;
+               pend = pstart + mem_clusters[i].size / PAGE_SIZE;
+
+               for (p = pstart; p < pend; p++) {
+                       setbit(sparse_dump_physmap, p);
+               }
        }
+       for (i = 0; i < vm_nphysseg; i++) {
+               struct vm_physseg *seg = VM_PHYSMEM_PTR(i);
 
-       pmap = pmap_kernel();
-       for (v = vbegin; v != vend; v += PAGE_SIZE) {
-               if (pmap_extract(pmap, v, &p)) {
-                       if (includep)
-                               setbit(sparse_dump_physmap, p/PAGE_SIZE);
-                       else
-                               clrbit(sparse_dump_physmap, p/PAGE_SIZE);
+               for (pg = seg->pgs; pg < seg->lastpg; pg++) {
+                       if (pg->uanon || (pg->pqflags & PQ_FREE) ||
+                           (pg->uobject && pg->uobject->pgops)) {
+                               p = VM_PAGE_TO_PHYS(pg) / PAGE_SIZE;
+                               clrbit(sparse_dump_physmap, p);
+                       }
                }
        }
 }
@@ -914,7 +917,7 @@ cpu_dump_prep_sparse(void)
 {
        sparse_dump_reset();
        /* XXX could the alternate recursive page table be skipped? */
-       sparse_dump_mark((vaddr_t)PTE_BASE, (vaddr_t)KERN_BASE, 1);
+       sparse_dump_mark();
        /* Memory for I/O buffers could be unmarked here, for example. */
        /* The kernel text could also be unmarked, but gdb would be upset. */
 }
@@ -1206,6 +1209,7 @@ dumpsys_seg(paddr_t maddr, paddr_t bytes
                pmap_update(pmap_kernel());
 
                error = (*dump)(dumpdev, blkno, (void *)dumpspace, n);
+               pmap_kremove_local(dumpspace, n);
                if (error)
                        return error;
                maddr += n;
Index: sys/arch/x86/include/pmap.h
===================================================================
RCS file: /home/chs/netbsd/cvs/src/sys/arch/x86/include/pmap.h,v
retrieving revision 1.52
diff -u -p -r1.52 pmap.h
--- sys/arch/x86/include/pmap.h 20 Apr 2012 22:23:24 -0000      1.52
+++ sys/arch/x86/include/pmap.h 9 Nov 2012 14:27:30 -0000
@@ -255,6 +255,7 @@ void                pmap_load(void);
 paddr_t                pmap_init_tmp_pgtbl(paddr_t);
 void           pmap_remove_all(struct pmap *);
 void           pmap_ldt_sync(struct pmap *);
+void           pmap_kremove_local(vaddr_t, vsize_t);
 
 void           pmap_emap_enter(vaddr_t, paddr_t, vm_prot_t);
 void           pmap_emap_remove(vaddr_t, vsize_t);
Index: sys/arch/x86/x86/pmap.c
===================================================================
RCS file: /home/chs/netbsd/cvs/src/sys/arch/x86/x86/pmap.c,v
retrieving revision 1.178
diff -u -p -r1.178 pmap.c
--- sys/arch/x86/x86/pmap.c     15 Jun 2012 13:53:40 -0000      1.178
+++ sys/arch/x86/x86/pmap.c     9 Nov 2012 14:27:30 -0000
@@ -1118,8 +1118,8 @@ pmap_changeprot_local(vaddr_t va, vm_pro
  * => must be followed by call to pmap_update() before reuse of page
  */
 
-void
-pmap_kremove(vaddr_t sva, vsize_t len)
+static inline void
+pmap_kremove1(vaddr_t sva, vsize_t len, bool localonly)
 {
        pt_entry_t *pte, opte;
        vaddr_t va, eva;
@@ -1128,21 +1128,33 @@ pmap_kremove(vaddr_t sva, vsize_t len)
 
        kpreempt_disable();
        for (va = sva; va < eva; va += PAGE_SIZE) {
-               if (va < VM_MIN_KERNEL_ADDRESS)
-                       pte = vtopte(va);
-               else
-                       pte = kvtopte(va);
+               pte = kvtopte(va);
                opte = pmap_pte_testset(pte, 0); /* zap! */
-               if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) {
+               if ((opte & (PG_V | PG_U)) == (PG_V | PG_U) && !localonly) {
                        pmap_tlb_shootdown(pmap_kernel(), va, opte,
                            TLBSHOOT_KREMOVE);
                }
                KASSERT((opte & PG_PS) == 0);
                KASSERT((opte & PG_PVLIST) == 0);
        }
+       if (localonly) {
+               tlbflushg();
+       }
        kpreempt_enable();
 }
 
+void
+pmap_kremove(vaddr_t sva, vsize_t len)
+{
+       pmap_kremove1(sva, len, false);
+}
+
+void
+pmap_kremove_local(vaddr_t sva, vsize_t len)
+{
+       pmap_kremove1(sva, len, true);
+}
+
 /*
  * p m a p   i n i t   f u n c t i o n s
  *
@@ -2984,25 +2996,6 @@ pmap_virtual_space(vaddr_t *startp, vadd
 }
 
 /*
- * pmap_map: map a range of PAs into kvm.
- *
- * => used during crash dump
- * => XXX: pmap_map() should be phased out?
- */
-
-vaddr_t
-pmap_map(vaddr_t va, paddr_t spa, paddr_t epa, vm_prot_t prot)
-{
-       while (spa < epa) {
-               pmap_kenter_pa(va, spa, prot, 0);
-               va += PAGE_SIZE;
-               spa += PAGE_SIZE;
-       }
-       pmap_update(pmap_kernel());
-       return va;
-}
-
-/*
  * pmap_zero_page: zero a page
  */
 
Index: sys/kern/kern_pmf.c
===================================================================
RCS file: /home/chs/netbsd/cvs/src/sys/kern/kern_pmf.c,v
retrieving revision 1.35
diff -u -p -r1.35 kern_pmf.c
--- sys/kern/kern_pmf.c 5 Jun 2011 09:04:22 -0000       1.35
+++ sys/kern/kern_pmf.c 9 Nov 2012 14:45:35 -0000
@@ -381,6 +381,10 @@ shutdown_all(int how)
 void
 pmf_system_shutdown(int how)
 {
+
+       if (panicstr != NULL)
+               return;
+
        aprint_debug("Shutting down devices:");
        shutdown_all(how);
 }


Home | Main Index | Thread Index | Old Index