Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/arch/amd64 Implement sparse dumps for amd64 (copied from...



details:   https://anonhg.NetBSD.org/src/rev/008a4d8b90f1
branches:  trunk
changeset: 768789:008a4d8b90f1
user:      christos <christos%NetBSD.org@localhost>
date:      Sat Aug 27 16:23:44 2011 +0000

description:
Implement sparse dumps for amd64 (copied from i386). Disabled for now via
sysctl.
XXX: most of the code can be merged.

diffstat:

 sys/arch/amd64/amd64/machdep.c |  594 ++++++++++++++++++++++++++++++++--------
 sys/arch/amd64/include/pmap.h  |    3 +-
 2 files changed, 477 insertions(+), 120 deletions(-)

diffs (truncated from 751 to 300 lines):

diff -r cbf4a9dab539 -r 008a4d8b90f1 sys/arch/amd64/amd64/machdep.c
--- a/sys/arch/amd64/amd64/machdep.c    Sat Aug 27 16:12:54 2011 +0000
+++ b/sys/arch/amd64/amd64/machdep.c    Sat Aug 27 16:23:44 2011 +0000
@@ -1,7 +1,7 @@
-/*     $NetBSD: machdep.c,v 1.164 2011/08/11 18:11:17 cherry Exp $     */
+/*     $NetBSD: machdep.c,v 1.165 2011/08/27 16:23:44 christos Exp $   */
 
 /*-
- * Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008
+ * Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008, 2011
  *     The NetBSD Foundation, Inc.
  * All rights reserved.
  *
@@ -9,6 +9,10 @@
  * by Charles M. Hannum and by Jason R. Thorpe of the Numerical Aerospace
  * Simulation Facility, NASA Ames Research Center.
  *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Coyote Point Systems, Inc. which was written under contract to Coyote
+ * Point by Jed Davis and Devon O'Dell.
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -107,7 +111,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.164 2011/08/11 18:11:17 cherry Exp $");
+__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.165 2011/08/27 16:23:44 christos Exp $");
 
 /* #define XENDEBUG_LOW  */
 
@@ -239,6 +243,25 @@
 uint64_t       dumpmem_high;
 int    cpu_class;
 
+
+#ifndef NO_SPARSE_DUMP
+int sparse_dump = 0;   /* nonzero selects a sparse dump; exported as sysctl machdep.sparse_dump */
+
+paddr_t max_paddr = 0; /* highest end address of any mem_clusters[] entry; set by dump_misc_init() */
+unsigned char *sparse_dump_physmap;    /* bitmap indexed by physical page number (paddr / PAGE_SIZE) */
+#endif
+
+char *dump_headerbuf, *dump_headerbuf_ptr;     /* header output buffer; _ptr is presumably the fill position - confirm */
+#define dump_headerbuf_size PAGE_SIZE
+#define dump_headerbuf_end (dump_headerbuf + dump_headerbuf_size)
+#define dump_headerbuf_avail (dump_headerbuf_end - dump_headerbuf_ptr)
+daddr_t dump_header_blkno;
+
+size_t dump_nmemsegs;  /* segments counted by dump_seg_count_range() */
+size_t dump_npages;    /* pages counted by dump_seg_count_range() */
+size_t dump_header_size;       /* computed in dump_seg_prep(), rounded up to a disk block */
+size_t dump_totalbytesleft;
+
 vaddr_t        msgbuf_vaddr;
 paddr_t msgbuf_paddr;
 
@@ -290,8 +313,28 @@
 int    cpu_dump(void);
 int    cpu_dumpsize(void);
 u_long cpu_dump_mempagecnt(void);
+void   dodumpsys(void);
 void   dumpsys(void);
-void   dodumpsys(void);
+
+void dump_misc_init(void);
+void dump_seg_prep(void);
+int dump_seg_iter(int (*)(paddr_t, paddr_t));
+
+#ifndef NO_SPARSE_DUMP
+void sparse_dump_reset(void);
+void sparse_dump_mark(vaddr_t, vaddr_t, int);
+void cpu_dump_prep_sparse(void);
+#endif
+
+void dump_header_start(void);
+int dump_header_flush(void);
+int dump_header_addbytes(const void*, size_t);
+int dump_header_addseg(paddr_t, paddr_t);
+int dump_header_finish(void);
+
+int dump_seg_count_range(paddr_t, paddr_t);
+int dumpsys_seg(paddr_t, paddr_t);
+
 void   init_x86_64(paddr_t);
 
 /*
@@ -530,6 +573,14 @@
                       SYSCTL_DESCR("Whether the kernel uses PAE"),
                       NULL, 1, NULL, 0,
                       CTL_MACHDEP, CTL_CREATE, CTL_EOL);
+#ifndef NO_SPARSE_DUMP
+       /* XXXjld Does this really belong under machdep, and not e.g. kern? */
+       sysctl_createv(clog, 0, NULL, NULL,
+                      CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
+                      CTLTYPE_INT, "sparse_dump", NULL,
+                      NULL, 0, &sparse_dump, 0,
+                      CTL_MACHDEP, CTL_CREATE, CTL_EOL);
+#endif
 }
 
 void
@@ -746,6 +797,259 @@
  * XXXfvdl share dumpcode.
  */
 
+/*
+ * Perform assorted dump-related initialization tasks: record the
+ * highest physical address covered by mem_clusters[] in max_paddr,
+ * then allocate the sparse-dump page bitmap and the dump header
+ * buffer.  Assumes that the maximum physical memory address will not
+ * increase afterwards.
+ *
+ * Once dump_headerbuf has been allocated, further calls are no-ops.
+ * If the bitmap cannot be sized or allocated, a message is printed and
+ * sparse_dump is cleared; if the header buffer cannot be allocated, a
+ * message is printed and dump_headerbuf stays NULL, so a later call
+ * retries (without allocating a second bitmap).
+ *
+ * NOTE: sparse_dump can still be turned back on through sysctl, and
+ * the routines that use sparse_dump_physmap do not check it for NULL.
+ */
+void
+dump_misc_init(void)
+{
+#ifndef NO_SPARSE_DUMP
+       size_t physmap_size;
+       int i;
+#endif
+
+       if (dump_headerbuf != NULL)
+               return; /* already called */
+
+#ifndef NO_SPARSE_DUMP
+       for (i = 0; i < mem_cluster_cnt; ++i) {
+               paddr_t top = mem_clusters[i].start + mem_clusters[i].size;
+               if (max_paddr < top)
+                       max_paddr = top;
+       }
+#ifdef DEBUG
+       printf("dump_misc_init: max_paddr = 0x%lx\n",
+           (unsigned long)max_paddr);
+#endif
+
+       /* Same size expression as sparse_dump_reset(); keep them in sync. */
+       physmap_size = roundup(max_paddr / (PAGE_SIZE * NBBY), PAGE_SIZE);
+       if (physmap_size == 0) {
+               /* Never ask uvm_km_alloc() for a zero-sized region. */
+               printf("dump_misc_init: no memory clusters to map; "
+                   "sparse dumps disabled\n");
+               sparse_dump = 0;
+       } else if (sparse_dump_physmap == NULL) {
+               /*
+                * Only allocate once: this point is reached again if a
+                * previous header buffer allocation failed.
+                */
+               sparse_dump_physmap = (void *)uvm_km_alloc(kernel_map,
+                   physmap_size,
+                   PAGE_SIZE, UVM_KMF_WIRED|UVM_KMF_ZERO);
+               if (sparse_dump_physmap == NULL) {
+                       printf("dump_misc_init: cannot allocate page bitmap; "
+                           "sparse dumps disabled\n");
+                       sparse_dump = 0;
+               }
+       }
+#endif
+       dump_headerbuf = (void *)uvm_km_alloc(kernel_map,
+           dump_headerbuf_size,
+           PAGE_SIZE, UVM_KMF_WIRED|UVM_KMF_ZERO);
+       if (dump_headerbuf == NULL)
+               printf("dump_misc_init: cannot allocate dump header buffer\n");
+       /* XXXjld should also disable dumps entirely on failure. */
+}
+
+#ifndef NO_SPARSE_DUMP
+/*
+ * Empty the set of pages selected for a sparse dump by zeroing the
+ * whole physical-page bitmap.  The length is the same expression that
+ * dump_misc_init() uses to size the allocation.
+ */
+void
+sparse_dump_reset(void)
+{
+       const size_t physmap_bytes =
+           roundup(max_paddr / (PAGE_SIZE * NBBY), PAGE_SIZE);
+
+       memset(sparse_dump_physmap, 0, physmap_bytes);
+}
+
+/*
+ * Include or exclude pages in a sparse dump, by half-open virtual
+ * address interval (which may wrap around the end of the space).
+ *
+ * vbegin/vend: the interval [vbegin, vend) of kernel virtual addresses.
+ * includep:    nonzero to set the pages' bits in sparse_dump_physmap,
+ *              zero to clear them.
+ *
+ * Each page is translated through the kernel pmap; addresses with no
+ * mapping are skipped.  Mappings whose physical page lies beyond the
+ * range covered by the bitmap are skipped as well: pages outside of
+ * memory proper may be mapped into kva, and touching their bit would
+ * write past the end of sparse_dump_physmap.
+ */
+void
+sparse_dump_mark(vaddr_t vbegin, vaddr_t vend, int includep)
+{
+       pmap_t pmap;
+       paddr_t p, pfn, nbits;
+       vaddr_t v;
+
+       /*
+        * If a partial page is called for, the whole page must be included.
+        */
+       if (includep) {
+               vbegin = rounddown(vbegin, PAGE_SIZE);
+               vend = roundup(vend, PAGE_SIZE);
+       } else {
+               vbegin = roundup(vbegin, PAGE_SIZE);
+               vend = rounddown(vend, PAGE_SIZE);
+       }
+
+       /*
+        * Number of bits in the bitmap: the byte size used by
+        * dump_misc_init() for the allocation, times NBBY.
+        */
+       nbits = roundup(max_paddr / (PAGE_SIZE * NBBY), PAGE_SIZE);
+       nbits *= NBBY;
+
+       pmap = pmap_kernel();
+       /* "!=" rather than "<" so that a wrapping interval still works. */
+       for (v = vbegin; v != vend; v += PAGE_SIZE) {
+               if (!pmap_extract(pmap, v, &p))
+                       continue;       /* not mapped */
+               pfn = p / PAGE_SIZE;
+               if (pfn >= nbits)
+                       continue;       /* no bit for this page */
+               if (includep)
+                       setbit(sparse_dump_physmap, pfn);
+               else
+                       clrbit(sparse_dump_physmap, pfn);
+       }
+}
+
+/*
+ * Machine-dependent choice of what goes into a sparse dump: start
+ * from an empty page set, then mark every mapped page in the virtual
+ * range running from PTE_BASE to KERN_BASE.
+ */
+void
+cpu_dump_prep_sparse(void)
+{
+       const vaddr_t mark_from = (vaddr_t)PTE_BASE;
+       const vaddr_t mark_to = (vaddr_t)KERN_BASE;
+
+       sparse_dump_reset();
+
+       /* XXX could the alternate recursive page table be skipped? */
+       sparse_dump_mark(mark_from, mark_to, 1);
+
+       /*
+        * Memory for I/O buffers could be unmarked here, for example.
+        * The kernel text could also be unmarked, but gdb would be upset.
+        */
+}
+#endif
+
+/*
+ * Walk the memory segments that make up the dump, invoking
+ * callback(start, size) once per segment.  There is no
+ * environment-pointer argument because no current user needs one.
+ *
+ * For a full dump each mem_clusters[] entry is one segment.  For a
+ * sparse dump each maximal run of consecutive pages whose bit is set
+ * in sparse_dump_physmap, within a single memory cluster, is one
+ * segment.
+ *
+ * Stops at, and returns, the first nonzero value returned by the
+ * callback; returns 0 if every call succeeded.
+ *
+ * To be used only after dump_seg_prep is called to set things up.
+ */
+int
+dump_seg_iter(int (*callback)(paddr_t, paddr_t))
+{
+       int error, i;
+
+       for (i = 0; i < mem_cluster_cnt; ++i) {
+#ifndef NO_SPARSE_DUMP
+               /*
+                * Scan the bitmap cluster by cluster instead of end to
+                * end: pages outside of memory proper may have been
+                * mapped into kva; they might be devices that wouldn't
+                * appreciate being arbitrarily read, and including
+                * them could also break the assumption that a sparse
+                * dump is never larger than a full one.
+                */
+               if (sparse_dump) {
+                       paddr_t pa, run_start, cluster_end;
+                       int in_run = 0;
+
+                       cluster_end = mem_clusters[i].start +
+                           mem_clusters[i].size;
+                       /* rounding is probably unnecessary */
+                       run_start = rounddown(mem_clusters[i].start,
+                           PAGE_SIZE);
+
+                       for (pa = run_start; pa < cluster_end;
+                           pa += PAGE_SIZE) {
+                               int marked = isset(sparse_dump_physmap,
+                                   pa/PAGE_SIZE);
+
+                               if (marked && !in_run) {
+                                       /* a new run begins here */
+                                       run_start = pa;
+                               } else if (!marked && in_run) {
+                                       /* the run ended on the previous page */
+                                       error = callback(run_start,
+                                           pa - run_start);
+                                       if (error)
+                                               return error;
+                               }
+                               in_run = marked;
+                       }
+                       if (in_run) {
+                               /* run reaches the end of the cluster */
+                               error = callback(run_start, pa - run_start);
+                               if (error)
+                                       return error;
+                       }
+                       continue;
+               }
+#endif
+               error = callback(mem_clusters[i].start, mem_clusters[i].size);
+               if (error)
+                       return error;
+       }
+       return 0;
+}
+
+/*
+ * Get ready for an impending core dump: select the pages (sparse
+ * dumps only), count the resulting segments and pages, and work out
+ * how large the dump header will be.
+ */
+void
+dump_seg_prep(void)
+{
+       size_t hdrsize;
+
+#ifndef NO_SPARSE_DUMP
+       if (sparse_dump)
+               cpu_dump_prep_sparse();
+#endif
+
+       /* Recount from scratch; dump_seg_count_range() accumulates. */
+       dump_npages = 0;
+       dump_nmemsegs = 0;
+       dump_seg_iter(dump_seg_count_range);
+
+       /* Three aligned header parts, padded out to a whole disk block. */
+       hdrsize = ALIGN(sizeof(kcore_seg_t));
+       hdrsize += ALIGN(sizeof(cpu_kcore_hdr_t));
+       hdrsize += ALIGN(dump_nmemsegs * sizeof(phys_ram_seg_t));
+       dump_header_size = roundup(hdrsize, dbtob(1));
+
+       /*
+        * savecore(8) will read this to decide how many pages to
+        * copy, and cpu_dumpconf has already used the pessimistic
+        * value to set dumplo, so it's time to tell the truth.
+        */
+       dumpsize = dump_npages; /* XXX could these just be one variable? */
+}
+
+/*
+ * dump_seg_iter() callback: account for one more segment and for the
+ * whole pages it contains.  The start address is not used.  Always
+ * returns 0.
+ */
+int
+dump_seg_count_range(paddr_t start, paddr_t size)
+{
+       dump_npages += size / PAGE_SIZE;
+       dump_nmemsegs++;
+
+       return 0;
+}
+
+/*
+ * A sparse dump's header may be rather large, due to the number of
+ * "segments" emitted.  These routines manage a simple output buffer,
+ * so that the header can be written to disk incrementally.



Home | Main Index | Thread Index | Old Index