Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/kern ksyms(4): Take a complete snapshot on each open.



details:   https://anonhg.NetBSD.org/src/rev/35019e9e90ec
branches:  trunk
changeset: 1023497:35019e9e90ec
user:      riastradh <riastradh%NetBSD.org@localhost>
date:      Sat Sep 11 10:09:31 2021 +0000

description:
ksyms(4): Take a complete snapshot on each open.

- Snapshots are stored in pageable anonymous uvm objects.
- Snapshots are reference-counted so they can be reused across opens.
- Opening /dev/ksyms blocks module unload until snapshot is taken.
- Merely holding /dev/ksyms open does not block module unload.
- /dev/ksyms is now mmappable.

This slightly changes the behaviour of fstat(2) on /dev/ksyms -- it
is a little more useful now!  In particular, st_size is the size of
the symbol table.  Some other fields which were not very useful to
begin with -- st_dev, st_ino, st_mode, st_nlink, st_*time,
st_blksize, st_blocks -- are now different, and independent of the
file system on which the device node resides.

Discussed in

https://mail-index.netbsd.org/source-changes-d/2021/08/17/msg013425.html

This is option (3), adapted to make the ksyms snapshots pageable,
after options (1) and (2) posed practical problems.

diffstat:

 sys/kern/kern_ksyms.c |  606 ++++++++++++++++++++++++++++++++++++-------------
 1 files changed, 441 insertions(+), 165 deletions(-)

diffs (truncated from 858 to 300 lines):

diff -r 71951e5d29be -r 35019e9e90ec sys/kern/kern_ksyms.c
--- a/sys/kern/kern_ksyms.c     Sat Sep 11 10:09:13 2021 +0000
+++ b/sys/kern/kern_ksyms.c     Sat Sep 11 10:09:31 2021 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: kern_ksyms.c,v 1.102 2021/09/07 16:56:25 riastradh Exp $       */
+/*     $NetBSD: kern_ksyms.c,v 1.103 2021/09/11 10:09:31 riastradh Exp $       */
 
 /*-
  * Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -73,7 +73,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_ksyms.c,v 1.102 2021/09/07 16:56:25 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_ksyms.c,v 1.103 2021/09/11 10:09:31 riastradh Exp $");
 
 #if defined(_KERNEL) && defined(_KERNEL_OPT)
 #include "opt_copy_symtab.h"
@@ -86,6 +86,9 @@
 #include <sys/param.h>
 #include <sys/queue.h>
 #include <sys/exec.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/kauth.h>
 #include <sys/systm.h>
 #include <sys/conf.h>
 #include <sys/kmem.h>
@@ -94,6 +97,9 @@
 #include <sys/ksyms.h>
 #include <sys/kernel.h>
 #include <sys/intr.h>
+#include <sys/stat.h>
+
+#include <uvm/uvm_extern.h>
 
 #ifdef DDB
 #include <ddb/db_output.h>
@@ -104,6 +110,15 @@
 #include "ioconf.h"
 #endif
 
+struct ksyms_snapshot {
+       uint64_t                ks_refcnt;
+       uint64_t                ks_gen;
+       struct uvm_object       *ks_uobj;
+       size_t                  ks_size;
+       dev_t                   ks_dev;
+       int                     ks_maxlen;
+};
+
 #define KSYMS_MAX_ID   98304
 #ifdef KDTRACE_HOOKS
 static uint32_t ksyms_nmap[KSYMS_MAX_ID];      /* sorted symbol table map */
@@ -112,15 +127,20 @@
 #endif
 
 static int ksyms_maxlen;
-static uint64_t ksyms_opencnt;
-static struct ksyms_symtab *ksyms_last_snapshot;
 static bool ksyms_initted;
 static bool ksyms_loaded;
 static kmutex_t ksyms_lock __cacheline_aligned;
 static struct ksyms_symtab kernel_symtab;
+static kcondvar_t ksyms_cv;
+static struct lwp *ksyms_snapshotting;
+static struct ksyms_snapshot *ksyms_snapshot;
+static uint64_t ksyms_snapshot_gen;
 
 static void ksyms_hdr_init(const void *);
 static void ksyms_sizes_calc(void);
+static struct ksyms_snapshot *ksyms_snapshot_alloc(int, size_t, dev_t,
+    uint64_t);
+static void ksyms_snapshot_release(struct ksyms_snapshot *);
 
 #ifdef KSYMS_DEBUG
 #define        FOLLOW_CALLS            1
@@ -245,6 +265,7 @@
 
        if (!ksyms_initted) {
                mutex_init(&ksyms_lock, MUTEX_DEFAULT, IPL_NONE);
+               cv_init(&ksyms_cv, "ksyms");
                ksyms_initted = true;
        }
 }
@@ -328,7 +349,6 @@
        tab->sd_minsym = UINTPTR_MAX;
        tab->sd_maxsym = 0;
        tab->sd_usroffset = 0;
-       tab->sd_gone = false;
        tab->sd_ctfstart = ctfstart;
        tab->sd_ctfsize = ctfsize;
        tab->sd_nmap = nmap;
@@ -446,9 +466,9 @@
        KASSERT(cold || mutex_owned(&ksyms_lock));
 
        /*
-        * Ensure ddb never witnesses an inconsistent state of the
-        * queue, unless memory is so corrupt that we crash in
-        * TAILQ_INSERT_TAIL.
+        * Publish the symtab.  Do this at splhigh to ensure ddb never
+        * witnesses an inconsistent state of the queue, unless memory
+        * is so corrupt that we crash in TAILQ_INSERT_TAIL.
         */
        s = splhigh();
        TAILQ_INSERT_TAIL(&ksyms_symtabs, tab, sd_queue);
@@ -557,6 +577,9 @@
            kernel_symtab.sd_symstart, kernel_symtab.sd_strstart,
            (long)kernel_symtab.sd_symsize/sizeof(Elf_Sym));
 #endif
+
+       /* Should be no snapshot to invalidate yet.  */
+       KASSERT(ksyms_snapshot == NULL);
 }
 
 /*
@@ -577,6 +600,9 @@
        ksyms_hdr_init(ehdr);
        addsymtab("netbsd", symstart, symsize, strstart, strsize,
            &kernel_symtab, symstart, NULL, 0, ksyms_nmap);
+
+       /* Should be no snapshot to invalidate yet.  */
+       KASSERT(ksyms_snapshot == NULL);
 }
 
 /*
@@ -601,8 +627,6 @@
 #endif
 
        TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
-               if (__predict_false(st->sd_gone))
-                       continue;
                if (mod != NULL && strcmp(st->sd_name, mod))
                        continue;
                if ((es = findsym(sym, st, type)) != NULL) {
@@ -636,8 +660,6 @@
 
        mutex_enter(&ksyms_lock);
        TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
-               if (__predict_false(st->sd_gone))
-                       continue;
                if (mod != NULL && strcmp(st->sd_name, mod))
                        continue;
                break;
@@ -671,8 +693,6 @@
 
        /* find the module */
        TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
-               if (__predict_false(st->sd_gone))
-                       continue;
                if (mod != NULL && strcmp(st->sd_name, mod))
                        continue;
 
@@ -716,8 +736,6 @@
                return ENOENT;
 
        TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
-               if (st->sd_gone)
-                       continue;
                if (v < st->sd_minsym || v > st->sd_maxsym)
                        continue;
                sz = st->sd_symsize/sizeof(Elf_Sym);
@@ -762,6 +780,7 @@
     char *strstart, vsize_t strsize)
 {
        struct ksyms_symtab *st;
+       struct ksyms_snapshot *ks;
        void *nmap;
 
        st = kmem_zalloc(sizeof(*st), KM_SLEEP);
@@ -770,7 +789,12 @@
        mutex_enter(&ksyms_lock);
        addsymtab(name, symstart, symsize, strstart, strsize, st, symstart,
            NULL, 0, nmap);
+       ks = ksyms_snapshot;
+       ksyms_snapshot = NULL;
        mutex_exit(&ksyms_lock);
+
+       if (ks)
+               ksyms_snapshot_release(ks);
 }
 
 /*
@@ -780,37 +804,48 @@
 ksyms_modunload(const char *name)
 {
        struct ksyms_symtab *st;
-       bool do_free = false;
+       struct ksyms_snapshot *ks;
        int s;
 
        mutex_enter(&ksyms_lock);
        TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
-               if (st->sd_gone)
-                       continue;
                if (strcmp(name, st->sd_name) != 0)
                        continue;
-               st->sd_gone = true;
-               ksyms_sizes_calc();
-               if (ksyms_opencnt == 0) {
-                       /*
-                        * Ensure ddb never witnesses an inconsistent
-                        * state of the queue, unless memory is so
-                        * corrupt that we crash in TAILQ_REMOVE.
-                        */
-                       s = splhigh();
-                       TAILQ_REMOVE(&ksyms_symtabs, st, sd_queue);
-                       splx(s);
-                       do_free = true;
-               }
                break;
        }
-       mutex_exit(&ksyms_lock);
        KASSERT(st != NULL);
 
-       if (do_free) {
-               kmem_free(st->sd_nmap, st->sd_nmapsize * sizeof(uint32_t));
-               kmem_free(st, sizeof(*st));
-       }
+       /* Wait for any snapshot in progress to complete.  */
+       while (ksyms_snapshotting)
+               cv_wait(&ksyms_cv, &ksyms_lock);
+
+       /*
+        * Remove the symtab.  Do this at splhigh to ensure ddb never
+        * witnesses an inconsistent state of the queue, unless memory
+        * is so corrupt that we crash in TAILQ_REMOVE.
+        */
+       s = splhigh();
+       TAILQ_REMOVE(&ksyms_symtabs, st, sd_queue);
+       splx(s);
+
+       /* Recompute the ksyms sizes now that we've removed st.  */
+       ksyms_sizes_calc();
+
+       /* Invalidate the global ksyms snapshot.  */
+       ks = ksyms_snapshot;
+       ksyms_snapshot = NULL;
+       mutex_exit(&ksyms_lock);
+
+       /*
+        * No more references are possible.  Free the name map and the
+        * symtab itself, which we had allocated in ksyms_modload.
+        */
+       kmem_free(st->sd_nmap, st->sd_nmapsize * sizeof(uint32_t));
+       kmem_free(st, sizeof(*st));
+
+       /* Release the formerly global ksyms snapshot, if any.  */
+       if (ks)
+               ksyms_snapshot_release(ks);
 }
 
 #ifdef DDB
@@ -830,8 +865,6 @@
                return ENOENT;
 
        TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
-               if (st->sd_gone)
-                       continue;
                if (mod && strcmp(mod, st->sd_name))
                        continue;
                sb = st->sd_strstart - st->sd_usroffset;
@@ -893,8 +926,6 @@
 
        ksyms_symsz = ksyms_strsz = 0;
        TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
-               if (__predict_false(st->sd_gone))
-                       continue;
                delta = ksyms_strsz - st->sd_usroffset;
                if (delta != 0) {
                        for (i = 0; i < st->sd_symsize/sizeof(Elf_Sym); i++)
@@ -997,19 +1028,183 @@
        SHTCOPY(".SUNW_ctf");
 }
 
+static struct ksyms_snapshot *
+ksyms_snapshot_alloc(int maxlen, size_t size, dev_t dev, uint64_t gen)
+{
+       struct ksyms_snapshot *ks;
+
+       ks = kmem_zalloc(sizeof(*ks), KM_SLEEP);
+       ks->ks_refcnt = 1;
+       ks->ks_gen = gen;
+       ks->ks_uobj = uao_create(size, 0);
+       ks->ks_size = size;
+       ks->ks_dev = dev;
+       ks->ks_maxlen = maxlen;
+
+       return ks;
+}
+
+static void
+ksyms_snapshot_release(struct ksyms_snapshot *ks)
+{
+       uint64_t refcnt;
+
+       mutex_enter(&ksyms_lock);
+       refcnt = --ks->ks_refcnt;
+       mutex_exit(&ksyms_lock);
+



Home | Main Index | Thread Index | Old Index