Source-Changes-HG archive


[src/trunk]: src/usr.sbin/tprof Improve tprof(4)



details:   https://anonhg.NetBSD.org/src/rev/ffbb661e80f9
branches:  trunk
changeset: 372478:ffbb661e80f9
user:      ryo <ryo%NetBSD.org@localhost>
date:      Thu Dec 01 00:32:52 2022 +0000

description:
Improve tprof(4)

- Multiple events can now be handled simultaneously.
- Counters are now configured in advance with TPROF_IOC_CONFIGURE_EVENT,
  instead of at TPROF_IOC_START.
- The configured counters can be started and stopped repeatedly via
  TPROF_IOC_START/TPROF_IOC_STOP (see the sketch after this list).
- The value of a performance counter can be read at any time as a 64-bit
  value with TPROF_IOC_GETCOUNTS.
- Code common to all backends is now handled in tprof.c as much as possible,
  and the functions on the tprof_backend side have been reimplemented as more
  primitive operations.
- The reset value on counter overflow used for profiling can now be adjusted.
  By default it is derived from the CPU clock (the speed of the cycle counter)
  and TPROF_HZ, but for some events that value is too large to yield enough
  samples. When configuring an event counter, the reset value can be given
  either as a ratio of the default or as an absolute value.
- Due to the overall changes, the API and ABI have changed; TPROF_VERSION and
  TPROF_BACKEND_VERSION have been updated accordingly.
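
For readers who want to see how the reworked ioctls fit together, below is a
minimal userland sketch of the new model. It is an illustration only: apart
from the ioctl names and p_value, the field names (p_counter, p_event), the
counter-mask argument, and the /dev/tprof path are assumptions made for this
sketch; the authoritative definitions are in sys/dev/tprof/tprof_ioctl.h and
sys/dev/tprof/tprof_types.h as changed by this commit.

#include <sys/ioctl.h>

#include <dev/tprof/tprof_ioctl.h>

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	tprof_param_t param;
	tprof_countermask_t mask = 1;	/* counter #0 only (assumed encoding) */
	int fd;

	fd = open("/dev/tprof", O_RDWR);
	if (fd == -1)
		return 1;

	/* Configure counter #0 up front (previously done at TPROF_IOC_START). */
	memset(&param, 0, sizeof(param));
	param.p_counter = 0;	/* assumed field: which counter to program */
	param.p_event = 0x11;	/* assumed: backend-specific event number */
	param.p_value = 0;	/* assumed: 0 keeps the default reset value
				 * derived from the CPU clock and TPROF_HZ */
	if (ioctl(fd, TPROF_IOC_CONFIGURE_EVENT, &param) == -1)
		goto out;

	/* A configured counter can be started and stopped repeatedly. */
	if (ioctl(fd, TPROF_IOC_START, &mask) == -1)
		goto out;
	sleep(1);

	/*
	 * The 64-bit counter values can be read at any time while running,
	 * e.g. ioctl(fd, TPROF_IOC_GETCOUNTS, &counts) with an argument
	 * whose layout is defined in tprof_types.h (not shown here).
	 */

	ioctl(fd, TPROF_IOC_STOP, &mask);
out:
	close(fd);
	return 0;
}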

diffstat:

 sys/arch/aarch64/include/armreg.h |    7 +-
 sys/dev/tprof/tprof.c             |  452 +++++++++++++++++++++++++++++++------
 sys/dev/tprof/tprof.h             |   36 ++-
 sys/dev/tprof/tprof_armv7.c       |  260 ++++++++++++--------
 sys/dev/tprof/tprof_armv8.c       |  255 +++++++++++++-------
 sys/dev/tprof/tprof_ioctl.h       |   18 +-
 sys/dev/tprof/tprof_types.h       |   45 +++-
 sys/dev/tprof/tprof_x86.c         |   24 +-
 sys/dev/tprof/tprof_x86_amd.c     |  185 ++++++++++-----
 sys/dev/tprof/tprof_x86_intel.c   |  191 ++++++++++-----
 usr.sbin/tprof/tprof.8            |    5 +-
 usr.sbin/tprof/tprof.c            |   65 +++-
 usr.sbin/tprof/tprof_analyze.c    |   44 ++-
 13 files changed, 1122 insertions(+), 465 deletions(-)

diffs (truncated from 2443 to 300 lines):

diff -r ebde77086a75 -r ffbb661e80f9 sys/arch/aarch64/include/armreg.h
--- a/sys/arch/aarch64/include/armreg.h Thu Dec 01 00:29:51 2022 +0000
+++ b/sys/arch/aarch64/include/armreg.h Thu Dec 01 00:32:52 2022 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: armreg.h,v 1.62 2022/12/01 00:29:10 ryo Exp $ */
+/* $NetBSD: armreg.h,v 1.63 2022/12/01 00:32:52 ryo Exp $ */
 
 /*-
  * Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -248,6 +248,10 @@
 #define         ID_AA64DFR0_EL1_PMUVER_NONE     0
 #define         ID_AA64DFR0_EL1_PMUVER_V3       1
 #define         ID_AA64DFR0_EL1_PMUVER_NOV3     2
+#define         ID_AA64DFR0_EL1_PMUVER_V3P1     4
+#define         ID_AA64DFR0_EL1_PMUVER_V3P4     5
+#define         ID_AA64DFR0_EL1_PMUVER_V3P5     6
+#define         ID_AA64DFR0_EL1_PMUVER_V3P7     7
 #define         ID_AA64DFR0_EL1_PMUVER_IMPL     15
 #define        ID_AA64DFR0_EL1_TRACEVER        __BITS(4,7)
 #define         ID_AA64DFR0_EL1_TRACEVER_NONE   0
@@ -1221,6 +1225,7 @@
 #define        PMCR_IMP                __BITS(31,24)   // Implementor code
 #define        PMCR_IDCODE             __BITS(23,16)   // Identification code
 #define        PMCR_N                  __BITS(15,11)   // Number of event counters
+#define        PMCR_LP                 __BIT(7)        // Long event counter enable
 #define        PMCR_LC                 __BIT(6)        // Long cycle counter enable
 #define        PMCR_DP                 __BIT(5)        // Disable cycle counter when event
                                                // counting is prohibited
diff -r ebde77086a75 -r ffbb661e80f9 sys/dev/tprof/tprof.c
--- a/sys/dev/tprof/tprof.c     Thu Dec 01 00:29:51 2022 +0000
+++ b/sys/dev/tprof/tprof.c     Thu Dec 01 00:32:52 2022 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: tprof.c,v 1.18 2022/12/01 00:27:59 ryo Exp $   */
+/*     $NetBSD: tprof.c,v 1.19 2022/12/01 00:32:52 ryo Exp $   */
 
 /*-
  * Copyright (c)2008,2009,2010 YAMAMOTO Takashi,
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.18 2022/12/01 00:27:59 ryo Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.19 2022/12/01 00:32:52 ryo Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -42,12 +42,17 @@
 #include <sys/proc.h>
 #include <sys/queue.h>
 #include <sys/workqueue.h>
+#include <sys/xcall.h>
 
 #include <dev/tprof/tprof.h>
 #include <dev/tprof/tprof_ioctl.h>
 
 #include "ioconf.h"
 
+#ifndef TPROF_HZ
+#define TPROF_HZ       10000
+#endif
+
 /*
  * locking order:
  *     tprof_reader_lock -> tprof_lock
@@ -73,7 +78,7 @@
 } tprof_buf_t;
 #define        TPROF_BUF_BYTESIZE(sz) \
        (sizeof(tprof_buf_t) + (sz) * sizeof(tprof_sample_t))
-#define        TPROF_MAX_SAMPLES_PER_BUF       10000
+#define        TPROF_MAX_SAMPLES_PER_BUF       (TPROF_HZ * 2)
 
 #define        TPROF_MAX_BUF                   100
 
@@ -85,14 +90,20 @@
 } __aligned(CACHE_LINE_SIZE) tprof_cpu_t;
 
 typedef struct tprof_backend {
+       /*
+        * tprof_backend_softc_t must be passed as an argument to the interrupt
+        * handler, but this is difficult to arrange on armv7/v8, so
+        * tprof_backend is exposed instead. Additionally, the softc must be
+        * placed at the beginning of struct tprof_backend.
+        */
+       tprof_backend_softc_t tb_softc;
+
        const char *tb_name;
        const tprof_backend_ops_t *tb_ops;
        LIST_ENTRY(tprof_backend) tb_list;
-       int tb_usecount;        /* S: */
 } tprof_backend_t;
 
 static kmutex_t tprof_lock;
-static bool tprof_running;             /* s: */
 static u_int tprof_nworker;            /* L: # of running worker LWPs */
 static lwp_t *tprof_owner;
 static STAILQ_HEAD(, tprof_buf) tprof_list; /* L: global buffer list */
@@ -101,7 +112,7 @@
 static struct percpu *tprof_cpus __read_mostly;        /* tprof_cpu_t * */
 static u_int tprof_samples_per_buf;
 
-static tprof_backend_t *tprof_backend; /* S: */
+tprof_backend_t *tprof_backend;        /* S: */
 static LIST_HEAD(, tprof_backend) tprof_backends =
     LIST_HEAD_INITIALIZER(tprof_backend); /* S: */
 
@@ -193,6 +204,7 @@
 {
        tprof_cpu_t * const c = tprof_curcpu();
        tprof_buf_t *buf;
+       tprof_backend_t *tb;
        bool shouldstop;
 
        KASSERT(wk == &c->c_work);
@@ -207,7 +219,8 @@
         * and put it on the global list for read(2).
         */
        mutex_enter(&tprof_lock);
-       shouldstop = !tprof_running;
+       tb = tprof_backend;
+       shouldstop = (tb == NULL || tb->tb_softc.sc_ctr_running_mask == 0);
        if (shouldstop) {
                KASSERT(tprof_nworker > 0);
                tprof_nworker--;
@@ -283,17 +296,190 @@
 }
 
 static int
-tprof_start(const tprof_param_t *param)
+tprof_getncounters(u_int *ncounters)
+{
+       tprof_backend_t *tb;
+
+       tb = tprof_backend;
+       if (tb == NULL)
+               return ENOENT;
+
+       *ncounters = tb->tb_ops->tbo_ncounters();
+       return 0;
+}
+
+static void
+tprof_start_cpu(void *arg1, void *arg2)
+{
+       tprof_backend_t *tb = arg1;
+       tprof_countermask_t runmask = (uintptr_t)arg2;
+
+       tb->tb_ops->tbo_start(runmask);
+}
+
+static void
+tprof_stop_cpu(void *arg1, void *arg2)
+{
+       tprof_backend_t *tb = arg1;
+       tprof_countermask_t stopmask = (uintptr_t)arg2;
+
+       tb->tb_ops->tbo_stop(stopmask);
+}
+
+static int
+tprof_start(tprof_countermask_t runmask)
 {
        CPU_INFO_ITERATOR cii;
        struct cpu_info *ci;
+       tprof_backend_t *tb;
+       uint64_t xc;
        int error;
-       uint64_t freq;
-       tprof_backend_t *tb;
+       bool firstrun;
 
        KASSERT(mutex_owned(&tprof_startstop_lock));
-       if (tprof_running) {
-               error = EBUSY;
+
+       tb = tprof_backend;
+       if (tb == NULL) {
+               error = ENOENT;
+               goto done;
+       }
+
+       runmask &= ~tb->tb_softc.sc_ctr_running_mask;
+       runmask &= tb->tb_softc.sc_ctr_configured_mask;
+       if (runmask == 0) {
+               /*
+                * targets are already running.
+                * unconfigured counters are ignored.
+                */
+               error = 0;
+               goto done;
+       }
+
+       firstrun = (tb->tb_softc.sc_ctr_running_mask == 0);
+       if (firstrun) {
+               if (tb->tb_ops->tbo_establish != NULL) {
+                       error = tb->tb_ops->tbo_establish(&tb->tb_softc);
+                       if (error != 0)
+                               goto done;
+               }
+
+               tprof_samples_per_buf = TPROF_MAX_SAMPLES_PER_BUF;
+               error = workqueue_create(&tprof_wq, "tprofmv", tprof_worker,
+                   NULL, PRI_NONE, IPL_SOFTCLOCK, WQ_MPSAFE | WQ_PERCPU);
+               if (error != 0) {
+                       if (tb->tb_ops->tbo_disestablish != NULL)
+                               tb->tb_ops->tbo_disestablish(&tb->tb_softc);
+                       goto done;
+               }
+
+               for (CPU_INFO_FOREACH(cii, ci)) {
+                       tprof_cpu_t * const c = tprof_cpu(ci);
+                       tprof_buf_t *new;
+                       tprof_buf_t *old;
+
+                       new = tprof_buf_alloc();
+                       old = tprof_buf_switch(c, new);
+                       if (old != NULL) {
+                               tprof_buf_free(old);
+                       }
+                       callout_init(&c->c_callout, CALLOUT_MPSAFE);
+                       callout_setfunc(&c->c_callout, tprof_kick, ci);
+               }
+       }
+
+       runmask &= tb->tb_softc.sc_ctr_configured_mask;
+       xc = xc_broadcast(0, tprof_start_cpu, tb, (void *)(uintptr_t)runmask);
+       xc_wait(xc);
+       mutex_enter(&tprof_lock);
+       tb->tb_softc.sc_ctr_running_mask |= runmask;
+       mutex_exit(&tprof_lock);
+
+       if (firstrun) {
+               for (CPU_INFO_FOREACH(cii, ci)) {
+                       tprof_cpu_t * const c = tprof_cpu(ci);
+
+                       mutex_enter(&tprof_lock);
+                       tprof_nworker++;
+                       mutex_exit(&tprof_lock);
+                       workqueue_enqueue(tprof_wq, &c->c_work, ci);
+               }
+       }
+done:
+       return error;
+}
+
+static void
+tprof_stop(tprof_countermask_t stopmask)
+{
+       tprof_backend_t *tb;
+       uint64_t xc;
+
+       tb = tprof_backend;
+       if (tb == NULL)
+               return;
+
+       KASSERT(mutex_owned(&tprof_startstop_lock));
+       stopmask &= tb->tb_softc.sc_ctr_running_mask;
+       if (stopmask == 0) {
+               /* targets are not running */
+               goto done;
+       }
+
+       xc = xc_broadcast(0, tprof_stop_cpu, tb, (void *)(uintptr_t)stopmask);
+       xc_wait(xc);
+       mutex_enter(&tprof_lock);
+       tb->tb_softc.sc_ctr_running_mask &= ~stopmask;
+       mutex_exit(&tprof_lock);
+
+       /* all counters have stopped? */
+       if (tb->tb_softc.sc_ctr_running_mask == 0) {
+               mutex_enter(&tprof_lock);
+               cv_broadcast(&tprof_reader_cv);
+               while (tprof_nworker > 0) {
+                       cv_wait(&tprof_cv, &tprof_lock);
+               }
+               mutex_exit(&tprof_lock);
+
+               tprof_stop1();
+               if (tb->tb_ops->tbo_disestablish != NULL)
+                       tb->tb_ops->tbo_disestablish(&tb->tb_softc);
+       }
+done:
+       ;
+}
+
+static void
+tprof_init_percpu_counters_offset(void *vp, void *vp2, struct cpu_info *ci)
+{
+       uint64_t *counters_offset = vp;
+       u_int counter = (uintptr_t)vp2;
+
+       tprof_backend_t *tb = tprof_backend;
+       tprof_param_t *param = &tb->tb_softc.sc_count[counter].ctr_param;
+       counters_offset[counter] = param->p_value;
+}
+
+static void
+tprof_configure_event_cpu(void *arg1, void *arg2)
+{
+       tprof_backend_t *tb = arg1;
+       u_int counter = (uintptr_t)arg2;


