Source-Changes-HG archive
[src/trunk]: src/usr.sbin/tprof Improve tprof(4)
details: https://anonhg.NetBSD.org/src/rev/ffbb661e80f9
branches: trunk
changeset: 372478:ffbb661e80f9
user: ryo <ryo%NetBSD.org@localhost>
date: Thu Dec 01 00:32:52 2022 +0000
description:
Improve tprof(4)
- Multiple events can now be handled simultaneously.
- Counters must now be configured in advance with TPROF_IOC_CONFIGURE_EVENT,
  instead of being configured at TPROF_IOC_START.
- The configured counters can be started and stopped repeatedly with
  TPROF_IOC_START/TPROF_IOC_STOP.
- The value of a performance counter can be read at any time as a 64-bit
  value with TPROF_IOC_GETCOUNTS (a usage sketch follows this list).
- Common backend processing is now handled in tprof.c as much as possible,
  and the functions on the tprof_backend side have been reimplemented as
  more primitive operations.
- The counter overflow reset value used for profiling can now be adjusted.
  By default it is calculated from the CPU clock (the rate of the cycle
  counter) and TPROF_HZ, but for some events this value is too large to
  yield enough samples for profiling. When configuring an event counter,
  the reset value can be specified either as a ratio of the default or as
  an absolute value.
- Due to these overall changes, the API and ABI have changed;
  TPROF_VERSION and TPROF_BACKEND_VERSION have been updated.
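The following is a minimal, hypothetical userland sketch of the new ioctl
flow described above. It is not part of this commit: the ioctl names follow
the description, but the argument types and field names used here
(tprof_param_t's p_counter/p_event, tprof_countermask_t, tprof_counts_t and
its c_count[] array) and the /dev/tprof device path are assumptions; the
authoritative definitions are in sys/dev/tprof/tprof_ioctl.h and
sys/dev/tprof/tprof_types.h.

#include <sys/ioctl.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <dev/tprof/tprof_ioctl.h>

int
main(void)
{
	tprof_param_t param;
	tprof_counts_t counts;		/* assumed type name */
	tprof_countermask_t mask;
	int fd, rv = 1;

	fd = open("/dev/tprof", O_RDWR);
	if (fd == -1)
		return 1;

	/* configure counter #0 in advance (event numbers are CPU specific) */
	memset(&param, 0, sizeof(param));
	param.p_counter = 0;		/* assumed field name */
	param.p_event = 0x11;		/* assumed: backend-specific event number */
	if (ioctl(fd, TPROF_IOC_CONFIGURE_EVENT, &param) == -1)
		goto out;

	/* start only the configured counter; START/STOP may be repeated */
	mask = 1;			/* counter #0; assumed mask encoding */
	if (ioctl(fd, TPROF_IOC_START, &mask) == -1)
		goto out;

	/* ... run the workload being measured ... */

	/* the 64-bit counter values can be read at any time while running */
	if (ioctl(fd, TPROF_IOC_GETCOUNTS, &counts) == -1)
		goto out;
	printf("counter 0: %" PRIu64 "\n", counts.c_count[0]);	/* assumed */

	(void)ioctl(fd, TPROF_IOC_STOP, &mask);
	rv = 0;
out:
	close(fd);
	return rv;
}

The commit also lets the overflow reset value be tuned when configuring an
event counter, either as a ratio of the default (derived from the cycle
counter rate and TPROF_HZ, which the diff below defaults to 10000) or as an
absolute value; the tprof_param_t fields for that are not shown in the
sketch.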
diffstat:
sys/arch/aarch64/include/armreg.h | 7 +-
sys/dev/tprof/tprof.c | 452 +++++++++++++++++++++++++++++++------
sys/dev/tprof/tprof.h | 36 ++-
sys/dev/tprof/tprof_armv7.c | 260 ++++++++++++--------
sys/dev/tprof/tprof_armv8.c | 255 +++++++++++++-------
sys/dev/tprof/tprof_ioctl.h | 18 +-
sys/dev/tprof/tprof_types.h | 45 +++-
sys/dev/tprof/tprof_x86.c | 24 +-
sys/dev/tprof/tprof_x86_amd.c | 185 ++++++++++-----
sys/dev/tprof/tprof_x86_intel.c | 191 ++++++++++-----
usr.sbin/tprof/tprof.8 | 5 +-
usr.sbin/tprof/tprof.c | 65 +++-
usr.sbin/tprof/tprof_analyze.c | 44 ++-
13 files changed, 1122 insertions(+), 465 deletions(-)
diffs (truncated from 2443 to 300 lines):
diff -r ebde77086a75 -r ffbb661e80f9 sys/arch/aarch64/include/armreg.h
--- a/sys/arch/aarch64/include/armreg.h Thu Dec 01 00:29:51 2022 +0000
+++ b/sys/arch/aarch64/include/armreg.h Thu Dec 01 00:32:52 2022 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: armreg.h,v 1.62 2022/12/01 00:29:10 ryo Exp $ */
+/* $NetBSD: armreg.h,v 1.63 2022/12/01 00:32:52 ryo Exp $ */
/*-
* Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -248,6 +248,10 @@
#define ID_AA64DFR0_EL1_PMUVER_NONE 0
#define ID_AA64DFR0_EL1_PMUVER_V3 1
#define ID_AA64DFR0_EL1_PMUVER_NOV3 2
+#define ID_AA64DFR0_EL1_PMUVER_V3P1 4
+#define ID_AA64DFR0_EL1_PMUVER_V3P4 5
+#define ID_AA64DFR0_EL1_PMUVER_V3P5 6
+#define ID_AA64DFR0_EL1_PMUVER_V3P7 7
#define ID_AA64DFR0_EL1_PMUVER_IMPL 15
#define ID_AA64DFR0_EL1_TRACEVER __BITS(4,7)
#define ID_AA64DFR0_EL1_TRACEVER_NONE 0
@@ -1221,6 +1225,7 @@
#define PMCR_IMP __BITS(31,24) // Implementor code
#define PMCR_IDCODE __BITS(23,16) // Identification code
#define PMCR_N __BITS(15,11) // Number of event counters
+#define PMCR_LP __BIT(7) // Long event counter enable
#define PMCR_LC __BIT(6) // Long cycle counter enable
#define PMCR_DP __BIT(5) // Disable cycle counter when event
// counting is prohibited
diff -r ebde77086a75 -r ffbb661e80f9 sys/dev/tprof/tprof.c
--- a/sys/dev/tprof/tprof.c Thu Dec 01 00:29:51 2022 +0000
+++ b/sys/dev/tprof/tprof.c Thu Dec 01 00:32:52 2022 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: tprof.c,v 1.18 2022/12/01 00:27:59 ryo Exp $ */
+/* $NetBSD: tprof.c,v 1.19 2022/12/01 00:32:52 ryo Exp $ */
/*-
* Copyright (c)2008,2009,2010 YAMAMOTO Takashi,
@@ -27,7 +27,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.18 2022/12/01 00:27:59 ryo Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.19 2022/12/01 00:32:52 ryo Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -42,12 +42,17 @@
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/workqueue.h>
+#include <sys/xcall.h>
#include <dev/tprof/tprof.h>
#include <dev/tprof/tprof_ioctl.h>
#include "ioconf.h"
+#ifndef TPROF_HZ
+#define TPROF_HZ 10000
+#endif
+
/*
* locking order:
* tprof_reader_lock -> tprof_lock
@@ -73,7 +78,7 @@
} tprof_buf_t;
#define TPROF_BUF_BYTESIZE(sz) \
(sizeof(tprof_buf_t) + (sz) * sizeof(tprof_sample_t))
-#define TPROF_MAX_SAMPLES_PER_BUF 10000
+#define TPROF_MAX_SAMPLES_PER_BUF (TPROF_HZ * 2)
#define TPROF_MAX_BUF 100
@@ -85,14 +90,20 @@
} __aligned(CACHE_LINE_SIZE) tprof_cpu_t;
typedef struct tprof_backend {
+ /*
+ * tprof_backend_softc_t must be passed as an argument to the interrupt
+ * handler, but this is difficult to arrange on armv7/v8, so tprof_backend
+ * is exposed instead. In addition, the softc must be placed at the
+ * beginning of struct tprof_backend.
+ */
+ tprof_backend_softc_t tb_softc;
+
const char *tb_name;
const tprof_backend_ops_t *tb_ops;
LIST_ENTRY(tprof_backend) tb_list;
- int tb_usecount; /* S: */
} tprof_backend_t;
static kmutex_t tprof_lock;
-static bool tprof_running; /* s: */
static u_int tprof_nworker; /* L: # of running worker LWPs */
static lwp_t *tprof_owner;
static STAILQ_HEAD(, tprof_buf) tprof_list; /* L: global buffer list */
@@ -101,7 +112,7 @@
static struct percpu *tprof_cpus __read_mostly; /* tprof_cpu_t * */
static u_int tprof_samples_per_buf;
-static tprof_backend_t *tprof_backend; /* S: */
+tprof_backend_t *tprof_backend; /* S: */
static LIST_HEAD(, tprof_backend) tprof_backends =
LIST_HEAD_INITIALIZER(tprof_backend); /* S: */
@@ -193,6 +204,7 @@
{
tprof_cpu_t * const c = tprof_curcpu();
tprof_buf_t *buf;
+ tprof_backend_t *tb;
bool shouldstop;
KASSERT(wk == &c->c_work);
@@ -207,7 +219,8 @@
* and put it on the global list for read(2).
*/
mutex_enter(&tprof_lock);
- shouldstop = !tprof_running;
+ tb = tprof_backend;
+ shouldstop = (tb == NULL || tb->tb_softc.sc_ctr_running_mask == 0);
if (shouldstop) {
KASSERT(tprof_nworker > 0);
tprof_nworker--;
@@ -283,17 +296,190 @@
}
static int
-tprof_start(const tprof_param_t *param)
+tprof_getncounters(u_int *ncounters)
+{
+ tprof_backend_t *tb;
+
+ tb = tprof_backend;
+ if (tb == NULL)
+ return ENOENT;
+
+ *ncounters = tb->tb_ops->tbo_ncounters();
+ return 0;
+}
+
+static void
+tprof_start_cpu(void *arg1, void *arg2)
+{
+ tprof_backend_t *tb = arg1;
+ tprof_countermask_t runmask = (uintptr_t)arg2;
+
+ tb->tb_ops->tbo_start(runmask);
+}
+
+static void
+tprof_stop_cpu(void *arg1, void *arg2)
+{
+ tprof_backend_t *tb = arg1;
+ tprof_countermask_t stopmask = (uintptr_t)arg2;
+
+ tb->tb_ops->tbo_stop(stopmask);
+}
+
+static int
+tprof_start(tprof_countermask_t runmask)
{
CPU_INFO_ITERATOR cii;
struct cpu_info *ci;
+ tprof_backend_t *tb;
+ uint64_t xc;
int error;
- uint64_t freq;
- tprof_backend_t *tb;
+ bool firstrun;
KASSERT(mutex_owned(&tprof_startstop_lock));
- if (tprof_running) {
- error = EBUSY;
+
+ tb = tprof_backend;
+ if (tb == NULL) {
+ error = ENOENT;
+ goto done;
+ }
+
+ runmask &= ~tb->tb_softc.sc_ctr_running_mask;
+ runmask &= tb->tb_softc.sc_ctr_configured_mask;
+ if (runmask == 0) {
+ /*
+ * targets are already running.
+ * unconfigured counters are ignored.
+ */
+ error = 0;
+ goto done;
+ }
+
+ firstrun = (tb->tb_softc.sc_ctr_running_mask == 0);
+ if (firstrun) {
+ if (tb->tb_ops->tbo_establish != NULL) {
+ error = tb->tb_ops->tbo_establish(&tb->tb_softc);
+ if (error != 0)
+ goto done;
+ }
+
+ tprof_samples_per_buf = TPROF_MAX_SAMPLES_PER_BUF;
+ error = workqueue_create(&tprof_wq, "tprofmv", tprof_worker,
+ NULL, PRI_NONE, IPL_SOFTCLOCK, WQ_MPSAFE | WQ_PERCPU);
+ if (error != 0) {
+ if (tb->tb_ops->tbo_disestablish != NULL)
+ tb->tb_ops->tbo_disestablish(&tb->tb_softc);
+ goto done;
+ }
+
+ for (CPU_INFO_FOREACH(cii, ci)) {
+ tprof_cpu_t * const c = tprof_cpu(ci);
+ tprof_buf_t *new;
+ tprof_buf_t *old;
+
+ new = tprof_buf_alloc();
+ old = tprof_buf_switch(c, new);
+ if (old != NULL) {
+ tprof_buf_free(old);
+ }
+ callout_init(&c->c_callout, CALLOUT_MPSAFE);
+ callout_setfunc(&c->c_callout, tprof_kick, ci);
+ }
+ }
+
+ runmask &= tb->tb_softc.sc_ctr_configured_mask;
+ xc = xc_broadcast(0, tprof_start_cpu, tb, (void *)(uintptr_t)runmask);
+ xc_wait(xc);
+ mutex_enter(&tprof_lock);
+ tb->tb_softc.sc_ctr_running_mask |= runmask;
+ mutex_exit(&tprof_lock);
+
+ if (firstrun) {
+ for (CPU_INFO_FOREACH(cii, ci)) {
+ tprof_cpu_t * const c = tprof_cpu(ci);
+
+ mutex_enter(&tprof_lock);
+ tprof_nworker++;
+ mutex_exit(&tprof_lock);
+ workqueue_enqueue(tprof_wq, &c->c_work, ci);
+ }
+ }
+done:
+ return error;
+}
+
+static void
+tprof_stop(tprof_countermask_t stopmask)
+{
+ tprof_backend_t *tb;
+ uint64_t xc;
+
+ tb = tprof_backend;
+ if (tb == NULL)
+ return;
+
+ KASSERT(mutex_owned(&tprof_startstop_lock));
+ stopmask &= tb->tb_softc.sc_ctr_running_mask;
+ if (stopmask == 0) {
+ /* targets are not running */
+ goto done;
+ }
+
+ xc = xc_broadcast(0, tprof_stop_cpu, tb, (void *)(uintptr_t)stopmask);
+ xc_wait(xc);
+ mutex_enter(&tprof_lock);
+ tb->tb_softc.sc_ctr_running_mask &= ~stopmask;
+ mutex_exit(&tprof_lock);
+
+ /* all counters have stopped? */
+ if (tb->tb_softc.sc_ctr_running_mask == 0) {
+ mutex_enter(&tprof_lock);
+ cv_broadcast(&tprof_reader_cv);
+ while (tprof_nworker > 0) {
+ cv_wait(&tprof_cv, &tprof_lock);
+ }
+ mutex_exit(&tprof_lock);
+
+ tprof_stop1();
+ if (tb->tb_ops->tbo_disestablish != NULL)
+ tb->tb_ops->tbo_disestablish(&tb->tb_softc);
+ }
+done:
+ ;
+}
+
+static void
+tprof_init_percpu_counters_offset(void *vp, void *vp2, struct cpu_info *ci)
+{
+ uint64_t *counters_offset = vp;
+ u_int counter = (uintptr_t)vp2;
+
+ tprof_backend_t *tb = tprof_backend;
+ tprof_param_t *param = &tb->tb_softc.sc_count[counter].ctr_param;
+ counters_offset[counter] = param->p_value;
+}
+
+static void
+tprof_configure_event_cpu(void *arg1, void *arg2)
+{
+ tprof_backend_t *tb = arg1;
+ u_int counter = (uintptr_t)arg2;