Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/usr.sbin/kgmon Improved the performance of kernel profiling ...



details:   https://anonhg.NetBSD.org/src/rev/728a38197385
branches:  trunk
changeset: 1022925:728a38197385
user:      ryo <ryo%NetBSD.org@localhost>
date:      Sat Aug 14 17:51:18 2021 +0000

description:
Improved the performance of kernel profiling on MULTIPROCESSOR, and made it possible to get profiling data for each CPU.

In the current implementation, a lock is acquired at the entrance of the mcount
internal function, so the higher the number of cores, the more lock contention
occurs, making profiling in a MULTIPROCESSOR environment unusably slow.
Profiling buffers are now reserved for each CPU, improving profiling
performance on MP by several times to several dozen times.

- Eliminated cpu_simple_lock in mcount internal function, using per-CPU buffers.
- Add ci_gmon member to struct cpu_info of each MP arch.
- Add kern.profiling.percpu node in sysctl tree.
- Add new -c <cpuid> option to kgmon(8) to specify the cpuid, like OpenBSD.
  For compatibility, if the -c option is not specified, kgmon(8) operates on
  the entire system as before, and the -p option will get the total profiling
  data for all CPUs.

diffstat:

 common/lib/libc/gmon/mcount.c  |  126 +++++++++++++++--
 sys/arch/aarch64/include/cpu.h |    6 +-
 sys/arch/alpha/include/cpu.h   |    6 +-
 sys/arch/arm/include/cpu.h     |    7 +-
 sys/arch/hppa/include/cpu.h    |    7 +-
 sys/arch/mips/include/cpu.h    |    6 +-
 sys/arch/or1k/include/cpu.h    |    5 +-
 sys/arch/powerpc/include/cpu.h |    6 +-
 sys/arch/riscv/include/cpu.h   |    5 +-
 sys/arch/sparc/include/cpu.h   |    7 +-
 sys/arch/sparc64/include/cpu.h |    7 +-
 sys/arch/vax/include/cpu.h     |    6 +-
 sys/arch/x86/include/cpu.h     |    6 +-
 sys/kern/kern_clock.c          |   11 +-
 sys/kern/subr_prof.c           |  284 +++++++++++++++++++++++++++++++++++++++-
 sys/sys/gmon.h                 |    3 +-
 usr.sbin/kgmon/kgmon.8         |   23 +++-
 usr.sbin/kgmon/kgmon.c         |  188 ++++++++++++++++++--------
 18 files changed, 608 insertions(+), 101 deletions(-)

diffs (truncated from 1322 to 300 lines):

diff -r f0b817f4c070 -r 728a38197385 common/lib/libc/gmon/mcount.c
--- a/common/lib/libc/gmon/mcount.c     Sat Aug 14 17:38:44 2021 +0000
+++ b/common/lib/libc/gmon/mcount.c     Sat Aug 14 17:51:18 2021 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: mcount.c,v 1.15 2021/08/14 17:38:44 ryo Exp $  */
+/*     $NetBSD: mcount.c,v 1.16 2021/08/14 17:51:18 ryo Exp $  */
 
 /*
  * Copyright (c) 2003, 2004 Wasabi Systems, Inc.
@@ -76,13 +76,14 @@
 #if 0
 static char sccsid[] = "@(#)mcount.c   8.1 (Berkeley) 6/4/93";
 #else
-__RCSID("$NetBSD: mcount.c,v 1.15 2021/08/14 17:38:44 ryo Exp $");
+__RCSID("$NetBSD: mcount.c,v 1.16 2021/08/14 17:51:18 ryo Exp $");
 #endif
 #endif
 
 #include <sys/param.h>
 #include <sys/gmon.h>
 #include <sys/lock.h>
+#include <sys/proc.h>
 
 #ifndef _KERNEL
 #include "reentrant.h"
@@ -94,10 +95,6 @@
 struct gmonparam *_m_gmon_alloc(void);
 #endif
 
-#if defined(_KERNEL) && !defined(_RUMPKERNEL) && defined(MULTIPROCESSOR)
-__cpu_simple_lock_t __mcount_lock;
-#endif
-
 #ifndef __LINT__
 _MCOUNT_DECL(u_long, u_long)
 #ifdef _KERNEL
@@ -168,8 +165,11 @@
 #if defined(_KERNEL) && !defined(_RUMPKERNEL)
        MCOUNT_ENTER;
 #ifdef MULTIPROCESSOR
-       __cpu_simple_lock(&__mcount_lock);
-       __insn_barrier();
+       p = curcpu()->ci_gmon;
+       if (p == NULL || p->state != GMON_PROF_ON) {
+               MCOUNT_EXIT;
+               return;
+       }
 #endif
 #endif
        p->state = GMON_PROF_BUSY;
@@ -264,10 +264,6 @@
 done:
        p->state = GMON_PROF_ON;
 #if defined(_KERNEL) && !defined(_RUMPKERNEL)
-#ifdef MULTIPROCESSOR
-       __insn_barrier();
-       __cpu_simple_unlock(&__mcount_lock);
-#endif
        MCOUNT_EXIT;
 #endif
        return;
@@ -275,10 +271,6 @@
 overflow:
        p->state = GMON_PROF_ERROR;
 #if defined(_KERNEL) && !defined(_RUMPKERNEL)
-#ifdef MULTIPROCESSOR
-       __insn_barrier();
-       __cpu_simple_unlock(&__mcount_lock);
-#endif
        MCOUNT_EXIT;
 #endif
        return;
@@ -293,4 +285,106 @@
 MCOUNT
 #endif
 
+#if defined(_KERNEL) && !defined(_RUMPKERNEL) && defined(MULTIPROCESSOR)
+void _gmonparam_merge(struct gmonparam *, struct gmonparam *);
+
+void
+_gmonparam_merge(struct gmonparam *p, struct gmonparam *q)
+{
+       u_long fromindex;
+       u_short *frompcindex, qtoindex, toindex;
+       u_long selfpc;
+       u_long endfrom;
+       long count;
+       struct tostruct *top;
+       int i;
+
+       count = q->kcountsize / sizeof(*q->kcount);
+       for (i = 0; i < count; i++)
+               p->kcount[i] += q->kcount[i];
+
+       endfrom = (q->fromssize / sizeof(*q->froms));
+       for (fromindex = 0; fromindex < endfrom; fromindex++) {
+               if (q->froms[fromindex] == 0)
+                       continue;
+               for (qtoindex = q->froms[fromindex]; qtoindex != 0;
+                    qtoindex = q->tos[qtoindex].link) {
+                       selfpc = q->tos[qtoindex].selfpc;
+                       count = q->tos[qtoindex].count;
+                       /* cribbed from mcount */
+                       frompcindex = &p->froms[fromindex];
+                       toindex = *frompcindex;
+                       if (toindex == 0) {
+                               /*
+                                * first time traversing this arc
+                                */
+                               toindex = ++p->tos[0].link;
+                               if (toindex >= p->tolimit)
+                                       /* halt further profiling */
+                                       goto overflow;
+
+                               *frompcindex = (u_short)toindex;
+                               top = &p->tos[(size_t)toindex];
+                               top->selfpc = selfpc;
+                               top->count = count;
+                               top->link = 0;
+                               goto done;
+                       }
+                       top = &p->tos[(size_t)toindex];
+                       if (top->selfpc == selfpc) {
+                               /*
+                                * arc at front of chain; usual case.
+                                */
+                               top->count+= count;
+                               goto done;
+                       }
+                       /*
+                        * have to go looking down chain for it.
+                        * top points to what we are looking at,
+                        * we know it is not at the head of the chain.
+                        */
+                       for (; /* goto done */; ) {
+                               if (top->link == 0) {
+                                       /*
+                                        * top is end of the chain and
+                                        * none of the chain had
+                                        * top->selfpc == selfpc.  so
+                                        * we allocate a new tostruct
+                                        * and link it to the head of
+                                        * the chain.
+                                        */
+                                       toindex = ++p->tos[0].link;
+                                       if (toindex >= p->tolimit)
+                                               goto overflow;
+
+                                       top = &p->tos[(size_t)toindex];
+                                       top->selfpc = selfpc;
+                                       top->count = count;
+                                       top->link = *frompcindex;
+                                       *frompcindex = (u_short)toindex;
+                                       goto done;
+                               }
+                               /*
+                                * otherwise, check the next arc on the chain.
+                                */
+                               top = &p->tos[top->link];
+                               if (top->selfpc == selfpc) {
+                                       /*
+                                        * there it is.
+                                        * add to its count.
+                                        */
+                                       top->count += count;
+                                       goto done;
+                               }
+                       }
+
+               done: ;
+               }
+
+       }
+ overflow: ;
+
+}
+#endif
+
 #endif /* (!_KERNEL || GPROF) && !_STANDALONE */
diff -r f0b817f4c070 -r 728a38197385 sys/arch/aarch64/include/cpu.h
--- a/sys/arch/aarch64/include/cpu.h    Sat Aug 14 17:38:44 2021 +0000
+++ b/sys/arch/aarch64/include/cpu.h    Sat Aug 14 17:51:18 2021 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu.h,v 1.37 2021/08/08 19:28:08 skrll Exp $ */
+/* $NetBSD: cpu.h,v 1.38 2021/08/14 17:51:18 ryo Exp $ */
 
 /*-
  * Copyright (c) 2014, 2020 The NetBSD Foundation, Inc.
@@ -37,6 +37,7 @@
 #ifdef __aarch64__
 
 #ifdef _KERNEL_OPT
+#include "opt_gprof.h"
 #include "opt_multiprocessor.h"
 #endif
 
@@ -133,6 +134,9 @@
        struct aarch64_cache_info *ci_cacheinfo;
        struct aarch64_cpufuncs ci_cpufuncs;
 
+#if defined(GPROF) && defined(MULTIPROCESSOR)
+       struct gmonparam *ci_gmon;      /* MI per-cpu GPROF */
+#endif
 } __aligned(COHERENCY_UNIT);
 
 #ifdef _KERNEL
diff -r f0b817f4c070 -r 728a38197385 sys/arch/alpha/include/cpu.h
--- a/sys/arch/alpha/include/cpu.h      Sat Aug 14 17:38:44 2021 +0000
+++ b/sys/arch/alpha/include/cpu.h      Sat Aug 14 17:51:18 2021 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu.h,v 1.103 2021/07/22 01:39:18 thorpej Exp $ */
+/* $NetBSD: cpu.h,v 1.104 2021/08/14 17:51:18 ryo Exp $ */
 
 /*-
  * Copyright (c) 1998, 1999, 2000, 2001 The NetBSD Foundation, Inc.
@@ -72,6 +72,7 @@
 #define _ALPHA_CPU_H_
 
 #if defined(_KERNEL_OPT)
+#include "opt_gprof.h"
 #include "opt_multiprocessor.h"
 #include "opt_lockdebug.h"
 #endif
@@ -140,6 +141,9 @@
        uint64_t ci_pcc_freq;           /* cpu cycles/second */
        struct trapframe *ci_db_regs;   /* registers for debuggers */
        u_int   ci_nintrhand;           /* # of interrupt handlers */
+#if defined(GPROF) && defined(MULTIPROCESSOR)
+       struct gmonparam *ci_gmon;      /* [MI] per-cpu GPROF */
+#endif
 };
 
 /* Ensure some cpu_info fields are within the signed 16-bit displacement. */
diff -r f0b817f4c070 -r 728a38197385 sys/arch/arm/include/cpu.h
--- a/sys/arch/arm/include/cpu.h        Sat Aug 14 17:38:44 2021 +0000
+++ b/sys/arch/arm/include/cpu.h        Sat Aug 14 17:51:18 2021 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: cpu.h,v 1.118 2021/08/08 19:28:08 skrll Exp $  */
+/*     $NetBSD: cpu.h,v 1.119 2021/08/14 17:51:18 ryo Exp $    */
 
 /*
  * Copyright (c) 1994-1996 Mark Brinicombe.
@@ -92,6 +92,7 @@
  */
 
 #if !defined(_MODULE) && defined(_KERNEL_OPT)
+#include "opt_gprof.h"
 #include "opt_multiprocessor.h"
 #include "opt_cpuoptions.h"
 #include "opt_lockdebug.h"
@@ -223,6 +224,10 @@
 
        struct arm_cache_info *
                        ci_cacheinfo;
+
+#if defined(GPROF) && defined(MULTIPROCESSOR)
+       struct gmonparam *ci_gmon;      /* MI per-cpu GPROF */
+#endif
 };
 
 extern struct cpu_info cpu_info_store[];
diff -r f0b817f4c070 -r 728a38197385 sys/arch/hppa/include/cpu.h
--- a/sys/arch/hppa/include/cpu.h       Sat Aug 14 17:38:44 2021 +0000
+++ b/sys/arch/hppa/include/cpu.h       Sat Aug 14 17:51:18 2021 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: cpu.h,v 1.10 2020/04/16 09:28:52 skrll Exp $   */
+/*     $NetBSD: cpu.h,v 1.11 2021/08/14 17:51:19 ryo Exp $     */
 
 /*     $OpenBSD: cpu.h,v 1.55 2008/07/23 17:39:35 kettenis Exp $       */
 
@@ -55,6 +55,7 @@
 
 #ifdef _KERNEL_OPT
 #include "opt_cputype.h"
+#include "opt_gprof.h"
 #include "opt_multiprocessor.h"
 #endif
 
@@ -300,7 +301,9 @@
 
        struct cpu_softc *ci_softc;
 #endif
-
+#if defined(GPROF) && defined(MULTIPROCESSOR)
+       struct gmonparam *ci_gmon;      /* MI per-cpu GPROF */
+#endif
 #endif /* !_KMEMUSER */
 } __aligned(64);
 
diff -r f0b817f4c070 -r 728a38197385 sys/arch/mips/include/cpu.h
--- a/sys/arch/mips/include/cpu.h       Sat Aug 14 17:38:44 2021 +0000
+++ b/sys/arch/mips/include/cpu.h       Sat Aug 14 17:51:18 2021 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: cpu.h,v 1.132 2021/03/29 01:47:45 simonb Exp $ */
+/*     $NetBSD: cpu.h,v 1.133 2021/08/14 17:51:19 ryo Exp $    */
 
 /*-
  * Copyright (c) 1992, 1993
@@ -49,6 +49,7 @@
 



Home | Main Index | Thread Index | Old Index