tech-kern archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Sane support for SMP kernel profiling



We've fixed SMP kernel profiling, which worked poorly at best (particularly
on systems with high HZ) since a lock was taken and released around every
single entry to mcount.  Thanks to Andy for the suggestion as to how.

The following patch includes:

        * A convenience change to build.sh to make it simpler to
          cross-build profiling kernels (-p)

        * Per-CPU profiling space

        * Changes to kgmon to fetch profiling data per-CPU rather than
          globally on MP systems.  gprof already knows how to merge
          multiple profiles so there seems little reason to include this
          functionality in the kernel code or kgmon, both of which should
          be as small and simple as possible.

        * The MCOUNT macro is cleaned up to an inline function on
          architectures which define PROFILE_TASTEFUL_FUNCTIONS.  The idea is
          to eventually eliminate the macro.

        * The kernel and userspace mcount implementations are separated
          into two source files to clean up (some of) a really ugly #ifdef
          mess.

        * A correct, non-locking mcount implementation is included for
          i386.  Other ports still build and should be no worse than they
          were before, but also no better.

Patch is against netbsd-5.  Comments much appreciated.
                
-- 
Thor Lancelot Simon
Coyote Point Systems, Inc.                      <tls%coyotepoint.com@localhost>
Millerton, NY, USA
diff -u -r -N nb5/build.sh mpprof/build.sh
--- nb5/build.sh        2009-03-18 01:39:06.000000000 -0400
+++ mpprof/build.sh     2010-12-10 22:12:18.000000000 -0500
@@ -156,6 +156,7 @@
        runcmd=
        operations=
        removedirs=
+       profile_kernel=
 
        [ -d usr.bin/make ] || cd "$(dirname $0)"
        [ -d usr.bin/make ] ||
@@ -612,7 +613,7 @@
 
 parseoptions()
 {
-       opts='a:B:C:D:Ehj:M:m:N:nO:oR:rS:T:UuV:w:xX:Z:'
+       opts='a:B:C:D:Ehj:M:m:N:nO:opR:rS:T:UuV:w:xX:Z:'
        opt_a=no
 
        if type getopts >/dev/null 2>&1; then
@@ -712,6 +713,10 @@
                        MKOBJDIRS=no
                        ;;
 
+               -p)
+                       profile_kernel="-p"
+                       ;;
+
                -R)
                        eval ${optargcmd}; resolvepath
                        setmakeenv RELEASEDIR "${OPTARG}"
@@ -1312,7 +1317,7 @@
        fi
        [ -x "${TOOLDIR}/bin/${toolprefix}config" ] \
        || bomb "${TOOLDIR}/bin/${toolprefix}config does not exist. You need to 
\"$0 tools\" first."
-       ${runcmd} "${TOOLDIR}/bin/${toolprefix}config" -b "${kernelbuildpath}" \
+       ${runcmd} "${TOOLDIR}/bin/${toolprefix}config" ${profile_kernel} -b 
"${kernelbuildpath}" \
                -s "${TOP}/sys" "${kernelconfpath}" ||
            bomb "${toolprefix}config failed for ${kernelconf}"
        ${runcmd} cd "${kernelbuildpath}"
diff -u -r -N nb5/common/lib/libc/gmon/mcount.c 
mpprof/common/lib/libc/gmon/mcount.c
--- nb5/common/lib/libc/gmon/mcount.c   2006-10-27 18:14:13.000000000 -0400
+++ mpprof/common/lib/libc/gmon/mcount.c        2010-12-10 16:23:01.000000000 
-0500
@@ -64,88 +64,60 @@
  * SUCH DAMAGE.
  */
 
-/* If building a standalone libkern, don't include mcount. */
-#if (!defined(_KERNEL) || defined(GPROF)) && !defined(_STANDALONE)
-
-#ifdef _KERNEL_OPT
+#ifndef _KERNEL
+#include "non_kernel_mcount.c"
+#else
 #include "opt_multiprocessor.h"
-#endif
-
 #include <sys/cdefs.h>
-#if !defined(lint) && !defined(_KERNEL) && defined(LIBC_SCCS)
-#if 0
-static char sccsid[] = "@(#)mcount.c   8.1 (Berkeley) 6/4/93";
-#else
-__RCSID("$NetBSD: mcount.c,v 1.7 2006/10/27 22:14:13 uwe Exp $");
-#endif
-#endif
-
 #include <sys/param.h>
 #include <sys/gmon.h>
+#include <sys/cpu.h>
 
-#ifndef _KERNEL
-#include "reentrant.h"
-#endif
-
-#ifdef _REENTRANT
-extern thread_key_t _gmonkey;
-extern struct gmonparam _gmondummy;
-struct gmonparam *_m_gmon_alloc(void);
-#endif
+/* Warning: pmax hides some crap we need for MCOUNT_ENTER and MCOUNT_EXIT.
+ */
 
-_MCOUNT_DECL __P((u_long, u_long))
-#ifdef _KERNEL
-    __attribute__((__no_instrument_function__))
-#endif
-    __used;
+_MCOUNT_DECL(u_long frompc, u_long selfpc)
+    __attribute__((__no_instrument_function__)) __used;
 
-/*
- * mcount is called on entry to each function compiled with the profiling
- * switch set.  _mcount(), which is declared in a machine-dependent way
- * with _MCOUNT_DECL, does the actual work and is either inlined into a
- * C routine or called by an assembly stub.  In any case, this magic is
- * taken care of by the MCOUNT definition in <machine/profile.h>.
- *
- * _mcount updates data structures that represent traversals of the
- * program's call graph edges.  frompc and selfpc are the return
- * address and function address that represents the given call graph edge.
- * 
- * Note: the original BSD code used the same variable (frompcindex) for
- * both frompcindex and frompc.  Any reasonable, modern compiler will
- * perform this optimization.
+#ifndef PROFILE_mcount_enter_exit
+/* No tasteful function definition has been provided for this arch, so
+ * we will have to synthesise from the un-tasteful ones.
  */
-_MCOUNT_DECL(frompc, selfpc)   /* _mcount; may be static, inline, etc */
-       u_long frompc, selfpc;
+static inline uint32_t mcount_enter(void)
 {
-       u_short *frompcindex;
-       struct tostruct *top, *prevtop;
-       struct gmonparam *p;
-       long toindex;
-#ifdef _KERNEL
-       int s;
-#endif
+       uint32_t s;
 
-#if defined(_REENTRANT) && !defined(_KERNEL)
-       if (__isthreaded) {
-               p = thr_getspecific(_gmonkey);
-               if (p == NULL) {
-                       /* Prevent recursive calls while allocating */
-                       thr_setspecific(_gmonkey, &_gmondummy);
-                       p = _m_gmon_alloc();
-               }
-       } else
+       MCOUNT_ENTER;   /* yuk! */
+
+       return s;
+}
+
+static inline void mcount_exit(uint32_t s)
+{
+       MCOUNT_EXIT;    /* yuk! */
+}
 #endif
-               p = &_gmonparam;
-       /*
-        * check that we are profiling
-        * and that we aren't recursively invoked.
-        */
-       if (p->state != GMON_PROF_ON)
+
+int enable_mcount = 0;
+
+_MCOUNT_DECL(u_long frompc, u_long selfpc)
+{
+       u_short                 *frompcindex;
+       struct tostruct         *top, *prevtop;
+       struct gmonparam        *p;
+       struct cpu_info         *ci;
+       long                    toindex;
+       uint32_t                ef;
+       volatile struct gmonparam *vp;
+
+       if (!enable_mcount
+           || !(ci = curcpu())
+           || !( p = ci->ci_gmon)
+           || !(p->state == GMON_PROF_ON))
                return;
-#ifdef _KERNEL
-       MCOUNT_ENTER;
-#endif
-       p->state = GMON_PROF_BUSY;
+
+       vp = p;
+
        /*
         * check that frompcindex is a reasonable pc value.
         * for example: signal catchers get called from the stack,
@@ -153,19 +125,32 @@
         */
        frompc -= p->lowpc;
        if (frompc > p->textsize)
-               goto done;
+               return;
+
+       if (!(HASHFRACTION & (HASHFRACTION - 1))
+           && (p->hashfraction == HASHFRACTION)) {
+               frompcindex = &p->froms[(size_t)(frompc /
+                                        (HASHFRACTION *
+                                         sizeof (*p->froms)))];
+       } else {
+               frompcindex = &p->froms[(size_t)(frompc /
+                                       (p->hashfraction *
+                                        sizeof (*p->froms)))];
+       }
+
+       /* XXX: We may be able to do better than this as far as
+        * XXX: intra-cpu concurrency.
+        * XXX: For now, and perhaps ever, disable for the entire update.
+        */
+       ef = mcount_enter();
+
+       if (__predict_false(vp->state != GMON_PROF_ON)) {
+               mcount_exit(ef);
+               return;
+       }
 
-#if (HASHFRACTION & (HASHFRACTION - 1)) == 0
-       if (p->hashfraction == HASHFRACTION)
-               frompcindex =
-                   &p->froms[
-                   (size_t)(frompc / (HASHFRACTION * sizeof(*p->froms)))];
-       else
-#endif
-               frompcindex =
-                   &p->froms[
-                   (size_t)(frompc / (p->hashfraction * sizeof(*p->froms)))];
        toindex = *frompcindex;
+
        if (toindex == 0) {
                /*
                 *      first time traversing this arc
@@ -236,17 +221,11 @@
                
        }
 done:
-       p->state = GMON_PROF_ON;
-#ifdef _KERNEL
-       MCOUNT_EXIT;
-#endif
+       mcount_exit(ef);
        return;
 overflow:
        p->state = GMON_PROF_ERROR;
-#ifdef _KERNEL
-       MCOUNT_EXIT;
-#endif
-       return;
+       mcount_exit(ef);
 }
 
 #ifdef MCOUNT
@@ -257,4 +236,4 @@
 MCOUNT
 #endif
 
-#endif /* (!_KERNEL || GPROF) && !_STANDALONE */
+#endif
diff -u -r -N nb5/common/lib/libc/gmon/non_kernel_mcount.c 
mpprof/common/lib/libc/gmon/non_kernel_mcount.c
--- nb5/common/lib/libc/gmon/non_kernel_mcount.c        1969-12-31 
19:00:00.000000000 -0500
+++ mpprof/common/lib/libc/gmon/non_kernel_mcount.c     2010-12-10 
16:21:19.000000000 -0500
@@ -0,0 +1,260 @@
+/*     $NetBSD: mcount.c,v 1.7 2006/10/27 22:14:13 uwe Exp $   */
+
+/*
+ * Copyright (c) 2003, 2004 Wasabi Systems, Inc.
+ * All rights reserved.
+ *
+ * Written by Nathan J. Williams for Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed for the NetBSD Project by
+ *     Wasabi Systems, Inc.
+ * 4. The name of Wasabi Systems, Inc. may not be used to endorse
+ *    or promote products derived from this software without specific prior
+ *    written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 1983, 1992, 1993
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* If building a standalone libkern, don't include mcount. */
+#if (!defined(_KERNEL) || defined(GPROF)) && !defined(_STANDALONE)
+
+#ifdef _KERNEL_OPT
+#include "opt_multiprocessor.h"
+#endif
+
+#include <sys/cdefs.h>
+#if !defined(lint) && !defined(_KERNEL) && defined(LIBC_SCCS)
+#if 0
+static char sccsid[] = "@(#)mcount.c   8.1 (Berkeley) 6/4/93";
+#else
+__RCSID("$NetBSD: mcount.c,v 1.7 2006/10/27 22:14:13 uwe Exp $");
+#endif
+#endif
+
+#include <sys/param.h>
+#include <sys/gmon.h>
+
+#ifndef _KERNEL
+#include "reentrant.h"
+#endif
+
+#ifdef _REENTRANT
+extern thread_key_t _gmonkey;
+extern struct gmonparam _gmondummy;
+struct gmonparam *_m_gmon_alloc(void);
+#endif
+
+_MCOUNT_DECL __P((u_long, u_long))
+#ifdef _KERNEL
+    __attribute__((__no_instrument_function__))
+#endif
+    __used;
+
+/*
+ * mcount is called on entry to each function compiled with the profiling
+ * switch set.  _mcount(), which is declared in a machine-dependent way
+ * with _MCOUNT_DECL, does the actual work and is either inlined into a
+ * C routine or called by an assembly stub.  In any case, this magic is
+ * taken care of by the MCOUNT definition in <machine/profile.h>.
+ *
+ * _mcount updates data structures that represent traversals of the
+ * program's call graph edges.  frompc and selfpc are the return
+ * address and function address that represents the given call graph edge.
+ * 
+ * Note: the original BSD code used the same variable (frompcindex) for
+ * both frompcindex and frompc.  Any reasonable, modern compiler will
+ * perform this optimization.
+ */
+_MCOUNT_DECL(frompc, selfpc)   /* _mcount; may be static, inline, etc */
+       u_long frompc, selfpc;
+{
+       u_short *frompcindex;
+       struct tostruct *top, *prevtop;
+       struct gmonparam *p;
+       long toindex;
+#ifdef _KERNEL
+       int s;
+#endif
+
+#if defined(_REENTRANT) && !defined(_KERNEL)
+       if (__isthreaded) {
+               p = thr_getspecific(_gmonkey);
+               if (p == NULL) {
+                       /* Prevent recursive calls while allocating */
+                       thr_setspecific(_gmonkey, &_gmondummy);
+                       p = _m_gmon_alloc();
+               }
+       } else
+#endif
+               p = &_gmonparam;
+       /*
+        * check that we are profiling
+        * and that we aren't recursively invoked.
+        */
+       if (p->state != GMON_PROF_ON)
+               return;
+#ifdef _KERNEL
+       MCOUNT_ENTER;
+#endif
+       p->state = GMON_PROF_BUSY;
+       /*
+        * check that frompcindex is a reasonable pc value.
+        * for example: signal catchers get called from the stack,
+        *              not from text space.  too bad.
+        */
+       frompc -= p->lowpc;
+       if (frompc > p->textsize)
+               goto done;
+
+#if (HASHFRACTION & (HASHFRACTION - 1)) == 0
+       if (p->hashfraction == HASHFRACTION)
+               frompcindex =
+                   &p->froms[
+                   (size_t)(frompc / (HASHFRACTION * sizeof(*p->froms)))];
+       else
+#endif
+               frompcindex =
+                   &p->froms[
+                   (size_t)(frompc / (p->hashfraction * sizeof(*p->froms)))];
+       toindex = *frompcindex;
+       if (toindex == 0) {
+               /*
+                *      first time traversing this arc
+                */
+               toindex = ++p->tos[0].link;
+               if (toindex >= p->tolimit)
+                       /* halt further profiling */
+                       goto overflow;
+
+               *frompcindex = (u_short)toindex;
+               top = &p->tos[(size_t)toindex];
+               top->selfpc = selfpc;
+               top->count = 1;
+               top->link = 0;
+               goto done;
+       }
+       top = &p->tos[(size_t)toindex];
+       if (top->selfpc == selfpc) {
+               /*
+                * arc at front of chain; usual case.
+                */
+               top->count++;
+               goto done;
+       }
+       /*
+        * have to go looking down chain for it.
+        * top points to what we are looking at,
+        * prevtop points to previous top.
+        * we know it is not at the head of the chain.
+        */
+       for (; /* goto done */; ) {
+               if (top->link == 0) {
+                       /*
+                        * top is end of the chain and none of the chain
+                        * had top->selfpc == selfpc.
+                        * so we allocate a new tostruct
+                        * and link it to the head of the chain.
+                        */
+                       toindex = ++p->tos[0].link;
+                       if (toindex >= p->tolimit)
+                               goto overflow;
+
+                       top = &p->tos[(size_t)toindex];
+                       top->selfpc = selfpc;
+                       top->count = 1;
+                       top->link = *frompcindex;
+                       *frompcindex = (u_short)toindex;
+                       goto done;
+               }
+               /*
+                * otherwise, check the next arc on the chain.
+                */
+               prevtop = top;
+               top = &p->tos[top->link];
+               if (top->selfpc == selfpc) {
+                       /*
+                        * there it is.
+                        * increment its count
+                        * move it to the head of the chain.
+                        */
+                       top->count++;
+                       toindex = prevtop->link;
+                       prevtop->link = top->link;
+                       top->link = *frompcindex;
+                       *frompcindex = (u_short)toindex;
+                       goto done;
+               }
+               
+       }
+done:
+       p->state = GMON_PROF_ON;
+#ifdef _KERNEL
+       MCOUNT_EXIT;
+#endif
+       return;
+overflow:
+       p->state = GMON_PROF_ERROR;
+#ifdef _KERNEL
+       MCOUNT_EXIT;
+#endif
+       return;
+}
+
+#ifdef MCOUNT
+/*
+ * Actual definition of mcount function.  Defined in <machine/profile.h>,
+ * which is included by <sys/gmon.h>.
+ */
+MCOUNT
+#endif
+
+#endif /* (!_KERNEL || GPROF) && !_STANDALONE */
diff -u -r -N nb5/sys/arch/i386/include/profile.h 
mpprof/sys/arch/i386/include/profile.h
--- nb5/sys/arch/i386/include/profile.h 2007-12-20 18:46:13.000000000 -0500
+++ mpprof/sys/arch/i386/include/profile.h      2010-12-10 16:19:40.000000000 
-0500
@@ -83,55 +83,41 @@
 }
 
 #ifdef _KERNEL
-#ifdef MULTIPROCESSOR
-__cpu_simple_lock_t __mcount_lock;
+/* PROFILE_TASTEFUL_FUNCTIONS is on if we provide the tastefully
+ * defined functions required by mcount.c.
+ */
+#define PROFILE_mcount_enter_exit
 
-static inline void
-MCOUNT_ENTER_MP(void)
+static inline void mcount_disable_intr(void)
 {
-       __cpu_simple_lock(&__mcount_lock);
-       __insn_barrier();
+       __asm volatile("cli");
 }
 
-static inline void
-MCOUNT_EXIT_MP(void)
+static inline uint32_t mcount_read_psl(void)
 {
-       __insn_barrier();
-       __mcount_lock = __SIMPLELOCK_UNLOCKED;
+       uint32_t        ef;
+
+       __asm volatile("pushfl; popl %0" : "=r" (ef));
+
+       return ef;
 }
-#else
-#define MCOUNT_ENTER_MP()
-#define MCOUNT_EXIT_MP()
-#endif
 
-static inline void
-mcount_disable_intr(void)
+static inline void mcount_write_psl(uint32_t ef)
 {
-       __asm volatile("cli");
+       __asm volatile("pushl %0; popfl" : : "r" (ef));
 }
 
-static inline u_long
-mcount_read_psl(void)
+static inline uint32_t mcount_enter(void)
 {
-       u_long  ef;
+       uint32_t ef = mcount_read_psl();
 
-       __asm volatile("pushfl; popl %0" : "=r" (ef));
-       return (ef);
+       mcount_disable_intr();
+
+       return ef;
 }
 
-static inline void
-mcount_write_psl(u_long ef)
+static inline void mcount_exit(uint32_t ef)
 {
-       __asm volatile("pushl %0; popfl" : : "r" (ef));
+       mcount_write_psl(ef);
 }
-
-#define        MCOUNT_ENTER                                                    
\
-       s = (int)mcount_read_psl();                                     \
-       mcount_disable_intr();                                          \
-       MCOUNT_ENTER_MP();
-
-#define        MCOUNT_EXIT                                                     
\
-       MCOUNT_EXIT_MP();                                               \
-       mcount_write_psl(s);
-
 #endif /* _KERNEL */
diff -u -r -N nb5/sys/arch/powerpc/include/profile.h 
mpprof/sys/arch/powerpc/include/profile.h
--- nb5/sys/arch/powerpc/include/profile.h      2006-07-07 17:28:03.000000000 
-0400
+++ mpprof/sys/arch/powerpc/include/profile.h   2010-12-10 16:20:37.000000000 
-0500
@@ -124,8 +124,6 @@
 #ifdef _KERNEL
 #define MCOUNT_ENTER                                           \
        __asm volatile("mfmsr %0" : "=r"(s));                   \
-       if ((s & (PSL_IR | PSL_DR)) != (PSL_IR | PSL_DR))       \
-               return;         /* XXX */                       \
        s &= ~PSL_POW;                                          \
        __asm volatile("mtmsr %0" :: "r"(s & ~PSL_EE))
 
diff -u -r -N nb5/sys/kern/kern_clock.c mpprof/sys/kern/kern_clock.c
--- nb5/sys/kern/kern_clock.c   2008-10-05 17:57:20.000000000 -0400
+++ mpprof/sys/kern/kern_clock.c        2010-12-10 16:18:45.000000000 -0500
@@ -297,8 +297,9 @@
                mutex_spin_exit(&p->p_stmutex);
        } else {
 #ifdef GPROF
-               g = &_gmonparam;
-               if (g->state == GMON_PROF_ON) {
+               g = curcpu()->ci_gmon;
+
+               if (g && g->state == GMON_PROF_ON) {
                        i = CLKF_PC(frame) - g->lowpc;
                        if (i < g->textsize) {
                                i /= HISTFRACTION * sizeof(*g->kcount);
@@ -397,8 +398,10 @@
                /*
                 * Kernel statistics are just like addupc_intr, only easier.
                 */
-               g = &_gmonparam;
-               if (profsrc == PROFSRC_CLOCK && g->state == GMON_PROF_ON) {
+               g = ci->ci_gmon;
+
+               if (g && profsrc == PROFSRC_CLOCK &&
+                   g->state == GMON_PROF_ON) {
                        i = CLKF_PC(frame) - g->lowpc;
                        if (i < g->textsize) {
                                i /= HISTFRACTION * sizeof(*g->kcount);
diff -u -r -N nb5/sys/kern/subr_prof.c mpprof/sys/kern/subr_prof.c
--- nb5/sys/kern/subr_prof.c    2007-12-20 18:03:10.000000000 -0500
+++ mpprof/sys/kern/subr_prof.c 2010-12-10 16:17:50.000000000 -0500
@@ -60,13 +60,16 @@
 extern char kernel_text[];
 
 extern char etext[];
+extern int enable_mcount;
 
-
-void
-kmstartup(void)
+static struct gmonparam *__kmstartup(void)
+    __attribute__((__no_instrument_function__));
+static struct gmonparam *__kmstartup(void)
 {
        char *cp;
-       struct gmonparam *p = &_gmonparam;
+       struct gmonparam xot = _gmonparam;
+       struct gmonparam *p = &xot, *q;
+
        /*
         * Round lowpc and highpc to multiples of the density we're using
         * so the rest of the scaling (here and in gprof) stays in ints.
@@ -87,19 +90,48 @@
        else if (p->tolimit > MAXARCS)
                p->tolimit = MAXARCS;
        p->tossize = p->tolimit * sizeof(struct tostruct);
-       cp = (char *)malloc(p->kcountsize + p->fromssize + p->tossize,
-           M_GPROF, M_NOWAIT | M_ZERO);
-       if (cp == 0) {
+
+       q = malloc(sizeof (*p) + p->kcountsize + p->fromssize + p->tossize,
+                  M_GPROF, M_NOWAIT | M_ZERO);
+       if (!q) {
                printf("No memory for profiling.\n");
-               return;
+               return 0;
+       }
+       *q = *p;
+       cp = (char*)(q+1);
+
+       q->tos = (struct tostruct *)cp;
+       cp += q->tossize;
+       q->kcount = (u_short *)cp;
+       cp += q->kcountsize;
+       q->froms = (u_short *)cp;
+
+       return q;
+}
+
+void kmstartup(void)  __attribute__((__no_instrument_function__));
+void kmstartup(void)
+{
+       CPU_INFO_ITERATOR cii;
+       struct cpu_info *ci;
+
+       for (CPU_INFO_FOREACH(cii, ci)) {
+               if (ci->ci_gmon)        // XXX: wtf?
+                       continue;
+
+               /* OK, the loop invariants in __kmstartup() should, perhaps
+                * be pulled up.  This is left as an exercise for the reader.
+                * Hey, that might be me I'm referring to.
+                */
+               ci->ci_gmon = __kmstartup();
+
+               if (!ci->ci_gmon)
+                       break;
        }
-       p->tos = (struct tostruct *)cp;
-       cp += p->tossize;
-       p->kcount = (u_short *)cp;
-       cp += p->kcountsize;
-       p->froms = (u_short *)cp;
 }
 
+int sysctl_kern_profiling_cpu = 0;
+
 /*
  * Return kernel profiling information.
  */
@@ -113,6 +145,20 @@
        struct gmonparam *gp = &_gmonparam;
        int error;
        struct sysctlnode node;
+       CPU_INFO_ITERATOR cii;
+       struct cpu_info *ci;
+
+       /* Select the appropriate gmonparam buf.
+        * Absent one matching the sysctl_kern_profiling_cpu,
+        * we use the global template one.
+        */
+       for (CPU_INFO_FOREACH(cii, ci)) {
+               if (ci->ci_index == sysctl_kern_profiling_cpu
+                   && ci->ci_gmon) {
+                       gp = ci->ci_gmon;
+                       break;
+               }
+       }
 
        node = *rnode;
 
@@ -136,6 +182,9 @@
                node.sysctl_data = gp;
                node.sysctl_size = sizeof(*gp);
                break;
+       case GPROF_CPU:
+               node.sysctl_data = &sysctl_kern_profiling_cpu;
+               break;
        default:
                return (EOPNOTSUPP);
        }
@@ -144,12 +193,35 @@
        if (error || newp == NULL)
                return (error);
 
+       if (node.sysctl_num == GPROF_CPU) {
+               if (sysctl_kern_profiling_cpu > ncpuonline) {
+                       sysctl_kern_profiling_cpu = ncpuonline;
+               }
+               if (sysctl_kern_profiling_cpu < 0) {
+                       sysctl_kern_profiling_cpu = 0;
+               }
+       }
+
        if (node.sysctl_num == GPROF_STATE) {
+               int on = 0;
+               
                mutex_spin_enter(&proc0.p_stmutex);
-               if (gp->state == GMON_PROF_OFF)
+
+               for (CPU_INFO_FOREACH(cii, ci)) {
+                       if (ci->ci_index == sysctl_kern_profiling_cpu
+                           && (gp = ci->ci_gmon)
+                           && gp->state != GMON_PROF_OFF) {
+                               ++on;
+                       }
+               }
+               if (!on) {
+                       enable_mcount = 0;
                        stopprofclock(&proc0);
-               else
+               } else {
+                       enable_mcount = 1;
                        startprofclock(&proc0);
+               }
+
                mutex_spin_exit(&proc0.p_stmutex);
        }
 
@@ -158,7 +230,6 @@
 
 SYSCTL_SETUP(sysctl_kern_gprof_setup, "sysctl kern.profiling subtree setup")
 {
-
        sysctl_createv(clog, 0, NULL, NULL,
                       CTLFLAG_PERMANENT,
                       CTLTYPE_NODE, "kern", NULL,
@@ -173,6 +244,12 @@
 
        sysctl_createv(clog, 0, NULL, NULL,
                       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
+                      CTLTYPE_INT, "cpu",
+                      SYSCTL_DESCR("CPU selector"),
+                      sysctl_kern_profiling, 0, NULL, 0,
+                      CTL_KERN, KERN_PROF, GPROF_CPU, CTL_EOL);
+       sysctl_createv(clog, 0, NULL, NULL,
+                      CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
                       CTLTYPE_INT, "state",
                       SYSCTL_DESCR("Profiling state"),
                       sysctl_kern_profiling, 0, NULL, 0,
@@ -215,7 +292,8 @@
  */
 /* ARGSUSED */
 int
-sys_profil(struct lwp *l, const struct sys_profil_args *uap, register_t 
*retval)
+sys_profil(struct lwp *l, const struct sys_profil_args *uap,
+          register_t *retval)
 {
        /* {
                syscallarg(char *) samples;
diff -u -r -N nb5/sys/sys/cpu_data.h mpprof/sys/sys/cpu_data.h
--- nb5/sys/sys/cpu_data.h      2008-06-03 11:50:22.000000000 -0400
+++ mpprof/sys/sys/cpu_data.h   2010-12-10 16:17:24.000000000 -0500
@@ -77,6 +77,7 @@
        lwp_t           *cpu_idlelwp;           /* idle lwp */
        void            *cpu_lockstat;          /* lockstat private tables */
        u_int           cpu_index;              /* CPU index */
+       struct gmonparam *cpu_gmon;             /* gmon buffer */
        u_int           cpu_biglock_count;      /* # recursive holds */
        u_int           cpu_spin_locks;         /* # of spinlockmgr locks */
        u_int           cpu_simple_locks;       /* # of simple locks held */
@@ -101,6 +102,7 @@
 /* compat definitions */
 #define        ci_schedstate           ci_data.cpu_schedstate
 #define        ci_index                ci_data.cpu_index
+#define        ci_gmon                 ci_data.cpu_gmon
 #define        ci_biglock_count        ci_data.cpu_biglock_count
 #define        ci_biglock_wanted       ci_data.cpu_biglock_wanted
 #define        ci_spin_locks           ci_data.cpu_spin_locks
diff -u -r -N nb5/sys/sys/gmon.h mpprof/sys/sys/gmon.h
--- nb5/sys/sys/gmon.h  2006-10-04 10:22:58.000000000 -0400
+++ mpprof/sys/sys/gmon.h       2010-12-10 16:15:49.000000000 -0500
@@ -150,4 +150,5 @@
 #define        GPROF_FROMS     2       /* struct: from location hash bucket */
 #define        GPROF_TOS       3       /* struct: destination/count structure 
*/
 #define        GPROF_GMONPARAM 4       /* struct: profiling parameters (see 
above) */
+#define GPROF_CPU      5       /* int: specify CPU */
 #endif /* !_SYS_GMON_H_ */
diff -u -r -N nb5/usr.sbin/kgmon/kgmon.8 mpprof/usr.sbin/kgmon/kgmon.8
--- nb5/usr.sbin/kgmon/kgmon.8  2005-06-20 09:25:26.000000000 -0400
+++ mpprof/usr.sbin/kgmon/kgmon.8       2010-12-10 16:14:46.000000000 -0500
@@ -38,11 +38,13 @@
 .Sh SYNOPSIS
 .Nm
 .Op Fl bdhpr
+.Op Fl c Ar CPU
 .Op Fl M Ar core
 .Op Fl N Ar system
 .Sh DESCRIPTION
 .Nm
 is a tool used when profiling the operating system.
+For MP systems, each CPU is profiled individually.
 When no arguments are supplied,
 .Nm
 indicates the state of operating system profiling as running,
@@ -62,6 +64,8 @@
 .Bl -tag -width Ds
 .It Fl b
 Resume the collection of profile data.
+.It Fl c
+Specify CPU.
 .It Fl d
 Enable debug output.
 .It Fl h
@@ -87,6 +91,12 @@
 file is generated before the buffers are reset.
 .El
 .Pp
+.Nm
+operates on one CPU at a time, with the CPU selection being retained
+by the system.  Thus the
+.Op Fl bdhpr
+options refer to the last CPU selection.
+.Pp
 If neither
 .Fl b
 nor
diff -u -r -N nb5/usr.sbin/kgmon/kgmon.c mpprof/usr.sbin/kgmon/kgmon.c
--- nb5/usr.sbin/kgmon/kgmon.c  2008-07-21 09:36:58.000000000 -0400
+++ mpprof/usr.sbin/kgmon/kgmon.c       2010-12-10 16:14:46.000000000 -0500
@@ -64,6 +64,8 @@
        { "__gmonparam", 0, 0, 0, 0 },
 #define        N_PROFHZ        1
        { "_profhz", 0, 0, 0, 0 },
+#define N_PROFILING_CPU                2
+       { "_sysctl_kern_profiling_cpu", 0,0,0,0 },
        { 0, 0, 0, 0, 0 }
 };
 
@@ -75,12 +77,14 @@
 static int     bflag, hflag, kflag, rflag, pflag;
 static int     debug = 0;
 static void    setprof(struct kvmvars *kvp, int state);
-static void    dumpstate(struct kvmvars *kvp);
+static void    dumpstate(struct kvmvars *kvp, int cpu);
 static void    reset(struct kvmvars *kvp);
 static int     openfiles(char *, char *, struct kvmvars *);
 static int     getprof(struct kvmvars *);
 static void    kern_readonly(int);
 static int     getprofhz(struct kvmvars *);
+static int     getcpu(struct kvmvars *);
+static void    setcpu(struct kvmvars *, int);
 
 int
 main(int argc, char **argv)
@@ -88,12 +92,13 @@
        int ch, mode, disp, accessmode;
        struct kvmvars kvmvars;
        char *sys, *kmemf;
+       int     cpu = ~0;
 
        setprogname(argv[0]);
        (void)seteuid(getuid());
        kmemf = NULL;
        sys = NULL;
-       while ((ch = getopt(argc, argv, "M:N:bdhpr")) != -1) {
+       while ((ch = getopt(argc, argv, "M:N:bdhprc:")) != -1) {
                switch((char)ch) {
 
                case 'M':
@@ -105,6 +110,10 @@
                        sys = optarg;
                        break;
 
+               case 'c':
+                       cpu = atoi(optarg);
+                       break;
+
                case 'b':
                        bflag = 1;
                        break;
@@ -127,7 +136,8 @@
 
                default:
                        (void)fprintf(stderr,
-                           "usage: %s [-bdhrp] [-M core] [-N system]\n",
+                           "usage: %s [-bdhrp] "
+                           "[-c cpu] [-M core] [-N system]\n",
                            getprogname());
                        exit(1);
                }
@@ -146,6 +156,11 @@
        }
 #endif
        accessmode = openfiles(sys, kmemf, &kvmvars);
+       if (cpu != ~0)
+               setcpu(&kvmvars, cpu);
+       else
+               cpu = getcpu(&kvmvars);
+
        mode = getprof(&kvmvars);
        if (hflag)
                disp = GMON_PROF_OFF;
@@ -154,13 +169,14 @@
        else
                disp = mode;
        if (pflag)
-               dumpstate(&kvmvars);
+               dumpstate(&kvmvars, cpu);
        if (rflag)
                reset(&kvmvars);
        if (accessmode == O_RDWR)
                setprof(&kvmvars, disp);
-       (void)fprintf(stdout, "%s: kernel profiling is %s.\n",
-            getprogname(), disp == GMON_PROF_OFF ? "off" : "running");
+       (void)fprintf(stdout, "%s: kernel profiling is %s for cpu %d.\n",
+                     getprogname(), disp == GMON_PROF_OFF ? "off" : "running",
+                     cpu);
        return (0);
 }
 
@@ -280,19 +296,68 @@
                        return;
                }
                (void)seteuid(getuid());
-       } else if (kvm_write(kvp->kd, (u_long)&p->state, (void *)&state, sz) 
-           == sz)
+       } else if (kvm_write(kvp->kd, (u_long)&p->state,
+                            (void *)&state, sz) == sz)
                return;
 bad:
        warnx("cannot turn profiling %s", state == GMON_PROF_OFF ?
            "off" : "on");
 }
 
+static int getcpu(struct kvmvars *kvp)
+{
+       int *p = (int*)nl[N_PROFILING_CPU].n_value;
+       int mib[3], cpu;
+       size_t sz;
+
+       sz = sizeof (*p);
+       if (!kflag) {
+               mib[0] = CTL_KERN;
+               mib[1] = KERN_PROF;
+               mib[2] = GPROF_CPU;
+               if (sysctl(mib, 3, &cpu, &sz, NULL, 0) == 0)
+                       return cpu;
+       } else if (kvm_read(kvp->kd, (u_long)p, (void *)&cpu, sz) == sz)
+               return cpu;
+
+       errx(EXIT_FAILURE, "cannot get profiling cpu: %s",
+            kflag ? kvm_geterr(kvp->kd)
+                  : strerror(errno));
+}
+
+static void setcpu(struct kvmvars *kvp, int cpu)
+{
+       int *p = (int*)nl[N_PROFILING_CPU].n_value;
+       int mib[3], oldval;
+       size_t sz;
+
+       sz = sizeof (*p);
+       if (!kflag) {
+               mib[0] = CTL_KERN;
+               mib[1] = KERN_PROF;
+               mib[2] = GPROF_CPU;
+               if (sysctl(mib, 3, &oldval, &sz, NULL, 0) < 0)
+                       goto bad;
+               if (oldval == cpu)
+                       return;
+               (void)seteuid(0);
+               if (sysctl(mib, 3, NULL, NULL, &cpu, sz) >= 0) {
+                       (void)seteuid(getuid());
+                       return;
+               }
+               (void)seteuid(getuid());
+       } else if (kvm_write(kvp->kd, (u_long)p, (void *)&cpu, sz) 
+                  == sz)
+               return;
+bad:
+       warnx("cannot set profiling cpu to %d", cpu);
+}
+
 /*
  * Build the gmon.out file.
  */
 static void
-dumpstate(struct kvmvars *kvp)
+dumpstate(struct kvmvars *kvp, int cpu)
 {
        FILE *fp;
        struct rawarc rawarc;
@@ -304,11 +369,14 @@
        struct gmonhdr h;
        int fromindex, endfrom, toindex;
        size_t kcountsize;
+       char fname_buf[80];
+
+       snprintf(fname_buf, sizeof (fname_buf), "gmon-%d.out", cpu);
 
        setprof(kvp, GMON_PROF_OFF);
-       fp = fopen("gmon.out", "w");
+       fp = fopen(fname_buf, "w");
        if (fp == NULL) {
-               warn("cannot open `gmon.out'");
+               warn("cannot open `%s'", fname_buf);
                return;
        }
 
@@ -399,7 +467,8 @@
                   toindex = tos[toindex].link) {
                        if (debug)
                            (void)fprintf(stderr,
-                           "%s: [mcleanup] frompc 0x%lx selfpc 0x%lx count %ld\n",
+                           "%s: [mcleanup] frompc 0x%lx selfpc 0x%lx "
+                           "count %ld\n",
                            getprogname(), frompc, tos[toindex].selfpc,
                            tos[toindex].count);
                        rawarc.raw_frompc = frompc;
@@ -472,7 +541,8 @@
                             kvm_geterr(kvp->kd));
                if (kvm_write(kvp->kd, (u_long)kvp->gpm.tos, zbuf,
                    (size_t)kvp->gpm.tossize) != kvp->gpm.tossize)
-                       errx(EXIT_FAILURE, "tos zero: %s", kvm_geterr(kvp->kd));
+                       errx(EXIT_FAILURE, "tos zero: %s",
+                            kvm_geterr(kvp->kd));
                free(zbuf);
                return;
        }


Home | Main Index | Thread Index | Old Index