Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/netbsd-8]: src/sys/arch/sparc Pull up following revision(s) (requested by macallan in ticket #429)



details:   https://anonhg.NetBSD.org/src/rev/ade5860c15f7
branches:  netbsd-8
changeset: 851211:ade5860c15f7
user:      msaitoh <msaitoh%NetBSD.org@localhost>
date:      Fri Dec 08 06:05:15 2017 +0000

description:
Pull up following revision(s) (requested by macallan in ticket #429):
        sys/arch/sparc/sparc/cpu.c: revision 1.250
        sys/arch/sparc/include/cpu.h: revision 1.99
        sys/arch/sparc/sparc/intr.c: revision 1.119
- return early in xcall() if the function is sparc_noop() instead of triggering
  the IPI and then ignoring responses (or lack thereof)
- write the .tag field last to avoid a race when polling for an incoming
  IPI
- add event counters for IPIs being caught with the mutex not held, and for
  messages that are already marked as completed
With this my SS20 made it through 48 hours of pkgsrc with MAKE_JOBS=3 and a
pair of SM81s.
Hypersparcs still crash, but instead of craziness we get actual error messages:
apparently one CPU will occasionally do a watchdog reset, which according to
the manual is caused by catching a trap with traps disabled. Now to figure
out how that can even happen...

diffstat:

 sys/arch/sparc/include/cpu.h |   6 ++++--
 sys/arch/sparc/sparc/cpu.c   |  24 ++++++++++++++++++------
 sys/arch/sparc/sparc/intr.c  |  29 ++++++++++++++++++++++++++---
 3 files changed, 48 insertions(+), 11 deletions(-)

diffs (178 lines):

diff -r 116ae042ca7a -r ade5860c15f7 sys/arch/sparc/include/cpu.h
--- a/sys/arch/sparc/include/cpu.h      Fri Dec 08 06:02:11 2017 +0000
+++ b/sys/arch/sparc/include/cpu.h      Fri Dec 08 06:05:15 2017 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: cpu.h,v 1.97 2016/12/10 10:41:07 mrg Exp $ */
+/*     $NetBSD: cpu.h,v 1.97.8.1 2017/12/08 06:05:15 msaitoh Exp $ */
 
 /*
  * Copyright (c) 1992, 1993
@@ -164,7 +164,7 @@
         * the pending register to avoid a hardware bug.
         */
 #define raise_ipi(cpi,lvl)     do {                    \
-       int x;                                          \
+       volatile int x;                                         \
        (cpi)->intreg_4m->pi_set = PINTR_SINTRLEV(lvl); \
        x = (cpi)->intreg_4m->pi_pend; __USE(x);        \
 } while (0)
@@ -333,6 +333,8 @@
        struct evcnt ci_savefpstate_null;
        struct evcnt ci_xpmsg_mutex_fail;
        struct evcnt ci_xpmsg_mutex_fail_call;
+       struct evcnt ci_xpmsg_mutex_not_held;
+       struct evcnt ci_xpmsg_bogus;
        struct evcnt ci_intrcnt[16];
        struct evcnt ci_sintrcnt[16];
 };
diff -r 116ae042ca7a -r ade5860c15f7 sys/arch/sparc/sparc/cpu.c
--- a/sys/arch/sparc/sparc/cpu.c        Fri Dec 08 06:02:11 2017 +0000
+++ b/sys/arch/sparc/sparc/cpu.c        Fri Dec 08 06:05:15 2017 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: cpu.c,v 1.249 2017/01/18 21:33:25 macallan Exp $ */
+/*     $NetBSD: cpu.c,v 1.249.6.1 2017/12/08 06:05:15 msaitoh Exp $ */
 
 /*
  * Copyright (c) 1996
@@ -52,7 +52,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.249 2017/01/18 21:33:25 macallan Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.249.6.1 2017/12/08 06:05:15 msaitoh Exp $");
 
 #include "opt_multiprocessor.h"
 #include "opt_lockdebug.h"
@@ -183,7 +183,7 @@
  * This must be locked around all message transactions to ensure only
  * one CPU is generating them.
  */
-static kmutex_t xpmsg_mutex;
+kmutex_t xpmsg_mutex;
 
 #endif /* MULTIPROCESSOR */
 
@@ -367,6 +367,10 @@
                             NULL, cpu_name(cpi), "IPI mutex_trylock fail");
        evcnt_attach_dynamic(&cpi->ci_xpmsg_mutex_fail_call, EVCNT_TYPE_MISC,
                             NULL, cpu_name(cpi), "IPI mutex_trylock fail/call");
+       evcnt_attach_dynamic(&cpi->ci_xpmsg_mutex_not_held, EVCNT_TYPE_MISC,
+                            NULL, cpu_name(cpi), "IPI with mutex not held");
+       evcnt_attach_dynamic(&cpi->ci_xpmsg_bogus, EVCNT_TYPE_MISC,
+                            NULL, cpu_name(cpi), "bogus IPI");
 
        /*
         * These are the per-cpu per-IPL hard & soft interrupt counters.
@@ -653,6 +657,8 @@
        char *bufp = errbuf;
        size_t bufsz = sizeof errbuf, wrsz;
 
+       if (is_noop) return;
+
        mybit = (1 << cpuinfo.ci_cpuid);
        callself = func && (cpuset & mybit) != 0;
        cpuset &= ~mybit;
@@ -714,7 +720,10 @@
                if ((cpuset & (1 << n)) == 0)
                        continue;
 
-               cpi->msg.tag = XPMSG_FUNC;
+               /*
+                * Write msg.tag last - if another CPU is polling above it may
+                * end up seeing an incomplete message. Not likely but still.
+                */ 
                cpi->msg.complete = 0;
                p = &cpi->msg.u.xpmsg_func;
                p->func = func;
@@ -722,6 +731,9 @@
                p->arg0 = arg0;
                p->arg1 = arg1;
                p->arg2 = arg2;
+               __insn_barrier();
+               cpi->msg.tag = XPMSG_FUNC;
+               __insn_barrier();
                /* Fast cross calls use interrupt level 14 */
                raise_ipi(cpi,13+fasttrap);/*xcall_cookie->pil*/
        }
@@ -737,7 +749,7 @@
         * have completed (bailing if it takes "too long", being loud about
         * this in the process).
         */
-       done = is_noop;
+       done = 0;
        i = 1000000;    /* time-out, not too long, but still an _AGE_ */
        while (!done) {
                if (--i < 0) {
@@ -774,7 +786,7 @@
 
        if (i >= 0 || debug_xcall == 0) {
                if (i < 0)
-                       printf_nolog("%s\n", errbuf);
+                       aprint_error("%s\n", errbuf);
                mutex_spin_exit(&xpmsg_mutex);
                return;
        }
diff -r 116ae042ca7a -r ade5860c15f7 sys/arch/sparc/sparc/intr.c
--- a/sys/arch/sparc/sparc/intr.c       Fri Dec 08 06:02:11 2017 +0000
+++ b/sys/arch/sparc/sparc/intr.c       Fri Dec 08 06:05:15 2017 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: intr.c,v 1.118 2013/11/16 23:54:01 mrg Exp $ */
+/*     $NetBSD: intr.c,v 1.118.22.1 2017/12/08 06:05:15 msaitoh Exp $ */
 
 /*
  * Copyright (c) 1992, 1993
@@ -41,7 +41,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.118 2013/11/16 23:54:01 mrg Exp $");
+__KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.118.22.1 2017/12/08 06:05:15 msaitoh Exp $");
 
 #include "opt_multiprocessor.h"
 #include "opt_sparc_arch.h"
@@ -76,6 +76,8 @@
 void *xcall_cookie;
 #endif
 
+extern kmutex_t xpmsg_mutex;
+
 void   strayintr(struct clockframe *);
 #ifdef DIAGNOSTIC
 void   bogusintr(struct clockframe *);
@@ -241,7 +243,7 @@
                        DELAY(1);
                        if (n-- > 0)
                                continue;
-                       printf("nmi_hard: SMP botch.");
+                       printf("nmi_hard: SMP botch.\n");
                        break;
                }
        }
@@ -364,6 +366,27 @@
        if (v != xcallintr)
                cpuinfo.ci_sintrcnt[13].ev_count++;
 
+       if (mutex_owned(&xpmsg_mutex) == 0) {
+               cpuinfo.ci_xpmsg_mutex_not_held.ev_count++;
+#ifdef DEBUG
+               printf("%s: mutex not held\n", __func__);
+#endif
+               cpuinfo.msg.complete = 1;
+               kpreempt_enable();
+               return;
+       }
+
+       if (cpuinfo.msg.complete != 0) {
+               cpuinfo.ci_xpmsg_bogus.ev_count++;
+#ifdef DEBUG
+               volatile struct xpmsg_func *p = &cpuinfo.msg.u.xpmsg_func;
+               printf("%s: bogus message %08x %08x %08x %08x\n", __func__,
+                   cpuinfo.msg.tag, (uint32_t)p->func, p->arg0, p->arg1);
+#endif
+               kpreempt_enable();
+               return;
+       }
+
        /* notyet - cpuinfo.msg.received = 1; */
        switch (cpuinfo.msg.tag) {
        case XPMSG_FUNC:



Home | Main Index | Thread Index | Old Index