Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/arch/sparc64/sparc64 port the corrected ECC error handli...



details:   https://anonhg.NetBSD.org/src/rev/033954d63169
branches:  trunk
changeset: 778154:033954d63169
user:      mrg <mrg%NetBSD.org@localhost>
date:      Sat Mar 17 22:19:53 2012 +0000

description:
port the corrected ECC error handling code from freebsd.  i noticed my U10
took one of these and then hung.  it shouldn't hang; there's a 'sir' here that
doesn't seem to reset properly.  oh well.

tested by simulating a trap via 'ta 0x10' and considering that the same, but
it hasn't been tested against a real ECC error yet.

diffstat:

 sys/arch/sparc64/sparc64/locore.s |   6 ++--
 sys/arch/sparc64/sparc64/trap.c   |  58 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 59 insertions(+), 5 deletions(-)

diffs (131 lines):

diff -r a731776ca44f -r 033954d63169 sys/arch/sparc64/sparc64/locore.s
--- a/sys/arch/sparc64/sparc64/locore.s Sat Mar 17 22:10:51 2012 +0000
+++ b/sys/arch/sparc64/sparc64/locore.s Sat Mar 17 22:19:53 2012 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: locore.s,v 1.340 2012/03/03 03:17:32 nakayama Exp $    */
+/*     $NetBSD: locore.s,v 1.341 2012/03/17 22:19:53 mrg Exp $ */
 
 /*
  * Copyright (c) 2006-2010 Matthew R. Green
@@ -491,7 +491,7 @@
        VTRAP(0x060, interrupt_vector); ! 060 = interrupt vector
        TRAP(T_PA_WATCHPT)              ! 061 = physical address data watchpoint
        TRAP(T_VA_WATCHPT)              ! 062 = virtual address data watchpoint
-       UTRAP(T_ECCERR)                 ! We'll implement this one later
+       TRAP(T_ECCERR)                  ! 063 = corrected ECC error
 ufast_IMMU_miss:                       ! 064 = fast instr access MMU miss
        ldxa    [%g0] ASI_IMMU_8KPTR, %g2 ! Load IMMU 8K TSB pointer
 #ifdef NO_TSB
@@ -727,7 +727,7 @@
        VTRAP(0x060, interrupt_vector); ! 060 = interrupt vector
        TRAP(T_PA_WATCHPT)              ! 061 = physical address data watchpoint
        TRAP(T_VA_WATCHPT)              ! 062 = virtual address data watchpoint
-       UTRAP(T_ECCERR)                 ! We'll implement this one later
+       TRAP(T_ECCERR)                  ! 063 = corrected ECC error
 kfast_IMMU_miss:                       ! 064 = fast instr access MMU miss
        ldxa    [%g0] ASI_IMMU_8KPTR, %g2 ! Load IMMU 8K TSB pointer
 #ifdef NO_TSB
diff -r a731776ca44f -r 033954d63169 sys/arch/sparc64/sparc64/trap.c
--- a/sys/arch/sparc64/sparc64/trap.c   Sat Mar 17 22:10:51 2012 +0000
+++ b/sys/arch/sparc64/sparc64/trap.c   Sat Mar 17 22:19:53 2012 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: trap.c,v 1.169 2012/02/19 21:06:31 rmind Exp $ */
+/*     $NetBSD: trap.c,v 1.170 2012/03/17 22:19:53 mrg Exp $ */
 
 /*
  * Copyright (c) 1996-2002 Eduardo Horvath.  All rights reserved.
@@ -50,7 +50,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.169 2012/02/19 21:06:31 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.170 2012/03/17 22:19:53 mrg Exp $");
 
 #include "opt_ddb.h"
 #include "opt_multiprocessor.h"
@@ -92,6 +92,8 @@
 #include <machine/svr4_32_machdep.h>
 #endif
 
+#include <sparc64/sparc64/cache.h>
+
 #include <sparc/fpu/fpu_extern.h>
 
 #ifndef offsetof
@@ -369,6 +371,7 @@
        u_long sfsr);
 void text_access_error(struct trapframe64 *, unsigned int, vaddr_t, u_long,
        vaddr_t, u_long);
+void ecc_corrected_error(unsigned int type, vaddr_t pc);
 
 #ifdef DEBUG
 void print_trapframe(struct trapframe64 *);
@@ -538,6 +541,9 @@
                        /* Enable the FPU */
                        tf->tf_tstate |= TSTATE_PEF;
                        return;
+               } else if (type == T_ECCERR) {
+                       ecc_corrected_error(type, pc);
+                       return;
                }
                goto dopanic;
        }
@@ -853,6 +859,9 @@
                ksi.ksi_code = FPE_INTOVF;
                ksi.ksi_addr = (void *)pc;
                break;
+       case T_ECCERR:
+               ecc_corrected_error(type, pc);
+               break;
        }
        if (sig != 0) {
                ksi.ksi_signo = sig;
@@ -1609,3 +1618,48 @@
        }
 #endif
 }
+
+/*
+ * Handle a corrected ECC error: flush caches, log and clear AFSR ('type' is unused).
+ */
+void
+ecc_corrected_error(unsigned int type, vaddr_t pc)
+{
+       uint64_t eeer, afar, afsr;
+       char buf[128];
+       int s;
+
+       /* Mask NCEEN/CEEN error reporting first; the saved value is restored below. */
+       eeer = ldxa(0, ASI_ERROR_EN_REG);
+       s = intr_disable();
+       stxa(0, ASI_ERROR_EN_REG,
+           eeer & ~(P_EER_NCEEN | P_EER_CEEN));
+       membar_Sync();
+       intr_restore(s);
+
+       /* Flush the caches in order to ensure no corrupt data got installed. */
+       blast_dcache();
+       blast_icache();
+
+#if 0
+       /* Ensure the caches are still turned on (should be). */
+       cache_enable(PCPU_GET(impl));
+#endif
+
+       /* Grab the current AFSR/AFAR (logged below), and clear the error from the AFSR. */
+       afar = ldxa(0, ASI_AFAR);
+       afsr = ldxa(0, ASI_AFSR);
+       s = intr_disable();
+       stxa(0, ASI_AFSR, ldxa(0, ASI_AFSR));
+       membar_Sync();
+       intr_restore(s);
+       /* XXX: count the error */
+       snprintb(buf, sizeof(buf), AFSR_BITS, afsr);
+       printf("corrected ECC error: pc %p afsr %"PRIx64" (%s) addr %"PRIx64"\n", (void *)pc, afsr, buf, afar);
+
+       /* Turn (non-)correctable error reporting back on. */
+       s = intr_disable();
+       stxa(0, ASI_ERROR_EN_REG, eeer);
+       membar_Sync();
+       intr_restore(s);
+}



Home | Main Index | Thread Index | Old Index