X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fmips%2Fmm%2Fcerr-sb1.c;fp=arch%2Fmips%2Fmm%2Fcerr-sb1.c;h=1cf3c6006ccd38f2ecaea4247489cdcdea413087;hb=43bc926fffd92024b46cafaf7350d669ba9ca884;hp=13d96d62764ef08f31108e3539287537aa127efc;hpb=cee37fe97739d85991964371c1f3a745c00dd236;p=linux-2.6.git diff --git a/arch/mips/mm/cerr-sb1.c b/arch/mips/mm/cerr-sb1.c index 13d96d627..1cf3c6006 100644 --- a/arch/mips/mm/cerr-sb1.c +++ b/arch/mips/mm/cerr-sb1.c @@ -19,13 +19,19 @@ #include #include #include +#include -#ifndef CONFIG_SIBYTE_BUS_WATCHER +#if !defined(CONFIG_SIBYTE_BUS_WATCHER) || defined(CONFIG_SIBYTE_BW_TRACE) #include -#include #include #endif - + +/* + * We'd like to dump the L2_ECC_TAG register on errors, but errata make + * that unsafe... So for now we don't. (BCM1250/BCM112x erratum SOC-48.) + */ +#undef DUMP_L2_ECC_TAG_ON_ERROR + /* SB1 definitions */ /* XXX should come from config1 XXX */ @@ -136,15 +142,21 @@ static inline void breakout_cerrd(unsigned int val) #ifndef CONFIG_SIBYTE_BUS_WATCHER -static void check_bus_watcher(void) -{ +static void check_bus_watcher(void) +{ uint32_t status, l2_err, memio_err; +#ifdef DUMP_L2_ECC_TAG_ON_ERROR + uint64_t l2_tag; +#endif /* Destructive read, clears register and interrupt */ status = csr_in32(IOADDR(A_SCD_BUS_ERR_STATUS)); /* Bit 31 is always on, but there's no #define for that */ - if (status & ~(1UL << 31)) { + if (status & ~(1UL << 31)) { l2_err = csr_in32(IOADDR(A_BUS_L2_ERRORS)); +#ifdef DUMP_L2_ECC_TAG_ON_ERROR + l2_tag = in64(IO_SPACE_BASE | A_L2_ECC_TAG); +#endif memio_err = csr_in32(IOADDR(A_BUS_MEM_IO_ERRORS)); prom_printf("Bus watcher error counters: %08x %08x\n", l2_err, memio_err); prom_printf("\nLast recorded signature:\n"); @@ -153,19 +165,32 @@ static void check_bus_watcher(void) (int)(G_SCD_BERR_TID(status) >> 6), (int)G_SCD_BERR_RID(status), (int)G_SCD_BERR_DCODE(status)); - } else { - prom_printf("Bus watcher indicates no error\n"); - } -} -#else -extern void check_bus_watcher(void); -#endif - +#ifdef DUMP_L2_ECC_TAG_ON_ERROR + prom_printf("Last L2 tag w/ bad ECC: %016llx\n", l2_tag); +#endif + } else { + prom_printf("Bus watcher indicates no error\n"); + } +} +#else +extern void check_bus_watcher(void); +#endif + asmlinkage void sb1_cache_error(void) { uint64_t cerr_dpa; uint32_t errctl, cerr_i, cerr_d, dpalo, dpahi, eepc, res; +#ifdef CONFIG_SIBYTE_BW_TRACE + /* Freeze the trace buffer now */ +#if defined(CONFIG_SIBYTE_BCM1x55) || defined(CONFIG_SIBYTE_BCM1x80) + csr_out32(M_BCM1480_SCD_TRACE_CFG_FREEZE, IO_SPACE_BASE | A_SCD_TRACE_CFG); +#else + csr_out32(M_SCD_TRACE_CFG_FREEZE, IO_SPACE_BASE | A_SCD_TRACE_CFG); +#endif + prom_printf("Trace buffer frozen\n"); +#endif + prom_printf("Cache error exception on CPU %x:\n", (read_c0_prid() >> 25) & 0x7); @@ -229,11 +254,19 @@ asmlinkage void sb1_cache_error(void) check_bus_watcher(); - while (1); /* - * This tends to make things get really ugly; let's just stall instead. - * panic("Can't handle the cache error!"); + * Calling panic() when a fatal cache error occurs scrambles the + * state of the system (and the cache), making it difficult to + * investigate after the fact. However, if you just stall the CPU, + * the other CPU may keep on running, which is typically very + * undesirable. */ +#ifdef CONFIG_SB1_CERR_STALL + while (1) + ; +#else + panic("unhandled cache error"); +#endif } @@ -434,7 +467,8 @@ static struct dc_state dc_states[] = { }; #define DC_TAG_VALID(state) \ - (((state) == 0xf) || ((state) == 0x13) || ((state) == 0x19) || ((state == 0x16)) || ((state) == 0x1c)) + (((state) == 0x0) || ((state) == 0xf) || ((state) == 0x13) || \ + ((state) == 0x19) || ((state) == 0x16) || ((state) == 0x1c)) static char *dc_state_str(unsigned char state) { @@ -505,6 +539,7 @@ static uint32_t extract_dc(unsigned short addr, int data) uint64_t datalo; uint32_t datalohi, datalolo, datahi; int offset; + char bad_ecc = 0; for (offset = 0; offset < 4; offset++) { /* Index-load-data-D */ @@ -525,8 +560,7 @@ static uint32_t extract_dc(unsigned short addr, int data) ecc = dc_ecc(datalo); if (ecc != datahi) { int bits = 0; - prom_printf(" ** bad ECC (%02x %02x) ->", - datahi, ecc); + bad_ecc |= 1 << (3-offset); ecc ^= datahi; while (ecc) { if (ecc & 1) bits++; @@ -537,6 +571,10 @@ static uint32_t extract_dc(unsigned short addr, int data) prom_printf(" %02X-%016llX", datahi, datalo); } prom_printf("\n"); + if (bad_ecc) + prom_printf(" dwords w/ bad ECC: %d %d %d %d\n", + !!(bad_ecc & 8), !!(bad_ecc & 4), + !!(bad_ecc & 2), !!(bad_ecc & 1)); } } return res;