2 * AMD K8 class Memory Controller kernel module
4 * This file may be distributed under the terms of the
5 * GNU General Public License.
7 * Written by Thayne Harbaugh Linux Networx (http://lnxi.com)
9 * Changes by Douglas "norsk" Thompson <norsk5@xmission.com>:
10 * - K8 CPU Revision D and greater support
12 * Changes by Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com>:
13 * - Module largely rewritten, with new (and hopefully correct)
14 * code for dealing with node and chip select interleaving, various
15 * code cleanup, and bug fixes
16 * - Added support for memory hoisting using DRAM hole address
19 * This module is based on the following document (available from
20 * http://www.amd.com/):
22 * Title: BIOS and Kernel Developer's Guide for AMD Athlon 64 and AMD
24 * AMD publication #: 26094
27 * Unless otherwise stated, section numbers mentioned in the comments below
28 * refer to this document.
31 #include <linux/module.h>
32 #include <linux/init.h>
33 #include <linux/pci.h>
34 #include <linux/pci_ids.h>
35 #include <linux/slab.h>
36 #include <asm/mmzone.h>
39 #define k8_printk(level, fmt, arg...) \
40 edac_printk(level, "k8", fmt, ##arg)
42 #define k8_mc_printk(mci, level, fmt, arg...) \
43 edac_mc_chipset_printk(mci, level, "k8", fmt, ##arg)
45 /* Throughout the comments in this code, the terms SysAddr, DramAddr, and
46 * InputAddr are used. These terms come directly from the k8 documentation
47 * (AMD publication #26094). They are defined as follows:
50 * This is a physical address generated by a CPU core or a device
51 * doing DMA. If generated by a CPU core, a SysAddr is the result of
52 * a virtual to physical address translation by the CPU core's address
53 * translation mechanism (MMU).
56 * A DramAddr is derived from a SysAddr by subtracting an offset that
57 * depends on which node the SysAddr maps to and whether the SysAddr
58 * is within a range affected by memory hoisting. The DRAM Base
59 * (section 3.4.4.1) and DRAM Limit (section 3.4.4.2) registers
60 * determine which node a SysAddr maps to.
62 * If the DRAM Hole Address Register (DHAR) is enabled and the SysAddr
63 * is within the range of addresses specified by this register, then
64 * a value x from the DHAR is subtracted from the SysAddr to produce a
65 * DramAddr. Here, x represents the base address for the node that
66 * the SysAddr maps to plus an offset due to memory hoisting. See
67 * section 3.4.8 and the comments in get_dram_hole_info() and
68 * sys_addr_to_dram_addr() below for more information.
70 * If the SysAddr is not affected by the DHAR then a value y is
71 * subtracted from the SysAddr to produce a DramAddr. Here, y is the
72 * base address for the node that the SysAddr maps to. See section
73 * 3.4.4 and the comments in sys_addr_to_dram_addr() below for more
77 * A DramAddr is translated to an InputAddr before being passed to the
78 * memory controller for the node that the DramAddr is associated
79 * with. The memory controller then maps the InputAddr to a csrow.
80 * If node interleaving is not in use, then the InputAddr has the same
81 * value as the DramAddr. Otherwise, the InputAddr is produced by
82 * discarding the bits used for node interleaving from the DramAddr.
83 * See section 3.4.4 for more information.
85 * The memory controller for a given node uses its DRAM CS Base and
86 * DRAM CS Mask registers to map an InputAddr to a csrow. See
87 * sections 3.5.4 and 3.5.5 for more information.
91 * Alter this version for the K8 module when modifications are made
93 #define EDAC_K8_VERSION " Ver: 2.0.0 " __DATE__
94 #define EDAC_MOD_STR "k8_edac"
96 #ifndef PCI_DEVICE_ID_AMD_OPT_0_HT
97 #define PCI_DEVICE_ID_AMD_OPT_0_HT 0x1100
98 #endif /* PCI_DEVICE_ID_AMD_OPT_0_HT */
100 #ifndef PCI_DEVICE_ID_AMD_OPT_1_ADDRMAP
101 #define PCI_DEVICE_ID_AMD_OPT_1_ADDRMAP 0x1101
102 #endif /* PCI_DEVICE_ID_AMD_OPT_1_ADDRMAP */
104 #ifndef PCI_DEVICE_ID_AMD_OPT_2_MEMCTL
105 #define PCI_DEVICE_ID_AMD_OPT_2_MEMCTL 0x1102
106 #endif /* PCI_DEVICE_ID_AMD_OPT_2_MEMCTL */
108 #ifndef PCI_DEVICE_ID_AMD_OPT_3_MISCCTL
109 #define PCI_DEVICE_ID_AMD_OPT_3_MISCCTL 0x1103
110 #endif /* PCI_DEVICE_ID_AMD_OPT_3_MISCCTL */
112 /* Extended Model from CPUID, for CPU Revision numbers */
113 #define OPTERON_CPU_LE_REV_C 0
114 #define OPTERON_CPU_REV_D 1
115 #define OPTERON_CPU_REV_E 2
117 #define K8_NR_CSROWS 8
118 #define MAX_K8_NODES 8
120 /* K8 register addresses - device 0 function 1 - Address Map */
121 #define K8_DBR 0x40 /* DRAM Base Register (8 x 32b
122 * interlaced with K8_DLR)
124 * 31:16 DRAM Base addr 39:24
126 * 10:8 interleave enable
132 #define K8_DLR 0x44 /* DRAM Limit Register (8 x 32b
133 * interlaced with K8_DBR)
 * 31:16 DRAM Limit addr 39:24
137 * 10:8 interleave select
139 * 2:0 destination node ID
142 #define K8_DHAR 0xf0 /* DRAM Hole Address Register
146 * 15:8 DramHoleOffset
151 /* K8 register addresses - device 0 function 2 - DRAM controller */
152 #define K8_DCSB 0x40 /* DRAM Chip-Select Base (8 x 32b)
154 * 31:21 Base addr high 35:25
156 * 15:9 Base addr low 19:13 (interlvd)
158 * 0 chip-select bank enable
161 #define K8_DCSM 0x60 /* DRAM Chip-Select Mask (8 x 32b)
164 * 29:21 addr mask high 33:25
166 * 15:9 addr mask low 19:13
170 /* selects bits 29-21 and 15-9 from DCSM */
171 #define DCSM_MASK_BITS 0x3fe0fe00
173 #define K8_DBAM 0x80 /* DRAM Base Addr Mapping (32b) */
175 #define K8_DCL 0x90 /* DRAM configuration low reg (32b)
178 * 27:25 Bypass Max: 000b=respect
 * 24 Disable receivers - no sockets
184 * 16 128/64 bit (dual/single chan)
185 * 15:14 R/W Queue bypass count
187 * 12 exit self refresh
188 * 11 mem clear status
193 * 3 dis DQS hysteresis
195 * 1 DRAM drive strength
196 * 0 Digital Locked Loop disable
199 /* K8 register addresses - device 0 function 3 - Misc Control */
200 #define K8_NBCTL 0x40 /* MCA NB Control (32b)
206 #define K8_NBCFG 0x44 /* MCA NB Config (32b)
208 * 23 Chip-kill x4 ECC enable
213 #define K8_NBSL 0x48 /* MCA NB Status Low (32b)
215 * 31:24 Syndrome 15:8 chip-kill x4
217 * 19:16 Extended err code
221 #define K8_NBSH 0x4C /* MCA NB Status High (32b)
227 * 27 Misc err reg valid
229 * 25 proc context corrupt
235 * 8 err found by scrubber
237 * 6:4 Hyper-transport link number
243 #define K8_NBSH_VALID_BIT BIT(31)
245 #define K8_NBEAL 0x50 /* MCA NB err addr low (32b)
247 * 31:3 Err addr low 31:3
251 #define K8_NBEAH 0x54 /* MCA NB err addr high (32b)
254 * 7:0 Err addr high 39:32
257 #define K8_NBCAP 0xE8 /* MCA NB capabilities (32b)
267 * K8_MSR_MCxCTL (64b)
268 * (0x400,404,408,40C,410)
269 * 63 Enable reporting source 63
273 * 2 Enable error source 2
274 * 1 Enable error source 1
275 * 0 Enable error source 0
279 * K8_MSR_MCxSTAT (64b)
280 * (0x401,405,409,40D,411)
284 * 60 Enabled error condition
285 * 59 Misc register valid (not used)
286 * 58 Err addr register valid
287 * 57 Processor context corrupt
288 * 56:32 Other information
289 * 31:16 Model specific error code
294 * K8_MSR_MCxADDR (64b)
295 * (0x402,406,40A,40E,412)
301 * K8_MSR_MCxMISC (64b)
302 * (0x403,407,40B,40F,413)
303 * Unused on Athlon64 and K8
306 #define K8_MSR_MCGCTL 0x017b /* Machine Chk Global report ctl (64b)
312 * 1 Instruction Cache
316 #define K8_MSR_MC4CTL 0x0410 /* North Bridge Check report ctl (64b) */
317 #define K8_MSR_MC4STAT 0x0411 /* North Bridge status (64b) */
318 #define K8_MSR_MC4ADDR 0x0412 /* North Bridge Address (64b) */
320 static inline int MCI_TO_NODE_ID(struct mem_ctl_info *mci)
322 return PCI_SLOT(to_pci_dev(mci->dev)->devfn) - 0x18;
325 /* Ugly hack that allows module to compile when built as part of a 32-bit
326 * kernel. Just in case anyone wants to run a 32-bit kernel on their Opteron.
332 /* Each entry holds the CPU revision of all CPU cores for the given node. */
333 static int k8_node_revision_table[MAXNODE] = { 0 };
335 static inline int node_rev(int node_id)
337 return k8_node_revision_table[node_id];
340 static void store_node_revision(void *param)
342 int node_id, revision;
344 /* Multiple CPU cores on the same node will all write their revision
345 * number to the same array entry. This is ok. For a given node, all
346 * CPU cores are on the same piece of silicon and share the same
349 node_id = (cpuid_ebx(1) >> 24) & 0x07;
350 revision = (cpuid_eax(1) >> 16) & 0x0f;
351 k8_node_revision_table[node_id] = revision;
354 /* Initialize k8_node_revision_table. */
355 static void build_node_revision_table(void)
357 static int initialized = 0;
362 on_each_cpu(store_node_revision, NULL, 1, 1);
371 struct pci_dev *addr_map;
372 struct pci_dev *misc_ctl;
374 int node_id; /* ID of this node */
376 /* The values of these registers will remain constant so we might as
377 * well cache them here.
380 u32 dbr[MAX_K8_NODES];
381 u32 dlr[MAX_K8_NODES];
383 u32 dcsb[K8_NR_CSROWS];
384 u32 dcsm[K8_NR_CSROWS];
389 struct k8_error_info_regs {
396 struct k8_error_info {
397 struct k8_error_info_regs error_info;
399 int race_condition_detected;
403 const char *ctl_name;
408 static const struct k8_dev_info k8_devs[] = {
410 .ctl_name = "Athlon64/Opteron",
411 .addr_map = PCI_DEVICE_ID_AMD_OPT_1_ADDRMAP,
412 .misc_ctl = PCI_DEVICE_ID_AMD_OPT_3_MISCCTL},
415 static struct pci_dev * pci_get_related_function(unsigned int vendor,
416 unsigned int device, struct pci_dev *related)
422 while ((dev = pci_get_device(vendor, device, dev)) != NULL) {
423 if ((dev->bus->number == related->bus->number) &&
424 (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
431 /* FIXME - stolen from msr.c - the calls in msr.c could be exported */
439 static void smp_wrmsr(void *cmd_block)
441 struct msr_command *cmd = cmd_block;
442 wrmsr(cmd->reg, cmd->data[0], cmd->data[1]);
445 static void smp_rdmsr(void *cmd_block)
447 struct msr_command *cmd = cmd_block;
448 rdmsr(cmd->reg, cmd->data[0], cmd->data[1]);
451 static void do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx)
453 struct msr_command cmd;
455 cmd.cpu = raw_smp_processor_id();
459 on_each_cpu(smp_wrmsr, &cmd, 1, 1);
462 static void do_rdmsr(int cpu, u32 reg, u32 *eax, u32 *edx)
464 struct msr_command cmd;
466 cmd.cpu = raw_smp_processor_id();
468 on_each_cpu(smp_rdmsr, &cmd, 1, 1);
474 * FIXME - This is a large chunk of memory to suck up just to decode the
475 * syndrome. It would be nice to discover a pattern in the syndromes that
 * could be used to quickly identify the channel.  The big problems with
 * this table are memory usage, lookup speed (could sort and binary search),
 * and correctness (there could be a transcription error).  A zero in any
 * nibble of a syndrome is always channel 0, but that only decodes some of the
480 * syndromes. Can anyone find any other patterns?
482 * The comment in the left column is the nibble that is in error. The least
483 * significant nibble of the syndrome is the mask for the bits that are
484 * in error (need to be toggled) for the particular nibble.
#define SYNDROME_TABLE_SIZE 270

/* Chipkill syndromes that identify channel 0.
 *
 * The table is 18 rows of 15 entries.  The comment at the left of each row
 * is the nibble in error; entry m of a row (m = 1..15) is the syndrome for
 * error mask m within that nibble, so its least significant nibble is m.
 *
 * Every row is closed under XOR: the entry for mask m equals the XOR of the
 * entries for the single-bit masks (1, 2, 4, 8) that are set in m.  Four
 * entries violated that rule (and, for one of them, the low-nibble rule) and
 * have been corrected:
 *
 *   row c,  mask c: 0x542c -> 0x642c  (0xb834 ^ 0xdc18)
 *   row d,  mask b: 0x601b -> 0xf01b  (0xd0f3 ^ 0x20e8)
 *   row f,  mask d: 0x9562 -> 0x956d  (0xd935 ^ 0x4c58)
 *   row 21, mask c: 0x640c -> 0xf40c  (0x5804 ^ 0xac08)
 */
static const unsigned long syndromes_chan0[SYNDROME_TABLE_SIZE] = {
	/*0 */ 0xe821, 0x7c32, 0x9413, 0xbb44, 0x5365, 0xc776, 0x2f57,
	0xdd88, 0x35a9, 0xa1ba, 0x499b, 0x66cc, 0x8eed, 0x1afe, 0xf2df,
	/*1 */ 0x5d31, 0xa612, 0xfb23, 0x9584, 0xc8b5, 0x3396, 0x6ea7,
	0xeac8, 0xb7f9, 0x4cda, 0x11eb, 0x7f4c, 0x227d, 0xd95e, 0x846f,
	/*2 */ 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
	0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
	/*3 */ 0x2021, 0x3032, 0x1013, 0x4044, 0x6065, 0x7076, 0x5057,
	0x8088, 0xa0a9, 0xb0ba, 0x909b, 0xc0cc, 0xe0ed, 0xf0fe, 0xd0df,
	/*4 */ 0x5041, 0xa082, 0xf0c3, 0x9054, 0xc015, 0x30d6, 0x6097,
	0xe0a8, 0xb0e9, 0x402a, 0x106b, 0x70fc, 0x20bd, 0xd07e, 0x803f,
	/*5 */ 0xbe21, 0xd732, 0x6913, 0x2144, 0x9f65, 0xf676, 0x4857,
	0x3288, 0x8ca9, 0xe5ba, 0x5b9b, 0x13cc, 0xaded, 0xc4fe, 0x7adf,
	/*6 */ 0x4951, 0x8ea2, 0xc7f3, 0x5394, 0x1ac5, 0xdd36, 0x9467,
	0xa1e8, 0xe8b9, 0x2f4a, 0x661b, 0xf27c, 0xbb2d, 0x7cde, 0x358f,
	/*7 */ 0x74e1, 0x9872, 0xec93, 0xd6b4, 0xa255, 0x4ec6, 0x3a27,
	0x6bd8, 0x1f39, 0xf3aa, 0x874b, 0xbd6c, 0xc98d, 0x251e, 0x51ff,
	/*8 */ 0x15c1, 0x2a42, 0x3f83, 0xcef4, 0xdb35, 0xe4b6, 0xf177,
	0x4758, 0x5299, 0x6d1a, 0x78db, 0x89ac, 0x9c6d, 0xa3ee, 0xb62f,
	/*9 */ 0x3d01, 0x1602, 0x2b03, 0x8504, 0xb805, 0x9306, 0xae07,
	0xca08, 0xf709, 0xdc0a, 0xe10b, 0x4f0c, 0x720d, 0x590e, 0x640f,
	/*a */ 0x9801, 0xec02, 0x7403, 0x6b04, 0xf305, 0x8706, 0x1f07,
	0xbd08, 0x2509, 0x510a, 0xc90b, 0xd60c, 0x4e0d, 0x3a0e, 0xa20f,
	/*b */ 0xd131, 0x6212, 0xb323, 0x3884, 0xe9b5, 0x5a96, 0x8ba7,
	0x1cc8, 0xcdf9, 0x7eda, 0xafeb, 0x244c, 0xf57d, 0x465e, 0x976f,
	/*c */ 0xe1d1, 0x7262, 0x93b3, 0xb834, 0x59e5, 0xca56, 0x2b87,
	0xdc18, 0x3dc9, 0xae7a, 0x4fab, 0x642c, 0x85fd, 0x164e, 0xf79f,
	/*d */ 0x6051, 0xb0a2, 0xd0f3, 0x1094, 0x70c5, 0xa036, 0xc067,
	0x20e8, 0x40b9, 0x904a, 0xf01b, 0x307c, 0x502d, 0x80de, 0xe08f,
	/*e */ 0xa4c1, 0xf842, 0x5c83, 0xe6f4, 0x4235, 0x1eb6, 0xba77,
	0x7b58, 0xdf99, 0x831a, 0x27db, 0x9dac, 0x396d, 0x65ee, 0xc12f,
	/*f */ 0x11c1, 0x2242, 0x3383, 0xc8f4, 0xd935, 0xeab6, 0xfb77,
	0x4c58, 0x5d99, 0x6e1a, 0x7fdb, 0x84ac, 0x956d, 0xa6ee, 0xb72f,

	/*20 */ 0xbe01, 0xd702, 0x6903, 0x2104, 0x9f05, 0xf606, 0x4807,
	0x3208, 0x8c09, 0xe50a, 0x5b0b, 0x130c, 0xad0d, 0xc40e, 0x7a0f,
	/*21 */ 0x4101, 0x8202, 0xc303, 0x5804, 0x1905, 0xda06, 0x9b07,
	0xac08, 0xed09, 0x2e0a, 0x6f0b, 0xf40c, 0xb50d, 0x760e, 0x370f
};
527 static const unsigned long syndromes_chan1[SYNDROME_TABLE_SIZE] = {
528 /*10 */ 0x45d1, 0x8a62, 0xcfb3, 0x5e34, 0x1be5, 0xd456, 0x9187,
529 0xa718, 0xe2c9, 0x2d7a, 0x68ab, 0xf92c, 0xbcfd, 0x734e, 0x369f,
530 /*11 */ 0x63e1, 0xb172, 0xd293, 0x14b4, 0x7755, 0xa5c6, 0xc627,
531 0x28d8, 0x4b39, 0x99aa, 0xfa4b, 0x3c6c, 0x5f8d, 0x8d1e, 0xeeff,
532 /*12 */ 0xb741, 0xd982, 0x6ec3, 0x2254, 0x9515, 0xfbd6, 0x4c97,
533 0x33a8, 0x84e9, 0xea2a, 0x5d6b, 0x11fc, 0xa6bd, 0xc87e, 0x7f3f,
534 /*13 */ 0xdd41, 0x6682, 0xbbc3, 0x3554, 0xe815, 0x53d6, 0xce97,
535 0x1aa8, 0xc7e9, 0x7c2a, 0xa1fb, 0x2ffc, 0xf2bd, 0x497e, 0x943f,
536 /*14 */ 0x2bd1, 0x3d62, 0x16b3, 0x4f34, 0x64e5, 0x7256, 0x5987,
537 0x8518, 0xaec9, 0xb87a, 0x93ab, 0xca2c, 0xe1fd, 0xf74e, 0xdc9f,
538 /*15 */ 0x83c1, 0xc142, 0x4283, 0xa4f4, 0x2735, 0x65b6, 0xe677,
539 0xf858, 0x7b99, 0x391a, 0xbadb, 0x5cac, 0xdf6d, 0x9dee, 0x1e2f,
540 /*16 */ 0x8fd1, 0xc562, 0x4ab3, 0xa934, 0x26e5, 0x6c56, 0xe387,
541 0xfe18, 0x71c9, 0x3b7a, 0xb4ab, 0x572c, 0xd8fd, 0x924e, 0x1d9f,
542 /*17 */ 0x4791, 0x89e2, 0xce73, 0x5264, 0x15f5, 0xdb86, 0x9c17,
543 0xa3b8, 0xe429, 0x2a5a, 0x6dcb, 0xf1dc, 0xb64d, 0x783e, 0x3faf,
544 /*18 */ 0x5781, 0xa9c2, 0xfe43, 0x92a4, 0xc525, 0x3b66, 0x6ce7,
545 0xe3f8, 0xb479, 0x4a3a, 0x1dbb, 0x715c, 0x26dd, 0xd89e, 0x8f1f,
546 /*19 */ 0xbf41, 0xd582, 0x6ac3, 0x2954, 0x9615, 0xfcd6, 0x4397,
547 0x3ea8, 0x81e9, 0xeb2a, 0x546b, 0x17fc, 0xa8bd, 0xc27e, 0x7d3f,
548 /*1a */ 0x9891, 0xe1e2, 0x7273, 0x6464, 0xf7f5, 0x8586, 0x1617,
549 0xb8b8, 0x2b29, 0x595a, 0xcacb, 0xdcdc, 0x4f4d, 0x3d3e, 0xaeaf,
550 /*1b */ 0xcce1, 0x4472, 0x8893, 0xfdb4, 0x3f55, 0xb9c6, 0x7527,
551 0x56d8, 0x9a39, 0x12aa, 0xde4b, 0xab6c, 0x678d, 0xef1e, 0x23ff,
552 /*1c */ 0xa761, 0xf9b2, 0x5ed3, 0xe214, 0x4575, 0x1ba6, 0xbcc7,
553 0x7328, 0xd449, 0x8a9a, 0x2dfb, 0x913c, 0x365d, 0x688e, 0xcfef,
554 /*1d */ 0xff61, 0x55b2, 0xaad3, 0x7914, 0x8675, 0x2ca6, 0xd3c7,
555 0x9e28, 0x6149, 0xcb9a, 0x34fb, 0xe73c, 0x185d, 0xb28e, 0x4def,
556 /*1e */ 0x5451, 0xa8a2, 0xfcf3, 0x9694, 0xc2c5, 0x3e36, 0x6a67,
557 0xebe8, 0xbfb9, 0x434a, 0x171b, 0x7d7c, 0x292d, 0xd5de, 0x818f,
558 /*1f */ 0x6fc1, 0xb542, 0xda83, 0x19f4, 0x7635, 0xacb6, 0xc377,
559 0x2e58, 0x4199, 0x9b1a, 0xf4db, 0x37ac, 0x586d, 0x82ee, 0xed2f,
561 /*22 */ 0xc441, 0x4882, 0x8cc3, 0xf654, 0x3215, 0xbed6, 0x7a97,
562 0x5ba8, 0x9fe9, 0x132a, 0xd76b, 0xadfc, 0x69bd, 0xe57e, 0x213f,
563 /*23 */ 0x7621, 0x9b32, 0xed13, 0xda44, 0xac65, 0x4176, 0x3757,
564 0x6f88, 0x19a9, 0xf4ba, 0x829b, 0xb5cc, 0xc3ed, 0x2efe, 0x58df
567 static int chan_from_chipkill_syndrome(unsigned long syndrome)
571 debugf0("%s()\n", __func__);
573 for (i = 0; i < SYNDROME_TABLE_SIZE; i++) {
574 if (syndromes_chan0[i] == syndrome)
576 if (syndromes_chan1[i] == syndrome)
580 debugf0("%s(): syndrome(%lx) not found\n", __func__, syndrome);
584 static const char *tt_msgs[] = { /* transaction type */
591 static const char *ll_msgs[] = { /* cache level */
598 static const char *memtt_msgs[] = {
617 static const char *pp_msgs[] = { /* participating processor */
619 "local node response",
620 "local node observed",
624 static const char *to_msgs[] = {
629 static const char *ii_msgs[] = { /* memory or i/o */
636 static const char *ext_msgs[] = { /* extended error */
645 "ECC chipkill x4 error",
655 static const char *htlink_msgs[] = {
666 static inline u64 base_from_dcsb(u32 dcsb)
668 /* 0xffe0fe00 selects bits 31-21 and 15-9 of a DRAM CS Base Address
669 * Register (section 3.5.4). Shifting the bits left 4 puts them in
670 * their proper bit positions of 35-25 and 19-13.
672 return ((u64) (dcsb & 0xffe0fe00)) << 4;
675 static u64 mask_from_dcsm(u32 dcsm)
677 u64 dcsm_bits, other_bits;
679 /* Extract bits bits 29-21 and 15-9 from DCSM (section 3.5.5). */
680 dcsm_bits = dcsm & DCSM_MASK_BITS;
682 /* Set all bits except bits 33-25 and 19-13. */
683 other_bits = DCSM_MASK_BITS;
684 other_bits = ~(other_bits << 4);
686 /* The extracted bits from DCSM belong in the spaces represented by
687 * the cleared bits in other_bits.
689 return (dcsm_bits << 4) | other_bits;
692 /* In *base and *limit, pass back the full 40-bit base and limit physical
693 * addresses for the node given by node_id. This information is obtained from
694 * DRAM Base (section 3.4.4.1) and DRAM Limit (section 3.4.4.2) registers. The
695 * base and limit addresses are of type SysAddr, as defined at the start of
696 * section 3.4.4 (p. 70). They are the lowest and highest physical addresses
697 * in the address range they represent.
699 static void get_base_and_limit(struct k8_pvt *pvt, int node_id,
700 u64 *base, u64 *limit)
702 *base = ((u64) (pvt->dbr[node_id] & 0xffff0000)) << 8;
704 /* Since the limit represents the highest address in the range, we
705 * must set its lowest 24 bits to 1.
707 *limit = (((u64) (pvt->dlr[node_id] & 0xffff0000)) << 8) | 0xffffff;
710 /* Return 1 if the SysAddr given by sys_addr matches the base/limit associated
713 static int base_limit_match(struct k8_pvt *pvt, u64 sys_addr, int node_id)
715 u64 base, limit, addr;
717 get_base_and_limit(pvt, node_id, &base, &limit);
719 /* The k8 treats this as a 40-bit value. However, bits 63-40 will be
720 * all ones if the most significant implemented address bit is 1.
721 * Here we discard bits 63-40. See section 3.4.2 of AMD publication
722 * 24592: AMD x86-64 Architecture Programmer's Manual Volume 1
723 * Application Programming.
725 addr = sys_addr & 0x000000ffffffffffull;
727 return (addr >= base) && (addr <= limit);
730 /* Attempt to map a SysAddr to a node. On success, return a pointer to the
731 * mem_ctl_info structure for the node that the SysAddr maps to. On failure,
734 static struct mem_ctl_info * find_mc_by_sys_addr(struct mem_ctl_info *mci,
741 /* Here we use the DRAM Base (section 3.4.4.1) and DRAM Limit (section
742 * 3.4.4.2) registers to map the SysAddr to a node ID.
747 /* The value of this field should be the same for all DRAM Base
748 * registers. Therefore we arbitrarily choose to read it from the
749 * register for node 0.
751 intlv_en = pvt->dbr[0] & (0x07 << 8);
753 if (intlv_en == 0) { /* node interleaving is disabled */
754 debugf2("%s(): node interleaving disabled\n", __func__);
755 for (node_id = 0; ; ) {
756 if (base_limit_match(pvt, sys_addr, node_id))
759 if (++node_id == MAX_K8_NODES) {
760 debugf2("%s(): sys_addr 0x%lx "
761 "does not match any node\n", __func__,
762 (unsigned long) sys_addr);
770 if (unlikely((intlv_en != (0x01 << 8)) &&
771 (intlv_en != (0x03 << 8)) &&
772 (intlv_en != (0x07 << 8)))) {
773 k8_printk(KERN_WARNING,
774 "%s(): junk value of 0x%x extracted from IntlvEn "
775 "field of DRAM Base Register for node 0: This "
776 "probably indicates a BIOS bug.\n", __func__,
781 /* If we get this far, node interleaving is enabled. */
782 debugf2("%s(): node interleaving enabled\n", __func__);
783 bits = (((u32) sys_addr) >> 12) & intlv_en;
785 for (node_id = 0; ; ) {
786 if ((pvt->dlr[node_id] & intlv_en) == bits)
787 break; /* intlv_sel field matches */
789 if (++node_id == MAX_K8_NODES) {
790 debugf2("%s(): sys_addr 0x%lx does not match any "
791 "node\n", __func__, (unsigned long) sys_addr);
796 /* sanity test for sys_addr */
797 if (unlikely(!base_limit_match(pvt, sys_addr, node_id))) {
798 k8_printk(KERN_WARNING,
799 "%s(): sys_addr 0x%lx falls outside base/limit "
800 "address range for node %d with node interleaving "
801 "enabled.\n", __func__, (unsigned long) sys_addr,
807 debugf2("%s(): sys_addr 0x%lx matches node %d\n", __func__,
808 (unsigned long) sys_addr, node_id);
809 return edac_mc_find(node_id);
812 /* Return the base value defined by the DRAM Base register for the node
813 * represented by mci. This function returns the full 40-bit value despite
814 * the fact that the register only stores bits 39-24 of the value. See
817 static inline u64 get_dram_base(struct mem_ctl_info *mci)
822 return ((u64) (pvt->dbr[pvt->node_id] & 0xffff0000)) << 8;
825 /* Obtain info from the DRAM Hole Address Register (section 3.4.8) for the
826 * node represented by mci. Info is passed back in *hole_base, *hole_offset,
827 * and *hole_size. Function returns 0 if info is valid or 1 if info is
828 * invalid. Info may be invalid for either of the following reasons:
830 * - The revision of the node is not E or greater. In this case, the DRAM
831 * Hole Address Register does not exist.
832 * - The DramHoleValid bit is cleared in the DRAM Hole Address Register,
833 * indicating that its contents are not valid.
835 * The values passed back in *hole_base, *hole_offset, and *hole_size are
836 * complete 32-bit values despite the fact that the bitfields in the DHAR
837 * only represent bits 31-24 of the base and offset values.
839 static int get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
840 u64 *hole_offset, u64 *hole_size)
847 if (node_rev(pvt->node_id) < OPTERON_CPU_REV_E) {
848 debugf2("revision %d for node %d does not support DHAR\n",
849 node_rev(pvt->node_id), pvt->node_id);
853 if ((pvt->dhar & 0x01) == 0) {
854 debugf2("DramHoleValid bit cleared in DHAR for node %d\n",
856 return 1; /* DramHoleValid bit is cleared */
859 /* +------------------+--------------------+--------------------+-----
860 * | memory | DRAM hole | relocated |
861 * | [0, (x - 1)] | [x, 0xffffffff] | addresses from |
863 * | | | [0x100000000, |
864 * | | | (0x100000000+ |
865 * | | | (0xffffffff-x))] |
866 * +------------------+--------------------+--------------------+-----
868 * Above is a diagram of physical memory showing the DRAM hole and the
869 * relocated addresses from the DRAM hole. As shown, the DRAM hole
870 * starts at address x (the base address) and extends through address
871 * 0xffffffff. The DRAM Hole Address Register (DHAR) relocates the
872 * addresses in the hole so that they start at 0x100000000.
875 base = pvt->dhar & 0xff000000;
877 *hole_offset = (pvt->dhar & 0x0000ff00) << 16;
878 *hole_size = (0x1ull << 32) - base;
879 debugf2("DHAR info for node %d: base 0x%lx offset 0x%lx size 0x%lx\n",
880 pvt->node_id, (unsigned long) *hole_base,
881 (unsigned long) *hole_offset, (unsigned long) *hole_size);
885 /* Return the DramAddr that the SysAddr given by sys_addr maps to. It is
886 * assumed that sys_addr maps to the node given by mci.
888 static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr)
890 u64 dram_base, hole_base, hole_offset, hole_size, dram_addr;
892 /* The first part of section 3.4.4 (p. 70) shows how the DRAM Base
893 * (section 3.4.4.1) and DRAM Limit (section 3.4.4.2) registers are
894 * used to translate a SysAddr to a DramAddr. If the DRAM Hole
895 * Address Register (DHAR) is enabled, then it is also involved in
896 * translating a SysAddr to a DramAddr. Sections 3.4.8 and 3.5.8.2
897 * describe the DHAR and how it is used for memory hoisting. These
898 * parts of the documentation are unclear. I interpret them as
901 * When node n receives a SysAddr, it processes the SysAddr as
904 * 1. It extracts the DRAMBase and DRAMLimit values from the
905 * DRAM Base and DRAM Limit registers for node n. If the
906 * SysAddr is not within the range specified by the base
907 * and limit values, then node n ignores the Sysaddr
908 * (since it does not map to node n). Otherwise continue
911 * 2. If the DramHoleValid bit of the DHAR for node n is
912 * clear, the DHAR is disabled so skip to step 3 below.
913 * Otherwise see if the SysAddr is within the range of
914 * relocated addresses (starting at 0x100000000) from the
915 * DRAM hole. If not, skip to step 3 below. Else get the
916 * value of the DramHoleOffset field from the DHAR. To
917 * obtain the DramAddr, subtract the offset defined by
918 * this value from the SysAddr.
920 * 3. Obtain the base address for node n from the DRAMBase
921 * field of the DRAM Base register for node n. To obtain
922 * the DramAddr, subtract the base address from the
923 * SysAddr, as shown near the start of section 3.4.4
927 dram_base = get_dram_base(mci);
929 if (!get_dram_hole_info(mci, &hole_base, &hole_offset, &hole_size)) {
930 if ((sys_addr >= (1ull << 32)) &&
931 (sys_addr < ((1ull << 32) + hole_size))) {
932 /* use DHAR to translate SysAddr to DramAddr */
933 dram_addr = sys_addr - hole_offset;
934 debugf2("using DHAR to translate SysAddr 0x%lx to "
936 (unsigned long) sys_addr,
937 (unsigned long) dram_addr);
942 /* Translate the SysAddr to a DramAddr as shown near the start of
943 * section 3.4.4 (p. 70). Although sys_addr is a 64-bit value, the k8
944 * only deals with 40-bit values. Therefore we discard bits 63-40 of
945 * sys_addr below. If bit 39 of sys_addr is 1 then the bits we
946 * discard are all 1s. Otherwise the bits we discard are all 0s. See
947 * section 3.4.2 of AMD publication 24592: AMD x86-64 Architecture
948 * Programmer's Manual Volume 1 Application Programming.
950 dram_addr = (sys_addr & 0xffffffffffull) - dram_base;
952 debugf2("using DRAM Base register to translate SysAddr 0x%lx to "
953 "DramAddr 0x%lx\n", (unsigned long) sys_addr,
954 (unsigned long) dram_addr);
958 /* Parameter intlv_en is the value of the IntlvEn field from a DRAM Base
959 * register (section 3.4.4.1). Return the number of bits from a SysAddr that
960 * are used for node interleaving.
962 static int num_node_interleave_bits(unsigned intlv_en)
964 static const int intlv_shift_table[] = { 0, 1, 0, 2, 0, 0, 0, 3 };
967 BUG_ON(intlv_en > 7);
968 n = intlv_shift_table[intlv_en];
969 debugf2("using %d bits for node interleave\n", n);
973 /* Translate the DramAddr given by dram_addr to an InputAddr and return the
976 static u64 dram_addr_to_input_addr(struct mem_ctl_info *mci, u64 dram_addr)
984 /* Near the start of section 3.4.4 (p. 70), the k8 documentation gives
985 * instructions for translating a DramAddr to an InputAddr. Here we
986 * are following these instructions.
988 intlv_shift = num_node_interleave_bits((pvt->dbr[0] >> 8) & 0x07);
989 input_addr = ((dram_addr >> intlv_shift) & 0xffffff000ull) +
992 debugf2("DramAddr 0x%lx translates to InputAddr 0x%lx\n",
993 (unsigned long) dram_addr, (unsigned long) input_addr);
997 /* Translate the SysAddr represented by sys_addr to an InputAddr and return
998 * the result. It is assumed that sys_addr maps to the node given by mci.
1000 static u64 sys_addr_to_input_addr(struct mem_ctl_info *mci, u64 sys_addr)
1004 input_addr = dram_addr_to_input_addr(
1005 mci, sys_addr_to_dram_addr(mci, sys_addr));
1006 debugf2("%s(): SysAdddr 0x%lx translates to InputAddr 0x%lx\n",
1007 __func__, (unsigned long) sys_addr,
1008 (unsigned long) input_addr);
1012 /* input_addr is an InputAddr associated with the node given by mci. Return
1013 * the csrow that input_addr maps to, or -1 on failure (no csrow claims
1016 static int input_addr_to_csrow(struct mem_ctl_info *mci, u64 input_addr)
1023 pvt = mci->pvt_info;
1025 /* Here we use the DRAM CS Base (section 3.5.4) and DRAM CS Mask
1026 * (section 3.5.5) registers. For each CS base/mask register pair,
1027 * test the condition shown near the start of section 3.5.4 (p. 84).
1030 for (i = 0; i < K8_NR_CSROWS; i++) {
1031 dcsb = pvt->dcsb[i];
1032 dcsm = pvt->dcsm[i];
1034 if ((dcsb & 0x01) == 0) {
1035 debugf2("input_addr_to_csrow: CSBE bit is cleared "
1036 "for csrow %d (node %d)\n", i,
1038 continue; /* CSBE bit is cleared */
1041 base = base_from_dcsb(dcsb);
1042 mask = ~mask_from_dcsm(dcsm);
1044 if ((input_addr & mask) == (base & mask)) {
1045 debugf2("InputAddr 0x%lx matches csrow %d "
1046 "(node %d)\n", (unsigned long) input_addr, i,
1048 return i; /* success: csrow i matches */
1052 debugf2("no matching csrow for InputAddr 0x%lx (node %d)\n",
1053 (unsigned long) input_addr, pvt->node_id);
1054 return -1; /* failed to find matching csrow */
1057 /* input_addr is an InputAddr associated with the node represented by mci.
1058 * Translate input_addr to a DramAddr and return the result.
1060 static u64 input_addr_to_dram_addr(struct mem_ctl_info *mci, u64 input_addr)
1063 int node_id, intlv_shift;
1064 u64 bits, dram_addr;
1067 /* Near the start of section 3.4.4 (p. 70), the k8 documentation shows
1068 * how to translate a DramAddr to an InputAddr. Here we reverse this
1069 * procedure. When translating from a DramAddr to an InputAddr, the
1070 * bits used for node interleaving are discarded. Here we recover
1071 * these bits from the IntlvSel field of the DRAM Limit register
1072 * (section 3.4.4.2) for the node that input_addr is associated with.
1075 pvt = mci->pvt_info;
1076 node_id = pvt->node_id;
1077 BUG_ON((node_id < 0) || (node_id > 7));
1078 intlv_shift = num_node_interleave_bits((pvt->dbr[0] >> 8) & 0x07);
1080 if (intlv_shift == 0) {
1081 debugf1("node interleaving disabled: InputAddr 0x%lx "
1082 "translates to DramAddr of same value\n",
1083 (unsigned long) input_addr);
1087 bits = ((input_addr & 0xffffff000ull) << intlv_shift) +
1088 (input_addr & 0xfff);
1089 intlv_sel = pvt->dlr[node_id] & (((1 << intlv_shift) - 1) << 8);
1090 dram_addr = bits + (intlv_sel << 4);
1091 debugf1("InputAddr 0x%lx translates to DramAddr 0x%lx "
1092 "(%d node interleave bits)\n", (unsigned long) input_addr,
1093 (unsigned long) dram_addr, intlv_shift);
1097 /* dram_addr is a DramAddr that maps to the node represented by mci. Convert
1098 * dram_addr to a SysAddr and return the result.
1100 static u64 dram_addr_to_sys_addr(struct mem_ctl_info *mci, u64 dram_addr)
1103 u64 hole_base, hole_offset, hole_size, base, limit, sys_addr;
1105 pvt = mci->pvt_info;
1107 if (!get_dram_hole_info(mci, &hole_base, &hole_offset, &hole_size)) {
1108 if ((dram_addr >= hole_base) &&
1109 (dram_addr < (hole_base + hole_size))) {
1110 /* use DHAR to translate DramAddr to SysAddr */
1111 sys_addr = dram_addr + hole_offset;
1112 debugf1("using DHAR to translate DramAddr 0x%lx to "
1113 "SysAddr 0x%lx\n", (unsigned long) dram_addr,
1114 (unsigned long) sys_addr);
1119 get_base_and_limit(pvt, pvt->node_id, &base, &limit);
1120 sys_addr = dram_addr + base;
1122 /* The sys_addr we have computed up to this point is a 40-bit value
1123 * because the k8 deals with 40-bit values. However, the value we are
1124 * supposed to return is a full 64-bit physical address. The AMD
1125 * x86-64 architecture specifies that the most significant implemented
1126 * address bit through bit 63 of a physical address must be either all
1127 * 0s or all 1s. Therefore we sign-extend the 40-bit sys_addr to a
1128 * 64-bit value below. See section 3.4.2 of AMD publication 24592:
1129 * AMD x86-64 Architecture Programmer's Manual Volume 1 Application
1132 sys_addr |= ~((sys_addr & (1ull << 39)) - 1);
1134 debugf1("Using DRAM Base register for node %d to translate "
1135 "DramAddr 0x%lx to SysAddr 0x%lx\n", pvt->node_id,
1136 (unsigned long) dram_addr, (unsigned long) sys_addr);
1140 /* input_addr is an InputAddr associated with the node given by mci.
1141 * Translate input_addr to a SysAddr and return the result.
1143 static inline u64 input_addr_to_sys_addr(struct mem_ctl_info *mci,
1146 return dram_addr_to_sys_addr(
1147 mci, input_addr_to_dram_addr(mci, input_addr));
1150 /* Find the minimum and maximum InputAddr values that map to the given csrow.
1151 * Pass back these values in *input_addr_min and *input_addr_max.
1153 static void find_csrow_limits(struct mem_ctl_info *mci, int csrow,
1154 u64 *input_addr_min, u64 *input_addr_max)
1159 pvt = mci->pvt_info;
1160 BUG_ON((csrow < 0) || (csrow >= K8_NR_CSROWS));
1161 base = base_from_dcsb(pvt->dcsb[csrow]);
1162 mask = mask_from_dcsm(pvt->dcsm[csrow]);
1163 *input_addr_min = base & ~mask;
1165 /* To find the max InputAddr for the csrow, start with the base
1166 * address and set all bits that are "don't care" bits in the test at
1167 * the start of section 3.5.4 (p. 84). The "don't care" bits are all
1168 * set bits in the mask and all bits in the gaps between bit ranges
1169 * [35-25] and [19-13]. The value 0x1f01fff represents bits [24-20]
1170 * and [12-0], which are all bits in the above-mentioned gaps.
1172 *input_addr_max = base | mask | 0x1f01fff;
1175 /* Extract error address from MCA NB Address Low (section 3.6.4.5) and
1176 * MCA NB Address High (section 3.6.4.6) register values and return the
1179 static inline u64 error_address_from_k8_error_info(
1180 struct k8_error_info *info)
1182 return (((u64) (info->error_info.nbeah & 0xff)) << 32) +
1183 (info->error_info.nbeal & ~0x03);
1186 static inline void error_address_to_page_and_offset(u64 error_address,
1187 u32 *page, u32 *offset)
1189 *page = (u32) (error_address >> PAGE_SHIFT);
1190 *offset = ((u32) error_address) & ~PAGE_MASK;
1193 /* Return 1 if registers contain valid error information. Else return 0. */
1194 static inline int k8_error_info_valid(struct k8_error_info_regs *regs)
1196 return ((regs->nbsh & K8_NBSH_VALID_BIT) != 0);
1199 /* return 0 if regs contains valid error info; else return 1 */
1200 static int k8_get_error_info_regs(struct mem_ctl_info *mci,
1201 struct k8_error_info_regs *regs)
1205 pvt = mci->pvt_info;
1206 pci_read_config_dword(pvt->misc_ctl, K8_NBSH, ®s->nbsh);
1208 if (!k8_error_info_valid(regs))
1211 pci_read_config_dword(pvt->misc_ctl, K8_NBSL, ®s->nbsl);
1212 pci_read_config_dword(pvt->misc_ctl, K8_NBEAH, ®s->nbeah);
1213 pci_read_config_dword(pvt->misc_ctl, K8_NBEAL, ®s->nbeal);
1217 static void k8_get_error_info(struct mem_ctl_info *mci,
1218 struct k8_error_info *info)
1221 struct k8_error_info_regs regs;
1223 pvt = mci->pvt_info;
1224 info->race_condition_detected = 0;
1226 if (k8_get_error_info_regs(mci, &info->error_info))
1230 * Here's the problem with the K8's EDAC reporting:
1231 * There are four registers which report pieces of error
1232 * information. These four registers are shared between
1233 * CEs and UEs. Furthermore, contrary to what is stated in
1234 * the OBKG, the overflow bit is never used! Every error
1235 * always updates the reporting registers.
1237 * Can you see the race condition? All four error reporting
1238 * registers must be read before a new error updates them!
1239 * There is no way to read all four registers atomically. The
1240 * best than can be done is to detect that a race has occured
1241 * and then report the error without any kind of precision.
1243 * What is still positive is that errors are
1244 * still reported and thus problems can still be detected -
1245 * just not localized because the syndrome and address are
1246 * spread out across registers.
1248 * Grrrrr!!!!! Here's hoping that AMD fixes this in some
1249 * future K8 rev. UEs and CEs should have separate
1250 * register sets with proper overflow bits that are used!
1251 * At very least the problem can be fixed by honoring the
1252 * ErrValid bit in nbsh and not updating registers - just
1253 * set the overflow bit - unless the current error is CE
1254 * and the new error is UE which would be the only situation
1255 * for overwriting the current values.
1258 regs = info->error_info;
1260 /* Use info from the second read - most current */
1261 if (unlikely(k8_get_error_info_regs(mci, &info->error_info)))
1264 /* clear the error */
1265 pci_write_bits32(pvt->misc_ctl, K8_NBSH, 0, K8_NBSH_VALID_BIT);
1267 pci_read_config_dword(pvt->misc_ctl, K8_NBCFG, &info->nbcfg);
1268 info->race_condition_detected =
1269 ((regs.nbsh != info->error_info.nbsh) ||
1270 (regs.nbsl != info->error_info.nbsl) ||
1271 (regs.nbeah != info->error_info.nbeah) ||
1272 (regs.nbeal != info->error_info.nbeal));
1275 static inline void decode_gart_tlb_error(struct mem_ctl_info *mci,
1276 struct k8_error_info *info)
1279 u32 ec_tt; /* error code transaction type (2b) */
1280 u32 ec_ll; /* error code cache level (2b) */
1282 err_code = info->error_info.nbsl & 0xffffUL;
1283 ec_tt = (err_code >> 2) & 0x03UL;
1284 ec_ll = (err_code >> 0) & 0x03UL;
1285 k8_mc_printk(mci, KERN_ERR,
1286 "GART TLB errorr: transaction type(%s), "
1287 "cache level(%s)\n", tt_msgs[ec_tt], ll_msgs[ec_ll]);
1290 static inline void decode_cache_error(struct mem_ctl_info *mci,
1291 struct k8_error_info *info)
1294 u32 ec_rrrr; /* error code memory transaction (4b) */
1295 u32 ec_tt; /* error code transaction type (2b) */
1296 u32 ec_ll; /* error code cache level (2b) */
1298 err_code = info->error_info.nbsl & 0xffffUL;
1299 ec_rrrr = (err_code >> 4) & 0x0fUL;
1300 ec_tt = (err_code >> 2) & 0x03UL;
1301 ec_ll = (err_code >> 0) & 0x03UL;
1302 k8_mc_printk(mci, KERN_ERR,
1303 "cache heirarchy error: memory transaction type(%s), "
1304 "transaction type(%s), cache level(%s)\n",
1305 memtt_msgs[ec_rrrr], tt_msgs[ec_tt], ll_msgs[ec_ll]);
1308 /* sys_addr is an error address (a SysAddr) extracted from the MCA NB Address
1309 * Low (section 3.6.4.5) and MCA NB Address High (section 3.6.4.6) registers
1310 * of a node that detected an ECC memory error. mci represents the node that
1311 * the error address maps to (possibly different from the node that detected
1312 * the error). Return the number of the csrow that sys_addr maps to, or -1 on
1315 static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr)
1319 csrow = input_addr_to_csrow(mci,
1320 sys_addr_to_input_addr(mci, sys_addr));
1323 k8_mc_printk(mci, KERN_ERR,
1324 "Failed to translate InputAddr to csrow for "
1325 "address 0x%lx\n", (unsigned long) sys_addr);
1330 static void k8_handle_ce(struct mem_ctl_info *mci, struct k8_error_info *info)
1337 struct mem_ctl_info *log_mci, *src_mci;
1340 pvt = mci->pvt_info;
1342 if ((info->error_info.nbsh & BIT(26)) == 0)
1343 goto no_info; /* error address not valid */
1345 error_address = error_address_from_k8_error_info(info);
1346 syndrome = ((info->error_info.nbsh >> 15) & 0xff);
1348 if (info->nbcfg & BIT(23)) {
1349 /* chipkill ecc mode */
1350 syndrome += (info->error_info.nbsl >> 16) & 0xff00;
1351 channel = chan_from_chipkill_syndrome(syndrome);
1354 /* If the syndrome couldn't be found then the race
1355 * condition for error reporting registers likely
1356 * occurred. There's alot more in doubt than just the
1357 * channel. Might as well just log the error without
1360 k8_mc_printk(mci, KERN_WARNING,
1361 "unknown syndrome 0x%x - possible error "
1362 "reporting race\n", syndrome);
1366 /* non-chipkill ecc mode
1368 * The k8 documentation is unclear about how to determine the
1369 * channel number when using non-chipkill memory. This method
1370 * was obtained from email communication with someone at AMD.
1372 channel = ((error_address & BIT(3)) != 0);
1374 /* Find out which node the error address belongs to. This may be
1375 * different from the node that detected the error.
1377 if ((src_mci = find_mc_by_sys_addr(mci, error_address)) == NULL) {
1378 k8_mc_printk(mci, KERN_ERR,
1379 "failed to map error address 0x%lx to a node\n",
1380 (unsigned long) error_address);
1386 if ((csrow = sys_addr_to_csrow(log_mci, error_address)) < 0)
1389 error_address_to_page_and_offset(error_address, &page, &offset);
1390 edac_mc_handle_ce(log_mci, page, offset, syndrome, csrow, channel,
1395 edac_mc_handle_ce_no_info(log_mci,EDAC_MOD_STR);
1398 static void k8_handle_ue(struct mem_ctl_info *mci, struct k8_error_info *info)
1403 struct mem_ctl_info *log_mci, *src_mci;
1407 if ((info->error_info.nbsh & BIT(26)) == 0)
1408 goto no_info; /* error address not valid */
1410 error_address = error_address_from_k8_error_info(info);
1412 /* Find out which node the error address belongs to. This may be
1413 * different from the node that detected the error.
1415 if ((src_mci = find_mc_by_sys_addr(mci, error_address)) == NULL) {
1416 k8_mc_printk(mci, KERN_ERR,
1417 "failed to map error address 0x%lx to a node\n",
1418 (unsigned long) error_address);
1424 if ((csrow = sys_addr_to_csrow(log_mci, error_address)) < 0)
1427 error_address_to_page_and_offset(error_address, &page, &offset);
1428 edac_mc_handle_ue(log_mci, page, offset, csrow, EDAC_MOD_STR);
1432 edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
1435 static void decode_bus_error(struct mem_ctl_info *mci,
1436 struct k8_error_info *info)
1438 u32 err_code, ext_ec;
1439 u32 ec_pp; /* error code participating processor (2p) */
1440 u32 ec_to; /* error code timed out (1b) */
1441 u32 ec_rrrr; /* error code memory transaction (4b) */
1442 u32 ec_ii; /* error code memory or I/O (2b) */
1443 u32 ec_ll; /* error code cache level (2b) */
1445 debugf0("MC%d: %s()\n", mci->mc_idx, __func__);
1446 err_code = info->error_info.nbsl & 0xffffUL;
1447 ec_pp = (err_code >> 9) & 0x03UL;
1448 ec_to = (err_code >> 8) & 0x01UL;
1449 ec_rrrr = (err_code >> 4) & 0x0fUL;
1450 ec_ii = (err_code >> 2) & 0x03UL;
1451 ec_ll = (err_code >> 0) & 0x03UL;
1452 ext_ec = (info->error_info.nbsl >> 16) & 0xfUL;
1454 /* FIXME - these should report through EDAC channels */
1455 k8_mc_printk(mci, KERN_ERR, "general bus error: participating "
1456 "processor(%s), time-out(%s) memory transaction "
1457 "type(%s), mem or i/o(%s), cache level(%s)\n",
1458 pp_msgs[ec_pp], to_msgs[ec_to], memtt_msgs[ec_rrrr],
1459 ii_msgs[ec_ii], ll_msgs[ec_ll]);
1462 return; /* We aren't the node involved */
1464 /* FIXME - other errors should have other error handling mechanisms */
1465 if (ext_ec && (ext_ec != 0x8)) {
1466 k8_mc_printk(mci, KERN_ERR,
1467 "no special error handling for this error\n");
1471 if (info->error_info.nbsh & BIT(14))
1472 k8_handle_ce(mci, info);
1473 else if (info->error_info.nbsh & BIT(13))
1474 k8_handle_ue(mci, info);
1476 /* If main error is CE then overflow must be CE. If main error is UE
1477 * then overflow is unknown. We'll call the overflow a CE - if
1478 * panic_on_ue is set then we're already panic'ed and won't arrive
1479 * here. If panic_on_ue is not set then apparently someone doesn't
1480 * think that UE's are catastrophic.
1482 if (info->error_info.nbsh & BIT(30))
1483 edac_mc_handle_ce_no_info(mci,
1484 EDAC_MOD_STR " Error Overflow set");
1487 /* return 1 if error found or 0 if error not found */
1488 static int k8_process_error_info(struct mem_ctl_info *mci,
1489 struct k8_error_info *info, int handle_errors)
1492 struct k8_error_info_regs *regs;
1493 u32 err_code, ext_ec;
1496 pvt = mci->pvt_info;
1498 /* check for an error */
1499 if (!k8_error_info_valid(&info->error_info))
1505 if (info->race_condition_detected)
1506 k8_mc_printk(mci, KERN_WARNING, "race condition detected!\n");
1509 regs = &info->error_info;
1510 err_code = info->error_info.nbsl & 0xffffUL;
1511 ext_ec = (info->error_info.nbsl >> 16) & 0x0fUL;
1512 debugf1("NorthBridge ERROR: mci(0x%p) node(%d) ErrAddr(0x%.8x-%.8x) "
1513 "nbsh(0x%.8x) nbsl(0x%.8x)\n", mci, pvt->node_id, regs->nbeah,
1514 regs->nbeal, regs->nbsh, regs->nbsl);
1516 if ((err_code & 0xfff0UL) == 0x0010UL) {
1517 debugf1("GART TLB error\n");
1519 decode_gart_tlb_error(mci, info);
1520 } else if ((err_code & 0xff00UL) == 0x0100UL) {
1521 debugf1("Cache error\n");
1522 decode_cache_error(mci, info);
1523 } else if ((err_code & 0xf800UL) == 0x0800UL) {
1524 debugf1("Bus error\n");
1525 decode_bus_error(mci, info);
1527 /* shouldn't reach here! */
1528 k8_mc_printk(mci, KERN_WARNING,
1529 "%s(): unknown MCE error 0x%x\n", __func__,
1532 k8_mc_printk(mci, KERN_ERR, "extended error code: %s\n",
1535 if (((ext_ec >= 1 && ext_ec <= 4) || (ext_ec == 6)) &&
1536 ((info->error_info.nbsh >> 4) & 0x07UL))
1537 k8_mc_printk(mci, KERN_ERR,
1538 "Error on hypertransport link: %s\n",
1539 htlink_msgs[(info->error_info.nbsh >> 4) & 0x07UL]);
1541 /* GART errors are benign as per AMD, do not panic on them */
1542 if (!gart_tlb_error && (regs->nbsh & BIT(29))) {
1543 k8_mc_printk(mci, KERN_CRIT, "uncorrected error\n");
1544 edac_mc_handle_ue_no_info(mci, "UE bit is set\n");
1547 if (regs->nbsh & BIT(25))
1548 panic("MC%d: processor context corrupt", mci->mc_idx);
1553 static void k8_check(struct mem_ctl_info *mci)
1555 struct k8_error_info info;
1557 debugf3("%s()\n", __func__);
1558 k8_get_error_info(mci, &info);
1559 k8_process_error_info(mci, &info, 1);
1562 static int k8_get_devs(struct mem_ctl_info *mci, int dev_idx)
1564 const struct k8_dev_info *k8_dev = &k8_devs[dev_idx];
1566 struct pci_dev *pdev;
1568 pdev = to_pci_dev(mci->dev);
1569 pvt = mci->pvt_info;
1571 /* The address mapping device provides a table that indicates which
1572 * physical address ranges are owned by which node. Each node's
1573 * memory controller has memory controller addresses that begin at
1576 pvt->addr_map = pci_get_related_function(PCI_VENDOR_ID_AMD,
1577 k8_dev->addr_map, pdev);
1579 if (pvt->addr_map == NULL) {
1580 k8_printk(KERN_ERR, "error address map device not found: "
1581 "vendor %x device 0x%x (broken BIOS?)\n",
1582 PCI_VENDOR_ID_AMD, k8_dev->addr_map);
1586 debugf1("Addr Map device PCI Bus ID:\t%s\n",
1587 pci_name(pvt->addr_map));
1589 pvt->misc_ctl = pci_get_related_function(PCI_VENDOR_ID_AMD,
1590 k8_dev->misc_ctl, pdev);
1592 if (pvt->misc_ctl == NULL) {
1593 pci_dev_put(pvt->addr_map);
1594 pvt->addr_map = NULL;
1595 k8_printk(KERN_ERR, "error miscellaneous device not found: "
1596 "vendor %x device 0x%x (broken BIOS?)\n",
1597 PCI_VENDOR_ID_AMD, k8_dev->misc_ctl);
1601 debugf1("Misc device PCI Bus ID:\t\t%s\n",
1602 pci_name(pvt->misc_ctl));
1607 static void k8_get_mc_regs(struct mem_ctl_info *mci)
1610 struct pci_dev *pdev;
1613 pdev = to_pci_dev(mci->dev);
1614 pvt = mci->pvt_info;
1615 debugf1("k8 regs:\n");
1617 for (i = 0; i < MAX_K8_NODES; i++) {
1618 pci_read_config_dword(pvt->addr_map, K8_DBR + (i * 8),
1620 pci_read_config_dword(pvt->addr_map, K8_DLR + (i * 8),
1622 debugf1(" dbr[%d]: 0x%x\n", i, pvt->dbr[i]);
1623 debugf1(" dlr[%d]: 0x%x\n", i, pvt->dlr[i]);
1626 pci_read_config_dword(pvt->misc_ctl, K8_NBCAP, &pvt->nbcap);
1627 debugf1(" nbcap: %u\n", pvt->nbcap);
1629 for (i = 0; i < K8_NR_CSROWS; i++) {
1630 pci_read_config_dword(pdev, K8_DCSB + (i * 4), &pvt->dcsb[i]);
1631 pci_read_config_dword(pdev, K8_DCSM + (i * 4), &pvt->dcsm[i]);
1632 debugf1(" dcsb[%d]: 0x%x\n", i, pvt->dcsb[i]);
1633 debugf1(" dcsm[%d]: 0x%x\n", i, pvt->dcsm[i]);
1636 pci_read_config_dword(pvt->addr_map, K8_DHAR, &pvt->dhar);
1637 pci_read_config_dword(pdev, K8_DBAM, &pvt->dbam);
1638 debugf1(" dhar: 0x%x\n", pvt->dhar);
1639 debugf1(" dbam: 0x%x\n", pvt->dbam);
1642 /* Return 1 if dual channel mode is active. Else return 0. */
1643 static inline int dual_channel_active(u32 dcl)
1645 return (dcl >> 16) & 0x1;
1648 static u32 csrow_nr_pages(int csrow_nr, struct k8_pvt *pvt)
1652 /* The math on this doesn't look right on the surface because x/2*4
1653 * can be simplified to x*2 but this expression makes use of the fact
1654 * that it is integral math where 1/2=0
1656 cs = (pvt->dbam >> ((csrow_nr / 2) * 4)) & 0xF; /* PG88 */
1658 /* This line is tricky. It collapses the table used by revision D and
1659 * later to one that matches revision CG and earlier
1661 cs -= (node_rev(pvt->node_id) >= OPTERON_CPU_REV_D) ?
1662 (cs > 8 ? 4 : (cs > 5 ? 3 : (cs > 2 ? 1 : 0))) : 0;
1664 /* 25 is 32MiB minimum DIMM size */
1665 return 1 << (cs + 25 - PAGE_SHIFT + dual_channel_active(pvt->dcl));
1668 static int k8_init_csrows(struct mem_ctl_info *mci)
1670 struct csrow_info *csrow;
1673 u64 input_addr_min, input_addr_max, sys_addr;
1676 pvt = mci->pvt_info;
1677 pci_read_config_dword(pvt->misc_ctl, K8_NBCFG, &nbcfg);
1680 for (i = 0; i < K8_NR_CSROWS; i++) {
1681 csrow = &mci->csrows[i];
1683 if ((pvt->dcsb[i] & 0x01) == 0) {
1684 debugf1("csrow %d empty for node %d\n", i,
1686 continue; /* empty */
1689 debugf1("revision for this node (%d) is %d\n",
1690 pvt->node_id, node_rev(pvt->node_id));
1692 csrow->nr_pages = csrow_nr_pages(i, pvt);
1693 find_csrow_limits(mci, i, &input_addr_min, &input_addr_max);
1694 sys_addr = input_addr_to_sys_addr(mci, input_addr_min);
1695 csrow->first_page = (u32) (sys_addr >> PAGE_SHIFT);
1696 sys_addr = input_addr_to_sys_addr(mci, input_addr_max);
1697 csrow->last_page = (u32) (sys_addr >> PAGE_SHIFT);
1698 csrow->page_mask = ~mask_from_dcsm(pvt->dcsm[i]);
1699 csrow->grain = 8; /* 8 bytes of resolution */
1700 csrow->mtype = ((pvt->dcl >> 18) & 0x1) ? MEM_DDR : MEM_RDDR;
1701 csrow->dtype = ((pvt->dcl >> (20 + (i / 2))) & 0x01) ?
1702 DEV_X4 : DEV_UNKNOWN;
1703 debugf1("for node %d csrow %d:\n nr_pages: %u "
1704 "input_addr_min: 0x%lx input_addr_max: 0x%lx "
1705 "sys_addr: 0x%lx first_page: 0x%lx last_page: 0x%lx "
1706 "page_mask: 0x%lx\n", pvt->node_id, i,
1707 (unsigned) csrow->nr_pages,
1708 (unsigned long) input_addr_min,
1709 (unsigned long) input_addr_max,
1710 (unsigned long) sys_addr,
1711 csrow->first_page, csrow->last_page,
1714 if (nbcfg & BIT(22))
1715 csrow->edac_mode = (nbcfg & BIT(23)) ?
1716 EDAC_S4ECD4ED : EDAC_SECDED;
1718 csrow->edac_mode = EDAC_NONE;
1724 static void k8_enable_error_reporting(struct mem_ctl_info *mci)
1727 u32 mc4ctl_l=0, mc4ctl_h=0, mcgctl_l=0, mcgctl_h=0;
1729 pvt = mci->pvt_info;
1730 pci_write_bits32(pvt->misc_ctl, K8_NBCTL, 0x3UL, 0x3UL);
1731 do_rdmsr(pvt->node_id, K8_MSR_MC4CTL, &mc4ctl_l, &mc4ctl_h);
1732 mc4ctl_l |= BIT(0) | BIT(1);
1733 do_wrmsr(pvt->node_id, K8_MSR_MC4CTL, mc4ctl_l, mc4ctl_h);
1734 do_rdmsr(pvt->node_id, K8_MSR_MC4CTL, &mc4ctl_l, &mc4ctl_h);
1735 do_rdmsr(pvt->node_id, K8_MSR_MCGCTL, &mcgctl_l, &mcgctl_h);
1737 do_wrmsr(pvt->node_id, K8_MSR_MCGCTL, mcgctl_l, mcgctl_h);
1738 do_rdmsr(pvt->node_id, K8_MSR_MCGCTL, &mcgctl_l, &mcgctl_h);
1741 static int k8_probe1(struct pci_dev *pdev, int dev_idx)
1743 struct mem_ctl_info *mci;
1745 u32 dcl, dual_channel;
1747 debugf0("%s()\n", __func__);
1748 build_node_revision_table();
1749 debugf1("pdev bus %u devfn %u\n", pdev->bus->number, pdev->devfn);
1750 pci_read_config_dword(pdev, K8_DCL, &dcl);
1751 dual_channel = dual_channel_active(dcl);
1752 debugf1("dual_channel is %u (dcl is 0x%x)\n", dual_channel, dcl);
1753 mci = edac_mc_alloc(sizeof(*pvt), K8_NR_CSROWS, dual_channel + 1);
1758 debugf0("%s(): mci = %p\n", __func__, mci);
1759 pvt = mci->pvt_info;
1761 mci->dev = &pdev->dev;
1762 pvt->node_id = MCI_TO_NODE_ID(mci);
1764 if (k8_get_devs(mci, dev_idx))
1767 k8_get_mc_regs(mci);
1768 mci->mtype_cap = MEM_FLAG_DDR | MEM_FLAG_RDDR;
1769 mci->edac_ctl_cap = EDAC_FLAG_NONE;
1770 debugf1("Initializing mci->edac_cap to EDAC_FLAG_NONE\n");
1771 mci->edac_cap = EDAC_FLAG_NONE;
1773 if ((pvt->nbcap >> 3) & 0x1)
1774 mci->edac_ctl_cap |= EDAC_FLAG_SECDED;
1776 if ((pvt->nbcap >> 4) & 0x1)
1777 mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED;
1779 if ((pvt->dcl >> 17) & 0x1) {
1780 debugf1("setting EDAC_FLAG_SECDED in mci->edac_cap\n");
1781 mci->edac_cap |= EDAC_FLAG_SECDED;
1784 debugf1("setting EDAC_FLAG_S4ECD4ED in "
1786 mci->edac_cap |= EDAC_FLAG_S4ECD4ED;
1790 mci->mod_name = EDAC_MOD_STR;
1791 mci->mod_ver = EDAC_K8_VERSION;
1792 mci->ctl_name = k8_devs[dev_idx].ctl_name;
1793 mci->edac_check = k8_check;
1794 mci->ctl_page_to_phys = NULL;
1796 if (k8_init_csrows(mci)) {
1797 debugf1("Setting mci->edac_cap to EDAC_FLAG_NONE "
1798 "because k8_init_csrows() returned nonzero "
1800 mci->edac_cap = EDAC_FLAG_NONE; /* no csrows found */
1802 k8_enable_error_reporting(mci);
1804 if (edac_mc_add_mc(mci, pvt->node_id)) {
1805 debugf1("%s(): failed edac_mc_add_mc()\n", __func__);
1806 /* FIXME: perhaps some code should go here that disables error
1807 * reporting if we just enabled it
1812 debugf1("%s(): success\n", __func__);
1816 pci_dev_put(pvt->addr_map);
1817 pci_dev_put(pvt->misc_ctl);
1824 /* returns count (>= 0), or negative on error */
1825 static int __devinit k8_init_one(struct pci_dev *pdev,
1826 const struct pci_device_id *ent)
1828 debugf0("%s()\n", __func__);
1830 /* wake up and enable device */
1831 return pci_enable_device(pdev) ?
1832 -EIO : k8_probe1(pdev, ent->driver_data);
1835 static void __devexit k8_remove_one(struct pci_dev *pdev)
1837 struct mem_ctl_info *mci;
1840 debugf0("%s()\n", __func__);
1842 if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL)
1845 pvt = mci->pvt_info;
1846 pci_dev_put(pvt->addr_map);
1847 pci_dev_put(pvt->misc_ctl);
1851 static const struct pci_device_id k8_pci_tbl[] __devinitdata = {
1852 {PCI_VEND_DEV(AMD, OPT_2_MEMCTL), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
1854 {0,} /* 0 terminated list. */
1857 MODULE_DEVICE_TABLE(pci, k8_pci_tbl);
1859 static struct pci_driver k8_driver = {
1860 .name = EDAC_MOD_STR,
1861 .probe = k8_init_one,
1862 .remove = __devexit_p(k8_remove_one),
1863 .id_table = k8_pci_tbl,
1866 int __init k8_init(void)
1868 return pci_module_init(&k8_driver);
1871 static void __exit k8_exit(void)
1873 pci_unregister_driver(&k8_driver);
1876 module_init(k8_init);
1877 module_exit(k8_exit);
1879 MODULE_LICENSE("GPL");
1880 MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh");
1881 MODULE_DESCRIPTION("MC support for AMD K8 memory controllers - " EDAC_K8_VERSION );