/*
 * AMD K8 class Memory Controller kernel module
 *
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh Linux Networx (http://lnxi.com)
 *
 *      Changes by Douglas "norsk" Thompson  <norsk5@xmission.com>:
 *          - K8 CPU Revision D and greater support
 *
 *      Changes by Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com>:
 *          - Module largely rewritten, with new (and hopefully correct)
 *            code for dealing with node and chip select interleaving, various
 *            code cleanup, and bug fixes
 *          - Added support for memory hoisting using DRAM hole address
 *            register
 *
 * This module is based on the following document (available from
 * http://www.amd.com/):
 *
 *     Title: BIOS and Kernel Developer's Guide for AMD Athlon 64 and AMD
 *            Opteron Processors
 *     AMD publication #: 26094
 *     Revision: 3.26
 *
 * Unless otherwise stated, section numbers mentioned in the comments below
 * refer to this document.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/slab.h>
#include <asm/mmzone.h>
#include "edac_mc.h"

#define k8_printk(level, fmt, arg...) \
	edac_printk(level, "k8", fmt, ##arg)

#define k8_mc_printk(mci, level, fmt, arg...) \
	edac_mc_chipset_printk(mci, level, "k8", fmt, ##arg)

/* Throughout the comments in this code, the terms SysAddr, DramAddr, and
 * InputAddr are used.  These terms come directly from the k8 documentation
 * (AMD publication #26094).  They are defined as follows:
 *
 *     SysAddr:
 *         This is a physical address generated by a CPU core or a device
 *         doing DMA.  If generated by a CPU core, a SysAddr is the result of
 *         a virtual to physical address translation by the CPU core's address
 *         translation mechanism (MMU).
 *
 *     DramAddr:
 *         A DramAddr is derived from a SysAddr by subtracting an offset that
 *         depends on which node the SysAddr maps to and whether the SysAddr
 *         is within a range affected by memory hoisting.  The DRAM Base
 *         (section 3.4.4.1) and DRAM Limit (section 3.4.4.2) registers
 *         determine which node a SysAddr maps to.
 *
 *         If the DRAM Hole Address Register (DHAR) is enabled and the SysAddr
 *         is within the range of addresses specified by this register, then
 *         a value x from the DHAR is subtracted from the SysAddr to produce a
 *         DramAddr.  Here, x represents the base address for the node that
 *         the SysAddr maps to plus an offset due to memory hoisting.  See
 *         section 3.4.8 and the comments in get_dram_hole_info() and
 *         sys_addr_to_dram_addr() below for more information.
 *
 *         If the SysAddr is not affected by the DHAR then a value y is
 *         subtracted from the SysAddr to produce a DramAddr.  Here, y is the
 *         base address for the node that the SysAddr maps to.  See section
 *         3.4.4 and the comments in sys_addr_to_dram_addr() below for more
 *         information.
 *
 *     InputAddr:
 *         A DramAddr is translated to an InputAddr before being passed to the
 *         memory controller for the node that the DramAddr is associated
 *         with.  The memory controller then maps the InputAddr to a csrow.
 *         If node interleaving is not in use, then the InputAddr has the same
 *         value as the DramAddr.  Otherwise, the InputAddr is produced by
 *         discarding the bits used for node interleaving from the DramAddr.
 *         See section 3.4.4 for more information.
 *
 *         The memory controller for a given node uses its DRAM CS Base and
 *         DRAM CS Mask registers to map an InputAddr to a csrow.  See
 *         sections 3.5.4 and 3.5.5 for more information.
 */

/*
 * Alter this version for the K8 module when modifications are made
 */
#define EDAC_K8_VERSION    " Ver: 2.0.2 " __DATE__
#define EDAC_MOD_STR	"k8_edac"

#ifndef PCI_DEVICE_ID_AMD_OPT_0_HT
#define PCI_DEVICE_ID_AMD_OPT_0_HT	0x1100
#endif				/* PCI_DEVICE_ID_AMD_OPT_0_HT */

#ifndef PCI_DEVICE_ID_AMD_OPT_1_ADDRMAP
#define PCI_DEVICE_ID_AMD_OPT_1_ADDRMAP	0x1101
#endif				/* PCI_DEVICE_ID_AMD_OPT_1_ADDRMAP */

#ifndef PCI_DEVICE_ID_AMD_OPT_2_MEMCTL
#define PCI_DEVICE_ID_AMD_OPT_2_MEMCTL	0x1102
#endif				/* PCI_DEVICE_ID_AMD_OPT_2_MEMCTL */

#ifndef PCI_DEVICE_ID_AMD_OPT_3_MISCCTL
#define PCI_DEVICE_ID_AMD_OPT_3_MISCCTL	0x1103
#endif				/* PCI_DEVICE_ID_AMD_OPT_3_MISCCTL */

/* Extended Model from CPUID, for CPU Revision numbers */
#define OPTERON_CPU_LE_REV_C    0
#define OPTERON_CPU_REV_D       1
#define OPTERON_CPU_REV_E       2
/* Unknown Extended Model value */
#define OPTERON_CPU_REV_X	3
/* NPT processors have the following Extended Models */
#define OPTERON_CPU_REV_F	4
#define OPTERON_CPU_REV_FA	5

#define K8_NR_CSROWS 8
#define MAX_K8_NODES 8

/* K8 register addresses - device 0 function 1 - Address Map */
#define K8_DBR		0x40	/* DRAM Base Register (8 x 32b
				 * interlaced with K8_DLR)
				 *
				 * 31:16 DRAM Base addr 39:24
				 * 15:11 reserved
				 * 10:8  interleave enable
				 *  7:2  reserved
				 *  1    write enable
				 *  0    read enable
				 */

#define K8_DLR		0x44	/* DRAM Limit Register (8 x 32b
				 * interlaced with K8_DBR)
				 *
				 * 31:16 DRAM Limit addr 32:24
				 * 15:11 reserved
				 * 10:8  interleave select
				 *  7:3  reserved
				 *  2:0  destination node ID
				 */

#define K8_DHAR         0xf0    /* DRAM Hole Address Register
				 *
				 * 31:24 DramHoleBase
				 * 23:16 reserved
				 * 15:8  DramHoleOffset
				 *  7:1  reserved
				 *    0  DramHoleValid
				 */

/* K8 register addresses - device 0 function 2 - DRAM controller */
#define K8_DCSB		0x40	/* DRAM Chip-Select Base (8 x 32b)
				 *
				 * For Rev E and prior
				 * 31:21 Base addr high 35:25
				 * 20:16 reserved
				 * 15:9  Base addr low 19:13 (interlvd)
				 *  8:1  reserved
				 *  0    chip-select bank enable
				 *
				 * For Rev F (NPT) and later
				 * 31:29 reserved
				 * 28:19 Base address (36:27)
				 * 18:14 reserved
				 * 13:5  Base address (21:13)
				 * 4:3   reserved
				 * 2     TestFail
				 * 1     Spare Rank
				 * 0     CESenable
				 */
#define K8_DCSB_CS_ENABLE	0x1
#define K8_DCSB_NPT_SPARE	0x2
#define K8_DCSB_NPT_TESTFAIL	0x4

/* REV E: selects bits 31-21 and 15-9 from DCSB
 * and the shift amount to form address
 */
#define REV_E_DCSB_BASE_BITS	(0xFFE0FE00ULL)
#define REV_E_DCS_SHIFT		4
#define REV_E_DCSM_SHIFT	0
#define REF_E_DCSM_COUNT	4

/* REV F: selects bits 28-19 and 13-5 from DCSB
 * and the shift amount to form address
 */
#define REV_F_DCSB_BASE_BITS	(0x1FF83FE0ULL)
#define REV_F_DCS_SHIFT		8
#define REV_F_DCSM_SHIFT	1
#define REF_F_DCSM_COUNT	8

#define K8_DCSM		0x60	/* DRAM Chip-Select Mask (8 x 32b)
				 *
				 * 31:30 reserved
				 * 29:21 addr mask high 33:25
				 * 20:16 reserved
				 * 15:9  addr mask low  19:13
				 *  8:0  reserved
				 */

/* REV E: selects bits 29-21 and 15-9 from DCSM */
#define REV_E_DCSM_MASK_BITS	0x3FE0FE00
/*     represents unused bits [24-20] and [12-0] */
#define REV_E_DCS_NOTUSED_BITS	0x1f01fff

/* REV F: selects bits 28-19 and 13-5 from DCSM */
#define REV_F_DCSM_MASK_BITS	0x1FF83FC0
/*     represents unused bits [26-22] and [12-0] */
#define REV_F_DCS_NOTUSED_BITS	0x03c1fff

#define K8_DBAM		0x80	/* DRAM Base Addr Mapping (32b) */

#define K8_DCL		0x90	/* DRAM configuration low reg (32b)
				 *
				 * Rev E and earlier CPUS:
				 *
				 * 31:28 reserved
				 * 27:25 Bypass Max: 000b=respect
				 * 24    Dissable receivers - no sockets
				 * 23:20 x4 DIMMS
				 * 19    32byte chunks
				 * 18    Unbuffered
				 * 17    ECC enabled
				 * 16    128/64 bit (dual/single chan)
				 * 15:14 R/W Queue bypass count
				 * 13    Self refresh
				 * 12    exit self refresh
				 * 11    mem clear status
				 * 10    DRAM enable
				 *  9    reserved
				 *  8    DRAM init
				 *  7:4  reserved
				 *  3    dis DQS hysteresis
				 *  2    QFC enabled
				 *  1    DRAM drive strength
				 *  0    Digital Locked Loop disable
				 *
				 * Rev F and later CPUs:
				 *
				 * 31:20 reserved
				 * 19    DIMM ECC Enable
				 * 18:17 reserved
				 * 16    Unbuffered DIMM
				 * 15:12 x4 DIMMs
				 * 11    Width128 bits
				 * 10    burstLength32
				 *  9    SelRefRateEn
				 *  8    ParEn
				 *  7    DramDrvWeak
				 *  6    reserved
				 * 5:4   DramTerm
				 * 3:2   reserved
				 *  1    ExitSelfRef
				 *  0    InitDram
				 */

/* K8 register addresses - device 0 function 3 - Misc Control */
#define K8_NBCTL	0x40	/* MCA NB Control (32b)
				 *
				 *  1    MCA UE Reporting
				 *  0    MCA CE Reporting
				 */

#define K8_NBCFG	0x44	/* MCA NB Config (32b)
				 *
				 * 23    Chip-kill x4 ECC enable
				 * 22    ECC enable
				 *  1    CPU ECC enable
				 */
#define			K8_NBCFG_CHIPKILL	23
#define			K8_NBCFG_ECC_ENABLE	22

#define K8_NBSL		0x48	/* MCA NB Status Low (32b)
				 *
				 * 31:24 Syndrome 15:8 chip-kill x4
				 * 23:20 reserved
				 * 19:16 Extended err code
				 * 15:0  Err code
				 */

#define K8_NBSH		0x4C	/* MCA NB Status High (32b)
				 *
				 * 31    Err valid
				 * 30    Err overflow
				 * 29    Uncorrected err
				 * 28    Err enable
				 * 27    Misc err reg valid
				 * 26    Err addr valid
				 * 25    proc context corrupt
				 * 24:23 reserved
				 * 22:15 Syndrome 7:0
				 * 14    CE
				 * 13    UE
				 * 12:9  reserved
				 *  8    err found by scrubber
				 *  7    reserved
				 *  6:4  Hyper-transport link number
				 *  3:2  reserved
				 *  1    Err CPU 1
				 *  0    Err CPU 0
				 */

#define K8_NBSH_VALID_BIT BIT(31)

#define K8_NBEAL	0x50	/* MCA NB err addr low (32b)
				 *
				 * 31:3  Err addr low 31:3
				 *  2:0  reserved
				 */

#define K8_NBEAH	0x54	/* MCA NB err addr high (32b)
				 *
				 * 31:8  reserved
				 *  7:0  Err addr high 39:32
				 */

#define K8_NBCAP	0xE8	/* MCA NB capabilities (32b)
				 *
				 * 31:9  reserved
				 *  4    S4ECD4ED capable
				 *  3    SECDED capable
				 */
#define K8_NBCAP_CHIPKILL	4
#define K8_NBCAP_SECDED		3

				/* MSR's */

				/*
				 * K8_MSR_MCxCTL (64b)
				 * (0x400,404,408,40C,410)
				 * 63    Enable reporting source 63
				 *  .
				 *  .
				 *  .
				 *  2    Enable error source 2
				 *  1    Enable error source 1
				 *  0    Enable error source 0
				 */

				/*
				 * K8_MSR_MCxSTAT (64b)
				 * (0x401,405,409,40D,411)
				 * 63    Error valid
				 * 62    Status overflow
				 * 61    UE
				 * 60    Enabled error condition
				 * 59    Misc register valid (not used)
				 * 58    Err addr register valid
				 * 57    Processor context corrupt
				 * 56:32 Other information
				 * 31:16 Model specific error code
				 * 15:0  MCA err code
				 */

				/*
				 * K8_MSR_MCxADDR (64b)
				 * (0x402,406,40A,40E,412)
				 * 63:48 reserved
				 * 47:0  Address
				 */

				/*
				 * K8_MSR_MCxMISC (64b)
				 * (0x403,407,40B,40F,413)
				 * Unused on Athlon64 and K8
				 */

#define K8_MSR_MCGCTL	0x017b	/* Machine Chk Global report ctl (64b)
				 *
				 * 31:5  reserved
				 *  4    North Bridge
				 *  3    Load/Store
				 *  2    Bus Unit
				 *  1    Instruction Cache
				 *  0    Data Cache
				 */

#define K8_MSR_MC4CTL	0x0410	/* North Bridge Check report ctl (64b) */
#define K8_MSR_MC4STAT	0x0411	/* North Bridge status (64b) */
#define K8_MSR_MC4ADDR	0x0412	/* North Bridge Address (64b) */

static inline int MCI_TO_NODE_ID(struct pci_dev *pdev)
{
	return PCI_SLOT(pdev->devfn) - 0x18;
}

/* Ugly hack that allows module to compile when built as part of a 32-bit
 * kernel.  Just in case anyone wants to run a 32-bit kernel on their Opteron.
 */
#ifndef MAXNODE
#define MAXNODE 8
#endif

/* Each entry holds the CPU revision of all CPU cores for the given node. */
static int k8_node_revision_table[MAXNODE] = { 0 };

static inline int node_rev(int node_id)
{
	return k8_node_revision_table[node_id];
}

static void store_node_revision(void *param)
{
	int node_id, revision;

	/* Multiple CPU cores on the same node will all write their revision
	 * number to the same array entry.  This is ok.  For a given node, all
	 * CPU cores are on the same piece of silicon and share the same
	 * revision number.
	 */
	node_id = (cpuid_ebx(1) >> 24) & 0x07;
	revision = (cpuid_eax(1) >> 16) & 0x0f;
	k8_node_revision_table[node_id] = revision;
}

/* Initialize k8_node_revision_table. */
static void build_node_revision_table(void)
{
	static int initialized = 0;

	if (initialized)
		return;

	on_each_cpu(store_node_revision, NULL, 1, 1);
	initialized = 1;
}

enum k8_chips {
	OPTERON = 0,
};

struct k8_pvt {
	struct pci_dev *addr_map;
	struct pci_dev *misc_ctl;

	int node_id;  /* ID of this node */
	int ext_model;

	/* The values of these registers will remain constant so we might as
	 * well cache them here.
	 */
	u32 dcl;
	u32 dbr[MAX_K8_NODES];
	u32 dlr[MAX_K8_NODES];
	u32 nbcap;
	u32 nbcfg;
	u32 dcsb[K8_NR_CSROWS];
	u32 dcsm[K8_NR_CSROWS];

	/* The following 3 fields are set at run time, after Revision has
	 * been determine, since the dcsb and dcsm registers vary
	 * by CPU Revsion
	 */
	u32 dcsb_mask;			/* DCSB mask bits */
	u32 dcsm_mask;			/* DCSM mask bits */
	u32 num_dcsm;			/* Number of DCSM registers */
	u32 dcs_mask_notused;		/* DCSM notused mask bits */
	u32 dcs_shift;			/* DCSB and DCSM shift value */

	/* On Rev E there are 8 DCSM registers,
	 * On Rev F there are 4 DCSM registers.
	 * This field is set as a 0 (Rev E) or 1 (Rev F) to indicate
	 * number of bits to shift the index for DCSM array look ups
	 */
	u32 dcsm_shift_bit;

	u32 dhar;
	u32 dbam;
};

struct k8_error_info_regs {
	u32 nbsh;
	u32 nbsl;
	u32 nbeah;
	u32 nbeal;
};

struct k8_error_info {
	struct k8_error_info_regs error_info;
	int race_condition_detected;
};

struct k8_dev_info {
	const char *ctl_name;
	u16 addr_map;
	u16 misc_ctl;
};

static const struct k8_dev_info k8_devs[] = {
	[OPTERON] = {
		     .ctl_name = "Athlon64/Opteron",
		     .addr_map = PCI_DEVICE_ID_AMD_OPT_1_ADDRMAP,
		     .misc_ctl = PCI_DEVICE_ID_AMD_OPT_3_MISCCTL},
};

static struct pci_dev * pci_get_related_function(unsigned int vendor,
		unsigned int device, struct pci_dev *related)
{
	struct pci_dev *dev;

	dev = NULL;

	while ((dev = pci_get_device(vendor, device, dev)) != NULL) {
		if ((dev->bus->number == related->bus->number) &&
		    (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
			break;
	}

	return dev;
}

/* FIXME - stolen from msr.c - the calls in msr.c could be exported */
struct msr_command {
	int cpu;
	int err;
	u32 reg;
	u32 data[2];
};

static void smp_wrmsr(void *cmd_block)
{
	struct msr_command *cmd = cmd_block;
	wrmsr(cmd->reg, cmd->data[0], cmd->data[1]);
}

static void smp_rdmsr(void *cmd_block)
{
	struct msr_command *cmd = cmd_block;
	rdmsr(cmd->reg, cmd->data[0], cmd->data[1]);
}

static void do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx)
{
	struct msr_command cmd;

	cmd.cpu = raw_smp_processor_id();
	cmd.reg = reg;
	cmd.data[0] = eax;
	cmd.data[1] = edx;
	on_each_cpu(smp_wrmsr, &cmd, 1, 1);
}

static void do_rdmsr(int cpu, u32 reg, u32 *eax, u32 *edx)
{
	struct msr_command cmd;

	cmd.cpu = raw_smp_processor_id();
	cmd.reg = reg;
	on_each_cpu(smp_rdmsr, &cmd, 1, 1);
	*eax = cmd.data[0];
	*edx = cmd.data[1];
}

/*
 * FIXME - This is a large chunk of memory to suck up just to decode the
 * syndrome.  It would be nice to discover a pattern in the syndromes that
 * could be used to quickly identify the channel.  The big problems with
 * this table is memory usage, lookup speed (could sort and binary search),
 * correctness (there could be a transcription error).  A zero in any nibble
 * for a syndrom is always channel 0, but that only decodes some of the
 * syndromes.  Can anyone find any other patterns?
 *
 * The comment in the left column is the nibble that is in error.  The least
 * significant nibble of the syndrome is the mask for the bits that are
 * in error (need to be toggled) for the particular nibble.
 */
#define SYNDROME_TABLE_SIZE 270
static const unsigned long syndromes_chan0[SYNDROME_TABLE_SIZE] = {
	/*0 */ 0xe821, 0x7c32, 0x9413, 0xbb44, 0x5365, 0xc776, 0x2f57,
	0xdd88, 0x35a9, 0xa1ba, 0x499b, 0x66cc, 0x8eed, 0x1afe, 0xf2df,
	/*1 */ 0x5d31, 0xa612, 0xfb23, 0x9584, 0xc8b5, 0x3396, 0x6ea7,
	0xeac8, 0xb7f9, 0x4cda, 0x11eb, 0x7f4c, 0x227d, 0xd95e, 0x846f,
	/*2 */ 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
	0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
	/*3 */ 0x2021, 0x3032, 0x1013, 0x4044, 0x6065, 0x7076, 0x5057,
	0x8088, 0xa0a9, 0xb0ba, 0x909b, 0xc0cc, 0xe0ed, 0xf0fe, 0xd0df,
	/*4 */ 0x5041, 0xa082, 0xf0c3, 0x9054, 0xc015, 0x30d6, 0x6097,
	0xe0a8, 0xb0e9, 0x402a, 0x106b, 0x70fc, 0x20bd, 0xd07e, 0x803f,
	/*5 */ 0xbe21, 0xd732, 0x6913, 0x2144, 0x9f65, 0xf676, 0x4857,
	0x3288, 0x8ca9, 0xe5ba, 0x5b9b, 0x13cc, 0xaded, 0xc4fe, 0x7adf,
	/*6 */ 0x4951, 0x8ea2, 0xc7f3, 0x5394, 0x1ac5, 0xdd36, 0x9467,
	0xa1e8, 0xe8b9, 0x2f4a, 0x661b, 0xf27c, 0xbb2d, 0x7cde, 0x358f,
	/*7 */ 0x74e1, 0x9872, 0xec93, 0xd6b4, 0xa255, 0x4ec6, 0x3a27,
	0x6bd8, 0x1f39, 0xf3aa, 0x874b, 0xbd6c, 0xc98d, 0x251e, 0x51ff,
	/*8 */ 0x15c1, 0x2a42, 0x3f83, 0xcef4, 0xdb35, 0xe4b6, 0xf177,
	0x4758, 0x5299, 0x6d1a, 0x78db, 0x89ac, 0x9c6d, 0xa3ee, 0xb62f,
	/*9 */ 0x3d01, 0x1602, 0x2b03, 0x8504, 0xb805, 0x9306, 0xae07,
	0xca08, 0xf709, 0xdc0a, 0xe10b, 0x4f0c, 0x720d, 0x590e, 0x640f,
	/*a */ 0x9801, 0xec02, 0x7403, 0x6b04, 0xf305, 0x8706, 0x1f07,
	0xbd08, 0x2509, 0x510a, 0xc90b, 0xd60c, 0x4e0d, 0x3a0e, 0xa20f,
	/*b */ 0xd131, 0x6212, 0xb323, 0x3884, 0xe9b5, 0x5a96, 0x8ba7,
	0x1cc8, 0xcdf9, 0x7eda, 0xafeb, 0x244c, 0xf57d, 0x465e, 0x976f,
	/*c */ 0xe1d1, 0x7262, 0x93b3, 0xb834, 0x59e5, 0xca56, 0x2b87,
	0xdc18, 0x3dc9, 0xae7a, 0x4fab, 0x542c, 0x85fd, 0x164e, 0xf79f,
	/*d */ 0x6051, 0xb0a2, 0xd0f3, 0x1094, 0x70c5, 0xa036, 0xc067,
	0x20e8, 0x40b9, 0x904a, 0x601b, 0x307c, 0x502d, 0x80de, 0xe08f,
	/*e */ 0xa4c1, 0xf842, 0x5c83, 0xe6f4, 0x4235, 0x1eb6, 0xba77,
	0x7b58, 0xdf99, 0x831a, 0x27db, 0x9dac, 0x396d, 0x65ee, 0xc12f,
	/*f */ 0x11c1, 0x2242, 0x3383, 0xc8f4, 0xd935, 0xeab6, 0xfb77,
	0x4c58, 0x5d99, 0x6e1a, 0x7fdb, 0x84ac, 0x9562, 0xa6ee, 0xb72f,

	/*20 */ 0xbe01, 0xd702, 0x6903, 0x2104, 0x9f05, 0xf606, 0x4807,
	0x3208, 0x8c09, 0xe50a, 0x5b0b, 0x130c, 0xad0d, 0xc40e, 0x7a0f,
	/*21 */ 0x4101, 0x8202, 0xc303, 0x5804, 0x1905, 0xda06, 0x9b07,
	0xac08, 0xed09, 0x2e0a, 0x6f0b, 0x640c, 0xb50d, 0x760e, 0x370f
};

static const unsigned long syndromes_chan1[SYNDROME_TABLE_SIZE] = {
	/*10 */ 0x45d1, 0x8a62, 0xcfb3, 0x5e34, 0x1be5, 0xd456, 0x9187,
	0xa718, 0xe2c9, 0x2d7a, 0x68ab, 0xf92c, 0xbcfd, 0x734e, 0x369f,
	/*11 */ 0x63e1, 0xb172, 0xd293, 0x14b4, 0x7755, 0xa5c6, 0xc627,
	0x28d8, 0x4b39, 0x99aa, 0xfa4b, 0x3c6c, 0x5f8d, 0x8d1e, 0xeeff,
	/*12 */ 0xb741, 0xd982, 0x6ec3, 0x2254, 0x9515, 0xfbd6, 0x4c97,
	0x33a8, 0x84e9, 0xea2a, 0x5d6b, 0x11fc, 0xa6bd, 0xc87e, 0x7f3f,
	/*13 */ 0xdd41, 0x6682, 0xbbc3, 0x3554, 0xe815, 0x53d6, 0xce97,
	0x1aa8, 0xc7e9, 0x7c2a, 0xa1fb, 0x2ffc, 0xf2bd, 0x497e, 0x943f,
	/*14 */ 0x2bd1, 0x3d62, 0x16b3, 0x4f34, 0x64e5, 0x7256, 0x5987,
	0x8518, 0xaec9, 0xb87a, 0x93ab, 0xca2c, 0xe1fd, 0xf74e, 0xdc9f,
	/*15 */ 0x83c1, 0xc142, 0x4283, 0xa4f4, 0x2735, 0x65b6, 0xe677,
	0xf858, 0x7b99, 0x391a, 0xbadb, 0x5cac, 0xdf6d, 0x9dee, 0x1e2f,
	/*16 */ 0x8fd1, 0xc562, 0x4ab3, 0xa934, 0x26e5, 0x6c56, 0xe387,
	0xfe18, 0x71c9, 0x3b7a, 0xb4ab, 0x572c, 0xd8fd, 0x924e, 0x1d9f,
	/*17 */ 0x4791, 0x89e2, 0xce73, 0x5264, 0x15f5, 0xdb86, 0x9c17,
	0xa3b8, 0xe429, 0x2a5a, 0x6dcb, 0xf1dc, 0xb64d, 0x783e, 0x3faf,
	/*18 */ 0x5781, 0xa9c2, 0xfe43, 0x92a4, 0xc525, 0x3b66, 0x6ce7,
	0xe3f8, 0xb479, 0x4a3a, 0x1dbb, 0x715c, 0x26dd, 0xd89e, 0x8f1f,
	/*19 */ 0xbf41, 0xd582, 0x6ac3, 0x2954, 0x9615, 0xfcd6, 0x4397,
	0x3ea8, 0x81e9, 0xeb2a, 0x546b, 0x17fc, 0xa8bd, 0xc27e, 0x7d3f,
	/*1a */ 0x9891, 0xe1e2, 0x7273, 0x6464, 0xf7f5, 0x8586, 0x1617,
	0xb8b8, 0x2b29, 0x595a, 0xcacb, 0xdcdc, 0x4f4d, 0x3d3e, 0xaeaf,
	/*1b */ 0xcce1, 0x4472, 0x8893, 0xfdb4, 0x3f55, 0xb9c6, 0x7527,
	0x56d8, 0x9a39, 0x12aa, 0xde4b, 0xab6c, 0x678d, 0xef1e, 0x23ff,
	/*1c */ 0xa761, 0xf9b2, 0x5ed3, 0xe214, 0x4575, 0x1ba6, 0xbcc7,
	0x7328, 0xd449, 0x8a9a, 0x2dfb, 0x913c, 0x365d, 0x688e, 0xcfef,
	/*1d */ 0xff61, 0x55b2, 0xaad3, 0x7914, 0x8675, 0x2ca6, 0xd3c7,
	0x9e28, 0x6149, 0xcb9a, 0x34fb, 0xe73c, 0x185d, 0xb28e, 0x4def,
	/*1e */ 0x5451, 0xa8a2, 0xfcf3, 0x9694, 0xc2c5, 0x3e36, 0x6a67,
	0xebe8, 0xbfb9, 0x434a, 0x171b, 0x7d7c, 0x292d, 0xd5de, 0x818f,
	/*1f */ 0x6fc1, 0xb542, 0xda83, 0x19f4, 0x7635, 0xacb6, 0xc377,
	0x2e58, 0x4199, 0x9b1a, 0xf4db, 0x37ac, 0x586d, 0x82ee, 0xed2f,

	/*22 */ 0xc441, 0x4882, 0x8cc3, 0xf654, 0x3215, 0xbed6, 0x7a97,
	0x5ba8, 0x9fe9, 0x132a, 0xd76b, 0xadfc, 0x69bd, 0xe57e, 0x213f,
	/*23 */ 0x7621, 0x9b32, 0xed13, 0xda44, 0xac65, 0x4176, 0x3757,
	0x6f88, 0x19a9, 0xf4ba, 0x829b, 0xb5cc, 0xc3ed, 0x2efe, 0x58df
};

static int chan_from_chipkill_syndrome(unsigned long syndrome)
{
	int i;

	debugf0("%s()\n", __func__);

	for (i = 0; i < SYNDROME_TABLE_SIZE; i++) {
		if (syndromes_chan0[i] == syndrome)
			return 0;
		if (syndromes_chan1[i] == syndrome)
			return 1;
	}

	debugf0("%s(): syndrome(%lx) not found\n", __func__, syndrome);
	return -1;
}

static const char *tt_msgs[] = {	/* transaction type */
	"inst",
	"data",
	"generic",
	"reserved"
};

static const char *ll_msgs[] = {	/* cache level */
	"0",
	"1",
	"2",
	"generic"
};

static const char *memtt_msgs[] = {
	"generic",
	"generic read",
	"generic write",
	"data read",
	"data write",
	"inst fetch",
	"prefetch",
	"evict",
	"snoop",
	"unknown error 9",
	"unknown error 10",
	"unknown error 11",
	"unknown error 12",
	"unknown error 13",
	"unknown error 14",
	"unknown error 15"
};

static const char *pp_msgs[] = {	/* participating processor */
	"local node origin",
	"local node response",
	"local node observed",
	"generic"
};

static const char *to_msgs[] = {
	"no timeout",
	"timed out"
};

static const char *ii_msgs[] = {	/* memory or i/o */
	"mem access",
	"reserved",
	"i/o access",
	"generic"
};

static const char *ext_msgs[] = {	/* extended error */
	"ECC error",
	"CRC error",
	"sync error",
	"mst abort",
	"tgt abort",
	"GART error",
	"RMW error",
	"watchdog error",
	"ECC chipkill x4 error",
	"unknown error 9",
	"unknown error 10",
	"unknown error 11",
	"unknown error 12",
	"unknown error 13",
	"unknown error 14",
	"unknown error 15"
};

static const char *htlink_msgs[] = {
	"none",
	"1",
	"2",
	"1 2",
	"3",
	"1 3",
	"2 3",
	"1 2 3"
};

/*
 * The DCSB and DCSM registers differ between Rev E and Rev F CPUs
 * The following several functions intialize and extract information
 * from this registers
 */

/*
 * set_dcsb_dcsm_rev_specific(pvt)
 *
 *	NOTE: CPU Revision Dependent code
 *
 *	Set the DCSB and DCSM mask values depending on the
 *	CPU revision value.
 *	Also set the shift factor for the DCSB and DCSM values
 *
 *	member dcs_mask_notused, REV E:
 *
 *	To find the max InputAddr for the csrow, start with the base
 *	address and set all bits that are "don't care" bits in the test at
 *	the start of section 3.5.4 (p. 84).
 *
 *	The "don't care" bits are all set bits in the mask and
 *	all bits in the gaps between bit ranges [35-25] and [19-13].
 *	The value REV_E_DCS_NOTUSED_BITS represents bits [24-20] and [12-0],
 *	which are all bits in the above-mentioned gaps.
 *
 *	member dcs_mask_notused, REV F:
 *
 *	To find the max InputAddr for the csrow, start with the base
 *	address and set all bits that are "don't care" bits in the test at
 *	the start of NPT section 4.5.4 (p. 87).
 *
 *	The "don't care" bits are all set bits in the mask and
 *	all bits in the gaps between bit ranges [36-27] and [21-13].
 *	The value REV_F_DCS_NOTUSED_BITS represents bits [26-22] and [12-0],
 *	which are all bits in the above-mentioned gaps.
 */
static void set_dcsb_dcsm_rev_specific(struct k8_pvt *pvt)
{
	if ( pvt->ext_model >= OPTERON_CPU_REV_F) {
		pvt->dcsb_mask		= REV_F_DCSB_BASE_BITS;
		pvt->dcsm_mask		= REV_F_DCSM_MASK_BITS;
		pvt->dcs_mask_notused	= REV_F_DCS_NOTUSED_BITS;
		pvt->dcs_shift		= REV_F_DCS_SHIFT;
		pvt->dcsm_shift_bit	= REV_F_DCSM_SHIFT;
		pvt->num_dcsm		= REF_F_DCSM_COUNT;
	} else {
		pvt->dcsb_mask		= REV_E_DCSB_BASE_BITS;
		pvt->dcsm_mask		= REV_E_DCSM_MASK_BITS;
		pvt->dcs_mask_notused	= REV_E_DCS_NOTUSED_BITS;
		pvt->dcs_shift		= REV_E_DCS_SHIFT;
		pvt->dcsm_shift_bit	= REV_E_DCSM_SHIFT;
		pvt->num_dcsm		= REF_E_DCSM_COUNT;
	}
}

/*
 * get_dcsb()
 *
 *	getter function to return the 'base' address the i'th CS entry.
 */
static u32 get_dcsb(struct k8_pvt *pvt, int csrow)
{
	/* 0xffe0fe00 selects bits 31-21 and 15-9 of a DRAM CS Base Address
	 * Register (section 3.5.4).  Shifting the bits left 4 puts them in
	 * their proper bit positions of 35-25 and 19-13.
	 */
	return pvt->dcsb[csrow];
}

/*
 * get_dcsm()
 *
 *	getter function to return the 'mask' address the i'th CS entry.
 *	This getter function is needed because there different number
 *	of DCSM registers on Rev E and prior vs Rev F and later
 */
static u32 get_dcsm(struct k8_pvt *pvt, int csrow)
{
	return pvt->dcsm[csrow >> pvt->dcsm_shift_bit];
}


/*
 * base_from_dcsb
 *
 *	Extract the DRAM CS base address from selected csrow register
 */
static u64 base_from_dcsb(struct k8_pvt *pvt, int csrow)
{
	return ((u64)(get_dcsb(pvt, csrow) & pvt->dcsb_mask)) <<
		 pvt->dcs_shift;
}

static u64 mask_from_dcsm(struct k8_pvt *pvt, int csrow)
{
	u64 dcsm_bits, other_bits;

	/* Extract bits bits 29-21 and 15-9 from DCSM (section 3.5.5). */
	dcsm_bits = get_dcsm(pvt, csrow) & pvt->dcsm_mask;

	/* Set all bits except bits 33-25 and 19-13. */
	other_bits = pvt->dcsm_mask;
	other_bits = ~(other_bits << pvt->dcs_shift);

	/* The extracted bits from DCSM belong in the spaces represented by
	 * the cleared bits in other_bits.
	 */
	return (dcsm_bits << pvt->dcs_shift) | other_bits;
}

/*
 * setup_dcsb_dcsm()
 *
 *	Setup the DCSB and DCSM arrays from hardware
 */
static void setup_dcsb_dcsm(struct k8_pvt *pvt, struct pci_dev *pdev)
{
	int i;

	/* Set the dcsb and dcsm mask bits and their shift value */
	set_dcsb_dcsm_rev_specific(pvt);

	/* Retrieve the DRAM CS Base Address Registers from hardware
	 */
	for (i = 0; i < K8_NR_CSROWS; i++) {
		pci_read_config_dword(pdev, K8_DCSB + (i * 4), &pvt->dcsb[i]);
		debugf1("    dcsb[%d]: 0x%x\n", i, pvt->dcsb[i]);
	}

	/* The number of DCSMs differents at the Rev E/Rev F boundary
	 * so we retrieve the number of registers defined for this processor
	 */
	for (i = 0; i < pvt->num_dcsm; i++) {
		pci_read_config_dword(pdev, K8_DCSM + (i * 4), &pvt->dcsm[i]);
		debugf1("    dcsm[%d]: 0x%x\n", i, pvt->dcsm[i]);
	}

	/* Debug dump only of DCSB and DCSM registers */
	for (i = 0; i < K8_NR_CSROWS; i++) {
		debugf1("  dcsb[%d]: 0x%8.8x  dcsm[%d]: 0x%x\n",
				i, get_dcsb(pvt,i),
				i>> pvt->dcsm_shift_bit, get_dcsm(pvt,i));
	}
}

/* In *base and *limit, pass back the full 40-bit base and limit physical
 * addresses for the node given by node_id.  This information is obtained from
 * DRAM Base (section 3.4.4.1) and DRAM Limit (section 3.4.4.2) registers. The
 * base and limit addresses are of type SysAddr, as defined at the start of
 * section 3.4.4 (p. 70).  They are the lowest and highest physical addresses
 * in the address range they represent.
 */
static void get_base_and_limit(struct k8_pvt *pvt, int node_id,
		u64 *base, u64 *limit)
{
	*base = ((u64) (pvt->dbr[node_id] & 0xffff0000)) << 8;

	/* Since the limit represents the highest address in the range, we
	 * must set its lowest 24 bits to 1.
	 */
	*limit = (((u64) (pvt->dlr[node_id] & 0xffff0000)) << 8) | 0xffffff;
}

/* Return 1 if the SysAddr given by sys_addr matches the base/limit associated
 * with node_id
 */
static int base_limit_match(struct k8_pvt *pvt, u64 sys_addr, int node_id)
{
	u64 base, limit, addr;

	get_base_and_limit(pvt, node_id, &base, &limit);

	/* The k8 treats this as a 40-bit value.  However, bits 63-40 will be
	 * all ones if the most significant implemented address bit is 1.
	 * Here we discard bits 63-40.  See section 3.4.2 of AMD publication
	 * 24592: AMD x86-64 Architecture Programmer's Manual Volume 1
	 * Application Programming.
	 */
	addr = sys_addr & 0x000000ffffffffffull;

	return (addr >= base) && (addr <= limit);
}

/* Attempt to map a SysAddr to a node.  On success, return a pointer to the
 * mem_ctl_info structure for the node that the SysAddr maps to.  On failure,
 * return NULL.
 */
static struct mem_ctl_info * find_mc_by_sys_addr(struct mem_ctl_info *mci,
						u64 sys_addr)
{
	struct k8_pvt *pvt;
	int node_id;
	u32 intlv_en, bits;

	/* Here we use the DRAM Base (section 3.4.4.1) and DRAM Limit (section
	 * 3.4.4.2) registers to map the SysAddr to a node ID.
	 */

	pvt = mci->pvt_info;

	/* The value of this field should be the same for all DRAM Base
	 * registers.  Therefore we arbitrarily choose to read it from the
	 * register for node 0.
	 */
	intlv_en = pvt->dbr[0] & (0x07 << 8);

	if (intlv_en == 0) {  /* node interleaving is disabled */
		debugf2("%s(): node interleaving disabled\n", __func__);
		for (node_id = 0; ; ) {
			if (base_limit_match(pvt, sys_addr, node_id))
				break;

			if (++node_id == MAX_K8_NODES) {
				debugf2("%s(): sys_addr 0x%lx "
					"does not match any node\n", __func__,
					(unsigned long) sys_addr);
				return NULL;
			}
		}

		goto found;
	}

	if (unlikely((intlv_en != (0x01 << 8)) &&
		     (intlv_en != (0x03 << 8)) &&
		     (intlv_en != (0x07 << 8)))) {
		k8_printk(KERN_WARNING,
			  "%s(): junk value of 0x%x extracted from IntlvEn "
			  "field of DRAM Base Register for node 0: This "
			  "probably indicates a BIOS bug.\n", __func__,
			  intlv_en);
		return NULL;
	}

	/* If we get this far, node interleaving is enabled. */
	debugf2("%s(): node interleaving enabled\n", __func__);
	bits = (((u32) sys_addr) >> 12) & intlv_en;

	for (node_id = 0; ; ) {
		if ((pvt->dlr[node_id] & intlv_en) == bits)
			break;  /* intlv_sel field matches */

		if (++node_id == MAX_K8_NODES) {
			debugf2("%s(): sys_addr 0x%lx does not match any "
				"node\n", __func__, (unsigned long) sys_addr);
			return NULL;
		}
	}

	/* sanity test for sys_addr */
	if (unlikely(!base_limit_match(pvt, sys_addr, node_id))) {
		k8_printk(KERN_WARNING,
			  "%s(): sys_addr 0x%lx falls outside base/limit "
			  "address range for node %d with node interleaving "
			  "enabled.\n", __func__, (unsigned long) sys_addr,
			  node_id);
		return NULL;
	}

found:
	debugf2("%s(): sys_addr 0x%lx matches node %d\n", __func__,
		(unsigned long) sys_addr, node_id);
	return edac_mc_find(node_id);
}

/* Return the base value defined by the DRAM Base register for the node
 * represented by mci.  This function returns the full 40-bit value despite
 * the fact that the register only stores bits 39-24 of the value.  See
 * section 3.4.4.1.
 */
static inline u64 get_dram_base(struct mem_ctl_info *mci)
{
	struct k8_pvt *pvt;

	pvt = mci->pvt_info;
	return ((u64) (pvt->dbr[pvt->node_id] & 0xffff0000)) << 8;
}

/* Obtain info from the DRAM Hole Address Register (section 3.4.8) for the
 * node represented by mci.  Info is passed back in *hole_base, *hole_offset,
 * and *hole_size.  Function returns 0 if info is valid or 1 if info is
 * invalid.  Info may be invalid for either of the following reasons:
 *
 *     - The revision of the node is not E or greater.  In this case, the DRAM
 *       Hole Address Register does not exist.
 *     - The DramHoleValid bit is cleared in the DRAM Hole Address Register,
 *       indicating that its contents are not valid.
 *
 * The values passed back in *hole_base, *hole_offset, and *hole_size are
 * complete 32-bit values despite the fact that the bitfields in the DHAR
 * only represent bits 31-24 of the base and offset values.
 */
static int get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
		u64 *hole_offset, u64 *hole_size)
{
	struct k8_pvt *pvt;
	u64 base;

	pvt = mci->pvt_info;

	if (pvt->ext_model < OPTERON_CPU_REV_E) {
		debugf2("revision %d for node %d does not support DHAR\n",
			pvt->ext_model, pvt->node_id);
		return 1;
	}

	if ((pvt->dhar & 0x01) == 0) {
		debugf2("DramHoleValid bit cleared in DHAR for node %d\n",
			pvt->node_id);
		return 1;  /* DramHoleValid bit is cleared */
	}

	/* +------------------+--------------------+--------------------+-----
	 * | memory           | DRAM hole          | relocated          |
	 * | [0, (x - 1)]     | [x, 0xffffffff]    | addresses from     |
	 * |                  |                    | DRAM hole          |
	 * |                  |                    | [0x100000000,      |
	 * |                  |                    |  (0x100000000+     |
	 * |                  |                    |   (0xffffffff-x))] |
	 * +------------------+--------------------+--------------------+-----
	 *
	 * Above is a diagram of physical memory showing the DRAM hole and the
	 * relocated addresses from the DRAM hole.  As shown, the DRAM hole
	 * starts at address x (the base address) and extends through address
	 * 0xffffffff.  The DRAM Hole Address Register (DHAR) relocates the
	 * addresses in the hole so that they start at 0x100000000.
	 */

	base = pvt->dhar & 0xff000000;
	*hole_base = base;
	*hole_offset = (pvt->dhar & 0x0000ff00) << 16;
	*hole_size = (0x1ull << 32) - base;
	debugf2("DHAR info for node %d: base 0x%lx offset 0x%lx size 0x%lx\n",
		pvt->node_id, (unsigned long) *hole_base,
		(unsigned long) *hole_offset, (unsigned long) *hole_size);
	return 0;
}

/* Return the DramAddr that the SysAddr given by sys_addr maps to.  It is
 * assumed that sys_addr maps to the node given by mci.
 */
static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr)
{
	u64 dram_base, hole_base, hole_offset, hole_size, dram_addr;

	/* The first part of section 3.4.4 (p. 70) shows how the DRAM Base
	 * (section 3.4.4.1) and DRAM Limit (section 3.4.4.2) registers are
	 * used to translate a SysAddr to a DramAddr.  If the DRAM Hole
	 * Address Register (DHAR) is enabled, then it is also involved in
	 * translating a SysAddr to a DramAddr.  Sections 3.4.8 and 3.5.8.2
	 * describe the DHAR and how it is used for memory hoisting.  These
	 * parts of the documentation are unclear.  I interpret them as
	 * follows:
	 *
	 *     When node n receives a SysAddr, it processes the SysAddr as
	 *     follows:
	 *
	 *         1.  It extracts the DRAMBase and DRAMLimit values from the
	 *             DRAM Base and DRAM Limit registers for node n.  If the
	 *             SysAddr is not within the range specified by the base
	 *             and limit values, then node n ignores the Sysaddr
	 *             (since it does not map to node n).  Otherwise continue
	 *             to step 2 below.
	 *
	 *         2.  If the DramHoleValid bit of the DHAR for node n is
	 *             clear, the DHAR is disabled so skip to step 3 below.
	 *             Otherwise see if the SysAddr is within the range of
	 *             relocated addresses (starting at 0x100000000) from the
	 *             DRAM hole.  If not, skip to step 3 below.  Else get the
	 *             value of the DramHoleOffset field from the DHAR.  To
	 *             obtain the DramAddr, subtract the offset defined by
	 *             this value from the SysAddr.
	 *
	 *         3.  Obtain the base address for node n from the DRAMBase
	 *             field of the DRAM Base register for node n.  To obtain
	 *             the DramAddr, subtract the base address from the
	 *             SysAddr, as shown near the start of section 3.4.4
	 *             (p. 70).
	 */

	dram_base = get_dram_base(mci);

	if (!get_dram_hole_info(mci, &hole_base, &hole_offset, &hole_size)) {
		if ((sys_addr >= (1ull << 32)) &&
		    (sys_addr < ((1ull << 32) + hole_size))) {
			/* use DHAR to translate SysAddr to DramAddr */
			dram_addr = sys_addr - hole_offset;
			debugf2("using DHAR to translate SysAddr 0x%lx to "
				"DramAddr 0x%lx\n",
				(unsigned long) sys_addr,
				(unsigned long) dram_addr);
			return dram_addr;
		}
	}

	/* Translate the SysAddr to a DramAddr as shown near the start of
	 * section 3.4.4 (p. 70).  Although sys_addr is a 64-bit value, the k8
	 * only deals with 40-bit values.  Therefore we discard bits 63-40 of
	 * sys_addr below.  If bit 39 of sys_addr is 1 then the bits we
	 * discard are all 1s.  Otherwise the bits we discard are all 0s.  See
	 * section 3.4.2 of AMD publication 24592: AMD x86-64 Architecture
	 * Programmer's Manual Volume 1 Application Programming.
	 */
	dram_addr = (sys_addr & 0xffffffffffull) - dram_base;

	debugf2("using DRAM Base register to translate SysAddr 0x%lx to "
		"DramAddr 0x%lx\n", (unsigned long) sys_addr,
		(unsigned long) dram_addr);
	return dram_addr;
}

/* Parameter intlv_en is the value of the IntlvEn field from a DRAM Base
 * register (section 3.4.4.1).  Return the number of bits from a SysAddr that
 * are used for node interleaving.
 */
static int num_node_interleave_bits(unsigned intlv_en)
{
	static const int intlv_shift_table[] = { 0, 1, 0, 2, 0, 0, 0, 3 };
	int n;

	BUG_ON(intlv_en > 7);
	n = intlv_shift_table[intlv_en];
	debugf2("using %d bits for node interleave\n", n);
	return n;
}

/* Translate the DramAddr given by dram_addr to an InputAddr and return the
 * result.
 */
static u64 dram_addr_to_input_addr(struct mem_ctl_info *mci, u64 dram_addr)
{
	struct k8_pvt *pvt;
	int intlv_shift;
	u64 input_addr;

	pvt = mci->pvt_info;

	/* Near the start of section 3.4.4 (p. 70), the k8 documentation gives
	 * instructions for translating a DramAddr to an InputAddr.  Here we
	 * are following these instructions.
	 */
	intlv_shift = num_node_interleave_bits((pvt->dbr[0] >> 8) & 0x07);
	input_addr = ((dram_addr >> intlv_shift) & 0xffffff000ull) +
	    (dram_addr & 0xfff);

	debugf2("DramAddr 0x%lx translates to InputAddr 0x%lx\n",
		(unsigned long) dram_addr, (unsigned long) input_addr);
	return input_addr;
}

/* Translate the SysAddr represented by sys_addr to an InputAddr and return
 * the result.  It is assumed that sys_addr maps to the node given by mci.
 */
static u64 sys_addr_to_input_addr(struct mem_ctl_info *mci, u64 sys_addr)
{
	u64 input_addr;

	input_addr = dram_addr_to_input_addr(
				mci, sys_addr_to_dram_addr(mci, sys_addr));
	debugf2("%s(): SysAdddr 0x%lx translates to InputAddr 0x%lx\n",
		__func__, (unsigned long) sys_addr,
		(unsigned long) input_addr);
	return input_addr;
}

/* input_addr is an InputAddr associated with the node given by mci.  Return
 * the csrow that input_addr maps to, or -1 on failure (no csrow claims
 * input_addr).
 */
static int input_addr_to_csrow(struct mem_ctl_info *mci, u64 input_addr)
{
	struct k8_pvt *pvt;
	int i;
	u32 dcsb, dcsm;
	u64 base, mask;

	pvt = mci->pvt_info;

	/* Here we use the DRAM CS Base (section 3.5.4) and DRAM CS Mask
	 * (section 3.5.5) registers.  For each CS base/mask register pair,
	 * test the condition shown near the start of section 3.5.4 (p. 84).
	 */

	for (i = 0; i < K8_NR_CSROWS; i++) {
		dcsb = pvt->dcsb[i];
		dcsm = pvt->dcsm[i];

		if ((dcsb & K8_DCSB_CS_ENABLE) == 0) {
			debugf2("input_addr_to_csrow: CSBE bit is cleared "
				"for csrow %d (node %d)\n", i,
				pvt->node_id);
			continue;  /* CSBE bit is cleared */
		}

		base = base_from_dcsb(pvt, i);
		mask = ~mask_from_dcsm(pvt, i);

		if ((input_addr & mask) == (base & mask)) {
			debugf2("InputAddr 0x%lx matches csrow %d "
				"(node %d)\n", (unsigned long) input_addr, i,
				pvt->node_id);
			return i;  /* success: csrow i matches */
		}
	}

	debugf2("no matching csrow for InputAddr 0x%lx (node %d)\n",
		(unsigned long) input_addr, pvt->node_id);
	return -1;  /* failed to find matching csrow */
}

/* input_addr is an InputAddr associated with the node represented by mci.
 * Translate input_addr to a DramAddr and return the result.
 */
static u64 input_addr_to_dram_addr(struct mem_ctl_info *mci, u64 input_addr)
{
	struct k8_pvt *pvt;
	int node_id, intlv_shift;
	u64 bits, dram_addr;
	u32 intlv_sel;

	/* Near the start of section 3.4.4 (p. 70), the k8 documentation shows
	 * how to translate a DramAddr to an InputAddr.  Here we reverse this
	 * procedure.  When translating from a DramAddr to an InputAddr, the
	 * bits used for node interleaving are discarded.  Here we recover
	 * these bits from the IntlvSel field of the DRAM Limit register
	 * (section 3.4.4.2) for the node that input_addr is associated with.
	 */

	pvt = mci->pvt_info;
	node_id = pvt->node_id;
	BUG_ON((node_id < 0) || (node_id > 7));
	intlv_shift = num_node_interleave_bits((pvt->dbr[0] >> 8) & 0x07);

	if (intlv_shift == 0) {
		debugf1("node interleaving disabled: InputAddr 0x%lx "
			"translates to DramAddr of same value\n",
			(unsigned long) input_addr);
		return input_addr;
	}

	bits = ((input_addr & 0xffffff000ull) << intlv_shift) +
	       (input_addr & 0xfff);
	intlv_sel = pvt->dlr[node_id] & (((1 << intlv_shift) - 1) << 8);
	dram_addr = bits + (intlv_sel << 4);
	debugf1("InputAddr 0x%lx translates to DramAddr 0x%lx "
		"(%d node interleave bits)\n", (unsigned long) input_addr,
		(unsigned long) dram_addr, intlv_shift);
	return dram_addr;
}

/* dram_addr is a DramAddr that maps to the node represented by mci.  Convert
 * dram_addr to a SysAddr and return the result.
 */
static u64 dram_addr_to_sys_addr(struct mem_ctl_info *mci, u64 dram_addr)
{
	struct k8_pvt *pvt;
	u64 hole_base, hole_offset, hole_size, base, limit, sys_addr;

	pvt = mci->pvt_info;

	if (!get_dram_hole_info(mci, &hole_base, &hole_offset, &hole_size)) {
		if ((dram_addr >= hole_base) &&
		    (dram_addr < (hole_base + hole_size))) {
			/* use DHAR to translate DramAddr to SysAddr */
			sys_addr = dram_addr + hole_offset;
			debugf1("using DHAR to translate DramAddr 0x%lx to "
				"SysAddr 0x%lx\n", (unsigned long) dram_addr,
				(unsigned long) sys_addr);
			return sys_addr;
		}
	}

	get_base_and_limit(pvt, pvt->node_id, &base, &limit);
	sys_addr = dram_addr + base;

	/* The sys_addr we have computed up to this point is a 40-bit value
	 * because the k8 deals with 40-bit values.  However, the value we are
	 * supposed to return is a full 64-bit physical address.  The AMD
	 * x86-64 architecture specifies that the most significant implemented
	 * address bit through bit 63 of a physical address must be either all
	 * 0s or all 1s.  Therefore we sign-extend the 40-bit sys_addr to a
	 * 64-bit value below.  See section 3.4.2 of AMD publication 24592:
	 * AMD x86-64 Architecture Programmer's Manual Volume 1 Application
	 * Programming.
	 */
	sys_addr |= ~((sys_addr & (1ull << 39)) - 1);

	debugf1("Using DRAM Base register for node %d to translate "
		"DramAddr 0x%lx to SysAddr 0x%lx\n", pvt->node_id,
		(unsigned long) dram_addr, (unsigned long) sys_addr);
	return sys_addr;
}

/* input_addr is an InputAddr associated with the node given by mci.
 * Translate input_addr to a SysAddr and return the result.
 */
static inline u64 input_addr_to_sys_addr(struct mem_ctl_info *mci,
		u64 input_addr)
{
	return dram_addr_to_sys_addr(
			mci, input_addr_to_dram_addr(mci, input_addr));
}

/* Find the minimum and maximum InputAddr values that map to the given csrow.
 * Pass back these values in *input_addr_min and *input_addr_max.
 */
static void find_csrow_limits(struct mem_ctl_info *mci, int csrow,
		u64 *input_addr_min, u64 *input_addr_max)
{
	struct k8_pvt *pvt;
	u64 base, mask;

	pvt = mci->pvt_info;
	BUG_ON((csrow < 0) || (csrow >= K8_NR_CSROWS));
	base = base_from_dcsb(pvt, csrow);
	mask = mask_from_dcsm(pvt, csrow);
	*input_addr_min = base & ~mask;

	/* To find the max InputAddr for the csrow, start with the base
	 * address and set all bits that are "don't care" bits in the test at
	 * the start of section 3.5.4 (p. 84).  The "don't care" bits are all
	 * set bits in the mask and all bits in the gaps between bit ranges
	 * [35-25] and [19-13].  The value 0x1f01fff represents bits [24-20]
	 * and [12-0], which are all bits in the above-mentioned gaps.
	 */
	*input_addr_max = base | mask | pvt->dcs_mask_notused;
}

/* Extract error address from MCA NB Address Low (section 3.6.4.5) and
 * MCA NB Address High (section 3.6.4.6) register values and return the
 * result.
 */
static inline u64 error_address_from_k8_error_info(
		struct k8_error_info *info)
{
	return (((u64) (info->error_info.nbeah & 0xff)) << 32) +
	       (info->error_info.nbeal & ~0x03);
}

static inline void error_address_to_page_and_offset(u64 error_address,
		u32 *page, u32 *offset)
{
	*page = (u32) (error_address >> PAGE_SHIFT);
	*offset = ((u32) error_address) & ~PAGE_MASK;
}

/* Return 1 if registers contain valid error information.  Else return 0. */
static inline int k8_error_info_valid(struct k8_error_info_regs *regs)
{
	return ((regs->nbsh & K8_NBSH_VALID_BIT) != 0);
}

/* return 0 if regs contains valid error info; else return 1 */
static int k8_get_error_info_regs(struct mem_ctl_info *mci,
		struct k8_error_info_regs *regs)
{
	struct k8_pvt *pvt;

	pvt = mci->pvt_info;
	pci_read_config_dword(pvt->misc_ctl, K8_NBSH, &regs->nbsh);

	if (!k8_error_info_valid(regs))
		return 1;

	pci_read_config_dword(pvt->misc_ctl, K8_NBSL, &regs->nbsl);
	pci_read_config_dword(pvt->misc_ctl, K8_NBEAH, &regs->nbeah);
	pci_read_config_dword(pvt->misc_ctl, K8_NBEAL, &regs->nbeal);
	return 0;
}

static void k8_get_error_info(struct mem_ctl_info *mci,
		struct k8_error_info *info)
{
	struct k8_pvt *pvt;
	struct k8_error_info_regs regs;

	pvt = mci->pvt_info;
	info->race_condition_detected = 0;

	if (k8_get_error_info_regs(mci, &info->error_info))
		return;

	/*
	 * Here's the problem with the K8's EDAC reporting:
	 * There are four registers which report pieces of error
	 * information.  These four registers are shared between
	 * CEs and UEs.  Furthermore, contrary to what is stated in
	 * the OBKG, the overflow bit is never used!  Every error
	 * always updates the reporting registers.
	 *
	 * Can you see the race condition?  All four error reporting
	 * registers must be read before a new error updates them!
	 * There is no way to read all four registers atomically.  The
	 * best than can be done is to detect that a race has occured
	 * and then report the error without any kind of precision.
	 *
	 * What is still positive is that errors are
	 * still reported and thus problems can still be detected -
	 * just not localized because the syndrome and address are
	 * spread out across registers.
	 *
	 * Grrrrr!!!!!  Here's hoping that AMD fixes this in some
	 * future K8 rev. UEs and CEs should have separate
	 * register sets with proper overflow bits that are used!
	 * At very least the problem can be fixed by honoring the
	 * ErrValid bit in nbsh and not updating registers - just
	 * set the overflow bit - unless the current error is CE
	 * and the new error is UE which would be the only situation
	 * for overwriting the current values.
	 */

	regs = info->error_info;

	/* Use info from the second read - most current */
	if (unlikely(k8_get_error_info_regs(mci, &info->error_info)))
		return;

	/* clear the error */
	pci_write_bits32(pvt->misc_ctl, K8_NBSH, 0, K8_NBSH_VALID_BIT);

	pci_read_config_dword(pvt->misc_ctl, K8_NBCFG, &pvt->nbcfg);
	info->race_condition_detected =
	    ((regs.nbsh != info->error_info.nbsh) ||
	     (regs.nbsl != info->error_info.nbsl) ||
	     (regs.nbeah != info->error_info.nbeah) ||
	     (regs.nbeal != info->error_info.nbeal));
}

static inline void decode_gart_tlb_error(struct mem_ctl_info *mci,
					 struct k8_error_info *info)
{
	u32 err_code;
	u32 ec_tt;		/* error code transaction type (2b) */
	u32 ec_ll;		/* error code cache level (2b) */

	err_code = info->error_info.nbsl & 0xffffUL;
	ec_tt = (err_code >> 2) & 0x03UL;
	ec_ll = (err_code >> 0) & 0x03UL;
	k8_mc_printk(mci, KERN_ERR,
		     "GART TLB errorr: transaction type(%s), "
		     "cache level(%s)\n", tt_msgs[ec_tt], ll_msgs[ec_ll]);
}

static inline void decode_cache_error(struct mem_ctl_info *mci,
				      struct k8_error_info *info)
{
	u32 err_code;
	u32 ec_rrrr;		/* error code memory transaction (4b) */
	u32 ec_tt;		/* error code transaction type (2b) */
	u32 ec_ll;		/* error code cache level (2b) */

	err_code = info->error_info.nbsl & 0xffffUL;
	ec_rrrr = (err_code >> 4) & 0x0fUL;
	ec_tt = (err_code >> 2) & 0x03UL;
	ec_ll = (err_code >> 0) & 0x03UL;
	k8_mc_printk(mci, KERN_ERR,
		     "cache heirarchy error: memory transaction type(%s), "
		     "transaction type(%s), cache level(%s)\n",
		     memtt_msgs[ec_rrrr], tt_msgs[ec_tt], ll_msgs[ec_ll]);
}

/* sys_addr is an error address (a SysAddr) extracted from the MCA NB Address
 * Low (section 3.6.4.5) and MCA NB Address High (section 3.6.4.6) registers
 * of a node that detected an ECC memory error.  mci represents the node that
 * the error address maps to (possibly different from the node that detected
 * the error).  Return the number of the csrow that sys_addr maps to, or -1 on
 * error.
 */
static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr)
{
	int csrow;

	csrow = input_addr_to_csrow(mci,
				    sys_addr_to_input_addr(mci, sys_addr));

	if (csrow == -1)
		k8_mc_printk(mci, KERN_ERR,
			     "Failed to translate InputAddr to csrow for "
			     "address 0x%lx\n", (unsigned long) sys_addr);

	return csrow;
}

static void k8_handle_ce(struct mem_ctl_info *mci, struct k8_error_info *info)
{
	struct k8_pvt *pvt;
	unsigned syndrome;
	u64 error_address;
	u32 page, offset;
	int channel, csrow;
	struct mem_ctl_info *log_mci, *src_mci;

	log_mci = mci;
	pvt = mci->pvt_info;

	if ((info->error_info.nbsh & BIT(26)) == 0)
		goto no_info;  /* error address not valid */

	error_address = error_address_from_k8_error_info(info);
	syndrome = ((info->error_info.nbsh >> 15) & 0xff);

	if (pvt->nbcfg & BIT(K8_NBCFG_CHIPKILL)) {
		/* chipkill ecc mode */
		syndrome += (info->error_info.nbsl >> 16) & 0xff00;
		channel = chan_from_chipkill_syndrome(syndrome);

		if (channel < 0) {
			/* If the syndrome couldn't be found then the race
			 * condition for error reporting registers likely
			 * occurred.  There's alot more in doubt than just the
			 * channel.  Might as well just log the error without
			 * any info.
			 */
			k8_mc_printk(mci, KERN_WARNING,
				     "unknown syndrome 0x%x - possible error "
				     "reporting race\n", syndrome);
			goto no_info;
		}
	} else
		/* non-chipkill ecc mode
		 *
		 * The k8 documentation is unclear about how to determine the
		 * channel number when using non-chipkill memory.  This method
		 * was obtained from email communication with someone at AMD.
		 */
		channel = ((error_address & BIT(3)) != 0);

	/* Find out which node the error address belongs to.  This may be
	 * different from the node that detected the error.
	 */
	if ((src_mci = find_mc_by_sys_addr(mci, error_address)) == NULL) {
		k8_mc_printk(mci, KERN_ERR,
			     "failed to map error address 0x%lx to a node\n",
			     (unsigned long) error_address);
		goto no_info;
	}

	log_mci = src_mci;

	if ((csrow = sys_addr_to_csrow(log_mci, error_address)) < 0)
		goto no_info;

	error_address_to_page_and_offset(error_address, &page, &offset);
	edac_mc_handle_ce(log_mci, page, offset, syndrome, csrow, channel,
			  EDAC_MOD_STR);
	return;

no_info:
	edac_mc_handle_ce_no_info(log_mci,EDAC_MOD_STR);
}

static void k8_handle_ue(struct mem_ctl_info *mci, struct k8_error_info *info)
{
	int csrow;
	u64 error_address;
	u32 page, offset;
	struct mem_ctl_info *log_mci, *src_mci;

	log_mci = mci;

	if ((info->error_info.nbsh & BIT(26)) == 0)
		goto no_info;  /* error address not valid */

	error_address = error_address_from_k8_error_info(info);

	/* Find out which node the error address belongs to.  This may be
	 * different from the node that detected the error.
	 */
	if ((src_mci = find_mc_by_sys_addr(mci, error_address)) == NULL) {
		k8_mc_printk(mci, KERN_ERR,
			     "failed to map error address 0x%lx to a node\n",
			     (unsigned long) error_address);
		goto no_info;
	}

	log_mci = src_mci;

	if ((csrow = sys_addr_to_csrow(log_mci, error_address)) < 0)
		goto no_info;

	error_address_to_page_and_offset(error_address, &page, &offset);
	edac_mc_handle_ue(log_mci, page, offset, csrow, EDAC_MOD_STR);
	return;

no_info:
	edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
}

static void decode_bus_error(struct mem_ctl_info *mci,
		struct k8_error_info *info)
{
	u32 err_code, ext_ec;
	u32 ec_pp;    /* error code participating processor (2p) */
	u32 ec_to;    /* error code timed out (1b) */
	u32 ec_rrrr;  /* error code memory transaction (4b) */
	u32 ec_ii;    /* error code memory or I/O (2b) */
	u32 ec_ll;    /* error code cache level (2b) */

	debugf0("MC%d: %s()\n", mci->mc_idx, __func__);
	err_code = info->error_info.nbsl & 0xffffUL;
	ec_pp = (err_code >> 9) & 0x03UL;
	ec_to = (err_code >> 8) & 0x01UL;
	ec_rrrr = (err_code >> 4) & 0x0fUL;
	ec_ii = (err_code >> 2) & 0x03UL;
	ec_ll = (err_code >> 0) & 0x03UL;
	ext_ec = (info->error_info.nbsl >> 16) & 0xfUL;

	/* FIXME - these should report through EDAC channels */
	k8_mc_printk(mci, KERN_ERR, "general bus error: participating "
		     "processor(%s), time-out(%s) memory transaction "
		     "type(%s), mem or i/o(%s), cache level(%s)\n",
		     pp_msgs[ec_pp], to_msgs[ec_to], memtt_msgs[ec_rrrr],
		     ii_msgs[ec_ii], ll_msgs[ec_ll]);

	if (ec_pp & 0x02)
		return;  /* We aren't the node involved */

	/* FIXME - other errors should have other error handling mechanisms */
	if (ext_ec && (ext_ec != 0x8)) {
		k8_mc_printk(mci, KERN_ERR,
			     "no special error handling for this error\n");
		return;
	}

	if (info->error_info.nbsh & BIT(14))
		k8_handle_ce(mci, info);
	else if (info->error_info.nbsh & BIT(13))
		k8_handle_ue(mci, info);

	/* If main error is CE then overflow must be CE.  If main error is UE
	 * then overflow is unknown.  We'll call the overflow a CE - if
	 * panic_on_ue is set then we're already panic'ed and won't arrive
	 * here.  If panic_on_ue is not set then apparently someone doesn't
	 * think that UE's are catastrophic.
	 */
	if (info->error_info.nbsh & BIT(30))
		edac_mc_handle_ce_no_info(mci,
		    EDAC_MOD_STR " Error Overflow set");
}

/* return 1 if error found or 0 if error not found */
static int k8_process_error_info(struct mem_ctl_info *mci,
		struct k8_error_info *info, int handle_errors)
{
	struct k8_pvt *pvt;
	struct k8_error_info_regs *regs;
	u32 err_code, ext_ec;
	int gart_tlb_error;

	pvt = mci->pvt_info;

	/* check for an error */
	if (!k8_error_info_valid(&info->error_info))
		return 0;

	if (!handle_errors)
		return 1;

	if (info->race_condition_detected)
		k8_mc_printk(mci, KERN_WARNING, "race condition detected!\n");

	gart_tlb_error = 0;
	regs = &info->error_info;
	err_code = info->error_info.nbsl & 0xffffUL;
	ext_ec = (info->error_info.nbsl >> 16) & 0x0fUL;
	debugf1("NorthBridge ERROR: mci(0x%p) node(%d) ErrAddr(0x%.8x-%.8x) "
		"nbsh(0x%.8x) nbsl(0x%.8x)\n", mci, pvt->node_id, regs->nbeah,
		regs->nbeal, regs->nbsh, regs->nbsl);

	if ((err_code & 0xfff0UL) == 0x0010UL) {
		debugf1("GART TLB error\n");
		gart_tlb_error = 1;
		decode_gart_tlb_error(mci, info);
	} else if ((err_code & 0xff00UL) == 0x0100UL) {
		debugf1("Cache error\n");
		decode_cache_error(mci, info);
	} else if ((err_code & 0xf800UL) == 0x0800UL) {
		debugf1("Bus error\n");
		decode_bus_error(mci, info);
	} else
		/* shouldn't reach here! */
		k8_mc_printk(mci, KERN_WARNING,
			     "%s(): unknown MCE error 0x%x\n", __func__,
			     err_code);

	k8_mc_printk(mci, KERN_ERR, "extended error code: %s\n",
		     ext_msgs[ext_ec]);

	if (((ext_ec >= 1 && ext_ec <= 4) || (ext_ec == 6)) &&
	    ((info->error_info.nbsh >> 4) & 0x07UL))
		k8_mc_printk(mci, KERN_ERR,
		    "Error on hypertransport link: %s\n",
		    htlink_msgs[(info->error_info.nbsh >> 4) & 0x07UL]);

	/* GART errors are benign as per AMD, do not panic on them */
	if (!gart_tlb_error && (regs->nbsh & BIT(29))) {
		k8_mc_printk(mci, KERN_CRIT, "uncorrected error\n");
		edac_mc_handle_ue_no_info(mci, "UE bit is set\n");
	}

	if (regs->nbsh & BIT(25))
		panic("MC%d: processor context corrupt", mci->mc_idx);

	return 1;
}

static void k8_check(struct mem_ctl_info *mci)
{
	struct k8_error_info info;

	debugf3("%s()\n", __func__);
	k8_get_error_info(mci, &info);
	k8_process_error_info(mci, &info, 1);
}

static int k8_get_devs(struct mem_ctl_info *mci, int dev_idx)
{
	const struct k8_dev_info *k8_dev = &k8_devs[dev_idx];
	struct k8_pvt *pvt;
	struct pci_dev *pdev;

	pdev = to_pci_dev(mci->dev);
	pvt = mci->pvt_info;

	/* The address mapping device provides a table that indicates which
	 * physical address ranges are owned by which node.  Each node's
	 * memory controller has memory controller addresses that begin at
	 * 0x0.
	 */
	pvt->addr_map = pci_get_related_function(PCI_VENDOR_ID_AMD,
						 k8_dev->addr_map, pdev);

	if (pvt->addr_map == NULL) {
		k8_printk(KERN_ERR, "error address map device not found: "
			  "vendor %x device 0x%x (broken BIOS?)\n",
			  PCI_VENDOR_ID_AMD, k8_dev->addr_map);
		return 1;
	}

	debugf1("Addr Map device PCI Bus ID:\t%s\n",
		pci_name(pvt->addr_map));

	pvt->misc_ctl = pci_get_related_function(PCI_VENDOR_ID_AMD,
						 k8_dev->misc_ctl, pdev);

	if (pvt->misc_ctl == NULL) {
		pci_dev_put(pvt->addr_map);
		pvt->addr_map = NULL;
		k8_printk(KERN_ERR, "error miscellaneous device not found: "
			  "vendor %x device 0x%x (broken BIOS?)\n",
			  PCI_VENDOR_ID_AMD, k8_dev->misc_ctl);
		return 1;
	}

	debugf1("Misc device PCI Bus ID:\t\t%s\n",
		pci_name(pvt->misc_ctl));

	return 0;
}

static void k8_get_mc_regs(struct mem_ctl_info *mci)
{
	struct k8_pvt *pvt;
	struct pci_dev *pdev;
	int i;

	pdev = to_pci_dev(mci->dev);
	pvt = mci->pvt_info;
	debugf1("%s(MC node-id=%d): (ExtModel=%d) %s\n",
		__func__, pvt->node_id, pvt->ext_model,
		(pvt->ext_model >= OPTERON_CPU_REV_F) ?
		"Rev F or later" : "Rev E or earlier");

	for (i = 0; i < MAX_K8_NODES; i++) {
		pci_read_config_dword(pvt->addr_map, K8_DBR + (i * 8),
				      &pvt->dbr[i]);
		pci_read_config_dword(pvt->addr_map, K8_DLR + (i * 8),
				      &pvt->dlr[i]);
		debugf1("    dbr[%d]: 0x%x\n", i, pvt->dbr[i]);
		debugf1("    dlr[%d]: 0x%x\n", i, pvt->dlr[i]);
	}

	/* Setup the DCSB and DCSM arrays from hardware */
	setup_dcsb_dcsm(pvt,pdev);

	pci_read_config_dword(pvt->addr_map, K8_DHAR, &pvt->dhar);
	pci_read_config_dword(pdev, K8_DBAM, &pvt->dbam);
	pci_read_config_dword(pvt->misc_ctl, K8_NBCAP, &pvt->nbcap);
	debugf1("    dhar: 0x%x\n", pvt->dhar);
	debugf1("    dbam: 0x%x\n", pvt->dbam);
	debugf1("    dcl:  0x%x\n", pvt->dcl);
	debugf1("    nbcap: %u\n", pvt->nbcap);
}

/* Return 1 if dual channel mode is active.  Else return 0. */
static inline int dual_channel_active(u32 dcl, int mc_device_index)
{
	int flag;
	int ext_model = node_rev(mc_device_index);

	if (ext_model >= OPTERON_CPU_REV_F) {
		/* Rev F (NPT) and later */
		flag = (dcl >> 11) & 0x1;
	} else {
		/* Rev E and earlier */
		flag = (dcl >> 16) & 0x1;
	}

	return flag;
}

static u32 csrow_nr_pages(int csrow_nr, struct k8_pvt *pvt)
{
	u32 shift, nr_pages;
	int ext_model = pvt->ext_model;

	/* The math on this doesn't look right on the surface because x/2*4
	 * can be simplified to x*2 but this expression makes use of the fact
	 * that it is integral math where 1/2=0
	 */
	shift = (pvt->dbam >> ((csrow_nr / 2) * 4)) & 0xF; /*PG88 */
	debugf0("   %s(csrow=%d) DBAM index= %d\n", __func__, csrow_nr,
		shift);
	/* First step is to calc the number of bits to shift a value of 1
	 * left to indicate show many pages. Start with the DBAM value
	 * as the starting bits, then proceed to adjust those shift
	 * bits, based on CPU REV and the table. See BKDG on the DBAM
	 */

	if (ext_model >= OPTERON_CPU_REV_F) {
		/* 27 shift, is 128Mib minimum DIMM size in REV F and later
		 * upto 8 Gb, in a step function progression
		 */
		static u32 rev_f_shift[] = { 27, 28, 29, 29, 29, 30, 30, 31,
					     31, 32, 32, 33,  0,  0,  0,  0 };
		nr_pages = 1 << (rev_f_shift[shift] - PAGE_SHIFT);
	} else {
		/* REV E and less section This line is tricky.
		 * It collapses the table used by revision D and later to one
		 * that matches revision CG and earlier
		 */
		shift -= (ext_model >= OPTERON_CPU_REV_D)?
			(shift > 8 ? 4:
				(shift > 5 ? 3:
					(shift > 2 ? 1 : 0))): 0;
		/* 25 shift, is 32MiB minimum DIMM size in REV E and prior */
		nr_pages = 1 << (shift + 25 - PAGE_SHIFT);
	}

	/* If dual channel then double thememory size of single channel */
	nr_pages <<= dual_channel_active(pvt->dcl, pvt->node_id);

	debugf0("   nr_pages= %u  dual channel_active = %d\n",
		nr_pages, dual_channel_active(pvt->dcl, pvt->node_id));

	return nr_pages;
}

/*
 * determine_parity_enabled()
 *
 *     NOTE: CPU Revision Dependent code
 *
 *     determine if Parity is Enabled
 */
static int determine_parity_enabled(struct k8_pvt *pvt)
{
	int rc = 0;

	if (pvt->ext_model >= OPTERON_CPU_REV_F) {
		if (pvt->dcl & BIT(8))
			rc = 1;
	}

	return rc;
}

/*
* determine_memory_type()
*
*     NOTE: CPU Revision Dependent code
*
*     determine the memory type in operation on this controller
*/
static enum mem_type determine_memory_type(struct k8_pvt *pvt)
{
	enum mem_type type;

	if (pvt->ext_model >= OPTERON_CPU_REV_F) {
		/* Rev F and later */
		type = ((pvt->dcl >> 16) & 0x1) ? MEM_DDR2 : MEM_RDDR2;
	} else {
		/* Rev E and earlier */
		type = ((pvt->dcl >> 18) & 0x1) ? MEM_DDR : MEM_RDDR;
	}

	debugf1("  Memory type is: %s\n",
		(type == MEM_DDR2) ? "MEM_DDR2" :
		(type == MEM_RDDR2) ? "MEM_RDDR2" :
		(type == MEM_DDR) ? "MEM_DDR" : "MEM_RDDR");

	return type;
}

/*
 * determine_dram_type()
 *
 *     NOTE: CPU Revision Dependent code
 *
 *     determine the DRAM type in operation
 *     There are K8_NR_CSROWS  (8) and 2 CSROWS per DIMM, therefore
 *     there are 4 Logical DIMMs possible, thus 4 bits in the
 *     configuration register indicating whether there are
 *     X4 or X8 devices, one per logical DIMM
 */
static enum dev_type determine_dram_type(struct k8_pvt *pvt, int row)
{
	int bit;
	enum dev_type type;

	/* the starting bit depends on Revision value */
	bit = (pvt->ext_model >= OPTERON_CPU_REV_F) ? 12 : 20;
	type = ((pvt->dcl >> (bit + (row / 2))) & 0x01) ? DEV_X4 : DEV_X8;

	debugf1("  DRAM type is: %s\n", (type == DEV_X4) ? "DEV-x4" : "DEV-x8");

	return type;
}

/*
 * determine_edac_cap()
 *
 *     NOTE: CPU Revision Dependent code
 *
 *     determine if the DIMMs have ECC enabled
 *     ECC is enabled ONLY if all the DIMMs are ECC capable
 */
static enum edac_type determine_edac_cap(struct k8_pvt *pvt)
{
	int bit;
	enum dev_type edac_cap = EDAC_NONE;

	bit = (pvt->ext_model >= OPTERON_CPU_REV_F) ? 19 : 17;
	if ((pvt->dcl >> bit) & 0x1) {
		debugf1("  edac_type is: EDAC_FLAG_SECDED\n");
		edac_cap = EDAC_FLAG_SECDED;
	}

	return edac_cap;
}

static int k8_init_csrows(struct mem_ctl_info *mci)
{
	struct csrow_info *csrow;
	struct k8_pvt *pvt;
	int i, empty;
	u64 input_addr_min, input_addr_max, sys_addr;

	pvt = mci->pvt_info;
	pci_read_config_dword(pvt->misc_ctl, K8_NBCFG, &pvt->nbcfg);
	empty = 1;

	for (i = 0; i < K8_NR_CSROWS; i++) {
		csrow = &mci->csrows[i];

		if ((pvt->dcsb[i] & K8_DCSB_CS_ENABLE) == 0) {
			debugf1("csrow %d empty for node %d\n", i,
				pvt->node_id);
			continue;  /* empty */
		}

		debugf1("revision for this node (%d) is %d\n",
			pvt->node_id, node_rev(pvt->node_id));
		empty = 0;
		csrow->nr_pages = csrow_nr_pages(i, pvt);
		find_csrow_limits(mci, i, &input_addr_min, &input_addr_max);
		sys_addr = input_addr_to_sys_addr(mci, input_addr_min);
		csrow->first_page = (u32) (sys_addr >> PAGE_SHIFT);
		sys_addr = input_addr_to_sys_addr(mci, input_addr_max);
		csrow->last_page = (u32) (sys_addr >> PAGE_SHIFT);
		csrow->page_mask = ~mask_from_dcsm(pvt, i);
		csrow->grain = 8;  /* 8 bytes of resolution */
		csrow->mtype = determine_memory_type(pvt);
		csrow->dtype = determine_dram_type(pvt, i);
		debugf1("for node %d csrow %d:\n    nr_pages: %u "
			"input_addr_min: 0x%lx input_addr_max: 0x%lx "
			"sys_addr: 0x%lx first_page: 0x%lx last_page: 0x%lx "
			"page_mask: 0x%lx\n", pvt->node_id, i,
			(unsigned) csrow->nr_pages,
			(unsigned long) input_addr_min,
			(unsigned long) input_addr_max,
			(unsigned long) sys_addr,
			csrow->first_page, csrow->last_page,
			csrow->page_mask);

		if (pvt->nbcfg & BIT(K8_NBCFG_ECC_ENABLE))
			csrow->edac_mode = (pvt->nbcfg & BIT(K8_NBCFG_CHIPKILL)) ?
					   EDAC_S4ECD4ED : EDAC_SECDED;
		else
			csrow->edac_mode = EDAC_NONE;
	}

	return empty;
}

static void k8_enable_error_reporting(struct mem_ctl_info *mci)
{
	struct k8_pvt *pvt;
	u32 mc4ctl_l=0, mc4ctl_h=0, mcgctl_l=0, mcgctl_h=0;

	pvt = mci->pvt_info;
	pci_write_bits32(pvt->misc_ctl, K8_NBCTL, 0x3UL, 0x3UL);
	do_rdmsr(pvt->node_id, K8_MSR_MC4CTL, &mc4ctl_l, &mc4ctl_h);
	mc4ctl_l |= BIT(0) | BIT(1);
	do_wrmsr(pvt->node_id, K8_MSR_MC4CTL, mc4ctl_l, mc4ctl_h);
	do_rdmsr(pvt->node_id, K8_MSR_MC4CTL, &mc4ctl_l, &mc4ctl_h);
	do_rdmsr(pvt->node_id, K8_MSR_MCGCTL, &mcgctl_l, &mcgctl_h);
	mcgctl_l |= BIT(4);
	do_wrmsr(pvt->node_id, K8_MSR_MCGCTL, mcgctl_l, mcgctl_h);
	do_rdmsr(pvt->node_id, K8_MSR_MCGCTL, &mcgctl_l, &mcgctl_h);
}

static int k8_probe1(struct pci_dev *pdev, int dev_idx)
{
	struct mem_ctl_info *mci;
	struct k8_pvt *pvt;
	u32 dcl, dual_channel;
	int parity_enable;

	debugf0("%s()\n", __func__);
	build_node_revision_table();
	debugf1("pdev bus %u devfn %u\n", pdev->bus->number, pdev->devfn);
	pci_read_config_dword(pdev, K8_DCL, &dcl);
	dual_channel = dual_channel_active(dcl,
					   MCI_TO_NODE_ID(pdev));
	debugf1("dual_channel is %u (dcl is 0x%x)\n", dual_channel, dcl);

	mci = edac_mc_alloc(sizeof(*pvt), K8_NR_CSROWS, dual_channel + 1);
	if (mci == NULL)
		return -ENOMEM;

	debugf0("%s(): mci = %p\n", __func__, mci);
	pvt = mci->pvt_info;
	pvt->dcl = dcl;
	mci->dev = &pdev->dev;
	pvt->node_id = MCI_TO_NODE_ID(pdev);
	pvt->ext_model = node_rev(pvt->node_id);

	if (k8_get_devs(mci, dev_idx))
		goto fail0;

	k8_get_mc_regs(mci);
	mci->mtype_cap = MEM_FLAG_DDR2 | MEM_FLAG_RDDR2;
	mci->edac_ctl_cap = EDAC_FLAG_NONE;
	debugf1("Initializing mci->edac_cap to EDAC_FLAG_NONE\n");
	mci->edac_cap = EDAC_FLAG_NONE;

	if (pvt->nbcap & BIT(K8_NBCAP_SECDED))
		mci->edac_ctl_cap |= EDAC_FLAG_SECDED;

	if (pvt->nbcap & BIT(K8_NBCAP_CHIPKILL))
		mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED;

	mci->edac_cap = determine_edac_cap(pvt);
	if (mci->edac_cap & EDAC_FLAG_SECDED) {
		debugf1("setting EDAC_FLAG_SECDED in mci->edac_cap\n");
		mci->edac_cap |= EDAC_FLAG_SECDED;

#if 0
		if (dual_channel) {
			debugf1("setting EDAC_FLAG_S4ECD4ED in "
				"mci->edac_cap\n");
			mci->edac_cap |= EDAC_FLAG_S4ECD4ED;
		}
#endif
	}

	parity_enable = determine_parity_enabled(pvt);
	debugf1("   Parity is %s\n", parity_enable ? "Enabled" : "Disabled");

	mci->mod_name = EDAC_MOD_STR;
	mci->mod_ver = EDAC_K8_VERSION;
	mci->ctl_name = k8_devs[dev_idx].ctl_name;
	mci->edac_check = k8_check;
	mci->ctl_page_to_phys = NULL;

	if (k8_init_csrows(mci)) {
		debugf1("Setting mci->edac_cap to EDAC_FLAG_NONE "
			"because k8_init_csrows() returned nonzero "
			"value\n");
		mci->edac_cap = EDAC_FLAG_NONE;  /* no csrows found */
	} else
		k8_enable_error_reporting(mci);

	if (edac_mc_add_mc(mci, pvt->node_id)) {
		debugf1("%s(): failed edac_mc_add_mc()\n", __func__);
		/* FIXME: perhaps some code should go here that disables error
		 * reporting if we just enabled it
		 */
		goto fail1;
	}

	debugf1("%s(): success\n", __func__);
	return 0;

fail1:
	pci_dev_put(pvt->addr_map);
	pci_dev_put(pvt->misc_ctl);

fail0:
	edac_mc_free(mci);
	return -ENODEV;
}

/* returns count (>= 0), or negative on error */
static int __devinit k8_init_one(struct pci_dev *pdev,
				 const struct pci_device_id *ent)
{
	debugf0("%s()\n", __func__);

	/* wake up and enable device */
	return pci_enable_device(pdev) ?
	    -EIO : k8_probe1(pdev, ent->driver_data);
}

static void __devexit k8_remove_one(struct pci_dev *pdev)
{
	struct mem_ctl_info *mci;
	struct k8_pvt *pvt;

	debugf0("%s()\n", __func__);

	if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL)
		return;

	pvt = mci->pvt_info;
	pci_dev_put(pvt->addr_map);
	pci_dev_put(pvt->misc_ctl);
	edac_mc_free(mci);
}

static const struct pci_device_id k8_pci_tbl[] __devinitdata = {
	{PCI_VEND_DEV(AMD, OPT_2_MEMCTL), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
	 OPTERON},
	{0,}			/* 0 terminated list. */
};

MODULE_DEVICE_TABLE(pci, k8_pci_tbl);

static struct pci_driver k8_driver = {
	.name = EDAC_MOD_STR,
	.probe = k8_init_one,
	.remove = __devexit_p(k8_remove_one),
	.id_table = k8_pci_tbl,
};

int __init k8_init(void)
{
	return pci_module_init(&k8_driver);
}

static void __exit k8_exit(void)
{
	pci_unregister_driver(&k8_driver);
}

module_init(k8_init);
module_exit(k8_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh, Doug Thompson, Dave Peterson");
MODULE_DESCRIPTION("MC support for AMD K8 memory controllers - " EDAC_K8_VERSION );