static int ibm_slot_error_detail;
static int eeh_subsystem_enabled;
-#define EEH_MAX_OPTS 4096
-static char *eeh_opts;
-static int eeh_opts_last;
/* Buffer for reporting slot-error-detail rtas calls */
static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
static DEFINE_PER_CPU(unsigned long, false_positives);
static DEFINE_PER_CPU(unsigned long, ignored_failures);
-static int eeh_check_opts_config(struct device_node *dn, int class_code,
- int vendor_id, int device_id,
- int default_state);
-
/**
* The pci address cache subsystem. This subsystem places
* PCI device address resources into a red-black tree, sorted
{
struct device_node *dn;
int i;
+ int inserted = 0;
dn = pci_device_to_OF_node(dev);
if (!dn) {
if (start == 0 || ~start == 0 || end == 0 || ~end == 0)
continue;
pci_addr_cache_insert(dev, start, end, flags);
+ inserted = 1;
}
+
+ /* If there was nothing to add, the cache has no reference... */
+ if (!inserted)
+ pci_dev_put(dev);
}
/**
static inline void __pci_addr_cache_remove_device(struct pci_dev *dev)
{
struct rb_node *n;
+ int removed = 0;
restart:
n = rb_first(&pci_io_addr_cache_root.rb_root);
if (piar->pcidev == dev) {
rb_erase(n, &pci_io_addr_cache_root.rb_root);
+ removed = 1;
kfree(piar);
goto restart;
}
}
/* The cache no longer holds its reference to this device... */
- pci_dev_put(dev);
+ if (removed)
+ pci_dev_put(dev);
}
/**
/**
* eeh_token_to_phys - convert EEH address token to phys address
- * @token i/o token, should be address in the form 0xA....
- *
- * Converts EEH address tokens into physical addresses. Note that
- * ths routine does *not* convert I/O BAR addresses (which start
- * with 0xE...) to phys addresses!
+ * @token i/o token, should be address in the form 0xE....
*/
-static unsigned long eeh_token_to_phys(unsigned long token)
+static inline unsigned long eeh_token_to_phys(unsigned long token)
{
pte_t *ptep;
- unsigned long pa, vaddr;
+ unsigned long pa;
- if (REGION_ID(token) == EEH_REGION_ID)
- vaddr = IO_TOKEN_TO_ADDR(token);
- else
+ ptep = find_linux_pte(ioremap_mm.pgd, token);
+ if (!ptep)
return token;
-
- ptep = find_linux_pte(ioremap_mm.pgd, vaddr);
pa = pte_pfn(*ptep) << PAGE_SHIFT;
- return pa | (vaddr & (PAGE_SIZE-1));
+ return pa | (token & (PAGE_SIZE-1));
}
/**
- * eeh_check_failure - check if all 1's data is due to EEH slot freeze
- * @token i/o token, should be address in the form 0xA....
- * @val value, should be all 1's (XXX why do we need this arg??)
+ * eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze
+ * @dn device node
+ * @dev pci device, if known
*
- * Check for an eeh failure at the given token address.
- * The given value has been read and it should be 1's (0xff, 0xffff or
- * 0xffffffff).
+ * Check for an EEH failure for the given device node. Call this
+ * routine if the result of a read was all 0xff's and you want to
+ * find out if this is due to an EEH slot freeze event. This routine
+ * will query firmware for the EEH status.
*
- * Probe to determine if an error actually occurred. If not return val.
- * Otherwise panic.
+ * Returns 0 if there has not been an EEH error; otherwise returns
+ * an error code.
*
- * Note this routine might be called in an interrupt context ...
+ * It is safe to call this routine in an interrupt context.
*/
-unsigned long eeh_check_failure(void *token, unsigned long val)
+int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
{
- unsigned long addr;
- struct pci_dev *dev;
- struct device_node *dn;
int ret;
int rets[2];
unsigned long flags;
__get_cpu_var(total_mmio_ffs)++;
if (!eeh_subsystem_enabled)
- return val;
+ return 0;
- /* Finding the phys addr + pci device; this is pretty quick. */
- addr = eeh_token_to_phys((unsigned long)token);
- dev = pci_get_device_by_addr(addr);
- if (!dev)
- return val;
-
- dn = pci_device_to_OF_node(dev);
- if (!dn) {
- pci_dev_put(dev);
- return val;
- }
+ if (!dn)
+ return 0;
/* Access to IO BARs might get this far and still not want checking. */
if (!(dn->eeh_mode & EEH_MODE_SUPPORTED) ||
dn->eeh_mode & EEH_MODE_NOCHECK) {
- pci_dev_put(dev);
- return val;
+ return 0;
}
if (!dn->eeh_config_addr) {
- pci_dev_put(dev);
- return val;
+ return 0;
}
/*
dn->eeh_config_addr, BUID_HI(dn->phb->buid),
BUID_LO(dn->phb->buid));
- if (ret == 0 && rets[1] == 1 && rets[0] >= 2) {
+ if (ret == 0 && rets[1] == 1 && (rets[0] == 2 || rets[0] == 4)) {
int log_event;
spin_lock_irqsave(&slot_errbuf_lock, flags);
BUID_LO(dn->phb->buid), NULL, 0,
virt_to_phys(slot_errbuf),
eeh_error_buf_size,
- 2 /* Permanent Error */);
+ 1 /* Temporary Error */);
if (log_event == 0)
log_error(slot_errbuf, ERR_TYPE_RTAS_LOG,
spin_unlock_irqrestore(&slot_errbuf_lock, flags);
+ printk(KERN_INFO "EEH: MMIO failure (%d) on device: %s %s\n",
+ rets[0], dn->name, dn->full_name);
+ WARN_ON(1);
+
/*
* XXX We should create a separate sysctl for this.
*
* can use it here.
*/
if (panic_on_oops) {
- panic("EEH: MMIO failure (%d) on device:%s %s\n",
- rets[0], pci_name(dev), pci_pretty_name(dev));
+ panic("EEH: MMIO failure (%d) on device: %s %s\n",
+ rets[0], dn->name, dn->full_name);
} else {
__get_cpu_var(ignored_failures)++;
- printk(KERN_INFO "EEH: MMIO failure (%d) on device:%s %s\n",
- rets[0], pci_name(dev), pci_pretty_name(dev));
}
} else {
__get_cpu_var(false_positives)++;
}
+ return 0;
+}
+
+EXPORT_SYMBOL(eeh_dn_check_failure);
+
+/**
+ * eeh_check_failure - check if all 1's data is due to EEH slot freeze
+ * @token i/o token, should be address in the form 0xA....
+ * @val value, should be all 1's (XXX why do we need this arg??)
+ *
+ * Check for an eeh failure at the given token address.
+ * Check for an EEH failure at the given token address. Call this
+ * routine if the result of a read was all 0xff's and you want to
+ * find out if this is due to an EEH slot freeze event. This routine
+ * will query firmware for the EEH status.
+ *
+ * Note this routine is safe to call in an interrupt context.
+ */
+unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
+{
+ unsigned long addr;
+ struct pci_dev *dev;
+ struct device_node *dn;
+
+ /* Finding the phys addr + pci device; this is pretty quick. */
+ addr = eeh_token_to_phys((unsigned long __force) token);
+ dev = pci_get_device_by_addr(addr);
+ if (!dev)
+ return val;
+
+ dn = pci_device_to_OF_node(dev);
+ eeh_dn_check_failure (dn, dev);
+
pci_dev_put(dev);
return val;
}
+
EXPORT_SYMBOL(eeh_check_failure);
struct eeh_early_enable_info {
unsigned int buid_hi;
unsigned int buid_lo;
- int force_off;
};
/* Enable eeh for the given device node. */
if ((*class_code >> 16) == PCI_BASE_CLASS_DISPLAY)
enable = 0;
- if (!eeh_check_opts_config(dn, *class_code, *vendor_id, *device_id,
- enable)) {
- if (enable) {
- printk(KERN_WARNING "EEH: %s user requested to run "
- "without EEH checking.\n", dn->full_name);
- enable = 0;
- }
- }
-
- if (!enable || info->force_off) {
+ if (!enable)
dn->eeh_mode |= EEH_MODE_NOCHECK;
- }
/* Ok... see if this device supports EEH. Some do, some don't,
* and the only way to find out is to check each and every one. */
{
struct device_node *phb, *np;
struct eeh_early_enable_info info;
- char *eeh_force_off = strstr(saved_command_line, "eeh-force-off");
init_pci_config_tokens();
np = of_find_node_by_path("/rtas");
- if (np == NULL) {
- printk(KERN_WARNING "EEH: RTAS not found !\n");
+ if (np == NULL)
return;
- }
ibm_set_eeh_option = rtas_token("ibm,set-eeh-option");
ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
}
- info.force_off = 0;
- if (eeh_force_off) {
- printk(KERN_WARNING "EEH: WARNING: PCI Enhanced I/O Error "
- "Handling is user disabled\n");
- info.force_off = 1;
- }
-
/* Enable EEH for all adapters. Note that eeh requires buid's */
for (phb = of_find_node_by_name(NULL, "pci"); phb;
phb = of_find_node_by_name(phb, "pci")) {
traverse_pci_devices(phb, early_enable_eeh, &info);
}
- if (eeh_subsystem_enabled) {
+ if (eeh_subsystem_enabled)
printk(KERN_INFO "EEH: PCI Enhanced I/O Error Handling Enabled\n");
- } else {
- printk(KERN_WARNING "EEH: disabled PCI Enhanced I/O Error Handling\n");
- }
+ else
+ printk(KERN_WARNING "EEH: No capable adapters found\n");
}
/**
}
EXPORT_SYMBOL(eeh_remove_device);
-/*
- * If EEH is implemented, find the PCI device using given phys addr
- * and check to see if eeh failure checking is disabled.
- * Remap the addr (trivially) to the EEH region if EEH checking enabled.
- * For addresses not known to PCI the vaddr is simply returned unchanged.
- */
-void *eeh_ioremap(unsigned long addr, void *vaddr)
-{
- struct pci_dev *dev;
- struct device_node *dn;
-
- if (!eeh_subsystem_enabled)
- return vaddr;
-
- dev = pci_get_device_by_addr(addr);
- if (!dev)
- return vaddr;
-
- dn = pci_device_to_OF_node(dev);
- if (!dn) {
- pci_dev_put(dev);
- return vaddr;
- }
-
- if (dn->eeh_mode & EEH_MODE_NOCHECK) {
- pci_dev_put(dev);
- return vaddr;
- }
-
- pci_dev_put(dev);
- return (void *)IO_ADDR_TO_TOKEN(vaddr);
-}
-
static int proc_eeh_show(struct seq_file *m, void *v)
{
unsigned int cpu;
return 0;
}
__initcall(eeh_init_proc);
-
-/*
- * Test if "dev" should be configured on or off.
- * This processes the options literally from left to right.
- * This lets the user specify stupid combinations of options,
- * but at least the result should be very predictable.
- */
-static int eeh_check_opts_config(struct device_node *dn,
- int class_code, int vendor_id, int device_id,
- int default_state)
-{
- char devname[32], classname[32];
- char *strs[8], *s;
- int nstrs, i;
- int ret = default_state;
-
- /* Build list of strings to match */
- nstrs = 0;
- s = (char *)get_property(dn, "ibm,loc-code", NULL);
- if (s)
- strs[nstrs++] = s;
- sprintf(devname, "dev%04x:%04x", vendor_id, device_id);
- strs[nstrs++] = devname;
- sprintf(classname, "class%04x", class_code);
- strs[nstrs++] = classname;
- strs[nstrs++] = ""; /* yes, this matches the empty string */
-
- /*
- * Now see if any string matches the eeh_opts list.
- * The eeh_opts list entries start with + or -.
- */
- for (s = eeh_opts; s && (s < (eeh_opts + eeh_opts_last));
- s += strlen(s)+1) {
- for (i = 0; i < nstrs; i++) {
- if (strcasecmp(strs[i], s+1) == 0) {
- ret = (strs[i][0] == '+') ? 1 : 0;
- }
- }
- }
- return ret;
-}
-
-/*
- * Handle kernel eeh-on & eeh-off cmd line options for eeh.
- *
- * We support:
- * eeh-off=loc1,loc2,loc3...
- *
- * and this option can be repeated so
- * eeh-off=loc1,loc2 eeh-off=loc3
- * is the same as eeh-off=loc1,loc2,loc3
- *
- * loc is an IBM location code that can be found in a manual or
- * via openfirmware (or the Hardware Management Console).
- *
- * We also support these additional "loc" values:
- *
- * dev#:# vendor:device id in hex (e.g. dev1022:2000)
- * class# class id in hex (e.g. class0200)
- *
- * If no location code is specified all devices are assumed
- * so eeh-off means eeh by default is off.
- */
-
-/*
- * This is implemented as a null separated list of strings.
- * Each string looks like this: "+X" or "-X"
- * where X is a loc code, vendor:device, class (as shown above)
- * or empty which is used to indicate all.
- *
- * We interpret this option string list so that it will literally
- * behave left-to-right even if some combinations don't make sense.
- */
-static int __init eeh_parm(char *str, int state)
-{
- char *s, *cur, *curend;
-
- if (!eeh_opts) {
- eeh_opts = alloc_bootmem(EEH_MAX_OPTS);
- eeh_opts[eeh_opts_last++] = '+'; /* default */
- eeh_opts[eeh_opts_last++] = '\0';
- }
- if (*str == '\0') {
- eeh_opts[eeh_opts_last++] = state ? '+' : '-';
- eeh_opts[eeh_opts_last++] = '\0';
- return 1;
- }
- if (*str == '=')
- str++;
- for (s = str; s && *s != '\0'; s = curend) {
- cur = s;
- /* ignore empties. Don't treat as "all-on" or "all-off" */
- while (*cur == ',')
- cur++;
- curend = strchr(cur, ',');
- if (!curend)
- curend = cur + strlen(cur);
- if (*cur) {
- int curlen = curend-cur;
- if (eeh_opts_last + curlen > EEH_MAX_OPTS-2) {
- printk(KERN_WARNING "EEH: sorry...too many "
- "eeh cmd line options\n");
- return 1;
- }
- eeh_opts[eeh_opts_last++] = state ? '+' : '-';
- strncpy(eeh_opts+eeh_opts_last, cur, curlen);
- eeh_opts_last += curlen;
- eeh_opts[eeh_opts_last++] = '\0';
- }
- }
-
- return 1;
-}
-
-static int __init eehoff_parm(char *str)
-{
- return eeh_parm(str, 0);
-}
-
-static int __init eehon_parm(char *str)
-{
- return eeh_parm(str, 1);
-}
-
-__setup("eeh-off", eehoff_parm);
-__setup("eeh-on", eehon_parm);