X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fpowerpc%2Fplatforms%2Fpseries%2Feeh_driver.c;h=cbd6b0711ab450f1c306930d1ebdb50e3834201d;hb=refs%2Fheads%2Fvserver;hp=1fba695e32e82d113cbfcab42e18cfc69ae0da62;hpb=16cf0ec7408f389279d413869e94c1a351392f97;p=linux-2.6.git diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index 1fba695e3..cbd6b0711 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -23,9 +23,8 @@ * */ #include -#include #include -#include +#include #include #include #include @@ -78,8 +77,12 @@ static int irq_in_use(unsigned int irq) } /* ------------------------------------------------------- */ -/** eeh_report_error - report an EEH error to each device, - * collect up and merge the device responses. +/** + * eeh_report_error - report pci error to each device driver + * + * Report an EEH error to each device driver, collect up and + * merge the device driver responses. Cumulative response + * passed back in "userdata". */ static void eeh_report_error(struct pci_dev *dev, void *userdata) @@ -97,24 +100,49 @@ static void eeh_report_error(struct pci_dev *dev, void *userdata) PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED; disable_irq_nosync(dev->irq); } - if (!driver->err_handler) - return; - if (!driver->err_handler->error_detected) + if (!driver->err_handler || + !driver->err_handler->error_detected) return; rc = driver->err_handler->error_detected (dev, pci_channel_io_frozen); if (*res == PCI_ERS_RESULT_NONE) *res = rc; - if (*res == PCI_ERS_RESULT_NEED_RESET) return; if (*res == PCI_ERS_RESULT_DISCONNECT && rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; } -/** eeh_report_reset -- tell this device that the pci slot - * has been reset. +/** + * eeh_report_mmio_enabled - tell drivers that MMIO has been enabled + * + * Report an EEH error to each device driver, collect up and + * merge the device driver responses. Cumulative response + * passed back in "userdata". + */ + +static void eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata) +{ + enum pci_ers_result rc, *res = userdata; + struct pci_driver *driver = dev->driver; + + // dev->error_state = pci_channel_mmio_enabled; + + if (!driver || + !driver->err_handler || + !driver->err_handler->mmio_enabled) + return; + + rc = driver->err_handler->mmio_enabled (dev); + if (*res == PCI_ERS_RESULT_NONE) *res = rc; + if (*res == PCI_ERS_RESULT_DISCONNECT && + rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; +} + +/** + * eeh_report_reset - tell device that slot has been reset */ static void eeh_report_reset(struct pci_dev *dev, void *userdata) { + enum pci_ers_result rc, *res = userdata; struct pci_driver *driver = dev->driver; struct device_node *dn = pci_device_to_OF_node(dev); @@ -125,30 +153,48 @@ static void eeh_report_reset(struct pci_dev *dev, void *userdata) PCI_DN(dn)->eeh_mode &= ~EEH_MODE_IRQ_DISABLED; enable_irq(dev->irq); } - if (!driver->err_handler) - return; - if (!driver->err_handler->slot_reset) + if (!driver->err_handler || + !driver->err_handler->slot_reset) return; - driver->err_handler->slot_reset(dev); + rc = driver->err_handler->slot_reset(dev); + if (*res == PCI_ERS_RESULT_NONE) *res = rc; + if (*res == PCI_ERS_RESULT_DISCONNECT && + rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; } +/** + * eeh_report_resume - tell device to resume normal operations + */ + static void eeh_report_resume(struct pci_dev *dev, void *userdata) { struct pci_driver *driver = dev->driver; + struct device_node *dn = pci_device_to_OF_node(dev); dev->error_state = pci_channel_io_normal; if (!driver) return; - if (!driver->err_handler) - return; - if (!driver->err_handler->resume) + + if ((PCI_DN(dn)->eeh_mode) & EEH_MODE_IRQ_DISABLED) { + PCI_DN(dn)->eeh_mode &= ~EEH_MODE_IRQ_DISABLED; + enable_irq(dev->irq); + } + if (!driver->err_handler || + !driver->err_handler->resume) return; driver->err_handler->resume(dev); } +/** + * eeh_report_failure - tell device driver that device is dead. + * + * This informs the device driver that the device is permanently + * dead, and that no further recovery attempts will be made on it. + */ + static void eeh_report_failure(struct pci_dev *dev, void *userdata) { struct pci_driver *driver = dev->driver; @@ -176,7 +222,7 @@ static void eeh_report_failure(struct pci_dev *dev, void *userdata) * * pSeries systems will isolate a PCI slot if the PCI-Host * bridge detects address or data parity errors, DMA's - * occuring to wild addresses (which usually happen due to + * occurring to wild addresses (which usually happen due to * bugs in device drivers or in PCI adapter firmware). * Slot isolations also occur if #SERR, #PERR or other misc * PCI-related errors are detected. @@ -191,18 +237,22 @@ static void eeh_report_failure(struct pci_dev *dev, void *userdata) /** * eeh_reset_device() -- perform actual reset of a pci slot - * Args: bus: pointer to the pci bus structure corresponding + * @bus: pointer to the pci bus structure corresponding * to the isolated slot. A non-null value will * cause all devices under the bus to be removed * and then re-added. - * pe_dn: pointer to a "Partionable Endpoint" device node. + * @pe_dn: pointer to a "Partionable Endpoint" device node. * This is the top-level structure on which pci * bus resets can be performed. */ static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus) { - int rc; + int cnt, rc; + + /* pcibios will clear the counter; save the value */ + cnt = pe_dn->eeh_freeze_count; + if (bus) pcibios_remove_pci_devices(bus); @@ -241,6 +291,7 @@ static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus) ssleep (5); pcibios_add_pci_devices(bus); } + pe_dn->eeh_freeze_count = cnt; return 0; } @@ -250,23 +301,29 @@ static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus) */ #define MAX_WAIT_FOR_RECOVERY 15 -void handle_eeh_events (struct eeh_event *event) +struct pci_dn * handle_eeh_events (struct eeh_event *event) { struct device_node *frozen_dn; struct pci_dn *frozen_pdn; struct pci_bus *frozen_bus; int rc = 0; enum pci_ers_result result = PCI_ERS_RESULT_NONE; - const char *pci_str, *drv_str; + const char *location, *pci_str, *drv_str; frozen_dn = find_device_pe(event->dn); frozen_bus = pcibios_find_pci_bus(frozen_dn); if (!frozen_dn) { - printk(KERN_ERR "EEH: Error: Cannot find partition endpoint for %s\n", - pci_name(event->dev)); - return; + + location = get_property(event->dn, "ibm,loc-code", NULL); + location = location ? location : "unknown"; + printk(KERN_ERR "EEH: Error: Cannot find partition endpoint " + "for location=%s pci addr=%s\n", + location, pci_name(event->dev)); + return NULL; } + location = get_property(frozen_dn, "ibm,loc-code", NULL); + location = location ? location : "unknown"; /* There are two different styles for coming up with the PE. * In the old style, it was the highest EEH-capable device @@ -278,9 +335,10 @@ void handle_eeh_events (struct eeh_event *event) frozen_bus = pcibios_find_pci_bus (frozen_dn->parent); if (!frozen_bus) { - printk(KERN_ERR "EEH: Cannot find PCI bus for %s\n", - frozen_dn->full_name); - return; + printk(KERN_ERR "EEH: Cannot find PCI bus " + "for location=%s dn=%s\n", + location, frozen_dn->full_name); + return NULL; } #if 0 @@ -314,8 +372,9 @@ void handle_eeh_events (struct eeh_event *event) eeh_slot_error_detail(frozen_pdn, 1 /* Temporary Error */); printk(KERN_WARNING - "EEH: This PCI device has failed %d times since last reboot: %s - %s\n", - frozen_pdn->eeh_freeze_count, drv_str, pci_str); + "EEH: This PCI device has failed %d times since last reboot: " + "location=%s driver=%s pci addr=%s\n", + frozen_pdn->eeh_freeze_count, location, drv_str, pci_str); /* Walk the various device drivers attached to this slot through * a reset sequence, giving each an opportunity to do what it needs @@ -335,27 +394,49 @@ void handle_eeh_events (struct eeh_event *event) goto hard_fail; } - /* If any device called out for a reset, then reset the slot */ - if (result == PCI_ERS_RESULT_NEED_RESET) { - rc = eeh_reset_device(frozen_pdn, NULL); - if (rc) - goto hard_fail; - pci_walk_bus(frozen_bus, eeh_report_reset, NULL); + /* If all devices reported they can proceed, then re-enable MMIO */ + if (result == PCI_ERS_RESULT_CAN_RECOVER) { + rc = rtas_pci_enable(frozen_pdn, EEH_THAW_MMIO); + + if (rc) { + result = PCI_ERS_RESULT_NEED_RESET; + } else { + result = PCI_ERS_RESULT_NONE; + pci_walk_bus(frozen_bus, eeh_report_mmio_enabled, &result); + } } - /* If all devices reported they can proceed, the re-enable PIO */ + /* If all devices reported they can proceed, then re-enable DMA */ if (result == PCI_ERS_RESULT_CAN_RECOVER) { - /* XXX Not supported; we brute-force reset the device */ + rc = rtas_pci_enable(frozen_pdn, EEH_THAW_DMA); + + if (rc) + result = PCI_ERS_RESULT_NEED_RESET; + else + result = PCI_ERS_RESULT_RECOVERED; + } + + /* If any device has a hard failure, then shut off everything. */ + if (result == PCI_ERS_RESULT_DISCONNECT) + goto hard_fail; + + /* If any device called out for a reset, then reset the slot */ + if (result == PCI_ERS_RESULT_NEED_RESET) { rc = eeh_reset_device(frozen_pdn, NULL); if (rc) goto hard_fail; - pci_walk_bus(frozen_bus, eeh_report_reset, NULL); + result = PCI_ERS_RESULT_NONE; + pci_walk_bus(frozen_bus, eeh_report_reset, &result); } + /* All devices should claim they have recovered by now. */ + if (result != PCI_ERS_RESULT_RECOVERED) + goto hard_fail; + /* Tell all device drivers that they can resume operations */ pci_walk_bus(frozen_bus, eeh_report_resume, NULL); - return; + return frozen_pdn; excess_failures: /* @@ -364,17 +445,18 @@ excess_failures: * due to actual, failed cards. */ printk(KERN_ERR - "EEH: PCI device %s - %s has failed %d times \n" - "and has been permanently disabled. Please try reseating\n" - "this device or replacing it.\n", - drv_str, pci_str, frozen_pdn->eeh_freeze_count); + "EEH: PCI device at location=%s driver=%s pci addr=%s \n" + "has failed %d times and has been permanently disabled. \n" + "Please try reseating this device or replacing it.\n", + location, drv_str, pci_str, frozen_pdn->eeh_freeze_count); goto perm_error; hard_fail: printk(KERN_ERR - "EEH: Unable to recover from failure of PCI device %s - %s\n" + "EEH: Unable to recover from failure of PCI device " + "at location=%s driver=%s pci addr=%s \n" "Please try reseating this device or replacing it.\n", - drv_str, pci_str); + location, drv_str, pci_str); perm_error: eeh_slot_error_detail(frozen_pdn, 2 /* Permanent Error */); @@ -384,6 +466,8 @@ perm_error: /* Shut down the device drivers for good. */ pcibios_remove_pci_devices(frozen_bus); + + return NULL; } /* ---------- end of file ---------- */