X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fpowerpc%2Fplatforms%2Fpseries%2Feeh_event.c;h=49037edf7d3999545c9190d2d7fd0e0594330f49;hb=refs%2Fheads%2Fvserver;hp=9a9961f27480d5736981c5982716ee17fc7c307e;hpb=76828883507a47dae78837ab5dec5a5b4513c667;p=linux-2.6.git diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c index 9a9961f27..49037edf7 100644 --- a/arch/powerpc/platforms/pseries/eeh_event.c +++ b/arch/powerpc/platforms/pseries/eeh_event.c @@ -18,8 +18,11 @@ * Copyright (c) 2005 Linas Vepstas */ +#include #include +#include #include +#include #include #include @@ -32,55 +35,65 @@ */ /* EEH event workqueue setup. */ -static spinlock_t eeh_eventlist_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(eeh_eventlist_lock); LIST_HEAD(eeh_eventlist); -static void eeh_thread_launcher(void *); -DECLARE_WORK(eeh_event_wq, eeh_thread_launcher, NULL); +static void eeh_thread_launcher(struct work_struct *); +DECLARE_WORK(eeh_event_wq, eeh_thread_launcher); + +/* Serialize reset sequences for a given pci device */ +DEFINE_MUTEX(eeh_event_mutex); /** - * eeh_event_handler - dispatch EEH events. The detection of a frozen - * slot can occur inside an interrupt, where it can be hard to do - * anything about it. The goal of this routine is to pull these - * detection events out of the context of the interrupt handler, and - * re-dispatch them for processing at a later time in a normal context. - * + * eeh_event_handler - dispatch EEH events. * @dummy - unused + * + * The detection of a frozen slot can occur inside an interrupt, + * where it can be hard to do anything about it. The goal of this + * routine is to pull these detection events out of the context + * of the interrupt handler, and re-dispatch them for processing + * at a later time in a normal context. */ static int eeh_event_handler(void * dummy) { unsigned long flags; struct eeh_event *event; + struct pci_dn *pdn; daemonize ("eehd"); + set_current_state(TASK_INTERRUPTIBLE); - while (1) { - set_current_state(TASK_INTERRUPTIBLE); + spin_lock_irqsave(&eeh_eventlist_lock, flags); + event = NULL; - spin_lock_irqsave(&eeh_eventlist_lock, flags); - event = NULL; + /* Unqueue the event, get ready to process. */ + if (!list_empty(&eeh_eventlist)) { + event = list_entry(eeh_eventlist.next, struct eeh_event, list); + list_del(&event->list); + } + spin_unlock_irqrestore(&eeh_eventlist_lock, flags); - /* Unqueue the event, get ready to process. */ - if (!list_empty(&eeh_eventlist)) { - event = list_entry(eeh_eventlist.next, struct eeh_event, list); - list_del(&event->list); - } - - if (event) - eeh_mark_slot(event->dn, EEH_MODE_RECOVERING); + if (event == NULL) + return 0; - spin_unlock_irqrestore(&eeh_eventlist_lock, flags); - if (event == NULL) - break; + /* Serialize processing of EEH events */ + mutex_lock(&eeh_event_mutex); + eeh_mark_slot(event->dn, EEH_MODE_RECOVERING); - printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n", - pci_name(event->dev)); + printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n", + pci_name(event->dev)); - handle_eeh_events(event); + pdn = handle_eeh_events(event); - eeh_clear_slot(event->dn, EEH_MODE_RECOVERING); + eeh_clear_slot(event->dn, EEH_MODE_RECOVERING); + pci_dev_put(event->dev); + kfree(event); + mutex_unlock(&eeh_event_mutex); - pci_dev_put(event->dev); - kfree(event); + /* If there are no new errors after an hour, clear the counter. */ + if (pdn && pdn->eeh_freeze_count>0) { + msleep_interruptible (3600*1000); + if (pdn->eeh_freeze_count>0) + pdn->eeh_freeze_count--; } return 0; @@ -88,10 +101,9 @@ static int eeh_event_handler(void * dummy) /** * eeh_thread_launcher - * * @dummy - unused */ -static void eeh_thread_launcher(void *dummy) +static void eeh_thread_launcher(struct work_struct *dummy) { if (kernel_thread(eeh_event_handler, NULL, CLONE_KERNEL) < 0) printk(KERN_ERR "Failed to start EEH daemon\n"); @@ -112,7 +124,15 @@ int eeh_send_failure_event (struct device_node *dn, { unsigned long flags; struct eeh_event *event; + const char *location; + if (!mem_init_done) { + printk(KERN_ERR "EEH: event during early boot not handled\n"); + location = get_property(dn, "ibm,loc-code", NULL); + printk(KERN_ERR "EEH: device node = %s\n", dn->full_name); + printk(KERN_ERR "EEH: PCI location = %s\n", location); + return 1; + } event = kmalloc(sizeof(*event), GFP_ATOMIC); if (event == NULL) { printk (KERN_ERR "EEH: out of memory, event not handled\n");