/*
 *      linux/kernel/softirq.c
 *
 *      Copyright (C) 1992 Linus Torvalds
 *
 * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 */

#include <linux/module.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>

#include <asm/irq.h>
/*
   - No shared variables, all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself
     by its own spinlocks.
   - Even if a softirq is serialized, only the local cpu is marked for
     execution. Hence, we get something sort of weak cpu binding.
     Though it is still not clear whether this results in better
     locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized wrt themselves.
 */
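
/*
 * Illustrative sketch (not part of the original file): a handler that does
 * need serialization takes its own spinlock, as described above, since the
 * same softirq action may run concurrently on other CPUs.  The lock and
 * handler names are hypothetical; if the data were also touched from hardirq
 * context, spin_lock_irqsave() would be needed instead.
 *
 *	static spinlock_t example_lock = SPIN_LOCK_UNLOCKED;
 *
 *	static void example_softirq_action(struct softirq_action *a)
 *	{
 *		spin_lock(&example_lock);
 *		...touch state shared with the same softirq on other CPUs...
 *		spin_unlock(&example_lock);
 *	}
 */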

#ifndef __ARCH_IRQ_STAT
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;

static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);

/*
 * We cannot loop indefinitely here to avoid userspace starvation,
 * but we also don't want to introduce a worst case 1/HZ latency
 * to the pending events, so let the scheduler balance
 * the softirq load for us.
 */
static inline void wakeup_softirqd(void)
{
        /* Interrupts are disabled: no need to stop preemption */
        struct task_struct *tsk = __get_cpu_var(ksoftirqd);

        if (tsk && tsk->state != TASK_RUNNING)
                wake_up_process(tsk);
}

/*
 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
 * and we fall back to softirqd after that.
 *
 * This number has been established via experimentation.
 * The two things to balance are latency and fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_RESTART 10

asmlinkage void __do_softirq(void)
{
        struct softirq_action *h;
        __u32 pending;
        int max_restart = MAX_SOFTIRQ_RESTART;
        int cpu;

        pending = local_softirq_pending();

        local_bh_disable();
        cpu = smp_processor_id();
restart:
        /* Reset the pending bitmask before enabling irqs */
        local_softirq_pending() = 0;

        local_irq_enable();

        h = softirq_vec;

        do {
                if (pending & 1) {
                        h->action(h);
                        rcu_bh_qsctr_inc(cpu);
                }
                h++;
                pending >>= 1;
        } while (pending);

        local_irq_disable();

        pending = local_softirq_pending();
        if (pending && --max_restart)
                goto restart;

        if (pending)
                wakeup_softirqd();

        __local_bh_enable();
}

#ifndef __ARCH_HAS_DO_SOFTIRQ

asmlinkage void do_softirq(void)
{
        __u32 pending;
        unsigned long flags;

        if (in_interrupt())
                return;

        local_irq_save(flags);

        pending = local_softirq_pending();

        if (pending)
                __do_softirq();

        local_irq_restore(flags);
}

EXPORT_SYMBOL(do_softirq);

#endif

void local_bh_enable(void)
{
        WARN_ON(irqs_disabled());
        /*
         * Keep preemption disabled until we are done with
         * softirq processing:
         */
        preempt_count() -= SOFTIRQ_OFFSET - 1;

        if (unlikely(!in_interrupt() && local_softirq_pending()))
                do_softirq();

        dec_preempt_count();
        preempt_check_resched();
}
EXPORT_SYMBOL(local_bh_enable);

/*
 * This function must run with irqs disabled!
 */
inline fastcall void raise_softirq_irqoff(unsigned int nr)
{
        __raise_softirq_irqoff(nr);

        /*
         * If we're in an interrupt or softirq, we're done
         * (this also catches softirq-disabled code). We will
         * actually run the softirq once we return from
         * the irq or softirq.
         *
         * Otherwise we wake up ksoftirqd to make sure we
         * schedule the softirq soon.
         */
        if (!in_interrupt())
                wakeup_softirqd();
}

EXPORT_SYMBOL(raise_softirq_irqoff);

void fastcall raise_softirq(unsigned int nr)
{
        unsigned long flags;

        local_irq_save(flags);
        raise_softirq_irqoff(nr);
        local_irq_restore(flags);
}

void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
{
        softirq_vec[nr].data = data;
        softirq_vec[nr].action = action;
}

EXPORT_SYMBOL(open_softirq);
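
/*
 * Illustrative sketch (not part of the original file): a subsystem wires up
 * its action once at init time and then raises the softirq, typically from a
 * hardirq handler.  EXAMPLE_SOFTIRQ and example_softirq_action are
 * hypothetical; a real softirq number comes from the fixed enum in
 * <linux/interrupt.h> (HI_SOFTIRQ, NET_TX_SOFTIRQ, TASKLET_SOFTIRQ, ...).
 *
 *	open_softirq(EXAMPLE_SOFTIRQ, example_softirq_action, NULL);
 *
 *	raise_softirq(EXAMPLE_SOFTIRQ);		(saves/restores irq state itself)
 *	raise_softirq_irqoff(EXAMPLE_SOFTIRQ);	(when irqs are already disabled)
 */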

/* Tasklets */
struct tasklet_head
{
        struct tasklet_struct *list;
};

/* Some compilers disobey section attribute on statics when not
   initialized -- RR */
static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };

void fastcall __tasklet_schedule(struct tasklet_struct *t)
{
        unsigned long flags;

        local_irq_save(flags);
        t->next = __get_cpu_var(tasklet_vec).list;
        __get_cpu_var(tasklet_vec).list = t;
        raise_softirq_irqoff(TASKLET_SOFTIRQ);
        local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_schedule);

void fastcall __tasklet_hi_schedule(struct tasklet_struct *t)
{
        unsigned long flags;

        local_irq_save(flags);
        t->next = __get_cpu_var(tasklet_hi_vec).list;
        __get_cpu_var(tasklet_hi_vec).list = t;
        raise_softirq_irqoff(HI_SOFTIRQ);
        local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_hi_schedule);

static void tasklet_action(struct softirq_action *a)
{
        struct tasklet_struct *list;

        local_irq_disable();
        list = __get_cpu_var(tasklet_vec).list;
        __get_cpu_var(tasklet_vec).list = NULL;
        local_irq_enable();

        while (list) {
                struct tasklet_struct *t = list;

                list = list->next;

                if (tasklet_trylock(t)) {
                        if (!atomic_read(&t->count)) {
                                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
                                        BUG();
                                t->func(t->data);
                                tasklet_unlock(t);
                                continue;
                        }
                        tasklet_unlock(t);
                }

                local_irq_disable();
                t->next = __get_cpu_var(tasklet_vec).list;
                __get_cpu_var(tasklet_vec).list = t;
                __raise_softirq_irqoff(TASKLET_SOFTIRQ);
                local_irq_enable();
        }
}

static void tasklet_hi_action(struct softirq_action *a)
{
        struct tasklet_struct *list;

        local_irq_disable();
        list = __get_cpu_var(tasklet_hi_vec).list;
        __get_cpu_var(tasklet_hi_vec).list = NULL;
        local_irq_enable();

        while (list) {
                struct tasklet_struct *t = list;

                list = list->next;

                if (tasklet_trylock(t)) {
                        if (!atomic_read(&t->count)) {
                                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
                                        BUG();
                                t->func(t->data);
                                tasklet_unlock(t);
                                continue;
                        }
                        tasklet_unlock(t);
                }

                local_irq_disable();
                t->next = __get_cpu_var(tasklet_hi_vec).list;
                __get_cpu_var(tasklet_hi_vec).list = t;
                __raise_softirq_irqoff(HI_SOFTIRQ);
                local_irq_enable();
        }
}

void tasklet_init(struct tasklet_struct *t,
                  void (*func)(unsigned long), unsigned long data)
{
        t->next = NULL;
        t->state = 0;
        atomic_set(&t->count, 0);
        t->func = func;
        t->data = data;
}

EXPORT_SYMBOL(tasklet_init);
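
/*
 * Illustrative sketch (not part of the original file): typical driver use of
 * the tasklet API around the functions above; the names are hypothetical.
 * DECLARE_TASKLET() in <linux/interrupt.h> can replace the explicit
 * tasklet_init() for statically allocated tasklets.
 *
 *	static void example_tasklet_func(unsigned long data)
 *	{
 *		...runs in softirq context, never on two CPUs at once...
 *	}
 *
 *	static struct tasklet_struct example_tasklet;
 *
 *	tasklet_init(&example_tasklet, example_tasklet_func, 0);
 *	tasklet_schedule(&example_tasklet);	(e.g. from an interrupt handler)
 *	tasklet_kill(&example_tasklet);		(on teardown, process context only)
 */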

void tasklet_kill(struct tasklet_struct *t)
{
        if (in_interrupt())
                printk("Attempt to kill tasklet from interrupt\n");

        while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
                do
                        yield();
                while (test_bit(TASKLET_STATE_SCHED, &t->state));
        }
        tasklet_unlock_wait(t);
        clear_bit(TASKLET_STATE_SCHED, &t->state);
}

EXPORT_SYMBOL(tasklet_kill);

struct tasklet_head saved_tasklet;

void dump_clear_tasklet(void)
{
        saved_tasklet.list = __get_cpu_var(tasklet_vec).list;
        __get_cpu_var(tasklet_vec).list = NULL;
}

EXPORT_SYMBOL_GPL(dump_clear_tasklet);

void dump_run_tasklet(void)
{
        struct tasklet_struct *list;

        list = __get_cpu_var(tasklet_vec).list;
        __get_cpu_var(tasklet_vec).list = NULL;

        while (list) {
                struct tasklet_struct *t = list;
                list = list->next;

                if (!atomic_read(&t->count) &&
                    (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)))
                        t->func(t->data);

                t->next = __get_cpu_var(tasklet_vec).list;
                __get_cpu_var(tasklet_vec).list = t;
        }
}

EXPORT_SYMBOL_GPL(dump_run_tasklet);

void __init softirq_init(void)
{
        open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
        open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
}

static int ksoftirqd(void * __bind_cpu)
{
        set_user_nice(current, 19);
        current->flags |= PF_NOFREEZE;

        set_current_state(TASK_INTERRUPTIBLE);

        while (!kthread_should_stop()) {
                if (!local_softirq_pending())
                        schedule();

                __set_current_state(TASK_RUNNING);

                while (local_softirq_pending()) {
                        /* Preempt disable stops cpu going offline.
                           If already offline, we'll be on wrong CPU:
                           don't process */
                        preempt_disable();
                        if (cpu_is_offline((long)__bind_cpu))
                                goto wait_to_die;
                        do_softirq();
                        preempt_enable();
                        cond_resched();
                }

                set_current_state(TASK_INTERRUPTIBLE);
        }
        __set_current_state(TASK_RUNNING);
        return 0;

wait_to_die:
        preempt_enable();
        /* Wait for kthread_stop */
        set_current_state(TASK_INTERRUPTIBLE);
        while (!kthread_should_stop()) {
                schedule();
                set_current_state(TASK_INTERRUPTIBLE);
        }
        __set_current_state(TASK_RUNNING);
        return 0;
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * tasklet_kill_immediate is called to remove a tasklet which can already be
 * scheduled for execution on @cpu.
 *
 * Unlike tasklet_kill, this function removes the tasklet
 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
 *
 * When this function is called, @cpu must be in the CPU_DEAD state.
 */
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
        struct tasklet_struct **i;

        BUG_ON(cpu_online(cpu));
        BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));

        if (!test_bit(TASKLET_STATE_SCHED, &t->state))
                return;

        /* CPU is dead, so no lock needed. */
        for (i = &per_cpu(tasklet_vec, cpu).list; *i; i = &(*i)->next) {
                if (*i == t) {
                        *i = t->next;
                        return;
                }
        }
        BUG();
}

static void takeover_tasklets(unsigned int cpu)
{
        struct tasklet_struct **i;

        /* CPU is dead, so no lock needed. */
        local_irq_disable();

        /* Find end, append list for that CPU. */
        for (i = &__get_cpu_var(tasklet_vec).list; *i; i = &(*i)->next);
        *i = per_cpu(tasklet_vec, cpu).list;
        per_cpu(tasklet_vec, cpu).list = NULL;
        raise_softirq_irqoff(TASKLET_SOFTIRQ);

        for (i = &__get_cpu_var(tasklet_hi_vec).list; *i; i = &(*i)->next);
        *i = per_cpu(tasklet_hi_vec, cpu).list;
        per_cpu(tasklet_hi_vec, cpu).list = NULL;
        raise_softirq_irqoff(HI_SOFTIRQ);

        local_irq_enable();
}
#endif /* CONFIG_HOTPLUG_CPU */

static int __devinit cpu_callback(struct notifier_block *nfb,
                                  unsigned long action,
                                  void *hcpu)
{
        int hotcpu = (unsigned long)hcpu;
        struct task_struct *p;

        switch (action) {
        case CPU_UP_PREPARE:
                BUG_ON(per_cpu(tasklet_vec, hotcpu).list);
                BUG_ON(per_cpu(tasklet_hi_vec, hotcpu).list);
                p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
                if (IS_ERR(p)) {
                        printk("ksoftirqd for %i failed\n", hotcpu);
                        return NOTIFY_BAD;
                }
                kthread_bind(p, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = p;
                break;
        case CPU_ONLINE:
                wake_up_process(per_cpu(ksoftirqd, hotcpu));
                break;
#ifdef CONFIG_HOTPLUG_CPU
        case CPU_UP_CANCELED:
                /* Unbind so it can run.  Fall thru. */
                kthread_bind(per_cpu(ksoftirqd, hotcpu), smp_processor_id());
        case CPU_DEAD:
                p = per_cpu(ksoftirqd, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = NULL;
                kthread_stop(p);
                takeover_tasklets(hotcpu);
                break;
#endif /* CONFIG_HOTPLUG_CPU */
        }
        return NOTIFY_OK;
}

static struct notifier_block __devinitdata cpu_nfb = {
        .notifier_call = cpu_callback
};

__init int spawn_ksoftirqd(void)
{
        void *cpu = (void *)(long)smp_processor_id();
        cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
        cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
        register_cpu_notifier(&cpu_nfb);
        return 0;
}