/*
 *      linux/kernel/softirq.c
 *
 *      Copyright (C) 1992 Linus Torvalds
 *
 * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 */

#include <linux/module.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>

#include <asm/irq.h>
/*
   - No shared variables, all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself
     by its own spinlocks.
   - Even if a softirq is serialized, only the local CPU is marked for
     execution. Hence, we get a sort of weak CPU binding.
     Though it is still not clear whether this results in better
     locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized with respect to themselves.
 */
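
/*
 * Illustrative sketch, not part of this file: how a subsystem would
 * typically hook into the softirq machinery implemented below.
 * EXAMPLE_SOFTIRQ, example_softirq_action() and example_interrupt()
 * are hypothetical names used only for illustration; real users pick
 * one of the fixed softirq numbers declared in <linux/interrupt.h>
 * (HI_SOFTIRQ, NET_TX_SOFTIRQ, TASKLET_SOFTIRQ, ...).
 */
#if 0
static void example_softirq_action(struct softirq_action *a)
{
        /* Called from __do_softirq() with interrupts enabled and
           bottom halves disabled, on the CPU that raised the softirq.
           Must not sleep. */
}

static irqreturn_t example_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
        /* Mark the softirq pending on this CPU; it runs on irq_exit(). */
        raise_softirq(EXAMPLE_SOFTIRQ);
        return IRQ_HANDLED;
}

static int __init example_init(void)
{
        /* Install the handler in softirq_vec[]; no data pointer needed. */
        open_softirq(EXAMPLE_SOFTIRQ, example_softirq_action, NULL);
        return 0;
}
#endif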

#ifndef __ARCH_IRQ_STAT
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;

static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);

/*
 * We cannot loop indefinitely here to avoid userspace starvation,
 * but we also don't want to introduce a worst case 1/HZ latency
 * to the pending events, so let the scheduler balance
 * the softirq load for us.
 */
static inline void wakeup_softirqd(void)
{
        /* Interrupts are disabled: no need to stop preemption */
        struct task_struct *tsk = __get_cpu_var(ksoftirqd);

        if (tsk && tsk->state != TASK_RUNNING)
                wake_up_process(tsk);
}

/*
 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
 * and we fall back to softirqd after that.
 *
 * This number has been established via experimentation.
 * The two things to balance are latency and fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_RESTART 10

asmlinkage void __do_softirq(void)
{
        struct softirq_action *h;
        __u32 pending;
        int max_restart = MAX_SOFTIRQ_RESTART;
        int cpu;

        pending = local_softirq_pending();

        local_bh_disable();
        cpu = smp_processor_id();
restart:
        /* Reset the pending bitmask before enabling irqs */
        local_softirq_pending() = 0;

        local_irq_enable();

        h = softirq_vec;

        do {
                if (pending & 1) {
                        h->action(h);
                        rcu_bh_qsctr_inc(cpu);
                }
                h++;
                pending >>= 1;
        } while (pending);

        local_irq_disable();

        pending = local_softirq_pending();
        if (pending && --max_restart)
                goto restart;

        if (pending)
                wakeup_softirqd();

        __local_bh_enable();
}

#ifndef __ARCH_HAS_DO_SOFTIRQ

asmlinkage void do_softirq(void)
{
        __u32 pending;
        unsigned long flags;

        if (in_interrupt())
                return;

        local_irq_save(flags);

        pending = local_softirq_pending();

        if (pending)
                __do_softirq();

        local_irq_restore(flags);
}

EXPORT_SYMBOL(do_softirq);

#endif

void local_bh_enable(void)
{
        WARN_ON(irqs_disabled());
        /*
         * Keep preemption disabled until we are done with
         * softirq processing:
         */
        sub_preempt_count(SOFTIRQ_OFFSET - 1);

        if (unlikely(!in_interrupt() && local_softirq_pending()))
                do_softirq();

        dec_preempt_count();
        preempt_check_resched();
}
EXPORT_SYMBOL(local_bh_enable);
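
/*
 * Illustrative sketch, not part of this file: the usual way process
 * context keeps the local CPU's softirqs away while touching data that
 * a softirq handler also uses.  struct example_stats, the per-CPU
 * variable example_stats and example_update_stats() are hypothetical.
 */
#if 0
struct example_stats {
        unsigned long packets;
};
static DEFINE_PER_CPU(struct example_stats, example_stats);

static void example_update_stats(unsigned long packets)
{
        /* No softirq (and no preemption) can occur on this CPU between
           the two calls, so the per-CPU counter cannot be modified
           underneath us. */
        local_bh_disable();
        __get_cpu_var(example_stats).packets += packets;
        local_bh_enable();
}
#endif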

#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
# define invoke_softirq()       __do_softirq()
#else
# define invoke_softirq()       do_softirq()
#endif

/*
 * Exit an interrupt context. Process softirqs if needed and possible:
 */
void irq_exit(void)
{
        account_system_vtime(current);
        sub_preempt_count(IRQ_EXIT_OFFSET);
        if (!in_interrupt() && local_softirq_pending())
                invoke_softirq();
        preempt_enable_no_resched();
}

/*
 * This function must run with irqs disabled!
 */
inline fastcall void raise_softirq_irqoff(unsigned int nr)
{
        __raise_softirq_irqoff(nr);

        /*
         * If we're in an interrupt or softirq, we're done
         * (this also catches softirq-disabled code). We will
         * actually run the softirq once we return from
         * the irq or softirq.
         *
         * Otherwise we wake up ksoftirqd to make sure we
         * schedule the softirq soon.
         */
        if (!in_interrupt())
                wakeup_softirqd();
}

EXPORT_SYMBOL(raise_softirq_irqoff);

void fastcall raise_softirq(unsigned int nr)
{
        unsigned long flags;

        local_irq_save(flags);
        raise_softirq_irqoff(nr);
        local_irq_restore(flags);
}

void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
{
        softirq_vec[nr].data = data;
        softirq_vec[nr].action = action;
}

EXPORT_SYMBOL(open_softirq);

/* Tasklets */
struct tasklet_head
{
        struct tasklet_struct *list;
};

/* Some compilers disobey section attribute on statics when not
   initialized -- RR */
static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };

void fastcall __tasklet_schedule(struct tasklet_struct *t)
{
        unsigned long flags;

        local_irq_save(flags);
        t->next = __get_cpu_var(tasklet_vec).list;
        __get_cpu_var(tasklet_vec).list = t;
        raise_softirq_irqoff(TASKLET_SOFTIRQ);
        local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_schedule);

void fastcall __tasklet_hi_schedule(struct tasklet_struct *t)
{
        unsigned long flags;

        local_irq_save(flags);
        t->next = __get_cpu_var(tasklet_hi_vec).list;
        __get_cpu_var(tasklet_hi_vec).list = t;
        raise_softirq_irqoff(HI_SOFTIRQ);
        local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_hi_schedule);

static void tasklet_action(struct softirq_action *a)
{
        struct tasklet_struct *list;

        local_irq_disable();
        list = __get_cpu_var(tasklet_vec).list;
        __get_cpu_var(tasklet_vec).list = NULL;
        local_irq_enable();

        while (list) {
                struct tasklet_struct *t = list;

                list = list->next;

                if (tasklet_trylock(t)) {
                        if (!atomic_read(&t->count)) {
                                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
                                        BUG();
                                t->func(t->data);
                                tasklet_unlock(t);
                                continue;
                        }
                        tasklet_unlock(t);
                }

                /* Tasklet is running on another CPU or is disabled:
                   requeue it and retry on a later pass. */
                local_irq_disable();
                t->next = __get_cpu_var(tasklet_vec).list;
                __get_cpu_var(tasklet_vec).list = t;
                __raise_softirq_irqoff(TASKLET_SOFTIRQ);
                local_irq_enable();
        }
}

static void tasklet_hi_action(struct softirq_action *a)
{
        struct tasklet_struct *list;

        local_irq_disable();
        list = __get_cpu_var(tasklet_hi_vec).list;
        __get_cpu_var(tasklet_hi_vec).list = NULL;
        local_irq_enable();

        while (list) {
                struct tasklet_struct *t = list;

                list = list->next;

                if (tasklet_trylock(t)) {
                        if (!atomic_read(&t->count)) {
                                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
                                        BUG();
                                t->func(t->data);
                                tasklet_unlock(t);
                                continue;
                        }
                        tasklet_unlock(t);
                }

                /* Tasklet is running on another CPU or is disabled:
                   requeue it and retry on a later pass. */
                local_irq_disable();
                t->next = __get_cpu_var(tasklet_hi_vec).list;
                __get_cpu_var(tasklet_hi_vec).list = t;
                __raise_softirq_irqoff(HI_SOFTIRQ);
                local_irq_enable();
        }
}


void tasklet_init(struct tasklet_struct *t,
                  void (*func)(unsigned long), unsigned long data)
{
        t->next = NULL;
        t->state = 0;
        atomic_set(&t->count, 0);
        t->func = func;
        t->data = data;
}

EXPORT_SYMBOL(tasklet_init);

void tasklet_kill(struct tasklet_struct *t)
{
        if (in_interrupt())
                printk("Attempt to kill tasklet from interrupt\n");

        while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
                do
                        yield();
                while (test_bit(TASKLET_STATE_SCHED, &t->state));
        }
        tasklet_unlock_wait(t);
        clear_bit(TASKLET_STATE_SCHED, &t->state);
}

EXPORT_SYMBOL(tasklet_kill);
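
/*
 * Illustrative sketch, not part of this file: typical driver use of the
 * tasklet API implemented above.  All example_* names and the layout of
 * struct example_dev are hypothetical.
 */
#if 0
struct example_dev {
        struct tasklet_struct tasklet;
        unsigned long events;
};

static void example_tasklet_func(unsigned long data)
{
        struct example_dev *dev = (struct example_dev *)data;

        /* Runs in softirq context on the CPU that scheduled it; at most
           one CPU executes this tasklet at a time.  Must not sleep. */
        dev->events++;
}

static irqreturn_t example_isr(int irq, void *dev_id, struct pt_regs *regs)
{
        struct example_dev *dev = dev_id;

        /* Defer the heavier work; schedules issued before the tasklet
           runs are merged into a single run. */
        tasklet_schedule(&dev->tasklet);
        return IRQ_HANDLED;
}

static void example_setup(struct example_dev *dev)
{
        tasklet_init(&dev->tasklet, example_tasklet_func, (unsigned long)dev);
}

static void example_teardown(struct example_dev *dev)
{
        /* Ensure the tasklet is neither pending nor running before the
           device goes away.  Must be called from process context. */
        tasklet_kill(&dev->tasklet);
}
#endif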

struct tasklet_head saved_tasklet;

void dump_clear_tasklet(void)
{
        saved_tasklet.list = __get_cpu_var(tasklet_vec).list;
        __get_cpu_var(tasklet_vec).list = NULL;
}

EXPORT_SYMBOL_GPL(dump_clear_tasklet);

void dump_run_tasklet(void)
{
        struct tasklet_struct *list;

        list = __get_cpu_var(tasklet_vec).list;
        __get_cpu_var(tasklet_vec).list = NULL;

        while (list) {
                struct tasklet_struct *t = list;
                list = list->next;

                if (!atomic_read(&t->count) &&
                    (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)))
                                t->func(t->data);

                t->next = __get_cpu_var(tasklet_vec).list;
                __get_cpu_var(tasklet_vec).list = t;
        }
}

EXPORT_SYMBOL_GPL(dump_run_tasklet);

void __init softirq_init(void)
{
        open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
        open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
}

static int ksoftirqd(void * __bind_cpu)
{
        set_user_nice(current, 19);
        current->flags |= PF_NOFREEZE;

        set_current_state(TASK_INTERRUPTIBLE);

        while (!kthread_should_stop()) {
                preempt_disable();
                if (!local_softirq_pending()) {
                        preempt_enable_no_resched();
                        schedule();
                        preempt_disable();
                }

                __set_current_state(TASK_RUNNING);

                while (local_softirq_pending()) {
                        /* Preempt disable stops cpu going offline.
                           If already offline, we'll be on wrong CPU:
                           don't process */
                        if (cpu_is_offline((long)__bind_cpu))
                                goto wait_to_die;
                        do_softirq();
                        preempt_enable_no_resched();
                        cond_resched();
                        preempt_disable();
                }
                preempt_enable();
                set_current_state(TASK_INTERRUPTIBLE);
        }
        __set_current_state(TASK_RUNNING);
        return 0;

wait_to_die:
        preempt_enable();
        /* Wait for kthread_stop */
        set_current_state(TASK_INTERRUPTIBLE);
        while (!kthread_should_stop()) {
                schedule();
                set_current_state(TASK_INTERRUPTIBLE);
        }
        __set_current_state(TASK_RUNNING);
        return 0;
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * tasklet_kill_immediate is called to remove a tasklet which can already be
 * scheduled for execution on @cpu.
 *
 * Unlike tasklet_kill, this function removes the tasklet
 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
 *
 * When this function is called, @cpu must be in the CPU_DEAD state.
 */
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
        struct tasklet_struct **i;

        BUG_ON(cpu_online(cpu));
        BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));

        if (!test_bit(TASKLET_STATE_SCHED, &t->state))
                return;

        /* CPU is dead, so no lock needed. */
        for (i = &per_cpu(tasklet_vec, cpu).list; *i; i = &(*i)->next) {
                if (*i == t) {
                        *i = t->next;
                        return;
                }
        }
        BUG();
}

static void takeover_tasklets(unsigned int cpu)
{
        struct tasklet_struct **i;

        /* CPU is dead, so no lock needed. */
        local_irq_disable();

        /* Find end, append list for that CPU. */
        for (i = &__get_cpu_var(tasklet_vec).list; *i; i = &(*i)->next);
        *i = per_cpu(tasklet_vec, cpu).list;
        per_cpu(tasklet_vec, cpu).list = NULL;
        raise_softirq_irqoff(TASKLET_SOFTIRQ);

        for (i = &__get_cpu_var(tasklet_hi_vec).list; *i; i = &(*i)->next);
        *i = per_cpu(tasklet_hi_vec, cpu).list;
        per_cpu(tasklet_hi_vec, cpu).list = NULL;
        raise_softirq_irqoff(HI_SOFTIRQ);

        local_irq_enable();
}
#endif /* CONFIG_HOTPLUG_CPU */

static int __devinit cpu_callback(struct notifier_block *nfb,
                                  unsigned long action,
                                  void *hcpu)
{
        int hotcpu = (unsigned long)hcpu;
        struct task_struct *p;

        switch (action) {
        case CPU_UP_PREPARE:
                BUG_ON(per_cpu(tasklet_vec, hotcpu).list);
                BUG_ON(per_cpu(tasklet_hi_vec, hotcpu).list);
                p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
                if (IS_ERR(p)) {
                        printk("ksoftirqd for %i failed\n", hotcpu);
                        return NOTIFY_BAD;
                }
                kthread_bind(p, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = p;
                break;
        case CPU_ONLINE:
                wake_up_process(per_cpu(ksoftirqd, hotcpu));
                break;
#ifdef CONFIG_HOTPLUG_CPU
        case CPU_UP_CANCELED:
                /* Unbind so it can run.  Fall thru. */
                kthread_bind(per_cpu(ksoftirqd, hotcpu), smp_processor_id());
        case CPU_DEAD:
                p = per_cpu(ksoftirqd, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = NULL;
                kthread_stop(p);
                takeover_tasklets(hotcpu);
                break;
#endif /* CONFIG_HOTPLUG_CPU */
        }
        return NOTIFY_OK;
}

static struct notifier_block __devinitdata cpu_nfb = {
        .notifier_call = cpu_callback
};

__init int spawn_ksoftirqd(void)
{
        void *cpu = (void *)(long)smp_processor_id();
        cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
        cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
        register_cpu_notifier(&cpu_nfb);
        return 0;
}