/*
 *      linux/kernel/softirq.c
 *
 *      Copyright (C) 1992 Linus Torvalds
 *
 * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 */

#include <linux/module.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/smp.h>
#include <linux/vs_context.h>

#include <asm/irq.h>
/*
   - No shared variables, all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself
     by its own spinlocks.
   - Even if a softirq is serialized, only the local cpu is marked for
     execution. Hence, we get a sort of weak cpu binding, though it is
     still not clear whether this results in better locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized wrt itself.
 */

#ifndef __ARCH_IRQ_STAT
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;

static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);

/*
 * We cannot loop indefinitely here to avoid userspace starvation,
 * but we also don't want to introduce a worst case 1/HZ latency
 * to the pending events, so let the scheduler balance the softirq
 * load for us.
 */
static inline void wakeup_softirqd(void)
{
        /* Interrupts are disabled: no need to stop preemption */
        struct task_struct *tsk = __get_cpu_var(ksoftirqd);

        if (tsk && tsk->state != TASK_RUNNING)
                wake_up_process(tsk);
}

/*
 * This one is for softirq.c-internal use,
 * where hardirqs are disabled legitimately:
 */
#ifdef CONFIG_TRACE_IRQFLAGS
static void __local_bh_disable(unsigned long ip)
{
        unsigned long flags;

        WARN_ON_ONCE(in_irq());

        raw_local_irq_save(flags);
        add_preempt_count(SOFTIRQ_OFFSET);
        /*
         * Were softirqs turned off above:
         */
        if (softirq_count() == SOFTIRQ_OFFSET)
                trace_softirqs_off(ip);
        raw_local_irq_restore(flags);
}
#else /* !CONFIG_TRACE_IRQFLAGS */
static inline void __local_bh_disable(unsigned long ip)
{
        add_preempt_count(SOFTIRQ_OFFSET);
        barrier();
}
#endif /* CONFIG_TRACE_IRQFLAGS */

void local_bh_disable(void)
{
        __local_bh_disable((unsigned long)__builtin_return_address(0));
}

EXPORT_SYMBOL(local_bh_disable);

void __local_bh_enable(void)
{
        WARN_ON_ONCE(in_irq());

        /*
         * softirqs should never be enabled by __local_bh_enable(),
         * it always nests inside local_bh_enable() sections:
         */
        WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);

        sub_preempt_count(SOFTIRQ_OFFSET);
}
EXPORT_SYMBOL_GPL(__local_bh_enable);

/*
 * Special-case - softirqs can safely be enabled in
 * cond_resched_softirq(), or by __do_softirq(),
 * without processing still-pending softirqs:
 */
void _local_bh_enable(void)
{
        WARN_ON_ONCE(in_irq());
        WARN_ON_ONCE(!irqs_disabled());

        if (softirq_count() == SOFTIRQ_OFFSET)
                trace_softirqs_on((unsigned long)__builtin_return_address(0));
        sub_preempt_count(SOFTIRQ_OFFSET);
}

EXPORT_SYMBOL(_local_bh_enable);

void local_bh_enable(void)
{
#ifdef CONFIG_TRACE_IRQFLAGS
        unsigned long flags;

        WARN_ON_ONCE(in_irq());
#endif
        WARN_ON_ONCE(irqs_disabled());

#ifdef CONFIG_TRACE_IRQFLAGS
        local_irq_save(flags);
#endif
        /*
         * Are softirqs going to be turned on now:
         */
        if (softirq_count() == SOFTIRQ_OFFSET)
                trace_softirqs_on((unsigned long)__builtin_return_address(0));
        /*
         * Keep preemption disabled until we are done with
         * softirq processing:
         */
        sub_preempt_count(SOFTIRQ_OFFSET - 1);

        if (unlikely(!in_interrupt() && local_softirq_pending()))
                do_softirq();

        dec_preempt_count();
#ifdef CONFIG_TRACE_IRQFLAGS
        local_irq_restore(flags);
#endif
        preempt_check_resched();
}
EXPORT_SYMBOL(local_bh_enable);

void local_bh_enable_ip(unsigned long ip)
{
#ifdef CONFIG_TRACE_IRQFLAGS
        unsigned long flags;

        WARN_ON_ONCE(in_irq());

        local_irq_save(flags);
#endif
        /*
         * Are softirqs going to be turned on now:
         */
        if (softirq_count() == SOFTIRQ_OFFSET)
                trace_softirqs_on(ip);
        /*
         * Keep preemption disabled until we are done with
         * softirq processing:
         */
        sub_preempt_count(SOFTIRQ_OFFSET - 1);

        if (unlikely(!in_interrupt() && local_softirq_pending()))
                do_softirq();

        dec_preempt_count();
#ifdef CONFIG_TRACE_IRQFLAGS
        local_irq_restore(flags);
#endif
        preempt_check_resched();
}
EXPORT_SYMBOL(local_bh_enable_ip);
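
/*
 * Illustrative sketch, not part of the original file: the usual reason to
 * pair local_bh_disable()/local_bh_enable() is to protect per-cpu data that
 * is also modified from softirq context.  The names my_counter and
 * my_add_to_counter() below are hypothetical.
 */
#if 0
static DEFINE_PER_CPU(unsigned long, my_counter);

static void my_add_to_counter(unsigned long delta)
{
        /*
         * No softirq can run on this cpu between the disable/enable
         * pair (and preemption is off as well), so the read-modify-write
         * cannot race with a softirq handler touching the same counter.
         */
        local_bh_disable();
        __get_cpu_var(my_counter) += delta;
        local_bh_enable();
}
#endif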

/*
 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
 * and we fall back to softirqd after that.
 *
 * This number has been established via experimentation.
 * The two things to balance are latency and fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_RESTART 10

asmlinkage void __do_softirq(void)
{
        struct vx_info_save vxis;
        struct softirq_action *h;
        __u32 pending;
        int max_restart = MAX_SOFTIRQ_RESTART;
        int cpu;

        pending = local_softirq_pending();
        account_system_vtime(current);

        __local_bh_disable((unsigned long)__builtin_return_address(0));
        __enter_vx_admin(&vxis);
        trace_softirq_enter();

        cpu = smp_processor_id();
restart:
        /* Reset the pending bitmask before enabling irqs */
        set_softirq_pending(0);

        local_irq_enable();

        h = softirq_vec;

        do {
                if (pending & 1) {
                        h->action(h);
                        rcu_bh_qsctr_inc(cpu);
                }
                h++;
                pending >>= 1;
        } while (pending);

        local_irq_disable();

        pending = local_softirq_pending();
        if (pending && --max_restart)
                goto restart;

        if (pending)
                wakeup_softirqd();

        trace_softirq_exit();

        __leave_vx_admin(&vxis);
        account_system_vtime(current);
        _local_bh_enable();
}

#ifndef __ARCH_HAS_DO_SOFTIRQ

asmlinkage void do_softirq(void)
{
        __u32 pending;
        unsigned long flags;

        if (in_interrupt())
                return;

        local_irq_save(flags);

        pending = local_softirq_pending();

        if (pending)
                __do_softirq();

        local_irq_restore(flags);
}

EXPORT_SYMBOL(do_softirq);

#endif

#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
# define invoke_softirq()       __do_softirq()
#else
# define invoke_softirq()       do_softirq()
#endif

/*
 * Exit an interrupt context. Process softirqs if needed and possible:
 */
void irq_exit(void)
{
        account_system_vtime(current);
        trace_hardirq_exit();
        sub_preempt_count(IRQ_EXIT_OFFSET);
        if (!in_interrupt() && local_softirq_pending())
                invoke_softirq();
        preempt_enable_no_resched();
}

/*
 * This function must run with irqs disabled!
 */
inline fastcall void raise_softirq_irqoff(unsigned int nr)
{
        __raise_softirq_irqoff(nr);

        /*
         * If we're in an interrupt or softirq, we're done
         * (this also catches softirq-disabled code). We will
         * actually run the softirq once we return from
         * the irq or softirq.
         *
         * Otherwise we wake up ksoftirqd to make sure we
         * schedule the softirq soon.
         */
        if (!in_interrupt())
                wakeup_softirqd();
}

EXPORT_SYMBOL(raise_softirq_irqoff);

void fastcall raise_softirq(unsigned int nr)
{
        unsigned long flags;

        local_irq_save(flags);
        raise_softirq_irqoff(nr);
        local_irq_restore(flags);
}

void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
{
        softirq_vec[nr].data = data;
        softirq_vec[nr].action = action;
}
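
/*
 * Illustrative sketch, not part of the original file: a built-in subsystem
 * registers its handler once at init time with open_softirq() and later
 * marks it pending with raise_softirq().  MY_SOFTIRQ and my_softirq_action()
 * are hypothetical; the real users are the fixed entries (HI_SOFTIRQ,
 * NET_TX_SOFTIRQ, ...) declared in <linux/interrupt.h>.
 */
#if 0
static void my_softirq_action(struct softirq_action *a)
{
        /* Runs with irqs enabled and softirqs disabled, on the cpu that
           raised it; must not sleep. */
}

static int __init my_subsys_init(void)
{
        open_softirq(MY_SOFTIRQ, my_softirq_action, NULL);
        return 0;
}

/* Typically called from a hardirq handler: */
static void my_subsys_kick(void)
{
        raise_softirq(MY_SOFTIRQ);
}
#endif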

/* Tasklets */
struct tasklet_head
{
        struct tasklet_struct *list;
};

/* Some compilers disobey section attribute on statics when not
   initialized -- RR */
static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };

void fastcall __tasklet_schedule(struct tasklet_struct *t)
{
        unsigned long flags;

        local_irq_save(flags);
        t->next = __get_cpu_var(tasklet_vec).list;
        __get_cpu_var(tasklet_vec).list = t;
        raise_softirq_irqoff(TASKLET_SOFTIRQ);
        local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_schedule);

void fastcall __tasklet_hi_schedule(struct tasklet_struct *t)
{
        unsigned long flags;

        local_irq_save(flags);
        t->next = __get_cpu_var(tasklet_hi_vec).list;
        __get_cpu_var(tasklet_hi_vec).list = t;
        raise_softirq_irqoff(HI_SOFTIRQ);
        local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_hi_schedule);

static void tasklet_action(struct softirq_action *a)
{
        struct tasklet_struct *list;

        local_irq_disable();
        list = __get_cpu_var(tasklet_vec).list;
        __get_cpu_var(tasklet_vec).list = NULL;
        local_irq_enable();

        while (list) {
                struct tasklet_struct *t = list;

                list = list->next;

                if (tasklet_trylock(t)) {
                        if (!atomic_read(&t->count)) {
                                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
                                        BUG();
                                t->func(t->data);
                                tasklet_unlock(t);
                                continue;
                        }
                        tasklet_unlock(t);
                }

                local_irq_disable();
                t->next = __get_cpu_var(tasklet_vec).list;
                __get_cpu_var(tasklet_vec).list = t;
                __raise_softirq_irqoff(TASKLET_SOFTIRQ);
                local_irq_enable();
        }
}

static void tasklet_hi_action(struct softirq_action *a)
{
        struct tasklet_struct *list;

        local_irq_disable();
        list = __get_cpu_var(tasklet_hi_vec).list;
        __get_cpu_var(tasklet_hi_vec).list = NULL;
        local_irq_enable();

        while (list) {
                struct tasklet_struct *t = list;

                list = list->next;

                if (tasklet_trylock(t)) {
                        if (!atomic_read(&t->count)) {
                                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
                                        BUG();
                                t->func(t->data);
                                tasklet_unlock(t);
                                continue;
                        }
                        tasklet_unlock(t);
                }

                local_irq_disable();
                t->next = __get_cpu_var(tasklet_hi_vec).list;
                __get_cpu_var(tasklet_hi_vec).list = t;
                __raise_softirq_irqoff(HI_SOFTIRQ);
                local_irq_enable();
        }
}


void tasklet_init(struct tasklet_struct *t,
                  void (*func)(unsigned long), unsigned long data)
{
        t->next = NULL;
        t->state = 0;
        atomic_set(&t->count, 0);
        t->func = func;
        t->data = data;
}

EXPORT_SYMBOL(tasklet_init);
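
/*
 * Illustrative sketch, not part of the original file: typical driver-side
 * use of a tasklet to defer work out of a hardirq handler.  my_tasklet_fn(),
 * my_irq_handler() and my_teardown() are hypothetical names.
 */
#if 0
static void my_tasklet_fn(unsigned long data)
{
        /* Deferred work; runs in softirq context, serialized wrt itself. */
}

static DECLARE_TASKLET(my_tasklet, my_tasklet_fn, 0);

static irqreturn_t my_irq_handler(int irq, void *dev_id, struct pt_regs *regs)
{
        /* Do the minimum in hardirq context, defer the rest. */
        tasklet_schedule(&my_tasklet);
        return IRQ_HANDLED;
}

static void my_teardown(void)
{
        /* Make sure the tasklet is neither scheduled nor running. */
        tasklet_kill(&my_tasklet);
}
#endif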

void tasklet_kill(struct tasklet_struct *t)
{
        if (in_interrupt())
                printk("Attempt to kill tasklet from interrupt\n");

        while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
                do
                        yield();
                while (test_bit(TASKLET_STATE_SCHED, &t->state));
        }
        tasklet_unlock_wait(t);
        clear_bit(TASKLET_STATE_SCHED, &t->state);
}

EXPORT_SYMBOL(tasklet_kill);

void __init softirq_init(void)
{
        open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
        open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
}

static int ksoftirqd(void * __bind_cpu)
{
        set_user_nice(current, 19);
        current->flags |= PF_NOFREEZE;

        set_current_state(TASK_INTERRUPTIBLE);

        while (!kthread_should_stop()) {
                preempt_disable();
                if (!local_softirq_pending()) {
                        preempt_enable_no_resched();
                        schedule();
                        preempt_disable();
                }

                __set_current_state(TASK_RUNNING);

                while (local_softirq_pending()) {
                        /* Preempt disable stops cpu going offline.
                           If already offline, we'll be on wrong CPU:
                           don't process */
                        if (cpu_is_offline((long)__bind_cpu))
                                goto wait_to_die;
                        do_softirq();
                        preempt_enable_no_resched();
                        cond_resched();
                        preempt_disable();
                }
                preempt_enable();
                set_current_state(TASK_INTERRUPTIBLE);
        }
        __set_current_state(TASK_RUNNING);
        return 0;

wait_to_die:
        preempt_enable();
        /* Wait for kthread_stop */
        set_current_state(TASK_INTERRUPTIBLE);
        while (!kthread_should_stop()) {
                schedule();
                set_current_state(TASK_INTERRUPTIBLE);
        }
        __set_current_state(TASK_RUNNING);
        return 0;
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * tasklet_kill_immediate is called to remove a tasklet which can already be
 * scheduled for execution on @cpu.
 *
 * Unlike tasklet_kill, this function removes the tasklet
 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
 *
 * When this function is called, @cpu must be in the CPU_DEAD state.
 */
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
        struct tasklet_struct **i;

        BUG_ON(cpu_online(cpu));
        BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));

        if (!test_bit(TASKLET_STATE_SCHED, &t->state))
                return;

        /* CPU is dead, so no lock needed. */
        for (i = &per_cpu(tasklet_vec, cpu).list; *i; i = &(*i)->next) {
                if (*i == t) {
                        *i = t->next;
                        return;
                }
        }
        BUG();
}

static void takeover_tasklets(unsigned int cpu)
{
        struct tasklet_struct **i;

        /* CPU is dead, so no lock needed. */
        local_irq_disable();

        /* Find end, append list for that CPU. */
        for (i = &__get_cpu_var(tasklet_vec).list; *i; i = &(*i)->next);
        *i = per_cpu(tasklet_vec, cpu).list;
        per_cpu(tasklet_vec, cpu).list = NULL;
        raise_softirq_irqoff(TASKLET_SOFTIRQ);

        for (i = &__get_cpu_var(tasklet_hi_vec).list; *i; i = &(*i)->next);
        *i = per_cpu(tasklet_hi_vec, cpu).list;
        per_cpu(tasklet_hi_vec, cpu).list = NULL;
        raise_softirq_irqoff(HI_SOFTIRQ);

        local_irq_enable();
}
#endif /* CONFIG_HOTPLUG_CPU */

static int __cpuinit cpu_callback(struct notifier_block *nfb,
                                  unsigned long action,
                                  void *hcpu)
{
        int hotcpu = (unsigned long)hcpu;
        struct task_struct *p;

        switch (action) {
        case CPU_UP_PREPARE:
                BUG_ON(per_cpu(tasklet_vec, hotcpu).list);
                BUG_ON(per_cpu(tasklet_hi_vec, hotcpu).list);
                p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
                if (IS_ERR(p)) {
                        printk("ksoftirqd for %i failed\n", hotcpu);
                        return NOTIFY_BAD;
                }
                kthread_bind(p, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = p;
                break;
        case CPU_ONLINE:
                wake_up_process(per_cpu(ksoftirqd, hotcpu));
                break;
#ifdef CONFIG_HOTPLUG_CPU
        case CPU_UP_CANCELED:
                if (!per_cpu(ksoftirqd, hotcpu))
                        break;
                /* Unbind so it can run.  Fall thru. */
                kthread_bind(per_cpu(ksoftirqd, hotcpu),
                             any_online_cpu(cpu_online_map));
        case CPU_DEAD:
                p = per_cpu(ksoftirqd, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = NULL;
                kthread_stop(p);
                takeover_tasklets(hotcpu);
                break;
#endif /* CONFIG_HOTPLUG_CPU */
        }
        return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata cpu_nfb = {
        .notifier_call = cpu_callback
};

__init int spawn_ksoftirqd(void)
{
        void *cpu = (void *)(long)smp_processor_id();
        cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
        cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
        register_cpu_notifier(&cpu_nfb);
        return 0;
}

#ifdef CONFIG_SMP
/*
 * Call a function on all processors
 */
int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
{
        int ret = 0;

        preempt_disable();
        ret = smp_call_function(func, info, retry, wait);
        local_irq_disable();
        func(info);
        local_irq_enable();
        preempt_enable();
        return ret;
}
EXPORT_SYMBOL(on_each_cpu);
#endif
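
/*
 * Illustrative sketch, not part of the original file: on_each_cpu() runs the
 * callback on the local cpu directly and on all other online cpus via IPI.
 * my_flush_local() and my_flush_all_cpus() are hypothetical names.
 */
#if 0
static void my_flush_local(void *unused)
{
        /* Runs in irq-disabled/IPI context on each cpu; must not sleep. */
}

static void my_flush_all_cpus(void)
{
        /* retry = 0, wait = 1: return only after every cpu has run it. */
        on_each_cpu(my_flush_local, NULL, 0, 1);
}
#endif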