/*
 *      linux/kernel/softirq.c
 *
 *      Copyright (C) 1992 Linus Torvalds
 *
 * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 */

#include <linux/module.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>

#include <asm/irq.h>

/*
   - No shared variables, all data is CPU-local.
   - If a softirq needs serialization, let it serialize itself
     by its own spinlocks.
   - Even if a softirq is serialized, only the local CPU is marked for
     execution. Hence, we get a sort of weak CPU binding, though it is
     still not clear whether this results in better locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: each tasklet is serialized with respect to itself.
 */
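
/*
 * For illustration (a minimal sketch, not part of this file; the names
 * my_softirq_lock, my_work_list and my_softirq_action are hypothetical):
 * a softirq handler that does need serialization takes its own lock,
 * the core softirq code holds no global lock on its behalf:
 *
 *      static spinlock_t my_softirq_lock = SPIN_LOCK_UNLOCKED;
 *      static LIST_HEAD(my_work_list);
 *
 *      static void my_softirq_action(struct softirq_action *a)
 *      {
 *              spin_lock(&my_softirq_lock);
 *              ... drain my_work_list; other CPUs running the same
 *                  softirq serialize on my_softirq_lock ...
 *              spin_unlock(&my_softirq_lock);
 *      }
 */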

#ifndef __ARCH_IRQ_STAT
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;

static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);

/*
 * We cannot loop indefinitely here, to avoid userspace starvation,
 * but we also don't want to introduce a worst-case 1/HZ latency
 * for pending events, so we let the scheduler balance
 * the softirq load for us.
 */
static inline void wakeup_softirqd(void)
{
        /* Interrupts are disabled: no need to stop preemption */
        struct task_struct *tsk = __get_cpu_var(ksoftirqd);

        if (tsk && tsk->state != TASK_RUNNING)
                wake_up_process(tsk);
}

/*
 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
 * and we fall back to ksoftirqd after that.
 *
 * This number has been established via experimentation.
 * The two things to balance are latency and fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_RESTART 10

asmlinkage void __do_softirq(void)
{
        struct softirq_action *h;
        __u32 pending;
        int max_restart = MAX_SOFTIRQ_RESTART;
        int cpu;

        pending = local_softirq_pending();

        local_bh_disable();
        cpu = smp_processor_id();
restart:
        /* Reset the pending bitmask before enabling irqs */
        local_softirq_pending() = 0;

        local_irq_enable();

        h = softirq_vec;

        do {
                if (pending & 1) {
                        h->action(h);
                        rcu_bh_qsctr_inc(cpu);
                }
                h++;
                pending >>= 1;
        } while (pending);

        local_irq_disable();

        pending = local_softirq_pending();
        if (pending && --max_restart)
                goto restart;

        if (pending)
                wakeup_softirqd();

        __local_bh_enable();
}

#ifndef __ARCH_HAS_DO_SOFTIRQ

asmlinkage void do_softirq(void)
{
        __u32 pending;
        unsigned long flags;

        if (in_interrupt())
                return;

        local_irq_save(flags);

        pending = local_softirq_pending();

        if (pending)
                __do_softirq();

        local_irq_restore(flags);
}

EXPORT_SYMBOL(do_softirq);

#endif

void local_bh_enable(void)
{
        __local_bh_enable();
        WARN_ON(irqs_disabled());
        if (unlikely(!in_interrupt() &&
                     local_softirq_pending()))
                invoke_softirq();
        preempt_check_resched();
}
EXPORT_SYMBOL(local_bh_enable);
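
/*
 * Usage sketch (hypothetical names, not taken from this file): process
 * context that shares data with a softirq or tasklet brackets the access
 * with local_bh_disable()/local_bh_enable() so the softirq cannot run on
 * this CPU in between; on SMP the data additionally needs a lock, which
 * spin_lock_bh() combines with the bh-disable:
 *
 *      spin_lock_bh(&my_lock);
 *      list_add_tail(&my_item.list, &my_pending_list);
 *      spin_unlock_bh(&my_lock);
 *
 * spin_unlock_bh() ends in local_bh_enable(), which then runs any softirq
 * that became pending while bottom halves were disabled on this CPU.
 */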

/*
 * This function must run with irqs disabled!
 */
inline fastcall void raise_softirq_irqoff(unsigned int nr)
{
        __raise_softirq_irqoff(nr);

        /*
         * If we're in an interrupt or softirq, we're done
         * (this also catches softirq-disabled code). We will
         * actually run the softirq once we return from
         * the irq or softirq.
         *
         * Otherwise we wake up ksoftirqd to make sure we
         * schedule the softirq soon.
         */
        if (!in_interrupt())
                wakeup_softirqd();
}

EXPORT_SYMBOL(raise_softirq_irqoff);

void fastcall raise_softirq(unsigned int nr)
{
        unsigned long flags;

        local_irq_save(flags);
        raise_softirq_irqoff(nr);
        local_irq_restore(flags);
}

EXPORT_SYMBOL(raise_softirq);

void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
{
        softirq_vec[nr].data = data;
        softirq_vec[nr].action = action;
}

EXPORT_SYMBOL(open_softirq);
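
/*
 * Usage sketch (MY_SOFTIRQ and my_action are hypothetical; a real softirq
 * number has to be added to the enum in <linux/interrupt.h> first, which
 * is why most code uses tasklets instead of a private softirq):
 *
 *      static void my_action(struct softirq_action *a)
 *      {
 *              ... runs in softirq context on the CPU that raised it,
 *                  with hardware interrupts enabled ...
 *      }
 *
 *      open_softirq(MY_SOFTIRQ, my_action, NULL);
 *
 * and later, typically from an interrupt handler or timer:
 *
 *      raise_softirq(MY_SOFTIRQ);
 */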

/* Tasklets */
struct tasklet_head
{
        struct tasklet_struct *list;
};

/* Some compilers disobey section attribute on statics when not
   initialized -- RR */
static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };

void fastcall __tasklet_schedule(struct tasklet_struct *t)
{
        unsigned long flags;

        local_irq_save(flags);
        t->next = __get_cpu_var(tasklet_vec).list;
        __get_cpu_var(tasklet_vec).list = t;
        raise_softirq_irqoff(TASKLET_SOFTIRQ);
        local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_schedule);

void fastcall __tasklet_hi_schedule(struct tasklet_struct *t)
{
        unsigned long flags;

        local_irq_save(flags);
        t->next = __get_cpu_var(tasklet_hi_vec).list;
        __get_cpu_var(tasklet_hi_vec).list = t;
        raise_softirq_irqoff(HI_SOFTIRQ);
        local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_hi_schedule);

static void tasklet_action(struct softirq_action *a)
{
        struct tasklet_struct *list;

        local_irq_disable();
        list = __get_cpu_var(tasklet_vec).list;
        __get_cpu_var(tasklet_vec).list = NULL;
        local_irq_enable();

        while (list) {
                struct tasklet_struct *t = list;

                list = list->next;

                if (tasklet_trylock(t)) {
                        if (!atomic_read(&t->count)) {
                                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
                                        BUG();
                                t->func(t->data);
                                tasklet_unlock(t);
                                continue;
                        }
                        tasklet_unlock(t);
                }

                local_irq_disable();
                t->next = __get_cpu_var(tasklet_vec).list;
                __get_cpu_var(tasklet_vec).list = t;
                __raise_softirq_irqoff(TASKLET_SOFTIRQ);
                local_irq_enable();
        }
}

static void tasklet_hi_action(struct softirq_action *a)
{
        struct tasklet_struct *list;

        local_irq_disable();
        list = __get_cpu_var(tasklet_hi_vec).list;
        __get_cpu_var(tasklet_hi_vec).list = NULL;
        local_irq_enable();

        while (list) {
                struct tasklet_struct *t = list;

                list = list->next;

                if (tasklet_trylock(t)) {
                        if (!atomic_read(&t->count)) {
                                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
                                        BUG();
                                t->func(t->data);
                                tasklet_unlock(t);
                                continue;
                        }
                        tasklet_unlock(t);
                }

                local_irq_disable();
                t->next = __get_cpu_var(tasklet_hi_vec).list;
                __get_cpu_var(tasklet_hi_vec).list = t;
                __raise_softirq_irqoff(HI_SOFTIRQ);
                local_irq_enable();
        }
}


void tasklet_init(struct tasklet_struct *t,
                  void (*func)(unsigned long), unsigned long data)
{
        t->next = NULL;
        t->state = 0;
        atomic_set(&t->count, 0);
        t->func = func;
        t->data = data;
}

EXPORT_SYMBOL(tasklet_init);

void tasklet_kill(struct tasklet_struct *t)
{
        if (in_interrupt())
                printk("Attempt to kill tasklet from interrupt\n");

        while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
                do
                        yield();
                while (test_bit(TASKLET_STATE_SCHED, &t->state));
        }
        tasklet_unlock_wait(t);
        clear_bit(TASKLET_STATE_SCHED, &t->state);
}

EXPORT_SYMBOL(tasklet_kill);
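
/*
 * Usage sketch of the tasklet API exported above (hypothetical names;
 * DECLARE_TASKLET() is the static counterpart of tasklet_init()):
 *
 *      static void my_tasklet_fn(unsigned long data);
 *      static DECLARE_TASKLET(my_tasklet, my_tasklet_fn, 0);
 *
 *      interrupt handler:      tasklet_schedule(&my_tasklet);
 *      latency-critical work:  tasklet_hi_schedule(&my_tasklet);
 *      driver teardown:        tasklet_kill(&my_tasklet);
 *
 * tasklet_kill() can sleep in yield(), so it must be called from process
 * context, which is what the in_interrupt() check above warns about.
 */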

void __init softirq_init(void)
{
        open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
        open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
}

static int ksoftirqd(void * __bind_cpu)
{
        set_user_nice(current, 19);
        current->flags |= PF_NOFREEZE;

        set_current_state(TASK_INTERRUPTIBLE);

        while (!kthread_should_stop()) {
                if (!local_softirq_pending())
                        schedule();

                __set_current_state(TASK_RUNNING);

                while (local_softirq_pending()) {
                        /* Disabling preemption stops the CPU from going
                           offline. If it is already offline, we are on
                           the wrong CPU: don't process. */
                        preempt_disable();
                        if (cpu_is_offline((long)__bind_cpu))
                                goto wait_to_die;
                        do_softirq();
                        preempt_enable();
                        cond_resched();
                }

                set_current_state(TASK_INTERRUPTIBLE);
        }
        __set_current_state(TASK_RUNNING);
        return 0;

wait_to_die:
        preempt_enable();
        /* Wait for kthread_stop */
        set_current_state(TASK_INTERRUPTIBLE);
        while (!kthread_should_stop()) {
                schedule();
                set_current_state(TASK_INTERRUPTIBLE);
        }
        __set_current_state(TASK_RUNNING);
        return 0;
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * tasklet_kill_immediate is called to remove a tasklet that may already
 * be scheduled for execution on @cpu.
 *
 * Unlike tasklet_kill, this function removes the tasklet
 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
 *
 * When this function is called, @cpu must be in the CPU_DEAD state.
 */
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
        struct tasklet_struct **i;

        BUG_ON(cpu_online(cpu));
        BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));

        if (!test_bit(TASKLET_STATE_SCHED, &t->state))
                return;

        /* CPU is dead, so no lock needed. */
        for (i = &per_cpu(tasklet_vec, cpu).list; *i; i = &(*i)->next) {
                if (*i == t) {
                        *i = t->next;
                        return;
                }
        }
        BUG();
}
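
/*
 * Usage sketch (my_tasklet is hypothetical): a subsystem that keeps a
 * per-cpu tasklet would call this from its own CPU_DEAD notifier, once
 * the dead CPU can no longer run the tasklet:
 *
 *      case CPU_DEAD:
 *              tasklet_kill_immediate(&per_cpu(my_tasklet, cpu), cpu);
 *              break;
 */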

static void takeover_tasklets(unsigned int cpu)
{
        struct tasklet_struct **i;

        /* CPU is dead, so no lock needed. */
        local_irq_disable();

        /* Find end, append list for that CPU. */
        for (i = &__get_cpu_var(tasklet_vec).list; *i; i = &(*i)->next);
        *i = per_cpu(tasklet_vec, cpu).list;
        per_cpu(tasklet_vec, cpu).list = NULL;
        raise_softirq_irqoff(TASKLET_SOFTIRQ);

        for (i = &__get_cpu_var(tasklet_hi_vec).list; *i; i = &(*i)->next);
        *i = per_cpu(tasklet_hi_vec, cpu).list;
        per_cpu(tasklet_hi_vec, cpu).list = NULL;
        raise_softirq_irqoff(HI_SOFTIRQ);

        local_irq_enable();
}
#endif /* CONFIG_HOTPLUG_CPU */

static int __devinit cpu_callback(struct notifier_block *nfb,
                                  unsigned long action,
                                  void *hcpu)
{
        int hotcpu = (unsigned long)hcpu;
        struct task_struct *p;

        switch (action) {
        case CPU_UP_PREPARE:
                BUG_ON(per_cpu(tasklet_vec, hotcpu).list);
                BUG_ON(per_cpu(tasklet_hi_vec, hotcpu).list);
                p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
                if (IS_ERR(p)) {
                        printk("ksoftirqd for %i failed\n", hotcpu);
                        return NOTIFY_BAD;
                }
                kthread_bind(p, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = p;
                break;
        case CPU_ONLINE:
                wake_up_process(per_cpu(ksoftirqd, hotcpu));
                break;
#ifdef CONFIG_HOTPLUG_CPU
        case CPU_UP_CANCELED:
                /* Unbind so it can run.  Fall thru. */
                kthread_bind(per_cpu(ksoftirqd, hotcpu), smp_processor_id());
        case CPU_DEAD:
                p = per_cpu(ksoftirqd, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = NULL;
                kthread_stop(p);
                takeover_tasklets(hotcpu);
                break;
#endif /* CONFIG_HOTPLUG_CPU */
        }
        return NOTIFY_OK;
}

static struct notifier_block __devinitdata cpu_nfb = {
        .notifier_call = cpu_callback
};

__init int spawn_ksoftirqd(void)
{
        void *cpu = (void *)(long)smp_processor_id();
        cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
        cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
        register_cpu_notifier(&cpu_nfb);
        return 0;
}