ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-2.6.6.tar.bz2
/*
 *      linux/kernel/softirq.c
 *
 *      Copyright (C) 1992 Linus Torvalds
 *
 * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 */

#include <linux/module.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/kthread.h>

#include <asm/irq.h>
/*
   - No shared variables; all the data is CPU local.
   - If a softirq needs serialization, let it serialize itself
     with its own spinlocks (see the sketch below).
   - Even if a softirq is serialized, only the local cpu is marked for
     execution. Hence, we get a sort of weak cpu binding. It is still
     not clear whether this results in better locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: each tasklet is serialized with respect to itself.
 */
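
/*
 * Illustrative sketch (not part of the original file): a softirq handler
 * that serializes itself with its own spinlock, as the note above
 * suggests.  The handler name, lock and shared state are hypothetical,
 * and the code is kept disabled.  If the shared state were also touched
 * from hard-irq context, spin_lock_irqsave() would be needed instead.
 */
#if 0
static spinlock_t example_state_lock = SPIN_LOCK_UNLOCKED;

static void example_serialized_action(struct softirq_action *a)
{
        /* The softirq core takes no global lock; whatever cross-cpu
           state this handler touches is protected by its own lock. */
        spin_lock(&example_state_lock);
        /* ... drain a queue or update shared counters here ... */
        spin_unlock(&example_state_lock);
}
#endif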

#ifndef __ARCH_IRQ_STAT
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;

static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);

/*
 * We cannot loop indefinitely here to avoid userspace starvation,
 * but we also don't want to introduce a worst case 1/HZ latency
 * to the pending events, so we let the scheduler balance
 * the softirq load for us.
 */
static inline void wakeup_softirqd(void)
{
        /* Interrupts are disabled: no need to stop preemption */
        struct task_struct *tsk = __get_cpu_var(ksoftirqd);

        if (tsk && tsk->state != TASK_RUNNING)
                wake_up_process(tsk);
}

/*
 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
 * and we fall back to ksoftirqd after that.
 *
 * This number has been established via experimentation.
 * The two things to balance are latency and fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_RESTART 10

asmlinkage void __do_softirq(void)
{
        struct softirq_action *h;
        __u32 pending;
        int max_restart = MAX_SOFTIRQ_RESTART;

        pending = local_softirq_pending();

        local_bh_disable();
restart:
        /* Reset the pending bitmask before enabling irqs */
        local_softirq_pending() = 0;

        local_irq_enable();

        h = softirq_vec;

        do {
                if (pending & 1)
                        h->action(h);
                h++;
                pending >>= 1;
        } while (pending);

        local_irq_disable();

        pending = local_softirq_pending();
        if (pending && --max_restart)
                goto restart;

        if (pending)
                wakeup_softirqd();

        __local_bh_enable();
}

#ifndef __ARCH_HAS_DO_SOFTIRQ

asmlinkage void do_softirq(void)
{
        __u32 pending;
        unsigned long flags;

        if (in_interrupt())
                return;

        local_irq_save(flags);

        pending = local_softirq_pending();

        if (pending)
                __do_softirq();

        local_irq_restore(flags);
}

EXPORT_SYMBOL(do_softirq);

#endif

void local_bh_enable(void)
{
        __local_bh_enable();
        WARN_ON(irqs_disabled());
        if (unlikely(!in_interrupt() &&
                     local_softirq_pending()))
                invoke_softirq();
        preempt_check_resched();
}
EXPORT_SYMBOL(local_bh_enable);

/*
 * This function must run with irqs disabled!
 */
inline fastcall void raise_softirq_irqoff(unsigned int nr)
{
        __raise_softirq_irqoff(nr);

        /*
         * If we're in an interrupt or softirq, we're done
         * (this also catches softirq-disabled code). We will
         * actually run the softirq once we return from
         * the irq or softirq.
         *
         * Otherwise we wake up ksoftirqd to make sure we
         * schedule the softirq soon.
         */
        if (!in_interrupt())
                wakeup_softirqd();
}

EXPORT_SYMBOL(raise_softirq_irqoff);

void fastcall raise_softirq(unsigned int nr)
{
        unsigned long flags;

        local_irq_save(flags);
        raise_softirq_irqoff(nr);
        local_irq_restore(flags);
}

EXPORT_SYMBOL(raise_softirq);

void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
{
        softirq_vec[nr].data = data;
        softirq_vec[nr].action = action;
}

EXPORT_SYMBOL(open_softirq);
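
/*
 * Illustrative sketch (not part of the original file): how a subsystem
 * would register and raise a softirq with the functions above.  The
 * softirq number EXAMPLE_SOFTIRQ and the handler are hypothetical; real
 * softirq numbers come from the enum in <linux/interrupt.h>.  Kept
 * disabled since it is only an example.
 */
#if 0
static void example_softirq_handler(struct softirq_action *a)
{
        /* Runs on the cpu that raised it, with irqs enabled and
           bottom halves disabled. */
}

static void example_softirq_setup(void)
{
        /* Register the handler once at init time... */
        open_softirq(EXAMPLE_SOFTIRQ, example_softirq_handler, NULL);

        /* ...then raise it whenever there is work.  raise_softirq()
           saves and disables irqs itself; raise_softirq_irqoff() may
           be used instead when irqs are already disabled. */
        raise_softirq(EXAMPLE_SOFTIRQ);
}
#endif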

/* Tasklets */
struct tasklet_head
{
        struct tasklet_struct *list;
};

/* Some compilers disobey section attribute on statics when not
   initialized -- RR */
static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };

void fastcall __tasklet_schedule(struct tasklet_struct *t)
{
        unsigned long flags;

        local_irq_save(flags);
        t->next = __get_cpu_var(tasklet_vec).list;
        __get_cpu_var(tasklet_vec).list = t;
        raise_softirq_irqoff(TASKLET_SOFTIRQ);
        local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_schedule);

void fastcall __tasklet_hi_schedule(struct tasklet_struct *t)
{
        unsigned long flags;

        local_irq_save(flags);
        t->next = __get_cpu_var(tasklet_hi_vec).list;
        __get_cpu_var(tasklet_hi_vec).list = t;
        raise_softirq_irqoff(HI_SOFTIRQ);
        local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_hi_schedule);

static void tasklet_action(struct softirq_action *a)
{
        struct tasklet_struct *list;

        local_irq_disable();
        list = __get_cpu_var(tasklet_vec).list;
        __get_cpu_var(tasklet_vec).list = NULL;
        local_irq_enable();

        while (list) {
                struct tasklet_struct *t = list;

                list = list->next;

                if (tasklet_trylock(t)) {
                        if (!atomic_read(&t->count)) {
                                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
                                        BUG();
                                t->func(t->data);
                                tasklet_unlock(t);
                                continue;
                        }
                        tasklet_unlock(t);
                }

                local_irq_disable();
                t->next = __get_cpu_var(tasklet_vec).list;
                __get_cpu_var(tasklet_vec).list = t;
                __raise_softirq_irqoff(TASKLET_SOFTIRQ);
                local_irq_enable();
        }
}

static void tasklet_hi_action(struct softirq_action *a)
{
        struct tasklet_struct *list;

        local_irq_disable();
        list = __get_cpu_var(tasklet_hi_vec).list;
        __get_cpu_var(tasklet_hi_vec).list = NULL;
        local_irq_enable();

        while (list) {
                struct tasklet_struct *t = list;

                list = list->next;

                if (tasklet_trylock(t)) {
                        if (!atomic_read(&t->count)) {
                                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
                                        BUG();
                                t->func(t->data);
                                tasklet_unlock(t);
                                continue;
                        }
                        tasklet_unlock(t);
                }

                local_irq_disable();
                t->next = __get_cpu_var(tasklet_hi_vec).list;
                __get_cpu_var(tasklet_hi_vec).list = t;
                __raise_softirq_irqoff(HI_SOFTIRQ);
                local_irq_enable();
        }
}


void tasklet_init(struct tasklet_struct *t,
                  void (*func)(unsigned long), unsigned long data)
{
        t->next = NULL;
        t->state = 0;
        atomic_set(&t->count, 0);
        t->func = func;
        t->data = data;
}

EXPORT_SYMBOL(tasklet_init);

void tasklet_kill(struct tasklet_struct *t)
{
        if (in_interrupt())
                printk("Attempt to kill tasklet from interrupt\n");

        while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
                do
                        yield();
                while (test_bit(TASKLET_STATE_SCHED, &t->state));
        }
        tasklet_unlock_wait(t);
        clear_bit(TASKLET_STATE_SCHED, &t->state);
}

EXPORT_SYMBOL(tasklet_kill);
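
/*
 * Illustrative sketch (not part of the original file): the usual driver
 * pattern around tasklet_init()/tasklet_schedule()/tasklet_kill().  The
 * device structure, irq handler and all names are hypothetical; kept
 * disabled since it is only an example.
 */
#if 0
struct example_dev {
        struct tasklet_struct rx_tasklet;
        /* ... device state ... */
};

static void example_rx_tasklet(unsigned long data)
{
        struct example_dev *dev = (struct example_dev *)data;
        /* Deferred work runs here in softirq context; a given tasklet
           never runs on two cpus at once, so it is serialized with
           respect to itself. */
}

static irqreturn_t example_irq(int irq, void *dev_id, struct pt_regs *regs)
{
        struct example_dev *dev = dev_id;

        /* Acknowledge the hardware, then defer the heavy lifting. */
        tasklet_schedule(&dev->rx_tasklet);
        return IRQ_HANDLED;
}

static void example_init_and_teardown(struct example_dev *dev)
{
        tasklet_init(&dev->rx_tasklet, example_rx_tasklet,
                     (unsigned long)dev);
        /* ... later, before freeing dev, from process context: */
        tasklet_kill(&dev->rx_tasklet);
}
#endif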

void __init softirq_init(void)
{
        open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
        open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
}

static int ksoftirqd(void * __bind_cpu)
{
        set_user_nice(current, 19);
        current->flags |= PF_NOFREEZE;

        set_current_state(TASK_INTERRUPTIBLE);

        while (!kthread_should_stop()) {
                if (!local_softirq_pending())
                        schedule();

                __set_current_state(TASK_RUNNING);

                while (local_softirq_pending()) {
                        /* Preempt disable stops the cpu from going
                           offline.  If we're already offline, we're on
                           the wrong CPU: don't process. */
                        preempt_disable();
                        if (cpu_is_offline((long)__bind_cpu))
                                goto wait_to_die;
                        do_softirq();
                        preempt_enable();
                        cond_resched();
                }

                set_current_state(TASK_INTERRUPTIBLE);
        }
        __set_current_state(TASK_RUNNING);
        return 0;

wait_to_die:
        preempt_enable();
        /* Wait for kthread_stop */
        set_current_state(TASK_INTERRUPTIBLE);
        while (!kthread_should_stop()) {
                schedule();
                set_current_state(TASK_INTERRUPTIBLE);
        }
        __set_current_state(TASK_RUNNING);
        return 0;
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * tasklet_kill_immediate is called to remove a tasklet that may already be
 * scheduled for execution on @cpu.
 *
 * Unlike tasklet_kill, this function removes the tasklet
 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
 *
 * When this function is called, @cpu must be in the CPU_DEAD state.
 */
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
        struct tasklet_struct **i;

        BUG_ON(cpu_online(cpu));
        BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));

        if (!test_bit(TASKLET_STATE_SCHED, &t->state))
                return;

        /* CPU is dead, so no lock needed. */
        for (i = &per_cpu(tasklet_vec, cpu).list; *i; i = &(*i)->next) {
                if (*i == t) {
                        *i = t->next;
                        return;
                }
        }
        BUG();
}

static void takeover_tasklets(unsigned int cpu)
{
        struct tasklet_struct **i;

        /* CPU is dead, so no lock needed. */
        local_irq_disable();

        /* Find end, append list for that CPU. */
        for (i = &__get_cpu_var(tasklet_vec).list; *i; i = &(*i)->next);
        *i = per_cpu(tasklet_vec, cpu).list;
        per_cpu(tasklet_vec, cpu).list = NULL;
        raise_softirq_irqoff(TASKLET_SOFTIRQ);

        for (i = &__get_cpu_var(tasklet_hi_vec).list; *i; i = &(*i)->next);
        *i = per_cpu(tasklet_hi_vec, cpu).list;
        per_cpu(tasklet_hi_vec, cpu).list = NULL;
        raise_softirq_irqoff(HI_SOFTIRQ);

        local_irq_enable();
}
#endif /* CONFIG_HOTPLUG_CPU */

static int __devinit cpu_callback(struct notifier_block *nfb,
                                  unsigned long action,
                                  void *hcpu)
{
        int hotcpu = (unsigned long)hcpu;
        struct task_struct *p;

        switch (action) {
        case CPU_UP_PREPARE:
                BUG_ON(per_cpu(tasklet_vec, hotcpu).list);
                BUG_ON(per_cpu(tasklet_hi_vec, hotcpu).list);
                p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
                if (IS_ERR(p)) {
                        printk("ksoftirqd for %i failed\n", hotcpu);
                        return NOTIFY_BAD;
                }
                kthread_bind(p, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = p;
                break;
        case CPU_ONLINE:
                wake_up_process(per_cpu(ksoftirqd, hotcpu));
                break;
#ifdef CONFIG_HOTPLUG_CPU
        case CPU_UP_CANCELED:
                /* Unbind so it can run.  Fall thru. */
                kthread_bind(per_cpu(ksoftirqd, hotcpu), smp_processor_id());
        case CPU_DEAD:
                p = per_cpu(ksoftirqd, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = NULL;
                kthread_stop(p);
                takeover_tasklets(hotcpu);
                break;
#endif /* CONFIG_HOTPLUG_CPU */
        }
        return NOTIFY_OK;
}

static struct notifier_block __devinitdata cpu_nfb = {
        .notifier_call = cpu_callback
};

__init int spawn_ksoftirqd(void)
{
        void *cpu = (void *)(long)smp_processor_id();
        cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
        cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
        register_cpu_notifier(&cpu_nfb);
        return 0;
}