arch/ppc64/kernel/smp.c (Linux 2.6.8.1 with VServer 1.9.2, patch-2.6.8.1-vs1.9.2.diff)
1 /*
2  * SMP support for ppc.
3  *
4  * Written by Cort Dougan (cort@cs.nmt.edu) borrowing a great
5  * deal of code from the sparc and intel versions.
6  *
7  * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
8  *
9  * PowerPC-64 Support added by Dave Engebretsen, Peter Bergner, and
10  * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
11  *
12  *      This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  */
17
18 #include <linux/config.h>
19 #include <linux/kernel.h>
20 #include <linux/module.h>
21 #include <linux/sched.h>
22 #include <linux/smp.h>
23 #include <linux/smp_lock.h>
24 #include <linux/interrupt.h>
25 #include <linux/kernel_stat.h>
26 #include <linux/delay.h>
27 #include <linux/init.h>
28 #include <linux/spinlock.h>
29 #include <linux/cache.h>
30 #include <linux/err.h>
31 #include <linux/sysdev.h>
32 #include <linux/cpu.h>
33
34 #include <asm/ptrace.h>
35 #include <asm/atomic.h>
36 #include <asm/irq.h>
37 #include <asm/page.h>
38 #include <asm/pgtable.h>
39 #include <asm/hardirq.h>
40 #include <asm/io.h>
41 #include <asm/prom.h>
42 #include <asm/smp.h>
43 #include <asm/naca.h>
44 #include <asm/paca.h>
45 #include <asm/iSeries/LparData.h>
46 #include <asm/iSeries/HvCall.h>
47 #include <asm/iSeries/HvCallCfg.h>
48 #include <asm/time.h>
49 #include <asm/ppcdebug.h>
50 #include "open_pic.h"
51 #include <asm/machdep.h>
52 #include <asm/xics.h>
53 #include <asm/cputable.h>
54 #include <asm/system.h>
55 #include <asm/rtas.h>
56
57 int smp_threads_ready;
58 unsigned long cache_decay_ticks;
59
60 cpumask_t cpu_possible_map = CPU_MASK_NONE;
61 cpumask_t cpu_online_map = CPU_MASK_NONE;
62 cpumask_t cpu_available_map = CPU_MASK_NONE;
63 cpumask_t cpu_present_at_boot = CPU_MASK_NONE;
64
65 EXPORT_SYMBOL(cpu_online_map);
66 EXPORT_SYMBOL(cpu_possible_map);
67
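/*
 * Editor's note (grounded in this file): per-platform SMP hooks.  iSeries
 * and pSeries (openpic or xics flavour) each provide a struct smp_ops_t
 * further down in this file, supplying message_pass/probe/kick_cpu/
 * setup_cpu; give_timebase/take_timebase are optional and are only filled
 * in for non-LPAR pSeries, which has to synchronize the timebases of new
 * cpus by hand.
 */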
68 struct smp_ops_t *smp_ops;
69
70 static volatile unsigned int cpu_callin_map[NR_CPUS];
71
72 extern unsigned char stab_array[];
73
74 extern int cpu_idle(void *unused);
75 void smp_call_function_interrupt(void);
76 extern long register_vpa(unsigned long flags, unsigned long proc,
77                          unsigned long vpa);
78
79 /* Low level assembly function used to backup CPU 0 state */
80 extern void __save_cpu_setup(void);
81
82 #ifdef CONFIG_PPC_ISERIES
83 static unsigned long iSeries_smp_message[NR_CPUS];
84
85 void iSeries_smp_message_recv( struct pt_regs * regs )
86 {
87         int cpu = smp_processor_id();
88         int msg;
89
90         if ( num_online_cpus() < 2 )
91                 return;
92
93         for ( msg = 0; msg < 4; ++msg )
94                 if ( test_and_clear_bit( msg, &iSeries_smp_message[cpu] ) )
95                         smp_message_recv( msg, regs );
96 }
97
98 static inline void smp_iSeries_do_message(int cpu, int msg)
99 {
100         set_bit(msg, &iSeries_smp_message[cpu]);
101         HvCall_sendIPI(&(paca[cpu]));
102 }
103
104 static void smp_iSeries_message_pass(int target, int msg)
105 {
106         int i;
107
108         if (target < NR_CPUS)
109                 smp_iSeries_do_message(target, msg);
110         else {
111                 for_each_online_cpu(i) {
112                         if (target == MSG_ALL_BUT_SELF
113                             && i == smp_processor_id())
114                                 continue;
115                         smp_iSeries_do_message(i, msg);
116                 }
117         }
118 }
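/*
 * iSeries IPI mechanism, as implemented above: the sender sets the message
 * number as a bit in the target's iSeries_smp_message[] word and fires a
 * hypervisor IPI with HvCall_sendIPI(); the receiver, in
 * iSeries_smp_message_recv(), walks the four message bits with
 * test_and_clear_bit() and hands each pending message to smp_message_recv().
 */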
119
120 static int smp_iSeries_numProcs(void)
121 {
122         unsigned np, i;
123
124         np = 0;
125         for (i=0; i < NR_CPUS; ++i) {
126                 if (paca[i].lppaca.xDynProcStatus < 2) {
127                         cpu_set(i, cpu_available_map);
128                         cpu_set(i, cpu_possible_map);
129                         cpu_set(i, cpu_present_at_boot);
130                         ++np;
131                 }
132         }
133         return np;
134 }
135
136 static int smp_iSeries_probe(void)
137 {
138         unsigned i;
139         unsigned np = 0;
140
141         for (i=0; i < NR_CPUS; ++i) {
142                 if (paca[i].lppaca.xDynProcStatus < 2) {
143                         /*paca[i].active = 1;*/
144                         ++np;
145                 }
146         }
147
148         return np;
149 }
150
151 static void smp_iSeries_kick_cpu(int nr)
152 {
153         BUG_ON(nr < 0 || nr >= NR_CPUS);
154
155         /* Verify that our partition has a processor nr */
156         if (paca[nr].lppaca.xDynProcStatus >= 2)
157                 return;
158
159         /* The processor is currently spinning, waiting
160          * for the cpu_start field to become non-zero.
161          * After we set cpu_start, the processor will
162          * continue on to secondary_start in iSeries_head.S
163          */
164         paca[nr].cpu_start = 1;
165 }
166
167 static void __devinit smp_iSeries_setup_cpu(int nr)
168 {
169 }
170
171 static struct smp_ops_t iSeries_smp_ops = {
172         .message_pass = smp_iSeries_message_pass,
173         .probe        = smp_iSeries_probe,
174         .kick_cpu     = smp_iSeries_kick_cpu,
175         .setup_cpu    = smp_iSeries_setup_cpu,
176 };
177
178 /* This is called very early. */
179 void __init smp_init_iSeries(void)
180 {
181         smp_ops = &iSeries_smp_ops;
182         systemcfg->processorCount       = smp_iSeries_numProcs();
183 }
184 #endif
185
186 #ifdef CONFIG_PPC_PSERIES
187 void smp_openpic_message_pass(int target, int msg)
188 {
189         /* make sure we're sending something that translates to an IPI */
190         if ( msg > 0x3 ){
191                 printk("SMP %d: smp_message_pass: unknown msg %d\n",
192                        smp_processor_id(), msg);
193                 return;
194         }
195         switch ( target )
196         {
197         case MSG_ALL:
198                 openpic_cause_IPI(msg, 0xffffffff);
199                 break;
200         case MSG_ALL_BUT_SELF:
201                 openpic_cause_IPI(msg,
202                                   0xffffffff & ~(1 << smp_processor_id()));
203                 break;
204         default:
205                 openpic_cause_IPI(msg, 1<<target);
206                 break;
207         }
208 }
209
210 static int __init smp_openpic_probe(void)
211 {
212         int nr_cpus;
213
214         nr_cpus = cpus_weight(cpu_possible_map);
215
216         if (nr_cpus > 1)
217                 openpic_request_IPIs();
218
219         return nr_cpus;
220 }
221
222 static void __devinit smp_openpic_setup_cpu(int cpu)
223 {
224         do_openpic_setup_cpu();
225 }
226
227 #ifdef CONFIG_HOTPLUG_CPU
228 /* Get state of physical CPU.
229  * Return codes:
230  *      0       - The processor is in the RTAS stopped state
231  *      1       - stop-self is in progress
232  *      2       - The processor is not in the RTAS stopped state
233  *      -1      - Hardware Error
234  *      -2      - Hardware Busy, Try again later.
235  */
236 static int query_cpu_stopped(unsigned int pcpu)
237 {
238         int cpu_status;
239         int status, qcss_tok;
240
241         qcss_tok = rtas_token("query-cpu-stopped-state");
242         BUG_ON(qcss_tok == RTAS_UNKNOWN_SERVICE);
243         status = rtas_call(qcss_tok, 1, 2, &cpu_status, pcpu);
244         if (status != 0) {
245                 printk(KERN_ERR
246                        "RTAS query-cpu-stopped-state failed: %i\n", status);
247                 return status;
248         }
249
250         return cpu_status;
251 }
252
253 int __cpu_disable(void)
254 {
255         /* FIXME: go put this in a header somewhere */
256         extern void xics_migrate_irqs_away(void);
257
258         systemcfg->processorCount--;
259
260         /*fix boot_cpuid here*/
261         if (smp_processor_id() == boot_cpuid)
262                 boot_cpuid = any_online_cpu(cpu_online_map);
263
264         /* FIXME: abstract this to not be platform specific later on */
265         xics_migrate_irqs_away();
266         return 0;
267 }
268
269 void __cpu_die(unsigned int cpu)
270 {
271         int tries;
272         int cpu_status;
273         unsigned int pcpu = get_hard_smp_processor_id(cpu);
274
275         for (tries = 0; tries < 5; tries++) {
276                 cpu_status = query_cpu_stopped(pcpu);
277
278                 if (cpu_status == 0)
279                         break;
280                 set_current_state(TASK_UNINTERRUPTIBLE);
281                 schedule_timeout(HZ);
282         }
283         if (cpu_status != 0) {
284                 printk("Querying DEAD? cpu %i (%i) shows %i\n",
285                        cpu, pcpu, cpu_status);
286         }
287
288         /* Isolation and deallocation are definitely done by
289          * drslot_chrp_cpu.  If they were not they would be
290          * done here.  Change isolate state to Isolate and
291          * change allocation-state to Unusable.
292          */
293         paca[cpu].cpu_start = 0;
294
295         /* So we can recognize if it fails to come up next time. */
296         cpu_callin_map[cpu] = 0;
297 }
298
299 /* Kill this cpu */
300 void cpu_die(void)
301 {
302         local_irq_disable();
303         /* Some hardware requires clearing the CPPR, while other hardware does not;
304          * it is safe either way.
305          */
306         pSeriesLP_cppr_info(0, 0);
307         rtas_stop_self();
308         /* Should never get here... */
309         BUG();
310         for(;;);
311 }
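/*
 * Hotplug offline path, as implemented here: __cpu_disable() detaches the
 * dying cpu from the interrupt fabric (xics_migrate_irqs_away()) and fixes
 * up boot_cpuid if needed; cpu_die() is what the dying cpu itself ends up
 * running, stopping the processor via RTAS stop-self; __cpu_die() runs on
 * another cpu and polls query-cpu-stopped-state, giving up (with a warning)
 * after roughly five seconds.
 */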
312
313 /* Search all cpu device nodes for an offline logical cpu.  If a
314  * device node has a "ibm,my-drc-index" property (meaning this is an
315  * LPAR), paranoid-check whether we own the cpu.  For each "thread"
316  * of a cpu, if it is offline and has the same hw index as before,
317  * grab that in preference.
318  */
319 static unsigned int find_physical_cpu_to_start(unsigned int old_hwindex)
320 {
321         struct device_node *np = NULL;
322         unsigned int best = -1U;
323
324         while ((np = of_find_node_by_type(np, "cpu"))) {
325                 int nr_threads, len;
326                 u32 *index = (u32 *)get_property(np, "ibm,my-drc-index", NULL);
327                 u32 *tid = (u32 *)
328                         get_property(np, "ibm,ppc-interrupt-server#s", &len);
329
330                 if (!tid)
331                         tid = (u32 *)get_property(np, "reg", &len);
332
333                 if (!tid)
334                         continue;
335
336                 /* If there is a drc-index, make sure that we own
337                  * the cpu.
338                  */
339                 if (index) {
340                         int state;
341                         int rc = rtas_get_sensor(9003, *index, &state);
342                         if (rc != 0 || state != 1)
343                                 continue;
344                 }
345
346                 nr_threads = len / sizeof(u32);
347
348                 while (nr_threads--) {
349                         if (0 == query_cpu_stopped(tid[nr_threads])) {
350                                 best = tid[nr_threads];
351                                 if (best == old_hwindex)
352                                         goto out;
353                         }
354                 }
355         }
356 out:
357         of_node_put(np);
358         return best;
359 }
360
361 /**
362  * smp_startup_cpu() - start the given cpu
363  *
364  * At boot time, there is nothing to do.  At run-time, call RTAS with
365  * the appropriate start location, if the cpu is in the RTAS stopped
366  * state.
367  *
368  * Returns:
369  *      0       - failure
370  *      1       - success
371  */
372 static inline int __devinit smp_startup_cpu(unsigned int lcpu)
373 {
374         int status;
375         extern void (*pseries_secondary_smp_init)(unsigned int cpu);
376         unsigned long start_here = __pa(pseries_secondary_smp_init);
377         unsigned int pcpu;
378
379         /* At boot time the cpus are already spinning in hold
380          * loops, so nothing to do. */
381         if (system_state == SYSTEM_BOOTING)
382                 return 1;
383
384         pcpu = find_physical_cpu_to_start(get_hard_smp_processor_id(lcpu));
385         if (pcpu == -1U) {
386                 printk(KERN_INFO "No more cpus available, failing\n");
387                 return 0;
388         }
389
390         /* Fixup atomic count: it exited inside IRQ handler. */
391         paca[lcpu].__current->thread_info->preempt_count        = 0;
392
393         /* At boot this is done in prom.c. */
394         paca[lcpu].hw_cpu_id = pcpu;
395
396         status = rtas_call(rtas_token("start-cpu"), 3, 1, NULL,
397                            pcpu, start_here, lcpu);
398         if (status != 0) {
399                 printk(KERN_ERR "start-cpu failed: %i\n", status);
400                 return 0;
401         }
402         return 1;
403 }
404
405 static inline void look_for_more_cpus(void)
406 {
407         int num_addr_cell, num_size_cell, len, i, maxcpus;
408         struct device_node *np;
409         unsigned int *ireg;
410
411         /* Find the property which will tell us about how many CPUs
412          * we're allowed to have. */
413         if ((np = find_path_device("/rtas")) == NULL) {
414                 printk(KERN_ERR "Could not find /rtas in device tree!");
415                 return;
416         }
417         num_addr_cell = prom_n_addr_cells(np);
418         num_size_cell = prom_n_size_cells(np);
419
420         ireg = (unsigned int *)get_property(np, "ibm,lrdr-capacity", &len);
421         if (ireg == NULL) {
422                 /* FIXME: make sure not marked as lrdr_capable() */
423                 return;
424         }
425
426         maxcpus = ireg[num_addr_cell + num_size_cell];
427
428         /* Double maxcpus for processors which have SMT capability */
429         if (cur_cpu_spec->cpu_features & CPU_FTR_SMT)
430                 maxcpus *= 2;
431
432
433         if (maxcpus > NR_CPUS) {
434                 printk(KERN_WARNING
435                        "Partition configured for %d cpus, "
436                        "operating system maximum is %d.\n", maxcpus, NR_CPUS);
437                 maxcpus = NR_CPUS;
438         } else
439                 printk(KERN_INFO "Partition configured for %d cpus.\n",
440                        maxcpus);
441
442         /* Make those cpus (which might appear later) possible too. */
443         for (i = 0; i < maxcpus; i++)
444                 cpu_set(i, cpu_possible_map);
445 }
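/*
 * Note on the "ibm,lrdr-capacity" access above: the code assumes the
 * property starts with an address/size cell pair (memory capacity), so the
 * maximum-processors field is read from index num_addr_cell + num_size_cell.
 */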
446 #else /* ... CONFIG_HOTPLUG_CPU */
447 static inline int __devinit smp_startup_cpu(unsigned int lcpu)
448 {
449         return 1;
450 }
451 static inline void look_for_more_cpus(void)
452 {
453 }
454 #endif /* CONFIG_HOTPLUG_CPU */
455
456 static void smp_pSeries_kick_cpu(int nr)
457 {
458         BUG_ON(nr < 0 || nr >= NR_CPUS);
459
460         if (!smp_startup_cpu(nr))
461                 return;
462
463         /*
464          * The processor is currently spinning, waiting for the
465          * cpu_start field to become non-zero After we set cpu_start,
466          * the processor will continue on to secondary_start
467          */
468         paca[nr].cpu_start = 1;
469 }
470 #endif /* CONFIG_PPC_PSERIES */
471
472 static void __init smp_space_timers(unsigned int max_cpus)
473 {
474         int i;
475         unsigned long offset = tb_ticks_per_jiffy / max_cpus;
476         unsigned long previous_tb = paca[boot_cpuid].next_jiffy_update_tb;
477
478         for_each_cpu(i) {
479                 if (i != boot_cpuid) {
480                         paca[i].next_jiffy_update_tb =
481                                 previous_tb + offset;
482                         previous_tb = paca[i].next_jiffy_update_tb;
483                 }
484         }
485 }
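/*
 * smp_space_timers() staggers each cpu's first decrementer tick by
 * tb_ticks_per_jiffy / max_cpus timebase ticks after the boot cpu's,
 * presumably so that the per-cpu timer interrupts do not all fire at the
 * same instant.
 */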
486
487 #ifdef CONFIG_PPC_PSERIES
488 void vpa_init(int cpu)
489 {
490         unsigned long flags;
491
492         /* Register the Virtual Processor Area (VPA) */
493         flags = 1UL << (63 - 18);
494         register_vpa(flags, cpu, __pa((unsigned long)&(paca[cpu].lppaca)));
495 }
496
497 static inline void smp_xics_do_message(int cpu, int msg)
498 {
499         set_bit(msg, &xics_ipi_message[cpu].value);
500         mb();
501         xics_cause_IPI(cpu);
502 }
503
504 static void smp_xics_message_pass(int target, int msg)
505 {
506         unsigned int i;
507
508         if (target < NR_CPUS) {
509                 smp_xics_do_message(target, msg);
510         } else {
511                 for_each_online_cpu(i) {
512                         if (target == MSG_ALL_BUT_SELF
513                             && i == smp_processor_id())
514                                 continue;
515                         smp_xics_do_message(i, msg);
516                 }
517         }
518 }
519
520 extern void xics_request_IPIs(void);
521
522 static int __init smp_xics_probe(void)
523 {
524 #ifdef CONFIG_SMP
525         xics_request_IPIs();
526 #endif
527
528         return cpus_weight(cpu_possible_map);
529 }
530
531 static void __devinit smp_xics_setup_cpu(int cpu)
532 {
533         if (cpu != boot_cpuid)
534                 xics_setup_cpu();
535 }
536
537 static spinlock_t timebase_lock = SPIN_LOCK_UNLOCKED;
538 static unsigned long timebase = 0;
539
540 static void __devinit pSeries_give_timebase(void)
541 {
542         spin_lock(&timebase_lock);
543         rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL);
544         timebase = get_tb();
545         spin_unlock(&timebase_lock);
546
547         while (timebase)
548                 barrier();
549         rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL);
550 }
551
552 static void __devinit pSeries_take_timebase(void)
553 {
554         while (!timebase)
555                 barrier();
556         spin_lock(&timebase_lock);
557         set_tb(timebase >> 32, timebase & 0xffffffff);
558         timebase = 0;
559         spin_unlock(&timebase_lock);
560 }
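/*
 * Timebase hand-off protocol (non-LPAR pSeries): the giver freezes the
 * timebase via RTAS, samples it into the shared 'timebase' variable and
 * spins until that variable is cleared; the taker spins until 'timebase'
 * is non-zero, loads it with set_tb() and clears it, after which the giver
 * thaws the timebase.  'timebase' doubles as the handshake flag, which is
 * why zero is treated as "no value yet".
 */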
561
562 static struct smp_ops_t pSeries_openpic_smp_ops = {
563         .message_pass   = smp_openpic_message_pass,
564         .probe          = smp_openpic_probe,
565         .kick_cpu       = smp_pSeries_kick_cpu,
566         .setup_cpu      = smp_openpic_setup_cpu,
567 };
568
569 static struct smp_ops_t pSeries_xics_smp_ops = {
570         .message_pass   = smp_xics_message_pass,
571         .probe          = smp_xics_probe,
572         .kick_cpu       = smp_pSeries_kick_cpu,
573         .setup_cpu      = smp_xics_setup_cpu,
574 };
575
576 /* This is called very early */
577 void __init smp_init_pSeries(void)
578 {
579
580         if (naca->interrupt_controller == IC_OPEN_PIC)
581                 smp_ops = &pSeries_openpic_smp_ops;
582         else
583                 smp_ops = &pSeries_xics_smp_ops;
584
585         /* Non-lpar has additional take/give timebase */
586         if (systemcfg->platform == PLATFORM_PSERIES) {
587                 smp_ops->give_timebase = pSeries_give_timebase;
588                 smp_ops->take_timebase = pSeries_take_timebase;
589         }
590 }
591 #endif
592
593 void smp_local_timer_interrupt(struct pt_regs * regs)
594 {
595         if (!--(get_paca()->prof_counter)) {
596                 update_process_times(user_mode(regs));
597                 (get_paca()->prof_counter)=get_paca()->prof_multiplier;
598         }
599 }
600
601 void smp_message_recv(int msg, struct pt_regs *regs)
602 {
603         switch(msg) {
604         case PPC_MSG_CALL_FUNCTION:
605                 smp_call_function_interrupt();
606                 break;
607         case PPC_MSG_RESCHEDULE: 
608                 /* XXX Do we have to do this? */
609                 set_need_resched();
610                 break;
611 #if 0
612         case PPC_MSG_MIGRATE_TASK:
613                 /* spare */
614                 break;
615 #endif
616 #ifdef CONFIG_DEBUGGER
617         case PPC_MSG_DEBUGGER_BREAK:
618                 debugger_ipi(regs);
619                 break;
620 #endif
621         default:
622                 printk("SMP %d: smp_message_recv(): unknown msg %d\n",
623                        smp_processor_id(), msg);
624                 break;
625         }
626 }
627
628 void smp_send_reschedule(int cpu)
629 {
630         smp_ops->message_pass(cpu, PPC_MSG_RESCHEDULE);
631 }
632
633 #ifdef CONFIG_DEBUGGER
634 void smp_send_debugger_break(int cpu)
635 {
636         smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK);
637 }
638 #endif
639
640 static void stop_this_cpu(void *dummy)
641 {
642         local_irq_disable();
643         while (1)
644                 ;
645 }
646
647 void smp_send_stop(void)
648 {
649         smp_call_function(stop_this_cpu, NULL, 1, 0);
650 }
651
652 /*
653  * Structure and data for smp_call_function(). This is designed to minimise
654  * static memory requirements. It also looks cleaner.
655  * Stolen from the i386 version.
656  */
657 static spinlock_t call_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
658
659 static struct call_data_struct {
660         void (*func) (void *info);
661         void *info;
662         atomic_t started;
663         atomic_t finished;
664         int wait;
665 } *call_data;
666
667 /* delay of at least 8 seconds on 1GHz cpu */
668 #define SMP_CALL_TIMEOUT (1UL << (30 + 3))
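/*
 * 1UL << (30 + 3) is 2^33, i.e. roughly 8.6e9 iterations of the polling
 * loops below; hence the "at least 8 seconds on a 1GHz cpu" figure,
 * assuming about one loop iteration per cycle.
 */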
669
670 /*
671  * This function sends a 'generic call function' IPI to all other CPUs
672  * in the system.
673  *
674  * [SUMMARY] Run a function on all other CPUs.
675  * <func> The function to run. This must be fast and non-blocking.
676  * <info> An arbitrary pointer to pass to the function.
677  * <nonatomic> currently unused.
678  * <wait> If true, wait (atomically) until function has completed on other CPUs.
679  * [RETURNS] 0 on success, else a negative status code. Does not return until
680  * remote CPUs are nearly ready to execute <<func>> or are or have executed.
681  *
682  * You must not call this function with disabled interrupts or from a
683  * hardware interrupt handler or from a bottom half handler.
684  */
685 int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
686                        int wait)
687 {
688         struct call_data_struct data;
689         int ret = -1, cpus;
690         unsigned long timeout;
691
692         /* Can deadlock when called with interrupts disabled */
693         WARN_ON(irqs_disabled());
694
695         data.func = func;
696         data.info = info;
697         atomic_set(&data.started, 0);
698         data.wait = wait;
699         if (wait)
700                 atomic_set(&data.finished, 0);
701
702         spin_lock(&call_lock);
703         /* Must grab online cpu count with preempt disabled, otherwise
704          * it can change. */
705         cpus = num_online_cpus() - 1;
706         if (!cpus) {
707                 ret = 0;
708                 goto out;
709         }
710
711         call_data = &data;
712         wmb();
713         /* Send a message to all other CPUs and wait for them to respond */
714         smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_CALL_FUNCTION);
715
716         /* Wait for response */
717         timeout = SMP_CALL_TIMEOUT;
718         while (atomic_read(&data.started) != cpus) {
719                 HMT_low();
720                 if (--timeout == 0) {
721                         printk("smp_call_function on cpu %d: other cpus not "
722                                "responding (%d)\n", smp_processor_id(),
723                                atomic_read(&data.started));
724                         debugger(NULL);
725                         goto out;
726                 }
727         }
728
729         if (wait) {
730                 timeout = SMP_CALL_TIMEOUT;
731                 while (atomic_read(&data.finished) != cpus) {
732                         HMT_low();
733                         if (--timeout == 0) {
734                                 printk("smp_call_function on cpu %d: other "
735                                        "cpus not finishing (%d/%d)\n",
736                                        smp_processor_id(),
737                                        atomic_read(&data.finished),
738                                        atomic_read(&data.started));
739                                 debugger(NULL);
740                                 goto out;
741                         }
742                 }
743         }
744
745         ret = 0;
746
747 out:
748         call_data = NULL;
749         HMT_medium();
750         spin_unlock(&call_lock);
751         return ret;
752 }
753
754 void smp_call_function_interrupt(void)
755 {
756         void (*func) (void *info);
757         void *info;
758         int wait;
759
760         /* call_data will be NULL if the sender timed out while
761          * waiting on us to receive the call.
762          */
763         if (!call_data)
764                 return;
765
766         func = call_data->func;
767         info = call_data->info;
768         wait = call_data->wait;
769
770         if (!wait)
771                 smp_mb__before_atomic_inc();
772
773         /*
774          * Notify initiating CPU that I've grabbed the data and am
775          * about to execute the function
776          */
777         atomic_inc(&call_data->started);
778         /*
779          * At this point the info structure may be out of scope unless wait==1
780          */
781         (*func)(info);
782         if (wait) {
783                 smp_mb__before_atomic_inc();
784                 atomic_inc(&call_data->finished);
785         }
786 }
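/*
 * Handshake between smp_call_function() and the handler above: the
 * initiator publishes call_data, sends PPC_MSG_CALL_FUNCTION and waits for
 * 'started' to reach the number of other online cpus; each receiver bumps
 * 'started' before calling func (so the initiator's on-stack data may only
 * be relied on past that point when wait != 0) and bumps 'finished'
 * afterwards in the wait case, which the initiator then also waits for.
 */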
787
788 extern unsigned long decr_overclock;
789 extern struct gettimeofday_struct do_gtod;
790
791 struct thread_info *current_set[NR_CPUS];
792
793 DECLARE_PER_CPU(unsigned int, pvr);
794
795 static void __devinit smp_store_cpu_info(int id)
796 {
797         per_cpu(pvr, id) = _get_PVR();
798 }
799
800 static void __init smp_create_idle(unsigned int cpu)
801 {
802         struct pt_regs regs;
803         struct task_struct *p;
804
805         /* create a process for the processor */
806         /* only regs.msr is actually used, and 0 is OK for it */
807         memset(&regs, 0, sizeof(struct pt_regs));
808         p = copy_process(CLONE_VM | CLONE_IDLETASK,
809                          0, &regs, 0, NULL, NULL);
810         if (IS_ERR(p))
811                 panic("failed fork for CPU %u: %li", cpu, PTR_ERR(p));
812
813         wake_up_forked_process(p);
814         init_idle(p, cpu);
815         unhash_process(p);
816
817         paca[cpu].__current = p;
818         current_set[cpu] = p->thread_info;
819 }
820
821 void __init smp_prepare_cpus(unsigned int max_cpus)
822 {
823         unsigned int cpu;
824
825         /* 
826          * setup_cpu may need to be called on the boot cpu. We haven't
827          * spun any cpus up but let's be paranoid.
828          */
829         BUG_ON(boot_cpuid != smp_processor_id());
830
831         /* Fixup boot cpu */
832         smp_store_cpu_info(boot_cpuid);
833         cpu_callin_map[boot_cpuid] = 1;
834         paca[boot_cpuid].prof_counter = 1;
835         paca[boot_cpuid].prof_multiplier = 1;
836
837 #ifndef CONFIG_PPC_ISERIES
838         paca[boot_cpuid].next_jiffy_update_tb = tb_last_stamp = get_tb();
839
840         /*
841          * Should update do_gtod.stamp_xsec.
842          * For now we leave it which means the time can be some
843          * For now we leave it, which means the time can be some
844          */
845         do_gtod.tb_orig_stamp = tb_last_stamp;
846
847         look_for_more_cpus();
848 #endif
849
850         max_cpus = smp_ops->probe();
851  
852         /* Backup CPU 0 state if necessary */
853         __save_cpu_setup();
854
855         smp_space_timers(max_cpus);
856
857         for_each_cpu(cpu)
858                 if (cpu != boot_cpuid)
859                         smp_create_idle(cpu);
860 }
861
862 void __devinit smp_prepare_boot_cpu(void)
863 {
864         BUG_ON(smp_processor_id() != boot_cpuid);
865
866         /* cpu_possible is set up in prom.c */
867         cpu_set(boot_cpuid, cpu_online_map);
868
869         paca[boot_cpuid].__current = current;
870         current_set[boot_cpuid] = current->thread_info;
871 }
872
873 int __devinit __cpu_up(unsigned int cpu)
874 {
875         int c;
876
877         /* At boot, don't bother with non-present cpus -JSCHOPP */
878         if (system_state == SYSTEM_BOOTING && !cpu_present_at_boot(cpu))
879                 return -ENOENT;
880
881         paca[cpu].prof_counter = 1;
882         paca[cpu].prof_multiplier = 1;
883         paca[cpu].default_decr = tb_ticks_per_jiffy / decr_overclock;
884
885         if (!(cur_cpu_spec->cpu_features & CPU_FTR_SLB)) {
886                 void *tmp;
887
888                 /* maximum of 48 CPUs on machines with a segment table */
889                 if (cpu >= 48)
890                         BUG();
891
892                 tmp = &stab_array[PAGE_SIZE * cpu];
893                 memset(tmp, 0, PAGE_SIZE); 
894                 paca[cpu].stab_addr = (unsigned long)tmp;
895                 paca[cpu].stab_real = virt_to_abs(tmp);
896         }
897
898         /* The information for processor bringup must
899          * be written out to main store before we release
900          * the processor.
901          */
902         mb();
903
904         /* wake up cpus */
905         smp_ops->kick_cpu(cpu);
906
907         /*
908          * wait to see if the cpu made a callin (is actually up).
909          * use this value that I found through experimentation.
910          * -- Cort
911          */
912         if (system_state == SYSTEM_BOOTING)
913                 for (c = 5000; c && !cpu_callin_map[cpu]; c--)
914                         udelay(100);
915 #ifdef CONFIG_HOTPLUG_CPU
916         else
917                 /*
918                  * CPUs can take much longer to come up in the
919                  * hotplug case.  Wait five seconds.
920                  */
921                 for (c = 25; c && !cpu_callin_map[cpu]; c--) {
922                         set_current_state(TASK_UNINTERRUPTIBLE);
923                         schedule_timeout(HZ/5);
924                 }
925 #endif
926
927         if (!cpu_callin_map[cpu]) {
928                 printk("Processor %u is stuck.\n", cpu);
929                 return -ENOENT;
930         }
931
932         printk("Processor %u found.\n", cpu);
933
934         if (smp_ops->give_timebase)
935                 smp_ops->give_timebase();
936
937         /* Wait until cpu puts itself in the online map */
938         while (!cpu_online(cpu))
939                 cpu_relax();
940
941         return 0;
942 }
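/*
 * Secondary bring-up as seen from __cpu_up() above: kick_cpu() releases the
 * new processor, which eventually reaches start_secondary() below and sets
 * its cpu_callin_map entry; we poll that entry (briefly at boot, for up to
 * five seconds in the hotplug case), optionally hand over the timebase, and
 * finally wait for the cpu to add itself to cpu_online_map.
 */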
943
944 extern unsigned int default_distrib_server;
945 /* Activate a secondary processor. */
946 int __devinit start_secondary(void *unused)
947 {
948         unsigned int cpu = smp_processor_id();
949
950         atomic_inc(&init_mm.mm_count);
951         current->active_mm = &init_mm;
952
953         smp_store_cpu_info(cpu);
954         set_dec(paca[cpu].default_decr);
955         cpu_callin_map[cpu] = 1;
956
957         smp_ops->setup_cpu(cpu);
958         if (smp_ops->take_timebase)
959                 smp_ops->take_timebase();
960
961 #ifdef CONFIG_PPC_PSERIES
962         if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
963                 vpa_init(cpu); 
964         }
965
966 #ifdef CONFIG_IRQ_ALL_CPUS
967         /* Put the calling processor into the GIQ.  This is really only
968          * necessary from a secondary thread as the OF start-cpu interface
969          * performs this function for us on primary threads.
970          */
971         /* TODO: 9005 is #defined in rtas-proc.c -- move to a header */
972         rtas_set_indicator(9005, default_distrib_server, 1);
973 #endif
974 #endif
975
976         spin_lock(&call_lock);
977         cpu_set(cpu, cpu_online_map);
978         spin_unlock(&call_lock);
979
980         local_irq_enable();
981
982         return cpu_idle(NULL);
983 }
984
985 int setup_profiling_timer(unsigned int multiplier)
986 {
987         return 0;
988 }
989
990 void __init smp_cpus_done(unsigned int max_cpus)
991 {
992         cpumask_t old_mask;
993
994         /* We want the setup_cpu() here to be called from CPU 0, but our
995          * init thread may have been "borrowed" by another CPU in the meantime
996          * so we pin ourselves down to CPU 0 for a short while.
997          */
998         old_mask = current->cpus_allowed;
999         set_cpus_allowed(current, cpumask_of_cpu(boot_cpuid));
1000         
1001         smp_ops->setup_cpu(boot_cpuid);
1002
1003         /* XXX fix this, xics currently relies on it - Anton */
1004         smp_threads_ready = 1;
1005
1006         set_cpus_allowed(current, old_mask);
1007 }
1008
1009 #ifdef CONFIG_SCHED_SMT
1010 #ifdef CONFIG_NUMA
1011 static struct sched_group sched_group_cpus[NR_CPUS];
1012 static struct sched_group sched_group_phys[NR_CPUS];
1013 static struct sched_group sched_group_nodes[MAX_NUMNODES];
1014 static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
1015 static DEFINE_PER_CPU(struct sched_domain, phys_domains);
1016 static DEFINE_PER_CPU(struct sched_domain, node_domains);
1017 __init void arch_init_sched_domains(void)
1018 {
1019         int i;
1020         struct sched_group *first = NULL, *last = NULL;
1021
1022         /* Set up domains */
1023         for_each_cpu(i) {
1024                 struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
1025                 struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
1026                 struct sched_domain *node_domain = &per_cpu(node_domains, i);
1027                 int node = cpu_to_node(i);
1028                 cpumask_t nodemask = node_to_cpumask(node);
1029                 cpumask_t my_cpumask = cpumask_of_cpu(i);
1030                 cpumask_t sibling_cpumask = cpumask_of_cpu(i ^ 0x1);
1031
1032                 *cpu_domain = SD_SIBLING_INIT;
1033                 if (cur_cpu_spec->cpu_features & CPU_FTR_SMT)
1034                         cpus_or(cpu_domain->span, my_cpumask, sibling_cpumask);
1035                 else
1036                         cpu_domain->span = my_cpumask;
1037                 cpu_domain->parent = phys_domain;
1038                 cpu_domain->groups = &sched_group_cpus[i];
1039
1040                 *phys_domain = SD_CPU_INIT;
1041                 phys_domain->span = nodemask;
1042                 phys_domain->parent = node_domain;
1043                 phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)];
1044
1045                 *node_domain = SD_NODE_INIT;
1046                 node_domain->span = cpu_possible_map;
1047                 node_domain->groups = &sched_group_nodes[node];
1048         }
1049
1050         /* Set up CPU (sibling) groups */
1051         for_each_cpu(i) {
1052                 struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
1053                 int j;
1054                 first = last = NULL;
1055
1056                 if (i != first_cpu(cpu_domain->span))
1057                         continue;
1058
1059                 for_each_cpu_mask(j, cpu_domain->span) {
1060                         struct sched_group *cpu = &sched_group_cpus[j];
1061
1062                         cpus_clear(cpu->cpumask);
1063                         cpu_set(j, cpu->cpumask);
1064                         cpu->cpu_power = SCHED_LOAD_SCALE;
1065
1066                         if (!first)
1067                                 first = cpu;
1068                         if (last)
1069                                 last->next = cpu;
1070                         last = cpu;
1071                 }
1072                 last->next = first;
1073         }
1074
1075         for (i = 0; i < MAX_NUMNODES; i++) {
1076                 int j;
1077                 cpumask_t nodemask;
1078                 struct sched_group *node = &sched_group_nodes[i];
1079                 cpumask_t node_cpumask = node_to_cpumask(i);
1080                 cpus_and(nodemask, node_cpumask, cpu_possible_map);
1081
1082                 if (cpus_empty(nodemask))
1083                         continue;
1084
1085                 first = last = NULL;
1086                 /* Set up physical groups */
1087                 for_each_cpu_mask(j, nodemask) {
1088                         struct sched_domain *cpu_domain = &per_cpu(cpu_domains, j);
1089                         struct sched_group *cpu = &sched_group_phys[j];
1090
1091                         if (j != first_cpu(cpu_domain->span))
1092                                 continue;
1093
1094                         cpu->cpumask = cpu_domain->span;
1095                         /*
1096                          * Make each extra sibling increase power by 10% of
1097                          * the basic CPU. This is very arbitrary.
1098                          */
1099                         cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
1100                         node->cpu_power += cpu->cpu_power;
1101
1102                         if (!first)
1103                                 first = cpu;
1104                         if (last)
1105                                 last->next = cpu;
1106                         last = cpu;
1107                 }
1108                 last->next = first;
1109         }
1110
1111         /* Set up nodes */
1112         first = last = NULL;
1113         for (i = 0; i < MAX_NUMNODES; i++) {
1114                 struct sched_group *cpu = &sched_group_nodes[i];
1115                 cpumask_t nodemask;
1116                 cpumask_t node_cpumask = node_to_cpumask(i);
1117                 cpus_and(nodemask, node_cpumask, cpu_possible_map);
1118
1119                 if (cpus_empty(nodemask))
1120                         continue;
1121
1122                 cpu->cpumask = nodemask;
1123                 /* ->cpu_power already setup */
1124
1125                 if (!first)
1126                         first = cpu;
1127                 if (last)
1128                         last->next = cpu;
1129                 last = cpu;
1130         }
1131         last->next = first;
1132
1133         mb();
1134         for_each_cpu(i) {
1135                 struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
1136                 cpu_attach_domain(cpu_domain, i);
1137         }
1138 }
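/*
 * Note: the sibling mask above is built as cpumask_of_cpu(i ^ 0x1), i.e.
 * the code assumes SMT threads come in adjacent even/odd pairs (0/1, 2/3,
 * ...).  The non-NUMA variant below makes the same assumption.
 */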
1139 #else /* !CONFIG_NUMA */
1140 static struct sched_group sched_group_cpus[NR_CPUS];
1141 static struct sched_group sched_group_phys[NR_CPUS];
1142 static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
1143 static DEFINE_PER_CPU(struct sched_domain, phys_domains);
1144 __init void arch_init_sched_domains(void)
1145 {
1146         int i;
1147         struct sched_group *first = NULL, *last = NULL;
1148
1149         /* Set up domains */
1150         for_each_cpu(i) {
1151                 struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
1152                 struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
1153                 cpumask_t my_cpumask = cpumask_of_cpu(i);
1154                 cpumask_t sibling_cpumask = cpumask_of_cpu(i ^ 0x1);
1155
1156                 *cpu_domain = SD_SIBLING_INIT;
1157                 if (cur_cpu_spec->cpu_features & CPU_FTR_SMT)
1158                         cpus_or(cpu_domain->span, my_cpumask, sibling_cpumask);
1159                 else
1160                         cpu_domain->span = my_cpumask;
1161                 cpu_domain->parent = phys_domain;
1162                 cpu_domain->groups = &sched_group_cpus[i];
1163
1164                 *phys_domain = SD_CPU_INIT;
1165                 phys_domain->span = cpu_possible_map;
1166                 phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)];
1167         }
1168
1169         /* Set up CPU (sibling) groups */
1170         for_each_cpu(i) {
1171                 struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
1172                 int j;
1173                 first = last = NULL;
1174
1175                 if (i != first_cpu(cpu_domain->span))
1176                         continue;
1177
1178                 for_each_cpu_mask(j, cpu_domain->span) {
1179                         struct sched_group *cpu = &sched_group_cpus[j];
1180
1181                         cpus_clear(cpu->cpumask);
1182                         cpu_set(j, cpu->cpumask);
1183                         cpu->cpu_power = SCHED_LOAD_SCALE;
1184
1185                         if (!first)
1186                                 first = cpu;
1187                         if (last)
1188                                 last->next = cpu;
1189                         last = cpu;
1190                 }
1191                 last->next = first;
1192         }
1193
1194         first = last = NULL;
1195         /* Set up physical groups */
1196         for_each_cpu(i) {
1197                 struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
1198                 struct sched_group *cpu = &sched_group_phys[i];
1199
1200                 if (i != first_cpu(cpu_domain->span))
1201                         continue;
1202
1203                 cpu->cpumask = cpu_domain->span;
1204                 /* See SMT+NUMA setup for comment */
1205                 cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
1206
1207                 if (!first)
1208                         first = cpu;
1209                 if (last)
1210                         last->next = cpu;
1211                 last = cpu;
1212         }
1213         last->next = first;
1214
1215         mb();
1216         for_each_cpu(i) {
1217                 struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
1218                 cpu_attach_domain(cpu_domain, i);
1219         }
1220 }
1221 #endif /* CONFIG_NUMA */
1222 #endif /* CONFIG_SCHED_SMT */