[linux-2.6.git] arch/ppc64/kernel/smp.c
1 /*
2  * SMP support for ppc.
3  *
4  * Written by Cort Dougan (cort@cs.nmt.edu) borrowing a great
5  * deal of code from the sparc and intel versions.
6  *
7  * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
8  *
9  * PowerPC-64 Support added by Dave Engebretsen, Peter Bergner, and
10  * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
11  *
12  *      This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  */
17
18 #include <linux/config.h>
19 #include <linux/kernel.h>
20 #include <linux/module.h>
21 #include <linux/sched.h>
22 #include <linux/smp.h>
23 #include <linux/smp_lock.h>
24 #include <linux/interrupt.h>
25 #include <linux/kernel_stat.h>
26 #include <linux/delay.h>
27 #include <linux/init.h>
28 #include <linux/spinlock.h>
29 #include <linux/cache.h>
30 #include <linux/err.h>
31 #include <linux/sysdev.h>
32 #include <linux/cpu.h>
33
34 #include <asm/ptrace.h>
35 #include <asm/atomic.h>
36 #include <asm/irq.h>
37 #include <asm/page.h>
38 #include <asm/pgtable.h>
39 #include <asm/hardirq.h>
40 #include <asm/io.h>
41 #include <asm/prom.h>
42 #include <asm/smp.h>
43 #include <asm/naca.h>
44 #include <asm/paca.h>
45 #include <asm/iSeries/LparData.h>
46 #include <asm/iSeries/HvCall.h>
47 #include <asm/iSeries/HvCallCfg.h>
48 #include <asm/time.h>
49 #include <asm/ppcdebug.h>
50 #include "open_pic.h"
51 #include <asm/machdep.h>
52 #include <asm/xics.h>
53 #include <asm/cputable.h>
54 #include <asm/system.h>
55
56 int smp_threads_ready;
57 unsigned long cache_decay_ticks;
58
59 cpumask_t cpu_possible_map = CPU_MASK_NONE;
60 cpumask_t cpu_online_map = CPU_MASK_NONE;
61 cpumask_t cpu_available_map = CPU_MASK_NONE;
62 cpumask_t cpu_present_at_boot = CPU_MASK_NONE;
63
64 EXPORT_SYMBOL(cpu_online_map);
65 EXPORT_SYMBOL(cpu_possible_map);
66
67 struct smp_ops_t *smp_ops;
68
69 static volatile unsigned int cpu_callin_map[NR_CPUS];
70
71 extern unsigned char stab_array[];
72
73 extern int cpu_idle(void *unused);
74 void smp_call_function_interrupt(void);
75 extern long register_vpa(unsigned long flags, unsigned long proc,
76                          unsigned long vpa);
77
78 /* Low level assembly function used to backup CPU 0 state */
79 extern void __save_cpu_setup(void);
80
81 #ifdef CONFIG_PPC_ISERIES
82 static unsigned long iSeries_smp_message[NR_CPUS];
83
84 void iSeries_smp_message_recv( struct pt_regs * regs )
85 {
86         int cpu = smp_processor_id();
87         int msg;
88
89         if ( num_online_cpus() < 2 )
90                 return;
91
92         for ( msg = 0; msg < 4; ++msg )
93                 if ( test_and_clear_bit( msg, &iSeries_smp_message[cpu] ) )
94                         smp_message_recv( msg, regs );
95 }
96
97 static inline void smp_iSeries_do_message(int cpu, int msg)
98 {
99         set_bit(msg, &iSeries_smp_message[cpu]);
100         HvCall_sendIPI(&(paca[cpu]));
101 }
102
103 static void smp_iSeries_message_pass(int target, int msg)
104 {
105         int i;
106
107         if (target < NR_CPUS)
108                 smp_iSeries_do_message(target, msg);
109         else {
110                 for_each_online_cpu(i) {
111                         if (target == MSG_ALL_BUT_SELF
112                             && i == smp_processor_id())
113                                 continue;
114                         smp_iSeries_do_message(i, msg);
115                 }
116         }
117 }
118
119 static int smp_iSeries_numProcs(void)
120 {
121         unsigned np, i;
122         struct ItLpPaca * lpPaca;
123
124         np = 0;
125         for (i=0; i < NR_CPUS; ++i) {
126                 lpPaca = paca[i].xLpPacaPtr;
127                 if ( lpPaca->xDynProcStatus < 2 ) {
128                         cpu_set(i, cpu_available_map);
129                         cpu_set(i, cpu_possible_map);
130                         cpu_set(i, cpu_present_at_boot);
131                         ++np;
132                 }
133         }
134         return np;
135 }
136
137 static int smp_iSeries_probe(void)
138 {
139         unsigned i;
140         unsigned np = 0;
141         struct ItLpPaca *lpPaca;
142
143         for (i=0; i < NR_CPUS; ++i) {
144                 lpPaca = paca[i].xLpPacaPtr;
145                 if (lpPaca->xDynProcStatus < 2) {
146                         /*paca[i].active = 1;*/
147                         ++np;
148                 }
149         }
150
151         return np;
152 }
153
154 static void smp_iSeries_kick_cpu(int nr)
155 {
156         struct ItLpPaca *lpPaca;
157
158         BUG_ON(nr < 0 || nr >= NR_CPUS);
159
160         /* Verify that our partition has a processor nr */
161         lpPaca = paca[nr].xLpPacaPtr;
162         if (lpPaca->xDynProcStatus >= 2)
163                 return;
164
165         /* The processor is currently spinning, waiting
166          * for the xProcStart field to become non-zero.
167          * After we set xProcStart, the processor will
168          * continue on to secondary_start in iSeries_head.S
169          */
170         paca[nr].xProcStart = 1;
171 }
172
173 static void __devinit smp_iSeries_setup_cpu(int nr)
174 {
175 }
176
177 static struct smp_ops_t iSeries_smp_ops = {
178         .message_pass = smp_iSeries_message_pass,
179         .probe        = smp_iSeries_probe,
180         .kick_cpu     = smp_iSeries_kick_cpu,
181         .setup_cpu    = smp_iSeries_setup_cpu,
182 };
183
184 /* This is called very early. */
185 void __init smp_init_iSeries(void)
186 {
187         smp_ops = &iSeries_smp_ops;
188         systemcfg->processorCount       = smp_iSeries_numProcs();
189 }
190 #endif
191
192 #ifdef CONFIG_PPC_PSERIES
193 void smp_openpic_message_pass(int target, int msg)
194 {
195         /* make sure we're sending something that translates to an IPI */
196         if ( msg > 0x3 ){
197                 printk("SMP %d: smp_message_pass: unknown msg %d\n",
198                        smp_processor_id(), msg);
199                 return;
200         }
201         switch ( target )
202         {
203         case MSG_ALL:
204                 openpic_cause_IPI(msg, 0xffffffff);
205                 break;
206         case MSG_ALL_BUT_SELF:
207                 openpic_cause_IPI(msg,
208                                   0xffffffff & ~(1 << smp_processor_id()));
209                 break;
210         default:
211                 openpic_cause_IPI(msg, 1<<target);
212                 break;
213         }
214 }
215
216 static int __init smp_openpic_probe(void)
217 {
218         int nr_cpus;
219
220         nr_cpus = cpus_weight(cpu_possible_map);
221
222         if (nr_cpus > 1)
223                 openpic_request_IPIs();
224
225         return nr_cpus;
226 }
227
228 static void __devinit smp_openpic_setup_cpu(int cpu)
229 {
230         do_openpic_setup_cpu();
231 }
232
233 #ifdef CONFIG_HOTPLUG_CPU
234 /* Get state of physical CPU.
235  * Return codes:
236  *      0       - The processor is in the RTAS stopped state
237  *      1       - stop-self is in progress
238  *      2       - The processor is not in the RTAS stopped state
239  *      -1      - Hardware Error
240  *      -2      - Hardware Busy, Try again later.
241  */
242 static int query_cpu_stopped(unsigned int pcpu)
243 {
244         long cpu_status;
245         int status, qcss_tok;
246
247         qcss_tok = rtas_token("query-cpu-stopped-state");
248         BUG_ON(qcss_tok == RTAS_UNKNOWN_SERVICE);
249         status = rtas_call(qcss_tok, 1, 2, &cpu_status, pcpu);
250         if (status != 0) {
251                 printk(KERN_ERR
252                        "RTAS query-cpu-stopped-state failed: %i\n", status);
253                 return status;
254         }
255
256         return cpu_status;
257 }
258
259 int __cpu_disable(void)
260 {
261         /* FIXME: go put this in a header somewhere */
262         extern void xics_migrate_irqs_away(void);
263
264         systemcfg->processorCount--;
265
266         /*fix boot_cpuid here*/
267         if (smp_processor_id() == boot_cpuid)
268                 boot_cpuid = any_online_cpu(cpu_online_map);
269
270         /* FIXME: abstract this to not be platform specific later on */
271         xics_migrate_irqs_away();
272         return 0;
273 }
274
275 void __cpu_die(unsigned int cpu)
276 {
277         int tries;
278         int cpu_status;
279         unsigned int pcpu = get_hard_smp_processor_id(cpu);
280
281         for (tries = 0; tries < 5; tries++) {
282                 cpu_status = query_cpu_stopped(pcpu);
283
284                 if (cpu_status == 0)
285                         break;
286                 set_current_state(TASK_UNINTERRUPTIBLE);
287                 schedule_timeout(HZ);
288         }
289         if (cpu_status != 0) {
290                 printk("Querying DEAD? cpu %i (%i) shows %i\n",
291                        cpu, pcpu, cpu_status);
292         }
293
294         /* Isolation and deallocation are definitely done by
295          * drslot_chrp_cpu.  If they were not they would be
296          * done here.  Change isolate state to Isolate and
297          * change allocation-state to Unusable.
298          */
299         paca[cpu].xProcStart = 0;
300
301         /* So we can recognize if it fails to come up next time. */
302         cpu_callin_map[cpu] = 0;
303 }
304
305 /* Kill this cpu */
306 void cpu_die(void)
307 {
308         local_irq_disable();
309         rtas_stop_self();
310         /* Should never get here... */
311         BUG();
312         for(;;);
313 }
314
315 /* Search all cpu device nodes for an offline logical cpu.  If a
316  * device node has a "ibm,my-drc-index" property (meaning this is an
317  * LPAR), paranoid-check whether we own the cpu.  For each "thread"
318  * of a cpu, if it is offline and has the same hw index as before,
319  * grab that in preference.
320  */
321 static unsigned int find_physical_cpu_to_start(unsigned int old_hwindex)
322 {
323         struct device_node *np = NULL;
324         unsigned int best = -1U;
325
326         while ((np = of_find_node_by_type(np, "cpu"))) {
327                 int nr_threads, len;
328                 u32 *index = (u32 *)get_property(np, "ibm,my-drc-index", NULL);
329                 u32 *tid = (u32 *)
330                         get_property(np, "ibm,ppc-interrupt-server#s", &len);
331
332                 if (!tid)
333                         tid = (u32 *)get_property(np, "reg", &len);
334
335                 if (!tid)
336                         continue;
337
338                 /* If there is a drc-index, make sure that we own
339                  * the cpu.
340                  */
341                 if (index) {
342                         int state;
343                         int rc = rtas_get_sensor(9003, *index, &state);
344                         if (rc != 0 || state != 1)
345                                 continue;
346                 }
347
348                 nr_threads = len / sizeof(u32);
349
350                 while (nr_threads--) {
351                         if (0 == query_cpu_stopped(tid[nr_threads])) {
352                                 best = tid[nr_threads];
353                                 if (best == old_hwindex)
354                                         goto out;
355                         }
356                 }
357         }
358 out:
359         of_node_put(np);
360         return best;
361 }
362
363 /**
364  * smp_startup_cpu() - start the given cpu
365  *
366  * At boot time, there is nothing to do.  At run-time, call RTAS with
367  * the appropriate start location, if the cpu is in the RTAS stopped
368  * state.
369  *
370  * Returns:
371  *      0       - failure
372  *      1       - success
373  */
374 static inline int __devinit smp_startup_cpu(unsigned int lcpu)
375 {
376         int status;
377         extern void (*pseries_secondary_smp_init)(unsigned int cpu);
378         unsigned long start_here = __pa(pseries_secondary_smp_init);
379         unsigned int pcpu;
380
381         /* At boot time the cpus are already spinning in hold
382          * loops, so nothing to do. */
383         if (system_state == SYSTEM_BOOTING)
384                 return 1;
385
386         pcpu = find_physical_cpu_to_start(get_hard_smp_processor_id(lcpu));
387         if (pcpu == -1U) {
388                 printk(KERN_INFO "No more cpus available, failing\n");
389                 return 0;
390         }
391
392         /* Fixup atomic count: it exited inside IRQ handler. */
393         paca[lcpu].xCurrent->thread_info->preempt_count = 0;
394         /* Fixup SLB round-robin so next segment (kernel) goes in segment 0 */
395         paca[lcpu].xStab_data.next_round_robin = 0;
396
397         /* At boot this is done in prom.c. */
398         paca[lcpu].xHwProcNum = pcpu;
399
400         status = rtas_call(rtas_token("start-cpu"), 3, 1, NULL,
401                            pcpu, start_here, lcpu);
402         if (status != 0) {
403                 printk(KERN_ERR "start-cpu failed: %i\n", status);
404                 return 0;
405         }
406         return 1;
407 }
408
409 static inline void look_for_more_cpus(void)
410 {
411         int num_addr_cell, num_size_cell, len, i, maxcpus;
412         struct device_node *np;
413         unsigned int *ireg;
414
415         /* Find the property which will tell us about how many CPUs
416          * we're allowed to have. */
417         if ((np = find_path_device("/rtas")) == NULL) {
418                 printk(KERN_ERR "Could not find /rtas in device tree!\n");
419                 return;
420         }
421         num_addr_cell = prom_n_addr_cells(np);
422         num_size_cell = prom_n_size_cells(np);
423
424         ireg = (unsigned int *)get_property(np, "ibm,lrdr-capacity", &len);
425         if (ireg == NULL) {
426                 /* FIXME: make sure not marked as lrdr_capable() */
427                 return;
428         }
429
430         maxcpus = ireg[num_addr_cell + num_size_cell];
431         /* DRENG need to account for threads here too */
432
433         if (maxcpus > NR_CPUS) {
434                 printk(KERN_WARNING
435                        "Partition configured for %d cpus, "
436                        "operating system maximum is %d.\n", maxcpus, NR_CPUS);
437                 maxcpus = NR_CPUS;
438         } else
439                 printk(KERN_INFO "Partition configured for %d cpus.\n",
440                        maxcpus);
441
442         /* Make those cpus (which might appear later) possible too. */
443         for (i = 0; i < maxcpus; i++)
444                 cpu_set(i, cpu_possible_map);
445 }
446 #else /* ... CONFIG_HOTPLUG_CPU */
447 static inline int __devinit smp_startup_cpu(unsigned int lcpu)
448 {
449         return 1;
450 }
451 static inline void look_for_more_cpus(void)
452 {
453 }
454 #endif /* CONFIG_HOTPLUG_CPU */
455
456 static void smp_pSeries_kick_cpu(int nr)
457 {
458         BUG_ON(nr < 0 || nr >= NR_CPUS);
459
460         if (!smp_startup_cpu(nr))
461                 return;
462
463         /* The processor is currently spinning, waiting
464          * for the xProcStart field to become non-zero.
465          * After we set xProcStart, the processor will
466          * continue on to secondary_start
467          */
468         paca[nr].xProcStart = 1;
469 }
470 #endif /* CONFIG_PPC_PSERIES */
471
472 static void __init smp_space_timers(unsigned int max_cpus)
473 {
474         int i;
475         unsigned long offset = tb_ticks_per_jiffy / max_cpus;
476         unsigned long previous_tb = paca[boot_cpuid].next_jiffy_update_tb;
477
478         for_each_cpu(i) {
479                 if (i != boot_cpuid) {
480                         paca[i].next_jiffy_update_tb =
481                                 previous_tb + offset;
482                         previous_tb = paca[i].next_jiffy_update_tb;
483                 }
484         }
485 }
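/*
 * Worked example (assumed numbers, for illustration only): with
 * tb_ticks_per_jiffy = 120000 and max_cpus = 4, offset is 30000 timebase
 * ticks, so the three secondaries take their first tick 30000, 60000 and
 * 90000 ticks after the boot cpu, spreading the per-jiffy timer work
 * across the jiffy instead of having every cpu fire at once.
 */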
486
487 #ifdef CONFIG_PPC_PSERIES
488 void vpa_init(int cpu)
489 {
490         unsigned long flags;
491
492         /* Register the Virtual Processor Area (VPA) */
493         printk(KERN_INFO "register_vpa: cpu 0x%x\n", cpu);
494         flags = 1UL << (63 - 18);
495         paca[cpu].xLpPaca.xSLBCount = 64; /* SLB restore highwater mark */
496         register_vpa(flags, cpu, __pa((unsigned long)&(paca[cpu].xLpPaca))); 
497 }
498
499 static inline void smp_xics_do_message(int cpu, int msg)
500 {
501         set_bit(msg, &xics_ipi_message[cpu].value);
502         mb();
503         xics_cause_IPI(cpu);
504 }
505
506 static void smp_xics_message_pass(int target, int msg)
507 {
508         unsigned int i;
509
510         if (target < NR_CPUS) {
511                 smp_xics_do_message(target, msg);
512         } else {
513                 for_each_online_cpu(i) {
514                         if (target == MSG_ALL_BUT_SELF
515                             && i == smp_processor_id())
516                                 continue;
517                         smp_xics_do_message(i, msg);
518                 }
519         }
520 }
521
522 extern void xics_request_IPIs(void);
523
524 static int __init smp_xics_probe(void)
525 {
526 #ifdef CONFIG_SMP
527         xics_request_IPIs();
528 #endif
529
530         return cpus_weight(cpu_possible_map);
531 }
532
533 static void __devinit smp_xics_setup_cpu(int cpu)
534 {
535         if (cpu != boot_cpuid)
536                 xics_setup_cpu();
537 }
538
539 static spinlock_t timebase_lock = SPIN_LOCK_UNLOCKED;
540 static unsigned long timebase = 0;
541
542 static void __devinit pSeries_give_timebase(void)
543 {
544         spin_lock(&timebase_lock);
545         rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL);
546         timebase = get_tb();
547         spin_unlock(&timebase_lock);
548
549         while (timebase)
550                 barrier();
551         rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL);
552 }
553
554 static void __devinit pSeries_take_timebase(void)
555 {
556         while (!timebase)
557                 barrier();
558         spin_lock(&timebase_lock);
559         set_tb(timebase >> 32, timebase & 0xffffffff);
560         timebase = 0;
561         spin_unlock(&timebase_lock);
562 }
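/*
 * Handshake summary (descriptive note): the giving cpu freezes the
 * timebase through RTAS, publishes its TB value in 'timebase' and spins
 * until the taking cpu has copied that value into its own timebase
 * registers and cleared the variable; only then is the timebase thawed.
 * timebase_lock serializes both sides' access to the shared variable.
 */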
563
564 static struct smp_ops_t pSeries_openpic_smp_ops = {
565         .message_pass   = smp_openpic_message_pass,
566         .probe          = smp_openpic_probe,
567         .kick_cpu       = smp_pSeries_kick_cpu,
568         .setup_cpu      = smp_openpic_setup_cpu,
569 };
570
571 static struct smp_ops_t pSeries_xics_smp_ops = {
572         .message_pass   = smp_xics_message_pass,
573         .probe          = smp_xics_probe,
574         .kick_cpu       = smp_pSeries_kick_cpu,
575         .setup_cpu      = smp_xics_setup_cpu,
576 };
577
578 /* This is called very early */
579 void __init smp_init_pSeries(void)
580 {
581
582         if (naca->interrupt_controller == IC_OPEN_PIC)
583                 smp_ops = &pSeries_openpic_smp_ops;
584         else
585                 smp_ops = &pSeries_xics_smp_ops;
586
587         /* Non-lpar has additional take/give timebase */
588         if (systemcfg->platform == PLATFORM_PSERIES) {
589                 smp_ops->give_timebase = pSeries_give_timebase;
590                 smp_ops->take_timebase = pSeries_take_timebase;
591         }
592 }
593 #endif
594
595 void smp_local_timer_interrupt(struct pt_regs * regs)
596 {
597         if (!--(get_paca()->prof_counter)) {
598                 update_process_times(user_mode(regs));
599                 (get_paca()->prof_counter)=get_paca()->prof_multiplier;
600         }
601 }
602
603 void smp_message_recv(int msg, struct pt_regs *regs)
604 {
605         switch(msg) {
606         case PPC_MSG_CALL_FUNCTION:
607                 smp_call_function_interrupt();
608                 break;
609         case PPC_MSG_RESCHEDULE: 
610                 /* XXX Do we have to do this? */
611                 set_need_resched();
612                 break;
613 #if 0
614         case PPC_MSG_MIGRATE_TASK:
615                 /* spare */
616                 break;
617 #endif
618 #ifdef CONFIG_DEBUGGER
619         case PPC_MSG_DEBUGGER_BREAK:
620                 debugger_ipi(regs);
621                 break;
622 #endif
623         default:
624                 printk("SMP %d: smp_message_recv(): unknown msg %d\n",
625                        smp_processor_id(), msg);
626                 break;
627         }
628 }
629
630 void smp_send_reschedule(int cpu)
631 {
632         smp_ops->message_pass(cpu, PPC_MSG_RESCHEDULE);
633 }
634
635 #ifdef CONFIG_DEBUGGER
636 void smp_send_debugger_break(int cpu)
637 {
638         smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK);
639 }
640 #endif
641
642 static void stop_this_cpu(void *dummy)
643 {
644         local_irq_disable();
645         while (1)
646                 ;
647 }
648
649 void smp_send_stop(void)
650 {
651         smp_call_function(stop_this_cpu, NULL, 1, 0);
652 }
653
654 /*
655  * Structure and data for smp_call_function(). This is designed to minimise
656  * static memory requirements. It also looks cleaner.
657  * Stolen from the i386 version.
658  */
659 static spinlock_t call_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
660
661 static struct call_data_struct {
662         void (*func) (void *info);
663         void *info;
664         atomic_t started;
665         atomic_t finished;
666         int wait;
667 } *call_data;
668
669 /* delay of at least 8 seconds on 1GHz cpu */
670 #define SMP_CALL_TIMEOUT (1UL << (30 + 3))
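/*
 * Rough arithmetic behind the comment above: 1UL << 33 is about 8.6e9
 * busy-wait iterations; assuming very roughly one iteration per cycle on
 * a 1GHz cpu, that is on the order of 8 seconds.  Faster cpus give up
 * sooner in wall-clock terms, since the loop count is fixed.
 */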
671
672 /*
673  * This function sends a 'generic call function' IPI to all other CPUs
674  * in the system.
675  *
676  * [SUMMARY] Run a function on all other CPUs.
677  * <func> The function to run. This must be fast and non-blocking.
678  * <info> An arbitrary pointer to pass to the function.
679  * <nonatomic> currently unused.
680  * <wait> If true, wait (atomically) until function has completed on other CPUs.
681  * [RETURNS] 0 on success, else a negative status code. Does not return until
682  * remote CPUs are nearly ready to execute <<func>> or have already executed it.
683  *
684  * You must not call this function with disabled interrupts or from a
685  * hardware interrupt handler or from a bottom half handler.
686  */
687 int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
688                        int wait)
689 {
690         struct call_data_struct data;
691         int ret = -1, cpus;
692         unsigned long timeout;
693
694         /* Can deadlock when called with interrupts disabled */
695         WARN_ON(irqs_disabled());
696
697         data.func = func;
698         data.info = info;
699         atomic_set(&data.started, 0);
700         data.wait = wait;
701         if (wait)
702                 atomic_set(&data.finished, 0);
703
704         spin_lock(&call_lock);
705         /* Must grab online cpu count with preempt disabled, otherwise
706          * it can change. */
707         cpus = num_online_cpus() - 1;
708         if (!cpus) {
709                 ret = 0;
710                 goto out;
711         }
712
713         call_data = &data;
714         wmb();
715         /* Send a message to all other CPUs and wait for them to respond */
716         smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_CALL_FUNCTION);
717
718         /* Wait for response */
719         timeout = SMP_CALL_TIMEOUT;
720         while (atomic_read(&data.started) != cpus) {
721                 HMT_low();
722                 if (--timeout == 0) {
723                         printk("smp_call_function on cpu %d: other cpus not "
724                                "responding (%d)\n", smp_processor_id(),
725                                atomic_read(&data.started));
726                         debugger(0);
727                         goto out;
728                 }
729         }
730
731         if (wait) {
732                 timeout = SMP_CALL_TIMEOUT;
733                 while (atomic_read(&data.finished) != cpus) {
734                         HMT_low();
735                         if (--timeout == 0) {
736                                 printk("smp_call_function on cpu %d: other "
737                                        "cpus not finishing (%d/%d)\n",
738                                        smp_processor_id(),
739                                        atomic_read(&data.finished),
740                                        atomic_read(&data.started));
741                                 debugger(0);
742                                 goto out;
743                         }
744                 }
745         }
746
747         ret = 0;
748
749 out:
750         call_data = NULL;
751         HMT_medium();
752         spin_unlock(&call_lock);
753         return ret;
754 }
755
756 EXPORT_SYMBOL_GPL(smp_call_function);
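/*
 * Illustrative usage sketch (not part of the original file; names below
 * are hypothetical): a caller might run a quick, non-blocking hook on
 * every other online cpu and wait for completion like this:
 *
 *	static void show_pvr(void *unused)
 *	{
 *		printk("cpu %d: pvr 0x%x\n", smp_processor_id(), _get_PVR());
 *	}
 *
 *	if (smp_call_function(show_pvr, NULL, 1, 1) != 0)
 *		printk("some cpus did not respond\n");
 *
 * The hook runs from the IPI handler on the remote cpus, so it must not
 * block; wait=1 makes the caller spin until every other cpu has finished
 * executing it.
 */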
757
758 void smp_call_function_interrupt(void)
759 {
760         void (*func) (void *info);
761         void *info;
762         int wait;
763
764         /* call_data will be NULL if the sender timed out while
765          * waiting on us to receive the call.
766          */
767         if (!call_data)
768                 return;
769
770         func = call_data->func;
771         info = call_data->info;
772         wait = call_data->wait;
773
774         if (!wait)
775                 smp_mb__before_atomic_inc();
776
777         /*
778          * Notify initiating CPU that I've grabbed the data and am
779          * about to execute the function
780          */
781         atomic_inc(&call_data->started);
782         /*
783          * At this point the info structure may be out of scope unless wait==1
784          */
785         (*func)(info);
786         if (wait) {
787                 smp_mb__before_atomic_inc();
788                 atomic_inc(&call_data->finished);
789         }
790 }
791
792 extern unsigned long decr_overclock;
793 extern struct gettimeofday_struct do_gtod;
794
795 struct thread_info *current_set[NR_CPUS];
796
797 DECLARE_PER_CPU(unsigned int, pvr);
798
799 static void __devinit smp_store_cpu_info(int id)
800 {
801         per_cpu(pvr, id) = _get_PVR();
802 }
803
804 static void __init smp_create_idle(unsigned int cpu)
805 {
806         struct pt_regs regs;
807         struct task_struct *p;
808
809         /* create a process for the processor */
810         /* only regs.msr is actually used, and 0 is OK for it */
811         memset(&regs, 0, sizeof(struct pt_regs));
812         p = copy_process(CLONE_VM | CLONE_IDLETASK,
813                          0, &regs, 0, NULL, NULL);
814         if (IS_ERR(p))
815                 panic("failed fork for CPU %u: %li", cpu, PTR_ERR(p));
816
817         wake_up_forked_process(p);
818         init_idle(p, cpu);
819         unhash_process(p);
820
821         paca[cpu].xCurrent = p;
822         current_set[cpu] = p->thread_info;
823 }
824
825 void __init smp_prepare_cpus(unsigned int max_cpus)
826 {
827         unsigned int cpu;
828
829         /* 
830          * setup_cpu may need to be called on the boot cpu.  We haven't
831          * spun any cpus up yet, but let's be paranoid.
832          */
833         BUG_ON(boot_cpuid != smp_processor_id());
834
835         /* Fixup boot cpu */
836         smp_store_cpu_info(boot_cpuid);
837         cpu_callin_map[boot_cpuid] = 1;
838         paca[boot_cpuid].prof_counter = 1;
839         paca[boot_cpuid].prof_multiplier = 1;
840
841 #ifndef CONFIG_PPC_ISERIES
842         paca[boot_cpuid].next_jiffy_update_tb = tb_last_stamp = get_tb();
843
844         /*
845          * Should update do_gtod.stamp_xsec.
846          * For now we leave it which means the time can be some
847          * number of msecs off until someone does a settimeofday()
848          */
849         do_gtod.tb_orig_stamp = tb_last_stamp;
850
851         look_for_more_cpus();
852 #endif
853
854         max_cpus = smp_ops->probe();
855  
856         /* Backup CPU 0 state if necessary */
857         __save_cpu_setup();
858
859         smp_space_timers(max_cpus);
860
861         for_each_cpu(cpu)
862                 if (cpu != boot_cpuid)
863                         smp_create_idle(cpu);
864 }
865
866 void __devinit smp_prepare_boot_cpu(void)
867 {
868         BUG_ON(smp_processor_id() != boot_cpuid);
869
870         /* cpu_possible is set up in prom.c */
871         cpu_set(boot_cpuid, cpu_online_map);
872
873         paca[boot_cpuid].xCurrent = current;
874         current_set[boot_cpuid] = current->thread_info;
875 }
876
877 int __devinit __cpu_up(unsigned int cpu)
878 {
879         int c;
880
881         /* At boot, don't bother with non-present cpus -JSCHOPP */
882         if (system_state == SYSTEM_BOOTING && !cpu_present_at_boot(cpu))
883                 return -ENOENT;
884
885         paca[cpu].prof_counter = 1;
886         paca[cpu].prof_multiplier = 1;
887         paca[cpu].default_decr = tb_ticks_per_jiffy / decr_overclock;
888
889         if (!(cur_cpu_spec->cpu_features & CPU_FTR_SLB)) {
890                 void *tmp;
891
892                 /* maximum of 48 CPUs on machines with a segment table */
893                 if (cpu >= 48)
894                         BUG();
895
896                 tmp = &stab_array[PAGE_SIZE * cpu];
897                 memset(tmp, 0, PAGE_SIZE); 
898                 paca[cpu].xStab_data.virt = (unsigned long)tmp;
899                 paca[cpu].xStab_data.real = virt_to_abs(tmp);
900         }
901
902         /* The information for processor bringup must
903          * be written out to main store before we release
904          * the processor.
905          */
906         mb();
907
908         /* wake up cpus */
909         smp_ops->kick_cpu(cpu);
910
911         /*
912          * wait to see if the cpu made a callin (is actually up).
913          * use this value that I found through experimentation.
914          * -- Cort
915          */
916         if (system_state == SYSTEM_BOOTING)
917                 for (c = 5000; c && !cpu_callin_map[cpu]; c--)
918                         udelay(100);
919 #ifdef CONFIG_HOTPLUG_CPU
920         else
921                 /*
922                  * CPUs can take much longer to come up in the
923                  * hotplug case.  Wait five seconds.
924                  */
925                 for (c = 25; c && !cpu_callin_map[cpu]; c--) {
926                         set_current_state(TASK_UNINTERRUPTIBLE);
927                         schedule_timeout(HZ/5);
928                 }
929 #endif
930
931         if (!cpu_callin_map[cpu]) {
932                 printk("Processor %u is stuck.\n", cpu);
933                 return -ENOENT;
934         }
935
936         printk("Processor %u found.\n", cpu);
937
938         if (smp_ops->give_timebase)
939                 smp_ops->give_timebase();
940         cpu_set(cpu, cpu_online_map);
941         return 0;
942 }
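/*
 * Bringup sequence in brief (descriptive note): __cpu_up() fills in the
 * new cpu's paca fields, kicks it via smp_ops->kick_cpu() and then polls
 * cpu_callin_map until start_secondary() on the new cpu sets it; after
 * that the timebases are synchronized (if the platform needs it) and the
 * cpu is marked online.
 */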
943
944 extern unsigned int default_distrib_server;
945 /* Activate a secondary processor. */
946 int __devinit start_secondary(void *unused)
947 {
948         unsigned int cpu = smp_processor_id();
949
950         atomic_inc(&init_mm.mm_count);
951         current->active_mm = &init_mm;
952
953         smp_store_cpu_info(cpu);
954         set_dec(paca[cpu].default_decr);
955         cpu_callin_map[cpu] = 1;
956
957         smp_ops->setup_cpu(cpu);
958         if (smp_ops->take_timebase)
959                 smp_ops->take_timebase();
960
961         get_paca()->yielded = 0;
962
963 #ifdef CONFIG_PPC_PSERIES
964         if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
965                 vpa_init(cpu); 
966         }
967
968 #ifdef CONFIG_IRQ_ALL_CPUS
969         /* Put the calling processor into the GIQ.  This is really only
970          * necessary from a secondary thread as the OF start-cpu interface
971          * performs this function for us on primary threads.
972          */
973         /* TODO: 9005 is #defined in rtas-proc.c -- move to a header */
974         rtas_set_indicator(9005, default_distrib_server, 1);
975 #endif
976 #endif
977
978         local_irq_enable();
979
980         return cpu_idle(NULL);
981 }
982
983 int setup_profiling_timer(unsigned int multiplier)
984 {
985         return 0;
986 }
987
988 void __init smp_cpus_done(unsigned int max_cpus)
989 {
990         cpumask_t old_mask;
991
992         /* We want the setup_cpu() here to be called from CPU 0, but our
993          * init thread may have been "borrowed" by another CPU in the meantime
994          * so we pin ourselves down to CPU 0 for a short while
995          */
996         old_mask = current->cpus_allowed;
997         set_cpus_allowed(current, cpumask_of_cpu(boot_cpuid));
998         
999         smp_ops->setup_cpu(boot_cpuid);
1000
1001         /* XXX fix this, xics currently relies on it - Anton */
1002         smp_threads_ready = 1;
1003
1004         set_cpus_allowed(current, old_mask);
1005 }
1006
1007 #ifdef CONFIG_SCHED_SMT
1008 #ifdef CONFIG_NUMA
1009 static struct sched_group sched_group_cpus[NR_CPUS];
1010 static struct sched_group sched_group_phys[NR_CPUS];
1011 static struct sched_group sched_group_nodes[MAX_NUMNODES];
1012 static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
1013 static DEFINE_PER_CPU(struct sched_domain, phys_domains);
1014 static DEFINE_PER_CPU(struct sched_domain, node_domains);
1015 __init void arch_init_sched_domains(void)
1016 {
1017         int i;
1018         struct sched_group *first = NULL, *last = NULL;
1019
1020         /* Set up domains */
1021         for_each_cpu(i) {
1022                 struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
1023                 struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
1024                 struct sched_domain *node_domain = &per_cpu(node_domains, i);
1025                 int node = cpu_to_node(i);
1026                 cpumask_t nodemask = node_to_cpumask(node);
1027                 cpumask_t my_cpumask = cpumask_of_cpu(i);
1028                 cpumask_t sibling_cpumask = cpumask_of_cpu(i ^ 0x1);
1029
1030                 *cpu_domain = SD_SIBLING_INIT;
1031                 if (cur_cpu_spec->cpu_features & CPU_FTR_SMT)
1032                         cpus_or(cpu_domain->span, my_cpumask, sibling_cpumask);
1033                 else
1034                         cpu_domain->span = my_cpumask;
1035                 cpu_domain->parent = phys_domain;
1036                 cpu_domain->groups = &sched_group_cpus[i];
1037
1038                 *phys_domain = SD_CPU_INIT;
1039                 phys_domain->span = nodemask;
1040                 phys_domain->parent = node_domain;
1041                 phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)];
1042
1043                 *node_domain = SD_NODE_INIT;
1044                 node_domain->span = cpu_possible_map;
1045                 node_domain->groups = &sched_group_nodes[node];
1046         }
1047
1048         /* Set up CPU (sibling) groups */
1049         for_each_cpu(i) {
1050                 struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
1051                 int j;
1052                 first = last = NULL;
1053
1054                 if (i != first_cpu(cpu_domain->span))
1055                         continue;
1056
1057                 for_each_cpu_mask(j, cpu_domain->span) {
1058                         struct sched_group *cpu = &sched_group_cpus[j];
1059
1060                         cpus_clear(cpu->cpumask);
1061                         cpu_set(j, cpu->cpumask);
1062                         cpu->cpu_power = SCHED_LOAD_SCALE;
1063
1064                         if (!first)
1065                                 first = cpu;
1066                         if (last)
1067                                 last->next = cpu;
1068                         last = cpu;
1069                 }
1070                 last->next = first;
1071         }
1072
1073         for (i = 0; i < MAX_NUMNODES; i++) {
1074                 int j;
1075                 cpumask_t nodemask;
1076                 struct sched_group *node = &sched_group_nodes[i];
1077                 cpumask_t node_cpumask = node_to_cpumask(i);
1078                 cpus_and(nodemask, node_cpumask, cpu_possible_map);
1079
1080                 if (cpus_empty(nodemask))
1081                         continue;
1082
1083                 first = last = NULL;
1084                 /* Set up physical groups */
1085                 for_each_cpu_mask(j, nodemask) {
1086                         struct sched_domain *cpu_domain = &per_cpu(cpu_domains, j);
1087                         struct sched_group *cpu = &sched_group_phys[j];
1088
1089                         if (j != first_cpu(cpu_domain->span))
1090                                 continue;
1091
1092                         cpu->cpumask = cpu_domain->span;
1093                         /*
1094                          * Make each extra sibling increase power by 10% of
1095                          * the basic CPU. This is very arbitrary.
1096                          */
1097                         cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
1098                         node->cpu_power += cpu->cpu_power;
1099
1100                         if (!first)
1101                                 first = cpu;
1102                         if (last)
1103                                 last->next = cpu;
1104                         last = cpu;
1105                 }
1106                 last->next = first;
1107         }
1108
1109         /* Set up nodes */
1110         first = last = NULL;
1111         for (i = 0; i < MAX_NUMNODES; i++) {
1112                 struct sched_group *cpu = &sched_group_nodes[i];
1113                 cpumask_t nodemask;
1114                 cpumask_t node_cpumask = node_to_cpumask(i);
1115                 cpus_and(nodemask, node_cpumask, cpu_possible_map);
1116
1117                 if (cpus_empty(nodemask))
1118                         continue;
1119
1120                 cpu->cpumask = nodemask;
1121                 /* ->cpu_power already setup */
1122
1123                 if (!first)
1124                         first = cpu;
1125                 if (last)
1126                         last->next = cpu;
1127                 last = cpu;
1128         }
1129         last->next = first;
1130
1131         mb();
1132         for_each_cpu(i) {
1133                 struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
1134                 cpu_attach_domain(cpu_domain, i);
1135         }
1136 }
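/*
 * Illustration (hypothetical 2-node box with two SMT-2 cores per node,
 * cpus 0-3 on node 0 and 4-7 on node 1): each cpu i gets a sibling
 * domain spanning the pair {i, i^1}, a physical domain spanning its
 * node's cpus, and a node domain spanning cpu_possible_map, with the
 * matching ring of sched_groups built at each level by the loops above.
 */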
1137 #else /* !CONFIG_NUMA */
1138 static struct sched_group sched_group_cpus[NR_CPUS];
1139 static struct sched_group sched_group_phys[NR_CPUS];
1140 static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
1141 static DEFINE_PER_CPU(struct sched_domain, phys_domains);
1142 __init void arch_init_sched_domains(void)
1143 {
1144         int i;
1145         struct sched_group *first = NULL, *last = NULL;
1146
1147         /* Set up domains */
1148         for_each_cpu(i) {
1149                 struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
1150                 struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
1151                 cpumask_t my_cpumask = cpumask_of_cpu(i);
1152                 cpumask_t sibling_cpumask = cpumask_of_cpu(i ^ 0x1);
1153
1154                 *cpu_domain = SD_SIBLING_INIT;
1155                 if (cur_cpu_spec->cpu_features & CPU_FTR_SMT)
1156                         cpus_or(cpu_domain->span, my_cpumask, sibling_cpumask);
1157                 else
1158                         cpu_domain->span = my_cpumask;
1159                 cpu_domain->parent = phys_domain;
1160                 cpu_domain->groups = &sched_group_cpus[i];
1161
1162                 *phys_domain = SD_CPU_INIT;
1163                 phys_domain->span = cpu_possible_map;
1164                 phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)];
1165         }
1166
1167         /* Set up CPU (sibling) groups */
1168         for_each_cpu(i) {
1169                 struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
1170                 int j;
1171                 first = last = NULL;
1172
1173                 if (i != first_cpu(cpu_domain->span))
1174                         continue;
1175
1176                 for_each_cpu_mask(j, cpu_domain->span) {
1177                         struct sched_group *cpu = &sched_group_cpus[j];
1178
1179                         cpus_clear(cpu->cpumask);
1180                         cpu_set(j, cpu->cpumask);
1181                         cpu->cpu_power = SCHED_LOAD_SCALE;
1182
1183                         if (!first)
1184                                 first = cpu;
1185                         if (last)
1186                                 last->next = cpu;
1187                         last = cpu;
1188                 }
1189                 last->next = first;
1190         }
1191
1192         first = last = NULL;
1193         /* Set up physical groups */
1194         for_each_cpu(i) {
1195                 struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
1196                 struct sched_group *cpu = &sched_group_phys[i];
1197
1198                 if (i != first_cpu(cpu_domain->span))
1199                         continue;
1200
1201                 cpu->cpumask = cpu_domain->span;
1202                 /* See SMT+NUMA setup for comment */
1203                 cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
1204
1205                 if (!first)
1206                         first = cpu;
1207                 if (last)
1208                         last->next = cpu;
1209                 last = cpu;
1210         }
1211         last->next = first;
1212
1213         mb();
1214         for_each_cpu(i) {
1215                 struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
1216                 cpu_attach_domain(cpu_domain, i);
1217         }
1218 }
1219 #endif /* CONFIG_NUMA */
1220 #endif /* CONFIG_SCHED_SMT */