This commit was manufactured by cvs2svn to create branch 'vserver'.
[linux-2.6.git] / arch / ppc64 / kernel / pSeries_smp.c
1 /*
2  * SMP support for pSeries machines.
3  *
4  * Dave Engebretsen, Peter Bergner, and
5  * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
6  *
7  * Plus various changes from other IBM teams...
8  *
9  *      This program is free software; you can redistribute it and/or
10  *      modify it under the terms of the GNU General Public License
11  *      as published by the Free Software Foundation; either version
12  *      2 of the License, or (at your option) any later version.
13  */
14
15 #undef DEBUG
16
17 #include <linux/config.h>
18 #include <linux/kernel.h>
19 #include <linux/module.h>
20 #include <linux/sched.h>
21 #include <linux/smp.h>
22 #include <linux/smp_lock.h>
23 #include <linux/interrupt.h>
24 #include <linux/kernel_stat.h>
25 #include <linux/delay.h>
26 #include <linux/init.h>
27 #include <linux/spinlock.h>
28 #include <linux/cache.h>
29 #include <linux/err.h>
30 #include <linux/sysdev.h>
31 #include <linux/cpu.h>
32
33 #include <asm/ptrace.h>
34 #include <asm/atomic.h>
35 #include <asm/irq.h>
36 #include <asm/page.h>
37 #include <asm/pgtable.h>
38 #include <asm/io.h>
39 #include <asm/prom.h>
40 #include <asm/smp.h>
41 #include <asm/naca.h>
42 #include <asm/paca.h>
43 #include <asm/time.h>
44 #include <asm/ppcdebug.h>
45 #include <asm/machdep.h>
46 #include <asm/xics.h>
47 #include <asm/cputable.h>
48 #include <asm/system.h>
49 #include <asm/rtas.h>
50 #include <asm/plpar_wrappers.h>
51
52 #include "mpic.h"
53
54 #ifdef DEBUG
55 #define DBG(fmt...) udbg_printf(fmt)
56 #else
57 #define DBG(fmt...)
58 #endif
59
60 extern void pseries_secondary_smp_init(unsigned long); 
61
62 /* Get state of physical CPU.
63  * Return codes:
64  *      0       - The processor is in the RTAS stopped state
65  *      1       - stop-self is in progress
66  *      2       - The processor is not in the RTAS stopped state
67  *      -1      - Hardware Error
68  *      -2      - Hardware Busy, Try again later.
69  */
70 static int query_cpu_stopped(unsigned int pcpu)
71 {
72         int cpu_status;
73         int status, qcss_tok;
74
75         qcss_tok = rtas_token("query-cpu-stopped-state");
76         if (qcss_tok == RTAS_UNKNOWN_SERVICE)
77                 return -1;
78         status = rtas_call(qcss_tok, 1, 2, &cpu_status, pcpu);
79         if (status != 0) {
80                 printk(KERN_ERR
81                        "RTAS query-cpu-stopped-state failed: %i\n", status);
82                 return status;
83         }
84
85         return cpu_status;
86 }
87
88
89 #ifdef CONFIG_HOTPLUG_CPU
90
91 int __cpu_disable(void)
92 {
93         /* FIXME: go put this in a header somewhere */
94         extern void xics_migrate_irqs_away(void);
95
96         systemcfg->processorCount--;
97
98         /*fix boot_cpuid here*/
99         if (smp_processor_id() == boot_cpuid)
100                 boot_cpuid = any_online_cpu(cpu_online_map);
101
102         /* FIXME: abstract this to not be platform specific later on */
103         xics_migrate_irqs_away();
104         return 0;
105 }
106
107 void __cpu_die(unsigned int cpu)
108 {
109         int tries;
110         int cpu_status;
111         unsigned int pcpu = get_hard_smp_processor_id(cpu);
112
113         for (tries = 0; tries < 25; tries++) {
114                 cpu_status = query_cpu_stopped(pcpu);
115                 if (cpu_status == 0 || cpu_status == -1)
116                         break;
117                 set_current_state(TASK_UNINTERRUPTIBLE);
118                 schedule_timeout(HZ/5);
119         }
120         if (cpu_status != 0) {
121                 printk("Querying DEAD? cpu %i (%i) shows %i\n",
122                        cpu, pcpu, cpu_status);
123         }
124
125         /* Isolation and deallocation are definatly done by
126          * drslot_chrp_cpu.  If they were not they would be
127          * done here.  Change isolate state to Isolate and
128          * change allocation-state to Unusable.
129          */
130         paca[cpu].cpu_start = 0;
131 }
132
133 /* Search all cpu device nodes for an offline logical cpu.  If a
134  * device node has a "ibm,my-drc-index" property (meaning this is an
135  * LPAR), paranoid-check whether we own the cpu.  For each "thread"
136  * of a cpu, if it is offline and has the same hw index as before,
137  * grab that in preference.
138  */
139 static unsigned int find_physical_cpu_to_start(unsigned int old_hwindex)
140 {
141         struct device_node *np = NULL;
142         unsigned int best = -1U;
143
144         while ((np = of_find_node_by_type(np, "cpu"))) {
145                 int nr_threads, len;
146                 u32 *index = (u32 *)get_property(np, "ibm,my-drc-index", NULL);
147                 u32 *tid = (u32 *)
148                         get_property(np, "ibm,ppc-interrupt-server#s", &len);
149
150                 if (!tid)
151                         tid = (u32 *)get_property(np, "reg", &len);
152
153                 if (!tid)
154                         continue;
155
156                 /* If there is a drc-index, make sure that we own
157                  * the cpu.
158                  */
159                 if (index) {
160                         int state;
161                         int rc = rtas_get_sensor(9003, *index, &state);
162                         if (rc != 0 || state != 1)
163                                 continue;
164                 }
165
166                 nr_threads = len / sizeof(u32);
167
168                 while (nr_threads--) {
169                         if (0 == query_cpu_stopped(tid[nr_threads])) {
170                                 best = tid[nr_threads];
171                                 if (best == old_hwindex)
172                                         goto out;
173                         }
174                 }
175         }
176 out:
177         of_node_put(np);
178         return best;
179 }
180
181 /**
182  * smp_startup_cpu() - start the given cpu
183  *
184  * At boot time, there is nothing to do.  At run-time, call RTAS with
185  * the appropriate start location, if the cpu is in the RTAS stopped
186  * state.
187  *
188  * Returns:
189  *      0       - failure
190  *      1       - success
191  */
192 static inline int __devinit smp_startup_cpu(unsigned int lcpu)
193 {
194         int status;
195         unsigned long start_here = __pa((u32)*((unsigned long *)
196                                                pseries_secondary_smp_init));
197         unsigned int pcpu;
198
199         /* At boot time the cpus are already spinning in hold
200          * loops, so nothing to do. */
201         if (system_state < SYSTEM_RUNNING)
202                 return 1;
203
204         pcpu = find_physical_cpu_to_start(get_hard_smp_processor_id(lcpu));
205         if (pcpu == -1U) {
206                 printk(KERN_INFO "No more cpus available, failing\n");
207                 return 0;
208         }
209
210         /* Fixup atomic count: it exited inside IRQ handler. */
211         paca[lcpu].__current->thread_info->preempt_count        = 0;
212
213         /* At boot this is done in prom.c. */
214         paca[lcpu].hw_cpu_id = pcpu;
215
216         status = rtas_call(rtas_token("start-cpu"), 3, 1, NULL,
217                            pcpu, start_here, lcpu);
218         if (status != 0) {
219                 printk(KERN_ERR "start-cpu failed: %i\n", status);
220                 return 0;
221         }
222         return 1;
223 }
224 #else /* ... CONFIG_HOTPLUG_CPU */
225 static inline int __devinit smp_startup_cpu(unsigned int lcpu)
226 {
227         return 1;
228 }
229 #endif /* CONFIG_HOTPLUG_CPU */
230
231 static inline void smp_xics_do_message(int cpu, int msg)
232 {
233         set_bit(msg, &xics_ipi_message[cpu].value);
234         mb();
235         xics_cause_IPI(cpu);
236 }
237
238 static void smp_xics_message_pass(int target, int msg)
239 {
240         unsigned int i;
241
242         if (target < NR_CPUS) {
243                 smp_xics_do_message(target, msg);
244         } else {
245                 for_each_online_cpu(i) {
246                         if (target == MSG_ALL_BUT_SELF
247                             && i == smp_processor_id())
248                                 continue;
249                         smp_xics_do_message(i, msg);
250                 }
251         }
252 }
253
254 extern void xics_request_IPIs(void);
255
256 static int __init smp_xics_probe(void)
257 {
258         xics_request_IPIs();
259
260         return cpus_weight(cpu_possible_map);
261 }
262
263 static void __devinit smp_xics_setup_cpu(int cpu)
264 {
265         if (cpu != boot_cpuid)
266                 xics_setup_cpu();
267 }
268
269 static spinlock_t timebase_lock = SPIN_LOCK_UNLOCKED;
270 static unsigned long timebase = 0;
271
272 static void __devinit pSeries_give_timebase(void)
273 {
274         spin_lock(&timebase_lock);
275         rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL);
276         timebase = get_tb();
277         spin_unlock(&timebase_lock);
278
279         while (timebase)
280                 barrier();
281         rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL);
282 }
283
284 static void __devinit pSeries_take_timebase(void)
285 {
286         while (!timebase)
287                 barrier();
288         spin_lock(&timebase_lock);
289         set_tb(timebase >> 32, timebase & 0xffffffff);
290         timebase = 0;
291         spin_unlock(&timebase_lock);
292 }
293
294 static void __devinit pSeries_late_setup_cpu(int cpu)
295 {
296         extern unsigned int default_distrib_server;
297
298         if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
299                 vpa_init(cpu); 
300         }
301
302 #ifdef CONFIG_IRQ_ALL_CPUS
303         /* Put the calling processor into the GIQ.  This is really only
304          * necessary from a secondary thread as the OF start-cpu interface
305          * performs this function for us on primary threads.
306          */
307         /* TODO: 9005 is #defined in rtas-proc.c -- move to a header */
308         rtas_set_indicator(9005, default_distrib_server, 1);
309 #endif
310 }
311
312
313 void __devinit smp_pSeries_kick_cpu(int nr)
314 {
315         BUG_ON(nr < 0 || nr >= NR_CPUS);
316
317         if (!smp_startup_cpu(nr))
318                 return;
319
320         /*
321          * The processor is currently spinning, waiting for the
322          * cpu_start field to become non-zero After we set cpu_start,
323          * the processor will continue on to secondary_start
324          */
325         paca[nr].cpu_start = 1;
326 }
327
328 static struct smp_ops_t pSeries_mpic_smp_ops = {
329         .message_pass   = smp_mpic_message_pass,
330         .probe          = smp_mpic_probe,
331         .kick_cpu       = smp_pSeries_kick_cpu,
332         .setup_cpu      = smp_mpic_setup_cpu,
333         .late_setup_cpu = pSeries_late_setup_cpu,
334 };
335
336 static struct smp_ops_t pSeries_xics_smp_ops = {
337         .message_pass   = smp_xics_message_pass,
338         .probe          = smp_xics_probe,
339         .kick_cpu       = smp_pSeries_kick_cpu,
340         .setup_cpu      = smp_xics_setup_cpu,
341         .late_setup_cpu = pSeries_late_setup_cpu,
342 };
343
344 /* This is called very early */
345 void __init smp_init_pSeries(void)
346 {
347         int ret, i;
348
349         DBG(" -> smp_init_pSeries()\n");
350
351         if (naca->interrupt_controller == IC_OPEN_PIC)
352                 smp_ops = &pSeries_mpic_smp_ops;
353         else
354                 smp_ops = &pSeries_xics_smp_ops;
355
356         /* Start secondary threads on SMT systems; primary threads
357          * are already in the running state.
358          */
359         for_each_present_cpu(i) {
360                 if (query_cpu_stopped(get_hard_smp_processor_id(i)) == 0) {
361                         printk("%16.16x : starting thread\n", i);
362                         DBG("%16.16x : starting thread\n", i);
363                         rtas_call(rtas_token("start-cpu"), 3, 1, &ret,
364                                   get_hard_smp_processor_id(i),
365                                   __pa((u32)*((unsigned long *)
366                                               pseries_secondary_smp_init)),
367                                   i);
368                 }
369         }
370
371         if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR)
372                 vpa_init(boot_cpuid);
373
374         /* Non-lpar has additional take/give timebase */
375         if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) {
376                 smp_ops->give_timebase = pSeries_give_timebase;
377                 smp_ops->take_timebase = pSeries_take_timebase;
378         }
379
380         DBG(" <- smp_init_pSeries()\n");
381 }
382