This commit was manufactured by cvs2svn to create branch 'vserver'.
[linux-2.6.git] / drivers / xen / core / smpboot.c
1 /*
2  *      Xen SMP booting functions
3  *
4  *      See arch/i386/kernel/smpboot.c for copyright and credits for derived
5  *      portions of this file.
6  */
7
8 #include <linux/module.h>
9 #include <linux/config.h>
10 #include <linux/init.h>
11 #include <linux/kernel.h>
12 #include <linux/mm.h>
13 #include <linux/sched.h>
14 #include <linux/kernel_stat.h>
15 #include <linux/smp_lock.h>
16 #include <linux/irq.h>
17 #include <linux/bootmem.h>
18 #include <linux/notifier.h>
19 #include <linux/cpu.h>
20 #include <linux/percpu.h>
21 #include <asm/desc.h>
22 #include <asm/arch_hooks.h>
23 #include <asm/pgalloc.h>
24 #include <xen/evtchn.h>
25 #include <xen/interface/vcpu.h>
26 #include <xen/cpu_hotplug.h>
27 #include <xen/xenbus.h>
28
/* IPI handlers implemented in the shared arch SMP code. */
extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);

/* Per-CPU local timer setup/teardown (provided by the Xen timer code). */
extern void local_setup_timer(unsigned int cpu);
extern void local_teardown_timer(unsigned int cpu);

/* Entry points installed into each VCPU's initial context in
 * cpu_initialize_context(). */
extern void hypervisor_callback(void);
extern void failsafe_callback(void);
extern void system_call(void);
extern void smp_trap_init(trap_info_t *);
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
EXPORT_SYMBOL(phys_proc_id);
int cpu_core_id[NR_CPUS]; /* Core ID of each logical CPU */
EXPORT_SYMBOL(cpu_core_id);
/* Last-level-cache ID; the element type differs between the i386 and
 * x86-64 trees, hence the #if. */
#if defined(__i386__)
int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
#elif defined(__x86_64__)
u8 cpu_llc_id[NR_CPUS] __cpuinitdata  = {[0 ... NR_CPUS-1] = BAD_APICID};
#endif

/* Masks of CPUs currently online / ever possible (filled in by
 * prefill_possible_map()). */
cpumask_t cpu_online_map;
EXPORT_SYMBOL(cpu_online_map);
cpumask_t cpu_possible_map;
EXPORT_SYMBOL(cpu_possible_map);

/* Per-CPU copy of boot_cpu_data; populated in smp_prepare_cpus(). */
struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_data);

#ifdef CONFIG_HOTPLUG_CPU
/* Per-CPU hotplug state used by the CPU hotplug machinery. */
DEFINE_PER_CPU(int, cpu_state) = { 0 };
#endif

/* IRQ numbers returned by bind_ipi_to_irqhandler() in
 * xen_smp_intr_init(); released in xen_smp_intr_exit(). */
static DEFINE_PER_CPU(int, resched_irq);
static DEFINE_PER_CPU(int, callfunc_irq);
/* Handler names ("resched%d"/"callfunc%d") registered with the IPIs. */
static char resched_name[NR_CPUS][15];
static char callfunc_name[NR_CPUS][15];

/* Identity CPU -> logical APIC id map; Xen VCPUs have no real APIC. */
u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };

/* NOTE(review): defined but never used in this file — presumably kept
 * to satisfy references from shared i386 code; confirm before removing. */
void *xquad_portio;

/* Per-CPU sibling/core masks; each is a singleton of the CPU itself
 * since Xen exposes no HT/core topology (see set_cpu_sibling_map()). */
cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_core_map);

#if defined(__i386__)
/* Identity CPU -> physical APIC id map (i386 only). */
u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = 0xff };
EXPORT_SYMBOL(x86_cpu_to_apicid);
#elif !defined(CONFIG_X86_IO_APIC)
unsigned int maxcpus = NR_CPUS;
#endif
83
84 void __init prefill_possible_map(void)
85 {
86         int i, rc;
87
88         for (i = 0; i < NR_CPUS; i++) {
89                 rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
90                 if (rc >= 0)
91                         cpu_set(i, cpu_possible_map);
92         }
93 }
94
/* No SMP boot-time memory needs allocating under Xen; empty stub kept
 * to satisfy the arch SMP interface. */
void __init smp_alloc_memory(void)
{
}
98
99 static inline void
100 set_cpu_sibling_map(int cpu)
101 {
102         phys_proc_id[cpu] = cpu;
103         cpu_core_id[cpu]  = 0;
104
105         cpu_sibling_map[cpu] = cpumask_of_cpu(cpu);
106         cpu_core_map[cpu]    = cpumask_of_cpu(cpu);
107
108         cpu_data[cpu].booted_cores = 1;
109 }
110
111 static void xen_smp_intr_init(unsigned int cpu)
112 {
113         sprintf(resched_name[cpu], "resched%d", cpu);
114         per_cpu(resched_irq, cpu) =
115                 bind_ipi_to_irqhandler(
116                         RESCHEDULE_VECTOR,
117                         cpu,
118                         smp_reschedule_interrupt,
119                         SA_INTERRUPT,
120                         resched_name[cpu],
121                         NULL);
122         BUG_ON(per_cpu(resched_irq, cpu) < 0);
123
124         sprintf(callfunc_name[cpu], "callfunc%d", cpu);
125         per_cpu(callfunc_irq, cpu) =
126                 bind_ipi_to_irqhandler(
127                         CALL_FUNCTION_VECTOR,
128                         cpu,
129                         smp_call_function_interrupt,
130                         SA_INTERRUPT,
131                         callfunc_name[cpu],
132                         NULL);
133         BUG_ON(per_cpu(callfunc_irq, cpu) < 0);
134
135         if (cpu != 0)
136                 local_setup_timer(cpu);
137 }
138
139 #ifdef CONFIG_HOTPLUG_CPU
/* Release the timer and IPI bindings set up by xen_smp_intr_init(). */
static void xen_smp_intr_exit(unsigned int cpu)
{
	/* CPU 0's timer was never set up by xen_smp_intr_init(). */
	if (cpu != 0)
		local_teardown_timer(cpu);

	unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
	unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
}
148 #endif
149
/*
 * Common bring-up work run on a newly-started CPU: per-CPU init, then
 * enable interrupts with preemption disabled.  The call order is
 * deliberate — do not reorder.
 */
void cpu_bringup(void)
{
	cpu_init();
	/* Reset the watchdog so the long offline gap is not reported
	 * as a soft lockup. */
	touch_softlockup_watchdog();
	preempt_disable();
	local_irq_enable();
}
157
/* Secondary-CPU entry point (installed as the initial EIP in
 * cpu_initialize_context()): finish bring-up, then enter the idle
 * loop.  Never returns. */
static void cpu_bringup_and_idle(void)
{
	cpu_bringup();
	cpu_idle();
}
163
/*
 * Build the initial register/segment context for @cpu and register it
 * with the hypervisor via VCPUOP_initialise.  The VCPU will start in
 * cpu_bringup_and_idle() on its idle task's kernel stack when later
 * brought up by __cpu_up().  No-op for CPU 0, which is already running.
 */
void cpu_initialize_context(unsigned int cpu)
{
	vcpu_guest_context_t ctxt;
	struct task_struct *idle = idle_task(cpu);
#ifdef __x86_64__
	struct desc_ptr *gdt_descr = &cpu_gdt_descr[cpu];
#else
	struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
#endif

	if (cpu == 0)
		return;

	memset(&ctxt, 0, sizeof(ctxt));

	/* Start in kernel mode with flat user data segments and
	 * interrupts enabled. */
	ctxt.flags = VGCF_IN_KERNEL;
	ctxt.user_regs.ds = __USER_DS;
	ctxt.user_regs.es = __USER_DS;
	ctxt.user_regs.fs = 0;
	ctxt.user_regs.gs = 0;
	ctxt.user_regs.ss = __KERNEL_DS;
	ctxt.user_regs.eip = (unsigned long)cpu_bringup_and_idle;
	ctxt.user_regs.eflags = X86_EFLAGS_IF | 0x1000; /* IOPL_RING1 */

	memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));

	/* Fill in the trap/exception table (arch code). */
	smp_trap_init(ctxt.trap_ctxt);

	ctxt.ldt_ents = 0;

	/* Hand over the per-CPU GDT page allocated in smp_prepare_cpus();
	 * 8 bytes per descriptor entry. */
	ctxt.gdt_frames[0] = virt_to_mfn(gdt_descr->address);
	ctxt.gdt_ents      = gdt_descr->size / 8;

#ifdef __i386__
	ctxt.user_regs.cs = __KERNEL_CS;
	/* Leave room for a pt_regs frame at the top of the idle stack. */
	ctxt.user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs);

	ctxt.kernel_ss = __KERNEL_DS;
	ctxt.kernel_sp = idle->thread.esp0;

	/* Event (interrupt) and failsafe callback entry points. */
	ctxt.event_callback_cs     = __KERNEL_CS;
	ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
	ctxt.failsafe_callback_cs  = __KERNEL_CS;
	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;

	/* Initial page tables: the shared kernel page directory. */
	ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
#else /* __x86_64__ */
	ctxt.user_regs.cs = __KERNEL_CS;
	ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);

	ctxt.kernel_ss = __KERNEL_DS;
	ctxt.kernel_sp = idle->thread.rsp0;

	ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
	ctxt.syscall_callback_eip  = (unsigned long)system_call;

	ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));

	/* Kernel GS base points at this CPU's PDA. */
	ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
#endif

	BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt));
}
228
/*
 * Prepare all possible secondary CPUs for bring-up: allocate per-CPU
 * GDTs, fork idle tasks, mark CPUs present, and register an initial
 * VCPU context for each via cpu_initialize_context().  Runs on the
 * boot CPU; secondaries are actually started later by __cpu_up().
 */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
	int cpu;
	struct task_struct *idle;
#ifdef __x86_64__
	struct desc_ptr *gdt_descr;
#else
	struct Xgt_desc_struct *gdt_descr;
#endif

	/* Describe the boot CPU: apicid 0, identity mappings. */
	boot_cpu_data.apicid = 0;
	cpu_data[0] = boot_cpu_data;

	cpu_2_logical_apicid[0] = 0;
	x86_cpu_to_apicid[0] = 0;

	current_thread_info()->cpu = 0;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		cpus_clear(cpu_sibling_map[cpu]);
		cpus_clear(cpu_core_map[cpu]);
	}

	set_cpu_sibling_map(0);

	/* The boot CPU binds its IPIs here; secondaries do so in
	 * __cpu_up(). */
	xen_smp_intr_init(0);

	for_each_cpu_mask (cpu, cpu_possible_map) {
		if (cpu == 0)
			continue;

#ifdef __x86_64__
		gdt_descr = &cpu_gdt_descr[cpu];
#else
		gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
#endif
		/* Each CPU gets its own page-sized copy of the boot GDT,
		 * made read-only (required unless the hypervisor grants
		 * XENFEAT_writable_descriptor_tables). */
		gdt_descr->address = get_zeroed_page(GFP_KERNEL);
		if (unlikely(!gdt_descr->address)) {
			printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu);
			continue;
		}
		gdt_descr->size = GDT_SIZE;
		memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE);
		make_page_readonly(
			(void *)gdt_descr->address,
			XENFEAT_writable_descriptor_tables);

		cpu_data[cpu] = boot_cpu_data;
		cpu_data[cpu].apicid = cpu;

		/* Identity mappings: Xen VCPUs have no real APIC ids. */
		cpu_2_logical_apicid[cpu] = cpu;
		x86_cpu_to_apicid[cpu] = cpu;

		idle = fork_idle(cpu);
		if (IS_ERR(idle))
			panic("failed fork for CPU %d", cpu);

#ifdef __x86_64__
		/* Point the per-CPU PDA at the new idle task. */
		cpu_pda(cpu)->pcurrent = idle;
		cpu_pda(cpu)->cpunumber = cpu;
		clear_ti_thread_flag(idle->thread_info, TIF_FORK);
#endif

		irq_ctx_init(cpu);

#ifdef CONFIG_HOTPLUG_CPU
		/* With hotplug, non-initial domains populate
		 * cpu_present_map later via initialize_cpu_present_map(). */
		if (xen_start_info->flags & SIF_INITDOMAIN)
			cpu_set(cpu, cpu_present_map);
#else
		cpu_set(cpu, cpu_present_map);
#endif

		cpu_initialize_context(cpu);
	}

	init_xenbus_allowed_cpumask();

	/* Currently, Xen gives no dynamic NUMA/HT info. */
	for (cpu = 1; cpu < NR_CPUS; cpu++) {
		cpu_sibling_map[cpu] = cpumask_of_cpu(cpu);
		cpu_core_map[cpu]    = cpumask_of_cpu(cpu);
	}

#ifdef CONFIG_X86_IO_APIC
	/*
	 * Here we can be sure that there is an IO-APIC in the system. Let's
	 * go and set it up:
	 */
	if (!skip_ioapic_setup && nr_ioapics)
		setup_IO_APIC();
#endif
}
321
/* Boot-CPU preparation is done in smp_prepare_cpus(); nothing here. */
void __devinit smp_prepare_boot_cpu(void)
{
}
325
326 #ifdef CONFIG_HOTPLUG_CPU
327
328 /*
329  * Initialize cpu_present_map late to skip SMP boot code in init/main.c.
330  * But do it early enough to catch critical for_each_present_cpu() loops
331  * in i386-specific code.
332  */
static int __init initialize_cpu_present_map(void)
{
	/* Under Xen every possible VCPU is treated as present. */
	cpu_present_map = cpu_possible_map;
	return 0;
}
/* core_initcall: early enough for for_each_present_cpu() users, late
 * enough to skip the generic SMP boot code (see comment above). */
core_initcall(initialize_cpu_present_map);
339
340 static void
341 remove_siblinginfo(int cpu)
342 {
343         phys_proc_id[cpu] = BAD_APICID;
344         cpu_core_id[cpu]  = BAD_APICID;
345
346         cpus_clear(cpu_sibling_map[cpu]);
347         cpus_clear(cpu_core_map[cpu]);
348
349         cpu_data[cpu].booted_cores = 0;
350 }
351
/*
 * Take the calling CPU out of service ahead of offlining it.  Ordering
 * matters: interrupts are redirected to the remaining CPUs before this
 * CPU is cleared from cpu_online_map.  Returns 0, or -EBUSY for CPU 0.
 */
int __cpu_disable(void)
{
	cpumask_t map = cpu_online_map;
	int cpu = smp_processor_id();

	/* The boot CPU must stay online. */
	if (cpu == 0)
		return -EBUSY;

	remove_siblinginfo(cpu);

	/* Re-route IRQs to the CPUs that will remain online. */
	cpu_clear(cpu, map);
	fixup_irqs(map);
	cpu_clear(cpu, cpu_online_map);

	return 0;
}
368
369 void __cpu_die(unsigned int cpu)
370 {
371         while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
372                 current->state = TASK_UNINTERRUPTIBLE;
373                 schedule_timeout(HZ/10);
374         }
375
376         xen_smp_intr_exit(cpu);
377
378 #ifdef __i386__
379         if (num_online_cpus() == 1)
380                 alternatives_smp_switch(0);
381 #endif
382 }
383
384 #else /* !CONFIG_HOTPLUG_CPU */
385
/* Without CONFIG_HOTPLUG_CPU, CPUs can never be taken offline. */
int __cpu_disable(void)
{
	return -ENOSYS;
}
390
/* Unreachable: __cpu_disable() always fails without hotplug support. */
void __cpu_die(unsigned int cpu)
{
	BUG();
}
395
396 #endif /* CONFIG_HOTPLUG_CPU */
397
/*
 * Bring @cpu online: set up its topology maps and IPIs, mark it
 * online, then ask the hypervisor to start the VCPU whose context was
 * registered by cpu_initialize_context().
 */
int __devinit __cpu_up(unsigned int cpu)
{
	int rc;

	/* Hotplug permission check (xen/cpu_hotplug.h). */
	rc = cpu_up_check(cpu);
	if (rc)
		return rc;

#ifdef __i386__
	/* Switch to SMP alternatives when the second CPU comes up. */
	if (num_online_cpus() == 1)
		alternatives_smp_switch(1);
#endif

	/* This must be done before setting cpu_online_map */
	set_cpu_sibling_map(cpu);
	wmb();

	xen_smp_intr_init(cpu);
	cpu_set(cpu, cpu_online_map);

	/* VCPUOP_up must succeed; the context was validated at
	 * VCPUOP_initialise time. */
	rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
	BUG_ON(rc);

	return 0;
}
423
/* Called once all CPUs are up; no Xen-specific finalisation needed. */
void __init smp_cpus_done(unsigned int max_cpus)
{
}
427
428 #ifndef CONFIG_X86_LOCAL_APIC
/* Changing the profiling multiplier is unsupported without a local
 * APIC; always reject. */
int setup_profiling_timer(unsigned int multiplier)
{
	return -EINVAL;
}
433 #endif