/*
 * Xen SMP booting functions
 *
 * See arch/i386/kernel/smpboot.c for copyright and credits for derived
 * portions of this file.
 */
8 #include <linux/module.h>
9 #include <linux/init.h>
10 #include <linux/kernel.h>
12 #include <linux/sched.h>
13 #include <linux/kernel_stat.h>
14 #include <linux/smp_lock.h>
15 #include <linux/irq.h>
16 #include <linux/bootmem.h>
17 #include <linux/notifier.h>
18 #include <linux/cpu.h>
19 #include <linux/percpu.h>
21 #include <asm/arch_hooks.h>
22 #include <asm/pgalloc.h>
26 #include <xen/evtchn.h>
27 #include <xen/interface/vcpu.h>
28 #include <xen/cpu_hotplug.h>
29 #include <xen/xenbus.h>
31 extern irqreturn_t smp_reschedule_interrupt(int, void *);
32 extern irqreturn_t smp_call_function_interrupt(int, void *);
34 extern void local_setup_timer(unsigned int cpu);
35 extern void local_teardown_timer(unsigned int cpu);
37 extern void hypervisor_callback(void);
38 extern void failsafe_callback(void);
39 extern void system_call(void);
40 extern void smp_trap_init(trap_info_t *);
/*
 * Per-CPU topology and bookkeeping state exported to the rest of the
 * kernel.  NOTE(review): this extract is elided -- the #elif below implies
 * a preceding "#if defined(__i386__)" (and a later #endif) that are not
 * visible here; confirm against the full file.
 */
42 /* Number of siblings per CPU package */
43 int smp_num_siblings = 1;
44 EXPORT_SYMBOL(smp_num_siblings);
/* Last-level-cache ID per CPU, BAD_APICID until known (int on i386, u8 on x86_64). */
46 int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
47 #elif defined(__x86_64__)
48 u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
50 EXPORT_SYMBOL(cpu_llc_id);
/* Masks of CPUs currently online / ever possible on this boot. */
52 cpumask_t cpu_online_map;
53 EXPORT_SYMBOL(cpu_online_map);
54 cpumask_t cpu_possible_map;
55 EXPORT_SYMBOL(cpu_possible_map);
/* Per-CPU copy of cpuinfo, seeded from boot_cpu_data in smp_prepare_cpus(). */
57 struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
58 EXPORT_SYMBOL(cpu_data);
/* Hotplug state per CPU.  NOTE(review): matching #endif not visible in extract. */
60 #ifdef CONFIG_HOTPLUG_CPU
61 DEFINE_PER_CPU(int, cpu_state) = { 0 };
/* IRQ numbers and names for the per-CPU reschedule / call-function IPI
 * event channels bound in xen_smp_intr_init(). */
64 static DEFINE_PER_CPU(int, resched_irq);
65 static DEFINE_PER_CPU(int, callfunc_irq);
66 static char resched_name[NR_CPUS][15];
67 static char callfunc_name[NR_CPUS][15];
/* CPU-number -> APIC id maps; under Xen these are simply the identity
 * (set to cpu in smp_prepare_cpus()), sentinel-filled until then. */
69 u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
73 cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
74 EXPORT_SYMBOL(cpu_sibling_map);
75 cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
76 EXPORT_SYMBOL(cpu_core_map);
79 u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = 0xff };
80 EXPORT_SYMBOL(x86_cpu_to_apicid);
/*
 * prefill_possible_map - populate cpu_possible_map from the hypervisor.
 *
 * Probes every VCPU id with the VCPUOP_is_up hypercall and marks the ids
 * the hypervisor recognizes as possible.  NOTE(review): the line testing
 * the hypercall result (presumably "if (rc >= 0)") is missing from this
 * extract, as are the declarations of i and rc -- confirm in the full file.
 */
83 void __init prefill_possible_map(void)
87 	for (i = 0; i < NR_CPUS; i++) {
88 		rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
90 			cpu_set(i, cpu_possible_map);
/*
 * smp_alloc_memory - arch hook for SMP trampoline memory.
 * NOTE(review): body not visible in this extract; under Xen this hook is
 * typically empty since no real-mode trampoline is needed -- confirm.
 */
94 void __init smp_alloc_memory(void)
/*
 * set_cpu_sibling_map - record trivial topology for @cpu.
 *
 * Each CPU is registered as its own package (phys_proc_id == cpu) with a
 * single core and no siblings: both the sibling and core masks contain
 * only @cpu itself.  NOTE(review): the return-type line of this function
 * is missing from the extract (likely "static void").
 */
99 set_cpu_sibling_map(int cpu)
101 	cpu_data[cpu].phys_proc_id = cpu;
102 	cpu_data[cpu].cpu_core_id = 0;
104 	cpu_sibling_map[cpu] = cpumask_of_cpu(cpu);
105 	cpu_core_map[cpu] = cpumask_of_cpu(cpu);
107 	cpu_data[cpu].booted_cores = 1;
/*
 * xen_smp_intr_init - bind per-CPU IPI event channels for @cpu.
 *
 * Binds the reschedule and call-function IPIs to their handlers via Xen
 * event channels (bind_ipi_to_irqhandler) and stores the resulting irq
 * numbers in the per-CPU resched_irq/callfunc_irq slots; a negative irq
 * is fatal (BUG_ON).  Finally sets up the per-CPU timer.  NOTE(review):
 * several argument lines of the bind calls (vector, cpu, flags, name) are
 * missing from this extract.
 */
110 static void xen_smp_intr_init(unsigned int cpu)
112 	sprintf(resched_name[cpu], "resched%d", cpu);
113 	per_cpu(resched_irq, cpu) =
114 		bind_ipi_to_irqhandler(
117 			smp_reschedule_interrupt,
121 	BUG_ON(per_cpu(resched_irq, cpu) < 0);
123 	sprintf(callfunc_name[cpu], "callfunc%d", cpu);
124 	per_cpu(callfunc_irq, cpu) =
125 		bind_ipi_to_irqhandler(
126 			CALL_FUNCTION_VECTOR,
128 			smp_call_function_interrupt,
132 	BUG_ON(per_cpu(callfunc_irq, cpu) < 0);
135 		local_setup_timer(cpu);
138 #ifdef CONFIG_HOTPLUG_CPU
/*
 * xen_smp_intr_exit - undo xen_smp_intr_init() for a CPU going offline:
 * tear down its local timer and unbind the reschedule / call-function
 * IPI event channels.  Hotplug-only counterpart of xen_smp_intr_init().
 */
139 static void xen_smp_intr_exit(unsigned int cpu)
142 		local_teardown_timer(cpu);
144 	unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
145 	unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
/* Load %gs with the kernel PDA selector for the current CPU (i386 PDA
 * scheme).  The inline asm's "memory" clobber keeps PDA accesses from
 * being reordered before the segment load. */
150 static inline void set_kernel_gs(void)
152 	/* Set %gs for this CPU's PDA.  Memory clobber is to create a
153 	   barrier with respect to any PDA operations, so the compiler
154 	   doesn't move any before here. */
155 	asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory");
/*
 * cpu_bringup - early C-level init on a freshly started secondary VCPU.
 *
 * Runs generic secondary-CPU init and pets the soft-lockup watchdog.
 * NOTE(review): several body lines are missing from this extract
 * (typically cpu_init()/percpu setup and preempt_disable()); confirm
 * against the full file.
 */
159 void cpu_bringup(void)
163 	secondary_cpu_init();
167 	touch_softlockup_watchdog();
/*
 * cpu_bringup_and_idle - entry point a new VCPU starts at (its eip is set
 * to this function in cpu_initialize_context()).  Body not visible in
 * this extract; presumably calls cpu_bringup() then enters the idle loop
 * -- confirm against the full file.
 */
172 static void cpu_bringup_and_idle(void)
/*
 * cpu_initialize_context - build and register the initial register state
 * for VCPU @cpu with the hypervisor.
 *
 * Fills a vcpu_guest_context_t so the new VCPU starts in kernel mode at
 * cpu_bringup_and_idle() on the idle task's stack, with the GDT, trap
 * table, event/failsafe callbacks and page tables installed, then hands
 * it to Xen via VCPUOP_initialise (failure is fatal: BUG_ON).
 * NOTE(review): the #if/#endif lines selecting the i386 vs x86_64 halves
 * are partially missing from this extract (only the "#else" survives).
 */
178 void cpu_initialize_context(unsigned int cpu)
180 	vcpu_guest_context_t ctxt;
181 	struct task_struct *idle = idle_task(cpu);
/* Per-arch GDT descriptor type: desc_ptr (x86_64) vs Xgt_desc_struct (i386). */
183 	struct desc_ptr *gdt_descr = &cpu_gdt_descr[cpu];
185 	struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
191 	memset(&ctxt, 0, sizeof(ctxt));
/* Start in kernel mode with flat user data segments and IRQs enabled. */
193 	ctxt.flags = VGCF_IN_KERNEL;
194 	ctxt.user_regs.ds = __USER_DS;
195 	ctxt.user_regs.es = __USER_DS;
196 	ctxt.user_regs.fs = 0;
197 	ctxt.user_regs.gs = 0;
198 	ctxt.user_regs.ss = __KERNEL_DS;
199 	ctxt.user_regs.eip = (unsigned long)cpu_bringup_and_idle;
200 	ctxt.user_regs.eflags = X86_EFLAGS_IF | 0x1000; /* IOPL_RING1 */
202 	memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
/* Populate the guest trap table from this kernel's trap handlers. */
204 	smp_trap_init(ctxt.trap_ctxt);
/* Hand the per-CPU GDT to Xen as machine frame + entry count. */
208 	ctxt.gdt_frames[0] = virt_to_mfn(gdt_descr->address);
209 	ctxt.gdt_ents = gdt_descr->size / 8;
/* i386 half: stack pointer just below a pt_regs on the idle stack. */
212 	ctxt.user_regs.cs = __KERNEL_CS;
213 	ctxt.user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs);
215 	ctxt.kernel_ss = __KERNEL_DS;
216 	ctxt.kernel_sp = idle->thread.esp0;
218 	ctxt.event_callback_cs     = __KERNEL_CS;
219 	ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
220 	ctxt.failsafe_callback_cs  = __KERNEL_CS;
221 	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
223 	ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
224 #else /* __x86_64__ */
225 	ctxt.user_regs.cs = __KERNEL_CS;
226 	ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
228 	ctxt.kernel_ss = __KERNEL_DS;
229 	ctxt.kernel_sp = idle->thread.rsp0;
231 	ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
232 	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
233 	ctxt.syscall_callback_eip  = (unsigned long)system_call;
235 	ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));
/* x86_64 keeps its per-CPU PDA via the kernel GS base. */
237 	ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
240 	BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt));
/*
 * smp_prepare_cpus - prepare all possible secondary CPUs before bring-up.
 *
 * Seeds CPU0's bookkeeping, clears topology masks, binds CPU0's IPIs,
 * trims cpu_possible_map down to @max_cpus, then for every possible CPU:
 * allocates and installs a GDT (and, on i386, a PDA), copies boot CPU
 * data, forks an idle task, marks the CPU present, and registers its
 * initial context with the hypervisor.  NOTE(review): this extract is
 * heavily elided -- error-handling continuations, loop-closing braces and
 * several #if/#else/#endif lines are missing; treat inline comments as a
 * reading aid, not a full reconstruction.
 */
243 void __init smp_prepare_cpus(unsigned int max_cpus)
246 	struct task_struct *idle;
248 	struct desc_ptr *gdt_descr;
250 	struct Xgt_desc_struct *gdt_descr;
/* Boot CPU (CPU0): identity APIC id and copy of boot_cpu_data. */
253 	boot_cpu_data.apicid = 0;
254 	cpu_data[0] = boot_cpu_data;
256 	cpu_2_logical_apicid[0] = 0;
257 	x86_cpu_to_apicid[0] = 0;
259 	current_thread_info()->cpu = 0;
261 	for (cpu = 0; cpu < NR_CPUS; cpu++) {
262 		cpus_clear(cpu_sibling_map[cpu]);
263 		cpus_clear(cpu_core_map[cpu]);
266 	set_cpu_sibling_map(0);
268 	xen_smp_intr_init(0);
/* Drop highest-numbered possible CPUs until at most max_cpus remain
 * (CPU0 is never removed). */
270 	/* Restrict the possible_map according to max_cpus. */
271 	while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
272 		for (cpu = NR_CPUS-1; !cpu_isset(cpu, cpu_possible_map); cpu--)
274 		cpu_clear(cpu, cpu_possible_map);
277 	for_each_possible_cpu (cpu) {
279 		struct i386_pda *pda;
280 		struct desc_struct *gdt;
/* Per-CPU GDT: allocate a zeroed page and clone the boot GDT into it. */
287 		gdt_descr = &cpu_gdt_descr[cpu];
289 		gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
291 		gdt_descr->address = get_zeroed_page(GFP_KERNEL);
292 		if (unlikely(!gdt_descr->address)) {
293 			printk(KERN_CRIT "CPU%d failed to allocate GDT\n",
297 		gdt_descr->size = GDT_SIZE;
298 		memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE);
/* i386 PDA: allocate on the CPU's node and point a GDT entry at it. */
300 		gdt = (struct desc_struct *)gdt_descr->address;
301 		pda = kmalloc_node(sizeof(*pda), GFP_KERNEL, cpu_to_node(cpu));
303 		if (unlikely(!pda)) {
304 			printk(KERN_CRIT "CPU%d failed to allocate PDA\n",
309 		cpu_pda(cpu)->cpu_number = cpu;
310 		pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a,
311 				(u32 *)&gdt[GDT_ENTRY_PDA].b,
312 				(unsigned long)pda, sizeof(*pda) - 1,
313 				0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */
/* NOTE(review): the call these arguments belong to (presumably
 * make_page_readonly(), skipped when Xen offers writable descriptor
 * tables) is missing from the extract. */
316 			(void *)gdt_descr->address,
317 			XENFEAT_writable_descriptor_tables);
319 		cpu_data[cpu] = boot_cpu_data;
320 		cpu_data[cpu].apicid = cpu;
322 		cpu_2_logical_apicid[cpu] = cpu;
323 		x86_cpu_to_apicid[cpu] = cpu;
/* Idle task for the CPU; the panic is presumably guarded by an
 * IS_ERR(idle) check missing from the extract. */
325 		idle = fork_idle(cpu);
327 			panic("failed fork for CPU %d", cpu);
329 		cpu_pda(cpu)->pcurrent = idle;
331 		cpu_pda(cpu)->cpunumber = cpu;
332 		clear_ti_thread_flag(idle->thread_info, TIF_FORK);
/* With hotplug, dom0 marks CPUs present immediately; otherwise (the
 * elided #else branch) every possible CPU is marked present. */
337 #ifdef CONFIG_HOTPLUG_CPU
338 		if (is_initial_xendomain())
339 			cpu_set(cpu, cpu_present_map);
341 		cpu_set(cpu, cpu_present_map);
344 		cpu_initialize_context(cpu);
347 	init_xenbus_allowed_cpumask();
350 	 * Here we can be sure that there is an IO-APIC in the system. Let's
353 #ifdef CONFIG_X86_IO_APIC
354 	if (!skip_ioapic_setup && nr_ioapics)
/*
 * smp_prepare_boot_cpu - arch hook run for the boot CPU before SMP init.
 * NOTE(review): body not visible in this extract; typically marks CPU0
 * online/present -- confirm against the full file.
 */
359 void __init smp_prepare_boot_cpu(void)
363 #ifdef CONFIG_HOTPLUG_CPU
/*
 * Initialize cpu_present_map late to skip SMP boot code in init/main.c.
 * But do it early enough to catch critical for_each_present_cpu() loops
 * in i386-specific code.
 */
366  * Initialize cpu_present_map late to skip SMP boot code in init/main.c.
367  * But do it early enough to catch critical for_each_present_cpu() loops
368  * in i386-specific code.
/* core_initcall: runs after arch setup, before device initcalls. */
370 static int __init initialize_cpu_present_map(void)
372 	cpu_present_map = cpu_possible_map;
375 core_initcall(initialize_cpu_present_map);
/*
 * remove_siblinginfo - inverse of set_cpu_sibling_map() for a CPU going
 * offline: reset its package/core ids to BAD_APICID, empty its sibling
 * and core masks, and zero booted_cores.  NOTE(review): the return-type
 * line is missing from the extract (likely "static void").
 */
378 remove_siblinginfo(int cpu)
380 	cpu_data[cpu].phys_proc_id = BAD_APICID;
381 	cpu_data[cpu].cpu_core_id = BAD_APICID;
383 	cpus_clear(cpu_sibling_map[cpu]);
384 	cpus_clear(cpu_core_map[cpu]);
386 	cpu_data[cpu].booted_cores = 0;
/*
 * __cpu_disable - take the calling CPU out of service (hotplug path).
 *
 * Drops the CPU's sibling info and clears it from cpu_online_map.
 * NOTE(review): elided lines likely include the "refuse to offline CPU0"
 * check and IRQ fixup using the snapshotted @map -- confirm in full file.
 */
389 int __cpu_disable(void)
391 	cpumask_t map = cpu_online_map;
392 	int cpu = smp_processor_id();
397 	remove_siblinginfo(cpu);
401 	cpu_clear(cpu, cpu_online_map);
/*
 * __cpu_die - wait for a disabled CPU's VCPU to go down, then clean up.
 *
 * Polls VCPUOP_is_up every HZ/10 ticks until the hypervisor reports the
 * VCPU down, unbinds its IPIs/timer, and switches back to UP alternative
 * instructions if this was the last other CPU.
 */
406 void __cpu_die(unsigned int cpu)
408 	while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
409 		current->state = TASK_UNINTERRUPTIBLE;
410 		schedule_timeout(HZ/10);
413 	xen_smp_intr_exit(cpu);
415 	if (num_online_cpus() == 1)
416 		alternatives_smp_switch(0);
419 #else /* !CONFIG_HOTPLUG_CPU */
/* Without hotplug support CPUs can never be taken offline: __cpu_disable
 * presumably returns an error and __cpu_die is unreachable (bodies not
 * visible in this extract). */
421 int __cpu_disable(void)
426 void __cpu_die(unsigned int cpu)
431 #endif /* CONFIG_HOTPLUG_CPU */
/*
 * __cpu_up - bring VCPU @cpu online.
 *
 * Verifies the CPU may come up (cpu_up_check), switches to SMP
 * alternatives on the first secondary, records sibling info BEFORE
 * marking the CPU online, binds its IPIs/timer, and finally asks the
 * hypervisor to start the VCPU via VCPUOP_up.  Returns 0 on success or a
 * negative errno (error-return lines elided in this extract).
 */
433 int __cpuinit __cpu_up(unsigned int cpu)
437 	rc = cpu_up_check(cpu);
441 	if (num_online_cpus() == 1)
442 		alternatives_smp_switch(1);
444 	/* This must be done before setting cpu_online_map */
445 	set_cpu_sibling_map(cpu);
448 	xen_smp_intr_init(cpu);
449 	cpu_set(cpu, cpu_online_map);
451 	rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
/*
 * smp_cpus_done - arch hook after all CPUs are up.  Body not visible in
 * this extract; typically a no-op under Xen -- confirm in the full file.
 */
457 void __init smp_cpus_done(unsigned int max_cpus)
461 #ifdef CONFIG_X86_MPPARSE
/* Early "maxcpus=N" parameter: limits enumerated CPUs (base-0 strtoul
 * accepts decimal/hex/octal).  NOTE(review): comment delimiters and the
 * return statement are elided in this extract. */
463  * If the BIOS enumerates physical processors before logical,
464  * maxcpus=N at enumeration-time can be used to disable HT.
466 static int __init parse_maxcpus(char *arg)
468 	extern unsigned int maxcpus;
470 	maxcpus = simple_strtoul(arg, NULL, 0);
473 early_param("maxcpus", parse_maxcpus);
476 #if defined(CONFIG_XEN_UNPRIVILEGED_GUEST) && defined(CONFIG_X86_32)
477 int setup_profiling_timer(unsigned int multiplier)