X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fi386%2Fkernel%2Fsmpboot.c;h=1a9b081904928c2561fcaaf155c94e97f8feb2c6;hb=9bf4aaab3e101692164d49b7ca357651eb691cb6;hp=7baa4d420b730c8d1e2dcf6d2e03920066213266;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 7baa4d420..1a9b08190 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -39,6 +39,7 @@ #include #include +#include #include #include #include @@ -46,7 +47,6 @@ #include #include -#include #include #include #include @@ -82,6 +82,7 @@ int smp_threads_ready; extern unsigned char trampoline_data []; extern unsigned char trampoline_end []; static unsigned char *trampoline_base; +static int trampoline_exec; /* * Currently trivial. Write the real->protected mode @@ -108,6 +109,10 @@ void __init smp_alloc_memory(void) */ if (__pa(trampoline_base) >= 0x9F000) BUG(); + /* + * Make the SMP trampoline executable: + */ + trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1); } /* @@ -936,7 +941,7 @@ static int boot_cpu_logical_apicid; /* Where the IO area was mapped on multiquad, always 0 otherwise */ void *xquad_portio; -int cpu_sibling_map[NR_CPUS] __cacheline_aligned; +cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; static void __init smp_boot_cpus(unsigned int max_cpus) { @@ -955,6 +960,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus) current_thread_info()->cpu = 0; smp_tune_scheduling(); + cpus_clear(cpu_sibling_map[0]); + cpu_set(0, cpu_sibling_map[0]); /* * If we couldn't find an SMP configuration at boot time, @@ -1081,34 +1088,39 @@ static void __init smp_boot_cpus(unsigned int max_cpus) Dprintk("Boot done.\n"); /* - * If Hyper-Threading is avaialble, construct cpu_sibling_map[], so - * that we can tell the sibling CPU efficiently. + * construct cpu_sibling_map[], so that we can tell sibling CPUs + * efficiently. 
*/ - if (cpu_has_ht && smp_num_siblings > 1) { - for (cpu = 0; cpu < NR_CPUS; cpu++) - cpu_sibling_map[cpu] = NO_PROC_ID; - - for (cpu = 0; cpu < NR_CPUS; cpu++) { - int i; - if (!cpu_isset(cpu, cpu_callout_map)) - continue; + for (cpu = 0; cpu < NR_CPUS; cpu++) + cpus_clear(cpu_sibling_map[cpu]); + for (cpu = 0; cpu < NR_CPUS; cpu++) { + int siblings = 0; + int i; + if (!cpu_isset(cpu, cpu_callout_map)) + continue; + + if (smp_num_siblings > 1) { for (i = 0; i < NR_CPUS; i++) { - if (i == cpu || !cpu_isset(i, cpu_callout_map)) + if (!cpu_isset(i, cpu_callout_map)) continue; if (phys_proc_id[cpu] == phys_proc_id[i]) { - cpu_sibling_map[cpu] = i; - printk("cpu_sibling_map[%d] = %d\n", cpu, cpu_sibling_map[cpu]); - break; + siblings++; + cpu_set(i, cpu_sibling_map[cpu]); } } - if (cpu_sibling_map[cpu] == NO_PROC_ID) { - smp_num_siblings = 1; - printk(KERN_WARNING "WARNING: No sibling found for CPU %d.\n", cpu); - } + } else { + siblings++; + cpu_set(cpu, cpu_sibling_map[cpu]); } + + if (siblings != smp_num_siblings) + printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings); } + if (nmi_watchdog == NMI_LOCAL_APIC) + check_nmi_watchdog(); + smpboot_setup_io_apic(); setup_boot_APIC_clock(); @@ -1120,6 +1132,213 @@ static void __init smp_boot_cpus(unsigned int max_cpus) synchronize_tsc_bp(); } +#ifdef CONFIG_SCHED_SMT +#ifdef CONFIG_NUMA +static struct sched_group sched_group_cpus[NR_CPUS]; +static struct sched_group sched_group_phys[NR_CPUS]; +static struct sched_group sched_group_nodes[MAX_NUMNODES]; +static DEFINE_PER_CPU(struct sched_domain, cpu_domains); +static DEFINE_PER_CPU(struct sched_domain, phys_domains); +static DEFINE_PER_CPU(struct sched_domain, node_domains); +__init void arch_init_sched_domains(void) +{ + int i; + struct sched_group *first = NULL, *last = NULL; + + /* Set up domains */ + for_each_cpu(i) { + struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i); + struct sched_domain *phys_domain = &per_cpu(phys_domains, i); + struct sched_domain *node_domain = &per_cpu(node_domains, i); + int node = cpu_to_node(i); + cpumask_t nodemask = node_to_cpumask(node); + + *cpu_domain = SD_SIBLING_INIT; + cpu_domain->span = cpu_sibling_map[i]; + cpu_domain->parent = phys_domain; + cpu_domain->groups = &sched_group_cpus[i]; + + *phys_domain = SD_CPU_INIT; + phys_domain->span = nodemask; + phys_domain->parent = node_domain; + phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)]; + + *node_domain = SD_NODE_INIT; + node_domain->span = cpu_possible_map; + node_domain->groups = &sched_group_nodes[cpu_to_node(i)]; + } + + /* Set up CPU (sibling) groups */ + for_each_cpu(i) { + struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i); + int j; + first = last = NULL; + + if (i != first_cpu(cpu_domain->span)) + continue; + + for_each_cpu_mask(j, cpu_domain->span) { + struct sched_group *cpu = &sched_group_cpus[j]; + + cpu->cpumask = CPU_MASK_NONE; + cpu_set(j, cpu->cpumask); + cpu->cpu_power = SCHED_LOAD_SCALE; + + if (!first) + first = cpu; + if (last) + last->next = cpu; + last = cpu; + } + last->next = first; + } + + for (i = 0; i < MAX_NUMNODES; i++) { + int j; + cpumask_t nodemask; + struct sched_group *node = &sched_group_nodes[i]; + cpumask_t node_cpumask = node_to_cpumask(i); + + cpus_and(nodemask, node_cpumask, cpu_possible_map); + + if (cpus_empty(nodemask)) + continue; + + first = last = NULL; + /* Set up physical groups */ + for_each_cpu_mask(j, nodemask) { + struct sched_domain *cpu_domain = 
&per_cpu(cpu_domains, j); + struct sched_group *cpu = &sched_group_phys[j]; + + if (j != first_cpu(cpu_domain->span)) + continue; + + cpu->cpumask = cpu_domain->span; + /* + * Make each extra sibling increase power by 10% of + * the basic CPU. This is very arbitrary. + */ + cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10; + node->cpu_power += cpu->cpu_power; + + if (!first) + first = cpu; + if (last) + last->next = cpu; + last = cpu; + } + last->next = first; + } + + /* Set up nodes */ + first = last = NULL; + for (i = 0; i < MAX_NUMNODES; i++) { + struct sched_group *cpu = &sched_group_nodes[i]; + cpumask_t nodemask; + cpumask_t node_cpumask = node_to_cpumask(i); + + cpus_and(nodemask, node_cpumask, cpu_possible_map); + + if (cpus_empty(nodemask)) + continue; + + cpu->cpumask = nodemask; + /* ->cpu_power already setup */ + + if (!first) + first = cpu; + if (last) + last->next = cpu; + last = cpu; + } + last->next = first; + + mb(); + for_each_cpu(i) { + struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i); + cpu_attach_domain(cpu_domain, i); + } +} +#else /* !CONFIG_NUMA */ +static struct sched_group sched_group_cpus[NR_CPUS]; +static struct sched_group sched_group_phys[NR_CPUS]; +static DEFINE_PER_CPU(struct sched_domain, cpu_domains); +static DEFINE_PER_CPU(struct sched_domain, phys_domains); +__init void arch_init_sched_domains(void) +{ + int i; + struct sched_group *first = NULL, *last = NULL; + + /* Set up domains */ + for_each_cpu(i) { + struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i); + struct sched_domain *phys_domain = &per_cpu(phys_domains, i); + + *cpu_domain = SD_SIBLING_INIT; + cpu_domain->span = cpu_sibling_map[i]; + cpu_domain->parent = phys_domain; + cpu_domain->groups = &sched_group_cpus[i]; + + *phys_domain = SD_CPU_INIT; + phys_domain->span = cpu_possible_map; + phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)]; + } + + /* Set up CPU (sibling) groups */ + for_each_cpu(i) { + struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i); + int j; + first = last = NULL; + + if (i != first_cpu(cpu_domain->span)) + continue; + + for_each_cpu_mask(j, cpu_domain->span) { + struct sched_group *cpu = &sched_group_cpus[j]; + + cpus_clear(cpu->cpumask); + cpu_set(j, cpu->cpumask); + cpu->cpu_power = SCHED_LOAD_SCALE; + + if (!first) + first = cpu; + if (last) + last->next = cpu; + last = cpu; + } + last->next = first; + } + + first = last = NULL; + /* Set up physical groups */ + for_each_cpu(i) { + struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i); + struct sched_group *cpu = &sched_group_phys[i]; + + if (i != first_cpu(cpu_domain->span)) + continue; + + cpu->cpumask = cpu_domain->span; + /* See SMT+NUMA setup for comment */ + cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10; + + if (!first) + first = cpu; + if (last) + last->next = cpu; + last = cpu; + } + last->next = first; + + mb(); + for_each_cpu(i) { + struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i); + cpu_attach_domain(cpu_domain, i); + } +} +#endif /* CONFIG_NUMA */ +#endif /* CONFIG_SCHED_SMT */ + /* These are wrappers to interface to the new boot process. Someone who understands all this stuff should rewrite it properly. 
--RR 15/Jul/02 */ void __init smp_prepare_cpus(unsigned int max_cpus) @@ -1158,10 +1377,13 @@ int __devinit __cpu_up(unsigned int cpu) void __init smp_cpus_done(unsigned int max_cpus) { #ifdef CONFIG_X86_IO_APIC - cpumask_t targets = CPU_MASK_ALL; - setup_ioapic_dest(targets); + setup_ioapic_dest(); #endif zap_low_mappings(); + /* + * Disable executability of the SMP trampoline: + */ + set_kernel_exec((unsigned long)trampoline_base, trampoline_exec); } void __init smp_intr_init(void)
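
Illustrative note (not part of the patch): the change above replaces the old one-sibling-per-CPU table, int cpu_sibling_map[NR_CPUS] with a NO_PROC_ID sentinel, by cpumask_t cpu_sibling_map[NR_CPUS], so each entry records the full set of booted CPUs sharing a physical package id, including the CPU itself. Below is a minimal, self-contained userspace sketch of that construction; the unsigned long bitmask, the small NR_CPUS, and the hard-coded cpu_callout_map and phys_proc_id[] values are stand-ins for the kernel's cpumask_t API and per-CPU data, chosen only for illustration.

#include <stdio.h>

#define NR_CPUS 8

/* Stand-ins for the kernel's cpumask_t and per-CPU data (illustrative only). */
typedef unsigned long cpumask;                      /* one bit per CPU        */
static cpumask cpu_callout_map = 0x0fUL;            /* CPUs 0-3 were booted   */
static int phys_proc_id[NR_CPUS] = { 0, 0, 1, 1 };  /* two HT packages        */
static cpumask cpu_sibling_map[NR_CPUS];

int main(void)
{
	int cpu, i;

	/* Clear every mask, then set a bit for each booted CPU that shares
	 * this CPU's physical package id; a CPU is always in its own mask. */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		cpu_sibling_map[cpu] = 0;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (!(cpu_callout_map & (1UL << cpu)))
			continue;
		for (i = 0; i < NR_CPUS; i++) {
			if (!(cpu_callout_map & (1UL << i)))
				continue;
			if (phys_proc_id[cpu] == phys_proc_id[i])
				cpu_sibling_map[cpu] |= 1UL << i;
		}
		printf("cpu%d siblings: %#lx\n", cpu, cpu_sibling_map[cpu]);
	}
	return 0;
}

Because a CPU is now always a member of its own sibling mask, the patch can drop the NO_PROC_ID sentinel and the "No sibling found" warning, and instead compare the number of siblings found against smp_num_siblings.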
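
Similarly illustrative: the new arch_init_sched_domains() builds, per level (SMT siblings, physical package, and NUMA node when CONFIG_NUMA is set), a circular singly linked list of struct sched_group entries and attaches each CPU's bottom-level sched_domain to it. The first/last/next idiom repeated at every level is sketched below with a simplified group type; the struct and function names here are invented for the example and are not kernel API.

#include <stdio.h>

/* Simplified stand-in for struct sched_group: only the ring linkage. */
struct group {
	int id;
	struct group *next;
};

/* Link n groups into a circular list, mirroring the first/last/next
 * idiom the patch uses for the sibling, physical and node groups. */
static void link_ring(struct group *groups, int n)
{
	struct group *first = NULL, *last = NULL;
	int i;

	for (i = 0; i < n; i++) {
		struct group *g = &groups[i];

		if (!first)
			first = g;
		if (last)
			last->next = g;
		last = g;
	}
	if (last)
		last->next = first;	/* close the ring */
}

int main(void)
{
	struct group nodes[3] = { { .id = 0 }, { .id = 1 }, { .id = 2 } };
	struct group *g = &nodes[0];
	int step;

	link_ring(nodes, 3);

	/* Walk once around the ring: 0 -> 1 -> 2 -> 0. */
	for (step = 0; step < 3; step++, g = g->next)
		printf("group %d -> group %d\n", g->id, g->next->id);
	return 0;
}

The load balancer walks each level's groups starting from the current CPU's own group and stops when it comes back around, which is why every level must form a closed cycle rather than a NULL-terminated list.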