X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fppc64%2Fkernel%2Fsmp.c;h=1f3003408930aedb3ae1e78b8a04d7258edff10b;hb=9bf4aaab3e101692164d49b7ca357651eb691cb6;hp=2447cce8ba92ed40cd2c407952a78e3c1d0c327d;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git

diff --git a/arch/ppc64/kernel/smp.c b/arch/ppc64/kernel/smp.c
index 2447cce8b..1f3003408 100644
--- a/arch/ppc64/kernel/smp.c
+++ b/arch/ppc64/kernel/smp.c
@@ -52,6 +52,7 @@
 #include
 #include
 #include
+#include

 int smp_threads_ready;
 unsigned long cache_decay_ticks;
@@ -119,12 +120,10 @@ static void smp_iSeries_message_pass(int target, int msg)
 static int smp_iSeries_numProcs(void)
 {
 	unsigned np, i;
-	struct ItLpPaca * lpPaca;

 	np = 0;
 	for (i=0; i < NR_CPUS; ++i) {
-		lpPaca = paca[i].xLpPacaPtr;
-		if ( lpPaca->xDynProcStatus < 2 ) {
+		if (paca[i].lppaca.xDynProcStatus < 2) {
 			cpu_set(i, cpu_available_map);
 			cpu_set(i, cpu_possible_map);
 			cpu_set(i, cpu_present_at_boot);
@@ -138,11 +137,9 @@ static int smp_iSeries_probe(void)
 {
 	unsigned i;
 	unsigned np = 0;
-	struct ItLpPaca *lpPaca;

 	for (i=0; i < NR_CPUS; ++i) {
-		lpPaca = paca[i].xLpPacaPtr;
-		if (lpPaca->xDynProcStatus < 2) {
+		if (paca[i].lppaca.xDynProcStatus < 2) {
 			/*paca[i].active = 1;*/
 			++np;
 		}
@@ -153,21 +150,18 @@ static int smp_iSeries_probe(void)

 static void smp_iSeries_kick_cpu(int nr)
 {
-	struct ItLpPaca *lpPaca;
-
 	BUG_ON(nr < 0 || nr >= NR_CPUS);

 	/* Verify that our partition has a processor nr */
-	lpPaca = paca[nr].xLpPacaPtr;
-	if (lpPaca->xDynProcStatus >= 2)
+	if (paca[nr].lppaca.xDynProcStatus >= 2)
 		return;

 	/* The processor is currently spinning, waiting
-	 * for the xProcStart field to become non-zero
-	 * After we set xProcStart, the processor will
+	 * for the cpu_start field to become non-zero.
+	 * After we set cpu_start, the processor will
 	 * continue on to secondary_start in iSeries_head.S.
 	 */
-	paca[nr].xProcStart = 1;
+	paca[nr].cpu_start = 1;
 }

 static void __devinit smp_iSeries_setup_cpu(int nr)
@@ -241,7 +235,7 @@ static void __devinit smp_openpic_setup_cpu(int cpu)
  */
 static int query_cpu_stopped(unsigned int pcpu)
 {
-	long cpu_status;
+	int cpu_status;
 	int status, qcss_tok;

 	qcss_tok = rtas_token("query-cpu-stopped-state");
@@ -296,7 +290,7 @@ void __cpu_die(unsigned int cpu)
 	 * done here.  Change isolate state to Isolate and
 	 * change allocation-state to Unusable.
 	 */
-	paca[cpu].xProcStart = 0;
+	paca[cpu].cpu_start = 0;

 	/* So we can recognize if it fails to come up next time. */
 	cpu_callin_map[cpu] = 0;
@@ -306,6 +300,10 @@ void __cpu_die(unsigned int cpu)
 void cpu_die(void)
 {
 	local_irq_disable();
+	/* Some hardware requires clearing the CPPR, while other
+	 * hardware does not; it is safe either way.
+	 */
+	pSeriesLP_cppr_info(0, 0);
 	rtas_stop_self();
 	/* Should never get here... */
 	BUG();
@@ -390,13 +388,10 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu)
 	}

 	/* Fixup atomic count: it exited inside IRQ handler. */
-	((struct task_struct *)paca[lcpu].xCurrent)->thread_info->preempt_count
-		= 0;
-	/* Fixup SLB round-robin so next segment (kernel) goes in segment 0 */
-	paca[lcpu].xStab_data.next_round_robin = 0;
+	paca[lcpu].__current->thread_info->preempt_count = 0;

 	/* At boot this is done in prom.c. */
-	paca[lcpu].xHwProcNum = pcpu;
+	paca[lcpu].hw_cpu_id = pcpu;

 	status = rtas_call(rtas_token("start-cpu"), 3, 1, NULL,
 			   pcpu, start_here, lcpu);
@@ -429,7 +424,11 @@ static inline void look_for_more_cpus(void)
 	}
 	maxcpus = ireg[num_addr_cell + num_size_cell];
-	/* DRENG need to account for threads here too */
+
+	/* Double maxcpus for processors which have SMT capability */
+	if (cur_cpu_spec->cpu_features & CPU_FTR_SMT)
+		maxcpus *= 2;
+
 	if (maxcpus > NR_CPUS) {
 		printk(KERN_WARNING
@@ -461,12 +460,12 @@ static void smp_pSeries_kick_cpu(int nr)
 	if (!smp_startup_cpu(nr))
 		return;

-	/* The processor is currently spinning, waiting
-	 * for the xProcStart field to become non-zero
-	 * After we set xProcStart, the processor will
-	 * continue on to secondary_start
+	/*
+	 * The processor is currently spinning, waiting for the
+	 * cpu_start field to become non-zero.  After we set cpu_start,
+	 * the processor will continue on to secondary_start.
 	 */
-	paca[nr].xProcStart = 1;
+	paca[nr].cpu_start = 1;
 }
 #endif /* CONFIG_PPC_PSERIES */
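Both kick_cpu implementations above rely on the same handshake: the secondary processor spins in early startup code until the boot CPU sets paca[nr].cpu_start, then falls through to secondary_start. Below is a minimal user-space sketch of that handshake using C11 atomics and a pthread in place of a real secondary CPU; the names paca_sketch, secondary_spin_loop and kick_cpu_sketch are illustrative, not from the kernel source (the real spin loop lives in assembly, e.g. iSeries_head.S).

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    struct paca_sketch {
    	atomic_int cpu_start;           /* stands in for paca[nr].cpu_start */
    };

    static struct paca_sketch paca[2];

    /* Secondary CPU side: spin until the boot CPU sets cpu_start, then
     * "continue on to secondary_start" (here, just print and return). */
    static void *secondary_spin_loop(void *arg)
    {
    	struct paca_sketch *my_paca = arg;

    	while (atomic_load(&my_paca->cpu_start) == 0)
    		;                       /* busy-wait, like the real spin loop */
    	printf("secondary released\n");
    	return NULL;
    }

    /* Boot CPU side: what kick_cpu boils down to after its checks. */
    static void kick_cpu_sketch(int nr)
    {
    	atomic_store(&paca[nr].cpu_start, 1);
    }

    int main(void)
    {
    	pthread_t t;

    	pthread_create(&t, NULL, secondary_spin_loop, &paca[1]);
    	kick_cpu_sketch(1);             /* release "CPU 1" */
    	pthread_join(t, NULL);
    	return 0;
    }

The point of the flag-based design is that the boot CPU never has to interrupt the secondary; a single ordinary store is enough to release it.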
@@ -491,10 +490,8 @@ void vpa_init(int cpu)
 	unsigned long flags;

 	/* Register the Virtual Processor Area (VPA) */
-	printk(KERN_INFO "register_vpa: cpu 0x%x\n", cpu);
 	flags = 1UL << (63 - 18);
-	paca[cpu].xLpPaca.xSLBCount = 64; /* SLB restore highwater mark */
-	register_vpa(flags, cpu, __pa((unsigned long)&(paca[cpu].xLpPaca)));
+	register_vpa(flags, cpu, __pa((unsigned long)&(paca[cpu].lppaca)));
 }

 static inline void smp_xics_do_message(int cpu, int msg)
@@ -618,7 +615,7 @@ void smp_message_recv(int msg, struct pt_regs *regs)
 #endif
 #ifdef CONFIG_DEBUGGER
 	case PPC_MSG_DEBUGGER_BREAK:
-		debugger(regs);
+		debugger_ipi(regs);
 		break;
 #endif
 	default:
@@ -692,6 +689,9 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
 	int ret = -1, cpus;
 	unsigned long timeout;

+	/* Can deadlock when called with interrupts disabled */
+	WARN_ON(irqs_disabled());
+
 	data.func = func;
 	data.info = info;
 	atomic_set(&data.started, 0);
@@ -721,7 +721,7 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
 			printk("smp_call_function on cpu %d: other cpus not "
 			       "responding (%d)\n", smp_processor_id(),
 			       atomic_read(&data.started));
-			debugger(0);
+			debugger(NULL);
 			goto out;
 		}
 	}
@@ -736,7 +736,7 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
 			       smp_processor_id(), atomic_read(&data.finished),
 			       atomic_read(&data.started));
-			debugger(0);
+			debugger(NULL);
 			goto out;
 		}
 	}
@@ -814,7 +814,7 @@ static void __init smp_create_idle(unsigned int cpu)
 	init_idle(p, cpu);
 	unhash_process(p);

-	paca[cpu].xCurrent = (u64)p;
+	paca[cpu].__current = p;
 	current_set[cpu] = p->thread_info;
 }
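The WARN_ON(irqs_disabled()) added above guards against a real deadlock: the caller spins waiting for other CPUs to acknowledge the IPI, and if two CPUs cross-call each other with interrupts disabled, neither can ever service the other's IPI. The following is a rough user-space model of the started/finished rendezvous that smp_call_function performs, with a pthread standing in for the IPI target; it is illustrative only (the timeout and debugger paths are omitted, and the names ipi_handler and hello are invented here).

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    struct call_data {
    	void (*func)(void *);
    	void *info;
    	atomic_int started;
    	atomic_int finished;
    };

    static struct call_data data;

    /* What each "target CPU" runs when it receives the (simulated) IPI. */
    static void *ipi_handler(void *unused)
    {
    	atomic_fetch_add(&data.started, 1);   /* acknowledge receipt */
    	data.func(data.info);                 /* run the requested function */
    	atomic_fetch_add(&data.finished, 1);  /* report completion */
    	return NULL;
    }

    static void hello(void *info) { printf("called with %s\n", (char *)info); }

    int main(void)
    {
    	pthread_t t;
    	int ncpus = 1;

    	data.func = hello;
    	data.info = "info";
    	atomic_store(&data.started, 0);
    	atomic_store(&data.finished, 0);

    	pthread_create(&t, NULL, ipi_handler, NULL);  /* "send the IPI" */

    	/* Caller spins until all targets start and then finish, as the
    	 * kernel loop does. With interrupts off on both sides, the
    	 * first wait below could spin forever; hence the WARN_ON. */
    	while (atomic_load(&data.started) < ncpus)
    		;
    	while (atomic_load(&data.finished) < ncpus)
    		;
    	pthread_join(t, NULL);
    	return 0;
    }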
- */ - cache_decay_ticks = HZ/100; - #ifndef CONFIG_PPC_ISERIES paca[boot_cpuid].next_jiffy_update_tb = tb_last_stamp = get_tb(); @@ -871,7 +866,7 @@ void __devinit smp_prepare_boot_cpu(void) /* cpu_possible is set up in prom.c */ cpu_set(boot_cpuid, cpu_online_map); - paca[boot_cpuid].xCurrent = (u64)current; + paca[boot_cpuid].__current = current; current_set[boot_cpuid] = current->thread_info; } @@ -896,8 +891,8 @@ int __devinit __cpu_up(unsigned int cpu) tmp = &stab_array[PAGE_SIZE * cpu]; memset(tmp, 0, PAGE_SIZE); - paca[cpu].xStab_data.virt = (unsigned long)tmp; - paca[cpu].xStab_data.real = virt_to_abs(tmp); + paca[cpu].stab_addr = (unsigned long)tmp; + paca[cpu].stab_real = virt_to_abs(tmp); } /* The information for processor bringup must @@ -914,8 +909,20 @@ int __devinit __cpu_up(unsigned int cpu) * use this value that I found through experimentation. * -- Cort */ - for (c = 5000; c && !cpu_callin_map[cpu]; c--) - udelay(100); + if (system_state == SYSTEM_BOOTING) + for (c = 5000; c && !cpu_callin_map[cpu]; c--) + udelay(100); +#ifdef CONFIG_HOTPLUG_CPU + else + /* + * CPUs can take much longer to come up in the + * hotplug case. Wait five seconds. + */ + for (c = 25; c && !cpu_callin_map[cpu]; c--) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(HZ/5); + } +#endif if (!cpu_callin_map[cpu]) { printk("Processor %u is stuck.\n", cpu); @@ -926,7 +933,11 @@ int __devinit __cpu_up(unsigned int cpu) if (smp_ops->give_timebase) smp_ops->give_timebase(); - cpu_set(cpu, cpu_online_map); + + /* Wait until cpu puts itself in the online map */ + while (!cpu_online(cpu)) + cpu_relax(); + return 0; } @@ -947,8 +958,6 @@ int __devinit start_secondary(void *unused) if (smp_ops->take_timebase) smp_ops->take_timebase(); - get_paca()->yielded = 0; - #ifdef CONFIG_PPC_PSERIES if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { vpa_init(cpu); @@ -964,6 +973,10 @@ int __devinit start_secondary(void *unused) #endif #endif + spin_lock(&call_lock); + cpu_set(cpu, cpu_online_map); + spin_unlock(&call_lock); + local_irq_enable(); return cpu_idle(NULL); @@ -992,3 +1005,218 @@ void __init smp_cpus_done(unsigned int max_cpus) set_cpus_allowed(current, old_mask); } + +#ifdef CONFIG_SCHED_SMT +#ifdef CONFIG_NUMA +static struct sched_group sched_group_cpus[NR_CPUS]; +static struct sched_group sched_group_phys[NR_CPUS]; +static struct sched_group sched_group_nodes[MAX_NUMNODES]; +static DEFINE_PER_CPU(struct sched_domain, cpu_domains); +static DEFINE_PER_CPU(struct sched_domain, phys_domains); +static DEFINE_PER_CPU(struct sched_domain, node_domains); +__init void arch_init_sched_domains(void) +{ + int i; + struct sched_group *first = NULL, *last = NULL; + + /* Set up domains */ + for_each_cpu(i) { + struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i); + struct sched_domain *phys_domain = &per_cpu(phys_domains, i); + struct sched_domain *node_domain = &per_cpu(node_domains, i); + int node = cpu_to_node(i); + cpumask_t nodemask = node_to_cpumask(node); + cpumask_t my_cpumask = cpumask_of_cpu(i); + cpumask_t sibling_cpumask = cpumask_of_cpu(i ^ 0x1); + + *cpu_domain = SD_SIBLING_INIT; + if (cur_cpu_spec->cpu_features & CPU_FTR_SMT) + cpus_or(cpu_domain->span, my_cpumask, sibling_cpumask); + else + cpu_domain->span = my_cpumask; + cpu_domain->parent = phys_domain; + cpu_domain->groups = &sched_group_cpus[i]; + + *phys_domain = SD_CPU_INIT; + phys_domain->span = nodemask; + phys_domain->parent = node_domain; + phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)]; + + 
@@ -992,3 +1005,218 @@ void __init smp_cpus_done(unsigned int max_cpus)

 	set_cpus_allowed(current, old_mask);
 }
+
+#ifdef CONFIG_SCHED_SMT
+#ifdef CONFIG_NUMA
+static struct sched_group sched_group_cpus[NR_CPUS];
+static struct sched_group sched_group_phys[NR_CPUS];
+static struct sched_group sched_group_nodes[MAX_NUMNODES];
+static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
+static DEFINE_PER_CPU(struct sched_domain, phys_domains);
+static DEFINE_PER_CPU(struct sched_domain, node_domains);
+__init void arch_init_sched_domains(void)
+{
+	int i;
+	struct sched_group *first = NULL, *last = NULL;
+
+	/* Set up domains */
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
+		struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+		struct sched_domain *node_domain = &per_cpu(node_domains, i);
+		int node = cpu_to_node(i);
+		cpumask_t nodemask = node_to_cpumask(node);
+		cpumask_t my_cpumask = cpumask_of_cpu(i);
+		cpumask_t sibling_cpumask = cpumask_of_cpu(i ^ 0x1);
+
+		*cpu_domain = SD_SIBLING_INIT;
+		if (cur_cpu_spec->cpu_features & CPU_FTR_SMT)
+			cpus_or(cpu_domain->span, my_cpumask, sibling_cpumask);
+		else
+			cpu_domain->span = my_cpumask;
+		cpu_domain->parent = phys_domain;
+		cpu_domain->groups = &sched_group_cpus[i];
+
+		*phys_domain = SD_CPU_INIT;
+		phys_domain->span = nodemask;
+		phys_domain->parent = node_domain;
+		phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)];
+
+		*node_domain = SD_NODE_INIT;
+		node_domain->span = cpu_possible_map;
+		node_domain->groups = &sched_group_nodes[node];
+	}
+
+	/* Set up CPU (sibling) groups */
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
+		int j;
+		first = last = NULL;
+
+		if (i != first_cpu(cpu_domain->span))
+			continue;
+
+		for_each_cpu_mask(j, cpu_domain->span) {
+			struct sched_group *cpu = &sched_group_cpus[j];
+
+			cpus_clear(cpu->cpumask);
+			cpu_set(j, cpu->cpumask);
+			cpu->cpu_power = SCHED_LOAD_SCALE;
+
+			if (!first)
+				first = cpu;
+			if (last)
+				last->next = cpu;
+			last = cpu;
+		}
+		last->next = first;
+	}
+
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		int j;
+		cpumask_t nodemask;
+		struct sched_group *node = &sched_group_nodes[i];
+		cpumask_t node_cpumask = node_to_cpumask(i);
+		cpus_and(nodemask, node_cpumask, cpu_possible_map);
+
+		if (cpus_empty(nodemask))
+			continue;
+
+		first = last = NULL;
+		/* Set up physical groups */
+		for_each_cpu_mask(j, nodemask) {
+			struct sched_domain *cpu_domain = &per_cpu(cpu_domains, j);
+			struct sched_group *cpu = &sched_group_phys[j];
+
+			if (j != first_cpu(cpu_domain->span))
+				continue;
+
+			cpu->cpumask = cpu_domain->span;
+			/*
+			 * Make each extra sibling increase power by 10% of
+			 * the basic CPU.  This is very arbitrary.
+			 */
+			cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
+			node->cpu_power += cpu->cpu_power;
+
+			if (!first)
+				first = cpu;
+			if (last)
+				last->next = cpu;
+			last = cpu;
+		}
+		last->next = first;
+	}
+
+	/* Set up nodes */
+	first = last = NULL;
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		struct sched_group *cpu = &sched_group_nodes[i];
+		cpumask_t nodemask;
+		cpumask_t node_cpumask = node_to_cpumask(i);
+		cpus_and(nodemask, node_cpumask, cpu_possible_map);
+
+		if (cpus_empty(nodemask))
+			continue;
+
+		cpu->cpumask = nodemask;
+		/* ->cpu_power already setup */
+
+		if (!first)
+			first = cpu;
+		if (last)
+			last->next = cpu;
+		last = cpu;
+	}
+	last->next = first;
+
+	mb();
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
+		cpu_attach_domain(cpu_domain, i);
+	}
+}
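The "10% per extra sibling" comment above translates to cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(n-1)/10 for a group of n siblings. Assuming SCHED_LOAD_SCALE is 128, as in kernels of this vintage, a 2-thread SMT pair is rated 128 + 128*1/10 = 140, i.e. roughly 1.1 CPUs rather than 2, so the balancer still strongly prefers spreading tasks across physical cores before stacking them on siblings. A quick check of the arithmetic:

    #include <stdio.h>

    #define SCHED_LOAD_SCALE 128UL   /* assumed value, see note above */

    /* cpu_power formula used by the sched-domain setup in this patch */
    static unsigned long group_power(unsigned int nr_siblings)
    {
    	return SCHED_LOAD_SCALE +
    	       SCHED_LOAD_SCALE * (nr_siblings - 1) / 10;
    }

    int main(void)
    {
    	printf("1 thread : %lu\n", group_power(1));  /* 128 */
    	printf("2 threads: %lu\n", group_power(2));  /* 140 */
    	return 0;
    }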
+#else /* !CONFIG_NUMA */
+static struct sched_group sched_group_cpus[NR_CPUS];
+static struct sched_group sched_group_phys[NR_CPUS];
+static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
+static DEFINE_PER_CPU(struct sched_domain, phys_domains);
+__init void arch_init_sched_domains(void)
+{
+	int i;
+	struct sched_group *first = NULL, *last = NULL;
+
+	/* Set up domains */
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
+		struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+		cpumask_t my_cpumask = cpumask_of_cpu(i);
+		cpumask_t sibling_cpumask = cpumask_of_cpu(i ^ 0x1);
+
+		*cpu_domain = SD_SIBLING_INIT;
+		if (cur_cpu_spec->cpu_features & CPU_FTR_SMT)
+			cpus_or(cpu_domain->span, my_cpumask, sibling_cpumask);
+		else
+			cpu_domain->span = my_cpumask;
+		cpu_domain->parent = phys_domain;
+		cpu_domain->groups = &sched_group_cpus[i];
+
+		*phys_domain = SD_CPU_INIT;
+		phys_domain->span = cpu_possible_map;
+		phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)];
+	}
+
+	/* Set up CPU (sibling) groups */
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
+		int j;
+		first = last = NULL;
+
+		if (i != first_cpu(cpu_domain->span))
+			continue;
+
+		for_each_cpu_mask(j, cpu_domain->span) {
+			struct sched_group *cpu = &sched_group_cpus[j];
+
+			cpus_clear(cpu->cpumask);
+			cpu_set(j, cpu->cpumask);
+			cpu->cpu_power = SCHED_LOAD_SCALE;
+
+			if (!first)
+				first = cpu;
+			if (last)
+				last->next = cpu;
+			last = cpu;
+		}
+		last->next = first;
+	}
+
+	first = last = NULL;
+	/* Set up physical groups */
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
+		struct sched_group *cpu = &sched_group_phys[i];
+
+		if (i != first_cpu(cpu_domain->span))
+			continue;
+
+		cpu->cpumask = cpu_domain->span;
+		/* See SMT+NUMA setup for comment */
+		cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
+
+		if (!first)
+			first = cpu;
+		if (last)
+			last->next = cpu;
+		last = cpu;
+	}
+	last->next = first;
+
+	mb();
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
+		cpu_attach_domain(cpu_domain, i);
+	}
+}
+#endif /* CONFIG_NUMA */
+#endif /* CONFIG_SCHED_SMT */
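In both variants above, a CPU's SMT sibling is computed as i ^ 0x1: XOR with 1 flips the low bit, so hardware threads are assumed to come in consecutively numbered (even, odd) pairs, which matches the maxcpus *= 2 change earlier in the patch. A quick check of the pairing:

    #include <stdio.h>

    int main(void)
    {
    	/* two threads per core, numbered consecutively, as the
    	 * sched-domain setup above assumes */
    	for (int cpu = 0; cpu < 4; cpu++)
    		printf("cpu %d <-> sibling %d\n", cpu, cpu ^ 0x1);
    	/* prints: 0<->1, 1<->0, 2<->3, 3<->2 */
    	return 0;
    }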