* 2 of the License, or (at your option) any later version.
*/
+#undef DEBUG
+
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/irq.h>
#include <asm/page.h>
#include <asm/pgtable.h>
-#include <asm/hardirq.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/smp.h>
#include <asm/xics.h>
#include <asm/cputable.h>
#include <asm/system.h>
+#include <asm/rtas.h>
+#include <asm/plpar_wrappers.h>
+
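+/* Simple build-time debug switch: define DEBUG above and DBG() goes to
+ * the early udbg_printf() console; otherwise it compiles away.
+ */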
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
int smp_threads_ready;
unsigned long cache_decay_ticks;
cpumask_t cpu_possible_map = CPU_MASK_NONE;
cpumask_t cpu_online_map = CPU_MASK_NONE;
-cpumask_t cpu_available_map = CPU_MASK_NONE;
-cpumask_t cpu_present_at_boot = CPU_MASK_NONE;
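+/* Per-cpu mask of SMT sibling threads, presumably consumed by the
+ * generic scheduler's CONFIG_SCHED_SMT support now that the
+ * arch-specific arch_init_sched_domains() below is removed.
+ */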
+cpumask_t cpu_sibling_map[NR_CPUS] = { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
EXPORT_SYMBOL(cpu_online_map);
EXPORT_SYMBOL(cpu_possible_map);
extern long register_vpa(unsigned long flags, unsigned long proc,
unsigned long vpa);
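+/* Non-zero if secondary SMT threads should be started at boot;
+ * presumably overridden elsewhere by an smt-enabled style boot option.
+ */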
+int smt_enabled_at_boot = 1;
+
/* Low level assembly function used to backup CPU 0 state */
extern void __save_cpu_setup(void);
+extern void pseries_secondary_smp_init(unsigned long);
+
#ifdef CONFIG_PPC_ISERIES
static unsigned long iSeries_smp_message[NR_CPUS];
static int smp_iSeries_numProcs(void)
{
unsigned np, i;
- struct ItLpPaca * lpPaca;
np = 0;
for (i=0; i < NR_CPUS; ++i) {
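+ /* xDynProcStatus < 2 appears to mean the hypervisor counts this
+ * processor as configured for the partition.
+ */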
- lpPaca = paca[i].xLpPacaPtr;
- if ( lpPaca->xDynProcStatus < 2 ) {
- cpu_set(i, cpu_available_map);
+ if (paca[i].lppaca.xDynProcStatus < 2) {
cpu_set(i, cpu_possible_map);
- cpu_set(i, cpu_present_at_boot);
+ cpu_set(i, cpu_present_map);
++np;
}
}
{
unsigned i;
unsigned np = 0;
- struct ItLpPaca *lpPaca;
for (i=0; i < NR_CPUS; ++i) {
- lpPaca = paca[i].xLpPacaPtr;
- if (lpPaca->xDynProcStatus < 2) {
+ if (paca[i].lppaca.xDynProcStatus < 2) {
/*paca[i].active = 1;*/
++np;
}
static void smp_iSeries_kick_cpu(int nr)
{
- struct ItLpPaca *lpPaca;
-
BUG_ON(nr < 0 || nr >= NR_CPUS);
/* Verify that our partition has a processor nr */
- lpPaca = paca[nr].xLpPacaPtr;
- if (lpPaca->xDynProcStatus >= 2)
+ if (paca[nr].lppaca.xDynProcStatus >= 2)
return;
/* The processor is currently spinning, waiting
- * for the xProcStart field to become non-zero
- * After we set xProcStart, the processor will
+ * for the cpu_start field to become non-zero.
+ * After we set cpu_start, the processor will
* continue on to secondary_start in iSeries_head.S
*/
- paca[nr].xProcStart = 1;
+ paca[nr].cpu_start = 1;
}
static void __devinit smp_iSeries_setup_cpu(int nr)
}
#endif
-#ifdef CONFIG_PPC_PSERIES
+#ifdef CONFIG_PPC_MULTIPLATFORM
void smp_openpic_message_pass(int target, int msg)
{
/* make sure we're sending something that translates to an IPI */
do_openpic_setup_cpu();
}
-#ifdef CONFIG_HOTPLUG_CPU
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+
+#ifdef CONFIG_PPC_PSERIES
+
/* Get state of physical CPU.
* Return codes:
* 0 - The processor is in the RTAS stopped state
* -1 - Hardware Error
* -2 - Hardware Busy, Try again later.
*/
-static int query_cpu_stopped(unsigned int pcpu)
+int query_cpu_stopped(unsigned int pcpu)
{
- long cpu_status;
+ int cpu_status;
int status, qcss_tok;
+ DBG(" -> query_cpu_stopped(%d)\n", pcpu);
qcss_tok = rtas_token("query-cpu-stopped-state");
- BUG_ON(qcss_tok == RTAS_UNKNOWN_SERVICE);
+ if (qcss_tok == RTAS_UNKNOWN_SERVICE)
+ return -1;
status = rtas_call(qcss_tok, 1, 2, &cpu_status, pcpu);
if (status != 0) {
printk(KERN_ERR
return status;
}
+ DBG(" <- query_cpu_stopped(), status: %d\n", cpu_status);
+
return cpu_status;
}
+#ifdef CONFIG_HOTPLUG_CPU
+
int __cpu_disable(void)
{
/* FIXME: go put this in a header somewhere */
int cpu_status;
unsigned int pcpu = get_hard_smp_processor_id(cpu);
- for (tries = 0; tries < 5; tries++) {
+ for (tries = 0; tries < 25; tries++) {
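+ /* Poll more often but for the same total time as before:
+ * 25 tries at HZ/5 is still ~5 seconds overall.
+ */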
cpu_status = query_cpu_stopped(pcpu);
-
- if (cpu_status == 0)
+ if (cpu_status == 0 || cpu_status == -1)
break;
set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ);
+ schedule_timeout(HZ/5);
}
if (cpu_status != 0) {
printk("Querying DEAD? cpu %i (%i) shows %i\n",
* done here. Change isolate state to Isolate and
* change allocation-state to Unusable.
*/
- paca[cpu].xProcStart = 0;
+ paca[cpu].cpu_start = 0;
/* So we can recognize if it fails to come up next time. */
cpu_callin_map[cpu] = 0;
void cpu_die(void)
{
local_irq_disable();
+ /* Some hardware requires clearing the CPPR, while other hardware
+ * does not; it is safe either way.
+ */
+ pSeriesLP_cppr_info(0, 0);
rtas_stop_self();
/* Should never get here... */
BUG();
static inline int __devinit smp_startup_cpu(unsigned int lcpu)
{
int status;
- extern void (*pseries_secondary_smp_init)(unsigned int cpu);
- unsigned long start_here = __pa(pseries_secondary_smp_init);
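+ /* pseries_secondary_smp_init names a ppc64 function descriptor;
+ * dereference it to get the real entry point before taking __pa().
+ */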
+ unsigned long start_here = __pa((u32)*((unsigned long *)
+ pseries_secondary_smp_init));
unsigned int pcpu;
/* At boot time the cpus are already spinning in hold
* loops, so nothing to do. */
- if (system_state == SYSTEM_BOOTING)
+ if (system_state < SYSTEM_RUNNING)
return 1;
pcpu = find_physical_cpu_to_start(get_hard_smp_processor_id(lcpu));
}
/* Fixup atomic count: it exited inside IRQ handler. */
- paca[lcpu].xCurrent->thread_info->preempt_count = 0;
- /* Fixup SLB round-robin so next segment (kernel) goes in segment 0 */
- paca[lcpu].xStab_data.next_round_robin = 0;
+ paca[lcpu].__current->thread_info->preempt_count = 0;
/* At boot this is done in prom.c. */
- paca[lcpu].xHwProcNum = pcpu;
+ paca[lcpu].hw_cpu_id = pcpu;
status = rtas_call(rtas_token("start-cpu"), 3, 1, NULL,
pcpu, start_here, lcpu);
}
return 1;
}
-
-static inline void look_for_more_cpus(void)
-{
- int num_addr_cell, num_size_cell, len, i, maxcpus;
- struct device_node *np;
- unsigned int *ireg;
-
- /* Find the property which will tell us about how many CPUs
- * we're allowed to have. */
- if ((np = find_path_device("/rtas")) == NULL) {
- printk(KERN_ERR "Could not find /rtas in device tree!");
- return;
- }
- num_addr_cell = prom_n_addr_cells(np);
- num_size_cell = prom_n_size_cells(np);
-
- ireg = (unsigned int *)get_property(np, "ibm,lrdr-capacity", &len);
- if (ireg == NULL) {
- /* FIXME: make sure not marked as lrdr_capable() */
- return;
- }
-
- maxcpus = ireg[num_addr_cell + num_size_cell];
- /* DRENG need to account for threads here too */
-
- if (maxcpus > NR_CPUS) {
- printk(KERN_WARNING
- "Partition configured for %d cpus, "
- "operating system maximum is %d.\n", maxcpus, NR_CPUS);
- maxcpus = NR_CPUS;
- } else
- printk(KERN_INFO "Partition configured for %d cpus.\n",
- maxcpus);
-
- /* Make those cpus (which might appear later) possible too. */
- for (i = 0; i < maxcpus; i++)
- cpu_set(i, cpu_possible_map);
-}
#else /* ... CONFIG_HOTPLUG_CPU */
static inline int __devinit smp_startup_cpu(unsigned int lcpu)
{
return 1;
}
-static inline void look_for_more_cpus(void)
-{
-}
#endif /* CONFIG_HOTPLUG_CPU */
static void smp_pSeries_kick_cpu(int nr)
if (!smp_startup_cpu(nr))
return;
- /* The processor is currently spinning, waiting
- * for the xProcStart field to become non-zero
- * After we set xProcStart, the processor will
- * continue on to secondary_start
+ /*
+ * The processor is currently spinning, waiting for the
+ * cpu_start field to become non-zero. After we set cpu_start,
+ * the processor will continue on to secondary_start.
*/
- paca[nr].xProcStart = 1;
+ paca[nr].cpu_start = 1;
}
#endif /* CONFIG_PPC_PSERIES */
}
#ifdef CONFIG_PPC_PSERIES
-void vpa_init(int cpu)
+static void vpa_init(int cpu)
{
- unsigned long flags;
+ unsigned long flags, pcpu = get_hard_smp_processor_id(cpu);
/* Register the Virtual Processor Area (VPA) */
- printk(KERN_INFO "register_vpa: cpu 0x%x\n", cpu);
flags = 1UL << (63 - 18);
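+ /* Bit 18 (IBM numbering) of the flags appears to select the
+ * "register VPA" subfunction understood by register_vpa().
+ */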
- paca[cpu].xLpPaca.xSLBCount = 64; /* SLB restore highwater mark */
- register_vpa(flags, cpu, __pa((unsigned long)&(paca[cpu].xLpPaca)));
+ register_vpa(flags, pcpu, __pa((unsigned long)&(paca[cpu].lppaca)));
}
static inline void smp_xics_do_message(int cpu, int msg)
/* This is called very early */
void __init smp_init_pSeries(void)
{
+ int ret, i;
+
+ DBG(" -> smp_init_pSeries()\n");
if (naca->interrupt_controller == IC_OPEN_PIC)
smp_ops = &pSeries_openpic_smp_ops;
else
smp_ops = &pSeries_xics_smp_ops;
+ /* Start secondary threads on SMT systems; primary threads
+ * are already in the running state.
+ */
+ for_each_present_cpu(i) {
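+ /* query_cpu_stopped() == 0 means the thread is still in the
+ * RTAS stopped state and needs an explicit start-cpu call.
+ */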
+ if (query_cpu_stopped(get_hard_smp_processor_id(i)) == 0) {
+ printk("%16.16x : starting thread\n", i);
+ DBG("%16.16x : starting thread\n", i);
+ rtas_call(rtas_token("start-cpu"), 3, 1, &ret,
+ get_hard_smp_processor_id(i),
+ __pa((u32)*((unsigned long *)
+ pseries_secondary_smp_init)),
+ i);
+ }
+ }
+
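+ /* On shared-processor LPAR firmware, register the boot cpu's VPA
+ * now; secondary cpus register theirs as they come online.
+ */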
+ if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR)
+ vpa_init(boot_cpuid);
+
/* Non-lpar has additional take/give timebase */
if (systemcfg->platform == PLATFORM_PSERIES) {
smp_ops->give_timebase = pSeries_give_timebase;
smp_ops->take_timebase = pSeries_take_timebase;
}
+
+ DBG(" <- smp_init_pSeries()\n");
}
-#endif
+#endif /* CONFIG_PPC_PSERIES */
void smp_local_timer_interrupt(struct pt_regs * regs)
{
- if (!--(get_paca()->prof_counter)) {
- update_process_times(user_mode(regs));
- (get_paca()->prof_counter)=get_paca()->prof_multiplier;
- }
+ update_process_times(user_mode(regs));
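+ /* The prof_counter/prof_multiplier scaling is gone, so every
+ * decrementer tick accounts process time directly.
+ */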
}
void smp_message_recv(int msg, struct pt_regs *regs)
printk("smp_call_function on cpu %d: other cpus not "
"responding (%d)\n", smp_processor_id(),
atomic_read(&data.started));
- debugger(0);
+ debugger(NULL);
goto out;
}
}
smp_processor_id(),
atomic_read(&data.finished),
atomic_read(&data.started));
- debugger(0);
+ debugger(NULL);
goto out;
}
}
return ret;
}
+EXPORT_SYMBOL(smp_call_function);
+
void smp_call_function_interrupt(void)
{
void (*func) (void *info);
static void __devinit smp_store_cpu_info(int id)
{
- per_cpu(pvr, id) = _get_PVR();
+ per_cpu(pvr, id) = mfspr(SPRN_PVR);
}
static void __init smp_create_idle(unsigned int cpu)
{
- struct pt_regs regs;
struct task_struct *p;
/* create a process for the processor */
- /* only regs.msr is actually used, and 0 is OK for it */
- memset(®s, 0, sizeof(struct pt_regs));
- p = copy_process(CLONE_VM | CLONE_IDLETASK,
- 0, ®s, 0, NULL, NULL);
+ p = fork_idle(cpu);
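+ /* fork_idle() replaces the open-coded copy_process()/init_idle()/
+ * unhash_process() sequence removed above.
+ */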
if (IS_ERR(p))
panic("failed fork for CPU %u: %li", cpu, PTR_ERR(p));
-
- wake_up_forked_process(p);
- init_idle(p, cpu);
- unhash_process(p);
-
- paca[cpu].xCurrent = p;
+ paca[cpu].__current = p;
current_set[cpu] = p->thread_info;
}
/* Fixup boot cpu */
smp_store_cpu_info(boot_cpuid);
cpu_callin_map[boot_cpuid] = 1;
- paca[boot_cpuid].prof_counter = 1;
- paca[boot_cpuid].prof_multiplier = 1;
#ifndef CONFIG_PPC_ISERIES
paca[boot_cpuid].next_jiffy_update_tb = tb_last_stamp = get_tb();
* number of msecs off until someone does a settimeofday()
*/
do_gtod.tb_orig_stamp = tb_last_stamp;
-
- look_for_more_cpus();
+ systemcfg->tb_orig_stamp = tb_last_stamp;
#endif
max_cpus = smp_ops->probe();
{
BUG_ON(smp_processor_id() != boot_cpuid);
- /* cpu_possible is set up in prom.c */
cpu_set(boot_cpuid, cpu_online_map);
- paca[boot_cpuid].xCurrent = current;
+ paca[boot_cpuid].__current = current;
current_set[boot_cpuid] = current->thread_info;
}
int c;
/* At boot, don't bother with non-present cpus -JSCHOPP */
- if (system_state == SYSTEM_BOOTING && !cpu_present_at_boot(cpu))
+ if (system_state < SYSTEM_RUNNING && !cpu_present(cpu))
return -ENOENT;
- paca[cpu].prof_counter = 1;
- paca[cpu].prof_multiplier = 1;
paca[cpu].default_decr = tb_ticks_per_jiffy / decr_overclock;
if (!(cur_cpu_spec->cpu_features & CPU_FTR_SLB)) {
tmp = &stab_array[PAGE_SIZE * cpu];
memset(tmp, 0, PAGE_SIZE);
- paca[cpu].xStab_data.virt = (unsigned long)tmp;
- paca[cpu].xStab_data.real = virt_to_abs(tmp);
+ paca[cpu].stab_addr = (unsigned long)tmp;
+ paca[cpu].stab_real = virt_to_abs(tmp);
}
/* The information for processor bringup must
* use this value that I found through experimentation.
* -- Cort
*/
- if (system_state == SYSTEM_BOOTING)
+ if (system_state < SYSTEM_RUNNING)
for (c = 5000; c && !cpu_callin_map[cpu]; c--)
udelay(100);
#ifdef CONFIG_HOTPLUG_CPU
if (smp_ops->give_timebase)
smp_ops->give_timebase();
- cpu_set(cpu, cpu_online_map);
+
+ /* Wait until cpu puts itself in the online map */
+ while (!cpu_online(cpu))
+ cpu_relax();
+
return 0;
}
if (smp_ops->take_timebase)
smp_ops->take_timebase();
- get_paca()->yielded = 0;
-
#ifdef CONFIG_PPC_PSERIES
if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
vpa_init(cpu);
#endif
#endif
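+ /* Hold call_lock while going online so we cannot race with an
+ * smp_call_function() that is concurrently counting recipients.
+ */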
+ spin_lock(&call_lock);
+ cpu_set(cpu, cpu_online_map);
+ spin_unlock(&call_lock);
+
local_irq_enable();
return cpu_idle(NULL);
smp_threads_ready = 1;
set_cpus_allowed(current, old_mask);
-}
-#ifdef CONFIG_SCHED_SMT
-#ifdef CONFIG_NUMA
-static struct sched_group sched_group_cpus[NR_CPUS];
-static struct sched_group sched_group_phys[NR_CPUS];
-static struct sched_group sched_group_nodes[MAX_NUMNODES];
-static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
-static DEFINE_PER_CPU(struct sched_domain, phys_domains);
-static DEFINE_PER_CPU(struct sched_domain, node_domains);
-__init void arch_init_sched_domains(void)
-{
- int i;
- struct sched_group *first = NULL, *last = NULL;
-
- /* Set up domains */
- for_each_cpu(i) {
- struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
- struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
- struct sched_domain *node_domain = &per_cpu(node_domains, i);
- int node = cpu_to_node(i);
- cpumask_t nodemask = node_to_cpumask(node);
- cpumask_t my_cpumask = cpumask_of_cpu(i);
- cpumask_t sibling_cpumask = cpumask_of_cpu(i ^ 0x1);
-
- *cpu_domain = SD_SIBLING_INIT;
- if (cur_cpu_spec->cpu_features & CPU_FTR_SMT)
- cpus_or(cpu_domain->span, my_cpumask, sibling_cpumask);
- else
- cpu_domain->span = my_cpumask;
- cpu_domain->parent = phys_domain;
- cpu_domain->groups = &sched_group_cpus[i];
-
- *phys_domain = SD_CPU_INIT;
- phys_domain->span = nodemask;
- phys_domain->parent = node_domain;
- phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)];
-
- *node_domain = SD_NODE_INIT;
- node_domain->span = cpu_possible_map;
- node_domain->groups = &sched_group_nodes[node];
- }
-
- /* Set up CPU (sibling) groups */
- for_each_cpu(i) {
- struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
- int j;
- first = last = NULL;
-
- if (i != first_cpu(cpu_domain->span))
- continue;
-
- for_each_cpu_mask(j, cpu_domain->span) {
- struct sched_group *cpu = &sched_group_cpus[j];
-
- cpus_clear(cpu->cpumask);
- cpu_set(j, cpu->cpumask);
- cpu->cpu_power = SCHED_LOAD_SCALE;
-
- if (!first)
- first = cpu;
- if (last)
- last->next = cpu;
- last = cpu;
- }
- last->next = first;
- }
-
- for (i = 0; i < MAX_NUMNODES; i++) {
- int j;
- cpumask_t nodemask;
- struct sched_group *node = &sched_group_nodes[i];
- cpumask_t node_cpumask = node_to_cpumask(i);
- cpus_and(nodemask, node_cpumask, cpu_possible_map);
-
- if (cpus_empty(nodemask))
- continue;
-
- first = last = NULL;
- /* Set up physical groups */
- for_each_cpu_mask(j, nodemask) {
- struct sched_domain *cpu_domain = &per_cpu(cpu_domains, j);
- struct sched_group *cpu = &sched_group_phys[j];
-
- if (j != first_cpu(cpu_domain->span))
- continue;
-
- cpu->cpumask = cpu_domain->span;
- /*
- * Make each extra sibling increase power by 10% of
- * the basic CPU. This is very arbitrary.
- */
- cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
- node->cpu_power += cpu->cpu_power;
-
- if (!first)
- first = cpu;
- if (last)
- last->next = cpu;
- last = cpu;
- }
- last->next = first;
- }
-
- /* Set up nodes */
- first = last = NULL;
- for (i = 0; i < MAX_NUMNODES; i++) {
- struct sched_group *cpu = &sched_group_nodes[i];
- cpumask_t nodemask;
- cpumask_t node_cpumask = node_to_cpumask(i);
- cpus_and(nodemask, node_cpumask, cpu_possible_map);
-
- if (cpus_empty(nodemask))
- continue;
-
- cpu->cpumask = nodemask;
- /* ->cpu_power already setup */
-
- if (!first)
- first = cpu;
- if (last)
- last->next = cpu;
- last = cpu;
- }
- last->next = first;
-
- mb();
- for_each_cpu(i) {
- struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
- cpu_attach_domain(cpu_domain, i);
- }
-}
-#else /* !CONFIG_NUMA */
-static struct sched_group sched_group_cpus[NR_CPUS];
-static struct sched_group sched_group_phys[NR_CPUS];
-static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
-static DEFINE_PER_CPU(struct sched_domain, phys_domains);
-__init void arch_init_sched_domains(void)
-{
- int i;
- struct sched_group *first = NULL, *last = NULL;
-
- /* Set up domains */
- for_each_cpu(i) {
- struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
- struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
- cpumask_t my_cpumask = cpumask_of_cpu(i);
- cpumask_t sibling_cpumask = cpumask_of_cpu(i ^ 0x1);
-
- *cpu_domain = SD_SIBLING_INIT;
- if (cur_cpu_spec->cpu_features & CPU_FTR_SMT)
- cpus_or(cpu_domain->span, my_cpumask, sibling_cpumask);
- else
- cpu_domain->span = my_cpumask;
- cpu_domain->parent = phys_domain;
- cpu_domain->groups = &sched_group_cpus[i];
-
- *phys_domain = SD_CPU_INIT;
- phys_domain->span = cpu_possible_map;
- phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)];
- }
-
- /* Set up CPU (sibling) groups */
- for_each_cpu(i) {
- struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
- int j;
- first = last = NULL;
-
- if (i != first_cpu(cpu_domain->span))
- continue;
-
- for_each_cpu_mask(j, cpu_domain->span) {
- struct sched_group *cpu = &sched_group_cpus[j];
-
- cpus_clear(cpu->cpumask);
- cpu_set(j, cpu->cpumask);
- cpu->cpu_power = SCHED_LOAD_SCALE;
-
- if (!first)
- first = cpu;
- if (last)
- last->next = cpu;
- last = cpu;
- }
- last->next = first;
- }
-
- first = last = NULL;
- /* Set up physical groups */
- for_each_cpu(i) {
- struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
- struct sched_group *cpu = &sched_group_phys[i];
-
- if (i != first_cpu(cpu_domain->span))
- continue;
-
- cpu->cpumask = cpu_domain->span;
- /* See SMT+NUMA setup for comment */
- cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
-
- if (!first)
- first = cpu;
- if (last)
- last->next = cpu;
- last = cpu;
- }
- last->next = first;
-
- mb();
- for_each_cpu(i) {
- struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
- cpu_attach_domain(cpu_domain, i);
- }
+ /*
+ * We know at boot the maximum number of cpus we can add to a
+ * partition, and we set cpu_possible_map accordingly. cpu_present_map
+ * needs to match so that the hotplug code will allow us to hot add
+ * any offline cpus.
+ */
+ cpu_present_map = cpu_possible_map;
}
-#endif /* CONFIG_NUMA */
-#endif /* CONFIG_SCHED_SMT */