X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fppc64%2Fkernel%2Fsmp.c;h=1f3003408930aedb3ae1e78b8a04d7258edff10b;hb=9bf4aaab3e101692164d49b7ca357651eb691cb6;hp=2447cce8ba92ed40cd2c407952a78e3c1d0c327d;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git

diff --git a/arch/ppc64/kernel/smp.c b/arch/ppc64/kernel/smp.c
index 2447cce8b..1f3003408 100644
--- a/arch/ppc64/kernel/smp.c
+++ b/arch/ppc64/kernel/smp.c
@@ -52,6 +52,7 @@
 #include
 #include
 #include
+#include

 int smp_threads_ready;
 unsigned long cache_decay_ticks;
@@ -119,12 +120,10 @@ static void smp_iSeries_message_pass(int target, int msg)
 static int smp_iSeries_numProcs(void)
 {
 	unsigned np, i;
-	struct ItLpPaca * lpPaca;

 	np = 0;
 	for (i=0; i < NR_CPUS; ++i) {
-		lpPaca = paca[i].xLpPacaPtr;
-		if ( lpPaca->xDynProcStatus < 2 ) {
+		if (paca[i].lppaca.xDynProcStatus < 2) {
 			cpu_set(i, cpu_available_map);
 			cpu_set(i, cpu_possible_map);
 			cpu_set(i, cpu_present_at_boot);
@@ -138,11 +137,9 @@ static int smp_iSeries_probe(void)
 {
 	unsigned i;
 	unsigned np = 0;
-	struct ItLpPaca *lpPaca;

 	for (i=0; i < NR_CPUS; ++i) {
-		lpPaca = paca[i].xLpPacaPtr;
-		if (lpPaca->xDynProcStatus < 2) {
+		if (paca[i].lppaca.xDynProcStatus < 2) {
 			/*paca[i].active = 1;*/
 			++np;
 		}
@@ -153,21 +150,18 @@ static int smp_iSeries_probe(void)

 static void smp_iSeries_kick_cpu(int nr)
 {
-	struct ItLpPaca *lpPaca;
-
 	BUG_ON(nr < 0 || nr >= NR_CPUS);

 	/* Verify that our partition has a processor nr */
-	lpPaca = paca[nr].xLpPacaPtr;
-	if (lpPaca->xDynProcStatus >= 2)
+	if (paca[nr].lppaca.xDynProcStatus >= 2)
 		return;

 	/* The processor is currently spinning, waiting
-	 * for the xProcStart field to become non-zero
-	 * After we set xProcStart, the processor will
+	 * for the cpu_start field to become non-zero.
+	 * After we set cpu_start, the processor will
 	 * continue on to secondary_start in iSeries_head.S.
 	 */
-	paca[nr].xProcStart = 1;
+	paca[nr].cpu_start = 1;
 }

 static void __devinit smp_iSeries_setup_cpu(int nr)
@@ -241,7 +235,7 @@ static void __devinit smp_openpic_setup_cpu(int cpu)
  */
 static int query_cpu_stopped(unsigned int pcpu)
 {
-	long cpu_status;
+	int cpu_status;
 	int status, qcss_tok;

 	qcss_tok = rtas_token("query-cpu-stopped-state");
@@ -296,7 +290,7 @@ void __cpu_die(unsigned int cpu)
 	 * done here.  Change isolate state to Isolate and
 	 * change allocation-state to Unusable.
 	 */
-	paca[cpu].xProcStart = 0;
+	paca[cpu].cpu_start = 0;

 	/* So we can recognize if it fails to come up next time. */
 	cpu_callin_map[cpu] = 0;
@@ -306,6 +300,10 @@ void __cpu_die(unsigned int cpu)
 void cpu_die(void)
 {
 	local_irq_disable();
+	/* Some hardware requires clearing the CPPR, while other
+	 * hardware does not; it is safe either way.
+	 */
+	pSeriesLP_cppr_info(0, 0);
 	rtas_stop_self();
 	/* Should never get here... */
 	BUG();
@@ -390,13 +388,10 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu)
 	}

 	/* Fixup atomic count: it exited inside IRQ handler. */
-	((struct task_struct *)paca[lcpu].xCurrent)->thread_info->preempt_count
-		= 0;
-	/* Fixup SLB round-robin so next segment (kernel) goes in segment 0 */
-	paca[lcpu].xStab_data.next_round_robin = 0;
+	paca[lcpu].__current->thread_info->preempt_count = 0;

 	/* At boot this is done in prom.c. */
-	paca[lcpu].xHwProcNum = pcpu;
+	paca[lcpu].hw_cpu_id = pcpu;

 	status = rtas_call(rtas_token("start-cpu"), 3, 1, NULL,
 			   pcpu, start_here, lcpu);
@@ -429,7 +424,11 @@ static inline void look_for_more_cpus(void)
 	}
 	maxcpus = ireg[num_addr_cell + num_size_cell];
-	/* DRENG need to account for threads here too */
+
+	/* Double maxcpus for processors which have SMT capability */
+	if (cur_cpu_spec->cpu_features & CPU_FTR_SMT)
+		maxcpus *= 2;
+
 	if (maxcpus > NR_CPUS) {
 		printk(KERN_WARNING
@@ -461,12 +460,12 @@ static void smp_pSeries_kick_cpu(int nr)
 	if (!smp_startup_cpu(nr))
 		return;

-	/* The processor is currently spinning, waiting
-	 * for the xProcStart field to become non-zero
-	 * After we set xProcStart, the processor will
-	 * continue on to secondary_start
+	/*
+	 * The processor is currently spinning, waiting for the
+	 * cpu_start field to become non-zero.  After we set cpu_start,
+	 * the processor will continue on to secondary_start.
 	 */
-	paca[nr].xProcStart = 1;
+	paca[nr].cpu_start = 1;
 }
 #endif /* CONFIG_PPC_PSERIES */
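Both kick_cpu implementations above rely on the same handshake: the secondary processor spins in early startup code until the boot CPU sets paca[nr].cpu_start, then falls through to secondary_start. Below is a minimal user-space sketch of that handshake using C11 atomics and a pthread in place of a real secondary CPU; the names paca_sketch, secondary_spin_loop and kick_cpu_sketch are illustrative, not from the kernel source (the real spin loop lives in assembly, e.g. iSeries_head.S).

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    struct paca_sketch {
    	atomic_int cpu_start;           /* stands in for paca[nr].cpu_start */
    };

    static struct paca_sketch paca[2];

    /* Secondary CPU side: spin until the boot CPU sets cpu_start, then
     * "continue on to secondary_start" (here, just print and return). */
    static void *secondary_spin_loop(void *arg)
    {
    	struct paca_sketch *my_paca = arg;

    	while (atomic_load(&my_paca->cpu_start) == 0)
    		;                       /* busy-wait, like the real spin loop */
    	printf("secondary released\n");
    	return NULL;
    }

    /* Boot CPU side: what kick_cpu boils down to after its checks. */
    static void kick_cpu_sketch(int nr)
    {
    	atomic_store(&paca[nr].cpu_start, 1);
    }

    int main(void)
    {
    	pthread_t t;

    	pthread_create(&t, NULL, secondary_spin_loop, &paca[1]);
    	kick_cpu_sketch(1);             /* release "CPU 1" */
    	pthread_join(t, NULL);
    	return 0;
    }

The point of the flag-based design is that the boot CPU never has to interrupt the secondary; a single ordinary store is enough to release it.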
@@ -491,10 +490,8 @@ void vpa_init(int cpu)
 	unsigned long flags;

 	/* Register the Virtual Processor Area (VPA) */
-	printk(KERN_INFO "register_vpa: cpu 0x%x\n", cpu);
 	flags = 1UL << (63 - 18);
-	paca[cpu].xLpPaca.xSLBCount = 64; /* SLB restore highwater mark */
-	register_vpa(flags, cpu, __pa((unsigned long)&(paca[cpu].xLpPaca)));
+	register_vpa(flags, cpu, __pa((unsigned long)&(paca[cpu].lppaca)));
 }

 static inline void smp_xics_do_message(int cpu, int msg)
@@ -618,7 +615,7 @@ void smp_message_recv(int msg, struct pt_regs *regs)
 #endif
 #ifdef CONFIG_DEBUGGER
 	case PPC_MSG_DEBUGGER_BREAK:
-		debugger(regs);
+		debugger_ipi(regs);
 		break;
 #endif
 	default:
@@ -692,6 +689,9 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
 	int ret = -1, cpus;
 	unsigned long timeout;

+	/* Can deadlock when called with interrupts disabled */
+	WARN_ON(irqs_disabled());
+
 	data.func = func;
 	data.info = info;
 	atomic_set(&data.started, 0);
@@ -721,7 +721,7 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
 			printk("smp_call_function on cpu %d: other cpus not "
 			       "responding (%d)\n", smp_processor_id(),
 			       atomic_read(&data.started));
-			debugger(0);
+			debugger(NULL);
 			goto out;
 		}
 	}
@@ -736,7 +736,7 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
 			       smp_processor_id(), atomic_read(&data.finished),
 			       atomic_read(&data.started));
-			debugger(0);
+			debugger(NULL);
 			goto out;
 		}
 	}
@@ -814,7 +814,7 @@ static void __init smp_create_idle(unsigned int cpu)
 	init_idle(p, cpu);
 	unhash_process(p);

-	paca[cpu].xCurrent = (u64)p;
+	paca[cpu].__current = p;
 	current_set[cpu] = p->thread_info;
 }
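The WARN_ON(irqs_disabled()) added above guards against a real deadlock: the caller spins waiting for other CPUs to acknowledge the IPI, and if two CPUs cross-call each other with interrupts disabled, neither can ever service the other's IPI. The following is a rough user-space model of the started/finished rendezvous that smp_call_function performs, with a pthread standing in for the IPI target; it is illustrative only (the timeout and debugger paths are omitted, and the names ipi_handler and hello are invented here).

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    struct call_data {
    	void (*func)(void *);
    	void *info;
    	atomic_int started;
    	atomic_int finished;
    };

    static struct call_data data;

    /* What each "target CPU" runs when it receives the (simulated) IPI. */
    static void *ipi_handler(void *unused)
    {
    	atomic_fetch_add(&data.started, 1);   /* acknowledge receipt */
    	data.func(data.info);                 /* run the requested function */
    	atomic_fetch_add(&data.finished, 1);  /* report completion */
    	return NULL;
    }

    static void hello(void *info) { printf("called with %s\n", (char *)info); }

    int main(void)
    {
    	pthread_t t;
    	int ncpus = 1;

    	data.func = hello;
    	data.info = "info";
    	atomic_store(&data.started, 0);
    	atomic_store(&data.finished, 0);

    	pthread_create(&t, NULL, ipi_handler, NULL);  /* "send the IPI" */

    	/* Caller spins until all targets start and then finish, as the
    	 * kernel loop does. With interrupts off on both sides, the
    	 * first wait below could spin forever; hence the WARN_ON. */
    	while (atomic_load(&data.started) < ncpus)
    		;
    	while (atomic_load(&data.finished) < ncpus)
    		;
    	pthread_join(t, NULL);
    	return 0;
    }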
- */ - cache_decay_ticks = HZ/100; - #ifndef CONFIG_PPC_ISERIES paca[boot_cpuid].next_jiffy_update_tb = tb_last_stamp = get_tb(); @@ -871,7 +866,7 @@ void __devinit smp_prepare_boot_cpu(void) /* cpu_possible is set up in prom.c */ cpu_set(boot_cpuid, cpu_online_map); - paca[boot_cpuid].xCurrent = (u64)current; + paca[boot_cpuid].__current = current; current_set[boot_cpuid] = current->thread_info; } @@ -896,8 +891,8 @@ int __devinit __cpu_up(unsigned int cpu) tmp = &stab_array[PAGE_SIZE * cpu]; memset(tmp, 0, PAGE_SIZE); - paca[cpu].xStab_data.virt = (unsigned long)tmp; - paca[cpu].xStab_data.real = virt_to_abs(tmp); + paca[cpu].stab_addr = (unsigned long)tmp; + paca[cpu].stab_real = virt_to_abs(tmp); } /* The information for processor bringup must @@ -914,8 +909,20 @@ int __devinit __cpu_up(unsigned int cpu) * use this value that I found through experimentation. * -- Cort */ - for (c = 5000; c && !cpu_callin_map[cpu]; c--) - udelay(100); + if (system_state == SYSTEM_BOOTING) + for (c = 5000; c && !cpu_callin_map[cpu]; c--) + udelay(100); +#ifdef CONFIG_HOTPLUG_CPU + else + /* + * CPUs can take much longer to come up in the + * hotplug case. Wait five seconds. + */ + for (c = 25; c && !cpu_callin_map[cpu]; c--) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(HZ/5); + } +#endif if (!cpu_callin_map[cpu]) { printk("Processor %u is stuck.\n", cpu); @@ -926,7 +933,11 @@ int __devinit __cpu_up(unsigned int cpu) if (smp_ops->give_timebase) smp_ops->give_timebase(); - cpu_set(cpu, cpu_online_map); + + /* Wait until cpu puts itself in the online map */ + while (!cpu_online(cpu)) + cpu_relax(); + return 0; } @@ -947,8 +958,6 @@ int __devinit start_secondary(void *unused) if (smp_ops->take_timebase) smp_ops->take_timebase(); - get_paca()->yielded = 0; - #ifdef CONFIG_PPC_PSERIES if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { vpa_init(cpu); @@ -964,6 +973,10 @@ int __devinit start_secondary(void *unused) #endif #endif + spin_lock(&call_lock); + cpu_set(cpu, cpu_online_map); + spin_unlock(&call_lock); + local_irq_enable(); return cpu_idle(NULL); @@ -992,3 +1005,218 @@ void __init smp_cpus_done(unsigned int max_cpus) set_cpus_allowed(current, old_mask); } + +#ifdef CONFIG_SCHED_SMT +#ifdef CONFIG_NUMA +static struct sched_group sched_group_cpus[NR_CPUS]; +static struct sched_group sched_group_phys[NR_CPUS]; +static struct sched_group sched_group_nodes[MAX_NUMNODES]; +static DEFINE_PER_CPU(struct sched_domain, cpu_domains); +static DEFINE_PER_CPU(struct sched_domain, phys_domains); +static DEFINE_PER_CPU(struct sched_domain, node_domains); +__init void arch_init_sched_domains(void) +{ + int i; + struct sched_group *first = NULL, *last = NULL; + + /* Set up domains */ + for_each_cpu(i) { + struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i); + struct sched_domain *phys_domain = &per_cpu(phys_domains, i); + struct sched_domain *node_domain = &per_cpu(node_domains, i); + int node = cpu_to_node(i); + cpumask_t nodemask = node_to_cpumask(node); + cpumask_t my_cpumask = cpumask_of_cpu(i); + cpumask_t sibling_cpumask = cpumask_of_cpu(i ^ 0x1); + + *cpu_domain = SD_SIBLING_INIT; + if (cur_cpu_spec->cpu_features & CPU_FTR_SMT) + cpus_or(cpu_domain->span, my_cpumask, sibling_cpumask); + else + cpu_domain->span = my_cpumask; + cpu_domain->parent = phys_domain; + cpu_domain->groups = &sched_group_cpus[i]; + + *phys_domain = SD_CPU_INIT; + phys_domain->span = nodemask; + phys_domain->parent = node_domain; + phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)]; + + 
@@ -992,3 +1005,218 @@ void __init smp_cpus_done(unsigned int max_cpus)

 	set_cpus_allowed(current, old_mask);
 }
+
+#ifdef CONFIG_SCHED_SMT
+#ifdef CONFIG_NUMA
+static struct sched_group sched_group_cpus[NR_CPUS];
+static struct sched_group sched_group_phys[NR_CPUS];
+static struct sched_group sched_group_nodes[MAX_NUMNODES];
+static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
+static DEFINE_PER_CPU(struct sched_domain, phys_domains);
+static DEFINE_PER_CPU(struct sched_domain, node_domains);
+__init void arch_init_sched_domains(void)
+{
+	int i;
+	struct sched_group *first = NULL, *last = NULL;
+
+	/* Set up domains */
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
+		struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+		struct sched_domain *node_domain = &per_cpu(node_domains, i);
+		int node = cpu_to_node(i);
+		cpumask_t nodemask = node_to_cpumask(node);
+		cpumask_t my_cpumask = cpumask_of_cpu(i);
+		cpumask_t sibling_cpumask = cpumask_of_cpu(i ^ 0x1);
+
+		*cpu_domain = SD_SIBLING_INIT;
+		if (cur_cpu_spec->cpu_features & CPU_FTR_SMT)
+			cpus_or(cpu_domain->span, my_cpumask, sibling_cpumask);
+		else
+			cpu_domain->span = my_cpumask;
+		cpu_domain->parent = phys_domain;
+		cpu_domain->groups = &sched_group_cpus[i];
+
+		*phys_domain = SD_CPU_INIT;
+		phys_domain->span = nodemask;
+		phys_domain->parent = node_domain;
+		phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)];
+
+		*node_domain = SD_NODE_INIT;
+		node_domain->span = cpu_possible_map;
+		node_domain->groups = &sched_group_nodes[node];
+	}
+
+	/* Set up CPU (sibling) groups */
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
+		int j;
+		first = last = NULL;
+
+		if (i != first_cpu(cpu_domain->span))
+			continue;
+
+		for_each_cpu_mask(j, cpu_domain->span) {
+			struct sched_group *cpu = &sched_group_cpus[j];
+
+			cpus_clear(cpu->cpumask);
+			cpu_set(j, cpu->cpumask);
+			cpu->cpu_power = SCHED_LOAD_SCALE;
+
+			if (!first)
+				first = cpu;
+			if (last)
+				last->next = cpu;
+			last = cpu;
+		}
+		last->next = first;
+	}
+
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		int j;
+		cpumask_t nodemask;
+		struct sched_group *node = &sched_group_nodes[i];
+		cpumask_t node_cpumask = node_to_cpumask(i);
+		cpus_and(nodemask, node_cpumask, cpu_possible_map);
+
+		if (cpus_empty(nodemask))
+			continue;
+
+		first = last = NULL;
+		/* Set up physical groups */
+		for_each_cpu_mask(j, nodemask) {
+			struct sched_domain *cpu_domain = &per_cpu(cpu_domains, j);
+			struct sched_group *cpu = &sched_group_phys[j];
+
+			if (j != first_cpu(cpu_domain->span))
+				continue;
+
+			cpu->cpumask = cpu_domain->span;
+			/*
+			 * Make each extra sibling increase power by 10% of
+			 * the basic CPU.  This is very arbitrary.
+			 */
+			cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
+			node->cpu_power += cpu->cpu_power;
+
+			if (!first)
+				first = cpu;
+			if (last)
+				last->next = cpu;
+			last = cpu;
+		}
+		last->next = first;
+	}
+
+	/* Set up nodes */
+	first = last = NULL;
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		struct sched_group *cpu = &sched_group_nodes[i];
+		cpumask_t nodemask;
+		cpumask_t node_cpumask = node_to_cpumask(i);
+		cpus_and(nodemask, node_cpumask, cpu_possible_map);
+
+		if (cpus_empty(nodemask))
+			continue;
+
+		cpu->cpumask = nodemask;
+		/* ->cpu_power already setup */
+
+		if (!first)
+			first = cpu;
+		if (last)
+			last->next = cpu;
+		last = cpu;
+	}
+	last->next = first;
+
+	mb();
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
+		cpu_attach_domain(cpu_domain, i);
+	}
+}
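The "10% per extra sibling" comment above translates to cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(n-1)/10 for a group of n siblings. Assuming SCHED_LOAD_SCALE is 128, as in kernels of this vintage, a 2-thread SMT pair is rated 128 + 128*1/10 = 140, i.e. roughly 1.1 CPUs rather than 2, so the balancer still strongly prefers spreading tasks across physical cores before stacking them on siblings. A quick check of the arithmetic:

    #include <stdio.h>

    #define SCHED_LOAD_SCALE 128UL   /* assumed value, see note above */

    /* cpu_power formula used by the sched-domain setup in this patch */
    static unsigned long group_power(unsigned int nr_siblings)
    {
    	return SCHED_LOAD_SCALE +
    	       SCHED_LOAD_SCALE * (nr_siblings - 1) / 10;
    }

    int main(void)
    {
    	printf("1 thread : %lu\n", group_power(1));  /* 128 */
    	printf("2 threads: %lu\n", group_power(2));  /* 140 */
    	return 0;
    }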
+#else /* !CONFIG_NUMA */
+static struct sched_group sched_group_cpus[NR_CPUS];
+static struct sched_group sched_group_phys[NR_CPUS];
+static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
+static DEFINE_PER_CPU(struct sched_domain, phys_domains);
+__init void arch_init_sched_domains(void)
+{
+	int i;
+	struct sched_group *first = NULL, *last = NULL;
+
+	/* Set up domains */
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
+		struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+		cpumask_t my_cpumask = cpumask_of_cpu(i);
+		cpumask_t sibling_cpumask = cpumask_of_cpu(i ^ 0x1);
+
+		*cpu_domain = SD_SIBLING_INIT;
+		if (cur_cpu_spec->cpu_features & CPU_FTR_SMT)
+			cpus_or(cpu_domain->span, my_cpumask, sibling_cpumask);
+		else
+			cpu_domain->span = my_cpumask;
+		cpu_domain->parent = phys_domain;
+		cpu_domain->groups = &sched_group_cpus[i];
+
+		*phys_domain = SD_CPU_INIT;
+		phys_domain->span = cpu_possible_map;
+		phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)];
+	}
+
+	/* Set up CPU (sibling) groups */
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
+		int j;
+		first = last = NULL;
+
+		if (i != first_cpu(cpu_domain->span))
+			continue;
+
+		for_each_cpu_mask(j, cpu_domain->span) {
+			struct sched_group *cpu = &sched_group_cpus[j];
+
+			cpus_clear(cpu->cpumask);
+			cpu_set(j, cpu->cpumask);
+			cpu->cpu_power = SCHED_LOAD_SCALE;
+
+			if (!first)
+				first = cpu;
+			if (last)
+				last->next = cpu;
+			last = cpu;
+		}
+		last->next = first;
+	}
+
+	first = last = NULL;
+	/* Set up physical groups */
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
+		struct sched_group *cpu = &sched_group_phys[i];
+
+		if (i != first_cpu(cpu_domain->span))
+			continue;
+
+		cpu->cpumask = cpu_domain->span;
+		/* See SMT+NUMA setup for comment */
+		cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
+
+		if (!first)
+			first = cpu;
+		if (last)
+			last->next = cpu;
+		last = cpu;
+	}
+	last->next = first;
+
+	mb();
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
+		cpu_attach_domain(cpu_domain, i);
+	}
+}
+#endif /* CONFIG_NUMA */
+#endif /* CONFIG_SCHED_SMT */
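In both variants above, a CPU's SMT sibling is computed as i ^ 0x1: XOR with 1 flips the low bit, so hardware threads are assumed to come in consecutively numbered (even, odd) pairs, which matches the maxcpus *= 2 change earlier in the patch. A quick check of the pairing:

    #include <stdio.h>

    int main(void)
    {
    	/* two threads per core, numbered consecutively, as the
    	 * sched-domain setup above assumes */
    	for (int cpu = 0; cpu < 4; cpu++)
    		printf("cpu %d <-> sibling %d\n", cpu, cpu ^ 0x1);
    	/* prints: 0<->1, 1<->0, 2<->3, 3<->2 */
    	return 0;
    }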