X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fsparc64%2Fkernel%2Fsmp.c;h=f2bbaafd8d57ab69a2ab7c9b807edbb9ca37063b;hb=c7b5ebbddf7bcd3651947760f423e3783bbe6573;hp=64b873212243f7d6adfa8d62db5a15779c7602c9;hpb=9213980e6a70d8473e0ffd4b39ab5b6caaba9ff5;p=linux-2.6.git diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 64b873212..f2bbaafd8 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -31,10 +32,10 @@ #include #include #include -#include #include #include #include +#include extern int linux_num_cpus; extern void calibrate_delay(void); @@ -302,14 +303,7 @@ static int __devinit smp_boot_one_cpu(unsigned int cpu) struct task_struct *p; int timeout, ret, cpu_node; - kernel_thread(NULL, NULL, CLONE_IDLETASK); - - p = prev_task(&init_task); - - init_idle(p, cpu); - - unhash_process(p); - + p = fork_idle(cpu); callin_flag = 0; cpu_new_thread = p->thread_info; cpu_set(cpu, cpu_callout_map); @@ -406,23 +400,14 @@ static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, c int i; __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate)); - for (i = 0; i < NR_CPUS; i++) { - if (cpu_isset(i, mask)) { - spitfire_xcall_helper(data0, data1, data2, pstate, i); - cpu_clear(i, mask); - if (cpus_empty(mask)) - break; - } - } + for_each_cpu_mask(i, mask) + spitfire_xcall_helper(data0, data1, data2, pstate, i); } /* Cheetah now allows to send the whole 64-bytes of data in the interrupt * packet, but we have no use for that. However we do take advantage of * the new pipelining feature (ie. dispatch to multiple cpus simultaneously). */ -#if NR_CPUS > 32 -#error Fixup cheetah_xcall_deliver Dave... -#endif static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask) { u64 pstate, ver; @@ -456,25 +441,19 @@ retry: nack_busy_id = 0; { - cpumask_t work_mask = mask; int i; - for (i = 0; i < NR_CPUS; i++) { - if (cpu_isset(i, work_mask)) { - u64 target = (i << 14) | 0x70; - - if (!is_jalapeno) - target |= (nack_busy_id << 24); - __asm__ __volatile__( - "stxa %%g0, [%0] %1\n\t" - "membar #Sync\n\t" - : /* no outputs */ - : "r" (target), "i" (ASI_INTR_W)); - nack_busy_id++; - cpu_clear(i, work_mask); - if (cpus_empty(work_mask)) - break; - } + for_each_cpu_mask(i, mask) { + u64 target = (i << 14) | 0x70; + + if (!is_jalapeno) + target |= (nack_busy_id << 24); + __asm__ __volatile__( + "stxa %%g0, [%0] %1\n\t" + "membar #Sync\n\t" + : /* no outputs */ + : "r" (target), "i" (ASI_INTR_W)); + nack_busy_id++; } } @@ -507,7 +486,6 @@ retry: printk("CPU[%d]: mondo stuckage result[%016lx]\n", smp_processor_id(), dispatch_stat); } else { - cpumask_t work_mask = mask; int i, this_busy_nack = 0; /* Delay some random time with interrupts enabled @@ -518,22 +496,17 @@ retry: /* Clear out the mask bits for cpus which did not * NACK us. 
*/ - for (i = 0; i < NR_CPUS; i++) { - if (cpu_isset(i, work_mask)) { - u64 check_mask; - - if (is_jalapeno) - check_mask = (0x2UL << (2*i)); - else - check_mask = (0x2UL << - this_busy_nack); - if ((dispatch_stat & check_mask) == 0) - cpu_clear(i, mask); - this_busy_nack += 2; - cpu_clear(i, work_mask); - if (cpus_empty(work_mask)) - break; - } + for_each_cpu_mask(i, mask) { + u64 check_mask; + + if (is_jalapeno) + check_mask = (0x2UL << (2*i)); + else + check_mask = (0x2UL << + this_busy_nack); + if ((dispatch_stat & check_mask) == 0) + cpu_clear(i, mask); + this_busy_nack += 2; } goto retry; @@ -656,9 +629,8 @@ void smp_call_function_client(int irq, struct pt_regs *regs) } } -extern unsigned long xcall_flush_tlb_page; extern unsigned long xcall_flush_tlb_mm; -extern unsigned long xcall_flush_tlb_range; +extern unsigned long xcall_flush_tlb_pending; extern unsigned long xcall_flush_tlb_kernel_range; extern unsigned long xcall_flush_tlb_all_spitfire; extern unsigned long xcall_flush_tlb_all_cheetah; @@ -675,13 +647,13 @@ extern atomic_t dcpage_flushes_xcall; static __inline__ void __local_flush_dcache_page(struct page *page) { #if (L1DCACHE_SIZE > PAGE_SIZE) - __flush_dcache_page(page->virtual, + __flush_dcache_page(page_address(page), ((tlb_type == spitfire) && page_mapping(page) != NULL)); #else if (page_mapping(page) != NULL && tlb_type == spitfire) - __flush_icache_page(__pa(page->virtual)); + __flush_icache_page(__pa(page_address(page))); #endif } @@ -696,6 +668,7 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu) if (cpu == this_cpu) { __local_flush_dcache_page(page); } else if (cpu_online(cpu)) { + void *pg_addr = page_address(page); u64 data0; if (tlb_type == spitfire) { @@ -704,14 +677,14 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu) if (page_mapping(page) != NULL) data0 |= ((u64)1 << 32); spitfire_xcall_deliver(data0, - __pa(page->virtual), - (u64) page->virtual, + __pa(pg_addr), + (u64) pg_addr, mask); } else { data0 = ((u64)&xcall_flush_dcache_page_cheetah); cheetah_xcall_deliver(data0, - __pa(page->virtual), + __pa(pg_addr), 0, mask); } #ifdef CONFIG_DEBUG_DCFLUSH @@ -724,6 +697,7 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu) void flush_dcache_page_all(struct mm_struct *mm, struct page *page) { + void *pg_addr = page_address(page); cpumask_t mask = cpu_online_map; u64 data0; int this_cpu = get_cpu(); @@ -740,13 +714,13 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page) if (page_mapping(page) != NULL) data0 |= ((u64)1 << 32); spitfire_xcall_deliver(data0, - __pa(page->virtual), - (u64) page->virtual, + __pa(pg_addr), + (u64) pg_addr, mask); } else { data0 = ((u64)&xcall_flush_dcache_page_cheetah); cheetah_xcall_deliver(data0, - __pa(page->virtual), + __pa(pg_addr), 0, mask); } #ifdef CONFIG_DEBUG_DCFLUSH @@ -854,7 +828,6 @@ void smp_flush_tlb_mm(struct mm_struct *mm) int cpu = get_cpu(); if (atomic_read(&mm->mm_users) == 1) { - /* See smp_flush_tlb_page for info about this. 
*/ mm->cpu_vm_mask = cpumask_of_cpu(cpu); goto local_flush_and_out; } @@ -870,27 +843,40 @@ void smp_flush_tlb_mm(struct mm_struct *mm) } } -void smp_flush_tlb_range(struct mm_struct *mm, unsigned long start, - unsigned long end) +void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long *vaddrs) { u32 ctx = CTX_HWBITS(mm->context); int cpu = get_cpu(); - start &= PAGE_MASK; - end = PAGE_ALIGN(end); - if (mm == current->active_mm && atomic_read(&mm->mm_users) == 1) { mm->cpu_vm_mask = cpumask_of_cpu(cpu); goto local_flush_and_out; + } else { + /* This optimization is not valid. Normally + * we will be holding the page_table_lock, but + * there is an exception which is copy_page_range() + * when forking. The lock is held during the individual + * page table updates in the parent, but not at the + * top level, which is where we are invoked. + */ + if (0) { + cpumask_t this_cpu_mask = cpumask_of_cpu(cpu); + + /* By virtue of running under the mm->page_table_lock, + * and mmu_context.h:switch_mm doing the same, the + * following operation is safe. + */ + if (cpus_equal(mm->cpu_vm_mask, this_cpu_mask)) + goto local_flush_and_out; + } } - smp_cross_call_masked(&xcall_flush_tlb_range, - ctx, start, end, + smp_cross_call_masked(&xcall_flush_tlb_pending, + ctx, nr, (unsigned long) vaddrs, mm->cpu_vm_mask); - local_flush_and_out: - __flush_tlb_range(ctx, start, SECONDARY_CONTEXT, - end, PAGE_SIZE, (end-start)); +local_flush_and_out: + __flush_tlb_pending(ctx, nr, vaddrs); put_cpu(); } @@ -907,55 +893,6 @@ void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end) } } -void smp_flush_tlb_page(struct mm_struct *mm, unsigned long page) -{ - { - u32 ctx = CTX_HWBITS(mm->context); - int cpu = get_cpu(); - - page &= PAGE_MASK; - if (mm == current->active_mm && - atomic_read(&mm->mm_users) == 1) { - /* By virtue of being the current address space, and - * having the only reference to it, the following - * operation is safe. - * - * It would not be a win to perform the xcall tlb - * flush in this case, because even if we switch back - * to one of the other processors in cpu_vm_mask it - * is almost certain that all TLB entries for this - * context will be replaced by the time that happens. - */ - mm->cpu_vm_mask = cpumask_of_cpu(cpu); - goto local_flush_and_out; - } else { - cpumask_t this_cpu_mask = cpumask_of_cpu(cpu); - - /* By virtue of running under the mm->page_table_lock, - * and mmu_context.h:switch_mm doing the same, the - * following operation is safe. - */ - if (cpus_equal(mm->cpu_vm_mask, this_cpu_mask)) - goto local_flush_and_out; - } - - /* OK, we have to actually perform the cross call. Most - * likely this is a cloned mm or kswapd is kicking out pages - * for a task which has run recently on another cpu. - */ - smp_cross_call_masked(&xcall_flush_tlb_page, - ctx, page, 0, - mm->cpu_vm_mask); - if (!cpu_isset(cpu, mm->cpu_vm_mask)) - return; - - local_flush_and_out: - __flush_tlb_page(ctx, page, SECONDARY_CONTEXT); - - put_cpu(); - } -} - /* CPU capture. 
*/ /* #define CAPTURE_DEBUG */ extern unsigned long xcall_capture; @@ -1037,8 +974,6 @@ void smp_promstop_others(void) smp_cross_call(&xcall_promstop, 0, 0, 0); } -extern void sparc64_do_profile(struct pt_regs *regs); - #define prof_multiplier(__cpu) cpu_data(__cpu).multiplier #define prof_counter(__cpu) cpu_data(__cpu).counter @@ -1064,7 +999,7 @@ void smp_percpu_timer_interrupt(struct pt_regs *regs) } do { - sparc64_do_profile(regs); + profile_tick(CPU_PROFILING, regs); if (!--prof_counter(cpu)) { irq_enter();
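
The recurring cleanup in the hunks above is the replacement of open-coded CPU scans (loop over NR_CPUS, test cpu_isset(), clear the bit, break once the working copy of the mask is empty) with the for_each_cpu_mask() iterator, which visits only the set bits and never modifies the caller's mask. The stand-alone sketch below illustrates that shape only; toy_mask, toy_isset() and for_each_toy_cpu() are illustrative stand-ins for cpumask_t, cpu_isset() and for_each_cpu_mask(), not the real kernel API.

/* Build with: cc -std=c99 -Wall iterator_sketch.c */
#include <stdio.h>

#define TOY_NR_CPUS 64

typedef unsigned long long toy_mask;		/* one bit per CPU */

static int toy_isset(int cpu, toy_mask m)	/* stand-in for cpu_isset() */
{
	return (int)((m >> cpu) & 1ULL);
}

/* Stand-in for for_each_cpu_mask(): visit set bits only, mask untouched. */
#define for_each_toy_cpu(cpu, mask)				\
	for ((cpu) = 0; (cpu) < TOY_NR_CPUS; (cpu)++)		\
		if (toy_isset((cpu), (mask)))

static void deliver(int cpu)			/* stand-in for the xcall helper */
{
	printf("deliver to cpu %d\n", cpu);
}

int main(void)
{
	toy_mask mask = (1ULL << 1) | (1ULL << 5) | (1ULL << 40);
	toy_mask work;
	int i;

	/* Old style, as removed above: copy the mask, clear bits as you go,
	 * break out early once the working copy is empty.
	 */
	work = mask;
	for (i = 0; i < TOY_NR_CPUS; i++) {
		if (toy_isset(i, work)) {
			deliver(i);
			work &= ~(1ULL << i);
			if (work == 0)
				break;
		}
	}

	/* New style, as added above: the iterator hides the bookkeeping. */
	for_each_toy_cpu(i, mask)
		deliver(i);

	return 0;
}

One practical consequence visible in cheetah_xcall_deliver(): because the iterator does not consume the mask, the separate work_mask copies disappear, and the original mask is still intact for the NACK bookkeeping in the retry path.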
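
Similarly, the switch from smp_flush_tlb_page()/smp_flush_tlb_range() to smp_flush_tlb_pending(mm, nr, vaddrs) trades one cross call per page (or per range) for a single call carrying a batch of pending virtual addresses. The stand-alone sketch below shows only that batching idea; struct tlb_batch, queue_flush() and cross_call_flush() are hypothetical names for illustration, not the kernel's interfaces.

/* Build with: cc -std=c99 -Wall batch_sketch.c */
#include <stdio.h>

#define TLB_BATCH_NR 16				/* drain once the queue fills */

struct tlb_batch {
	unsigned long nr;
	unsigned long vaddrs[TLB_BATCH_NR];
};

/* Pretend cross call: one message carries every queued address. */
static void cross_call_flush(unsigned long nr, const unsigned long *vaddrs)
{
	unsigned long i;

	printf("one cross call flushing %lu pages\n", nr);
	for (i = 0; i < nr; i++)
		printf("  flush vaddr 0x%lx\n", vaddrs[i]);
}

static void flush_pending(struct tlb_batch *b)
{
	if (b->nr) {
		cross_call_flush(b->nr, b->vaddrs);
		b->nr = 0;
	}
}

static void queue_flush(struct tlb_batch *b, unsigned long vaddr)
{
	b->vaddrs[b->nr++] = vaddr;
	if (b->nr == TLB_BATCH_NR)
		flush_pending(b);
}

int main(void)
{
	struct tlb_batch batch = { .nr = 0 };

	/* Unmapping three pages queues three addresses but costs one call. */
	queue_flush(&batch, 0x40000000UL);
	queue_flush(&batch, 0x40001000UL);
	queue_flush(&batch, 0x40002000UL);
	flush_pending(&batch);

	return 0;
}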