X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=include%2Fasm-sparc64%2Fmmu_context.h;h=2337eb48771908ca86f94a1a5d4cba3d42c0e43d;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=08275bc3478ac95ffe0b5b868341435d3b027276;hpb=6a77f38946aaee1cd85eeec6cf4229b204c15071;p=linux-2.6.git

diff --git a/include/asm-sparc64/mmu_context.h b/include/asm-sparc64/mmu_context.h
index 08275bc34..2337eb487 100644
--- a/include/asm-sparc64/mmu_context.h
+++ b/include/asm-sparc64/mmu_context.h
@@ -4,23 +4,6 @@
 
 /* Derived heavily from Linus's Alpha/AXP ASN code... */
 
-#include <asm/page.h>
-
-/*
- * For the 8k pagesize kernel, use only 10 hw context bits to optimize some shifts in
- * the fast tlbmiss handlers, instead of all 13 bits (specifically for vpte offset
- * calculation). For other pagesizes, this optimization in the tlbhandlers can not be 
- * done; but still, all 13 bits can not be used because the tlb handlers use "andcc"
- * instruction which sign extends 13 bit arguments.
- */
-#if PAGE_SHIFT == 13
-#define CTX_VERSION_SHIFT	10
-#define TAG_CONTEXT_BITS	0x3ff
-#else
-#define CTX_VERSION_SHIFT	12
-#define TAG_CONTEXT_BITS	0xfff
-#endif
-
 #ifndef __ASSEMBLY__
 
 #include <linux/spinlock.h>
@@ -35,110 +18,112 @@ extern spinlock_t ctx_alloc_lock;
 extern unsigned long tlb_context_cache;
 extern unsigned long mmu_context_bmap[];
 
-#define CTX_VERSION_MASK	((~0UL) << CTX_VERSION_SHIFT)
-#define CTX_FIRST_VERSION	((1UL << CTX_VERSION_SHIFT) + 1UL)
-#define CTX_VALID(__ctx)	\
-	 (!(((__ctx) ^ tlb_context_cache) & CTX_VERSION_MASK))
-#define CTX_HWBITS(__ctx)	((__ctx) & ~CTX_VERSION_MASK)
-
 extern void get_new_mmu_context(struct mm_struct *mm);
+#ifdef CONFIG_SMP
+extern void smp_new_mmu_context_version(void);
+#else
+#define smp_new_mmu_context_version() do { } while (0)
+#endif
+
+extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
+extern void destroy_context(struct mm_struct *mm);
+
+extern void __tsb_context_switch(unsigned long pgd_pa,
+				 struct tsb_config *tsb_base,
+				 struct tsb_config *tsb_huge,
+				 unsigned long tsb_descr_pa);
+
+static inline void tsb_context_switch(struct mm_struct *mm)
+{
+	__tsb_context_switch(__pa(mm->pgd),
+			     &mm->context.tsb_block[0],
+#ifdef CONFIG_HUGETLB_PAGE
+			     (mm->context.tsb_block[1].tsb ?
+			      &mm->context.tsb_block[1] :
+			      NULL)
+#else
+			     NULL
+#endif
+			     , __pa(&mm->context.tsb_descr[0]));
+}
 
-/* Initialize a new mmu context.  This is invoked when a new
- * address space instance (unique or shared) is instantiated.
- * This just needs to set mm->context to an invalid context.
- */
-#define init_new_context(__tsk, __mm)	(((__mm)->context = 0UL), 0)
-
-/* Destroy a dead context.  This occurs when mmput drops the
- * mm_users count to zero, the mmaps have been released, and
- * all the page tables have been flushed.  Our job is to destroy
- * any remaining processor-specific state, and in the sparc64
- * case this just means freeing up the mmu context ID held by
- * this task if valid.
- */
-#define destroy_context(__mm)					\
-do {	spin_lock(&ctx_alloc_lock);				\
-	if (CTX_VALID((__mm)->context)) {			\
-		unsigned long nr = CTX_HWBITS((__mm)->context);	\
-		mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63));	\
-	}							\
-	spin_unlock(&ctx_alloc_lock);				\
-} while(0)
-
-/* Reload the two core values used by TLB miss handler
- * processing on sparc64.  They are:
- * 1) The physical address of mm->pgd, when full page
- *    table walks are necessary, this is where the
- *    search begins.
- * 2) A "PGD cache".  For 32-bit tasks only pgd[0] is
- *    ever used since that maps the entire low 4GB
- *    completely.  To speed up TLB miss processing we
- *    make this value available to the handlers.  This
- *    decreases the amount of memory traffic incurred.
- */
-#define reload_tlbmiss_state(__tsk, __mm) \
-do { \
-	register unsigned long paddr asm("o5"); \
-	register unsigned long pgd_cache asm("o4"); \
-	paddr = __pa((__mm)->pgd); \
-	pgd_cache = 0UL; \
-	if ((__tsk)->thread_info->flags & _TIF_32BIT) \
-		pgd_cache = get_pgd_cache((__mm)->pgd); \
-	__asm__ __volatile__("wrpr	%%g0, 0x494, %%pstate\n\t" \
-			     "mov	%3, %%g4\n\t" \
-			     "mov	%0, %%g7\n\t" \
-			     "stxa	%1, [%%g4] %2\n\t" \
-			     "membar	#Sync\n\t" \
-			     "wrpr	%%g0, 0x096, %%pstate" \
-			     : /* no outputs */ \
-			     : "r" (paddr), "r" (pgd_cache),\
-			       "i" (ASI_DMMU), "i" (TSB_REG)); \
-} while(0)
+extern void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long mm_rss);
+#ifdef CONFIG_SMP
+extern void smp_tsb_sync(struct mm_struct *mm);
+#else
+#define smp_tsb_sync(__mm) do { } while (0)
+#endif
 
 /* Set MMU context in the actual hardware. */
 #define load_secondary_context(__mm) \
-	__asm__ __volatile__("stxa	%0, [%1] %2\n\t" \
-			     "flush	%%g6" \
-			     : /* No outputs */ \
-			     : "r" (CTX_HWBITS((__mm)->context)), \
-			       "r" (0x10), "i" (ASI_DMMU))
+	__asm__ __volatile__( \
+	"\n661:	stxa		%0, [%1] %2\n" \
+	"	.section	.sun4v_1insn_patch, \"ax\"\n" \
+	"	.word		661b\n" \
+	"	stxa		%0, [%1] %3\n" \
+	"	.previous\n" \
+	"	flush		%%g6\n" \
+	: /* No outputs */ \
+	: "r" (CTX_HWBITS((__mm)->context)), \
+	  "r" (SECONDARY_CONTEXT), "i" (ASI_DMMU), "i" (ASI_MMU))
 
 extern void __flush_tlb_mm(unsigned long, unsigned long);
 
-/* Switch the current MM context. */
+/* Switch the current MM context.  Interrupts are disabled.  */
 static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk)
 {
-	unsigned long ctx_valid;
-
-	spin_lock(&mm->page_table_lock);
-	if (CTX_VALID(mm->context))
-		ctx_valid = 1;
-        else
-		ctx_valid = 0;
+	unsigned long ctx_valid, flags;
+	int cpu;
 
-	if (!ctx_valid || (old_mm != mm)) {
-		if (!ctx_valid)
-			get_new_mmu_context(mm);
+	spin_lock_irqsave(&mm->context.lock, flags);
+	ctx_valid = CTX_VALID(mm->context);
+	if (!ctx_valid)
+		get_new_mmu_context(mm);
 
-		load_secondary_context(mm);
-		reload_tlbmiss_state(tsk, mm);
-	}
+	/* We have to be extremely careful here or else we will miss
+	 * a TSB grow if we switch back and forth between a kernel
+	 * thread and an address space which has its TSB size increased
+	 * on another processor.
+	 *
+	 * It is possible to play some games in order to optimize the
+	 * switch, but the safest thing to do is to unconditionally
+	 * perform the secondary context load and the TSB context switch.
+	 *
+	 * For reference the bad case is, for address space "A":
+	 *
+	 *		CPU 0			CPU 1
+	 *	run address space A
+	 *	set cpu0's bit in cpu_vm_mask
+	 *	switch to kernel thread, borrow
+	 *	address space A via enter_lazy_tlb
+	 *					run address space A
+	 *					set cpu1's bit in cpu_vm_mask
+	 *					flush_tlb_pending()
+	 *					reset cpu_vm_mask to just cpu1
+	 *					TSB grow
+	 *	run address space A
+	 *	context was valid, so skip
+	 *	TSB context switch
+	 *
+	 * At that point cpu0 continues to use a stale TSB, the one from
+	 * before the TSB grow performed on cpu1.  cpu1 did not cross-call
+	 * cpu0 to update its TSB because at that point the cpu_vm_mask
+	 * only had cpu1 set in it.
+	 */
+	load_secondary_context(mm);
+	tsb_context_switch(mm);
 
-	{
-		int cpu = smp_processor_id();
-
-		/* Even if (mm == old_mm) we _must_ check
-		 * the cpu_vm_mask.  If we do not we could
-		 * corrupt the TLB state because of how
-		 * smp_flush_tlb_{page,range,mm} on sparc64
-		 * and lazy tlb switches work. -DaveM
-		 */
-		if (!ctx_valid || !cpu_isset(cpu, mm->cpu_vm_mask)) {
-			cpu_set(cpu, mm->cpu_vm_mask);
-			__flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT);
-		}
+	/* Any time a processor runs a context on an address space
+	 * for the first time, we must flush that context out of the
+	 * local TLB.
+	 */
+	cpu = smp_processor_id();
+	if (!ctx_valid || !cpu_isset(cpu, mm->cpu_vm_mask)) {
+		cpu_set(cpu, mm->cpu_vm_mask);
+		__flush_tlb_mm(CTX_HWBITS(mm->context),
+			       SECONDARY_CONTEXT);
 	}
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock_irqrestore(&mm->context.lock, flags);
 }
 
 #define deactivate_mm(tsk,mm)	do { } while (0)
@@ -146,19 +131,20 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str
 /* Activate a new MM instance for the current task. */
 static inline void activate_mm(struct mm_struct *active_mm, struct mm_struct *mm)
 {
+	unsigned long flags;
 	int cpu;
 
-	spin_lock(&mm->page_table_lock);
+	spin_lock_irqsave(&mm->context.lock, flags);
 	if (!CTX_VALID(mm->context))
 		get_new_mmu_context(mm);
 	cpu = smp_processor_id();
 	if (!cpu_isset(cpu, mm->cpu_vm_mask))
 		cpu_set(cpu, mm->cpu_vm_mask);
-	spin_unlock(&mm->page_table_lock);
 
 	load_secondary_context(mm);
 	__flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT);
-	reload_tlbmiss_state(current, mm);
+	tsb_context_switch(mm);
+	spin_unlock_irqrestore(&mm->context.lock, flags);
 }
 
 #endif /* !(__ASSEMBLY__) */