X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=include%2Fasm-sparc64%2Fmmu_context.h;h=2337eb48771908ca86f94a1a5d4cba3d42c0e43d;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=08275bc3478ac95ffe0b5b868341435d3b027276;hpb=6a77f38946aaee1cd85eeec6cf4229b204c15071;p=linux-2.6.git diff --git a/include/asm-sparc64/mmu_context.h b/include/asm-sparc64/mmu_context.h index 08275bc34..2337eb487 100644 --- a/include/asm-sparc64/mmu_context.h +++ b/include/asm-sparc64/mmu_context.h @@ -4,23 +4,6 @@ /* Derived heavily from Linus's Alpha/AXP ASN code... */ -#include - -/* - * For the 8k pagesize kernel, use only 10 hw context bits to optimize some shifts in - * the fast tlbmiss handlers, instead of all 13 bits (specifically for vpte offset - * calculation). For other pagesizes, this optimization in the tlbhandlers can not be - * done; but still, all 13 bits can not be used because the tlb handlers use "andcc" - * instruction which sign extends 13 bit arguments. - */ -#if PAGE_SHIFT == 13 -#define CTX_VERSION_SHIFT 10 -#define TAG_CONTEXT_BITS 0x3ff -#else -#define CTX_VERSION_SHIFT 12 -#define TAG_CONTEXT_BITS 0xfff -#endif - #ifndef __ASSEMBLY__ #include @@ -35,110 +18,112 @@ extern spinlock_t ctx_alloc_lock; extern unsigned long tlb_context_cache; extern unsigned long mmu_context_bmap[]; -#define CTX_VERSION_MASK ((~0UL) << CTX_VERSION_SHIFT) -#define CTX_FIRST_VERSION ((1UL << CTX_VERSION_SHIFT) + 1UL) -#define CTX_VALID(__ctx) \ - (!(((__ctx) ^ tlb_context_cache) & CTX_VERSION_MASK)) -#define CTX_HWBITS(__ctx) ((__ctx) & ~CTX_VERSION_MASK) - extern void get_new_mmu_context(struct mm_struct *mm); +#ifdef CONFIG_SMP +extern void smp_new_mmu_context_version(void); +#else +#define smp_new_mmu_context_version() do { } while (0) +#endif + +extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm); +extern void destroy_context(struct mm_struct *mm); + +extern void __tsb_context_switch(unsigned long pgd_pa, + struct tsb_config *tsb_base, + struct tsb_config *tsb_huge, + unsigned long tsb_descr_pa); + +static inline void tsb_context_switch(struct mm_struct *mm) +{ + __tsb_context_switch(__pa(mm->pgd), + &mm->context.tsb_block[0], +#ifdef CONFIG_HUGETLB_PAGE + (mm->context.tsb_block[1].tsb ? + &mm->context.tsb_block[1] : + NULL) +#else + NULL +#endif + , __pa(&mm->context.tsb_descr[0])); +} -/* Initialize a new mmu context. This is invoked when a new - * address space instance (unique or shared) is instantiated. - * This just needs to set mm->context to an invalid context. - */ -#define init_new_context(__tsk, __mm) (((__mm)->context = 0UL), 0) - -/* Destroy a dead context. This occurs when mmput drops the - * mm_users count to zero, the mmaps have been released, and - * all the page tables have been flushed. Our job is to destroy - * any remaining processor-specific state, and in the sparc64 - * case this just means freeing up the mmu context ID held by - * this task if valid. - */ -#define destroy_context(__mm) \ -do { spin_lock(&ctx_alloc_lock); \ - if (CTX_VALID((__mm)->context)) { \ - unsigned long nr = CTX_HWBITS((__mm)->context); \ - mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63)); \ - } \ - spin_unlock(&ctx_alloc_lock); \ -} while(0) - -/* Reload the two core values used by TLB miss handler - * processing on sparc64. They are: - * 1) The physical address of mm->pgd, when full page - * table walks are necessary, this is where the - * search begins. - * 2) A "PGD cache". For 32-bit tasks only pgd[0] is - * ever used since that maps the entire low 4GB - * completely. To speed up TLB miss processing we - * make this value available to the handlers. This - * decreases the amount of memory traffic incurred. - */ -#define reload_tlbmiss_state(__tsk, __mm) \ -do { \ - register unsigned long paddr asm("o5"); \ - register unsigned long pgd_cache asm("o4"); \ - paddr = __pa((__mm)->pgd); \ - pgd_cache = 0UL; \ - if ((__tsk)->thread_info->flags & _TIF_32BIT) \ - pgd_cache = get_pgd_cache((__mm)->pgd); \ - __asm__ __volatile__("wrpr %%g0, 0x494, %%pstate\n\t" \ - "mov %3, %%g4\n\t" \ - "mov %0, %%g7\n\t" \ - "stxa %1, [%%g4] %2\n\t" \ - "membar #Sync\n\t" \ - "wrpr %%g0, 0x096, %%pstate" \ - : /* no outputs */ \ - : "r" (paddr), "r" (pgd_cache),\ - "i" (ASI_DMMU), "i" (TSB_REG)); \ -} while(0) +extern void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long mm_rss); +#ifdef CONFIG_SMP +extern void smp_tsb_sync(struct mm_struct *mm); +#else +#define smp_tsb_sync(__mm) do { } while (0) +#endif /* Set MMU context in the actual hardware. */ #define load_secondary_context(__mm) \ - __asm__ __volatile__("stxa %0, [%1] %2\n\t" \ - "flush %%g6" \ - : /* No outputs */ \ - : "r" (CTX_HWBITS((__mm)->context)), \ - "r" (0x10), "i" (ASI_DMMU)) + __asm__ __volatile__( \ + "\n661: stxa %0, [%1] %2\n" \ + " .section .sun4v_1insn_patch, \"ax\"\n" \ + " .word 661b\n" \ + " stxa %0, [%1] %3\n" \ + " .previous\n" \ + " flush %%g6\n" \ + : /* No outputs */ \ + : "r" (CTX_HWBITS((__mm)->context)), \ + "r" (SECONDARY_CONTEXT), "i" (ASI_DMMU), "i" (ASI_MMU)) extern void __flush_tlb_mm(unsigned long, unsigned long); -/* Switch the current MM context. */ +/* Switch the current MM context. Interrupts are disabled. */ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk) { - unsigned long ctx_valid; - - spin_lock(&mm->page_table_lock); - if (CTX_VALID(mm->context)) - ctx_valid = 1; - else - ctx_valid = 0; + unsigned long ctx_valid, flags; + int cpu; - if (!ctx_valid || (old_mm != mm)) { - if (!ctx_valid) - get_new_mmu_context(mm); + spin_lock_irqsave(&mm->context.lock, flags); + ctx_valid = CTX_VALID(mm->context); + if (!ctx_valid) + get_new_mmu_context(mm); - load_secondary_context(mm); - reload_tlbmiss_state(tsk, mm); - } + /* We have to be extremely careful here or else we will miss + * a TSB grow if we switch back and forth between a kernel + * thread and an address space which has it's TSB size increased + * on another processor. + * + * It is possible to play some games in order to optimize the + * switch, but the safest thing to do is to unconditionally + * perform the secondary context load and the TSB context switch. + * + * For reference the bad case is, for address space "A": + * + * CPU 0 CPU 1 + * run address space A + * set cpu0's bits in cpu_vm_mask + * switch to kernel thread, borrow + * address space A via entry_lazy_tlb + * run address space A + * set cpu1's bit in cpu_vm_mask + * flush_tlb_pending() + * reset cpu_vm_mask to just cpu1 + * TSB grow + * run address space A + * context was valid, so skip + * TSB context switch + * + * At that point cpu0 continues to use a stale TSB, the one from + * before the TSB grow performed on cpu1. cpu1 did not cross-call + * cpu0 to update it's TSB because at that point the cpu_vm_mask + * only had cpu1 set in it. + */ + load_secondary_context(mm); + tsb_context_switch(mm); - { - int cpu = smp_processor_id(); - - /* Even if (mm == old_mm) we _must_ check - * the cpu_vm_mask. If we do not we could - * corrupt the TLB state because of how - * smp_flush_tlb_{page,range,mm} on sparc64 - * and lazy tlb switches work. -DaveM - */ - if (!ctx_valid || !cpu_isset(cpu, mm->cpu_vm_mask)) { - cpu_set(cpu, mm->cpu_vm_mask); - __flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT); - } + /* Any time a processor runs a context on an address space + * for the first time, we must flush that context out of the + * local TLB. + */ + cpu = smp_processor_id(); + if (!ctx_valid || !cpu_isset(cpu, mm->cpu_vm_mask)) { + cpu_set(cpu, mm->cpu_vm_mask); + __flush_tlb_mm(CTX_HWBITS(mm->context), + SECONDARY_CONTEXT); } - spin_unlock(&mm->page_table_lock); + spin_unlock_irqrestore(&mm->context.lock, flags); } #define deactivate_mm(tsk,mm) do { } while (0) @@ -146,19 +131,20 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str /* Activate a new MM instance for the current task. */ static inline void activate_mm(struct mm_struct *active_mm, struct mm_struct *mm) { + unsigned long flags; int cpu; - spin_lock(&mm->page_table_lock); + spin_lock_irqsave(&mm->context.lock, flags); if (!CTX_VALID(mm->context)) get_new_mmu_context(mm); cpu = smp_processor_id(); if (!cpu_isset(cpu, mm->cpu_vm_mask)) cpu_set(cpu, mm->cpu_vm_mask); - spin_unlock(&mm->page_table_lock); load_secondary_context(mm); __flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT); - reload_tlbmiss_state(current, mm); + tsb_context_switch(mm); + spin_unlock_irqrestore(&mm->context.lock, flags); } #endif /* !(__ASSEMBLY__) */