X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fia64%2Fmm%2Ftlb.c;h=ffad7624436c84e02e62b7e38b72066bc689492f;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=44fb8b84aa860e2bbaed2e5ef55a89a574c191f5;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git

diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index 44fb8b84a..ffad76244 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -8,87 +8,91 @@
  * Modified RID allocation for SMP
  *          Goutham Rao <goutham.rao@intel.com>
  *              IPI based ptc implementation and A-step IPI implementation.
+ * Rohit Seth <rohit.seth@intel.com>
+ * Ken Chen <kenneth.w.chen@intel.com>
  */
-#include <linux/config.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
+#include <linux/bootmem.h>
 
 #include <asm/delay.h>
 #include <asm/mmu_context.h>
 #include <asm/pgalloc.h>
 #include <asm/pal.h>
 #include <asm/tlbflush.h>
+#include <asm/dma.h>
 
 static struct {
 	unsigned long mask;	/* mask of supported purge page-sizes */
-	unsigned long max_bits;	/* log2() of largest supported purge page-size */
+	unsigned long max_bits;	/* log2 of largest supported purge page-size */
 } purge;
 
 struct ia64_ctx ia64_ctx = {
 	.lock =		SPIN_LOCK_UNLOCKED,
 	.next =		1,
-	.limit =	(1 << 15) - 1,	/* start out with the safe (architected) limit */
 	.max_ctx =	~0U
 };
 
 DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
 
+/*
+ * Initializes the ia64_ctx.bitmap array based on max_ctx+1.
+ * Called after cpu_init() has setup ia64_ctx.max_ctx based on
+ * maximum RID that is supported by boot CPU.
+ */
+void __init
+mmu_context_init (void)
+{
+	ia64_ctx.bitmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
+	ia64_ctx.flushmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
+}
+
 /*
  * Acquire the ia64_ctx.lock before calling this function!
  */
 void
 wrap_mmu_context (struct mm_struct *mm)
 {
-	unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx;
-	struct task_struct *tsk;
-	int i;
+	int i, cpu;
+	unsigned long flush_bit;
 
-	if (ia64_ctx.next > max_ctx)
-		ia64_ctx.next = 300;	/* skip daemons */
-	ia64_ctx.limit = max_ctx + 1;
+	for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) {
+		flush_bit = xchg(&ia64_ctx.flushmap[i], 0);
+		ia64_ctx.bitmap[i] ^= flush_bit;
+	}
+
+	/* use offset at 300 to skip daemons */
+	ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
+				ia64_ctx.max_ctx, 300);
+	ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
+				ia64_ctx.max_ctx, ia64_ctx.next);
 
 	/*
-	 * Scan all the task's mm->context and set proper safe range
+	 * can't call flush_tlb_all() here because of race condition
+	 * with O(1) scheduler [EF]
 	 */
-
-	read_lock(&tasklist_lock);
-  repeat:
-	for_each_process(tsk) {
-		if (!tsk->mm)
-			continue;
-		tsk_context = tsk->mm->context;
-		if (tsk_context == ia64_ctx.next) {
-			if (++ia64_ctx.next >= ia64_ctx.limit) {
-				/* empty range: reset the range limit and start over */
-				if (ia64_ctx.next > max_ctx)
-					ia64_ctx.next = 300;
-				ia64_ctx.limit = max_ctx + 1;
-				goto repeat;
-			}
-		}
-		if ((tsk_context > ia64_ctx.next) && (tsk_context < ia64_ctx.limit))
-			ia64_ctx.limit = tsk_context;
-	}
-	read_unlock(&tasklist_lock);
-	/* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */
-	{
-		int cpu = get_cpu(); /* prevent preemption/migration */
-		for (i = 0; i < NR_CPUS; ++i)
-			if (cpu_online(i) && (i != cpu))
-				per_cpu(ia64_need_tlb_flush, i) = 1;
-		put_cpu();
-	}
+	cpu = get_cpu(); /* prevent preemption/migration */
+	for_each_online_cpu(i)
+		if (i != cpu)
+			per_cpu(ia64_need_tlb_flush, i) = 1;
+	put_cpu();
 	local_flush_tlb_all();
 }
 
 void
-ia64_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits)
+ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
+		       unsigned long end, unsigned long nbits)
 {
-	static spinlock_t ptcg_lock = SPIN_LOCK_UNLOCKED;
+	static DEFINE_SPINLOCK(ptcg_lock);
+
+	if (mm != current->active_mm || !current->mm) {
+		flush_tlb_all();
+		return;
+	}
 
 	/* HW requires global serialization of ptc.ga.  */
 	spin_lock(&ptcg_lock);
@@ -127,46 +131,48 @@ local_flush_tlb_all (void)
 	local_irq_restore(flags);
 	ia64_srlz_i();			/* srlz.i implies srlz.d */
 }
-EXPORT_SYMBOL(local_flush_tlb_all);
 
 void
-flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end)
+flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
+		 unsigned long end)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long size = end - start;
 	unsigned long nbits;
 
+#ifndef CONFIG_SMP
 	if (mm != current->active_mm) {
-		/* this does happen, but perhaps it's not worth optimizing for? */
-#ifdef CONFIG_SMP
-		flush_tlb_all();
-#else
 		mm->context = 0;
-#endif
 		return;
 	}
+#endif
 
 	nbits = ia64_fls(size + 0xfff);
-	while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits))
+	while (unlikely (((1UL << nbits) & purge.mask) == 0) &&
+			(nbits < purge.max_bits))
 		++nbits;
 	if (nbits > purge.max_bits)
 		nbits = purge.max_bits;
 	start &= ~((1UL << nbits) - 1);
 
-# ifdef CONFIG_SMP
-	platform_global_tlb_purge(start, end, nbits);
-# else
+	preempt_disable();
+#ifdef CONFIG_SMP
+	if (mm != current->active_mm || cpus_weight(mm->cpu_vm_mask) != 1) {
+		platform_global_tlb_purge(mm, start, end, nbits);
+		preempt_enable();
+		return;
+	}
+#endif
 	do {
 		ia64_ptcl(start, (nbits<<2));
 		start += (1UL << nbits);
 	} while (start < end);
-# endif
-
+	preempt_enable();
 	ia64_srlz_i();			/* srlz.i implies srlz.d */
 }
 EXPORT_SYMBOL(flush_tlb_range);
 
-void __init
+void __devinit
 ia64_tlb_init (void)
 {
 	ia64_ptce_info_t ptce_info;
@@ -176,7 +182,7 @@ ia64_tlb_init (void)
 	if ((status = ia64_pal_vm_page_size(&tr_pgbits, &purge.mask)) != 0) {
 		printk(KERN_ERR "PAL_VM_PAGE_SIZE failed with status=%ld;"
 		       "defaulting to architected purge page-sizes.\n", status);
-		purge.mask = 0x115557000;
+		purge.mask = 0x115557000UL;
 	}
 	purge.max_bits = ia64_fls(purge.mask);
 
@@ -187,5 +193,5 @@ ia64_tlb_init (void)
 	local_cpu_data->ptce_stride[0] = ptce_info.stride[0];
 	local_cpu_data->ptce_stride[1] = ptce_info.stride[1];
 
-	local_flush_tlb_all();	/* nuke left overs from bootstrapping... */
+	local_flush_tlb_all();	/* nuke left overs from bootstrapping... */
 }
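
The heart of this patch is the rewrite of wrap_mmu_context(): the old O(tasks) scan under tasklist_lock is replaced by a bitmap allocator, where ia64_ctx.bitmap tracks region IDs (RIDs) in use and ia64_ctx.flushmap collects RIDs freed since the last wrap. A wrap atomically folds flushmap back into bitmap, then searches for the next free context. The following is a minimal userspace sketch of that recycle-then-search step; ctx_bitmap, ctx_flushmap, MAX_CTX and the helpers are hypothetical stand-ins for the kernel's per-word xchg() and find_next_zero_bit().

/*
 * Userspace model of the bitmap-based RID allocator (names are
 * hypothetical; the kernel uses ia64_ctx.bitmap/.flushmap).
 */
#include <stdio.h>

#define MAX_CTX		(1UL << 15)	/* model: 2^15 contexts */
#define BITS_PER_LONG	(8 * sizeof(unsigned long))
#define NWORDS		(MAX_CTX / BITS_PER_LONG)

static unsigned long ctx_bitmap[NWORDS];	/* bit set = RID in use */
static unsigned long ctx_flushmap[NWORDS];	/* bit set = RID freed, TLB purge pending */

static int test_bit_(unsigned long *map, unsigned long n)
{
	return (map[n / BITS_PER_LONG] >> (n % BITS_PER_LONG)) & 1;
}

/* linear model of find_next_zero_bit(bitmap, MAX_CTX, start) */
static unsigned long next_zero(unsigned long start)
{
	for (unsigned long i = start; i < MAX_CTX; i++)
		if (!test_bit_(ctx_bitmap, i))
			return i;
	return MAX_CTX;
}

/* model of wrap_mmu_context(): recycle freed RIDs, then resume the search */
static unsigned long wrap_context(void)
{
	for (unsigned long i = 0; i < NWORDS; i++) {
		unsigned long flushed = ctx_flushmap[i];	/* kernel: xchg(..., 0) */
		ctx_flushmap[i] = 0;
		ctx_bitmap[i] ^= flushed;	/* freed bits become allocatable again */
	}
	return next_zero(300);	/* offset 300 skips contexts reserved for daemons */
}

int main(void)
{
	ctx_bitmap[300 / BITS_PER_LONG] |= 1UL << (300 % BITS_PER_LONG);	/* RID 300 busy */
	printf("next free context: %lu\n", wrap_context());	/* -> 301 */
	return 0;
}

The XOR in the fold is safe because a bit can only be set in flushmap if it was previously set in bitmap, so XOR always clears it rather than toggling an unrelated bit.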
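In flush_tlb_range(), the nbits loop rounds the purge size up to the smallest page size the CPU can purge with a single ptc instruction: purge.mask has one bit set per supported purge page-size, and 0x115557000UL is the architected fallback the patch now marks as an unsigned long. A worked sketch of that rounding, assuming a trivial fls() model in place of ia64_fls():

/* Model of the purge-size rounding in flush_tlb_range(). */
#include <stdio.h>

static unsigned long fls_ul(unsigned long x)	/* position of highest set bit */
{
	unsigned long n = 0;
	while (x >>= 1)
		n++;
	return n;
}

int main(void)
{
	unsigned long mask = 0x115557000UL;	/* supported purge page-sizes */
	unsigned long max_bits = fls_ul(mask);	/* largest supported size */
	unsigned long size = 5 * 4096;		/* example: purge a 5-page range */

	unsigned long nbits = fls_ul(size + 0xfff);
	while (((1UL << nbits) & mask) == 0 && nbits < max_bits)
		++nbits;			/* round up to a supported size */
	if (nbits > max_bits)
		nbits = max_bits;

	/* caller then aligns: start &= ~((1UL << nbits) - 1) */
	printf("purge with 2^%lu-byte pages\n", nbits);	/* -> 2^14 = 16KB */
	return 0;
}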
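Finally, wrap_mmu_context() cannot broadcast flush_tlb_all() (the in-code comment cites a race with the O(1) scheduler), so it only flushes locally and sets the per-CPU ia64_need_tlb_flush byte on every other online CPU; each of those CPUs then flushes lazily, roughly at its next context activation. A sketch of that handshake, with NCPUS and both helpers as hypothetical stand-ins for the kernel's per-CPU machinery:

/* Sketch of the deferred-flush handshake (names are hypothetical). */
#include <stdbool.h>
#include <stdio.h>

#define NCPUS 4
static bool need_tlb_flush[NCPUS];	/* models per-CPU ia64_need_tlb_flush */

/* wrap_mmu_context() side: flag everyone else, flush only locally */
static void mark_others(int self)
{
	for (int cpu = 0; cpu < NCPUS; cpu++)
		if (cpu != self)
			need_tlb_flush[cpu] = true;
	printf("cpu%d: local_flush_tlb_all()\n", self);
}

/* consumer side: each CPU tests-and-clears its flag later */
static void delayed_tlb_flush(int cpu)
{
	if (need_tlb_flush[cpu]) {
		need_tlb_flush[cpu] = false;
		printf("cpu%d: local_flush_tlb_all() (deferred)\n", cpu);
	}
}

int main(void)
{
	mark_others(0);		/* CPU 0 wraps the context space */
	delayed_tlb_flush(1);	/* CPU 1 flushes on its next mm switch */
	return 0;
}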