Fedora kernel-2.6.17-1.2142_FC4 patched with stable patch-2.6.17.4-vs2.0.2-rc26.diff
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index 464557e..4dbbca0 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -8,6 +8,8 @@
  *             Modified RID allocation for SMP
  *          Goutham Rao <goutham.rao@intel.com>
  *              IPI based ptc implementation and A-step IPI implementation.
+ * Rohit Seth <rohit.seth@intel.com>
+ * Ken Chen <kenneth.w.chen@intel.com>
  */
 #include <linux/config.h>
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
+#include <linux/bootmem.h>
 
 #include <asm/delay.h>
 #include <asm/mmu_context.h>
 #include <asm/pgalloc.h>
 #include <asm/pal.h>
 #include <asm/tlbflush.h>
+#include <asm/dma.h>
 
 static struct {
        unsigned long mask;     /* mask of supported purge page-sizes */
-       unsigned long max_bits; /* log2() of largest supported purge page-size */
+       unsigned long max_bits; /* log2 of largest supported purge page-size */
 } purge;
 
 struct ia64_ctx ia64_ctx = {
        .lock =         SPIN_LOCK_UNLOCKED,
        .next =         1,
-       .limit =        (1 << 15) - 1,          /* start out with the safe (architected) limit */
        .max_ctx =      ~0U
 };
 
 DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
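The per-CPU ia64_need_tlb_flush flag is how the wrap path below defers purges: rather than IPI every online CPU at wrap time, wrap_mmu_context() only sets the flag, and each remote CPU issues its own local_flush_tlb_all() the next time it installs a context, via the delayed_tlb_flush() check on the context-switch path in include/asm-ia64/mmu_context.h.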
 
+/*
+ * Initializes the ia64_ctx.bitmap and flushmap arrays based on max_ctx+1.
+ * Called after cpu_init() has set up ia64_ctx.max_ctx based on the
+ * maximum RID supported by the boot CPU.
+ */
+void __init
+mmu_context_init (void)
+{
+       ia64_ctx.bitmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
+       ia64_ctx.flushmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
+}
+
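Each map carries one bit per context, so the alloc_bootmem() size above is (max_ctx+1)>>3 bytes. A minimal userspace sketch of that arithmetic, assuming a hypothetical 18-bit RID space purely for illustration (the real width is read from PAL on the boot CPU):

/* sketch, not kernel code: bitmap sizing for a made-up 18-bit RID space */
#include <stdio.h>

int main(void)
{
	unsigned long max_ctx = (1UL << 18) - 1;  /* hypothetical max RID */
	unsigned long nbits   = max_ctx + 1;      /* one bit per context */
	unsigned long nbytes  = nbits >> 3;       /* 8 bits per byte */

	/* prints: 262144 contexts -> 32768 bytes per map (x2: bitmap + flushmap) */
	printf("%lu contexts -> %lu bytes per map\n", nbits, nbytes);
	return 0;
}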
 /*
  * Acquire the ia64_ctx.lock before calling this function!
  */
 void
 wrap_mmu_context (struct mm_struct *mm)
 {
-       unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx;
-       struct task_struct *tsk;
-       int i;
+       int i, cpu;
+       unsigned long flush_bit;
 
-       if (ia64_ctx.next > max_ctx)
-               ia64_ctx.next = 300;    /* skip daemons */
-       ia64_ctx.limit = max_ctx + 1;
+       for (i = 0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) {
+               flush_bit = xchg(&ia64_ctx.flushmap[i], 0);
+               ia64_ctx.bitmap[i] ^= flush_bit;
+       }
+       /* start the search at offset 300 to skip daemons */
+       ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
+                               ia64_ctx.max_ctx, 300);
+       ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
+                               ia64_ctx.max_ctx, ia64_ctx.next);
 
        /*
-        * Scan all the task's mm->context and set proper safe range
+        * can't call flush_tlb_all() here because of a race condition
+        * with the O(1) scheduler [EF]
         */
-
-       read_lock(&tasklist_lock);
-  repeat:
-       for_each_process(tsk) {
-               if (!tsk->mm)
-                       continue;
-               tsk_context = tsk->mm->context;
-               if (tsk_context == ia64_ctx.next) {
-                       if (++ia64_ctx.next >= ia64_ctx.limit) {
-                               /* empty range: reset the range limit and start over */
-                               if (ia64_ctx.next > max_ctx)
-                                       ia64_ctx.next = 300;
-                               ia64_ctx.limit = max_ctx + 1;
-                               goto repeat;
-                       }
-               }
-               if ((tsk_context > ia64_ctx.next) && (tsk_context < ia64_ctx.limit))
-                       ia64_ctx.limit = tsk_context;
-       }
-       read_unlock(&tasklist_lock);
-       /* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */
-       {
-               int cpu = get_cpu(); /* prevent preemption/migration */
-               for (i = 0; i < NR_CPUS; ++i)
-                       if (cpu_online(i) && (i != cpu))
-                               per_cpu(ia64_need_tlb_flush, i) = 1;
-               put_cpu();
-       }
+       cpu = get_cpu(); /* prevent preemption/migration */
+       for_each_online_cpu(i)
+               if (i != cpu)
+                       per_cpu(ia64_need_tlb_flush, i) = 1;
+       put_cpu();
        local_flush_tlb_all();
 }
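The rewritten wrap replaces the old walk over every task with two bitmaps: contexts freed since the last wrap accumulate in flushmap, the wrap XORs them out of bitmap (xchg() keeps that race-free against concurrent frees), and [next, limit) becomes a run of free contexts the allocator in mmu_context.h can hand out without rescanning. A standalone sketch of the idea, using stand-ins for the kernel's xchg()/find_next_zero_bit()/find_next_bit() and a deliberately tiny, made-up context space:

/* sketch, not kernel code: bitmap-based context recycling on wrap */
#include <stdio.h>

#define MAX_CTX	1023UL			/* hypothetical, tiny RID space */
#define NWORDS	((MAX_CTX + 1) / 64)

static unsigned long bitmap[NWORDS];	/* 1 = context in use */
static unsigned long flushmap[NWORDS];	/* 1 = freed since last wrap */
static unsigned long next, limit;

static int test_bit(unsigned long *map, unsigned long n)
{
	return (map[n / 64] >> (n % 64)) & 1;
}

/* stand-in for find_next_bit()/find_next_zero_bit() */
static unsigned long find_next(unsigned long *map, unsigned long max,
			       unsigned long start, int want)
{
	for (unsigned long n = start; n < max; n++)
		if (test_bit(map, n) == want)
			return n;
	return max;
}

static void wrap(void)
{
	/* atomically drain flushmap into bitmap, as the patch does with xchg() */
	for (unsigned long i = 0; i < NWORDS; i++)
		bitmap[i] ^= __atomic_exchange_n(&flushmap[i], 0,
						 __ATOMIC_SEQ_CST);
	next  = find_next(bitmap, MAX_CTX, 300, 0);	/* first free ctx */
	limit = find_next(bitmap, MAX_CTX, next, 1);	/* end of free run */
}

int main(void)
{
	bitmap[5] = ~0UL;		/* contexts 320..383 in use */
	flushmap[5] = 0xffUL << 8;	/* ...but 328..335 since freed */
	wrap();
	printf("next=%lu limit=%lu\n", next, limit);	/* next=300 limit=320 */
	return 0;
}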
 
 void
-ia64_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits)
+ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
+                      unsigned long end, unsigned long nbits)
 {
        static DEFINE_SPINLOCK(ptcg_lock);
 
+       if (mm != current->active_mm || !current->mm) {
+               flush_tlb_all();
+               return;
+       }
+
        /* HW requires global serialization of ptc.ga.  */
        spin_lock(&ptcg_lock);
        {
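The new mm argument lets the purge path notice when the flush targets an address space that is not live on the calling CPU (mm != current->active_mm, or a kernel thread with no mm at all). A ptc.ga purge is keyed by virtual address against the region IDs currently installed in the region registers, so purging on behalf of a foreign mm would miss the right translations; falling back to flush_tlb_all() is the safe choice there.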
@@ -129,38 +134,41 @@ local_flush_tlb_all (void)
 }
 
 void
-flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end)
+flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
+                unsigned long end)
 {
        struct mm_struct *mm = vma->vm_mm;
        unsigned long size = end - start;
        unsigned long nbits;
 
+#ifndef CONFIG_SMP
        if (mm != current->active_mm) {
-               /* this does happen, but perhaps it's not worth optimizing for? */
-#ifdef CONFIG_SMP
-               flush_tlb_all();
-#else
                mm->context = 0;
-#endif
                return;
        }
+#endif
 
        nbits = ia64_fls(size + 0xfff);
-       while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits))
+       while (unlikely (((1UL << nbits) & purge.mask) == 0) &&
+                       (nbits < purge.max_bits))
                ++nbits;
        if (nbits > purge.max_bits)
                nbits = purge.max_bits;
        start &= ~((1UL << nbits) - 1);
 
-# ifdef CONFIG_SMP
-       platform_global_tlb_purge(start, end, nbits);
-# else
+       preempt_disable();
+#ifdef CONFIG_SMP
+       if (mm != current->active_mm || cpus_weight(mm->cpu_vm_mask) != 1) {
+               platform_global_tlb_purge(mm, start, end, nbits);
+               preempt_enable();
+               return;
+       }
+#endif
        do {
                ia64_ptcl(start, (nbits<<2));
                start += (1UL << nbits);
        } while (start < end);
-# endif
-
+       preempt_enable();
        ia64_srlz_i();                  /* srlz.i implies srlz.d */
 }
 EXPORT_SYMBOL(flush_tlb_range);
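The purge size still has to be one the hardware supports: nbits starts at log2 of the page-rounded range and is bumped until it lands on a bit in purge.mask, capped at purge.max_bits. A userspace sketch of that rounding with a made-up mask (the real mask and max_bits come from PAL in ia64_tlb_init() below):

/* sketch, not kernel code: rounding a range to a supported purge size */
#include <stdio.h>

static unsigned long fls_ul(unsigned long x)	/* stand-in for ia64_fls() */
{
	unsigned long n = 0;
	while (x >>= 1)
		n++;
	return n;
}

int main(void)
{
	unsigned long mask = (1UL << 12) | (1UL << 14) | (1UL << 16);
	unsigned long max_bits = 16;	/* hypothetical PAL purge info */
	unsigned long size = 3 * 8192;	/* flush a 24 KB range */
	unsigned long nbits = fls_ul(size + 0xfff);

	while (((1UL << nbits) & mask) == 0 && nbits < max_bits)
		++nbits;
	if (nbits > max_bits)
		nbits = max_bits;

	/* 24 KB rounds up to bit 14: prints nbits=14 (16 KB purge granule) */
	printf("nbits=%lu\n", nbits);
	return 0;
}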
@@ -186,5 +194,5 @@ ia64_tlb_init (void)
        local_cpu_data->ptce_stride[0] = ptce_info.stride[0];
        local_cpu_data->ptce_stride[1] = ptce_info.stride[1];
 
-       local_flush_tlb_all();          /* nuke left overs from bootstrapping... */
+       local_flush_tlb_all();  /* nuke leftovers from bootstrapping... */
 }