arch/x86_64/kernel/smp.c

   1 /*
   2  *      Intel SMP support routines.
   3  *
   4  *      (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
   5  *      (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
   6  *      (c) 2002,2003 Andi Kleen, SuSE Labs.
   7  *
   8  *      This code is released under the GNU General Public License version 2 or
   9  *      later.
  10  */
  11
  12 #include <linux/init.h>
  13
  14 #include <linux/mm.h>
  15 #include <linux/irq.h>
  16 #include <linux/delay.h>
  17 #include <linux/spinlock.h>
  18 #include <linux/smp_lock.h>
  19 #include <linux/smp.h>
  20 #include <linux/kernel_stat.h>
  21 #include <linux/mc146818rtc.h>
  22 #include <linux/interrupt.h>
  23
  24 #include <asm/mtrr.h>
  25 #include <asm/pgalloc.h>
  26 #include <asm/tlbflush.h>
  27 #include <asm/mach_apic.h>
  28 #include <asm/proto.h>
  29
  30 /*
  31  *      Smarter SMP flushing macros.
  32  *              c/o Linus Torvalds.
  33  *
 *      These mean you can really definitely utterly forget about
 *      writing to user space from interrupts. (It's not allowed anyway).
  36  *
  37  *      Optimizations Manfred Spraul <manfred@colorfullife.com>
  38  */
  39
  40 static cpumask_t flush_cpumask;
  41 static struct mm_struct * flush_mm;
  42 static unsigned long flush_va;
  43 static DEFINE_SPINLOCK(tlbstate_lock);
  44 #define FLUSH_ALL       0xffffffff
  45
  46 /*
  47  * We cannot call mmdrop() because we are in interrupt context,
  48  * instead update mm->cpu_vm_mask.
  49  */
  50 static inline void leave_mm (unsigned long cpu)
  51 {
  52         if (read_pda(mmu_state) == TLBSTATE_OK)
  53                 BUG();
  54         clear_bit(cpu, &read_pda(active_mm)->cpu_vm_mask);
  55         __flush_tlb();
  56 }
  57
  58 /*
  59  *
  60  * The flush IPI assumes that a thread switch happens in this order:
  61  * [cpu0: the cpu that switches]
  62  * 1) switch_mm() either 1a) or 1b)
  63  * 1a) thread switch to a different mm
  64  * 1a1) clear_bit(cpu, &old_mm->cpu_vm_mask);
 *      Stop ipi delivery for the old mm. This is not synchronized with
 *      the other cpus, but smp_invalidate_interrupt ignores flush ipis
 *      for the wrong mm, and in the worst case we perform a superfluous
 *      tlb flush.
  69  * 1a2) set cpu mmu_state to TLBSTATE_OK
  70  *      Now the smp_invalidate_interrupt won't call leave_mm if cpu0
  71  *      was in lazy tlb mode.
  72  * 1a3) update cpu active_mm
  73  *      Now cpu0 accepts tlb flushes for the new mm.
  74  * 1a4) set_bit(cpu, &new_mm->cpu_vm_mask);
  75  *      Now the other cpus will send tlb flush ipis.
 * 1a5) change cr3.
  77  * 1b) thread switch without mm change
  78  *      cpu active_mm is correct, cpu0 already handles
  79  *      flush ipis.
  80  * 1b1) set cpu mmu_state to TLBSTATE_OK
  81  * 1b2) test_and_set the cpu bit in cpu_vm_mask.
  82  *      Atomically set the bit [other cpus will start sending flush ipis],
  83  *      and test the bit.
  84  * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
 * 2) switch %rsp, ie current
 *
 * The interrupt must handle 2 special cases:
 * - cr3 is changed before %rsp, ie. it cannot use current->{active_,}mm.
  89  * - the cpu performs speculative tlb reads, i.e. even if the cpu only
  90  *   runs in kernel space, the cpu could load tlb entries for user space
  91  *   pages.
  92  *
  93  * The good news is that cpu mmu_state is local to each cpu, no
  94  * write/read ordering problems.
  95  */
  96
  97 /*
  98  * TLB flush IPI:
  99  *
 100  * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
 101  * 2) Leave the mm if we are in the lazy tlb mode.
 102  */
 103
 104 asmlinkage void smp_invalidate_interrupt (void)
 105 {
 106         unsigned long cpu;
 107
 108         cpu = get_cpu();
 109
 110         if (!cpu_isset(cpu, flush_cpumask))
 111                 goto out;
 112                 /*
 113                  * This was a BUG() but until someone can quote me the
 114                  * line from the intel manual that guarantees an IPI to
 115                  * multiple CPUs is retried _only_ on the erroring CPUs
 116                  * its staying as a return
 117                  *
 118                  * BUG();
 119                  */
 120
 121         if (flush_mm == read_pda(active_mm)) {
 122                 if (read_pda(mmu_state) == TLBSTATE_OK) {
 123                         if (flush_va == FLUSH_ALL)
 124                                 local_flush_tlb();
 125                         else
 126                                 __flush_tlb_one(flush_va);
 127                 } else
 128                         leave_mm(cpu);
 129         }
 130         ack_APIC_irq();
 131         cpu_clear(cpu, flush_cpumask);
 132
 133 out:
 134         put_cpu_no_resched();
 135 }
 136
 137 static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
 138                                                 unsigned long va)
 139 {
 140         cpumask_t tmp;
 141         /*
 142          * A couple of (to be removed) sanity checks:
 143          *
 144          * - we do not send IPIs to not-yet booted CPUs.
 145          * - current CPU must not be in mask
 146          * - mask must exist :)
 147          */
 148         BUG_ON(cpus_empty(cpumask));
 149         cpus_and(tmp, cpumask, cpu_online_map);
 150         BUG_ON(!cpus_equal(tmp, cpumask));
 151         BUG_ON(cpu_isset(smp_processor_id(), cpumask));
 152         if (!mm)
 153                 BUG();
 154
 155         /*
 156          * I'm not happy about this global shared spinlock in the
 157          * MM hot path, but we'll see how contended it is.
 158          * Temporarily this turns IRQs off, so that lockups are
 159          * detected by the NMI watchdog.
 160          */
 161         spin_lock(&tlbstate_lock);
 162
 163         flush_mm = mm;
 164         flush_va = va;
 165         cpus_or(flush_cpumask, cpumask, flush_cpumask);
 166
 167         /*
 168          * We have to send the IPI only to
 169          * CPUs affected.
 170          */
 171         send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
 172
 173         while (!cpus_empty(flush_cpumask))
 174                 mb();   /* nothing. lockup detection does not belong here */;
 175
 176         flush_mm = NULL;
 177         flush_va = 0;
 178         spin_unlock(&tlbstate_lock);
 179 }
 180
 181 void flush_tlb_current_task(void)
 182 {
 183         struct mm_struct *mm = current->mm;
 184         cpumask_t cpu_mask;
 185
 186         preempt_disable();
 187         cpu_mask = mm->cpu_vm_mask;
 188         cpu_clear(smp_processor_id(), cpu_mask);
 189
 190         local_flush_tlb();
 191         if (!cpus_empty(cpu_mask))
 192                 flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
 193         preempt_enable();
 194 }
 195
 196 void flush_tlb_mm (struct mm_struct * mm)
 197 {
 198         cpumask_t cpu_mask;
 199
 200         preempt_disable();
 201         cpu_mask = mm->cpu_vm_mask;
 202         cpu_clear(smp_processor_id(), cpu_mask);
 203
 204         if (current->active_mm == mm) {
 205                 if (current->mm)
 206                         local_flush_tlb();
 207                 else
 208                         leave_mm(smp_processor_id());
 209         }
 210         if (!cpus_empty(cpu_mask))
 211                 flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
 212
 213         preempt_enable();
 214 }
 215
 216 void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
 217 {
 218         struct mm_struct *mm = vma->vm_mm;
 219         cpumask_t cpu_mask;
 220
 221         preempt_disable();
 222         cpu_mask = mm->cpu_vm_mask;
 223         cpu_clear(smp_processor_id(), cpu_mask);
 224
 225         if (current->active_mm == mm) {
 226                 if(current->mm)
 227                         __flush_tlb_one(va);
 228                  else
 229                         leave_mm(smp_processor_id());
 230         }
 231
 232         if (!cpus_empty(cpu_mask))
 233                 flush_tlb_others(cpu_mask, mm, va);
 234
 235         preempt_enable();
 236 }
 237
 238 static void do_flush_tlb_all(void* info)
 239 {
 240         unsigned long cpu = smp_processor_id();
 241
 242         __flush_tlb_all();
 243         if (read_pda(mmu_state) == TLBSTATE_LAZY)
 244                 leave_mm(cpu);
 245 }
 246
 247 void flush_tlb_all(void)
 248 {
 249         on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
 250 }
 251
 252 void smp_kdb_stop(void)
 253 {
 254         send_IPI_allbutself(KDB_VECTOR);
 255 }
 256
 257 /*
 258  * this function sends a 'reschedule' IPI to another CPU.
 259  * it goes straight through and wastes no time serializing
 260  * anything. Worst case is that we lose a reschedule ...
 261  */
 262
 263 void smp_send_reschedule(int cpu)
 264 {
 265         send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
 266 }
 267
 268 /*
 269  * Structure and data for smp_call_function(). This is designed to minimise
 270  * static memory requirements. It also looks cleaner.
 271  */
 272 static DEFINE_SPINLOCK(call_lock);
 273
 274 struct call_data_struct {
 275         void (*func) (void *info);
 276         void *info;
 277         atomic_t started;
 278         atomic_t finished;
 279         int wait;
 280 };
 281
 282 static struct call_data_struct * call_data;
 283
 284 /*
 285  * this function sends a 'generic call function' IPI to all other CPUs
 286  * in the system.
 287  */
 288 static void __smp_call_function (void (*func) (void *info), void *info,
 289                                 int nonatomic, int wait)
 290 {
 291         struct call_data_struct data;
 292         int cpus = num_online_cpus()-1;
 293
 294         if (!cpus)
 295                 return;
 296
 297         data.func = func;
 298         data.info = info;
 299         atomic_set(&data.started, 0);
 300         data.wait = wait;
 301         if (wait)
 302                 atomic_set(&data.finished, 0);
 303
 304         call_data = &data;
 305         wmb();
 306         /* Send a message to all other CPUs and wait for them to respond */
 307         send_IPI_allbutself(CALL_FUNCTION_VECTOR);
 308
 309         /* Wait for response */
 310         while (atomic_read(&data.started) != cpus)
 311                 cpu_relax();
 312
 313         if (!wait)
 314                 return;
 315
 316         while (atomic_read(&data.finished) != cpus)
 317                 cpu_relax();
 318 }
 319
 320 /*
 321  * smp_call_function - run a function on all other CPUs.
 322  * @func: The function to run. This must be fast and non-blocking.
 323  * @info: An arbitrary pointer to pass to the function.
 324  * @nonatomic: currently unused.
 325  * @wait: If true, wait (atomically) until function has completed on other
 326  *        CPUs.
 327  *
 328  * Returns 0 on success, else a negative status code. Does not return until
 329  * remote CPUs are nearly ready to execute func or are or have executed.
 330  *
 331  * You must not call this function with disabled interrupts or from a
 332  * hardware interrupt handler or from a bottom half handler.
 333  * Actually there are a few legal cases, like panic.
 334  */
 335 int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
 336                         int wait)
 337 {
 338         spin_lock(&call_lock);
 339         __smp_call_function(func,info,nonatomic,wait);
 340         spin_unlock(&call_lock);
 341         return 0;
 342 }
 343
 344 void smp_stop_cpu(void)
 345 {
 346         /*
 347          * Remove this CPU:
 348          */
 349         cpu_clear(smp_processor_id(), cpu_online_map);
 350         local_irq_disable();
 351         disable_local_APIC();
 352         local_irq_enable();
 353 }
 354
 355 static void smp_really_stop_cpu(void *dummy)
 356 {
 357         smp_stop_cpu();
 358         for (;;)
 359                 asm("hlt");
 360 }
 361
 362 void smp_send_stop(void)
 363 {
 364         int nolock = 0;
 365         if (reboot_force)
 366                 return;
 367         /* Don't deadlock on the call lock in panic */
 368         if (!spin_trylock(&call_lock)) {
 369                 /* ignore locking because we have paniced anyways */
 370                 nolock = 1;
 371         }
 372         __smp_call_function(smp_really_stop_cpu, NULL, 0, 0);
 373         if (!nolock)
 374                 spin_unlock(&call_lock);
 375         smp_stop_cpu();
 376 }
 377
 378 /*
 379  * Reschedule call back. Nothing to do,
 380  * all the work is done automatically when
 381  * we return from the interrupt.
 382  */
 383 asmlinkage void smp_reschedule_interrupt(void)
 384 {
 385         ack_APIC_irq();
 386 }
 387
 388 asmlinkage void smp_call_function_interrupt(void)
 389 {
 390         void (*func) (void *info) = call_data->func;
 391         void *info = call_data->info;
 392         int wait = call_data->wait;
 393
 394         ack_APIC_irq();
 395         /*
 396          * Notify initiating CPU that I've grabbed the data and am
 397          * about to execute the function
 398          */
 399         mb();
 400         atomic_inc(&call_data->started);
 401         /*
 402          * At this point the info structure may be out of scope unless wait==1
 403          */
 404         irq_enter();
 405         (*func)(info);
 406         irq_exit();
 407         if (wait) {
 408                 mb();
 409                 atomic_inc(&call_data->finished);
 410         }
 411 }