fedora core 6 1.2949 + vserver 2.2.0
[linux-2.6.git] / arch / alpha / kernel / smp.c
index 9f06d31..d1ec4f5 100644 (file)
@@ -25,6 +25,8 @@
 #include <linux/spinlock.h>
 #include <linux/irq.h>
 #include <linux/cache.h>
+#include <linux/profile.h>
+#include <linux/bitops.h>
 
 #include <asm/hwrpb.h>
 #include <asm/ptrace.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/bitops.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
-#include <asm/hardirq.h>
 #include <asm/mmu_context.h>
 #include <asm/tlbflush.h>
 
@@ -52,6 +52,7 @@
 
 /* A collection of per-processor data.  */
 struct cpuinfo_alpha cpu_data[NR_CPUS];
+EXPORT_SYMBOL(cpu_data);
 
 /* A collection of single bit ipi messages.  */
 static struct {
@@ -68,18 +69,13 @@ enum ipi_message_type {
 static int smp_secondary_alive __initdata = 0;
 
 /* Which cpus ids came online.  */
-unsigned long cpu_present_mask;
 cpumask_t cpu_online_map;
 
 EXPORT_SYMBOL(cpu_online_map);
 
-/* cpus reported in the hwrpb */
-static unsigned long hwrpb_cpu_present_mask __initdata = 0;
-
 int smp_num_probed;            /* Internal processor count */
 int smp_num_cpus = 1;          /* Number that came online.  */
-cycles_t cacheflush_time;
-unsigned long cache_decay_ticks;
+EXPORT_SYMBOL(smp_num_cpus);
 
 extern void calibrate_delay(void);
 
@@ -177,57 +173,6 @@ smp_callin(void)
        cpu_idle();
 }
 
-
-/*
- * Rough estimation for SMP scheduling, this is the number of cycles it
- * takes for a fully memory-limited process to flush the SMP-local cache.
- *
- * We are not told how much cache there is, so we have to guess.
- */
-static void __init
-smp_tune_scheduling (int cpuid)
-{
-       struct percpu_struct *cpu;
-       unsigned long on_chip_cache;    /* kB */
-       unsigned long freq;             /* Hz */
-       unsigned long bandwidth = 350;  /* MB/s */
-
-       cpu = (struct percpu_struct*)((char*)hwrpb + hwrpb->processor_offset
-                                     + cpuid * hwrpb->processor_size);
-       switch (cpu->type)
-       {
-       case EV45_CPU:
-               on_chip_cache = 16 + 16;
-               break;
-
-       case EV5_CPU:
-       case EV56_CPU:
-               on_chip_cache = 8 + 8 + 96;
-               break;
-
-       case PCA56_CPU:
-               on_chip_cache = 16 + 8;
-               break;
-
-       case EV6_CPU:
-       case EV67_CPU:
-       default:
-               on_chip_cache = 64 + 64;
-               break;
-       }
-
-       freq = hwrpb->cycle_freq ? : est_cycle_freq;
-
-       cacheflush_time = (freq / 1000000) * (on_chip_cache << 10) / bandwidth;
-       cache_decay_ticks = cacheflush_time / (freq / 1000) * HZ / 1000;
-
-       printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
-              cacheflush_time/(freq/1000000),
-              (cacheflush_time*100/(freq/1000000)) % 100);
-       printk("task migration cache decay timeout: %ld msecs.\n",
-              (cache_decay_ticks + 1) * 1000 / HZ);
-}
-
 /* Wait until hwrpb->txrdy is clear for cpu.  Return -1 on timeout.  */
 static int __init
 wait_for_txrdy (unsigned long cpumask)
@@ -355,7 +300,7 @@ secondary_cpu_start(int cpuid, struct task_struct *idle)
                 + hwrpb->processor_offset
                 + cpuid * hwrpb->processor_size);
        hwpcb = (struct pcb_struct *) cpu->hwpcb;
-       ipcb = &idle->thread_info->pcb;
+       ipcb = &task_thread_info(idle)->pcb;
 
        /* Initialize the CPU's HWPCB to something just good enough for
           us to get started.  Immediately after starting, we'll swpctx
@@ -411,15 +356,6 @@ secondary_cpu_start(int cpuid, struct task_struct *idle)
        return 0;
 }
 
-static struct task_struct * __init
-fork_by_hand(void)
-{
-       /* Don't care about the contents of regs since we'll never
-          reschedule the forked task. */
-       struct pt_regs regs;
-       return copy_process(CLONE_VM|CLONE_IDLETASK, 0, &regs, 0, NULL, NULL);
-}
-
 /*
  * Bring one cpu online.
  */
@@ -435,15 +371,10 @@ smp_boot_one_cpu(int cpuid)
           the other task-y sort of data structures set up like we
           wish.  We can't use kernel_thread since we must avoid
           rescheduling the child.  */
-       idle = fork_by_hand();
+       idle = fork_idle(cpuid);
        if (IS_ERR(idle))
                panic("failed fork for CPU %d", cpuid);
 
-       wake_up_forked_process(idle);
-
-       init_idle(idle, cpuid);
-       unhash_process(idle);
-
        DBGS(("smp_boot_one_cpu: CPU %d state 0x%lx flags 0x%lx\n",
              cpuid, idle->state, idle->flags));
 
@@ -509,7 +440,7 @@ setup_smp(void)
                        if ((cpu->flags & 0x1cc) == 0x1cc) {
                                smp_num_probed++;
                                /* Assume here that "whami" == index */
-                               hwrpb_cpu_present_mask |= (1UL << i);
+                               cpu_set(i, cpu_present_map);
                                cpu->pal_revision = boot_cpu_palrev;
                        }
 
@@ -520,12 +451,10 @@ setup_smp(void)
                }
        } else {
                smp_num_probed = 1;
-               hwrpb_cpu_present_mask = (1UL << boot_cpuid);
        }
-       cpu_present_mask = 1UL << boot_cpuid;
 
-       printk(KERN_INFO "SMP: %d CPUs probed -- cpu_present_mask = %lx\n",
-              smp_num_probed, hwrpb_cpu_present_mask);
+       printk(KERN_INFO "SMP: %d CPUs probed -- cpu_present_map = %lx\n",
+              smp_num_probed, cpu_present_map.bits[0]);
 }
 
 /*
@@ -534,49 +463,29 @@ setup_smp(void)
 void __init
 smp_prepare_cpus(unsigned int max_cpus)
 {
-       int cpu_count, i;
-
        /* Take care of some initial bookkeeping.  */
        memset(ipi_data, 0, sizeof(ipi_data));
 
        current_thread_info()->cpu = boot_cpuid;
 
        smp_store_cpu_info(boot_cpuid);
-       smp_tune_scheduling(boot_cpuid);
        smp_setup_percpu_timer(boot_cpuid);
 
        /* Nothing to do on a UP box, or when told not to.  */
        if (smp_num_probed == 1 || max_cpus == 0) {
-               cpu_present_mask = 1UL << boot_cpuid;
+               cpu_present_map = cpumask_of_cpu(boot_cpuid);
                printk(KERN_INFO "SMP mode deactivated.\n");
                return;
        }
 
        printk(KERN_INFO "SMP starting up secondaries.\n");
 
-       cpu_count = 1;
-       for (i = 0; (i < NR_CPUS) && (cpu_count < max_cpus); i++) {
-               if (i == boot_cpuid)
-                       continue;
-
-               if (((hwrpb_cpu_present_mask >> i) & 1) == 0)
-                       continue;
-
-               cpu_present_mask |= 1UL << i;
-               cpu_count++;
-       }
-
-       smp_num_cpus = cpu_count;
+       smp_num_cpus = smp_num_probed;
 }
 
 void __devinit
 smp_prepare_boot_cpu(void)
 {
-       /*
-        * Mark the boot cpu (current cpu) as both present and online
-        */ 
-       cpu_set(smp_processor_id(), cpu_present_mask);
-       cpu_set(smp_processor_id(), cpu_online_map);
 }
 
 int __devinit
@@ -597,7 +506,7 @@ smp_cpus_done(unsigned int max_cpus)
                if (cpu_online(cpu))
                        bogosum += cpu_data[cpu].loops_per_jiffy;
        
-       printk(KERN_INFO "SMP: Total of %ld processors activated "
+       printk(KERN_INFO "SMP: Total of %d processors activated "
               "(%lu.%02lu BogoMIPS).\n",
               num_online_cpus(), 
               (bogosum + 2500) / (500000/HZ),
@@ -608,13 +517,15 @@ smp_cpus_done(unsigned int max_cpus)
 void
 smp_percpu_timer_interrupt(struct pt_regs *regs)
 {
+       struct pt_regs *old_regs;
        int cpu = smp_processor_id();
        unsigned long user = user_mode(regs);
        struct cpuinfo_alpha *data = &cpu_data[cpu];
 
+       old_regs = set_irq_regs(regs);
+
        /* Record kernel PC.  */
-       if (!user)
-               alpha_do_profile(regs->pc);
+       profile_tick(CPU_PROFILING);
 
        if (!--data->prof_counter) {
                /* We need to make like a normal interrupt -- otherwise
@@ -628,6 +539,7 @@ smp_percpu_timer_interrupt(struct pt_regs *regs)
 
                irq_exit();
        }
+       set_irq_regs(old_regs);
 }
 
 int __init
@@ -638,23 +550,17 @@ setup_profiling_timer(unsigned int multiplier)
 
 \f
 static void
-send_ipi_message(unsigned long to_whom, enum ipi_message_type operation)
+send_ipi_message(cpumask_t to_whom, enum ipi_message_type operation)
 {
-       unsigned long i, set, n;
+       int i;
 
        mb();
-       for (i = to_whom; i ; i &= ~set) {
-               set = i & -i;
-               n = __ffs(set);
-               set_bit(operation, &ipi_data[n].bits);
-       }
+       for_each_cpu_mask(i, to_whom)
+               set_bit(operation, &ipi_data[i].bits);
 
        mb();
-       for (i = to_whom; i ; i &= ~set) {
-               set = i & -i;
-               n = __ffs(set);
-               wripir(n);
-       }
+       for_each_cpu_mask(i, to_whom)
+               wripir(i);
 }
 
 /* Structure and data for smp_call_function.  This is designed to 
@@ -779,19 +685,20 @@ handle_ipi(struct pt_regs *regs)
 void
 smp_send_reschedule(int cpu)
 {
-#if DEBUG_IPI_MSG
+#ifdef DEBUG_IPI_MSG
        if (cpu == hard_smp_processor_id())
                printk(KERN_WARNING
                       "smp_send_reschedule: Sending IPI to self.\n");
 #endif
-       send_ipi_message(1UL << cpu, IPI_RESCHEDULE);
+       send_ipi_message(cpumask_of_cpu(cpu), IPI_RESCHEDULE);
 }
 
 void
 smp_send_stop(void)
 {
-       unsigned long to_whom = cpu_present_mask & ~(1UL << smp_processor_id());
-#if DEBUG_IPI_MSG
+       cpumask_t to_whom = cpu_possible_map;
+       cpu_clear(smp_processor_id(), to_whom);
+#ifdef DEBUG_IPI_MSG
        if (hard_smp_processor_id() != boot_cpu_id)
                printk(KERN_WARNING "smp_send_stop: Not on boot cpu.\n");
 #endif
@@ -814,18 +721,21 @@ smp_send_stop(void)
 
 int
 smp_call_function_on_cpu (void (*func) (void *info), void *info, int retry,
-                         int wait, unsigned long to_whom)
+                         int wait, cpumask_t to_whom)
 {
        struct smp_call_struct data;
        unsigned long timeout;
        int num_cpus_to_call;
        
+       /* Can deadlock when called with interrupts disabled */
+       WARN_ON(irqs_disabled());
+
        data.func = func;
        data.info = info;
        data.wait = wait;
 
-       to_whom &= ~(1L << smp_processor_id());
-       num_cpus_to_call = hweight64(to_whom);
+       cpu_clear(smp_processor_id(), to_whom);
+       num_cpus_to_call = cpus_weight(to_whom);
 
        atomic_set(&data.unstarted_count, num_cpus_to_call);
        atomic_set(&data.unfinished_count, num_cpus_to_call);
@@ -866,7 +776,7 @@ smp_call_function_on_cpu (void (*func) (void *info), void *info, int retry,
 
        /* We either got one or timed out -- clear the lock. */
        mb();
-       smp_call_function_data = 0;
+       smp_call_function_data = NULL;
 
        /* 
         * If after both the initial and long timeout periods we still don't
@@ -882,6 +792,7 @@ smp_call_function_on_cpu (void (*func) (void *info), void *info, int retry,
 
        return 0;
 }
+EXPORT_SYMBOL(smp_call_function_on_cpu);
 
 int
 smp_call_function (void (*func) (void *info), void *info, int retry, int wait)
@@ -889,6 +800,7 @@ smp_call_function (void (*func) (void *info), void *info, int retry, int wait)
        return smp_call_function_on_cpu (func, info, retry, wait,
                                         cpu_online_map);
 }
+EXPORT_SYMBOL(smp_call_function);
 
 static void
 ipi_imb(void *ignored)
@@ -903,6 +815,7 @@ smp_imb(void)
        if (on_each_cpu(ipi_imb, NULL, 1, 1))
                printk(KERN_CRIT "smp_imb: timed out\n");
 }
+EXPORT_SYMBOL(smp_imb);
 
 static void
 ipi_flush_tlb_all(void *ignored)
@@ -958,6 +871,7 @@ flush_tlb_mm(struct mm_struct *mm)
 
        preempt_enable();
 }
+EXPORT_SYMBOL(flush_tlb_mm);
 
 struct flush_tlb_page_struct {
        struct vm_area_struct *vma;
@@ -1010,6 +924,7 @@ flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
 
        preempt_enable();
 }
+EXPORT_SYMBOL(flush_tlb_page);
 
 void
 flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
@@ -1017,6 +932,7 @@ flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long e
        /* On the Alpha we always flush the whole user tlb.  */
        flush_tlb_mm(vma->vm_mm);
 }
+EXPORT_SYMBOL(flush_tlb_range);
 
 static void
 ipi_flush_icache_page(void *x)
@@ -1060,175 +976,3 @@ flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
 
        preempt_enable();
 }
-\f
-#ifdef CONFIG_DEBUG_SPINLOCK
-void
-_raw_spin_unlock(spinlock_t * lock)
-{
-       mb();
-       lock->lock = 0;
-
-       lock->on_cpu = -1;
-       lock->previous = NULL;
-       lock->task = NULL;
-       lock->base_file = "none";
-       lock->line_no = 0;
-}
-
-void
-debug_spin_lock(spinlock_t * lock, const char *base_file, int line_no)
-{
-       long tmp;
-       long stuck;
-       void *inline_pc = __builtin_return_address(0);
-       unsigned long started = jiffies;
-       int printed = 0;
-       int cpu = smp_processor_id();
-
-       stuck = 1L << 30;
- try_again:
-
-       /* Use sub-sections to put the actual loop at the end
-          of this object file's text section so as to perfect
-          branch prediction.  */
-       __asm__ __volatile__(
-       "1:     ldl_l   %0,%1\n"
-       "       subq    %2,1,%2\n"
-       "       blbs    %0,2f\n"
-       "       or      %0,1,%0\n"
-       "       stl_c   %0,%1\n"
-       "       beq     %0,3f\n"
-       "4:     mb\n"
-       ".subsection 2\n"
-       "2:     ldl     %0,%1\n"
-       "       subq    %2,1,%2\n"
-       "3:     blt     %2,4b\n"
-       "       blbs    %0,2b\n"
-       "       br      1b\n"
-       ".previous"
-       : "=r" (tmp), "=m" (lock->lock), "=r" (stuck)
-       : "1" (lock->lock), "2" (stuck) : "memory");
-
-       if (stuck < 0) {
-               printk(KERN_WARNING
-                      "%s:%d spinlock stuck in %s at %p(%d)"
-                      " owner %s at %p(%d) %s:%d\n",
-                      base_file, line_no,
-                      current->comm, inline_pc, cpu,
-                      lock->task->comm, lock->previous,
-                      lock->on_cpu, lock->base_file, lock->line_no);
-               stuck = 1L << 36;
-               printed = 1;
-               goto try_again;
-       }
-
-       /* Exiting.  Got the lock.  */
-       lock->on_cpu = cpu;
-       lock->previous = inline_pc;
-       lock->task = current;
-       lock->base_file = base_file;
-       lock->line_no = line_no;
-
-       if (printed) {
-               printk(KERN_WARNING
-                      "%s:%d spinlock grabbed in %s at %p(%d) %ld ticks\n",
-                      base_file, line_no, current->comm, inline_pc,
-                      cpu, jiffies - started);
-       }
-}
-
-int
-debug_spin_trylock(spinlock_t * lock, const char *base_file, int line_no)
-{
-       int ret;
-       if ((ret = !test_and_set_bit(0, lock))) {
-               lock->on_cpu = smp_processor_id();
-               lock->previous = __builtin_return_address(0);
-               lock->task = current;
-       } else {
-               lock->base_file = base_file;
-               lock->line_no = line_no;
-       }
-       return ret;
-}
-#endif /* CONFIG_DEBUG_SPINLOCK */
-\f
-#ifdef CONFIG_DEBUG_RWLOCK
-void _raw_write_lock(rwlock_t * lock)
-{
-       long regx, regy;
-       int stuck_lock, stuck_reader;
-       void *inline_pc = __builtin_return_address(0);
-
- try_again:
-
-       stuck_lock = 1<<30;
-       stuck_reader = 1<<30;
-
-       __asm__ __volatile__(
-       "1:     ldl_l   %1,%0\n"
-       "       blbs    %1,6f\n"
-       "       blt     %1,8f\n"
-       "       mov     1,%1\n"
-       "       stl_c   %1,%0\n"
-       "       beq     %1,6f\n"
-       "4:     mb\n"
-       ".subsection 2\n"
-       "6:     blt     %3,4b   # debug\n"
-       "       subl    %3,1,%3 # debug\n"
-       "       ldl     %1,%0\n"
-       "       blbs    %1,6b\n"
-       "8:     blt     %4,4b   # debug\n"
-       "       subl    %4,1,%4 # debug\n"
-       "       ldl     %1,%0\n"
-       "       blt     %1,8b\n"
-       "       br      1b\n"
-       ".previous"
-       : "=m" (*(volatile int *)lock), "=&r" (regx), "=&r" (regy),
-         "=&r" (stuck_lock), "=&r" (stuck_reader)
-       : "0" (*(volatile int *)lock), "3" (stuck_lock), "4" (stuck_reader) : "memory");
-
-       if (stuck_lock < 0) {
-               printk(KERN_WARNING "write_lock stuck at %p\n", inline_pc);
-               goto try_again;
-       }
-       if (stuck_reader < 0) {
-               printk(KERN_WARNING "write_lock stuck on readers at %p\n",
-                      inline_pc);
-               goto try_again;
-       }
-}
-
-void _raw_read_lock(rwlock_t * lock)
-{
-       long regx;
-       int stuck_lock;
-       void *inline_pc = __builtin_return_address(0);
-
- try_again:
-
-       stuck_lock = 1<<30;
-
-       __asm__ __volatile__(
-       "1:     ldl_l   %1,%0;"
-       "       blbs    %1,6f;"
-       "       subl    %1,2,%1;"
-       "       stl_c   %1,%0;"
-       "       beq     %1,6f;"
-       "4:     mb\n"
-       ".subsection 2\n"
-       "6:     ldl     %1,%0;"
-       "       blt     %2,4b   # debug\n"
-       "       subl    %2,1,%2 # debug\n"
-       "       blbs    %1,6b;"
-       "       br      1b\n"
-       ".previous"
-       : "=m" (*(volatile int *)lock), "=&r" (regx), "=&r" (stuck_lock)
-       : "0" (*(volatile int *)lock), "2" (stuck_lock) : "memory");
-
-       if (stuck_lock < 0) {
-               printk(KERN_WARNING "read_lock stuck at %p\n", inline_pc);
-               goto try_again;
-       }
-}
-#endif /* CONFIG_DEBUG_RWLOCK */