Merge to Fedora kernel-2.6.7-1.492
diff --git a/kernel/sched.c b/kernel/sched.c
index 9667edb..40fadc7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
 #include <linux/cpu.h>
 #include <linux/percpu.h>
 #include <linux/kthread.h>
+#include <asm/tlb.h>
 #include <linux/vserver/sched.h>
 #include <linux/vs_base.h>
 
 #include <asm/unistd.h>
 
-#include <asm/unistd.h>
-
 #ifdef CONFIG_NUMA
 #define cpu_to_node_mask(cpu) node_to_cpumask(cpu_to_node(cpu))
 #else
@@ -222,7 +221,7 @@ struct runqueue {
 #if defined(CONFIG_SMP)
        unsigned long cpu_load;
 #endif
-       unsigned long long nr_switches;
+       unsigned long long nr_switches, nr_preempt;
        unsigned long expired_timestamp, nr_uninterruptible;
        unsigned long long timestamp_last_tick;
        task_t *curr, *idle;
@@ -868,10 +867,9 @@ static int wake_idle(int cpu, task_t *p)
                return cpu;
 
        cpus_and(tmp, sd->span, cpu_online_map);
-       for_each_cpu_mask(i, tmp) {
-               if (!cpu_isset(i, p->cpus_allowed))
-                       continue;
+       cpus_and(tmp, tmp, p->cpus_allowed);
 
+       for_each_cpu_mask(i, tmp) {
                if (idle_cpu(i))
                        return i;
        }
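
The wake_idle() change above folds the per-iteration cpu_isset() test into the mask itself: intersecting the candidate set with p->cpus_allowed once up front leaves the loop scanning only CPUs the task is permitted to run on. A minimal sketch of the same pattern, using a hypothetical helper name (first_idle_allowed() is not part of the patch):

#include <linux/sched.h>
#include <linux/cpumask.h>

/* Illustrative only: pre-mask the candidates once rather than testing
 * p->cpus_allowed on every loop iteration. */
static int first_idle_allowed(task_t *p, cpumask_t candidates)
{
	int i;

	cpus_and(candidates, candidates, p->cpus_allowed);

	for_each_cpu_mask(i, candidates)
		if (idle_cpu(i))
			return i;

	return -1;	/* no allowed idle CPU found */
}
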
@@ -1328,6 +1326,16 @@ static void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2)
                spin_unlock(&rq2->lock);
 }
 
+unsigned long long nr_preempt(void)
+{
+       unsigned long long i, sum = 0;
+
+       for_each_online_cpu(i)
+               sum += cpu_rq(i)->nr_preempt;
+
+       return sum;
+}
+
 enum idle_type
 {
        IDLE,
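
nr_preempt() follows the same pattern as the existing nr_context_switches(): walk the online CPUs and sum the per-runqueue counters without locking, accepting a slightly fuzzy total. The hunk does not show how the new counter reaches userspace; one hypothetical way to surface it (not taken from this patch) would be an extra seq_printf() next to the existing "ctxt" line in the /proc/stat handler, along these lines:

#include <linux/seq_file.h>

extern unsigned long long nr_preempt(void);

/* Hypothetical sketch only: how the counter could be reported from
 * fs/proc/proc_misc.c:show_stat(). Not part of this diff. */
static void show_preempt_count(struct seq_file *p)
{
	seq_printf(p, "preempt %llu\n", nr_preempt());
}
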
@@ -2583,6 +2591,7 @@ asmlinkage void __sched schedule(void)
        int maxidle = -HZ;
 #endif
 
+       //WARN_ON(system_state == SYSTEM_BOOTING);
        /*
         * Test if we are atomic.  Since do_exit() needs to call into
         * schedule() atomically, we ignore that path for now.
@@ -2713,7 +2722,8 @@ pick_next:
        next->activated = 0;
 switch_tasks:
        prefetch(next);
-       clear_tsk_need_resched(prev);
+       if (test_and_clear_tsk_thread_flag(prev, TIF_NEED_RESCHED))
+               rq->nr_preempt++;
        RCU_qsctr(task_cpu(prev))++;
 
 #ifdef CONFIG_CKRM_CPU_SCHEDULE
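
Replacing clear_tsk_need_resched(prev) with the atomic test-and-clear keeps the old behaviour (the flag is always cleared on a switch) while also reporting whether TIF_NEED_RESCHED was actually set, which is what feeds the new rq->nr_preempt counter. An open-coded equivalent would look like the sketch below; it is illustrative only, since the separate test and clear can race with a concurrent set_tsk_need_resched() from another CPU, which is exactly what the combined primitive avoids:

/* Illustrative only -- not the form used by the patch. */
if (test_tsk_thread_flag(prev, TIF_NEED_RESCHED)) {
	clear_tsk_need_resched(prev);	/* racy against a remote setter */
	rq->nr_preempt++;
}
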
@@ -3381,6 +3391,21 @@ out_unlock:
        return retval;
 }
 
+/*
+ * Represents all cpus present in the system.
+ * In systems capable of hotplug this map can grow dynamically as new
+ * cpus are detected, via a platform-specific method such as ACPI.
+ */
+
+cpumask_t cpu_present_map;
+EXPORT_SYMBOL(cpu_present_map);
+
+#ifndef CONFIG_SMP
+cpumask_t cpu_online_map = CPU_MASK_ALL;
+cpumask_t cpu_possible_map = CPU_MASK_ALL;
+#endif
+
 /**
  * sys_sched_getaffinity - get the cpu affinity of a process
  * @pid: pid of the process
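
cpu_present_map sits between the other two global masks: every online CPU is expected to be present, and every present CPU to be possible. A small consistency check illustrating that intended containment (cpu_masks_consistent() is a hypothetical name, not part of the patch):

#include <linux/cpumask.h>

/* Illustrative only: the intended containment between the global masks. */
static int cpu_masks_consistent(void)
{
	return cpus_subset(cpu_online_map, cpu_present_map) &&
	       cpus_subset(cpu_present_map, cpu_possible_map);
}
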
@@ -3460,12 +3485,34 @@ asmlinkage long sys_sched_yield(void)
 
 void __sched __cond_resched(void)
 {
-       set_current_state(TASK_RUNNING);
-       schedule();
+#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
+       __might_sleep(__FILE__, __LINE__, 0);
+#endif
+       /*
+        * The system_state check is somewhat ugly but we might be
+        * called during early boot when we are not yet ready to reschedule.
+        */
+       if (need_resched() && system_state >= SYSTEM_BOOTING_SCHEDULER_OK) {
+               set_current_state(TASK_RUNNING);
+               schedule();
+       }
 }
 
 EXPORT_SYMBOL(__cond_resched);
 
+void __sched __cond_resched_lock(spinlock_t *lock)
+{
+       if (need_resched()) {
+               _raw_spin_unlock(lock);
+               preempt_enable_no_resched();
+               set_current_state(TASK_RUNNING);
+               schedule();
+               spin_lock(lock);
+       }
+}
+
+EXPORT_SYMBOL(__cond_resched_lock);
+
 /**
  * yield - yield the current processor to other threads.
  *
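
__cond_resched_lock() packages the common pattern of briefly dropping a spinlock so a pending reschedule can run while scanning a long data structure. A hypothetical caller (scan_table() and struct item are illustrative names, and the extern declaration is assumed to live in linux/sched.h) might use it like this; anything protected by the lock must be re-validated after the call, because the lock may have been released and retaken:

#include <linux/spinlock.h>

extern void __cond_resched_lock(spinlock_t *lock);	/* assumed declared in sched.h */

struct item;	/* illustrative */

static void scan_table(struct item **tab, int n, spinlock_t *lock)
{
	int i;

	spin_lock(lock);
	for (i = 0; i < n; i++) {
		/* ... examine tab[i] under the lock ... */
		__cond_resched_lock(lock);	/* may drop and retake the lock */
	}
	spin_unlock(lock);
}

The _raw_spin_unlock()/preempt_enable_no_resched() pair in the function avoids the implicit preemption point that a plain spin_unlock() would trigger under CONFIG_PREEMPT; the reschedule instead happens only through the explicit schedule() call.
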
@@ -3697,6 +3744,8 @@ void show_state(void)
        read_unlock(&tasklist_lock);
 }
 
+EXPORT_SYMBOL_GPL(show_state);
+
 void __devinit init_idle(task_t *idle, int cpu)
 {
        runqueue_t *idle_rq = cpu_rq(cpu), *rq = cpu_rq(task_cpu(idle));
@@ -3766,7 +3815,7 @@ int set_cpus_allowed(task_t *p, cpumask_t new_mask)
        runqueue_t *rq;
 
        rq = task_rq_lock(p, &flags);
-       if (any_online_cpu(new_mask) == NR_CPUS) {
+       if (!cpus_intersects(new_mask, cpu_online_map)) {
                ret = -EINVAL;
                goto out;
        }
@@ -3781,6 +3830,7 @@ int set_cpus_allowed(task_t *p, cpumask_t new_mask)
                task_rq_unlock(rq, &flags);
                wake_up_process(rq->migration_thread);
                wait_for_completion(&req.done);
+               tlb_migrate_finish(p->mm);
                return 0;
        }
 out:
@@ -3943,8 +3993,7 @@ static void migrate_all_tasks(int src_cpu)
                if (dest_cpu == NR_CPUS)
                        dest_cpu = any_online_cpu(tsk->cpus_allowed);
                if (dest_cpu == NR_CPUS) {
-                       cpus_clear(tsk->cpus_allowed);
-                       cpus_complement(tsk->cpus_allowed);
+                       cpus_setall(tsk->cpus_allowed);
                        dest_cpu = any_online_cpu(tsk->cpus_allowed);
 
                        /* Don't tell them about moving exiting tasks
@@ -4006,6 +4055,7 @@ static int migration_call(struct notifier_block *nfb, unsigned long action,
                p = kthread_create(migration_thread, hcpu, "migration/%d",cpu);
                if (IS_ERR(p))
                        return NOTIFY_BAD;
+               p->flags |= PF_NOFREEZE;
                kthread_bind(p, cpu);
                /* Must be high prio: stop_machine expects to yield to it. */
                rq = task_rq_lock(p, &flags);
@@ -4253,14 +4303,14 @@ void sched_domain_debug(void)
 
                sd = rq->sd;
 
-               printk(KERN_WARNING "CPU%d: %s\n",
+               printk(KERN_DEBUG "CPU%d: %s\n",
                                i, (cpu_online(i) ? " online" : "offline"));
 
                do {
                        int j;
                        char str[NR_CPUS];
                        struct sched_group *group = sd->groups;
-                       cpumask_t groupmask, tmp;
+                       cpumask_t groupmask;
 
                        cpumask_scnprintf(str, NR_CPUS, sd->span);
                        cpus_clear(groupmask);
@@ -4271,13 +4321,13 @@ void sched_domain_debug(void)
                        printk("domain %d: span %s\n", level, str);
 
                        if (!cpu_isset(i, sd->span))
-                               printk(KERN_WARNING "ERROR domain->span does not contain CPU%d\n", i);
+                               printk(KERN_DEBUG "ERROR domain->span does not contain CPU%d\n", i);
                        if (!cpu_isset(i, group->cpumask))
-                               printk(KERN_WARNING "ERROR domain->groups does not contain CPU%d\n", i);
+                               printk(KERN_DEBUG "ERROR domain->groups does not contain CPU%d\n", i);
                        if (!group->cpu_power)
-                               printk(KERN_WARNING "ERROR domain->cpu_power not set\n");
+                               printk(KERN_DEBUG "ERROR domain->cpu_power not set\n");
 
-                       printk(KERN_WARNING);
+                       printk(KERN_DEBUG);
                        for (j = 0; j < level + 2; j++)
                                printk(" ");
                        printk("groups:");
@@ -4290,8 +4340,7 @@ void sched_domain_debug(void)
                                if (!cpus_weight(group->cpumask))
                                        printk(" ERROR empty group:");
 
-                               cpus_and(tmp, groupmask, group->cpumask);
-                               if (cpus_weight(tmp) > 0)
+                               if (cpus_intersects(groupmask, group->cpumask))
                                        printk(" ERROR repeated CPUs:");
 
                                cpus_or(groupmask, groupmask, group->cpumask);
@@ -4310,9 +4359,8 @@ void sched_domain_debug(void)
                        sd = sd->parent;
 
                        if (sd) {
-                               cpus_and(tmp, groupmask, sd->span);
-                               if (!cpus_equal(tmp, groupmask))
-                                       printk(KERN_WARNING "ERROR parent span is not a superset of domain->span\n");
+                               if (!cpus_subset(groupmask, sd->span))
+                                       printk(KERN_DEBUG "ERROR parent span is not a superset of domain->span\n");
                        }
 
                } while (sd);
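
The sched_domain_debug() cleanups above replace two-step cpumask idioms with single predicates. The equivalences they rely on are sketched below as hypothetical helpers (masks_overlap() and mask_within() are illustrative names only):

#include <linux/cpumask.h>

/* cpus_and followed by cpus_weight > 0 is the same test as cpus_intersects(a, b). */
static int masks_overlap(cpumask_t a, cpumask_t b)
{
	cpumask_t tmp;

	cpus_and(tmp, a, b);
	return cpus_weight(tmp) > 0;
}

/* cpus_and followed by cpus_equal against the first operand is cpus_subset(a, b),
 * i.e. "every CPU in a is also in b". */
static int mask_within(cpumask_t a, cpumask_t b)
{
	cpumask_t tmp;

	cpus_and(tmp, a, b);
	return cpus_equal(tmp, a);
}
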
@@ -4353,16 +4401,15 @@ void __init sched_init(void)
        /* Set up an initial dummy domain for early boot */
        static struct sched_domain sched_domain_init;
        static struct sched_group sched_group_init;
-       cpumask_t cpu_mask_all = CPU_MASK_ALL;
 
        memset(&sched_domain_init, 0, sizeof(struct sched_domain));
-       sched_domain_init.span = cpu_mask_all;
+       sched_domain_init.span = CPU_MASK_ALL;
        sched_domain_init.groups = &sched_group_init;
        sched_domain_init.last_balance = jiffies;
        sched_domain_init.balance_interval = INT_MAX; /* Don't balance */
 
        memset(&sched_group_init, 0, sizeof(struct sched_group));
-       sched_group_init.cpumask = cpu_mask_all;
+       sched_group_init.cpumask = CPU_MASK_ALL;
        sched_group_init.next = &sched_group_init;
        sched_group_init.cpu_power = SCHED_LOAD_SCALE;
 #endif
@@ -4430,20 +4477,23 @@ void __init sched_init(void)
 }
 
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
-void __might_sleep(char *file, int line)
+void __might_sleep(char *file, int line, int atomic_depth)
 {
 #if defined(in_atomic)
        static unsigned long prev_jiffy;        /* ratelimiting */
 
-       if ((in_atomic() || irqs_disabled()) &&
+#ifndef CONFIG_PREEMPT
+       atomic_depth = 0;
+#endif
+       if (((in_atomic() != atomic_depth) || irqs_disabled()) &&
            system_state == SYSTEM_RUNNING) {
                if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
                        return;
                prev_jiffy = jiffies;
                printk(KERN_ERR "Debug: sleeping function called from invalid"
                                " context at %s:%d\n", file, line);
-               printk("in_atomic():%d, irqs_disabled():%d\n",
-                       in_atomic(), irqs_disabled());
+               printk("in_atomic():%d[expected: %d], irqs_disabled():%d\n",
+                       in_atomic(), atomic_depth, irqs_disabled());
                dump_stack();
        }
 #endif