+ curr = curr->prev;
+
+ if (!can_migrate_task(tmp, busiest, this_cpu, sd, idle)) {
+ if (curr != head)
+ goto skip_queue;
+ idx++;
+ goto skip_bitmap;
+ }
+ pull_task(busiest, array, tmp, this_rq, dst_array, this_cpu);
+ pulled++;
+
+ /* We only want to steal up to the prescribed number of tasks. */
+ if (pulled < max_nr_move) {
+ if (curr != head)
+ goto skip_queue;
+ idx++;
+ goto skip_bitmap;
+ }
+out:
+ return pulled;
+}
+
+/*
+ * find_busiest_group finds and returns the busiest CPU group within the
+ * domain. It calculates and returns the number of tasks which should be
+ * moved to restore balance via the imbalance parameter.
+ */
+static struct sched_group *
+find_busiest_group(struct sched_domain *sd, int this_cpu,
+ unsigned long *imbalance, enum idle_type idle)
+{
+ struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
+ unsigned long max_load, avg_load, total_load, this_load, total_pwr;
+
+ max_load = this_load = total_load = total_pwr = 0;
+
+ do {
+ cpumask_t tmp;
+ unsigned long load;
+ int local_group;
+ int i, nr_cpus = 0;
+
+ local_group = cpu_isset(this_cpu, group->cpumask);
+
+ /* Tally up the load of all CPUs in the group */
+ avg_load = 0;
+ cpus_and(tmp, group->cpumask, cpu_online_map);
+ if (unlikely(cpus_empty(tmp)))
+ goto nextgroup;
+
+ for_each_cpu_mask(i, tmp) {
+ /* Bias balancing toward cpus of our domain */
+ if (local_group)
+ load = target_load(i);
+ else
+ load = source_load(i);
+
+ nr_cpus++;
+ avg_load += load;
+ }
+
+ if (!nr_cpus)
+ goto nextgroup;
+
+ total_load += avg_load;
+ total_pwr += group->cpu_power;
+
+ /* Adjust by relative CPU power of the group */
+ avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power;
+
+ if (local_group) {
+ this_load = avg_load;
+ this = group;
+ goto nextgroup;
+ } else if (avg_load > max_load) {
+ max_load = avg_load;
+ busiest = group;
+ }
+nextgroup:
+ group = group->next;
+ } while (group != sd->groups);
+
+ if (!busiest || this_load >= max_load)
+ goto out_balanced;
+
+ avg_load = (SCHED_LOAD_SCALE * total_load) / total_pwr;
+
+ if (this_load >= avg_load ||
+ 100*max_load <= sd->imbalance_pct*this_load)
+ goto out_balanced;
+
+ /*
+ * We're trying to get all the cpus to the average_load, so we don't
+ * want to push ourselves above the average load, nor do we wish to
+ * reduce the max loaded cpu below the average load, as either of these
+ * actions would just result in more rebalancing later, and ping-pong
+ * tasks around. Thus we look for the minimum possible imbalance.
+ * Negative imbalances (*we* are more loaded than anyone else) will
+ * be counted as no imbalance for these purposes -- we can't fix that
+ * by pulling tasks to us. Be careful of negative numbers as they'll
+ * appear as very large values with unsigned longs.
+ */
+ *imbalance = min(max_load - avg_load, avg_load - this_load);
+
+ /* How much load to actually move to equalise the imbalance */
+ *imbalance = (*imbalance * min(busiest->cpu_power, this->cpu_power))
+ / SCHED_LOAD_SCALE;
+
+ if (*imbalance < SCHED_LOAD_SCALE - 1) {
+ unsigned long pwr_now = 0, pwr_move = 0;
+ unsigned long tmp;
+
+ if (max_load - this_load >= SCHED_LOAD_SCALE*2) {
+ *imbalance = 1;
+ return busiest;
+ }
+
+ /*
+ * OK, we don't have enough imbalance to justify moving tasks,
+ * however we may be able to increase total CPU power used by
+ * moving them.
+ */
+
+ pwr_now += busiest->cpu_power*min(SCHED_LOAD_SCALE, max_load);
+ pwr_now += this->cpu_power*min(SCHED_LOAD_SCALE, this_load);
+ pwr_now /= SCHED_LOAD_SCALE;
+
+ /* Amount of load we'd subtract */
+ tmp = SCHED_LOAD_SCALE*SCHED_LOAD_SCALE/busiest->cpu_power;
+ if (max_load > tmp)
+ pwr_move += busiest->cpu_power*min(SCHED_LOAD_SCALE,
+ max_load - tmp);
+
+ /* Amount of load we'd add */
+ tmp = SCHED_LOAD_SCALE*SCHED_LOAD_SCALE/this->cpu_power;
+ if (max_load < tmp)
+ tmp = max_load;
+ pwr_move += this->cpu_power*min(SCHED_LOAD_SCALE, this_load + tmp);
+ pwr_move /= SCHED_LOAD_SCALE;
+
+ /* Move if we gain another 8th of a CPU worth of throughput */
+ if (pwr_move < pwr_now + SCHED_LOAD_SCALE / 8)
+ goto out_balanced;
+
+ *imbalance = 1;
+ return busiest;
+ }
+
+ /* Get rid of the scaling factor, rounding down as we divide */
+ *imbalance = (*imbalance + 1) / SCHED_LOAD_SCALE;
+
+ return busiest;
+
+out_balanced:
+ if (busiest && (idle == NEWLY_IDLE ||
+ (idle == IDLE && max_load > SCHED_LOAD_SCALE)) ) {
+ *imbalance = 1;
+ return busiest;
+ }
+
+ *imbalance = 0;
+ return NULL;
+}
+
+/*
+ * find_busiest_queue - find the busiest runqueue among the cpus in group.
+ */
+static runqueue_t *find_busiest_queue(struct sched_group *group)
+{
+ cpumask_t tmp;
+ unsigned long load, max_load = 0;
+ runqueue_t *busiest = NULL;
+ int i;
+
+ cpus_and(tmp, group->cpumask, cpu_online_map);
+ for_each_cpu_mask(i, tmp) {
+ load = source_load(i);
+
+ if (load > max_load) {
+ max_load = load;
+ busiest = cpu_rq(i);
+ }
+ }
+
+ return busiest;
+}
+
+/*
+ * Check this_cpu to ensure it is balanced within domain. Attempt to move
+ * tasks if there is an imbalance.
+ *
+ * Called with this_rq unlocked.
+ */
+static int load_balance(int this_cpu, runqueue_t *this_rq,
+ struct sched_domain *sd, enum idle_type idle)
+{
+ struct sched_group *group;
+ runqueue_t *busiest;
+ unsigned long imbalance;
+ int nr_moved;
+
+ spin_lock(&this_rq->lock);
+
+ group = find_busiest_group(sd, this_cpu, &imbalance, idle);
+ if (!group)
+ goto out_balanced;
+
+ busiest = find_busiest_queue(group);
+ if (!busiest)
+ goto out_balanced;
+ /*
+ * This should be "impossible", but since load
+ * balancing is inherently racy and statistical,
+ * it could happen in theory.
+ */
+ if (unlikely(busiest == this_rq)) {
+ WARN_ON(1);
+ goto out_balanced;
+ }
+
+ nr_moved = 0;
+ if (busiest->nr_running > 1) {
+ /*
+ * Attempt to move tasks. If find_busiest_group has found
+ * an imbalance but busiest->nr_running <= 1, the group is
+ * still unbalanced. nr_moved simply stays zero, so it is
+ * correctly treated as an imbalance.
+ */
+ double_lock_balance(this_rq, busiest);
+ nr_moved = move_tasks(this_rq, this_cpu, busiest,
+ imbalance, sd, idle);
+ spin_unlock(&busiest->lock);
+ }
+ spin_unlock(&this_rq->lock);
+
+ if (!nr_moved) {
+ sd->nr_balance_failed++;
+
+ if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) {
+ int wake = 0;
+
+ spin_lock(&busiest->lock);
+ if (!busiest->active_balance) {
+ busiest->active_balance = 1;
+ busiest->push_cpu = this_cpu;
+ wake = 1;
+ }
+ spin_unlock(&busiest->lock);
+ if (wake)
+ wake_up_process(busiest->migration_thread);
+
+ /*
+ * We've kicked active balancing, reset the failure
+ * counter.
+ */
+ sd->nr_balance_failed = sd->cache_nice_tries;
+ }
+ } else
+ sd->nr_balance_failed = 0;
+
+ /* We were unbalanced, so reset the balancing interval */
+ sd->balance_interval = sd->min_interval;
+
+ return nr_moved;
+
+out_balanced:
+ spin_unlock(&this_rq->lock);
+
+ /* tune up the balancing interval */
+ if (sd->balance_interval < sd->max_interval)
+ sd->balance_interval *= 2;
+
+ return 0;
+}
+
+/*
+ * Check this_cpu to ensure it is balanced within domain. Attempt to move
+ * tasks if there is an imbalance.
+ *
+ * Called from schedule when this_rq is about to become idle (NEWLY_IDLE).
+ * this_rq is locked.
+ */
+static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
+ struct sched_domain *sd)
+{
+ struct sched_group *group;
+ runqueue_t *busiest = NULL;
+ unsigned long imbalance;
+ int nr_moved = 0;
+
+ group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE);
+ if (!group)
+ goto out;
+
+ busiest = find_busiest_queue(group);
+ if (!busiest || busiest == this_rq)
+ goto out;
+
+ /* Attempt to move tasks */
+ double_lock_balance(this_rq, busiest);
+
+ nr_moved = move_tasks(this_rq, this_cpu, busiest,
+ imbalance, sd, NEWLY_IDLE);
+
+ spin_unlock(&busiest->lock);
+
+out:
+ return nr_moved;
+}
+
+/*
+ * idle_balance is called by schedule() if this_cpu is about to become
+ * idle. Attempts to pull tasks from other CPUs.
+ */
+static inline void idle_balance(int this_cpu, runqueue_t *this_rq)
+{
+ struct sched_domain *sd;
+
+ for_each_domain(this_cpu, sd) {
+ if (sd->flags & SD_BALANCE_NEWIDLE) {
+ if (load_balance_newidle(this_cpu, this_rq, sd)) {
+ /* We've pulled tasks over so stop searching */
+ break;
+ }
+ }
+ }
+}
+
+/*
+ * active_load_balance is run by migration threads. It pushes a running
+ * task off the cpu. It can be required to correctly have at least 1 task
+ * running on each physical CPU where possible, and not have a physical /
+ * logical imbalance.
+ *
+ * Called with busiest locked.
+ */
+static void active_load_balance(runqueue_t *busiest, int busiest_cpu)
+{
+ struct sched_domain *sd;
+ struct sched_group *group, *busy_group;
+ int i;
+
+ if (busiest->nr_running <= 1)
+ return;
+
+ for_each_domain(busiest_cpu, sd)
+ if (cpu_isset(busiest->push_cpu, sd->span))
+ break;
+ if (!sd) {
+ WARN_ON(1);
+ return;
+ }
+
+ group = sd->groups;
+ while (!cpu_isset(busiest_cpu, group->cpumask))
+ group = group->next;
+ busy_group = group;
+
+ group = sd->groups;
+ do {
+ cpumask_t tmp;
+ runqueue_t *rq;
+ int push_cpu = 0;