patch-2.6.7-vs1.9.1.12
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 7baa4d4..45e8fba 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -39,6 +39,7 @@
 #include <linux/kernel.h>
 
 #include <linux/mm.h>
+#include <linux/sched.h>
 #include <linux/kernel_stat.h>
 #include <linux/smp_lock.h>
 #include <linux/irq.h>
@@ -936,7 +937,7 @@ static int boot_cpu_logical_apicid;
 /* Where the IO area was mapped on multiquad, always 0 otherwise */
 void *xquad_portio;
 
-int cpu_sibling_map[NR_CPUS] __cacheline_aligned;
+cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
 
 static void __init smp_boot_cpus(unsigned int max_cpus)
 {
@@ -955,6 +956,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 
        current_thread_info()->cpu = 0;
        smp_tune_scheduling();
+       cpus_clear(cpu_sibling_map[0]);
+       cpu_set(0, cpu_sibling_map[0]);
 
        /*
         * If we couldn't find an SMP configuration at boot time,
@@ -1081,34 +1084,39 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
        Dprintk("Boot done.\n");
 
        /*
-        * If Hyper-Threading is avaialble, construct cpu_sibling_map[], so
-        * that we can tell the sibling CPU efficiently.
+        * construct cpu_sibling_map[], so that we can tell sibling CPUs
+        * efficiently.
         */
-       if (cpu_has_ht && smp_num_siblings > 1) {
-               for (cpu = 0; cpu < NR_CPUS; cpu++)
-                       cpu_sibling_map[cpu] = NO_PROC_ID;
-               
-               for (cpu = 0; cpu < NR_CPUS; cpu++) {
-                       int     i;
-                       if (!cpu_isset(cpu, cpu_callout_map))
-                               continue;
+       for (cpu = 0; cpu < NR_CPUS; cpu++)
+               cpus_clear(cpu_sibling_map[cpu]);
+
+       for (cpu = 0; cpu < NR_CPUS; cpu++) {
+               int siblings = 0;
+               int i;
+               if (!cpu_isset(cpu, cpu_callout_map))
+                       continue;
 
+               if (smp_num_siblings > 1) {
                        for (i = 0; i < NR_CPUS; i++) {
-                               if (i == cpu || !cpu_isset(i, cpu_callout_map))
+                               if (!cpu_isset(i, cpu_callout_map))
                                        continue;
                                if (phys_proc_id[cpu] == phys_proc_id[i]) {
-                                       cpu_sibling_map[cpu] = i;
-                                       printk("cpu_sibling_map[%d] = %d\n", cpu, cpu_sibling_map[cpu]);
-                                       break;
+                                       siblings++;
+                                       cpu_set(i, cpu_sibling_map[cpu]);
                                }
                        }
-                       if (cpu_sibling_map[cpu] == NO_PROC_ID) {
-                               smp_num_siblings = 1;
-                               printk(KERN_WARNING "WARNING: No sibling found for CPU %d.\n", cpu);
-                       }
+               } else {
+                       siblings++;
+                       cpu_set(cpu, cpu_sibling_map[cpu]);
                }
+
+               if (siblings != smp_num_siblings)
+                       printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
        }
 
+       if (nmi_watchdog == NMI_LOCAL_APIC)
+               check_nmi_watchdog();
+
        smpboot_setup_io_apic();
 
        setup_boot_APIC_clock();
@@ -1120,6 +1128,209 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
                synchronize_tsc_bp();
 }
 
+#ifdef CONFIG_SCHED_SMT
+#ifdef CONFIG_NUMA
+static struct sched_group sched_group_cpus[NR_CPUS];
+static struct sched_group sched_group_phys[NR_CPUS];
+static struct sched_group sched_group_nodes[MAX_NUMNODES];
+static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
+static DEFINE_PER_CPU(struct sched_domain, phys_domains);
+static DEFINE_PER_CPU(struct sched_domain, node_domains);
+__init void arch_init_sched_domains(void)
+{
+       int i;
+       struct sched_group *first = NULL, *last = NULL;
+
+       /* Set up domains */
+       for_each_cpu(i) {
+               struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
+               struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+               struct sched_domain *node_domain = &per_cpu(node_domains, i);
+               int node = cpu_to_node(i);
+               cpumask_t nodemask = node_to_cpumask(node);
+
+               *cpu_domain = SD_SIBLING_INIT;
+               cpu_domain->span = cpu_sibling_map[i];
+               cpu_domain->parent = phys_domain;
+               cpu_domain->groups = &sched_group_cpus[i];
+
+               *phys_domain = SD_CPU_INIT;
+               phys_domain->span = nodemask;
+               phys_domain->parent = node_domain;
+               phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)];
+
+               *node_domain = SD_NODE_INIT;
+               node_domain->span = cpu_possible_map;
+               node_domain->groups = &sched_group_nodes[cpu_to_node(i)];
+       }
+
+       /* Set up CPU (sibling) groups */
+       for_each_cpu(i) {
+               struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
+               int j;
+               first = last = NULL;
+
+               if (i != first_cpu(cpu_domain->span))
+                       continue;
+
+               for_each_cpu_mask(j, cpu_domain->span) {
+                       struct sched_group *cpu = &sched_group_cpus[j];
+
+                       cpu->cpumask = CPU_MASK_NONE;
+                       cpu_set(j, cpu->cpumask);
+                       cpu->cpu_power = SCHED_LOAD_SCALE;
+
+                       if (!first)
+                               first = cpu;
+                       if (last)
+                               last->next = cpu;
+                       last = cpu;
+               }
+               last->next = first;
+       }
+
+       for (i = 0; i < MAX_NUMNODES; i++) {
+               int j;
+               cpumask_t nodemask;
+               struct sched_group *node = &sched_group_nodes[i];
+               cpus_and(nodemask, node_to_cpumask(i), cpu_possible_map);
+
+               if (cpus_empty(nodemask))
+                       continue;
+
+               first = last = NULL;
+               /* Set up physical groups */
+               for_each_cpu_mask(j, nodemask) {
+                       struct sched_domain *cpu_domain = &per_cpu(cpu_domains, j);
+                       struct sched_group *cpu = &sched_group_phys[j];
+
+                       if (j != first_cpu(cpu_domain->span))
+                               continue;
+
+                       cpu->cpumask = cpu_domain->span;
+                       /*
+                        * Make each extra sibling increase power by 10% of
+                        * the basic CPU. This is very arbitrary.
+                        */
+                       cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
+                       node->cpu_power += cpu->cpu_power;
+
+                       if (!first)
+                               first = cpu;
+                       if (last)
+                               last->next = cpu;
+                       last = cpu;
+               }
+               last->next = first;
+       }
+
+       /* Set up nodes */
+       first = last = NULL;
+       for (i = 0; i < MAX_NUMNODES; i++) {
+               struct sched_group *cpu = &sched_group_nodes[i];
+               cpumask_t nodemask;
+               cpus_and(nodemask, node_to_cpumask(i), cpu_possible_map);
+
+               if (cpus_empty(nodemask))
+                       continue;
+
+               cpu->cpumask = nodemask;
+               /* ->cpu_power already setup */
+
+               if (!first)
+                       first = cpu;
+               if (last)
+                       last->next = cpu;
+               last = cpu;
+       }
+       last->next = first;
+
+       mb();
+       for_each_cpu(i) {
+               struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
+               cpu_attach_domain(cpu_domain, i);
+       }
+}
+#else /* !CONFIG_NUMA */
+static struct sched_group sched_group_cpus[NR_CPUS];
+static struct sched_group sched_group_phys[NR_CPUS];
+static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
+static DEFINE_PER_CPU(struct sched_domain, phys_domains);
+__init void arch_init_sched_domains(void)
+{
+       int i;
+       struct sched_group *first = NULL, *last = NULL;
+
+       /* Set up domains */
+       for_each_cpu(i) {
+               struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
+               struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+
+               *cpu_domain = SD_SIBLING_INIT;
+               cpu_domain->span = cpu_sibling_map[i];
+               cpu_domain->parent = phys_domain;
+               cpu_domain->groups = &sched_group_cpus[i];
+
+               *phys_domain = SD_CPU_INIT;
+               phys_domain->span = cpu_possible_map;
+               phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)];
+       }
+
+       /* Set up CPU (sibling) groups */
+       for_each_cpu(i) {
+               struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
+               int j;
+               first = last = NULL;
+
+               if (i != first_cpu(cpu_domain->span))
+                       continue;
+
+               for_each_cpu_mask(j, cpu_domain->span) {
+                       struct sched_group *cpu = &sched_group_cpus[j];
+
+                       cpus_clear(cpu->cpumask);
+                       cpu_set(j, cpu->cpumask);
+                       cpu->cpu_power = SCHED_LOAD_SCALE;
+
+                       if (!first)
+                               first = cpu;
+                       if (last)
+                               last->next = cpu;
+                       last = cpu;
+               }
+               last->next = first;
+       }
+
+       first = last = NULL;
+       /* Set up physical groups */
+       for_each_cpu(i) {
+               struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
+               struct sched_group *cpu = &sched_group_phys[i];
+
+               if (i != first_cpu(cpu_domain->span))
+                       continue;
+
+               cpu->cpumask = cpu_domain->span;
+               /* See SMT+NUMA setup for comment */
+               cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
+
+               if (!first)
+                       first = cpu;
+               if (last)
+                       last->next = cpu;
+               last = cpu;
+       }
+       last->next = first;
+
+       mb();
+       for_each_cpu(i) {
+               struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
+               cpu_attach_domain(cpu_domain, i);
+       }
+}
+#endif /* CONFIG_NUMA */
+#endif /* CONFIG_SCHED_SMT */
+
 /* These are wrappers to interface to the new boot process.  Someone
    who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
 void __init smp_prepare_cpus(unsigned int max_cpus)
@@ -1158,8 +1369,7 @@ int __devinit __cpu_up(unsigned int cpu)
 void __init smp_cpus_done(unsigned int max_cpus)
 {
 #ifdef CONFIG_X86_IO_APIC
-       cpumask_t targets = CPU_MASK_ALL;
-       setup_ioapic_dest(targets);
+       setup_ioapic_dest();
 #endif
        zap_low_mappings();
 }
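
Note on the arch_init_sched_domains() variants added above: within each domain the sched_group entries form a circular singly linked list (the final last->next = first closes the ring), and the cpu_power of a physical group is scaled so that every extra sibling contributes 10% of a basic CPU. The following standalone sketch reproduces only that power calculation; it assumes SCHED_LOAD_SCALE is 128 (its 2.6-era value), and mask_weight()/group_power() are illustrative helpers, not kernel API.

/*
 * Standalone illustration (not kernel code) of the cpu_power scaling used
 * in arch_init_sched_domains() above: each extra HT sibling in a physical
 * package adds 10% of a basic CPU.  SCHED_LOAD_SCALE is assumed to be 128,
 * its 2.6-era value; mask_weight() stands in for the kernel's cpus_weight().
 */
#include <stdio.h>

#define SCHED_LOAD_SCALE 128UL

/* Population count over a toy cpumask (plain unsigned long). */
static unsigned int mask_weight(unsigned long mask)
{
	unsigned int w = 0;

	while (mask) {
		w += mask & 1;
		mask >>= 1;
	}
	return w;
}

/* Same arithmetic as the sched_group_phys[] setup in the patch. */
static unsigned long group_power(unsigned long sibling_mask)
{
	return SCHED_LOAD_SCALE +
	       SCHED_LOAD_SCALE * (mask_weight(sibling_mask) - 1) / 10;
}

int main(void)
{
	printf("1 sibling : cpu_power = %lu\n", group_power(0x1UL)); /* 128 */
	printf("2 siblings: cpu_power = %lu\n", group_power(0x3UL)); /* 140 */
	printf("4 siblings: cpu_power = %lu\n", group_power(0xfUL)); /* 166 */
	return 0;
}

Under these assumptions a two-sibling package gets cpu_power 140 rather than 256, so the load balancer prefers spreading tasks across physical packages before doubling up on siblings of one core.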