/*
 * arch/ia64/kernel/domain.c
 * Architecture specific sched-domains builder.
 *
 * Copyright (C) 2004 Jesse Barnes
 * Copyright (C) 2004 Silicon Graphics, Inc.
 */

#include <linux/sched.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/topology.h>

#define SD_NODES_PER_DOMAIN 6

#ifdef CONFIG_NUMA
/**
 * find_next_best_node - find the next node to include in a sched_domain
 * @node: node whose sched_domain we're building
 * @used_nodes: nodes already in the sched_domain
 *
 * Find the next node to include in a given scheduling domain.  Simply
 * finds the closest node not already in the @used_nodes map.
 *
 * Should use nodemask_t.
 */
static int __devinit find_next_best_node(int node, unsigned long *used_nodes)
{
        int i, n, val, min_val, best_node = 0;

        min_val = INT_MAX;

        for (i = 0; i < MAX_NUMNODES; i++) {
                /* Start at @node */
                n = (node + i) % MAX_NUMNODES;

                if (!nr_cpus_node(n))
                        continue;

                /* Skip already used nodes */
                if (test_bit(n, used_nodes))
                        continue;

                /* Simple min distance search */
                val = node_distance(node, n);

                if (val < min_val) {
                        min_val = val;
                        best_node = n;
                }
        }

        set_bit(best_node, used_nodes);
        return best_node;
}
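/*
 * For illustration, take a hypothetical four-node box where the
 * node_distance() values from node 0 are: node 1 = 20, node 2 = 40,
 * node 3 = 20, and used_nodes initially holds only node 0.  The scan skips
 * node 0 (already used), keeps node 1 as the running minimum (20) and does
 * not replace it with node 3 because the comparison is strict, so the call
 * returns 1 and marks it used.  Subsequent calls return 3, then 2.
 */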

/**
 * sched_domain_node_span - get a cpumask for a node's sched_domain
 * @node: node whose cpumask we're constructing
 *
 * Given a node, construct a good cpumask for its sched_domain to span.  It
 * should be one that prevents unnecessary balancing, but also spreads tasks
 * out optimally.
 */
static cpumask_t __devinit sched_domain_node_span(int node)
{
        int i;
        cpumask_t span, nodemask;
        DECLARE_BITMAP(used_nodes, MAX_NUMNODES);

        cpus_clear(span);
        bitmap_zero(used_nodes, MAX_NUMNODES);

        nodemask = node_to_cpumask(node);
        cpus_or(span, span, nodemask);
        set_bit(node, used_nodes);

        for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
                int next_node = find_next_best_node(node, used_nodes);
                nodemask = node_to_cpumask(next_node);
                cpus_or(span, span, nodemask);
        }

        return span;
}
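/*
 * The span is @node's own cpus plus those of up to SD_NODES_PER_DOMAIN - 1
 * of its nearest neighbours.  Continuing the hypothetical distance table
 * above, sched_domain_node_span(0) on a four-node box covers nodes 0, 1, 3
 * and 2 in that order, i.e. every cpu, since four nodes fit comfortably
 * inside a six-node domain.
 */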
#endif

/*
 * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we
 * can switch it on easily if needed.
 */
#ifdef CONFIG_SCHED_SMT
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
static struct sched_group sched_group_cpus[NR_CPUS];
static int __devinit cpu_to_cpu_group(int cpu)
{
        return cpu;
}
#endif

static DEFINE_PER_CPU(struct sched_domain, phys_domains);
static struct sched_group sched_group_phys[NR_CPUS];
static int __devinit cpu_to_phys_group(int cpu)
{
#ifdef CONFIG_SCHED_SMT
        return first_cpu(cpu_sibling_map[cpu]);
#else
        return cpu;
#endif
}
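/*
 * For example, if cpus 4 and 5 are hyperthread siblings of one package,
 * cpu_to_phys_group(4) and cpu_to_phys_group(5) both return 4 under
 * CONFIG_SCHED_SMT, so the two threads share a single entry in
 * sched_group_phys[].  Without SMT the mapping is the identity and every
 * cpu gets its own physical group.
 */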

#ifdef CONFIG_NUMA
/*
 * init_sched_build_groups() can't handle what we want to do with node
 * groups, so we roll our own here.  Each node gets its own, dynamically
 * allocated, list of groups.
 */
static DEFINE_PER_CPU(struct sched_domain, node_domains);
static struct sched_group *sched_group_nodes[MAX_NUMNODES];
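/*
 * sched_group_nodes[i] is the head of a singly linked ring of groups built
 * in arch_init_sched_domains() below: the first group covers node i's own
 * cpus, the following ones cover the other nodes in i's domain span, and
 * the final ->next pointer wraps back around to the head.
 */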

static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
static struct sched_group sched_group_allnodes[MAX_NUMNODES];

static int __devinit cpu_to_allnodes_group(int cpu)
{
        return cpu_to_node(cpu);
}
#endif

/*
 * Set up scheduler domains and groups.  Callers must hold the hotplug lock.
 */
void __devinit arch_init_sched_domains(void)
{
        int i;
        cpumask_t cpu_default_map;

        /*
         * Set up a mask for cpus without special-case scheduling
         * requirements.  For now this just excludes isolated cpus, but it
         * could be used to exclude other special cases in the future.
         */
        cpus_complement(cpu_default_map, cpu_isolated_map);
        cpus_and(cpu_default_map, cpu_default_map, cpu_online_map);
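        /*
         * For example, on a hypothetical 4-cpu box with cpu 3 isolated
         * (cpu_isolated_map = { 3 }), cpu_default_map ends up as { 0, 1, 2 }
         * and cpu 3 simply keeps the dummy domain it already has.
         */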

        /*
         * Set up domains. Isolated domains just stay on the dummy domain.
         */
        for_each_cpu_mask(i, cpu_default_map) {
                int node = cpu_to_node(i);
                int group;
                struct sched_domain *sd = NULL, *p;
                cpumask_t nodemask = node_to_cpumask(node);

                cpus_and(nodemask, nodemask, cpu_default_map);

#ifdef CONFIG_NUMA
                if (num_online_cpus()
                                > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
                        sd = &per_cpu(allnodes_domains, i);
                        *sd = SD_ALLNODES_INIT;
                        sd->span = cpu_default_map;
                        group = cpu_to_allnodes_group(i);
                        sd->groups = &sched_group_allnodes[group];
                        p = sd;
                } else
                        p = NULL;

                sd = &per_cpu(node_domains, i);
                *sd = SD_NODE_INIT;
                sd->span = sched_domain_node_span(node);
                sd->parent = p;
                cpus_and(sd->span, sd->span, cpu_default_map);
#endif

                p = sd;
                sd = &per_cpu(phys_domains, i);
                group = cpu_to_phys_group(i);
                *sd = SD_CPU_INIT;
                sd->span = nodemask;
                sd->parent = p;
                sd->groups = &sched_group_phys[group];

#ifdef CONFIG_SCHED_SMT
                p = sd;
                sd = &per_cpu(cpu_domains, i);
                group = cpu_to_cpu_group(i);
                *sd = SD_SIBLING_INIT;
                sd->span = cpu_sibling_map[i];
                cpus_and(sd->span, sd->span, cpu_default_map);
                sd->parent = p;
                sd->groups = &sched_group_cpus[group];
#endif
        }
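        /*
         * At this point every cpu i in cpu_default_map has, bottom up:
         *
         *   per_cpu(cpu_domains, i)       its siblings            (CONFIG_SCHED_SMT)
         *   per_cpu(phys_domains, i)      the cpus of its node
         *   per_cpu(node_domains, i)      its node's domain span  (CONFIG_NUMA)
         *   per_cpu(allnodes_domains, i)  all of cpu_default_map  (large boxes)
         *
         * linked through ->parent.  The allnodes level only exists when the
         * machine has more online cpus than SD_NODES_PER_DOMAIN nodes' worth;
         * otherwise the node domain (or, without CONFIG_NUMA, the physical
         * domain) is the top level.
         */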

#ifdef CONFIG_SCHED_SMT
        /* Set up CPU (sibling) groups */
        for_each_cpu_mask(i, cpu_default_map) {
                cpumask_t this_sibling_map = cpu_sibling_map[i];
                cpus_and(this_sibling_map, this_sibling_map, cpu_default_map);
                if (i != first_cpu(this_sibling_map))
                        continue;

                init_sched_build_groups(sched_group_cpus, this_sibling_map,
                                                &cpu_to_cpu_group);
        }
#endif

        /* Set up physical groups */
        for (i = 0; i < MAX_NUMNODES; i++) {
                cpumask_t nodemask = node_to_cpumask(i);

                cpus_and(nodemask, nodemask, cpu_default_map);
                if (cpus_empty(nodemask))
                        continue;

                init_sched_build_groups(sched_group_phys, nodemask,
                                                &cpu_to_phys_group);
        }
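        /*
         * init_sched_build_groups() lives in kernel/sched.c; the intent here
         * is that each call carves its span into one sched_group per distinct
         * value of the group function and links those groups into a ring, so
         * sched_group_phys[] now holds, for every node, one populated group
         * per physical package (or per cpu when CONFIG_SCHED_SMT is off).
         */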

#ifdef CONFIG_NUMA
        init_sched_build_groups(sched_group_allnodes, cpu_default_map,
                                &cpu_to_allnodes_group);

        for (i = 0; i < MAX_NUMNODES; i++) {
                /* Set up node groups */
                struct sched_group *sg, *prev;
                cpumask_t nodemask = node_to_cpumask(i);
                cpumask_t domainspan;
                cpumask_t covered = CPU_MASK_NONE;
                int j;

                cpus_and(nodemask, nodemask, cpu_default_map);
                if (cpus_empty(nodemask))
                        continue;

                domainspan = sched_domain_node_span(i);
                cpus_and(domainspan, domainspan, cpu_default_map);

                sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
                sched_group_nodes[i] = sg;
                for_each_cpu_mask(j, nodemask) {
                        struct sched_domain *sd;
                        sd = &per_cpu(node_domains, j);
                        sd->groups = sg;
                        if (sd->groups == NULL) {
                                /* Turn off balancing if we have no groups */
                                sd->flags = 0;
                        }
                }
                if (!sg) {
                        printk(KERN_WARNING
                        "Can not alloc domain group for node %d\n", i);
                        continue;
                }
                sg->cpu_power = 0;
                sg->cpumask = nodemask;
                cpus_or(covered, covered, nodemask);
                prev = sg;

                for (j = 0; j < MAX_NUMNODES; j++) {
                        cpumask_t tmp, notcovered;
                        int n = (i + j) % MAX_NUMNODES;

                        cpus_complement(notcovered, covered);
                        cpus_and(tmp, notcovered, cpu_default_map);
                        cpus_and(tmp, tmp, domainspan);
                        if (cpus_empty(tmp))
                                break;

                        nodemask = node_to_cpumask(n);
                        cpus_and(tmp, tmp, nodemask);
                        if (cpus_empty(tmp))
                                continue;

                        sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
                        if (!sg) {
                                printk(KERN_WARNING
                                "Can not alloc domain group for node %d\n", n);
                                break;
                        }
                        sg->cpu_power = 0;
                        sg->cpumask = tmp;
                        cpus_or(covered, covered, tmp);
                        prev->next = sg;
                        prev = sg;
                }
                prev->next = sched_group_nodes[i];
        }
#endif
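        /*
         * Sketch of the result: if node 0's domain span covers nodes 0, 1
         * and 2, the ring at sched_group_nodes[0] now starts with a group
         * for node 0's own cpus, followed by a group for each of nodes 1
         * and 2 that still had uncovered cpus, with the last ->next pointing
         * back at the head.
         */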

        /* Calculate CPU power for physical packages and nodes */
        for_each_cpu_mask(i, cpu_default_map) {
                int power;
                struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
                sd = &per_cpu(cpu_domains, i);
                power = SCHED_LOAD_SCALE;
                sd->groups->cpu_power = power;
#endif

                sd = &per_cpu(phys_domains, i);
                power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
                                (cpus_weight(sd->groups->cpumask)-1) / 10;
                sd->groups->cpu_power = power;

#ifdef CONFIG_NUMA
                sd = &per_cpu(allnodes_domains, i);
                if (sd->groups) {
                        power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
                                (cpus_weight(sd->groups->cpumask)-1) / 10;
                        sd->groups->cpu_power = power;
                }
#endif
        }
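        /*
         * Worked example, assuming the then-current SCHED_LOAD_SCALE of 128:
         * a group whose cpumask holds four cpus gets 128 + 128 * 3 / 10 = 166,
         * i.e. every cpu beyond the first adds only a tenth of a full cpu's
         * weight at this level of the hierarchy.
         */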

#ifdef CONFIG_NUMA
        for (i = 0; i < MAX_NUMNODES; i++) {
                struct sched_group *sg = sched_group_nodes[i];
                int j;

                if (sg == NULL)
                        continue;
next_sg:
                for_each_cpu_mask(j, sg->cpumask) {
                        struct sched_domain *sd;
                        int power;

                        sd = &per_cpu(phys_domains, j);
                        if (j != first_cpu(sd->groups->cpumask)) {
                                /*
                                 * Only add "power" once for each
                                 * physical package.
                                 */
                                continue;
                        }
                        power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
                                (cpus_weight(sd->groups->cpumask)-1) / 10;

                        sg->cpu_power += power;
                }
                sg = sg->next;
                if (sg != sched_group_nodes[i])
                        goto next_sg;
        }
#endif
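        /*
         * Each node-level group's cpu_power thus ends up as the sum of the
         * powers of the physical packages it contains.  Sketch: a node made
         * of two 2-thread packages, again with SCHED_LOAD_SCALE at 128,
         * contributes 2 * (128 + 128 * 1 / 10) = 280 to its group.
         */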

        /* Attach the domains */
        for_each_online_cpu(i) {
                struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
                sd = &per_cpu(cpu_domains, i);
#else
                sd = &per_cpu(phys_domains, i);
#endif
                cpu_attach_domain(sd, i);
        }
}

void __devinit arch_destroy_sched_domains(void)
{
#ifdef CONFIG_NUMA
        int i;
        for (i = 0; i < MAX_NUMNODES; i++) {
                struct sched_group *oldsg, *sg = sched_group_nodes[i];
                if (sg == NULL)
                        continue;
                sg = sg->next;
next_sg:
                oldsg = sg;
                sg = sg->next;
                kfree(oldsg);
                if (oldsg != sched_group_nodes[i])
                        goto next_sg;
                sched_group_nodes[i] = NULL;
        }
#endif
}