diff --git a/arch/ppc64/mm/numa.c b/arch/ppc64/mm/numa.c
index 977140f78..83d7ec3b7 100644
--- a/arch/ppc64/mm/numa.c
+++ b/arch/ppc64/mm/numa.c
@@ -14,6 +14,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -38,6 +41,7 @@ int nr_cpus_in_node[MAX_NUMNODES] = { [0 ... (MAX_NUMNODES -1)] = 0};
 struct pglist_data *node_data[MAX_NUMNODES];
 bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES];
 static unsigned long node0_io_hole_size;
+static int min_common_depth;
 
 /*
  * We need somewhere to store start/span for each node until we have
@@ -63,7 +67,24 @@ static inline void map_cpu_to_node(int cpu, int node)
 	}
 }
 
-static struct device_node * __init find_cpu_node(unsigned int cpu)
+#ifdef CONFIG_HOTPLUG_CPU
+static void unmap_cpu_from_node(unsigned long cpu)
+{
+	int node = numa_cpu_lookup_table[cpu];
+
+	dbg("removing cpu %lu from node %d\n", cpu, node);
+
+	if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) {
+		cpu_clear(cpu, numa_cpumask_lookup_table[node]);
+		nr_cpus_in_node[node]--;
+	} else {
+		printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n",
+		       cpu, node);
+	}
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static struct device_node * __devinit find_cpu_node(unsigned int cpu)
 {
 	unsigned int hw_cpuid = get_hard_smp_processor_id(cpu);
 	struct device_node *cpu_node = NULL;
@@ -75,9 +96,11 @@ static struct device_node * __init find_cpu_node(unsigned int cpu)
 		interrupt_server = (unsigned int *)get_property(cpu_node,
 					"ibm,ppc-interrupt-server#s", &len);
 
+		len = len / sizeof(u32);
+
 		if (interrupt_server && (len > 0)) {
 			while (len--) {
-				if (interrupt_server[len-1] == hw_cpuid)
+				if (interrupt_server[len] == hw_cpuid)
 					return cpu_node;
 			}
 		} else {
@@ -93,26 +116,21 @@ static struct device_node * __init find_cpu_node(unsigned int cpu)
 
 /* must hold reference to node during call */
 static int *of_get_associativity(struct device_node *dev)
-	{
-	unsigned int *result;
-	int len;
-
-	result = (unsigned int *)get_property(dev, "ibm,associativity", &len);
-
-	if (len <= 0)
-		return NULL;
-
-	return result;
+{
+	return (unsigned int *)get_property(dev, "ibm,associativity", NULL);
 }
 
-static int of_node_numa_domain(struct device_node *device, int depth)
+static int of_node_numa_domain(struct device_node *device)
 {
 	int numa_domain;
 	unsigned int *tmp;
 
+	if (min_common_depth == -1)
+		return 0;
+
 	tmp = of_get_associativity(device);
-	if (tmp && (tmp[0] >= depth)) {
-		numa_domain = tmp[depth];
+	if (tmp && (tmp[0] >= min_common_depth)) {
+		numa_domain = tmp[min_common_depth];
 	} else {
 		dbg("WARNING: no NUMA information for %s\n",
 		    device->full_name);
@@ -135,7 +153,7 @@ static int of_node_numa_domain(struct device_node *device, int depth)
  *
  * - Dave Hansen
  */
-static int find_min_common_depth(void)
+static int __init find_min_common_depth(void)
 {
 	int depth;
 	unsigned int *ref_points;
@@ -182,11 +200,73 @@ static unsigned long read_cell_ul(struct device_node *device, unsigned int **buf
 	return result;
 }
 
+/*
+ * Figure out to which domain a cpu belongs and stick it there.
+ * Return the id of the domain used.
+ */
+static int numa_setup_cpu(unsigned long lcpu)
+{
+	int numa_domain = 0;
+	struct device_node *cpu = find_cpu_node(lcpu);
+
+	if (!cpu) {
+		WARN_ON(1);
+		goto out;
+	}
+
+	numa_domain = of_node_numa_domain(cpu);
+
+	if (numa_domain >= num_online_nodes()) {
+		/*
+		 * POWER4 LPAR uses 0xffff as invalid node,
+		 * dont warn in this case.
+		 */
+		if (numa_domain != 0xffff)
+			printk(KERN_ERR "WARNING: cpu %ld "
+			       "maps to invalid NUMA node %d\n",
+			       lcpu, numa_domain);
+		numa_domain = 0;
+	}
+out:
+	node_set_online(numa_domain);
+
+	map_cpu_to_node(lcpu, numa_domain);
+
+	of_node_put(cpu);
+
+	return numa_domain;
+}
+
+static int cpu_numa_callback(struct notifier_block *nfb,
+			     unsigned long action,
+			     void *hcpu)
+{
+	unsigned long lcpu = (unsigned long)hcpu;
+	int ret = NOTIFY_DONE;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+		if (min_common_depth == -1 || !numa_enabled)
+			map_cpu_to_node(lcpu, 0);
+		else
+			numa_setup_cpu(lcpu);
+		ret = NOTIFY_OK;
+		break;
+#ifdef CONFIG_HOTPLUG_CPU
+	case CPU_DEAD:
+	case CPU_UP_CANCELED:
+		unmap_cpu_from_node(lcpu);
+		ret = NOTIFY_OK;
+		break;
+#endif
+	}
+	return ret;
+}
+
 static int __init parse_numa_properties(void)
 {
 	struct device_node *cpu = NULL;
 	struct device_node *memory = NULL;
-	int depth;
 	int max_domain = 0;
 	long entries = lmb_end_of_DRAM() >> MEMORY_INCREMENT_SHIFT;
 	unsigned long i;
@@ -203,43 +283,34 @@ static int __init parse_numa_properties(void)
 	for (i = 0; i < entries ; i++)
 		numa_memory_lookup_table[i] = ARRAY_INITIALISER;
 
-	depth = find_min_common_depth();
+	min_common_depth = find_min_common_depth();
+
+	dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);
+	if (min_common_depth < 0)
+		return min_common_depth;
 
-	dbg("NUMA associativity depth for CPU/Memory: %d\n", depth);
-	if (depth < 0)
-		return depth;
+	max_domain = numa_setup_cpu(boot_cpuid);
 
+	/*
+	 * Even though we connect cpus to numa domains later in SMP init,
+	 * we need to know the maximum node id now. This is because each
+	 * node id must have NODE_DATA etc backing it.
+	 * As a result of hotplug we could still have cpus appear later on
+	 * with larger node ids. In that case we force the cpu into node 0.
+	 */
 	for_each_cpu(i) {
 		int numa_domain;
 
 		cpu = find_cpu_node(i);
 
 		if (cpu) {
-			numa_domain = of_node_numa_domain(cpu, depth);
+			numa_domain = of_node_numa_domain(cpu);
 			of_node_put(cpu);
 
-			if (numa_domain >= MAX_NUMNODES) {
-				/*
-				 * POWER4 LPAR uses 0xffff as invalid node,
-				 * dont warn in this case.
-				 */
-				if (numa_domain != 0xffff)
-					printk(KERN_ERR "WARNING: cpu %ld "
-					       "maps to invalid NUMA node %d\n",
-					       i, numa_domain);
-				numa_domain = 0;
-			}
-		} else {
-			dbg("WARNING: no NUMA information for cpu %ld\n", i);
-			numa_domain = 0;
+			if (numa_domain < MAX_NUMNODES &&
+			    max_domain < numa_domain)
+				max_domain = numa_domain;
 		}
-
-		node_set_online(numa_domain);
-
-		if (max_domain < numa_domain)
-			max_domain = numa_domain;
-
-		map_cpu_to_node(i, numa_domain);
 	}
 
 	memory = NULL;
@@ -264,7 +335,7 @@ new_range:
 		start = _ALIGN_DOWN(start, MEMORY_INCREMENT);
 		size = _ALIGN_UP(size, MEMORY_INCREMENT);
 
-		numa_domain = of_node_numa_domain(memory, depth);
+		numa_domain = of_node_numa_domain(memory);
 
 		if (numa_domain >= MAX_NUMNODES) {
 			if (numa_domain != 0xffff)
@@ -274,8 +345,6 @@ new_range:
 			numa_domain = 0;
 		}
 
-		node_set_online(numa_domain);
-
 		if (max_domain < numa_domain)
 			max_domain = numa_domain;
 
@@ -290,14 +359,19 @@ new_range:
 				init_node_data[numa_domain].node_start_pfn +
 				init_node_data[numa_domain].node_spanned_pages;
 			if (shouldstart != (start / PAGE_SIZE)) {
-				printk(KERN_ERR "WARNING: Hole in node, "
-						"disabling region start %lx "
-						"length %lx\n", start, size);
-				continue;
+				/* Revert to non-numa for now */
+				printk(KERN_ERR
+				       "WARNING: Unexpected node layout: "
+				       "region start %lx length %lx\n",
+				       start, size);
+				printk(KERN_ERR "NUMA is disabled\n");
+				goto err;
 			}
 			init_node_data[numa_domain].node_spanned_pages +=
 				size / PAGE_SIZE;
 		} else {
+			node_set_online(numa_domain);
+
 			init_node_data[numa_domain].node_start_pfn =
 				start / PAGE_SIZE;
 			init_node_data[numa_domain].node_spanned_pages =
@@ -313,9 +387,18 @@ new_range:
 			goto new_range;
 	}
 
-	numnodes = max_domain + 1;
+	for (i = 0; i <= max_domain; i++)
+		node_set_online(i);
 
 	return 0;
+err:
+	/* Something has gone wrong; revert any setup we've done */
+	for_each_node(i) {
+		node_set_offline(i);
+		init_node_data[i].node_start_pfn = 0;
+		init_node_data[i].node_spanned_pages = 0;
+	}
+	return -1;
 }
 
 static void __init setup_nonnuma(void)
@@ -338,8 +421,7 @@ static void __init setup_nonnuma(void)
 		numa_memory_lookup_table[i] = ARRAY_INITIALISER;
 	}
 
-	for (i = 0; i < NR_CPUS; i++)
-		map_cpu_to_node(i, 0);
+	map_cpu_to_node(boot_cpuid, 0);
 
 	node_set_online(0);
 
@@ -355,42 +437,14 @@ static void __init dump_numa_topology(void)
 {
 	unsigned int node;
-	unsigned int cpu, count;
+	unsigned int count;
 
-	for (node = 0; node < MAX_NUMNODES; node++) {
-		if (!node_online(node))
-			continue;
-
-		printk(KERN_INFO "Node %d CPUs:", node);
-
-		count = 0;
-		/*
-		 * If we used a CPU iterator here we would miss printing
-		 * the holes in the cpumap.
-		 */
-		for (cpu = 0; cpu < NR_CPUS; cpu++) {
-			if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) {
-				if (count == 0)
-					printk(" %u", cpu);
-				++count;
-			} else {
-				if (count > 1)
-					printk("-%u", cpu - 1);
-				count = 0;
-			}
-		}
+	if (min_common_depth == -1 || !numa_enabled)
+		return;
 
-		if (count > 1)
-			printk("-%u", NR_CPUS - 1);
-		printk("\n");
-	}
-
-	for (node = 0; node < MAX_NUMNODES; node++) {
+	for_each_online_node(node) {
 		unsigned long i;
 
-		if (!node_online(node))
-			continue;
-
 		printk(KERN_INFO "Node %d Memory:", node);
 
 		count = 0;
@@ -411,6 +465,7 @@ static void __init dump_numa_topology(void)
 			printk("-0x%lx", i);
 		printk("\n");
 	}
+	return;
 }
 
 /*
@@ -457,6 +512,10 @@ static unsigned long careful_allocation(int nid, unsigned long size,
 void __init do_init_bootmem(void)
 {
 	int nid;
+	static struct notifier_block ppc64_numa_nb = {
+		.notifier_call = cpu_numa_callback,
+		.priority = 1 /* Must run before sched domains notifier. */
+	};
 
 	min_low_pfn = 0;
 	max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
@@ -467,7 +526,9 @@ void __init do_init_bootmem(void)
 	else
 		dump_numa_topology();
 
-	for (nid = 0; nid < numnodes; nid++) {
+	register_cpu_notifier(&ppc64_numa_nb);
+
+	for_each_online_node(nid) {
 		unsigned long start_paddr, end_paddr;
 		int i;
 		unsigned long bootmem_paddr;
@@ -567,7 +628,7 @@ void __init paging_init(void)
 	memset(zones_size, 0, sizeof(zones_size));
 	memset(zholes_size, 0, sizeof(zholes_size));
 
-	for (nid = 0; nid < numnodes; nid++) {
+	for_each_online_node(nid) {
 		unsigned long start_pfn;
 		unsigned long end_pfn;
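
The core mechanism this patch introduces is a CPU hotplug notifier: rather than assigning every possible cpu to a node at boot, the NUMA code registers a callback that runs as each cpu comes up (CPU_UP_PREPARE) and is undone again on CPU_DEAD/CPU_UP_CANCELED. Below is a minimal, self-contained sketch of that pattern against the 2.6-era hotplug API; lookup_node_for_cpu() is a hypothetical stand-in for the firmware query that numa_setup_cpu() actually performs.

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/cpu.h>

/* Hypothetical: map a logical cpu to a node; the real code asks firmware. */
static int lookup_node_for_cpu(unsigned long cpu)
{
        return 0;       /* pretend everything lives on node 0 */
}

static int example_cpu_callback(struct notifier_block *nfb,
                                unsigned long action, void *hcpu)
{
        unsigned long cpu = (unsigned long)hcpu;

        switch (action) {
        case CPU_UP_PREPARE:
                /* Runs before the cpu comes online: set up per-cpu state. */
                printk(KERN_INFO "cpu %lu coming up, node %d\n",
                       cpu, lookup_node_for_cpu(cpu));
                return NOTIFY_OK;
#ifdef CONFIG_HOTPLUG_CPU
        case CPU_DEAD:
        case CPU_UP_CANCELED:
                /* Undo whatever CPU_UP_PREPARE established. */
                printk(KERN_INFO "cpu %lu gone\n", cpu);
                return NOTIFY_OK;
#endif
        }
        return NOTIFY_DONE;     /* not an event we care about */
}

static struct notifier_block example_cpu_nb = {
        .notifier_call = example_cpu_callback,
        /* Higher priority runs earlier; the patch uses 1 so the
         * cpu-to-node map is built before the sched-domains notifier. */
        .priority = 1,
};

static int __init example_init(void)
{
        register_cpu_notifier(&example_cpu_nb);
        return 0;
}
module_init(example_init);
MODULE_LICENSE("GPL");

This is also why do_init_bootmem() registers ppc64_numa_nb before walking the online nodes: once the notifier is in place, secondary cpus brought up later in SMP init (or hot-added afterwards) get their node assignment through the same path as the boot cpu.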
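After the rework, of_node_numa_domain() reduces to one array lookup: the "ibm,associativity" property is a cell array whose first cell gives the number of cells that follow, and the NUMA domain is simply the cell at min_common_depth (computed once from the firmware's reference points). A small userspace illustration of that indexing, with made-up property values:

#include <stdio.h>

int main(void)
{
        /* assoc[0] = number of following cells (the depth available) */
        unsigned int assoc[] = { 4, 0, 0, 1, 3 };       /* hypothetical */
        int min_common_depth = 3;                       /* from firmware */
        int numa_domain;

        if (assoc[0] >= min_common_depth)
                numa_domain = assoc[min_common_depth];
        else
                numa_domain = 0;        /* no usable NUMA information */

        printf("cpu sits in NUMA domain %d\n", numa_domain);
        return 0;
}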
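The find_cpu_node() hunk also fixes an off-by-one: get_property() reports the property length in bytes, so the patch first converts it to a u32 cell count and then scans with interrupt_server[len] inside while (len--), visiting indices len-1 down to 0. The old interrupt_server[len-1] skipped the last cell and read index -1 on the final pass. A standalone demonstration of the corrected reverse scan (values are made up):

#include <stdio.h>

int main(void)
{
        unsigned int interrupt_server[] = { 5, 7, 9 };  /* hypothetical */
        unsigned int hw_cpuid = 9;
        /* property length arrives in bytes; convert to cell count */
        int len = sizeof(interrupt_server) / sizeof(unsigned int);

        /* After len-- the index runs len-1, len-2, ..., 0, so every
         * cell is visited exactly once. */
        while (len--) {
                if (interrupt_server[len] == hw_cpuid) {
                        printf("matched cell %d\n", len);
                        break;
                }
        }
        return 0;
}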