2 * Generic VM initialization for x86-64 NUMA setups.
3 * Copyright 2002,2003 Andi Kleen, SuSE Labs.
5 #include <linux/kernel.h>
7 #include <linux/string.h>
8 #include <linux/init.h>
9 #include <linux/bootmem.h>
10 #include <linux/mmzone.h>
11 #include <linux/ctype.h>
12 #include <linux/module.h>
14 #include <asm/proto.h>
/* Per-node pglist_data pointers. setup_node_bootmem() fills in the entry for
   the node it sets up, and numa_init_array() writes further entries. */
22 struct pglist_data *node_data[MAXNODE];
/* Bootmem allocator state, one slot per node. setup_node_bootmem() points the
   node's ->bdata at its slot. */
23 bootmem_data_t plat_node_bdata[MAX_NUMNODES];
/* Node lookup table written by compute_hash_shift(): the slot at index
   (addr >> shift) holds the index of the node covering addr. While the table
   is being built, 0xff marks a slot that no node has claimed yet. */
26 u8 memnodemap[NODEMAPSIZE];
/* One byte per possible CPU (NR_CPUS entries). Presumably the node each CPU
   belongs to; the writes are not visible here -- confirm against callers. */
28 unsigned char cpu_to_node[NR_CPUS];
/* Per-node CPU mask. numa_add_cpu() sets a CPU's bit in its node's mask. */
29 cpumask_t node_to_cpumask[MAXNODE];
/* When non-zero, numa_initmem_init() does not call k8_scan_nodes() and logs
   "NUMA turned off". Init-only data (__initdata). */
31 static int numa_off __initdata;
/* NOTE(review): not referenced by any code visible in this file; the name
   suggests a bitmask of detected nodes -- confirm against its users. */
33 unsigned long nodes_present;
/*
 * Populate memnodemap from the nodes[] table: for each node, addr is stepped
 * through the node's range in (1UL << shift) increments and the slot at
 * (addr >> shift) is set to that node's index.
 *
 * nodes: array indexed by node number; the .start and .end fields of the
 *        first numnodes entries are read.
 *
 * NOTE(review): how `shift` is chosen, the inner loop's upper bound and the
 * return value are not visible in this listing -- confirm before relying on
 * any of them.
 */
35 int __init compute_hash_shift(struct node *nodes)
41 /* When in doubt use brute force. */
/* Start with every slot marked as unclaimed (0xff). */
43 memset(memnodemap,0xff,sizeof(*memnodemap) * NODEMAPSIZE);
44 for (i = 0; i < numnodes; i++) {
/* Zero-length node. Presumably skipped -- the statement guarded by this test
   is not visible here. */
45 if (nodes[i].start == nodes[i].end)
47 for (addr = nodes[i].start;
49 addr += (1UL << shift)) {
/* A slot that is already claimed by a different node means two nodes collide
   at this granularity; the node, shift, address and previous owner are
   reported. */
50 if (memnodemap[addr >> shift] != 0xff &&
51 memnodemap[addr >> shift] != i) {
53 "node %d shift %d addr %Lx conflict %d\n",
54 i, shift, addr, memnodemap[addr>>shift]);
/* Claim the slot for node i. */
57 memnodemap[addr >> shift] = i;
/* Reset the whole map to 0. NOTE(review): presumably the fallback taken when
   no workable shift exists -- the surrounding control flow is not visible. */
64 memset(memnodemap,0,sizeof(*memnodemap) * NODEMAPSIZE);
68 /* Initialize bootmem allocator for a node */
/*
 * nodeid: index of the node being set up (used to index node_data and
 *         plat_node_bdata).
 * start:  start of the node's memory range, as a byte address; rounded up to
 *         ZONE_ALIGN before use.
 * end:    end of the node's memory range, as a byte address.
 *
 * The node's pg_data_t and its bootmem bitmap are both placed inside the
 * node's own range, located with find_e820_area(); the function panics if
 * either cannot be placed. On completion numnodes covers nodeid and the node
 * is marked online.
 */
69 void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
71 unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size, bootmap_start;
72 unsigned long nodedata_phys;
/* Size of pg_data_t rounded up to a whole number of pages. */
73 const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
75 start = round_up(start, ZONE_ALIGN);
77 printk("Bootmem setup node %d %016lx-%016lx\n", nodeid, start, end);
/* Convert the byte range to page frame numbers. */
79 start_pfn = start >> PAGE_SHIFT;
80 end_pfn = end >> PAGE_SHIFT;
/* Find room for the pg_data_t within [start, end); -1 signals failure. */
82 nodedata_phys = find_e820_area(start, end, pgdat_size);
83 if (nodedata_phys == -1L)
84 panic("Cannot find memory pgdat in node %d\n", nodeid);
86 Dprintk("nodedata_phys %lx\n", nodedata_phys);
/* Publish the node's pg_data_t, zero it, then fill in the fields known so
   far: its bootmem state slot and the pfn range it spans. */
88 node_data[nodeid] = phys_to_virt(nodedata_phys);
89 memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
90 NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
91 NODE_DATA(nodeid)->node_start_pfn = start_pfn;
92 NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
94 /* Find a place for the bootmem map */
/* The search starts at the first page boundary after the pg_data_t so the two
   areas cannot overlap. */
95 bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
96 bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
97 bootmap_start = find_e820_area(bootmap_start, end, bootmap_pages<<PAGE_SHIFT);
98 if (bootmap_start == -1L)
/* NOTE(review): unlike the panic above, this message has no trailing newline,
   and "continuous" presumably means "contiguous". */
99 panic("Not enough continuous space for bootmap on node %d", nodeid);
100 Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages);
/* Hand the bitmap location (as a pfn) to the bootmem allocator for this node.
   NOTE(review): the remaining arguments of this call are not visible here. */
102 bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
103 bootmap_start >> PAGE_SHIFT,
/* Presumably releases the usable e820 ranges inside [start, end) to the
   node's bootmem allocator -- confirm against e820_bootmem_free(). */
106 e820_bootmem_free(NODE_DATA(nodeid), start, end);
/* Reserve the pg_data_t and the bitmap themselves in the node's allocator. */
108 reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
109 reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT);
/* Grow the global node count if this node index lies beyond it. */
110 if (nodeid + 1 > numnodes)
111 numnodes = nodeid + 1;
112 node_set_online(nodeid);
115 /* Initialize final allocator for a zone */
/*
 * Computes the per-zone page counts for node `nodeid` from its pfn range and
 * passes them to free_area_init_node().
 *
 * nodeid: index of the node whose zones are sized.
 */
116 void __init setup_node_zones(int nodeid)
118 unsigned long start_pfn, end_pfn;
119 unsigned long zones[MAX_NR_ZONES];
120 unsigned long dma_end_pfn;
/* Every zone starts out empty; only DMA and NORMAL are filled in below. */
122 memset(zones, 0, sizeof(unsigned long) * MAX_NR_ZONES);
124 start_pfn = node_start_pfn(nodeid);
125 end_pfn = node_end_pfn(nodeid);
127 Dprintk(KERN_INFO "setting up node %d %lx-%lx\n", nodeid, start_pfn, end_pfn);
129 /* All nodes > 0 have a zero length zone DMA */
/* First pfn past the DMA-addressable region. */
130 dma_end_pfn = __pa(MAX_DMA_ADDRESS) >> PAGE_SHIFT;
/* A node that starts below the DMA limit is split at that limit.
   NOTE(review): if end_pfn were below dma_end_pfn the ZONE_NORMAL subtraction
   would wrap (both are unsigned long) -- confirm callers guarantee such a node
   extends past the DMA limit. */
131 if (start_pfn < dma_end_pfn) {
132 zones[ZONE_DMA] = dma_end_pfn - start_pfn;
133 zones[ZONE_NORMAL] = end_pfn - dma_end_pfn;
/* Whole node counted as ZONE_NORMAL. Presumably the else branch of the test
   above -- the line introducing it is not visible here. */
135 zones[ZONE_NORMAL] = end_pfn - start_pfn;
/* NOTE(review): the trailing arguments of this call are not visible here. */
138 free_area_init_node(nodeid, NODE_DATA(nodeid), NULL, zones,
/*
 * Fills node_data[] for all MAXNODE indices by pointing entries at the
 * pg_data_t of an online node, cycling through the online nodes in turn, then
 * records CPU 0 in the CPU mask of its node.
 *
 * NOTE(review): the declarations, the initial value and advance of `rr`, and
 * any test that skips indices are not visible in this listing.
 */
142 void __init numa_init_array(void)
145 /* There are unfortunately some poorly designed mainboards around
146 that only connect memory to a single CPU. This breaks the 1:1 cpu->node
147 mapping. To avoid this fill in the mapping for all possible
148 CPUs, as the number of CPUs is not known yet.
149 We round robin the existing nodes. */
151 for (i = 0; i < MAXNODE; i++) {
/* Next online node at or after rr; wrap to the first online node when the
   search runs off the end of the map. */
154 rr = find_next_bit(node_online_map, MAX_NUMNODES, rr);
155 if (rr == MAX_NUMNODES)
156 rr = find_first_bit(node_online_map, MAX_NUMNODES);
/* Entry i now aliases the pg_data_t of online node rr. */
157 node_data[i] = node_data[rr];
/* Mark CPU 0 in the mask of the node CPU 0 maps to. */
162 set_bit(0, &node_to_cpumask[cpu_to_node(0)]);
/*
 * Sets up boot-time memory for the pfn range [start_pfn, end_pfn).
 * With CONFIG_K8_NUMA and numa_off clear, k8_scan_nodes() is tried first.
 * The visible remainder logs why no NUMA layout is used and sets up a single
 * node 0 covering the whole range.
 *
 * start_pfn, end_pfn: page frame numbers; shifted by PAGE_SHIFT to obtain the
 *                     byte addresses passed on.
 *
 * NOTE(review): the statement taken when k8_scan_nodes() returns 0 and the
 * matching #endif are not visible here; presumably the function returns at
 * that point -- confirm.
 */
165 void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
169 #ifdef CONFIG_K8_NUMA
/* k8_scan_nodes() is not called at all when numa_off is set. */
170 if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT))
173 printk(KERN_INFO "%s\n",
174 numa_off ? "NUMA turned off" : "No NUMA configuration found");
176 printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
177 start_pfn << PAGE_SHIFT,
178 end_pfn << PAGE_SHIFT);
179 /* setup dummy node covering all memory */
/* NOTE(review): the body of this loop is not visible here; presumably it
   assigns every CPU to node 0 -- confirm. */
183 for (i = 0; i < NR_CPUS; i++)
/* Node 0's CPU mask starts out containing only CPU 0. */
185 node_to_cpumask[0] = cpumask_of_cpu(0);
186 setup_node_bootmem(0, start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
/*
 * Records `cpu` in the CPU mask of the node it maps to.
 *
 * cpu: index of the CPU to add.
 *
 * NOTE(review): going by the comment below, a guard for the boot processor
 * presumably precedes the set_bit(); that line is not visible here. CPU 0's
 * bit is set by numa_init_array().
 */
189 __init void numa_add_cpu(int cpu)
191 /* BP is initialized elsewhere */
193 set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
/*
 * Accumulates the values returned by free_all_bootmem_node() for the nodes it
 * visits into `pages`.
 *
 * NOTE(review): the loop header that defines which node indices `i` covers,
 * and the return statement, are not visible here; presumably the accumulated
 * total is returned -- confirm.
 */
196 unsigned long __init numa_free_all_bootmem(void)
199 unsigned long pages = 0;
201 pages += free_all_bootmem_node(NODE_DATA(i));
/* Init-time (__init) entry point taking no arguments and returning nothing.
   NOTE(review): the body is not visible in this listing, so its behaviour is
   left undocumented here. */
206 void __init paging_init(void)
/*
 * Parses a NUMA option string.
 *
 * opt: option text; only its first three characters are compared with "off",
 *      so any value that merely begins with "off" also matches.
 *
 * NOTE(review): the statement executed on a match and the return value are
 * not visible here; presumably a match sets numa_off -- confirm.
 */
215 __init int numa_setup(char *opt)
217 if (!strncmp(opt,"off",3))
/* Symbols exported via EXPORT_SYMBOL. NOTE(review): memnode_shift is exported
   here but its definition is not among the declarations visible in this
   listing. */
222 EXPORT_SYMBOL(cpu_to_node);
223 EXPORT_SYMBOL(node_to_cpumask);
224 EXPORT_SYMBOL(memnode_shift);
225 EXPORT_SYMBOL(memnodemap);
226 EXPORT_SYMBOL(node_data);