+#endif /* CONFIG_NUMA */
+
+void __init build_all_zonelists(void)
+{
+ int i;
+
+ for_each_online_node(i)
+ build_zonelists(NODE_DATA(i));
+ printk(KERN_INFO "Built %i zonelists\n", num_online_nodes());
+ cpuset_init_current_mems_allowed();
+}
+
+/*
+ * Helper functions to size the waitqueue hash table.
+ * Essentially these want to choose hash table sizes sufficiently
+ * large so that collisions trying to wait on pages are rare.
+ * But in fact the number of active page waitqueues on typical
+ * systems is ridiculously low, less than 200, so even this is
+ * conservative, although it may seem large.
+ *
+ * The constant PAGES_PER_WAITQUEUE specifies the ratio of pages to
+ * waitqueues, i.e. the size of the waitq table given the number of pages.
+ */
+#define PAGES_PER_WAITQUEUE 256
+
+static inline unsigned long wait_table_size(unsigned long pages)
+{
+ unsigned long size = 1;
+
+ pages /= PAGES_PER_WAITQUEUE;
+
+ while (size < pages)
+ size <<= 1;
+
+ /*
+ * Once we have dozens or even hundreds of threads sleeping
+ * on IO we've got bigger problems than wait queue collision.
+ * Limit the size of the wait table to something reasonable.
+ */
+ size = min(size, 4096UL);
+
+ return max(size, 4UL);
+}
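+
+/*
+ * Worked example (illustrative only, assuming 4KB pages): a 1GB zone
+ * has 262144 pages, and 262144 / PAGES_PER_WAITQUEUE = 1024, already
+ * a power of two, so the table gets 1024 entries.  Zones of 4GB and
+ * above hit the 4096-entry cap, and very small zones still get the
+ * 4-entry minimum.
+ */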
+
+/*
+ * This is an integer logarithm so that shifts can be used later
+ * to extract the more random high bits from the multiplicative
+ * hash function before the remainder is taken.
+ */
+static inline unsigned long wait_table_bits(unsigned long size)
+{
+ return ffz(~size);
+}
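+
+/*
+ * For example, wait_table_bits(1024) == 10: ~1024 has exactly one
+ * zero bit, at position 10, so ffz(~size) is log2(size) for the
+ * power-of-two sizes produced by wait_table_size() above.
+ */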
+
+#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
+
+static void __init calculate_zone_totalpages(struct pglist_data *pgdat,
+ unsigned long *zones_size, unsigned long *zholes_size)
+{
+ unsigned long realtotalpages, totalpages = 0;
+ int i;
+
+ for (i = 0; i < MAX_NR_ZONES; i++)
+ totalpages += zones_size[i];
+ pgdat->node_spanned_pages = totalpages;
+
+ realtotalpages = totalpages;
+ if (zholes_size)
+ for (i = 0; i < MAX_NR_ZONES; i++)
+ realtotalpages -= zholes_size[i];
+ pgdat->node_present_pages = realtotalpages;
+ printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id, realtotalpages);
+}
+
+/*
+ * Initially all pages are reserved - free ones are freed
+ * up by free_all_bootmem() once the early boot process is
+ * done. Non-atomic initialization, single-pass.
+ */
+void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
+ unsigned long start_pfn)
+{
+ struct page *page;
+ unsigned long end_pfn = start_pfn + size;
+ unsigned long pfn;
+
+ for (pfn = start_pfn; pfn < end_pfn; pfn++) {
+ if (!early_pfn_valid(pfn))
+ continue;
+ page = pfn_to_page(pfn);
+ set_page_links(page, zone, nid, pfn);
+ init_page_count(page);
+ reset_page_mapcount(page);
+ SetPageReserved(page);
+ INIT_LIST_HEAD(&page->lru);
+#ifdef WANT_PAGE_VIRTUAL
+ /* The shift won't overflow because ZONE_NORMAL is below 4G. */
+ if (!is_highmem_idx(zone))
+ set_page_address(page, __va(pfn << PAGE_SHIFT));
+#endif
+ }
+}
+
+void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone,
+ unsigned long size)
+{
+ int order;
+ for (order = 0; order < MAX_ORDER; order++) {
+ INIT_LIST_HEAD(&zone->free_area[order].free_list);
+ zone->free_area[order].nr_free = 0;
+ }
+}
+
+#define ZONETABLE_INDEX(x, zone_nr) (((x) << ZONES_SHIFT) | (zone_nr))
+void zonetable_add(struct zone *zone, int nid, int zid, unsigned long pfn,
+ unsigned long size)
+{
+ unsigned long snum = pfn_to_section_nr(pfn);
+ unsigned long end = pfn_to_section_nr(pfn + size);
+
+ if (FLAGS_HAS_NODE)
+ zone_table[ZONETABLE_INDEX(nid, zid)] = zone;
+ else
+ for (; snum <= end; snum++)
+ zone_table[ZONETABLE_INDEX(snum, zid)] = zone;
+}
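+
+/*
+ * Illustrative note: with FLAGS_HAS_NODE the table holds one entry
+ * per (node, zone) pair, e.g. node 1 / zone index 2 lands at
+ * (1 << ZONES_SHIFT) | 2.  Otherwise there is one entry per
+ * (section, zone) pair, and the loop above fills every section that
+ * the new span touches.
+ */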
+
+#ifndef __HAVE_ARCH_MEMMAP_INIT
+#define memmap_init(size, nid, zone, start_pfn) \
+ memmap_init_zone((size), (nid), (zone), (start_pfn))
+#endif
+
+static int __cpuinit zone_batchsize(struct zone *zone)
+{
+ int batch;
+
+ /*
+ * The per-cpu-pages pools are set to around 1/1000th of the
+ * size of the zone, but no more than 512KB.
+ *
+ * OK, so we don't know how big the cache is. So guess.
+ */
+ batch = zone->present_pages / 1024;
+ if (batch * PAGE_SIZE > 512 * 1024)
+ batch = (512 * 1024) / PAGE_SIZE;
+ batch /= 4; /* We effectively *= 4 below */
+ if (batch < 1)
+ batch = 1;
+
+ /*
+ * Clamp the batch to a 2^n - 1 value. Having a power
+ * of 2 value was found to be more likely to have
+ * suboptimal cache aliasing properties in some cases.
+ *
+ * For example if 2 tasks are alternately allocating
+ * batches of pages, one task can end up with a lot
+ * of pages of one half of the possible page colors
+ * and the other with pages of the other colors.
+ */
+ batch = (1 << (fls(batch + batch/2)-1)) - 1;
+
+ return batch;
+}
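+
+/*
+ * Worked example (illustrative, assuming 4KB pages): a zone with
+ * 262144 present pages gives 262144 / 1024 = 256 pages (1MB), which
+ * exceeds the 512KB cap, so batch drops to 128, then to 32 after the
+ * /= 4; the rounding step yields (1 << (fls(48) - 1)) - 1 = 31.
+ */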
+
+inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
+{
+ struct per_cpu_pages *pcp;
+
+ memset(p, 0, sizeof(*p));
+
+ pcp = &p->pcp[0]; /* hot */
+ pcp->count = 0;
+ pcp->high = 6 * batch;
+ pcp->batch = max(1UL, 1 * batch);
+ INIT_LIST_HEAD(&pcp->list);
+
+ pcp = &p->pcp[1]; /* cold */
+ pcp->count = 0;
+ pcp->high = 2 * batch;
+ pcp->batch = max(1UL, batch/2);
+ INIT_LIST_HEAD(&pcp->list);
+}
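+
+/*
+ * Example (illustrative): with batch = 31 the hot list holds at most
+ * high = 186 pages and moves 31 pages per bulk refill or free, while
+ * the cold list uses high = 62 and a batch of 15.
+ */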
+
+/*
+ * setup_pagelist_highmark() sets the high water mark of the hot
+ * per-cpu pagelist of pageset p to the value high, and scales the
+ * batch size accordingly.
+ */
+
+static void setup_pagelist_highmark(struct per_cpu_pageset *p,
+ unsigned long high)
+{
+ struct per_cpu_pages *pcp;
+
+ pcp = &p->pcp[0]; /* hot list */
+ pcp->high = high;
+ pcp->batch = max(1UL, high/4);
+ if ((high/4) > (PAGE_SHIFT * 8))
+ pcp->batch = PAGE_SHIFT * 8;
+}
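+
+/*
+ * Example (illustrative, assuming PAGE_SHIFT == 12): with
+ * percpu_pagelist_fraction = 8, a zone of 262144 present pages gets
+ * high = 32768; high/4 = 8192 exceeds PAGE_SHIFT * 8 = 96, so the
+ * batch is clamped to 96.
+ */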
+
+#ifdef CONFIG_NUMA
+/*
+ * Boot pageset table. One per cpu which is going to be used for all
+ * zones and all nodes. The parameters will be set in such a way
+ * that an item put on a list will immediately be handed over to
+ * the buddy list. This is safe since pageset manipulation is done
+ * with interrupts disabled.
+ *
+ * Some NUMA counter updates may also be caught by the boot pagesets.
+ *
+ * The boot_pagesets must be kept even after bootup is complete for
+ * unused processors and/or zones. They do play a role for bootstrapping
+ * hotplugged processors.
+ *
+ * zoneinfo_show() and maybe other functions do
+ * not check if the processor is online before following the pageset pointer.
+ * Other parts of the kernel may not check if the zone is available.
+ */
+static struct per_cpu_pageset boot_pageset[NR_CPUS];
+
+/*
+ * Dynamically allocate memory for the
+ * per cpu pageset array in struct zone.
+ */
+static int __cpuinit process_zones(int cpu)
+{
+ struct zone *zone, *dzone;
+
+ for_each_zone(zone) {
+
+ zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset),
+ GFP_KERNEL, cpu_to_node(cpu));
+ if (!zone_pcp(zone, cpu))
+ goto bad;
+
+ setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone));
+
+ if (percpu_pagelist_fraction)
+ setup_pagelist_highmark(zone_pcp(zone, cpu),
+ (zone->present_pages / percpu_pagelist_fraction));
+ }
+
+ return 0;
+bad:
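+ /*
+ * Unwind: for_each_zone() walks zones in the same order as the
+ * allocation loop above, so freeing until we reach the zone that
+ * failed releases exactly the pagesets this call set up.
+ */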
+ for_each_zone(dzone) {
+ if (dzone == zone)
+ break;
+ kfree(zone_pcp(dzone, cpu));
+ zone_pcp(dzone, cpu) = NULL;
+ }
+ return -ENOMEM;
+}
+
+static inline void free_zone_pagesets(int cpu)