/*
* linux/arch/arm/mm/init.c
*
- * Copyright (C) 1995-2002 Russell King
+ * Copyright (C) 1995-2005 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/mman.h>
+#include <linux/nodemask.h>
#include <linux/initrd.h>
#include <asm/mach-types.h>
-#include <asm/hardware.h>
#include <asm/setup.h>
+#include <asm/sizes.h>
#include <asm/tlb.h>
#include <asm/mach/arch.h>
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
-extern char _stext, _text, _etext, _end, __init_begin, __init_end;
+extern void _stext, _text, _etext, __data_start, _end, __init_begin, __init_end;
extern unsigned long phys_initrd_start;
extern unsigned long phys_initrd_size;
show_free_areas();
printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
- for (node = 0; node < numnodes; node++) {
+ for_each_online_node(node) {
struct page *page, *end;
page = NODE_MEM_MAP(node);
printk("%d pages swap cached\n", cached);
}
-struct node_info {
- unsigned int start;
- unsigned int end;
- int bootmap_pages;
-};
-
-#define O_PFN_DOWN(x) ((x) >> PAGE_SHIFT)
-#define V_PFN_DOWN(x) O_PFN_DOWN(__pa(x))
+/*
+ * Return the pmd entry covering @virt, looked up beneath @pgd.
+ */
+static inline pmd_t *pmd_off(pgd_t *pgd, unsigned long virt)
+{
+	pmd_t *pmd = pmd_offset(pgd, virt);
+
+	return pmd;
+}
-#define O_PFN_UP(x) (PAGE_ALIGN(x) >> PAGE_SHIFT)
-#define V_PFN_UP(x) O_PFN_UP(__pa(x))
+/*
+ * Return the pmd entry covering the kernel virtual address @virt.
+ */
+static inline pmd_t *pmd_off_k(unsigned long virt)
+{
+	pgd_t *pgd = pgd_offset_k(virt);
+
+	return pmd_off(pgd, virt);
+}
-#define PFN_SIZE(x) ((x) >> PAGE_SHIFT)
-#define PFN_RANGE(s,e) PFN_SIZE(PAGE_ALIGN((unsigned long)(e)) - \
- (((unsigned long)(s)) & PAGE_MASK))
+/*
+ * Iterate over every memory bank in @mi that belongs to node @no, using
+ * @iter as the bank index.
+ *
+ * @mi and @no are parenthesised so that arbitrary expressions (for
+ * example "&meminfo") can be passed without precedence surprises.
+ *
+ * Note: this expands to "for (...) if (...)", so the statement that
+ * follows is the body of the "if"; do not attach an "else" to it.
+ */
+#define for_each_nodebank(iter,mi,no)				\
+	for (iter = 0; iter < (mi)->nr_banks; iter++)		\
+		if ((mi)->bank[iter].node == (no))
/*
* FIXME: We really want to avoid allocating the bootmap bitmap
{
unsigned int start_pfn, bank, bootmap_pfn;
- start_pfn = V_PFN_UP(&_end);
+ start_pfn = PAGE_ALIGN(__pa(&_end)) >> PAGE_SHIFT;
bootmap_pfn = 0;
- for (bank = 0; bank < mi->nr_banks; bank ++) {
+ for_each_nodebank(bank, mi, node) {
unsigned int start, end;
- if (mi->bank[bank].node != node)
- continue;
-
- start = O_PFN_UP(mi->bank[bank].start);
- end = O_PFN_DOWN(mi->bank[bank].size +
- mi->bank[bank].start);
+ start = mi->bank[bank].start >> PAGE_SHIFT;
+ end = (mi->bank[bank].size +
+ mi->bank[bank].start) >> PAGE_SHIFT;
if (end < start_pfn)
continue;
return bootmap_pfn;
}
-/*
- * Scan the memory info structure and pull out:
- * - the end of memory
- * - the number of nodes
- * - the pfn range of each node
- * - the number of bootmem bitmap pages
- */
-static unsigned int __init
-find_memend_and_nodes(struct meminfo *mi, struct node_info *np)
-{
- unsigned int i, bootmem_pages = 0, memend_pfn = 0;
-
- for (i = 0; i < MAX_NUMNODES; i++) {
- np[i].start = -1U;
- np[i].end = 0;
- np[i].bootmap_pages = 0;
- }
-
- for (i = 0; i < mi->nr_banks; i++) {
- unsigned long start, end;
- int node;
-
- if (mi->bank[i].size == 0) {
- /*
- * Mark this bank with an invalid node number
- */
- mi->bank[i].node = -1;
- continue;
- }
-
- node = mi->bank[i].node;
-
- if (node >= numnodes) {
- numnodes = node + 1;
-
- /*
- * Make sure we haven't exceeded the maximum number
- * of nodes that we have in this configuration. If
- * we have, we're in trouble. (maybe we ought to
- * limit, instead of bugging?)
- */
- if (numnodes > MAX_NUMNODES)
- BUG();
- }
-
- /*
- * Get the start and end pfns for this bank
- */
- start = O_PFN_UP(mi->bank[i].start);
- end = O_PFN_DOWN(mi->bank[i].start + mi->bank[i].size);
-
- if (np[node].start > start)
- np[node].start = start;
-
- if (np[node].end < end)
- np[node].end = end;
-
- if (memend_pfn < end)
- memend_pfn = end;
- }
-
- /*
- * Calculate the number of pages we require to
- * store the bootmem bitmaps.
- */
- for (i = 0; i < numnodes; i++) {
- if (np[i].end == 0)
- continue;
-
- np[i].bootmap_pages = bootmem_bootmap_pages(np[i].end -
- np[i].start);
- bootmem_pages += np[i].bootmap_pages;
- }
-
- high_memory = __va(memend_pfn << PAGE_SHIFT);
-
- /*
- * This doesn't seem to be used by the Linux memory
- * manager any more. If we can get rid of it, we
- * also get rid of some of the stuff above as well.
- */
- max_low_pfn = memend_pfn - O_PFN_DOWN(PHYS_OFFSET);
- max_pfn = memend_pfn - O_PFN_DOWN(PHYS_OFFSET);
-
- return bootmem_pages;
-}
-
static int __init check_initrd(struct meminfo *mi)
{
int initrd_node = -2;
/*
* Reserve the various regions of node 0
*/
-static __init void reserve_node_zero(unsigned int bootmap_pfn, unsigned int bootmap_pages)
+static __init void reserve_node_zero(pg_data_t *pgdat)
{
- pg_data_t *pgdat = NODE_DATA(0);
unsigned long res_size = 0;
/*
* Register the kernel text and data with bootmem.
* Note that this can only be in node 0.
*/
+#ifdef CONFIG_XIP_KERNEL
+ reserve_bootmem_node(pgdat, __pa(&__data_start), &_end - &__data_start);
+#else
reserve_bootmem_node(pgdat, __pa(&_stext), &_end - &_stext);
+#endif
/*
* Reserve the page tables. These are already in use,
reserve_bootmem_node(pgdat, __pa(swapper_pg_dir),
PTRS_PER_PGD * sizeof(pgd_t));
- /*
- * And don't forget to reserve the allocator bitmap,
- * which will be freed later.
- */
- reserve_bootmem_node(pgdat, bootmap_pfn << PAGE_SHIFT,
- bootmap_pages << PAGE_SHIFT);
-
/*
* Hmm... This should go elsewhere, but we really really need to
* stop things allocating the low memory; ideally we need a better
reserve_bootmem_node(pgdat, PHYS_OFFSET, res_size);
}
-/*
- * Register all available RAM in this node with the bootmem allocator.
- */
-static inline void free_bootmem_node_bank(int node, struct meminfo *mi)
+void __init build_mem_type_table(void);
+void __init create_mapping(struct map_desc *md);
+
+/*
+ * Set up bootmem and the zones for a single node.
+ *
+ * @node: node number to initialise
+ * @initrd_node: node holding the initrd, as returned by check_initrd()
+ * @mi: memory bank information
+ *
+ * Every bank belonging to @node is mapped with create_mapping() and
+ * handed to bootmem. The bootmem bitmap, the initrd (if it lives in
+ * this node) and, for node 0, the regions handled by reserve_node_zero()
+ * are then reserved before free_area_init_node() is called.
+ *
+ * Returns the end PFN of the node, or 0 if the node has no memory.
+ */
+static unsigned long __init
+bootmem_init_node(int node, int initrd_node, struct meminfo *mi)
{
- pg_data_t *pgdat = NODE_DATA(node);
- int bank;
+ unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES];
+ unsigned long start_pfn, end_pfn, boot_pfn;
+ unsigned int boot_pages;
+ pg_data_t *pgdat;
+ int i;
- for (bank = 0; bank < mi->nr_banks; bank++)
- if (mi->bank[bank].node == node)
- free_bootmem_node(pgdat, mi->bank[bank].start,
- mi->bank[bank].size);
-}
+ start_pfn = -1UL;
+ end_pfn = 0;
-/*
- * Initialise the bootmem allocator for all nodes. This is called
- * early during the architecture specific initialisation.
- */
-static void __init bootmem_init(struct meminfo *mi)
-{
- struct node_info node_info[MAX_NUMNODES], *np = node_info;
- unsigned int bootmap_pages, bootmap_pfn, map_pg;
- int node, initrd_node;
+ /*
+ * Calculate the pfn range, and map the memory banks for this node.
+ */
+ for_each_nodebank(i, mi, node) {
+ unsigned long start, end;
+ struct map_desc map;
- bootmap_pages = find_memend_and_nodes(mi, np);
- bootmap_pfn = find_bootmap_pfn(0, mi, bootmap_pages);
- initrd_node = check_initrd(mi);
+ start = mi->bank[i].start >> PAGE_SHIFT;
+ end = (mi->bank[i].start + mi->bank[i].size) >> PAGE_SHIFT;
- map_pg = bootmap_pfn;
+ if (start_pfn > start)
+ start_pfn = start;
+ if (end_pfn < end)
+ end_pfn = end;
+
+ map.pfn = __phys_to_pfn(mi->bank[i].start);
+ map.virtual = __phys_to_virt(mi->bank[i].start);
+ map.length = mi->bank[i].size;
+ map.type = MT_MEMORY;
+
+ create_mapping(&map);
+ }
/*
- * Initialise the bootmem nodes.
- *
- * What we really want to do is:
- *
- * unmap_all_regions_except_kernel();
- * for_each_node_in_reverse_order(node) {
- * map_node(node);
- * allocate_bootmem_map(node);
- * init_bootmem_node(node);
- * free_bootmem_node(node);
- * }
- *
- * but this is a 2.5-type change. For now, we just set
- * the nodes up in reverse order.
- *
- * (we could also do with rolling bootmem_init and paging_init
- * into one generic "memory_init" type function).
+ * If there is no memory in this node, ignore it.
*/
- np += numnodes - 1;
- for (node = numnodes - 1; node >= 0; node--, np--) {
- /*
- * If there are no pages in this node, ignore it.
- * Note that node 0 must always have some pages.
- */
- if (np->end == 0) {
- if (node == 0)
- BUG();
- continue;
- }
+ if (end_pfn == 0)
+ return end_pfn;
- /*
- * Initialise the bootmem allocator.
- */
- init_bootmem_node(NODE_DATA(node), map_pg, np->start, np->end);
- free_bootmem_node_bank(node, mi);
- map_pg += np->bootmap_pages;
+ /*
+ * Allocate the bootmem bitmap pages.
+ */
+ boot_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
+ boot_pfn = find_bootmap_pfn(node, mi, boot_pages);
- /*
- * If this is node 0, we need to reserve some areas ASAP -
- * we may use bootmem on node 0 to setup the other nodes.
- */
- if (node == 0)
- reserve_node_zero(bootmap_pfn, bootmap_pages);
- }
+ /*
+ * Initialise the bootmem allocator for this node, handing the
+ * memory banks over to bootmem.
+ */
+ node_set_online(node);
+ pgdat = NODE_DATA(node);
+ init_bootmem_node(pgdat, boot_pfn, start_pfn, end_pfn);
+
+ for_each_nodebank(i, mi, node)
+ free_bootmem_node(pgdat, mi->bank[i].start, mi->bank[i].size);
+ /*
+ * Reserve the bootmem bitmap for this node.
+ */
+ reserve_bootmem_node(pgdat, boot_pfn << PAGE_SHIFT,
+ boot_pages << PAGE_SHIFT);
#ifdef CONFIG_BLK_DEV_INITRD
- if (phys_initrd_size && initrd_node >= 0) {
- reserve_bootmem_node(NODE_DATA(initrd_node), phys_initrd_start,
+ /*
+ * If the initrd is in this node, reserve its memory.
+ */
+ if (node == initrd_node) {
+ reserve_bootmem_node(pgdat, phys_initrd_start,
phys_initrd_size);
initrd_start = __phys_to_virt(phys_initrd_start);
initrd_end = initrd_start + phys_initrd_size;
}
#endif
- BUG_ON(map_pg != bootmap_pfn + bootmap_pages);
+ /*
+ * Finally, reserve any node zero regions.
+ */
+ if (node == 0)
+ reserve_node_zero(pgdat);
+
+ /*
+ * initialise the zones within this node.
+ */
+ memset(zone_size, 0, sizeof(zone_size));
+ memset(zhole_size, 0, sizeof(zhole_size));
+
+ /*
+ * The size of this node has already been determined. If we need
+ * to do anything fancy with the allocation of this memory to the
+ * zones, now is the time to do it.
+ */
+ zone_size[0] = end_pfn - start_pfn;
+
+ /*
+ * For each bank in this node, calculate the size of the holes.
+ * holes = node_size - sum(bank_sizes_in_node)
+ */
+ zhole_size[0] = zone_size[0];
+ for_each_nodebank(i, mi, node)
+ zhole_size[0] -= mi->bank[i].size >> PAGE_SHIFT;
+
+ /*
+ * Adjust the sizes according to any special requirements for
+ * this machine type.
+ */
+ arch_adjust_zones(node, zone_size, zhole_size);
+
+ free_area_init_node(node, pgdat, zone_size, start_pfn, zhole_size);
+
+ return end_pfn;
}
-/*
- * paging_init() sets up the page tables, initialises the zone memory
- * maps, and sets up the zero page, bad page and bad page tables.
- */
-void __init paging_init(struct meminfo *mi, struct machine_desc *mdesc)
+/*
+ * Initialise the bootmem allocator for every node.
+ *
+ * @mi: memory bank information; banks that are empty or name a node
+ * >= MAX_NUMNODES have their node number set to -1 here.
+ *
+ * The mappings below PAGE_OFFSET (skipping the XIP kernel, if any) and
+ * those from the end of the first memory bank up to VMALLOC_END are
+ * cleared, then bootmem_init_node() is run for each node. The highest
+ * end PFN it returns is used to set high_memory, max_low_pfn and max_pfn.
+ */
+static void __init bootmem_init(struct meminfo *mi)
{
- void *zero_page;
- int node;
+ unsigned long addr, memend_pfn = 0;
+ int node, initrd_node, i;
- bootmem_init(mi);
+ /*
+ * Invalidate the node number for empty or invalid memory banks
+ */
+ for (i = 0; i < mi->nr_banks; i++)
+ if (mi->bank[i].size == 0 || mi->bank[i].node >= MAX_NUMNODES)
+ mi->bank[i].node = -1;
memcpy(&meminfo, mi, sizeof(meminfo));
/*
- * allocate the zero page. Note that we count on this going ok.
+ * Clear out all the mappings below the kernel image.
*/
- zero_page = alloc_bootmem_low_pages(PAGE_SIZE);
+ for (addr = 0; addr < MODULE_START; addr += PGDIR_SIZE)
+ pmd_clear(pmd_off_k(addr));
+#ifdef CONFIG_XIP_KERNEL
+ /* The XIP kernel is mapped in the module area -- skip over it */
+ addr = ((unsigned long)&_etext + PGDIR_SIZE - 1) & PGDIR_MASK;
+#endif
+ for ( ; addr < PAGE_OFFSET; addr += PGDIR_SIZE)
+ pmd_clear(pmd_off_k(addr));
/*
- * initialise the page tables.
+ * Clear out all the kernel space mappings, except for the first
+ * memory bank, up to the end of the vmalloc region.
*/
- memtable_init(mi);
- if (mdesc->map_io)
- mdesc->map_io();
- flush_tlb_all();
+ for (addr = __phys_to_virt(mi->bank[0].start + mi->bank[0].size);
+ addr < VMALLOC_END; addr += PGDIR_SIZE)
+ pmd_clear(pmd_off_k(addr));
/*
- * initialise the zones within each node
+ * Locate which node contains the ramdisk image, if any.
*/
- for (node = 0; node < numnodes; node++) {
- unsigned long zone_size[MAX_NR_ZONES];
- unsigned long zhole_size[MAX_NR_ZONES];
- struct bootmem_data *bdata;
- pg_data_t *pgdat;
- int i;
+ initrd_node = check_initrd(mi);
- /*
- * Initialise the zone size information.
- */
- for (i = 0; i < MAX_NR_ZONES; i++) {
- zone_size[i] = 0;
- zhole_size[i] = 0;
- }
+ /*
+ * Run through each node initialising the bootmem allocator.
+ */
+ for_each_node(node) {
+ unsigned long end_pfn;
- pgdat = NODE_DATA(node);
- bdata = pgdat->bdata;
+ end_pfn = bootmem_init_node(node, initrd_node, mi);
/*
- * The size of this node has already been determined.
- * If we need to do anything fancy with the allocation
- * of this memory to the zones, now is the time to do
- * it.
+ * Remember the highest memory PFN.
*/
- zone_size[0] = bdata->node_low_pfn -
- (bdata->node_boot_start >> PAGE_SHIFT);
+ if (end_pfn > memend_pfn)
+ memend_pfn = end_pfn;
+ }
- /*
- * If this zone has zero size, skip it.
- */
- if (!zone_size[0])
- continue;
+ high_memory = __va(memend_pfn << PAGE_SHIFT);
- /*
- * For each bank in this node, calculate the size of the
- * holes. holes = node_size - sum(bank_sizes_in_node)
- */
- zhole_size[0] = zone_size[0];
- for (i = 0; i < mi->nr_banks; i++) {
- if (mi->bank[i].node != node)
- continue;
+ /*
+ * This doesn't seem to be used by the Linux memory manager any
+ * more, but is used by ll_rw_block. If we can get rid of it, we
+ * also get rid of some of the stuff above as well.
+ *
+ * Note: max_low_pfn and max_pfn reflect the number of _pages_ in
+ * the system, not the maximum PFN.
+ */
+ max_pfn = max_low_pfn = memend_pfn - PHYS_PFN_OFFSET;
+}
- zhole_size[0] -= mi->bank[i].size >> PAGE_SHIFT;
- }
+/*
+ * Set up the device mappings. Since we clear out the page tables for all
+ * mappings above VMALLOC_END, we will remove any debug device mappings.
+ * This means you have to be careful how you debug this function, or any
+ * called function. This means you can't use any function or debugging
+ * method which may touch any device, otherwise the kernel _will_ crash.
+ *
+ * @mdesc: machine descriptor; its map_io() hook, when set, is called to
+ * map in the statically mapped devices.
+ */
+static void __init devicemaps_init(struct machine_desc *mdesc)
+{
+ struct map_desc map;
+ unsigned long addr;
+ void *vectors;
- /*
- * Adjust the sizes according to any special
- * requirements for this machine type.
- */
- arch_adjust_zones(node, zone_size, zhole_size);
+ /*
+ * Allocate the vector page early.
+ */
+ vectors = alloc_bootmem_low_pages(PAGE_SIZE);
+ BUG_ON(!vectors);
- free_area_init_node(node, pgdat, zone_size,
- bdata->node_boot_start >> PAGE_SHIFT, zhole_size);
- }
+ /*
+ * Clear the pmd entries from VMALLOC_END upwards. The loop only
+ * terminates when addr becomes zero, i.e. once it has wrapped past
+ * the top of the address space.
+ */
+ for (addr = VMALLOC_END; addr; addr += PGDIR_SIZE)
+ pmd_clear(pmd_off_k(addr));
-#ifndef CONFIG_DISCONTIGMEM
- mem_map = contig_page_data.node_mem_map;
+ /*
+ * Map the kernel if it is XIP.
+ * It is always first in the module area.
+ */
+#ifdef CONFIG_XIP_KERNEL
+ map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & PGDIR_MASK);
+ map.virtual = MODULE_START;
+ map.length = ((unsigned long)&_etext - map.virtual + ~PGDIR_MASK) & PGDIR_MASK;
+ map.type = MT_ROM;
+ create_mapping(&map);
+#endif
+
+ /*
+ * Map the cache flushing regions.
+ */
+#ifdef FLUSH_BASE
+ map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS);
+ map.virtual = FLUSH_BASE;
+ map.length = SZ_1M;
+ map.type = MT_CACHECLEAN;
+ create_mapping(&map);
#endif
+#ifdef FLUSH_BASE_MINICACHE
+ map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + SZ_1M);
+ map.virtual = FLUSH_BASE_MINICACHE;
+ map.length = SZ_1M;
+ map.type = MT_MINICLEAN;
+ create_mapping(&map);
+#endif
+
+ /*
+ * Create a mapping for the machine vectors at the high-vectors
+ * location (0xffff0000). If we aren't using high-vectors, also
+ * create a mapping at the low-vectors virtual address.
+ */
+ map.pfn = __phys_to_pfn(virt_to_phys(vectors));
+ map.virtual = 0xffff0000;
+ map.length = PAGE_SIZE;
+ map.type = MT_HIGH_VECTORS;
+ create_mapping(&map);
+
+ if (!vectors_high()) {
+ map.virtual = 0;
+ map.type = MT_LOW_VECTORS;
+ create_mapping(&map);
+ }
+
+ /*
+ * Ask the machine support to map in the statically mapped devices.
+ */
+ if (mdesc->map_io)
+ mdesc->map_io();
+
+ /*
+ * Finally flush the caches and tlb to ensure that we're in a
+ * consistent state wrt the writebuffer. This also ensures that
+ * any write-allocated cache lines in the vector page are written
+ * back. After this point, we can start to touch devices again.
+ */
+ local_flush_tlb_all();
+ flush_cache_all();
+}
+
+/*
+ * paging_init() sets up the page tables, initialises the zone memory
+ * maps, and sets up the zero page, bad page and bad page tables.
+ */
+void __init paging_init(struct meminfo *mi, struct machine_desc *mdesc)
+{
+ void *zero_page;
+
+ build_mem_type_table();
+ bootmem_init(mi);
+ devicemaps_init(mdesc);
+
+ top_pmd = pmd_off_k(0xffff0000);
/*
- * finish off the bad pages once
- * the mem_map is initialised
+ * allocate the zero page. Note that we count on this going ok.
*/
+ zero_page = alloc_bootmem_low_pages(PAGE_SIZE);
memzero(zero_page, PAGE_SIZE);
empty_zero_page = virt_to_page(zero_page);
flush_dcache_page(empty_zero_page);
for (; addr < end; addr += PAGE_SIZE) {
struct page *page = virt_to_page(addr);
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
free_page(addr);
totalram_pages++;
}
printk(KERN_INFO "Freeing %s memory: %dK\n", s, size);
}
+/*
+ * Give back to bootmem the whole pages of the memmap array that lie
+ * between the struct page for @start_pfn and the struct page for
+ * @end_pfn on @node.
+ */
+static inline void
+free_memmap(int node, unsigned long start_pfn, unsigned long end_pfn)
+{
+	unsigned long first, last;
+
+	/*
+	 * Physical addresses of the two struct page entries: the start
+	 * is rounded up to a page boundary, the end is rounded down.
+	 */
+	first = PAGE_ALIGN(__pa(pfn_to_page(start_pfn)));
+	last = __pa(pfn_to_page(end_pfn)) & PAGE_MASK;
+
+	/*
+	 * Nothing to release unless at least one whole page
+	 * sits between the two.
+	 */
+	if (first >= last)
+		return;
+
+	free_bootmem_node(NODE_DATA(node), first, last - first);
+}
+
+/*
+ * The mem_map array can get very big. Release the parts of it which
+ * describe the gaps between consecutive memory banks of @node.
+ */
+static void __init free_unused_memmap_node(int node, struct meminfo *mi)
+{
+	unsigned long last_end = 0;
+	unsigned int bank;
+
+	/*
+	 * [FIXME] The banks are assumed to be listed in ascending
+	 * address order. That may not hold, especially when the
+	 * user supplied the layout on the command line.
+	 */
+	for_each_nodebank(bank, mi, node) {
+		unsigned long first = mi->bank[bank].start >> PAGE_SHIFT;
+
+		if (first < last_end) {
+			printk(KERN_ERR "MEM: unordered memory banks. "
+				"Not freeing memmap.\n");
+			break;
+		}
+
+		/*
+		 * There was an earlier bank and it does not abut this
+		 * one: free the memmap covering the gap between them.
+		 */
+		if (last_end != 0 && last_end != first)
+			free_memmap(node, last_end, first);
+
+		last_end = (mi->bank[bank].start +
+			    mi->bank[bank].size) >> PAGE_SHIFT;
+	}
+}
+
/*
* mem_init() marks the free areas in the mem_map and tells us how much
* memory is free. This is done after various parts of the system have
int i, node;
codepages = &_etext - &_text;
- datapages = &_end - &_etext;
+ datapages = &_end - &__data_start;
initpages = &__init_end - &__init_begin;
#ifndef CONFIG_DISCONTIGMEM
max_mapnr = virt_to_page(high_memory) - mem_map;
#endif
- /*
- * We may have non-contiguous memory.
- */
- if (meminfo.nr_banks != 1)
- create_memmap_holes(&meminfo);
-
/* this will put all unused low memory onto the freelists */
- for (node = 0; node < numnodes; node++) {
+ for_each_online_node(node) {
pg_data_t *pgdat = NODE_DATA(node);
+ free_unused_memmap_node(node, &meminfo);
+
if (pgdat->node_spanned_pages != 0)
totalram_pages += free_all_bootmem_node(pgdat);
}