X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fx86_64%2Fkernel%2Fe820.c;h=998b8721bac8cd2a4e82bb56105154b9140c3186;hb=a2f44b27303a5353859d77a3e96a1d3f33f56ab7;hp=5b65992b80d39c7b7468e2c47362911489ccccba;hpb=70790a4b5cd6c0291e5b1a2836e2832d46036ac6;p=linux-2.6.git diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index 5b65992b8..998b8721b 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c @@ -1,26 +1,37 @@ /* * Handle the memory map. * The functions here do the job until bootmem takes over. - * $Id: e820.c,v 1.4 2002/09/19 19:25:32 ak Exp $ + * + * Getting sanitize_e820_map() in sync with i386 version by applying change: + * - Provisions for empty E820 memory regions (reported by certain BIOSes). + * Alex Achenbach , December 2002. + * Venkatesh Pallipadi + * */ -#include #include #include #include #include #include #include +#include +#include +#include + +#include #include #include #include #include +#include -extern char _end[]; +struct e820map e820; /* * PFN of last memory page. */ unsigned long end_pfn; +EXPORT_SYMBOL(end_pfn); /* * end_pfn only includes RAM, while end_pfn_map includes all e820 entries. @@ -32,7 +43,7 @@ unsigned long end_pfn_map; /* * Last pfn which the user wants to use. */ -unsigned long end_user_pfn = MAXMEM>>PAGE_SHIFT; +static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; extern struct resource code_resource, data_resource; @@ -43,13 +54,13 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size) /* various gunk below that needed for SMP startup */ if (addr < 0x8000) { - *addrp = 0x8000; + *addrp = PAGE_ALIGN(0x8000); return 1; } /* direct mapping tables of the kernel */ if (last >= table_start<= INITRD_START && addr < INITRD_START+INITRD_SIZE) { - *addrp = INITRD_START + INITRD_SIZE; + *addrp = PAGE_ALIGN(INITRD_START + INITRD_SIZE); return 1; } #endif - /* kernel code + 640k memory hole (later should not be needed, but - be paranoid for now) */ - if (last >= 640*1024 && addr < __pa_symbol(&_end)) { - *addrp = __pa_symbol(&_end); + /* kernel code */ + if (last >= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) { + *addrp = PAGE_ALIGN(__pa_symbol(&_end)); return 1; } + + if (last >= ebda_addr && addr < ebda_addr + ebda_size) { + *addrp = PAGE_ALIGN(ebda_addr + ebda_size); + return 1; + } + /* XXX ramdisk image here? */ return 0; } -int __init e820_mapped(unsigned long start, unsigned long end, unsigned type) +/* + * This function checks if any part of the range is mapped + * with type. + */ +int __meminit +e820_any_mapped(unsigned long start, unsigned long end, unsigned type) { int i; for (i = 0; i < e820.nr_map; i++) { struct e820entry *ei = &e820.map[i]; if (type && ei->type != type) continue; - if (ei->addr >= end || ei->addr + ei->size < start) + if (ei->addr >= end || ei->addr + ei->size <= start) continue; return 1; } return 0; } +/* + * This function checks if the entire range is mapped with type. + * + * Note: this function only works correct if the e820 table is sorted and + * not-overlapping, which is the case + */ +int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type) +{ + int i; + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + if (type && ei->type != type) + continue; + /* is the region (part) in overlap with the current region ?*/ + if (ei->addr >= end || ei->addr + ei->size <= start) + continue; + + /* if the region is at the beginning of we move + * start to the end of the region since it's ok until there + */ + if (ei->addr <= start) + start = ei->addr + ei->size; + /* if start is now at or beyond end, we're done, full coverage */ + if (start >= end) + return 1; /* we're done */ + } + return 0; +} + /* * Find a free area in a specific range. */ @@ -100,9 +150,9 @@ unsigned long __init find_e820_area(unsigned long start, unsigned long end, unsi addr = start; if (addr > ei->addr + ei->size) continue; - while (bad_addr(&addr, size) && addr+size < ei->addr + ei->size) + while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size) ; - last = addr + size; + last = PAGE_ALIGN(addr) + size; if (last > ei->addr + ei->size) continue; if (last > end) @@ -112,59 +162,14 @@ unsigned long __init find_e820_area(unsigned long start, unsigned long end, unsi return -1UL; } -/* - * Free bootmem based on the e820 table for a node. - */ -void __init e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end) -{ - int i; - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - unsigned long last, addr; - - if (ei->type != E820_RAM || - ei->addr+ei->size <= start || - ei->addr > end) - continue; - - addr = round_up(ei->addr, PAGE_SIZE); - if (addr < start) - addr = start; - - last = round_down(ei->addr + ei->size, PAGE_SIZE); - if (last >= end) - last = end; - - if (last > addr && last-addr >= PAGE_SIZE) - free_bootmem_node(pgdat, addr, last-addr); - } -} - /* * Find the highest page frame number we have available */ unsigned long __init e820_end_of_ram(void) { - int i; unsigned long end_pfn = 0; + end_pfn = find_max_pfn_with_active_regions(); - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - unsigned long start, end; - - start = round_up(ei->addr, PAGE_SIZE); - end = round_down(ei->addr + ei->size, PAGE_SIZE); - if (start >= end) - continue; - if (ei->type == E820_RAM) { - if (end > end_pfn<>PAGE_SHIFT; - } else { - if (end > end_pfn_map<>PAGE_SHIFT; - } - } - if (end_pfn > end_pfn_map) end_pfn_map = end_pfn; if (end_pfn_map > MAXMEM>>PAGE_SHIFT) @@ -174,10 +179,11 @@ unsigned long __init e820_end_of_ram(void) if (end_pfn > end_pfn_map) end_pfn = end_pfn_map; + printk("end_pfn_map = %lu\n", end_pfn_map); return end_pfn; } -/* +/* * Mark e820 reserved areas as busy for the resource manager. */ void __init e820_reserve_resources(void) @@ -185,8 +191,6 @@ void __init e820_reserve_resources(void) int i; for (i = 0; i < e820.nr_map; i++) { struct resource *res; - if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) - continue; res = alloc_bootmem_low(sizeof(struct resource)); switch (e820.map[i].type) { case E820_RAM: res->name = "System RAM"; break; @@ -206,10 +210,103 @@ void __init e820_reserve_resources(void) */ request_resource(res, &code_resource); request_resource(res, &data_resource); +#ifdef CONFIG_KEXEC + request_resource(res, &crashk_res); +#endif } } } +/* Mark pages corresponding to given address range as nosave */ +static void __init +e820_mark_nosave_range(unsigned long start, unsigned long end) +{ + unsigned long pfn, max_pfn; + + if (start >= end) + return; + + printk("Nosave address range: %016lx - %016lx\n", start, end); + max_pfn = end >> PAGE_SHIFT; + for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++) + if (pfn_valid(pfn)) + SetPageNosave(pfn_to_page(pfn)); +} + +/* + * Find the ranges of physical addresses that do not correspond to + * e820 RAM areas and mark the corresponding pages as nosave for software + * suspend and suspend to RAM. + * + * This function requires the e820 map to be sorted and without any + * overlapping entries and assumes the first e820 area to be RAM. + */ +void __init e820_mark_nosave_regions(void) +{ + int i; + unsigned long paddr; + + paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE); + for (i = 1; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + if (paddr < ei->addr) + e820_mark_nosave_range(paddr, + round_up(ei->addr, PAGE_SIZE)); + + paddr = round_down(ei->addr + ei->size, PAGE_SIZE); + if (ei->type != E820_RAM) + e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE), + paddr); + + if (paddr >= (end_pfn << PAGE_SHIFT)) + break; + } +} + +/* Walk the e820 map and register active regions within a node */ +void __init +e820_register_active_regions(int nid, unsigned long start_pfn, + unsigned long end_pfn) +{ + int i; + unsigned long ei_startpfn, ei_endpfn; + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT; + ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE) + >> PAGE_SHIFT; + + /* Skip map entries smaller than a page */ + if (ei_startpfn >= ei_endpfn) + continue; + + /* Check if end_pfn_map should be updated */ + if (ei->type != E820_RAM && ei_endpfn > end_pfn_map) + end_pfn_map = ei_endpfn; + + /* Skip if map is outside the node */ + if (ei->type != E820_RAM || + ei_endpfn <= start_pfn || + ei_startpfn >= end_pfn) + continue; + + /* Check for overlaps */ + if (ei_startpfn < start_pfn) + ei_startpfn = start_pfn; + if (ei_endpfn > end_pfn) + ei_endpfn = end_pfn; + + /* Obey end_user_pfn to save on memmap */ + if (ei_startpfn >= end_user_pfn) + continue; + if (ei_endpfn > end_user_pfn) + ei_endpfn = end_user_pfn; + + add_active_range(nid, ei_startpfn, ei_endpfn); + } +} + /* * Add a memory region to the kernel e820 map. */ @@ -277,7 +374,7 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) int chgidx, still_changing; int overlap_entries; int new_bios_entry; - int old_nr, new_nr; + int old_nr, new_nr, chg_nr; int i; /* @@ -331,20 +428,24 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) for (i=0; i < 2*old_nr; i++) change_point[i] = &change_point_list[i]; - /* record all known change-points (starting and ending addresses) */ + /* record all known change-points (starting and ending addresses), + omitting those that are for empty memory regions */ chgidx = 0; for (i=0; i < old_nr; i++) { - change_point[chgidx]->addr = biosmap[i].addr; - change_point[chgidx++]->pbios = &biosmap[i]; - change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; - change_point[chgidx++]->pbios = &biosmap[i]; + if (biosmap[i].size != 0) { + change_point[chgidx]->addr = biosmap[i].addr; + change_point[chgidx++]->pbios = &biosmap[i]; + change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; + change_point[chgidx++]->pbios = &biosmap[i]; + } } + chg_nr = chgidx; /* sort change-point list by memory addresses (low -> high) */ still_changing = 1; while (still_changing) { still_changing = 0; - for (i=1; i < 2*old_nr; i++) { + for (i=1; i < chg_nr; i++) { /* if > , swap */ /* or, if current= & last=, swap */ if ((change_point[i]->addr < change_point[i-1]->addr) || @@ -367,7 +468,7 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) last_type = 0; /* start with undefined memory type */ last_addr = 0; /* start with 0 as last starting address */ /* loop through change-points, determining affect on the new bios map */ - for (chgidx=0; chgidx < 2*old_nr; chgidx++) + for (chgidx=0; chgidx < chg_nr; chgidx++) { /* keep track of all overlapping bios entries */ if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) @@ -426,13 +527,6 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) * If we're lucky and live on a modern system, the setup code * will have given us a memory map that we can use to properly * set up memory. If we aren't, we'll fake a memory map. - * - * We check to see that the memory map contains at least 2 elements - * before we'll use it, because the detection code in setup.S may - * not be perfect and most every PC known to man has two memory - * regions: one from 0 to 640k, and one from 1mb up. (The IBM - * thinkpad 560x, for example, does not cooperate with the memory - * detection code.) */ static int __init copy_e820_map(struct e820entry * biosmap, int nr_map) { @@ -450,34 +544,19 @@ static int __init copy_e820_map(struct e820entry * biosmap, int nr_map) if (start > end) return -1; - /* - * Some BIOSes claim RAM in the 640k - 1M region. - * Not right. Fix it up. - * - * This should be removed on Hammer which is supposed to not - * have non e820 covered ISA mappings there, but I had some strange - * problems so it stays for now. -AK - */ - if (type == E820_RAM) { - if (start < 0x100000ULL && end > 0xA0000ULL) { - if (start < 0xA0000ULL) - add_memory_region(start, 0xA0000ULL-start, type); - if (end <= 0x100000ULL) - continue; - start = 0x100000ULL; - size = end - start; - } - } - add_memory_region(start, size, type); } while (biosmap++,--nr_map); return 0; } -void __init setup_memory_region(void) +void early_panic(char *msg) { - char *who = "BIOS-e820"; + early_printk(msg); + panic(msg); +} +void __init setup_memory_region(void) +{ /* * Try to copy the BIOS-supplied E820-map. * @@ -485,43 +564,131 @@ void __init setup_memory_region(void) * the next section from 1mb->appropriate_mem_k */ sanitize_e820_map(E820_MAP, &E820_MAP_NR); - if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) { - unsigned long mem_size; - - /* compare results from other methods and take the greater */ - if (ALT_MEM_K < EXT_MEM_K) { - mem_size = EXT_MEM_K; - who = "BIOS-88"; - } else { - mem_size = ALT_MEM_K; - who = "BIOS-e801"; - } - - e820.nr_map = 0; - add_memory_region(0, LOWMEMSIZE(), E820_RAM); - add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM); - } + if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) + early_panic("Cannot find a valid memory map"); printk(KERN_INFO "BIOS-provided physical RAM map:\n"); - e820_print_map(who); + e820_print_map("BIOS-e820"); } -void __init parse_memopt(char *p, char **from) -{ - /* - * mem=XXX[kKmM] limits kernel memory to XXX+1MB - * - * It would be more logical to count from 0 instead of from - * HIGH_MEMORY, but we keep that for now for i386 compatibility. - * - * No support for custom mapping like i386. The reason is - * that we need to read the e820 map anyways to handle the - * ACPI mappings in the direct map. Also on x86-64 there - * should be always a good e820 map. This is only an upper - * limit, you cannot force usage of memory not in e820. - * - * -AK - */ - end_user_pfn = memparse(p, from) + HIGH_MEMORY; +static int __init parse_memopt(char *p) +{ + if (!p) + return -EINVAL; + end_user_pfn = memparse(p, &p); end_user_pfn >>= PAGE_SHIFT; + return 0; } +early_param("mem", parse_memopt); + +static int userdef __initdata; + +static int __init parse_memmap_opt(char *p) +{ + char *oldp; + unsigned long long start_at, mem_size; + + if (!strcmp(p, "exactmap")) { +#ifdef CONFIG_CRASH_DUMP + /* If we are doing a crash dump, we + * still need to know the real mem + * size before original memory map is + * reset. + */ + e820_register_active_regions(0, 0, -1UL); + saved_max_pfn = e820_end_of_ram(); + remove_all_active_ranges(); +#endif + end_pfn_map = 0; + e820.nr_map = 0; + userdef = 1; + return 0; + } + + oldp = p; + mem_size = memparse(p, &p); + if (p == oldp) + return -EINVAL; + if (*p == '@') { + start_at = memparse(p+1, &p); + add_memory_region(start_at, mem_size, E820_RAM); + } else if (*p == '#') { + start_at = memparse(p+1, &p); + add_memory_region(start_at, mem_size, E820_ACPI); + } else if (*p == '$') { + start_at = memparse(p+1, &p); + add_memory_region(start_at, mem_size, E820_RESERVED); + } else { + end_user_pfn = (mem_size >> PAGE_SHIFT); + } + return *p == '\0' ? 0 : -EINVAL; +} +early_param("memmap", parse_memmap_opt); + +void finish_e820_parsing(void) +{ + if (userdef) { + printk(KERN_INFO "user-defined physical RAM map:\n"); + e820_print_map("user"); + } +} + +unsigned long pci_mem_start = 0xaeedbabe; +EXPORT_SYMBOL(pci_mem_start); + +/* + * Search for the biggest gap in the low 32 bits of the e820 + * memory space. We pass this space to PCI to assign MMIO resources + * for hotplug or unconfigured devices in. + * Hopefully the BIOS let enough space left. + */ +__init void e820_setup_gap(void) +{ + unsigned long gapstart, gapsize, round; + unsigned long last; + int i; + int found = 0; + + last = 0x100000000ull; + gapstart = 0x10000000; + gapsize = 0x400000; + i = e820.nr_map; + while (--i >= 0) { + unsigned long long start = e820.map[i].addr; + unsigned long long end = start + e820.map[i].size; + /* + * Since "last" is at most 4GB, we know we'll + * fit in 32 bits if this condition is true + */ + if (last > end) { + unsigned long gap = last - end; + + if (gap > gapsize) { + gapsize = gap; + gapstart = end; + found = 1; + } + } + if (start < last) + last = start; + } + + if (!found) { + gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024; + printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit address range\n" + KERN_ERR "PCI: Unassigned devices with 32bit resource registers may break!\n"); + } + + /* + * See how much we want to round up: start off with + * rounding to the next 1MB area. + */ + round = 0x100000; + while ((gapsize >> 4) > round) + round += round; + /* Fun with two's complement */ + pci_mem_start = (gapstart + round) & -round; + + printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", + pci_mem_start, gapstart, gapsize); +}