X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fi386%2Fmm%2Finit.c;h=e2e0a9a22c7f01648a2bcdae307fd1cfb786b5fa;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=7a7ea3737265284eceafdcdc82000f3c87d06690;hpb=f7f1b0f1e2fbadeab12d24236000e778aa9b1ead;p=linux-2.6.git diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 7a7ea3737..e2e0a9a22 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -6,7 +6,6 @@ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 */ -#include #include #include #include @@ -23,10 +22,14 @@ #include #include #include +#include #include #include #include #include +#include +#include +#include #include #include @@ -189,18 +192,17 @@ static inline int page_kills_ppro(unsigned long pagenr) return 0; } -extern int is_available_memory(efi_memory_desc_t *); - -static inline int page_is_ram(unsigned long pagenr) +int page_is_ram(unsigned long pagenr) { int i; unsigned long addr, end; if (efi_enabled) { efi_memory_desc_t *md; + void *p; - for (i = 0; i < memmap.nr_map; i++) { - md = &memmap.map[i]; + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; if (!is_available_memory(md)) continue; addr = (md->phys_addr+PAGE_SIZE-1) >> PAGE_SHIFT; @@ -229,6 +231,27 @@ static inline int page_is_ram(unsigned long pagenr) return 0; } +/* + * devmem_is_allowed() checks to see if /dev/mem access to a certain address is + * valid. The argument is a physical page number. + * + * + * On x86, access has to be given to the first megabyte of ram because that area + * contains bios code and data regions used by X and dosemu and similar apps. + * Access has to be given to non-kernel-ram areas as well, these contain the PCI + * mmio resources as well as potential bios/acpi data regions. + */ +int devmem_is_allowed(unsigned long pagenr) +{ + if (pagenr <= 256) + return 1; + if (!page_is_ram(pagenr)) + return 1; + return 0; +} + +EXPORT_SYMBOL_GPL(page_is_ram); + #ifdef CONFIG_HIGHMEM pte_t *kmap_pte; pgprot_t kmap_prot; @@ -265,29 +288,57 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) pkmap_page_table = pte; } -void __init one_highpage_init(struct page *page, int pfn, int bad_ppro) +static void __meminit free_new_highpage(struct page *page) +{ + init_page_count(page); + __free_page(page); + totalhigh_pages++; +} + +void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro) { if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) { ClearPageReserved(page); - set_bit(PG_highmem, &page->flags); - set_page_count(page, 1); - __free_page(page); - totalhigh_pages++; + free_new_highpage(page); } else SetPageReserved(page); } -#ifndef CONFIG_DISCONTIGMEM +static int __meminit add_one_highpage_hotplug(struct page *page, unsigned long pfn) +{ + free_new_highpage(page); + totalram_pages++; +#ifdef CONFIG_FLATMEM + max_mapnr = max(pfn, max_mapnr); +#endif + num_physpages++; + return 0; +} + +/* + * Not currently handling the NUMA case. + * Assuming single node and all memory that + * has been added dynamically that would be + * onlined here is in HIGHMEM + */ +void __meminit online_page(struct page *page) +{ + ClearPageReserved(page); + add_one_highpage_hotplug(page, page_to_pfn(page)); +} + + +#ifdef CONFIG_NUMA +extern void set_highmem_pages_init(int); +#else static void __init set_highmem_pages_init(int bad_ppro) { int pfn; for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) - one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); + add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); totalram_pages += totalhigh_pages; } -#else -extern void set_highmem_pages_init(int); -#endif /* !CONFIG_DISCONTIGMEM */ +#endif /* CONFIG_FLATMEM */ #else #define kmap_init() do { } while (0) @@ -296,12 +347,13 @@ extern void set_highmem_pages_init(int); #endif /* CONFIG_HIGHMEM */ unsigned long long __PAGE_KERNEL = _PAGE_KERNEL; +EXPORT_SYMBOL(__PAGE_KERNEL); unsigned long long __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC; -#ifndef CONFIG_DISCONTIGMEM -#define remap_numa_kva() do {} while (0) -#else +#ifdef CONFIG_NUMA extern void __init remap_numa_kva(void); +#else +#define remap_numa_kva() do {} while (0) #endif static void __init pagetable_init (void) @@ -348,11 +400,11 @@ static void __init pagetable_init (void) * All user-space mappings are explicitly cleared after * SMP startup. */ - pgd_base[0] = pgd_base[USER_PTRS_PER_PGD]; + set_pgd(&pgd_base[0], pgd_base[USER_PTRS_PER_PGD]); #endif } -#if defined(CONFIG_PM_DISK) || defined(CONFIG_SOFTWARE_SUSPEND) +#if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP) /* * Swap suspend & friends need this for resume because things like the intel-agp * driver might have split up a kernel 4MB mapping. @@ -392,7 +444,7 @@ void zap_low_mappings (void) } static int disable_nx __initdata = 0; -u64 __supported_pte_mask = ~_PAGE_NX; +u64 __supported_pte_mask __read_mostly = ~_PAGE_NX; /* * noexec = on|off @@ -400,18 +452,25 @@ u64 __supported_pte_mask = ~_PAGE_NX; * Control non executable mappings. * * on Enable - * off Disable + * off Disable (disables exec-shield too) */ -void __init noexec_setup(const char *str) +static int __init noexec_setup(char *str) { - if (!strncmp(str, "on",2) && cpu_has_nx) { - __supported_pte_mask |= _PAGE_NX; - disable_nx = 0; - } else if (!strncmp(str,"off",3)) { + if (!str || !strcmp(str, "on")) { + if (cpu_has_nx) { + __supported_pte_mask |= _PAGE_NX; + disable_nx = 0; + } + } else if (!strcmp(str,"off")) { disable_nx = 1; __supported_pte_mask &= ~_PAGE_NX; - } + exec_shield = 0; + } else + return -EINVAL; + + return 0; } +early_param("noexec", noexec_setup); int nx_enabled = 0; #ifdef CONFIG_X86_PAE @@ -454,6 +513,7 @@ int __init set_kernel_exec(unsigned long vaddr, int enable) pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32)); else pte->pte_high |= 1 << (_PAGE_BIT_NX - 32); + pte_update_defer(&init_mm, vaddr, pte); __flush_tlb_all(); out: return ret; @@ -474,7 +534,10 @@ void __init paging_init(void) set_nx(); if (nx_enabled) printk("NX (Execute Disable) protection: active\n"); + else #endif + if (exec_shield) + printk("Using x86 segment limits to approximate NX protection\n"); pagetable_init(); @@ -519,18 +582,6 @@ static void __init test_wp_bit(void) } } -static void __init set_max_mapnr_init(void) -{ -#ifdef CONFIG_HIGHMEM - num_physpages = highend_pfn; -#else - num_physpages = max_low_pfn; -#endif -#ifndef CONFIG_DISCONTIGMEM - max_mapnr = num_physpages; -#endif -} - static struct kcore_list kcore_mem, kcore_vmalloc; void __init mem_init(void) @@ -540,9 +591,8 @@ void __init mem_init(void) int tmp; int bad_ppro; -#ifndef CONFIG_DISCONTIGMEM - if (!mem_map) - BUG(); +#ifdef CONFIG_FLATMEM + BUG_ON(!mem_map); #endif bad_ppro = ppro_with_ram_bug(); @@ -557,14 +607,6 @@ void __init mem_init(void) } #endif - set_max_mapnr_init(); - -#ifdef CONFIG_HIGHMEM - high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; -#else - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; -#endif - /* this will put all low memory onto the freelists */ totalram_pages += free_all_bootmem(); @@ -596,6 +638,48 @@ void __init mem_init(void) (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) ); +#if 1 /* double-sanity-check paranoia */ + printk("virtual kernel memory layout:\n" + " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" +#ifdef CONFIG_HIGHMEM + " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" +#endif + " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n" + " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n" + " .init : 0x%08lx - 0x%08lx (%4ld kB)\n" + " .data : 0x%08lx - 0x%08lx (%4ld kB)\n" + " .text : 0x%08lx - 0x%08lx (%4ld kB)\n", + FIXADDR_START, FIXADDR_TOP, + (FIXADDR_TOP - FIXADDR_START) >> 10, + +#ifdef CONFIG_HIGHMEM + PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, + (LAST_PKMAP*PAGE_SIZE) >> 10, +#endif + + VMALLOC_START, VMALLOC_END, + (VMALLOC_END - VMALLOC_START) >> 20, + + (unsigned long)__va(0), (unsigned long)high_memory, + ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20, + + (unsigned long)&__init_begin, (unsigned long)&__init_end, + ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10, + + (unsigned long)&_etext, (unsigned long)&_edata, + ((unsigned long)&_edata - (unsigned long)&_etext) >> 10, + + (unsigned long)&_text, (unsigned long)&_etext, + ((unsigned long)&_etext - (unsigned long)&_text) >> 10); + +#ifdef CONFIG_HIGHMEM + BUG_ON(PKMAP_BASE+LAST_PKMAP*PAGE_SIZE > FIXADDR_START); + BUG_ON(VMALLOC_END > PKMAP_BASE); +#endif + BUG_ON(VMALLOC_START > VMALLOC_END); + BUG_ON((unsigned long)high_memory > VMALLOC_START); +#endif /* double-sanity-check paranoia */ + #ifdef CONFIG_X86_PAE if (!cpu_has_pae) panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!"); @@ -614,8 +698,26 @@ void __init mem_init(void) #endif } -kmem_cache_t *pgd_cache; -kmem_cache_t *pmd_cache; +#ifdef CONFIG_MEMORY_HOTPLUG +int arch_add_memory(int nid, u64 start, u64 size) +{ + struct pglist_data *pgdata = NODE_DATA(nid); + struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM; + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + + return __add_pages(zone, start_pfn, nr_pages); +} + +int remove_memory(u64 start, u64 size) +{ + return -EINVAL; +} +EXPORT_SYMBOL_GPL(remove_memory); +#endif + +struct kmem_cache *pgd_cache; +struct kmem_cache *pmd_cache; void __init pgtable_cache_init(void) { @@ -666,31 +768,53 @@ static int noinline do_test_wp_bit(void) return flag; } -void free_initmem(void) +#ifdef CONFIG_DEBUG_RODATA + +void mark_rodata_ro(void) +{ + unsigned long addr = (unsigned long)__start_rodata; + + for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE) + change_page_attr(virt_to_page(addr), 1, PAGE_KERNEL_RO); + + printk("Write protecting the kernel read-only data: %uk\n", + (__end_rodata - __start_rodata) >> 10); + + /* + * change_page_attr() requires a global_flush_tlb() call after it. + * We do this after the printk so that if something went wrong in the + * change, the printk gets out at least to give a better debug hint + * of who is the culprit. + */ + global_flush_tlb(); +} +#endif + +void free_init_pages(char *what, unsigned long begin, unsigned long end) { unsigned long addr; - addr = (unsigned long)(&__init_begin); - for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { + for (addr = begin; addr < end; addr += PAGE_SIZE) { ClearPageReserved(virt_to_page(addr)); - set_page_count(virt_to_page(addr), 1); - memset((void *)addr, 0xcc, PAGE_SIZE); + init_page_count(virt_to_page(addr)); + memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); free_page(addr); totalram_pages++; } - printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", (__init_end - __init_begin) >> 10); + printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); +} + +void free_initmem(void) +{ + free_init_pages("unused kernel memory", + (unsigned long)(&__init_begin), + (unsigned long)(&__init_end)); } #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - if (start < end) - printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10); - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - set_page_count(virt_to_page(start), 1); - free_page(start); - totalram_pages++; - } + free_init_pages("initrd memory", start, end); } #endif +