2 * linux/arch/i386/kernel/setup.c
4 * Copyright (C) 1995 Linus Torvalds
6 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
8 * Memory region support
9 * David Parsons <orc@pell.chi.il.us>, July-August 1999
11 * Added E820 sanitization routine (removes overlapping memory regions);
12 * Brian Moyle <bmoyle@mvista.com>, February 2001
14 * Moved CPU detection code to cpu/${cpu}.c
15 * Patrick Mochel <mochel@osdl.org>, March 2002
17 * Provisions for empty E820 memory regions (reported by certain BIOSes).
18 * Alex Achenbach <xela@slit.de>, December 2002.
23 * This file handles the architecture-dependent parts of initialization
26 #include <linux/sched.h>
28 #include <linux/tty.h>
29 #include <linux/ioport.h>
30 #include <linux/acpi.h>
31 #include <linux/apm_bios.h>
32 #include <linux/initrd.h>
33 #include <linux/bootmem.h>
34 #include <linux/seq_file.h>
35 #include <linux/console.h>
36 #include <linux/mca.h>
37 #include <linux/root_dev.h>
38 #include <linux/highmem.h>
39 #include <linux/module.h>
40 #include <linux/efi.h>
41 #include <linux/init.h>
42 #include <linux/edd.h>
43 #include <linux/nodemask.h>
44 #include <video/edid.h>
46 #include <asm/mpspec.h>
47 #include <asm/setup.h>
48 #include <asm/arch_hooks.h>
49 #include <asm/sections.h>
50 #include <asm/io_apic.h>
53 #include <asm/crash_dump.h>
54 #include "setup_arch_pre.h"
55 #include <bios_ebda.h>
57 /* This value is set up by the early boot code to point to the value
58 immediately after the boot time page tables. It contains a *physical*
59 address, and must not be in the .bss segment! */
60 unsigned long init_pg_tables_end __initdata = ~0UL;
/* NOTE(review): disable_pse and dump_enabled are only defined here; the
   statements that set them are not visible in this listing. Confirm their
   meaning against the "mem=nopentium" and "dump" handling in
   parse_cmdline_early(). */
62 int disable_pse __initdata = 0;
63 unsigned int dump_enabled;
71 EXPORT_SYMBOL(efi_enabled);
74 /* cpu data as detected by the assembly code in head.S */
75 struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
76 /* common cpu data for all cpus */
77 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
/* Both initialisers above are positional, so their meaning depends on the
   field order of struct cpuinfo_x86, which is declared elsewhere.
   setup_arch() copies new_cpu_data over boot_cpu_data with memcpy(). */
79 unsigned long mmu_cr4_features;
/* acpi_disabled is defined twice below with different defaults (0 and 1).
   NOTE(review): the embedded numbering skips 83 and 85, so the directives
   separating the two definitions are not visible; presumably the second one
   is the branch taken when CONFIG_ACPI_INTERPRETER is not defined. */
81 #ifdef CONFIG_ACPI_INTERPRETER
82 int acpi_disabled = 0;
84 int acpi_disabled = 1;
86 EXPORT_SYMBOL(acpi_disabled);
88 #ifdef CONFIG_ACPI_BOOT
89 int __initdata acpi_force = 0;
90 extern acpi_interrupt_flags acpi_sci_flags;
93 /* for MCA, but anyone else can use it if they want */
94 unsigned int machine_id;
95 unsigned int machine_submodel_id;
96 unsigned int BIOS_revision;
97 unsigned int mca_pentium_flag;
99 /* For PCI or other memory-mapped resources */
/* 0x10000000 is only the initial value: register_memory() recomputes
   pci_mem_start from the address-space gap it selects. */
100 unsigned long pci_mem_start = 0x10000000;
102 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
105 /* user-defined highmem size */
/* -1 means "not specified": the "highmem=" option in parse_cmdline_early()
   stores a page count here, and find_max_low_pfn() tests for -1 before
   picking a default. */
106 static unsigned int highmem_pages = -1;
111 struct drive_info_struct { char dummy[32]; } drive_info;
112 struct screen_info screen_info;
113 struct apm_info apm_info;
/* A length field followed by a zero-length trailing array.
   NOTE(review): the closing line of this struct is not visible here. */
114 struct sys_desc_table_struct {
115 unsigned short length;
116 unsigned char table[0];
118 struct edid_info edid_info;
119 struct ist_info ist_info;
122 extern void early_cpu_init(void);
123 extern void dmi_scan_machine(void);
124 extern void generic_apic_probe(char *);
125 extern int root_mountflags;
127 unsigned long saved_videomode;
129 #define RAMDISK_IMAGE_START_MASK 0x07FF
130 #define RAMDISK_PROMPT_FLAG 0x8000
131 #define RAMDISK_LOAD_FLAG 0x4000
/* Working copy of the kernel command line: parse_cmdline_early() writes into
   this buffer and hands its address back through *cmdline_p. */
133 static char command_line[COMMAND_LINE_SIZE];
135 unsigned char __initdata boot_params[PARAM_SIZE];
/*
 * Static struct resource descriptors for the kernel image, the legacy ROM
 * areas, video RAM and the standard I/O port ranges.  They are handed to
 * request_resource() by probe_roms() and register_memory(); probe_roms()
 * also rewrites the .start/.end of the video and adapter ROM entries.
 *
 * NOTE(review): the embedded numbering skips two lines after most .name
 * lines (e.g. 138 -> 141), so the address-range initialisers, several .name
 * lines and the closing braces are not visible in this listing.
 *
 * ADAPTER_ROM_RESOURCES and STANDARD_IO_RESOURCES evaluate to the element
 * counts of their arrays (sizeof array / sizeof array[0]).
 */
137 static struct resource data_resource = {
138 .name = "Kernel data",
141 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
144 static struct resource code_resource = {
145 .name = "Kernel code",
148 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
151 static struct resource system_rom_resource = {
152 .name = "System ROM",
155 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
158 static struct resource extension_rom_resource = {
159 .name = "Extension ROM",
162 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
165 static struct resource adapter_rom_resources[] = { {
166 .name = "Adapter ROM",
169 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
171 .name = "Adapter ROM",
174 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
176 .name = "Adapter ROM",
179 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
181 .name = "Adapter ROM",
184 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
186 .name = "Adapter ROM",
189 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
191 .name = "Adapter ROM",
194 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
197 #define ADAPTER_ROM_RESOURCES \
198 (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
200 static struct resource video_rom_resource = {
204 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
207 static struct resource video_ram_resource = {
208 .name = "Video RAM area",
211 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
214 static struct resource standard_io_resources[] = { {
218 .flags = IORESOURCE_BUSY | IORESOURCE_IO
223 .flags = IORESOURCE_BUSY | IORESOURCE_IO
228 .flags = IORESOURCE_BUSY | IORESOURCE_IO
233 .flags = IORESOURCE_BUSY | IORESOURCE_IO
238 .flags = IORESOURCE_BUSY | IORESOURCE_IO
240 .name = "dma page reg",
243 .flags = IORESOURCE_BUSY | IORESOURCE_IO
248 .flags = IORESOURCE_BUSY | IORESOURCE_IO
253 .flags = IORESOURCE_BUSY | IORESOURCE_IO
258 .flags = IORESOURCE_BUSY | IORESOURCE_IO
261 #define STANDARD_IO_RESOURCES \
262 (sizeof standard_io_resources / sizeof standard_io_resources[0])
/* Non-zero when the 16-bit value read at address x equals 0xaa55.  x is
   cast to unsigned short * and dereferenced once, so it must point at
   readable memory. */
264 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
/*
 * romchecksum() - checksum the length bytes starting at rom.
 *
 * Walks rom[0] .. rom[length - 1] with an unsigned char accumulator that
 * starts at 0.  probe_roms() treats a non-zero return as "checksum okay".
 *
 * NOTE(review): the loop body and the return statement are not visible in
 * this listing (the embedded numbering jumps from 270 to 275).
 */
266 static int __init romchecksum(unsigned char *rom, unsigned long length)
268 unsigned char *p, sum = 0;
270 for (p = rom; p < rom + length; p++)
/*
 * probe_roms() - locate the legacy ROM areas and register them as resources.
 *
 * Visible steps, in order:
 *  - scan from video_rom_resource.start up to adapter_rom_resources[0].start
 *    in 2048-byte steps for a ROM signature; on a hit record the start, take
 *    the length from rom[2] * 512 and, if the checksum passes, set the end,
 *    then request the video ROM resource;
 *  - request the system ROM resource and use its start as the upper bound;
 *  - if the extension ROM area carries a signature and checksums correctly
 *    over its whole configured range, request it and lower the upper bound
 *    to its start;
 *  - scan the remaining range on 2k boundaries for adapter ROMs, filling at
 *    most ADAPTER_ROM_RESOURCES entries of adapter_rom_resources[].
 *
 * NOTE(review): the declarations of rom and i, and the statements following
 * the two "if (!romsignature(rom))" tests, are not visible in this listing.
 */
275 static void __init probe_roms(void)
277 unsigned long start, length, upper;
282 upper = adapter_rom_resources[0].start;
283 for (start = video_rom_resource.start; start < upper; start += 2048) {
284 rom = isa_bus_to_virt(start);
285 if (!romsignature(rom))
288 video_rom_resource.start = start;
290 /* 0 < length <= 0x7f * 512, historically */
291 length = rom[2] * 512;
293 /* if checksum okay, trust length byte */
294 if (length && romchecksum(rom, length))
295 video_rom_resource.end = start + length - 1;
297 request_resource(&iomem_resource, &video_rom_resource);
/* continue at the first 2k boundary past the end of the video ROM */
301 start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
306 request_resource(&iomem_resource, &system_rom_resource);
307 upper = system_rom_resource.start;
309 /* check for extension rom (ignore length byte!) */
310 rom = isa_bus_to_virt(extension_rom_resource.start);
311 if (romsignature(rom)) {
312 length = extension_rom_resource.end - extension_rom_resource.start + 1;
313 if (romchecksum(rom, length)) {
314 request_resource(&iomem_resource, &extension_rom_resource);
315 upper = extension_rom_resource.start;
319 /* check for adapter roms on 2k boundaries */
320 for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
321 rom = isa_bus_to_virt(start);
322 if (!romsignature(rom))
325 /* 0 < length <= 0x7f * 512, historically */
326 length = rom[2] * 512;
328 /* but accept any length that fits if checksum okay */
329 if (!length || start + length > upper || !romchecksum(rom, length))
332 adapter_rom_resources[i].start = start;
333 adapter_rom_resources[i].end = start + length - 1;
334 request_resource(&iomem_resource, &adapter_rom_resources[i]);
/* round down to the 2k block holding the ROM's last byte; the loop's
   "start += 2048" then steps to the block after it */
336 start = adapter_rom_resources[i++].end & ~2047UL;
/*
 * limit_regions() - trim the firmware memory map at the byte address size.
 *
 * Two loops are visible:
 *  - over memmap.map[]: for an EFI_CONVENTIONAL_MEMORY entry whose end is at
 *    or beyond size, num_pages is reduced by the overshoot (rounded up to
 *    whole pages) and memmap.nr_map is cut to that entry;
 *  - over e820.map[]: for an E820_RAM entry whose end is at or beyond size,
 *    the entry's size is reduced by the overshoot.
 *
 * NOTE(review): the condition choosing between the two loops and whatever
 * follows each trim are not visible in this listing.  The EFI end address is
 * computed with a literal shift of 12 while the trim uses PAGE_SHIFT;
 * confirm the two are meant to be the same.
 */
340 static void __init limit_regions(unsigned long long size)
342 unsigned long long current_addr = 0;
346 for (i = 0; i < memmap.nr_map; i++) {
347 current_addr = memmap.map[i].phys_addr +
348 (memmap.map[i].num_pages << 12);
349 if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) {
350 if (current_addr >= size) {
351 memmap.map[i].num_pages -=
352 (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
353 memmap.nr_map = i + 1;
359 for (i = 0; i < e820.nr_map; i++) {
360 if (e820.map[i].type == E820_RAM) {
361 current_addr = e820.map[i].addr + e820.map[i].size;
362 if (current_addr >= size) {
363 e820.map[i].size -= current_addr-size;
/*
 * add_memory_region() - store one (start, size, type) entry in e820.map[].
 *
 * Logs "Too many entries in the memory map!" on the visible error path and
 * otherwise fills slot x of e820.map[].
 *
 * NOTE(review): how x is obtained, the full-table test and the update of
 * the entry count are not visible in this listing.
 */
371 static void __init add_memory_region(unsigned long long start,
372 unsigned long long size, int type)
380 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
384 e820.map[x].addr = start;
385 e820.map[x].size = size;
386 e820.map[x].type = type;
389 } /* add_memory_region */
/*
 * print_memory_map() - print every e820.map[] entry via printk.
 * @who: label printed at the start of each line.
 *
 * Each line shows a 16-digit hex range ending at addr + size, followed by a
 * description of the entry type; types without a dedicated label are
 * printed numerically ("type %lu").
 *
 * NOTE(review): the first range argument and the case labels for the
 * reserved / ACPI messages are not visible in this listing.
 */
393 static void __init print_memory_map(char *who)
397 for (i = 0; i < e820.nr_map; i++) {
398 printk(" %s: %016Lx - %016Lx ", who,
400 e820.map[i].addr + e820.map[i].size);
401 switch (e820.map[i].type) {
402 case E820_RAM: printk("(usable)\n");
405 printk("(reserved)\n");
408 printk("(ACPI data)\n");
411 printk("(ACPI NVS)\n");
413 default: printk("type %lu\n", e820.map[i].type);
420 * Sanitize the BIOS e820 map.
422 * Some e820 responses include overlapping entries. The following
423 * replaces the original e820 map with a new one, removing overlaps.
/*
 * Scratch storage for sanitize_e820_map().  A change point is an address at
 * which a BIOS entry starts or ends; sanitize_e820_map() records two per
 * non-empty entry, hence the 2*E820MAX sizing of the first two arrays.
 *  change_point_list - backing storage for the change points
 *  change_point      - pointers into change_point_list, reordered by the sort
 *  overlap_list      - BIOS entries covering the address being processed
 *  new_bios          - the rebuilt map, copied back over the BIOS map
 * NOTE(review): the closing line of struct change_member is not visible.
 */
426 struct change_member {
427 struct e820entry *pbios; /* pointer to original bios entry */
428 unsigned long long addr; /* address for this change point */
430 static struct change_member change_point_list[2*E820MAX] __initdata;
431 static struct change_member *change_point[2*E820MAX] __initdata;
432 static struct e820entry *overlap_list[E820MAX] __initdata;
433 static struct e820entry new_bios[E820MAX] __initdata;
/*
 * sanitize_e820_map() - rebuild a BIOS e820 map so that no entries overlap.
 * @biosmap: BIOS-supplied entries; overwritten in place with the rebuilt
 *           map (memcpy from new_bios at the end).
 * @pnr_map: NOTE(review): declared as char *; how it is read and updated is
 *           not visible in this listing, presumably the entry count.
 *
 * Steps visible below:
 *  1. test every entry for addr + size wrapping below addr;
 *  2. record two change points (start and end address) for each entry with
 *     a non-zero size;
 *  3. sort the change points by address with repeated adjacent swaps; when
 *     two addresses are equal, a start point is moved ahead of an end point;
 *  4. sweep the sorted points while maintaining overlap_list, the entries
 *     covering the current address.  The effective type is the largest type
 *     value in that list, and a new_bios entry is closed/opened whenever the
 *     effective type changes.  The sweep stops once E820MAX entries exist.
 *
 * NOTE(review): the return statements, several initialisations (old_nr,
 * chgidx, still_changing, current_type) and the action taken by the
 * wrap-around test are not visible in this listing.
 */
435 static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
437 struct change_member *change_tmp;
438 unsigned long current_type, last_type;
439 unsigned long long last_addr;
440 int chgidx, still_changing;
443 int old_nr, new_nr, chg_nr;
447 Visually we're performing the following (1,2,3,4 = memory types)...
449 Sample memory map (w/overlaps):
450 ____22__________________
451 ______________________4_
452 ____1111________________
453 _44_____________________
454 11111111________________
455 ____________________33__
456 ___________44___________
457 __________33333_________
458 ______________22________
459 ___________________2222_
460 _________111111111______
461 _____________________11_
462 _________________4______
464 Sanitized equivalent (no overlap):
465 1_______________________
466 _44_____________________
467 ___1____________________
468 ____22__________________
469 ______11________________
470 _________1______________
471 __________3_____________
472 ___________44___________
473 _____________33_________
474 _______________2________
475 ________________1_______
476 _________________4______
477 ___________________2____
478 ____________________33__
479 ______________________4_
482 /* if there's only one memory region, don't bother */
488 /* bail out if we find any unreasonable addresses in bios map */
489 for (i=0; i<old_nr; i++)
490 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
493 /* create pointers for initial change-point information (for sorting) */
494 for (i=0; i < 2*old_nr; i++)
495 change_point[i] = &change_point_list[i];
497 /* record all known change-points (starting and ending addresses),
498 omitting those that are for empty memory regions */
500 for (i=0; i < old_nr; i++) {
501 if (biosmap[i].size != 0) {
502 change_point[chgidx]->addr = biosmap[i].addr;
503 change_point[chgidx++]->pbios = &biosmap[i];
504 change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
505 change_point[chgidx++]->pbios = &biosmap[i];
508 chg_nr = chgidx; /* true number of change-points */
510 /* sort change-point list by memory addresses (low -> high) */
512 while (still_changing) {
514 for (i=1; i < chg_nr; i++) {
515 /* if <current_addr> < <last_addr>, swap */
516 /* or, if current=<start_addr> & last=<end_addr>, swap */
517 if ((change_point[i]->addr < change_point[i-1]->addr) ||
518 ((change_point[i]->addr == change_point[i-1]->addr) &&
519 (change_point[i]->addr == change_point[i]->pbios->addr) &&
520 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
523 change_tmp = change_point[i];
524 change_point[i] = change_point[i-1];
525 change_point[i-1] = change_tmp;
531 /* create a new bios memory map, removing overlaps */
532 overlap_entries=0; /* number of entries in the overlap table */
533 new_bios_entry=0; /* index for creating new bios map entries */
534 last_type = 0; /* start with undefined memory type */
535 last_addr = 0; /* start with 0 as last starting address */
536 /* loop through change-points, determining effect on the new bios map */
537 for (chgidx=0; chgidx < chg_nr; chgidx++)
539 /* keep track of all overlapping bios entries */
540 if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
542 /* add map entry to overlap list (> 1 entry implies an overlap) */
543 overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
547 /* remove entry from list (order independent, so swap with last) */
548 for (i=0; i<overlap_entries; i++)
550 if (overlap_list[i] == change_point[chgidx]->pbios)
551 overlap_list[i] = overlap_list[overlap_entries-1];
555 /* if there are overlapping entries, decide which "type" to use */
556 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
558 for (i=0; i<overlap_entries; i++)
559 if (overlap_list[i]->type > current_type)
560 current_type = overlap_list[i]->type;
561 /* continue building up new bios map based on this information */
562 if (current_type != last_type) {
563 if (last_type != 0) {
564 new_bios[new_bios_entry].size =
565 change_point[chgidx]->addr - last_addr;
566 /* move forward only if the new size was non-zero */
567 if (new_bios[new_bios_entry].size != 0)
568 if (++new_bios_entry >= E820MAX)
569 break; /* no more space left for new bios entries */
571 if (current_type != 0) {
572 new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
573 new_bios[new_bios_entry].type = current_type;
574 last_addr=change_point[chgidx]->addr;
576 last_type = current_type;
579 new_nr = new_bios_entry; /* retain count for new bios entries */
581 /* copy new bios mapping into original location */
582 memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
589 * Copy the BIOS e820 map into a safe place.
591 * Sanity-check it while we're at it..
593 * If we're lucky and live on a modern system, the setup code
594 * will have given us a memory map that we can use to properly
595 * set up memory. If we aren't, we'll fake a memory map.
597 * We check to see that the memory map contains at least 2 elements
598 * before we'll use it, because the detection code in setup.S may
599 * not be perfect and most every PC known to man has two memory
600 * regions: one from 0 to 640k, and one from 1mb up. (The IBM
601 * thinkpad 560x, for example, does not cooperate with the memory
/*
 * copy_e820_map() - feed the BIOS map into add_memory_region().
 * @biosmap: first BIOS entry; advanced once per iteration.
 * @nr_map:  number of entries; the loop runs until --nr_map reaches 0.
 *
 * For each entry start, size, end (= start + size) and type are read.  An
 * E820_RAM entry that overlaps the 0xA0000 - 0x100000 window has the part
 * below 0xA0000 (if any) added on its own.  Entries are otherwise added
 * unchanged by the final add_memory_region() call.
 *
 * NOTE(review): the early-exit tests announced by the two one-line comments,
 * the handling of the part of a RAM entry at or above 1MB (numbering skips
 * 629-633) and the return statements are not visible in this listing.
 */
604 static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
606 /* Only one memory region (or negative)? Ignore it */
611 unsigned long long start = biosmap->addr;
612 unsigned long long size = biosmap->size;
613 unsigned long long end = start + size;
614 unsigned long type = biosmap->type;
616 /* Overflow in 64 bits? Ignore the memory map. */
621 * Some BIOSes claim RAM in the 640k - 1M region.
622 * Not right. Fix it up.
624 if (type == E820_RAM) {
625 if (start < 0x100000ULL && end > 0xA0000ULL) {
626 if (start < 0xA0000ULL)
627 add_memory_region(start, 0xA0000ULL-start, type);
628 if (end <= 0x100000ULL)
634 add_memory_region(start, size, type);
635 } while (biosmap++,--nr_map);
/*
 * Two definitions of copy_edd() appear below.  The first, inside the
 * CONFIG_EDD / CONFIG_EDD_MODULE conditional, copies the MBR signatures and
 * EDD buffers into the global edd structure with memcpy() and stores the two
 * counts.  NOTE(review): the directives separating it from the second
 * definition, and that definition's body, are not visible in this listing;
 * presumably the second one is an empty stub for kernels without EDD.
 */
639 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
641 #ifdef CONFIG_EDD_MODULE
645 * copy_edd() - Copy the BIOS EDD information
646 * from boot_params into a safe place.
649 static inline void copy_edd(void)
651 memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
652 memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
653 edd.mbr_signature_nr = EDD_MBR_SIG_NR;
654 edd.edd_info_nr = EDD_NR;
657 static inline void copy_edd(void)
663 * Do NOT EVER look at the BIOS memory size location.
664 * It does not work on many machines.
/* Fixed constant 0x9f000; deliberately not read from the BIOS. */
666 #define LOWMEMSIZE() (0x9f000)
/* 0xdeadbeef is the initial value; the "crashdump=" option in
   parse_cmdline_early() overwrites it with the value parsed by memparse(). */
668 unsigned long crashdump_addr = 0xdeadbeef;
/*
 * parse_cmdline_early() - scan the boot command line for options that must
 * take effect before the memory map is used.
 * @cmdline_p: receives the address of command_line on return.
 *
 * Reads saved_command_line (forced to be NUL-terminated at
 * COMMAND_LINE_SIZE-1) and writes into command_line.  Options recognised in
 * the visible code, all matched with memcmp() on a fixed prefix length:
 *   mem=nopentium         clears X86_FEATURE_PSE in boot_cpu_data
 *   mem=<size>            limit_regions(size)
 *   memmap=exactmap       (handling not visible)
 *   memmap=<size>@<start> add_memory_region(..., E820_RAM)
 *   memmap=<size>#<start> add_memory_region(..., E820_ACPI)
 *   memmap=<size>$<start> add_memory_region(..., E820_RESERVED)
 *   memmap=<size>         limit_regions(size)
 *   noexec=               noexec_setup()
 *   maxcpus=              (CONFIG_X86_SMP) sets maxcpus
 *   acpi=off|force|strict|ht|noirq, pci=noacpi, acpi_sci=edge|level|high|low,
 *   acpi_skip_timer_override, noapic   (all under CONFIG_ACPI_BOOT)
 *   highmem=<size>        highmem_pages = size >> PAGE_SHIFT
 *   dump                  (handling not visible)
 *   crashdump=<addr>      sets crashdump_addr
 *   vmalloc=<size>        sets __VMALLOC_RESERVE
 * Finally prints the "user-defined physical RAM map".
 *
 * NOTE(review): the enclosing loop, the per-character copy, the bodies of
 * most acpi= branches and the condition guarding the final printout are not
 * visible in this listing.  Two things to confirm against the full source:
 * "dump" is tested with a plain "if" on a 4-byte prefix (so it is not part
 * of the preceding else-if chain), and the "noapic" branch sits inside the
 * CONFIG_ACPI_BOOT region according to the #endif comments.
 */
670 static void __init parse_cmdline_early (char ** cmdline_p)
672 char c = ' ', *to = command_line, *from = saved_command_line;
676 /* Save unparsed command line copy for /proc/cmdline */
677 saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
683 * "mem=nopentium" disables the 4MB page tables.
684 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
685 * to <mem>, overriding the bios size.
686 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
687 * <start> to <start>+<mem>, overriding the bios size.
689 * HPA tells me bootloaders need to parse mem=, so no new
690 * option should be mem= [also see Documentation/i386/boot.txt]
692 if (!memcmp(from, "mem=", 4)) {
693 if (to != command_line)
695 if (!memcmp(from+4, "nopentium", 9)) {
697 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
700 /* If the user specifies memory size, we
701 * limit the BIOS-provided memory map to
702 * that size. exactmap can be used to specify
703 * the exact map. mem=number can be used to
704 * trim the existing memory map.
706 unsigned long long mem_size;
708 mem_size = memparse(from+4, &from);
709 limit_regions(mem_size);
714 else if (!memcmp(from, "memmap=", 7)) {
715 if (to != command_line)
717 if (!memcmp(from+7, "exactmap", 8)) {
718 /* If we are doing a crash dump, we
719 * still need to know the real mem
727 /* If the user specifies memory size, we
728 * limit the BIOS-provided memory map to
729 * that size. exactmap can be used to specify
730 * the exact map. mem=number can be used to
731 * trim the existing memory map.
733 unsigned long long start_at, mem_size;
735 mem_size = memparse(from+7, &from);
737 start_at = memparse(from+1, &from);
738 add_memory_region(start_at, mem_size, E820_RAM);
739 } else if (*from == '#') {
740 start_at = memparse(from+1, &from);
741 add_memory_region(start_at, mem_size, E820_ACPI);
742 } else if (*from == '$') {
743 start_at = memparse(from+1, &from);
744 add_memory_region(start_at, mem_size, E820_RESERVED);
746 limit_regions(mem_size);
752 else if (!memcmp(from, "noexec=", 7))
753 noexec_setup(from + 7);
756 #ifdef CONFIG_X86_SMP
758 * If the BIOS enumerates physical processors before logical,
759 * maxcpus=N at enumeration-time can be used to disable HT.
761 else if (!memcmp(from, "maxcpus=", 8)) {
762 extern unsigned int maxcpus;
764 maxcpus = simple_strtoul(from + 8, NULL, 0);
768 #ifdef CONFIG_ACPI_BOOT
769 /* "acpi=off" disables both ACPI table parsing and interpreter */
770 else if (!memcmp(from, "acpi=off", 8)) {
774 /* acpi=force to over-ride black-list */
775 else if (!memcmp(from, "acpi=force", 10)) {
781 /* acpi=strict disables out-of-spec workarounds */
782 else if (!memcmp(from, "acpi=strict", 11)) {
786 /* Limit ACPI just to boot-time to enable HT */
787 else if (!memcmp(from, "acpi=ht", 7)) {
793 /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
794 else if (!memcmp(from, "pci=noacpi", 10)) {
797 /* "acpi=noirq" disables ACPI interrupt routing */
798 else if (!memcmp(from, "acpi=noirq", 10)) {
802 else if (!memcmp(from, "acpi_sci=edge", 13))
803 acpi_sci_flags.trigger = 1;
805 else if (!memcmp(from, "acpi_sci=level", 14))
806 acpi_sci_flags.trigger = 3;
808 else if (!memcmp(from, "acpi_sci=high", 13))
809 acpi_sci_flags.polarity = 1;
811 else if (!memcmp(from, "acpi_sci=low", 12))
812 acpi_sci_flags.polarity = 3;
814 #ifdef CONFIG_X86_IO_APIC
815 else if (!memcmp(from, "acpi_skip_timer_override", 24))
816 acpi_skip_timer_override = 1;
819 #ifdef CONFIG_X86_LOCAL_APIC
820 /* disable IO-APIC */
821 else if (!memcmp(from, "noapic", 6))
822 disable_ioapic_setup();
823 #endif /* CONFIG_X86_LOCAL_APIC */
824 #endif /* CONFIG_ACPI_BOOT */
827 * highmem=size forces highmem to be exactly 'size' bytes.
828 * This works even on boxes that have no highmem otherwise.
829 * This also works to reduce highmem size on bigger boxes.
831 else if (!memcmp(from, "highmem=", 8))
832 highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
834 if (!memcmp(from, "dump", 4))
837 if (c == ' ' && !memcmp(from, "crashdump=", 10))
838 crashdump_addr = memparse(from+10, &from);
841 * vmalloc=size forces the vmalloc area to be exactly 'size'
842 * bytes. This can be used to increase (or decrease) the
843 * vmalloc area - the default is 128m.
845 else if (!memcmp(from, "vmalloc=", 8))
846 __VMALLOC_RESERVE = memparse(from+8, &from);
852 if (COMMAND_LINE_SIZE <= ++len)
857 *cmdline_p = command_line;
859 printk(KERN_INFO "user-defined physical RAM map:\n");
860 print_memory_map("user");
865 * Callback for efi_memory_walk.
/*
 * Passed to efi_memmap_walk() by find_max_pfn(), which supplies &max_pfn as
 * arg.  Computes pfn = PFN_UP(end - 1) for the range it is given.
 * NOTE(review): the return type line and the statements that compare and
 * store pfn are not visible in this listing.
 */
868 efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
870 unsigned long *max_pfn = arg, pfn;
873 pfn = PFN_UP(end -1);
882 * Find the highest page frame number we have available
/*
 * find_max_pfn() - scan the firmware memory map for usable RAM.
 *
 * Two paths are visible: an EFI walk using efi_find_max_pfn() with &max_pfn,
 * and a loop over e820.map[] that looks only at E820_RAM entries and
 * converts each to a page-frame range (start rounded up, end rounded down).
 *
 * NOTE(review): the condition selecting the EFI path and the statements
 * that update max_pfn inside the e820 loop are not visible in this listing.
 */
884 void __init find_max_pfn(void)
890 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
894 for (i = 0; i < e820.nr_map; i++) {
895 unsigned long start, end;
897 if (e820.map[i].type != E820_RAM)
899 start = PFN_UP(e820.map[i].addr);
900 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
909 * Determine low and high memory ranges:
/*
 * find_max_low_pfn() - split max_pfn into low memory and high memory.
 *
 * Starts from max_low_pfn = max_pfn.  When that exceeds MAXMEM_PFN:
 *  - an unset highmem_pages (-1) defaults to max_pfn - MAXMEM_PFN;
 *  - a smaller user value lowers max_pfn to MAXMEM_PFN + highmem_pages;
 *  - a larger user value is reported as unavailable;
 *  - max_low_pfn becomes MAXMEM_PFN;
 *  - without CONFIG_HIGHMEM, max_pfn is clamped to MAXMEM_PFN with warnings;
 *    with CONFIG_HIGHMEM but without CONFIG_X86_PAE, max_pfn is clamped to
 *    MAX_NONPAE_PFN.
 * Otherwise a user-specified highmem size is validated (it must be smaller
 * than max_pfn and leave at least 64MB of lowmem) and subtracted from
 * max_low_pfn; on a non-highmem kernel it is reported as ignored.
 *
 * highmem_pages is an unsigned int, so the comparisons with -1 rely on the
 * usual conversion of -1 to the maximum unsigned value.
 *
 * NOTE(review): the else keywords, the statements following the error
 * messages and the return statement are not visible in this listing.
 */
911 unsigned long __init find_max_low_pfn(void)
913 unsigned long max_low_pfn;
915 max_low_pfn = max_pfn;
916 if (max_low_pfn > MAXMEM_PFN) {
917 if (highmem_pages == -1)
918 highmem_pages = max_pfn - MAXMEM_PFN;
919 if (highmem_pages + MAXMEM_PFN < max_pfn)
920 max_pfn = MAXMEM_PFN + highmem_pages;
921 if (highmem_pages + MAXMEM_PFN > max_pfn) {
922 printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
925 max_low_pfn = MAXMEM_PFN;
926 #ifndef CONFIG_HIGHMEM
927 /* Maximum memory usable is what is directly addressable */
928 printk(KERN_WARNING "Warning only %ldMB will be used.\n",
930 if (max_pfn > MAX_NONPAE_PFN)
931 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
933 printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
934 max_pfn = MAXMEM_PFN;
935 #else /* !CONFIG_HIGHMEM */
936 #ifndef CONFIG_X86_PAE
937 if (max_pfn > MAX_NONPAE_PFN) {
938 max_pfn = MAX_NONPAE_PFN;
939 printk(KERN_WARNING "Warning only 4GB will be used.\n");
940 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
942 #endif /* !CONFIG_X86_PAE */
943 #endif /* !CONFIG_HIGHMEM */
945 if (highmem_pages == -1)
947 #ifdef CONFIG_HIGHMEM
948 if (highmem_pages >= max_pfn) {
949 printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
953 if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
954 printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
957 max_low_pfn -= highmem_pages;
961 printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
968 * Free all available memory for boot time allocation. Used
969 * as a callback function by efi_memory_walk()
/*
 * Passed to efi_memmap_walk() by register_bootmem_low_pages().  Ranges
 * starting at or above (max_low_pfn + 1) << PAGE_SHIFT are tested first; an
 * end at or above that limit is clamped to it; the resulting byte range is
 * handed to free_bootmem().
 * NOTE(review): the return type line, the action of the first test and the
 * return statement are not visible in this listing.
 */
973 free_available_memory(unsigned long start, unsigned long end, void *arg)
975 /* check max_low_pfn */
976 if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
978 if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
979 end = (max_low_pfn + 1) << PAGE_SHIFT;
981 free_bootmem(start, end - start);
986 * Register fully available low RAM pages with the bootmem allocator.
/*
 * register_bootmem_low_pages() - release usable low RAM to the bootmem
 * allocator.
 * @max_low_pfn: upper page-frame bound for low memory.
 *
 * Visible paths: an EFI walk with free_available_memory(), and a loop over
 * e820.map[] that considers only E820_RAM entries, rounds the start up and
 * the end down to page frames, caps the end at max_low_pfn, and calls
 * free_bootmem() on whatever is left when the range is non-empty.
 *
 * NOTE(review): the condition selecting the EFI path and the statements
 * following each "if" test in the loop are not visible in this listing.
 */
988 static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
993 efi_memmap_walk(free_available_memory, NULL);
996 for (i = 0; i < e820.nr_map; i++) {
997 unsigned long curr_pfn, last_pfn, size;
999 * Reserve usable low memory
1001 if (e820.map[i].type != E820_RAM)
1004 * We are rounding up the start address of usable memory:
1006 curr_pfn = PFN_UP(e820.map[i].addr);
1007 if (curr_pfn >= max_low_pfn)
1010 * ... and at the end of the usable range downwards:
1012 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1014 if (last_pfn > max_low_pfn)
1015 last_pfn = max_low_pfn;
1018 * .. finally, did all the rounding and playing
1019 * around just make the area go away?
1021 if (last_pfn <= curr_pfn)
1024 size = last_pfn - curr_pfn;
1025 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
1030 * workaround for Dell systems that neglect to reserve EBDA
/*
 * Reserves PAGE_SIZE bytes of bootmem at the address returned by
 * get_bios_ebda().
 * NOTE(review): the declaration of addr and any test on it before the
 * reservation are not visible in this listing (numbering skips 1034, 1036).
 */
1032 static void __init reserve_ebda_region(void)
1035 addr = get_bios_ebda();
1037 reserve_bootmem(addr, PAGE_SIZE);
/*
 * setup_memory() (the version for kernels without CONFIG_DISCONTIGMEM):
 *  - min_low_pfn is the first page frame after the boot page tables
 *    (init_pg_tables_end rounded up);
 *  - max_low_pfn comes from find_max_low_pfn();
 *  - with CONFIG_HIGHMEM, highstart_pfn/highend_pfn start at max_pfn and
 *    highstart_pfn drops to max_low_pfn when memory exists above it;
 *  - the HIGHMEM and LOWMEM sizes are logged and setup_bootmem_allocator()
 *    is called.
 * NOTE(review): the return statement is not visible in this listing.
 */
1040 #ifndef CONFIG_DISCONTIGMEM
1041 void __init setup_bootmem_allocator(void);
1042 static unsigned long __init setup_memory(void)
1045 * partially used pages are not usable - thus
1046 * we are rounding upwards:
1048 min_low_pfn = PFN_UP(init_pg_tables_end);
1052 max_low_pfn = find_max_low_pfn();
1054 #ifdef CONFIG_HIGHMEM
1055 highstart_pfn = highend_pfn = max_pfn;
1056 if (max_pfn > max_low_pfn) {
1057 highstart_pfn = max_low_pfn;
1059 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
1060 pages_to_mb(highend_pfn - highstart_pfn));
1062 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
1063 pages_to_mb(max_low_pfn));
1065 setup_bootmem_allocator();
/*
 * zone_sizes_init() - fill zones_size[] and pass it to free_area_init().
 *
 * max_dma is the page frame of MAX_DMA_ADDRESS.  Two layouts are visible:
 * everything up to low in ZONE_DMA, or max_dma pages in ZONE_DMA with the
 * remainder (low - max_dma) in ZONE_NORMAL and, under CONFIG_HIGHMEM,
 * highend_pfn - low pages in ZONE_HIGHMEM.
 *
 * NOTE(review): the assignment of low and the condition choosing between
 * the two layouts are not visible in this listing.  The two extern
 * declarations at the end presumably belong to the CONFIG_DISCONTIGMEM
 * branch, whose #else line is also not visible.
 */
1070 void __init zone_sizes_init(void)
1072 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
1073 unsigned int max_dma, low;
1075 max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
1079 zones_size[ZONE_DMA] = low;
1081 zones_size[ZONE_DMA] = max_dma;
1082 zones_size[ZONE_NORMAL] = low - max_dma;
1083 #ifdef CONFIG_HIGHMEM
1084 zones_size[ZONE_HIGHMEM] = highend_pfn - low;
1087 free_area_init(zones_size);
1090 extern unsigned long setup_memory(void);
1091 extern void zone_sizes_init(void);
1092 #endif /* !CONFIG_DISCONTIGMEM */
/*
 * setup_bootmem_allocator() - initialise bootmem and carve out the regions
 * that must never be handed out.
 *
 * Visible sequence:
 *  1. init_bootmem(min_low_pfn, max_low_pfn), keeping the returned bitmap
 *     size;
 *  2. register_bootmem_low_pages(max_low_pfn);
 *  3. reserve from HIGH_MEMORY up to the end of the bootmem bitmap;
 *  4. reserve physical page 0;
 *  5. reserve_ebda_region();
 *  6. on AMD family 6 CPUs, reserve the 4096 bytes just below 0xa0000;
 *  7. reserve one page at PAGE_SIZE for the stack/trampoline;
 *  8. CONFIG_ACPI_SLEEP: acpi_reserve_bootmem();
 *  9. CONFIG_BLK_DEV_INITRD: when a loader supplied an initrd that ends at
 *     or below max_low_pfn << PAGE_SHIFT, reserve it and set
 *     initrd_start/initrd_end to its virtual addresses; otherwise log that
 *     the initrd extends beyond end of memory.
 *
 * NOTE(review): any condition guarding step 7, the SMP-config call under
 * CONFIG_X86_FIND_SMP_CONFIG, the left-hand side of the initrd_start
 * assignment and the statements after the initrd error message are not
 * visible in this listing.
 */
1094 void __init setup_bootmem_allocator(void)
1096 unsigned long bootmap_size;
1098 * Initialize the boot-time allocator (with low memory only):
1100 bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
1102 register_bootmem_low_pages(max_low_pfn);
1105 * Reserve the bootmem bitmap itself as well. We do this in two
1106 * steps (first step was init_bootmem()) because this catches
1107 * the (very unlikely) case of us accidentally initializing the
1108 * bootmem allocator with an invalid RAM area.
1110 reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(min_low_pfn) +
1111 bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
1114 * reserve physical page 0 - it's a special BIOS page on many boxes,
1115 * enabling clean reboots, SMP operation, laptop functions.
1117 reserve_bootmem(0, PAGE_SIZE);
1119 /* reserve EBDA region, it's a 4K region */
1120 reserve_ebda_region();
1122 /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
1123 PCI prefetch into it (errata #56). Usually the page is reserved anyways,
1124 unless you have no PS/2 mouse plugged in. */
1125 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
1126 boot_cpu_data.x86 == 6)
1127 reserve_bootmem(0xa0000 - 4096, 4096);
1131 * But first pinch a few for the stack/trampoline stuff
1132 * FIXME: Don't need the extra page at 4K, but need to fix
1133 * trampoline before removing it. (see the GDT stuff)
1135 reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
1137 #ifdef CONFIG_ACPI_SLEEP
1139 * Reserve low memory region for sleep support.
1141 acpi_reserve_bootmem();
1143 #ifdef CONFIG_X86_FIND_SMP_CONFIG
1145 * Find and reserve possible boot-time SMP configuration:
1150 #ifdef CONFIG_BLK_DEV_INITRD
1151 if (LOADER_TYPE && INITRD_START) {
1152 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
1153 reserve_bootmem(INITRD_START, INITRD_SIZE);
1155 INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
1156 initrd_end = initrd_start+INITRD_SIZE;
1159 printk(KERN_ERR "initrd extends beyond end of memory "
1160 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
1161 INITRD_START + INITRD_SIZE,
1162 max_low_pfn << PAGE_SHIFT);
1170 * The node 0 pgdat is initialized before all of these because
1171 * it's needed for bootmem. node>0 pgdats have their virtual
1172 * space allocated before the pagetables are in place to access
1173 * them, so they can't be cleared then.
1175 * This should all compile down to nothing when NUMA is off.
/*
 * Iterates over the online nodes and zeroes NODE_DATA(nid) for
 * sizeof(struct pglist_data) bytes.
 * NOTE(review): the numbering skips 1182, so any per-node condition placed
 * before the memset() is not visible in this listing.
 */
1177 void __init remapped_pgdat_init(void)
1181 for_each_online_node(nid) {
1183 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
1188 * Request address space for all standard RAM and ROM resources
1189 * and also for regions reported as reserved by the e820.
/*
 * legacy_init_iomem_resources() - turn e820 entries into iomem resources.
 * @code_resource: kernel code resource to nest inside a RAM resource.
 * @data_resource: kernel data resource to nest inside a RAM resource.
 *
 * For each e820 entry a struct resource is allocated with
 * alloc_bootmem_low(), named after the entry type ("System RAM",
 * "ACPI Tables", "ACPI Non-volatile Storage", otherwise "reserved"), given
 * the entry's range with IORESOURCE_MEM | IORESOURCE_BUSY and requested
 * under iomem_resource.  For every E820_RAM entry the code and data
 * resources are then requested beneath it.
 *
 * NOTE(review): the return type line and the action taken for entries whose
 * end exceeds 0x100000000 are not visible in this listing.
 */
1192 legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
1197 for (i = 0; i < e820.nr_map; i++) {
1198 struct resource *res;
1199 if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
1201 res = alloc_bootmem_low(sizeof(struct resource));
1202 switch (e820.map[i].type) {
1203 case E820_RAM: res->name = "System RAM"; break;
1204 case E820_ACPI: res->name = "ACPI Tables"; break;
1205 case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
1206 default: res->name = "reserved";
1208 res->start = e820.map[i].addr;
1209 res->end = res->start + e820.map[i].size - 1;
1210 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1211 request_resource(&iomem_resource, res);
1212 if (e820.map[i].type == E820_RAM) {
1214 * We don't know which RAM region contains kernel data,
1215 * so we try it repeatedly and let the resource manager
1218 request_resource(res, code_resource);
1219 request_resource(res, data_resource);
1225 * Request address space for all standard resources
/*
 * register_memory() - register the standard resources and choose where
 * dynamic PCI memory allocation starts.
 *
 * Visible sequence:
 *  - register RAM/ROM resources through either
 *    efi_initialize_iomem_resources() or legacy_init_iomem_resources();
 *  - request the video RAM resource and every standard_io_resources[] entry;
 *  - walk the e820 entries tracking gaps below "last" (initially 4GB) and
 *    keep a gap when it is larger than the current gapsize;
 *  - set pci_mem_start to gapstart rounded up to the next 1MB boundary and
 *    log the result.
 *
 * NOTE(review): the condition selecting the EFI path, the loop header of
 * the gap search, the initial gapsize and the statements that update
 * gapstart/gapsize/last are not visible in this listing.
 */
1227 static void __init register_memory(void)
1229 unsigned long gapstart, gapsize;
1230 unsigned long long last;
1234 efi_initialize_iomem_resources(&code_resource, &data_resource);
1236 legacy_init_iomem_resources(&code_resource, &data_resource);
1238 /* EFI systems may still have VGA */
1239 request_resource(&iomem_resource, &video_ram_resource);
1241 /* request I/O space for devices used on all i[345]86 PCs */
1242 for (i = 0; i < STANDARD_IO_RESOURCES; i++)
1243 request_resource(&ioport_resource, &standard_io_resources[i]);
1246 * Search for the bigest gap in the low 32 bits of the e820
1249 last = 0x100000000ull;
1250 gapstart = 0x10000000;
1254 unsigned long long start = e820.map[i].addr;
1255 unsigned long long end = start + e820.map[i].size;
1258 * Since "last" is at most 4GB, we know we'll
1259 * fit in 32 bits if this condition is true
1262 unsigned long gap = last - end;
1264 if (gap > gapsize) {
1274 * Start allocating dynamic PCI memory a bit into the gap,
1275 * aligned up to the nearest megabyte.
1277 * Question: should we try to pad it up a bit (do something
1278 * like " + (gapsize >> 3)" in there too?). We now have the
1281 pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;
1283 printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
1284 pci_mem_start, gapstart, gapsize);
1287 /* Use inline assembly to define this because the nops are defined
1288 as inline assembly strings in the include files and we cannot
1289 get them easily into strings. */
1290 asm("\t.data\nintelnops: "
1291 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
1292 GENERIC_NOP7 GENERIC_NOP8);
1293 asm("\t.data\nk8nops: "
1294 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
1296 asm("\t.data\nk7nops: "
1297 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
/*
 * Lookup tables into the three NOP blobs emitted above.  Each blob holds the
 * NOP sequences back to back in increasing length, so an entry's offset is
 * the sum of the shorter lengths (e.g. "+ 1 + 2 + 3" skips the 1-, 2- and
 * 3-byte sequences).  apply_alternatives() indexes these tables as
 * noptable[k] and copies k bytes from the result.
 * noptypes[] pairs a CPU feature bit with the table to use;
 * apply_alternatives() takes the first entry the boot CPU supports and
 * stops scanning at an entry whose cpuid is negative.
 * NOTE(review): the leading entries of each table, the struct declaration
 * for noptypes[] and its terminating entry are not visible in this listing.
 */
1300 extern unsigned char intelnops[], k8nops[], k7nops[];
1301 static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
1306 intelnops + 1 + 2 + 3,
1307 intelnops + 1 + 2 + 3 + 4,
1308 intelnops + 1 + 2 + 3 + 4 + 5,
1309 intelnops + 1 + 2 + 3 + 4 + 5 + 6,
1310 intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1312 static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
1318 k8nops + 1 + 2 + 3 + 4,
1319 k8nops + 1 + 2 + 3 + 4 + 5,
1320 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
1321 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1323 static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
1329 k7nops + 1 + 2 + 3 + 4,
1330 k7nops + 1 + 2 + 3 + 4 + 5,
1331 k7nops + 1 + 2 + 3 + 4 + 5 + 6,
1332 k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1336 unsigned char **noptable;
1338 { X86_FEATURE_K8, k8_nops },
1339 { X86_FEATURE_K7, k7_nops },
1343 /* Replace instructions with better alternatives for this CPU type.
1345 This runs before SMP is initialized to avoid SMP problems with
1346 self modifying code. This implies that asymmetric systems where
1347 APs have fewer capabilities than the boot processor are not handled.
1348 In this case boot with "noreplacement". */
/*
 * apply_alternatives() - patch in CPU-specific instruction replacements.
 * @start: first struct alt_instr record.
 * @end:   one past the last record.
 *
 * Chooses a NOP table (intel_nops unless the boot CPU has a feature listed
 * in noptypes[]), then for every record whose feature the boot CPU has:
 *  - asserts (BUG_ON) that the replacement is not longer than the original;
 *  - copies the replacement over the original instruction;
 *  - fills the remaining instrlen - replacementlen bytes with NOPs, k bytes
 *    at a time from noptable[k], with k tested against ASM_NOP_MAX.
 *
 * NOTE(review): the declarations of diff, i and k, the assignment of k and
 * the statements after the "if" tests are not visible in this listing.
 */
1349 void apply_alternatives(void *start, void *end)
1351 struct alt_instr *a;
1353 unsigned char **noptable = intel_nops;
1354 for (i = 0; noptypes[i].cpuid >= 0; i++) {
1355 if (boot_cpu_has(noptypes[i].cpuid)) {
1356 noptable = noptypes[i].noptable;
1360 for (a = start; (void *)a < end; a++) {
1361 if (!boot_cpu_has(a->cpuid))
1363 BUG_ON(a->replacementlen > a->instrlen);
1364 memcpy(a->instr, a->replacement, a->replacementlen);
1365 diff = a->instrlen - a->replacementlen;
1366 /* Pad the rest with nops */
1367 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
1369 if (k > ASM_NOP_MAX)
1371 memcpy(a->instr + i, noptable[k], k);
/* Init-time flag, zero by default.  NOTE(review): presumably set by the
   "noreplacement" option handler and tested in alternative_instructions();
   neither the write nor the test is visible in this listing. */
1376 static int no_replacement __initdata = 0;
/* Init-time entry point that hands the whole alt_instr table, bounded by
   the symbols __alt_instructions and __alt_instructions_end, to
   apply_alternatives().  Takes no arguments and returns nothing. */
1378 void __init alternative_instructions(void)
/* Table bounds, declared here as external arrays of struct alt_instr. */
1380 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
1383 apply_alternatives(__alt_instructions, __alt_instructions_end);
/* Handler for the "noreplacement" option string; s is the argument the
   setup machinery passes in.  NOTE(review): the body is not visible in
   this listing, so its effect and return value cannot be confirmed here. */
1386 static int __init noreplacement_setup(char *s)
/* Register the handler above for the "noreplacement" option. */
1392 __setup("noreplacement", noreplacement_setup);
/* Forward declaration; setup_arch() passes the returned string to
   print_memory_map(). */
1394 static char * __init machine_specific_memory_setup(void);
/* crashdump_reserve() is only declared when CONFIG_CRASH_DUMP_SOFTBOOT is
   defined; setup_arch() calls it under the same condition. */
1396 #ifdef CONFIG_CRASH_DUMP_SOFTBOOT
1397 extern void crashdump_reserve(void);
/* Two definitions of set_mca_bus() follow.  NOTE(review): the preprocessor
   conditional that selects between them and the body of the first one are
   not visible in this listing. */
1401 static void set_mca_bus(int x)
/* Stub variant: accepts the argument and does nothing. */
1406 static void set_mca_bus(int x) { }
1410 * Determine if we were loaded by an EFI loader. If so, then we have also been
1411 * passed the efi memmap, systab, etc., so we should use these data structures
1412 * for initialization. Note, the efi init code path is determined by the
1413 * global efi_enabled. This allows the same kernel image to be used on existing
1414 * systems (with a traditional BIOS) as well as on EFI systems.
/* Architecture-specific early initialization.
   cmdline_p: handed to parse_cmdline_early(); *cmdline_p is later searched
   for "earlyprintk=" (CONFIG_EARLY_PRINTK) and passed to
   generic_apic_probe() (CONFIG_X86_GENERICARCH).
   Returns nothing.
   NOTE(review): many lines of this function (braces, #endif lines, some
   statements) are missing from this listing; the notes below describe only
   what is visible. */
1416 void __init setup_arch(char **cmdline_p)
1418 unsigned long max_low_pfn;
/* Make the CPU data collected in new_cpu_data the contents of
   boot_cpu_data, then run the pre-setup hook. */
1420 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
1421 pre_setup_arch_hook();
1425 * FIXME: This isn't an official loader_type right
1426 * now but does currently work with elilo.
1427 * If we were configured as an EFI kernel, check to make
1428 * sure that we were loaded correctly from elilo and that
1429 * the system table is valid. If not, then initialize normally.
/* NOTE(review): the statement guarded by this test is not visible here. */
1432 if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
/* Copy the values behind ORIG_ROOT_DEV, DRIVE_INFO, SCREEN_INFO, EDID_INFO,
   APM_BIOS_INFO, IST_INFO and VIDEO_MODE into kernel globals (presumably
   boot-parameter accessors -- confirm against their definitions). */
1436 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
1437 drive_info = DRIVE_INFO;
1438 screen_info = SCREEN_INFO;
1439 edid_info = EDID_INFO;
1440 apm_info.bios = APM_BIOS_INFO;
1441 ist_info = IST_INFO;
1442 saved_videomode = VIDEO_MODE;
/* The system description table is only consulted when it is non-empty:
   byte 3 bit 1 feeds set_mca_bus(), bytes 0..2 give the machine id,
   submodel id and BIOS revision. */
1443 if( SYS_DESC_TABLE.length != 0 ) {
1444 set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2);
1445 machine_id = SYS_DESC_TABLE.table[0];
1446 machine_submodel_id = SYS_DESC_TABLE.table[1];
1447 BIOS_revision = SYS_DESC_TABLE.table[2];
1449 bootloader_type = LOADER_TYPE;
/* Decode RAMDISK_FLAGS: the masked start value plus the prompt and load
   flags, each reduced to 0 or 1. */
1451 #ifdef CONFIG_BLK_DEV_RAM
1452 rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
1453 rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
1454 rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
/* Print the memory map; the string returned by
   machine_specific_memory_setup() is passed to print_memory_map(). */
1460 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1461 print_memory_map(machine_specific_memory_setup());
/* Drop MS_RDONLY from the root mount flags unless MOUNT_ROOT_RDONLY is
   set. */
1466 if (!MOUNT_ROOT_RDONLY)
1467 root_mountflags &= ~MS_RDONLY;
/* Record the _text/_etext/_edata boundaries in init_mm (virtual addresses)
   and in the code/data resources (physical addresses, end inclusive). */
1468 init_mm.start_code = (unsigned long) _text;
1469 init_mm.end_code = (unsigned long) _etext;
1470 init_mm.end_data = (unsigned long) _edata;
1471 init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
1473 code_resource.start = virt_to_phys(_text);
1474 code_resource.end = virt_to_phys(_etext)-1;
1475 data_resource.start = virt_to_phys(_etext);
1476 data_resource.end = virt_to_phys(_edata)-1;
1478 parse_cmdline_early(cmdline_p);
/* NOTE(review): max_low_pfn is assigned here; its later use is not visible
   in this listing. */
1480 max_low_pfn = setup_memory();
1483 * NOTE: before this point _nobody_ is allowed to allocate
1484 * any memory using the bootmem allocator. Although the
1485 * alloctor is now initialised only the first 8Mb of the kernel
1486 * virtual address space has been mapped. All allocations before
1487 * paging_init() has completed must use the alloc_bootmem_low_pages()
1488 * variant (which allocates DMA'able memory) and care must be taken
1489 * not to exceed the 8Mb limit.
1493 smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
1496 remapped_pgdat_init();
1500 * NOTE: at this point the bootmem allocator is fully available.
/* Locate an "earlyprintk=" option in the command line and hand it to
   setup_early_printk().  NOTE(review): any check of s before the call is
   not visible in this listing. */
1503 #ifdef CONFIG_EARLY_PRINTK
1505 char *s = strstr(*cmdline_p, "earlyprintk=");
1507 extern void setup_early_printk(char *);
1509 setup_early_printk(s);
1510 printk("early console enabled\n");
1516 #ifdef CONFIG_CRASH_DUMP_SOFTBOOT
1517 crashdump_reserve(); /* Preserve crash dump state from prev boot */
1522 #ifdef CONFIG_X86_GENERICARCH
1523 generic_apic_probe(*cmdline_p);
1528 #ifdef CONFIG_ACPI_BOOT
1530 * Parse the ACPI tables for possible boot-time SMP configuration.
1532 acpi_boot_table_init();
1536 #ifdef CONFIG_X86_LOCAL_APIC
/* NOTE(review): the statement guarded by this test is not visible here. */
1537 if (smp_found_config)
/* Console selection: the VGA console is used unless EFI is enabled and
   reports address 0xa0000 as conventional memory; without
   CONFIG_VGA_CONSOLE the dummy console is used when configured. */
1544 #if defined(CONFIG_VGA_CONSOLE)
1545 if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
1546 conswitchp = &vga_con;
1547 #elif defined(CONFIG_DUMMY_CONSOLE)
1548 conswitchp = &dummy_con;
1553 #include "setup_arch_post.h"
1557 * c-file-style:"k&r"