2 * linux/arch/i386/kernel/setup.c
4 * Copyright (C) 1995 Linus Torvalds
6 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
8 * Memory region support
9 * David Parsons <orc@pell.chi.il.us>, July-August 1999
11 * Added E820 sanitization routine (removes overlapping memory regions);
12 * Brian Moyle <bmoyle@mvista.com>, February 2001
14 * Moved CPU detection code to cpu/${cpu}.c
15 * Patrick Mochel <mochel@osdl.org>, March 2002
17 * Provisions for empty E820 memory regions (reported by certain BIOSes).
18 * Alex Achenbach <xela@slit.de>, December 2002.
23 * This file handles the architecture-dependent parts of initialization
26 #include <linux/sched.h>
28 #include <linux/tty.h>
29 #include <linux/ioport.h>
30 #include <linux/acpi.h>
31 #include <linux/apm_bios.h>
32 #include <linux/initrd.h>
33 #include <linux/bootmem.h>
34 #include <linux/seq_file.h>
35 #include <linux/console.h>
36 #include <linux/root_dev.h>
37 #include <linux/highmem.h>
38 #include <linux/module.h>
39 #include <linux/efi.h>
40 #include <linux/init.h>
41 #include <linux/edd.h>
42 #include <video/edid.h>
44 #include <asm/mpspec.h>
45 #include <asm/setup.h>
46 #include <asm/arch_hooks.h>
47 #include <asm/sections.h>
48 #include <asm/io_apic.h>
51 #include <asm/crash_dump.h>
52 #include "setup_arch_pre.h"
53 #include <bios_ebda.h>
55 /* This value is set up by the early boot code to point to the value
56 immediately after the boot time page tables. It contains a *physical*
57 address, and must not be in the .bss segment! */
58 unsigned long init_pg_tables_end __initdata = ~0UL;
60 int disable_pse __initdata = 0;
61 unsigned int dump_enabled;
69 EXPORT_SYMBOL(efi_enabled);
72 /* cpu data as detected by the assembly code in head.S */
73 struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
74 /* common cpu data for all cpus */
75 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
77 unsigned long mmu_cr4_features;
78 EXPORT_SYMBOL_GPL(mmu_cr4_features);
80 #ifdef CONFIG_ACPI_INTERPRETER
81 int acpi_disabled = 0;
83 int acpi_disabled = 1;
85 EXPORT_SYMBOL(acpi_disabled);
87 #ifdef CONFIG_ACPI_BOOT
88 int __initdata acpi_force = 0;
89 extern acpi_interrupt_flags acpi_sci_flags;
93 /* for MCA, but anyone else can use it if they want */
94 unsigned int machine_id;
95 unsigned int machine_submodel_id;
96 unsigned int BIOS_revision;
97 unsigned int mca_pentium_flag;
99 /* For PCI or other memory-mapped resources */
100 unsigned long pci_mem_start = 0x10000000;
102 /* user-defined highmem size, in pages (parsed from "highmem=" and shifted by PAGE_SHIFT); -1, the initial value, means the user did not specify one */
103 static unsigned int highmem_pages = -1;
108 struct drive_info_struct { char dummy[32]; } drive_info;
109 struct screen_info screen_info;
110 struct apm_info apm_info;
111 struct sys_desc_table_struct {
112 unsigned short length;
113 unsigned char table[0];
115 struct edid_info edid_info;
116 struct ist_info ist_info;
119 unsigned char aux_device_present;
121 extern void early_cpu_init(void);
122 extern void dmi_scan_machine(void);
123 extern void generic_apic_probe(char *);
124 extern int root_mountflags;
126 unsigned long saved_videomode;
/*
 * Bit layout of RAMDISK_FLAGS as decoded in setup_arch():
 *   bits 0-10 (0x07FF) -> rd_image_start
 *   bit 14    (0x4000) -> rd_doload
 *   bit 15    (0x8000) -> rd_prompt
 */
128 #define RAMDISK_IMAGE_START_MASK 0x07FF
129 #define RAMDISK_PROMPT_FLAG 0x8000
130 #define RAMDISK_LOAD_FLAG 0x4000
132 static char command_line[COMMAND_LINE_SIZE];
134 unsigned char __initdata boot_params[PARAM_SIZE];
136 static struct resource data_resource = {
137 .name = "Kernel data",
140 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
143 static struct resource code_resource = {
144 .name = "Kernel code",
147 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
150 static struct resource system_rom_resource = {
151 .name = "System ROM",
154 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
157 static struct resource extension_rom_resource = {
158 .name = "Extension ROM",
161 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
164 static struct resource adapter_rom_resources[] = { {
165 .name = "Adapter ROM",
168 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
170 .name = "Adapter ROM",
173 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
175 .name = "Adapter ROM",
178 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
180 .name = "Adapter ROM",
183 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
185 .name = "Adapter ROM",
188 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
190 .name = "Adapter ROM",
193 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
/* Number of entries in adapter_rom_resources[]; bounds the adapter ROM scan in probe_roms(). */
196 #define ADAPTER_ROM_RESOURCES \
197 (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
199 static struct resource video_rom_resource = {
203 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
206 static struct resource video_ram_resource = {
207 .name = "Video RAM area",
210 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
213 static struct resource standard_io_resources[] = { {
217 .flags = IORESOURCE_BUSY | IORESOURCE_IO
222 .flags = IORESOURCE_BUSY | IORESOURCE_IO
227 .flags = IORESOURCE_BUSY | IORESOURCE_IO
232 .flags = IORESOURCE_BUSY | IORESOURCE_IO
237 .flags = IORESOURCE_BUSY | IORESOURCE_IO
239 .name = "dma page reg",
242 .flags = IORESOURCE_BUSY | IORESOURCE_IO
247 .flags = IORESOURCE_BUSY | IORESOURCE_IO
252 .flags = IORESOURCE_BUSY | IORESOURCE_IO
257 .flags = IORESOURCE_BUSY | IORESOURCE_IO
/* Number of entries in standard_io_resources[]; loop bound when register_memory() requests the standard I/O port ranges. */
260 #define STANDARD_IO_RESOURCES \
261 (sizeof standard_io_resources / sizeof standard_io_resources[0])
/* Non-zero when the 16-bit value read at address x equals 0xaa55; probe_roms() tests this before treating a region as a ROM. */
263 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
265 static int __init romchecksum(unsigned char *rom, unsigned long length)
267 unsigned char *p, sum = 0;
269 for (p = rom; p < rom + length; p++)
274 static void __init probe_roms(void)
276 unsigned long start, length, upper;
281 upper = adapter_rom_resources[0].start;
282 for (start = video_rom_resource.start; start < upper; start += 2048) {
283 rom = isa_bus_to_virt(start);
284 if (!romsignature(rom))
287 video_rom_resource.start = start;
289 /* 0 < length <= 0x7f * 512, historically */
290 length = rom[2] * 512;
292 /* if checksum okay, trust length byte */
293 if (length && romchecksum(rom, length))
294 video_rom_resource.end = start + length - 1;
296 request_resource(&iomem_resource, &video_rom_resource);
300 start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
305 request_resource(&iomem_resource, &system_rom_resource);
306 upper = system_rom_resource.start;
308 /* check for extension rom (ignore length byte!) */
309 rom = isa_bus_to_virt(extension_rom_resource.start);
310 if (romsignature(rom)) {
311 length = extension_rom_resource.end - extension_rom_resource.start + 1;
312 if (romchecksum(rom, length)) {
313 request_resource(&iomem_resource, &extension_rom_resource);
314 upper = extension_rom_resource.start;
318 /* check for adapter roms on 2k boundaries */
319 for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
320 rom = isa_bus_to_virt(start);
321 if (!romsignature(rom))
324 /* 0 < length <= 0x7f * 512, historically */
325 length = rom[2] * 512;
327 /* but accept any length that fits if checksum okay */
328 if (!length || start + length > upper || !romchecksum(rom, length))
331 adapter_rom_resources[i].start = start;
332 adapter_rom_resources[i].end = start + length - 1;
333 request_resource(&iomem_resource, &adapter_rom_resources[i]);
335 start = adapter_rom_resources[i++].end & ~2047UL;
339 static void __init limit_regions(unsigned long long size)
341 unsigned long long current_addr = 0;
345 for (i = 0; i < memmap.nr_map; i++) {
346 current_addr = memmap.map[i].phys_addr +
347 (memmap.map[i].num_pages << 12);
348 if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) {
349 if (current_addr >= size) {
350 memmap.map[i].num_pages -=
351 (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
352 memmap.nr_map = i + 1;
358 for (i = 0; i < e820.nr_map; i++) {
359 if (e820.map[i].type == E820_RAM) {
360 current_addr = e820.map[i].addr + e820.map[i].size;
361 if (current_addr >= size) {
362 e820.map[i].size -= current_addr-size;
370 static void __init add_memory_region(unsigned long long start,
371 unsigned long long size, int type)
379 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
383 e820.map[x].addr = start;
384 e820.map[x].size = size;
385 e820.map[x].type = type;
388 } /* add_memory_region */
392 static void __init print_memory_map(char *who)
396 for (i = 0; i < e820.nr_map; i++) {
397 printk(" %s: %016Lx - %016Lx ", who,
399 e820.map[i].addr + e820.map[i].size);
400 switch (e820.map[i].type) {
401 case E820_RAM: printk("(usable)\n");
404 printk("(reserved)\n");
407 printk("(ACPI data)\n");
410 printk("(ACPI NVS)\n");
412 default: printk("type %lu\n", e820.map[i].type);
419 * Sanitize the BIOS e820 map.
421 * Some e820 responses include overlapping entries. The following
422 * replaces the original e820 map with a new one, removing overlaps.
425 struct change_member {
426 struct e820entry *pbios; /* pointer to original bios entry */
427 unsigned long long addr; /* address for this change point */
/* Scratch storage used by sanitize_e820_map(); all of it is __initdata. */
/* Backing storage for the change points (one start and one end address per non-empty entry). */
429 struct change_member change_point_list[2*E820MAX] __initdata;
/* Pointers into change_point_list; it is these pointers that get sorted by address. */
430 struct change_member *change_point[2*E820MAX] __initdata;
/* Original entries whose start has been seen but whose end has not yet been reached. */
431 struct e820entry *overlap_list[E820MAX] __initdata;
/* The rebuilt, non-overlapping map; copied back over the caller's biosmap at the end. */
432 struct e820entry new_bios[E820MAX] __initdata;
434 static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
436 struct change_member *change_tmp;
437 unsigned long current_type, last_type;
438 unsigned long long last_addr;
439 int chgidx, still_changing;
442 int old_nr, new_nr, chg_nr;
446 Visually we're performing the following (1,2,3,4 = memory types)...
448 Sample memory map (w/overlaps):
449 ____22__________________
450 ______________________4_
451 ____1111________________
452 _44_____________________
453 11111111________________
454 ____________________33__
455 ___________44___________
456 __________33333_________
457 ______________22________
458 ___________________2222_
459 _________111111111______
460 _____________________11_
461 _________________4______
463 Sanitized equivalent (no overlap):
464 1_______________________
465 _44_____________________
466 ___1____________________
467 ____22__________________
468 ______11________________
469 _________1______________
470 __________3_____________
471 ___________44___________
472 _____________33_________
473 _______________2________
474 ________________1_______
475 _________________4______
476 ___________________2____
477 ____________________33__
478 ______________________4_
481 /* if there's only one memory region, don't bother */
487 /* bail out if we find any unreasonable addresses in bios map */
488 for (i=0; i<old_nr; i++)
489 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
492 /* create pointers for initial change-point information (for sorting) */
493 for (i=0; i < 2*old_nr; i++)
494 change_point[i] = &change_point_list[i];
496 /* record all known change-points (starting and ending addresses),
497 omitting those that are for empty memory regions */
499 for (i=0; i < old_nr; i++) {
500 if (biosmap[i].size != 0) {
501 change_point[chgidx]->addr = biosmap[i].addr;
502 change_point[chgidx++]->pbios = &biosmap[i];
503 change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
504 change_point[chgidx++]->pbios = &biosmap[i];
507 chg_nr = chgidx; /* true number of change-points */
509 /* sort change-point list by memory addresses (low -> high) */
511 while (still_changing) {
513 for (i=1; i < chg_nr; i++) {
514 /* if <current_addr> < <last_addr>, swap */
515 /* or, if current=<start_addr> & last=<end_addr>, swap */
516 if ((change_point[i]->addr < change_point[i-1]->addr) ||
517 ((change_point[i]->addr == change_point[i-1]->addr) &&
518 (change_point[i]->addr == change_point[i]->pbios->addr) &&
519 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
522 change_tmp = change_point[i];
523 change_point[i] = change_point[i-1];
524 change_point[i-1] = change_tmp;
530 /* create a new bios memory map, removing overlaps */
531 overlap_entries=0; /* number of entries in the overlap table */
532 new_bios_entry=0; /* index for creating new bios map entries */
533 last_type = 0; /* start with undefined memory type */
534 last_addr = 0; /* start with 0 as last starting address */
535 /* loop through change-points, determining effect on the new bios map */
536 for (chgidx=0; chgidx < chg_nr; chgidx++)
538 /* keep track of all overlapping bios entries */
539 if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
541 /* add map entry to overlap list (> 1 entry implies an overlap) */
542 overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
546 /* remove entry from list (order independent, so swap with last) */
547 for (i=0; i<overlap_entries; i++)
549 if (overlap_list[i] == change_point[chgidx]->pbios)
550 overlap_list[i] = overlap_list[overlap_entries-1];
554 /* if there are overlapping entries, decide which "type" to use */
555 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
557 for (i=0; i<overlap_entries; i++)
558 if (overlap_list[i]->type > current_type)
559 current_type = overlap_list[i]->type;
560 /* continue building up new bios map based on this information */
561 if (current_type != last_type) {
562 if (last_type != 0) {
563 new_bios[new_bios_entry].size =
564 change_point[chgidx]->addr - last_addr;
565 /* move forward only if the new size was non-zero */
566 if (new_bios[new_bios_entry].size != 0)
567 if (++new_bios_entry >= E820MAX)
568 break; /* no more space left for new bios entries */
570 if (current_type != 0) {
571 new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
572 new_bios[new_bios_entry].type = current_type;
573 last_addr=change_point[chgidx]->addr;
575 last_type = current_type;
578 new_nr = new_bios_entry; /* retain count for new bios entries */
580 /* copy new bios mapping into original location */
581 memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
588 * Copy the BIOS e820 map into a safe place.
590 * Sanity-check it while we're at it..
592 * If we're lucky and live on a modern system, the setup code
593 * will have given us a memory map that we can use to properly
594 * set up memory. If we aren't, we'll fake a memory map.
596 * We check to see that the memory map contains at least 2 elements
597 * before we'll use it, because the detection code in setup.S may
598 * not be perfect and most every PC known to man has two memory
599 * regions: one from 0 to 640k, and one from 1mb up. (The IBM
600 * thinkpad 560x, for example, does not cooperate with the memory
603 static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
605 /* Only one memory region (or negative)? Ignore it */
610 unsigned long long start = biosmap->addr;
611 unsigned long long size = biosmap->size;
612 unsigned long long end = start + size;
613 unsigned long type = biosmap->type;
615 /* Overflow in 64 bits? Ignore the memory map. */
620 * Some BIOSes claim RAM in the 640k - 1M region.
621 * Not right. Fix it up.
623 if (type == E820_RAM) {
624 if (start < 0x100000ULL && end > 0xA0000ULL) {
625 if (start < 0xA0000ULL)
626 add_memory_region(start, 0xA0000ULL-start, type);
627 if (end <= 0x100000ULL)
633 add_memory_region(start, size, type);
634 } while (biosmap++,--nr_map);
638 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
640 #ifdef CONFIG_EDD_MODULE
644 * copy_edd() - Copy the BIOS EDD information
645 * from boot_params into a safe place.
648 static inline void copy_edd(void)
650 memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
651 memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
652 edd.mbr_signature_nr = EDD_MBR_SIG_NR;
653 edd.edd_info_nr = EDD_NR;
656 static inline void copy_edd(void)
662 * Do NOT EVER look at the BIOS memory size location.
663 * It does not work on many machines.
665 #define LOWMEMSIZE() (0x9f000)
/* Overwritten by parse_cmdline_early() when "crashdump=<addr>" is given; 0xdeadbeef is the initial value otherwise. */
667 unsigned long crashdump_addr = 0xdeadbeef;
669 static void __init parse_cmdline_early (char ** cmdline_p)
671 char c = ' ', *to = command_line, *from = saved_command_line;
675 /* Save unparsed command line copy for /proc/cmdline */
676 saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
680 * "mem=nopentium" disables the 4MB page tables.
681 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
682 * to <mem>, overriding the bios size.
683 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
684 * <start> to <start>+<mem>, overriding the bios size.
686 * HPA tells me bootloaders need to parse mem=, so no new
687 * option should be mem= [also see Documentation/i386/boot.txt]
689 if (c == ' ' && !memcmp(from, "mem=", 4)) {
690 if (to != command_line)
692 if (!memcmp(from+4, "nopentium", 9)) {
694 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
697 /* If the user specifies memory size, we
698 * limit the BIOS-provided memory map to
699 * that size. exactmap can be used to specify
700 * the exact map. mem=number can be used to
701 * trim the existing memory map.
703 unsigned long long mem_size;
705 mem_size = memparse(from+4, &from);
706 limit_regions(mem_size);
711 if (c == ' ' && !memcmp(from, "memmap=", 7)) {
712 if (to != command_line)
714 if (!memcmp(from+7, "exactmap", 8)) {
715 /* If we are doing a crash dump, we
716 * still need to know the real mem
724 /* If the user specifies memory size, we
725 * limit the BIOS-provided memory map to
726 * that size. exactmap can be used to specify
727 * the exact map. mem=number can be used to
728 * trim the existing memory map.
730 unsigned long long start_at, mem_size;
732 mem_size = memparse(from+7, &from);
734 start_at = memparse(from+1, &from);
735 add_memory_region(start_at, mem_size, E820_RAM);
736 } else if (*from == '#') {
737 start_at = memparse(from+1, &from);
738 add_memory_region(start_at, mem_size, E820_ACPI);
739 } else if (*from == '$') {
740 start_at = memparse(from+1, &from);
741 add_memory_region(start_at, mem_size, E820_RESERVED);
743 limit_regions(mem_size);
749 #ifdef CONFIG_X86_SMP
751 * If the BIOS enumerates physical processors before logical,
752 * maxcpus=N at enumeration-time can be used to disable HT.
754 else if (!memcmp(from, "maxcpus=", 8)) {
755 extern unsigned int maxcpus;
757 maxcpus = simple_strtoul(from + 8, NULL, 0);
761 #ifdef CONFIG_ACPI_BOOT
762 /* "acpi=off" disables both ACPI table parsing and interpreter */
763 else if (!memcmp(from, "acpi=off", 8)) {
767 /* acpi=force to over-ride black-list */
768 else if (!memcmp(from, "acpi=force", 10)) {
774 /* acpi=strict disables out-of-spec workarounds */
775 else if (!memcmp(from, "acpi=strict", 11)) {
779 /* Limit ACPI just to boot-time to enable HT */
780 else if (!memcmp(from, "acpi=ht", 7)) {
786 /* "pci=noacpi" disables ACPI IRQ routing and PCI scan */
787 else if (!memcmp(from, "pci=noacpi", 10)) {
790 /* "acpi=noirq" disables ACPI interrupt routing */
791 else if (!memcmp(from, "acpi=noirq", 10)) {
795 else if (!memcmp(from, "acpi_sci=edge", 13))
796 acpi_sci_flags.trigger = 1;
798 else if (!memcmp(from, "acpi_sci=level", 14))
799 acpi_sci_flags.trigger = 3;
801 else if (!memcmp(from, "acpi_sci=high", 13))
802 acpi_sci_flags.polarity = 1;
804 else if (!memcmp(from, "acpi_sci=low", 12))
805 acpi_sci_flags.polarity = 3;
807 #ifdef CONFIG_X86_IO_APIC
808 else if (!memcmp(from, "acpi_skip_timer_override", 24))
809 acpi_skip_timer_override = 1;
812 #ifdef CONFIG_X86_LOCAL_APIC
813 /* disable IO-APIC */
814 else if (!memcmp(from, "noapic", 6))
815 disable_ioapic_setup();
816 #endif /* CONFIG_X86_LOCAL_APIC */
817 #endif /* CONFIG_ACPI_BOOT */
820 * highmem=size forces highmem to be exactly 'size' bytes.
821 * This works even on boxes that have no highmem otherwise.
822 * This also works to reduce highmem size on bigger boxes.
824 if (c == ' ' && !memcmp(from, "highmem=", 8))
825 highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
827 if (!memcmp(from, "dump", 4))
830 if (c == ' ' && !memcmp(from, "crashdump=", 10))
831 crashdump_addr = memparse(from+10, &from);
834 * vmalloc=size forces the vmalloc area to be exactly 'size'
835 * bytes. This can be used to increase (or decrease) the
836 * vmalloc area - the default is 128m.
838 if (c == ' ' && !memcmp(from, "vmalloc=", 8))
839 __VMALLOC_RESERVE = memparse(from+8, &from);
844 if (COMMAND_LINE_SIZE <= ++len)
849 *cmdline_p = command_line;
851 printk(KERN_INFO "user-defined physical RAM map:\n");
852 print_memory_map("user");
857 * Callback for efi_memmap_walk.
860 efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
862 unsigned long *max_pfn = arg, pfn;
865 pfn = PFN_UP(end -1);
874 * Find the highest page frame number we have available
876 void __init find_max_pfn(void)
882 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
886 for (i = 0; i < e820.nr_map; i++) {
887 unsigned long start, end;
889 if (e820.map[i].type != E820_RAM)
891 start = PFN_UP(e820.map[i].addr);
892 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
901 * Determine low and high memory ranges:
903 unsigned long __init find_max_low_pfn(void)
905 unsigned long max_low_pfn;
907 max_low_pfn = max_pfn;
908 if (max_low_pfn > MAXMEM_PFN) {
909 if (highmem_pages == -1)
910 highmem_pages = max_pfn - MAXMEM_PFN;
911 if (highmem_pages + MAXMEM_PFN < max_pfn)
912 max_pfn = MAXMEM_PFN + highmem_pages;
913 if (highmem_pages + MAXMEM_PFN > max_pfn) {
914 printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
917 max_low_pfn = MAXMEM_PFN;
918 #ifndef CONFIG_HIGHMEM
919 /* Maximum memory usable is what is directly addressable */
920 printk(KERN_WARNING "Warning only %ldMB will be used.\n",
922 if (max_pfn > MAX_NONPAE_PFN)
923 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
925 printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
926 max_pfn = MAXMEM_PFN;
927 #else /* !CONFIG_HIGHMEM */
928 #ifndef CONFIG_X86_PAE
929 if (max_pfn > MAX_NONPAE_PFN) {
930 max_pfn = MAX_NONPAE_PFN;
931 printk(KERN_WARNING "Warning only 4GB will be used.\n");
932 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
934 #endif /* !CONFIG_X86_PAE */
935 #endif /* !CONFIG_HIGHMEM */
937 if (highmem_pages == -1)
939 #ifdef CONFIG_HIGHMEM
940 if (highmem_pages >= max_pfn) {
941 printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
945 if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
946 printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
949 max_low_pfn -= highmem_pages;
953 printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
959 #ifndef CONFIG_DISCONTIGMEM
962 * Free all available memory for boot time allocation. Used
963 * as a callback function by efi_memmap_walk()
967 free_available_memory(unsigned long start, unsigned long end, void *arg)
969 /* check max_low_pfn */
970 if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
972 if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
973 end = (max_low_pfn + 1) << PAGE_SHIFT;
975 free_bootmem(start, end - start);
980 * Register fully available low RAM pages with the bootmem allocator.
982 static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
987 efi_memmap_walk(free_available_memory, NULL);
990 for (i = 0; i < e820.nr_map; i++) {
991 unsigned long curr_pfn, last_pfn, size;
993 * Reserve usable low memory
995 if (e820.map[i].type != E820_RAM)
998 * We are rounding up the start address of usable memory:
1000 curr_pfn = PFN_UP(e820.map[i].addr);
1001 if (curr_pfn >= max_low_pfn)
1004 * ... and at the end of the usable range downwards:
1006 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1008 if (last_pfn > max_low_pfn)
1009 last_pfn = max_low_pfn;
1012 * .. finally, did all the rounding and playing
1013 * around just make the area go away?
1015 if (last_pfn <= curr_pfn)
1018 size = last_pfn - curr_pfn;
1019 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
1024 * workaround for Dell systems that neglect to reserve EBDA
1026 static void __init reserve_ebda_region(void)
1029 addr = get_bios_ebda();
1031 reserve_bootmem(addr, PAGE_SIZE);
1034 static unsigned long __init setup_memory(void)
1036 unsigned long bootmap_size, start_pfn, max_low_pfn;
1039 * partially used pages are not usable - thus
1040 * we are rounding upwards:
1042 start_pfn = PFN_UP(init_pg_tables_end);
1046 max_low_pfn = find_max_low_pfn();
1048 #ifdef CONFIG_HIGHMEM
1049 highstart_pfn = highend_pfn = max_pfn;
1050 if (max_pfn > max_low_pfn) {
1051 highstart_pfn = max_low_pfn;
1053 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
1054 pages_to_mb(highend_pfn - highstart_pfn));
1056 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
1057 pages_to_mb(max_low_pfn));
1059 * Initialize the boot-time allocator (with low memory only):
1061 bootmap_size = init_bootmem(start_pfn, max_low_pfn);
1063 register_bootmem_low_pages(max_low_pfn);
1066 * Reserve the bootmem bitmap itself as well. We do this in two
1067 * steps (first step was init_bootmem()) because this catches
1068 * the (very unlikely) case of us accidentally initializing the
1069 * bootmem allocator with an invalid RAM area.
1071 reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
1072 bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
1075 * reserve physical page 0 - it's a special BIOS page on many boxes,
1076 * enabling clean reboots, SMP operation, laptop functions.
1078 reserve_bootmem(0, PAGE_SIZE);
1080 /* reserve EBDA region, it's a 4K region */
1081 reserve_ebda_region();
1083 /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
1084 PCI prefetch into it (errata #56). Usually the page is reserved anyway,
1085 unless you have no PS/2 mouse plugged in. */
1086 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
1087 boot_cpu_data.x86 == 6)
1088 reserve_bootmem(0xa0000 - 4096, 4096);
1092 * But first pinch a few for the stack/trampoline stuff
1093 * FIXME: Don't need the extra page at 4K, but need to fix
1094 * trampoline before removing it. (see the GDT stuff)
1096 reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
1098 #ifdef CONFIG_ACPI_SLEEP
1100 * Reserve low memory region for sleep support.
1102 acpi_reserve_bootmem();
1104 #ifdef CONFIG_X86_FIND_SMP_CONFIG
1106 * Find and reserve possible boot-time SMP configuration:
1111 #ifdef CONFIG_BLK_DEV_INITRD
1112 if (LOADER_TYPE && INITRD_START) {
1113 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
1114 reserve_bootmem(INITRD_START, INITRD_SIZE);
1116 INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
1117 initrd_end = initrd_start+INITRD_SIZE;
1120 printk(KERN_ERR "initrd extends beyond end of memory "
1121 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
1122 INITRD_START + INITRD_SIZE,
1123 max_low_pfn << PAGE_SHIFT);
1129 crash_reserve_bootmem();
1134 extern unsigned long setup_memory(void);
1135 #endif /* !CONFIG_DISCONTIGMEM */
1138 * Request address space for all standard RAM and ROM resources
1139 * and also for regions reported as reserved by the e820.
1142 legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
1147 for (i = 0; i < e820.nr_map; i++) {
1148 struct resource *res;
1149 if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
1151 res = alloc_bootmem_low(sizeof(struct resource));
1152 switch (e820.map[i].type) {
1153 case E820_RAM: res->name = "System RAM"; break;
1154 case E820_ACPI: res->name = "ACPI Tables"; break;
1155 case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
1156 default: res->name = "reserved";
1158 res->start = e820.map[i].addr;
1159 res->end = res->start + e820.map[i].size - 1;
1160 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1161 request_resource(&iomem_resource, res);
1162 if (e820.map[i].type == E820_RAM) {
1164 * We don't know which RAM region contains kernel data,
1165 * so we try it repeatedly and let the resource manager
1168 request_resource(res, code_resource);
1169 request_resource(res, data_resource);
1175 * Request address space for all standard resources
1177 static void __init register_memory(unsigned long max_low_pfn)
1179 unsigned long low_mem_size;
1183 efi_initialize_iomem_resources(&code_resource, &data_resource);
1185 legacy_init_iomem_resources(&code_resource, &data_resource);
1187 /* EFI systems may still have VGA */
1188 request_resource(&iomem_resource, &video_ram_resource);
1190 /* request I/O space for devices used on all i[345]86 PCs */
1191 for (i = 0; i < STANDARD_IO_RESOURCES; i++)
1192 request_resource(&ioport_resource, &standard_io_resources[i]);
1194 /* Tell the PCI layer not to allocate too close to the RAM area.. */
1195 low_mem_size = ((max_low_pfn << PAGE_SHIFT) + 0xfffff) & ~0xfffff;
1196 if (low_mem_size > pci_mem_start)
1197 pci_mem_start = low_mem_size;
1200 /* Use inline assembly to define this because the nops are defined
1201 as inline assembly strings in the include files and we cannot
1202 get them easily into strings. */
1203 asm("\t.data\nintelnops: "
1204 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
1205 GENERIC_NOP7 GENERIC_NOP8);
1206 asm("\t.data\nk8nops: "
1207 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
1209 asm("\t.data\nk7nops: "
1210 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
1213 extern unsigned char intelnops[], k8nops[], k7nops[];
1214 static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
1219 intelnops + 1 + 2 + 3,
1220 intelnops + 1 + 2 + 3 + 4,
1221 intelnops + 1 + 2 + 3 + 4 + 5,
1222 intelnops + 1 + 2 + 3 + 4 + 5 + 6,
1223 intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1225 static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
1231 k8nops + 1 + 2 + 3 + 4,
1232 k8nops + 1 + 2 + 3 + 4 + 5,
1233 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
1234 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1236 static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
1242 k7nops + 1 + 2 + 3 + 4,
1243 k7nops + 1 + 2 + 3 + 4 + 5,
1244 k7nops + 1 + 2 + 3 + 4 + 5 + 6,
1245 k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1249 unsigned char **noptable;
1251 { X86_FEATURE_K8, k8_nops },
1252 { X86_FEATURE_K7, k7_nops },
1256 /* Replace instructions with better alternatives for this CPU type.
1258 This runs before SMP is initialized to avoid SMP problems with
1259 self modifying code. This implies that asymmetric systems where
1260 APs have fewer capabilities than the boot processor are not handled.
1261 In this case boot with "noreplacement". */
1262 void apply_alternatives(void *start, void *end)
1264 struct alt_instr *a;
1266 unsigned char **noptable = intel_nops;
1267 for (i = 0; noptypes[i].cpuid >= 0; i++) {
1268 if (boot_cpu_has(noptypes[i].cpuid)) {
1269 noptable = noptypes[i].noptable;
1273 for (a = start; (void *)a < end; a++) {
1274 if (!boot_cpu_has(a->cpuid))
1276 BUG_ON(a->replacementlen > a->instrlen);
1277 memcpy(a->instr, a->replacement, a->replacementlen);
1278 diff = a->instrlen - a->replacementlen;
1279 /* Pad the rest with nops */
1280 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
1282 if (k > ASM_NOP_MAX)
1284 memcpy(a->instr + i, noptable[k], k);
1289 static int no_replacement __initdata = 0;
1291 void __init alternative_instructions(void)
1293 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
1296 apply_alternatives(__alt_instructions, __alt_instructions_end);
1299 static int __init noreplacement_setup(char *s)
1305 __setup("noreplacement", noreplacement_setup);
1307 static char * __init machine_specific_memory_setup(void);
1309 #ifdef CONFIG_CRASH_DUMP_SOFTBOOT
1310 extern void crashdump_reserve(void);
1314 * Determine if we were loaded by an EFI loader. If so, then we have also been
1315 * passed the efi memmap, systab, etc., so we should use these data structures
1316 * for initialization. Note, the efi init code path is determined by the
1317 * global efi_enabled. This allows the same kernel image to be used on existing
1318 * systems (with a traditional BIOS) as well as on EFI systems.
1320 void __init setup_arch(char **cmdline_p)
1322 unsigned long max_low_pfn;
1324 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
1325 pre_setup_arch_hook();
1329 * FIXME: This isn't an official loader_type right
1330 * now but does currently work with elilo.
1331 * If we were configured as an EFI kernel, check to make
1332 * sure that we were loaded correctly from elilo and that
1333 * the system table is valid. If not, then initialize normally.
1336 if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
1340 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
1341 drive_info = DRIVE_INFO;
1342 screen_info = SCREEN_INFO;
1343 edid_info = EDID_INFO;
1344 apm_info.bios = APM_BIOS_INFO;
1345 ist_info = IST_INFO;
1346 saved_videomode = VIDEO_MODE;
1347 if( SYS_DESC_TABLE.length != 0 ) {
1348 MCA_bus = SYS_DESC_TABLE.table[3] &0x2;
1349 machine_id = SYS_DESC_TABLE.table[0];
1350 machine_submodel_id = SYS_DESC_TABLE.table[1];
1351 BIOS_revision = SYS_DESC_TABLE.table[2];
1353 aux_device_present = AUX_DEVICE_INFO;
1355 #ifdef CONFIG_BLK_DEV_RAM
1356 rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
1357 rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
1358 rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
1364 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1365 print_memory_map(machine_specific_memory_setup());
1370 if (!MOUNT_ROOT_RDONLY)
1371 root_mountflags &= ~MS_RDONLY;
1372 init_mm.start_code = (unsigned long) _text;
1373 init_mm.end_code = (unsigned long) _etext;
1374 init_mm.end_data = (unsigned long) _edata;
1375 init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
1377 code_resource.start = virt_to_phys(_text);
1378 code_resource.end = virt_to_phys(_etext)-1;
1379 data_resource.start = virt_to_phys(_etext);
1380 data_resource.end = virt_to_phys(_edata)-1;
1382 parse_cmdline_early(cmdline_p);
1384 max_low_pfn = setup_memory();
1387 * NOTE: before this point _nobody_ is allowed to allocate
1388 * any memory using the bootmem allocator. Although the
1389 * allocator is now initialised, only the first 8MB of the kernel
1390 * virtual address space has been mapped. All allocations before
1391 * paging_init() has completed must use the alloc_bootmem_low_pages()
1392 * variant (which allocates DMA'able memory) and care must be taken
1393 * not to exceed the 8MB limit.
1397 smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
1402 * NOTE: at this point the bootmem allocator is fully available.
1405 #ifdef CONFIG_EARLY_PRINTK
1407 char *s = strstr(*cmdline_p, "earlyprintk=");
1409 extern void setup_early_printk(char *);
1411 setup_early_printk(s);
1412 printk("early console enabled\n");
1418 #ifdef CONFIG_CRASH_DUMP_SOFTBOOT
1419 crashdump_reserve(); /* Preserve crash dump state from prev boot */
1424 #ifdef CONFIG_X86_GENERICARCH
1425 generic_apic_probe(*cmdline_p);
1431 * Parse the ACPI tables for possible boot-time SMP configuration.
1435 #ifdef CONFIG_X86_LOCAL_APIC
1436 if (smp_found_config)
1440 register_memory(max_low_pfn);
1443 #if defined(CONFIG_VGA_CONSOLE)
1444 if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
1445 conswitchp = &vga_con;
1446 #elif defined(CONFIG_DUMMY_CONSOLE)
1447 conswitchp = &dummy_con;
1452 #include "setup_arch_post.h"
1456 * c-file-style:"k&r"