2 * linux/arch/i386/kernel/setup.c
4 * Copyright (C) 1995 Linus Torvalds
6 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
8 * Memory region support
9 * David Parsons <orc@pell.chi.il.us>, July-August 1999
11 * Added E820 sanitization routine (removes overlapping memory regions);
12 * Brian Moyle <bmoyle@mvista.com>, February 2001
14 * Moved CPU detection code to cpu/${cpu}.c
15 * Patrick Mochel <mochel@osdl.org>, March 2002
17 * Provisions for empty E820 memory regions (reported by certain BIOSes).
18 * Alex Achenbach <xela@slit.de>, December 2002.
23 * This file handles the architecture-dependent parts of initialization
26 #include <linux/sched.h>
28 #include <linux/tty.h>
29 #include <linux/ioport.h>
30 #include <linux/acpi.h>
31 #include <linux/apm_bios.h>
32 #include <linux/initrd.h>
33 #include <linux/bootmem.h>
34 #include <linux/seq_file.h>
35 #include <linux/console.h>
36 #include <linux/root_dev.h>
37 #include <linux/highmem.h>
38 #include <linux/module.h>
39 #include <linux/efi.h>
40 #include <linux/init.h>
41 #include <linux/edd.h>
42 #include <video/edid.h>
44 #include <asm/mpspec.h>
45 #include <asm/setup.h>
46 #include <asm/arch_hooks.h>
47 #include <asm/sections.h>
48 #include <asm/io_apic.h>
51 #include "setup_arch_pre.h"
/*
 * Boot-time globals shared with the early startup code.
 * NOTE(review): some related lines (for example the definition of
 * efi_enabled that is exported below) are not visible in this copy.
 */
53 /* This value is set up by the early boot code to point to the value
54 immediately after the boot time page tables. It contains a *physical*
55 address, and must not be in the .bss segment! */
56 unsigned long init_pg_tables_end __initdata = ~0UL;
/* Starts at 0; the code that sets or consumes it is not visible here. */
58 int disable_pse __initdata = 0;
66 EXPORT_SYMBOL(efi_enabled);
69 /* cpu data as detected by the assembly code in head.S */
70 struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
71 /* common cpu data for all cpus */
/* setup_arch() overwrites this with a copy of new_cpu_data. */
72 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
/* Exported GPL-only; nothing in the visible part of this file writes it. */
74 unsigned long mmu_cr4_features;
75 EXPORT_SYMBOL_GPL(mmu_cr4_features);
/*
 * ACPI enable state.  Two definitions of acpi_disabled follow, with values
 * 0 and 1; presumably they are the two arms of the CONFIG_ACPI_INTERPRETER
 * conditional.  NOTE(review): the #else and #endif lines are not visible in
 * this copy - confirm against the upstream file.
 */
77 #ifdef CONFIG_ACPI_INTERPRETER
78 int acpi_disabled = 0;
80 int acpi_disabled = 1;
82 EXPORT_SYMBOL(acpi_disabled);
/* Only built with CONFIG_ACPI_BOOT; acpi_sci_flags is written by parse_cmdline_early(). */
84 #ifdef CONFIG_ACPI_BOOT
85 int __initdata acpi_force = 0;
86 extern acpi_interrupt_flags acpi_sci_flags;
/*
 * Machine description globals.  setup_arch() fills machine_id,
 * machine_submodel_id and BIOS_revision from SYS_DESC_TABLE.
 */
90 /* for MCA, but anyone else can use it if they want */
91 unsigned int machine_id;
92 unsigned int machine_submodel_id;
93 unsigned int BIOS_revision;
94 unsigned int mca_pentium_flag;
96 /* For PCI or other memory-mapped resources */
/* register_memory() raises this when low memory ends above the default. */
97 unsigned long pci_mem_start = 0x10000000;
99 /* user-defined highmem size */
/* -1 is the sentinel for no user value; find_max_low_pfn() tests for it. */
100 static unsigned int highmem_pages = -1;
/* Copies of firmware and boot-loader data, assigned in setup_arch(). */
105 struct drive_info_struct { char dummy[32]; } drive_info;
106 struct screen_info screen_info;
107 struct apm_info apm_info;
/* NOTE(review): the closing line of this struct is not visible in this copy. */
108 struct sys_desc_table_struct {
109 unsigned short length;
110 unsigned char table[0];
112 struct edid_info edid_info;
113 struct ist_info ist_info;
116 unsigned char aux_device_present;
/* Helpers and state defined in other files. */
118 extern void early_cpu_init(void);
119 extern void dmi_scan_machine(void);
120 extern void generic_apic_probe(char *);
121 extern int root_mountflags;
123 unsigned long saved_videomode;
/* Bit fields of RAMDISK_FLAGS, decoded in setup_arch() under CONFIG_BLK_DEV_RAM. */
125 #define RAMDISK_IMAGE_START_MASK 0x07FF
126 #define RAMDISK_PROMPT_FLAG 0x8000
127 #define RAMDISK_LOAD_FLAG 0x4000
/* Output buffer of parse_cmdline_early(); handed back through its cmdline_p argument. */
129 static char command_line[COMMAND_LINE_SIZE];
131 unsigned char __initdata boot_params[PARAM_SIZE];
/*
 * Resource descriptors for the kernel image and the system and extension
 * ROMs.  setup_arch() fills in start and end of the two kernel resources;
 * probe_roms() requests the two ROM resources from iomem_resource.
 * NOTE(review): the .start and .end initialisers and the closing braces are
 * not visible in this copy of the file.
 */
133 static struct resource data_resource = {
134 .name = "Kernel data",
137 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
140 static struct resource code_resource = {
141 .name = "Kernel code",
144 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
/* The ROM entries additionally carry IORESOURCE_READONLY. */
147 static struct resource system_rom_resource = {
148 .name = "System ROM",
151 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
154 static struct resource extension_rom_resource = {
155 .name = "Extension ROM",
158 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
/*
 * Six identical slots for adapter ROMs.  probe_roms() assigns start and end
 * of a slot for each ROM it accepts and stops when the slots run out.
 * NOTE(review): the .start and .end initialisers and the braces between the
 * elements are not visible in this copy of the file.
 */
161 static struct resource adapter_rom_resources[] = { {
162 .name = "Adapter ROM",
165 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
167 .name = "Adapter ROM",
170 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
172 .name = "Adapter ROM",
175 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
177 .name = "Adapter ROM",
180 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
182 .name = "Adapter ROM",
185 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
187 .name = "Adapter ROM",
190 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
/* Number of elements in adapter_rom_resources. */
193 #define ADAPTER_ROM_RESOURCES \
194 (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
/*
 * Video ROM and video RAM resources.  probe_roms() adjusts start and end of
 * the video ROM entry; register_memory() requests the video RAM entry.
 * NOTE(review): the .name of the video ROM entry and all .start and .end
 * initialisers are not visible in this copy of the file.
 */
196 static struct resource video_rom_resource = {
200 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
203 static struct resource video_ram_resource = {
204 .name = "Video RAM area",
207 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
/*
 * I/O port ranges that register_memory() requests from ioport_resource.
 * Eight .flags lines are visible, all marked busy I/O.
 * NOTE(review): only one .name is visible in this copy, and the .start and
 * .end initialisers are missing - confirm the ranges against upstream.
 */
210 static struct resource standard_io_resources[] = { {
214 .flags = IORESOURCE_BUSY | IORESOURCE_IO
219 .flags = IORESOURCE_BUSY | IORESOURCE_IO
224 .flags = IORESOURCE_BUSY | IORESOURCE_IO
229 .flags = IORESOURCE_BUSY | IORESOURCE_IO
231 .name = "dma page reg",
234 .flags = IORESOURCE_BUSY | IORESOURCE_IO
239 .flags = IORESOURCE_BUSY | IORESOURCE_IO
244 .flags = IORESOURCE_BUSY | IORESOURCE_IO
249 .flags = IORESOURCE_BUSY | IORESOURCE_IO
/* Number of elements in standard_io_resources. */
252 #define STANDARD_IO_RESOURCES \
253 (sizeof standard_io_resources / sizeof standard_io_resources[0])
/* True when the 16-bit value read at x equals 0xaa55. */
255 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
/*
 * romchecksum - check the byte checksum of a ROM image.
 *
 * rom:    first byte of the image.
 * length: number of bytes to walk.
 *
 * The loop visits every byte in [rom, rom + length) with an unsigned char
 * accumulator.  Callers in probe_roms() treat a non-zero return as a valid
 * checksum.
 * NOTE(review): the loop body and the return statement are not visible in
 * this copy - presumably the bytes are summed and compared with zero.
 */
257 static int __init romchecksum(unsigned char *rom, unsigned long length)
259 unsigned char *p, sum = 0;
261 for (p = rom; p < rom + length; p++)
/*
 * probe_roms - find the legacy option ROMs and claim them in iomem_resource.
 *
 * Order of work visible below: video ROM, system ROM, extension ROM, then
 * adapter ROMs.  Candidates are examined on 2048-byte boundaries and must
 * carry the 0xaa55 signature checked by romsignature().
 * NOTE(review): several lines (declarations of rom and i, continue and
 * break statements, closing braces) are not visible in this copy.
 */
266 static void __init probe_roms(void)
268 unsigned long start, length, upper;
/* Video ROM: scan from its default start up to the first adapter ROM slot. */
273 upper = adapter_rom_resources[0].start;
274 for (start = video_rom_resource.start; start < upper; start += 2048) {
275 rom = isa_bus_to_virt(start);
276 if (!romsignature(rom))
279 video_rom_resource.start = start;
/* Byte 2 of the header gives the image length in 512-byte units. */
281 /* 0 < length <= 0x7f * 512, historically */
282 length = rom[2] * 512;
284 /* if checksum okay, trust length byte */
285 if (length && romchecksum(rom, length))
286 video_rom_resource.end = start + length - 1;
288 request_resource(&iomem_resource, &video_rom_resource);
/* Continue at the first 2048-byte boundary after the video ROM. */
292 start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
/* The system ROM is claimed unconditionally and bounds the adapter scan. */
297 request_resource(&iomem_resource, &system_rom_resource);
298 upper = system_rom_resource.start;
300 /* check for extension rom (ignore length byte!) */
301 rom = isa_bus_to_virt(extension_rom_resource.start);
302 if (romsignature(rom)) {
/* Checksum the whole predefined extension ROM window. */
303 length = extension_rom_resource.end - extension_rom_resource.start + 1;
304 if (romchecksum(rom, length)) {
305 request_resource(&iomem_resource, &extension_rom_resource);
/* A valid extension ROM lowers the upper bound of the adapter scan. */
306 upper = extension_rom_resource.start;
310 /* check for adapter roms on 2k boundaries */
311 for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
312 rom = isa_bus_to_virt(start);
313 if (!romsignature(rom))
316 /* 0 < length <= 0x7f * 512, historically */
317 length = rom[2] * 512;
319 /* but accept any length that fits if checksum okay */
320 if (!length || start + length > upper || !romchecksum(rom, length))
323 adapter_rom_resources[i].start = start;
324 adapter_rom_resources[i].end = start + length - 1;
325 request_resource(&iomem_resource, &adapter_rom_resources[i]);
/* Round the end down to a 2048-byte boundary; the loop step then moves past this ROM. */
327 start = adapter_rom_resources[i++].end & ~2047UL;
/*
 * limit_regions - trim the firmware memory map so usable memory ends at size.
 *
 * size: upper byte address; the first usable region whose end reaches it is
 *       shortened so that it ends there.
 *
 * Two loops are visible: one over the EFI memory map and one over the e820
 * map.  NOTE(review): the condition that selects between them (presumably
 * efi_enabled), the returns and the closing braces are not visible in this
 * copy.
 */
331 static void __init limit_regions(unsigned long long size)
333 unsigned long long current_addr = 0;
/* EFI variant: only EFI_CONVENTIONAL_MEMORY descriptors are trimmed. */
337 for (i = 0; i < memmap.nr_map; i++) {
/* NOTE(review): literal shift of 12 here, PAGE_SHIFT in the trim below - equal only when PAGE_SHIFT is 12. */
338 current_addr = memmap.map[i].phys_addr +
339 (memmap.map[i].num_pages << 12);
340 if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) {
341 if (current_addr >= size) {
/* Drop the pages beyond size (rounded up) and end the map at this descriptor. */
342 memmap.map[i].num_pages -=
343 (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
344 memmap.nr_map = i + 1;
/* e820 variant: only E820_RAM entries are trimmed, in bytes. */
350 for (i = 0; i < e820.nr_map; i++) {
351 if (e820.map[i].type == E820_RAM) {
352 current_addr = e820.map[i].addr + e820.map[i].size;
353 if (current_addr >= size) {
354 e820.map[i].size -= current_addr-size;
/*
 * add_memory_region - store one entry in the global e820 map.
 *
 * start: first byte address of the region.
 * size:  length of the region in bytes.
 * type:  e820 type code (callers in this file pass E820_RAM, E820_ACPI or
 *        E820_RESERVED, or the type copied from the BIOS map).
 *
 * Prints an error when the map has no room left.
 * NOTE(review): the computation of the slot index x, the capacity test and
 * the update of the entry count are not visible in this copy.
 */
362 static void __init add_memory_region(unsigned long long start,
363 unsigned long long size, int type)
371 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
375 e820.map[x].addr = start;
376 e820.map[x].size = size;
377 e820.map[x].type = type;
380 } /* add_memory_region */
/*
 * print_memory_map - log every entry of the global e820 map.
 *
 * who: label printed at the start of each line; this file passes the string
 *      returned by machine_specific_memory_setup() and the literal user.
 *
 * Each line shows a 16-digit hex range followed by a description of the
 * entry type; unknown types are printed numerically.
 * NOTE(review): most case labels and the break statements of the switch are
 * not visible in this copy.
 */
384 static void __init print_memory_map(char *who)
388 for (i = 0; i < e820.nr_map; i++) {
389 printk(" %s: %016Lx - %016Lx ", who,
391 e820.map[i].addr + e820.map[i].size);
392 switch (e820.map[i].type) {
393 case E820_RAM: printk("(usable)\n");
396 printk("(reserved)\n");
399 printk("(ACPI data)\n");
402 printk("(ACPI NVS)\n");
404 default: printk("type %lu\n", e820.map[i].type);
411 * Sanitize the BIOS e820 map.
413 * Some e820 responses include overlapping entries. The following
414 * replaces the original e820 map with a new one, removing overlaps.
/*
 * Scratch data for sanitize_e820_map().  A change_member records one
 * boundary (start or end) of an original BIOS entry.
 * NOTE(review): the closing line of the struct is not visible in this copy.
 */
417 struct change_member {
418 struct e820entry *pbios; /* pointer to original bios entry */
419 unsigned long long addr; /* address for this change point */
/* Two change-points (start and end) are possible per map entry, hence 2*E820MAX. */
421 struct change_member change_point_list[2*E820MAX] __initdata;
/* Pointers into change_point_list; these are what gets sorted. */
422 struct change_member *change_point[2*E820MAX] __initdata;
/* Entries covering the address currently being swept. */
423 struct e820entry *overlap_list[E820MAX] __initdata;
/* Output map built by the sweep before it is copied back. */
424 struct e820entry new_bios[E820MAX] __initdata;
/*
 * sanitize_e820_map - rewrite a BIOS e820 map so that no two entries overlap.
 *
 * biosmap: array of entries; the sanitized result is copied back over it.
 * pnr_map: pointer to the entry count.  NOTE(review): the statements that
 *          read and update it are not visible in this copy; it is declared
 *          as char *, so confirm that callers never need a larger count.
 *
 * Visible steps: reject maps whose address plus size wraps around, record a
 * start and an end change-point for every non-empty entry, sort the
 * change-points by address with repeated adjacent swaps, then sweep them
 * while tracking which entries cover the current address and emit a new
 * entry whenever the winning (numerically largest) type changes.
 * NOTE(review): the return statements, several braces and a few assignments
 * are not visible in this copy.
 */
426 static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
428 struct change_member *change_tmp;
429 unsigned long current_type, last_type;
430 unsigned long long last_addr;
431 int chgidx, still_changing;
434 int old_nr, new_nr, chg_nr;
438 Visually we're performing the following (1,2,3,4 = memory types)...
440 Sample memory map (w/overlaps):
441 ____22__________________
442 ______________________4_
443 ____1111________________
444 _44_____________________
445 11111111________________
446 ____________________33__
447 ___________44___________
448 __________33333_________
449 ______________22________
450 ___________________2222_
451 _________111111111______
452 _____________________11_
453 _________________4______
455 Sanitized equivalent (no overlap):
456 1_______________________
457 _44_____________________
458 ___1____________________
459 ____22__________________
460 ______11________________
461 _________1______________
462 __________3_____________
463 ___________44___________
464 _____________33_________
465 _______________2________
466 ________________1_______
467 _________________4______
468 ___________________2____
469 ____________________33__
470 ______________________4_
473 /* if there's only one memory region, don't bother */
479 /* bail out if we find any unreasonable addresses in bios map */
/* An end address below the start address means the 64-bit sum wrapped. */
480 for (i=0; i<old_nr; i++)
481 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
484 /* create pointers for initial change-point information (for sorting) */
485 for (i=0; i < 2*old_nr; i++)
486 change_point[i] = &change_point_list[i];
488 /* record all known change-points (starting and ending addresses),
489 omitting those that are for empty memory regions */
491 for (i=0; i < old_nr; i++) {
492 if (biosmap[i].size != 0) {
/* First the start boundary, then the end boundary, both pointing at the same entry. */
493 change_point[chgidx]->addr = biosmap[i].addr;
494 change_point[chgidx++]->pbios = &biosmap[i];
495 change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
496 change_point[chgidx++]->pbios = &biosmap[i];
499 chg_nr = chgidx; /* true number of change-points */
501 /* sort change-point list by memory addresses (low -> high) */
/* Passes repeat until one completes without a swap. */
503 while (still_changing) {
505 for (i=1; i < chg_nr; i++) {
506 /* if <current_addr> > <last_addr>, swap */
507 /* or, if current=<start_addr> & last=<end_addr>, swap */
508 if ((change_point[i]->addr < change_point[i-1]->addr) ||
509 ((change_point[i]->addr == change_point[i-1]->addr) &&
510 (change_point[i]->addr == change_point[i]->pbios->addr) &&
511 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
514 change_tmp = change_point[i];
515 change_point[i] = change_point[i-1];
516 change_point[i-1] = change_tmp;
522 /* create a new bios memory map, removing overlaps */
523 overlap_entries=0; /* number of entries in the overlap table */
524 new_bios_entry=0; /* index for creating new bios map entries */
525 last_type = 0; /* start with undefined memory type */
526 last_addr = 0; /* start with 0 as last starting address */
527 /* loop through change-points, determining effect on the new bios map */
528 for (chgidx=0; chgidx < chg_nr; chgidx++)
530 /* keep track of all overlapping bios entries */
/* A change-point whose address equals the start of its entry opens that entry. */
531 if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
533 /* add map entry to overlap list (> 1 entry implies an overlap) */
534 overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
538 /* remove entry from list (order independent, so swap with last) */
539 for (i=0; i<overlap_entries; i++)
541 if (overlap_list[i] == change_point[chgidx]->pbios)
542 overlap_list[i] = overlap_list[overlap_entries-1];
546 /* if there are overlapping entries, decide which "type" to use */
547 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
549 for (i=0; i<overlap_entries; i++)
550 if (overlap_list[i]->type > current_type)
551 current_type = overlap_list[i]->type;
552 /* continue building up new bios map based on this information */
553 if (current_type != last_type) {
/* Close the entry that was open, if any, at this change-point. */
554 if (last_type != 0) {
555 new_bios[new_bios_entry].size =
556 change_point[chgidx]->addr - last_addr;
557 /* move forward only if the new size was non-zero */
558 if (new_bios[new_bios_entry].size != 0)
559 if (++new_bios_entry >= E820MAX)
560 break; /* no more space left for new bios entries */
/* Open a new entry when some region still covers this address. */
562 if (current_type != 0) {
563 new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
564 new_bios[new_bios_entry].type = current_type;
565 last_addr=change_point[chgidx]->addr;
567 last_type = current_type;
570 new_nr = new_bios_entry; /* retain count for new bios entries */
572 /* copy new bios mapping into original location */
573 memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
580 * Copy the BIOS e820 map into a safe place.
582 * Sanity-check it while we're at it..
584 * If we're lucky and live on a modern system, the setup code
585 * will have given us a memory map that we can use to properly
586 * set up memory. If we aren't, we'll fake a memory map.
588 * We check to see that the memory map contains at least 2 elements
589 * before we'll use it, because the detection code in setup.S may
590 * not be perfect and most every PC known to man has two memory
591 * regions: one from 0 to 640k, and one from 1mb up. (The IBM
592 * thinkpad 560x, for example, does not cooperate with the memory
/*
 * copy_e820_map - copy a BIOS-supplied map into the global e820 map.
 *
 * biosmap: first entry of the BIOS map.
 * nr_map:  number of entries; the do/while loop below consumes one entry
 *          per iteration until the count reaches zero.
 *
 * RAM entries that overlap the 640k - 1M window are clipped: the part below
 * 0xA0000 is added separately and the remainder is handled by statements
 * that are not visible here.
 * NOTE(review): the early-exit tests, the return values and the adjustment
 * of start and size for the part above 1M are not visible in this copy.
 */
595 static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
597 /* Only one memory region (or negative)? Ignore it */
602 unsigned long long start = biosmap->addr;
603 unsigned long long size = biosmap->size;
604 unsigned long long end = start + size;
605 unsigned long type = biosmap->type;
607 /* Overflow in 64 bits? Ignore the memory map. */
612 * Some BIOSes claim RAM in the 640k - 1M region.
613 * Not right. Fix it up.
615 if (type == E820_RAM) {
616 if (start < 0x100000ULL && end > 0xA0000ULL) {
/* Keep whatever lies below the 640k boundary. */
617 if (start < 0xA0000ULL)
618 add_memory_region(start, 0xA0000ULL-start, type);
/* Nothing above 1M remains when the entry ends at or below it. */
619 if (end <= 0x100000ULL)
625 add_memory_region(start, size, type);
626 } while (biosmap++,--nr_map);
/*
 * EDD support.  With CONFIG_EDD or CONFIG_EDD_MODULE, copy_edd() copies the
 * MBR signatures, the EDD info buffer and both element counts from the
 * boot parameter macros into the global edd structure.  A second definition
 * with the same signature follows; presumably it is the empty fallback for
 * kernels built without EDD.
 * NOTE(review): the definition of edd, the #else and #endif lines and the
 * function braces are not visible in this copy.
 */
630 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
632 #ifdef CONFIG_EDD_MODULE
636 * copy_edd() - Copy the BIOS EDD information
637 * from boot_params into a safe place.
640 static inline void copy_edd(void)
642 memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
643 memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
644 edd.mbr_signature_nr = EDD_MBR_SIG_NR;
645 edd.edd_info_nr = EDD_NR;
648 static inline void copy_edd(void)
654 * Do NOT EVER look at the BIOS memory size location.
655 * It does not work on many machines.
/* Fixed low-memory size constant (0x9f000 bytes); no user is visible in this copy. */
657 #define LOWMEMSIZE() (0x9f000)
/* Overwritten by the crashdump= command-line option in parse_cmdline_early(); 0xdeadbeef is the initial value. */
659 unsigned long crashdump_addr = 0xdeadbeef;
/*
 * parse_cmdline_early - scan saved_command_line for options that have to
 * take effect before the memory map is used.
 *
 * cmdline_p: out parameter; receives a pointer to command_line.
 *
 * The write pointer starts at command_line and the read pointer at
 * saved_command_line, which is first forced to be NUL-terminated.
 * Options whose tests are visible: mem=, memmap=, maxcpus=, several acpi=
 * variants, pci=noacpi, acpi_sci=, acpi_skip_timer_override, noapic,
 * highmem= and crashdump=.  When the user supplied a map it is printed at
 * the end under the label user.
 *
 * NOTE(review): the main loop, the copy of each character, the userdef
 * handling and most branch bodies are not visible in this copy.  The block
 * comment opened on the line numbered 689 has no visible terminator here.
 * NOTE(review): the maxcpus=, acpi and noapic tests do not check that the
 * previous character was a space, unlike mem=, memmap=, highmem= and
 * crashdump= - confirm this is intended.
 */
661 static void __init parse_cmdline_early (char ** cmdline_p)
663 char c = ' ', *to = command_line, *from = saved_command_line;
667 /* Save unparsed command line copy for /proc/cmdline */
668 saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
672 * "mem=nopentium" disables the 4MB page tables.
673 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
674 * to <mem>, overriding the bios size.
675 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
676 * <start> to <start>+<mem>, overriding the bios size.
678 * HPA tells me bootloaders need to parse mem=, so no new
679 * option should be mem= [also see Documentation/i386/boot.txt]
/* c holds the previous character, so an option only matches at the start of a word. */
681 if (c == ' ' && !memcmp(from, "mem=", 4)) {
682 if (to != command_line)
684 if (!memcmp(from+4, "nopentium", 9)) {
/* Clearing the PSE capability bit is the visible effect of mem=nopentium. */
686 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
689 /* If the user specifies memory size, we
690 * limit the BIOS-provided memory map to
691 * that size. exactmap can be used to specify
692 * the exact map. mem=number can be used to
693 * trim the existing memory map.
695 unsigned long long mem_size;
697 mem_size = memparse(from+4, &from);
698 limit_regions(mem_size);
703 if (c == ' ' && !memcmp(from, "memmap=", 7)) {
704 if (to != command_line)
706 if (!memcmp(from+7, "exactmap", 8)) {
711 /* If the user specifies memory size, we
712 * limit the BIOS-provided memory map to
713 * that size. exactmap can be used to specify
714 * the exact map. mem=number can be used to
715 * trim the existing memory map.
717 unsigned long long start_at, mem_size;
719 mem_size = memparse(from+7, &from);
721 start_at = memparse(from+1, &from);
722 add_memory_region(start_at, mem_size, E820_RAM);
723 } else if (*from == '#') {
724 start_at = memparse(from+1, &from);
725 add_memory_region(start_at, mem_size, E820_ACPI);
726 } else if (*from == '$') {
727 start_at = memparse(from+1, &from);
728 add_memory_region(start_at, mem_size, E820_RESERVED);
730 limit_regions(mem_size);
736 #ifdef CONFIG_X86_SMP
738 * If the BIOS enumerates physical processors before logical,
739 * maxcpus=N at enumeration-time can be used to disable HT.
741 else if (!memcmp(from, "maxcpus=", 8)) {
742 extern unsigned int maxcpus;
744 maxcpus = simple_strtoul(from + 8, NULL, 0);
748 #ifdef CONFIG_ACPI_BOOT
749 /* "acpi=off" disables both ACPI table parsing and interpreter */
750 else if (!memcmp(from, "acpi=off", 8)) {
754 /* acpi=force to over-ride black-list */
755 else if (!memcmp(from, "acpi=force", 10)) {
761 /* acpi=strict disables out-of-spec workarounds */
762 else if (!memcmp(from, "acpi=strict", 11)) {
766 /* Limit ACPI just to boot-time to enable HT */
767 else if (!memcmp(from, "acpi=ht", 7)) {
773 /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
774 else if (!memcmp(from, "pci=noacpi", 10)) {
777 /* "acpi=noirq" disables ACPI interrupt routing */
778 else if (!memcmp(from, "acpi=noirq", 10)) {
/* acpi_sci= selects trigger (edge 1, level 3) and polarity (high 1, low 3) of the SCI. */
782 else if (!memcmp(from, "acpi_sci=edge", 13))
783 acpi_sci_flags.trigger = 1;
785 else if (!memcmp(from, "acpi_sci=level", 14))
786 acpi_sci_flags.trigger = 3;
788 else if (!memcmp(from, "acpi_sci=high", 13))
789 acpi_sci_flags.polarity = 1;
791 else if (!memcmp(from, "acpi_sci=low", 12))
792 acpi_sci_flags.polarity = 3;
794 #ifdef CONFIG_X86_IO_APIC
795 else if (!memcmp(from, "acpi_skip_timer_override", 24))
796 acpi_skip_timer_override = 1;
799 #ifdef CONFIG_X86_LOCAL_APIC
800 /* disable IO-APIC */
801 else if (!memcmp(from, "noapic", 6))
802 disable_ioapic_setup();
803 #endif /* CONFIG_X86_LOCAL_APIC */
804 #endif /* CONFIG_ACPI_BOOT */
807 * highmem=size forces highmem to be exactly 'size' bytes.
808 * This works even on boxes that have no highmem otherwise.
809 * This also works to reduce highmem size on bigger boxes.
811 if (c == ' ' && !memcmp(from, "highmem=", 8))
812 highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
814 if (c == ' ' && !memcmp(from, "crashdump=", 10))
815 crashdump_addr = memparse(from+10, &from);
820 if (COMMAND_LINE_SIZE <= ++len)
825 *cmdline_p = command_line;
827 printk(KERN_INFO "user-defined physical RAM map:\n");
828 print_memory_map("user");
833 * Callback for efi_memory_walk.
/*
 * efi_find_max_pfn - per-range callback passed to efi_memmap_walk() by
 * find_max_pfn().
 *
 * start, end: bounds of the range being reported.
 * arg:        pointer to the unsigned long that tracks the highest page
 *             frame number seen so far.
 *
 * NOTE(review): the return type, the comparison that updates *max_pfn and
 * the return statement are not visible in this copy.
 */
836 efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
838 unsigned long *max_pfn = arg, pfn;
/* Page frame of the last byte of the range, rounded up. */
841 pfn = PFN_UP(end -1);
850 * Find the highest page frame number we have available
/*
 * find_max_pfn - determine the highest usable page frame number.
 *
 * Either walks the EFI memory map with efi_find_max_pfn() or scans the
 * E820_RAM entries of the e820 map.
 * NOTE(review): the condition that selects the EFI path, the comparisons
 * that update max_pfn and the skip statement for non-RAM entries are not
 * visible in this copy.
 */
852 void __init find_max_pfn(void)
858 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
862 for (i = 0; i < e820.nr_map; i++) {
863 unsigned long start, end;
865 if (e820.map[i].type != E820_RAM)
/* Only whole pages count: round the start up and the end down. */
867 start = PFN_UP(e820.map[i].addr);
868 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
877 * Determine low and high memory ranges:
/*
 * find_max_low_pfn - split the available page frames into low and high
 * memory and return the first page frame above low memory.
 *
 * Starts from the global max_pfn.  When that exceeds MAXMEM_PFN the result
 * is capped at MAXMEM_PFN and the remainder becomes high memory, subject to
 * the user value in highmem_pages and to the kernel configuration.  When
 * everything fits below MAXMEM_PFN, a user highmem size is carved out of
 * low memory instead.
 * NOTE(review): the else lines, several assignments that reset
 * highmem_pages, the goto or return statements and the closing braces are
 * not visible in this copy.
 */
879 unsigned long __init find_max_low_pfn(void)
881 unsigned long max_low_pfn;
883 max_low_pfn = max_pfn;
884 if (max_low_pfn > MAXMEM_PFN) {
/* No user value: all memory above MAXMEM_PFN is high memory. */
885 if (highmem_pages == -1)
886 highmem_pages = max_pfn - MAXMEM_PFN;
/* The user asked for less high memory than exists: shrink max_pfn. */
887 if (highmem_pages + MAXMEM_PFN < max_pfn)
888 max_pfn = MAXMEM_PFN + highmem_pages;
/* The user asked for more than exists: report it. */
889 if (highmem_pages + MAXMEM_PFN > max_pfn) {
890 printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
893 max_low_pfn = MAXMEM_PFN;
894 #ifndef CONFIG_HIGHMEM
895 /* Maximum memory usable is what is directly addressable */
896 printk(KERN_WARNING "Warning only %ldMB will be used.\n",
898 if (max_pfn > MAX_NONPAE_PFN)
899 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
901 printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
902 max_pfn = MAXMEM_PFN;
903 #else /* !CONFIG_HIGHMEM */
904 #ifndef CONFIG_X86_PAE
/* Without PAE, page frames above MAX_NONPAE_PFN are dropped. */
905 if (max_pfn > MAX_NONPAE_PFN) {
906 max_pfn = MAX_NONPAE_PFN;
907 printk(KERN_WARNING "Warning only 4GB will be used.\n");
908 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
910 #endif /* !CONFIG_X86_PAE */
911 #endif /* !CONFIG_HIGHMEM */
/* From here on: all memory fits below MAXMEM_PFN. */
913 if (highmem_pages == -1)
915 #ifdef CONFIG_HIGHMEM
916 if (highmem_pages >= max_pfn) {
917 printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
/* Refuse a request that would leave less than 64MB of low memory. */
921 if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
922 printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
925 max_low_pfn -= highmem_pages;
929 printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
/* Everything from here to the matching endif is built only without CONFIG_DISCONTIGMEM. */
935 #ifndef CONFIG_DISCONTIGMEM
/*
 * free_available_memory - hand one range to the bootmem allocator, clipped
 * to low memory.
 *
 * start, end: byte bounds of the range.
 * arg:        unused in the visible code.
 *
 * NOTE(review): the clip uses (max_low_pfn + 1) << PAGE_SHIFT, one page
 * beyond max_low_pfn, whereas register_bootmem_low_pages() clamps to
 * max_low_pfn itself - confirm which bound is intended.
 * NOTE(review): the return type and return statements are not visible in
 * this copy.
 */
938 * Free all available memory for boot time allocation. Used
939 * as a callback function by efi_memory_walk()
943 free_available_memory(unsigned long start, unsigned long end, void *arg)
945 /* check max_low_pfn */
946 if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
948 if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
949 end = (max_low_pfn + 1) << PAGE_SHIFT;
951 free_bootmem(start, end - start);
956 * Register fully available low RAM pages with the bootmem allocator.
/*
 * register_bootmem_low_pages - mark usable low memory as free in the
 * bootmem allocator.
 *
 * max_low_pfn: first page frame above low memory; ranges are clipped to it.
 *
 * Either walks the EFI map with free_available_memory() or frees the whole
 * pages of every E820_RAM entry that lies below max_low_pfn.
 * NOTE(review): the condition that selects the EFI path and the continue
 * statements after the tests below are not visible in this copy.
 */
958 static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
963 efi_memmap_walk(free_available_memory, NULL);
966 for (i = 0; i < e820.nr_map; i++) {
967 unsigned long curr_pfn, last_pfn, size;
969 * Reserve usable low memory
971 if (e820.map[i].type != E820_RAM)
974 * We are rounding up the start address of usable memory:
976 curr_pfn = PFN_UP(e820.map[i].addr);
977 if (curr_pfn >= max_low_pfn)
980 * ... and at the end of the usable range downwards:
982 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
984 if (last_pfn > max_low_pfn)
985 last_pfn = max_low_pfn;
988 * .. finally, did all the rounding and playing
989 * around just make the area go away?
991 if (last_pfn <= curr_pfn)
/* size is counted in pages; PFN_PHYS converts both arguments to bytes. */
994 size = last_pfn - curr_pfn;
995 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
/*
 * setup_memory - initialise the boot-time allocator for low memory.
 *
 * Returns: presumably max_low_pfn, since setup_arch() stores the result
 * under that name.  NOTE(review): the return statement is not visible in
 * this copy.
 *
 * Visible steps: compute the first free page frame after the boot page
 * tables, find max_low_pfn, set up the highmem bounds, initialise bootmem,
 * free the usable low pages, then reserve the kernel image plus bootmem
 * bitmap, page 0, an AMD workaround page, the page at 4K, ACPI sleep
 * memory and the initrd.
 * NOTE(review): several calls (for example the one that computes max_pfn
 * and the SMP configuration scan) are not visible in this copy.
 */
999 static unsigned long __init setup_memory(void)
1001 unsigned long bootmap_size, start_pfn, max_low_pfn;
1004 * partially used pages are not usable - thus
1005 * we are rounding upwards:
1007 start_pfn = PFN_UP(init_pg_tables_end);
1011 max_low_pfn = find_max_low_pfn();
1013 #ifdef CONFIG_HIGHMEM
/* Default to an empty high range; widen it when memory exists above low memory. */
1014 highstart_pfn = highend_pfn = max_pfn;
1015 if (max_pfn > max_low_pfn) {
1016 highstart_pfn = max_low_pfn;
1018 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
1019 pages_to_mb(highend_pfn - highstart_pfn));
1021 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
1022 pages_to_mb(max_low_pfn));
1024 * Initialize the boot-time allocator (with low memory only):
1026 bootmap_size = init_bootmem(start_pfn, max_low_pfn);
1028 register_bootmem_low_pages(max_low_pfn);
1031 * Reserve the bootmem bitmap itself as well. We do this in two
1032 * steps (first step was init_bootmem()) because this catches
1033 * the (very unlikely) case of us accidentally initializing the
1034 * bootmem allocator with an invalid RAM area.
/* Covers HIGH_MEMORY up to the end of the bootmem bitmap, rounded up by one page minus one byte. */
1036 reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
1037 bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
1040 * reserve physical page 0 - it's a special BIOS page on many boxes,
1041 * enabling clean reboots, SMP operation, laptop functions.
1043 reserve_bootmem(0, PAGE_SIZE);
1045 /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
1046 PCI prefetch into it (errata #56). Usually the page is reserved anyways,
1047 unless you have no PS/2 mouse plugged in. */
1048 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
1049 boot_cpu_data.x86 == 6)
1050 reserve_bootmem(0xa0000 - 4096, 4096);
1054 * But first pinch a few for the stack/trampoline stuff
1055 * FIXME: Don't need the extra page at 4K, but need to fix
1056 * trampoline before removing it. (see the GDT stuff)
1058 reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
1060 #ifdef CONFIG_ACPI_SLEEP
1062 * Reserve low memory region for sleep support.
1064 acpi_reserve_bootmem();
1066 #ifdef CONFIG_X86_FIND_SMP_CONFIG
1068 * Find and reserve possible boot-time SMP configuration:
1073 #ifdef CONFIG_BLK_DEV_INITRD
1074 if (LOADER_TYPE && INITRD_START) {
1075 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
1076 reserve_bootmem(INITRD_START, INITRD_SIZE);
1078 INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
1079 initrd_end = initrd_start+INITRD_SIZE;
1082 printk(KERN_ERR "initrd extends beyond end of memory "
1083 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
1084 INITRD_START + INITRD_SIZE,
1085 max_low_pfn << PAGE_SHIFT);
1093 extern unsigned long setup_memory(void);
1094 #endif /* !CONFIG_DISCONTIGMEM */
1097 * Request address space for all standard RAM and ROM resources
1098 * and also for regions reported as reserved by the e820.
/*
 * legacy_init_iomem_resources - publish the e820 map in iomem_resource.
 *
 * code_resource, data_resource: kernel image resources; they are requested
 * as children of every E820_RAM resource so that the one containing the
 * kernel accepts them.
 *
 * A struct resource is allocated from low bootmem for each e820 entry that
 * ends at or below 4GB, named after its type and marked busy memory.
 * NOTE(review): the return type, the call to probe_roms() (if any) and the
 * continue statement after the 4GB test are not visible in this copy.
 * NOTE(review): the result of alloc_bootmem_low() is used without a visible
 * check - confirm that it cannot return NULL here.
 */
1101 legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
1106 for (i = 0; i < e820.nr_map; i++) {
1107 struct resource *res;
1108 if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
1110 res = alloc_bootmem_low(sizeof(struct resource));
1111 switch (e820.map[i].type) {
1112 case E820_RAM: res->name = "System RAM"; break;
1113 case E820_ACPI: res->name = "ACPI Tables"; break;
1114 case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
1115 default: res->name = "reserved";
1117 res->start = e820.map[i].addr;
1118 res->end = res->start + e820.map[i].size - 1;
1119 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1120 request_resource(&iomem_resource, res);
1121 if (e820.map[i].type == E820_RAM) {
1123 * We don't know which RAM region contains kernel data,
1124 * so we try it repeatedly and let the resource manager
1127 request_resource(res, code_resource);
1128 request_resource(res, data_resource);
1134 * Request address space for all standard resources
/*
 * register_memory - request the standard memory and I/O port resources.
 *
 * max_low_pfn: first page frame above low memory; used to keep PCI memory
 *              allocations away from RAM.
 *
 * Registers the memory map through either the EFI or the legacy helper,
 * then the video RAM area and every entry of standard_io_resources, and
 * finally raises pci_mem_start when low memory ends above it.
 * NOTE(review): the condition that selects the EFI helper is not visible in
 * this copy.
 */
1136 static void __init register_memory(unsigned long max_low_pfn)
1138 unsigned long low_mem_size;
1142 efi_initialize_iomem_resources(&code_resource, &data_resource);
1144 legacy_init_iomem_resources(&code_resource, &data_resource);
1146 /* EFI systems may still have VGA */
1147 request_resource(&iomem_resource, &video_ram_resource);
1149 /* request I/O space for devices used on all i[345]86 PCs */
1150 for (i = 0; i < STANDARD_IO_RESOURCES; i++)
1151 request_resource(&ioport_resource, &standard_io_resources[i]);
1153 /* Tell the PCI layer not to allocate too close to the RAM area.. */
/* End of low memory in bytes, rounded up to a multiple of 0x100000. */
1154 low_mem_size = ((max_low_pfn << PAGE_SHIFT) + 0xfffff) & ~0xfffff;
1155 if (low_mem_size > pci_mem_start)
1156 pci_mem_start = low_mem_size;
/*
 * NOP tables used by apply_alternatives() for padding.  Each asm statement
 * places the NOP encodings of one CPU family back to back in .data, and the
 * matching pointer table indexes into that blob: the visible entries step
 * by 1, 2, 3 and so on, so the entry for length k points at the k-byte NOP.
 * NOTE(review): the first entries of each table, the tails of two asm
 * statements and the head of the noptypes struct are not visible in this
 * copy.
 */
1159 /* Use inline assembly to define this because the nops are defined
1160 as inline assembly strings in the include files and we cannot
1161 get them easily into strings. */
1162 asm("\t.data\nintelnops: "
1163 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
1164 GENERIC_NOP7 GENERIC_NOP8);
1165 asm("\t.data\nk8nops: "
1166 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
1168 asm("\t.data\nk7nops: "
1169 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
1172 extern unsigned char intelnops[], k8nops[], k7nops[];
1173 static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
1178 intelnops + 1 + 2 + 3,
1179 intelnops + 1 + 2 + 3 + 4,
1180 intelnops + 1 + 2 + 3 + 4 + 5,
1181 intelnops + 1 + 2 + 3 + 4 + 5 + 6,
1182 intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1184 static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
1190 k8nops + 1 + 2 + 3 + 4,
1191 k8nops + 1 + 2 + 3 + 4 + 5,
1192 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
1193 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1195 static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
1201 k7nops + 1 + 2 + 3 + 4,
1202 k7nops + 1 + 2 + 3 + 4 + 5,
1203 k7nops + 1 + 2 + 3 + 4 + 5 + 6,
1204 k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
/* Maps a CPU feature bit to the NOP table preferred on CPUs that have it. */
1208 unsigned char **noptable;
1210 { X86_FEATURE_K8, k8_nops },
1211 { X86_FEATURE_K7, k7_nops },
1215 /* Replace instructions with better alternatives for this CPU type.
1217 This runs before SMP is initialized to avoid SMP problems with
1218 self modifying code. This implies that asymmetric systems where
1219 APs have less capabilities than the boot processor are not handled.
1220 In this case boot with "noreplacement". */
/*
 * start, end: bounds of an array of struct alt_instr records; every record
 * in [start, end) is visited.
 * NOTE(review): some declarations, the break and continue statements, the
 * assignment of k and the closing braces are not visible in this copy.
 */
1221 void apply_alternatives(void *start, void *end)
1223 struct alt_instr *a;
/* Default to the generic NOPs; a matching noptypes entry overrides this. */
1225 unsigned char **noptable = intel_nops;
1226 for (i = 0; noptypes[i].cpuid >= 0; i++) {
1227 if (boot_cpu_has(noptypes[i].cpuid)) {
1228 noptable = noptypes[i].noptable;
1232 for (a = start; (void *)a < end; a++) {
/* Records whose feature bit the boot CPU lacks are left alone. */
1233 if (!boot_cpu_has(a->cpuid))
/* The replacement must fit into the space of the original instruction. */
1235 BUG_ON(a->replacementlen > a->instrlen);
1236 memcpy(a->instr, a->replacement, a->replacementlen);
1237 diff = a->instrlen - a->replacementlen;
1238 /* Pad the rest with nops */
/* Each pass writes one NOP of k bytes, with k capped by ASM_NOP_MAX. */
1239 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
1241 if (k > ASM_NOP_MAX)
1243 memcpy(a->instr + i, noptable[k], k);
/*
 * Boot-time driver for apply_alternatives().  alternative_instructions()
 * patches the records between __alt_instructions and
 * __alt_instructions_end; noreplacement_setup() is registered as the
 * handler of the noreplacement boot option.
 * NOTE(review): the test of no_replacement and the body of
 * noreplacement_setup() are not visible in this copy - presumably the
 * option sets the flag and the flag suppresses the patching.
 */
1248 static int no_replacement __initdata = 0;
1250 void __init alternative_instructions(void)
1252 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
1255 apply_alternatives(__alt_instructions, __alt_instructions_end);
1258 static int __init noreplacement_setup(char *s)
1264 __setup("noreplacement", noreplacement_setup);
/* Forward declaration; setup_arch() passes the returned string to print_memory_map() as the label. */
1266 static char * __init machine_specific_memory_setup(void);
/* Declared only with CONFIG_CRASH_DUMP_SOFTBOOT; called from setup_arch(). */
1268 #ifdef CONFIG_CRASH_DUMP_SOFTBOOT
1269 extern void crashdump_reserve(void);
1273 * Determine if we were loaded by an EFI loader. If so, then we have also been
1274 * passed the efi memmap, systab, etc., so we should use these data structures
1275 * for initialization. Note, the efi init code path is determined by the
1276 * global efi_enabled. This allows the same kernel image to be used on existing
1277 * systems (with a traditional BIOS) as well as on EFI systems.
/*
 * setup_arch - architecture-specific boot initialisation.
 *
 * cmdline_p: out parameter; filled in by parse_cmdline_early() with a
 *            pointer to the processed command line.
 *
 * Visible steps, in order: copy the CPU data detected at startup into
 * boot_cpu_data, run the pre-setup hook, copy boot-loader supplied values
 * (root device, drive, screen, EDID, APM, IST, video mode, system
 * description table, aux device, ramdisk flags) into kernel globals, print
 * the BIOS memory map, record the kernel image bounds in init_mm and in the
 * code and data resources, parse the early command line, initialise
 * bootmem, then perform the optional early printk, crash dump, APIC and
 * ACPI steps, register the standard resources and pick the console.
 * NOTE(review): many statements (EFI detection result, paging setup, ACPI
 * table parsing, closing lines of the conditionals) are not visible in this
 * copy; confirm the exact order against the upstream file.
 */
1279 void __init setup_arch(char **cmdline_p)
1281 unsigned long max_low_pfn;
1283 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
1284 pre_setup_arch_hook();
1288 * FIXME: This isn't an official loader_type right
1289 * now but does currently work with elilo.
1290 * If we were configured as an EFI kernel, check to make
1291 * sure that we were loaded correctly from elilo and that
1292 * the system table is valid. If not, then initialize normally.
1295 if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
1299 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
1300 drive_info = DRIVE_INFO;
1301 screen_info = SCREEN_INFO;
1302 edid_info = EDID_INFO;
1303 apm_info.bios = APM_BIOS_INFO;
1304 ist_info = IST_INFO;
1305 saved_videomode = VIDEO_MODE;
1306 if( SYS_DESC_TABLE.length != 0 ) {
1307 MCA_bus = SYS_DESC_TABLE.table[3] &0x2;
1308 machine_id = SYS_DESC_TABLE.table[0];
1309 machine_submodel_id = SYS_DESC_TABLE.table[1];
1310 BIOS_revision = SYS_DESC_TABLE.table[2];
1312 aux_device_present = AUX_DEVICE_INFO;
1314 #ifdef CONFIG_BLK_DEV_RAM
1315 rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
1316 rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
1317 rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
1323 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1324 print_memory_map(machine_specific_memory_setup());
1329 if (!MOUNT_ROOT_RDONLY)
1330 root_mountflags &= ~MS_RDONLY;
1331 init_mm.start_code = (unsigned long) _text;
1332 init_mm.end_code = (unsigned long) _etext;
1333 init_mm.end_data = (unsigned long) _edata;
1334 init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
1336 code_resource.start = virt_to_phys(_text);
1337 code_resource.end = virt_to_phys(_etext)-1;
1338 data_resource.start = virt_to_phys(_etext);
1339 data_resource.end = virt_to_phys(_edata)-1;
1341 parse_cmdline_early(cmdline_p);
1343 max_low_pfn = setup_memory();
1346 * NOTE: before this point _nobody_ is allowed to allocate
1347 * any memory using the bootmem allocator.
1351 smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
1355 #ifdef CONFIG_EARLY_PRINTK
1357 char *s = strstr(*cmdline_p, "earlyprintk=");
1359 extern void setup_early_printk(char *);
1361 setup_early_printk(s);
1362 printk("early console enabled\n");
1368 #ifdef CONFIG_CRASH_DUMP_SOFTBOOT
1369 crashdump_reserve(); /* Preserve crash dump state from prev boot */
1374 #ifdef CONFIG_X86_GENERICARCH
1375 generic_apic_probe(*cmdline_p);
1381 * Parse the ACPI tables for possible boot-time SMP configuration.
1385 #ifdef CONFIG_X86_LOCAL_APIC
1386 if (smp_found_config)
1390 register_memory(max_low_pfn);
1393 #if defined(CONFIG_VGA_CONSOLE)
1394 if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
1395 conswitchp = &vga_con;
1396 #elif defined(CONFIG_DUMMY_CONSOLE)
1397 conswitchp = &dummy_con;
1402 #include "setup_arch_post.h"
1406 * c-file-style:"k&r"