2 * linux/arch/i386/kernel/setup.c
4 * Copyright (C) 1995 Linus Torvalds
6 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
8 * Memory region support
9 * David Parsons <orc@pell.chi.il.us>, July-August 1999
11 * Added E820 sanitization routine (removes overlapping memory regions);
12 * Brian Moyle <bmoyle@mvista.com>, February 2001
14 * Moved CPU detection code to cpu/${cpu}.c
15 * Patrick Mochel <mochel@osdl.org>, March 2002
17 * Provisions for empty E820 memory regions (reported by certain BIOSes).
18 * Alex Achenbach <xela@slit.de>, December 2002.
23 * This file handles the architecture-dependent parts of initialization
26 #include <linux/sched.h>
28 #include <linux/tty.h>
29 #include <linux/ioport.h>
30 #include <linux/acpi.h>
31 #include <linux/apm_bios.h>
32 #include <linux/initrd.h>
33 #include <linux/bootmem.h>
34 #include <linux/seq_file.h>
35 #include <linux/console.h>
36 #include <linux/root_dev.h>
37 #include <linux/highmem.h>
38 #include <linux/module.h>
39 #include <linux/efi.h>
40 #include <linux/init.h>
41 #include <linux/edd.h>
42 #include <video/edid.h>
44 #include <asm/mpspec.h>
45 #include <asm/setup.h>
46 #include <asm/arch_hooks.h>
47 #include <asm/sections.h>
48 #include <asm/io_apic.h>
51 #include "setup_arch_pre.h"
52 #include <bios_ebda.h>
54 /* This value is set up by the early boot code to point to the value
55 immediately after the boot time page tables. It contains a *physical*
56 address, and must not be in the .bss segment! */
57 unsigned long init_pg_tables_end __initdata = ~0UL;
/* PSE disable flag, initially 0. NOTE(review): presumably set by the
   "mem=nopentium" handling in parse_cmdline_early(); the assignment is not
   visible in this listing. */
59 int disable_pse __initdata = 0;
/* The definition of efi_enabled is not visible in this listing; setup_arch()
   reads it when choosing the console. */
67 EXPORT_SYMBOL(efi_enabled);
70 /* cpu data as detected by the assembly code in head.S */
71 struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
72 /* common cpu data for all cpus */
/* setup_arch() overwrites boot_cpu_data with new_cpu_data via memcpy(). */
73 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
/* NOTE(review): nothing visible in this listing reads or writes
   mmu_cr4_features apart from the export below. */
75 unsigned long mmu_cr4_features;
76 EXPORT_SYMBOL_GPL(mmu_cr4_features);
/* ACPI defaults. acpi_disabled is defined as 0 under CONFIG_ACPI_INTERPRETER;
   the second definition (1) presumably belongs to an #else branch whose
   preprocessor lines are not visible in this listing. */
78 #ifdef CONFIG_ACPI_INTERPRETER
79 int acpi_disabled = 0;
81 int acpi_disabled = 1;
83 EXPORT_SYMBOL(acpi_disabled);
/* Only built with CONFIG_ACPI_BOOT. acpi_sci_flags.trigger and .polarity are
   written by the "acpi_sci=" options in parse_cmdline_early(). */
85 #ifdef CONFIG_ACPI_BOOT
86 int __initdata acpi_force = 0;
87 extern acpi_interrupt_flags acpi_sci_flags;
91 /* for MCA, but anyone else can use it if they want */
/* setup_arch() fills machine_id, machine_submodel_id and BIOS_revision from
   SYS_DESC_TABLE.table[0..2] when SYS_DESC_TABLE.length is non-zero. */
92 unsigned int machine_id;
93 unsigned int machine_submodel_id;
94 unsigned int BIOS_revision;
95 unsigned int mca_pentium_flag;
97 /* For PCI or other memory-mapped resources */
/* register_memory() raises this to the end of low memory, rounded up to a
   1 MiB boundary, when that is larger than the default. */
98 unsigned long pci_mem_start = 0x10000000;
100 /* user-defined highmem size */
/* Counted in pages. The initial -1 means no size was given: the "highmem="
   option stores a page count here and find_max_low_pfn() tests for -1. */
101 static unsigned int highmem_pages = -1;
/* Copies of boot-time data. setup_arch() assigns drive_info, screen_info,
   edid_info, apm_info.bios, ist_info, saved_videomode and aux_device_present
   from the corresponding boot-parameter macros (DRIVE_INFO, SCREEN_INFO, ...). */
106 struct drive_info_struct { char dummy[32]; } drive_info;
107 struct screen_info screen_info;
108 struct apm_info apm_info;
/* The closing line of this struct is not visible in this listing. */
109 struct sys_desc_table_struct {
110 unsigned short length;
111 unsigned char table[0];
113 struct edid_info edid_info;
114 struct ist_info ist_info;
117 unsigned char aux_device_present;
/* Declarations of routines and data defined outside this file. */
119 extern void early_cpu_init(void);
120 extern void dmi_scan_machine(void);
121 extern void generic_apic_probe(char *);
122 extern int root_mountflags;
124 unsigned long saved_videomode;
/* Bit masks that setup_arch() applies to RAMDISK_FLAGS to derive
   rd_image_start, rd_prompt and rd_doload. */
126 #define RAMDISK_IMAGE_START_MASK 0x07FF
127 #define RAMDISK_PROMPT_FLAG 0x8000
128 #define RAMDISK_LOAD_FLAG 0x4000
/* Output buffer of parse_cmdline_early(), which hands its address back to the
   caller through *cmdline_p. */
130 static char command_line[COMMAND_LINE_SIZE];
132 unsigned char __initdata boot_params[PARAM_SIZE];
/* Memory resource descriptors for the kernel image and the BIOS ROMs.
   The .start/.end initializers and the closing braces are not visible in this
   listing. setup_arch() fills in code_resource and data_resource start/end
   from _text, _etext and _edata; probe_roms() requests the ROM resources. */
134 static struct resource data_resource = {
135 .name = "Kernel data",
138 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
141 static struct resource code_resource = {
142 .name = "Kernel code",
145 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
148 static struct resource system_rom_resource = {
149 .name = "System ROM",
152 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
155 static struct resource extension_rom_resource = {
156 .name = "Extension ROM",
159 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
/* Slots for adapter ROMs found by probe_roms(), which overwrites .start and
   .end of each slot it uses. Six "Adapter ROM" entries are visible; their
   .start/.end initializers and braces are not visible in this listing. */
162 static struct resource adapter_rom_resources[] = { {
163 .name = "Adapter ROM",
166 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
168 .name = "Adapter ROM",
171 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
173 .name = "Adapter ROM",
176 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
178 .name = "Adapter ROM",
181 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
183 .name = "Adapter ROM",
186 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
188 .name = "Adapter ROM",
191 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
/* Number of elements in adapter_rom_resources. */
194 #define ADAPTER_ROM_RESOURCES \
195 (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
/* Video ROM and video RAM descriptors; name/start/end lines are partly missing
   from this listing. probe_roms() narrows video_rom_resource to the ROM it
   finds, and register_memory() requests video_ram_resource. */
197 static struct resource video_rom_resource = {
201 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
204 static struct resource video_ram_resource = {
205 .name = "Video RAM area",
208 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
/* I/O port ranges that register_memory() requests from ioport_resource.
   Nine entries are visible; apart from "dma page reg" their names and port
   ranges are not visible in this listing. */
211 static struct resource standard_io_resources[] = { {
215 .flags = IORESOURCE_BUSY | IORESOURCE_IO
220 .flags = IORESOURCE_BUSY | IORESOURCE_IO
225 .flags = IORESOURCE_BUSY | IORESOURCE_IO
230 .flags = IORESOURCE_BUSY | IORESOURCE_IO
235 .flags = IORESOURCE_BUSY | IORESOURCE_IO
237 .name = "dma page reg",
240 .flags = IORESOURCE_BUSY | IORESOURCE_IO
245 .flags = IORESOURCE_BUSY | IORESOURCE_IO
250 .flags = IORESOURCE_BUSY | IORESOURCE_IO
255 .flags = IORESOURCE_BUSY | IORESOURCE_IO
/* Number of elements in standard_io_resources. */
258 #define STANDARD_IO_RESOURCES \
259 (sizeof standard_io_resources / sizeof standard_io_resources[0])
/* True when the 16-bit word at x equals 0xaa55 (read through an
   unsigned short pointer). */
261 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
/*
 * romchecksum() - walk the `length` bytes starting at `rom` with an 8-bit
 * accumulator `sum`.
 *
 * The loop body and the return statement are not visible in this listing.
 * Callers in probe_roms() treat a non-zero return as "checksum okay".
 */
263 static int __init romchecksum(unsigned char *rom, unsigned long length)
265 unsigned char *p, sum = 0;
267 for (p = rom; p < rom + length; p++)
/*
 * probe_roms() - locate the video, system, extension and adapter ROMs and
 * request each one found from iomem_resource.
 *
 * Scanning proceeds in 2048-byte steps and uses romsignature() and
 * romchecksum() to recognise a ROM image. The declarations of `rom` and `i`
 * and several control-flow lines (continue/break, closing braces) are not
 * visible in this listing.
 */
272 static void __init probe_roms(void)
274 unsigned long start, length, upper;
/* the video ROM search stops at the start of the first adapter ROM slot */
279 upper = adapter_rom_resources[0].start;
280 for (start = video_rom_resource.start; start < upper; start += 2048) {
281 rom = isa_bus_to_virt(start);
282 if (!romsignature(rom))
285 video_rom_resource.start = start;
287 /* 0 < length <= 0x7f * 512, historically */
/* rom[2] is the length byte, counted in 512-byte units */
288 length = rom[2] * 512;
290 /* if checksum okay, trust length byte */
291 if (length && romchecksum(rom, length))
292 video_rom_resource.end = start + length - 1;
294 request_resource(&iomem_resource, &video_rom_resource);
/* first 2048-byte boundary at or after the end of the video ROM */
298 start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
303 request_resource(&iomem_resource, &system_rom_resource);
304 upper = system_rom_resource.start;
306 /* check for extension rom (ignore length byte!) */
307 rom = isa_bus_to_virt(extension_rom_resource.start);
308 if (romsignature(rom)) {
309 length = extension_rom_resource.end - extension_rom_resource.start + 1;
310 if (romchecksum(rom, length)) {
311 request_resource(&iomem_resource, &extension_rom_resource);
312 upper = extension_rom_resource.start;
316 /* check for adapter roms on 2k boundaries */
317 for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
318 rom = isa_bus_to_virt(start);
319 if (!romsignature(rom))
322 /* 0 < length <= 0x7f * 512, historically */
323 length = rom[2] * 512;
325 /* but accept any length that fits if checksum okay */
326 if (!length || start + length > upper || !romchecksum(rom, length))
329 adapter_rom_resources[i].start = start;
330 adapter_rom_resources[i].end = start + length - 1;
331 request_resource(&iomem_resource, &adapter_rom_resources[i]);
/* round the ROM end down to a 2048-byte boundary and move to the next slot;
   the loop increment then steps to the following boundary */
333 start = adapter_rom_resources[i++].end & ~2047UL;
/*
 * limit_regions() - trim the memory map so that usable memory does not
 * extend beyond `size` (a byte address).
 *
 * Two loops are visible: one over memmap.map that acts on
 * EFI_CONVENTIONAL_MEMORY entries and one over e820.map that acts on
 * E820_RAM entries. The condition selecting between them, the declaration
 * of `i` and the lines that end each loop are not visible in this listing.
 */
337 static void __init limit_regions(unsigned long long size)
339 unsigned long long current_addr = 0;
343 for (i = 0; i < memmap.nr_map; i++) {
/* end address of this descriptor; num_pages is scaled by a 12-bit shift */
344 current_addr = memmap.map[i].phys_addr +
345 (memmap.map[i].num_pages << 12);
346 if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) {
347 if (current_addr >= size) {
/* drop the pages beyond `size` (excess rounded up to whole pages) and
   truncate the map after this entry */
348 memmap.map[i].num_pages -=
349 (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
350 memmap.nr_map = i + 1;
356 for (i = 0; i < e820.nr_map; i++) {
357 if (e820.map[i].type == E820_RAM) {
358 current_addr = e820.map[i].addr + e820.map[i].size;
359 if (current_addr >= size) {
/* shrink the entry so that it ends exactly at `size` */
360 e820.map[i].size -= current_addr-size;
/*
 * add_memory_region() - store one (start, size, type) entry in e820.map[x].
 *
 * The computation of the index `x`, the capacity check that leads to the
 * "Too many entries" message, and any update of e820.nr_map are not visible
 * in this listing.
 */
368 static void __init add_memory_region(unsigned long long start,
369 unsigned long long size, int type)
377 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
381 e820.map[x].addr = start;
382 e820.map[x].size = size;
383 e820.map[x].type = type;
386 } /* add_memory_region */
/*
 * print_memory_map() - print every e820.map entry as "who: start - end"
 * followed by a label for its type.
 *
 * `who` is a caller-supplied prefix string ("user" in parse_cmdline_early()).
 * Types without a dedicated label fall through to the default case, which
 * prints the numeric type. Most case labels, the break statements and the
 * first address argument of the printk are not visible in this listing.
 */
390 static void __init print_memory_map(char *who)
394 for (i = 0; i < e820.nr_map; i++) {
395 printk(" %s: %016Lx - %016Lx ", who,
397 e820.map[i].addr + e820.map[i].size);
398 switch (e820.map[i].type) {
399 case E820_RAM: printk("(usable)\n");
402 printk("(reserved)\n");
405 printk("(ACPI data)\n");
408 printk("(ACPI NVS)\n");
410 default: printk("type %lu\n", e820.map[i].type);
417 * Sanitize the BIOS e820 map.
419 * Some e820 responses include overlapping entries. The following
420 * replaces the original e820 map with a new one, removing overlaps.
/* One change point: an address at which a BIOS entry starts or ends, plus a
   pointer back to that entry. sanitize_e820_map() records two per non-empty
   entry. The closing line of the struct is not visible in this listing. */
423 struct change_member {
424 struct e820entry *pbios; /* pointer to original bios entry */
425 unsigned long long addr; /* address for this change point */
/* Scratch storage for sanitize_e820_map(): the change points, a sortable
   array of pointers to them, the entries overlapping at the current sweep
   position, and the rebuilt map. */
427 struct change_member change_point_list[2*E820MAX] __initdata;
428 struct change_member *change_point[2*E820MAX] __initdata;
429 struct e820entry *overlap_list[E820MAX] __initdata;
430 struct e820entry new_bios[E820MAX] __initdata;
/*
 * sanitize_e820_map() - rewrite `biosmap` in place as a map without
 * overlapping entries.
 *
 * Steps visible in this listing:
 *   1. reject maps containing an entry whose addr + size wraps around
 *      (the action taken is not visible);
 *   2. record a start and an end change point for every non-empty entry;
 *   3. sort the change points by address with an exchange sort, placing a
 *      start point before an end point at the same address;
 *   4. sweep the sorted points while maintaining overlap_list, taking the
 *      largest type among the overlapping entries, and emit a new_bios
 *      entry whenever that type changes (at most E820MAX entries);
 *   5. memcpy() the new_nr rebuilt entries back over biosmap.
 *
 * NOTE(review): pnr_map is presumably the in/out entry count (old_nr is
 * compared against it in spirit), but the lines that read or write
 * *pnr_map, the early-exit for short maps and every return statement are
 * not visible in this listing.
 */
432 static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
434 struct change_member *change_tmp;
435 unsigned long current_type, last_type;
436 unsigned long long last_addr;
437 int chgidx, still_changing;
440 int old_nr, new_nr, chg_nr;
444 Visually we're performing the following (1,2,3,4 = memory types)...
446 Sample memory map (w/overlaps):
447 ____22__________________
448 ______________________4_
449 ____1111________________
450 _44_____________________
451 11111111________________
452 ____________________33__
453 ___________44___________
454 __________33333_________
455 ______________22________
456 ___________________2222_
457 _________111111111______
458 _____________________11_
459 _________________4______
461 Sanitized equivalent (no overlap):
462 1_______________________
463 _44_____________________
464 ___1____________________
465 ____22__________________
466 ______11________________
467 _________1______________
468 __________3_____________
469 ___________44___________
470 _____________33_________
471 _______________2________
472 ________________1_______
473 _________________4______
474 ___________________2____
475 ____________________33__
476 ______________________4_
479 /* if there's only one memory region, don't bother */
485 /* bail out if we find any unreasonable addresses in bios map */
486 for (i=0; i<old_nr; i++)
487 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
490 /* create pointers for initial change-point information (for sorting) */
491 for (i=0; i < 2*old_nr; i++)
492 change_point[i] = &change_point_list[i];
494 /* record all known change-points (starting and ending addresses),
495 omitting those that are for empty memory regions */
497 for (i=0; i < old_nr; i++) {
498 if (biosmap[i].size != 0) {
499 change_point[chgidx]->addr = biosmap[i].addr;
500 change_point[chgidx++]->pbios = &biosmap[i];
501 change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
502 change_point[chgidx++]->pbios = &biosmap[i];
505 chg_nr = chgidx; /* true number of change-points */
507 /* sort change-point list by memory addresses (low -> high) */
509 while (still_changing) {
511 for (i=1; i < chg_nr; i++) {
512 /* if <current_addr> > <last_addr>, swap */
513 /* or, if current=<start_addr> & last=<end_addr>, swap */
514 if ((change_point[i]->addr < change_point[i-1]->addr) ||
515 ((change_point[i]->addr == change_point[i-1]->addr) &&
516 (change_point[i]->addr == change_point[i]->pbios->addr) &&
517 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
520 change_tmp = change_point[i];
521 change_point[i] = change_point[i-1];
522 change_point[i-1] = change_tmp;
528 /* create a new bios memory map, removing overlaps */
529 overlap_entries=0; /* number of entries in the overlap table */
530 new_bios_entry=0; /* index for creating new bios map entries */
531 last_type = 0; /* start with undefined memory type */
532 last_addr = 0; /* start with 0 as last starting address */
533 /* loop through change-points, determining affect on the new bios map */
534 for (chgidx=0; chgidx < chg_nr; chgidx++)
536 /* keep track of all overlapping bios entries */
537 if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
539 /* add map entry to overlap list (> 1 entry implies an overlap) */
540 overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
544 /* remove entry from list (order independent, so swap with last) */
545 for (i=0; i<overlap_entries; i++)
547 if (overlap_list[i] == change_point[chgidx]->pbios)
548 overlap_list[i] = overlap_list[overlap_entries-1];
552 /* if there are overlapping entries, decide which "type" to use */
553 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
555 for (i=0; i<overlap_entries; i++)
556 if (overlap_list[i]->type > current_type)
557 current_type = overlap_list[i]->type;
558 /* continue building up new bios map based on this information */
559 if (current_type != last_type) {
560 if (last_type != 0) {
561 new_bios[new_bios_entry].size =
562 change_point[chgidx]->addr - last_addr;
563 /* move forward only if the new size was non-zero */
564 if (new_bios[new_bios_entry].size != 0)
565 if (++new_bios_entry >= E820MAX)
566 break; /* no more space left for new bios entries */
568 if (current_type != 0) {
569 new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
570 new_bios[new_bios_entry].type = current_type;
571 last_addr=change_point[chgidx]->addr;
573 last_type = current_type;
576 new_nr = new_bios_entry; /* retain count for new bios entries */
578 /* copy new bios mapping into original location */
579 memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
586 * Copy the BIOS e820 map into a safe place.
588 * Sanity-check it while we're at it..
590 * If we're lucky and live on a modern system, the setup code
591 * will have given us a memory map that we can use to properly
592 * set up memory. If we aren't, we'll fake a memory map.
594 * We check to see that the memory map contains at least 2 elements
595 * before we'll use it, because the detection code in setup.S may
596 * not be perfect and most every PC known to man has two memory
597 * regions: one from 0 to 640k, and one from 1mb up. (The IBM
598 * thinkpad 560x, for example, does not cooperate with the memory detection code.)
/*
 * copy_e820_map() - feed `nr_map` BIOS entries, starting at `biosmap`, to
 * add_memory_region().
 *
 * For an E820_RAM entry that overlaps the 0xA0000-0x100000 range, only the
 * part below 0xA0000 is added there; how the part above 0x100000 is handled
 * is not visible in this listing. The loop is a do/while that advances
 * biosmap and decrements nr_map, so it runs at least once. The short-map
 * check, the 64-bit overflow check and all return statements are not
 * visible in this listing.
 */
601 static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
603 /* Only one memory region (or negative)? Ignore it */
608 unsigned long long start = biosmap->addr;
609 unsigned long long size = biosmap->size;
610 unsigned long long end = start + size;
611 unsigned long type = biosmap->type;
613 /* Overflow in 64 bits? Ignore the memory map. */
618 * Some BIOSes claim RAM in the 640k - 1M region.
619 * Not right. Fix it up.
621 if (type == E820_RAM) {
622 if (start < 0x100000ULL && end > 0xA0000ULL) {
623 if (start < 0xA0000ULL)
624 add_memory_region(start, 0xA0000ULL-start, type);
625 if (end <= 0x100000ULL)
631 add_memory_region(start, size, type);
632 } while (biosmap++,--nr_map);
/* Two definitions of copy_edd() are visible. The first, under CONFIG_EDD or
   CONFIG_EDD_MODULE, copies the MBR signatures, the EDD info buffer and both
   counts into `edd`. The second has no visible body and presumably is the
   fallback branch; the #else/#endif lines are not visible in this listing. */
636 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
638 #ifdef CONFIG_EDD_MODULE
642 * copy_edd() - Copy the BIOS EDD information
643 * from boot_params into a safe place.
646 static inline void copy_edd(void)
648 memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
649 memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
650 edd.mbr_signature_nr = EDD_MBR_SIG_NR;
651 edd.edd_info_nr = EDD_NR;
654 static inline void copy_edd(void)
660 * Do NOT EVER look at the BIOS memory size location.
661 * It does not work on many machines.
/* Expands to the fixed constant 0x9f000; no BIOS data is consulted. */
663 #define LOWMEMSIZE() (0x9f000)
/*
 * parse_cmdline_early() - scan saved_command_line for options that must be
 * acted on before normal parameter parsing, copy the text into
 * command_line, and return that buffer through *cmdline_p.
 *
 * Options visible in this listing:
 *   mem=nopentium          clears X86_FEATURE_PSE in boot_cpu_data
 *   mem=<size>             limit_regions(size)
 *   memmap=exactmap        (handling not visible)
 *   memmap=<size>@<start>  add an E820_RAM region
 *   memmap=<size>#<start>  add an E820_ACPI region
 *   memmap=<size>$<start>  add an E820_RESERVED region
 *   memmap=<size>          limit_regions(size)
 *   maxcpus=<n>            CONFIG_X86_SMP only
 *   acpi=..., pci=noacpi, acpi_sci=..., acpi_skip_timer_override, noapic
 *                          CONFIG_ACPI_BOOT only
 *   highmem=<size>         stored in highmem_pages, converted to pages
 *   vmalloc=<size>         stored in __VMALLOC_RESERVE
 *
 * NOTE(review): the mem=, memmap=, highmem= and vmalloc= tests require the
 * previous character `c` to be a space; the maxcpus=, acpi*, pci=noacpi and
 * noapic tests shown here carry no such check, so they would also match in
 * the middle of a longer word. Confirm whether that is intended.
 * NOTE(review): the "noapic" test sits inside the CONFIG_ACPI_BOOT block
 * according to the #endif comments, so it is compiled out without
 * CONFIG_ACPI_BOOT.
 *
 * The enclosing character loop, the copy into `to` and several closing
 * lines are not visible in this listing.
 */
665 static void __init parse_cmdline_early (char ** cmdline_p)
667 char c = ' ', *to = command_line, *from = saved_command_line;
671 /* Save unparsed command line copy for /proc/cmdline */
672 saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
676 * "mem=nopentium" disables the 4MB page tables.
677 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
678 * to <mem>, overriding the bios size.
679 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
680 * <start> to <start>+<mem>, overriding the bios size.
682 * HPA tells me bootloaders need to parse mem=, so no new
683 * option should be mem= [also see Documentation/i386/boot.txt]
685 if (c == ' ' && !memcmp(from, "mem=", 4)) {
686 if (to != command_line)
688 if (!memcmp(from+4, "nopentium", 9)) {
690 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
693 /* If the user specifies memory size, we
694 * limit the BIOS-provided memory map to
695 * that size. exactmap can be used to specify
696 * the exact map. mem=number can be used to
697 * trim the existing memory map.
699 unsigned long long mem_size;
701 mem_size = memparse(from+4, &from);
702 limit_regions(mem_size);
707 if (c == ' ' && !memcmp(from, "memmap=", 7)) {
708 if (to != command_line)
710 if (!memcmp(from+7, "exactmap", 8)) {
715 /* If the user specifies memory size, we
716 * limit the BIOS-provided memory map to
717 * that size. exactmap can be used to specify
718 * the exact map. mem=number can be used to
719 * trim the existing memory map.
721 unsigned long long start_at, mem_size;
723 mem_size = memparse(from+7, &from);
725 start_at = memparse(from+1, &from);
726 add_memory_region(start_at, mem_size, E820_RAM);
727 } else if (*from == '#') {
728 start_at = memparse(from+1, &from);
729 add_memory_region(start_at, mem_size, E820_ACPI);
730 } else if (*from == '$') {
731 start_at = memparse(from+1, &from);
732 add_memory_region(start_at, mem_size, E820_RESERVED);
734 limit_regions(mem_size);
740 #ifdef CONFIG_X86_SMP
742 * If the BIOS enumerates physical processors before logical,
743 * maxcpus=N at enumeration-time can be used to disable HT.
745 else if (!memcmp(from, "maxcpus=", 8)) {
746 extern unsigned int maxcpus;
748 maxcpus = simple_strtoul(from + 8, NULL, 0);
752 #ifdef CONFIG_ACPI_BOOT
753 /* "acpi=off" disables both ACPI table parsing and interpreter */
754 else if (!memcmp(from, "acpi=off", 8)) {
758 /* acpi=force to over-ride black-list */
759 else if (!memcmp(from, "acpi=force", 10)) {
765 /* acpi=strict disables out-of-spec workarounds */
766 else if (!memcmp(from, "acpi=strict", 11)) {
770 /* Limit ACPI just to boot-time to enable HT */
771 else if (!memcmp(from, "acpi=ht", 7)) {
777 /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
778 else if (!memcmp(from, "pci=noacpi", 10)) {
781 /* "acpi=noirq" disables ACPI interrupt routing */
782 else if (!memcmp(from, "acpi=noirq", 10)) {
786 else if (!memcmp(from, "acpi_sci=edge", 13))
787 acpi_sci_flags.trigger = 1;
789 else if (!memcmp(from, "acpi_sci=level", 14))
790 acpi_sci_flags.trigger = 3;
792 else if (!memcmp(from, "acpi_sci=high", 13))
793 acpi_sci_flags.polarity = 1;
795 else if (!memcmp(from, "acpi_sci=low", 12))
796 acpi_sci_flags.polarity = 3;
798 #ifdef CONFIG_X86_IO_APIC
799 else if (!memcmp(from, "acpi_skip_timer_override", 24))
800 acpi_skip_timer_override = 1;
803 #ifdef CONFIG_X86_LOCAL_APIC
804 /* disable IO-APIC */
805 else if (!memcmp(from, "noapic", 6))
806 disable_ioapic_setup();
807 #endif /* CONFIG_X86_LOCAL_APIC */
808 #endif /* CONFIG_ACPI_BOOT */
811 * highmem=size forces highmem to be exactly 'size' bytes.
812 * This works even on boxes that have no highmem otherwise.
813 * This also works to reduce highmem size on bigger boxes.
815 if (c == ' ' && !memcmp(from, "highmem=", 8))
816 highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
819 * vmalloc=size forces the vmalloc area to be exactly 'size'
820 * bytes. This can be used to increase (or decrease) the
821 * vmalloc area - the default is 128m.
823 if (c == ' ' && !memcmp(from, "vmalloc=", 8))
824 __VMALLOC_RESERVE = memparse(from+8, &from);
829 if (COMMAND_LINE_SIZE <= ++len)
834 *cmdline_p = command_line;
836 printk(KERN_INFO "user-defined physical RAM map:\n");
837 print_memory_map("user");
842 * Callback for efi_memmap_walk().
/* Passed to efi_memmap_walk() by find_max_pfn(). `arg` is treated as a pointer
   to an unsigned long (find_max_pfn() passes &max_pfn). The return type, the
   comparison that updates *max_pfn and the return statement are not visible in
   this listing. */
845 efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
847 unsigned long *max_pfn = arg, pfn;
/* page frame number derived from the last byte of the range */
850 pfn = PFN_UP(end -1);
859 * Find the highest page frame number we have available
/* Two paths are visible: efi_memmap_walk() with efi_find_max_pfn, and a loop
   over the E820_RAM entries of e820.map. The condition choosing between them
   and the statements that update max_pfn from `end` are not visible in this
   listing. */
861 void __init find_max_pfn(void)
867 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
871 for (i = 0; i < e820.nr_map; i++) {
872 unsigned long start, end;
874 if (e820.map[i].type != E820_RAM)
/* only whole pages count: start rounds up, end rounds down */
876 start = PFN_UP(e820.map[i].addr);
877 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
886 * Determine low and high memory ranges:
/*
 * find_max_low_pfn() - compute the highest page frame of low memory and
 * clamp the global max_pfn to what this kernel configuration can use.
 *
 * Starts from max_pfn. When that exceeds MAXMEM_PFN, low memory is capped at
 * MAXMEM_PFN and highmem_pages (-1 meaning "not specified") is defaulted or
 * validated against the pages actually available. Without CONFIG_HIGHMEM,
 * max_pfn is cut to MAXMEM_PFN with a warning; with CONFIG_HIGHMEM but
 * without CONFIG_X86_PAE, max_pfn is cut to MAX_NONPAE_PFN. Otherwise a
 * user-supplied highmem size is carved out of low memory, and is ignored if
 * it is not smaller than max_pfn or would leave less than 64MB of lowmem.
 *
 * The return statement, some #else/#endif lines and the statements that
 * reset highmem_pages after an error message are not visible in this
 * listing.
 */
888 unsigned long __init find_max_low_pfn(void)
890 unsigned long max_low_pfn;
892 max_low_pfn = max_pfn;
893 if (max_low_pfn > MAXMEM_PFN) {
/* highmem_pages is unsigned, so -1 here compares against the all-ones value */
894 if (highmem_pages == -1)
895 highmem_pages = max_pfn - MAXMEM_PFN;
896 if (highmem_pages + MAXMEM_PFN < max_pfn)
897 max_pfn = MAXMEM_PFN + highmem_pages;
898 if (highmem_pages + MAXMEM_PFN > max_pfn) {
899 printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
902 max_low_pfn = MAXMEM_PFN;
903 #ifndef CONFIG_HIGHMEM
904 /* Maximum memory usable is what is directly addressable */
905 printk(KERN_WARNING "Warning only %ldMB will be used.\n",
907 if (max_pfn > MAX_NONPAE_PFN)
908 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
910 printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
911 max_pfn = MAXMEM_PFN;
912 #else /* !CONFIG_HIGHMEM */
913 #ifndef CONFIG_X86_PAE
914 if (max_pfn > MAX_NONPAE_PFN) {
915 max_pfn = MAX_NONPAE_PFN;
916 printk(KERN_WARNING "Warning only 4GB will be used.\n");
917 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
919 #endif /* !CONFIG_X86_PAE */
920 #endif /* !CONFIG_HIGHMEM */
922 if (highmem_pages == -1)
924 #ifdef CONFIG_HIGHMEM
925 if (highmem_pages >= max_pfn) {
926 printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
/* 64*1024*1024/PAGE_SIZE is the number of pages in 64MB */
930 if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
931 printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
934 max_low_pfn -= highmem_pages;
938 printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
944 #ifndef CONFIG_DISCONTIGMEM
947 * Free all available memory for boot time allocation. Used
948 * as a callback function by efi_memmap_walk()
/* Passed to efi_memmap_walk() by register_bootmem_low_pages(). Clamps the
   range [start, end) to (max_low_pfn + 1) << PAGE_SHIFT and hands it to
   free_bootmem(). `arg` is not used in the visible lines (the caller passes
   NULL). NOTE(review): max_low_pfn is not a parameter here, so it must come
   from file or global scope; confirm which variable is meant. The return type
   and return statements are not visible in this listing. */
952 free_available_memory(unsigned long start, unsigned long end, void *arg)
954 /* check max_low_pfn */
955 if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
957 if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
958 end = (max_low_pfn + 1) << PAGE_SHIFT;
960 free_bootmem(start, end - start);
965 * Register fully available low RAM pages with the bootmem allocator.
/* For each E820_RAM entry, frees the whole pages lying below `max_low_pfn`
   (a page frame number) to the bootmem allocator. An alternative path calls
   efi_memmap_walk() with free_available_memory; the condition choosing between
   the two paths and the continue statements are not visible in this listing. */
967 static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
972 efi_memmap_walk(free_available_memory, NULL);
975 for (i = 0; i < e820.nr_map; i++) {
976 unsigned long curr_pfn, last_pfn, size;
978 * Reserve usable low memory
980 if (e820.map[i].type != E820_RAM)
983 * We are rounding up the start address of usable memory:
985 curr_pfn = PFN_UP(e820.map[i].addr);
986 if (curr_pfn >= max_low_pfn)
989 * ... and at the end of the usable range downwards:
991 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
993 if (last_pfn > max_low_pfn)
994 last_pfn = max_low_pfn;
997 * .. finally, did all the rounding and playing
998 * around just make the area go away?
1000 if (last_pfn <= curr_pfn)
1003 size = last_pfn - curr_pfn;
1004 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
1009 * workaround for Dell systems that neglect to reserve EBDA
/* Reserves PAGE_SIZE bytes of bootmem at the address returned by
   get_bios_ebda(). The declaration of `addr` and any check on its value are
   not visible in this listing. */
1011 static void __init reserve_ebda_region(void)
1014 addr = get_bios_ebda();
1016 reserve_bootmem(addr, PAGE_SIZE);
/*
 * setup_memory() - size low and high memory and bring up the bootmem
 * allocator.
 *
 * Visible steps: take the first page frame after the boot page tables
 * (start_pfn), obtain max_low_pfn from find_max_low_pfn(), set the highmem
 * frame range under CONFIG_HIGHMEM, call init_bootmem() and
 * register_bootmem_low_pages(), then reserve: the range from HIGH_MEMORY up
 * to the end of the bootmem bitmap, physical page 0, the EBDA page, one page
 * below 0xa0000 on AMD family 6 CPUs, the page at PAGE_SIZE, the ACPI sleep
 * area (CONFIG_ACPI_SLEEP) and the initrd when it ends within low memory.
 *
 * setup_arch() stores the return value in its own max_low_pfn; the return
 * statement itself, the find_max_pfn() call and several #endif lines are
 * not visible in this listing. The trailing extern declaration presumably
 * belongs to the CONFIG_DISCONTIGMEM branch, whose #else is not visible.
 */
1019 static unsigned long __init setup_memory(void)
1021 unsigned long bootmap_size, start_pfn, max_low_pfn;
1024 * partially used pages are not usable - thus
1025 * we are rounding upwards:
1027 start_pfn = PFN_UP(init_pg_tables_end);
1031 max_low_pfn = find_max_low_pfn();
1033 #ifdef CONFIG_HIGHMEM
1034 highstart_pfn = highend_pfn = max_pfn;
1035 if (max_pfn > max_low_pfn) {
1036 highstart_pfn = max_low_pfn;
1038 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
1039 pages_to_mb(highend_pfn - highstart_pfn));
1041 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
1042 pages_to_mb(max_low_pfn));
1044 * Initialize the boot-time allocator (with low memory only):
1046 bootmap_size = init_bootmem(start_pfn, max_low_pfn);
1048 register_bootmem_low_pages(max_low_pfn);
1051 * Reserve the bootmem bitmap itself as well. We do this in two
1052 * steps (first step was init_bootmem()) because this catches
1053 * the (very unlikely) case of us accidentally initializing the
1054 * bootmem allocator with an invalid RAM area.
1056 reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
1057 bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
1060 * reserve physical page 0 - it's a special BIOS page on many boxes,
1061 * enabling clean reboots, SMP operation, laptop functions.
1063 reserve_bootmem(0, PAGE_SIZE);
1065 /* reserve EBDA region, it's a 4K region */
1066 reserve_ebda_region();
1068 /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
1069 PCI prefetch into it (errata #56). Usually the page is reserved anyways,
1070 unless you have no PS/2 mouse plugged in. */
1071 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
1072 boot_cpu_data.x86 == 6)
1073 reserve_bootmem(0xa0000 - 4096, 4096);
1077 * But first pinch a few for the stack/trampoline stuff
1078 * FIXME: Don't need the extra page at 4K, but need to fix
1079 * trampoline before removing it. (see the GDT stuff)
1081 reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
1083 #ifdef CONFIG_ACPI_SLEEP
1085 * Reserve low memory region for sleep support.
1087 acpi_reserve_bootmem();
1089 #ifdef CONFIG_X86_FIND_SMP_CONFIG
1091 * Find and reserve possible boot-time SMP configuration:
1096 #ifdef CONFIG_BLK_DEV_INITRD
1097 if (LOADER_TYPE && INITRD_START) {
1098 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
1099 reserve_bootmem(INITRD_START, INITRD_SIZE);
1101 INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
1102 initrd_end = initrd_start+INITRD_SIZE;
1105 printk(KERN_ERR "initrd extends beyond end of memory "
1106 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
1107 INITRD_START + INITRD_SIZE,
1108 max_low_pfn << PAGE_SHIFT);
1116 extern unsigned long setup_memory(void);
1117 #endif /* !CONFIG_DISCONTIGMEM */
1120 * Request address space for all standard RAM and ROM resources
1121 * and also for regions reported as reserved by the e820.
/*
 * legacy_init_iomem_resources() - create one iomem resource per e820 entry
 * and try to nest the kernel code and data resources inside each RAM entry.
 *
 * Each resource is allocated with alloc_bootmem_low(), named after the entry
 * type ("System RAM", "ACPI Tables", "ACPI Non-volatile Storage", otherwise
 * "reserved"), flagged IORESOURCE_MEM | IORESOURCE_BUSY and requested from
 * iomem_resource. For E820_RAM entries, code_resource and data_resource are
 * requested as children; the request is made against every RAM entry.
 *
 * Entries ending above 0x100000000ULL are tested for; the statement taken
 * in that case (presumably a skip) is not visible in this listing, nor are
 * the return type and the call to probe_roms(), if any.
 * NOTE(review): the result of alloc_bootmem_low() is used without a visible
 * NULL check; confirm that the allocator cannot return NULL here.
 */
1124 legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
1129 for (i = 0; i < e820.nr_map; i++) {
1130 struct resource *res;
1131 if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
1133 res = alloc_bootmem_low(sizeof(struct resource));
1134 switch (e820.map[i].type) {
1135 case E820_RAM: res->name = "System RAM"; break;
1136 case E820_ACPI: res->name = "ACPI Tables"; break;
1137 case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
1138 default: res->name = "reserved";
1140 res->start = e820.map[i].addr;
1141 res->end = res->start + e820.map[i].size - 1;
1142 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1143 request_resource(&iomem_resource, res);
1144 if (e820.map[i].type == E820_RAM) {
1146 * We don't know which RAM region contains kernel data,
1147 * so we try it repeatedly and let the resource manager
1150 request_resource(res, code_resource);
1151 request_resource(res, data_resource);
1157 * Request address space for all standard resources
/* Registers memory and I/O port resources, then moves pci_mem_start above low
   memory. `max_low_pfn` is the highest low-memory page frame number. Either
   efi_initialize_iomem_resources() or legacy_init_iomem_resources() is called
   with the kernel code/data resources; the condition choosing between them and
   the declaration of `i` are not visible in this listing. */
1159 static void __init register_memory(unsigned long max_low_pfn)
1161 unsigned long low_mem_size;
1165 efi_initialize_iomem_resources(&code_resource, &data_resource);
1167 legacy_init_iomem_resources(&code_resource, &data_resource);
1169 /* EFI systems may still have VGA */
1170 request_resource(&iomem_resource, &video_ram_resource);
1172 /* request I/O space for devices used on all i[345]86 PCs */
1173 for (i = 0; i < STANDARD_IO_RESOURCES; i++)
1174 request_resource(&ioport_resource, &standard_io_resources[i]);
1176 /* Tell the PCI layer not to allocate too close to the RAM area.. */
/* end of low memory in bytes, rounded up to the next 1 MiB boundary */
1177 low_mem_size = ((max_low_pfn << PAGE_SHIFT) + 0xfffff) & ~0xfffff;
1178 if (low_mem_size > pci_mem_start)
1179 pci_mem_start = low_mem_size;
1182 /* Use inline assembly to define this because the nops are defined
1183 as inline assembly strings in the include files and we cannot
1184 get them easily into strings. */
/* Each asm block emits the NOP sequences of length 1, 2, 3, ... back to back
   under one label in .data. The pointer tables below index into that byte
   run: the entry for length k starts at offset 1 + 2 + ... + (k - 1).
   apply_alternatives() copies k bytes from noptable[k]. The leading table
   entries and some closing lines are not visible in this listing. */
1185 asm("\t.data\nintelnops: "
1186 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
1187 GENERIC_NOP7 GENERIC_NOP8);
1188 asm("\t.data\nk8nops: "
1189 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
1191 asm("\t.data\nk7nops: "
1192 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
1195 extern unsigned char intelnops[], k8nops[], k7nops[];
1196 static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
1201 intelnops + 1 + 2 + 3,
1202 intelnops + 1 + 2 + 3 + 4,
1203 intelnops + 1 + 2 + 3 + 4 + 5,
1204 intelnops + 1 + 2 + 3 + 4 + 5 + 6,
1205 intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1207 static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
1213 k8nops + 1 + 2 + 3 + 4,
1214 k8nops + 1 + 2 + 3 + 4 + 5,
1215 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
1216 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1218 static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
1224 k7nops + 1 + 2 + 3 + 4,
1225 k7nops + 1 + 2 + 3 + 4 + 5,
1226 k7nops + 1 + 2 + 3 + 4 + 5 + 6,
1227 k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
/* Maps a CPU feature bit to its NOP table. apply_alternatives() scans this
   list until it meets an entry with a negative cpuid; that terminator and the
   struct header are not visible in this listing. */
1231 unsigned char **noptable;
1233 { X86_FEATURE_K8, k8_nops },
1234 { X86_FEATURE_K7, k7_nops },
1238 /* Replace instructions with better alternatives for this CPU type.
1240 This runs before SMP is initialized to avoid SMP problems with
1241 self modifying code. This implies that asymmetric systems where
1242 APs have less capabilities than the boot processor are not handled.
1243 In this case boot with "noreplacement". */
/*
 * apply_alternatives() - patch every struct alt_instr record in
 * [start, end) whose CPU feature the boot CPU has.
 *
 * The NOP table defaults to intel_nops and is replaced by the table of a
 * matching noptypes entry. For each applicable record the replacement bytes
 * are copied over the original instruction and the remaining
 * instrlen - replacementlen bytes are filled with NOPs in chunks of at most
 * ASM_NOP_MAX bytes. A replacement longer than the original triggers
 * BUG_ON().
 *
 * The declarations of `diff`, `i` and `k`, the assignment of `k` inside the
 * padding loop and the break/continue statements are not visible in this
 * listing.
 */
1244 void apply_alternatives(void *start, void *end)
1246 struct alt_instr *a;
1248 unsigned char **noptable = intel_nops;
1249 for (i = 0; noptypes[i].cpuid >= 0; i++) {
1250 if (boot_cpu_has(noptypes[i].cpuid)) {
1251 noptable = noptypes[i].noptable;
1255 for (a = start; (void *)a < end; a++) {
1256 if (!boot_cpu_has(a->cpuid))
1258 BUG_ON(a->replacementlen > a->instrlen);
1259 memcpy(a->instr, a->replacement, a->replacementlen);
1260 diff = a->instrlen - a->replacementlen;
1261 /* Pad the rest with nops */
1262 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
1264 if (k > ASM_NOP_MAX)
1266 memcpy(a->instr + i, noptable[k], k);
/* Flag, initially 0. NOTE(review): presumably set by noreplacement_setup() and
   tested below before patching; neither the assignment nor the test is visible
   in this listing. */
1271 static int no_replacement __initdata = 0;
/* Runs apply_alternatives() over the table bounded by __alt_instructions and
   __alt_instructions_end. */
1273 void __init alternative_instructions(void)
1275 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
1278 apply_alternatives(__alt_instructions, __alt_instructions_end);
/* Handler registered for the "noreplacement" boot option via __setup(). Its
   body is not visible in this listing. */
1281 static int __init noreplacement_setup(char *s)
1287 __setup("noreplacement", noreplacement_setup);
/* Forward declaration; setup_arch() passes the returned string to
   print_memory_map() as the label of the memory map. */
1289 static char * __init machine_specific_memory_setup(void);
1292 * Determine if we were loaded by an EFI loader. If so, then we have also been
1293 * passed the efi memmap, systab, etc., so we should use these data structures
1294 * for initialization. Note, the efi init code path is determined by the
1295 * global efi_enabled. This allows the same kernel image to be used on existing
1296 * systems (with a traditional BIOS) as well as on EFI systems.
/*
 * setup_arch() - architecture-specific boot-time initialisation.
 *
 * cmdline_p receives the address of the parsed command line (set by
 * parse_cmdline_early()).
 *
 * Visible sequence: copy new_cpu_data into boot_cpu_data and run
 * pre_setup_arch_hook(); test for an EFI loader (LOADER_TYPE 0x50 with a
 * system table); copy boot parameters (root device, drive, screen, EDID,
 * APM, IST, video mode, system descriptor table, aux device, ramdisk flags)
 * into kernel variables; print the firmware memory map; fill in init_mm and
 * the kernel code/data resource bounds; parse the early command line; call
 * setup_memory(); set up early printk, APIC probing and SMP configuration
 * under their config options; register_memory(); select the console.
 *
 * Many calls between these steps, the statements guarded by the visible
 * `if` lines and most #endif lines are not visible in this listing.
 */
1298 void __init setup_arch(char **cmdline_p)
1300 unsigned long max_low_pfn;
1302 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
1303 pre_setup_arch_hook();
1307 * FIXME: This isn't an official loader_type right
1308 * now but does currently work with elilo.
1309 * If we were configured as an EFI kernel, check to make
1310 * sure that we were loaded correctly from elilo and that
1311 * the system table is valid. If not, then initialize normally.
1314 if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
1318 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
1319 drive_info = DRIVE_INFO;
1320 screen_info = SCREEN_INFO;
1321 edid_info = EDID_INFO;
1322 apm_info.bios = APM_BIOS_INFO;
1323 ist_info = IST_INFO;
1324 saved_videomode = VIDEO_MODE;
1325 if( SYS_DESC_TABLE.length != 0 ) {
1326 MCA_bus = SYS_DESC_TABLE.table[3] &0x2;
1327 machine_id = SYS_DESC_TABLE.table[0];
1328 machine_submodel_id = SYS_DESC_TABLE.table[1];
1329 BIOS_revision = SYS_DESC_TABLE.table[2];
1331 aux_device_present = AUX_DEVICE_INFO;
1333 #ifdef CONFIG_BLK_DEV_RAM
1334 rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
1335 rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
1336 rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
1342 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1343 print_memory_map(machine_specific_memory_setup());
1348 if (!MOUNT_ROOT_RDONLY)
1349 root_mountflags &= ~MS_RDONLY;
1350 init_mm.start_code = (unsigned long) _text;
1351 init_mm.end_code = (unsigned long) _etext;
1352 init_mm.end_data = (unsigned long) _edata;
1353 init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
1355 code_resource.start = virt_to_phys(_text);
1356 code_resource.end = virt_to_phys(_etext)-1;
1357 data_resource.start = virt_to_phys(_etext);
1358 data_resource.end = virt_to_phys(_edata)-1;
1360 parse_cmdline_early(cmdline_p);
1362 max_low_pfn = setup_memory();
1365 * NOTE: before this point _nobody_ is allowed to allocate
1366 * any memory using the bootmem allocator. Although the
1367 * alloctor is now initialised only the first 8Mb of the kernel
1368 * virtual address space has been mapped. All allocations before
1369 * paging_init() has completed must use the alloc_bootmem_low_pages()
1370 * variant (which allocates DMA'able memory) and care must be taken
1371 * not to exceed the 8Mb limit.
1375 smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
1380 * NOTE: at this point the bootmem allocator is fully available.
1383 #ifdef CONFIG_EARLY_PRINTK
1385 char *s = strstr(*cmdline_p, "earlyprintk=");
1387 extern void setup_early_printk(char *);
1389 setup_early_printk(s);
1390 printk("early console enabled\n");
1398 #ifdef CONFIG_X86_GENERICARCH
1399 generic_apic_probe(*cmdline_p);
1405 * Parse the ACPI tables for possible boot-time SMP configuration.
1409 #ifdef CONFIG_X86_LOCAL_APIC
1410 if (smp_found_config)
1414 register_memory(max_low_pfn);
1417 #if defined(CONFIG_VGA_CONSOLE)
1418 if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
1419 conswitchp = &vga_con;
1420 #elif defined(CONFIG_DUMMY_CONSOLE)
1421 conswitchp = &dummy_con;
1426 #include "setup_arch_post.h"
1430 * c-file-style:"k&r"