2 * linux/arch/i386/kernel/setup.c
4 * Copyright (C) 1995 Linus Torvalds
6 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
8 * Memory region support
9 * David Parsons <orc@pell.chi.il.us>, July-August 1999
11 * Added E820 sanitization routine (removes overlapping memory regions);
12 * Brian Moyle <bmoyle@mvista.com>, February 2001
14 * Moved CPU detection code to cpu/${cpu}.c
15 * Patrick Mochel <mochel@osdl.org>, March 2002
17 * Provisions for empty E820 memory regions (reported by certain BIOSes).
18 * Alex Achenbach <xela@slit.de>, December 2002.
23 * This file handles the architecture-dependent parts of initialization
26 #include <linux/sched.h>
28 #include <linux/tty.h>
29 #include <linux/ioport.h>
30 #include <linux/acpi.h>
31 #include <linux/apm_bios.h>
32 #include <linux/initrd.h>
33 #include <linux/bootmem.h>
34 #include <linux/seq_file.h>
35 #include <linux/console.h>
36 #include <linux/root_dev.h>
37 #include <linux/highmem.h>
38 #include <linux/module.h>
39 #include <linux/efi.h>
40 #include <linux/init.h>
41 #include <linux/edd.h>
42 #include <video/edid.h>
44 #include <asm/mpspec.h>
45 #include <asm/setup.h>
46 #include <asm/arch_hooks.h>
47 #include <asm/sections.h>
48 #include <asm/io_apic.h>
51 #include "setup_arch_pre.h"
52 #include <bios_ebda.h>
54 /* This value is set up by the early boot code to point to the value
55 immediately after the boot time page tables. It contains a *physical*
56 address, and must not be in the .bss segment! */
57 unsigned long init_pg_tables_end __initdata = ~0UL;
59 int disable_pse __initdata = 0;
67 EXPORT_SYMBOL(efi_enabled);
70 /* cpu data as detected by the assembly code in head.S */
71 struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
72 /* common cpu data for all cpus */
73 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
75 unsigned long mmu_cr4_features;
76 EXPORT_SYMBOL_GPL(mmu_cr4_features);
78 #ifdef CONFIG_ACPI_INTERPRETER
79 int acpi_disabled = 0;
81 int acpi_disabled = 1;
83 EXPORT_SYMBOL(acpi_disabled);
85 #ifdef CONFIG_ACPI_BOOT
86 int __initdata acpi_force = 0;
87 extern acpi_interrupt_flags acpi_sci_flags;
91 /* for MCA, but anyone else can use it if they want */
92 unsigned int machine_id;
93 unsigned int machine_submodel_id;
94 unsigned int BIOS_revision;
95 unsigned int mca_pentium_flag;
97 /* For PCI or other memory-mapped resources */
98 unsigned long pci_mem_start = 0x10000000;
100 /* user-defined highmem size */
101 static unsigned int highmem_pages = -1;
106 struct drive_info_struct { char dummy[32]; } drive_info;
107 struct screen_info screen_info;
108 struct apm_info apm_info;
109 struct sys_desc_table_struct {
110 unsigned short length;
111 unsigned char table[0];
113 struct edid_info edid_info;
114 struct ist_info ist_info;
117 unsigned char aux_device_present;
119 extern void early_cpu_init(void);
120 extern void dmi_scan_machine(void);
121 extern void generic_apic_probe(char *);
122 extern int root_mountflags;
124 unsigned long saved_videomode;
126 #define RAMDISK_IMAGE_START_MASK 0x07FF
127 #define RAMDISK_PROMPT_FLAG 0x8000
128 #define RAMDISK_LOAD_FLAG 0x4000
130 static char command_line[COMMAND_LINE_SIZE];
132 unsigned char __initdata boot_params[PARAM_SIZE];
134 static struct resource data_resource = {
135 .name = "Kernel data",
138 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
141 static struct resource code_resource = {
142 .name = "Kernel code",
145 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
148 static struct resource system_rom_resource = {
149 .name = "System ROM",
152 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
155 static struct resource extension_rom_resource = {
156 .name = "Extension ROM",
159 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
162 static struct resource adapter_rom_resources[] = { {
163 .name = "Adapter ROM",
166 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
168 .name = "Adapter ROM",
171 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
173 .name = "Adapter ROM",
176 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
178 .name = "Adapter ROM",
181 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
183 .name = "Adapter ROM",
186 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
188 .name = "Adapter ROM",
191 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
/* Number of entries in the adapter_rom_resources[] array. */
194 #define ADAPTER_ROM_RESOURCES \
195 (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
197 static struct resource video_rom_resource = {
201 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
204 static struct resource video_ram_resource = {
205 .name = "Video RAM area",
208 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
211 static struct resource standard_io_resources[] = { {
215 .flags = IORESOURCE_BUSY | IORESOURCE_IO
220 .flags = IORESOURCE_BUSY | IORESOURCE_IO
225 .flags = IORESOURCE_BUSY | IORESOURCE_IO
230 .flags = IORESOURCE_BUSY | IORESOURCE_IO
235 .flags = IORESOURCE_BUSY | IORESOURCE_IO
237 .name = "dma page reg",
240 .flags = IORESOURCE_BUSY | IORESOURCE_IO
245 .flags = IORESOURCE_BUSY | IORESOURCE_IO
250 .flags = IORESOURCE_BUSY | IORESOURCE_IO
255 .flags = IORESOURCE_BUSY | IORESOURCE_IO
/* Number of entries in the standard_io_resources[] array. */
258 #define STANDARD_IO_RESOURCES \
259 (sizeof standard_io_resources / sizeof standard_io_resources[0])
/* Evaluates to non-zero when the unsigned short read at address x equals 0xaa55. */
261 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
263 static int __init romchecksum(unsigned char *rom, unsigned long length)
265 unsigned char *p, sum = 0;
267 for (p = rom; p < rom + length; p++)
272 static void __init probe_roms(void)
274 unsigned long start, length, upper;
279 upper = adapter_rom_resources[0].start;
280 for (start = video_rom_resource.start; start < upper; start += 2048) {
281 rom = isa_bus_to_virt(start);
282 if (!romsignature(rom))
285 video_rom_resource.start = start;
287 /* 0 < length <= 0x7f * 512, historically */
288 length = rom[2] * 512;
290 /* if checksum okay, trust length byte */
291 if (length && romchecksum(rom, length))
292 video_rom_resource.end = start + length - 1;
294 request_resource(&iomem_resource, &video_rom_resource);
298 start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
303 request_resource(&iomem_resource, &system_rom_resource);
304 upper = system_rom_resource.start;
306 /* check for extension rom (ignore length byte!) */
307 rom = isa_bus_to_virt(extension_rom_resource.start);
308 if (romsignature(rom)) {
309 length = extension_rom_resource.end - extension_rom_resource.start + 1;
310 if (romchecksum(rom, length)) {
311 request_resource(&iomem_resource, &extension_rom_resource);
312 upper = extension_rom_resource.start;
316 /* check for adapter roms on 2k boundaries */
317 for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
318 rom = isa_bus_to_virt(start);
319 if (!romsignature(rom))
322 /* 0 < length <= 0x7f * 512, historically */
323 length = rom[2] * 512;
325 /* but accept any length that fits if checksum okay */
326 if (!length || start + length > upper || !romchecksum(rom, length))
329 adapter_rom_resources[i].start = start;
330 adapter_rom_resources[i].end = start + length - 1;
331 request_resource(&iomem_resource, &adapter_rom_resources[i]);
333 start = adapter_rom_resources[i++].end & ~2047UL;
337 static void __init limit_regions(unsigned long long size)
339 unsigned long long current_addr = 0;
343 for (i = 0; i < memmap.nr_map; i++) {
344 current_addr = memmap.map[i].phys_addr +
345 (memmap.map[i].num_pages << 12);
346 if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) {
347 if (current_addr >= size) {
348 memmap.map[i].num_pages -=
349 (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
350 memmap.nr_map = i + 1;
356 for (i = 0; i < e820.nr_map; i++) {
357 if (e820.map[i].type == E820_RAM) {
358 current_addr = e820.map[i].addr + e820.map[i].size;
359 if (current_addr >= size) {
360 e820.map[i].size -= current_addr-size;
368 static void __init add_memory_region(unsigned long long start,
369 unsigned long long size, int type)
377 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
381 e820.map[x].addr = start;
382 e820.map[x].size = size;
383 e820.map[x].type = type;
386 } /* add_memory_region */
390 static void __init print_memory_map(char *who)
394 for (i = 0; i < e820.nr_map; i++) {
395 printk(" %s: %016Lx - %016Lx ", who,
397 e820.map[i].addr + e820.map[i].size);
398 switch (e820.map[i].type) {
399 case E820_RAM: printk("(usable)\n");
402 printk("(reserved)\n");
405 printk("(ACPI data)\n");
408 printk("(ACPI NVS)\n");
410 default: printk("type %lu\n", e820.map[i].type);
417 * Sanitize the BIOS e820 map.
419 * Some e820 responses include overlapping entries. The following
420 * replaces the original e820 map with a new one, removing overlaps.
423 struct change_member {
424 struct e820entry *pbios; /* pointer to original bios entry */
425 unsigned long long addr; /* address for this change point */
427 struct change_member change_point_list[2*E820MAX] __initdata;
428 struct change_member *change_point[2*E820MAX] __initdata;
429 struct e820entry *overlap_list[E820MAX] __initdata;
430 struct e820entry new_bios[E820MAX] __initdata;
432 static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
434 struct change_member *change_tmp;
435 unsigned long current_type, last_type;
436 unsigned long long last_addr;
437 int chgidx, still_changing;
440 int old_nr, new_nr, chg_nr;
444 Visually we're performing the following (1,2,3,4 = memory types)...
446 Sample memory map (w/overlaps):
447 ____22__________________
448 ______________________4_
449 ____1111________________
450 _44_____________________
451 11111111________________
452 ____________________33__
453 ___________44___________
454 __________33333_________
455 ______________22________
456 ___________________2222_
457 _________111111111______
458 _____________________11_
459 _________________4______
461 Sanitized equivalent (no overlap):
462 1_______________________
463 _44_____________________
464 ___1____________________
465 ____22__________________
466 ______11________________
467 _________1______________
468 __________3_____________
469 ___________44___________
470 _____________33_________
471 _______________2________
472 ________________1_______
473 _________________4______
474 ___________________2____
475 ____________________33__
476 ______________________4_
479 /* if there's only one memory region, don't bother */
485 /* bail out if we find any unreasonable addresses in bios map */
486 for (i=0; i<old_nr; i++)
487 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
490 /* create pointers for initial change-point information (for sorting) */
491 for (i=0; i < 2*old_nr; i++)
492 change_point[i] = &change_point_list[i];
494 /* record all known change-points (starting and ending addresses),
495 omitting those that are for empty memory regions */
497 for (i=0; i < old_nr; i++) {
498 if (biosmap[i].size != 0) {
499 change_point[chgidx]->addr = biosmap[i].addr;
500 change_point[chgidx++]->pbios = &biosmap[i];
501 change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
502 change_point[chgidx++]->pbios = &biosmap[i];
505 chg_nr = chgidx; /* true number of change-points */
507 /* sort change-point list by memory addresses (low -> high) */
509 while (still_changing) {
511 for (i=1; i < chg_nr; i++) {
512 /* if <current_addr> < <last_addr>, swap */
513 /* or, if current=<start_addr> & last=<end_addr>, swap */
514 if ((change_point[i]->addr < change_point[i-1]->addr) ||
515 ((change_point[i]->addr == change_point[i-1]->addr) &&
516 (change_point[i]->addr == change_point[i]->pbios->addr) &&
517 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
520 change_tmp = change_point[i];
521 change_point[i] = change_point[i-1];
522 change_point[i-1] = change_tmp;
528 /* create a new bios memory map, removing overlaps */
529 overlap_entries=0; /* number of entries in the overlap table */
530 new_bios_entry=0; /* index for creating new bios map entries */
531 last_type = 0; /* start with undefined memory type */
532 last_addr = 0; /* start with 0 as last starting address */
533 /* loop through change-points, determining effect on the new bios map */
534 for (chgidx=0; chgidx < chg_nr; chgidx++)
536 /* keep track of all overlapping bios entries */
537 if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
539 /* add map entry to overlap list (> 1 entry implies an overlap) */
540 overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
544 /* remove entry from list (order independent, so swap with last) */
545 for (i=0; i<overlap_entries; i++)
547 if (overlap_list[i] == change_point[chgidx]->pbios)
548 overlap_list[i] = overlap_list[overlap_entries-1];
552 /* if there are overlapping entries, decide which "type" to use */
553 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
555 for (i=0; i<overlap_entries; i++)
556 if (overlap_list[i]->type > current_type)
557 current_type = overlap_list[i]->type;
558 /* continue building up new bios map based on this information */
559 if (current_type != last_type) {
560 if (last_type != 0) {
561 new_bios[new_bios_entry].size =
562 change_point[chgidx]->addr - last_addr;
563 /* move forward only if the new size was non-zero */
564 if (new_bios[new_bios_entry].size != 0)
565 if (++new_bios_entry >= E820MAX)
566 break; /* no more space left for new bios entries */
568 if (current_type != 0) {
569 new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
570 new_bios[new_bios_entry].type = current_type;
571 last_addr=change_point[chgidx]->addr;
573 last_type = current_type;
576 new_nr = new_bios_entry; /* retain count for new bios entries */
578 /* copy new bios mapping into original location */
579 memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
586 * Copy the BIOS e820 map into a safe place.
588 * Sanity-check it while we're at it..
590 * If we're lucky and live on a modern system, the setup code
591 * will have given us a memory map that we can use to properly
592 * set up memory. If we aren't, we'll fake a memory map.
594 * We check to see that the memory map contains at least 2 elements
595 * before we'll use it, because the detection code in setup.S may
596 * not be perfect and most every PC known to man has two memory
597 * regions: one from 0 to 640k, and one from 1mb up. (The IBM
598 * thinkpad 560x, for example, does not cooperate with the memory
601 static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
603 /* Only one memory region (or negative)? Ignore it */
608 unsigned long long start = biosmap->addr;
609 unsigned long long size = biosmap->size;
610 unsigned long long end = start + size;
611 unsigned long type = biosmap->type;
613 /* Overflow in 64 bits? Ignore the memory map. */
618 * Some BIOSes claim RAM in the 640k - 1M region.
619 * Not right. Fix it up.
621 if (type == E820_RAM) {
622 if (start < 0x100000ULL && end > 0xA0000ULL) {
623 if (start < 0xA0000ULL)
624 add_memory_region(start, 0xA0000ULL-start, type);
625 if (end <= 0x100000ULL)
631 add_memory_region(start, size, type);
632 } while (biosmap++,--nr_map);
636 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
638 #ifdef CONFIG_EDD_MODULE
642 * copy_edd() - Copy the BIOS EDD information
643 * from boot_params into a safe place.
646 static inline void copy_edd(void)
648 memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
649 memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
650 edd.mbr_signature_nr = EDD_MBR_SIG_NR;
651 edd.edd_info_nr = EDD_NR;
654 static inline void copy_edd(void)
660 * Do NOT EVER look at the BIOS memory size location.
661 * It does not work on many machines.
663 #define LOWMEMSIZE() (0x9f000)
665 unsigned long crashdump_addr = 0xdeadbeef;
667 static void __init parse_cmdline_early (char ** cmdline_p)
669 char c = ' ', *to = command_line, *from = saved_command_line;
673 /* Save unparsed command line copy for /proc/cmdline */
674 saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
678 * "mem=nopentium" disables the 4MB page tables.
679 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
680 * to <mem>, overriding the bios size.
681 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
682 * <start> to <start>+<mem>, overriding the bios size.
684 * HPA tells me bootloaders need to parse mem=, so no new
685 * option should be mem= [also see Documentation/i386/boot.txt]
687 if (c == ' ' && !memcmp(from, "mem=", 4)) {
688 if (to != command_line)
690 if (!memcmp(from+4, "nopentium", 9)) {
692 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
695 /* If the user specifies memory size, we
696 * limit the BIOS-provided memory map to
697 * that size. exactmap can be used to specify
698 * the exact map. mem=number can be used to
699 * trim the existing memory map.
701 unsigned long long mem_size;
703 mem_size = memparse(from+4, &from);
704 limit_regions(mem_size);
709 if (c == ' ' && !memcmp(from, "memmap=", 7)) {
710 if (to != command_line)
712 if (!memcmp(from+7, "exactmap", 8)) {
717 /* If the user specifies memory size, we
718 * limit the BIOS-provided memory map to
719 * that size. exactmap can be used to specify
720 * the exact map. mem=number can be used to
721 * trim the existing memory map.
723 unsigned long long start_at, mem_size;
725 mem_size = memparse(from+7, &from);
727 start_at = memparse(from+1, &from);
728 add_memory_region(start_at, mem_size, E820_RAM);
729 } else if (*from == '#') {
730 start_at = memparse(from+1, &from);
731 add_memory_region(start_at, mem_size, E820_ACPI);
732 } else if (*from == '$') {
733 start_at = memparse(from+1, &from);
734 add_memory_region(start_at, mem_size, E820_RESERVED);
736 limit_regions(mem_size);
742 #ifdef CONFIG_X86_SMP
744 * If the BIOS enumerates physical processors before logical,
745 * maxcpus=N at enumeration-time can be used to disable HT.
747 else if (!memcmp(from, "maxcpus=", 8)) {
748 extern unsigned int maxcpus;
750 maxcpus = simple_strtoul(from + 8, NULL, 0);
754 #ifdef CONFIG_ACPI_BOOT
755 /* "acpi=off" disables both ACPI table parsing and interpreter */
756 else if (!memcmp(from, "acpi=off", 8)) {
760 /* acpi=force to over-ride black-list */
761 else if (!memcmp(from, "acpi=force", 10)) {
767 /* acpi=strict disables out-of-spec workarounds */
768 else if (!memcmp(from, "acpi=strict", 11)) {
772 /* Limit ACPI just to boot-time to enable HT */
773 else if (!memcmp(from, "acpi=ht", 7)) {
779 /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
780 else if (!memcmp(from, "pci=noacpi", 10)) {
783 /* "acpi=noirq" disables ACPI interrupt routing */
784 else if (!memcmp(from, "acpi=noirq", 10)) {
788 else if (!memcmp(from, "acpi_sci=edge", 13))
789 acpi_sci_flags.trigger = 1;
791 else if (!memcmp(from, "acpi_sci=level", 14))
792 acpi_sci_flags.trigger = 3;
794 else if (!memcmp(from, "acpi_sci=high", 13))
795 acpi_sci_flags.polarity = 1;
797 else if (!memcmp(from, "acpi_sci=low", 12))
798 acpi_sci_flags.polarity = 3;
800 #ifdef CONFIG_X86_IO_APIC
801 else if (!memcmp(from, "acpi_skip_timer_override", 24))
802 acpi_skip_timer_override = 1;
805 #ifdef CONFIG_X86_LOCAL_APIC
806 /* disable IO-APIC */
807 else if (!memcmp(from, "noapic", 6))
808 disable_ioapic_setup();
809 #endif /* CONFIG_X86_LOCAL_APIC */
810 #endif /* CONFIG_ACPI_BOOT */
813 * highmem=size forces highmem to be exactly 'size' bytes.
814 * This works even on boxes that have no highmem otherwise.
815 * This also works to reduce highmem size on bigger boxes.
817 if (c == ' ' && !memcmp(from, "highmem=", 8))
818 highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
820 if (c == ' ' && !memcmp(from, "crashdump=", 10))
821 crashdump_addr = memparse(from+10, &from);
824 * vmalloc=size forces the vmalloc area to be exactly 'size'
825 * bytes. This can be used to increase (or decrease) the
826 * vmalloc area - the default is 128m.
828 if (c == ' ' && !memcmp(from, "vmalloc=", 8))
829 __VMALLOC_RESERVE = memparse(from+8, &from);
834 if (COMMAND_LINE_SIZE <= ++len)
839 *cmdline_p = command_line;
841 printk(KERN_INFO "user-defined physical RAM map:\n");
842 print_memory_map("user");
847 * Callback for efi_memmap_walk.
850 efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
852 unsigned long *max_pfn = arg, pfn;
855 pfn = PFN_UP(end -1);
864 * Find the highest page frame number we have available
866 void __init find_max_pfn(void)
872 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
876 for (i = 0; i < e820.nr_map; i++) {
877 unsigned long start, end;
879 if (e820.map[i].type != E820_RAM)
881 start = PFN_UP(e820.map[i].addr);
882 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
891 * Determine low and high memory ranges:
893 unsigned long __init find_max_low_pfn(void)
895 unsigned long max_low_pfn;
897 max_low_pfn = max_pfn;
898 if (max_low_pfn > MAXMEM_PFN) {
899 if (highmem_pages == -1)
900 highmem_pages = max_pfn - MAXMEM_PFN;
901 if (highmem_pages + MAXMEM_PFN < max_pfn)
902 max_pfn = MAXMEM_PFN + highmem_pages;
903 if (highmem_pages + MAXMEM_PFN > max_pfn) {
904 printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
907 max_low_pfn = MAXMEM_PFN;
908 #ifndef CONFIG_HIGHMEM
909 /* Maximum memory usable is what is directly addressable */
910 printk(KERN_WARNING "Warning only %ldMB will be used.\n",
912 if (max_pfn > MAX_NONPAE_PFN)
913 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
915 printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
916 max_pfn = MAXMEM_PFN;
917 #else /* !CONFIG_HIGHMEM */
918 #ifndef CONFIG_X86_PAE
919 if (max_pfn > MAX_NONPAE_PFN) {
920 max_pfn = MAX_NONPAE_PFN;
921 printk(KERN_WARNING "Warning only 4GB will be used.\n");
922 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
924 #endif /* !CONFIG_X86_PAE */
925 #endif /* !CONFIG_HIGHMEM */
927 if (highmem_pages == -1)
929 #ifdef CONFIG_HIGHMEM
930 if (highmem_pages >= max_pfn) {
931 printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
935 if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
936 printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
939 max_low_pfn -= highmem_pages;
943 printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
949 #ifndef CONFIG_DISCONTIGMEM
952 * Free all available memory for boot time allocation. Used
953 * as a callback function by efi_memmap_walk()
957 free_available_memory(unsigned long start, unsigned long end, void *arg)
959 /* check max_low_pfn */
960 if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
962 if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
963 end = (max_low_pfn + 1) << PAGE_SHIFT;
965 free_bootmem(start, end - start);
970 * Register fully available low RAM pages with the bootmem allocator.
972 static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
977 efi_memmap_walk(free_available_memory, NULL);
980 for (i = 0; i < e820.nr_map; i++) {
981 unsigned long curr_pfn, last_pfn, size;
983 * Reserve usable low memory
985 if (e820.map[i].type != E820_RAM)
988 * We are rounding up the start address of usable memory:
990 curr_pfn = PFN_UP(e820.map[i].addr);
991 if (curr_pfn >= max_low_pfn)
994 * ... and at the end of the usable range downwards:
996 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
998 if (last_pfn > max_low_pfn)
999 last_pfn = max_low_pfn;
1002 * .. finally, did all the rounding and playing
1003 * around just make the area go away?
1005 if (last_pfn <= curr_pfn)
1008 size = last_pfn - curr_pfn;
1009 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
1014 * workaround for Dell systems that neglect to reserve EBDA
1016 static void __init reserve_ebda_region(void)
1019 addr = get_bios_ebda();
1021 reserve_bootmem(addr, PAGE_SIZE);
1024 static unsigned long __init setup_memory(void)
1026 unsigned long bootmap_size, start_pfn, max_low_pfn;
1029 * partially used pages are not usable - thus
1030 * we are rounding upwards:
1032 start_pfn = PFN_UP(init_pg_tables_end);
1036 max_low_pfn = find_max_low_pfn();
1038 #ifdef CONFIG_HIGHMEM
1039 highstart_pfn = highend_pfn = max_pfn;
1040 if (max_pfn > max_low_pfn) {
1041 highstart_pfn = max_low_pfn;
1043 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
1044 pages_to_mb(highend_pfn - highstart_pfn));
1046 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
1047 pages_to_mb(max_low_pfn));
1049 * Initialize the boot-time allocator (with low memory only):
1051 bootmap_size = init_bootmem(start_pfn, max_low_pfn);
1053 register_bootmem_low_pages(max_low_pfn);
1056 * Reserve the bootmem bitmap itself as well. We do this in two
1057 * steps (first step was init_bootmem()) because this catches
1058 * the (very unlikely) case of us accidentally initializing the
1059 * bootmem allocator with an invalid RAM area.
1061 reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
1062 bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
1065 * reserve physical page 0 - it's a special BIOS page on many boxes,
1066 * enabling clean reboots, SMP operation, laptop functions.
1068 reserve_bootmem(0, PAGE_SIZE);
1070 /* reserve EBDA region, it's a 4K region */
1071 reserve_ebda_region();
1073 /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
1074 PCI prefetch into it (errata #56). Usually the page is reserved anyways,
1075 unless you have no PS/2 mouse plugged in. */
1076 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
1077 boot_cpu_data.x86 == 6)
1078 reserve_bootmem(0xa0000 - 4096, 4096);
1082 * But first pinch a few for the stack/trampoline stuff
1083 * FIXME: Don't need the extra page at 4K, but need to fix
1084 * trampoline before removing it. (see the GDT stuff)
1086 reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
1088 #ifdef CONFIG_ACPI_SLEEP
1090 * Reserve low memory region for sleep support.
1092 acpi_reserve_bootmem();
1094 #ifdef CONFIG_X86_FIND_SMP_CONFIG
1096 * Find and reserve possible boot-time SMP configuration:
1101 #ifdef CONFIG_BLK_DEV_INITRD
1102 if (LOADER_TYPE && INITRD_START) {
1103 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
1104 reserve_bootmem(INITRD_START, INITRD_SIZE);
1106 INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
1107 initrd_end = initrd_start+INITRD_SIZE;
1110 printk(KERN_ERR "initrd extends beyond end of memory "
1111 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
1112 INITRD_START + INITRD_SIZE,
1113 max_low_pfn << PAGE_SHIFT);
1121 extern unsigned long setup_memory(void);
1122 #endif /* !CONFIG_DISCONTIGMEM */
1125 * Request address space for all standard RAM and ROM resources
1126 * and also for regions reported as reserved by the e820.
1129 legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
1134 for (i = 0; i < e820.nr_map; i++) {
1135 struct resource *res;
1136 if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
1138 res = alloc_bootmem_low(sizeof(struct resource));
1139 switch (e820.map[i].type) {
1140 case E820_RAM: res->name = "System RAM"; break;
1141 case E820_ACPI: res->name = "ACPI Tables"; break;
1142 case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
1143 default: res->name = "reserved";
1145 res->start = e820.map[i].addr;
1146 res->end = res->start + e820.map[i].size - 1;
1147 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1148 request_resource(&iomem_resource, res);
1149 if (e820.map[i].type == E820_RAM) {
1151 * We don't know which RAM region contains kernel data,
1152 * so we try it repeatedly and let the resource manager
1155 request_resource(res, code_resource);
1156 request_resource(res, data_resource);
1162 * Request address space for all standard resources
1164 static void __init register_memory(unsigned long max_low_pfn)
1166 unsigned long low_mem_size;
1170 efi_initialize_iomem_resources(&code_resource, &data_resource);
1172 legacy_init_iomem_resources(&code_resource, &data_resource);
1174 /* EFI systems may still have VGA */
1175 request_resource(&iomem_resource, &video_ram_resource);
1177 /* request I/O space for devices used on all i[345]86 PCs */
1178 for (i = 0; i < STANDARD_IO_RESOURCES; i++)
1179 request_resource(&ioport_resource, &standard_io_resources[i]);
1181 /* Tell the PCI layer not to allocate too close to the RAM area.. */
1182 low_mem_size = ((max_low_pfn << PAGE_SHIFT) + 0xfffff) & ~0xfffff;
1183 if (low_mem_size > pci_mem_start)
1184 pci_mem_start = low_mem_size;
1187 /* Use inline assembly to define this because the nops are defined
1188 as inline assembly strings in the include files and we cannot
1189 get them easily into strings. */
1190 asm("\t.data\nintelnops: "
1191 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
1192 GENERIC_NOP7 GENERIC_NOP8);
1193 asm("\t.data\nk8nops: "
1194 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
1196 asm("\t.data\nk7nops: "
1197 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
1200 extern unsigned char intelnops[], k8nops[], k7nops[];
1201 static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
1206 intelnops + 1 + 2 + 3,
1207 intelnops + 1 + 2 + 3 + 4,
1208 intelnops + 1 + 2 + 3 + 4 + 5,
1209 intelnops + 1 + 2 + 3 + 4 + 5 + 6,
1210 intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1212 static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
1218 k8nops + 1 + 2 + 3 + 4,
1219 k8nops + 1 + 2 + 3 + 4 + 5,
1220 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
1221 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1223 static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
1229 k7nops + 1 + 2 + 3 + 4,
1230 k7nops + 1 + 2 + 3 + 4 + 5,
1231 k7nops + 1 + 2 + 3 + 4 + 5 + 6,
1232 k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1236 unsigned char **noptable;
1238 { X86_FEATURE_K8, k8_nops },
1239 { X86_FEATURE_K7, k7_nops },
1243 /* Replace instructions with better alternatives for this CPU type.
1245 This runs before SMP is initialized to avoid SMP problems with
1246 self modifying code. This implies that asymmetric systems where
1247 APs have fewer capabilities than the boot processor are not handled.
1248 In this case boot with "noreplacement". */
1249 void apply_alternatives(void *start, void *end)
1251 struct alt_instr *a;
1253 unsigned char **noptable = intel_nops;
1254 for (i = 0; noptypes[i].cpuid >= 0; i++) {
1255 if (boot_cpu_has(noptypes[i].cpuid)) {
1256 noptable = noptypes[i].noptable;
1260 for (a = start; (void *)a < end; a++) {
1261 if (!boot_cpu_has(a->cpuid))
1263 BUG_ON(a->replacementlen > a->instrlen);
1264 memcpy(a->instr, a->replacement, a->replacementlen);
1265 diff = a->instrlen - a->replacementlen;
1266 /* Pad the rest with nops */
1267 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
1269 if (k > ASM_NOP_MAX)
1271 memcpy(a->instr + i, noptable[k], k);
1276 static int no_replacement __initdata = 0;
1278 void __init alternative_instructions(void)
1280 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
1283 apply_alternatives(__alt_instructions, __alt_instructions_end);
1286 static int __init noreplacement_setup(char *s)
1292 __setup("noreplacement", noreplacement_setup);
1294 static char * __init machine_specific_memory_setup(void);
1296 #ifdef CONFIG_CRASH_DUMP_SOFTBOOT
1297 extern void crashdump_reserve(void);
1301 * Determine if we were loaded by an EFI loader. If so, then we have also been
1302 * passed the efi memmap, systab, etc., so we should use these data structures
1303 * for initialization. Note, the efi init code path is determined by the
1304 * global efi_enabled. This allows the same kernel image to be used on existing
1305 * systems (with a traditional BIOS) as well as on EFI systems.
1307 void __init setup_arch(char **cmdline_p)
1309 unsigned long max_low_pfn;
1311 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
1312 pre_setup_arch_hook();
1316 * FIXME: This isn't an official loader_type right
1317 * now but does currently work with elilo.
1318 * If we were configured as an EFI kernel, check to make
1319 * sure that we were loaded correctly from elilo and that
1320 * the system table is valid. If not, then initialize normally.
1323 if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
1327 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
1328 drive_info = DRIVE_INFO;
1329 screen_info = SCREEN_INFO;
1330 edid_info = EDID_INFO;
1331 apm_info.bios = APM_BIOS_INFO;
1332 ist_info = IST_INFO;
1333 saved_videomode = VIDEO_MODE;
1334 if( SYS_DESC_TABLE.length != 0 ) {
1335 MCA_bus = SYS_DESC_TABLE.table[3] &0x2;
1336 machine_id = SYS_DESC_TABLE.table[0];
1337 machine_submodel_id = SYS_DESC_TABLE.table[1];
1338 BIOS_revision = SYS_DESC_TABLE.table[2];
1340 aux_device_present = AUX_DEVICE_INFO;
1342 #ifdef CONFIG_BLK_DEV_RAM
1343 rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
1344 rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
1345 rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
1351 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1352 print_memory_map(machine_specific_memory_setup());
1357 if (!MOUNT_ROOT_RDONLY)
1358 root_mountflags &= ~MS_RDONLY;
1359 init_mm.start_code = (unsigned long) _text;
1360 init_mm.end_code = (unsigned long) _etext;
1361 init_mm.end_data = (unsigned long) _edata;
1362 init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
1364 code_resource.start = virt_to_phys(_text);
1365 code_resource.end = virt_to_phys(_etext)-1;
1366 data_resource.start = virt_to_phys(_etext);
1367 data_resource.end = virt_to_phys(_edata)-1;
1369 parse_cmdline_early(cmdline_p);
1371 max_low_pfn = setup_memory();
1374 * NOTE: before this point _nobody_ is allowed to allocate
1375 * any memory using the bootmem allocator. Although the
1376 allocator is now initialised, only the first 8Mb of the kernel
1377 * virtual address space has been mapped. All allocations before
1378 * paging_init() has completed must use the alloc_bootmem_low_pages()
1379 * variant (which allocates DMA'able memory) and care must be taken
1380 * not to exceed the 8Mb limit.
1384 smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
1389 * NOTE: at this point the bootmem allocator is fully available.
1392 #ifdef CONFIG_EARLY_PRINTK
1394 char *s = strstr(*cmdline_p, "earlyprintk=");
1396 extern void setup_early_printk(char *);
1398 setup_early_printk(s);
1399 printk("early console enabled\n");
1405 #ifdef CONFIG_CRASH_DUMP_SOFTBOOT
1406 crashdump_reserve(); /* Preserve crash dump state from prev boot */
1411 #ifdef CONFIG_X86_GENERICARCH
1412 generic_apic_probe(*cmdline_p);
1418 * Parse the ACPI tables for possible boot-time SMP configuration.
1422 #ifdef CONFIG_X86_LOCAL_APIC
1423 if (smp_found_config)
1427 register_memory(max_low_pfn);
1430 #if defined(CONFIG_VGA_CONSOLE)
1431 if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
1432 conswitchp = &vga_con;
1433 #elif defined(CONFIG_DUMMY_CONSOLE)
1434 conswitchp = &dummy_con;
1439 #include "setup_arch_post.h"
1443 * c-file-style:"k&r"