2 * linux/arch/i386/kernel/setup.c
4 * Copyright (C) 1995 Linus Torvalds
6 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
8 * Memory region support
9 * David Parsons <orc@pell.chi.il.us>, July-August 1999
11 * Added E820 sanitization routine (removes overlapping memory regions);
12 * Brian Moyle <bmoyle@mvista.com>, February 2001
14 * Moved CPU detection code to cpu/${cpu}.c
15 * Patrick Mochel <mochel@osdl.org>, March 2002
17 * Provisions for empty E820 memory regions (reported by certain BIOSes).
18 * Alex Achenbach <xela@slit.de>, December 2002.
23 * This file handles the architecture-dependent parts of initialization
26 #include <linux/sched.h>
28 #include <linux/tty.h>
29 #include <linux/ioport.h>
30 #include <linux/acpi.h>
31 #include <linux/apm_bios.h>
32 #include <linux/initrd.h>
33 #include <linux/bootmem.h>
34 #include <linux/seq_file.h>
35 #include <linux/console.h>
36 #include <linux/mca.h>
37 #include <linux/root_dev.h>
38 #include <linux/highmem.h>
39 #include <linux/module.h>
40 #include <linux/efi.h>
41 #include <linux/init.h>
42 #include <linux/edd.h>
43 #include <linux/percpu.h>
44 #include <linux/notifier.h>
45 #include <video/edid.h>
47 #include <asm/mpspec.h>
48 #include <asm/setup.h>
49 #include <asm/arch_hooks.h>
50 #include <asm/sections.h>
51 #include <asm/io_apic.h>
54 #include <asm-xen/hypervisor.h>
55 #include <asm-xen/xen-public/physdev.h>
56 #include "setup_arch_pre.h"
57 #include <bios_ebda.h>
59 /* Allows setting of maximum possible memory size */
60 static unsigned long xen_override_max_pfn;
/*
 * Panic handling: hook the kernel panic notifier chain with a
 * Xen-specific callback (registered from setup_arch below).
 * NOTE(review): the notifier_block initializer appears truncated in
 * this view — confirm the .notifier_call/.next members against the
 * full source.
 */
62 extern struct notifier_block *panic_notifier_list;
63 static int xen_panic_event(struct notifier_block *, unsigned long, void *);
64 static struct notifier_block xen_panic_block = {
67 0 /* try to go last */
/* Non-zero when 4MB-page (PSE) support must be disabled at boot. */
71 int disable_pse __initdata = 0;
79 EXPORT_SYMBOL(efi_enabled);
82 /* cpu data as detected by the assembly code in head.S */
83 struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 0, 1, 0, -1 };
84 /* common cpu data for all cpus */
85 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 0, 1, 0, -1 };
/* Cached CR4 feature bits, exported for the rest of the arch code. */
87 unsigned long mmu_cr4_features;
88 EXPORT_SYMBOL_GPL(mmu_cr4_features);
/* ACPI defaults on when the interpreter is built in, off otherwise. */
90 #ifdef CONFIG_ACPI_INTERPRETER
91 int acpi_disabled = 0;
93 int acpi_disabled = 1;
95 EXPORT_SYMBOL(acpi_disabled);
97 #ifdef CONFIG_ACPI_BOOT
98 int __initdata acpi_force = 0;
99 extern acpi_interrupt_flags acpi_sci_flags;
102 /* for MCA, but anyone else can use it if they want */
103 unsigned int machine_id;
104 unsigned int machine_submodel_id;
105 unsigned int BIOS_revision;
106 unsigned int mca_pentium_flag;
108 /* For PCI or other memory-mapped resources */
109 unsigned long pci_mem_start = 0x10000000;
111 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
114 /* user-defined highmem size */
115 static unsigned int highmem_pages = -1;
/*
 * Legacy boot-parameter snapshots (drive info, VGA/EDID data, APM/IST
 * BIOS tables) copied out of the real-mode boot_params block.
 */
120 struct drive_info_struct { char dummy[32]; } drive_info;
121 struct screen_info screen_info;
122 struct apm_info apm_info;
123 struct sys_desc_table_struct {
124 unsigned short length;
125 unsigned char table[0];
127 struct edid_info edid_info;
128 struct ist_info ist_info;
131 unsigned char aux_device_present;
133 extern void early_cpu_init(void);
134 extern void dmi_scan_machine(void);
135 extern void generic_apic_probe(char *);
136 extern int root_mountflags;
/* Video mode the bootloader left the display in (saved for APM/resume). */
138 unsigned long saved_videomode;
140 #define RAMDISK_IMAGE_START_MASK 0x07FF
141 #define RAMDISK_PROMPT_FLAG 0x8000
142 #define RAMDISK_LOAD_FLAG 0x4000
/* Working copy of the kernel command line, parsed in parse_cmdline_early(). */
144 static char command_line[COMMAND_LINE_SIZE];
146 unsigned char __initdata boot_params[PARAM_SIZE];
/*
 * Resource descriptors for kernel text/data, legacy ROM windows and the
 * standard PC I/O ports.  start/end members are filled in elsewhere
 * (several initializer lines are not visible in this view).
 */
148 static struct resource data_resource = {
149 .name = "Kernel data",
152 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
155 static struct resource code_resource = {
156 .name = "Kernel code",
159 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
/* ROM scanning (probe_roms) is only done when privileged over real hardware. */
162 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
163 static struct resource system_rom_resource = {
164 .name = "System ROM",
167 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
170 static struct resource extension_rom_resource = {
171 .name = "Extension ROM",
174 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
177 static struct resource adapter_rom_resources[] = { {
178 .name = "Adapter ROM",
181 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
183 .name = "Adapter ROM",
186 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
188 .name = "Adapter ROM",
191 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
193 .name = "Adapter ROM",
196 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
198 .name = "Adapter ROM",
201 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
203 .name = "Adapter ROM",
206 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
/* Element count of adapter_rom_resources[]. */
209 #define ADAPTER_ROM_RESOURCES \
210 (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
212 static struct resource video_rom_resource = {
216 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
220 static struct resource video_ram_resource = {
221 .name = "Video RAM area",
224 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
/* Legacy ISA I/O port ranges claimed on every PC (PIC, DMA, timer, ...). */
227 static struct resource standard_io_resources[] = { {
231 .flags = IORESOURCE_BUSY | IORESOURCE_IO
236 .flags = IORESOURCE_BUSY | IORESOURCE_IO
241 .flags = IORESOURCE_BUSY | IORESOURCE_IO
246 .flags = IORESOURCE_BUSY | IORESOURCE_IO
251 .flags = IORESOURCE_BUSY | IORESOURCE_IO
253 .name = "dma page reg",
256 .flags = IORESOURCE_BUSY | IORESOURCE_IO
261 .flags = IORESOURCE_BUSY | IORESOURCE_IO
266 .flags = IORESOURCE_BUSY | IORESOURCE_IO
271 .flags = IORESOURCE_BUSY | IORESOURCE_IO
274 #define STANDARD_IO_RESOURCES \
275 (sizeof standard_io_resources / sizeof standard_io_resources[0])
277 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
/* A ROM image starts with the 0xAA55 signature word. */
278 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
/*
 * Byte-wise checksum over a ROM image; a valid ROM sums to zero.
 * NOTE(review): the accumulation and return lines are not visible in
 * this view — confirm against the full source.
 */
280 static int __init romchecksum(unsigned char *rom, unsigned long length)
282 unsigned char *p, sum = 0;
284 for (p = rom; p < rom + length; p++)
/*
 * Scan the legacy ISA ROM windows (video BIOS, system BIOS, extension
 * ROM, then adapter ROMs on 2K boundaries) and register each valid
 * image found as a busy iomem resource.
 */
289 static void __init probe_roms(void)
291 unsigned long start, length, upper;
296 upper = adapter_rom_resources[0].start;
297 for (start = video_rom_resource.start; start < upper; start += 2048) {
298 rom = isa_bus_to_virt(start);
299 if (!romsignature(rom))
302 video_rom_resource.start = start;
304 /* 0 < length <= 0x7f * 512, historically */
305 length = rom[2] * 512;
307 /* if checksum okay, trust length byte */
308 if (length && romchecksum(rom, length))
309 video_rom_resource.end = start + length - 1;
311 request_resource(&iomem_resource, &video_rom_resource);
/* Round the adapter-ROM scan start up to the next 2K boundary. */
315 start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
320 request_resource(&iomem_resource, &system_rom_resource);
321 upper = system_rom_resource.start;
323 /* check for extension rom (ignore length byte!) */
324 rom = isa_bus_to_virt(extension_rom_resource.start);
325 if (romsignature(rom)) {
326 length = extension_rom_resource.end - extension_rom_resource.start + 1;
327 if (romchecksum(rom, length)) {
328 request_resource(&iomem_resource, &extension_rom_resource);
329 upper = extension_rom_resource.start;
333 /* check for adapter roms on 2k boundaries */
334 for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
335 rom = isa_bus_to_virt(start);
336 if (!romsignature(rom))
339 /* 0 < length <= 0x7f * 512, historically */
340 length = rom[2] * 512;
342 /* but accept any length that fits if checksum okay */
343 if (!length || start + length > upper || !romchecksum(rom, length))
346 adapter_rom_resources[i].start = start;
347 adapter_rom_resources[i].end = start + length - 1;
348 request_resource(&iomem_resource, &adapter_rom_resources[i]);
350 start = adapter_rom_resources[i++].end & ~2047UL;
356 * Point at the empty zero page to start with. We map the real shared_info
357 * page as soon as fixmap is up and running.
359 shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
360 EXPORT_SYMBOL(HYPERVISOR_shared_info);
/* pfn -> mfn translation table supplied by Xen (filled in setup_memory). */
362 unsigned int *phys_to_machine_mapping, *pfn_to_mfn_frame_list;
363 EXPORT_SYMBOL(phys_to_machine_mapping);
365 /* Raw start-of-day parameters from the hypervisor. */
366 union xen_start_info_union xen_start_info_union;
/*
 * Clamp the EFI/e820 memory map so no usable RAM extends past 'size'
 * bytes: the region that crosses the limit is shrunk and the map is
 * truncated after it.  Used by the "mem=" command-line option.
 */
368 static void __init limit_regions(unsigned long long size)
370 unsigned long long current_addr = 0;
374 for (i = 0; i < memmap.nr_map; i++) {
375 current_addr = memmap.map[i].phys_addr +
376 (memmap.map[i].num_pages << 12);
377 if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) {
378 if (current_addr >= size) {
/* Shrink this descriptor by the whole pages past the limit. */
379 memmap.map[i].num_pages -=
380 (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
381 memmap.nr_map = i + 1;
/* Non-EFI path: same clamp applied to the e820 map. */
387 for (i = 0; i < e820.nr_map; i++) {
388 if (e820.map[i].type == E820_RAM) {
389 current_addr = e820.map[i].addr + e820.map[i].size;
390 if (current_addr >= size) {
391 e820.map[i].size -= current_addr-size;
/*
 * Append one region to the e820 map; complains (and presumably bails)
 * when the fixed-size table is full.  Index computation lines are not
 * visible in this view.
 */
399 static void __init add_memory_region(unsigned long long start,
400 unsigned long long size, int type)
408 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
412 e820.map[x].addr = start;
413 e820.map[x].size = size;
414 e820.map[x].type = type;
417 } /* add_memory_region */
/*
 * Dump the current e820 map to the kernel log, tagging each line with
 * 'who' (e.g. "BIOS-e820" or "user") and a human-readable region type.
 */
421 static void __init print_memory_map(char *who)
425 for (i = 0; i < e820.nr_map; i++) {
426 printk(" %s: %016Lx - %016Lx ", who,
428 e820.map[i].addr + e820.map[i].size);
429 switch (e820.map[i].type) {
430 case E820_RAM: printk("(usable)\n");
433 printk("(reserved)\n");
436 printk("(ACPI data)\n");
439 printk("(ACPI NVS)\n");
441 default: printk("type %lu\n", e820.map[i].type);
449 * Sanitize the BIOS e820 map.
451 * Some e820 responses include overlapping entries. The following
452 * replaces the original e820 map with a new one, removing overlaps.
/* Scratch tables for the change-point sort (static to avoid stack use). */
455 struct change_member {
456 struct e820entry *pbios; /* pointer to original bios entry */
457 unsigned long long addr; /* address for this change point */
459 struct change_member change_point_list[2*E820MAX] __initdata;
460 struct change_member *change_point[2*E820MAX] __initdata;
461 struct e820entry *overlap_list[E820MAX] __initdata;
462 struct e820entry new_bios[E820MAX] __initdata;
/*
 * Rebuild *biosmap in place without overlaps, updating *pnr_map to the
 * new entry count.  Algorithm: turn every region into two "change
 * points" (start/end), sort them, then sweep left to right tracking
 * which regions currently overlap; the highest type value wins.
 */
464 static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
466 struct change_member *change_tmp;
467 unsigned long current_type, last_type;
468 unsigned long long last_addr;
469 int chgidx, still_changing;
472 int old_nr, new_nr, chg_nr;
476 Visually we're performing the following (1,2,3,4 = memory types)...
478 Sample memory map (w/overlaps):
479 ____22__________________
480 ______________________4_
481 ____1111________________
482 _44_____________________
483 11111111________________
484 ____________________33__
485 ___________44___________
486 __________33333_________
487 ______________22________
488 ___________________2222_
489 _________111111111______
490 _____________________11_
491 _________________4______
493 Sanitized equivalent (no overlap):
494 1_______________________
495 _44_____________________
496 ___1____________________
497 ____22__________________
498 ______11________________
499 _________1______________
500 __________3_____________
501 ___________44___________
502 _____________33_________
503 _______________2________
504 ________________1_______
505 _________________4______
506 ___________________2____
507 ____________________33__
508 ______________________4_
511 /* if there's only one memory region, don't bother */
517 /* bail out if we find any unreasonable addresses in bios map */
518 for (i=0; i<old_nr; i++)
519 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
522 /* create pointers for initial change-point information (for sorting) */
523 for (i=0; i < 2*old_nr; i++)
524 change_point[i] = &change_point_list[i];
526 /* record all known change-points (starting and ending addresses),
527 omitting those that are for empty memory regions */
529 for (i=0; i < old_nr; i++) {
530 if (biosmap[i].size != 0) {
531 change_point[chgidx]->addr = biosmap[i].addr;
532 change_point[chgidx++]->pbios = &biosmap[i];
533 change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
534 change_point[chgidx++]->pbios = &biosmap[i];
537 chg_nr = chgidx; /* true number of change-points */
539 /* sort change-point list by memory addresses (low -> high) */
/* Simple bubble sort; ties are ordered so region-ends precede region-starts. */
541 while (still_changing) {
543 for (i=1; i < chg_nr; i++) {
544 /* if <current_addr> > <last_addr>, swap */
545 /* or, if current=<start_addr> & last=<end_addr>, swap */
546 if ((change_point[i]->addr < change_point[i-1]->addr) ||
547 ((change_point[i]->addr == change_point[i-1]->addr) &&
548 (change_point[i]->addr == change_point[i]->pbios->addr) &&
549 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
552 change_tmp = change_point[i];
553 change_point[i] = change_point[i-1];
554 change_point[i-1] = change_tmp;
560 /* create a new bios memory map, removing overlaps */
561 overlap_entries=0; /* number of entries in the overlap table */
562 new_bios_entry=0; /* index for creating new bios map entries */
563 last_type = 0; /* start with undefined memory type */
564 last_addr = 0; /* start with 0 as last starting address */
565 /* loop through change-points, determining affect on the new bios map */
566 for (chgidx=0; chgidx < chg_nr; chgidx++)
568 /* keep track of all overlapping bios entries */
569 if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
571 /* add map entry to overlap list (> 1 entry implies an overlap) */
572 overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
576 /* remove entry from list (order independent, so swap with last) */
577 for (i=0; i<overlap_entries; i++)
579 if (overlap_list[i] == change_point[chgidx]->pbios)
580 overlap_list[i] = overlap_list[overlap_entries-1];
584 /* if there are overlapping entries, decide which "type" to use */
585 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
587 for (i=0; i<overlap_entries; i++)
588 if (overlap_list[i]->type > current_type)
589 current_type = overlap_list[i]->type;
590 /* continue building up new bios map based on this information */
591 if (current_type != last_type) {
592 if (last_type != 0) {
593 new_bios[new_bios_entry].size =
594 change_point[chgidx]->addr - last_addr;
595 /* move forward only if the new size was non-zero */
596 if (new_bios[new_bios_entry].size != 0)
597 if (++new_bios_entry >= E820MAX)
598 break; /* no more space left for new bios entries */
600 if (current_type != 0) {
601 new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
602 new_bios[new_bios_entry].type = current_type;
603 last_addr=change_point[chgidx]->addr;
605 last_type = current_type;
608 new_nr = new_bios_entry; /* retain count for new bios entries */
610 /* copy new bios mapping into original location */
611 memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
618 * Copy the BIOS e820 map into a safe place.
620 * Sanity-check it while we're at it..
622 * If we're lucky and live on a modern system, the setup code
623 * will have given us a memory map that we can use to properly
624 * set up memory. If we aren't, we'll fake a memory map.
626 * We check to see that the memory map contains at least 2 elements
627 * before we'll use it, because the detection code in setup.S may
628 * not be perfect and most every PC known to man has two memory
629 * regions: one from 0 to 640k, and one from 1mb up. (The IBM
630 * thinkpad 560x, for example, does not cooperate with the memory
633 static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
635 /* Only one memory region (or negative)? Ignore it */
640 unsigned long long start = biosmap->addr;
641 unsigned long long size = biosmap->size;
642 unsigned long long end = start + size;
643 unsigned long type = biosmap->type;
645 /* Overflow in 64 bits? Ignore the memory map. */
650 * Some BIOSes claim RAM in the 640k - 1M region.
651 * Not right. Fix it up.
653 if (type == E820_RAM) {
/* Split a RAM region that straddles the 640k-1M hole: keep the part below 640k. */
654 if (start < 0x100000ULL && end > 0xA0000ULL) {
655 if (start < 0xA0000ULL)
656 add_memory_region(start, 0xA0000ULL-start, type);
657 if (end <= 0x100000ULL)
663 add_memory_region(start, size, type);
664 } while (biosmap++,--nr_map);
669 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
671 #ifdef CONFIG_EDD_MODULE
675 * copy_edd() - Copy the BIOS EDD information
676 * from boot_params into a safe place.
679 static inline void copy_edd(void)
681 memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
682 memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
683 edd.mbr_signature_nr = EDD_MBR_SIG_NR;
684 edd.edd_info_nr = EDD_NR;
/* Stub used when EDD support is not configured. */
687 static inline void copy_edd(void)
693 * Do NOT EVER look at the BIOS memory size location.
694 * It does not work on many machines.
696 #define LOWMEMSIZE() (0x9f000)
/*
 * Early command-line parsing: copies the command line handed over by
 * Xen (xen_start_info.cmd_line) and handles options that must be acted
 * on before the memory map is finalized (mem=, memmap=, acpi=, highmem=,
 * vmalloc=, noexec=, maxcpus=, ...).  Returns the remaining command
 * line to the caller via *cmdline_p.
 */
698 static void __init parse_cmdline_early (char ** cmdline_p)
700 char c = ' ', *to = command_line, *from = saved_command_line;
704 memcpy(saved_command_line, xen_start_info.cmd_line, MAX_CMDLINE);
705 /* Save unparsed command line copy for /proc/cmdline */
706 saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
712 * "mem=nopentium" disables the 4MB page tables.
713 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
714 * to <mem>, overriding the bios size.
715 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
716 * <start> to <start>+<mem>, overriding the bios size.
718 * HPA tells me bootloaders need to parse mem=, so no new
719 * option should be mem= [also see Documentation/i386/boot.txt]
721 if (!memcmp(from, "mem=", 4)) {
722 if (to != command_line)
724 if (!memcmp(from+4, "nopentium", 9)) {
726 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
729 /* If the user specifies memory size, we
730 * limit the BIOS-provided memory map to
731 * that size. exactmap can be used to specify
732 * the exact map. mem=number can be used to
733 * trim the existing memory map.
735 unsigned long long mem_size;
737 mem_size = memparse(from+4, &from);
739 limit_regions(mem_size);
/* Xen: also remember the user limit so find_max_pfn() can honour it. */
742 xen_override_max_pfn =
743 (unsigned long)(mem_size>>PAGE_SHIFT);
748 else if (!memcmp(from, "memmap=", 7)) {
749 if (to != command_line)
751 if (!memcmp(from+7, "exactmap", 8)) {
756 /* If the user specifies memory size, we
757 * limit the BIOS-provided memory map to
758 * that size. exactmap can be used to specify
759 * the exact map. mem=number can be used to
760 * trim the existing memory map.
762 unsigned long long start_at, mem_size;
764 mem_size = memparse(from+7, &from);
/* memmap=size@addr / size#addr / size$addr -> RAM / ACPI / reserved. */
766 start_at = memparse(from+1, &from);
767 add_memory_region(start_at, mem_size, E820_RAM);
768 } else if (*from == '#') {
769 start_at = memparse(from+1, &from);
770 add_memory_region(start_at, mem_size, E820_ACPI);
771 } else if (*from == '$') {
772 start_at = memparse(from+1, &from);
773 add_memory_region(start_at, mem_size, E820_RESERVED);
775 limit_regions(mem_size);
781 else if (!memcmp(from, "noexec=", 7))
782 noexec_setup(from + 7);
785 #ifdef CONFIG_X86_SMP
787 * If the BIOS enumerates physical processors before logical,
788 * maxcpus=N at enumeration-time can be used to disable HT.
790 else if (!memcmp(from, "maxcpus=", 8)) {
791 extern unsigned int maxcpus;
793 maxcpus = simple_strtoul(from + 8, NULL, 0);
797 #ifdef CONFIG_ACPI_BOOT
798 /* "acpi=off" disables both ACPI table parsing and interpreter */
799 else if (!memcmp(from, "acpi=off", 8)) {
803 /* acpi=force to over-ride black-list */
804 else if (!memcmp(from, "acpi=force", 10)) {
810 /* acpi=strict disables out-of-spec workarounds */
811 else if (!memcmp(from, "acpi=strict", 11)) {
815 /* Limit ACPI just to boot-time to enable HT */
816 else if (!memcmp(from, "acpi=ht", 7)) {
822 /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
823 else if (!memcmp(from, "pci=noacpi", 10)) {
826 /* "acpi=noirq" disables ACPI interrupt routing */
827 else if (!memcmp(from, "acpi=noirq", 10)) {
831 else if (!memcmp(from, "acpi_sci=edge", 13))
832 acpi_sci_flags.trigger = 1;
834 else if (!memcmp(from, "acpi_sci=level", 14))
835 acpi_sci_flags.trigger = 3;
837 else if (!memcmp(from, "acpi_sci=high", 13))
838 acpi_sci_flags.polarity = 1;
840 else if (!memcmp(from, "acpi_sci=low", 12))
841 acpi_sci_flags.polarity = 3;
843 #ifdef CONFIG_X86_IO_APIC
844 else if (!memcmp(from, "acpi_skip_timer_override", 24))
845 acpi_skip_timer_override = 1;
848 #ifdef CONFIG_X86_LOCAL_APIC
849 /* disable IO-APIC */
850 else if (!memcmp(from, "noapic", 6))
851 disable_ioapic_setup();
852 #endif /* CONFIG_X86_LOCAL_APIC */
853 #endif /* CONFIG_ACPI_BOOT */
856 * highmem=size forces highmem to be exactly 'size' bytes.
857 * This works even on boxes that have no highmem otherwise.
858 * This also works to reduce highmem size on bigger boxes.
860 else if (!memcmp(from, "highmem=", 8))
861 highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
864 * vmalloc=size forces the vmalloc area to be exactly 'size'
865 * bytes. This can be used to increase (or decrease) the
866 * vmalloc area - the default is 128m.
868 else if (!memcmp(from, "vmalloc=", 8))
869 __VMALLOC_RESERVE = memparse(from+8, &from);
875 if (COMMAND_LINE_SIZE <= ++len)
880 *cmdline_p = command_line;
882 printk(KERN_INFO "user-defined physical RAM map:\n");
883 print_memory_map("user");
889 * Callback for efi_memory_walk.
892 efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
894 unsigned long *max_pfn = arg, pfn;
897 pfn = PFN_UP(end -1);
905 * Find the highest page frame number we have available
907 void __init find_max_pfn(void)
913 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
917 for (i = 0; i < e820.nr_map; i++) {
918 unsigned long start, end;
920 if (e820.map[i].type != E820_RAM)
922 start = PFN_UP(e820.map[i].addr);
923 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
931 /* We don't use the fake e820 because we need to respond to user override. */
932 void __init find_max_pfn(void)
/*
 * Xen variant: max_pfn is simply the larger of the hypervisor-supplied
 * page count and any "mem=" user override (xen_override_max_pfn).
 */
934 if ( xen_override_max_pfn < xen_start_info.nr_pages )
935 xen_override_max_pfn = xen_start_info.nr_pages;
936 max_pfn = xen_override_max_pfn;
941 * Determine low and high memory ranges:
943 unsigned long __init find_max_low_pfn(void)
945 unsigned long max_low_pfn;
947 max_low_pfn = max_pfn;
/* More RAM than the direct-mapped window: split into lowmem + highmem. */
948 if (max_low_pfn > MAXMEM_PFN) {
949 if (highmem_pages == -1)
950 highmem_pages = max_pfn - MAXMEM_PFN;
951 if (highmem_pages + MAXMEM_PFN < max_pfn)
952 max_pfn = MAXMEM_PFN + highmem_pages;
953 if (highmem_pages + MAXMEM_PFN > max_pfn) {
954 printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
957 max_low_pfn = MAXMEM_PFN;
958 #ifndef CONFIG_HIGHMEM
959 /* Maximum memory usable is what is directly addressable */
960 printk(KERN_WARNING "Warning only %ldMB will be used.\n",
962 if (max_pfn > MAX_NONPAE_PFN)
963 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
965 printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
966 max_pfn = MAXMEM_PFN;
967 #else /* !CONFIG_HIGHMEM */
968 #ifndef CONFIG_X86_PAE
/* Without PAE, highmem above 4GB cannot be addressed at all. */
969 if (max_pfn > MAX_NONPAE_PFN) {
970 max_pfn = MAX_NONPAE_PFN;
971 printk(KERN_WARNING "Warning only 4GB will be used.\n");
972 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
974 #endif /* !CONFIG_X86_PAE */
975 #endif /* !CONFIG_HIGHMEM */
/* All RAM fits below MAXMEM: honour an explicit highmem= request if sane. */
977 if (highmem_pages == -1)
979 #ifdef CONFIG_HIGHMEM
980 if (highmem_pages >= max_pfn) {
981 printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
985 if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
986 printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
989 max_low_pfn -= highmem_pages;
993 printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
999 #ifndef CONFIG_DISCONTIGMEM
1002 * Free all available memory for boot time allocation. Used
1003 * as a callback function by efi_memory_walk()
1007 free_available_memory(unsigned long start, unsigned long end, void *arg)
1009 /* check max_low_pfn */
1010 if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
1012 if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
1013 end = (max_low_pfn + 1) << PAGE_SHIFT;
1015 free_bootmem(start, end - start);
1020 * Register fully available low RAM pages with the bootmem allocator.
1022 static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
1027 efi_memmap_walk(free_available_memory, NULL);
1030 for (i = 0; i < e820.nr_map; i++) {
1031 unsigned long curr_pfn, last_pfn, size;
1033 * Reserve usable low memory
1035 if (e820.map[i].type != E820_RAM)
1038 * We are rounding up the start address of usable memory:
1040 curr_pfn = PFN_UP(e820.map[i].addr);
1041 if (curr_pfn >= max_low_pfn)
1044 * ... and at the end of the usable range downwards:
1046 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1048 if (last_pfn > max_low_pfn)
1049 last_pfn = max_low_pfn;
1052 * .. finally, did all the rounding and playing
1053 * around just make the area go away?
1055 if (last_pfn <= curr_pfn)
1058 size = last_pfn - curr_pfn;
1059 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
1064 * workaround for Dell systems that neglect to reserve EBDA
1066 static void __init reserve_ebda_region(void)
1069 addr = get_bios_ebda();
1071 reserve_bootmem(addr, PAGE_SIZE);
/*
 * Main boot-time memory setup for the flat (non-DISCONTIGMEM) case:
 * determine low/high memory split, initialize the bootmem allocator,
 * reserve fixed regions (bootmap, EBDA, trampoline, ACPI sleep, SMP
 * config, initrd), and pick up Xen's pfn->mfn list.  Returns max_low_pfn.
 */
1074 static unsigned long __init setup_memory(void)
1076 unsigned long bootmap_size, start_pfn, max_low_pfn;
1079 * partially used pages are not usable - thus
1080 * we are rounding upwards:
/* First free pfn lies just past the Xen-provided boot page tables. */
1082 start_pfn = PFN_UP(__pa(xen_start_info.pt_base)) + xen_start_info.nr_pt_frames;
1086 max_low_pfn = find_max_low_pfn();
1088 #ifdef CONFIG_HIGHMEM
1089 highstart_pfn = highend_pfn = max_pfn;
1090 if (max_pfn > max_low_pfn) {
1091 highstart_pfn = max_low_pfn;
1093 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
1094 pages_to_mb(highend_pfn - highstart_pfn));
1096 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
1097 pages_to_mb(max_low_pfn));
1099 * Initialize the boot-time allocator (with low memory only):
1101 bootmap_size = init_bootmem(start_pfn, max_low_pfn);
1103 register_bootmem_low_pages(max_low_pfn);
1106 * Reserve the bootmem bitmap itself as well. We do this in two
1107 * steps (first step was init_bootmem()) because this catches
1108 * the (very unlikely) case of us accidentally initializing the
1109 * bootmem allocator with an invalid RAM area.
1111 reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
1112 bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
1114 /* reserve EBDA region, it's a 4K region */
1115 reserve_ebda_region();
1117 /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
1118 PCI prefetch into it (errata #56). Usually the page is reserved anyways,
1119 unless you have no PS/2 mouse plugged in. */
1120 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
1121 boot_cpu_data.x86 == 6)
1122 reserve_bootmem(0xa0000 - 4096, 4096);
1126 * But first pinch a few for the stack/trampoline stuff
1127 * FIXME: Don't need the extra page at 4K, but need to fix
1128 * trampoline before removing it. (see the GDT stuff)
1130 reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
1132 #ifdef CONFIG_ACPI_SLEEP
1134 * Reserve low memory region for sleep support.
1136 acpi_reserve_bootmem();
1138 #ifdef CONFIG_X86_FIND_SMP_CONFIG
1140 * Find and reserve possible boot-time SMP configuration:
1145 #ifdef CONFIG_BLK_DEV_INITRD
/* Under Xen the initrd arrives as a boot module (mod_start). */
1146 if (xen_start_info.mod_start) {
1147 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
1148 /*reserve_bootmem(INITRD_START, INITRD_SIZE);*/
1149 initrd_start = INITRD_START + PAGE_OFFSET;
1150 initrd_end = initrd_start+INITRD_SIZE;
1151 initrd_below_start_ok = 1;
1154 printk(KERN_ERR "initrd extends beyond end of memory "
1155 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
1156 INITRD_START + INITRD_SIZE,
1157 max_low_pfn << PAGE_SHIFT);
/* pfn->mfn translation list supplied by the hypervisor at start of day. */
1163 phys_to_machine_mapping = (unsigned int *)xen_start_info.mfn_list;
1172 * Request address space for all standard RAM and ROM resources
1173 * and also for regions reported as reserved by the e820.
1176 legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
1180 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
1183 for (i = 0; i < e820.nr_map; i++) {
1184 struct resource *res;
1185 if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
1187 res = alloc_bootmem_low(sizeof(struct resource));
1188 switch (e820.map[i].type) {
1189 case E820_RAM: res->name = "System RAM"; break;
1190 case E820_ACPI: res->name = "ACPI Tables"; break;
1191 case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
1192 default: res->name = "reserved";
1194 res->start = e820.map[i].addr;
1195 res->end = res->start + e820.map[i].size - 1;
1196 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1197 request_resource(&iomem_resource, res);
1198 if (e820.map[i].type == E820_RAM) {
1200 * We don't know which RAM region contains kernel data,
1201 * so we try it repeatedly and let the resource manager
1204 request_resource(res, code_resource);
1205 request_resource(res, data_resource);
1211 * Request address space for all standard resources
1213 static void __init register_memory(void)
1215 unsigned long gapstart, gapsize;
1216 unsigned long long last;
1220 efi_initialize_iomem_resources(&code_resource, &data_resource);
1222 legacy_init_iomem_resources(&code_resource, &data_resource);
1224 /* EFI systems may still have VGA */
1225 request_resource(&iomem_resource, &video_ram_resource);
1227 /* request I/O space for devices used on all i[345]86 PCs */
1228 for (i = 0; i < STANDARD_IO_RESOURCES; i++)
1229 request_resource(&ioport_resource, &standard_io_resources[i]);
1232 * Search for the bigest gap in the low 32 bits of the e820
1235 last = 0x100000000ull;
1236 gapstart = 0x10000000;
1240 unsigned long long start = e820.map[i].addr;
1241 unsigned long long end = start + e820.map[i].size;
1244 * Since "last" is at most 4GB, we know we'll
1245 * fit in 32 bits if this condition is true
1248 unsigned long gap = last - end;
1250 if (gap > gapsize) {
1260 * Start allocating dynamic PCI memory a bit into the gap,
1261 * aligned up to the nearest megabyte.
1263 * Question: should we try to pad it up a bit (do something
1264 * like " + (gapsize >> 3)" in there too?). We now have the
/* Round the chosen gap start up to a 1MB boundary for pci_mem_start. */
1267 pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;
1269 printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
1270 pci_mem_start, gapstart, gapsize);
1273 /* Use inline assembly to define this because the nops are defined
1274 as inline assembly strings in the include files and we cannot
1275 get them easily into strings. */
1276 asm("\t.data\nintelnops: "
1277 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
1278 GENERIC_NOP7 GENERIC_NOP8);
1279 asm("\t.data\nk8nops: "
1280 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
1282 asm("\t.data\nk7nops: "
1283 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
1286 extern unsigned char intelnops[], k8nops[], k7nops[];
1287 static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
1292 intelnops + 1 + 2 + 3,
1293 intelnops + 1 + 2 + 3 + 4,
1294 intelnops + 1 + 2 + 3 + 4 + 5,
1295 intelnops + 1 + 2 + 3 + 4 + 5 + 6,
1296 intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1298 static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
1304 k8nops + 1 + 2 + 3 + 4,
1305 k8nops + 1 + 2 + 3 + 4 + 5,
1306 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
1307 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1309 static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
1315 k7nops + 1 + 2 + 3 + 4,
1316 k7nops + 1 + 2 + 3 + 4 + 5,
1317 k7nops + 1 + 2 + 3 + 4 + 5 + 6,
1318 k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1322 unsigned char **noptable;
1324 { X86_FEATURE_K8, k8_nops },
1325 { X86_FEATURE_K7, k7_nops },
1329 /* Replace instructions with better alternatives for this CPU type.
1331    This runs before SMP is initialized to avoid SMP problems with
1332    self modifying code. This implies that asymmetric systems where
1333    APs have less capabilities than the boot processor are not handled.
1334    In this case boot with "noreplacement". */
/*
 * apply_alternatives - patch the alt_instr records in [start, end).
 * For each record whose CPUID feature bit the boot CPU has, overwrite
 * the original instruction with the replacement and pad the remaining
 * bytes with the best NOP sequence for this CPU.
 */
1335 void apply_alternatives(void *start, void *end)
/* NOTE(review): braces and the declarations of i/diff/k are elided in
   this excerpt. */
1337 struct alt_instr *a;
/* Default to generic (Intel) NOPs; upgraded below if a better match. */
1339 unsigned char **noptable = intel_nops;
/* Pick the first NOP table whose CPUID feature the boot CPU has. */
1340 for (i = 0; noptypes[i].cpuid >= 0; i++) {
1341 if (boot_cpu_has(noptypes[i].cpuid)) {
1342 noptable = noptypes[i].noptable;
/* Walk every alternative-instruction record in the table. */
1346 for (a = start; (void *)a < end; a++) {
1347 if (!boot_cpu_has(a->cpuid))
/* A replacement must never be longer than the slot it patches. */
1349 BUG_ON(a->replacementlen > a->instrlen);
1350 memcpy(a->instr, a->replacement, a->replacementlen);
1351 diff = a->instrlen - a->replacementlen;
1352 /* Pad the rest with nops */
/* Emit at most ASM_NOP_MAX bytes of NOP per loop iteration. */
1353 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
1355 if (k > ASM_NOP_MAX)
1357 memcpy(a->instr + i, noptable[k], k);
/* Set by the "noreplacement" boot option (see noreplacement_setup);
 * presumably checked by alternative_instructions() to skip patching —
 * the guard line itself is elided in this excerpt. */
1362 static int no_replacement __initdata = 0;
/*
 * Patch the kernel's own alternative-instruction table (the
 * __alt_instructions[] section emitted by the linker script) in place.
 * NOTE(review): braces and the no_replacement early-out are elided in
 * this excerpt.
 */
1364 void __init alternative_instructions(void)
1366 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
1369 apply_alternatives(__alt_instructions, __alt_instructions_end);
/* Handler for the "noreplacement" kernel command-line option
 * (body elided in this excerpt). */
1372 static int __init noreplacement_setup(char *s)
1378 __setup("noreplacement", noreplacement_setup);
/* Forward declaration; defined via setup_arch_post.h / machine-specific
 * code and called from setup_arch() below. */
1380 static char * __init machine_specific_memory_setup(void);
/* MCA bus flag helper: real implementation vs. no-op stub.  The
 * surrounding #ifdef CONFIG_MCA / #else / #endif lines and the real
 * body are elided in this excerpt. */
1383 static void set_mca_bus(int x)
1388 static void set_mca_bus(int x) { }
1392 * Determine if we were loaded by an EFI loader. If so, then we have also been
1393 * passed the efi memmap, systab, etc., so we should use these data structures
1394 * for initialization. Note, the efi init code path is determined by the
1395 * global efi_enabled. This allows the same kernel image to be used on existing
1396 * systems (with a traditional BIOS) as well as on EFI systems.
/*
 * setup_arch - architecture-dependent boot-time initialization for this
 * Xen/i386 kernel: registers the panic notifier, enables hypervisor
 * assists, copies the boot parameters, sets up the memory map and the
 * bootmem allocator, builds the phys-to-machine (P2M) table, and
 * selects a console.
 * NOTE(review): many interior lines (braces, declarations of i/j/op/
 * max_pfn, #else/#endif lines) are elided in this excerpt.
 */
1398 void __init setup_arch(char **cmdline_p)
1402 unsigned long max_low_pfn;
1404 /* Force a quick death if the kernel panics. */
1405 extern int panic_timeout;
1406 if ( panic_timeout == 0 )
1409 /* Register a call for panic conditions. */
1410 notifier_chain_register(&panic_notifier_list, &xen_panic_block);
/* Enable hypervisor assists: 4GB-segment fixups and writable-pagetable
 * emulation. */
1412 HYPERVISOR_vm_assist(
1413 VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
1414 HYPERVISOR_vm_assist(
1415 VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
/* Seed boot_cpu_data from the CPU detection done in head.S. */
1417 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
1421 * FIXME: This isn't an official loader_type right
1422 * now but does currently work with elilo.
1423 * If we were configured as an EFI kernel, check to make
1424 * sure that we were loaded correctly from elilo and that
1425 * the system table is valid. If not, then initialize normally.
1428 if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
1432 /* This must be initialized to UNNAMED_MAJOR for ipconfig to work
1433 properly. Setting ROOT_DEV to default to /dev/ram0 breaks initrd.
1435 ROOT_DEV = MKDEV(UNNAMED_MAJOR,0);
/* Copy firmware/bootloader-provided parameters into kernel globals. */
1436 drive_info = DRIVE_INFO;
1437 screen_info = SCREEN_INFO;
1438 edid_info = EDID_INFO;
1439 apm_info.bios = APM_BIOS_INFO;
1440 ist_info = IST_INFO;
1441 saved_videomode = VIDEO_MODE;
1442 if( SYS_DESC_TABLE.length != 0 ) {
1443 set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2);
1444 machine_id = SYS_DESC_TABLE.table[0];
1445 machine_submodel_id = SYS_DESC_TABLE.table[1];
1446 BIOS_revision = SYS_DESC_TABLE.table[2];
1448 aux_device_present = AUX_DEVICE_INFO;
1449 bootloader_type = LOADER_TYPE;
/* No real BIOS ran under Xen, so fake a standard 80x25 VGA text mode
 * screen_info for the console code. */
1451 #ifdef CONFIG_XEN_PHYSDEV_ACCESS
1452 /* This is drawn from a dump from vgacon:startup in standard Linux. */
1453 screen_info.orig_video_mode = 3;
1454 screen_info.orig_video_isVGA = 1;
1455 screen_info.orig_video_lines = 25;
1456 screen_info.orig_video_cols = 80;
1457 screen_info.orig_video_ega_bx = 3;
1458 screen_info.orig_video_points = 16;
/* Decode the ramdisk flags word from the boot parameters. */
1461 #ifdef CONFIG_BLK_DEV_RAM
1462 rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
1463 rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
1464 rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
/* Build and report the machine memory map. */
1470 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1471 print_memory_map(machine_specific_memory_setup());
1476 if (!MOUNT_ROOT_RDONLY)
1477 root_mountflags &= ~MS_RDONLY;
/* Describe the kernel image layout in init_mm; brk is placed just past
 * the page tables Xen handed us (pt_base + nr_pt_frames). */
1478 init_mm.start_code = (unsigned long) _text;
1479 init_mm.end_code = (unsigned long) _etext;
1480 init_mm.end_data = (unsigned long) _edata;
1481 init_mm.brk = (PFN_UP(__pa(xen_start_info.pt_base)) + xen_start_info.nr_pt_frames) << PAGE_SHIFT;
1483 /* XEN: This is nonsense: kernel may not even be contiguous in RAM. */
1484 /*code_resource.start = virt_to_phys(_text);*/
1485 /*code_resource.end = virt_to_phys(_etext)-1;*/
1486 /*data_resource.start = virt_to_phys(_etext);*/
1487 /*data_resource.end = virt_to_phys(_edata)-1;*/
1489 parse_cmdline_early(cmdline_p);
/* Initializes the bootmem allocator and returns the highest directly
 * mapped pfn. */
1491 max_low_pfn = setup_memory();
1494 * NOTE: before this point _nobody_ is allowed to allocate
1495 * any memory using the bootmem allocator. Although the
1496 * allocator is now initialised only the first 8Mb of the kernel
1497 * virtual address space has been mapped. All allocations before
1498 * paging_init() has completed must use the alloc_bootmem_low_pages()
1499 * variant (which allocates DMA'able memory) and care must be taken
1500 * not to exceed the 8Mb limit.
1504 smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
/* Resize the P->M table when the requested max_pfn differs from the
 * page count Xen actually gave us. */
1508 /* Make sure we have a correctly sized P->M table. */
1509 if (max_pfn != xen_start_info.nr_pages) {
1510 phys_to_machine_mapping = alloc_bootmem_low_pages(
1511 max_pfn * sizeof(unsigned long));
/* Growing: mark the tail invalid, then copy the provided entries. */
1513 if (max_pfn > xen_start_info.nr_pages) {
1514 /* set to INVALID_P2M_ENTRY */
1515 memset(phys_to_machine_mapping, ~0,
1516 max_pfn * sizeof(unsigned long));
1517 memcpy(phys_to_machine_mapping,
1518 (unsigned long *)xen_start_info.mfn_list,
1519 xen_start_info.nr_pages * sizeof(unsigned long));
/* Shrinking: copy the prefix and hand the surplus machine frames back
 * to the hypervisor; BUG() if Xen releases fewer than requested. */
1521 memcpy(phys_to_machine_mapping,
1522 (unsigned long *)xen_start_info.mfn_list,
1523 max_pfn * sizeof(unsigned long));
1524 if (HYPERVISOR_dom_mem_op(
1525 MEMOP_decrease_reservation,
1526 (unsigned long *)xen_start_info.mfn_list + max_pfn,
1527 xen_start_info.nr_pages - max_pfn, 0) !=
1528 (xen_start_info.nr_pages - max_pfn)) BUG();
/* NOTE(review): the call these arguments belong to (presumably freeing
 * the original mfn_list pages back to bootmem) is elided here. */
1531 __pa(xen_start_info.mfn_list),
1532 PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
1533 sizeof(unsigned long))));
/* Publish the P->M table to Xen: record the machine frame of each page
 * of the table, then point the shared-info page at that frame list. */
1536 pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE);
1537 for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
1539 pfn_to_mfn_frame_list[j] =
1540 virt_to_machine(&phys_to_machine_mapping[i]) >> PAGE_SHIFT;
1542 HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list =
1543 virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT;
1547 * NOTE: at this point the bootmem allocator is fully available.
/* Hand the "earlyprintk=" option straight to the early console code. */
1550 #ifdef CONFIG_EARLY_PRINTK
1552 char *s = strstr(*cmdline_p, "earlyprintk=");
1554 extern void setup_early_printk(char *);
1556 setup_early_printk(s);
1557 printk("early console enabled\n");
1565 #ifdef CONFIG_X86_GENERICARCH
1566 generic_apic_probe(*cmdline_p);
1571 #ifdef CONFIG_ACPI_BOOT
1573 * Parse the ACPI tables for possible boot-time SMP configuration.
1575 acpi_boot_table_init();
1579 #ifdef CONFIG_X86_LOCAL_APIC
1580 if (smp_found_config)
1584 /* XXX Disable irqdebug until we have a way to avoid interrupt
1586 noirqdebug_setup("");
/* Ask Xen for I/O privilege level 1 for the kernel's thread. */
1590 op.cmd = PHYSDEVOP_SET_IOPL;
1591 op.u.set_iopl.iopl = current->thread.io_pl = 1;
1592 HYPERVISOR_physdev_op(&op);
/* Initial (dom0) domain: sanity-check privilege, then pick a console. */
1594 if (xen_start_info.flags & SIF_INITDOMAIN) {
1595 if (!(xen_start_info.flags & SIF_PRIVILEGED))
1596 panic("Xen granted us console access "
1597 "but not privileged status");
1600 #if defined(CONFIG_VGA_CONSOLE)
1602 (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
1603 conswitchp = &vga_con;
1604 #elif defined(CONFIG_DUMMY_CONSOLE)
1605 conswitchp = &dummy_con;
/* Unprivileged guest: substitute the Xen null console for VGA. */
1609 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
1610 extern const struct consw xennull_con;
1611 extern int console_use_vt;
1612 #if defined(CONFIG_VGA_CONSOLE)
1613 /* disable VGA driver */
1614 ORIG_VIDEO_ISVGA = VIDEO_TYPE_VLFB;
1616 conswitchp = &xennull_con;
/*
 * Panic-notifier callback registered through xen_panic_block in
 * setup_arch().  Return type (static int, declared near the top of the
 * file) and the body — presumably a hypervisor shutdown/crash request —
 * are elided in this excerpt.
 */
1624 xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
1627 /* we're never actually going to get here... */
1632 #include "setup_arch_post.h"
1636 * c-file-style:"k&r"