Merge to Fedora kernel-2.6.18-1.2255_FC5-vs2.0.2.2-rc9 patched with stable patch...
[linux-2.6.git] / arch / i386 / kernel / setup-xen.c
1 /*
2  *  linux/arch/i386/kernel/setup.c
3  *
4  *  Copyright (C) 1995  Linus Torvalds
5  *
6  *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
7  *
8  *  Memory region support
9  *      David Parsons <orc@pell.chi.il.us>, July-August 1999
10  *
11  *  Added E820 sanitization routine (removes overlapping memory regions);
12  *  Brian Moyle <bmoyle@mvista.com>, February 2001
13  *
14  * Moved CPU detection code to cpu/${cpu}.c
15  *    Patrick Mochel <mochel@osdl.org>, March 2002
16  *
17  *  Provisions for empty E820 memory regions (reported by certain BIOSes).
18  *  Alex Achenbach <xela@slit.de>, December 2002.
19  *
20  */
21
22 /*
23  * This file handles the architecture-dependent parts of initialization
24  */
25
26 #include <linux/sched.h>
27 #include <linux/mm.h>
28 #include <linux/mmzone.h>
29 #include <linux/screen_info.h>
30 #include <linux/ioport.h>
31 #include <linux/acpi.h>
32 #include <linux/apm_bios.h>
33 #include <linux/initrd.h>
34 #include <linux/bootmem.h>
35 #include <linux/seq_file.h>
36 #include <linux/platform_device.h>
37 #include <linux/console.h>
38 #include <linux/mca.h>
39 #include <linux/root_dev.h>
40 #include <linux/highmem.h>
41 #include <linux/module.h>
42 #include <linux/efi.h>
43 #include <linux/init.h>
44 #include <linux/edd.h>
45 #include <linux/nodemask.h>
46 #include <linux/kernel.h>
47 #include <linux/percpu.h>
48 #include <linux/notifier.h>
49 #include <linux/kexec.h>
50 #include <linux/crash_dump.h>
51 #include <linux/dmi.h>
52 #include <linux/pfn.h>
53
54 #include <video/edid.h>
55
56 #include <asm/apic.h>
57 #include <asm/e820.h>
58 #include <asm/mpspec.h>
59 #include <asm/setup.h>
60 #include <asm/arch_hooks.h>
61 #include <asm/sections.h>
62 #include <asm/io_apic.h>
63 #include <asm/ist.h>
64 #include <asm/io.h>
65 #include <asm/hypervisor.h>
66 #include <xen/interface/physdev.h>
67 #include <xen/interface/memory.h>
68 #include <xen/features.h>
69 #include <xen/xencons.h>
70 #include "setup_arch.h"
71 #include <bios_ebda.h>
72
73 /* Forward Declaration. */
74 void __init find_max_pfn(void);
75
76 static int xen_panic_event(struct notifier_block *, unsigned long, void *);
77 static struct notifier_block xen_panic_block = {
78         xen_panic_event, NULL, 0 /* try to go last */
79 };
80
81 extern char hypercall_page[PAGE_SIZE];
82 EXPORT_SYMBOL(hypercall_page);
83
84 int disable_pse __devinitdata = 0;
85
86 /*
87  * Machine setup..
88  */
89
90 #ifdef CONFIG_EFI
91 int efi_enabled = 0;
92 EXPORT_SYMBOL(efi_enabled);
93 #endif
94
95 /* cpu data as detected by the assembly code in head.S */
96 struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
97 /* common cpu data for all cpus */
98 struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
99 EXPORT_SYMBOL(boot_cpu_data);
100
101 unsigned long mmu_cr4_features;
102
103 #ifdef  CONFIG_ACPI
104         int acpi_disabled = 0;
105 #else
106         int acpi_disabled = 1;
107 #endif
108 EXPORT_SYMBOL(acpi_disabled);
109
110 #ifdef  CONFIG_ACPI
111 int __initdata acpi_force = 0;
112 extern acpi_interrupt_flags     acpi_sci_flags;
113 #endif
114
115 /* for MCA, but anyone else can use it if they want */
116 unsigned int machine_id;
117 #ifdef CONFIG_MCA
118 EXPORT_SYMBOL(machine_id);
119 #endif
120 unsigned int machine_submodel_id;
121 unsigned int BIOS_revision;
122 unsigned int mca_pentium_flag;
123
124 /* For PCI or other memory-mapped resources */
125 unsigned long pci_mem_start = 0x10000000;
126 #ifdef CONFIG_PCI
127 EXPORT_SYMBOL(pci_mem_start);
128 #endif
129
130 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
131 int bootloader_type;
132
133 /* user-defined highmem size */
134 static unsigned int highmem_pages = -1;
135
136 /*
137  * Setup options
138  */
139 struct drive_info_struct { char dummy[32]; } drive_info;
140 #if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \
141     defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
142 EXPORT_SYMBOL(drive_info);
143 #endif
144 struct screen_info screen_info;
145 EXPORT_SYMBOL(screen_info);
146 struct apm_info apm_info;
147 EXPORT_SYMBOL(apm_info);
148 struct sys_desc_table_struct {
149         unsigned short length;
150         unsigned char table[0];
151 };
152 struct edid_info edid_info;
153 EXPORT_SYMBOL_GPL(edid_info);
154 struct ist_info ist_info;
155 #if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
156         defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
157 EXPORT_SYMBOL(ist_info);
158 #endif
159 struct e820map e820;
160 static void __init e820_setup_gap(struct e820entry *e820, int nr_map);
161 #ifdef CONFIG_XEN
162 struct e820map machine_e820;
163 #endif
164
165 extern void early_cpu_init(void);
166 extern void generic_apic_probe(char *);
167 extern int root_mountflags;
168
169 unsigned long saved_videomode;
170
171 #define RAMDISK_IMAGE_START_MASK        0x07FF
172 #define RAMDISK_PROMPT_FLAG             0x8000
173 #define RAMDISK_LOAD_FLAG               0x4000  
174
175 static char command_line[COMMAND_LINE_SIZE];
176
177 unsigned char __initdata boot_params[PARAM_SIZE];
178
179 static struct resource data_resource = {
180         .name   = "Kernel data",
181         .start  = 0,
182         .end    = 0,
183         .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
184 };
185
186 static struct resource code_resource = {
187         .name   = "Kernel code",
188         .start  = 0,
189         .end    = 0,
190         .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
191 };
192
193 static struct resource system_rom_resource = {
194         .name   = "System ROM",
195         .start  = 0xf0000,
196         .end    = 0xfffff,
197         .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
198 };
199
200 static struct resource extension_rom_resource = {
201         .name   = "Extension ROM",
202         .start  = 0xe0000,
203         .end    = 0xeffff,
204         .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
205 };
206
207 static struct resource adapter_rom_resources[] = { {
208         .name   = "Adapter ROM",
209         .start  = 0xc8000,
210         .end    = 0,
211         .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
212 }, {
213         .name   = "Adapter ROM",
214         .start  = 0,
215         .end    = 0,
216         .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
217 }, {
218         .name   = "Adapter ROM",
219         .start  = 0,
220         .end    = 0,
221         .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
222 }, {
223         .name   = "Adapter ROM",
224         .start  = 0,
225         .end    = 0,
226         .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
227 }, {
228         .name   = "Adapter ROM",
229         .start  = 0,
230         .end    = 0,
231         .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
232 }, {
233         .name   = "Adapter ROM",
234         .start  = 0,
235         .end    = 0,
236         .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
237 } };
238
239 #define ADAPTER_ROM_RESOURCES \
240         (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
241
242 static struct resource video_rom_resource = {
243         .name   = "Video ROM",
244         .start  = 0xc0000,
245         .end    = 0xc7fff,
246         .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
247 };
248
249 static struct resource video_ram_resource = {
250         .name   = "Video RAM area",
251         .start  = 0xa0000,
252         .end    = 0xbffff,
253         .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
254 };
255
256 static struct resource standard_io_resources[] = { {
257         .name   = "dma1",
258         .start  = 0x0000,
259         .end    = 0x001f,
260         .flags  = IORESOURCE_BUSY | IORESOURCE_IO
261 }, {
262         .name   = "pic1",
263         .start  = 0x0020,
264         .end    = 0x0021,
265         .flags  = IORESOURCE_BUSY | IORESOURCE_IO
266 }, {
267         .name   = "timer0",
268         .start  = 0x0040,
269         .end    = 0x0043,
270         .flags  = IORESOURCE_BUSY | IORESOURCE_IO
271 }, {
272         .name   = "timer1",
273         .start  = 0x0050,
274         .end    = 0x0053,
275         .flags  = IORESOURCE_BUSY | IORESOURCE_IO
276 }, {
277         .name   = "keyboard",
278         .start  = 0x0060,
279         .end    = 0x006f,
280         .flags  = IORESOURCE_BUSY | IORESOURCE_IO
281 }, {
282         .name   = "dma page reg",
283         .start  = 0x0080,
284         .end    = 0x008f,
285         .flags  = IORESOURCE_BUSY | IORESOURCE_IO
286 }, {
287         .name   = "pic2",
288         .start  = 0x00a0,
289         .end    = 0x00a1,
290         .flags  = IORESOURCE_BUSY | IORESOURCE_IO
291 }, {
292         .name   = "dma2",
293         .start  = 0x00c0,
294         .end    = 0x00df,
295         .flags  = IORESOURCE_BUSY | IORESOURCE_IO
296 }, {
297         .name   = "fpu",
298         .start  = 0x00f0,
299         .end    = 0x00ff,
300         .flags  = IORESOURCE_BUSY | IORESOURCE_IO
301 } };
302
303 #define STANDARD_IO_RESOURCES \
304         (sizeof standard_io_resources / sizeof standard_io_resources[0])
305
306 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
307
308 static int __init romchecksum(unsigned char *rom, unsigned long length)
309 {
310         unsigned char *p, sum = 0;
311
312         for (p = rom; p < rom + length; p++)
313                 sum += *p;
314         return sum == 0;
315 }
316
/*
 * Scan the legacy ISA ROM windows (0xc0000-0xfffff) for option ROMs and
 * claim any found images in the iomem resource tree.
 *
 * A ROM is recognised by the 0xaa55 signature on a 2k boundary; its
 * advertised length (byte 2, in 512-byte units) is trusted only when the
 * whole image checksums to zero (see romchecksum()).
 */
static void __init probe_roms(void)
{
	unsigned long start, length, upper;
	unsigned char *rom;
	int	      i;

#ifdef CONFIG_XEN
	/* Nothing to do if not running in dom0. */
	if (!is_initial_xendomain())
		return;
#endif

	/* video rom */
	upper = adapter_rom_resources[0].start;
	for (start = video_rom_resource.start; start < upper; start += 2048) {
		rom = isa_bus_to_virt(start);
		if (!romsignature(rom))
			continue;

		video_rom_resource.start = start;

		/* 0 < length <= 0x7f * 512, historically */
		length = rom[2] * 512;

		/* if checksum okay, trust length byte */
		if (length && romchecksum(rom, length))
			video_rom_resource.end = start + length - 1;
		break;
	}

	/* Resume scanning on the next 2k boundary past the video ROM, but
	 * never before the adapter ROM area itself.
	 * NOTE(review): unlike mainline setup.c, video_rom_resource is never
	 * passed to request_resource() here -- confirm that is intended. */
	start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
	if (start < upper)
		start = upper;

	/* system rom */
	request_resource(&iomem_resource, &system_rom_resource);
	upper = system_rom_resource.start;

	/* check for extension rom (ignore length byte!) */
	rom = isa_bus_to_virt(extension_rom_resource.start);
	if (romsignature(rom)) {
		length = extension_rom_resource.end - extension_rom_resource.start + 1;
		if (romchecksum(rom, length)) {
			request_resource(&iomem_resource, &extension_rom_resource);
			upper = extension_rom_resource.start;
		}
	}

	/* check for adapter roms on 2k boundaries */
	for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
		rom = isa_bus_to_virt(start);
		if (!romsignature(rom))
			continue;

		/* 0 < length <= 0x7f * 512, historically */
		length = rom[2] * 512;

		/* but accept any length that fits if checksum okay */
		if (!length || start + length > upper || !romchecksum(rom, length))
			continue;

		adapter_rom_resources[i].start = start;
		adapter_rom_resources[i].end = start + length - 1;
		request_resource(&iomem_resource, &adapter_rom_resources[i]);

		/* i advances only when a ROM was claimed; start snaps back to
		 * the 2k boundary containing the ROM's last byte and is then
		 * bumped by the loop's += 2048. */
		start = adapter_rom_resources[i++].end & ~2047UL;
	}
}
385
/*
 * Point at the empty zero page to start with. We map the real shared_info
 * page as soon as fixmap is up and running.
 */
shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
EXPORT_SYMBOL(HYPERVISOR_shared_info);

/* Pseudo-physical to machine frame translation table, plus the frame
 * lists that index it.  NOTE(review): presumably published to the
 * hypervisor for save/restore -- populated elsewhere, confirm there. */
unsigned long *phys_to_machine_mapping;
unsigned long *pfn_to_mfn_frame_list_list, *pfn_to_mfn_frame_list[16];
EXPORT_SYMBOL(phys_to_machine_mapping);

/* Raw start-of-day parameters from the hypervisor. */
start_info_t *xen_start_info;
EXPORT_SYMBOL(xen_start_info);
400
401 void __init add_memory_region(unsigned long long start,
402                                   unsigned long long size, int type)
403 {
404         int x;
405
406         if (!efi_enabled) {
407                 x = e820.nr_map;
408
409                 if (x == E820MAX) {
410                     printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
411                     return;
412                 }
413
414                 e820.map[x].addr = start;
415                 e820.map[x].size = size;
416                 e820.map[x].type = type;
417                 e820.nr_map++;
418         }
419 } /* add_memory_region */
420
/*
 * limit_regions - truncate the firmware memory map so usable RAM ends at
 * @size bytes.  Backs the "mem=" and "memmap=" command-line options.
 * Clips either the EFI memmap or the e820 table, whichever is in use.
 */
static void __init limit_regions(unsigned long long size)
{
	unsigned long long current_addr = 0;
	int i;

	if (efi_enabled) {
		efi_memory_desc_t *md;
		void *p;

		/* Walk the EFI descriptors; shrink the first conventional-
		 * memory region that crosses the limit and drop everything
		 * after it by truncating nr_map. */
		for (p = memmap.map, i = 0; p < memmap.map_end;
			p += memmap.desc_size, i++) {
			md = p;
			current_addr = md->phys_addr + (md->num_pages << 12);
			if (md->type == EFI_CONVENTIONAL_MEMORY) {
				if (current_addr >= size) {
					md->num_pages -=
						(((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
					memmap.nr_map = i + 1;
					return;
				}
			}
		}
	}
	/* e820 path: find the first RAM entry whose end reaches @size and
	 * clip/drop from there. */
	for (i = 0; i < e820.nr_map; i++) {
		current_addr = e820.map[i].addr + e820.map[i].size;
		if (current_addr < size)
			continue;

		if (e820.map[i].type != E820_RAM)
			continue;

		if (e820.map[i].addr >= size) {
			/*
			 * This region starts past the end of the
			 * requested size, skip it completely.
			 */
			e820.nr_map = i;
		} else {
			e820.nr_map = i + 1;
			e820.map[i].size -= current_addr - size;
		}
		return;
	}
#ifdef CONFIG_XEN
	if (i==e820.nr_map && current_addr < size) {
		/*
		 * The e820 map finished before our requested size so
		 * extend the final entry to the requested address.
		 */
		/* NOTE(review): --i assumes at least one e820 entry exists;
		 * under Xen this is guaranteed by copy_e820_map()'s BUG_ON.
		 * Subtracting the negative (current_addr - size) grows the
		 * final RAM entry up to @size. */
		--i;
		if (e820.map[i].type == E820_RAM)
			e820.map[i].size -= current_addr - size;
		else
			add_memory_region(current_addr, size - current_addr, E820_RAM);
	}
#endif
}
478
479 #define E820_DEBUG      1
480
481 static void __init print_memory_map(char *who)
482 {
483         int i;
484
485         for (i = 0; i < e820.nr_map; i++) {
486                 printk(" %s: %016Lx - %016Lx ", who,
487                         e820.map[i].addr,
488                         e820.map[i].addr + e820.map[i].size);
489                 switch (e820.map[i].type) {
490                 case E820_RAM:  printk("(usable)\n");
491                                 break;
492                 case E820_RESERVED:
493                                 printk("(reserved)\n");
494                                 break;
495                 case E820_ACPI:
496                                 printk("(ACPI data)\n");
497                                 break;
498                 case E820_NVS:
499                                 printk("(ACPI NVS)\n");
500                                 break;
501                 default:        printk("type %lu\n", e820.map[i].type);
502                                 break;
503                 }
504         }
505 }
506
507 /*
508  * Sanitize the BIOS e820 map.
509  *
510  * Some e820 responses include overlapping entries.  The following 
511  * replaces the original e820 map with a new one, removing overlaps.
512  *
513  */
struct change_member {
	struct e820entry *pbios; /* pointer to original bios entry */
	unsigned long long addr; /* address for this change point */
};
/* Scratch storage for sanitize_e820_map(); __initdata so it is discarded
 * after boot.  Sized for a start and an end point per E820MAX entry. */
static struct change_member change_point_list[2*E820MAX] __initdata;
static struct change_member *change_point[2*E820MAX] __initdata;
static struct e820entry *overlap_list[E820MAX] __initdata;
static struct e820entry new_bios[E820MAX] __initdata;
522
/*
 * sanitize_e820_map - rebuild @biosmap in place with all overlaps removed.
 * @biosmap: the raw BIOS-provided e820 table (modified in place)
 * @pnr_map: in/out entry count
 *
 * Where regions overlap, the numerically larger type wins (1=usable;
 * 2,3,4,... are all "unusable" and take precedence).  Returns 0 on
 * success, -1 when the map has fewer than two entries or contains a
 * wrap-around range.
 *
 * NOTE(review): @pnr_map is a char, so the sanitized count is limited to
 * 127 entries -- confirm E820MAX fits.
 */
int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
{
	struct change_member *change_tmp;
	unsigned long current_type, last_type;
	unsigned long long last_addr;
	int chgidx, still_changing;
	int overlap_entries;
	int new_bios_entry;
	int old_nr, new_nr, chg_nr;
	int i;

	/*
		Visually we're performing the following (1,2,3,4 = memory types)...

		Sample memory map (w/overlaps):
		   ____22__________________
		   ______________________4_
		   ____1111________________
		   _44_____________________
		   11111111________________
		   ____________________33__
		   ___________44___________
		   __________33333_________
		   ______________22________
		   ___________________2222_
		   _________111111111______
		   _____________________11_
		   _________________4______

		Sanitized equivalent (no overlap):
		   1_______________________
		   _44_____________________
		   ___1____________________
		   ____22__________________
		   ______11________________
		   _________1______________
		   __________3_____________
		   ___________44___________
		   _____________33_________
		   _______________2________
		   ________________1_______
		   _________________4______
		   ___________________2____
		   ____________________33__
		   ______________________4_
	*/

	/* if there's only one memory region, don't bother */
	if (*pnr_map < 2)
		return -1;

	old_nr = *pnr_map;

	/* bail out if we find any unreasonable addresses in bios map */
	for (i=0; i<old_nr; i++)
		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
			return -1;

	/* create pointers for initial change-point information (for sorting) */
	for (i=0; i < 2*old_nr; i++)
		change_point[i] = &change_point_list[i];

	/* record all known change-points (starting and ending addresses),
	   omitting those that are for empty memory regions */
	chgidx = 0;
	for (i=0; i < old_nr; i++)	{
		if (biosmap[i].size != 0) {
			change_point[chgidx]->addr = biosmap[i].addr;
			change_point[chgidx++]->pbios = &biosmap[i];
			change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
			change_point[chgidx++]->pbios = &biosmap[i];
		}
	}
	chg_nr = chgidx;	/* true number of change-points */

	/* sort change-point list by memory addresses (low -> high) */
	/* Simple bubble sort: chg_nr is small (<= 2*E820MAX), and the tie
	 * rule below keeps region-start points after region-end points at
	 * the same address. */
	still_changing = 1;
	while (still_changing)	{
		still_changing = 0;
		for (i=1; i < chg_nr; i++)  {
			/* if <current_addr> > <last_addr>, swap */
			/* or, if current=<start_addr> & last=<end_addr>, swap */
			if ((change_point[i]->addr < change_point[i-1]->addr) ||
				((change_point[i]->addr == change_point[i-1]->addr) &&
				 (change_point[i]->addr == change_point[i]->pbios->addr) &&
				 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
			   )
			{
				change_tmp = change_point[i];
				change_point[i] = change_point[i-1];
				change_point[i-1] = change_tmp;
				still_changing=1;
			}
		}
	}

	/* create a new bios memory map, removing overlaps */
	overlap_entries=0;	 /* number of entries in the overlap table */
	new_bios_entry=0;	 /* index for creating new bios map entries */
	last_type = 0;		 /* start with undefined memory type */
	last_addr = 0;		 /* start with 0 as last starting address */
	/* loop through change-points, determining affect on the new bios map */
	for (chgidx=0; chgidx < chg_nr; chgidx++)
	{
		/* keep track of all overlapping bios entries */
		if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
		{
			/* add map entry to overlap list (> 1 entry implies an overlap) */
			overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
		}
		else
		{
			/* remove entry from list (order independent, so swap with last) */
			for (i=0; i<overlap_entries; i++)
			{
				if (overlap_list[i] == change_point[chgidx]->pbios)
					overlap_list[i] = overlap_list[overlap_entries-1];
			}
			overlap_entries--;
		}
		/* if there are overlapping entries, decide which "type" to use */
		/* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
		current_type = 0;
		for (i=0; i<overlap_entries; i++)
			if (overlap_list[i]->type > current_type)
				current_type = overlap_list[i]->type;
		/* continue building up new bios map based on this information */
		if (current_type != last_type)	{
			if (last_type != 0)	 {
				new_bios[new_bios_entry].size =
					change_point[chgidx]->addr - last_addr;
				/* move forward only if the new size was non-zero */
				if (new_bios[new_bios_entry].size != 0)
					if (++new_bios_entry >= E820MAX)
						break; 	/* no more space left for new bios entries */
			}
			if (current_type != 0)	{
				new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
				new_bios[new_bios_entry].type = current_type;
				last_addr=change_point[chgidx]->addr;
			}
			last_type = current_type;
		}
	}
	new_nr = new_bios_entry;   /* retain count for new bios entries */

	/* copy new bios mapping into original location */
	memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
	*pnr_map = new_nr;

	return 0;
}
675
676 /*
677  * Copy the BIOS e820 map into a safe place.
678  *
679  * Sanity-check it while we're at it..
680  *
681  * If we're lucky and live on a modern system, the setup code
682  * will have given us a memory map that we can use to properly
683  * set up memory.  If we aren't, we'll fake a memory map.
684  *
685  * We check to see that the memory map contains at least 2 elements
686  * before we'll use it, because the detection code in setup.S may
687  * not be perfect and most every PC known to man has two memory
688  * regions: one from 0 to 640k, and one from 1mb up.  (The IBM
689  * thinkpad 560x, for example, does not cooperate with the memory
690  * detection code.)
691  */
/*
 * copy_e820_map - import the firmware-provided e820 table into the
 * kernel's e820 map via add_memory_region().
 * @biosmap: source table
 * @nr_map:  number of entries in @biosmap
 *
 * Returns 0 on success, -1 when the map is rejected (too few entries on
 * native, or a 64-bit wrap-around range anywhere).
 */
int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
{
#ifndef CONFIG_XEN
	/* Only one memory region (or negative)? Ignore it */
	if (nr_map < 2)
		return -1;
#else
	/* Under Xen the hypervisor always supplies at least one region. */
	BUG_ON(nr_map < 1);
#endif

	do {
		unsigned long long start = biosmap->addr;
		unsigned long long size = biosmap->size;
		unsigned long long end = start + size;
		unsigned long type = biosmap->type;

		/* Overflow in 64 bits? Ignore the memory map. */
		if (start > end)
			return -1;

#ifndef CONFIG_XEN
		/*
		 * Some BIOSes claim RAM in the 640k - 1M region.
		 * Not right. Fix it up.
		 */
		if (type == E820_RAM) {
			if (start < 0x100000ULL && end > 0xA0000ULL) {
				/* Keep only the sub-640k part (if any)... */
				if (start < 0xA0000ULL)
					add_memory_region(start, 0xA0000ULL-start, type);
				if (end <= 0x100000ULL)
					continue;
				/* ...and the part above 1M. */
				start = 0x100000ULL;
				size = end - start;
			}
		}
#endif
		add_memory_region(start, size, type);
	} while (biosmap++,--nr_map);	/* advance entry; stop when count hits 0 */
	return 0;
}
732
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
/* BIOS Enhanced Disk Drive data, copied out of the boot parameter area. */
struct edd edd;
#ifdef CONFIG_EDD_MODULE
EXPORT_SYMBOL(edd);
#endif
/**
 * copy_edd() - Copy the BIOS EDD information
 *              from boot_params into a safe place.
 *
 */
static inline void copy_edd(void)
{
     memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
     memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
     edd.mbr_signature_nr = EDD_MBR_SIG_NR;
     edd.edd_info_nr = EDD_NR;
}
#else
/* EDD support compiled out: provide a no-op stub so callers need no ifdefs. */
static inline void copy_edd(void)
{
}
#endif
755
756 static void __init parse_cmdline_early (char ** cmdline_p)
757 {
758         char c = ' ', *to = command_line, *from = saved_command_line;
759         int len = 0, max_cmdline;
760         int userdef = 0;
761
762         if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
763                 max_cmdline = COMMAND_LINE_SIZE;
764         memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
765         /* Save unparsed command line copy for /proc/cmdline */
766         saved_command_line[max_cmdline-1] = '\0';
767
768         for (;;) {
769                 if (c != ' ')
770                         goto next_char;
771                 /*
772                  * "mem=nopentium" disables the 4MB page tables.
773                  * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
774                  * to <mem>, overriding the bios size.
775                  * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
776                  * <start> to <start>+<mem>, overriding the bios size.
777                  *
778                  * HPA tells me bootloaders need to parse mem=, so no new
779                  * option should be mem=  [also see Documentation/i386/boot.txt]
780                  */
781                 if (!memcmp(from, "mem=", 4)) {
782                         if (to != command_line)
783                                 to--;
784                         if (!memcmp(from+4, "nopentium", 9)) {
785                                 from += 9+4;
786                                 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
787                                 disable_pse = 1;
788                         } else {
789                                 /* If the user specifies memory size, we
790                                  * limit the BIOS-provided memory map to
791                                  * that size. exactmap can be used to specify
792                                  * the exact map. mem=number can be used to
793                                  * trim the existing memory map.
794                                  */
795                                 unsigned long long mem_size;
796  
797                                 mem_size = memparse(from+4, &from);
798                                 limit_regions(mem_size);
799                                 userdef=1;
800                         }
801                 }
802
803                 else if (!memcmp(from, "memmap=", 7)) {
804                         if (to != command_line)
805                                 to--;
806                         if (!memcmp(from+7, "exactmap", 8)) {
807 #ifdef CONFIG_CRASH_DUMP
808                                 /* If we are doing a crash dump, we
809                                  * still need to know the real mem
810                                  * size before original memory map is
811                                  * reset.
812                                  */
813                                 find_max_pfn();
814                                 saved_max_pfn = max_pfn;
815 #endif
816                                 from += 8+7;
817                                 e820.nr_map = 0;
818                                 userdef = 1;
819                         } else {
820                                 /* If the user specifies memory size, we
821                                  * limit the BIOS-provided memory map to
822                                  * that size. exactmap can be used to specify
823                                  * the exact map. mem=number can be used to
824                                  * trim the existing memory map.
825                                  */
826                                 unsigned long long start_at, mem_size;
827  
828                                 mem_size = memparse(from+7, &from);
829                                 if (*from == '@') {
830                                         start_at = memparse(from+1, &from);
831                                         add_memory_region(start_at, mem_size, E820_RAM);
832                                 } else if (*from == '#') {
833                                         start_at = memparse(from+1, &from);
834                                         add_memory_region(start_at, mem_size, E820_ACPI);
835                                 } else if (*from == '$') {
836                                         start_at = memparse(from+1, &from);
837                                         add_memory_region(start_at, mem_size, E820_RESERVED);
838                                 } else {
839                                         limit_regions(mem_size);
840                                         userdef=1;
841                                 }
842                         }
843                 }
844
845                 else if (!memcmp(from, "noexec=", 7))
846                         noexec_setup(from + 7);
847
848
849 #ifdef  CONFIG_X86_MPPARSE
850                 /*
851                  * If the BIOS enumerates physical processors before logical,
852                  * maxcpus=N at enumeration-time can be used to disable HT.
853                  */
854                 else if (!memcmp(from, "maxcpus=", 8)) {
855                         extern unsigned int maxcpus;
856
857                         maxcpus = simple_strtoul(from + 8, NULL, 0);
858                 }
859 #endif
860
861 #ifdef CONFIG_ACPI
862                 /* "acpi=off" disables both ACPI table parsing and interpreter */
863                 else if (!memcmp(from, "acpi=off", 8)) {
864                         disable_acpi();
865                 }
866
867                 /* acpi=force to over-ride black-list */
868                 else if (!memcmp(from, "acpi=force", 10)) {
869                         acpi_force = 1;
870                         acpi_ht = 1;
871                         acpi_disabled = 0;
872                 }
873
874                 /* acpi=strict disables out-of-spec workarounds */
875                 else if (!memcmp(from, "acpi=strict", 11)) {
876                         acpi_strict = 1;
877                 }
878
879                 /* Limit ACPI just to boot-time to enable HT */
880                 else if (!memcmp(from, "acpi=ht", 7)) {
881                         if (!acpi_force)
882                                 disable_acpi();
883                         acpi_ht = 1;
884                 }
885                 
886                 /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
887                 else if (!memcmp(from, "pci=noacpi", 10)) {
888                         acpi_disable_pci();
889                 }
890                 /* "acpi=noirq" disables ACPI interrupt routing */
891                 else if (!memcmp(from, "acpi=noirq", 10)) {
892                         acpi_noirq_set();
893                 }
894
895                 else if (!memcmp(from, "acpi_sci=edge", 13))
896                         acpi_sci_flags.trigger =  1;
897
898                 else if (!memcmp(from, "acpi_sci=level", 14))
899                         acpi_sci_flags.trigger = 3;
900
901                 else if (!memcmp(from, "acpi_sci=high", 13))
902                         acpi_sci_flags.polarity = 1;
903
904                 else if (!memcmp(from, "acpi_sci=low", 12))
905                         acpi_sci_flags.polarity = 3;
906
907 #ifdef CONFIG_X86_IO_APIC
908                 else if (!memcmp(from, "acpi_skip_timer_override", 24))
909                         acpi_skip_timer_override = 1;
910
911                 if (!memcmp(from, "disable_timer_pin_1", 19))
912                         disable_timer_pin_1 = 1;
913                 if (!memcmp(from, "enable_timer_pin_1", 18))
914                         disable_timer_pin_1 = -1;
915
916                 /* disable IO-APIC */
917                 else if (!memcmp(from, "noapic", 6))
918                         disable_ioapic_setup();
919 #endif /* CONFIG_X86_IO_APIC */
920 #endif /* CONFIG_ACPI */
921
922 #ifdef CONFIG_X86_LOCAL_APIC
923                 /* enable local APIC */
924                 else if (!memcmp(from, "lapic", 5))
925                         lapic_enable();
926
927                 /* disable local APIC */
928                 else if (!memcmp(from, "nolapic", 6))
929                         lapic_disable();
930 #endif /* CONFIG_X86_LOCAL_APIC */
931
932 #ifdef CONFIG_KEXEC
933                 /* crashkernel=size@addr specifies the location to reserve for
934                  * a crash kernel.  By reserving this memory we guarantee
935                  * that linux never set's it up as a DMA target.
936                  * Useful for holding code to do something appropriate
937                  * after a kernel panic.
938                  */
939                 else if (!memcmp(from, "crashkernel=", 12)) {
940                         unsigned long size, base;
941                         size = memparse(from+12, &from);
942                         if (*from == '@') {
943                                 base = memparse(from+1, &from);
944                                 /* FIXME: Do I want a sanity check
945                                  * to validate the memory range?
946                                  */
947                                 crashk_res.start = base;
948                                 crashk_res.end   = base + size - 1;
949                         }
950                 }
951 #endif
952 #ifdef CONFIG_PROC_VMCORE
953                 /* elfcorehdr= specifies the location of elf core header
954                  * stored by the crashed kernel.
955                  */
956                 else if (!memcmp(from, "elfcorehdr=", 11))
957                         elfcorehdr_addr = memparse(from+11, &from);
958 #endif
959
960                 /*
961                  * highmem=size forces highmem to be exactly 'size' bytes.
962                  * This works even on boxes that have no highmem otherwise.
963                  * This also works to reduce highmem size on bigger boxes.
964                  */
965                 else if (!memcmp(from, "highmem=", 8))
966                         highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
967         
968                 /*
969                  * vmalloc=size forces the vmalloc area to be exactly 'size'
970                  * bytes. This can be used to increase (or decrease) the
971                  * vmalloc area - the default is 128m.
972                  */
973                 else if (!memcmp(from, "vmalloc=", 8))
974                         __VMALLOC_RESERVE = memparse(from+8, &from);
975
976         next_char:
977                 c = *(from++);
978                 if (!c)
979                         break;
980                 if (COMMAND_LINE_SIZE <= ++len)
981                         break;
982                 *(to++) = c;
983         }
984         *to = '\0';
985         *cmdline_p = command_line;
986         if (userdef) {
987                 printk(KERN_INFO "user-defined physical RAM map:\n");
988                 print_memory_map("user");
989         }
990 }
991
992 /*
993  * Callback for efi_memory_walk.
994  */
995 static int __init
996 efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
997 {
998         unsigned long *max_pfn = arg, pfn;
999
1000         if (start < end) {
1001                 pfn = PFN_UP(end -1);
1002                 if (pfn > *max_pfn)
1003                         *max_pfn = pfn;
1004         }
1005         return 0;
1006 }
1007
1008 static int __init
1009 efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
1010 {
1011         memory_present(0, start, end);
1012         return 0;
1013 }
1014
1015  /*
1016   * This function checks if the entire range <start,end> is mapped with type.
1017   *
1018   * Note: this function only works correct if the e820 table is sorted and
1019   * not-overlapping, which is the case
1020   */
1021 int __init
1022 e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
1023 {
1024         u64 start = s;
1025         u64 end = e;
1026         int i;
1027         for (i = 0; i < e820.nr_map; i++) {
1028                 struct e820entry *ei = &e820.map[i];
1029                 if (type && ei->type != type)
1030                         continue;
1031                 /* is the region (part) in overlap with the current region ?*/
1032                 if (ei->addr >= end || ei->addr + ei->size <= start)
1033                         continue;
1034                 /* if the region is at the beginning of <start,end> we move
1035                  * start to the end of the region since it's ok until there
1036                  */
1037                 if (ei->addr <= start)
1038                         start = ei->addr + ei->size;
1039                 /* if start is now at or beyond end, we're done, full
1040                  * coverage */
1041                 if (start >= end)
1042                         return 1; /* we're done */
1043         }
1044         return 0;
1045 }
1046
1047 /*
1048  * Find the highest page frame number we have available
1049  */
1050 void __init find_max_pfn(void)
1051 {
1052         int i;
1053
1054         max_pfn = 0;
1055         if (efi_enabled) {
1056                 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
1057                 efi_memmap_walk(efi_memory_present_wrapper, NULL);
1058                 return;
1059         }
1060
1061         for (i = 0; i < e820.nr_map; i++) {
1062                 unsigned long start, end;
1063                 /* RAM? */
1064                 if (e820.map[i].type != E820_RAM)
1065                         continue;
1066                 start = PFN_UP(e820.map[i].addr);
1067                 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1068                 if (start >= end)
1069                         continue;
1070                 if (end > max_pfn)
1071                         max_pfn = end;
1072                 memory_present(0, start, end);
1073         }
1074 }
1075
/*
 * Determine low and high memory ranges.
 *
 * Computes the highest directly-mapped ("low") PFN, trimming the
 * globals max_pfn and highmem_pages (set by "highmem=" on the command
 * line; -1 means "not specified") to what the kernel configuration can
 * actually use.  Returns the resulting low-memory PFN limit.
 *
 * Side effects: may modify max_pfn and highmem_pages.
 */
unsigned long __init find_max_low_pfn(void)
{
	unsigned long max_low_pfn;

	max_low_pfn = max_pfn;
	if (max_low_pfn > MAXMEM_PFN) {
		/* More RAM than the direct mapping can cover. */
		if (highmem_pages == -1)	/* no highmem= given: all surplus is highmem */
			highmem_pages = max_pfn - MAXMEM_PFN;
		/* honour a highmem= request smaller than the surplus RAM */
		if (highmem_pages + MAXMEM_PFN < max_pfn)
			max_pfn = MAXMEM_PFN + highmem_pages;
		/* a highmem= request larger than the surplus RAM is ignored */
		if (highmem_pages + MAXMEM_PFN > max_pfn) {
			printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
			highmem_pages = 0;
		}
		max_low_pfn = MAXMEM_PFN;
#ifndef CONFIG_HIGHMEM
		/* Maximum memory usable is what is directly addressable */
		printk(KERN_WARNING "Warning only %ldMB will be used.\n",
					MAXMEM>>20);
		if (max_pfn > MAX_NONPAE_PFN)
			printk(KERN_WARNING "Use a PAE enabled kernel.\n");
		else
			printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
		/* Without HIGHMEM anything above the direct map is unusable. */
		max_pfn = MAXMEM_PFN;
#else /* !CONFIG_HIGHMEM */
#ifndef CONFIG_X86_PAE
		/* Without PAE, page tables cannot address beyond 4GB. */
		if (max_pfn > MAX_NONPAE_PFN) {
			max_pfn = MAX_NONPAE_PFN;
			printk(KERN_WARNING "Warning only 4GB will be used.\n");
			printk(KERN_WARNING "Use a PAE enabled kernel.\n");
		}
#endif /* !CONFIG_X86_PAE */
#endif /* !CONFIG_HIGHMEM */
	} else {
		/* All RAM fits in the direct mapping. */
		if (highmem_pages == -1)	/* no highmem= given: no highmem */
			highmem_pages = 0;
#ifdef CONFIG_HIGHMEM
		if (highmem_pages >= max_pfn) {
			printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
			highmem_pages = 0;
		}
		if (highmem_pages) {
			/* refuse to shrink lowmem below 64MB */
			if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
				printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
				highmem_pages = 0;
			}
			/* carve the requested highmem out of the top of lowmem */
			max_low_pfn -= highmem_pages;
		}
#else
		if (highmem_pages)
			printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
#endif
	}
	return max_low_pfn;
}
1134
1135 /*
1136  * Free all available memory for boot time allocation.  Used
1137  * as a callback function by efi_memory_walk()
1138  */
1139
1140 static int __init
1141 free_available_memory(unsigned long start, unsigned long end, void *arg)
1142 {
1143         /* check max_low_pfn */
1144         if (start >= (max_low_pfn << PAGE_SHIFT))
1145                 return 0;
1146         if (end >= (max_low_pfn << PAGE_SHIFT))
1147                 end = max_low_pfn << PAGE_SHIFT;
1148         if (start < end)
1149                 free_bootmem(start, end - start);
1150
1151         return 0;
1152 }
/*
 * Register fully available low RAM pages with the bootmem allocator.
 *
 * Only E820_RAM ranges below max_low_pfn are freed; partial pages at
 * range edges are rounded inward and dropped.
 */
static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
{
	int i;

	/* EFI systems: the EFI memory map is authoritative. */
	if (efi_enabled) {
		efi_memmap_walk(free_available_memory, NULL);
		return;
	}
	for (i = 0; i < e820.nr_map; i++) {
		unsigned long curr_pfn, last_pfn, size;
		/*
		 * Reserve usable low memory
		 */
		if (e820.map[i].type != E820_RAM)
			continue;
		/*
		 * We are rounding up the start address of usable memory:
		 */
		curr_pfn = PFN_UP(e820.map[i].addr);
		if (curr_pfn >= max_low_pfn)
			continue;
		/*
		 * ... and at the end of the usable range downwards:
		 */
		last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);

#ifdef CONFIG_XEN
		/*
		 * Truncate to the number of actual pages currently
		 * present (Xen may hand us a map covering pages the
		 * domain has not been given yet).
		 */
		if (last_pfn > xen_start_info->nr_pages)
			last_pfn = xen_start_info->nr_pages;
#endif

		if (last_pfn > max_low_pfn)
			last_pfn = max_low_pfn;

		/*
		 * .. finally, did all the rounding and playing
		 * around just make the area go away?
		 */
		if (last_pfn <= curr_pfn)
			continue;

		size = last_pfn - curr_pfn;
		free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
	}
}
1205
1206 #ifndef CONFIG_XEN
1207 /*
1208  * workaround for Dell systems that neglect to reserve EBDA
1209  */
1210 static void __init reserve_ebda_region(void)
1211 {
1212         unsigned int addr;
1213         addr = get_bios_ebda();
1214         if (addr)
1215                 reserve_bootmem(addr, PAGE_SIZE);       
1216 }
1217 #endif
1218
1219 #ifndef CONFIG_NEED_MULTIPLE_NODES
1220 void __init setup_bootmem_allocator(void);
/*
 * Establish the memory limits for the bootmem allocator (and, with
 * CONFIG_HIGHMEM, the highmem range), then initialize the allocator.
 * Returns the highest low-memory PFN.
 */
static unsigned long __init setup_memory(void)
{
	/*
	 * partially used pages are not usable - thus
	 * we are rounding upwards:
	 *
	 * Under Xen the initial page tables follow the kernel image, so
	 * the first allocatable page lies past pt_base plus the
	 * nr_pt_frames page-table frames.
	 */
	min_low_pfn = PFN_UP(__pa(xen_start_info->pt_base)) +
		xen_start_info->nr_pt_frames;

	find_max_pfn();

	max_low_pfn = find_max_low_pfn();

#ifdef CONFIG_HIGHMEM
	/* Highmem spans from the top of lowmem to max_pfn; it is empty
	 * when all RAM fits in the direct mapping. */
	highstart_pfn = highend_pfn = max_pfn;
	if (max_pfn > max_low_pfn) {
		highstart_pfn = max_low_pfn;
	}
	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
		pages_to_mb(highend_pfn - highstart_pfn));
#endif
	printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
			pages_to_mb(max_low_pfn));

	setup_bootmem_allocator();

	return max_low_pfn;
}
1249
1250 void __init zone_sizes_init(void)
1251 {
1252         unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
1253         unsigned int max_dma, low;
1254
1255         /*
1256          * XEN: Our notion of "DMA memory" is fake when running over Xen.
1257          * We simply put all RAM in the DMA zone so that those drivers which
1258          * needlessly specify GFP_DMA do not get starved of RAM unnecessarily.
1259          * Those drivers that *do* require lowmem are screwed anyway when
1260          * running over Xen!
1261          */
1262         max_dma = max_low_pfn;
1263         low = max_low_pfn;
1264
1265         if (low < max_dma)
1266                 zones_size[ZONE_DMA] = low;
1267         else {
1268                 zones_size[ZONE_DMA] = max_dma;
1269                 zones_size[ZONE_NORMAL] = low - max_dma;
1270 #ifdef CONFIG_HIGHMEM
1271                 zones_size[ZONE_HIGHMEM] = highend_pfn - low;
1272 #endif
1273         }
1274         free_area_init(zones_size);
1275 }
1276 #else
1277 extern unsigned long __init setup_memory(void);
1278 extern void zone_sizes_init(void);
1279 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
1280
/*
 * Initialize the bootmem allocator with all usable low memory, then
 * reserve the ranges that must never be handed out: the kernel image
 * plus the bootmem bitmap, BIOS areas (native only), the crash-kernel
 * region, and record the Xen-provided initrd and pfn->mfn table.
 */
void __init setup_bootmem_allocator(void)
{
	unsigned long bootmap_size;
	/*
	 * Initialize the boot-time allocator (with low memory only):
	 */
	bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);

	register_bootmem_low_pages(max_low_pfn);

	/*
	 * Reserve the bootmem bitmap itself as well. We do this in two
	 * steps (first step was init_bootmem()) because this catches
	 * the (very unlikely) case of us accidentally initializing the
	 * bootmem allocator with an invalid RAM area.
	 */
	reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
			 bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));

#ifndef CONFIG_XEN
	/*
	 * reserve physical page 0 - it's a special BIOS page on many boxes,
	 * enabling clean reboots, SMP operation, laptop functions.
	 */
	reserve_bootmem(0, PAGE_SIZE);

	/* reserve EBDA region, it's a 4K region */
	reserve_ebda_region();

    /* could be an AMD 768MPX chipset. Reserve a page  before VGA to prevent
       PCI prefetch into it (errata #56). Usually the page is reserved anyways,
       unless you have no PS/2 mouse plugged in. */
	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
	    boot_cpu_data.x86 == 6)
	     reserve_bootmem(0xa0000 - 4096, 4096);

#ifdef CONFIG_SMP
	/*
	 * But first pinch a few for the stack/trampoline stuff
	 * FIXME: Don't need the extra page at 4K, but need to fix
	 * trampoline before removing it. (see the GDT stuff)
	 */
	reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
#endif
#ifdef CONFIG_ACPI_SLEEP
	/*
	 * Reserve low memory region for sleep support.
	 */
	acpi_reserve_bootmem();
#endif
#endif /* !CONFIG_XEN */

#ifdef CONFIG_BLK_DEV_INITRD
	/* Xen passes the initrd location via the start-info page. */
	if (xen_start_info->mod_start) {
		if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
			/*reserve_bootmem(INITRD_START, INITRD_SIZE);*/
			initrd_start = INITRD_START + PAGE_OFFSET;
			initrd_end = initrd_start+INITRD_SIZE;
			/* allow the initrd below min_low_pfn - presumably
			 * because Xen places it with the boot structures;
			 * confirm against the domain builder. */
			initrd_below_start_ok = 1;
		}
		else {
			printk(KERN_ERR "initrd extends beyond end of memory "
			    "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
			    INITRD_START + INITRD_SIZE,
			    max_low_pfn << PAGE_SHIFT);
			initrd_start = 0;
		}
	}
#endif
#ifdef CONFIG_KEXEC
	/* keep the crash-kernel range out of the allocator's hands */
	if (crashk_res.start != crashk_res.end)
		reserve_bootmem(crashk_res.start,
			crashk_res.end - crashk_res.start + 1);
#endif

	/* Xen supplies the pfn->mfn translation table unless the guest
	 * runs with auto-translated physmaps. */
	if (!xen_feature(XENFEAT_auto_translated_physmap))
		phys_to_machine_mapping =
			(unsigned long *)xen_start_info->mfn_list;
}
1360
1361 /*
1362  * The node 0 pgdat is initialized before all of these because
1363  * it's needed for bootmem.  node>0 pgdats have their virtual
1364  * space allocated before the pagetables are in place to access
1365  * them, so they can't be cleared then.
1366  *
1367  * This should all compile down to nothing when NUMA is off.
1368  */
1369 void __init remapped_pgdat_init(void)
1370 {
1371         int nid;
1372
1373         for_each_online_node(nid) {
1374                 if (nid != 0)
1375                         memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
1376         }
1377 }
1378
1379 /*
1380  * Request address space for all standard RAM and ROM resources
1381  * and also for regions reported as reserved by the e820.
1382  */
1383 static void __init
1384 legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
1385 {
1386         int i;
1387         struct e820entry *map = e820.map;
1388         int nr_map = e820.nr_map;
1389 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
1390         struct xen_memory_map memmap;
1391
1392         map = machine_e820.map;
1393         memmap.nr_entries = E820MAX;
1394
1395         set_xen_guest_handle(memmap.buffer, map);
1396
1397         if(HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap))
1398                 BUG();
1399         machine_e820.nr_map = memmap.nr_entries;
1400         nr_map = memmap.nr_entries;
1401         e820_setup_gap(map, memmap.nr_entries);
1402 #endif
1403
1404         probe_roms();
1405
1406         for (i = 0; i < nr_map; i++) {
1407                 struct resource *res;
1408                 if (map[i].addr + map[i].size > 0x100000000ULL)
1409                         continue;
1410                 res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
1411                 switch (map[i].type) {
1412                 case E820_RAM:  res->name = "System RAM"; break;
1413                 case E820_ACPI: res->name = "ACPI Tables"; break;
1414                 case E820_NVS:  res->name = "ACPI Non-volatile Storage"; break;
1415                 default:        res->name = "reserved";
1416                 }
1417                 res->start = map[i].addr;
1418                 res->end = res->start + map[i].size - 1;
1419                 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1420                 if (request_resource(&iomem_resource, res)) {
1421                         kfree(res);
1422                         continue;
1423                 }
1424                 if (map[i].type == E820_RAM) {
1425                         /*
1426                          *  We don't know which RAM region contains kernel data,
1427                          *  so we try it repeatedly and let the resource manager
1428                          *  test it.
1429                          */
1430 #ifndef CONFIG_XEN
1431                         request_resource(res, code_resource);
1432                         request_resource(res, data_resource);
1433 #endif
1434 #ifdef CONFIG_KEXEC
1435                         request_resource(res, &crashk_res);
1436 #endif
1437                 }
1438         }
1439 }
1440
1441 /*
1442  * Request address space for all standard resources
1443  *
1444  * This is called just before pcibios_init(), which is also a
1445  * subsys_initcall, but is linked in later (in arch/i386/pci/common.c).
1446  */
1447 static int __init request_standard_resources(void)
1448 {
1449         int i;
1450
1451         /* Nothing to do if not running in dom0. */
1452         if (!is_initial_xendomain())
1453                 return 0;
1454
1455         printk("Setting up standard PCI resources\n");
1456         if (efi_enabled)
1457                 efi_initialize_iomem_resources(&code_resource, &data_resource);
1458         else
1459                 legacy_init_iomem_resources(&code_resource, &data_resource);
1460
1461         /* EFI systems may still have VGA */
1462         request_resource(&iomem_resource, &video_ram_resource);
1463
1464         /* request I/O space for devices used on all i[345]86 PCs */
1465         for (i = 0; i < STANDARD_IO_RESOURCES; i++)
1466                 request_resource(&ioport_resource, &standard_io_resources[i]);
1467         return 0;
1468 }
1469
1470 subsys_initcall(request_standard_resources);
1471
1472 /*
1473  * Locate a unused range of the physical address space below 4G which
1474  * can be used for PCI mappings.
1475  */
1476 static void __init
1477 e820_setup_gap(struct e820entry *e820, int nr_map)
1478 {
1479         unsigned long gapstart, gapsize, round;
1480         unsigned long long last;
1481         int i;
1482
1483         /*
1484          * Search for the bigest gap in the low 32 bits of the e820
1485          * memory space.
1486          */
1487         last = 0x100000000ull;
1488         gapstart = 0x10000000;
1489         gapsize = 0x400000;
1490         i = nr_map;
1491         while (--i >= 0) {
1492                 unsigned long long start = e820[i].addr;
1493                 unsigned long long end = start + e820[i].size;
1494
1495                 /*
1496                  * Since "last" is at most 4GB, we know we'll
1497                  * fit in 32 bits if this condition is true
1498                  */
1499                 if (last > end) {
1500                         unsigned long gap = last - end;
1501
1502                         if (gap > gapsize) {
1503                                 gapsize = gap;
1504                                 gapstart = end;
1505                         }
1506                 }
1507                 if (start < last)
1508                         last = start;
1509         }
1510
1511         /*
1512          * See how much we want to round up: start off with
1513          * rounding to the next 1MB area.
1514          */
1515         round = 0x100000;
1516         while ((gapsize >> 4) > round)
1517                 round += round;
1518         /* Fun with two's complement */
1519         pci_mem_start = (gapstart + round) & -round;
1520
1521         printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
1522                 pci_mem_start, gapstart, gapsize);
1523 }
1524
/*
 * Late memory setup: on native hardware, compute the PCI MMIO gap from
 * the e820 map.  Under Xen this is done from the machine map instead
 * (see legacy_init_iomem_resources()), so it is skipped here.
 */
static void __init register_memory(void)
{
#ifndef CONFIG_XEN
	e820_setup_gap(e820.map, e820.nr_map);
#endif
}
1531
#ifdef CONFIG_MCA
/* Record whether an MCA bus is present (sets the global MCA_bus). */
static void set_mca_bus(int x)
{
	MCA_bus = x;
}
#else
/* No-op when MCA support is compiled out. */
static void set_mca_bus(int x) { }
#endif
1540
1541 /*
1542  * Determine if we were loaded by an EFI loader.  If so, then we have also been
1543  * passed the efi memmap, systab, etc., so we should use these data structures
1544  * for initialization.  Note, the efi init code path is determined by the
1545  * global efi_enabled. This allows the same kernel image to be used on existing
1546  * systems (with a traditional BIOS) as well as on EFI systems.
1547  */
1548 void __init setup_arch(char **cmdline_p)
1549 {
1550         int i, j, k, fpp;
1551         struct physdev_set_iopl set_iopl;
1552         unsigned long max_low_pfn;
1553
1554         /* Force a quick death if the kernel panics (not domain 0). */
1555         extern int panic_timeout;
1556         if (!panic_timeout && !is_initial_xendomain())
1557                 panic_timeout = 1;
1558
1559         /* Register a call for panic conditions. */
1560         atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
1561
1562         HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
1563         HYPERVISOR_vm_assist(VMASST_CMD_enable,
1564                              VMASST_TYPE_writable_pagetables);
1565
1566         memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
1567         pre_setup_arch_hook();
1568         early_cpu_init();
1569
1570         /*
1571          * FIXME: This isn't an official loader_type right
1572          * now but does currently work with elilo.
1573          * If we were configured as an EFI kernel, check to make
1574          * sure that we were loaded correctly from elilo and that
1575          * the system table is valid.  If not, then initialize normally.
1576          */
1577 #ifdef CONFIG_EFI
1578         if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
1579                 efi_enabled = 1;
1580 #endif
1581
1582         /* This must be initialized to UNNAMED_MAJOR for ipconfig to work
1583            properly.  Setting ROOT_DEV to default to /dev/ram0 breaks initrd.
1584         */
1585         ROOT_DEV = MKDEV(UNNAMED_MAJOR,0);
1586         drive_info = DRIVE_INFO;
1587         screen_info = SCREEN_INFO;
1588         edid_info = EDID_INFO;
1589         apm_info.bios = APM_BIOS_INFO;
1590         ist_info = IST_INFO;
1591         saved_videomode = VIDEO_MODE;
1592         if( SYS_DESC_TABLE.length != 0 ) {
1593                 set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2);
1594                 machine_id = SYS_DESC_TABLE.table[0];
1595                 machine_submodel_id = SYS_DESC_TABLE.table[1];
1596                 BIOS_revision = SYS_DESC_TABLE.table[2];
1597         }
1598         bootloader_type = LOADER_TYPE;
1599
1600         if (is_initial_xendomain()) {
1601                 /* This is drawn from a dump from vgacon:startup in
1602                  * standard Linux. */
1603                 screen_info.orig_video_mode = 3; 
1604                 screen_info.orig_video_isVGA = 1;
1605                 screen_info.orig_video_lines = 25;
1606                 screen_info.orig_video_cols = 80;
1607                 screen_info.orig_video_ega_bx = 3;
1608                 screen_info.orig_video_points = 16;
1609                 screen_info.orig_y = screen_info.orig_video_lines - 1;
1610                 if (xen_start_info->console.dom0.info_size >=
1611                     sizeof(struct dom0_vga_console_info)) {
1612                         const struct dom0_vga_console_info *info =
1613                                 (struct dom0_vga_console_info *)(
1614                                         (char *)xen_start_info +
1615                                         xen_start_info->console.dom0.info_off);
1616                         dom0_init_screen_info(info);
1617                 }
1618                 xen_start_info->console.domU.mfn = 0;
1619                 xen_start_info->console.domU.evtchn = 0;
1620         } else
1621                 screen_info.orig_video_isVGA = 0;
1622
1623 #ifdef CONFIG_BLK_DEV_RAM
1624         rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
1625         rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
1626         rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
1627 #endif
1628
1629         setup_xen_features();
1630
1631         ARCH_SETUP
1632         if (efi_enabled)
1633                 efi_init();
1634         else {
1635                 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1636                 print_memory_map(machine_specific_memory_setup());
1637         }
1638
1639         copy_edd();
1640
1641         if (!MOUNT_ROOT_RDONLY)
1642                 root_mountflags &= ~MS_RDONLY;
1643         init_mm.start_code = (unsigned long) _text;
1644         init_mm.end_code = (unsigned long) _etext;
1645         init_mm.end_data = (unsigned long) _edata;
1646         init_mm.brk = (PFN_UP(__pa(xen_start_info->pt_base)) +
1647                        xen_start_info->nr_pt_frames) << PAGE_SHIFT;
1648
1649         code_resource.start = virt_to_phys(_text);
1650         code_resource.end = virt_to_phys(_etext)-1;
1651         data_resource.start = virt_to_phys(_etext);
1652         data_resource.end = virt_to_phys(_edata)-1;
1653
1654         parse_cmdline_early(cmdline_p);
1655
1656 #ifdef CONFIG_EARLY_PRINTK
1657         {
1658                 char *s = strstr(*cmdline_p, "earlyprintk=");
1659                 if (s) {
1660                         setup_early_printk(strchr(s, '=') + 1);
1661                         printk("early console enabled\n");
1662                 }
1663         }
1664 #endif
1665
1666         max_low_pfn = setup_memory();
1667
1668         /*
1669          * NOTE: before this point _nobody_ is allowed to allocate
1670          * any memory using the bootmem allocator.  Although the
1671          * allocator is now initialised, only the first 8MB of the kernel
1672          * virtual address space has been mapped.  All allocations before
1673          * paging_init() has completed must use the alloc_bootmem_low_pages()
1674          * variant (which allocates DMA'able memory) and care must be taken
1675          * not to exceed the 8MB limit.
1676          */
1677
1678 #ifdef CONFIG_SMP
1679         smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
1680 #endif
1681         paging_init();
1682         remapped_pgdat_init();
1683         sparse_init();
1684         zone_sizes_init();
1685
1686 #ifdef CONFIG_X86_FIND_SMP_CONFIG
1687         /*
1688          * Find and reserve possible boot-time SMP configuration:
1689          */
1690         find_smp_config();
1691 #endif
1692
1693         /* Make sure we have a correctly sized P->M table. */
1694         if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1695                 phys_to_machine_mapping = alloc_bootmem_low_pages(
1696                      max_pfn * sizeof(unsigned long));
1697                 memset(phys_to_machine_mapping, ~0,
1698                        max_pfn * sizeof(unsigned long));
1699                 memcpy(phys_to_machine_mapping,
1700                        (unsigned long *)xen_start_info->mfn_list,
1701                        xen_start_info->nr_pages * sizeof(unsigned long));
1702                 free_bootmem(
1703                      __pa(xen_start_info->mfn_list),
1704                      PFN_PHYS(PFN_UP(xen_start_info->nr_pages *
1705                                      sizeof(unsigned long))));
1706
1707                 /*
1708                  * Initialise the list of the frames that specify the list of
1709                  * frames that make up the p2m table. Used by save/restore
1710                  */
1711                 pfn_to_mfn_frame_list_list = alloc_bootmem_low_pages(PAGE_SIZE);
1712                 HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
1713                      virt_to_mfn(pfn_to_mfn_frame_list_list);
1714
1715                 fpp = PAGE_SIZE/sizeof(unsigned long);
1716                 for (i=0, j=0, k=-1; i< max_pfn; i+=fpp, j++) {
1717                         if ((j % fpp) == 0) {
1718                                 k++;
1719                                 BUG_ON(k>=16);
1720                                 pfn_to_mfn_frame_list[k] =
1721                                         alloc_bootmem_low_pages(PAGE_SIZE);
1722                                 pfn_to_mfn_frame_list_list[k] =
1723                                         virt_to_mfn(pfn_to_mfn_frame_list[k]);
1724                                 j=0;
1725                         }
1726                         pfn_to_mfn_frame_list[k][j] =
1727                                 virt_to_mfn(&phys_to_machine_mapping[i]);
1728                 }
1729                 HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
1730         }
1731
1732         /*
1733          * NOTE: at this point the bootmem allocator is fully available.
1734          */
1735
1736         if (is_initial_xendomain())
1737                 dmi_scan_machine();
1738
1739 #ifdef CONFIG_X86_GENERICARCH
1740         generic_apic_probe(*cmdline_p);
1741 #endif  
1742         if (efi_enabled)
1743                 efi_map_memmap();
1744
1745         set_iopl.iopl = 1;
1746         HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
1747
1748 #ifdef CONFIG_ACPI
1749         if (!is_initial_xendomain()) {
1750                 printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
1751                 acpi_disabled = 1;
1752                 acpi_ht = 0;
1753         }
1754
1755         /*
1756          * Parse the ACPI tables for possible boot-time SMP configuration.
1757          */
1758         acpi_boot_table_init();
1759 #endif
1760
1761 #ifdef CONFIG_X86_IO_APIC
1762         check_acpi_pci();       /* Checks more than just ACPI actually */
1763 #endif
1764
1765 #ifdef CONFIG_ACPI
1766         acpi_boot_init();
1767
1768 #if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
1769         if (def_to_bigsmp)
1770                 printk(KERN_WARNING "More than 8 CPUs detected and "
1771                         "CONFIG_X86_PC cannot handle it.\nUse "
1772                         "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
1773 #endif
1774 #endif
1775 #ifdef CONFIG_X86_LOCAL_APIC
1776         if (smp_found_config)
1777                 get_smp_config();
1778 #endif
1779 #if defined(CONFIG_XEN) && defined(CONFIG_SMP)
1780         prefill_possible_map();
1781 #endif
1782
1783         register_memory();
1784
1785         if (is_initial_xendomain()) {
1786 #ifdef CONFIG_VT
1787 #if defined(CONFIG_VGA_CONSOLE)
1788                 if (!efi_enabled ||
1789                     (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
1790                         conswitchp = &vga_con;
1791 #elif defined(CONFIG_DUMMY_CONSOLE)
1792                 conswitchp = &dummy_con;
1793 #endif
1794 #endif
1795         } else {
1796 #if defined(CONFIG_VT) && defined(CONFIG_DUMMY_CONSOLE)
1797                 conswitchp = &dummy_con;
1798 #endif
1799         }
1800 #ifdef CONFIG_X86_TSC
1801         tsc_init();
1802 #endif
1803 }
1804
1805 static int
1806 xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
1807 {
1808         HYPERVISOR_shutdown(SHUTDOWN_crash);
1809         /* we're never actually going to get here... */
1810         return NOTIFY_DONE;
1811 }
1812
1813 static __init int add_pcspkr(void)
1814 {
1815         struct platform_device *pd;
1816         int ret;
1817
1818         pd = platform_device_alloc("pcspkr", -1);
1819         if (!pd)
1820                 return -ENOMEM;
1821
1822         ret = platform_device_add(pd);
1823         if (ret)
1824                 platform_device_put(pd);
1825
1826         return ret;
1827 }
1828 device_initcall(add_pcspkr);
1829
1830 /*
1831  * Local Variables:
1832  * mode:c
1833  * c-file-style:"k&r"
1834  * c-basic-offset:8
1835  * End:
1836  */