Merge to Fedora kernel-2.6.18-1.2224_FC5 patched with stable patch-2.6.18.1-vs2.0...
[linux-2.6.git] / arch / i386 / kernel / setup-xen.c
1 /*
2  *  linux/arch/i386/kernel/setup.c
3  *
4  *  Copyright (C) 1995  Linus Torvalds
5  *
6  *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
7  *
8  *  Memory region support
9  *      David Parsons <orc@pell.chi.il.us>, July-August 1999
10  *
11  *  Added E820 sanitization routine (removes overlapping memory regions);
12  *  Brian Moyle <bmoyle@mvista.com>, February 2001
13  *
14  * Moved CPU detection code to cpu/${cpu}.c
15  *    Patrick Mochel <mochel@osdl.org>, March 2002
16  *
17  *  Provisions for empty E820 memory regions (reported by certain BIOSes).
18  *  Alex Achenbach <xela@slit.de>, December 2002.
19  *
20  */
21
22 /*
23  * This file handles the architecture-dependent parts of initialization
24  */
25
26 #include <linux/sched.h>
27 #include <linux/mm.h>
28 #include <linux/mmzone.h>
29 #include <linux/screen_info.h>
30 #include <linux/ioport.h>
31 #include <linux/acpi.h>
32 #include <linux/apm_bios.h>
33 #include <linux/initrd.h>
34 #include <linux/bootmem.h>
35 #include <linux/seq_file.h>
36 #include <linux/platform_device.h>
37 #include <linux/console.h>
38 #include <linux/mca.h>
39 #include <linux/root_dev.h>
40 #include <linux/highmem.h>
41 #include <linux/module.h>
42 #include <linux/efi.h>
43 #include <linux/init.h>
44 #include <linux/edd.h>
45 #include <linux/nodemask.h>
46 #include <linux/kexec.h>
47 #include <linux/crash_dump.h>
48 #include <linux/dmi.h>
49 #include <linux/pfn.h>
50
51 #include <video/edid.h>
52
53 #include <asm/apic.h>
54 #include <asm/e820.h>
55 #include <asm/mpspec.h>
56 #include <asm/setup.h>
57 #include <asm/arch_hooks.h>
58 #include <asm/sections.h>
59 #include <asm/io_apic.h>
60 #include <asm/ist.h>
61 #include <asm/io.h>
62 #include <asm/hypervisor.h>
63 #include <xen/interface/physdev.h>
64 #include <xen/interface/memory.h>
65 #include <xen/features.h>
66 #include <xen/xencons.h>
67 #include "setup_arch.h"
68 #include <bios_ebda.h>
69
/* Forward Declaration. */
void __init find_max_pfn(void);

/*
 * Panic notifier: the callback is declared here and defined outside this
 * excerpt.  The block is initialized positionally; the trailing 0 is
 * presumably the notifier priority (see "try to go last") -- confirm
 * against struct notifier_block.
 */
static int xen_panic_event(struct notifier_block *, unsigned long, void *);
static struct notifier_block xen_panic_block = {
        xen_panic_event, NULL, 0 /* try to go last */
};

/* One page, defined elsewhere and exported for module use. */
extern char hypercall_page[PAGE_SIZE];
EXPORT_SYMBOL(hypercall_page);

/* Set to 1 by the "mem=nopentium" command line option. */
int disable_pse __devinitdata = 0;

/*
 * Machine setup..
 */

#ifdef CONFIG_EFI
/* Non-zero selects the EFI memory map paths in this file. */
int efi_enabled = 0;
EXPORT_SYMBOL(efi_enabled);
#endif

/* cpu data as detected by the assembly code in head.S */
struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
/* common cpu data for all cpus */
struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
EXPORT_SYMBOL(boot_cpu_data);

unsigned long mmu_cr4_features;

/* ACPI defaults to enabled only when it is configured in. */
#ifdef  CONFIG_ACPI
        int acpi_disabled = 0;
#else
        int acpi_disabled = 1;
#endif
EXPORT_SYMBOL(acpi_disabled);

#ifdef  CONFIG_ACPI
int __initdata acpi_force = 0;
extern acpi_interrupt_flags     acpi_sci_flags;
#endif

/* for MCA, but anyone else can use it if they want */
unsigned int machine_id;
#ifdef CONFIG_MCA
EXPORT_SYMBOL(machine_id);
#endif
unsigned int machine_submodel_id;
unsigned int BIOS_revision;
unsigned int mca_pentium_flag;

/* For PCI or other memory-mapped resources */
unsigned long pci_mem_start = 0x10000000;
#ifdef CONFIG_PCI
EXPORT_SYMBOL(pci_mem_start);
#endif

/* Boot loader ID as an integer, for the benefit of proc_dointvec */
int bootloader_type;

/*
 * user-defined highmem size
 * NOTE(review): -1 converts to UINT_MAX here; presumably the "not set"
 * marker -- confirm against the users of highmem_pages.
 */
static unsigned int highmem_pages = -1;

/*
 * Setup options
 */
/* Opaque 32-byte blob; exported only when an IDE/HD block driver is configured. */
struct drive_info_struct { char dummy[32]; } drive_info;
#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \
    defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
EXPORT_SYMBOL(drive_info);
#endif
struct screen_info screen_info;
EXPORT_SYMBOL(screen_info);
struct apm_info apm_info;
EXPORT_SYMBOL(apm_info);
/* Length-prefixed table; 'table' is a zero-length trailing array. */
struct sys_desc_table_struct {
        unsigned short length;
        unsigned char table[0];
};
struct edid_info edid_info;
EXPORT_SYMBOL_GPL(edid_info);
struct ist_info ist_info;
#if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
        defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
EXPORT_SYMBOL(ist_info);
#endif
/* The memory map edited by add_memory_region() and limit_regions(). */
struct e820map e820;
static void __init e820_setup_gap(struct e820entry *e820, int nr_map);
#ifdef CONFIG_XEN
struct e820map machine_e820;
#endif

extern void early_cpu_init(void);
extern void generic_apic_probe(char *);
extern int root_mountflags;

unsigned long saved_videomode;

/* Bit layout of the ramdisk flags word: low 11 bits are the image start. */
#define RAMDISK_IMAGE_START_MASK        0x07FF
#define RAMDISK_PROMPT_FLAG             0x8000
#define RAMDISK_LOAD_FLAG               0x4000

/* Working copy of the command line, filled by parse_cmdline_early(). */
static char command_line[COMMAND_LINE_SIZE];

unsigned char __initdata boot_params[PARAM_SIZE];
175
/*
 * Kernel image resources.  Both start out as 0..0; the real bounds are
 * presumably filled in later during setup (not visible in this excerpt).
 */
static struct resource data_resource = {
        .name   = "Kernel data",
        .start  = 0,
        .end    = 0,
        .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
};

static struct resource code_resource = {
        .name   = "Kernel code",
        .start  = 0,
        .end    = 0,
        .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
};

/* Fixed 64KB window at 0xf0000; claimed unconditionally by probe_roms(). */
static struct resource system_rom_resource = {
        .name   = "System ROM",
        .start  = 0xf0000,
        .end    = 0xfffff,
        .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
};

/* 64KB window at 0xe0000; claimed by probe_roms() only if signature and checksum match. */
static struct resource extension_rom_resource = {
        .name   = "Extension ROM",
        .start  = 0xe0000,
        .end    = 0xeffff,
        .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
};

/*
 * Slots for adapter ROMs discovered by probe_roms(), which overwrites
 * .start/.end of each slot it uses.  Entry 0's initial .start (0xc8000)
 * also serves as the upper bound of the video ROM scan.
 */
static struct resource adapter_rom_resources[] = { {
        .name   = "Adapter ROM",
        .start  = 0xc8000,
        .end    = 0,
        .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
        .name   = "Adapter ROM",
        .start  = 0,
        .end    = 0,
        .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
        .name   = "Adapter ROM",
        .start  = 0,
        .end    = 0,
        .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
        .name   = "Adapter ROM",
        .start  = 0,
        .end    = 0,
        .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
        .name   = "Adapter ROM",
        .start  = 0,
        .end    = 0,
        .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
        .name   = "Adapter ROM",
        .start  = 0,
        .end    = 0,
        .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
} };

/* Number of adapter ROM slots above. */
#define ADAPTER_ROM_RESOURCES \
        (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])

/* Default video ROM window; probe_roms() narrows it when a ROM is found. */
static struct resource video_rom_resource = {
        .name   = "Video ROM",
        .start  = 0xc0000,
        .end    = 0xc7fff,
        .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
};

static struct resource video_ram_resource = {
        .name   = "Video RAM area",
        .start  = 0xa0000,
        .end    = 0xbffff,
        .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
};

/* Legacy I/O port ranges, each marked busy. */
static struct resource standard_io_resources[] = { {
        .name   = "dma1",
        .start  = 0x0000,
        .end    = 0x001f,
        .flags  = IORESOURCE_BUSY | IORESOURCE_IO
}, {
        .name   = "pic1",
        .start  = 0x0020,
        .end    = 0x0021,
        .flags  = IORESOURCE_BUSY | IORESOURCE_IO
}, {
        .name   = "timer0",
        .start  = 0x0040,
        .end    = 0x0043,
        .flags  = IORESOURCE_BUSY | IORESOURCE_IO
}, {
        .name   = "timer1",
        .start  = 0x0050,
        .end    = 0x0053,
        .flags  = IORESOURCE_BUSY | IORESOURCE_IO
}, {
        .name   = "keyboard",
        .start  = 0x0060,
        .end    = 0x006f,
        .flags  = IORESOURCE_BUSY | IORESOURCE_IO
}, {
        .name   = "dma page reg",
        .start  = 0x0080,
        .end    = 0x008f,
        .flags  = IORESOURCE_BUSY | IORESOURCE_IO
}, {
        .name   = "pic2",
        .start  = 0x00a0,
        .end    = 0x00a1,
        .flags  = IORESOURCE_BUSY | IORESOURCE_IO
}, {
        .name   = "dma2",
        .start  = 0x00c0,
        .end    = 0x00df,
        .flags  = IORESOURCE_BUSY | IORESOURCE_IO
}, {
        .name   = "fpu",
        .start  = 0x00f0,
        .end    = 0x00ff,
        .flags  = IORESOURCE_BUSY | IORESOURCE_IO
} };

/* Number of entries in standard_io_resources. */
#define STANDARD_IO_RESOURCES \
        (sizeof standard_io_resources / sizeof standard_io_resources[0])

/* True when the 16-bit value at x is the ROM signature 0xaa55. */
#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
304
305 static int __init romchecksum(unsigned char *rom, unsigned long length)
306 {
307         unsigned char *p, sum = 0;
308
309         for (p = rom; p < rom + length; p++)
310                 sum += *p;
311         return sum == 0;
312 }
313
314 static void __init probe_roms(void)
315 {
316         unsigned long start, length, upper;
317         unsigned char *rom;
318         int           i;
319
320 #ifdef CONFIG_XEN
321         /* Nothing to do if not running in dom0. */
322         if (!is_initial_xendomain())
323                 return;
324 #endif
325
326         /* video rom */
327         upper = adapter_rom_resources[0].start;
328         for (start = video_rom_resource.start; start < upper; start += 2048) {
329                 rom = isa_bus_to_virt(start);
330                 if (!romsignature(rom))
331                         continue;
332
333                 video_rom_resource.start = start;
334
335                 /* 0 < length <= 0x7f * 512, historically */
336                 length = rom[2] * 512;
337
338                 /* if checksum okay, trust length byte */
339                 if (length && romchecksum(rom, length))
340                         video_rom_resource.end = start + length - 1;
341                 break;
342         }
343
344         start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
345         if (start < upper)
346                 start = upper;
347
348         /* system rom */
349         request_resource(&iomem_resource, &system_rom_resource);
350         upper = system_rom_resource.start;
351
352         /* check for extension rom (ignore length byte!) */
353         rom = isa_bus_to_virt(extension_rom_resource.start);
354         if (romsignature(rom)) {
355                 length = extension_rom_resource.end - extension_rom_resource.start + 1;
356                 if (romchecksum(rom, length)) {
357                         request_resource(&iomem_resource, &extension_rom_resource);
358                         upper = extension_rom_resource.start;
359                 }
360         }
361
362         /* check for adapter roms on 2k boundaries */
363         for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
364                 rom = isa_bus_to_virt(start);
365                 if (!romsignature(rom))
366                         continue;
367
368                 /* 0 < length <= 0x7f * 512, historically */
369                 length = rom[2] * 512;
370
371                 /* but accept any length that fits if checksum okay */
372                 if (!length || start + length > upper || !romchecksum(rom, length))
373                         continue;
374
375                 adapter_rom_resources[i].start = start;
376                 adapter_rom_resources[i].end = start + length - 1;
377                 request_resource(&iomem_resource, &adapter_rom_resources[i]);
378
379                 start = adapter_rom_resources[i++].end & ~2047UL;
380         }
381 }
382
/*
 * Point at the empty zero page to start with. We map the real shared_info
 * page as soon as fixmap is up and running.
 */
shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
EXPORT_SYMBOL(HYPERVISOR_shared_info);

/*
 * Pseudo-physical to machine frame translation table, plus the frame
 * lists describing it (a top-level list and up to 16 second-level list
 * pointers).  NOTE(review): set up outside this excerpt -- confirm the
 * exact layout against the code that fills them in.
 */
unsigned long *phys_to_machine_mapping;
unsigned long *pfn_to_mfn_frame_list_list, *pfn_to_mfn_frame_list[16];
EXPORT_SYMBOL(phys_to_machine_mapping);

/* Raw start-of-day parameters from the hypervisor. */
start_info_t *xen_start_info;
EXPORT_SYMBOL(xen_start_info);
397
398 void __init add_memory_region(unsigned long long start,
399                                   unsigned long long size, int type)
400 {
401         int x;
402
403         if (!efi_enabled) {
404                 x = e820.nr_map;
405
406                 if (x == E820MAX) {
407                     printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
408                     return;
409                 }
410
411                 e820.map[x].addr = start;
412                 e820.map[x].size = size;
413                 e820.map[x].type = type;
414                 e820.nr_map++;
415         }
416 } /* add_memory_region */
417
418 static void __init limit_regions(unsigned long long size)
419 {
420         unsigned long long current_addr = 0;
421         int i;
422
423         if (efi_enabled) {
424                 efi_memory_desc_t *md;
425                 void *p;
426
427                 for (p = memmap.map, i = 0; p < memmap.map_end;
428                         p += memmap.desc_size, i++) {
429                         md = p;
430                         current_addr = md->phys_addr + (md->num_pages << 12);
431                         if (md->type == EFI_CONVENTIONAL_MEMORY) {
432                                 if (current_addr >= size) {
433                                         md->num_pages -=
434                                                 (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
435                                         memmap.nr_map = i + 1;
436                                         return;
437                                 }
438                         }
439                 }
440         }
441         for (i = 0; i < e820.nr_map; i++) {
442                 current_addr = e820.map[i].addr + e820.map[i].size;
443                 if (current_addr < size)
444                         continue;
445
446                 if (e820.map[i].type != E820_RAM)
447                         continue;
448
449                 if (e820.map[i].addr >= size) {
450                         /*
451                          * This region starts past the end of the
452                          * requested size, skip it completely.
453                          */
454                         e820.nr_map = i;
455                 } else {
456                         e820.nr_map = i + 1;
457                         e820.map[i].size -= current_addr - size;
458                 }
459                 return;
460         }
461 #ifdef CONFIG_XEN
462         if (i==e820.nr_map && current_addr < size) {
463                 /*
464                  * The e820 map finished before our requested size so
465                  * extend the final entry to the requested address.
466                  */
467                 --i;
468                 if (e820.map[i].type == E820_RAM)
469                         e820.map[i].size -= current_addr - size;
470                 else
471                         add_memory_region(current_addr, size - current_addr, E820_RAM);
472         }
473 #endif
474 }
475
476 #define E820_DEBUG      1
477
478 static void __init print_memory_map(char *who)
479 {
480         int i;
481
482         for (i = 0; i < e820.nr_map; i++) {
483                 printk(" %s: %016Lx - %016Lx ", who,
484                         e820.map[i].addr,
485                         e820.map[i].addr + e820.map[i].size);
486                 switch (e820.map[i].type) {
487                 case E820_RAM:  printk("(usable)\n");
488                                 break;
489                 case E820_RESERVED:
490                                 printk("(reserved)\n");
491                                 break;
492                 case E820_ACPI:
493                                 printk("(ACPI data)\n");
494                                 break;
495                 case E820_NVS:
496                                 printk("(ACPI NVS)\n");
497                                 break;
498                 default:        printk("type %lu\n", e820.map[i].type);
499                                 break;
500                 }
501         }
502 }
503
/*
 * Sanitize the BIOS e820 map.
 *
 * Some e820 responses include overlapping entries.  The following
 * replaces the original e820 map with a new one, removing overlaps.
 *
 */
/* One change point: an address where some bios entry starts or ends. */
struct change_member {
        struct e820entry *pbios; /* pointer to original bios entry */
        unsigned long long addr; /* address for this change point */
};
/* Scratch storage for sanitize_e820_map(): two change points per entry. */
static struct change_member change_point_list[2*E820MAX] __initdata;
/* Pointers into change_point_list; these are what actually gets sorted. */
static struct change_member *change_point[2*E820MAX] __initdata;
/* Entries covering the address currently being swept. */
static struct e820entry *overlap_list[E820MAX] __initdata;
/* The sanitized map, built here and then copied back over the input. */
static struct e820entry new_bios[E820MAX] __initdata;
519
/*
 * Rewrite an e820 map in place so that no two entries overlap.
 *
 * @biosmap: the map to sanitize; overwritten with the result
 * @pnr_map: in: number of entries in biosmap; out: number of entries in
 *           the sanitized map (note the count travels through a char)
 *
 * Where entries overlap, the numerically larger type wins.  Entries of
 * size zero are ignored.
 *
 * Returns 0 on success.  Returns -1, leaving the map untouched, when the
 * map has fewer than two entries or when any entry's addr + size wraps.
 */
int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
{
        struct change_member *change_tmp;
        unsigned long current_type, last_type;
        unsigned long long last_addr;
        int chgidx, still_changing;
        int overlap_entries;
        int new_bios_entry;
        int old_nr, new_nr, chg_nr;
        int i;

        /*
                Visually we're performing the following (1,2,3,4 = memory types)...

                Sample memory map (w/overlaps):
                   ____22__________________
                   ______________________4_
                   ____1111________________
                   _44_____________________
                   11111111________________
                   ____________________33__
                   ___________44___________
                   __________33333_________
                   ______________22________
                   ___________________2222_
                   _________111111111______
                   _____________________11_
                   _________________4______

                Sanitized equivalent (no overlap):
                   1_______________________
                   _44_____________________
                   ___1____________________
                   ____22__________________
                   ______11________________
                   _________1______________
                   __________3_____________
                   ___________44___________
                   _____________33_________
                   _______________2________
                   ________________1_______
                   _________________4______
                   ___________________2____
                   ____________________33__
                   ______________________4_
        */

        /* if there's only one memory region, don't bother */
        if (*pnr_map < 2)
                return -1;

        old_nr = *pnr_map;

        /* bail out if we find any unreasonable addresses in bios map */
        for (i=0; i<old_nr; i++)
                if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
                        return -1;

        /* create pointers for initial change-point information (for sorting) */
        for (i=0; i < 2*old_nr; i++)
                change_point[i] = &change_point_list[i];

        /* record all known change-points (starting and ending addresses),
           omitting those that are for empty memory regions */
        chgidx = 0;
        for (i=0; i < old_nr; i++)      {
                if (biosmap[i].size != 0) {
                        change_point[chgidx]->addr = biosmap[i].addr;
                        change_point[chgidx++]->pbios = &biosmap[i];
                        change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
                        change_point[chgidx++]->pbios = &biosmap[i];
                }
        }
        chg_nr = chgidx;        /* true number of change-points */

        /* sort change-point list by memory addresses (low -> high) */
        /* (bubble sort: repeat passes until one completes without a swap) */
        still_changing = 1;
        while (still_changing)  {
                still_changing = 0;
                for (i=1; i < chg_nr; i++)  {
                        /* if <current_addr> < <last_addr>, swap */
                        /* or, if current=<start_addr> & last=<end_addr>, swap */
                        if ((change_point[i]->addr < change_point[i-1]->addr) ||
                                ((change_point[i]->addr == change_point[i-1]->addr) &&
                                 (change_point[i]->addr == change_point[i]->pbios->addr) &&
                                 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
                           )
                        {
                                change_tmp = change_point[i];
                                change_point[i] = change_point[i-1];
                                change_point[i-1] = change_tmp;
                                still_changing=1;
                        }
                }
        }

        /* create a new bios memory map, removing overlaps */
        overlap_entries=0;       /* number of entries in the overlap table */
        new_bios_entry=0;        /* index for creating new bios map entries */
        last_type = 0;           /* start with undefined memory type */
        last_addr = 0;           /* start with 0 as last starting address */
        /* loop through change-points, determining effect on the new bios map */
        for (chgidx=0; chgidx < chg_nr; chgidx++)
        {
                /* keep track of all overlapping bios entries */
                /* (a change point at its entry's own start address opens that entry) */
                if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
                {
                        /* add map entry to overlap list (> 1 entry implies an overlap) */
                        overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
                }
                else
                {
                        /* remove entry from list (order independent, so swap with last) */
                        for (i=0; i<overlap_entries; i++)
                        {
                                if (overlap_list[i] == change_point[chgidx]->pbios)
                                        overlap_list[i] = overlap_list[overlap_entries-1];
                        }
                        overlap_entries--;
                }
                /* if there are overlapping entries, decide which "type" to use */
                /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
                current_type = 0;
                for (i=0; i<overlap_entries; i++)
                        if (overlap_list[i]->type > current_type)
                                current_type = overlap_list[i]->type;
                /* continue building up new bios map based on this information */
                if (current_type != last_type)  {
                        /* a region was open: close it at this change point */
                        if (last_type != 0)      {
                                new_bios[new_bios_entry].size =
                                        change_point[chgidx]->addr - last_addr;
                                /* move forward only if the new size was non-zero */
                                if (new_bios[new_bios_entry].size != 0)
                                        if (++new_bios_entry >= E820MAX)
                                                break;  /* no more space left for new bios entries */
                        }
                        /* a new region begins here: record its start and type */
                        if (current_type != 0)  {
                                new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
                                new_bios[new_bios_entry].type = current_type;
                                last_addr=change_point[chgidx]->addr;
                        }
                        last_type = current_type;
                }
        }
        new_nr = new_bios_entry;   /* retain count for new bios entries */

        /* copy new bios mapping into original location */
        memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
        *pnr_map = new_nr;

        return 0;
}
672
673 /*
674  * Copy the BIOS e820 map into a safe place.
675  *
676  * Sanity-check it while we're at it..
677  *
678  * If we're lucky and live on a modern system, the setup code
679  * will have given us a memory map that we can use to properly
680  * set up memory.  If we aren't, we'll fake a memory map.
681  *
682  * We check to see that the memory map contains at least 2 elements
683  * before we'll use it, because the detection code in setup.S may
684  * not be perfect and most every PC known to man has two memory
685  * regions: one from 0 to 640k, and one from 1mb up.  (The IBM
686  * thinkpad 560x, for example, does not cooperate with the memory
687  * detection code.)
688  */
689 int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
690 {
691 #ifndef CONFIG_XEN
692         /* Only one memory region (or negative)? Ignore it */
693         if (nr_map < 2)
694                 return -1;
695 #else
696         BUG_ON(nr_map < 1);
697 #endif
698
699         do {
700                 unsigned long long start = biosmap->addr;
701                 unsigned long long size = biosmap->size;
702                 unsigned long long end = start + size;
703                 unsigned long type = biosmap->type;
704
705                 /* Overflow in 64 bits? Ignore the memory map. */
706                 if (start > end)
707                         return -1;
708
709 #ifndef CONFIG_XEN
710                 /*
711                  * Some BIOSes claim RAM in the 640k - 1M region.
712                  * Not right. Fix it up.
713                  */
714                 if (type == E820_RAM) {
715                         if (start < 0x100000ULL && end > 0xA0000ULL) {
716                                 if (start < 0xA0000ULL)
717                                         add_memory_region(start, 0xA0000ULL-start, type);
718                                 if (end <= 0x100000ULL)
719                                         continue;
720                                 start = 0x100000ULL;
721                                 size = end - start;
722                         }
723                 }
724 #endif
725                 add_memory_region(start, size, type);
726         } while (biosmap++,--nr_map);
727         return 0;
728 }
729
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
struct edd edd;
#ifdef CONFIG_EDD_MODULE
EXPORT_SYMBOL(edd);
#endif
/**
 * copy_edd() - Save the BIOS EDD data found in boot_params.
 *
 * Stores the MBR signature count and EDD info count, then copies the
 * MBR signatures and EDD info records into the global 'edd' structure.
 * The copy sizes are those of the destination arrays.
 */
static inline void copy_edd(void)
{
     /* entry counts */
     edd.mbr_signature_nr = EDD_MBR_SIG_NR;
     edd.edd_info_nr = EDD_NR;

     /* payload arrays */
     memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
     memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
}
#else
/* EDD support not configured: nothing to copy. */
static inline void copy_edd(void)
{
}
#endif
752
753 static void __init parse_cmdline_early (char ** cmdline_p)
754 {
755         char c = ' ', *to = command_line, *from = saved_command_line;
756         int len = 0, max_cmdline;
757         int userdef = 0;
758
759         if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
760                 max_cmdline = COMMAND_LINE_SIZE;
761         memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
762         /* Save unparsed command line copy for /proc/cmdline */
763         saved_command_line[max_cmdline-1] = '\0';
764
765         for (;;) {
766                 if (c != ' ')
767                         goto next_char;
768                 /*
769                  * "mem=nopentium" disables the 4MB page tables.
770                  * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
771                  * to <mem>, overriding the bios size.
772                  * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
773                  * <start> to <start>+<mem>, overriding the bios size.
774                  *
775                  * HPA tells me bootloaders need to parse mem=, so no new
776                  * option should be mem=  [also see Documentation/i386/boot.txt]
777                  */
778                 if (!memcmp(from, "mem=", 4)) {
779                         if (to != command_line)
780                                 to--;
781                         if (!memcmp(from+4, "nopentium", 9)) {
782                                 from += 9+4;
783                                 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
784                                 disable_pse = 1;
785                         } else {
786                                 /* If the user specifies memory size, we
787                                  * limit the BIOS-provided memory map to
788                                  * that size. exactmap can be used to specify
789                                  * the exact map. mem=number can be used to
790                                  * trim the existing memory map.
791                                  */
792                                 unsigned long long mem_size;
793  
794                                 mem_size = memparse(from+4, &from);
795                                 limit_regions(mem_size);
796                                 userdef=1;
797                         }
798                 }
799
800                 else if (!memcmp(from, "memmap=", 7)) {
801                         if (to != command_line)
802                                 to--;
803                         if (!memcmp(from+7, "exactmap", 8)) {
804 #ifdef CONFIG_CRASH_DUMP
805                                 /* If we are doing a crash dump, we
806                                  * still need to know the real mem
807                                  * size before original memory map is
808                                  * reset.
809                                  */
810                                 find_max_pfn();
811                                 saved_max_pfn = max_pfn;
812 #endif
813                                 from += 8+7;
814                                 e820.nr_map = 0;
815                                 userdef = 1;
816                         } else {
817                                 /* If the user specifies memory size, we
818                                  * limit the BIOS-provided memory map to
819                                  * that size. exactmap can be used to specify
820                                  * the exact map. mem=number can be used to
821                                  * trim the existing memory map.
822                                  */
823                                 unsigned long long start_at, mem_size;
824  
825                                 mem_size = memparse(from+7, &from);
826                                 if (*from == '@') {
827                                         start_at = memparse(from+1, &from);
828                                         add_memory_region(start_at, mem_size, E820_RAM);
829                                 } else if (*from == '#') {
830                                         start_at = memparse(from+1, &from);
831                                         add_memory_region(start_at, mem_size, E820_ACPI);
832                                 } else if (*from == '$') {
833                                         start_at = memparse(from+1, &from);
834                                         add_memory_region(start_at, mem_size, E820_RESERVED);
835                                 } else {
836                                         limit_regions(mem_size);
837                                         userdef=1;
838                                 }
839                         }
840                 }
841
842                 else if (!memcmp(from, "noexec=", 7))
843                         noexec_setup(from + 7);
844
845
846 #ifdef  CONFIG_X86_MPPARSE
847                 /*
848                  * If the BIOS enumerates physical processors before logical,
849                  * maxcpus=N at enumeration-time can be used to disable HT.
850                  */
851                 else if (!memcmp(from, "maxcpus=", 8)) {
852                         extern unsigned int maxcpus;
853
854                         maxcpus = simple_strtoul(from + 8, NULL, 0);
855                 }
856 #endif
857
858 #ifdef CONFIG_ACPI
859                 /* "acpi=off" disables both ACPI table parsing and interpreter */
860                 else if (!memcmp(from, "acpi=off", 8)) {
861                         disable_acpi();
862                 }
863
864                 /* acpi=force to over-ride black-list */
865                 else if (!memcmp(from, "acpi=force", 10)) {
866                         acpi_force = 1;
867                         acpi_ht = 1;
868                         acpi_disabled = 0;
869                 }
870
871                 /* acpi=strict disables out-of-spec workarounds */
872                 else if (!memcmp(from, "acpi=strict", 11)) {
873                         acpi_strict = 1;
874                 }
875
876                 /* Limit ACPI just to boot-time to enable HT */
877                 else if (!memcmp(from, "acpi=ht", 7)) {
878                         if (!acpi_force)
879                                 disable_acpi();
880                         acpi_ht = 1;
881                 }
882                 
883                 /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
884                 else if (!memcmp(from, "pci=noacpi", 10)) {
885                         acpi_disable_pci();
886                 }
887                 /* "acpi=noirq" disables ACPI interrupt routing */
888                 else if (!memcmp(from, "acpi=noirq", 10)) {
889                         acpi_noirq_set();
890                 }
891
892                 else if (!memcmp(from, "acpi_sci=edge", 13))
893                         acpi_sci_flags.trigger =  1;
894
895                 else if (!memcmp(from, "acpi_sci=level", 14))
896                         acpi_sci_flags.trigger = 3;
897
898                 else if (!memcmp(from, "acpi_sci=high", 13))
899                         acpi_sci_flags.polarity = 1;
900
901                 else if (!memcmp(from, "acpi_sci=low", 12))
902                         acpi_sci_flags.polarity = 3;
903
904 #ifdef CONFIG_X86_IO_APIC
905                 else if (!memcmp(from, "acpi_skip_timer_override", 24))
906                         acpi_skip_timer_override = 1;
907
908                 if (!memcmp(from, "disable_timer_pin_1", 19))
909                         disable_timer_pin_1 = 1;
910                 if (!memcmp(from, "enable_timer_pin_1", 18))
911                         disable_timer_pin_1 = -1;
912
913                 /* disable IO-APIC */
914                 else if (!memcmp(from, "noapic", 6))
915                         disable_ioapic_setup();
916 #endif /* CONFIG_X86_IO_APIC */
917 #endif /* CONFIG_ACPI */
918
919 #ifdef CONFIG_X86_LOCAL_APIC
920                 /* enable local APIC */
921                 else if (!memcmp(from, "lapic", 5))
922                         lapic_enable();
923
924                 /* disable local APIC */
925                 else if (!memcmp(from, "nolapic", 6))
926                         lapic_disable();
927 #endif /* CONFIG_X86_LOCAL_APIC */
928
929 #ifdef CONFIG_KEXEC
930                 /* crashkernel=size@addr specifies the location to reserve for
931                  * a crash kernel.  By reserving this memory we guarantee
932                  * that linux never set's it up as a DMA target.
933                  * Useful for holding code to do something appropriate
934                  * after a kernel panic.
935                  */
936                 else if (!memcmp(from, "crashkernel=", 12)) {
937                         unsigned long size, base;
938                         size = memparse(from+12, &from);
939                         if (*from == '@') {
940                                 base = memparse(from+1, &from);
941                                 /* FIXME: Do I want a sanity check
942                                  * to validate the memory range?
943                                  */
944                                 crashk_res.start = base;
945                                 crashk_res.end   = base + size - 1;
946                         }
947                 }
948 #endif
949 #ifdef CONFIG_PROC_VMCORE
950                 /* elfcorehdr= specifies the location of elf core header
951                  * stored by the crashed kernel.
952                  */
953                 else if (!memcmp(from, "elfcorehdr=", 11))
954                         elfcorehdr_addr = memparse(from+11, &from);
955 #endif
956
957                 /*
958                  * highmem=size forces highmem to be exactly 'size' bytes.
959                  * This works even on boxes that have no highmem otherwise.
960                  * This also works to reduce highmem size on bigger boxes.
961                  */
962                 else if (!memcmp(from, "highmem=", 8))
963                         highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
964         
965                 /*
966                  * vmalloc=size forces the vmalloc area to be exactly 'size'
967                  * bytes. This can be used to increase (or decrease) the
968                  * vmalloc area - the default is 128m.
969                  */
970                 else if (!memcmp(from, "vmalloc=", 8))
971                         __VMALLOC_RESERVE = memparse(from+8, &from);
972
973         next_char:
974                 c = *(from++);
975                 if (!c)
976                         break;
977                 if (COMMAND_LINE_SIZE <= ++len)
978                         break;
979                 *(to++) = c;
980         }
981         *to = '\0';
982         *cmdline_p = command_line;
983         if (userdef) {
984                 printk(KERN_INFO "user-defined physical RAM map:\n");
985                 print_memory_map("user");
986         }
987 }
988
989 /*
990  * Callback for efi_memory_walk.
991  */
992 static int __init
993 efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
994 {
995         unsigned long *max_pfn = arg, pfn;
996
997         if (start < end) {
998                 pfn = PFN_UP(end -1);
999                 if (pfn > *max_pfn)
1000                         *max_pfn = pfn;
1001         }
1002         return 0;
1003 }
1004
1005 static int __init
1006 efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
1007 {
1008         memory_present(0, start, end);
1009         return 0;
1010 }
1011
1012  /*
1013   * This function checks if the entire range <start,end> is mapped with type.
1014   *
1015   * Note: this function only works correct if the e820 table is sorted and
1016   * not-overlapping, which is the case
1017   */
1018 int __init
1019 e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
1020 {
1021         u64 start = s;
1022         u64 end = e;
1023         int i;
1024         for (i = 0; i < e820.nr_map; i++) {
1025                 struct e820entry *ei = &e820.map[i];
1026                 if (type && ei->type != type)
1027                         continue;
1028                 /* is the region (part) in overlap with the current region ?*/
1029                 if (ei->addr >= end || ei->addr + ei->size <= start)
1030                         continue;
1031                 /* if the region is at the beginning of <start,end> we move
1032                  * start to the end of the region since it's ok until there
1033                  */
1034                 if (ei->addr <= start)
1035                         start = ei->addr + ei->size;
1036                 /* if start is now at or beyond end, we're done, full
1037                  * coverage */
1038                 if (start >= end)
1039                         return 1; /* we're done */
1040         }
1041         return 0;
1042 }
1043
1044 /*
1045  * Find the highest page frame number we have available
1046  */
1047 void __init find_max_pfn(void)
1048 {
1049         int i;
1050
1051         max_pfn = 0;
1052         if (efi_enabled) {
1053                 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
1054                 efi_memmap_walk(efi_memory_present_wrapper, NULL);
1055                 return;
1056         }
1057
1058         for (i = 0; i < e820.nr_map; i++) {
1059                 unsigned long start, end;
1060                 /* RAM? */
1061                 if (e820.map[i].type != E820_RAM)
1062                         continue;
1063                 start = PFN_UP(e820.map[i].addr);
1064                 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1065                 if (start >= end)
1066                         continue;
1067                 if (end > max_pfn)
1068                         max_pfn = end;
1069                 memory_present(0, start, end);
1070         }
1071 }
1072
1073 /*
1074  * Determine low and high memory ranges:
1075  */
1076 unsigned long __init find_max_low_pfn(void)
1077 {
1078         unsigned long max_low_pfn;
1079
1080         max_low_pfn = max_pfn;
1081         if (max_low_pfn > MAXMEM_PFN) {
1082                 if (highmem_pages == -1)
1083                         highmem_pages = max_pfn - MAXMEM_PFN;
1084                 if (highmem_pages + MAXMEM_PFN < max_pfn)
1085                         max_pfn = MAXMEM_PFN + highmem_pages;
1086                 if (highmem_pages + MAXMEM_PFN > max_pfn) {
1087                         printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
1088                         highmem_pages = 0;
1089                 }
1090                 max_low_pfn = MAXMEM_PFN;
1091 #ifndef CONFIG_HIGHMEM
1092                 /* Maximum memory usable is what is directly addressable */
1093                 printk(KERN_WARNING "Warning only %ldMB will be used.\n",
1094                                         MAXMEM>>20);
1095                 if (max_pfn > MAX_NONPAE_PFN)
1096                         printk(KERN_WARNING "Use a PAE enabled kernel.\n");
1097                 else
1098                         printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
1099                 max_pfn = MAXMEM_PFN;
1100 #else /* !CONFIG_HIGHMEM */
1101 #ifndef CONFIG_X86_PAE
1102                 if (max_pfn > MAX_NONPAE_PFN) {
1103                         max_pfn = MAX_NONPAE_PFN;
1104                         printk(KERN_WARNING "Warning only 4GB will be used.\n");
1105                         printk(KERN_WARNING "Use a PAE enabled kernel.\n");
1106                 }
1107 #endif /* !CONFIG_X86_PAE */
1108 #endif /* !CONFIG_HIGHMEM */
1109         } else {
1110                 if (highmem_pages == -1)
1111                         highmem_pages = 0;
1112 #ifdef CONFIG_HIGHMEM
1113                 if (highmem_pages >= max_pfn) {
1114                         printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
1115                         highmem_pages = 0;
1116                 }
1117                 if (highmem_pages) {
1118                         if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
1119                                 printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
1120                                 highmem_pages = 0;
1121                         }
1122                         max_low_pfn -= highmem_pages;
1123                 }
1124 #else
1125                 if (highmem_pages)
1126                         printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
1127 #endif
1128         }
1129         return max_low_pfn;
1130 }
1131
1132 /*
1133  * Free all available memory for boot time allocation.  Used
1134  * as a callback function by efi_memory_walk()
1135  */
1136
1137 static int __init
1138 free_available_memory(unsigned long start, unsigned long end, void *arg)
1139 {
1140         /* check max_low_pfn */
1141         if (start >= (max_low_pfn << PAGE_SHIFT))
1142                 return 0;
1143         if (end >= (max_low_pfn << PAGE_SHIFT))
1144                 end = max_low_pfn << PAGE_SHIFT;
1145         if (start < end)
1146                 free_bootmem(start, end - start);
1147
1148         return 0;
1149 }
1150 /*
1151  * Register fully available low RAM pages with the bootmem allocator.
1152  */
1153 static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
1154 {
1155         int i;
1156
1157         if (efi_enabled) {
1158                 efi_memmap_walk(free_available_memory, NULL);
1159                 return;
1160         }
1161         for (i = 0; i < e820.nr_map; i++) {
1162                 unsigned long curr_pfn, last_pfn, size;
1163                 /*
1164                  * Reserve usable low memory
1165                  */
1166                 if (e820.map[i].type != E820_RAM)
1167                         continue;
1168                 /*
1169                  * We are rounding up the start address of usable memory:
1170                  */
1171                 curr_pfn = PFN_UP(e820.map[i].addr);
1172                 if (curr_pfn >= max_low_pfn)
1173                         continue;
1174                 /*
1175                  * ... and at the end of the usable range downwards:
1176                  */
1177                 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1178
1179 #ifdef CONFIG_XEN
1180                 /*
1181                  * Truncate to the number of actual pages currently
1182                  * present.
1183                  */
1184                 if (last_pfn > xen_start_info->nr_pages)
1185                         last_pfn = xen_start_info->nr_pages;
1186 #endif
1187
1188                 if (last_pfn > max_low_pfn)
1189                         last_pfn = max_low_pfn;
1190
1191                 /*
1192                  * .. finally, did all the rounding and playing
1193                  * around just make the area go away?
1194                  */
1195                 if (last_pfn <= curr_pfn)
1196                         continue;
1197
1198                 size = last_pfn - curr_pfn;
1199                 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
1200         }
1201 }
1202
1203 #ifndef CONFIG_XEN
1204 /*
1205  * workaround for Dell systems that neglect to reserve EBDA
1206  */
1207 static void __init reserve_ebda_region(void)
1208 {
1209         unsigned int addr;
1210         addr = get_bios_ebda();
1211         if (addr)
1212                 reserve_bootmem(addr, PAGE_SIZE);       
1213 }
1214 #endif
1215
1216 #ifndef CONFIG_NEED_MULTIPLE_NODES
1217 void __init setup_bootmem_allocator(void);
1218 static unsigned long __init setup_memory(void)
1219 {
1220         /*
1221          * partially used pages are not usable - thus
1222          * we are rounding upwards:
1223          */
1224         min_low_pfn = PFN_UP(__pa(xen_start_info->pt_base)) +
1225                 xen_start_info->nr_pt_frames;
1226
1227         find_max_pfn();
1228
1229         max_low_pfn = find_max_low_pfn();
1230
1231 #ifdef CONFIG_HIGHMEM
1232         highstart_pfn = highend_pfn = max_pfn;
1233         if (max_pfn > max_low_pfn) {
1234                 highstart_pfn = max_low_pfn;
1235         }
1236         printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
1237                 pages_to_mb(highend_pfn - highstart_pfn));
1238 #endif
1239         printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
1240                         pages_to_mb(max_low_pfn));
1241
1242         setup_bootmem_allocator();
1243
1244         return max_low_pfn;
1245 }
1246
1247 void __init zone_sizes_init(void)
1248 {
1249         unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
1250         unsigned int max_dma, low;
1251
1252         /*
1253          * XEN: Our notion of "DMA memory" is fake when running over Xen.
1254          * We simply put all RAM in the DMA zone so that those drivers which
1255          * needlessly specify GFP_DMA do not get starved of RAM unnecessarily.
1256          * Those drivers that *do* require lowmem are screwed anyway when
1257          * running over Xen!
1258          */
1259         max_dma = max_low_pfn;
1260         low = max_low_pfn;
1261
1262         if (low < max_dma)
1263                 zones_size[ZONE_DMA] = low;
1264         else {
1265                 zones_size[ZONE_DMA] = max_dma;
1266                 zones_size[ZONE_NORMAL] = low - max_dma;
1267 #ifdef CONFIG_HIGHMEM
1268                 zones_size[ZONE_HIGHMEM] = highend_pfn - low;
1269 #endif
1270         }
1271         free_area_init(zones_size);
1272 }
1273 #else
1274 extern unsigned long __init setup_memory(void);
1275 extern void zone_sizes_init(void);
1276 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
1277
1278 void __init setup_bootmem_allocator(void)
1279 {
1280         unsigned long bootmap_size;
1281         /*
1282          * Initialize the boot-time allocator (with low memory only):
1283          */
1284         bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
1285
1286         register_bootmem_low_pages(max_low_pfn);
1287
1288         /*
1289          * Reserve the bootmem bitmap itself as well. We do this in two
1290          * steps (first step was init_bootmem()) because this catches
1291          * the (very unlikely) case of us accidentally initializing the
1292          * bootmem allocator with an invalid RAM area.
1293          */
1294         reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
1295                          bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));
1296
1297 #ifndef CONFIG_XEN
1298         /*
1299          * reserve physical page 0 - it's a special BIOS page on many boxes,
1300          * enabling clean reboots, SMP operation, laptop functions.
1301          */
1302         reserve_bootmem(0, PAGE_SIZE);
1303
1304         /* reserve EBDA region, it's a 4K region */
1305         reserve_ebda_region();
1306
1307     /* could be an AMD 768MPX chipset. Reserve a page  before VGA to prevent
1308        PCI prefetch into it (errata #56). Usually the page is reserved anyways,
1309        unless you have no PS/2 mouse plugged in. */
1310         if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
1311             boot_cpu_data.x86 == 6)
1312              reserve_bootmem(0xa0000 - 4096, 4096);
1313
1314 #ifdef CONFIG_SMP
1315         /*
1316          * But first pinch a few for the stack/trampoline stuff
1317          * FIXME: Don't need the extra page at 4K, but need to fix
1318          * trampoline before removing it. (see the GDT stuff)
1319          */
1320         reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
1321 #endif
1322 #ifdef CONFIG_ACPI_SLEEP
1323         /*
1324          * Reserve low memory region for sleep support.
1325          */
1326         acpi_reserve_bootmem();
1327 #endif
1328 #endif /* !CONFIG_XEN */
1329
1330 #ifdef CONFIG_BLK_DEV_INITRD
1331         if (xen_start_info->mod_start) {
1332                 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
1333                         /*reserve_bootmem(INITRD_START, INITRD_SIZE);*/
1334                         initrd_start = INITRD_START + PAGE_OFFSET;
1335                         initrd_end = initrd_start+INITRD_SIZE;
1336                         initrd_below_start_ok = 1;
1337                 }
1338                 else {
1339                         printk(KERN_ERR "initrd extends beyond end of memory "
1340                             "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
1341                             INITRD_START + INITRD_SIZE,
1342                             max_low_pfn << PAGE_SHIFT);
1343                         initrd_start = 0;
1344                 }
1345         }
1346 #endif
1347 #ifdef CONFIG_KEXEC
1348         if (crashk_res.start != crashk_res.end)
1349                 reserve_bootmem(crashk_res.start,
1350                         crashk_res.end - crashk_res.start + 1);
1351 #endif
1352
1353         if (!xen_feature(XENFEAT_auto_translated_physmap))
1354                 phys_to_machine_mapping =
1355                         (unsigned long *)xen_start_info->mfn_list;
1356 }
1357
1358 /*
1359  * The node 0 pgdat is initialized before all of these because
1360  * it's needed for bootmem.  node>0 pgdats have their virtual
1361  * space allocated before the pagetables are in place to access
1362  * them, so they can't be cleared then.
1363  *
1364  * This should all compile down to nothing when NUMA is off.
1365  */
1366 void __init remapped_pgdat_init(void)
1367 {
1368         int nid;
1369
1370         for_each_online_node(nid) {
1371                 if (nid != 0)
1372                         memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
1373         }
1374 }
1375
1376 /*
1377  * Request address space for all standard RAM and ROM resources
1378  * and also for regions reported as reserved by the e820.
1379  */
1380 static void __init
1381 legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
1382 {
1383         int i;
1384         struct e820entry *map = e820.map;
1385         int nr_map = e820.nr_map;
1386 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
1387         struct xen_memory_map memmap;
1388
1389         map = machine_e820.map;
1390         memmap.nr_entries = E820MAX;
1391
1392         set_xen_guest_handle(memmap.buffer, map);
1393
1394         if(HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap))
1395                 BUG();
1396         machine_e820.nr_map = memmap.nr_entries;
1397         nr_map = memmap.nr_entries;
1398         e820_setup_gap(map, memmap.nr_entries);
1399 #endif
1400
1401         probe_roms();
1402
1403         for (i = 0; i < nr_map; i++) {
1404                 struct resource *res;
1405                 if (map[i].addr + map[i].size > 0x100000000ULL)
1406                         continue;
1407                 res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
1408                 switch (map[i].type) {
1409                 case E820_RAM:  res->name = "System RAM"; break;
1410                 case E820_ACPI: res->name = "ACPI Tables"; break;
1411                 case E820_NVS:  res->name = "ACPI Non-volatile Storage"; break;
1412                 default:        res->name = "reserved";
1413                 }
1414                 res->start = map[i].addr;
1415                 res->end = res->start + map[i].size - 1;
1416                 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1417                 if (request_resource(&iomem_resource, res)) {
1418                         kfree(res);
1419                         continue;
1420                 }
1421                 if (map[i].type == E820_RAM) {
1422                         /*
1423                          *  We don't know which RAM region contains kernel data,
1424                          *  so we try it repeatedly and let the resource manager
1425                          *  test it.
1426                          */
1427 #ifndef CONFIG_XEN
1428                         request_resource(res, code_resource);
1429                         request_resource(res, data_resource);
1430 #endif
1431 #ifdef CONFIG_KEXEC
1432                         request_resource(res, &crashk_res);
1433 #endif
1434                 }
1435         }
1436 }
1437
1438 /*
1439  * Request address space for all standard resources
1440  *
1441  * This is called just before pcibios_init(), which is also a
1442  * subsys_initcall, but is linked in later (in arch/i386/pci/common.c).
1443  */
1444 static int __init request_standard_resources(void)
1445 {
1446         int i;
1447
1448         /* Nothing to do if not running in dom0. */
1449         if (!is_initial_xendomain())
1450                 return 0;
1451
1452         printk("Setting up standard PCI resources\n");
1453         if (efi_enabled)
1454                 efi_initialize_iomem_resources(&code_resource, &data_resource);
1455         else
1456                 legacy_init_iomem_resources(&code_resource, &data_resource);
1457
1458         /* EFI systems may still have VGA */
1459         request_resource(&iomem_resource, &video_ram_resource);
1460
1461         /* request I/O space for devices used on all i[345]86 PCs */
1462         for (i = 0; i < STANDARD_IO_RESOURCES; i++)
1463                 request_resource(&ioport_resource, &standard_io_resources[i]);
1464         return 0;
1465 }
1466
1467 subsys_initcall(request_standard_resources);
1468
1469 /*
1470  * Locate a unused range of the physical address space below 4G which
1471  * can be used for PCI mappings.
1472  */
1473 static void __init
1474 e820_setup_gap(struct e820entry *e820, int nr_map)
1475 {
1476         unsigned long gapstart, gapsize, round;
1477         unsigned long long last;
1478         int i;
1479
1480         /*
1481          * Search for the bigest gap in the low 32 bits of the e820
1482          * memory space.
1483          */
1484         last = 0x100000000ull;
1485         gapstart = 0x10000000;
1486         gapsize = 0x400000;
1487         i = nr_map;
1488         while (--i >= 0) {
1489                 unsigned long long start = e820[i].addr;
1490                 unsigned long long end = start + e820[i].size;
1491
1492                 /*
1493                  * Since "last" is at most 4GB, we know we'll
1494                  * fit in 32 bits if this condition is true
1495                  */
1496                 if (last > end) {
1497                         unsigned long gap = last - end;
1498
1499                         if (gap > gapsize) {
1500                                 gapsize = gap;
1501                                 gapstart = end;
1502                         }
1503                 }
1504                 if (start < last)
1505                         last = start;
1506         }
1507
1508         /*
1509          * See how much we want to round up: start off with
1510          * rounding to the next 1MB area.
1511          */
1512         round = 0x100000;
1513         while ((gapsize >> 4) > round)
1514                 round += round;
1515         /* Fun with two's complement */
1516         pci_mem_start = (gapstart + round) & -round;
1517
1518         printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
1519                 pci_mem_start, gapstart, gapsize);
1520 }
1521
1522 static void __init register_memory(void)
1523 {
1524 #ifndef CONFIG_XEN
1525         e820_setup_gap(e820.map, e820.nr_map);
1526 #endif
1527 }
1528
/*
 * Record @x in MCA_bus when CONFIG_MCA is set; otherwise a no-op.
 */
static void set_mca_bus(int x)
{
#ifdef CONFIG_MCA
	MCA_bus = x;
#endif
}
1537
1538 /*
1539  * Determine if we were loaded by an EFI loader.  If so, then we have also been
1540  * passed the efi memmap, systab, etc., so we should use these data structures
1541  * for initialization.  Note, the efi init code path is determined by the
1542  * global efi_enabled. This allows the same kernel image to be used on existing
1543  * systems (with a traditional BIOS) as well as on EFI systems.
1544  */
1545 void __init setup_arch(char **cmdline_p)
1546 {
1547         int i, j, k, fpp;
1548         struct physdev_set_iopl set_iopl;
1549         unsigned long max_low_pfn;
1550
1551         /* Force a quick death if the kernel panics (not domain 0). */
1552         extern int panic_timeout;
1553         if (!panic_timeout && !is_initial_xendomain())
1554                 panic_timeout = 1;
1555
1556         /* Register a call for panic conditions. */
1557         atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
1558
1559         HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
1560         HYPERVISOR_vm_assist(VMASST_CMD_enable,
1561                              VMASST_TYPE_writable_pagetables);
1562
1563         memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
1564         pre_setup_arch_hook();
1565         early_cpu_init();
1566
1567         /*
1568          * FIXME: This isn't an official loader_type right
1569          * now but does currently work with elilo.
1570          * If we were configured as an EFI kernel, check to make
1571          * sure that we were loaded correctly from elilo and that
1572          * the system table is valid.  If not, then initialize normally.
1573          */
1574 #ifdef CONFIG_EFI
1575         if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
1576                 efi_enabled = 1;
1577 #endif
1578
1579         /* This must be initialized to UNNAMED_MAJOR for ipconfig to work
1580            properly.  Setting ROOT_DEV to default to /dev/ram0 breaks initrd.
1581         */
1582         ROOT_DEV = MKDEV(UNNAMED_MAJOR,0);
1583         drive_info = DRIVE_INFO;
1584         screen_info = SCREEN_INFO;
1585         edid_info = EDID_INFO;
1586         apm_info.bios = APM_BIOS_INFO;
1587         ist_info = IST_INFO;
1588         saved_videomode = VIDEO_MODE;
1589         if( SYS_DESC_TABLE.length != 0 ) {
1590                 set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2);
1591                 machine_id = SYS_DESC_TABLE.table[0];
1592                 machine_submodel_id = SYS_DESC_TABLE.table[1];
1593                 BIOS_revision = SYS_DESC_TABLE.table[2];
1594         }
1595         bootloader_type = LOADER_TYPE;
1596
1597         if (is_initial_xendomain()) {
1598                 /* This is drawn from a dump from vgacon:startup in
1599                  * standard Linux. */
1600                 screen_info.orig_video_mode = 3; 
1601                 screen_info.orig_video_isVGA = 1;
1602                 screen_info.orig_video_lines = 25;
1603                 screen_info.orig_video_cols = 80;
1604                 screen_info.orig_video_ega_bx = 3;
1605                 screen_info.orig_video_points = 16;
1606                 screen_info.orig_y = screen_info.orig_video_lines - 1;
1607                 if (xen_start_info->console.dom0.info_size >=
1608                     sizeof(struct dom0_vga_console_info)) {
1609                         const struct dom0_vga_console_info *info =
1610                                 (struct dom0_vga_console_info *)(
1611                                         (char *)xen_start_info +
1612                                         xen_start_info->console.dom0.info_off);
1613                         dom0_init_screen_info(info);
1614                 }
1615                 xen_start_info->console.domU.mfn = 0;
1616                 xen_start_info->console.domU.evtchn = 0;
1617         } else
1618                 screen_info.orig_video_isVGA = 0;
1619
1620 #ifdef CONFIG_BLK_DEV_RAM
1621         rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
1622         rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
1623         rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
1624 #endif
1625
1626         setup_xen_features();
1627
1628         ARCH_SETUP
1629         if (efi_enabled)
1630                 efi_init();
1631         else {
1632                 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1633                 print_memory_map(machine_specific_memory_setup());
1634         }
1635
1636         copy_edd();
1637
1638         if (!MOUNT_ROOT_RDONLY)
1639                 root_mountflags &= ~MS_RDONLY;
1640         init_mm.start_code = (unsigned long) _text;
1641         init_mm.end_code = (unsigned long) _etext;
1642         init_mm.end_data = (unsigned long) _edata;
1643         init_mm.brk = (PFN_UP(__pa(xen_start_info->pt_base)) +
1644                        xen_start_info->nr_pt_frames) << PAGE_SHIFT;
1645
1646         code_resource.start = virt_to_phys(_text);
1647         code_resource.end = virt_to_phys(_etext)-1;
1648         data_resource.start = virt_to_phys(_etext);
1649         data_resource.end = virt_to_phys(_edata)-1;
1650
1651         parse_cmdline_early(cmdline_p);
1652
1653 #ifdef CONFIG_EARLY_PRINTK
1654         {
1655                 char *s = strstr(*cmdline_p, "earlyprintk=");
1656                 if (s) {
1657                         setup_early_printk(strchr(s, '=') + 1);
1658                         printk("early console enabled\n");
1659                 }
1660         }
1661 #endif
1662
1663         max_low_pfn = setup_memory();
1664
1665         /*
1666          * NOTE: before this point _nobody_ is allowed to allocate
1667          * any memory using the bootmem allocator.  Although the
1668          * allocator is now initialised only the first 8Mb of the kernel
1669          * virtual address space has been mapped.  All allocations before
1670          * paging_init() has completed must use the alloc_bootmem_low_pages()
1671          * variant (which allocates DMA'able memory) and care must be taken
1672          * not to exceed the 8Mb limit.
1673          */
1674
1675 #ifdef CONFIG_SMP
1676         smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
1677 #endif
1678         paging_init();
1679         remapped_pgdat_init();
1680         sparse_init();
1681         zone_sizes_init();
1682
1683 #ifdef CONFIG_X86_FIND_SMP_CONFIG
1684         /*
1685          * Find and reserve possible boot-time SMP configuration:
1686          */
1687         find_smp_config();
1688 #endif
1689
1690         /* Make sure we have a correctly sized P->M table. */
1691         if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1692                 phys_to_machine_mapping = alloc_bootmem_low_pages(
1693                      max_pfn * sizeof(unsigned long));
1694                 memset(phys_to_machine_mapping, ~0,
1695                        max_pfn * sizeof(unsigned long));
1696                 memcpy(phys_to_machine_mapping,
1697                        (unsigned long *)xen_start_info->mfn_list,
1698                        xen_start_info->nr_pages * sizeof(unsigned long));
1699                 free_bootmem(
1700                      __pa(xen_start_info->mfn_list),
1701                      PFN_PHYS(PFN_UP(xen_start_info->nr_pages *
1702                                      sizeof(unsigned long))));
1703
1704                 /*
1705                  * Initialise the list of the frames that specify the list of
1706                  * frames that make up the p2m table. Used by save/restore
1707                  */
1708                 pfn_to_mfn_frame_list_list = alloc_bootmem_low_pages(PAGE_SIZE);
1709                 HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
1710                      virt_to_mfn(pfn_to_mfn_frame_list_list);
1711
1712                 fpp = PAGE_SIZE/sizeof(unsigned long);
1713                 for (i=0, j=0, k=-1; i< max_pfn; i+=fpp, j++) {
1714                         if ((j % fpp) == 0) {
1715                                 k++;
1716                                 BUG_ON(k>=16);
1717                                 pfn_to_mfn_frame_list[k] =
1718                                         alloc_bootmem_low_pages(PAGE_SIZE);
1719                                 pfn_to_mfn_frame_list_list[k] =
1720                                         virt_to_mfn(pfn_to_mfn_frame_list[k]);
1721                                 j=0;
1722                         }
1723                         pfn_to_mfn_frame_list[k][j] =
1724                                 virt_to_mfn(&phys_to_machine_mapping[i]);
1725                 }
1726                 HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
1727         }
1728
1729         /*
1730          * NOTE: at this point the bootmem allocator is fully available.
1731          */
1732
1733         if (is_initial_xendomain())
1734                 dmi_scan_machine();
1735
1736 #ifdef CONFIG_X86_GENERICARCH
1737         generic_apic_probe(*cmdline_p);
1738 #endif  
1739         if (efi_enabled)
1740                 efi_map_memmap();
1741
1742         set_iopl.iopl = 1;
1743         HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
1744
1745 #ifdef CONFIG_ACPI
1746         if (!is_initial_xendomain()) {
1747                 printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
1748                 acpi_disabled = 1;
1749                 acpi_ht = 0;
1750         }
1751
1752         /*
1753          * Parse the ACPI tables for possible boot-time SMP configuration.
1754          */
1755         acpi_boot_table_init();
1756 #endif
1757
1758 #ifdef CONFIG_X86_IO_APIC
1759         check_acpi_pci();       /* Checks more than just ACPI actually */
1760 #endif
1761
1762 #ifdef CONFIG_ACPI
1763         acpi_boot_init();
1764
1765 #if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
1766         if (def_to_bigsmp)
1767                 printk(KERN_WARNING "More than 8 CPUs detected and "
1768                         "CONFIG_X86_PC cannot handle it.\nUse "
1769                         "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
1770 #endif
1771 #endif
1772 #ifdef CONFIG_X86_LOCAL_APIC
1773         if (smp_found_config)
1774                 get_smp_config();
1775 #endif
1776 #if defined(CONFIG_XEN) && defined(CONFIG_SMP)
1777         prefill_possible_map();
1778 #endif
1779
1780         register_memory();
1781
1782         if (is_initial_xendomain()) {
1783 #ifdef CONFIG_VT
1784 #if defined(CONFIG_VGA_CONSOLE)
1785                 if (!efi_enabled ||
1786                     (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
1787                         conswitchp = &vga_con;
1788 #elif defined(CONFIG_DUMMY_CONSOLE)
1789                 conswitchp = &dummy_con;
1790 #endif
1791 #endif
1792         } else {
1793 #if defined(CONFIG_VT) && defined(CONFIG_DUMMY_CONSOLE)
1794                 conswitchp = &dummy_con;
1795 #endif
1796         }
1797 #ifdef CONFIG_X86_TSC
1798         tsc_init();
1799 #endif
1800 }
1801
1802 static int
1803 xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
1804 {
1805         HYPERVISOR_shutdown(SHUTDOWN_crash);
1806         /* we're never actually going to get here... */
1807         return NOTIFY_DONE;
1808 }
1809
1810 static __init int add_pcspkr(void)
1811 {
1812         struct platform_device *pd;
1813         int ret;
1814
1815         pd = platform_device_alloc("pcspkr", -1);
1816         if (!pd)
1817                 return -ENOMEM;
1818
1819         ret = platform_device_add(pd);
1820         if (ret)
1821                 platform_device_put(pd);
1822
1823         return ret;
1824 }
1825 device_initcall(add_pcspkr);
1826
1827 /*
1828  * Local Variables:
1829  * mode:c
1830  * c-file-style:"k&r"
1831  * c-basic-offset:8
1832  * End:
1833  */