/* linux-2.6.git: arch/xen/i386/kernel/setup.c — initial revision (gitweb export) */
1 /*
2  *  linux/arch/i386/kernel/setup.c
3  *
4  *  Copyright (C) 1995  Linus Torvalds
5  *
6  *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
7  *
8  *  Memory region support
9  *      David Parsons <orc@pell.chi.il.us>, July-August 1999
10  *
11  *  Added E820 sanitization routine (removes overlapping memory regions);
12  *  Brian Moyle <bmoyle@mvista.com>, February 2001
13  *
14  * Moved CPU detection code to cpu/${cpu}.c
15  *    Patrick Mochel <mochel@osdl.org>, March 2002
16  *
17  *  Provisions for empty E820 memory regions (reported by certain BIOSes).
18  *  Alex Achenbach <xela@slit.de>, December 2002.
19  *
20  */
21
22 /*
23  * This file handles the architecture-dependent parts of initialization
24  */
25
26 #include <linux/sched.h>
27 #include <linux/mm.h>
28 #include <linux/tty.h>
29 #include <linux/ioport.h>
30 #include <linux/acpi.h>
31 #include <linux/apm_bios.h>
32 #include <linux/initrd.h>
33 #include <linux/bootmem.h>
34 #include <linux/seq_file.h>
35 #include <linux/console.h>
36 #include <linux/mca.h>
37 #include <linux/root_dev.h>
38 #include <linux/highmem.h>
39 #include <linux/module.h>
40 #include <linux/efi.h>
41 #include <linux/init.h>
42 #include <linux/edd.h>
43 #include <linux/percpu.h>
44 #include <linux/notifier.h>
45 #include <video/edid.h>
46 #include <asm/e820.h>
47 #include <asm/mpspec.h>
48 #include <asm/setup.h>
49 #include <asm/arch_hooks.h>
50 #include <asm/sections.h>
51 #include <asm/io_apic.h>
52 #include <asm/ist.h>
53 #include <asm/io.h>
54 #include <asm-xen/hypervisor.h>
55 #include <asm-xen/xen-public/physdev.h>
56 #include "setup_arch_pre.h"
57 #include <bios_ebda.h>
58
59 /* Allows setting of maximum possible memory size  */
60 static unsigned long xen_override_max_pfn;
61
62 extern struct notifier_block *panic_notifier_list;
63 static int xen_panic_event(struct notifier_block *, unsigned long, void *);
64 static struct notifier_block xen_panic_block = {
65         xen_panic_event,
66         NULL,
67         0 /* try to go last */
68 };
69
70
71 int disable_pse __initdata = 0;
72
73 /*
74  * Machine setup..
75  */
76
#ifdef CONFIG_EFI
/* Non-zero when the kernel was booted from EFI firmware. */
int efi_enabled = 0;
EXPORT_SYMBOL(efi_enabled);
#endif

/* cpu data as detected by the assembly code in head.S */
struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 0, 1, 0, -1 };
/* common cpu data for all cpus */
struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 0, 1, 0, -1 };

/* Cached value of the CR4 feature bits the kernel enables. */
unsigned long mmu_cr4_features;
EXPORT_SYMBOL_GPL(mmu_cr4_features);

#ifdef	CONFIG_ACPI_INTERPRETER
	int acpi_disabled = 0;
#else
	int acpi_disabled = 1;
#endif
EXPORT_SYMBOL(acpi_disabled);

#ifdef	CONFIG_ACPI_BOOT
/* Set by "acpi=force" to override the ACPI blacklist. */
int __initdata acpi_force = 0;
extern acpi_interrupt_flags	acpi_sci_flags;
#endif

/* for MCA, but anyone else can use it if they want */
unsigned int machine_id;
unsigned int machine_submodel_id;
unsigned int BIOS_revision;
unsigned int mca_pentium_flag;

/* For PCI or other memory-mapped resources */
unsigned long pci_mem_start = 0x10000000;

/* Boot loader ID as an integer, for the benefit of proc_dointvec */
int bootloader_type;

/* user-defined highmem size */
static unsigned int highmem_pages = -1;

/*
 * Setup options
 */
struct drive_info_struct { char dummy[32]; } drive_info;
struct screen_info screen_info;
struct apm_info apm_info;
struct sys_desc_table_struct {
	unsigned short length;
	unsigned char table[0];
};
struct edid_info edid_info;
struct ist_info ist_info;
struct e820map e820;

/* Non-zero when a PS/2 auxiliary (mouse) device was detected. */
unsigned char aux_device_present;

extern void early_cpu_init(void);
extern void dmi_scan_machine(void);
extern void generic_apic_probe(char *);
extern int root_mountflags;

/* Video mode active at boot, saved for APM suspend/resume. */
unsigned long saved_videomode;

/* Layout of the ramdisk_flags boot parameter. */
#define RAMDISK_IMAGE_START_MASK	0x07FF
#define RAMDISK_PROMPT_FLAG		0x8000
#define RAMDISK_LOAD_FLAG		0x4000

/* Working copy of the kernel command line used during parsing. */
static char command_line[COMMAND_LINE_SIZE];

/* Zero-page (boot parameter) copy made early in startup. */
unsigned char __initdata boot_params[PARAM_SIZE];
147
/* iomem resource covering the kernel's data segment; the start/end are
 * filled in at runtime from the section symbols. */
static struct resource data_resource = {
	.name	= "Kernel data",
	.start	= 0,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
};

/* iomem resource covering the kernel's text segment; filled in at runtime. */
static struct resource code_resource = {
	.name	= "Kernel code",
	.start	= 0,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
};
161
#ifdef CONFIG_XEN_PRIVILEGED_GUEST
/* Legacy PC ROM windows, only probed/claimed when this guest has
 * privileged (dom0) access to the physical machine. */
static struct resource system_rom_resource = {
	.name	= "System ROM",
	.start	= 0xf0000,
	.end	= 0xfffff,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
};

static struct resource extension_rom_resource = {
	.name	= "Extension ROM",
	.start	= 0xe0000,
	.end	= 0xeffff,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
};

/* Up to six adapter ROMs; entries past the first get their ranges
 * filled in by probe_roms() as signatures are found. */
static struct resource adapter_rom_resources[] = { {
	.name	= "Adapter ROM",
	.start	= 0xc8000,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
	.name	= "Adapter ROM",
	.start	= 0,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
	.name	= "Adapter ROM",
	.start	= 0,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
	.name	= "Adapter ROM",
	.start	= 0,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
	.name	= "Adapter ROM",
	.start	= 0,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
	.name	= "Adapter ROM",
	.start	= 0,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
} };

/* Number of adapter ROM slots above. */
#define ADAPTER_ROM_RESOURCES \
	(sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])

static struct resource video_rom_resource = {
	.name	= "Video ROM",
	.start	= 0xc0000,
	.end	= 0xc7fff,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
};
#endif
219
/* Legacy VGA framebuffer window. */
static struct resource video_ram_resource = {
	.name	= "Video RAM area",
	.start	= 0xa0000,
	.end	= 0xbffff,
	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
};

/* Fixed legacy PC I/O-port ranges, claimed unconditionally at setup. */
static struct resource standard_io_resources[] = { {
	.name	= "dma1",
	.start	= 0x0000,
	.end	= 0x001f,
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.name	= "pic1",
	.start	= 0x0020,
	.end	= 0x0021,
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.name	= "timer0",
	.start	= 0x0040,
	.end	= 0x0043,
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.name	= "timer1",
	.start	= 0x0050,
	.end	= 0x0053,
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.name	= "keyboard",
	.start	= 0x0060,
	.end	= 0x006f,
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.name	= "dma page reg",
	.start	= 0x0080,
	.end	= 0x008f,
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.name	= "pic2",
	.start	= 0x00a0,
	.end	= 0x00a1,
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.name	= "dma2",
	.start	= 0x00c0,
	.end	= 0x00df,
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.name	= "fpu",
	.start	= 0x00f0,
	.end	= 0x00ff,
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
} };

/* Number of entries in standard_io_resources[]. */
#define STANDARD_IO_RESOURCES \
	(sizeof standard_io_resources / sizeof standard_io_resources[0])
276
277 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
278 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
279
280 static int __init romchecksum(unsigned char *rom, unsigned long length)
281 {
282         unsigned char *p, sum = 0;
283
284         for (p = rom; p < rom + length; p++)
285                 sum += *p;
286         return sum == 0;
287 }
288
/*
 * Scan the legacy ISA ROM areas (video ROM, system ROM, extension ROM
 * and adapter ROMs), validate signatures/checksums, and claim the
 * corresponding iomem resources so nothing else maps over them.
 */
static void __init probe_roms(void)
{
	unsigned long start, length, upper;
	unsigned char *rom;
	int	      i;

	/* video rom */
	upper = adapter_rom_resources[0].start;
	for (start = video_rom_resource.start; start < upper; start += 2048) {
		rom = isa_bus_to_virt(start);
		if (!romsignature(rom))
			continue;

		video_rom_resource.start = start;

		/* 0 < length <= 0x7f * 512, historically */
		length = rom[2] * 512;

		/* if checksum okay, trust length byte */
		if (length && romchecksum(rom, length))
			video_rom_resource.end = start + length - 1;

		request_resource(&iomem_resource, &video_rom_resource);
		break;
	}

	/* resume scanning at the next 2k boundary after the video ROM */
	start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
	if (start < upper)
		start = upper;

	/* system rom */
	request_resource(&iomem_resource, &system_rom_resource);
	upper = system_rom_resource.start;

	/* check for extension rom (ignore length byte!) */
	rom = isa_bus_to_virt(extension_rom_resource.start);
	if (romsignature(rom)) {
		length = extension_rom_resource.end - extension_rom_resource.start + 1;
		if (romchecksum(rom, length)) {
			request_resource(&iomem_resource, &extension_rom_resource);
			upper = extension_rom_resource.start;
		}
	}

	/* check for adapter roms on 2k boundaries */
	for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
		rom = isa_bus_to_virt(start);
		if (!romsignature(rom))
			continue;

		/* 0 < length <= 0x7f * 512, historically */
		length = rom[2] * 512;

		/* but accept any length that fits if checksum okay */
		if (!length || start + length > upper || !romchecksum(rom, length))
			continue;

		adapter_rom_resources[i].start = start;
		adapter_rom_resources[i].end = start + length - 1;
		request_resource(&iomem_resource, &adapter_rom_resources[i]);

		/* continue after this ROM, rounded down to a 2k boundary */
		start = adapter_rom_resources[i++].end & ~2047UL;
	}
}
353 #endif
354
/*
 * Point at the empty zero page to start with. We map the real shared_info
 * page as soon as fixmap is up and running.
 */
shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
EXPORT_SYMBOL(HYPERVISOR_shared_info);

/* pfn -> mfn translation table, plus the frame list handed to the
 * hypervisor.  NOTE(review): entries appear to be 32-bit machine frame
 * numbers — confirm against the hypervisor interface in use. */
unsigned int *phys_to_machine_mapping, *pfn_to_mfn_frame_list;
EXPORT_SYMBOL(phys_to_machine_mapping);

/* Raw start-of-day parameters from the hypervisor. */
union xen_start_info_union xen_start_info_union;
367
368 static void __init limit_regions(unsigned long long size)
369 {
370         unsigned long long current_addr = 0;
371         int i;
372
373         if (efi_enabled) {
374                 for (i = 0; i < memmap.nr_map; i++) {
375                         current_addr = memmap.map[i].phys_addr +
376                                        (memmap.map[i].num_pages << 12);
377                         if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) {
378                                 if (current_addr >= size) {
379                                         memmap.map[i].num_pages -=
380                                                 (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
381                                         memmap.nr_map = i + 1;
382                                         return;
383                                 }
384                         }
385                 }
386         }
387         for (i = 0; i < e820.nr_map; i++) {
388                 if (e820.map[i].type == E820_RAM) {
389                         current_addr = e820.map[i].addr + e820.map[i].size;
390                         if (current_addr >= size) {
391                                 e820.map[i].size -= current_addr-size;
392                                 e820.nr_map = i + 1;
393                                 return;
394                         }
395                 }
396         }
397 }
398
399 static void __init add_memory_region(unsigned long long start,
400                                   unsigned long long size, int type)
401 {
402         int x;
403
404         if (!efi_enabled) {
405                 x = e820.nr_map;
406
407                 if (x == E820MAX) {
408                     printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
409                     return;
410                 }
411
412                 e820.map[x].addr = start;
413                 e820.map[x].size = size;
414                 e820.map[x].type = type;
415                 e820.nr_map++;
416         }
417 } /* add_memory_region */
418
419 #define E820_DEBUG      1
420
421 static void __init print_memory_map(char *who)
422 {
423         int i;
424
425         for (i = 0; i < e820.nr_map; i++) {
426                 printk(" %s: %016Lx - %016Lx ", who,
427                         e820.map[i].addr,
428                         e820.map[i].addr + e820.map[i].size);
429                 switch (e820.map[i].type) {
430                 case E820_RAM:  printk("(usable)\n");
431                                 break;
432                 case E820_RESERVED:
433                                 printk("(reserved)\n");
434                                 break;
435                 case E820_ACPI:
436                                 printk("(ACPI data)\n");
437                                 break;
438                 case E820_NVS:
439                                 printk("(ACPI NVS)\n");
440                                 break;
441                 default:        printk("type %lu\n", e820.map[i].type);
442                                 break;
443                 }
444         }
445 }
446
447 #if 0
448 /*
449  * Sanitize the BIOS e820 map.
450  *
451  * Some e820 responses include overlapping entries.  The following 
452  * replaces the original e820 map with a new one, removing overlaps.
453  *
454  */
struct change_member {
	struct e820entry *pbios; /* pointer to original bios entry */
	unsigned long long addr; /* address for this change point */
};
/* Scratch tables for sanitize_e820_map(); __initdata so the memory is
 * reclaimed after boot. */
struct change_member change_point_list[2*E820MAX] __initdata;
struct change_member *change_point[2*E820MAX] __initdata;
struct e820entry *overlap_list[E820MAX] __initdata;
struct e820entry new_bios[E820MAX] __initdata;

/*
 * Rewrite @biosmap (holding *@pnr_map entries) into an equivalent map
 * with no overlapping regions.  Where regions overlap, the numerically
 * larger (more restrictive) type wins.  Returns 0 on success and
 * updates *@pnr_map; returns -1 when the map has fewer than two entries
 * or contains an entry whose addr+size wraps around 64 bits.
 */
static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
{
	struct change_member *change_tmp;
	unsigned long current_type, last_type;
	unsigned long long last_addr;
	int chgidx, still_changing;
	int overlap_entries;
	int new_bios_entry;
	int old_nr, new_nr, chg_nr;
	int i;

	/*
		Visually we're performing the following (1,2,3,4 = memory types)...

		Sample memory map (w/overlaps):
		   ____22__________________
		   ______________________4_
		   ____1111________________
		   _44_____________________
		   11111111________________
		   ____________________33__
		   ___________44___________
		   __________33333_________
		   ______________22________
		   ___________________2222_
		   _________111111111______
		   _____________________11_
		   _________________4______

		Sanitized equivalent (no overlap):
		   1_______________________
		   _44_____________________
		   ___1____________________
		   ____22__________________
		   ______11________________
		   _________1______________
		   __________3_____________
		   ___________44___________
		   _____________33_________
		   _______________2________
		   ________________1_______
		   _________________4______
		   ___________________2____
		   ____________________33__
		   ______________________4_
	*/

	/* if there's only one memory region, don't bother */
	if (*pnr_map < 2)
		return -1;

	old_nr = *pnr_map;

	/* bail out if we find any unreasonable addresses in bios map */
	for (i=0; i<old_nr; i++)
		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
			return -1;

	/* create pointers for initial change-point information (for sorting) */
	for (i=0; i < 2*old_nr; i++)
		change_point[i] = &change_point_list[i];

	/* record all known change-points (starting and ending addresses),
	   omitting those that are for empty memory regions */
	chgidx = 0;
	for (i=0; i < old_nr; i++)	{
		if (biosmap[i].size != 0) {
			change_point[chgidx]->addr = biosmap[i].addr;
			change_point[chgidx++]->pbios = &biosmap[i];
			change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
			change_point[chgidx++]->pbios = &biosmap[i];
		}
	}
	chg_nr = chgidx;	/* true number of change-points */

	/* sort change-point list by memory addresses (low -> high) */
	still_changing = 1;
	while (still_changing)	{
		still_changing = 0;
		for (i=1; i < chg_nr; i++)  {
			/* if <current_addr> > <last_addr>, swap */
			/* or, if current=<start_addr> & last=<end_addr>, swap */
			if ((change_point[i]->addr < change_point[i-1]->addr) ||
				((change_point[i]->addr == change_point[i-1]->addr) &&
				 (change_point[i]->addr == change_point[i]->pbios->addr) &&
				 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
			   )
			{
				change_tmp = change_point[i];
				change_point[i] = change_point[i-1];
				change_point[i-1] = change_tmp;
				still_changing=1;
			}
		}
	}

	/* create a new bios memory map, removing overlaps */
	overlap_entries=0;	 /* number of entries in the overlap table */
	new_bios_entry=0;	 /* index for creating new bios map entries */
	last_type = 0;		 /* start with undefined memory type */
	last_addr = 0;		 /* start with 0 as last starting address */
	/* loop through change-points, determining affect on the new bios map */
	for (chgidx=0; chgidx < chg_nr; chgidx++)
	{
		/* keep track of all overlapping bios entries */
		if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
		{
			/* add map entry to overlap list (> 1 entry implies an overlap) */
			overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
		}
		else
		{
			/* remove entry from list (order independent, so swap with last) */
			for (i=0; i<overlap_entries; i++)
			{
				if (overlap_list[i] == change_point[chgidx]->pbios)
					overlap_list[i] = overlap_list[overlap_entries-1];
			}
			overlap_entries--;
		}
		/* if there are overlapping entries, decide which "type" to use */
		/* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
		current_type = 0;
		for (i=0; i<overlap_entries; i++)
			if (overlap_list[i]->type > current_type)
				current_type = overlap_list[i]->type;
		/* continue building up new bios map based on this information */
		if (current_type != last_type)	{
			if (last_type != 0)	 {
				new_bios[new_bios_entry].size =
					change_point[chgidx]->addr - last_addr;
				/* move forward only if the new size was non-zero */
				if (new_bios[new_bios_entry].size != 0)
					if (++new_bios_entry >= E820MAX)
						break;	/* no more space left for new bios entries */
			}
			if (current_type != 0)	{
				new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
				new_bios[new_bios_entry].type = current_type;
				last_addr=change_point[chgidx]->addr;
			}
			last_type = current_type;
		}
	}
	new_nr = new_bios_entry;   /* retain count for new bios entries */

	/* copy new bios mapping into original location */
	memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
	*pnr_map = new_nr;

	return 0;
}
616
617 /*
618  * Copy the BIOS e820 map into a safe place.
619  *
620  * Sanity-check it while we're at it..
621  *
622  * If we're lucky and live on a modern system, the setup code
623  * will have given us a memory map that we can use to properly
624  * set up memory.  If we aren't, we'll fake a memory map.
625  *
626  * We check to see that the memory map contains at least 2 elements
627  * before we'll use it, because the detection code in setup.S may
628  * not be perfect and most every PC known to man has two memory
629  * regions: one from 0 to 640k, and one from 1mb up.  (The IBM
630  * thinkpad 560x, for example, does not cooperate with the memory
631  * detection code.)
632  */
/*
 * Copy @nr_map entries from the BIOS-provided @biosmap into the kernel's
 * e820 map via add_memory_region(), splitting any RAM entry that claims
 * the 640k-1M hole.  Returns 0 on success, -1 when the map has fewer
 * than two entries or an entry's addr+size wraps around 64 bits.
 */
static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
{
	/* Only one memory region (or negative)? Ignore it */
	if (nr_map < 2)
		return -1;

	do {
		unsigned long long start = biosmap->addr;
		unsigned long long size = biosmap->size;
		unsigned long long end = start + size;
		unsigned long type = biosmap->type;

		/* Overflow in 64 bits? Ignore the memory map. */
		if (start > end)
			return -1;

		/*
		 * Some BIOSes claim RAM in the 640k - 1M region.
		 * Not right. Fix it up.
		 */
		if (type == E820_RAM) {
			if (start < 0x100000ULL && end > 0xA0000ULL) {
				/* keep the part below 640k... */
				if (start < 0xA0000ULL)
					add_memory_region(start, 0xA0000ULL-start, type);
				/* ...drop the hole, keep anything above 1M */
				if (end <= 0x100000ULL)
					continue;
				start = 0x100000ULL;
				size = end - start;
			}
		}
		add_memory_region(start, size, type);
	} while (biosmap++,--nr_map);
	return 0;
}
667 #endif
668
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
struct edd edd;
#ifdef CONFIG_EDD_MODULE
EXPORT_SYMBOL(edd);
#endif
/**
 * copy_edd() - Copy the BIOS EDD information
 *              from boot_params into a safe place.
 *
 */
static inline void copy_edd(void)
{
     memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
     memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
     edd.mbr_signature_nr = EDD_MBR_SIG_NR;
     edd.edd_info_nr = EDD_NR;
}
#else
/* EDD support not configured: keep a no-op so callers need no #ifdefs. */
static inline void copy_edd(void)
{
}
#endif
691
692 /*
693  * Do NOT EVER look at the BIOS memory size location.
694  * It does not work on many machines.
695  */
696 #define LOWMEMSIZE()    (0x9f000)
697
698 static void __init parse_cmdline_early (char ** cmdline_p)
699 {
700         char c = ' ', *to = command_line, *from = saved_command_line;
701         int len = 0;
702         int userdef = 0;
703
704         memcpy(saved_command_line, xen_start_info.cmd_line, MAX_CMDLINE);
705         /* Save unparsed command line copy for /proc/cmdline */
706         saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
707
708         for (;;) {
709                 if (c != ' ')
710                         goto next_char;
711                 /*
712                  * "mem=nopentium" disables the 4MB page tables.
713                  * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
714                  * to <mem>, overriding the bios size.
715                  * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
716                  * <start> to <start>+<mem>, overriding the bios size.
717                  *
718                  * HPA tells me bootloaders need to parse mem=, so no new
719                  * option should be mem=  [also see Documentation/i386/boot.txt]
720                  */
721                 if (!memcmp(from, "mem=", 4)) {
722                         if (to != command_line)
723                                 to--;
724                         if (!memcmp(from+4, "nopentium", 9)) {
725                                 from += 9+4;
726                                 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
727                                 disable_pse = 1;
728                         } else {
729                                 /* If the user specifies memory size, we
730                                  * limit the BIOS-provided memory map to
731                                  * that size. exactmap can be used to specify
732                                  * the exact map. mem=number can be used to
733                                  * trim the existing memory map.
734                                  */
735                                 unsigned long long mem_size;
736  
737                                 mem_size = memparse(from+4, &from);
738 #if 0
739                                 limit_regions(mem_size);
740                                 userdef=1;
741 #else
742                                 xen_override_max_pfn =
743                                         (unsigned long)(mem_size>>PAGE_SHIFT);
744 #endif
745                         }
746                 }
747
748                 else if (!memcmp(from, "memmap=", 7)) {
749                         if (to != command_line)
750                                 to--;
751                         if (!memcmp(from+7, "exactmap", 8)) {
752                                 from += 8+7;
753                                 e820.nr_map = 0;
754                                 userdef = 1;
755                         } else {
756                                 /* If the user specifies memory size, we
757                                  * limit the BIOS-provided memory map to
758                                  * that size. exactmap can be used to specify
759                                  * the exact map. mem=number can be used to
760                                  * trim the existing memory map.
761                                  */
762                                 unsigned long long start_at, mem_size;
763  
764                                 mem_size = memparse(from+7, &from);
765                                 if (*from == '@') {
766                                         start_at = memparse(from+1, &from);
767                                         add_memory_region(start_at, mem_size, E820_RAM);
768                                 } else if (*from == '#') {
769                                         start_at = memparse(from+1, &from);
770                                         add_memory_region(start_at, mem_size, E820_ACPI);
771                                 } else if (*from == '$') {
772                                         start_at = memparse(from+1, &from);
773                                         add_memory_region(start_at, mem_size, E820_RESERVED);
774                                 } else {
775                                         limit_regions(mem_size);
776                                         userdef=1;
777                                 }
778                         }
779                 }
780
781                 else if (!memcmp(from, "noexec=", 7))
782                         noexec_setup(from + 7);
783
784
785 #ifdef  CONFIG_X86_SMP
786                 /*
787                  * If the BIOS enumerates physical processors before logical,
788                  * maxcpus=N at enumeration-time can be used to disable HT.
789                  */
790                 else if (!memcmp(from, "maxcpus=", 8)) {
791                         extern unsigned int maxcpus;
792
793                         maxcpus = simple_strtoul(from + 8, NULL, 0);
794                 }
795 #endif
796
797 #ifdef CONFIG_ACPI_BOOT
798                 /* "acpi=off" disables both ACPI table parsing and interpreter */
799                 else if (!memcmp(from, "acpi=off", 8)) {
800                         disable_acpi();
801                 }
802
803                 /* acpi=force to over-ride black-list */
804                 else if (!memcmp(from, "acpi=force", 10)) {
805                         acpi_force = 1;
806                         acpi_ht = 1;
807                         acpi_disabled = 0;
808                 }
809
810                 /* acpi=strict disables out-of-spec workarounds */
811                 else if (!memcmp(from, "acpi=strict", 11)) {
812                         acpi_strict = 1;
813                 }
814
815                 /* Limit ACPI just to boot-time to enable HT */
816                 else if (!memcmp(from, "acpi=ht", 7)) {
817                         if (!acpi_force)
818                                 disable_acpi();
819                         acpi_ht = 1;
820                 }
821                 
822                 /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
823                 else if (!memcmp(from, "pci=noacpi", 10)) {
824                         acpi_disable_pci();
825                 }
826                 /* "acpi=noirq" disables ACPI interrupt routing */
827                 else if (!memcmp(from, "acpi=noirq", 10)) {
828                         acpi_noirq_set();
829                 }
830
831                 else if (!memcmp(from, "acpi_sci=edge", 13))
832                         acpi_sci_flags.trigger =  1;
833
834                 else if (!memcmp(from, "acpi_sci=level", 14))
835                         acpi_sci_flags.trigger = 3;
836
837                 else if (!memcmp(from, "acpi_sci=high", 13))
838                         acpi_sci_flags.polarity = 1;
839
840                 else if (!memcmp(from, "acpi_sci=low", 12))
841                         acpi_sci_flags.polarity = 3;
842
843 #ifdef CONFIG_X86_IO_APIC
844                 else if (!memcmp(from, "acpi_skip_timer_override", 24))
845                         acpi_skip_timer_override = 1;
846 #endif
847
848 #ifdef CONFIG_X86_LOCAL_APIC
849                 /* disable IO-APIC */
850                 else if (!memcmp(from, "noapic", 6))
851                         disable_ioapic_setup();
852 #endif /* CONFIG_X86_LOCAL_APIC */
853 #endif /* CONFIG_ACPI_BOOT */
854
855                 /*
856                  * highmem=size forces highmem to be exactly 'size' bytes.
857                  * This works even on boxes that have no highmem otherwise.
858                  * This also works to reduce highmem size on bigger boxes.
859                  */
860                 else if (!memcmp(from, "highmem=", 8))
861                         highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
862         
863                 /*
864                  * vmalloc=size forces the vmalloc area to be exactly 'size'
865                  * bytes. This can be used to increase (or decrease) the
866                  * vmalloc area - the default is 128m.
867                  */
868                 else if (!memcmp(from, "vmalloc=", 8))
869                         __VMALLOC_RESERVE = memparse(from+8, &from);
870
871         next_char:
872                 c = *(from++);
873                 if (!c)
874                         break;
875                 if (COMMAND_LINE_SIZE <= ++len)
876                         break;
877                 *(to++) = c;
878         }
879         *to = '\0';
880         *cmdline_p = command_line;
881         if (userdef) {
882                 printk(KERN_INFO "user-defined physical RAM map:\n");
883                 print_memory_map("user");
884         }
885 }
886
#if 0 /* !XEN */
/*
 * Callback for efi_memory_walk.
 * Tracks the highest PFN touched by any walked range via *arg.
 */
static int __init
efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
{
	unsigned long *max_pfn = arg, pfn;

	if (start < end) {
		/* Round the (exclusive) end address down to its containing PFN. */
		pfn = PFN_UP(end -1);
		if (pfn > *max_pfn)
			*max_pfn = pfn;
	}
	return 0;
}

/*
 * Find the highest page frame number we have available
 */
void __init find_max_pfn(void)
{
	int i;

	max_pfn = 0;
	if (efi_enabled) {
		efi_memmap_walk(efi_find_max_pfn, &max_pfn);
		return;
	}

	for (i = 0; i < e820.nr_map; i++) {
		unsigned long start, end;
		/* RAM? */
		if (e820.map[i].type != E820_RAM)
			continue;
		/* Partially covered pages count only when fully usable. */
		start = PFN_UP(e820.map[i].addr);
		end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
		if (start >= end)
			continue;
		if (end > max_pfn)
			max_pfn = end;
	}
}
#else
/* We don't use the fake e820 because we need to respond to user override. */
void __init find_max_pfn(void)
{
	/* Never report fewer pages than Xen actually granted this domain. */
	if ( xen_override_max_pfn < xen_start_info.nr_pages )
		xen_override_max_pfn = xen_start_info.nr_pages;
	max_pfn = xen_override_max_pfn;
}
#endif /* XEN */
939
940 /*
941  * Determine low and high memory ranges:
942  */
unsigned long __init find_max_low_pfn(void)
{
	unsigned long max_low_pfn;

	max_low_pfn = max_pfn;
	if (max_low_pfn > MAXMEM_PFN) {
		/*
		 * More RAM than lowmem can map: everything beyond
		 * MAXMEM_PFN becomes highmem unless the user asked
		 * for a specific amount via "highmem=".
		 */
		if (highmem_pages == -1)
			highmem_pages = max_pfn - MAXMEM_PFN;
		/* A smaller highmem= request trims total usable memory... */
		if (highmem_pages + MAXMEM_PFN < max_pfn)
			max_pfn = MAXMEM_PFN + highmem_pages;
		/* ...while a larger one cannot be honoured and is dropped. */
		if (highmem_pages + MAXMEM_PFN > max_pfn) {
			printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
			highmem_pages = 0;
		}
		max_low_pfn = MAXMEM_PFN;
#ifndef CONFIG_HIGHMEM
		/* Maximum memory usable is what is directly addressable */
		printk(KERN_WARNING "Warning only %ldMB will be used.\n",
					MAXMEM>>20);
		if (max_pfn > MAX_NONPAE_PFN)
			printk(KERN_WARNING "Use a PAE enabled kernel.\n");
		else
			printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
		max_pfn = MAXMEM_PFN;
#else /* !CONFIG_HIGHMEM */
#ifndef CONFIG_X86_PAE
		/* Without PAE, physical addresses above 4GB are unreachable. */
		if (max_pfn > MAX_NONPAE_PFN) {
			max_pfn = MAX_NONPAE_PFN;
			printk(KERN_WARNING "Warning only 4GB will be used.\n");
			printk(KERN_WARNING "Use a PAE enabled kernel.\n");
		}
#endif /* !CONFIG_X86_PAE */
#endif /* !CONFIG_HIGHMEM */
	} else {
		/* All memory fits in lowmem; highmem only if the user forces it. */
		if (highmem_pages == -1)
			highmem_pages = 0;
#ifdef CONFIG_HIGHMEM
		if (highmem_pages >= max_pfn) {
			printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
			highmem_pages = 0;
		}
		if (highmem_pages) {
			/* Refuse configurations leaving less than 64MB of lowmem. */
			if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
				printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
				highmem_pages = 0;
			}
			max_low_pfn -= highmem_pages;
		}
#else
		if (highmem_pages)
			printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
#endif
	}
	return max_low_pfn;
}
998
999 #ifndef CONFIG_DISCONTIGMEM
1000
1001 /*
1002  * Free all available memory for boot time allocation.  Used
1003  * as a callback function by efi_memory_walk()
1004  */
1005
1006 static int __init
1007 free_available_memory(unsigned long start, unsigned long end, void *arg)
1008 {
1009         /* check max_low_pfn */
1010         if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
1011                 return 0;
1012         if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
1013                 end = (max_low_pfn + 1) << PAGE_SHIFT;
1014         if (start < end)
1015                 free_bootmem(start, end - start);
1016
1017         return 0;
1018 }
1019 /*
1020  * Register fully available low RAM pages with the bootmem allocator.
1021  */
1022 static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
1023 {
1024         int i;
1025
1026         if (efi_enabled) {
1027                 efi_memmap_walk(free_available_memory, NULL);
1028                 return;
1029         }
1030         for (i = 0; i < e820.nr_map; i++) {
1031                 unsigned long curr_pfn, last_pfn, size;
1032                 /*
1033                  * Reserve usable low memory
1034                  */
1035                 if (e820.map[i].type != E820_RAM)
1036                         continue;
1037                 /*
1038                  * We are rounding up the start address of usable memory:
1039                  */
1040                 curr_pfn = PFN_UP(e820.map[i].addr);
1041                 if (curr_pfn >= max_low_pfn)
1042                         continue;
1043                 /*
1044                  * ... and at the end of the usable range downwards:
1045                  */
1046                 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1047
1048                 if (last_pfn > max_low_pfn)
1049                         last_pfn = max_low_pfn;
1050
1051                 /*
1052                  * .. finally, did all the rounding and playing
1053                  * around just make the area go away?
1054                  */
1055                 if (last_pfn <= curr_pfn)
1056                         continue;
1057
1058                 size = last_pfn - curr_pfn;
1059                 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
1060         }
1061 }
1062
1063 /*
1064  * workaround for Dell systems that neglect to reserve EBDA
1065  */
1066 static void __init reserve_ebda_region(void)
1067 {
1068         unsigned int addr;
1069         addr = get_bios_ebda();
1070         if (addr)
1071                 reserve_bootmem(addr, PAGE_SIZE);       
1072 }
1073
/*
 * Initialise the bootmem allocator over low memory and reserve all
 * special-purpose regions (bitmap, EBDA, SMP trampoline, ACPI sleep,
 * MP tables, initrd).  Returns the highest low-memory PFN.
 */
static unsigned long __init setup_memory(void)
{
	unsigned long bootmap_size, start_pfn, max_low_pfn;

	/*
	 * partially used pages are not usable - thus
	 * we are rounding upwards:
	 */
	/* First free page above the Xen-provided initial page tables. */
	start_pfn = PFN_UP(__pa(xen_start_info.pt_base)) + xen_start_info.nr_pt_frames;

	find_max_pfn();

	max_low_pfn = find_max_low_pfn();

#ifdef CONFIG_HIGHMEM
	highstart_pfn = highend_pfn = max_pfn;
	if (max_pfn > max_low_pfn) {
		highstart_pfn = max_low_pfn;
	}
	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
		pages_to_mb(highend_pfn - highstart_pfn));
#endif
	printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
			pages_to_mb(max_low_pfn));
	/*
	 * Initialize the boot-time allocator (with low memory only):
	 */
	bootmap_size = init_bootmem(start_pfn, max_low_pfn);

	register_bootmem_low_pages(max_low_pfn);

	/*
	 * Reserve the bootmem bitmap itself as well. We do this in two
	 * steps (first step was init_bootmem()) because this catches
	 * the (very unlikely) case of us accidentally initializing the
	 * bootmem allocator with an invalid RAM area.
	 */
	reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
			 bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));

	/* reserve EBDA region, it's a 4K region */
	reserve_ebda_region();

    /* could be an AMD 768MPX chipset. Reserve a page  before VGA to prevent
       PCI prefetch into it (errata #56). Usually the page is reserved anyways,
       unless you have no PS/2 mouse plugged in. */
	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
	    boot_cpu_data.x86 == 6)
	     reserve_bootmem(0xa0000 - 4096, 4096);

#ifdef CONFIG_SMP
	/*
	 * But first pinch a few for the stack/trampoline stuff
	 * FIXME: Don't need the extra page at 4K, but need to fix
	 * trampoline before removing it. (see the GDT stuff)
	 */
	reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
#endif
#ifdef CONFIG_ACPI_SLEEP
	/*
	 * Reserve low memory region for sleep support.
	 */
	acpi_reserve_bootmem();
#endif
#ifdef CONFIG_X86_FIND_SMP_CONFIG
	/*
	 * Find and reserve possible boot-time SMP configuration:
	 */
	find_smp_config();
#endif

#ifdef CONFIG_BLK_DEV_INITRD
	/* mod_start is where Xen placed the initrd image, if any. */
	if (xen_start_info.mod_start) {
		if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
			/*reserve_bootmem(INITRD_START, INITRD_SIZE);*/
			initrd_start = INITRD_START + PAGE_OFFSET;
			initrd_end = initrd_start+INITRD_SIZE;
			initrd_below_start_ok = 1;
		}
		else {
			printk(KERN_ERR "initrd extends beyond end of memory "
			    "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
			    INITRD_START + INITRD_SIZE,
			    max_low_pfn << PAGE_SHIFT);
			initrd_start = 0;
		}
	}
#endif

	/* Point the P->M table at the mfn_list Xen supplied at boot. */
	phys_to_machine_mapping = (unsigned int *)xen_start_info.mfn_list;

	return max_low_pfn;
}
1167 #else
1168 extern unsigned long setup_memory(void);
1169 #endif /* !CONFIG_DISCONTIGMEM */
1170
1171 /*
1172  * Request address space for all standard RAM and ROM resources
1173  * and also for regions reported as reserved by the e820.
1174  */
1175 static void __init
1176 legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
1177 {
1178         int i;
1179
1180 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
1181         probe_roms();
1182 #endif
1183         for (i = 0; i < e820.nr_map; i++) {
1184                 struct resource *res;
1185                 if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
1186                         continue;
1187                 res = alloc_bootmem_low(sizeof(struct resource));
1188                 switch (e820.map[i].type) {
1189                 case E820_RAM:  res->name = "System RAM"; break;
1190                 case E820_ACPI: res->name = "ACPI Tables"; break;
1191                 case E820_NVS:  res->name = "ACPI Non-volatile Storage"; break;
1192                 default:        res->name = "reserved";
1193                 }
1194                 res->start = e820.map[i].addr;
1195                 res->end = res->start + e820.map[i].size - 1;
1196                 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1197                 request_resource(&iomem_resource, res);
1198                 if (e820.map[i].type == E820_RAM) {
1199                         /*
1200                          *  We don't know which RAM region contains kernel data,
1201                          *  so we try it repeatedly and let the resource manager
1202                          *  test it.
1203                          */
1204                         request_resource(res, code_resource);
1205                         request_resource(res, data_resource);
1206                 }
1207         }
1208 }
1209
1210 /*
1211  * Request address space for all standard resources
1212  */
1213 static void __init register_memory(void)
1214 {
1215         unsigned long gapstart, gapsize;
1216         unsigned long long last;
1217         int           i;
1218
1219         if (efi_enabled)
1220                 efi_initialize_iomem_resources(&code_resource, &data_resource);
1221         else
1222                 legacy_init_iomem_resources(&code_resource, &data_resource);
1223
1224         /* EFI systems may still have VGA */
1225         request_resource(&iomem_resource, &video_ram_resource);
1226
1227         /* request I/O space for devices used on all i[345]86 PCs */
1228         for (i = 0; i < STANDARD_IO_RESOURCES; i++)
1229                 request_resource(&ioport_resource, &standard_io_resources[i]);
1230
1231         /*
1232          * Search for the bigest gap in the low 32 bits of the e820
1233          * memory space.
1234          */
1235         last = 0x100000000ull;
1236         gapstart = 0x10000000;
1237         gapsize = 0x400000;
1238         i = e820.nr_map;
1239         while (--i >= 0) {
1240                 unsigned long long start = e820.map[i].addr;
1241                 unsigned long long end = start + e820.map[i].size;
1242
1243                 /*
1244                  * Since "last" is at most 4GB, we know we'll
1245                  * fit in 32 bits if this condition is true
1246                  */
1247                 if (last > end) {
1248                         unsigned long gap = last - end;
1249
1250                         if (gap > gapsize) {
1251                                 gapsize = gap;
1252                                 gapstart = end;
1253                         }
1254                 }
1255                 if (start < last)
1256                         last = start;
1257         }
1258
1259         /*
1260          * Start allocating dynamic PCI memory a bit into the gap,
1261          * aligned up to the nearest megabyte.
1262          *
1263          * Question: should we try to pad it up a bit (do something
1264          * like " + (gapsize >> 3)" in there too?). We now have the
1265          * technology.
1266          */
1267         pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;
1268
1269         printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
1270                 pci_mem_start, gapstart, gapsize);
1271 }
1272
/* Use inline assembly to define this because the nops are defined 
   as inline assembly strings in the include files and we cannot 
   get them easily into strings. */
asm("\t.data\nintelnops: " 
    GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
    GENERIC_NOP7 GENERIC_NOP8); 
asm("\t.data\nk8nops: " 
    K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
    K8_NOP7 K8_NOP8); 
asm("\t.data\nk7nops: " 
    K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
    K7_NOP7 K7_NOP8); 
    
/*
 * Per-family nop lookup tables: entry k points at a k-byte nop
 * sequence.  Offsets accumulate because the 1..8-byte sequences
 * are emitted back-to-back in .data above; entry 0 is unused.
 */
extern unsigned char intelnops[], k8nops[], k7nops[];
static unsigned char *intel_nops[ASM_NOP_MAX+1] = { 
     NULL,
     intelnops,
     intelnops + 1,
     intelnops + 1 + 2,
     intelnops + 1 + 2 + 3,
     intelnops + 1 + 2 + 3 + 4,
     intelnops + 1 + 2 + 3 + 4 + 5,
     intelnops + 1 + 2 + 3 + 4 + 5 + 6,
     intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
}; 
static unsigned char *k8_nops[ASM_NOP_MAX+1] = { 
     NULL,
     k8nops,
     k8nops + 1,
     k8nops + 1 + 2,
     k8nops + 1 + 2 + 3,
     k8nops + 1 + 2 + 3 + 4,
     k8nops + 1 + 2 + 3 + 4 + 5,
     k8nops + 1 + 2 + 3 + 4 + 5 + 6,
     k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
}; 
static unsigned char *k7_nops[ASM_NOP_MAX+1] = { 
     NULL,
     k7nops,
     k7nops + 1,
     k7nops + 1 + 2,
     k7nops + 1 + 2 + 3,
     k7nops + 1 + 2 + 3 + 4,
     k7nops + 1 + 2 + 3 + 4 + 5,
     k7nops + 1 + 2 + 3 + 4 + 5 + 6,
     k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
}; 
/* Maps a CPU feature flag to the nop table it prefers; -1 terminates. */
static struct nop { 
     int cpuid; 
     unsigned char **noptable; 
} noptypes[] = { 
     { X86_FEATURE_K8, k8_nops }, 
     { X86_FEATURE_K7, k7_nops }, 
     { -1, NULL }
}; 
1328
1329 /* Replace instructions with better alternatives for this CPU type.
1330
1331    This runs before SMP is initialized to avoid SMP problems with
   self modifying code. This implies that asymmetric systems where
1333    APs have less capabilities than the boot processor are not handled. 
1334    In this case boot with "noreplacement". */ 
1335 void apply_alternatives(void *start, void *end) 
1336
1337         struct alt_instr *a; 
1338         int diff, i, k;
1339         unsigned char **noptable = intel_nops; 
1340         for (i = 0; noptypes[i].cpuid >= 0; i++) { 
1341                 if (boot_cpu_has(noptypes[i].cpuid)) { 
1342                         noptable = noptypes[i].noptable;
1343                         break;
1344                 }
1345         } 
1346         for (a = start; (void *)a < end; a++) { 
1347                 if (!boot_cpu_has(a->cpuid))
1348                         continue;
1349                 BUG_ON(a->replacementlen > a->instrlen); 
1350                 memcpy(a->instr, a->replacement, a->replacementlen); 
1351                 diff = a->instrlen - a->replacementlen; 
1352                 /* Pad the rest with nops */
1353                 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
1354                         k = diff;
1355                         if (k > ASM_NOP_MAX)
1356                                 k = ASM_NOP_MAX;
1357                         memcpy(a->instr + i, noptable[k], k); 
1358                 } 
1359         }
1360
1361
/* Set non-zero by the "noreplacement" boot option to skip patching. */
static int no_replacement __initdata = 0; 
 
/*
 * Patch all alternative-instruction sites recorded by the linker
 * (the __alt_instructions section), once at boot, unless disabled
 * on the command line.
 */
void __init alternative_instructions(void)
{
	extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
	if (no_replacement) 
		return;
	apply_alternatives(__alt_instructions, __alt_instructions_end);
}
1371
1372 static int __init noreplacement_setup(char *s)
1373
1374      no_replacement = 1; 
1375      return 0; 
1376
1377
1378 __setup("noreplacement", noreplacement_setup); 
1379
1380 static char * __init machine_specific_memory_setup(void);
1381
#ifdef CONFIG_MCA
/* Record whether a MicroChannel (MCA) bus was detected. */
static void set_mca_bus(int x)
{
	MCA_bus = x;
}
#else
/* MCA support not configured: the detection result is discarded. */
static void set_mca_bus(int x) { }
#endif
1390
1391 /*
1392  * Determine if we were loaded by an EFI loader.  If so, then we have also been
1393  * passed the efi memmap, systab, etc., so we should use these data structures
1394  * for initialization.  Note, the efi init code path is determined by the
1395  * global efi_enabled. This allows the same kernel image to be used on existing
1396  * systems (with a traditional BIOS) as well as on EFI systems.
1397  */
void __init setup_arch(char **cmdline_p)
{
	int i,j;
	physdev_op_t op;
	unsigned long max_low_pfn;

	/* Force a quick death if the kernel panics. */
	extern int panic_timeout;
	if ( panic_timeout == 0 )
		panic_timeout = 1;

	/* Register a call for panic conditions. */
	notifier_chain_register(&panic_notifier_list, &xen_panic_block);

	/* Ask Xen to emulate 4GB segments and allow writable pagetables. */
	HYPERVISOR_vm_assist(
		VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
	HYPERVISOR_vm_assist(
		VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);

	/* Seed boot_cpu_data with what early assembly discovered. */
	memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
	early_cpu_init();

	/*
	 * FIXME: This isn't an official loader_type right
	 * now but does currently work with elilo.
	 * If we were configured as an EFI kernel, check to make
	 * sure that we were loaded correctly from elilo and that
	 * the system table is valid.  If not, then initialize normally.
	 */
#ifdef CONFIG_EFI
	if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
		efi_enabled = 1;
#endif

	/* This must be initialized to UNNAMED_MAJOR for ipconfig to work
	   properly.  Setting ROOT_DEV to default to /dev/ram0 breaks initrd.
	*/
	ROOT_DEV = MKDEV(UNNAMED_MAJOR,0);
	drive_info = DRIVE_INFO;
	screen_info = SCREEN_INFO;
	edid_info = EDID_INFO;
	apm_info.bios = APM_BIOS_INFO;
	ist_info = IST_INFO;
	saved_videomode = VIDEO_MODE;
	if( SYS_DESC_TABLE.length != 0 ) {
		set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2);
		machine_id = SYS_DESC_TABLE.table[0];
		machine_submodel_id = SYS_DESC_TABLE.table[1];
		BIOS_revision = SYS_DESC_TABLE.table[2];
	}
	aux_device_present = AUX_DEVICE_INFO;
	bootloader_type = LOADER_TYPE;

#ifdef CONFIG_XEN_PHYSDEV_ACCESS
	/* This is drawn from a dump from vgacon:startup in standard Linux. */
	screen_info.orig_video_mode = 3; 
	screen_info.orig_video_isVGA = 1;
	screen_info.orig_video_lines = 25;
	screen_info.orig_video_cols = 80;
	screen_info.orig_video_ega_bx = 3;
	screen_info.orig_video_points = 16;
#endif

#ifdef CONFIG_BLK_DEV_RAM
	rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
	rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
	rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
#endif
	ARCH_SETUP
	if (efi_enabled)
		efi_init();
	else {
		printk(KERN_INFO "BIOS-provided physical RAM map:\n");
		print_memory_map(machine_specific_memory_setup());
	}

	copy_edd();

	if (!MOUNT_ROOT_RDONLY)
		root_mountflags &= ~MS_RDONLY;
	init_mm.start_code = (unsigned long) _text;
	init_mm.end_code = (unsigned long) _etext;
	init_mm.end_data = (unsigned long) _edata;
	/* Heap starts just above the Xen-provided initial page tables. */
	init_mm.brk = (PFN_UP(__pa(xen_start_info.pt_base)) + xen_start_info.nr_pt_frames) << PAGE_SHIFT;

	/* XEN: This is nonsense: kernel may not even be contiguous in RAM. */
	/*code_resource.start = virt_to_phys(_text);*/
	/*code_resource.end = virt_to_phys(_etext)-1;*/
	/*data_resource.start = virt_to_phys(_etext);*/
	/*data_resource.end = virt_to_phys(_edata)-1;*/

	parse_cmdline_early(cmdline_p);

	max_low_pfn = setup_memory();

	/*
	 * NOTE: before this point _nobody_ is allowed to allocate
	 * any memory using the bootmem allocator.  Although the
	 * allocator is now initialised only the first 8Mb of the kernel
	 * virtual address space has been mapped.  All allocations before
	 * paging_init() has completed must use the alloc_bootmem_low_pages()
	 * variant (which allocates DMA'able memory) and care must be taken
	 * not to exceed the 8Mb limit.
	 */

#ifdef CONFIG_SMP
	smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
#endif
	paging_init();

	/* Make sure we have a correctly sized P->M table. */
	if (max_pfn != xen_start_info.nr_pages) {
		phys_to_machine_mapping = alloc_bootmem_low_pages(
			max_pfn * sizeof(unsigned long));

		if (max_pfn > xen_start_info.nr_pages) {
			/* set to INVALID_P2M_ENTRY */                        
			memset(phys_to_machine_mapping, ~0,
				max_pfn * sizeof(unsigned long));
			memcpy(phys_to_machine_mapping,
				(unsigned long *)xen_start_info.mfn_list,
				xen_start_info.nr_pages * sizeof(unsigned long));
		} else {
			/* Return the surplus machine pages to Xen. */
			memcpy(phys_to_machine_mapping,
				(unsigned long *)xen_start_info.mfn_list,
				max_pfn * sizeof(unsigned long));
			if (HYPERVISOR_dom_mem_op(
				MEMOP_decrease_reservation,
				(unsigned long *)xen_start_info.mfn_list + max_pfn,
				xen_start_info.nr_pages - max_pfn, 0) !=
			    (xen_start_info.nr_pages - max_pfn)) BUG();
		}
		/* The original mfn_list pages are no longer needed. */
		free_bootmem(
			__pa(xen_start_info.mfn_list), 
			PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
			sizeof(unsigned long))));
	}

	/*
	 * Publish the machine-frame numbers backing the P->M table so
	 * the hypervisor/tools can locate it (one entry per table page).
	 */
	pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE);
	for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
	{	
	     pfn_to_mfn_frame_list[j] = 
		  virt_to_machine(&phys_to_machine_mapping[i]) >> PAGE_SHIFT;
	}
	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list =
	     virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT;


	/*
	 * NOTE: at this point the bootmem allocator is fully available.
	 */

#ifdef CONFIG_EARLY_PRINTK
	{
		char *s = strstr(*cmdline_p, "earlyprintk=");
		if (s) {
			extern void setup_early_printk(char *);

			setup_early_printk(s);
			printk("early console enabled\n");
		}
	}
#endif


	dmi_scan_machine();

#ifdef CONFIG_X86_GENERICARCH
	generic_apic_probe(*cmdline_p);
#endif	
	if (efi_enabled)
		efi_map_memmap();

#ifdef CONFIG_ACPI_BOOT
	/*
	 * Parse the ACPI tables for possible boot-time SMP configuration.
	 */
	acpi_boot_table_init();
	acpi_boot_init();
#endif

#ifdef CONFIG_X86_LOCAL_APIC
	if (smp_found_config)
		get_smp_config();
#endif

	/* XXX Disable irqdebug until we have a way to avoid interrupt
	 * conflicts. */
	noirqdebug_setup("");

	register_memory();

	/* Grant this domain I/O privilege level 1 via the hypervisor. */
	op.cmd             = PHYSDEVOP_SET_IOPL;
	op.u.set_iopl.iopl = current->thread.io_pl = 1;
	HYPERVISOR_physdev_op(&op);

	/* Console selection: real VGA for dom0, null console otherwise. */
	if (xen_start_info.flags & SIF_INITDOMAIN) {
		if (!(xen_start_info.flags & SIF_PRIVILEGED))
			panic("Xen granted us console access "
			      "but not privileged status");

#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
		if (!efi_enabled ||
		    (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
			conswitchp = &vga_con;
#elif defined(CONFIG_DUMMY_CONSOLE)
		conswitchp = &dummy_con;
#endif
#endif
	} else {
#ifdef CONFIG_XEN_PRIVILEGED_GUEST
		extern const struct consw xennull_con;
		extern int console_use_vt;
#if defined(CONFIG_VGA_CONSOLE)
		/* disable VGA driver */
		ORIG_VIDEO_ISVGA = VIDEO_TYPE_VLFB;
#endif
		conswitchp = &xennull_con;
		console_use_vt = 0;
#endif
	}
}
1621
1622
1623 static int
1624 xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
1625 {
1626      HYPERVISOR_crash();    
1627      /* we're never actually going to get here... */
1628      return NOTIFY_DONE;
1629 }
1630
1631
1632 #include "setup_arch_post.h"
1633 /*
1634  * Local Variables:
1635  * mode:c
1636  * c-file-style:"k&r"
1637  * c-basic-offset:8
1638  * End:
1639  */