patch-2_6_7-vs1_9_1_12
[linux-2.6.git] / arch / i386 / kernel / setup.c
1 /*
2  *  linux/arch/i386/kernel/setup.c
3  *
4  *  Copyright (C) 1995  Linus Torvalds
5  *
6  *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
7  *
8  *  Memory region support
9  *      David Parsons <orc@pell.chi.il.us>, July-August 1999
10  *
11  *  Added E820 sanitization routine (removes overlapping memory regions);
12  *  Brian Moyle <bmoyle@mvista.com>, February 2001
13  *
14  * Moved CPU detection code to cpu/${cpu}.c
15  *    Patrick Mochel <mochel@osdl.org>, March 2002
16  *
17  *  Provisions for empty E820 memory regions (reported by certain BIOSes).
18  *  Alex Achenbach <xela@slit.de>, December 2002.
19  *
20  */
21
22 /*
23  * This file handles the architecture-dependent parts of initialization
24  */
25
26 #include <linux/sched.h>
27 #include <linux/mm.h>
28 #include <linux/tty.h>
29 #include <linux/ioport.h>
30 #include <linux/acpi.h>
31 #include <linux/apm_bios.h>
32 #include <linux/initrd.h>
33 #include <linux/bootmem.h>
34 #include <linux/seq_file.h>
35 #include <linux/console.h>
36 #include <linux/root_dev.h>
37 #include <linux/highmem.h>
38 #include <linux/module.h>
39 #include <linux/efi.h>
40 #include <linux/init.h>
41 #include <linux/edd.h>
42 #include <video/edid.h>
43 #include <asm/e820.h>
44 #include <asm/mpspec.h>
45 #include <asm/setup.h>
46 #include <asm/arch_hooks.h>
47 #include <asm/sections.h>
48 #include <asm/io_apic.h>
49 #include <asm/ist.h>
50 #include <asm/std_resources.h>
51 #include "setup_arch_pre.h"
52
53 /* This value is set up by the early boot code to point to the address
54    immediately after the boot time page tables.  It contains a *physical*
55    address, and must not be in the .bss segment! */
56 unsigned long init_pg_tables_end __initdata = ~0UL;
57
58 int disable_pse __initdata = 0; /* set to 1 by the "mem=nopentium" boot option */
59
60 static inline char * __init machine_specific_memory_setup(void);
61
62 /*
63  * Machine setup..
64  */
65
66 #ifdef CONFIG_EFI
67 int efi_enabled = 0;
68 EXPORT_SYMBOL(efi_enabled);
69 #endif
70
71 /* cpu data as detected by the assembly code in head.S */
72 struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
73 /* common cpu data for all cpus */
74 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
75
76 unsigned long mmu_cr4_features;
77 EXPORT_SYMBOL_GPL(mmu_cr4_features);
78
79 #ifdef  CONFIG_ACPI_INTERPRETER
80         int acpi_disabled = 0;
81 #else
82         int acpi_disabled = 1;
83 #endif
84 EXPORT_SYMBOL(acpi_disabled);
85
86 #ifdef  CONFIG_ACPI_BOOT
87 int __initdata acpi_force = 0; /* set by the "acpi=force" boot option */
88 extern acpi_interrupt_flags     acpi_sci_flags;
89 #endif
90
91 int MCA_bus;
92 /* for MCA, but anyone else can use it if they want */
93 unsigned int machine_id;
94 unsigned int machine_submodel_id;
95 unsigned int BIOS_revision;
96 unsigned int mca_pentium_flag;
97
98 /* For PCI or other memory-mapped resources */
99 unsigned long pci_mem_start = 0x10000000;
100
101 /* user-defined highmem size, in pages; -1 means "highmem=" was not given */
102 static unsigned int highmem_pages = -1;
103
104 /*
105  * Setup options
106  */
107 struct drive_info_struct { char dummy[32]; } drive_info;
108 struct screen_info screen_info;
109 struct apm_info apm_info;
110 struct sys_desc_table_struct {
111         unsigned short length;
112         unsigned char table[0]; /* zero-length trailing array: variable-sized payload follows */
113 };
114 struct edid_info edid_info;
115 struct ist_info ist_info;
116 struct e820map e820;
117
118 unsigned char aux_device_present;
119
120 extern void early_cpu_init(void);
121 extern void dmi_scan_machine(void);
122 extern void generic_apic_probe(char *);
123 extern int root_mountflags;
124
125 unsigned long saved_videomode;
126
127 #define RAMDISK_IMAGE_START_MASK        0x07FF
128 #define RAMDISK_PROMPT_FLAG             0x8000
129 #define RAMDISK_LOAD_FLAG               0x4000  
130
131 static char command_line[COMMAND_LINE_SIZE]; /* filtered copy built by parse_cmdline_early() */
132        char saved_command_line[COMMAND_LINE_SIZE]; /* unparsed original, source for the copy above */
133
134 unsigned char __initdata boot_params[PARAM_SIZE];
135
136 static struct resource code_resource = { "Kernel code", 0x100000, 0 };
137 static struct resource data_resource = { "Kernel data", 0, 0 };
138
139 static void __init limit_regions(unsigned long long size)
140 {
141         unsigned long long current_addr = 0;
142         int i;
143
144         if (efi_enabled) {
145                 for (i = 0; i < memmap.nr_map; i++) {
146                         current_addr = memmap.map[i].phys_addr +
147                                        (memmap.map[i].num_pages << 12);
148                         if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) {
149                                 if (current_addr >= size) {
150                                         memmap.map[i].num_pages -=
151                                                 (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
152                                         memmap.nr_map = i + 1;
153                                         return;
154                                 }
155                         }
156                 }
157         }
158         for (i = 0; i < e820.nr_map; i++) {
159                 if (e820.map[i].type == E820_RAM) {
160                         current_addr = e820.map[i].addr + e820.map[i].size;
161                         if (current_addr >= size) {
162                                 e820.map[i].size -= current_addr-size;
163                                 e820.nr_map = i + 1;
164                                 return;
165                         }
166                 }
167         }
168 }
169
170 static void __init add_memory_region(unsigned long long start,
171                                   unsigned long long size, int type)
172 {
173         int x;
174
175         if (!efi_enabled) {
176                 x = e820.nr_map;
177
178                 if (x == E820MAX) {
179                     printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
180                     return;
181                 }
182
183                 e820.map[x].addr = start;
184                 e820.map[x].size = size;
185                 e820.map[x].type = type;
186                 e820.nr_map++;
187         }
188 } /* add_memory_region */
189
190 #define E820_DEBUG      1
191
192 static void __init print_memory_map(char *who)
193 {
194         int i;
195
196         for (i = 0; i < e820.nr_map; i++) {
197                 printk(" %s: %016Lx - %016Lx ", who,
198                         e820.map[i].addr,
199                         e820.map[i].addr + e820.map[i].size);
200                 switch (e820.map[i].type) {
201                 case E820_RAM:  printk("(usable)\n");
202                                 break;
203                 case E820_RESERVED:
204                                 printk("(reserved)\n");
205                                 break;
206                 case E820_ACPI:
207                                 printk("(ACPI data)\n");
208                                 break;
209                 case E820_NVS:
210                                 printk("(ACPI NVS)\n");
211                                 break;
212                 default:        printk("type %lu\n", e820.map[i].type);
213                                 break;
214                 }
215         }
216 }
217
218 /*
219  * Sanitize the BIOS e820 map.
220  *
221  * Some e820 responses include overlapping entries.  The following
222  * replaces the original e820 map with a new one, removing overlaps.
223  * Where entries overlap, the largest type value wins.
224  */
225 struct change_member {
226         struct e820entry *pbios; /* pointer to original bios entry */
227         unsigned long long addr; /* address for this change point */
228 };
229 struct change_member change_point_list[2*E820MAX] __initdata; /* two change points (start, end) per bios entry */
230 struct change_member *change_point[2*E820MAX] __initdata; /* pointers into change_point_list; these are what gets sorted */
231 struct e820entry *overlap_list[E820MAX] __initdata; /* bios entries covering the address being processed */
232 struct e820entry new_bios[E820MAX] __initdata; /* sanitized map, built here and then copied back */
233
234 static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) /* *pnr_map: entry count, in and out */
235 {
236         struct change_member *change_tmp;
237         unsigned long current_type, last_type;
238         unsigned long long last_addr;
239         int chgidx, still_changing;
240         int overlap_entries;
241         int new_bios_entry;
242         int old_nr, new_nr, chg_nr;
243         int i;
244
245         /*
246                 Visually we're performing the following (1,2,3,4 = memory types)...
247
248                 Sample memory map (w/overlaps):
249                    ____22__________________
250                    ______________________4_
251                    ____1111________________
252                    _44_____________________
253                    11111111________________
254                    ____________________33__
255                    ___________44___________
256                    __________33333_________
257                    ______________22________
258                    ___________________2222_
259                    _________111111111______
260                    _____________________11_
261                    _________________4______
262
263                 Sanitized equivalent (no overlap):
264                    1_______________________
265                    _44_____________________
266                    ___1____________________
267                    ____22__________________
268                    ______11________________
269                    _________1______________
270                    __________3_____________
271                    ___________44___________
272                    _____________33_________
273                    _______________2________
274                    ________________1_______
275                    _________________4______
276                    ___________________2____
277                    ____________________33__
278                    ______________________4_
279         */
280
281         /* fewer than two regions cannot overlap: leave the map untouched and return -1 */
282         if (*pnr_map < 2)
283                 return -1;
284
285         old_nr = *pnr_map;
286
287         /* bail out (-1, map untouched) if any entry's addr + size wraps around */
288         for (i=0; i<old_nr; i++)
289                 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
290                         return -1;
291
292         /* create pointers for initial change-point information (for sorting) */
293         for (i=0; i < 2*old_nr; i++)
294                 change_point[i] = &change_point_list[i];
295
296         /* record all known change-points (starting and ending addresses),
297            omitting those that are for empty memory regions */
298         chgidx = 0;
299         for (i=0; i < old_nr; i++)      {
300                 if (biosmap[i].size != 0) {
301                         change_point[chgidx]->addr = biosmap[i].addr;
302                         change_point[chgidx++]->pbios = &biosmap[i];
303                         change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
304                         change_point[chgidx++]->pbios = &biosmap[i];
305                 }
306         }
307         chg_nr = chgidx;        /* true number of change-points */
308
309         /* sort change-point list by memory addresses (low -> high);
               this is a bubble sort that swaps the pointers, not the structs */
310         still_changing = 1;
311         while (still_changing)  {
312                 still_changing = 0;
313                 for (i=1; i < chg_nr; i++)  {
314                         /* if <current_addr> < <previous_addr>, swap */
315                         /* or, at equal addresses, if current is a start and previous is an end, swap */
316                         if ((change_point[i]->addr < change_point[i-1]->addr) ||
317                                 ((change_point[i]->addr == change_point[i-1]->addr) &&
318                                  (change_point[i]->addr == change_point[i]->pbios->addr) &&
319                                  (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
320                            )
321                         {
322                                 change_tmp = change_point[i];
323                                 change_point[i] = change_point[i-1];
324                                 change_point[i-1] = change_tmp;
325                                 still_changing=1;
326                         }
327                 }
328         }
329
330         /* create a new bios memory map, removing overlaps */
331         overlap_entries=0;       /* number of entries in the overlap table */
332         new_bios_entry=0;        /* index for creating new bios map entries */
333         last_type = 0;           /* start with undefined memory type */
334         last_addr = 0;           /* start with 0 as last starting address */
335         /* loop through change-points, determining effect on the new bios map */
336         for (chgidx=0; chgidx < chg_nr; chgidx++)
337         {
338                 /* keep track of all overlapping bios entries */
339                 if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) /* this change point is a region start */
340                 {
341                         /* add map entry to overlap list (> 1 entry implies an overlap) */
342                         overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
343                 }
344                 else
345                 {
346                         /* region end: remove entry from list (order independent, so overwrite with last) */
347                         for (i=0; i<overlap_entries; i++)
348                         {
349                                 if (overlap_list[i] == change_point[chgidx]->pbios)
350                                         overlap_list[i] = overlap_list[overlap_entries-1];
351                         }
352                         overlap_entries--;
353                 }
354                 /* if there are overlapping entries, decide which "type" to use */
355                 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
356                 current_type = 0; /* stays 0 when no bios entry covers this address */
357                 for (i=0; i<overlap_entries; i++)
358                         if (overlap_list[i]->type > current_type)
359                                 current_type = overlap_list[i]->type;
360                 /* continue building up new bios map based on this information */
361                 if (current_type != last_type)  {
362                         if (last_type != 0)      { /* close the entry that was open up to this address */
363                                 new_bios[new_bios_entry].size =
364                                         change_point[chgidx]->addr - last_addr;
365                                 /* move forward only if the new size was non-zero */
366                                 if (new_bios[new_bios_entry].size != 0)
367                                         if (++new_bios_entry >= E820MAX)
368                                                 break;  /* no more space left for new bios entries */
369                         }
370                         if (current_type != 0)  { /* open a new entry starting at this address */
371                                 new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
372                                 new_bios[new_bios_entry].type = current_type;
373                                 last_addr=change_point[chgidx]->addr;
374                         }
375                         last_type = current_type;
376                 }
377         }
378         new_nr = new_bios_entry;   /* retain count for new bios entries */
379
380         /* copy new bios mapping into original location and report the new count */
381         memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
382         *pnr_map = new_nr;
383
384         return 0;
385 }
386
387 /*
388  * Copy the BIOS e820 map into a safe place.
389  *
390  * Sanity-check it while we're at it..
391  *
392  * If we're lucky and live on a modern system, the setup code
393  * will have given us a memory map that we can use to properly
394  * set up memory.  If we aren't, we'll fake a memory map.
395  *
396  * We check to see that the memory map contains at least 2 elements
397  * before we'll use it, because the detection code in setup.S may
398  * not be perfect and most every PC known to man has two memory
399  * regions: one from 0 to 640k, and one from 1mb up.  (The IBM
400  * thinkpad 560x, for example, does not cooperate with the memory
401  * detection code.)
402  */
403 static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
404 {
405         /* Only one memory region (or negative)? Ignore it */
406         if (nr_map < 2)
407                 return -1;
408
409         do {
410                 unsigned long long start = biosmap->addr;
411                 unsigned long long size = biosmap->size;
412                 unsigned long long end = start + size;
413                 unsigned long type = biosmap->type;
414
415                 /* Overflow in 64 bits? Ignore the memory map. */
416                 if (start > end)
417                         return -1;
418
419                 /*
420                  * Some BIOSes claim RAM in the 640k - 1M region.
421                  * Not right. Fix it up.
422                  */
423                 if (type == E820_RAM) {
424                         if (start < 0x100000ULL && end > 0xA0000ULL) {
425                                 if (start < 0xA0000ULL)
426                                         add_memory_region(start, 0xA0000ULL-start, type);
427                                 if (end <= 0x100000ULL)
428                                         continue;
429                                 start = 0x100000ULL;
430                                 size = end - start;
431                         }
432                 }
433                 add_memory_region(start, size, type);
434         } while (biosmap++,--nr_map);
435         return 0;
436 }
437
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
unsigned char eddnr;
struct edd_info edd[EDDMAXNR];
unsigned int edd_disk80_sig;
#ifdef CONFIG_EDD_MODULE
EXPORT_SYMBOL(eddnr);
EXPORT_SYMBOL(edd);
EXPORT_SYMBOL(edd_disk80_sig);
#endif
/**
 * copy_edd() - Save the BIOS EDD data in kernel-owned variables.
 *
 * Fills eddnr, edd[] and edd_disk80_sig from the EDD_NR, EDD_BUF and
 * DISK80_SIGNATURE accessors (defined elsewhere; presumably views into
 * boot_params).
 */
static inline void copy_edd(void)
{
	edd_disk80_sig = DISK80_SIGNATURE;
	memcpy(edd, EDD_BUF, sizeof(edd));
	eddnr = EDD_NR;
}
#else
/* EDD support not configured: copy_edd() compiles away. */
#define copy_edd() do {} while (0)
#endif
461
462 /*
463  * Do NOT EVER look at the BIOS memory size location.
464  * It does not work on many machines.
465  */
466 #define LOWMEMSIZE()    (0x9f000)
467
468 static void __init setup_memory_region(void)
469 {
470         char *who = machine_specific_memory_setup();
471         printk(KERN_INFO "BIOS-provided physical RAM map:\n");
472         print_memory_map(who);
473 } /* setup_memory_region */
474
475
476 static void __init parse_cmdline_early (char ** cmdline_p) /* on return *cmdline_p points at command_line */
477 {
478         char c = ' ', *to = command_line, *from = saved_command_line; /* c: previous char; starts as ' ' so an option at position 0 matches */
479         int len = 0;
480         int userdef = 0; /* set when the user altered the memory map, so it gets printed at the end */
481
482         /* saved_command_line keeps the unparsed copy (for /proc/cmdline); make sure it is terminated */
483         saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
484
485         for (;;) {
486                 /*
487                  * "mem=nopentium" disables the 4MB page tables.
488                  * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
489                  * to <mem>, overriding the bios size.
490                  * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
491                  * <start> to <start>+<mem>, overriding the bios size.
492                  *
493                  * HPA tells me bootloaders need to parse mem=, so no new
494                  * option should be mem=  [also see Documentation/i386/boot.txt]
495                  */
496                 if (c == ' ' && !memcmp(from, "mem=", 4)) {
497                         if (to != command_line)
498                                 to--; /* drop the space already copied in front of the option */
499                         if (!memcmp(from+4, "nopentium", 9)) {
500                                 from += 9+4;
501                                 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
502                                 disable_pse = 1;
503                         } else {
504                                 /* If the user specifies memory size, we
505                                  * limit the BIOS-provided memory map to
506                                  * that size. exactmap can be used to specify
507                                  * the exact map. mem=number can be used to
508                                  * trim the existing memory map.
509                                  */
510                                 unsigned long long mem_size;
511  
512                                 mem_size = memparse(from+4, &from); /* memparse also moves 'from' past the number */
513                                 limit_regions(mem_size);
514                                 userdef=1;
515                         }
516                 }
517
518                 if (c == ' ' && !memcmp(from, "memmap=", 7)) {
519                         if (to != command_line)
520                                 to--; /* drop the space already copied in front of the option */
521                         if (!memcmp(from+7, "exactmap", 8)) {
522                                 from += 8+7;
523                                 e820.nr_map = 0; /* discard the BIOS map; later memmap= entries rebuild it */
524                                 userdef = 1;
525                         } else {
526                                 /* memmap=<size>@<start> adds a RAM region,
527                                  * memmap=<size>#<start> adds an ACPI data region,
528                                  * memmap=<size>$<start> adds a reserved region.
529                                  * A bare memmap=<size> trims the existing map
530                                  * to that size, exactly like mem=<size>.
531                                  */
532                                 unsigned long long start_at, mem_size;
533  
534                                 mem_size = memparse(from+7, &from);
535                                 if (*from == '@') {
536                                         start_at = memparse(from+1, &from);
537                                         add_memory_region(start_at, mem_size, E820_RAM);
538                                 } else if (*from == '#') {
539                                         start_at = memparse(from+1, &from);
540                                         add_memory_region(start_at, mem_size, E820_ACPI);
541                                 } else if (*from == '$') {
542                                         start_at = memparse(from+1, &from);
543                                         add_memory_region(start_at, mem_size, E820_RESERVED);
544                                 } else {
545                                         limit_regions(mem_size);
546                                         userdef=1;
547                                 }
548                         }
549                 }
550
551 #ifdef  CONFIG_X86_SMP
552                 /*
553                  * If the BIOS enumerates physical processors before logical,
554                  * maxcpus=N at enumeration-time can be used to disable HT.
555                  *
556                  * NOTE: this and every following "else if" hangs off the
557                  * "memmap=" test above.  None of them checks c == ' ' or
                    * advances 'from', so they match at any position and the
                    * option text is still copied into command_line.
558                  */
559                 else if (!memcmp(from, "maxcpus=", 8)) {
560                         extern unsigned int maxcpus;
561
562                         maxcpus = simple_strtoul(from + 8, NULL, 0);
563                 }
564 #endif
565
566 #ifdef CONFIG_ACPI_BOOT
567                 /* "acpi=off" disables both ACPI table parsing and interpreter */
568                 else if (!memcmp(from, "acpi=off", 8)) {
569                         disable_acpi();
570                 }
571
572                 /* acpi=force to over-ride black-list */
573                 else if (!memcmp(from, "acpi=force", 10)) {
574                         acpi_force = 1;
575                         acpi_ht = 1;
576                         acpi_disabled = 0;
577                 }
578
579                 /* acpi=strict disables out-of-spec workarounds */
580                 else if (!memcmp(from, "acpi=strict", 11)) {
581                         acpi_strict = 1;
582                 }
583
584                 /* Limit ACPI just to boot-time to enable HT */
585                 else if (!memcmp(from, "acpi=ht", 7)) {
586                         if (!acpi_force) /* an earlier acpi=force keeps ACPI enabled */
587                                 disable_acpi();
588                         acpi_ht = 1;
589                 }
590                 
591                 /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
592                 else if (!memcmp(from, "pci=noacpi", 10)) {
593                         acpi_disable_pci();
594                 }
595                 /* "acpi=noirq" disables ACPI interrupt routing */
596                 else if (!memcmp(from, "acpi=noirq", 10)) {
597                         acpi_noirq_set();
598                 }
599
600                 else if (!memcmp(from, "acpi_sci=edge", 13))
601                         acpi_sci_flags.trigger =  1;
602
603                 else if (!memcmp(from, "acpi_sci=level", 14))
604                         acpi_sci_flags.trigger = 3;
605
606                 else if (!memcmp(from, "acpi_sci=high", 13))
607                         acpi_sci_flags.polarity = 1;
608
609                 else if (!memcmp(from, "acpi_sci=low", 12))
610                         acpi_sci_flags.polarity = 3;
611
612 #ifdef CONFIG_X86_IO_APIC
613                 else if (!memcmp(from, "acpi_skip_timer_override", 24))
614                         acpi_skip_timer_override = 1;
615 #endif
616
617 #ifdef CONFIG_X86_LOCAL_APIC
618                 /* disable IO-APIC */
619                 else if (!memcmp(from, "noapic", 6))
620                         disable_ioapic_setup();
621 #endif /* CONFIG_X86_LOCAL_APIC */
622 #endif /* CONFIG_ACPI_BOOT */
623
624                 /*
625                  * highmem=size forces highmem to be exactly 'size' bytes.
626                  * This works even on boxes that have no highmem otherwise.
627                  * This also works to reduce highmem size on bigger boxes.
628                  */
629                 if (c == ' ' && !memcmp(from, "highmem=", 8))
630                         highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT; /* stored in pages, not bytes */
631         
632                 c = *(from++); /* copy one character through to command_line */
633                 if (!c)
634                         break;
635                 if (COMMAND_LINE_SIZE <= ++len) /* stop before overrunning command_line */
636                         break;
637                 *(to++) = c;
638         }
639         *to = '\0';
640         *cmdline_p = command_line;
641         if (userdef) {
642                 printk(KERN_INFO "user-defined physical RAM map:\n");
643                 print_memory_map("user");
644         }
645 }
643
644 /*
645  * Callback for efi_memory_walk.
646  */
647 static int __init
648 efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
649 {
650         unsigned long *max_pfn = arg, pfn;
651
652         if (start < end) {
653                 pfn = PFN_UP(end -1);
654                 if (pfn > *max_pfn)
655                         *max_pfn = pfn;
656         }
657         return 0;
658 }
659
660
661 /*
662  * Find the highest page frame number we have available
663  */
664 void __init find_max_pfn(void)
665 {
666         int i;
667
668         max_pfn = 0;
669         if (efi_enabled) {
670                 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
671                 return;
672         }
673
674         for (i = 0; i < e820.nr_map; i++) {
675                 unsigned long start, end;
676                 /* RAM? */
677                 if (e820.map[i].type != E820_RAM)
678                         continue;
679                 start = PFN_UP(e820.map[i].addr);
680                 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
681                 if (start >= end)
682                         continue;
683                 if (end > max_pfn)
684                         max_pfn = end;
685         }
686 }
687
688 /*
689  * Determine low and high memory ranges.  Returns the low-memory limit as a
690  * page frame number; may also adjust the globals max_pfn and highmem_pages.
691  */
692 unsigned long __init find_max_low_pfn(void)
693 {
694         unsigned long max_low_pfn;
695
696         max_low_pfn = max_pfn;
697         if (max_low_pfn > MAXMEM_PFN) { /* more RAM than MAXMEM_PFN pages */
698                 if (highmem_pages == -1) /* no highmem= given: everything above MAXMEM_PFN is highmem */
699                         highmem_pages = max_pfn - MAXMEM_PFN;
700                 if (highmem_pages + MAXMEM_PFN < max_pfn) /* user asked for less highmem: clip max_pfn */
701                         max_pfn = MAXMEM_PFN + highmem_pages;
702                 if (highmem_pages + MAXMEM_PFN > max_pfn) { /* user asked for more highmem than exists */
703                         printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
704                         highmem_pages = 0;
705                 }
706                 max_low_pfn = MAXMEM_PFN;
707 #ifndef CONFIG_HIGHMEM
708                 /* Maximum memory usable is what is directly addressable */
709                 printk(KERN_WARNING "Warning only %ldMB will be used.\n",
710                                         MAXMEM>>20);
711                 if (max_pfn > MAX_NONPAE_PFN)
712                         printk(KERN_WARNING "Use a PAE enabled kernel.\n");
713                 else
714                         printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
715                 max_pfn = MAXMEM_PFN;
716 #else /* CONFIG_HIGHMEM */
717 #ifndef CONFIG_X86_PAE
718                 if (max_pfn > MAX_NONPAE_PFN) {
719                         max_pfn = MAX_NONPAE_PFN;
720                         printk(KERN_WARNING "Warning only 4GB will be used.\n");
721                         printk(KERN_WARNING "Use a PAE enabled kernel.\n");
722                 }
723 #endif /* !CONFIG_X86_PAE */
724 #endif /* !CONFIG_HIGHMEM */
725         } else { /* all RAM fits below MAXMEM_PFN */
726                 if (highmem_pages == -1) /* no highmem= given: no highmem at all */
727                         highmem_pages = 0;
728 #ifdef CONFIG_HIGHMEM
729                 if (highmem_pages >= max_pfn) {
730                         printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
731                         highmem_pages = 0;
732                 }
733                 if (highmem_pages) {
734                         if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
735                                 printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
736                                 highmem_pages = 0;
737                         }
738                         max_low_pfn -= highmem_pages; /* carve the requested highmem out of the top of low memory (0 if just ignored) */
739                 }
740 #else
741                 if (highmem_pages)
742                         printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
743 #endif
744         }
745         return max_low_pfn;
746 }
746
747 #ifndef CONFIG_DISCONTIGMEM
748
749 /*
750  * Free all available memory for boot time allocation.  Used
751  * as a callback function by efi_memory_walk()
752  */
753
754 static int __init
755 free_available_memory(unsigned long start, unsigned long end, void *arg)
756 {
757         /* check max_low_pfn */
758         if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
759                 return 0;
760         if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
761                 end = (max_low_pfn + 1) << PAGE_SHIFT;
762         if (start < end)
763                 free_bootmem(start, end - start);
764
765         return 0;
766 }
767 /*
768  * Register fully available low RAM pages with the bootmem allocator.
769  */
770 static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
771 {
772         int i;
773
774         if (efi_enabled) {
775                 efi_memmap_walk(free_available_memory, NULL);
776                 return;
777         }
778         for (i = 0; i < e820.nr_map; i++) {
779                 unsigned long curr_pfn, last_pfn, size;
780                 /*
781                  * Reserve usable low memory
782                  */
783                 if (e820.map[i].type != E820_RAM)
784                         continue;
785                 /*
786                  * We are rounding up the start address of usable memory:
787                  */
788                 curr_pfn = PFN_UP(e820.map[i].addr);
789                 if (curr_pfn >= max_low_pfn)
790                         continue;
791                 /*
792                  * ... and at the end of the usable range downwards:
793                  */
794                 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
795
796                 if (last_pfn > max_low_pfn)
797                         last_pfn = max_low_pfn;
798
799                 /*
800                  * .. finally, did all the rounding and playing
801                  * around just make the area go away?
802                  */
803                 if (last_pfn <= curr_pfn)
804                         continue;
805
806                 size = last_pfn - curr_pfn;
807                 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
808         }
809 }
810
811 static unsigned long __init setup_memory(void)
812 {
813         unsigned long bootmap_size, start_pfn, max_low_pfn;
814
815         /*
816          * partially used pages are not usable - thus
817          * we are rounding upwards:
818          */
819         start_pfn = PFN_UP(init_pg_tables_end);
820
821         find_max_pfn();
822
823         max_low_pfn = find_max_low_pfn();
824
825 #ifdef CONFIG_HIGHMEM
826         highstart_pfn = highend_pfn = max_pfn;
827         if (max_pfn > max_low_pfn) {
828                 highstart_pfn = max_low_pfn;
829         }
830         printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
831                 pages_to_mb(highend_pfn - highstart_pfn));
832 #endif
833         printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
834                         pages_to_mb(max_low_pfn));
835         /*
836          * Initialize the boot-time allocator (with low memory only):
837          */
838         bootmap_size = init_bootmem(start_pfn, max_low_pfn);
839
840         register_bootmem_low_pages(max_low_pfn);
841
842         /*
843          * Reserve the bootmem bitmap itself as well. We do this in two
844          * steps (first step was init_bootmem()) because this catches
845          * the (very unlikely) case of us accidentally initializing the
846          * bootmem allocator with an invalid RAM area.
847          */
848         reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
849                          bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
850
851         /*
852          * reserve physical page 0 - it's a special BIOS page on many boxes,
853          * enabling clean reboots, SMP operation, laptop functions.
854          */
855         reserve_bootmem(0, PAGE_SIZE);
856
857     /* could be an AMD 768MPX chipset. Reserve a page  before VGA to prevent
858        PCI prefetch into it (errata #56). Usually the page is reserved anyways,
859        unless you have no PS/2 mouse plugged in. */
860         if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
861             boot_cpu_data.x86 == 6)
862              reserve_bootmem(0xa0000 - 4096, 4096);
863
864 #ifdef CONFIG_SMP
865         /*
866          * But first pinch a few for the stack/trampoline stuff
867          * FIXME: Don't need the extra page at 4K, but need to fix
868          * trampoline before removing it. (see the GDT stuff)
869          */
870         reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
871 #endif
872 #ifdef CONFIG_ACPI_SLEEP
873         /*
874          * Reserve low memory region for sleep support.
875          */
876         acpi_reserve_bootmem();
877 #endif
878 #ifdef CONFIG_X86_FIND_SMP_CONFIG
879         /*
880          * Find and reserve possible boot-time SMP configuration:
881          */
882         find_smp_config();
883 #endif
884
885 #ifdef CONFIG_BLK_DEV_INITRD
886         if (LOADER_TYPE && INITRD_START) {
887                 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
888                         reserve_bootmem(INITRD_START, INITRD_SIZE);
889                         initrd_start =
890                                 INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
891                         initrd_end = initrd_start+INITRD_SIZE;
892                 }
893                 else {
894                         printk(KERN_ERR "initrd extends beyond end of memory "
895                             "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
896                             INITRD_START + INITRD_SIZE,
897                             max_low_pfn << PAGE_SHIFT);
898                         initrd_start = 0;
899                 }
900         }
901 #endif
902         return max_low_pfn;
903 }
904 #else
905 extern unsigned long setup_memory(void);
906 #endif /* !CONFIG_DISCONTIGMEM */
907
908 /*
909  * Request address space for all standard RAM and ROM resources
910  * and also for regions reported as reserved by the e820.
911  */
912 static void __init
913 legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
914 {
915         int i;
916
917         probe_roms();
918         for (i = 0; i < e820.nr_map; i++) {
919                 struct resource *res;
920                 if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
921                         continue;
922                 res = alloc_bootmem_low(sizeof(struct resource));
923                 switch (e820.map[i].type) {
924                 case E820_RAM:  res->name = "System RAM"; break;
925                 case E820_ACPI: res->name = "ACPI Tables"; break;
926                 case E820_NVS:  res->name = "ACPI Non-volatile Storage"; break;
927                 default:        res->name = "reserved";
928                 }
929                 res->start = e820.map[i].addr;
930                 res->end = res->start + e820.map[i].size - 1;
931                 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
932                 request_resource(&iomem_resource, res);
933                 if (e820.map[i].type == E820_RAM) {
934                         /*
935                          *  We don't know which RAM region contains kernel data,
936                          *  so we try it repeatedly and let the resource manager
937                          *  test it.
938                          */
939                         request_resource(res, code_resource);
940                         request_resource(res, data_resource);
941                 }
942         }
943 }
944
945 /*
946  * Request address space for all standard resources
947  */
948 static void __init register_memory(unsigned long max_low_pfn)
949 {
950         unsigned long low_mem_size;
951
952         if (efi_enabled)
953                 efi_initialize_iomem_resources(&code_resource, &data_resource);
954         else
955                 legacy_init_iomem_resources(&code_resource, &data_resource);
956
957         /* EFI systems may still have VGA */
958         request_graphics_resource();
959
960         /* request I/O space for devices used on all i[345]86 PCs */
961         request_standard_io_resources();
962
963         /* Tell the PCI layer not to allocate too close to the RAM area.. */
964         low_mem_size = ((max_low_pfn << PAGE_SHIFT) + 0xfffff) & ~0xfffff;
965         if (low_mem_size > pci_mem_start)
966                 pci_mem_start = low_mem_size;
967 }
968
969 /* Use inline assembly to define this because the nops are defined 
970    as inline assembly strings in the include files and we cannot 
971    get them easily into strings. */
972 asm("\t.data\nintelnops: " 
973     GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
974     GENERIC_NOP7 GENERIC_NOP8); 
975 asm("\t.data\nk8nops: " 
976     K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
977     K8_NOP7 K8_NOP8); 
978 asm("\t.data\nk7nops: " 
979     K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
980     K7_NOP7 K7_NOP8); 
981     
982 extern unsigned char intelnops[], k8nops[], k7nops[];
983 static unsigned char *intel_nops[ASM_NOP_MAX+1] = { 
984      NULL,
985      intelnops,
986      intelnops + 1,
987      intelnops + 1 + 2,
988      intelnops + 1 + 2 + 3,
989      intelnops + 1 + 2 + 3 + 4,
990      intelnops + 1 + 2 + 3 + 4 + 5,
991      intelnops + 1 + 2 + 3 + 4 + 5 + 6,
992      intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
993 }; 
994 static unsigned char *k8_nops[ASM_NOP_MAX+1] = { 
995      NULL,
996      k8nops,
997      k8nops + 1,
998      k8nops + 1 + 2,
999      k8nops + 1 + 2 + 3,
1000      k8nops + 1 + 2 + 3 + 4,
1001      k8nops + 1 + 2 + 3 + 4 + 5,
1002      k8nops + 1 + 2 + 3 + 4 + 5 + 6,
1003      k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1004 }; 
1005 static unsigned char *k7_nops[ASM_NOP_MAX+1] = { 
1006      NULL,
1007      k7nops,
1008      k7nops + 1,
1009      k7nops + 1 + 2,
1010      k7nops + 1 + 2 + 3,
1011      k7nops + 1 + 2 + 3 + 4,
1012      k7nops + 1 + 2 + 3 + 4 + 5,
1013      k7nops + 1 + 2 + 3 + 4 + 5 + 6,
1014      k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1015 }; 
1016 static struct nop { 
1017      int cpuid; 
1018      unsigned char **noptable; 
1019 } noptypes[] = { 
1020      { X86_FEATURE_K8, k8_nops }, 
1021      { X86_FEATURE_K7, k7_nops }, 
1022      { -1, 0 }
1023 }; 
1024
1025 /* Replace instructions with better alternatives for this CPU type.
1026
1027    This runs before SMP is initialized to avoid SMP problems with
1028    self modifying code. This implies that assymetric systems where
1029    APs have less capabilities than the boot processor are not handled. 
1030    In this case boot with "noreplacement". */ 
1031 void apply_alternatives(void *start, void *end) 
1032
1033         struct alt_instr *a; 
1034         int diff, i, k;
1035         unsigned char **noptable = intel_nops; 
1036         for (i = 0; noptypes[i].cpuid >= 0; i++) { 
1037                 if (boot_cpu_has(noptypes[i].cpuid)) { 
1038                         noptable = noptypes[i].noptable;
1039                         break;
1040                 }
1041         } 
1042         for (a = start; (void *)a < end; a++) { 
1043                 if (!boot_cpu_has(a->cpuid))
1044                         continue;
1045                 BUG_ON(a->replacementlen > a->instrlen); 
1046                 memcpy(a->instr, a->replacement, a->replacementlen); 
1047                 diff = a->instrlen - a->replacementlen; 
1048                 /* Pad the rest with nops */
1049                 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
1050                         k = diff;
1051                         if (k > ASM_NOP_MAX)
1052                                 k = ASM_NOP_MAX;
1053                         memcpy(a->instr + i, noptable[k], k); 
1054                 } 
1055         }
1056
1057
1058 static int no_replacement __initdata = 0; 
1059  
1060 void __init alternative_instructions(void)
1061 {
1062         extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
1063         if (no_replacement) 
1064                 return;
1065         apply_alternatives(__alt_instructions, __alt_instructions_end);
1066 }
1067
1068 static int __init noreplacement_setup(char *s)
1069
1070      no_replacement = 1; 
1071      return 0; 
1072
1073
1074 __setup("noreplacement", noreplacement_setup); 
1075
1076 /*
1077  * Determine if we were loaded by an EFI loader.  If so, then we have also been
1078  * passed the efi memmap, systab, etc., so we should use these data structures
1079  * for initialization.  Note, the efi init code path is determined by the
1080  * global efi_enabled. This allows the same kernel image to be used on existing
1081  * systems (with a traditional BIOS) as well as on EFI systems.
1082  */
1083 void __init setup_arch(char **cmdline_p)
1084 {
1085         unsigned long max_low_pfn;
1086
1087         memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
1088         pre_setup_arch_hook();
1089         early_cpu_init();
1090
1091         /*
1092          * FIXME: This isn't an official loader_type right
1093          * now but does currently work with elilo.
1094          * If we were configured as an EFI kernel, check to make
1095          * sure that we were loaded correctly from elilo and that
1096          * the system table is valid.  If not, then initialize normally.
1097          */
1098 #ifdef CONFIG_EFI
1099         if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
1100                 efi_enabled = 1;
1101 #endif
1102
1103         ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
1104         drive_info = DRIVE_INFO;
1105         screen_info = SCREEN_INFO;
1106         edid_info = EDID_INFO;
1107         apm_info.bios = APM_BIOS_INFO;
1108         ist_info = IST_INFO;
1109         saved_videomode = VIDEO_MODE;
1110         if( SYS_DESC_TABLE.length != 0 ) {
1111                 MCA_bus = SYS_DESC_TABLE.table[3] &0x2;
1112                 machine_id = SYS_DESC_TABLE.table[0];
1113                 machine_submodel_id = SYS_DESC_TABLE.table[1];
1114                 BIOS_revision = SYS_DESC_TABLE.table[2];
1115         }
1116         aux_device_present = AUX_DEVICE_INFO;
1117
1118 #ifdef CONFIG_BLK_DEV_RAM
1119         rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
1120         rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
1121         rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
1122 #endif
1123         ARCH_SETUP
1124         if (efi_enabled)
1125                 efi_init();
1126         else
1127                 setup_memory_region();
1128
1129         copy_edd();
1130
1131         if (!MOUNT_ROOT_RDONLY)
1132                 root_mountflags &= ~MS_RDONLY;
1133         init_mm.start_code = (unsigned long) _text;
1134         init_mm.end_code = (unsigned long) _etext;
1135         init_mm.end_data = (unsigned long) _edata;
1136         init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
1137
1138         code_resource.start = virt_to_phys(_text);
1139         code_resource.end = virt_to_phys(_etext)-1;
1140         data_resource.start = virt_to_phys(_etext);
1141         data_resource.end = virt_to_phys(_edata)-1;
1142
1143         parse_cmdline_early(cmdline_p);
1144
1145         max_low_pfn = setup_memory();
1146
1147         /*
1148          * NOTE: before this point _nobody_ is allowed to allocate
1149          * any memory using the bootmem allocator.
1150          */
1151
1152 #ifdef CONFIG_SMP
1153         smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
1154 #endif
1155         paging_init();
1156
1157 #ifdef CONFIG_EARLY_PRINTK
1158         {
1159                 char *s = strstr(*cmdline_p, "earlyprintk=");
1160                 if (s) {
1161                         extern void setup_early_printk(char *);
1162
1163                         setup_early_printk(s);
1164                         printk("early console enabled\n");
1165                 }
1166         }
1167 #endif
1168
1169
1170         dmi_scan_machine();
1171
1172 #ifdef CONFIG_X86_GENERICARCH
1173         generic_apic_probe(*cmdline_p);
1174 #endif  
1175         if (efi_enabled)
1176                 efi_map_memmap();
1177
1178         /*
1179          * Parse the ACPI tables for possible boot-time SMP configuration.
1180          */
1181         acpi_boot_init();
1182
1183 #ifdef CONFIG_X86_LOCAL_APIC
1184         if (smp_found_config)
1185                 get_smp_config();
1186 #endif
1187
1188         register_memory(max_low_pfn);
1189
1190 #ifdef CONFIG_VT
1191 #if defined(CONFIG_VGA_CONSOLE)
1192         if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
1193                 conswitchp = &vga_con;
1194 #elif defined(CONFIG_DUMMY_CONSOLE)
1195         conswitchp = &dummy_con;
1196 #endif
1197 #endif
1198 }
1199
1200 #include "setup_arch_post.h"
1201 /*
1202  * Local Variables:
1203  * mode:c
1204  * c-file-style:"k&r"
1205  * c-basic-offset:8
1206  * End:
1207  */