ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-2.6.6.tar.bz2
[linux-2.6.git] / arch / i386 / kernel / setup.c
1 /*
2  *  linux/arch/i386/kernel/setup.c
3  *
4  *  Copyright (C) 1995  Linus Torvalds
5  *
6  *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
7  *
8  *  Memory region support
9  *      David Parsons <orc@pell.chi.il.us>, July-August 1999
10  *
11  *  Added E820 sanitization routine (removes overlapping memory regions);
12  *  Brian Moyle <bmoyle@mvista.com>, February 2001
13  *
14  * Moved CPU detection code to cpu/${cpu}.c
15  *    Patrick Mochel <mochel@osdl.org>, March 2002
16  *
17  *  Provisions for empty E820 memory regions (reported by certain BIOSes).
18  *  Alex Achenbach <xela@slit.de>, December 2002.
19  *
20  */
21
22 /*
23  * This file handles the architecture-dependent parts of initialization
24  */
25
26 #include <linux/sched.h>
27 #include <linux/mm.h>
28 #include <linux/tty.h>
29 #include <linux/ioport.h>
30 #include <linux/acpi.h>
31 #include <linux/apm_bios.h>
32 #include <linux/initrd.h>
33 #include <linux/bootmem.h>
34 #include <linux/seq_file.h>
35 #include <linux/console.h>
36 #include <linux/root_dev.h>
37 #include <linux/highmem.h>
38 #include <linux/module.h>
39 #include <linux/efi.h>
40 #include <linux/init.h>
41 #include <linux/edd.h>
42 #include <video/edid.h>
43 #include <asm/e820.h>
44 #include <asm/mpspec.h>
45 #include <asm/setup.h>
46 #include <asm/arch_hooks.h>
47 #include <asm/sections.h>
48 #include <asm/io_apic.h>
49 #include <asm/ist.h>
50 #include <asm/std_resources.h>
51 #include "setup_arch_pre.h"
52
53 /* This value is set up by the early boot code to point to the value
54    immediately after the boot time page tables.  It contains a *physical*
55    address, and must not be in the .bss segment! */
/* setup_memory() rounds this up with PFN_UP() to obtain the first page
   frame number it hands to init_bootmem().  The explicit ~0UL initializer
   is what keeps the variable out of .bss. */
56 unsigned long init_pg_tables_end __initdata = ~0UL;
57
/* Set to 1 by parse_cmdline_early() when "mem=nopentium" is given; the same
   code path also clears X86_FEATURE_PSE in boot_cpu_data. */
58 int disable_pse __initdata = 0;
59
/* Forward declaration only -- the definition is not in this part of the
   file.  setup_memory_region() passes the returned string to
   print_memory_map(), which prints it in front of every map entry. */
60 static inline char * __init machine_specific_memory_setup(void);
61
62 /*
63  * Machine setup..
64  */
65
/* Non-zero makes limit_regions(), find_max_pfn() and
   register_bootmem_low_pages() work on the EFI map instead of e820, and
   turns add_memory_region() into a no-op.  Only defined here when
   CONFIG_EFI is set; NOTE(review): the users below are not guarded, so a
   fallback definition presumably comes from a header -- confirm. */
66 #ifdef CONFIG_EFI
67 int efi_enabled = 0;
68 #endif
69
/* Both cpuinfo_x86 objects start from the same positional initializer;
   which fields the nine values map to depends on struct cpuinfo_x86,
   which is declared elsewhere. */
70 /* cpu data as detected by the assembly code in head.S */
71 struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
72 /* common cpu data for all cpus */
73 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
74
75 unsigned long mmu_cr4_features;
76 EXPORT_SYMBOL_GPL(mmu_cr4_features);
77
/* Default is "enabled" (0) only when the ACPI interpreter is built in.
   parse_cmdline_early() resets it to 0 for "acpi=force". */
78 #ifdef  CONFIG_ACPI_INTERPRETER
79         int acpi_disabled = 0;
80 #else
81         int acpi_disabled = 1;
82 #endif
83 EXPORT_SYMBOL(acpi_disabled);
84
/* acpi_force is set by "acpi=force" and checked by "acpi=ht" in
   parse_cmdline_early(); acpi_sci_flags receives the "acpi_sci=" trigger
   and polarity settings there. */
85 #ifdef  CONFIG_ACPI_BOOT
86 int __initdata acpi_force = 0;
87 extern acpi_interrupt_flags     acpi_sci_flags;
88 #endif
89
/* None of the MCA/BIOS identification variables below are written in the
   part of the file reviewed here. */
90 int MCA_bus;
91 /* for MCA, but anyone else can use it if they want */
92 unsigned int machine_id;
93 unsigned int machine_submodel_id;
94 unsigned int BIOS_revision;
95 unsigned int mca_pentium_flag;
96
97 /* For PCI or other memory-mapped resources */
98 unsigned long pci_mem_start = 0x10000000;
99
/* Counted in pages, not bytes.  The initial -1 (all bits set, since the
   type is unsigned) means "not given on the command line":
   parse_cmdline_early() overwrites it for "highmem=", and
   find_max_low_pfn() replaces a remaining -1 with a computed default. */
100 /* user-defined highmem size */
101 static unsigned int highmem_pages = -1;
102
103 /*
104  * Setup options
105  */
/* drive_info is an opaque 32-byte blob as far as this file is concerned. */
106 struct drive_info_struct { char dummy[32]; } drive_info;
107 struct screen_info screen_info;
108 struct apm_info apm_info;
/* A 16-bit length followed by a zero-length trailing array (GNU C idiom
   for variable-length data following the header). */
109 struct sys_desc_table_struct {
110         unsigned short length;
111         unsigned char table[0];
112 };
113 struct edid_info edid_info;
114 struct ist_info ist_info;
/* The memory map used when efi_enabled is 0: filled by add_memory_region(),
   trimmed by limit_regions(), and read by print_memory_map(),
   find_max_pfn() and register_bootmem_low_pages(). */
115 struct e820map e820;
116
117 unsigned char aux_device_present;
118
/* Declared locally rather than through a header; none of these is
   referenced in the part of the file reviewed here. */
119 extern void early_cpu_init(void);
120 extern void dmi_scan_machine(void);
121 extern void generic_apic_probe(char *);
122 extern int root_mountflags;
123
124 unsigned long saved_videomode;
125
/* Three non-overlapping bit masks: the low 11 bits, bit 15 and bit 14.
   NOTE(review): presumably applied to a ramdisk flags word from the boot
   parameters -- the use is not in this part of the file, confirm there. */
126 #define RAMDISK_IMAGE_START_MASK        0x07FF
127 #define RAMDISK_PROMPT_FLAG             0x8000
128 #define RAMDISK_LOAD_FLAG               0x4000  
129
/* saved_command_line holds the unparsed command line; command_line is the
   filtered copy that parse_cmdline_early() builds from it and returns
   through its cmdline_p argument. */
130 static char command_line[COMMAND_LINE_SIZE];
131        char saved_command_line[COMMAND_LINE_SIZE];
132
133 unsigned char __initdata boot_params[PARAM_SIZE];
134
/* Positional initializers.  NOTE(review): presumably name, start, end --
   confirm against struct resource.  Only the second value of
   code_resource (0x100000) is non-zero here. */
135 static struct resource code_resource = { "Kernel code", 0x100000, 0 };
136 static struct resource data_resource = { "Kernel data", 0, 0 };
137
138 static void __init limit_regions(unsigned long long size)
139 {
140         unsigned long long current_addr = 0;
141         int i;
142
143         if (efi_enabled) {
144                 for (i = 0; i < memmap.nr_map; i++) {
145                         current_addr = memmap.map[i].phys_addr +
146                                        (memmap.map[i].num_pages << 12);
147                         if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) {
148                                 if (current_addr >= size) {
149                                         memmap.map[i].num_pages -=
150                                                 (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
151                                         memmap.nr_map = i + 1;
152                                         return;
153                                 }
154                         }
155                 }
156         }
157         for (i = 0; i < e820.nr_map; i++) {
158                 if (e820.map[i].type == E820_RAM) {
159                         current_addr = e820.map[i].addr + e820.map[i].size;
160                         if (current_addr >= size) {
161                                 e820.map[i].size -= current_addr-size;
162                                 e820.nr_map = i + 1;
163                                 return;
164                         }
165                 }
166         }
167 }
168
169 static void __init add_memory_region(unsigned long long start,
170                                   unsigned long long size, int type)
171 {
172         int x;
173
174         if (!efi_enabled) {
175                 x = e820.nr_map;
176
177                 if (x == E820MAX) {
178                     printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
179                     return;
180                 }
181
182                 e820.map[x].addr = start;
183                 e820.map[x].size = size;
184                 e820.map[x].type = type;
185                 e820.nr_map++;
186         }
187 } /* add_memory_region */
188
189 #define E820_DEBUG      1
190
191 static void __init print_memory_map(char *who)
192 {
193         int i;
194
195         for (i = 0; i < e820.nr_map; i++) {
196                 printk(" %s: %016Lx - %016Lx ", who,
197                         e820.map[i].addr,
198                         e820.map[i].addr + e820.map[i].size);
199                 switch (e820.map[i].type) {
200                 case E820_RAM:  printk("(usable)\n");
201                                 break;
202                 case E820_RESERVED:
203                                 printk("(reserved)\n");
204                                 break;
205                 case E820_ACPI:
206                                 printk("(ACPI data)\n");
207                                 break;
208                 case E820_NVS:
209                                 printk("(ACPI NVS)\n");
210                                 break;
211                 default:        printk("type %lu\n", e820.map[i].type);
212                                 break;
213                 }
214         }
215 }
216
217 /*
218  * Sanitize the BIOS e820 map.
219  *
220  * Some e820 responses include overlapping entries.  The following 
221  * replaces the original e820 map with a new one, removing overlaps.
222  *
223  */
/* One "change point": an address at which some BIOS entry starts or ends.
   sanitize_e820_map() tells the two apart by comparing addr with
   pbios->addr (equal means "start"). */
224 struct change_member {
225         struct e820entry *pbios; /* pointer to original bios entry */
226         unsigned long long addr; /* address for this change point */
227 };
/* Scratch storage for sanitize_e820_map().  The change-point arrays hold
   2*E820MAX slots because every BIOS entry contributes a start and an end
   point; change_point[] is the array of pointers that actually gets sorted.
   overlap_list[] tracks the entries covering the address being processed,
   and new_bios[] receives the result before it is copied back. */
228 struct change_member change_point_list[2*E820MAX] __initdata;
229 struct change_member *change_point[2*E820MAX] __initdata;
230 struct e820entry *overlap_list[E820MAX] __initdata;
231 struct e820entry new_bios[E820MAX] __initdata;
232
/*
 * sanitize_e820_map - rewrite a BIOS e820 map so that no two entries overlap.
 *
 * @biosmap: array of *pnr_map entries; overwritten in place with the result.
 * @pnr_map: in: number of entries in @biosmap; out: number of entries written.
 *
 * Where input entries overlap, the numerically largest type wins for the
 * overlapping range.  Zero-sized input entries are ignored.  At most E820MAX
 * output entries are produced.
 *
 * Returns 0 on success.  Returns -1, leaving @biosmap and *pnr_map untouched,
 * when there are fewer than two entries or when addr + size wraps around for
 * any entry.
 */
233 static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
234 {
235         struct change_member *change_tmp;
236         unsigned long current_type, last_type;
237         unsigned long long last_addr;
238         int chgidx, still_changing;
239         int overlap_entries;
240         int new_bios_entry;
241         int old_nr, new_nr, chg_nr;
242         int i;
243
244         /*
245                 Visually we're performing the following (1,2,3,4 = memory types)...
246
247                 Sample memory map (w/overlaps):
248                    ____22__________________
249                    ______________________4_
250                    ____1111________________
251                    _44_____________________
252                    11111111________________
253                    ____________________33__
254                    ___________44___________
255                    __________33333_________
256                    ______________22________
257                    ___________________2222_
258                    _________111111111______
259                    _____________________11_
260                    _________________4______
261
262                 Sanitized equivalent (no overlap):
263                    1_______________________
264                    _44_____________________
265                    ___1____________________
266                    ____22__________________
267                    ______11________________
268                    _________1______________
269                    __________3_____________
270                    ___________44___________
271                    _____________33_________
272                    _______________2________
273                    ________________1_______
274                    _________________4______
275                    ___________________2____
276                    ____________________33__
277                    ______________________4_
278         */
279
280         /* if there's only one memory region, don't bother */
281         if (*pnr_map < 2)
282                 return -1;
283
284         old_nr = *pnr_map;
285
286         /* bail out if we find any unreasonable addresses in bios map */
        /* (i.e. addr + size wrapped around past the top of 64 bits) */
287         for (i=0; i<old_nr; i++)
288                 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
289                         return -1;
290
291         /* create pointers for initial change-point information (for sorting) */
292         for (i=0; i < 2*old_nr; i++)
293                 change_point[i] = &change_point_list[i];
294
295         /* record all known change-points (starting and ending addresses),
296            omitting those that are for empty memory regions */
297         chgidx = 0;
298         for (i=0; i < old_nr; i++)      {
299                 if (biosmap[i].size != 0) {
300                         change_point[chgidx]->addr = biosmap[i].addr;
301                         change_point[chgidx++]->pbios = &biosmap[i];
302                         change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
303                         change_point[chgidx++]->pbios = &biosmap[i];
304                 }
305         }
306         chg_nr = chgidx;        /* true number of change-points */
307
308         /* sort change-point list by memory addresses (low -> high) */
        /* Repeated adjacent-swap passes until a pass makes no swap.  Only the
           pointers in change_point[] move; change_point_list[] stays put. */
309         still_changing = 1;
310         while (still_changing)  {
311                 still_changing = 0;
312                 for (i=1; i < chg_nr; i++)  {
313                         /* if <current_addr> > <last_addr>, swap */
314                         /* or, if current=<start_addr> & last=<end_addr>, swap */
                        /* the second rule puts a region's start ahead of another
                           region's end when both fall on the same address */
315                         if ((change_point[i]->addr < change_point[i-1]->addr) ||
316                                 ((change_point[i]->addr == change_point[i-1]->addr) &&
317                                  (change_point[i]->addr == change_point[i]->pbios->addr) &&
318                                  (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
319                            )
320                         {
321                                 change_tmp = change_point[i];
322                                 change_point[i] = change_point[i-1];
323                                 change_point[i-1] = change_tmp;
324                                 still_changing=1;
325                         }
326                 }
327         }
328
329         /* create a new bios memory map, removing overlaps */
330         overlap_entries=0;       /* number of entries in the overlap table */
331         new_bios_entry=0;        /* index for creating new bios map entries */
332         last_type = 0;           /* start with undefined memory type */
333         last_addr = 0;           /* start with 0 as last starting address */
334         /* loop through change-points, determining effect on the new bios map */
335         for (chgidx=0; chgidx < chg_nr; chgidx++)
336         {
337                 /* keep track of all overlapping bios entries */
                /* addr equal to the entry's own start means this is a start point */
338                 if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
339                 {
340                         /* add map entry to overlap list (> 1 entry implies an overlap) */
341                         overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
342                 }
343                 else
344                 {
345                         /* remove entry from list (order independent, so swap with last) */
346                         for (i=0; i<overlap_entries; i++)
347                         {
348                                 if (overlap_list[i] == change_point[chgidx]->pbios)
349                                         overlap_list[i] = overlap_list[overlap_entries-1];
350                         }
351                         overlap_entries--;
352                 }
353                 /* if there are overlapping entries, decide which "type" to use */
354                 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
355                 current_type = 0;
356                 for (i=0; i<overlap_entries; i++)
357                         if (overlap_list[i]->type > current_type)
358                                 current_type = overlap_list[i]->type;
359                 /* continue building up new bios map based on this information */
                /* a type change closes the entry being built (if any) and, unless
                   the new type is 0 (no coverage), opens the next one here */
360                 if (current_type != last_type)  {
361                         if (last_type != 0)      {
362                                 new_bios[new_bios_entry].size =
363                                         change_point[chgidx]->addr - last_addr;
364                                 /* move forward only if the new size was non-zero */
365                                 if (new_bios[new_bios_entry].size != 0)
366                                         if (++new_bios_entry >= E820MAX)
367                                                 break;  /* no more space left for new bios entries */
368                         }
369                         if (current_type != 0)  {
370                                 new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
371                                 new_bios[new_bios_entry].type = current_type;
372                                 last_addr=change_point[chgidx]->addr;
373                         }
374                         last_type = current_type;
375                 }
376         }
377         new_nr = new_bios_entry;   /* retain count for new bios entries */
378
379         /* copy new bios mapping into original location */
380         memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
381         *pnr_map = new_nr;
382
383         return 0;
384 }
385
386 /*
387  * Copy the BIOS e820 map into a safe place.
388  *
389  * Sanity-check it while we're at it..
390  *
391  * If we're lucky and live on a modern system, the setup code
392  * will have given us a memory map that we can use to properly
393  * set up memory.  If we aren't, we'll fake a memory map.
394  *
395  * We check to see that the memory map contains at least 2 elements
396  * before we'll use it, because the detection code in setup.S may
397  * not be perfect and most every PC known to man has two memory
398  * regions: one from 0 to 640k, and one from 1mb up.  (The IBM
399  * thinkpad 560x, for example, does not cooperate with the memory
400  * detection code.)
401  */
402 static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
403 {
404         /* Only one memory region (or negative)? Ignore it */
405         if (nr_map < 2)
406                 return -1;
407
408         do {
409                 unsigned long long start = biosmap->addr;
410                 unsigned long long size = biosmap->size;
411                 unsigned long long end = start + size;
412                 unsigned long type = biosmap->type;
413
414                 /* Overflow in 64 bits? Ignore the memory map. */
415                 if (start > end)
416                         return -1;
417
418                 /*
419                  * Some BIOSes claim RAM in the 640k - 1M region.
420                  * Not right. Fix it up.
421                  */
422                 if (type == E820_RAM) {
423                         if (start < 0x100000ULL && end > 0xA0000ULL) {
424                                 if (start < 0xA0000ULL)
425                                         add_memory_region(start, 0xA0000ULL-start, type);
426                                 if (end <= 0x100000ULL)
427                                         continue;
428                                 start = 0x100000ULL;
429                                 size = end - start;
430                         }
431                 }
432                 add_memory_region(start, size, type);
433         } while (biosmap++,--nr_map);
434         return 0;
435 }
436
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
/* Copies of the BIOS EDD data, filled in by copy_edd(). */
unsigned char eddnr;
struct edd_info edd[EDDMAXNR];
unsigned int edd_disk80_sig;
#ifdef CONFIG_EDD_MODULE
EXPORT_SYMBOL(eddnr);
EXPORT_SYMBOL(edd);
EXPORT_SYMBOL(edd_disk80_sig);
#endif
/**
 * copy_edd() - Save the BIOS EDD information.
 *
 * Fills edd[] from EDD_BUF and stores EDD_NR and DISK80_SIGNATURE in
 * eddnr and edd_disk80_sig.  Compiles to nothing when neither CONFIG_EDD
 * nor CONFIG_EDD_MODULE is set.
 */
static inline void copy_edd(void)
{
     memcpy(edd, EDD_BUF, sizeof edd);
     eddnr = EDD_NR;
     edd_disk80_sig = DISK80_SIGNATURE;
}
#else
#define copy_edd() do {} while (0)
#endif
460
461 /*
462  * Do NOT EVER look at the BIOS memory size location.
463  * It does not work on many machines.
464  */
465 #define LOWMEMSIZE()    (0x9f000)
466
467 static void __init setup_memory_region(void)
468 {
469         char *who = machine_specific_memory_setup();
470         printk(KERN_INFO "BIOS-provided physical RAM map:\n");
471         print_memory_map(who);
472 } /* setup_memory_region */
473
474
/*
 * parse_cmdline_early - act on the boot options that are needed early and
 * build the filtered command line.
 *
 * @cmdline_p: on return, *cmdline_p points at command_line.
 *
 * saved_command_line is scanned one character at a time and copied into
 * command_line (at most COMMAND_LINE_SIZE-1 characters).  "mem=", "memmap="
 * and "highmem=" advance the read position past their value, so their text
 * is not copied; for "mem=" and "memmap=" the separating space in front of
 * the option is removed as well.  All other options recognised here are
 * acted on but stay in the copy.
 *
 * If a "mem=" or "memmap=" option changed the map through limit_regions()
 * or "exactmap", the resulting map is logged with the label "user".
 */
475 static void __init parse_cmdline_early (char ** cmdline_p)
476 {
        /* c is the previously copied character; it starts as ' ' so that an
           option at the very beginning of the line is recognised too */
477         char c = ' ', *to = command_line, *from = saved_command_line;
478         int len = 0;
479         int userdef = 0;
480
481         /* Save unparsed command line copy for /proc/cmdline */
482         saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
483
484         for (;;) {
485                 /*
486                  * "mem=nopentium" disables the 4MB page tables.
487                  * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
488                  * to <mem>, overriding the bios size.
489                  * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
490                  * <start> to <start>+<mem>, overriding the bios size.
491                  *
492                  * HPA tells me bootloaders need to parse mem=, so no new
493                  * option should be mem=  [also see Documentation/i386/boot.txt]
494                  */
495                 if (c == ' ' && !memcmp(from, "mem=", 4)) {
                        /* drop the space that was copied in front of the option */
496                         if (to != command_line)
497                                 to--;
498                         if (!memcmp(from+4, "nopentium", 9)) {
499                                 from += 9+4;
500                                 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
501                                 disable_pse = 1;
502                         } else {
503                                 /* If the user specifies memory size, we
504                                  * limit the BIOS-provided memory map to
505                                  * that size. exactmap can be used to specify
506                                  * the exact map. mem=number can be used to
507                                  * trim the existing memory map.
508                                  */
509                                 unsigned long long mem_size;
510  
                                /* memparse() also moves 'from' past the number */
511                                 mem_size = memparse(from+4, &from);
512                                 limit_regions(mem_size);
513                                 userdef=1;
514                         }
515                 }
516
517                 if (c == ' ' && !memcmp(from, "memmap=", 7)) {
518                         if (to != command_line)
519                                 to--;
520                         if (!memcmp(from+7, "exactmap", 8)) {
                                /* discard the BIOS map; later memmap= options rebuild it */
521                                 from += 8+7;
522                                 e820.nr_map = 0;
523                                 userdef = 1;
524                         } else {
525                                 /* If the user specifies memory size, we
526                                  * limit the BIOS-provided memory map to
527                                  * that size. exactmap can be used to specify
528                                  * the exact map. mem=number can be used to
529                                  * trim the existing memory map.
530                                  */
                                /* The character after the size selects the action:
                                 *   '@' adds a RAM region at the given start,
                                 *   '#' adds an ACPI data region,
                                 *   '$' adds a reserved region,
                                 *   anything else trims the map to the size.
                                 */
531                                 unsigned long long start_at, mem_size;
532  
533                                 mem_size = memparse(from+7, &from);
534                                 if (*from == '@') {
535                                         start_at = memparse(from+1, &from);
536                                         add_memory_region(start_at, mem_size, E820_RAM);
537                                 } else if (*from == '#') {
538                                         start_at = memparse(from+1, &from);
539                                         add_memory_region(start_at, mem_size, E820_ACPI);
540                                 } else if (*from == '$') {
541                                         start_at = memparse(from+1, &from);
542                                         add_memory_region(start_at, mem_size, E820_RESERVED);
543                                 } else {
544                                         limit_regions(mem_size);
545                                         userdef=1;
546                                 }
547                         }
548                 }
549
                /*
                 * NOTE(review): every "else if" from here to the end of the
                 * CONFIG_ACPI_BOOT section continues the "memmap=" if above.
                 * These tests therefore do not require c == ' ' and are made
                 * at every character position, so they also match in the
                 * middle of a word.  With CONFIG_X86_SMP unset, the chain
                 * starts at "acpi=off" instead.
                 */
550 #ifdef  CONFIG_X86_SMP
551                 /*
552                  * If the BIOS enumerates physical processors before logical,
553                  * maxcpus=N at enumeration-time can be used to disable HT.
554                  */
555                 else if (!memcmp(from, "maxcpus=", 8)) {
556                         extern unsigned int maxcpus;
557
558                         maxcpus = simple_strtoul(from + 8, NULL, 0);
559                 }
560 #endif
561
562 #ifdef CONFIG_ACPI_BOOT
563                 /* "acpi=off" disables both ACPI table parsing and interpreter */
564                 else if (!memcmp(from, "acpi=off", 8)) {
565                         disable_acpi();
566                 }
567
568                 /* acpi=force to over-ride black-list */
569                 else if (!memcmp(from, "acpi=force", 10)) {
570                         acpi_force = 1;
571                         acpi_ht = 1;
572                         acpi_disabled = 0;
573                 }
574
575                 /* acpi=strict disables out-of-spec workarounds */
576                 else if (!memcmp(from, "acpi=strict", 11)) {
577                         acpi_strict = 1;
578                 }
579
580                 /* Limit ACPI just to boot-time to enable HT */
581                 else if (!memcmp(from, "acpi=ht", 7)) {
                        /* an earlier "acpi=force" keeps ACPI enabled */
582                         if (!acpi_force)
583                                 disable_acpi();
584                         acpi_ht = 1;
585                 }
586                 
587                 /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
588                 else if (!memcmp(from, "pci=noacpi", 10)) {
589                         acpi_disable_pci();
590                 }
591                 /* "acpi=noirq" disables ACPI interrupt routing */
592                 else if (!memcmp(from, "acpi=noirq", 10)) {
593                         acpi_noirq_set();
594                 }
595
                /* acpi_sci=: edge/level store 1/3 in .trigger,
                   high/low store 1/3 in .polarity */
596                 else if (!memcmp(from, "acpi_sci=edge", 13))
597                         acpi_sci_flags.trigger =  1;
598
599                 else if (!memcmp(from, "acpi_sci=level", 14))
600                         acpi_sci_flags.trigger = 3;
601
602                 else if (!memcmp(from, "acpi_sci=high", 13))
603                         acpi_sci_flags.polarity = 1;
604
605                 else if (!memcmp(from, "acpi_sci=low", 12))
606                         acpi_sci_flags.polarity = 3;
607
608 #ifdef CONFIG_X86_IO_APIC
609                 else if (!memcmp(from, "acpi_skip_timer_override", 24))
610                         acpi_skip_timer_override = 1;
611 #endif
612
613 #ifdef CONFIG_X86_LOCAL_APIC
614                 /* disable IO-APIC */
615                 else if (!memcmp(from, "noapic", 6))
616                         disable_ioapic_setup();
617 #endif /* CONFIG_X86_LOCAL_APIC */
618 #endif /* CONFIG_ACPI_BOOT */
619
620                 /*
621                  * highmem=size forces highmem to be exactly 'size' bytes.
622                  * This works even on boxes that have no highmem otherwise.
623                  * This also works to reduce highmem size on bigger boxes.
624                  */
                /* the byte count is converted to pages before it is stored */
625                 if (c == ' ' && !memcmp(from, "highmem=", 8))
626                         highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
627         
                /* copy the next character; stop at the end of the string or
                   when the output buffer would be full */
628                 c = *(from++);
629                 if (!c)
630                         break;
631                 if (COMMAND_LINE_SIZE <= ++len)
632                         break;
633                 *(to++) = c;
634         }
635         *to = '\0';
636         *cmdline_p = command_line;
637         if (userdef) {
638                 printk(KERN_INFO "user-defined physical RAM map:\n");
639                 print_memory_map("user");
640         }
641 }
642
643 /*
644  * Callback for efi_memory_walk.
645  */
646 static int __init
647 efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
648 {
649         unsigned long *max_pfn = arg, pfn;
650
651         if (start < end) {
652                 pfn = PFN_UP(end -1);
653                 if (pfn > *max_pfn)
654                         *max_pfn = pfn;
655         }
656         return 0;
657 }
658
659
660 /*
661  * Find the highest page frame number we have available
662  */
663 void __init find_max_pfn(void)
664 {
665         int i;
666
667         max_pfn = 0;
668         if (efi_enabled) {
669                 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
670                 return;
671         }
672
673         for (i = 0; i < e820.nr_map; i++) {
674                 unsigned long start, end;
675                 /* RAM? */
676                 if (e820.map[i].type != E820_RAM)
677                         continue;
678                 start = PFN_UP(e820.map[i].addr);
679                 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
680                 if (start >= end)
681                         continue;
682                 if (end > max_pfn)
683                         max_pfn = end;
684         }
685 }
686
687 /*
688  * Determine low and high memory ranges:
689  */
690 unsigned long __init find_max_low_pfn(void)
691 {
692         unsigned long max_low_pfn;
693
694         max_low_pfn = max_pfn;
695         if (max_low_pfn > MAXMEM_PFN) {
696                 if (highmem_pages == -1)
697                         highmem_pages = max_pfn - MAXMEM_PFN;
698                 if (highmem_pages + MAXMEM_PFN < max_pfn)
699                         max_pfn = MAXMEM_PFN + highmem_pages;
700                 if (highmem_pages + MAXMEM_PFN > max_pfn) {
701                         printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
702                         highmem_pages = 0;
703                 }
704                 max_low_pfn = MAXMEM_PFN;
705 #ifndef CONFIG_HIGHMEM
706                 /* Maximum memory usable is what is directly addressable */
707                 printk(KERN_WARNING "Warning only %ldMB will be used.\n",
708                                         MAXMEM>>20);
709                 if (max_pfn > MAX_NONPAE_PFN)
710                         printk(KERN_WARNING "Use a PAE enabled kernel.\n");
711                 else
712                         printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
713                 max_pfn = MAXMEM_PFN;
714 #else /* !CONFIG_HIGHMEM */
715 #ifndef CONFIG_X86_PAE
716                 if (max_pfn > MAX_NONPAE_PFN) {
717                         max_pfn = MAX_NONPAE_PFN;
718                         printk(KERN_WARNING "Warning only 4GB will be used.\n");
719                         printk(KERN_WARNING "Use a PAE enabled kernel.\n");
720                 }
721 #endif /* !CONFIG_X86_PAE */
722 #endif /* !CONFIG_HIGHMEM */
723         } else {
724                 if (highmem_pages == -1)
725                         highmem_pages = 0;
726 #ifdef CONFIG_HIGHMEM
727                 if (highmem_pages >= max_pfn) {
728                         printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
729                         highmem_pages = 0;
730                 }
731                 if (highmem_pages) {
732                         if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
733                                 printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
734                                 highmem_pages = 0;
735                         }
736                         max_low_pfn -= highmem_pages;
737                 }
738 #else
739                 if (highmem_pages)
740                         printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
741 #endif
742         }
743         return max_low_pfn;
744 }
745
746 #ifndef CONFIG_DISCONTIGMEM
747
748 /*
749  * Free all available memory for boot time allocation.  Used
750  * as a callback function by efi_memory_walk()
751  */
752
753 static int __init
754 free_available_memory(unsigned long start, unsigned long end, void *arg)
755 {
756         /* check max_low_pfn */
757         if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
758                 return 0;
759         if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
760                 end = (max_low_pfn + 1) << PAGE_SHIFT;
761         if (start < end)
762                 free_bootmem(start, end - start);
763
764         return 0;
765 }
766 /*
767  * Register fully available low RAM pages with the bootmem allocator.
768  */
769 static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
770 {
771         int i;
772
773         if (efi_enabled) {
774                 efi_memmap_walk(free_available_memory, NULL);
775                 return;
776         }
777         for (i = 0; i < e820.nr_map; i++) {
778                 unsigned long curr_pfn, last_pfn, size;
779                 /*
780                  * Reserve usable low memory
781                  */
782                 if (e820.map[i].type != E820_RAM)
783                         continue;
784                 /*
785                  * We are rounding up the start address of usable memory:
786                  */
787                 curr_pfn = PFN_UP(e820.map[i].addr);
788                 if (curr_pfn >= max_low_pfn)
789                         continue;
790                 /*
791                  * ... and at the end of the usable range downwards:
792                  */
793                 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
794
795                 if (last_pfn > max_low_pfn)
796                         last_pfn = max_low_pfn;
797
798                 /*
799                  * .. finally, did all the rounding and playing
800                  * around just make the area go away?
801                  */
802                 if (last_pfn <= curr_pfn)
803                         continue;
804
805                 size = last_pfn - curr_pfn;
806                 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
807         }
808 }
809
810 static unsigned long __init setup_memory(void)
811 {
812         unsigned long bootmap_size, start_pfn, max_low_pfn;
813
814         /*
815          * partially used pages are not usable - thus
816          * we are rounding upwards:
817          */
818         start_pfn = PFN_UP(init_pg_tables_end);
819
820         find_max_pfn();
821
822         max_low_pfn = find_max_low_pfn();
823
824 #ifdef CONFIG_HIGHMEM
825         highstart_pfn = highend_pfn = max_pfn;
826         if (max_pfn > max_low_pfn) {
827                 highstart_pfn = max_low_pfn;
828         }
829         printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
830                 pages_to_mb(highend_pfn - highstart_pfn));
831 #endif
832         printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
833                         pages_to_mb(max_low_pfn));
834         /*
835          * Initialize the boot-time allocator (with low memory only):
836          */
837         bootmap_size = init_bootmem(start_pfn, max_low_pfn);
838
839         register_bootmem_low_pages(max_low_pfn);
840
841         /*
842          * Reserve the bootmem bitmap itself as well. We do this in two
843          * steps (first step was init_bootmem()) because this catches
844          * the (very unlikely) case of us accidentally initializing the
845          * bootmem allocator with an invalid RAM area.
846          */
847         reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
848                          bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
849
850         /*
851          * reserve physical page 0 - it's a special BIOS page on many boxes,
852          * enabling clean reboots, SMP operation, laptop functions.
853          */
854         reserve_bootmem(0, PAGE_SIZE);
855
856     /* could be an AMD 768MPX chipset. Reserve a page  before VGA to prevent
857        PCI prefetch into it (errata #56). Usually the page is reserved anyways,
858        unless you have no PS/2 mouse plugged in. */
859         if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
860             boot_cpu_data.x86 == 6)
861              reserve_bootmem(0xa0000 - 4096, 4096);
862
863 #ifdef CONFIG_SMP
864         /*
865          * But first pinch a few for the stack/trampoline stuff
866          * FIXME: Don't need the extra page at 4K, but need to fix
867          * trampoline before removing it. (see the GDT stuff)
868          */
869         reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
870 #endif
871 #ifdef CONFIG_ACPI_SLEEP
872         /*
873          * Reserve low memory region for sleep support.
874          */
875         acpi_reserve_bootmem();
876 #endif
877 #ifdef CONFIG_X86_FIND_SMP_CONFIG
878         /*
879          * Find and reserve possible boot-time SMP configuration:
880          */
881         find_smp_config();
882 #endif
883
884 #ifdef CONFIG_BLK_DEV_INITRD
885         if (LOADER_TYPE && INITRD_START) {
886                 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
887                         reserve_bootmem(INITRD_START, INITRD_SIZE);
888                         initrd_start =
889                                 INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
890                         initrd_end = initrd_start+INITRD_SIZE;
891                 }
892                 else {
893                         printk(KERN_ERR "initrd extends beyond end of memory "
894                             "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
895                             INITRD_START + INITRD_SIZE,
896                             max_low_pfn << PAGE_SHIFT);
897                         initrd_start = 0;
898                 }
899         }
900 #endif
901         return max_low_pfn;
902 }
903 #else
904 extern unsigned long setup_memory(void);
905 #endif /* !CONFIG_DISCONTIGMEM */
906
907 /*
908  * Request address space for all standard RAM and ROM resources
909  * and also for regions reported as reserved by the e820.
910  */
911 static void __init
912 legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
913 {
914         int i;
915
916         probe_roms();
917         for (i = 0; i < e820.nr_map; i++) {
918                 struct resource *res;
919                 if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
920                         continue;
921                 res = alloc_bootmem_low(sizeof(struct resource));
922                 switch (e820.map[i].type) {
923                 case E820_RAM:  res->name = "System RAM"; break;
924                 case E820_ACPI: res->name = "ACPI Tables"; break;
925                 case E820_NVS:  res->name = "ACPI Non-volatile Storage"; break;
926                 default:        res->name = "reserved";
927                 }
928                 res->start = e820.map[i].addr;
929                 res->end = res->start + e820.map[i].size - 1;
930                 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
931                 request_resource(&iomem_resource, res);
932                 if (e820.map[i].type == E820_RAM) {
933                         /*
934                          *  We don't know which RAM region contains kernel data,
935                          *  so we try it repeatedly and let the resource manager
936                          *  test it.
937                          */
938                         request_resource(res, code_resource);
939                         request_resource(res, data_resource);
940                 }
941         }
942 }
943
944 /*
945  * Request address space for all standard resources
946  */
947 static void __init register_memory(unsigned long max_low_pfn)
948 {
949         unsigned long low_mem_size;
950
951         if (efi_enabled)
952                 efi_initialize_iomem_resources(&code_resource, &data_resource);
953         else
954                 legacy_init_iomem_resources(&code_resource, &data_resource);
955
956         /* EFI systems may still have VGA */
957         request_graphics_resource();
958
959         /* request I/O space for devices used on all i[345]86 PCs */
960         request_standard_io_resources();
961
962         /* Tell the PCI layer not to allocate too close to the RAM area.. */
963         low_mem_size = ((max_low_pfn << PAGE_SHIFT) + 0xfffff) & ~0xfffff;
964         if (low_mem_size > pci_mem_start)
965                 pci_mem_start = low_mem_size;
966 }
967
968 /* Use inline assembly to define this because the nops are defined 
969    as inline assembly strings in the include files and we cannot 
970    get them easily into strings. */
971 asm("\t.data\nintelnops: " 
972     GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
973     GENERIC_NOP7 GENERIC_NOP8); 
974 asm("\t.data\nk8nops: " 
975     K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
976     K8_NOP7 K8_NOP8); 
977 asm("\t.data\nk7nops: " 
978     K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
979     K7_NOP7 K7_NOP8); 
980     
981 extern unsigned char intelnops[], k8nops[], k7nops[];
982 static unsigned char *intel_nops[ASM_NOP_MAX+1] = { 
983      NULL,
984      intelnops,
985      intelnops + 1,
986      intelnops + 1 + 2,
987      intelnops + 1 + 2 + 3,
988      intelnops + 1 + 2 + 3 + 4,
989      intelnops + 1 + 2 + 3 + 4 + 5,
990      intelnops + 1 + 2 + 3 + 4 + 5 + 6,
991      intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
992 }; 
993 static unsigned char *k8_nops[ASM_NOP_MAX+1] = { 
994      NULL,
995      k8nops,
996      k8nops + 1,
997      k8nops + 1 + 2,
998      k8nops + 1 + 2 + 3,
999      k8nops + 1 + 2 + 3 + 4,
1000      k8nops + 1 + 2 + 3 + 4 + 5,
1001      k8nops + 1 + 2 + 3 + 4 + 5 + 6,
1002      k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1003 }; 
1004 static unsigned char *k7_nops[ASM_NOP_MAX+1] = { 
1005      NULL,
1006      k7nops,
1007      k7nops + 1,
1008      k7nops + 1 + 2,
1009      k7nops + 1 + 2 + 3,
1010      k7nops + 1 + 2 + 3 + 4,
1011      k7nops + 1 + 2 + 3 + 4 + 5,
1012      k7nops + 1 + 2 + 3 + 4 + 5 + 6,
1013      k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1014 }; 
1015 static struct nop { 
1016      int cpuid; 
1017      unsigned char **noptable; 
1018 } noptypes[] = { 
1019      { X86_FEATURE_K8, k8_nops }, 
1020      { X86_FEATURE_K7, k7_nops }, 
1021      { -1, 0 }
1022 }; 
1023
1024 /* Replace instructions with better alternatives for this CPU type.
1025
1026    This runs before SMP is initialized to avoid SMP problems with
1027    self modifying code. This implies that assymetric systems where
1028    APs have less capabilities than the boot processor are not handled. 
1029    In this case boot with "noreplacement". */ 
1030 void apply_alternatives(void *start, void *end) 
1031
1032         struct alt_instr *a; 
1033         int diff, i, k;
1034         unsigned char **noptable = intel_nops; 
1035         for (i = 0; noptypes[i].cpuid >= 0; i++) { 
1036                 if (boot_cpu_has(noptypes[i].cpuid)) { 
1037                         noptable = noptypes[i].noptable;
1038                         break;
1039                 }
1040         } 
1041         for (a = start; (void *)a < end; a++) { 
1042                 if (!boot_cpu_has(a->cpuid))
1043                         continue;
1044                 BUG_ON(a->replacementlen > a->instrlen); 
1045                 memcpy(a->instr, a->replacement, a->replacementlen); 
1046                 diff = a->instrlen - a->replacementlen; 
1047                 /* Pad the rest with nops */
1048                 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
1049                         k = diff;
1050                         if (k > ASM_NOP_MAX)
1051                                 k = ASM_NOP_MAX;
1052                         memcpy(a->instr + i, noptable[k], k); 
1053                 } 
1054         }
1055
1056
1057 static int no_replacement __initdata = 0; 
1058  
1059 void __init alternative_instructions(void)
1060 {
1061         extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
1062         if (no_replacement) 
1063                 return;
1064         apply_alternatives(__alt_instructions, __alt_instructions_end);
1065 }
1066
1067 static int __init noreplacement_setup(char *s)
1068
1069      no_replacement = 1; 
1070      return 0; 
1071
1072
1073 __setup("noreplacement", noreplacement_setup); 
1074
1075 /*
1076  * Determine if we were loaded by an EFI loader.  If so, then we have also been
1077  * passed the efi memmap, systab, etc., so we should use these data structures
1078  * for initialization.  Note, the efi init code path is determined by the
1079  * global efi_enabled. This allows the same kernel image to be used on existing
1080  * systems (with a traditional BIOS) as well as on EFI systems.
1081  */
1082 void __init setup_arch(char **cmdline_p)
1083 {
1084         unsigned long max_low_pfn;
1085
1086         memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
1087         pre_setup_arch_hook();
1088         early_cpu_init();
1089
1090         /*
1091          * FIXME: This isn't an official loader_type right
1092          * now but does currently work with elilo.
1093          * If we were configured as an EFI kernel, check to make
1094          * sure that we were loaded correctly from elilo and that
1095          * the system table is valid.  If not, then initialize normally.
1096          */
1097 #ifdef CONFIG_EFI
1098         if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
1099                 efi_enabled = 1;
1100 #endif
1101
1102         ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
1103         drive_info = DRIVE_INFO;
1104         screen_info = SCREEN_INFO;
1105         edid_info = EDID_INFO;
1106         apm_info.bios = APM_BIOS_INFO;
1107         ist_info = IST_INFO;
1108         saved_videomode = VIDEO_MODE;
1109         if( SYS_DESC_TABLE.length != 0 ) {
1110                 MCA_bus = SYS_DESC_TABLE.table[3] &0x2;
1111                 machine_id = SYS_DESC_TABLE.table[0];
1112                 machine_submodel_id = SYS_DESC_TABLE.table[1];
1113                 BIOS_revision = SYS_DESC_TABLE.table[2];
1114         }
1115         aux_device_present = AUX_DEVICE_INFO;
1116
1117 #ifdef CONFIG_BLK_DEV_RAM
1118         rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
1119         rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
1120         rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
1121 #endif
1122         ARCH_SETUP
1123         if (efi_enabled)
1124                 efi_init();
1125         else
1126                 setup_memory_region();
1127
1128         copy_edd();
1129
1130         if (!MOUNT_ROOT_RDONLY)
1131                 root_mountflags &= ~MS_RDONLY;
1132         init_mm.start_code = (unsigned long) _text;
1133         init_mm.end_code = (unsigned long) _etext;
1134         init_mm.end_data = (unsigned long) _edata;
1135         init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
1136
1137         code_resource.start = virt_to_phys(_text);
1138         code_resource.end = virt_to_phys(_etext)-1;
1139         data_resource.start = virt_to_phys(_etext);
1140         data_resource.end = virt_to_phys(_edata)-1;
1141
1142         parse_cmdline_early(cmdline_p);
1143
1144         max_low_pfn = setup_memory();
1145
1146         /*
1147          * NOTE: before this point _nobody_ is allowed to allocate
1148          * any memory using the bootmem allocator.
1149          */
1150
1151 #ifdef CONFIG_SMP
1152         smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
1153 #endif
1154         paging_init();
1155
1156 #ifdef CONFIG_EARLY_PRINTK
1157         {
1158                 char *s = strstr(*cmdline_p, "earlyprintk=");
1159                 if (s) {
1160                         extern void setup_early_printk(char *);
1161
1162                         setup_early_printk(s);
1163                         printk("early console enabled\n");
1164                 }
1165         }
1166 #endif
1167
1168
1169         dmi_scan_machine();
1170
1171 #ifdef CONFIG_X86_GENERICARCH
1172         generic_apic_probe(*cmdline_p);
1173 #endif  
1174         if (efi_enabled)
1175                 efi_map_memmap();
1176
1177         /*
1178          * Parse the ACPI tables for possible boot-time SMP configuration.
1179          */
1180         acpi_boot_init();
1181
1182 #ifdef CONFIG_X86_LOCAL_APIC
1183         if (smp_found_config)
1184                 get_smp_config();
1185 #endif
1186
1187         register_memory(max_low_pfn);
1188
1189 #ifdef CONFIG_VT
1190 #if defined(CONFIG_VGA_CONSOLE)
1191         if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
1192                 conswitchp = &vga_con;
1193 #elif defined(CONFIG_DUMMY_CONSOLE)
1194         conswitchp = &dummy_con;
1195 #endif
1196 #endif
1197 }
1198
1199 #include "setup_arch_post.h"
1200 /*
1201  * Local Variables:
1202  * mode:c
1203  * c-file-style:"k&r"
1204  * c-basic-offset:8
1205  * End:
1206  */