/*
 *  linux/arch/i386/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/slab.h>
#include <linux/proc_fs.h>
#include <linux/efi.h>
#include <linux/memory_hotplug.h>
#include <linux/initrd.h>
#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/hypervisor.h>
#include <asm/swiotlb.h>

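/*
 * XEN: defined elsewhere in the Xen support code; presumably records
 * which pseudo-physical pages belong to machine-contiguous regions.
 * Allocated and zeroed in mem_init() below.
 */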
extern unsigned long *contiguous_bitmap;

unsigned int __VMALLOC_RESERVE = 128 << 20;

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
unsigned long highstart_pfn, highend_pfn;

static int noinline do_test_wp_bit(void);

/*
 * Creates a middle page table and puts a pointer to it in the
 * given global directory entry. This only returns the gd entry
 * in non-PAE compilation mode, since the middle layer is folded.
 */
static pmd_t * __init one_md_table_init(pgd_t *pgd)
{
        pud_t *pud;
        pmd_t *pmd_table;

#ifdef CONFIG_X86_PAE
        pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
        make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables);
        set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
        pud = pud_offset(pgd, 0);
        if (pmd_table != pmd_offset(pud, 0))
                BUG();
#else
        pud = pud_offset(pgd, 0);
        pmd_table = pmd_offset(pud, 0);
#endif

        return pmd_table;
}

/*
 * Create a page table and place a pointer to it in a middle page
 * directory entry.
 */
static pte_t * __init one_page_table_init(pmd_t *pmd)
{
        if (pmd_none(*pmd)) {
                pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
                make_lowmem_page_readonly(page_table,
                                          XENFEAT_writable_page_tables);
                set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
                if (page_table != pte_offset_kernel(pmd, 0))
                        BUG();

                return page_table;
        }

        return pte_offset_kernel(pmd, 0);
}

/*
 * This function initializes a certain range of kernel virtual memory
 * with new bootmem page tables, everywhere page tables are missing in
 * the given range.
 */

/*
 * NOTE: The pagetables are allocated contiguously in physical space,
 * so we can cache the place of the first one and move around without
 * checking the pgd every time.
 */
static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        int pgd_idx, pmd_idx;
        unsigned long vaddr;

        vaddr = start;
        pgd_idx = pgd_index(vaddr);
        pmd_idx = pmd_index(vaddr);
        pgd = pgd_base + pgd_idx;

        for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
                if (pgd_none(*pgd))
                        one_md_table_init(pgd);
                pud = pud_offset(pgd, vaddr);
                pmd = pmd_offset(pud, vaddr);
                for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
                        if (vaddr < HYPERVISOR_VIRT_START && pmd_none(*pmd))
                                one_page_table_init(pmd);

                        vaddr += PMD_SIZE;
                }
                pmd_idx = 0;
        }
}

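/*
 * Note that this treats everything from PAGE_OFFSET up to __init_end
 * as "text", i.e. kernel text plus the early data/init sections, so
 * the physical mapping below keeps that whole range executable.
 */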
static inline int is_kernel_text(unsigned long addr)
{
        if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end)
                return 1;
        return 0;
}

/*
 * This maps the physical memory to kernel virtual address space, a total
 * of max_low_pfn pages, by creating page tables starting from address
 * PAGE_OFFSET.
 */
static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
{
        unsigned long pfn;
        pgd_t *pgd;
        pmd_t *pmd;
        pte_t *pte;
        int pgd_idx, pmd_idx, pte_ofs;

        unsigned long max_ram_pfn = xen_start_info->nr_pages;
        if (max_ram_pfn > max_low_pfn)
                max_ram_pfn = max_low_pfn;

        pgd_idx = pgd_index(PAGE_OFFSET);
        pgd = pgd_base + pgd_idx;
        pfn = 0;
        pmd_idx = pmd_index(PAGE_OFFSET);
        pte_ofs = pte_index(PAGE_OFFSET);

        for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
#ifdef CONFIG_XEN
                /*
                 * Native Linux doesn't have PAE paging enabled yet at
                 * this point.  When running as a Xen domain we are in
                 * PAE mode already, so we can't simply hook an empty
                 * pmd.  That would kill the mappings we are currently
                 * using ...
                 */
                pmd = pmd_offset(pud_offset(pgd, PAGE_OFFSET), PAGE_OFFSET);
#else
                pmd = one_md_table_init(pgd);
#endif
                if (pfn >= max_low_pfn)
                        continue;
                pmd += pmd_idx;
                for (; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) {
                        unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET;
                        if (address >= HYPERVISOR_VIRT_START)
                                continue;

                        /* Map with big pages if possible, otherwise create normal page tables. */
                        if (cpu_has_pse) {
                                unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;

                                if (is_kernel_text(address) || is_kernel_text(address2))
                                        set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
                                else
                                        set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
                                pfn += PTRS_PER_PTE;
                        } else {
                                pte = one_page_table_init(pmd);

                                pte += pte_ofs;
                                for (; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
                                        /* XEN: Only map initial RAM allocation. */
                                        if ((pfn >= max_ram_pfn) || pte_present(*pte))
                                                continue;
                                        if (is_kernel_text(address))
                                                set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
                                        else
                                                set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
                                }
                                pte_ofs = 0;
                        }
                }
                pmd_idx = 0;
        }
}

#ifndef CONFIG_XEN

static inline int page_kills_ppro(unsigned long pagenr)
{
        if (pagenr >= 0x70000 && pagenr <= 0x7003F)
                return 1;
        return 0;
}

#else

#define page_kills_ppro(p)      0

#endif

extern int is_available_memory(efi_memory_desc_t *);

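/*
 * Returns 1 if the page is usable RAM according to the firmware memory
 * map: the EFI memory map when EFI is enabled, the e820 map otherwise.
 * The start of each region is rounded up to a page boundary (hence the
 * PAGE_SIZE-1 adjustment), since a partial page is not usable RAM.
 */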
int page_is_ram(unsigned long pagenr)
{
        int i;
        unsigned long addr, end;

        if (efi_enabled) {
                efi_memory_desc_t *md;
                void *p;

                for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
                        md = p;
                        if (!is_available_memory(md))
                                continue;
                        addr = (md->phys_addr+PAGE_SIZE-1) >> PAGE_SHIFT;
                        end = (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >> PAGE_SHIFT;

                        if ((pagenr >= addr) && (pagenr < end))
                                return 1;
                }
                return 0;
        }

        for (i = 0; i < e820.nr_map; i++) {

                if (e820.map[i].type != E820_RAM)       /* not usable memory */
                        continue;
                /*
                 *      !!!FIXME!!! Some BIOSen report areas as RAM that
                 *      are not. Notably the 640->1Mb area. We need a sanity
                 *      check here.
                 */
                addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
                end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
                if ((pagenr >= addr) && (pagenr < end))
                        return 1;
        }
        return 0;
}

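/*
 * devmem_is_allowed() decides whether /dev/mem access to a given pfn is
 * permitted: the low 1MB is always allowed (BIOS and legacy uses), and
 * so is anything that is not RAM (device memory); direct access to RAM
 * proper is refused.
 */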
int devmem_is_allowed(unsigned long pagenr)
{
        if (pagenr <= 256)
                return 1;
        if (!page_is_ram(pagenr))
                return 1;
        return 0;
}

EXPORT_SYMBOL_GPL(page_is_ram);

#ifdef CONFIG_HIGHMEM
pte_t *kmap_pte;
pgprot_t kmap_prot;

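/*
 * Walk the kernel page tables down to the pte that maps the given
 * (fixmap/kmap) virtual address.
 */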
#define kmap_get_fixmap_pte(vaddr)                                      \
        pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), vaddr), (vaddr)), (vaddr))

static void __init kmap_init(void)
{
        unsigned long kmap_vstart;

        /* cache the first kmap pte */
        kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
        kmap_pte = kmap_get_fixmap_pte(kmap_vstart);

        kmap_prot = PAGE_KERNEL;
}

static void __init permanent_kmaps_init(pgd_t *pgd_base)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        unsigned long vaddr;

        vaddr = PKMAP_BASE;
        page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);

        pgd = swapper_pg_dir + pgd_index(vaddr);
        pud = pud_offset(pgd, vaddr);
        pmd = pmd_offset(pud, vaddr);
        pte = pte_offset_kernel(pmd, vaddr);
        pkmap_page_table = pte;
}

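/*
 * XEN: only pages inside the initial allocation
 * (xen_start_info->nr_pages) are actually backed at this point, so
 * only those are handed to the page allocator here; pages beyond it
 * are merely counted.
 */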
static void __meminit free_new_highpage(struct page *page, int pfn)
{
        init_page_count(page);
        if (pfn < xen_start_info->nr_pages)
                __free_page(page);
        totalhigh_pages++;
}

void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
{
        if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
                ClearPageReserved(page);
                free_new_highpage(page, pfn);
        } else
                SetPageReserved(page);
}

static int add_one_highpage_hotplug(struct page *page, unsigned long pfn)
{
        free_new_highpage(page, pfn);
        totalram_pages++;
#ifdef CONFIG_FLATMEM
        max_mapnr = max(pfn, max_mapnr);
#endif
        num_physpages++;
        return 0;
}

/*
 * Not currently handling the NUMA case.
 * Assuming single node and all memory that
 * has been added dynamically that would be
 * onlined here is in HIGHMEM.
 */
void online_page(struct page *page)
{
        ClearPageReserved(page);
        add_one_highpage_hotplug(page, page_to_pfn(page));
}


#ifdef CONFIG_NUMA
extern void set_highmem_pages_init(int);
#else
static void __init set_highmem_pages_init(int bad_ppro)
{
        int pfn;
        for (pfn = highstart_pfn; pfn < highend_pfn; pfn++)
                add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
        totalram_pages += totalhigh_pages;
}
#endif /* CONFIG_NUMA */

#else
#define kmap_init() do { } while (0)
#define permanent_kmaps_init(pgd_base) do { } while (0)
#define set_highmem_pages_init(bad_ppro) do { } while (0)
#endif /* CONFIG_HIGHMEM */

unsigned long long __PAGE_KERNEL = _PAGE_KERNEL;
EXPORT_SYMBOL(__PAGE_KERNEL);
unsigned long long __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;

#ifdef CONFIG_NUMA
extern void __init remap_numa_kva(void);
#else
#define remap_numa_kva() do {} while (0)
#endif

pgd_t *swapper_pg_dir;

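/*
 * XEN: unlike native, we inherit the initial page tables built by the
 * domain builder (xen_start_info->pt_base) rather than the ones set up
 * by head.S, and adopt that pgd as swapper_pg_dir / init_mm.pgd.
 */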
static void __init pagetable_init (void)
{
        unsigned long vaddr;
        pgd_t *pgd_base = (pgd_t *)xen_start_info->pt_base;

        swapper_pg_dir = pgd_base;
        init_mm.pgd    = pgd_base;

        /* Enable PSE if available */
        if (cpu_has_pse) {
                set_in_cr4(X86_CR4_PSE);
        }

        /* Enable PGE if available */
        if (cpu_has_pge) {
                set_in_cr4(X86_CR4_PGE);
                __PAGE_KERNEL |= _PAGE_GLOBAL;
                __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
        }

        kernel_physical_mapping_init(pgd_base);
        remap_numa_kva();

        /*
         * Fixed mappings, only the page table structure has to be
         * created - mappings will be set by set_fixmap():
         */
        vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
        page_table_range_init(vaddr, 0, pgd_base);

        permanent_kmaps_init(pgd_base);
}

#ifdef CONFIG_SOFTWARE_SUSPEND
/*
 * Swap suspend & friends need this for resume because things like the intel-agp
 * driver might have split up a kernel 4MB mapping.
 */
char __nosavedata swsusp_pg_dir[PAGE_SIZE]
        __attribute__ ((aligned (PAGE_SIZE)));

static inline void save_pg_dir(void)
{
        memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE);
}
#else
static inline void save_pg_dir(void)
{
}
#endif

void zap_low_mappings (void)
{
        int i;

        save_pg_dir();

        /*
         * Zap initial low-memory mappings.
         *
         * Note that "pgd_clear()" doesn't do it for
         * us, because pgd_clear() is a no-op on i386.
         */
        for (i = 0; i < USER_PTRS_PER_PGD; i++)
#if defined(CONFIG_X86_PAE) && !defined(CONFIG_XEN)
                set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
#else
                set_pgd(swapper_pg_dir+i, __pgd(0));
#endif
        flush_tlb_all();
}

static int disable_nx __initdata = 0;
u64 __supported_pte_mask __read_mostly = ~_PAGE_NX;
EXPORT_SYMBOL(__supported_pte_mask);

/*
 * noexec = on|off
 *
 * Control non executable mappings.
 *
 * on      Enable
 * off     Disable (disables exec-shield too)
 */
void __init noexec_setup(const char *str)
{
        if (!strncmp(str, "on", 2) && cpu_has_nx) {
                __supported_pte_mask |= _PAGE_NX;
                disable_nx = 0;
        } else if (!strncmp(str, "off", 3)) {
                disable_nx = 1;
                __supported_pte_mask &= ~_PAGE_NX;
                exec_shield = 0;
        }
}

int nx_enabled = 0;
#ifdef CONFIG_X86_PAE

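/*
 * Query CPUID level 0x80000001 for the NX feature (EDX bit 20) and, if
 * it is present and not disabled on the command line, set EFER.NX so
 * that _PAGE_NX can be used in ptes.
 */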
static void __init set_nx(void)
{
        unsigned int v[4], l, h;

        if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
                cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
                if ((v[3] & (1 << 20)) && !disable_nx) {
                        rdmsr(MSR_EFER, l, h);
                        l |= EFER_NX;
                        wrmsr(MSR_EFER, l, h);
                        nx_enabled = 1;
                        __supported_pte_mask |= _PAGE_NX;
                }
        }
}

/*
 * Enables/disables executability of a given kernel page and
 * returns the previous setting.
 */
int __init set_kernel_exec(unsigned long vaddr, int enable)
{
        pte_t *pte;
        int ret = 1;

        if (!nx_enabled)
                goto out;

        pte = lookup_address(vaddr);
        BUG_ON(!pte);

        if (!pte_exec_kernel(*pte))
                ret = 0;

        if (enable)
                pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
        else
                pte->pte_high |= 1 << (_PAGE_BIT_NX - 32);
        __flush_tlb_all();
out:
        return ret;
}

#endif

/*
 * paging_init() sets up the page tables - note that the first 8MB are
 * already mapped by head.S.
 *
 * This routine also unmaps the page at virtual kernel address 0, so
 * that we can trap those pesky NULL-reference errors in the kernel.
 */
void __init paging_init(void)
{
        int i;

#ifdef CONFIG_X86_PAE
        set_nx();
        if (nx_enabled)
                printk("NX (Execute Disable) protection: active\n");
        else
#endif
        if (exec_shield)
                printk("Using x86 segment limits to approximate NX protection\n");

        pagetable_init();

#if defined(CONFIG_X86_PAE) && !defined(CONFIG_XEN)
        /*
         * We will bail out later - printk doesn't work right now so
         * the user would just see a hanging kernel.  When running as
         * a Xen domain we are already in PAE mode at this point.
         */
        if (cpu_has_pae)
                set_in_cr4(X86_CR4_PAE);
#endif
        __flush_tlb_all();

        kmap_init();

        /* Switch to the real shared_info page, and clear the
         * dummy page. */
        set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
        HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
        memset(empty_zero_page, 0, sizeof(empty_zero_page));

        /* Setup mapping of lower 1st MB */
        for (i = 0; i < NR_FIX_ISAMAPS; i++)
                if (xen_start_info->flags & SIF_PRIVILEGED)
                        set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
                else
                        __set_fixmap(FIX_ISAMAP_BEGIN - i,
                                     virt_to_machine(empty_zero_page),
                                     PAGE_KERNEL_RO);
}

/*
 * Test if the WP bit works in supervisor mode. It isn't supported on 386's
 * and also on some strange 486's (NexGen etc.). All 586+'s are OK. This
 * used to involve black magic jumps to work around some nasty CPU bugs,
 * but fortunately the switch to using exceptions got rid of all that.
 */

static void __init test_wp_bit(void)
{
        printk("Checking if this processor honours the WP bit even in supervisor mode... ");

        /* Any page-aligned address will do, the test is non-destructive */
        __set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY);
        boot_cpu_data.wp_works_ok = do_test_wp_bit();
        clear_fixmap(FIX_WP_TEST);

        if (!boot_cpu_data.wp_works_ok) {
                printk("No.\n");
#ifdef CONFIG_X86_WP_WORKS_OK
                panic("This kernel doesn't support CPU's with broken WP. Recompile it for a 386!");
#endif
        } else {
                printk("Ok.\n");
        }
}

static void __init set_max_mapnr_init(void)
{
#ifdef CONFIG_HIGHMEM
        num_physpages = highend_pfn;
#else
        num_physpages = max_low_pfn;
#endif
#ifdef CONFIG_FLATMEM
        max_mapnr = num_physpages;
#endif
}

static struct kcore_list kcore_mem, kcore_vmalloc;

void __init mem_init(void)
{
        extern int ppro_with_ram_bug(void);
        int codesize, reservedpages, datasize, initsize;
        int tmp;
        int bad_ppro;
        unsigned long pfn;

        contiguous_bitmap = alloc_bootmem_low_pages(
                (max_low_pfn + 2*BITS_PER_LONG) >> 3);
        BUG_ON(!contiguous_bitmap);
        memset(contiguous_bitmap, 0, (max_low_pfn + 2*BITS_PER_LONG) >> 3);

#if defined(CONFIG_SWIOTLB)
        swiotlb_init();
#endif

#ifdef CONFIG_FLATMEM
        if (!mem_map)
                BUG();
#endif

        bad_ppro = ppro_with_ram_bug();

#ifdef CONFIG_HIGHMEM
        /* check that fixmap and pkmap do not overlap */
        if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) {
                printk(KERN_ERR "fixmap and kmap areas overlap - this will crash\n");
                printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n",
                                PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, FIXADDR_START);
                BUG();
        }
#endif

        set_max_mapnr_init();

#ifdef CONFIG_HIGHMEM
        high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
#else
        high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
#endif
        printk("vmalloc area: %lx-%lx, maxmem %lx\n",
               VMALLOC_START, VMALLOC_END, MAXMEM);
        BUG_ON(VMALLOC_START > VMALLOC_END);

        /* this will put all low memory onto the freelists */
        totalram_pages += free_all_bootmem();
        /* XEN: init and count low-mem pages outside initial allocation. */
        for (pfn = xen_start_info->nr_pages; pfn < max_low_pfn; pfn++) {
                ClearPageReserved(&mem_map[pfn]);
                init_page_count(&mem_map[pfn]);
                totalram_pages++;
        }

        reservedpages = 0;
        for (tmp = 0; tmp < max_low_pfn; tmp++)
                /*
                 * Only count reserved RAM pages
                 */
                if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
                        reservedpages++;

        set_highmem_pages_init(bad_ppro);

        codesize =  (unsigned long) &_etext - (unsigned long) &_text;
        datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
        initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;

        kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
        kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
                   VMALLOC_END-VMALLOC_START);

        printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
                (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
                num_physpages << (PAGE_SHIFT-10),
                codesize >> 10,
                reservedpages << (PAGE_SHIFT-10),
                datasize >> 10,
                initsize >> 10,
                (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
               );

#ifdef CONFIG_X86_PAE
        if (!cpu_has_pae)
                panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
#endif
        if (boot_cpu_data.wp_works_ok < 0)
                test_wp_bit();

        /*
         * Subtle. SMP is doing its boot stuff late (because it has to
         * fork idle threads) - but it also needs low mappings for the
         * protected-mode entry to work. We zap these entries only after
         * the WP-bit has been tested.
         */
#ifndef CONFIG_SMP
        zap_low_mappings();
#endif

        set_bit(PG_pinned, &virt_to_page(init_mm.pgd)->flags);
}

/*
 * This is for the non-NUMA, single node SMP system case.
 * Specifically, in the case of x86, we will always add
 * memory to highmem for now.
 */
#ifdef CONFIG_MEMORY_HOTPLUG
#ifndef CONFIG_NEED_MULTIPLE_NODES
int add_memory(u64 start, u64 size)
{
        struct pglist_data *pgdata = &contig_page_data;
        struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;

        return __add_pages(zone, start_pfn, nr_pages);
}

int remove_memory(u64 start, u64 size)
{
        return -EINVAL;
}
#endif
#endif

kmem_cache_t *pgd_cache;
kmem_cache_t *pmd_cache;

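/*
 * XEN: pgds are allocated as full, page-aligned pages here (rather
 * than PTRS_PER_PGD entries as on native), presumably because the
 * hypervisor validates and pins page tables at page granularity.
 */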
void __init pgtable_cache_init(void)
{
        if (PTRS_PER_PMD > 1) {
                pmd_cache = kmem_cache_create("pmd",
                                        PTRS_PER_PMD*sizeof(pmd_t),
                                        PTRS_PER_PMD*sizeof(pmd_t),
                                        0,
                                        pmd_ctor,
                                        NULL);
                if (!pmd_cache)
                        panic("pgtable_cache_init(): cannot create pmd cache");
        }
        pgd_cache = kmem_cache_create("pgd",
#ifndef CONFIG_XEN
                                PTRS_PER_PGD*sizeof(pgd_t),
                                PTRS_PER_PGD*sizeof(pgd_t),
#else
                                PAGE_SIZE,
                                PAGE_SIZE,
#endif
                                0,
                                pgd_ctor,
                                PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
        if (!pgd_cache)
                panic("pgtable_cache_init(): Cannot create pgd cache");
}

/*
 * This function cannot be __init, since exceptions don't work in that
 * section.  Put this after the callers, so that it cannot be inlined.
 */
static int noinline do_test_wp_bit(void)
{
        char tmp_reg;
        int flag;

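        /*
         * Try to write through the read-only FIX_WP_TEST pte.  'flag'
         * starts out as 1; if the write at label 1 goes through (WP not
         * honoured in supervisor mode), the xorl clears it to 0.  If
         * the write faults, the exception-table entry resumes execution
         * at label 2, skipping the xorl and leaving flag == 1.
         */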
        __asm__ __volatile__(
                "       movb %0,%1      \n"
                "1:     movb %1,%0      \n"
                "       xorl %2,%2      \n"
                "2:                     \n"
                ".section __ex_table,\"a\"\n"
                "       .align 4        \n"
                "       .long 1b,2b     \n"
                ".previous              \n"
                :"=m" (*(char *)fix_to_virt(FIX_WP_TEST)),
                 "=q" (tmp_reg),
                 "=r" (flag)
                :"2" (1)
                :"memory");

        return flag;
}

#ifdef CONFIG_DEBUG_RODATA

extern char __start_rodata, __end_rodata;
void mark_rodata_ro(void)
{
        unsigned long addr = (unsigned long)&__start_rodata;

        for (; addr < (unsigned long)&__end_rodata; addr += PAGE_SIZE)
                change_page_attr(virt_to_page(addr), 1, PAGE_KERNEL_RO);

        printk("Write protecting the kernel read-only data: %luk\n",
                        (unsigned long)(&__end_rodata - &__start_rodata) >> 10);

        /*
         * change_page_attr() requires a global_flush_tlb() call after it.
         * We do this after the printk so that if something went wrong in the
         * change, the printk gets out at least to give a better debug hint
         * of who is the culprit.
         */
        global_flush_tlb();
}
#endif

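/*
 * Freed init pages are poisoned with 0xcc (the int3 opcode) so that any
 * stale reference or jump into freed init code traps immediately rather
 * than misbehaving silently.
 */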
void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
        unsigned long addr;

        for (addr = begin; addr < end; addr += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(addr));
                init_page_count(virt_to_page(addr));
                memset((void *)addr, 0xcc, PAGE_SIZE);
                free_page(addr);
                totalram_pages++;
        }
        printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
}

void free_initmem(void)
{
        free_init_pages("unused kernel memory",
                        (unsigned long)(&__init_begin),
                        (unsigned long)(&__init_end));
}

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
        free_init_pages("initrd memory", start, end);
}
#endif