Merge to Fedora kernel-2.6.18-1.2224_FC5 patched with stable patch-2.6.18.1-vs2.0...
linux-2.6.git: arch/i386/mm/pgtable-xen.c
/*
 *  linux/arch/i386/mm/pgtable.c
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>

#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/io.h>
#include <asm/mmu_context.h>

#include <xen/features.h>
#include <xen/foreign_page.h>
#include <asm/hypervisor.h>

static void pgd_test_and_unpin(pgd_t *pgd);

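/* Dump a summary of system memory usage to the kernel log. */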
void show_mem(void)
{
	int total = 0, reserved = 0;
	int shared = 0, cached = 0;
	int highmem = 0;
	struct page *page;
	pg_data_t *pgdat;
	unsigned long i;
	unsigned long flags;

	printk(KERN_INFO "Mem-info:\n");
	show_free_areas();
	printk(KERN_INFO "Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
	for_each_online_pgdat(pgdat) {
		pgdat_resize_lock(pgdat, &flags);
		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
			page = pgdat_page_nr(pgdat, i);
			total++;
			if (PageHighMem(page))
				highmem++;
			if (PageReserved(page))
				reserved++;
			else if (PageSwapCache(page))
				cached++;
			else if (page_count(page))
				shared += page_count(page) - 1;
		}
		pgdat_resize_unlock(pgdat, &flags);
	}
	printk(KERN_INFO "%d pages of RAM\n", total);
	printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
	printk(KERN_INFO "%d reserved pages\n", reserved);
	printk(KERN_INFO "%d pages shared\n", shared);
	printk(KERN_INFO "%d pages swap cached\n", cached);

	printk(KERN_INFO "%lu pages dirty\n", global_page_state(NR_FILE_DIRTY));
	printk(KERN_INFO "%lu pages writeback\n",
					global_page_state(NR_WRITEBACK));
	printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
	printk(KERN_INFO "%lu pages slab\n", global_page_state(NR_SLAB));
	printk(KERN_INFO "%lu pages pagetables\n",
					global_page_state(NR_PAGETABLE));
}

/*
 * Associate a virtual page frame with a given physical page frame
 * and protection flags for that frame.
 */
static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = swapper_pg_dir + pgd_index(vaddr);
	if (pgd_none(*pgd)) {
		BUG();
		return;
	}
	pud = pud_offset(pgd, vaddr);
	if (pud_none(*pud)) {
		BUG();
		return;
	}
	pmd = pmd_offset(pud, vaddr);
	if (pmd_none(*pmd)) {
		BUG();
		return;
	}
	pte = pte_offset_kernel(pmd, vaddr);
	/* <pfn,flags> stored as-is, to permit clearing entries */
	set_pte(pte, pfn_pte(pfn, flags));

	/*
	 * It's enough to flush this one mapping.
	 * (PGE mappings get flushed as well)
	 */
	__flush_tlb_one(vaddr);
}

/*
 * Associate a virtual page frame with a given machine page frame
 * and protection flags for that frame.
 */
static void set_pte_pfn_ma(unsigned long vaddr, unsigned long pfn,
			   pgprot_t flags)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = swapper_pg_dir + pgd_index(vaddr);
	if (pgd_none(*pgd)) {
		BUG();
		return;
	}
	pud = pud_offset(pgd, vaddr);
	if (pud_none(*pud)) {
		BUG();
		return;
	}
	pmd = pmd_offset(pud, vaddr);
	if (pmd_none(*pmd)) {
		BUG();
		return;
	}
	pte = pte_offset_kernel(pmd, vaddr);
	/* <pfn,flags> stored as-is, to permit clearing entries */
	set_pte(pte, pfn_pte_ma(pfn, flags));

	/*
	 * It's enough to flush this one mapping.
	 * (PGE mappings get flushed as well)
	 */
	__flush_tlb_one(vaddr);
}

/*
 * Associate a large virtual page frame with a given physical page frame
 * and protection flags for that frame. pfn is for the base of the page,
 * vaddr is what the page gets mapped to - both must be properly aligned.
 * The pmd must already be instantiated. Assumes PAE mode.
 */
void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	if (vaddr & (PMD_SIZE-1)) {		/* vaddr is misaligned */
		printk(KERN_WARNING "set_pmd_pfn: vaddr misaligned\n");
		return; /* BUG(); */
	}
	if (pfn & (PTRS_PER_PTE-1)) {		/* pfn is misaligned */
		printk(KERN_WARNING "set_pmd_pfn: pfn misaligned\n");
		return; /* BUG(); */
	}
	pgd = swapper_pg_dir + pgd_index(vaddr);
	if (pgd_none(*pgd)) {
		printk(KERN_WARNING "set_pmd_pfn: pgd_none\n");
		return; /* BUG(); */
	}
	pud = pud_offset(pgd, vaddr);
	pmd = pmd_offset(pud, vaddr);
	set_pmd(pmd, pfn_pmd(pfn, flags));
	/*
	 * It's enough to flush this one mapping.
	 * (PGE mappings get flushed as well)
	 */
	__flush_tlb_one(vaddr);
}

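/*
 * The fixmap area sits just below the Xen hypervisor hole, so __FIXADDR_TOP
 * starts two pages under HYPERVISOR_VIRT_START.  nr_fixmaps counts entries
 * already established so that the top can no longer be moved once any
 * fixmap is in use (see set_fixaddr_top()).
 */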
static int nr_fixmaps = 0;
unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - 2 * PAGE_SIZE);
EXPORT_SYMBOL(__FIXADDR_TOP);

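/*
 * Most fixmap slots are handed a machine address (e.g. the shared-info
 * page) and therefore go through the _ma variant; FIX_WP_TEST and, when
 * present, FIX_F00F_IDT refer to the domain's own pseudo-physical frames.
 */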
void __set_fixmap (enum fixed_addresses idx, maddr_t phys, pgprot_t flags)
{
	unsigned long address = __fix_to_virt(idx);

	if (idx >= __end_of_fixed_addresses) {
		BUG();
		return;
	}
	switch (idx) {
	case FIX_WP_TEST:
#ifdef CONFIG_X86_F00F_BUG
	case FIX_F00F_IDT:
#endif
		set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
		break;
	default:
		set_pte_pfn_ma(address, phys >> PAGE_SHIFT, flags);
		break;
	}
	nr_fixmaps++;
}

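/*
 * Relocate the top of the fixmap area to just below 'top'; only legal
 * before any fixmap entry has been established (BUG_ON otherwise).
 */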
void set_fixaddr_top(unsigned long top)
{
	BUG_ON(nr_fixmaps > 0);
	__FIXADDR_TOP = top - PAGE_SIZE;
}

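/*
 * Kernel pte pages are write-protected up front so that Xen will accept
 * them as page-table pages; this is a no-op when the guest has
 * XENFEAT_writable_page_tables.
 */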
pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
	if (pte)
		make_lowmem_page_readonly(pte, XENFEAT_writable_page_tables);
	return pte;
}

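/*
 * User pte pages: with CONFIG_HIGHPTE they may live in highmem and are left
 * alone here; otherwise they are tagged as foreign pages with pte_free() as
 * their destructor so the write protection can be undone on free.
 */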
struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
	struct page *pte;

#ifdef CONFIG_HIGHPTE
	pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
#else
	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
	if (pte) {
		SetPageForeign(pte, pte_free);
		init_page_count(pte);
	}
#endif
	return pte;
}

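/*
 * Free a pte page.  If it is still mapped read-only (i.e. it was in use as
 * a page table), restore a writable kernel mapping with a single
 * update_va_mapping hypercall before returning the page to the allocator.
 */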
void pte_free(struct page *pte)
{
	unsigned long va = (unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT);

	if (!pte_write(*virt_to_ptep(va)))
		BUG_ON(HYPERVISOR_update_va_mapping(
			va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0));

	ClearPageForeign(pte);
	init_page_count(pte);

	__free_page(pte);
}

void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags)
{
	memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
}

/*
 * List of all pgd's needed for non-PAE so it can invalidate entries
 * in both cached and uncached pgd's; not needed for PAE since the
 * kernel pmd is shared. If PAE were not to share the pmd a similar
 * tactic would be needed. This is essentially codepath-based locking
 * against pageattr.c; it is the unique case in which a valid change
 * of kernel pagetables can't be lazily synchronized by vmalloc faults.
 * vmalloc faults work because attached pagetables are never freed.
 * The locking scheme was chosen on the basis of manfred's
 * recommendations and having no core impact whatsoever.
 * -- wli
 */
DEFINE_SPINLOCK(pgd_lock);
struct page *pgd_list;

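/*
 * pgd_list is a singly-linked list threaded through the pgd pages
 * themselves: page->index holds the next element and page_private()
 * points at the previous node's next pointer (or at pgd_list itself for
 * the head), which keeps deletion O(1).
 */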
static inline void pgd_list_add(pgd_t *pgd)
{
	struct page *page = virt_to_page(pgd);
	page->index = (unsigned long)pgd_list;
	if (pgd_list)
		set_page_private(pgd_list, (unsigned long)&page->index);
	pgd_list = page;
	set_page_private(page, (unsigned long)&pgd_list);
}

static inline void pgd_list_del(pgd_t *pgd)
{
	struct page *next, **pprev, *page = virt_to_page(pgd);
	next = (struct page *)page->index;
	pprev = (struct page **)page_private(page);
	*pprev = next;
	if (next)
		set_page_private(next, (unsigned long)pprev);
}

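/*
 * Slab constructor for pgds: with PAE and a shared kernel pmd only the
 * kernel part of the pgd needs to be cloned; without PAE the user entries
 * are zeroed as well and the pgd is added to pgd_list under pgd_lock.
 */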
void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
{
	unsigned long flags;

	if (PTRS_PER_PMD > 1) {
		if (HAVE_SHARED_KERNEL_PMD)
			clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
					swapper_pg_dir + USER_PTRS_PER_PGD,
					KERNEL_PGD_PTRS);
	} else {
		spin_lock_irqsave(&pgd_lock, flags);
		clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
				swapper_pg_dir + USER_PTRS_PER_PGD,
				KERNEL_PGD_PTRS);
		memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
		pgd_list_add(pgd);
		spin_unlock_irqrestore(&pgd_lock, flags);
	}
}

/* never called when PTRS_PER_PMD > 1 */
void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
{
	unsigned long flags; /* can be called from interrupt context */

	spin_lock_irqsave(&pgd_lock, flags);
	pgd_list_del(pgd);
	spin_unlock_irqrestore(&pgd_lock, flags);

	pgd_test_and_unpin(pgd);
}

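/*
 * Allocate a pgd.  In the PAE case without a shared kernel pmd every pgd
 * also gets its own pmds: they are filled from the kernel mappings and
 * write-protected, and the pgd itself is kept below 4GB unless the
 * XENFEAT_pae_pgdir_above_4gb feature removes that restriction.
 */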
pgd_t *pgd_alloc(struct mm_struct *mm)
{
	int i;
	pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
	pmd_t **pmd;
	unsigned long flags;

	pgd_test_and_unpin(pgd);

	if (PTRS_PER_PMD == 1 || !pgd)
		return pgd;

	if (HAVE_SHARED_KERNEL_PMD) {
		for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
			pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
			if (!pmd)
				goto out_oom;
			set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
		}
		return pgd;
	}

	/*
	 * We can race save/restore (if we sleep during a GFP_KERNEL memory
	 * allocation). We therefore store virtual addresses of pmds as they
	 * do not change across save/restore, and poke the machine addresses
	 * into the pgdir under the pgd_lock.
	 */
	pmd = kmalloc(PTRS_PER_PGD * sizeof(pmd_t *), GFP_KERNEL);
	if (!pmd) {
		kmem_cache_free(pgd_cache, pgd);
		return NULL;
	}

	/* Allocate pmds, remember virtual addresses. */
	for (i = 0; i < PTRS_PER_PGD; ++i) {
		pmd[i] = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
		if (!pmd[i])
			goto out_oom;
	}

	spin_lock_irqsave(&pgd_lock, flags);

	/* Protect against save/restore: move below 4GB under pgd_lock. */
	if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) {
		int rc = xen_create_contiguous_region(
			(unsigned long)pgd, 0, 32);
		if (rc) {
			spin_unlock_irqrestore(&pgd_lock, flags);
			goto out_oom;
		}
	}

	/* Copy kernel pmd contents and write-protect the new pmds. */
	for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
		unsigned long v = (unsigned long)i << PGDIR_SHIFT;
		pgd_t *kpgd = pgd_offset_k(v);
		pud_t *kpud = pud_offset(kpgd, v);
		pmd_t *kpmd = pmd_offset(kpud, v);
		memcpy(pmd[i], kpmd, PAGE_SIZE);
		make_lowmem_page_readonly(
			pmd[i], XENFEAT_writable_page_tables);
	}

	/* It is safe to poke machine addresses of pmds under the pgd_lock. */
	for (i = 0; i < PTRS_PER_PGD; i++)
		set_pgd(&pgd[i], __pgd(1 + __pa(pmd[i])));

	/* Ensure this pgd gets picked up and pinned on save/restore. */
	pgd_list_add(pgd);

	spin_unlock_irqrestore(&pgd_lock, flags);

	kfree(pmd);

	return pgd;

out_oom:
	if (HAVE_SHARED_KERNEL_PMD) {
		for (i--; i >= 0; i--)
			kmem_cache_free(pmd_cache,
					(void *)__va(pgd_val(pgd[i])-1));
	} else {
		for (i--; i >= 0; i--)
			kmem_cache_free(pmd_cache, pmd[i]);
		kfree(pmd);
	}
	kmem_cache_free(pgd_cache, pgd);
	return NULL;
}

void pgd_free(pgd_t *pgd)
{
	int i;

	/*
	 * After this the pgd should not be pinned for the duration of this
	 * function's execution. We should never sleep and thus never race:
	 *  1. User pmds will not become write-protected under our feet due
	 *     to a concurrent mm_pin_all().
	 *  2. The machine addresses in PGD entries will not become invalid
	 *     due to a concurrent save/restore.
	 */
	pgd_test_and_unpin(pgd);

	/* in the PAE case user pgd entries are overwritten before usage */
	if (PTRS_PER_PMD > 1) {
		for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
			pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
			kmem_cache_free(pmd_cache, pmd);
		}

		if (!HAVE_SHARED_KERNEL_PMD) {
			unsigned long flags;
			spin_lock_irqsave(&pgd_lock, flags);
			pgd_list_del(pgd);
			spin_unlock_irqrestore(&pgd_lock, flags);

			for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
				pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
				make_lowmem_page_writable(
					pmd, XENFEAT_writable_page_tables);
				memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
				kmem_cache_free(pmd_cache, pmd);
			}

			if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
				xen_destroy_contiguous_region(
					(unsigned long)pgd, 0);
		}
	}

	/* in the non-PAE case, free_pgtables() clears user pgd entries */
	kmem_cache_free(pgd_cache, pgd);
}

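/*
 * Helpers for toggling write access on page-table pages.  Each takes the
 * Xen feature that would make the operation unnecessary (typically
 * XENFEAT_writable_page_tables) and becomes a no-op when that feature is
 * present; otherwise the pte is rewritten via an update_va_mapping
 * hypercall.
 */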
void make_lowmem_page_readonly(void *va, unsigned int feature)
{
	pte_t *pte;
	int rc;

	if (xen_feature(feature))
		return;

	pte = virt_to_ptep(va);
	rc = HYPERVISOR_update_va_mapping(
		(unsigned long)va, pte_wrprotect(*pte), 0);
	BUG_ON(rc);
}

void make_lowmem_page_writable(void *va, unsigned int feature)
{
	pte_t *pte;
	int rc;

	if (xen_feature(feature))
		return;

	pte = virt_to_ptep(va);
	rc = HYPERVISOR_update_va_mapping(
		(unsigned long)va, pte_mkwrite(*pte), 0);
	BUG_ON(rc);
}

void make_page_readonly(void *va, unsigned int feature)
{
	pte_t *pte;
	int rc;

	if (xen_feature(feature))
		return;

	pte = virt_to_ptep(va);
	rc = HYPERVISOR_update_va_mapping(
		(unsigned long)va, pte_wrprotect(*pte), 0);
	if (rc) /* fallback? */
		xen_l1_entry_update(pte, pte_wrprotect(*pte));
	if ((unsigned long)va >= (unsigned long)high_memory) {
		unsigned long pfn = pte_pfn(*pte);
#ifdef CONFIG_HIGHMEM
		if (pfn >= highstart_pfn)
			kmap_flush_unused(); /* flush stale writable kmaps */
		else
#endif
			make_lowmem_page_readonly(
				phys_to_virt(pfn << PAGE_SHIFT), feature);
	}
}

void make_page_writable(void *va, unsigned int feature)
{
	pte_t *pte;
	int rc;

	if (xen_feature(feature))
		return;

	pte = virt_to_ptep(va);
	rc = HYPERVISOR_update_va_mapping(
		(unsigned long)va, pte_mkwrite(*pte), 0);
	if (rc) /* fallback? */
		xen_l1_entry_update(pte, pte_mkwrite(*pte));
	if ((unsigned long)va >= (unsigned long)high_memory) {
		unsigned long pfn = pte_pfn(*pte);
#ifdef CONFIG_HIGHMEM
		if (pfn < highstart_pfn)
#endif
			make_lowmem_page_writable(
				phys_to_virt(pfn << PAGE_SHIFT), feature);
	}
}

void make_pages_readonly(void *va, unsigned int nr, unsigned int feature)
{
	if (xen_feature(feature))
		return;

	while (nr-- != 0) {
		make_page_readonly(va, feature);
		va = (void *)((unsigned long)va + PAGE_SIZE);
	}
}

void make_pages_writable(void *va, unsigned int nr, unsigned int feature)
{
	if (xen_feature(feature))
		return;

	while (nr-- != 0) {
		make_page_writable(va, feature);
		va = (void *)((unsigned long)va + PAGE_SIZE);
	}
}

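/*
 * pgd_walk() applies the given protection to every page-table page
 * reachable from pgd_base (highmem pte pages are skipped), then remaps the
 * pgd itself and flushes the TLB.  This is the mechanism behind pinning:
 * the whole tree is made read-only before it is handed to Xen.
 */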
static inline void pgd_walk_set_prot(void *pt, pgprot_t flags)
{
	struct page *page = virt_to_page(pt);
	unsigned long pfn = page_to_pfn(page);

	if (PageHighMem(page))
		return;
	BUG_ON(HYPERVISOR_update_va_mapping(
		(unsigned long)__va(pfn << PAGE_SHIFT),
		pfn_pte(pfn, flags), 0));
}

static void pgd_walk(pgd_t *pgd_base, pgprot_t flags)
{
	pgd_t *pgd = pgd_base;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	int    g, u, m;

	if (xen_feature(XENFEAT_auto_translated_physmap))
		return;

	for (g = 0; g < USER_PTRS_PER_PGD; g++, pgd++) {
		if (pgd_none(*pgd))
			continue;
		pud = pud_offset(pgd, 0);
		if (PTRS_PER_PUD > 1) /* not folded */
			pgd_walk_set_prot(pud, flags);
		for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
			if (pud_none(*pud))
				continue;
			pmd = pmd_offset(pud, 0);
			if (PTRS_PER_PMD > 1) /* not folded */
				pgd_walk_set_prot(pmd, flags);
			for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
				if (pmd_none(*pmd))
					continue;
				pte = pte_offset_kernel(pmd, 0);
				pgd_walk_set_prot(pte, flags);
			}
		}
	}

	BUG_ON(HYPERVISOR_update_va_mapping(
		(unsigned long)pgd_base,
		pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
		UVMF_TLB_FLUSH));
}

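/*
 * Pinning hands a page-table tree to Xen for validation: every level is
 * first made read-only, then xen_pgd_pin() registers the top-level table.
 * Unpinning reverses the two steps.  PG_pinned on the pgd's struct page
 * records the current state.
 */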
static void __pgd_pin(pgd_t *pgd)
{
	pgd_walk(pgd, PAGE_KERNEL_RO);
	xen_pgd_pin(__pa(pgd));
	set_bit(PG_pinned, &virt_to_page(pgd)->flags);
}

static void __pgd_unpin(pgd_t *pgd)
{
	xen_pgd_unpin(__pa(pgd));
	pgd_walk(pgd, PAGE_KERNEL);
	clear_bit(PG_pinned, &virt_to_page(pgd)->flags);
}

static void pgd_test_and_unpin(pgd_t *pgd)
{
	if (test_bit(PG_pinned, &virt_to_page(pgd)->flags))
		__pgd_unpin(pgd);
}

void mm_pin(struct mm_struct *mm)
{
	if (xen_feature(XENFEAT_writable_page_tables))
		return;
	spin_lock(&mm->page_table_lock);
	__pgd_pin(mm->pgd);
	spin_unlock(&mm->page_table_lock);
}

void mm_unpin(struct mm_struct *mm)
{
	if (xen_feature(XENFEAT_writable_page_tables))
		return;
	spin_lock(&mm->page_table_lock);
	__pgd_unpin(mm->pgd);
	spin_unlock(&mm->page_table_lock);
}

void mm_pin_all(void)
{
	struct page *page;

	/* Only pgds on the pgd_list please: none hidden in the slab cache. */
	kmem_cache_shrink(pgd_cache);

	if (xen_feature(XENFEAT_writable_page_tables))
		return;

	for (page = pgd_list; page; page = (struct page *)page->index) {
		if (!test_bit(PG_pinned, &page->flags))
			__pgd_pin((pgd_t *)page_address(page));
	}
}

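/*
 * mm lifetime hooks.  _arch_dup_mmap() ensures the mm's pgd is pinned when
 * an address space is duplicated; _arch_exit_mmap() switches the current
 * task off a dying mm early and unpins the pgd once the mm is no longer
 * shared and has no foreign mappings.
 */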
void _arch_dup_mmap(struct mm_struct *mm)
{
	if (!test_bit(PG_pinned, &virt_to_page(mm->pgd)->flags))
		mm_pin(mm);
}

void _arch_exit_mmap(struct mm_struct *mm)
{
	struct task_struct *tsk = current;

	task_lock(tsk);

	/*
	 * We aggressively remove defunct pgd from cr3. We execute unmap_vmas()
	 * *much* faster this way, as no tlb flushes means bigger wrpt batches.
	 */
	if (tsk->active_mm == mm) {
		tsk->active_mm = &init_mm;
		atomic_inc(&init_mm.mm_count);

		switch_mm(mm, &init_mm, tsk);

		atomic_dec(&mm->mm_count);
		BUG_ON(atomic_read(&mm->mm_count) == 0);
	}

	task_unlock(tsk);

	if (test_bit(PG_pinned, &virt_to_page(mm->pgd)->flags) &&
	    (atomic_read(&mm->mm_count) == 1) &&
	    !mm->context.has_foreign_mappings)
		mm_unpin(mm);
}