/*
 *  linux/arch/i386/mm/pgtable.c
 */

#include <linux/config.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>

#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
#include <asm/tlbflush.h>
#include <asm/atomic_kmap.h>
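
/*
 * Dump a per-node summary of page usage (total, highmem, reserved,
 * shared and swap-cached pages) to the console.
 */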
void show_mem(void)
{
        int total = 0, reserved = 0;
        int shared = 0, cached = 0;
        int highmem = 0;
        struct page *page;
        pg_data_t *pgdat;
        unsigned long i;

        printk("Mem-info:\n");
        show_free_areas();
        printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
        for_each_pgdat(pgdat) {
                for (i = 0; i < pgdat->node_spanned_pages; ++i) {
                        page = pgdat->node_mem_map + i;
                        total++;
                        if (PageHighMem(page))
                                highmem++;
                        if (PageReserved(page))
                                reserved++;
                        else if (PageSwapCache(page))
                                cached++;
                        else if (page_count(page))
                                shared += page_count(page) - 1;
                }
        }
        printk("%d pages of RAM\n", total);
        printk("%d pages of HIGHMEM\n", highmem);
        printk("%d reserved pages\n", reserved);
        printk("%d pages shared\n", shared);
        printk("%d pages swap cached\n", cached);
}
EXPORT_SYMBOL_GPL(show_mem);

/*
 * Associate a virtual page frame with a given physical page frame
 * and protection flags for that frame.
 */
static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
{
        pgd_t *pgd;
        pmd_t *pmd;
        pte_t *pte;

        pgd = swapper_pg_dir + pgd_index(vaddr);
        BUG_ON(pgd_none(*pgd));
        pmd = pmd_offset(pgd, vaddr);
        BUG_ON(pmd_none(*pmd));
        pte = pte_offset_kernel(pmd, vaddr);
        /* <pfn,flags> stored as-is, to permit clearing entries */
        set_pte(pte, pfn_pte(pfn, flags));

        /*
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
        __flush_tlb_one(vaddr);
}

/*
 * Associate a large virtual page frame with a given physical page frame
 * and protection flags for that frame. pfn is for the base of the page,
 * vaddr is what the page gets mapped to - both must be properly aligned.
 * The pmd must already be instantiated. Assumes PAE mode.
 */
void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
{
        pgd_t *pgd;
        pmd_t *pmd;

        if (vaddr & (PMD_SIZE-1)) {             /* vaddr is misaligned */
                printk("set_pmd_pfn: vaddr misaligned\n");
                return;
        }
        if (pfn & (PTRS_PER_PTE-1)) {           /* pfn is misaligned */
                printk("set_pmd_pfn: pfn misaligned\n");
                return;
        }
        pgd = swapper_pg_dir + pgd_index(vaddr);
        if (pgd_none(*pgd)) {
                printk("set_pmd_pfn: pgd_none\n");
                return;
        }
        pmd = pmd_offset(pgd, vaddr);
        set_pmd(pmd, pfn_pmd(pfn, flags));
        /*
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
        __flush_tlb_one(vaddr);
}

void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
{
        unsigned long address = __fix_to_virt(idx);

        if (idx >= __end_of_fixed_addresses) {
                BUG();
                return;
        }
        set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
}
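
/* Allocate and zero one pte page for kernel mappings; never from highmem. */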
pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
        pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);

        if (pte)
                clear_page(pte);
        return pte;
}
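
/*
 * Allocate and zero one pte page for user mappings; with CONFIG_HIGHPTE
 * the page may live in highmem and has to be kmapped before use.
 */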
struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
        struct page *pte;

#ifdef CONFIG_HIGHPTE
        pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0);
#else
        pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
#endif
        if (pte)
                clear_highpage(pte);
        return pte;
}
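
/* Slab constructor: pmd pages start out fully cleared. */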
void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags)
{
        memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
}
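
/*
 * Slab constructor for kernel pmds (4:4 split): clear the user part and
 * copy the NR_SHARED_PMDS kernel entries from swapper_pg_dir.
 */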
void kpmd_ctor(void *__pmd, kmem_cache_t *cache, unsigned long flags)
{
        pmd_t *kpmd, *pmd;

        kpmd = pmd_offset(&swapper_pg_dir[PTRS_PER_PGD-1],
                          (PTRS_PER_PMD - NR_SHARED_PMDS)*PMD_SIZE);
        pmd = (pmd_t *)__pmd + (PTRS_PER_PMD - NR_SHARED_PMDS);

        memset(__pmd, 0, (PTRS_PER_PMD - NR_SHARED_PMDS)*sizeof(pmd_t));
        memcpy(pmd, kpmd, NR_SHARED_PMDS*sizeof(pmd_t));
}

/*
 * List of all pgd's needed so it can invalidate entries in both cached
 * and uncached pgd's. This is essentially codepath-based locking
 * against pageattr.c; it is the unique case in which a valid change
 * of kernel pagetables can't be lazily synchronized by vmalloc faults.
 * vmalloc faults work because attached pagetables are never freed.
 * If the locking proves to be non-performant, a ticketing scheme with
 * checks at dup_mmap(), exec(), and other mmlist addition points
 * could be used. The locking scheme was chosen on the basis of
 * manfred's recommendations and having no core impact whatsoever.
 *
 * Lexicon for #ifdefless conditions to config options:
 *   (a) PTRS_PER_PMD == 1 means non-PAE.
 *   (b) PTRS_PER_PMD > 1 means PAE.
 *   (c) TASK_SIZE > PAGE_OFFSET means 4:4.
 *   (d) TASK_SIZE <= PAGE_OFFSET means non-4:4.
 */
spinlock_t pgd_lock = SPIN_LOCK_UNLOCKED;
struct page *pgd_list;
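
/*
 * The pgd list is singly linked through page->index (the next pointer);
 * page->private points back at whatever holds the pointer to this page,
 * i.e. the previous entry's index field, or pgd_list itself for the head,
 * so deletion never needs to walk the list.
 */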
static inline void pgd_list_add(pgd_t *pgd)
{
        struct page *page = virt_to_page(pgd);

        page->index = (unsigned long)pgd_list;
        if (pgd_list)
                pgd_list->private = (unsigned long)&page->index;
        pgd_list = page;
        page->private = (unsigned long)&pgd_list;
}

static inline void pgd_list_del(pgd_t *pgd)
{
        struct page *next, **pprev, *page = virt_to_page(pgd);

        next = (struct page *)page->index;
        pprev = (struct page **)page->private;
        *pprev = next;
        if (next)
                next->private = (unsigned long)pprev;
}
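
/*
 * Slab constructor for pgds: copy the kernel mappings from swapper_pg_dir
 * and, in the non-PAE non-4:4 case, add the pgd to pgd_list under pgd_lock
 * so pageattr.c can later fix up kernel mappings in every pgd.
 */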
void pgd_ctor(void *__pgd, kmem_cache_t *cache, unsigned long unused)
{
        pgd_t *pgd = __pgd;
        unsigned long flags;

        if (PTRS_PER_PMD == 1) {
                if (TASK_SIZE <= PAGE_OFFSET)
                        spin_lock_irqsave(&pgd_lock, flags);
                else
                        memcpy(&pgd[PTRS_PER_PGD - NR_SHARED_PMDS],
                               &swapper_pg_dir[PTRS_PER_PGD - NR_SHARED_PMDS],
                               NR_SHARED_PMDS*sizeof(pgd_t));
        }

        if (TASK_SIZE <= PAGE_OFFSET)
                memcpy(&pgd[USER_PTRS_PER_PGD],
                       &swapper_pg_dir[USER_PTRS_PER_PGD],
                       (PTRS_PER_PGD - USER_PTRS_PER_PGD)*sizeof(pgd_t));

        if (PTRS_PER_PMD > 1)
                return;

        if (TASK_SIZE > PAGE_OFFSET)
                memset(pgd, 0, (PTRS_PER_PGD - NR_SHARED_PMDS)*sizeof(pgd_t));
        else {
                pgd_list_add(pgd);
                spin_unlock_irqrestore(&pgd_lock, flags);
                memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
        }
}

/* Never called when PTRS_PER_PMD > 1 || TASK_SIZE > PAGE_OFFSET */
void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
{
        unsigned long flags; /* can be called from interrupt context */

        spin_lock_irqsave(&pgd_lock, flags);
        pgd_list_del(pgd);
        spin_unlock_irqrestore(&pgd_lock, flags);
}
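
/*
 * Allocate a pgd; in the PAE case also preallocate the user pmds and
 * install them with the present bit set (the "+ 1" in set_pgd() below).
 */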
pgd_t *pgd_alloc(struct mm_struct *mm)
{
        int i;
        pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);

        if (PTRS_PER_PMD == 1 || !pgd)
                return pgd;

        /*
         * In the 4G userspace case alias the top 16 MB virtual
         * memory range into the user mappings as well (these
         * include the trampoline and CPU data structures).
         */
        for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
                pmd_t *pmd;

                if (TASK_SIZE > PAGE_OFFSET && i == USER_PTRS_PER_PGD - 1)
                        pmd = kmem_cache_alloc(kpmd_cache, GFP_KERNEL);
                else
                        pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
                if (!pmd)
                        goto out_oom;
                set_pgd(&pgd[i], __pgd(1 + __pa((u64)((u32)pmd))));
        }
        return pgd;

out_oom:
        /*
         * we don't have to handle the kpmd_cache here, since it's the
         * last allocation, and has either nothing to free or when it
         * succeeds the whole operation succeeds.
         */
        for (i--; i >= 0; i--)
                kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
        kmem_cache_free(pgd_cache, pgd);
        return NULL;
}
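
/* Release the pmds referenced by a PAE pgd, then free the pgd itself. */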
void pgd_free(pgd_t *pgd)
{
        int i;

        /* in the non-PAE case, clear_page_tables() clears user pgd entries */
        if (PTRS_PER_PMD == 1)
                goto out_gone;

        /* in the PAE case user pgd entries are overwritten before usage */
        for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
                pmd_t *pmd = __va(pgd_val(pgd[i]) - 1);

                /*
                 * only userspace pmd's are cleared for us
                 * by mm/memory.c; it's a slab cache invariant
                 * that we must separate the kernel pmd slab at
                 * all times, else we'll have bad pmd's.
                 */
                if (TASK_SIZE > PAGE_OFFSET && i == USER_PTRS_PER_PGD - 1)
                        kmem_cache_free(kpmd_cache, pmd);
                else
                        kmem_cache_free(pmd_cache, pmd);
        }
out_gone:
        kmem_cache_free(pgd_cache, pgd);
}