/*
 *  linux/arch/i386/mm/pgtable.c
 */

#include <linux/config.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>

#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/atomic_kmap.h>

void show_mem(void)
{
        int total = 0, reserved = 0;
        int shared = 0, cached = 0;
        int highmem = 0;
        struct page *page;
        pg_data_t *pgdat;
        unsigned long i;

        printk("Mem-info:\n");
        show_free_areas();
        printk("Free swap:       %6dkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
        for_each_pgdat(pgdat) {
                for (i = 0; i < pgdat->node_spanned_pages; ++i) {
                        page = pgdat->node_mem_map + i;
                        total++;
                        if (PageHighMem(page))
                                highmem++;
                        if (PageReserved(page))
                                reserved++;
                        else if (PageSwapCache(page))
                                cached++;
                        else if (page_count(page))
                                /* count only the extra references as "shared" */
                                shared += page_count(page) - 1;
                }
        }
        printk("%d pages of RAM\n", total);
        printk("%d pages of HIGHMEM\n", highmem);
        printk("%d reserved pages\n", reserved);
        printk("%d pages shared\n", shared);
        printk("%d pages swap cached\n", cached);
}

/*
 * Associate a virtual page frame with a given physical page frame
 * and protection flags for that frame.
 */
static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
{
        pgd_t *pgd;
        pmd_t *pmd;
        pte_t *pte;

        pgd = swapper_pg_dir + pgd_index(vaddr);
        if (pgd_none(*pgd)) {
                BUG();
                return;
        }
        pmd = pmd_offset(pgd, vaddr);
        if (pmd_none(*pmd)) {
                BUG();
                return;
        }
        pte = pte_offset_kernel(pmd, vaddr);
        /* <pfn,flags> stored as-is, to permit clearing entries */
        set_pte(pte, pfn_pte(pfn, flags));

        /*
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
        __flush_tlb_one(vaddr);
}
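
/*
 * set_pte_pfn() is the backend for __set_fixmap() below: installing a
 * fixmap slot boils down to
 *
 *      set_pte_pfn(__fix_to_virt(idx), phys >> PAGE_SHIFT, flags);
 *
 * on the kernel page tables in swapper_pg_dir.
 */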

/*
 * Associate a large virtual page frame with a given physical page frame
 * and protection flags for that frame. pfn is for the base of the page,
 * vaddr is what the page gets mapped to - both must be properly aligned.
 * The pmd must already be instantiated. Assumes PAE mode.
 */
void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
{
        pgd_t *pgd;
        pmd_t *pmd;

        if (vaddr & (PMD_SIZE-1)) {             /* vaddr is misaligned */
                printk("set_pmd_pfn: vaddr misaligned\n");
                return;
        }
        if (pfn & (PTRS_PER_PTE-1)) {           /* pfn is misaligned */
                printk("set_pmd_pfn: pfn misaligned\n");
                return;
        }
        pgd = swapper_pg_dir + pgd_index(vaddr);
        if (pgd_none(*pgd)) {
                printk("set_pmd_pfn: pgd_none\n");
                return;
        }
        pmd = pmd_offset(pgd, vaddr);
        set_pmd(pmd, pfn_pmd(pfn, flags));
        /*
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
        __flush_tlb_one(vaddr);
}
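
/*
 * For example, mapping the 2 MB physical frame starting at 32 MB to
 * virtual address 0xffc00000 would look like (a sketch only; it
 * assumes a large-page pgprot such as PAGE_KERNEL_LARGE is available):
 *
 *      set_pmd_pfn(0xffc00000, (32 << 20) >> PAGE_SHIFT, PAGE_KERNEL_LARGE);
 *
 * vaddr must be PMD_SIZE-aligned, pfn must be a multiple of
 * PTRS_PER_PTE, and the pgd entry covering vaddr must already point
 * at an instantiated pmd.
 */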

void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
{
        unsigned long address = __fix_to_virt(idx);

        if (idx >= __end_of_fixed_addresses) {
                BUG();
                return;
        }
        set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
}

pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
        pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);

        if (pte)
                clear_page(pte);
        return pte;
}

struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
        struct page *pte;

#ifdef CONFIG_HIGHPTE
        pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0);
#else
        pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
#endif
        if (pte)
                clear_highpage(pte);
        return pte;
}
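
/*
 * With CONFIG_HIGHPTE the pte page may live in highmem and so has no
 * permanent kernel mapping; users must map it via kmap_atomic() before
 * touching its contents, which is also why clear_highpage() rather
 * than clear_page() is used above.
 */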

void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags)
{
        memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
}
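
/*
 * In the 4:4 split the topmost pmd is shared with the kernel: its last
 * NR_SHARED_PMDS entries must mirror swapper_pg_dir's top-level pmd.
 * kpmd_ctor() below initializes such a pmd by zeroing the user part
 * and copying the shared kernel entries from the swapper's pmd.
 */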
void kpmd_ctor(void *__pmd, kmem_cache_t *cache, unsigned long flags)
{
        pmd_t *kpmd, *pmd;

        kpmd = pmd_offset(&swapper_pg_dir[PTRS_PER_PGD-1],
                        (PTRS_PER_PMD - NR_SHARED_PMDS)*PMD_SIZE);
        pmd = (pmd_t *)__pmd + (PTRS_PER_PMD - NR_SHARED_PMDS);

        memset(__pmd, 0, (PTRS_PER_PMD - NR_SHARED_PMDS)*sizeof(pmd_t));
        memcpy(pmd, kpmd, NR_SHARED_PMDS*sizeof(pmd_t));
}

/*
 * A list of all pgd's is needed so that entries can be invalidated in
 * both cached and uncached pgd's. This is essentially codepath-based
 * locking against pageattr.c; it is the unique case in which a valid
 * change of kernel pagetables can't be lazily synchronized by vmalloc
 * faults. vmalloc faults work because attached pagetables are never
 * freed. If the locking proves to be non-performant, a ticketing
 * scheme with checks at dup_mmap(), exec(), and other mmlist addition
 * points could be used. The locking scheme was chosen on the basis of
 * manfred's recommendations and having no core impact whatsoever.
 *
 * Lexicon for #ifdefless conditions to config options:
 * (a) PTRS_PER_PMD == 1 means non-PAE.
 * (b) PTRS_PER_PMD > 1 means PAE.
 * (c) TASK_SIZE > PAGE_OFFSET means 4:4.
 * (d) TASK_SIZE <= PAGE_OFFSET means non-4:4.
 */
spinlock_t pgd_lock = SPIN_LOCK_UNLOCKED;
struct page *pgd_list;

static inline void pgd_list_add(pgd_t *pgd)
{
        struct page *page = virt_to_page(pgd);

        page->index = (unsigned long)pgd_list;
        if (pgd_list)
                pgd_list->private = (unsigned long)&page->index;
        pgd_list = page;
        page->private = (unsigned long)&pgd_list;
}

static inline void pgd_list_del(pgd_t *pgd)
{
        struct page *next, **pprev, *page = virt_to_page(pgd);

        next = (struct page *)page->index;
        pprev = (struct page **)page->private;
        *pprev = next;
        if (next)
                next->private = (unsigned long)pprev;
}
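
/*
 * The list above is an intrusive doubly-linked list threaded through
 * the pgd's struct page: page->index holds the next page, while
 * page->private points back at the previous link's next field (or at
 * pgd_list itself for the head), so deletion never scans the list.
 */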

void pgd_ctor(void *__pgd, kmem_cache_t *cache, unsigned long unused)
{
        pgd_t *pgd = __pgd;
        unsigned long flags;

        if (PTRS_PER_PMD == 1) {
                if (TASK_SIZE <= PAGE_OFFSET)
                        spin_lock_irqsave(&pgd_lock, flags);
                else
                        memcpy(&pgd[PTRS_PER_PGD - NR_SHARED_PMDS],
                                &swapper_pg_dir[PTRS_PER_PGD - NR_SHARED_PMDS],
                                NR_SHARED_PMDS*sizeof(pgd_t));
        }

        if (TASK_SIZE <= PAGE_OFFSET)
                memcpy(&pgd[USER_PTRS_PER_PGD],
                        &swapper_pg_dir[USER_PTRS_PER_PGD],
                        (PTRS_PER_PGD - USER_PTRS_PER_PGD)*sizeof(pgd_t));

        if (PTRS_PER_PMD > 1)
                return;

        if (TASK_SIZE > PAGE_OFFSET)
                memset(pgd, 0, (PTRS_PER_PGD - NR_SHARED_PMDS)*sizeof(pgd_t));
        else {
                pgd_list_add(pgd);
                spin_unlock_irqrestore(&pgd_lock, flags);
                memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
        }
}
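
/*
 * Per the lexicon above, only the non-PAE, non-4:4 case takes pgd_lock
 * and enters the pgd on pgd_list; PAE pgd's return early, since their
 * kernel mappings are shared at the pmd level (via the swapper's pmd's,
 * or the kpmd cache in the 4:4 case) and need no per-pgd invalidation.
 */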

/* Never called when PTRS_PER_PMD > 1 || TASK_SIZE > PAGE_OFFSET */
void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
{
        unsigned long flags; /* can be called from interrupt context */

        spin_lock_irqsave(&pgd_lock, flags);
        pgd_list_del(pgd);
        spin_unlock_irqrestore(&pgd_lock, flags);
}

pgd_t *pgd_alloc(struct mm_struct *mm)
{
        int i;
        pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);

        if (PTRS_PER_PMD == 1 || !pgd)
                return pgd;

        /*
         * In the 4G userspace case alias the top 16 MB virtual
         * memory range into the user mappings as well (these
         * include the trampoline and CPU data structures).
         */
        for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
                pmd_t *pmd;

                if (TASK_SIZE > PAGE_OFFSET && i == USER_PTRS_PER_PGD - 1)
                        pmd = kmem_cache_alloc(kpmd_cache, GFP_KERNEL);
                else
                        pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);

                if (!pmd)
                        goto out_oom;
                set_pgd(&pgd[i], __pgd(1 + __pa((u64)((u32)pmd))));
        }
        return pgd;

out_oom:
        /*
         * we don't have to handle the kpmd_cache here, since it's the
         * last allocation, and has either nothing to free or when it
         * succeeds the whole operation succeeds.
         */
        for (i--; i >= 0; i--)
                kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
        kmem_cache_free(pgd_cache, pgd);
        return NULL;
}
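
/*
 * Note the +1/-1 trick above and below: a PAE pgd entry holds
 * __pa(pmd) with the low bit set as the present bit, so the pmd's
 * virtual address is recovered with __va(pgd_val(pgd[i]) - 1).
 */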

void pgd_free(pgd_t *pgd)
{
        int i;

        /* in the non-PAE case, clear_page_tables() clears user pgd entries */
        if (PTRS_PER_PMD == 1)
                goto out_gone;

        /* in the PAE case user pgd entries are overwritten before usage */
        for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
                pmd_t *pmd = __va(pgd_val(pgd[i]) - 1);

                /*
                 * only userspace pmd's are cleared for us
                 * by mm/memory.c; it's a slab cache invariant
                 * that the kernel pmd must be kept separate
                 * at all times, else we'll have bad pmd's.
                 */
                if (TASK_SIZE > PAGE_OFFSET && i == USER_PTRS_PER_PGD - 1)
                        kmem_cache_free(kpmd_cache, pmd);
                else
                        kmem_cache_free(pmd_cache, pmd);
        }
out_gone:
        kmem_cache_free(pgd_cache, pgd);
}