#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
+#include <linux/module.h>
#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/e820.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
+#include <asm/atomic_kmap.h>
void show_mem(void)
{
printk("%d pages swap cached\n",cached);
}
+EXPORT_SYMBOL_GPL(show_mem);
+
/*
* Associate a virtual page frame with a given physical page frame
* and protection flags for that frame.
*/

void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags)
{
memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
}
+void kpmd_ctor(void *__pmd, kmem_cache_t *cache, unsigned long flags)
+{
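+ /*
+ * Zero the user portion of the new pmd page, then copy the
+ * NR_SHARED_PMDS trailing entries from the kernel pmd in
+ * swapper_pg_dir, so the shared range maps through the same
+ * pte pages as the kernel's.
+ */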
+ pmd_t *kpmd, *pmd;
+ kpmd = pmd_offset(&swapper_pg_dir[PTRS_PER_PGD-1],
+ (PTRS_PER_PMD - NR_SHARED_PMDS)*PMD_SIZE);
+ pmd = (pmd_t *)__pmd + (PTRS_PER_PMD - NR_SHARED_PMDS);
+
+ memset(__pmd, 0, (PTRS_PER_PMD - NR_SHARED_PMDS)*sizeof(pmd_t));
+ memcpy(pmd, kpmd, NR_SHARED_PMDS*sizeof(pmd_t));
+}
+
/*
- * List of all pgd's needed for non-PAE so it can invalidate entries
- * in both cached and uncached pgd's; not needed for PAE since the
- * kernel pmd is shared. If PAE were not to share the pmd a similar
- * tactic would be needed. This is essentially codepath-based locking
+ * List of all pgd's needed so it can invalidate entries in both cached
+ * and uncached pgd's. This is essentially codepath-based locking
* against pageattr.c; it is the unique case in which a valid change
* of kernel pagetables can't be lazily synchronized by vmalloc faults.
* vmalloc faults work because attached pagetables are never freed.
* If the locking proves to be non-performant, a ticketing scheme with
* checks at dup_mmap(), exec(), and other mmlist addition points
* could be used. The locking scheme was chosen on the basis of
* manfred's recommendations and having no core impact whatsoever.
+ *
+ * Lexicon mapping #ifdefless conditions to config options:
+ * (a) PTRS_PER_PMD == 1 means non-PAE.
+ * (b) PTRS_PER_PMD > 1 means PAE.
+ * (c) TASK_SIZE > PAGE_OFFSET means 4:4.
+ * (d) TASK_SIZE <= PAGE_OFFSET means non-4:4.
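+ * e.g. "if (TASK_SIZE > PAGE_OFFSET)" below stands in for an
+ * #ifdef on the 4:4 config option.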
* -- wli
*/
spinlock_t pgd_lock = SPIN_LOCK_UNLOCKED;
next->private = (unsigned long)pprev;
}
-void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
+void pgd_ctor(void *__pgd, kmem_cache_t *cache, unsigned long unused)
{
+ pgd_t *pgd = __pgd;
unsigned long flags;
- if (PTRS_PER_PMD == 1)
- spin_lock_irqsave(&pgd_lock, flags);
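+ /*
+ * Non-PAE: a 4:4 pgd carries only the NR_SHARED_PMDS trailing
+ * kernel entries, copied from swapper_pg_dir here with no
+ * locking; a non-4:4 pgd takes pgd_lock for the kernel-mapping
+ * copy and the pgd_list insertion further down.
+ */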
+ if (PTRS_PER_PMD == 1) {
+ if (TASK_SIZE <= PAGE_OFFSET)
+ spin_lock_irqsave(&pgd_lock, flags);
+ else
+ memcpy(&pgd[PTRS_PER_PGD - NR_SHARED_PMDS],
+ &swapper_pg_dir[PTRS_PER_PGD - NR_SHARED_PMDS],
+ NR_SHARED_PMDS*sizeof(pgd_t));
+ }
- memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
- swapper_pg_dir + USER_PTRS_PER_PGD,
- (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
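+ /* non-4:4: copy the kernel mappings into the new pgd */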
+ if (TASK_SIZE <= PAGE_OFFSET)
+ memcpy(&pgd[USER_PTRS_PER_PGD],
+ &swapper_pg_dir[USER_PTRS_PER_PGD],
+ (PTRS_PER_PGD - USER_PTRS_PER_PGD)*sizeof(pgd_t));
if (PTRS_PER_PMD > 1)
return;
- pgd_list_add(pgd);
- spin_unlock_irqrestore(&pgd_lock, flags);
- memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
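+ /*
+ * Only non-PAE gets here: 4:4 clears everything below the
+ * shared pmds; non-4:4 clears the user range, links the pgd
+ * on pgd_list for pageattr.c and drops pgd_lock taken above.
+ */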
+ if (TASK_SIZE > PAGE_OFFSET)
+ memset(pgd, 0, (PTRS_PER_PGD - NR_SHARED_PMDS)*sizeof(pgd_t));
+ else {
+ pgd_list_add(pgd);
+ spin_unlock_irqrestore(&pgd_lock, flags);
+ memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
+ }
}
-/* never called when PTRS_PER_PMD > 1 */
+/* Never called when PTRS_PER_PMD > 1 || TASK_SIZE > PAGE_OFFSET */
void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
{
unsigned long flags; /* can be called from interrupt context */

spin_lock_irqsave(&pgd_lock, flags);
pgd_list_del(pgd);
spin_unlock_irqrestore(&pgd_lock, flags);
}

pgd_t *pgd_alloc(struct mm_struct *mm)
{
int i;
pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);

if (PTRS_PER_PMD == 1 || !pgd)
return pgd;
+ /*
+ * In the 4G userspace case alias the top 16 MB virtual
+ * memory range into the user mappings as well (this range
+ * includes the trampoline and CPU data structures).
+ */
for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
- pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
+ pmd_t *pmd;
+
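+ /*
+ * Under 4:4 the last user pgd slot reaches into the shared
+ * kernel range, so its pmd must come from kpmd_cache, whose
+ * ctor pre-installs the shared kernel pmd entries.
+ */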
+ if (TASK_SIZE > PAGE_OFFSET && i == USER_PTRS_PER_PGD - 1)
+ pmd = kmem_cache_alloc(kpmd_cache, GFP_KERNEL);
+ else
+ pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
+
if (!pmd)
goto out_oom;
set_pgd(&pgd[i], __pgd(1 + __pa((u64)((u32)pmd))));
}
return pgd;
out_oom:
+ /*
+ * We don't have to handle the kpmd_cache here: it is used only
+ * for the last allocation, so either that failed (and there is
+ * nothing of it to free) or it succeeded, in which case the
+ * whole operation succeeds and we never reach out_oom.
+ */
for (i--; i >= 0; i--)
kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
kmem_cache_free(pgd_cache, pgd);
return NULL;
}

void pgd_free(pgd_t *pgd)
{
int i;
- /* in the PAE case user pgd entries are overwritten before usage */
- if (PTRS_PER_PMD > 1)
- for (i = 0; i < USER_PTRS_PER_PGD; ++i)
- kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
/* in the non-PAE case, clear_page_tables() clears user pgd entries */
+ if (PTRS_PER_PMD == 1)
+ goto out_free;
+
+ /* in the PAE case user pgd entries are overwritten before usage */
+ for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
+ pmd_t *pmd = __va(pgd_val(pgd[i]) - 1);
+
+ /*
+ * only userspace pmd's are cleared for us
+ * by mm/memory.c; it's a slab cache invariant
+ * that we must keep the kernel pmd valid at
+ * all times, else we'll have bad pmd's.
+ */
+ if (TASK_SIZE > PAGE_OFFSET && i == USER_PTRS_PER_PGD - 1)
+ kmem_cache_free(kpmd_cache, pmd);
+ else
+ kmem_cache_free(pmd_cache, pmd);
+ }
+out_free:
kmem_cache_free(pgd_cache, pgd);
}
+