#include <asm/processor.h>
#include <asm/fixmap.h>
#include <linux/threads.h>
-#include <linux/slab.h>
#ifndef _I386_BITOPS_H
#include <asm/bitops.h>
#endif
-extern pgd_t swapper_pg_dir[1024];
-extern kmem_cache_t *pgd_cache, *pmd_cache, *kpmd_cache;
-extern spinlock_t pgd_lock;
-extern struct page *pgd_list;
-void pmd_ctor(void *, kmem_cache_t *, unsigned long);
-void kpmd_ctor(void *, kmem_cache_t *, unsigned long);
-void pgd_ctor(void *, kmem_cache_t *, unsigned long);
-void pgd_dtor(void *, kmem_cache_t *, unsigned long);
-void pgtable_cache_init(void);
-extern void paging_init(void);
-void setup_identity_mappings(pgd_t *pgd_base, unsigned long start, unsigned long end);
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+struct mm_struct;
+struct vm_area_struct;
/*
* ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc..
*/
-extern unsigned long empty_zero_page[1024];
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+extern unsigned long empty_zero_page[1024];
+extern pgd_t swapper_pg_dir[1024];
+extern kmem_cache_t *pgd_cache;
+extern kmem_cache_t *pmd_cache;
+extern spinlock_t pgd_lock;
+extern struct page *pgd_list;
-#endif /* !__ASSEMBLY__ */
+void pmd_ctor(void *, kmem_cache_t *, unsigned long);
+void pgd_ctor(void *, kmem_cache_t *, unsigned long);
+void pgd_dtor(void *, kmem_cache_t *, unsigned long);
+void pgtable_cache_init(void);
+void paging_init(void);
/*
* The Linux x86 paging architecture is 'compile-time dual-mode', it
* implements both the traditional 2-level x86 page tables and the
* newer 3-level PAE-mode page tables.
*/
-#ifndef __ASSEMBLY__
-
-extern void set_system_gate(unsigned int n, void *addr);
-extern void init_entry_mappings(void);
-extern void entry_trampoline_setup(void);
-
#ifdef CONFIG_X86_PAE
-# include <asm/pgtable-3level.h>
+# include <asm/pgtable-3level-defs.h>
+# define PMD_SIZE (1UL << PMD_SHIFT)
+# define PMD_MASK (~(PMD_SIZE-1))
#else
-# include <asm/pgtable-2level.h>
-#endif
+# include <asm/pgtable-2level-defs.h>
#endif
-#define PMD_SIZE (1UL << PMD_SHIFT)
-#define PMD_MASK (~(PMD_SIZE-1))
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
-#if defined(CONFIG_X86_PAE) && defined(CONFIG_X86_4G_VM_LAYOUT)
-# define USER_PTRS_PER_PGD 4
-#else
-# define USER_PTRS_PER_PGD ((TASK_SIZE/PGDIR_SIZE) + ((TASK_SIZE % PGDIR_SIZE) + PGDIR_SIZE-1)/PGDIR_SIZE)
-#endif
-
-#define FIRST_USER_PGD_NR 0
+#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE)
+#define FIRST_USER_ADDRESS 0
#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT)
#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS)
-
-#ifndef __ASSEMBLY__
/* Just any arbitrary offset to the start of the vmalloc VM area: the
* current 8MB value just means that there will be a 8MB "hole" after the
* physical memory until the kernel virtual memory starts. That means that
* area for the same reason. ;)
*/
#define VMALLOC_OFFSET (8*1024*1024)
-#define VMALLOC_START (((unsigned long) high_memory + 2*VMALLOC_OFFSET-1) & \
- ~(VMALLOC_OFFSET-1))
+#define VMALLOC_START (((unsigned long) high_memory + vmalloc_earlyreserve + \
+ 2*VMALLOC_OFFSET-1) & ~(VMALLOC_OFFSET-1))
#ifdef CONFIG_HIGHMEM
# define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE)
#else
#endif
/*
- * The 4MB page is guessing.. Detailed in the infamous "Chapter H"
- * of the Pentium details, but assuming intel did the straightforward
- * thing, this bit set in the page directory entry just means that
+ * _PAGE_PSE set in the page directory entry just means that
* the page directory entry points directly to a 4MB-aligned block of
* memory.
*/
#define _PAGE_UNUSED2 0x400
#define _PAGE_UNUSED3 0x800
-#define _PAGE_FILE 0x040 /* set:pagecache unset:swap */
-#define _PAGE_PROTNONE 0x080 /* If not present */
+/* If _PAGE_PRESENT is clear, we use these: */
+#define _PAGE_FILE 0x040 /* nonlinear file mapping, saved PTE; unset:swap */
+#define _PAGE_PROTNONE 0x080 /* if the user mapped it with PROT_NONE;
+ pte_present gives true */
#ifdef CONFIG_X86_PAE
#define _PAGE_NX (1ULL<<_PAGE_BIT_NX)
#else
/*
* Define this if things work differently on an i386 and an i486:
* it will (on an i486) warn about kernel memory accesses that are
- * done without a 'verify_area(VERIFY_WRITE,..)'
+ * done without a 'access_ok(VERIFY_WRITE,..)'
*/
-#undef TEST_VERIFY_AREA
+#undef TEST_ACCESS_OK
/* The boot page tables (all created as a single array) */
extern unsigned long pg0[];
#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
-#define pte_clear(xp) do { set_pte(xp, __pte(0)); } while (0)
-#define pmd_none(x) (!pmd_val(x))
+/* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
+#define pmd_none(x) (!(unsigned long)pmd_val(x))
#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
-#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
* The following only work if pte_present() is true.
* Undefined behaviour if not..
*/
+#define __LARGE_PTE (_PAGE_PSE | _PAGE_PRESENT)
static inline int pte_user(pte_t pte) { return (pte).pte_low & _PAGE_USER; }
static inline int pte_read(pte_t pte) { return (pte).pte_low & _PAGE_USER; }
-static inline int pte_exec(pte_t pte) { return (pte).pte_low & _PAGE_USER; }
static inline int pte_dirty(pte_t pte) { return (pte).pte_low & _PAGE_DIRTY; }
static inline int pte_young(pte_t pte) { return (pte).pte_low & _PAGE_ACCESSED; }
static inline int pte_write(pte_t pte) { return (pte).pte_low & _PAGE_RW; }
+static inline int pte_huge(pte_t pte) { return ((pte).pte_low & __LARGE_PTE) == __LARGE_PTE; }
/*
* The following only works if pte_present() is not true.
static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; }
static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; }
static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; }
+static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= __LARGE_PTE; return pte; }
-static inline int ptep_test_and_clear_dirty(pte_t *ptep) { return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low); }
-static inline int ptep_test_and_clear_young(pte_t *ptep) { return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low); }
-static inline void ptep_set_wrprotect(pte_t *ptep) { clear_bit(_PAGE_BIT_RW, &ptep->pte_low); }
-static inline void ptep_mkdirty(pte_t *ptep) { set_bit(_PAGE_BIT_DIRTY, &ptep->pte_low); }
+#ifdef CONFIG_X86_PAE
+# include <asm/pgtable-3level.h>
+#else
+# include <asm/pgtable-2level.h>
+#endif
+
+static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+{
+ if (!pte_dirty(*ptep))
+ return 0;
+ return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low);
+}
+
+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+{
+ if (!pte_young(*ptep))
+ return 0;
+ return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low);
+}
+
+static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full)
+{
+ pte_t pte;
+ if (full) {
+ pte = *ptep;
+ pte_clear(mm, addr, ptep);
+ } else {
+ pte = ptep_get_and_clear(mm, addr, ptep);
+ }
+ return pte;
+}
+
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+ clear_bit(_PAGE_BIT_RW, &ptep->pte_low);
+}
+
+/*
+ * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
+ *
+ * dst - pointer to pgd range anwhere on a pgd page
+ * src - ""
+ * count - the number of pgds to copy.
+ *
+ * dst and src can be on the same page, but the range must not overlap,
+ * and must not cross a page boundary.
+ */
+static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
+{
+ memcpy(dst, src, count * sizeof(pgd_t));
+}
/*
* Macro to mark a page protection value as "uncacheable". On processors which do not support
*/
#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
-#define mk_pte_huge(entry) ((entry).pte_low |= _PAGE_PRESENT | _PAGE_PSE)
-#define mk_pte_phys(physpage, pgprot) pfn_pte((physpage) >> PAGE_SHIFT, pgprot)
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
* Chop off the NX bit (if present), and add the NX portion of
* the newprot (if present):
*/
- pte.pte_high &= -1 ^ (1 << (_PAGE_BIT_NX - 32));
+ pte.pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
pte.pte_high |= (pgprot_val(newprot) >> 32) & \
(__supported_pte_mask >> 32);
#endif
return pte;
}
-#define page_pte(page) page_pte_prot(page, __pgprot(0))
-
-#define pmd_page_kernel(pmd) \
-((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
-
-#ifndef CONFIG_DISCONTIGMEM
-#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
-#endif /* !CONFIG_DISCONTIGMEM */
-
#define pmd_large(pmd) \
- ((pmd_val(pmd) & (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT))
+((pmd_val(pmd) & (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT))
/*
* the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD]
* control the given virtual address
*/
#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+#define pgd_index_k(addr) pgd_index(addr)
/*
* pgd_offset() returns a (pgd_t *)
#define pte_offset_kernel(dir, address) \
((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address))
+#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
+
+#define pmd_page_kernel(pmd) \
+ ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
+
+/*
+ * Helper function that returns the kernel pagetable entry controlling
+ * the virtual address 'address'. NULL means no pagetable entry present.
+ * NOTE: the return type is pte_t but if the pmd is PSE then we return it
+ * as a pte too.
+ */
+extern pte_t *lookup_address(unsigned long address);
+
+/*
+ * Make a given kernel text page executable/non-executable.
+ * Returns the previous executability setting of that page (which
+ * is used to restore the previous state). Used by the SMP bootup code.
+ * NOTE: this is an __init function for security reasons.
+ */
+#ifdef CONFIG_X86_PAE
+ extern int set_kernel_exec(unsigned long vaddr, int enable);
+#else
+ static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0;}
+#endif
+
+extern void noexec_setup(const char *str);
+
#if defined(CONFIG_HIGHPTE)
#define pte_offset_map(dir, address) \
((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE0) + pte_index(address))
} \
} while (0)
-/* Encode and de-code a swap entry */
-#define __swp_type(x) (((x).val >> 1) & 0x1f)
-#define __swp_offset(x) ((x).val >> 8)
-#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) })
-#define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low })
-#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-
#endif /* !__ASSEMBLY__ */
-#ifndef CONFIG_DISCONTIGMEM
+#ifdef CONFIG_FLATMEM
#define kern_addr_valid(addr) (1)
-#endif /* !CONFIG_DISCONTIGMEM */
+#endif /* CONFIG_FLATMEM */
-#define io_remap_page_range remap_page_range
+#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
+ remap_pfn_range(vma, vaddr, pfn, size, prot)
+
+#define MK_IOSPACE_PFN(space, pfn) (pfn)
+#define GET_IOSPACE(pfn) 0
+#define GET_PFN(pfn) (pfn)
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-#define __HAVE_ARCH_PTEP_MKDIRTY
#define __HAVE_ARCH_PTE_SAME
#include <asm-generic/pgtable.h>
-/*
- * The size of the low 1:1 mappings we use during bootup,
- * SMP-boot and ACPI-sleep:
- */
-#define LOW_MAPPINGS_SIZE (16*1024*1024)
-
-
#endif /* _I386_PGTABLE_H */