X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=include%2Fasm-i386%2Fmach-xen%2Fasm%2Fpgtable.h;fp=include%2Fasm-i386%2Fmach-xen%2Fasm%2Fpgtable.h;h=b4a967e7e93e65a48150838c2f29d0ddea51bc39;hb=1db395853d4f30d6120458bd279ede1f882a8525;hp=0000000000000000000000000000000000000000;hpb=34a75f0025b9cf803b6a88db032e6ad6950c9313;p=linux-2.6.git diff --git a/include/asm-i386/mach-xen/asm/pgtable.h b/include/asm-i386/mach-xen/asm/pgtable.h new file mode 100644 index 000000000..b4a967e7e --- /dev/null +++ b/include/asm-i386/mach-xen/asm/pgtable.h @@ -0,0 +1,509 @@ +#ifndef _I386_PGTABLE_H +#define _I386_PGTABLE_H + +#include +#include + +/* + * The Linux memory management assumes a three-level page table setup. On + * the i386, we use that, but "fold" the mid level into the top-level page + * table, so that we physically have the same two-level page table as the + * i386 mmu expects. + * + * This file contains the functions and defines necessary to modify and use + * the i386 page table tree. + */ +#ifndef __ASSEMBLY__ +#include +#include +#include + +#ifndef _I386_BITOPS_H +#include +#endif + +#include +#include +#include + +struct mm_struct; +struct vm_area_struct; + +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. + */ +#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) +extern unsigned long empty_zero_page[1024]; +extern pgd_t *swapper_pg_dir; +extern kmem_cache_t *pgd_cache; +extern kmem_cache_t *pmd_cache; +extern spinlock_t pgd_lock; +extern struct page *pgd_list; + +void pmd_ctor(void *, kmem_cache_t *, unsigned long); +void pgd_ctor(void *, kmem_cache_t *, unsigned long); +void pgd_dtor(void *, kmem_cache_t *, unsigned long); +void pgtable_cache_init(void); +void paging_init(void); + +/* + * The Linux x86 paging architecture is 'compile-time dual-mode', it + * implements both the traditional 2-level x86 page tables and the + * newer 3-level PAE-mode page tables. + */ +#ifdef CONFIG_X86_PAE +# include +# define PMD_SIZE (1UL << PMD_SHIFT) +# define PMD_MASK (~(PMD_SIZE-1)) +#else +# include +#endif + +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) +#define FIRST_USER_ADDRESS 0 + +#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT) +#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS) + +#define TWOLEVEL_PGDIR_SHIFT 22 +#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT) +#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS) + +/* Just any arbitrary offset to the start of the vmalloc VM area: the + * current 8MB value just means that there will be a 8MB "hole" after the + * physical memory until the kernel virtual memory starts. That means that + * any out-of-bounds memory accesses will hopefully be caught. + * The vmalloc() routines leaves a hole of 4kB between each vmalloced + * area for the same reason. ;) + */ +#define VMALLOC_OFFSET (8*1024*1024) +#define VMALLOC_START (((unsigned long) high_memory + vmalloc_earlyreserve + \ + 2*VMALLOC_OFFSET-1) & ~(VMALLOC_OFFSET-1)) +#ifdef CONFIG_HIGHMEM +# define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE) +#else +# define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE) +#endif + +/* + * _PAGE_PSE set in the page directory entry just means that + * the page directory entry points directly to a 4MB-aligned block of + * memory. + */ +#define _PAGE_BIT_PRESENT 0 +#define _PAGE_BIT_RW 1 +#define _PAGE_BIT_USER 2 +#define _PAGE_BIT_PWT 3 +#define _PAGE_BIT_PCD 4 +#define _PAGE_BIT_ACCESSED 5 +#define _PAGE_BIT_DIRTY 6 +#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page, Pentium+, if present.. */ +#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ +#define _PAGE_BIT_UNUSED1 9 /* available for programmer */ +#define _PAGE_BIT_UNUSED2 10 +#define _PAGE_BIT_UNUSED3 11 +#define _PAGE_BIT_NX 63 + +#define _PAGE_PRESENT 0x001 +#define _PAGE_RW 0x002 +#define _PAGE_USER 0x004 +#define _PAGE_PWT 0x008 +#define _PAGE_PCD 0x010 +#define _PAGE_ACCESSED 0x020 +#define _PAGE_DIRTY 0x040 +#define _PAGE_PSE 0x080 /* 4 MB (or 2MB) page, Pentium+, if present.. */ +#define _PAGE_GLOBAL 0x100 /* Global TLB entry PPro+ */ +#define _PAGE_UNUSED1 0x200 /* available for programmer */ +#define _PAGE_UNUSED2 0x400 +#define _PAGE_UNUSED3 0x800 + +/* If _PAGE_PRESENT is clear, we use these: */ +#define _PAGE_FILE 0x040 /* nonlinear file mapping, saved PTE; unset:swap */ +#define _PAGE_PROTNONE 0x080 /* if the user mapped it with PROT_NONE; + pte_present gives true */ +#ifdef CONFIG_X86_PAE +#define _PAGE_NX (1ULL<<_PAGE_BIT_NX) +#else +#define _PAGE_NX 0 +#endif + +#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) + +#define PAGE_NONE \ + __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) +#define PAGE_SHARED \ + __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) + +#define PAGE_SHARED_EXEC \ + __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_COPY_NOEXEC \ + __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) +#define PAGE_COPY_EXEC \ + __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_COPY \ + PAGE_COPY_NOEXEC +#define PAGE_READONLY \ + __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) +#define PAGE_READONLY_EXEC \ + __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) + +#define _PAGE_KERNEL \ + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX) +#define _PAGE_KERNEL_EXEC \ + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) + +extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC; +#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) +#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD) +#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) +#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) + +#define PAGE_KERNEL __pgprot(__PAGE_KERNEL) +#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO) +#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) +#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE) +#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE) +#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC) + +/* + * The i386 can't do page protection for execute, and considers that + * the same are read. Also, write permissions imply read permissions. + * This is the closest we can get.. + */ +#define __P000 PAGE_NONE +#define __P001 PAGE_READONLY +#define __P010 PAGE_COPY +#define __P011 PAGE_COPY +#define __P100 PAGE_READONLY_EXEC +#define __P101 PAGE_READONLY_EXEC +#define __P110 PAGE_COPY_EXEC +#define __P111 PAGE_COPY_EXEC + +#define __S000 PAGE_NONE +#define __S001 PAGE_READONLY +#define __S010 PAGE_SHARED +#define __S011 PAGE_SHARED +#define __S100 PAGE_READONLY_EXEC +#define __S101 PAGE_READONLY_EXEC +#define __S110 PAGE_SHARED_EXEC +#define __S111 PAGE_SHARED_EXEC + +/* + * Define this if things work differently on an i386 and an i486: + * it will (on an i486) warn about kernel memory accesses that are + * done without a 'access_ok(VERIFY_WRITE,..)' + */ +#undef TEST_ACCESS_OK + +/* The boot page tables (all created as a single array) */ +extern unsigned long pg0[]; + +#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE)) + +/* To avoid harmful races, pmd_none(x) should check only the lower when PAE */ +#define pmd_none(x) (!(unsigned long)pmd_val(x)) +/* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t. + can temporarily clear it. */ +#define pmd_present(x) (pmd_val(x)) +#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT)) + + +#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) + +/* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. + */ +static inline int pte_user(pte_t pte) { return (pte).pte_low & _PAGE_USER; } +static inline int pte_read(pte_t pte) { return (pte).pte_low & _PAGE_USER; } +static inline int pte_dirty(pte_t pte) { return (pte).pte_low & _PAGE_DIRTY; } +static inline int pte_young(pte_t pte) { return (pte).pte_low & _PAGE_ACCESSED; } +static inline int pte_write(pte_t pte) { return (pte).pte_low & _PAGE_RW; } +static inline int pte_huge(pte_t pte) { return (pte).pte_low & _PAGE_PSE; } + +/* + * The following only works if pte_present() is not true. + */ +static inline int pte_file(pte_t pte) { return (pte).pte_low & _PAGE_FILE; } + +static inline pte_t pte_rdprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_USER; return pte; } +static inline pte_t pte_exprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_USER; return pte; } +static inline pte_t pte_mkclean(pte_t pte) { (pte).pte_low &= ~_PAGE_DIRTY; return pte; } +static inline pte_t pte_mkold(pte_t pte) { (pte).pte_low &= ~_PAGE_ACCESSED; return pte; } +static inline pte_t pte_wrprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_RW; return pte; } +static inline pte_t pte_mkread(pte_t pte) { (pte).pte_low |= _PAGE_USER; return pte; } +static inline pte_t pte_mkexec(pte_t pte) { (pte).pte_low |= _PAGE_USER; return pte; } +static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; } +static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; } +static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; } +static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return pte; } + +#ifdef CONFIG_X86_PAE +# include +#else +# include +#endif + +static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) +{ + if (!pte_dirty(*ptep)) + return 0; + return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low); +} + +static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) +{ + if (!pte_young(*ptep)) + return 0; + return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low); +} + +static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full) +{ + pte_t pte; + if (full) { + pte = *ptep; + pte_clear(mm, addr, ptep); + } else { + pte = ptep_get_and_clear(mm, addr, ptep); + } + return pte; +} + +static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + if (pte_write(*ptep)) + clear_bit(_PAGE_BIT_RW, &ptep->pte_low); +} + +/* + * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); + * + * dst - pointer to pgd range anwhere on a pgd page + * src - "" + * count - the number of pgds to copy. + * + * dst and src can be on the same page, but the range must not overlap, + * and must not cross a page boundary. + */ +static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) +{ + memcpy(dst, src, count * sizeof(pgd_t)); +} + +/* + * Macro to mark a page protection value as "uncacheable". On processors which do not support + * it, this is a no-op. + */ +#define pgprot_noncached(prot) ((boot_cpu_data.x86 > 3) \ + ? (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) : (prot)) + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + */ + +#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) + +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + pte.pte_low &= _PAGE_CHG_MASK; + pte.pte_low |= pgprot_val(newprot); +#ifdef CONFIG_X86_PAE + /* + * Chop off the NX bit (if present), and add the NX portion of + * the newprot (if present): + */ + pte.pte_high &= ~(1 << (_PAGE_BIT_NX - 32)); + pte.pte_high |= (pgprot_val(newprot) >> 32) & \ + (__supported_pte_mask >> 32); +#endif + return pte; +} + +#define pmd_large(pmd) \ +((pmd_val(pmd) & (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT)) + +/* + * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD] + * + * this macro returns the index of the entry in the pgd page which would + * control the given virtual address + */ +#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) +#define pgd_index_k(addr) pgd_index(addr) + +/* + * pgd_offset() returns a (pgd_t *) + * pgd_index() is used get the offset into the pgd page's array of pgd_t's; + */ +#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address)) + +/* + * a shortcut which implies the use of the kernel's pgd, instead + * of a process's + */ +#define pgd_offset_k(address) pgd_offset(&init_mm, address) + +/* + * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] + * + * this macro returns the index of the entry in the pmd page which would + * control the given virtual address + */ +#define pmd_index(address) \ + (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) + +/* + * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE] + * + * this macro returns the index of the entry in the pte page which would + * control the given virtual address + */ +#define pte_index(address) \ + (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +#define pte_offset_kernel(dir, address) \ + ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address)) + +#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) + +#define pmd_page_kernel(pmd) \ + ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) + +/* + * Helper function that returns the kernel pagetable entry controlling + * the virtual address 'address'. NULL means no pagetable entry present. + * NOTE: the return type is pte_t but if the pmd is PSE then we return it + * as a pte too. + */ +extern pte_t *lookup_address(unsigned long address); + +/* + * Make a given kernel text page executable/non-executable. + * Returns the previous executability setting of that page (which + * is used to restore the previous state). Used by the SMP bootup code. + * NOTE: this is an __init function for security reasons. + */ +#ifdef CONFIG_X86_PAE + extern int set_kernel_exec(unsigned long vaddr, int enable); +#else + static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0;} +#endif + +extern void noexec_setup(const char *str); + +#if defined(CONFIG_HIGHPTE) +#define pte_offset_map(dir, address) \ + ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + \ + pte_index(address)) +#define pte_offset_map_nested(dir, address) \ + ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + \ + pte_index(address)) +#define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0) +#define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1) +#else +#define pte_offset_map(dir, address) \ + ((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address)) +#define pte_offset_map_nested(dir, address) pte_offset_map(dir, address) +#define pte_unmap(pte) do { } while (0) +#define pte_unmap_nested(pte) do { } while (0) +#endif + +/* + * The i386 doesn't have any external MMU info: the kernel page + * tables contain all the necessary information. + * + * Also, we only update the dirty/accessed state if we set + * the dirty bit by hand in the kernel, since the hardware + * will do the accessed bit for us, and we don't want to + * race with other CPU's that might be updating the dirty + * bit at the same time. + */ +#define update_mmu_cache(vma,address,pte) do { } while (0) +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \ + do { \ + if (__dirty) { \ + if ( likely((__vma)->vm_mm == current->mm) ) { \ + BUG_ON(HYPERVISOR_update_va_mapping((__address), (__entry), UVMF_INVLPG|UVMF_MULTI|(unsigned long)((__vma)->vm_mm->cpu_vm_mask.bits))); \ + } else { \ + xen_l1_entry_update((__ptep), (__entry)); \ + flush_tlb_page((__vma), (__address)); \ + } \ + } \ + } while (0) + +#define __HAVE_ARCH_PTEP_ESTABLISH +#define ptep_establish(__vma, __address, __ptep, __entry) \ +do { \ + ptep_set_access_flags(__vma, __address, __ptep, __entry, 1); \ +} while (0) + +#include +void make_lowmem_page_readonly(void *va, unsigned int feature); +void make_lowmem_page_writable(void *va, unsigned int feature); +void make_page_readonly(void *va, unsigned int feature); +void make_page_writable(void *va, unsigned int feature); +void make_pages_readonly(void *va, unsigned int nr, unsigned int feature); +void make_pages_writable(void *va, unsigned int nr, unsigned int feature); + +#define virt_to_ptep(__va) \ +({ \ + pgd_t *__pgd = pgd_offset_k((unsigned long)(__va)); \ + pud_t *__pud = pud_offset(__pgd, (unsigned long)(__va)); \ + pmd_t *__pmd = pmd_offset(__pud, (unsigned long)(__va)); \ + pte_offset_kernel(__pmd, (unsigned long)(__va)); \ +}) + +#define arbitrary_virt_to_machine(__va) \ +({ \ + maddr_t m = (maddr_t)pte_mfn(*virt_to_ptep(__va)) << PAGE_SHIFT;\ + m | ((unsigned long)(__va) & (PAGE_SIZE-1)); \ +}) + +#endif /* !__ASSEMBLY__ */ + +#ifdef CONFIG_FLATMEM +#define kern_addr_valid(addr) (1) +#endif /* CONFIG_FLATMEM */ + +int direct_remap_pfn_range(struct vm_area_struct *vma, + unsigned long address, + unsigned long mfn, + unsigned long size, + pgprot_t prot, + domid_t domid); +int direct_kernel_remap_pfn_range(unsigned long address, + unsigned long mfn, + unsigned long size, + pgprot_t prot, + domid_t domid); +int create_lookup_pte_addr(struct mm_struct *mm, + unsigned long address, + uint64_t *ptep); +int touch_pte_range(struct mm_struct *mm, + unsigned long address, + unsigned long size); + +#define io_remap_pfn_range(vma,from,pfn,size,prot) \ +direct_remap_pfn_range(vma,from,pfn,size,prot,DOMID_IO) + +#define MK_IOSPACE_PFN(space, pfn) (pfn) +#define GET_IOSPACE(pfn) 0 +#define GET_PFN(pfn) (pfn) + +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL +#define __HAVE_ARCH_PTEP_SET_WRPROTECT +#define __HAVE_ARCH_PTE_SAME +#include + +#endif /* _I386_PGTABLE_H */