X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=mm%2Fmprotect.c;h=9eafe2582ebf33b2d324e6c3529e6068a8a11d27;hb=refs%2Fheads%2Fvserver;hp=0427711ec10bad0ca1b6e934c1c35e400575851e;hpb=6a77f38946aaee1cd85eeec6cf4229b204c15071;p=linux-2.6.git

diff --git a/mm/mprotect.c b/mm/mprotect.c
index 0427711ec..9eafe2582 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -19,136 +19,127 @@
 #include
 #include
 #include
-
+#include
+#include
 #include
 #include
+#include
 #include
 #include
 
-static inline void
-change_pte_range(pmd_t *pmd, unsigned long address,
-		unsigned long size, pgprot_t newprot)
+static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
+		unsigned long addr, unsigned long end, pgprot_t newprot,
+		int dirty_accountable)
 {
-	pte_t * pte;
-	unsigned long end;
-
-	if (pmd_none(*pmd))
-		return;
-	if (pmd_bad(*pmd)) {
-		pmd_ERROR(*pmd);
-		pmd_clear(pmd);
-		return;
-	}
-	pte = pte_offset_map(pmd, address);
-	address &= ~PMD_MASK;
-	end = address + size;
-	if (end > PMD_SIZE)
-		end = PMD_SIZE;
+	pte_t *pte, oldpte;
+	spinlock_t *ptl;
+
+	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	arch_enter_lazy_mmu_mode();
 	do {
-		if (pte_present(*pte)) {
-			pte_t entry;
+		oldpte = *pte;
+		if (pte_present(oldpte)) {
+			pte_t ptent;
 
 			/* Avoid an SMP race with hardware updated dirty/clean
 			 * bits by wiping the pte and then setting the new pte
 			 * into place.
 			 */
-			entry = ptep_get_and_clear(pte);
-			set_pte(pte, pte_modify(entry, newprot));
+			ptent = ptep_get_and_clear(mm, addr, pte);
+			ptent = pte_modify(ptent, newprot);
+			/*
+			 * Avoid taking write faults for pages we know to be
+			 * dirty.
+			 */
+			if (dirty_accountable && pte_dirty(ptent))
+				ptent = pte_mkwrite(ptent);
+			set_pte_at(mm, addr, pte, ptent);
+			lazy_mmu_prot_update(ptent);
+#ifdef CONFIG_MIGRATION
+		} else if (!pte_file(oldpte)) {
+			swp_entry_t entry = pte_to_swp_entry(oldpte);
+
+			if (is_write_migration_entry(entry)) {
+				/*
+				 * A protection check is difficult so
+				 * just be safe and disable write
+				 */
+				make_migration_entry_read(&entry);
+				set_pte_at(mm, addr, pte,
+					swp_entry_to_pte(entry));
+			}
+#endif
 		}
-		address += PAGE_SIZE;
-		pte++;
-	} while (address && (address < end));
-	pte_unmap(pte - 1);
+
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+	arch_leave_lazy_mmu_mode();
+	pte_unmap_unlock(pte - 1, ptl);
 }
 
-static inline void
-change_pmd_range(pud_t *pud, unsigned long address,
-		unsigned long size, pgprot_t newprot)
+static inline void change_pmd_range(struct mm_struct *mm, pud_t *pud,
+		unsigned long addr, unsigned long end, pgprot_t newprot,
+		int dirty_accountable)
 {
-	pmd_t * pmd;
-	unsigned long end;
-
-	if (pud_none(*pud))
-		return;
-	if (pud_bad(*pud)) {
-		pud_ERROR(*pud);
-		pud_clear(pud);
-		return;
-	}
-	pmd = pmd_offset(pud, address);
-	address &= ~PUD_MASK;
-	end = address + size;
-	if (end > PUD_SIZE)
-		end = PUD_SIZE;
+	pmd_t *pmd;
+	unsigned long next;
+
+	pmd = pmd_offset(pud, addr);
 	do {
-		change_pte_range(pmd, address, end - address, newprot);
-		address = (address + PMD_SIZE) & PMD_MASK;
-		pmd++;
-	} while (address && (address < end));
+		next = pmd_addr_end(addr, end);
+		if (pmd_none_or_clear_bad(pmd))
+			continue;
+		change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable);
+	} while (pmd++, addr = next, addr != end);
 }
 
-static inline void
-change_pud_range(pgd_t *pgd, unsigned long address,
-		unsigned long size, pgprot_t newprot)
+static inline void change_pud_range(struct mm_struct *mm, pgd_t *pgd,
+		unsigned long addr, unsigned long end, pgprot_t newprot,
+		int dirty_accountable)
 {
-	pud_t * pud;
-	unsigned long end;
-
-	if (pgd_none(*pgd))
-		return;
-	if (pgd_bad(*pgd)) {
-		pgd_ERROR(*pgd);
-		pgd_clear(pgd);
-		return;
-	}
-	pud = pud_offset(pgd, address);
-	address &= ~PGDIR_MASK;
-	end = address + size;
-	if (end > PGDIR_SIZE)
-		end = PGDIR_SIZE;
+	pud_t *pud;
+	unsigned long next;
+
+	pud = pud_offset(pgd, addr);
 	do {
-		change_pmd_range(pud, address, end - address, newprot);
-		address = (address + PUD_SIZE) & PUD_MASK;
-		pud++;
-	} while (address && (address < end));
+		next = pud_addr_end(addr, end);
+		if (pud_none_or_clear_bad(pud))
+			continue;
+		change_pmd_range(mm, pud, addr, next, newprot, dirty_accountable);
+	} while (pud++, addr = next, addr != end);
 }
 
-static void
-change_protection(struct vm_area_struct *vma, unsigned long start,
-		unsigned long end, pgprot_t newprot)
+static void change_protection(struct vm_area_struct *vma,
+		unsigned long addr, unsigned long end, pgprot_t newprot,
+		int dirty_accountable)
 {
-	struct mm_struct *mm = current->mm;
+	struct mm_struct *mm = vma->vm_mm;
 	pgd_t *pgd;
-	unsigned long beg = start, next;
-	int i;
-
-	pgd = pgd_offset(mm, start);
-	flush_cache_range(vma, beg, end);
-	BUG_ON(start >= end);
-	spin_lock(&mm->page_table_lock);
-	for (i = pgd_index(start); i <= pgd_index(end-1); i++) {
-		next = (start + PGDIR_SIZE) & PGDIR_MASK;
-		if (next <= start || next > end)
-			next = end;
-		change_pud_range(pgd, start, next - start, newprot);
-		start = next;
-		pgd++;
-	}
-	flush_tlb_range(vma, beg, end);
-	spin_unlock(&mm->page_table_lock);
+	unsigned long next;
+	unsigned long start = addr;
+
+	BUG_ON(addr >= end);
+	pgd = pgd_offset(mm, addr);
+	flush_cache_range(vma, addr, end);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_or_clear_bad(pgd))
+			continue;
+		change_pud_range(mm, pgd, addr, next, newprot, dirty_accountable);
+	} while (pgd++, addr = next, addr != end);
+	flush_tlb_range(vma, start, end);
 }
 
 static int
 mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
 	unsigned long start, unsigned long end, unsigned long newflags)
 {
-	struct mm_struct * mm = vma->vm_mm;
+	struct mm_struct *mm = vma->vm_mm;
 	unsigned long oldflags = vma->vm_flags;
 	long nrpages = (end - start) >> PAGE_SHIFT;
-	unsigned long charged = 0;
-	pgprot_t newprot;
+	unsigned long charged = 0, old_end = vma->vm_end;
 	pgoff_t pgoff;
 	int error;
+	int dirty_accountable = 0;
 
 	if (newflags == oldflags) {
 		*pprev = vma;
@@ -164,7 +155,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
 	 * a MAP_NORESERVE private mapping to writable will now reserve.
 	 */
 	if (newflags & VM_WRITE) {
-		if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED|VM_HUGETLB))) {
+		if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED))) {
 			charged = nrpages;
 			if (security_vm_enough_memory(charged))
 				return -ENOMEM;
@@ -172,8 +163,6 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
 		}
 	}
 
-	newprot = protection_map[newflags & 0xf];
-
 	/*
 	 * First try to merge with previous and/or next vma.
 	 */
@@ -185,16 +174,13 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
 		goto success;
 	}
 
+	*pprev = vma;
+
 	if (start != vma->vm_start) {
 		error = split_vma(mm, vma, start, 1);
 		if (error)
 			goto fail;
 	}
-	/*
-	 * Unless it returns an error, this function always sets *pprev to
-	 * the first vma for which vma->vm_end >= end.
-	 */
-	*pprev = vma;
 
 	if (end != vma->vm_end) {
 		error = split_vma(mm, vma, end, 0);
@@ -208,10 +194,23 @@ success:
 	 * held in write mode.
 	 */
 	vma->vm_flags = newflags;
-	vma->vm_page_prot = newprot;
-	change_protection(vma, start, end, newprot);
-	__vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
-	__vm_stat_account(mm, newflags, vma->vm_file, nrpages);
+	vma->vm_page_prot = protection_map[newflags &
+					(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+	if (vma_wants_writenotify(vma)) {
+		vma->vm_page_prot = protection_map[newflags &
+					(VM_READ|VM_WRITE|VM_EXEC)];
+		dirty_accountable = 1;
+	}
+
+	if (oldflags & VM_EXEC)
+		arch_remove_exec_range(current->mm, old_end);
+
+	if (is_vm_hugetlb_page(vma))
+		hugetlb_change_protection(vma, start, end, vma->vm_page_prot);
+	else
+		change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable);
+	vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
+	vm_stat_account(mm, newflags, vma->vm_file, nrpages);
 	return 0;
 
 fail:
@@ -222,7 +221,7 @@ fail:
 asmlinkage long
 sys_mprotect(unsigned long start, size_t len, unsigned long prot)
 {
-	unsigned long vm_flags, nstart, end, tmp;
+	unsigned long vm_flags, nstart, end, tmp, reqprot;
 	struct vm_area_struct *vma, *prev;
 	int error = -EINVAL;
 	const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);
@@ -232,19 +231,20 @@ sys_mprotect(unsigned long start, size_t len, unsigned long prot)
 
 	if (start & ~PAGE_MASK)
 		return -EINVAL;
+	if (!len)
+		return 0;
 	len = PAGE_ALIGN(len);
 	end = start + len;
-	if (end < start)
+	if (end <= start)
 		return -ENOMEM;
 	if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM))
 		return -EINVAL;
-	if (end == start)
-		return 0;
+
+	reqprot = prot;
 	/*
 	 * Does the application expect PROT_READ to imply PROT_EXEC:
 	 */
-	if (unlikely((prot & PROT_READ) &&
-			(current->personality & READ_IMPLIES_EXEC)))
+	if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
 		prot |= PROT_EXEC;
 
 	vm_flags = calc_vm_prot_bits(prot);
@@ -281,19 +281,15 @@ sys_mprotect(unsigned long start, size_t len, unsigned long prot)
 
 		/* Here we know that vma->vm_start <= nstart < vma->vm_end. */
 
-		if (is_vm_hugetlb_page(vma)) {
-			error = -EACCES;
-			goto out;
-		}
-
 		newflags = vm_flags | (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));
 
-		if ((newflags & ~(newflags >> 4)) & 0xf) {
+		/* newflags >> 4 shift VM_MAY% in place of VM_% */
+		if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) {
 			error = -EACCES;
 			goto out;
 		}
 
-		error = security_file_mprotect(vma, prot);
+		error = security_file_mprotect(vma, reqprot, prot);
 		if (error)
 			goto out;
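
For context only (not part of the patch): a minimal userspace sketch that exercises the sys_mprotect() path touched above. It assumes a POSIX/Linux build environment; names and error handling are illustrative, not taken from this tree.

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	/* Anonymous read/write mapping of one page. */
	char *p = mmap(NULL, page, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	strcpy(p, "hello");

	/*
	 * Drop write permission.  The start address must be page aligned;
	 * per the patched syscall, a zero length simply returns success.
	 */
	if (mprotect(p, page, PROT_READ) != 0) {
		perror("mprotect");
		return 1;
	}
	printf("still readable: %s\n", p);
	/* A write through p here would now fault with SIGSEGV. */

	munmap(p, page);
	return 0;
}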