Merge to Fedora kernel-2.6.18-1.2224_FC5 patched with stable patch-2.6.18.1-vs2.0...
[linux-2.6.git] / arch / ia64 / mm / hugetlbpage.c
index c724358..eee5c1c 100644 (file)
@@ -8,7 +8,6 @@
  * Feb, 2004: dynamic hugetlb page size via boot parameter
  */
 
-#include <linux/config.h>
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
 
 unsigned int hpage_shift=HPAGE_SHIFT_DEFAULT;
 
-static pte_t *
+pte_t *
 huge_pte_alloc (struct mm_struct *mm, unsigned long addr)
 {
        unsigned long taddr = htlbpage_to_page(addr);
        pgd_t *pgd;
+       pud_t *pud;
        pmd_t *pmd;
        pte_t *pte = NULL;
 
        pgd = pgd_offset(mm, taddr);
-       pmd = pmd_alloc(mm, pgd, taddr);
-       if (pmd)
-               pte = pte_alloc_map(mm, pmd, taddr);
+       pud = pud_alloc(mm, pgd, taddr);
+       if (pud) {
+               pmd = pmd_alloc(mm, pud, taddr);
+               if (pmd)
+                       pte = pte_alloc_map(mm, pmd, taddr);
+       }
        return pte;
 }
 
-static pte_t *
+pte_t *
 huge_pte_offset (struct mm_struct *mm, unsigned long addr)
 {
        unsigned long taddr = htlbpage_to_page(addr);
        pgd_t *pgd;
+       pud_t *pud;
        pmd_t *pmd;
        pte_t *pte = NULL;
 
        pgd = pgd_offset(mm, taddr);
        if (pgd_present(*pgd)) {
-               pmd = pmd_offset(pgd, taddr);
-               if (pmd_present(*pmd))
-                       pte = pte_offset_map(pmd, taddr);
+               pud = pud_offset(pgd, taddr);
+               if (pud_present(*pud)) {
+                       pmd = pmd_offset(pud, taddr);
+                       if (pmd_present(*pmd))
+                               pte = pte_offset_map(pmd, taddr);
+               }
        }
 
        return pte;
@@ -59,110 +66,28 @@ huge_pte_offset (struct mm_struct *mm, unsigned long addr)
 
 #define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; }
 
-static void
-set_huge_pte (struct mm_struct *mm, struct vm_area_struct *vma,
-             struct page *page, pte_t * page_table, int write_access)
-{
-       pte_t entry;
-
-       // mm->rss += (HPAGE_SIZE / PAGE_SIZE);
-       vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE);
-       if (write_access) {
-               entry =
-                   pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
-       } else
-               entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
-       entry = pte_mkyoung(entry);
-       mk_pte_huge(entry);
-       set_pte(page_table, entry);
-       return;
-}
 /*
- * This function checks for proper alignment of input addr and len parameters.
+ * Don't actually need to do any preparation, but need to make sure
+ * the address is in the right region.
  */
-int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
+int prepare_hugepage_range(unsigned long addr, unsigned long len)
 {
        if (len & ~HPAGE_MASK)
                return -EINVAL;
        if (addr & ~HPAGE_MASK)
                return -EINVAL;
-       if (REGION_NUMBER(addr) != REGION_HPAGE)
+       if (REGION_NUMBER(addr) != RGN_HPAGE)
                return -EINVAL;
 
        return 0;
 }
 
-int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
-                       struct vm_area_struct *vma)
-{
-       pte_t *src_pte, *dst_pte, entry;
-       struct page *ptepage;
-       unsigned long addr = vma->vm_start;
-       unsigned long end = vma->vm_end;
-
-       while (addr < end) {
-               dst_pte = huge_pte_alloc(dst, addr);
-               if (!dst_pte)
-                       goto nomem;
-               src_pte = huge_pte_offset(src, addr);
-               entry = *src_pte;
-               ptepage = pte_page(entry);
-               get_page(ptepage);
-               set_pte(dst_pte, entry);
-               // dst->rss += (HPAGE_SIZE / PAGE_SIZE);
-               vx_rsspages_add(dst, HPAGE_SIZE / PAGE_SIZE);
-               addr += HPAGE_SIZE;
-       }
-       return 0;
-nomem:
-       return -ENOMEM;
-}
-
-int
-follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
-                   struct page **pages, struct vm_area_struct **vmas,
-                   unsigned long *st, int *length, int i)
-{
-       pte_t *ptep, pte;
-       unsigned long start = *st;
-       unsigned long pstart;
-       int len = *length;
-       struct page *page;
-
-       do {
-               pstart = start & HPAGE_MASK;
-               ptep = huge_pte_offset(mm, start);
-               pte = *ptep;
-
-back1:
-               page = pte_page(pte);
-               if (pages) {
-                       page += ((start & ~HPAGE_MASK) >> PAGE_SHIFT);
-                       get_page(page);
-                       pages[i] = page;
-               }
-               if (vmas)
-                       vmas[i] = vma;
-               i++;
-               len--;
-               start += PAGE_SIZE;
-               if (((start & HPAGE_MASK) == pstart) && len &&
-                               (start < vma->vm_end))
-                       goto back1;
-       } while (len && start < vma->vm_end);
-       *length = len;
-       *st = start;
-       return i;
-}
-
 struct page *follow_huge_addr(struct mm_struct *mm, unsigned long addr, int write)
 {
        struct page *page;
        pte_t *ptep;
 
-       if (! mm->used_hugetlb)
-               return ERR_PTR(-EINVAL);
-       if (REGION_NUMBER(addr) != REGION_HPAGE)
+       if (REGION_NUMBER(addr) != RGN_HPAGE)
                return ERR_PTR(-EINVAL);
 
        ptep = huge_pte_offset(mm, addr);
@@ -182,133 +107,29 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int wri
        return NULL;
 }
 
-/*
- * Same as generic free_pgtables(), except constant PGDIR_* and pgd_offset
- * are hugetlb region specific.
- */
-void hugetlb_free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *prev,
-       unsigned long start, unsigned long end)
+void hugetlb_free_pgd_range(struct mmu_gather **tlb,
+                       unsigned long addr, unsigned long end,
+                       unsigned long floor, unsigned long ceiling)
 {
-       unsigned long first = start & HUGETLB_PGDIR_MASK;
-       unsigned long last = end + HUGETLB_PGDIR_SIZE - 1;
-       unsigned long start_index, end_index;
-       struct mm_struct *mm = tlb->mm;
-
-       if (!prev) {
-               prev = mm->mmap;
-               if (!prev)
-                       goto no_mmaps;
-               if (prev->vm_end > start) {
-                       if (last > prev->vm_start)
-                               last = prev->vm_start;
-                       goto no_mmaps;
-               }
-       }
-       for (;;) {
-               struct vm_area_struct *next = prev->vm_next;
-
-               if (next) {
-                       if (next->vm_start < start) {
-                               prev = next;
-                               continue;
-                       }
-                       if (last > next->vm_start)
-                               last = next->vm_start;
-               }
-               if (prev->vm_end > first)
-                       first = prev->vm_end + HUGETLB_PGDIR_SIZE - 1;
-               break;
-       }
-no_mmaps:
-       if (last < first)       /* for arches with discontiguous pgd indices */
-               return;
        /*
-        * If the PGD bits are not consecutive in the virtual address, the
-        * old method of shifting the VA >> by PGDIR_SHIFT doesn't work.
+        * This is called to free hugetlb page tables.
+        *
+        * The offset of these addresses from the base of the hugetlb
+        * region must be scaled down by HPAGE_SIZE/PAGE_SIZE so that
+        * the standard free_pgd_range will free the right page tables.
+        *
+        * If floor and ceiling are also in the hugetlb region, they
+        * must likewise be scaled down; but if outside, left unchanged.
         */
 
-       start_index = pgd_index(htlbpage_to_page(first));
-       end_index = pgd_index(htlbpage_to_page(last));
-
-       if (end_index > start_index) {
-               clear_page_tables(tlb, start_index, end_index - start_index);
-       }
-}
-
-void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
-{
-       struct mm_struct *mm = vma->vm_mm;
-       unsigned long address;
-       pte_t *pte;
-       struct page *page;
-
-       BUG_ON(start & (HPAGE_SIZE - 1));
-       BUG_ON(end & (HPAGE_SIZE - 1));
+       addr = htlbpage_to_page(addr);
+       end  = htlbpage_to_page(end);
+       if (REGION_NUMBER(floor) == RGN_HPAGE)
+               floor = htlbpage_to_page(floor);
+       if (REGION_NUMBER(ceiling) == RGN_HPAGE)
+               ceiling = htlbpage_to_page(ceiling);
 
-       for (address = start; address < end; address += HPAGE_SIZE) {
-               pte = huge_pte_offset(mm, address);
-               if (pte_none(*pte))
-                       continue;
-               page = pte_page(*pte);
-               put_page(page);
-               pte_clear(pte);
-       }
-       // mm->rss -= (end - start) >> PAGE_SHIFT;
-       vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT);
-       flush_tlb_range(vma, start, end);
-}
-
-int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
-{
-       struct mm_struct *mm = current->mm;
-       unsigned long addr;
-       int ret = 0;
-
-       BUG_ON(vma->vm_start & ~HPAGE_MASK);
-       BUG_ON(vma->vm_end & ~HPAGE_MASK);
-
-       spin_lock(&mm->page_table_lock);
-       for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
-               unsigned long idx;
-               pte_t *pte = huge_pte_alloc(mm, addr);
-               struct page *page;
-
-               if (!pte) {
-                       ret = -ENOMEM;
-                       goto out;
-               }
-               if (!pte_none(*pte))
-                       continue;
-
-               idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
-                       + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
-               page = find_get_page(mapping, idx);
-               if (!page) {
-                       /* charge the fs quota first */
-                       if (hugetlb_get_quota(mapping)) {
-                               ret = -ENOMEM;
-                               goto out;
-                       }
-                       page = alloc_huge_page();
-                       if (!page) {
-                               hugetlb_put_quota(mapping);
-                               ret = -ENOMEM;
-                               goto out;
-                       }
-                       ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
-                       if (! ret) {
-                               unlock_page(page);
-                       } else {
-                               hugetlb_put_quota(mapping);
-                               free_huge_page(page);
-                               goto out;
-                       }
-               }
-               set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
-       }
-out:
-       spin_unlock(&mm->page_table_lock);
-       return ret;
+       free_pgd_range(tlb, addr, end, floor, ceiling);
 }
 
 unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
@@ -320,8 +141,8 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, u
                return -ENOMEM;
        if (len & ~HPAGE_MASK)
                return -EINVAL;
-       /* This code assumes that REGION_HPAGE != 0. */
-       if ((REGION_NUMBER(addr) != REGION_HPAGE) || (addr & (HPAGE_SIZE - 1)))
+       /* This code assumes that RGN_HPAGE != 0. */
+       if ((REGION_NUMBER(addr) != RGN_HPAGE) || (addr & (HPAGE_SIZE - 1)))
                addr = HPAGE_REGION_BASE;
        else
                addr = ALIGN(addr, HPAGE_SIZE);