+
+int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep, int write_access)
+{
+ int ret = VM_FAULT_SIGBUS;
+ unsigned long idx;
+ unsigned long size;
+ struct page *page;
+ struct address_space *mapping;
+ pte_t new_pte;
+
+ mapping = vma->vm_file->f_mapping;
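+ /* Index of the hugepage within the file, in HPAGE_SIZE units */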
+ idx = ((address - vma->vm_start) >> HPAGE_SHIFT)
+ + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
+
+ /*
+ * Use page lock to guard against racing truncation
+ * before we get page_table_lock.
+ */
+retry:
+ page = find_lock_page(mapping, idx);
+ if (!page) {
+ if (hugetlb_get_quota(mapping))
+ goto out;
+ page = alloc_huge_page(vma, address);
+ if (!page) {
+ hugetlb_put_quota(mapping);
+ ret = VM_FAULT_OOM;
+ goto out;
+ }
+ clear_huge_page(page, address);
+
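+ /*
+ * Shared mappings insert the new page into the page cache,
+ * where racing faulters can find it; for private mappings
+ * only the page lock is taken.
+ */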
+ if (vma->vm_flags & VM_SHARED) {
+ int err;
+
+ err = add_to_page_cache(page, mapping, idx, GFP_KERNEL);
+ if (err) {
+ put_page(page);
+ hugetlb_put_quota(mapping);
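+ /* -EEXIST means another faulter won the race; retry to use its page */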
+ if (err == -EEXIST)
+ goto retry;
+ goto out;
+ }
+ } else
+ lock_page(page);
+ }
+
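+ /*
+ * Recheck the file size under page_table_lock; a racing
+ * truncate may have shrunk the file since find_lock_page().
+ */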
+ spin_lock(&mm->page_table_lock);
+ size = i_size_read(mapping->host) >> HPAGE_SHIFT;
+ if (idx >= size)
+ goto backout;
+
+ ret = VM_FAULT_MINOR;
+ if (!pte_none(*ptep))
+ goto backout;
+
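+ /* Account the hugepage as HPAGE_SIZE/PAGE_SIZE small-page rss entries */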
+ add_mm_counter(mm, file_rss, HPAGE_SIZE / PAGE_SIZE);
+ new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
+ && (vma->vm_flags & VM_SHARED)));
+ set_huge_pte_at(mm, address, ptep, new_pte);
+
+ if (write_access && !(vma->vm_flags & VM_SHARED)) {
+ /* Optimization, do the COW without a second fault */
+ ret = hugetlb_cow(mm, vma, address, ptep, new_pte);
+ }
+
+ spin_unlock(&mm->page_table_lock);
+ unlock_page(page);
+out:
+ return ret;
+
+backout:
+ spin_unlock(&mm->page_table_lock);
+ hugetlb_put_quota(mapping);
+ unlock_page(page);
+ put_page(page);
+ goto out;
+}
+
+int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long address, int write_access)
+{
+ pte_t *ptep;
+ pte_t entry;
+ int ret;
+ static DEFINE_MUTEX(hugetlb_instantiation_mutex);
+
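+ /* Find, or allocate, the page table slot covering this hugepage */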
+ ptep = huge_pte_alloc(mm, address);
+ if (!ptep)
+ return VM_FAULT_OOM;
+
+ /*
+ * Serialize hugepage allocation and instantiation, so that we don't
+ * get spurious allocation failures if two CPUs race to instantiate
+ * the same page in the page cache.
+ */
+ mutex_lock(&hugetlb_instantiation_mutex);
+ entry = *ptep;
+ if (pte_none(entry)) {
+ ret = hugetlb_no_page(mm, vma, address, ptep, write_access);
+ mutex_unlock(&hugetlb_instantiation_mutex);
+ return ret;
+ }
+
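+ /* The pte is already instantiated; at most a COW break is needed */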
+ ret = VM_FAULT_MINOR;
+
+ spin_lock(&mm->page_table_lock);
+ /* Check for a racing update before calling hugetlb_cow */
+ if (likely(pte_same(entry, *ptep)))
+ if (write_access && !pte_write(entry))
+ ret = hugetlb_cow(mm, vma, address, ptep, entry);
+ spin_unlock(&mm->page_table_lock);
+ mutex_unlock(&hugetlb_instantiation_mutex);
+
+ return ret;
+}
+
+int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
+ struct page **pages, struct vm_area_struct **vmas,
+ unsigned long *position, int *length, int i)
+{
+ unsigned long pfn_offset;
+ unsigned long vaddr = *position;
+ int remainder = *length;
+
+ spin_lock(&mm->page_table_lock);
+ while (vaddr < vma->vm_end && remainder) {
+ pte_t *pte;
+ struct page *page;
+
+ /*
+ * Some archs (sparc64, sh*) have multiple pte_ts to
+ * each hugepage. We have to make sure we get the
+ * first, for the page indexing below to work.
+ */
+ pte = huge_pte_offset(mm, vaddr & HPAGE_MASK);
+
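+ /* No hugepage mapped here yet: drop the lock and fault one in */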
+ if (!pte || pte_none(*pte)) {
+ int ret;
+
+ spin_unlock(&mm->page_table_lock);
+ ret = hugetlb_fault(mm, vma, vaddr, 0);
+ spin_lock(&mm->page_table_lock);
+ if (ret == VM_FAULT_MINOR)
+ continue;
+
+ remainder = 0;
+ if (!i)
+ i = -EFAULT;
+ break;
+ }
+
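+ /* Which small page within the hugepage this vaddr refers to */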
+ pfn_offset = (vaddr & ~HPAGE_MASK) >> PAGE_SHIFT;
+ page = pte_page(*pte);
+same_page:
+ if (pages) {
+ get_page(page);
+ pages[i] = page + pfn_offset;
+ }
+
+ if (vmas)
+ vmas[i] = vma;
+
+ vaddr += PAGE_SIZE;
+ ++pfn_offset;
+ --remainder;
+ ++i;
+ if (vaddr < vma->vm_end && remainder &&
+ pfn_offset < HPAGE_SIZE/PAGE_SIZE) {
+ /*
+ * We use pfn_offset to avoid touching the pageframes
+ * of this compound page.
+ */
+ goto same_page;
+ }
+ }
+ spin_unlock(&mm->page_table_lock);
+ *length = remainder;
+ *position = vaddr;
+
+ return i;
+}
+
+void hugetlb_change_protection(struct vm_area_struct *vma,
+ unsigned long address, unsigned long end, pgprot_t newprot)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long start = address;
+ pte_t *ptep;
+ pte_t pte;
+
+ BUG_ON(address >= end);
+ flush_cache_range(vma, address, end);
+
+ spin_lock(&mm->page_table_lock);
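+ /* Step through the range one hugepage at a time, skipping holes */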
+ for (; address < end; address += HPAGE_SIZE) {
+ ptep = huge_pte_offset(mm, address);
+ if (!ptep)
+ continue;
+ if (!pte_none(*ptep)) {
+ pte = huge_ptep_get_and_clear(mm, address, ptep);
+ pte = pte_mkhuge(pte_modify(pte, newprot));
+ set_huge_pte_at(mm, address, ptep, pte);
+ lazy_mmu_prot_update(pte);
+ }
+ }
+ spin_unlock(&mm->page_table_lock);
+
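+ /* Old translations may still be cached with the old protection */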
+ flush_tlb_range(vma, start, end);
+}
+