linux-2.6.git: arch/sparc64/mm/hugetlbpage.c (at commit 60760d0415200f8dbbf205d9dccde2ecee6c0f92)
/*
 * SPARC64 Huge TLB page support.
 *
 * Copyright (C) 2002, 2003 David S. Miller (davem@redhat.com)
 */

#include <linux/config.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/sysctl.h>

#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>

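/*
 * Walk the page tables for @addr, allocating the pmd and pte levels
 * as needed, and return the first pte of the group that will map the
 * huge page.  Returns NULL if a page-table page cannot be allocated.
 */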
static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte = NULL;

	pgd = pgd_offset(mm, addr);
	if (pgd) {
		pud = pud_offset(pgd, addr);
		if (pud) {
			pmd = pmd_alloc(mm, pud, addr);
			if (pmd)
				pte = pte_alloc_map(mm, pmd, addr);
		}
	}
	return pte;
}

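/*
 * Lookup-only counterpart of huge_pte_alloc(): walk the existing page
 * tables and return the pte mapping @addr, without allocating anything.
 */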
static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte = NULL;

	pgd = pgd_offset(mm, addr);
	if (pgd) {
		pud = pud_offset(pgd, addr);
		if (pud) {
			pmd = pmd_offset(pud, addr);
			if (pmd)
				pte = pte_offset_map(pmd, addr);
		}
	}
	return pte;
}

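/* Mark a pte as part of a huge-page mapping by setting _PAGE_SZHUGE. */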
#define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0)

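/*
 * Install the mapping for one huge page.  A huge page is backed by
 * (1 << HUGETLB_PAGE_ORDER) contiguous ptes, so the entry is written
 * repeatedly with its physical address advanced by PAGE_SIZE each
 * iteration.  RSS accounting goes through vx_rsspages_add() instead
 * of a direct mm->rss update.
 */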
static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma,
			 struct page *page, pte_t *page_table, int write_access)
{
	unsigned long i;
	pte_t entry;

	/* mm->rss += (HPAGE_SIZE / PAGE_SIZE); */
	vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE);

	if (write_access)
		entry = pte_mkwrite(pte_mkdirty(mk_pte(page,
						       vma->vm_page_prot)));
	else
		entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
	entry = pte_mkyoung(entry);
	mk_pte_huge(entry);

	for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
		set_pte(page_table, entry);
		page_table++;

		pte_val(entry) += PAGE_SIZE;
	}
}

/*
 * This function checks for proper alignment of input addr and len parameters.
 */
int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
{
	if (len & ~HPAGE_MASK)
		return -EINVAL;
	if (addr & ~HPAGE_MASK)
		return -EINVAL;
	return 0;
}

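/*
 * Copy the huge-page mappings of @vma from @src into @dst at fork()
 * time: take a reference on each mapped huge page and replicate its
 * pte group into the destination page tables.
 */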
int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
			    struct vm_area_struct *vma)
{
	pte_t *src_pte, *dst_pte, entry;
	struct page *ptepage;
	unsigned long addr = vma->vm_start;
	unsigned long end = vma->vm_end;
	int i;

	while (addr < end) {
		dst_pte = huge_pte_alloc(dst, addr);
		if (!dst_pte)
			goto nomem;
		src_pte = huge_pte_offset(src, addr);
		BUG_ON(!src_pte || pte_none(*src_pte));
		entry = *src_pte;
		ptepage = pte_page(entry);
		get_page(ptepage);
		for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
			set_pte(dst_pte, entry);
			pte_val(entry) += PAGE_SIZE;
			dst_pte++;
		}
		/* dst->rss += (HPAGE_SIZE / PAGE_SIZE); */
		vx_rsspages_add(dst, HPAGE_SIZE / PAGE_SIZE);
		addr += HPAGE_SIZE;
	}
	return 0;

nomem:
	return -ENOMEM;
}

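/*
 * get_user_pages() back end for hugetlb VMAs: fill @pages and @vmas
 * for up to *@length base pages starting at *@position.  Huge pages
 * are prefaulted at mmap() time, hence the BUG_ON() for empty ptes.
 */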
int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
			struct page **pages, struct vm_area_struct **vmas,
			unsigned long *position, int *length, int i)
{
	unsigned long vaddr = *position;
	int remainder = *length;

	WARN_ON(!is_vm_hugetlb_page(vma));

	while (vaddr < vma->vm_end && remainder) {
		if (pages) {
			pte_t *pte;
			struct page *page;

			pte = huge_pte_offset(mm, vaddr);

			/* hugetlb should be locked, and hence prefaulted */
			BUG_ON(!pte || pte_none(*pte));

			page = pte_page(*pte);

			WARN_ON(!PageCompound(page));

			get_page(page);
			pages[i] = page;
		}

		if (vmas)
			vmas[i] = vma;

		vaddr += PAGE_SIZE;
		--remainder;
		++i;
	}

	*length = remainder;
	*position = vaddr;

	return i;
}

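/*
 * sparc64 maps huge pages with groups of ordinary ptes rather than a
 * single huge pmd, so the generic follow_page() path can handle them
 * and the three hooks below are stubs.
 */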
struct page *follow_huge_addr(struct mm_struct *mm,
			      unsigned long address, int write)
{
	return ERR_PTR(-EINVAL);
}

int pmd_huge(pmd_t pmd)
{
	return 0;
}

struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
			     pmd_t *pmd, int write)
{
	return NULL;
}

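/*
 * Tear down the huge-page mappings in [start, end): drop the reference
 * on each mapped huge page, clear its pte group and flush the TLB for
 * the range.  Both boundaries must be huge-page aligned.
 */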
void unmap_hugepage_range(struct vm_area_struct *vma,
			  unsigned long start, unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long address;
	pte_t *pte;
	struct page *page;
	int i;

	BUG_ON(start & (HPAGE_SIZE - 1));
	BUG_ON(end & (HPAGE_SIZE - 1));

	for (address = start; address < end; address += HPAGE_SIZE) {
		pte = huge_pte_offset(mm, address);
		BUG_ON(!pte);
		if (pte_none(*pte))
			continue;
		page = pte_page(*pte);
		put_page(page);
		for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
			pte_clear(pte);
			pte++;
		}
	}
	/* mm->rss -= (end - start) >> PAGE_SHIFT; */
	vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT);
	flush_tlb_range(vma, start, end);
}

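/*
 * Prefault an entire hugetlb VMA at mmap() time: for each huge page,
 * find it in the page cache or allocate and insert a fresh one
 * (charging the hugetlbfs quota), then install its ptes.  Runs under
 * mm->page_table_lock, hence the GFP_ATOMIC page-cache insertion.
 */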
int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr;
	int ret = 0;

	BUG_ON(vma->vm_start & ~HPAGE_MASK);
	BUG_ON(vma->vm_end & ~HPAGE_MASK);

	spin_lock(&mm->page_table_lock);
	for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
		unsigned long idx;
		pte_t *pte = huge_pte_alloc(mm, addr);
		struct page *page;

		if (!pte) {
			ret = -ENOMEM;
			goto out;
		}
		if (!pte_none(*pte))
			continue;

		idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
			+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
		page = find_get_page(mapping, idx);
		if (!page) {
			/* charge the fs quota first */
			if (hugetlb_get_quota(mapping)) {
				ret = -ENOMEM;
				goto out;
			}
			page = alloc_huge_page();
			if (!page) {
				hugetlb_put_quota(mapping);
				ret = -ENOMEM;
				goto out;
			}
			ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
			if (!ret) {
				unlock_page(page);
			} else {
				hugetlb_put_quota(mapping);
				free_huge_page(page);
				goto out;
			}
		}
		set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
	}
out:
	spin_unlock(&mm->page_table_lock);
	return ret;
}