[linux-2.6.git, vserver 2.0-rc4] arch/sparc64/mm/hugetlbpage.c
/*
 * SPARC64 Huge TLB page support.
 *
 * Copyright (C) 2002, 2003 David S. Miller (davem@redhat.com)
 */

#include <linux/config.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/sysctl.h>

#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>

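/*
 * Allocate (or find) the page table entries needed to map a huge page
 * at 'addr'.  On sparc64 a huge page is backed by a contiguous run of
 * ordinary PTEs (see set_huge_pte() below), so this walks down to a
 * normal PTE, allocating the pmd and pte levels as required.
 */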
static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte = NULL;

        pgd = pgd_offset(mm, addr);
        if (pgd) {
                pud = pud_offset(pgd, addr);
                if (pud) {
                        pmd = pmd_alloc(mm, pud, addr);
                        if (pmd)
                                pte = pte_alloc_map(mm, pmd, addr);
                }
        }
        return pte;
}

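/*
 * Look up the PTE for an existing huge page mapping without allocating
 * anything; returns NULL if an intermediate page table level is missing.
 */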
static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte = NULL;

        pgd = pgd_offset(mm, addr);
        if (pgd) {
                pud = pud_offset(pgd, addr);
                if (pud) {
                        pmd = pmd_offset(pud, addr);
                        if (pmd)
                                pte = pte_offset_map(pmd, addr);
                }
        }
        return pte;
}

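/* Mark a PTE as part of a huge page mapping via the _PAGE_SZHUGE size bit. */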
#define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0)

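/*
 * Install the translation for one huge page: build a PTE from 'page'
 * and the vma protections, mark it young and huge (and dirty+writable
 * for a writable mapping), then write HPAGE_SIZE / PAGE_SIZE
 * consecutive PTEs, advancing the physical address by PAGE_SIZE each
 * step.  The mm's RSS accounting is charged for the whole huge page
 * up front.
 */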
static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma,
                         struct page *page, pte_t * page_table, int write_access)
{
        unsigned long i;
        pte_t entry;

        vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE);

        if (write_access)
                entry = pte_mkwrite(pte_mkdirty(mk_pte(page,
                                                       vma->vm_page_prot)));
        else
                entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
        entry = pte_mkyoung(entry);
        mk_pte_huge(entry);

        for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
                set_pte(page_table, entry);
                page_table++;

                pte_val(entry) += PAGE_SIZE;
        }
}

/*
 * Check that both addr and len are aligned to the huge page size.
 */
int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
{
        if (len & ~HPAGE_MASK)
                return -EINVAL;
        if (addr & ~HPAGE_MASK)
                return -EINVAL;
        return 0;
}

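/*
 * Copy the huge page mappings of 'vma' from 'src' into 'dst' (used
 * when an address space is duplicated at fork): for each mapped huge
 * page, take a reference on the backing page, replicate the PTE run
 * and charge dst's RSS accounting.
 */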
int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                            struct vm_area_struct *vma)
{
        pte_t *src_pte, *dst_pte, entry;
        struct page *ptepage;
        unsigned long addr = vma->vm_start;
        unsigned long end = vma->vm_end;
        int i;

        while (addr < end) {
                dst_pte = huge_pte_alloc(dst, addr);
                if (!dst_pte)
                        goto nomem;
                src_pte = huge_pte_offset(src, addr);
                BUG_ON(!src_pte || pte_none(*src_pte));
                entry = *src_pte;
                ptepage = pte_page(entry);
                get_page(ptepage);
                for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
                        set_pte(dst_pte, entry);
                        pte_val(entry) += PAGE_SIZE;
                        dst_pte++;
                }
                vx_rsspages_add(dst, HPAGE_SIZE / PAGE_SIZE);
                addr += HPAGE_SIZE;
        }
        return 0;

nomem:
        return -ENOMEM;
}

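/*
 * Hand back the small pages backing a huge page range, as used by the
 * get_user_pages() path.  The mappings are expected to exist already
 * (hugetlb areas are prefaulted at setup time), hence the BUG_ON().
 */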
int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                        struct page **pages, struct vm_area_struct **vmas,
                        unsigned long *position, int *length, int i)
{
        unsigned long vaddr = *position;
        int remainder = *length;

        WARN_ON(!is_vm_hugetlb_page(vma));

        while (vaddr < vma->vm_end && remainder) {
                if (pages) {
                        pte_t *pte;
                        struct page *page;

                        pte = huge_pte_offset(mm, vaddr);

                        /* hugetlb should be locked, and hence, prefaulted */
                        BUG_ON(!pte || pte_none(*pte));

                        page = pte_page(*pte);

                        WARN_ON(!PageCompound(page));

                        get_page(page);
                        pages[i] = page;
                }

                if (vmas)
                        vmas[i] = vma;

                vaddr += PAGE_SIZE;
                --remainder;
                ++i;
        }

        *length = remainder;
        *position = vaddr;

        return i;
}

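/*
 * sparc64 maps huge pages through the normal page tables rather than
 * through a single huge pmd entry, so the generic follow_huge_addr(),
 * pmd_huge() and follow_huge_pmd() hooks are effectively stubs here.
 */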
struct page *follow_huge_addr(struct mm_struct *mm,
                              unsigned long address, int write)
{
        return ERR_PTR(-EINVAL);
}

int pmd_huge(pmd_t pmd)
{
        return 0;
}

struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
                             pmd_t *pmd, int write)
{
        return NULL;
}

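/*
 * Tear down the huge page mappings in [start, end): drop the page
 * reference taken when the mapping was set up, clear each PTE run,
 * uncharge the RSS accounting and flush the TLB for the range.
 */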
void unmap_hugepage_range(struct vm_area_struct *vma,
                          unsigned long start, unsigned long end)
{
        struct mm_struct *mm = vma->vm_mm;
        unsigned long address;
        pte_t *pte;
        struct page *page;
        int i;

        BUG_ON(start & (HPAGE_SIZE - 1));
        BUG_ON(end & (HPAGE_SIZE - 1));

        for (address = start; address < end; address += HPAGE_SIZE) {
                pte = huge_pte_offset(mm, address);
                BUG_ON(!pte);
                if (pte_none(*pte))
                        continue;
                page = pte_page(*pte);
                put_page(page);
                for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
                        pte_clear(pte);
                        pte++;
                }
        }
        vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT);
        flush_tlb_range(vma, start, end);
}

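/*
 * Instantiate every huge page in the vma up front: look each index up
 * in the mapping's page cache, allocating (and charging filesystem
 * quota for) a fresh huge page on a miss, then install its PTEs with
 * set_huge_pte().  Runs under mm->page_table_lock.
 */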
int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
{
        struct mm_struct *mm = current->mm;
        unsigned long addr;
        int ret = 0;

        BUG_ON(vma->vm_start & ~HPAGE_MASK);
        BUG_ON(vma->vm_end & ~HPAGE_MASK);

        spin_lock(&mm->page_table_lock);
        for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
                unsigned long idx;
                pte_t *pte = huge_pte_alloc(mm, addr);
                struct page *page;

                if (!pte) {
                        ret = -ENOMEM;
                        goto out;
                }
                if (!pte_none(*pte))
                        continue;

                idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
                        + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
                page = find_get_page(mapping, idx);
                if (!page) {
                        /* charge the fs quota first */
                        if (hugetlb_get_quota(mapping)) {
                                ret = -ENOMEM;
                                goto out;
                        }
                        page = alloc_huge_page();
                        if (!page) {
                                hugetlb_put_quota(mapping);
                                ret = -ENOMEM;
                                goto out;
                        }
                        ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
                        if (!ret) {
                                unlock_page(page);
                        } else {
                                hugetlb_put_quota(mapping);
                                free_huge_page(page);
                                goto out;
                        }
                }
                set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
        }
out:
        spin_unlock(&mm->page_table_lock);
        return ret;
}