3 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
5 * Derived from "arch/i386/mm/fault.c"
6 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
8 * Modified by Cort Dougan and Paul Mackerras.
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
16 #include <linux/signal.h>
17 #include <linux/sched.h>
18 #include <linux/kernel.h>
19 #include <linux/errno.h>
20 #include <linux/string.h>
21 #include <linux/types.h>
22 #include <linux/ptrace.h>
23 #include <linux/mman.h>
25 #include <linux/interrupt.h>
26 #include <linux/highmem.h>
27 #include <linux/module.h>
30 #include <asm/pgtable.h>
32 #include <asm/mmu_context.h>
33 #include <asm/system.h>
34 #include <asm/uaccess.h>
35 #include <asm/tlbflush.h>
/*
 * Debugger hook pointers and fault statistics defined in this file.
 * The hook declarations follow a test for XMON or KGDB being configured.
 * NOTE(review): the directive that closes this #if is not visible in this
 * listing - confirm exactly which declarations it covers.
 */
37 #if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
38 extern void (*debugger)(struct pt_regs *);
39 extern void (*debugger_fault_handler)(struct pt_regs *);
40 extern int (*debugger_dabr_match)(struct pt_regs *);
/* Initialised to 1; tested by bad_page_fault() in this file. */
41 int debugger_kernel_faults = 1;
/* Event counters; each trailing comment names the code that updates it. */
44 unsigned long htab_reloads; /* updated by hashtable.S:hash_page() */
45 unsigned long htab_evicts; /* updated by hashtable.S:hash_page() */
46 unsigned long htab_preloads; /* updated by hashtable.S:add_hash_page() */
47 unsigned long pte_misses; /* updated by do_page_fault() */
48 unsigned long pte_errors; /* updated by do_page_fault() */
/* NOTE(review): no user of probingmem is visible in this listing. */
49 unsigned int probingmem;
/*
 * store_updates_sp() - decode the instruction located at regs->nip.
 * @regs: register frame; regs->nip is the address of the instruction word.
 *
 * Reads the instruction word with get_user() and then tests, in order,
 * its rA field, its major opcode and its minor opcode.
 * NOTE(review): the return statements and most of the opcode cases are
 * missing from this listing; presumably a nonzero result means the store
 * would update r1 - confirm against the complete file.
 */
52 * Check whether the instruction at regs->nip is a store using
53 * an update addressing form which will update r1.
55 static int store_updates_sp(struct pt_regs *regs)
/* The result of get_user() is checked; the branch body is not shown here. */
59 if (get_user(inst, (unsigned int __user *)regs->nip))
61 /* check for 1 in the rA field */
62 if (((inst >> 16) & 0x1f) != 1)
64 /* check major opcode */
/* The minor opcode is the ten-bit field just above the lowest bit. */
73 /* check minor opcode */
74 switch ((inst >> 1) & 0x3ff) {
78 case 695: /* stfsux */
79 case 759: /* stfdux */
/*
 * do_page_fault() - handle a data or instruction storage fault.
 * @regs:       register frame at the time of the fault
 * @address:    faulting address
 * @error_code: fault status bits; the source register depends on the CPU
 *              family, as described in the text that follows
 *
 * Looks up the VMA for @address under mm->mmap_sem, lets a VM_GROWSDOWN
 * VMA grow where the stack checks allow it, tests the access against the
 * VMA flags and then calls handle_mm_fault().  The failure paths visible
 * here raise SIGSEGV through _exception(), raise SIGBUS through
 * force_sig_info(), or print a message about killing the process.
 * NOTE(review): labels, gotos and return statements are missing from this
 * listing, so the exact control flow and the return values must be
 * confirmed against the complete file.
 */
87 * For 600- and 800-family processors, the error_code parameter is DSISR
88 * for a data fault, SRR1 for an instruction fault. For 400-family processors
89 * the error_code parameter is ESR for a data fault, 0 for an instruction
92 int do_page_fault(struct pt_regs *regs, unsigned long address,
93 unsigned long error_code)
95 struct vm_area_struct * vma;
96 struct mm_struct *mm = current->mm;
98 int code = SEGV_MAPERR;
/* 4xx and Book-E: the write flag is the ESR_DST bit of error_code. */
99 #if defined(CONFIG_4xx) || defined (CONFIG_BOOKE)
100 int is_write = error_code & ESR_DST;
105 * Fortunately the bit assignments in SRR1 for an instruction
106 * fault and DSISR for a data fault are mostly the same for the
107 * bits we are interested in. But there are some bits which
108 * indicate errors in DSISR but can validly be set in SRR1.
/* For trap 0x400 only the bits in mask 0x48200000 are kept. */
110 if (TRAP(regs) == 0x400)
111 error_code &= 0x48200000;
/* Here the write flag is bit 0x02000000 of error_code. */
113 is_write = error_code & 0x02000000;
114 #endif /* CONFIG_4xx || CONFIG_BOOKE */
116 #if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
/* An installed debugger fault handler is called first for 0x300 traps. */
117 if (debugger_fault_handler && TRAP(regs) == 0x300) {
118 debugger_fault_handler(regs);
121 #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
/* Bit 0x00400000 set: ask the debugger whether this is a DABR match. */
122 if (error_code & 0x00400000) {
124 if (debugger_dabr_match(regs))
127 #endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
128 #endif /* CONFIG_XMON || CONFIG_KGDB */
/*
 * Atomic context or a task without an mm is diverted before mmap_sem is
 * touched; the branch target is not visible in this listing.
 */
130 if (in_atomic() || mm == NULL)
/* mmap_sem is held for reading across the VMA lookup. */
133 down_read(&mm->mmap_sem);
134 vma = find_vma(mm, address);
137 if (vma->vm_start <= address)
139 if (!(vma->vm_flags & VM_GROWSDOWN))
145 * N.B. The rs6000/xcoff ABI allows programs to access up to
146 * a few hundred bytes below the stack pointer.
147 * The kernel signal delivery code writes up to about 1.5kB
148 * below the stack pointer (r1) before decrementing it.
149 * The exec code can write slightly over 640kB to the stack
150 * before setting the user r1. Thus we allow the stack to
151 * expand to 1MB without further checks.
153 if (address + 0x100000 < vma->vm_end) {
154 /* get user regs even if this fault is in kernel mode */
155 struct pt_regs *uregs = current->thread.regs;
160 * A user-mode access to an address a long way below
161 * the stack pointer is only valid if the instruction
162 * is one which would update the stack pointer to the
163 * address accessed if the instruction completed,
164 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
165 * (or the byte, halfword, float or double forms).
167 * If we don't check this then any write to the area
168 * between the last mapped region and the stack will
169 * expand the stack rather than segfaulting.
171 if (address + 2048 < uregs->gpr[1]
172 && (!user_mode(regs) || !store_updates_sp(regs)))
175 if (expand_stack(vma, address))
180 #if defined(CONFIG_6xx)
181 if (error_code & 0x95700000)
182 /* an error such as lwarx to I/O controller space,
183 address matching DABR, eciwx, etc. */
185 #endif /* CONFIG_6xx */
186 #if defined(CONFIG_8xx)
187 /* The MPC8xx seems to always set 0x80000000, which is
188 * "undefined". Of those that can be set, this is the only
189 * one which seems bad.
191 if (error_code & 0x10000000)
192 /* Guarded storage error. */
194 #endif /* CONFIG_8xx */
198 if (!(vma->vm_flags & VM_WRITE))
200 #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
201 /* an exec - 4xx/Book-E allows for per-page execute permission */
202 } else if (TRAP(regs) == 0x400) {
207 /* It would be nice to actually enforce the VM execute
208 permission on CPUs which can do so, but far too
209 much stuff in userspace doesn't get the permissions
210 right, so we let any page be executed for now. */
211 if (! (vma->vm_flags & VM_EXEC))
215 /* Since 4xx/Book-E supports per-page execute permission,
216 * we lazily flush dcache to icache. */
/* Look up the PTE for the faulting address; ptl is its page-table lock. */
218 if (get_pteptr(mm, address, &ptep, &pmdp)) {
219 spinlock_t *ptl = pte_lockptr(mm, pmdp);
221 if (pte_present(*ptep)) {
222 struct page *page = pte_page(*ptep);
/* PG_arch_1 clear: flush this page from dcache to icache, then set the bit. */
224 if (!test_bit(PG_arch_1, &page->flags)) {
225 flush_dcache_icache_page(page);
226 set_bit(PG_arch_1, &page->flags);
228 pte_update(ptep, 0, _PAGE_HWEXEC);
230 pte_unmap_unlock(ptep, ptl);
231 up_read(&mm->mmap_sem);
234 pte_unmap_unlock(ptep, ptl);
239 /* protection fault */
240 if (error_code & 0x08000000)
242 if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
247 * If for any reason at all we couldn't handle the fault,
248 * make sure we exit gracefully rather than endlessly redo
252 switch (handle_mm_fault(mm, vma, address, is_write)) {
259 case VM_FAULT_SIGBUS:
267 up_read(&mm->mmap_sem);
269 * keep track of tlb+htab misses that are good addrs but
270 * just need pte's created via handle_mm_fault()
277 up_read(&mm->mmap_sem);
280 /* User mode accesses cause a SIGSEGV */
281 if (user_mode(regs)) {
282 _exception(SIGSEGV, regs, code, address);
289 * We ran out of memory, or some other thing happened to us that made
290 * us unable to handle the page fault gracefully.
293 up_read(&mm->mmap_sem);
/*
 * The init task is handled separately: mmap_sem is taken again for it.
 * NOTE(review): the rest of that path is not visible in this listing.
 */
294 if (is_init(current)) {
296 down_read(&mm->mmap_sem);
299 printk("VM: killing process %s(%d:#%u)\n",
300 current->comm, current->pid, current->xid);
306 up_read(&mm->mmap_sem);
/* Send SIGBUS with BUS_ADRERR and the faulting address to the current task. */
307 info.si_signo = SIGBUS;
309 info.si_code = BUS_ADRERR;
310 info.si_addr = (void __user *)address;
311 force_sig_info (SIGBUS, &info, current);
/* Kernel-mode faults take a further step here that this listing omits. */
312 if (!user_mode(regs))
/*
 * bad_page_fault() - deal with a bad access made by the kernel.
 * @regs:    register frame of the faulting context
 * @address: faulting address (not referenced in the lines visible here)
 * @sig:     signal number handed on to die()
 *
 * When regs->nip has an exception-table entry, regs->nip is replaced by
 * the fixup address of that entry.  The other visible outcome is a call
 * to die().
 * NOTE(review): the return type and the return that presumably follows
 * the fixup are not visible in this listing.
 */
318 * bad_page_fault is called when we have a bad access from the kernel.
319 * It is called from the DSI and ISI handlers in head.S and from some
320 * of the procedures in traps.c.
323 bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
325 const struct exception_table_entry *entry;
327 /* Are we prepared to handle this fault? */
328 if ((entry = search_exception_tables(regs->nip)) != NULL) {
329 regs->nip = entry->fixup;
333 /* kernel has accessed a bad area */
/*
 * With XMON or KGDB configured, debugger_kernel_faults gates a step whose
 * body is not visible in this listing.
 */
334 #if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
335 if (debugger_kernel_faults)
338 die("kernel access of bad area", regs, sig);
/*
 * va_to_pte() - look up the PTE for a virtual address in init_mm.
 * @address: virtual address to look up
 *
 * Addresses below TASK_SIZE are tested first; the walk then goes from the
 * init_mm pgd entry to the pmd and on to the pte, checking presence at the
 * pmd and pte levels.
 * NOTE(review): the return statements are not visible in this listing;
 * presumably a null pointer is returned when no present PTE is found -
 * confirm against the complete file.
 */
343 /* The pgtable.h claims some functions generically exist, but I
344 * can't find them......
346 pte_t *va_to_pte(unsigned long address)
352 if (address < TASK_SIZE)
355 dir = pgd_offset(&init_mm, address);
357 pmd = pmd_offset(dir, address & PAGE_MASK);
358 if (pmd && pmd_present(*pmd)) {
359 pte = pte_offset_kernel(pmd, address & PAGE_MASK);
360 if (pte && pte_present(*pte))
/*
 * va_to_phys() - translate a virtual address through its PTE.
 * @address: virtual address to translate
 *
 * Uses va_to_pte() to find the PTE, then combines the PAGE_MASK bits of
 * the PTE value with the in-page offset of @address.
 * NOTE(review): the test on the va_to_pte() result and the value returned
 * when that test fails are not visible in this listing.
 */
367 unsigned long va_to_phys(unsigned long address)
371 pte = va_to_pte(address);
373 return(((unsigned long)(pte_val(*pte)) & PAGE_MASK) | (address & ~(PAGE_MASK)));
/*
 * print_8xx_pte() - print the page-table entry for an address via printk().
 * @mm:   address space whose page tables are walked
 * @addr: virtual address to look up
 *
 * Prints the address, walks from pgd to pmd to pte, prints the pgd and
 * pte pointers together with the raw PTE value, and then prints
 * individual fields of that value.
 * NOTE(review): the return type, part of the printk() argument list and
 * the paths for a missing pmd or pte are not visible in this listing.
 * The name suggests MPC8xx-only code, but the enclosing conditional is
 * not visible either.
 */
378 print_8xx_pte(struct mm_struct *mm, unsigned long addr)
384 printk(" pte @ 0x%8lx: ", addr);
385 pgd = pgd_offset(mm, addr & PAGE_MASK);
387 pmd = pmd_offset(pgd, addr & PAGE_MASK);
388 if (pmd && pmd_present(*pmd)) {
389 pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
391 printk(" (0x%08lx)->(0x%08lx)->0x%08lx\n",
392 (long)pgd, (long)pte, (long)pte_val(*pte));
/* pp abbreviates the raw PTE value for the field extraction that follows. */
393 #define pp ((long)pte_val(*pte))
394 printk(" RPN: %05lx PP: %lx SPS: %lx SH: %lx "
398 (pp>>3)&1, /* small */
399 (pp>>2)&1, /* shared */
400 (pp>>1)&1, /* cache inhibit */
/*
 * get_8xx_pte() - fetch the raw page-table entry value for an address.
 * @mm:   address space whose page tables are walked
 * @addr: virtual address to look up
 *
 * Walks from pgd to pmd to pte and stores the PTE value, cast to int,
 * in retval.
 * NOTE(review): the return type, the initial value of retval and the
 * return statement are not visible in this listing; presumably retval is
 * what the function returns - confirm against the complete file.
 */
419 get_8xx_pte(struct mm_struct *mm, unsigned long addr)
426 pgd = pgd_offset(mm, addr & PAGE_MASK);
428 pmd = pmd_offset(pgd, addr & PAGE_MASK);
429 if (pmd && pmd_present(*pmd)) {
430 pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
432 retval = (int)pte_val(*pte);
438 #endif /* CONFIG_8xx */