* Copyright (C) 2001,2002 Andi Kleen, SuSE Labs.
*/
+#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#define PF_RSVD (1<<3)
#define PF_INSTR (1<<4)
-#ifdef CONFIG_KPROBES
-ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
-
-/* Hook to register for page fault notifications */
-int register_page_fault_notifier(struct notifier_block *nb)
-{
- vmalloc_sync_all();
- return atomic_notifier_chain_register(¬ify_page_fault_chain, nb);
-}
-
-int unregister_page_fault_notifier(struct notifier_block *nb)
-{
- return atomic_notifier_chain_unregister(¬ify_page_fault_chain, nb);
-}
-
-static inline int notify_page_fault(enum die_val val, const char *str,
- struct pt_regs *regs, long err, int trap, int sig)
-{
- struct die_args args = {
- .regs = regs,
- .str = str,
- .err = err,
- .trapnr = trap,
- .signr = sig
- };
- return atomic_notifier_call_chain(¬ify_page_fault_chain, val, &args);
-}
-#else
-static inline int notify_page_fault(enum die_val val, const char *str,
- struct pt_regs *regs, long err, int trap, int sig)
-{
- return NOTIFY_DONE;
-}
-#endif
-
void bust_spinlocks(int yes)
{
int loglevel_save = console_loglevel;
instr = (unsigned char *)convert_rip_to_linear(current, regs);
max_instr = instr + 15;
- if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE64)
+ if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
return 0;
while (scan_more && instr < max_instr) {
printk("PGD %lx ", pgd_val(*pgd));
if (!pgd_present(*pgd)) goto ret;
- pud = pud_offset(pgd, address);
+ pud = __pud_offset_k((pud_t *)pgd_page(*pgd), address);
if (bad_address(pud)) goto bad;
printk("PUD %lx ", pud_val(*pud));
if (!pud_present(*pud)) goto ret;
return -1;
if (pgd_none(*pgd))
set_pgd(pgd, *pgd_ref);
- else
- BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
/* Below here mismatches are bugs because these lower tables
are shared */
unsigned long flags;
siginfo_t info;
- tsk = current;
- mm = tsk->mm;
- prefetchw(&mm->mmap_sem);
-
/* get the address */
__asm__("movq %%cr2,%0":"=r" (address));
+ if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+ SIGSEGV) == NOTIFY_STOP)
+ return;
+
+ if (likely(regs->eflags & X86_EFLAGS_IF))
+ local_irq_enable();
+
+ if (unlikely(page_fault_trace))
+ printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
+ regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code);
+ tsk = current;
+ mm = tsk->mm;
info.si_code = SEGV_MAPERR;
*/
if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
((address >= VMALLOC_START && address < VMALLOC_END))) {
- if (vmalloc_fault(address) >= 0)
- return;
- }
- if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
- SIGSEGV) == NOTIFY_STOP)
+ if (vmalloc_fault(address) < 0)
+ goto bad_area_nosemaphore;
return;
+ }
/*
* Don't take the mm semaphore here. If we fixup a prefetch
* fault we could otherwise deadlock.
goto bad_area_nosemaphore;
}
- if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
- SIGSEGV) == NOTIFY_STOP)
- return;
-
- if (likely(regs->eflags & X86_EFLAGS_IF))
- local_irq_enable();
-
- if (unlikely(page_fault_trace))
- printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
- regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code);
-
if (unlikely(error_code & PF_RSVD))
pgtable_bad(address, regs, error_code);
/* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
* kernel and should generate an OOPS. Unfortunatly, in the case of an
- * erroneous fault occurring in a code path which already holds mmap_sem
+ * erroneous fault occuring in a code path which already holds mmap_sem
* we will deadlock attempting to validate the fault against the
* address space. Luckily the kernel only validly references user
* space from well defined areas of code, which are listed in the
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
if (error_code & 4) {
- /* Allow userspace just enough access below the stack pointer
- * to let the 'enter' instruction work.
- */
- if (address + 65536 + 32 * sizeof(unsigned long) < regs->rsp)
+ // XXX: align red zone size with ABI
+ if (address + 128 < regs->rsp)
goto bad_area;
}
if (expand_stack(vma, address))
printk(KERN_ALERT "Unable to handle kernel paging request");
printk(" at %016lx RIP: \n" KERN_ALERT,address);
printk_address(regs->rip);
+ printk("\n");
dump_pagetable(address);
tsk->thread.cr2 = address;
tsk->thread.trap_no = 14;
return;
}
-DEFINE_SPINLOCK(pgd_lock);
-struct page *pgd_list;
-
-void vmalloc_sync_all(void)
-{
- /* Note that races in the updates of insync and start aren't
- problematic:
- insync can only get set bits added, and updates to start are only
- improving performance (without affecting correctness if undone). */
- static DECLARE_BITMAP(insync, PTRS_PER_PGD);
- static unsigned long start = VMALLOC_START & PGDIR_MASK;
- unsigned long address;
-
- for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
- if (!test_bit(pgd_index(address), insync)) {
- const pgd_t *pgd_ref = pgd_offset_k(address);
- struct page *page;
-
- if (pgd_none(*pgd_ref))
- continue;
- spin_lock(&pgd_lock);
- for (page = pgd_list; page;
- page = (struct page *)page->index) {
- pgd_t *pgd;
- pgd = (pgd_t *)page_address(page) + pgd_index(address);
- if (pgd_none(*pgd))
- set_pgd(pgd, *pgd_ref);
- else
- BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
- }
- spin_unlock(&pgd_lock);
- set_bit(pgd_index(address), insync);
- }
- if (address == start)
- start = address + PGDIR_SIZE;
- }
- /* Check that there is no need to do the same for the modules area. */
- BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
- BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
- (__START_KERNEL & PGDIR_MASK)));
-}
-
static int __init enable_pagefaulttrace(char *str)
{
page_fault_trace = 1;
- return 1;
+ return 0;
}
__setup("pagefaulttrace", enable_pagefaulttrace);