* Copyright (C) 1995 Linus Torvalds
*/
-#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/console.h>
#include <linux/module.h>
+#include <linux/hardirq.h>
+#include <linux/kprobes.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
-#include <asm/hardirq.h>
+#include <asm/kdebug.h>
+#include <asm/s390_ext.h>
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
#define __FAIL_ADDR_MASK 0x7ffff000
#define __FIXUP_MASK 0x7fffffff
#define __SUBCODE_MASK 0x0200
#define __PF_RES_FIELD 0ULL
-#else /* CONFIG_ARCH_S390X */
+#else /* CONFIG_64BIT */
#define __FAIL_ADDR_MASK -4096L
#define __FIXUP_MASK ~0L
#define __SUBCODE_MASK 0x0600
#define __PF_RES_FIELD 0x8000000000000000ULL
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
#ifdef CONFIG_SYSCTL
extern int sysctl_userprocess_debug;
extern void die(const char *,struct pt_regs *,long);
+#ifdef CONFIG_KPROBES
+ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
+int register_page_fault_notifier(struct notifier_block *nb)
+{ /* Adds nb to notify_page_fault_chain; returns the registration call's result. */
+ return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
+}
+
+int unregister_page_fault_notifier(struct notifier_block *nb)
+{ /* Removes nb from notify_page_fault_chain; returns the unregistration call's result. */
+ return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
+}
+
+static inline int notify_page_fault(enum die_val val, const char *str,
+ struct pt_regs *regs, long err, int trap, int sig)
+{ /* Calls notify_page_fault_chain with event val; returns that call's result. */
+ struct die_args args = { /* the remaining parameters, packaged for the chain */
+ .regs = regs,
+ .str = str,
+ .err = err,
+ .trapnr = trap,
+ .signr = sig
+ };
+ return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
+}
+#else
+static inline int notify_page_fault(enum die_val val, const char *str,
+ struct pt_regs *regs, long err, int trap, int sig)
+{ /* Variant used when CONFIG_KPROBES is not defined: ignores every argument. */
+ return NOTIFY_DONE; /* constant result; no notifier chain is consulted */
+}
+#endif
+
extern spinlock_t timerlist_lock;
/*
oops_in_progress = 1;
} else {
int loglevel_save = console_loglevel;
- oops_in_progress = 0;
console_unblank();
+ oops_in_progress = 0;
/*
* OK, the message is on the console. Now we call printk()
* without oops_in_progress set so that printk will give klogd
if (areg == 0)
/* Access via access register 0 -> kernel address */
return 0;
+ save_access_regs(current->thread.acrs);
if (regs && areg < NUM_ACRS && current->thread.acrs[areg] <= 1)
/*
* access register contains 0 -> kernel address,
* 3: Home Segment Table Descriptor
*/
int descriptor = S390_lowcore.trans_exc_code & 3;
- if (descriptor == 1) {
- save_access_regs(current->thread.acrs);
+ if (unlikely(descriptor == 1))
return __check_access_register(regs, error_code);
- }
- return descriptor >> 1;
+ if (descriptor == 2)
+ return current->thread.mm_segment.ar4;
+ return descriptor != 0;
}
/*
* Send SIGSEGV to task. This is an external routine
* to keep the stack usage of do_page_fault small.
*/
-static void force_sigsegv(struct pt_regs *regs, unsigned long error_code,
- int si_code, unsigned long address)
+static void do_sigsegv(struct pt_regs *regs, unsigned long error_code,
+ int si_code, unsigned long address)
{
struct siginfo si;
#endif
si.si_signo = SIGSEGV;
si.si_code = si_code;
- si.si_addr = (void *) address;
+ si.si_addr = (void __user *) address;
force_sig_info(SIGSEGV, &si, current);
}
* 11 Page translation -> Not present (nullification)
* 3b Region third trans. -> Not present (nullification)
*/
-extern inline void
+static inline void __kprobes
do_exception(struct pt_regs *regs, unsigned long error_code, int is_protection)
{
struct task_struct *tsk;
tsk = current;
mm = tsk->mm;
+ if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+ SIGSEGV) == NOTIFY_STOP)
+ return;
+
/*
* Check for low-address protection. This needs to be treated
* as a special case because the translation exception code
* we are not in an interrupt and that there is a
* user context.
*/
- if (user_address == 0 || in_interrupt() || !mm)
+ if (user_address == 0 || in_atomic() || !mm)
goto no_context;
/*
if (regs->psw.mask & PSW_MASK_PSTATE) {
tsk->thread.prot_addr = address;
tsk->thread.trap_no = error_code;
- force_sigsegv(regs, error_code, si_code, address);
+ do_sigsegv(regs, error_code, si_code, address);
return;
}
*/
out_of_memory:
up_read(&mm->mmap_sem);
- if (tsk->pid == 1) {
+ if (is_init(tsk)) {
yield();
+ down_read(&mm->mmap_sem);
goto survive;
}
- printk("VM: killing process %s\n", tsk->comm);
+ printk("VM: killing process %s(%d:#%u)\n",
+ tsk->comm, tsk->pid, tsk->xid);
if (regs->psw.mask & PSW_MASK_PSTATE)
do_exit(SIGKILL);
goto no_context;
do_exception(regs, error_code & 0xff, 0);
}
-#ifndef CONFIG_ARCH_S390X
-
-typedef struct _pseudo_wait_t {
- struct _pseudo_wait_t *next;
- wait_queue_head_t queue;
- unsigned long address;
- int resolved;
-} pseudo_wait_t;
-
-static pseudo_wait_t *pseudo_lock_queue = NULL;
-static spinlock_t pseudo_wait_spinlock; /* spinlock to protect lock queue */
-
-/*
- * This routine handles 'pagex' pseudo page faults.
- */
-asmlinkage void
-do_pseudo_page_fault(struct pt_regs *regs, unsigned long error_code)
-{
- pseudo_wait_t wait_struct;
- pseudo_wait_t *ptr, *last, *next;
- unsigned long address;
-
- /*
- * get the failing address
- * more specific the segment and page table portion of
- * the address
- */
- address = S390_lowcore.trans_exc_code & 0xfffff000;
-
- if (address & 0x80000000) {
- /* high bit set -> a page has been swapped in by VM */
- address &= 0x7fffffff;
- spin_lock(&pseudo_wait_spinlock);
- last = NULL;
- ptr = pseudo_lock_queue;
- while (ptr != NULL) {
- next = ptr->next;
- if (address == ptr->address) {
- /*
- * This is one of the processes waiting
- * for the page. Unchain from the queue.
- * There can be more than one process
- * waiting for the same page. VM presents
- * an initial and a completion interrupt for
- * every process that tries to access a
- * page swapped out by VM.
- */
- if (last == NULL)
- pseudo_lock_queue = next;
- else
- last->next = next;
- /* now wake up the process */
- ptr->resolved = 1;
- wake_up(&ptr->queue);
- } else
- last = ptr;
- ptr = next;
- }
- spin_unlock(&pseudo_wait_spinlock);
- } else {
- /* Pseudo page faults in kernel mode is a bad idea */
- if (!(regs->psw.mask & PSW_MASK_PSTATE)) {
- /*
- * VM presents pseudo page faults if the interrupted
- * state was not disabled for interrupts. So we can
- * get pseudo page fault interrupts while running
- * in kernel mode. We simply access the page here
- * while we are running disabled. VM will then swap
- * in the page synchronously.
- */
- if (check_user_space(regs, error_code) == 0)
- /* dereference a virtual kernel address */
- __asm__ __volatile__ (
- " ic 0,0(%0)"
- : : "a" (address) : "0");
- else
- /* dereference a virtual user address */
- __asm__ __volatile__ (
- " la 2,0(%0)\n"
- " sacf 512\n"
- " ic 2,0(2)\n"
- "0:sacf 0\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 0b,0b\n"
- ".previous"
- : : "a" (address) : "2" );
-
- return;
- }
- /* initialize and add element to pseudo_lock_queue */
- init_waitqueue_head (&wait_struct.queue);
- wait_struct.address = address;
- wait_struct.resolved = 0;
- spin_lock(&pseudo_wait_spinlock);
- wait_struct.next = pseudo_lock_queue;
- pseudo_lock_queue = &wait_struct;
- spin_unlock(&pseudo_wait_spinlock);
- /*
- * The instruction that caused the program check will
- * be repeated. Don't signal single step via SIGTRAP.
- */
- clear_tsk_thread_flag(current, TIF_SINGLE_STEP);
- /* go to sleep */
- wait_event(wait_struct.queue, wait_struct.resolved);
- }
-}
-#endif /* CONFIG_ARCH_S390X */
-
#ifdef CONFIG_PFAULT
/*
* 'pfault' pseudo page faults routines.
*/
+static ext_int_info_t ext_int_pfault;
static int pfault_disable = 0;
static int __init nopfault(char *str)
__PF_RES_FIELD };
int rc;
- if (pfault_disable)
+ if (!MACHINE_IS_VM || pfault_disable)
return -1;
- __asm__ __volatile__(
- " diag %1,%0,0x258\n"
- "0: j 2f\n"
- "1: la %0,8\n"
+ asm volatile(
+ " diag %1,%0,0x258\n"
+ "0: j 2f\n"
+ "1: la %0,8\n"
"2:\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
-#ifndef CONFIG_ARCH_S390X
- " .long 0b,1b\n"
-#else /* CONFIG_ARCH_S390X */
- " .quad 0b,1b\n"
-#endif /* CONFIG_ARCH_S390X */
- ".previous"
- : "=d" (rc) : "a" (&refbk) : "cc" );
+ EX_TABLE(0b,1b)
+ : "=d" (rc) : "a" (&refbk), "m" (refbk) : "cc");
__ctl_set_bit(0, 9);
return rc;
}
pfault_refbk_t refbk =
{ 0x258, 1, 5, 2, 0ULL, 0ULL, 0ULL, 0ULL };
- if (pfault_disable)
+ if (!MACHINE_IS_VM || pfault_disable)
return;
__ctl_clear_bit(0,9);
- __asm__ __volatile__(
- " diag %0,0,0x258\n"
+ asm volatile(
+ " diag %0,0,0x258\n"
"0:\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
-#ifndef CONFIG_ARCH_S390X
- " .long 0b,0b\n"
-#else /* CONFIG_ARCH_S390X */
- " .quad 0b,0b\n"
-#endif /* CONFIG_ARCH_S390X */
- ".previous"
- : : "a" (&refbk) : "cc" );
+ EX_TABLE(0b,0b)
+ : : "a" (&refbk), "m" (refbk) : "cc");
}
asmlinkage void
-pfault_interrupt(struct pt_regs *regs, __u16 error_code)
+pfault_interrupt(__u16 error_code)
{
struct task_struct *tsk;
__u16 subcode;
* interrupt. pfault_wait is valid. Set pfault_wait
* back to zero and wake up the process. This can
* safely be done because the task is still sleeping
- * and can't procude new pfaults. */
+ * and can't produce new pfaults. */
tsk->thread.pfault_wait = 0;
wake_up_process(tsk);
+ put_task_struct(tsk);
}
} else {
/* signal bit not set -> a real page is missing. */
+ get_task_struct(tsk);
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
if (xchg(&tsk->thread.pfault_wait, 1) != 0) {
/* Completion interrupt was faster than the initial
* mode and can't produce new pfaults. */
tsk->thread.pfault_wait = 0;
set_task_state(tsk, TASK_RUNNING);
+ put_task_struct(tsk);
} else
set_tsk_need_resched(tsk);
}
}
-#endif
+void __init pfault_irq_init(void)
+{ /* __init routine: registers pfault_interrupt for external interrupt 0x2603, then calls pfault_init(). */
+ if (!MACHINE_IS_VM)
+ return; /* nothing is registered unless MACHINE_IS_VM is true */
+
+ /*
+ * Try to get pfault pseudo page faults going.
+ */
+ if (register_early_external_interrupt(0x2603, pfault_interrupt,
+ &ext_int_pfault) != 0)
+ panic("Couldn't request external interrupt 0x2603");
+
+ if (pfault_init() == 0)
+ return; /* zero return: leave the handler registered */
+
+ /* Tough luck, no pfault. */
+ pfault_disable = 1; /* the pfault_disable checks elsewhere in this file now return early */
+ unregister_early_external_interrupt(0x2603, pfault_interrupt,
+ &ext_int_pfault); /* undo the registration made above */
+}
+#endif