diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 8b2856aaf..37118d72a 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -7,6 +7,7 @@
  * to continually duplicate across every architecture.
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -16,330 +17,1775 @@
 #include 
 #include 
 #include 
-
+#include 
+#include 
+#include 
+#include 
+#include 
 #include 
 #include 
 
-/*
- * ptrace a task: make the debugger its new parent and
- * move it to the ptrace list.
- *
- * Must be called with the tasklist lock write-held.
- */
-void __ptrace_link(task_t *child, task_t *new_parent)
+struct ptrace_state
 {
-	if (!list_empty(&child->ptrace_list))
-		BUG();
-	if (child->parent == new_parent)
-		return;
-	list_add(&child->ptrace_list, &child->parent->ptrace_children);
-	REMOVE_LINKS(child);
-	child->parent = new_parent;
-	SET_LINKS(child);
-}
+	struct rcu_head rcu;
 
-/*
- * unptrace a task: move it back to its original parent and
- * remove it from the ptrace list.
- *
- * Must be called with the tasklist lock write-held.
- */
-void __ptrace_unlink(task_t *child)
+	/*
+	 * These elements are always available, even when the struct is
+	 * awaiting destruction at the next RCU callback point.
+	 */
+	struct utrace_attached_engine *engine;
+	struct task_struct *task;	/* Target task. */
+	struct task_struct *parent;	/* Whom we report to. */
+	struct list_head entry;	/* Entry on parent->ptracees list. */
+
+	u8 options;		/* PTRACE_SETOPTIONS bits. */
+	unsigned int syscall:1;	/* Reporting for syscall. */
+#ifdef PTRACE_SYSEMU
+	unsigned int sysemu:1;	/* PTRACE_SYSEMU in progress. */
+#endif
+	unsigned int have_eventmsg:1; /* u.eventmsg valid. */
+	unsigned int cap_sys_ptrace:1; /* Tracer capable. */
+
+	union
+	{
+		unsigned long eventmsg;
+		siginfo_t *siginfo;
+	} u;
+};
+
+static const struct utrace_engine_ops ptrace_utrace_ops; /* Initialized below. 
*/ + +static void +ptrace_state_unlink(struct ptrace_state *state) { - if (!child->ptrace) - BUG(); - child->ptrace = 0; - if (list_empty(&child->ptrace_list)) - return; - list_del_init(&child->ptrace_list); - REMOVE_LINKS(child); - child->parent = child->real_parent; - SET_LINKS(child); + task_lock(state->parent); + list_del_rcu(&state->entry); + task_unlock(state->parent); +} + +static struct ptrace_state * +ptrace_setup(struct task_struct *target, struct utrace_attached_engine *engine, + struct task_struct *parent, u8 options, int cap_sys_ptrace, + struct ptrace_state *state) +{ + if (state == NULL) { + state = kzalloc(sizeof *state, GFP_USER); + if (unlikely(state == NULL)) + return ERR_PTR(-ENOMEM); + } + + state->engine = engine; + state->task = target; + state->parent = parent; + state->options = options; + state->cap_sys_ptrace = cap_sys_ptrace; + + task_lock(parent); + if (unlikely(parent->flags & PF_EXITING)) { + task_unlock(parent); + kfree(state); + return ERR_PTR(-EALREADY); + } + list_add_rcu(&state->entry, &state->parent->ptracees); + task_unlock(state->parent); + + BUG_ON(engine->data != 0); + rcu_assign_pointer(engine->data, (unsigned long) state); + + return state; +} + +static void +ptrace_state_free(struct rcu_head *rhead) +{ + struct ptrace_state *state = container_of(rhead, + struct ptrace_state, rcu); + kfree(state); +} + +static void +ptrace_done(struct ptrace_state *state) +{ + INIT_RCU_HEAD(&state->rcu); + call_rcu(&state->rcu, ptrace_state_free); } /* - * Check that we have indeed attached to the thing.. + * Update the tracing engine state to match the new ptrace state. */ -int ptrace_check_attach(struct task_struct *child, int kill) +static int __must_check +ptrace_update(struct task_struct *target, + struct utrace_attached_engine *engine, + unsigned long flags, int from_stopped) { - if (!(child->ptrace & PT_PTRACED)) - return -ESRCH; + struct ptrace_state *state = (struct ptrace_state *) engine->data; + + /* + * These events are always reported. + */ + flags |= (UTRACE_EVENT(DEATH) | UTRACE_EVENT(EXEC) + | UTRACE_EVENT_SIGNAL_ALL | UTRACE_EVENT(JCTL)); - if (child->parent != current) - return -ESRCH; + /* + * We always have to examine clone events to check for CLONE_PTRACE. + */ + flags |= UTRACE_EVENT(CLONE); - if (!kill) { - if (child->state != TASK_STOPPED) - return -ESRCH; - wait_task_inactive(child); + /* + * PTRACE_SETOPTIONS can request more events. + */ + if (state->options & PTRACE_O_TRACEEXIT) + flags |= UTRACE_EVENT(EXIT); + if (state->options & PTRACE_O_TRACEVFORKDONE) + flags |= UTRACE_EVENT(VFORK_DONE); + + /* + * ptrace always inhibits normal parent reaping. + * But for a corner case we sometimes see the REAP event anyway. + */ + flags |= UTRACE_ACTION_NOREAP | UTRACE_EVENT(REAP); + + if (from_stopped && !(flags & UTRACE_ACTION_QUIESCE)) { + /* + * We're letting the thread resume from ptrace stop. + * If SIGKILL is waking it up, it can be racing with us here + * to set its own exit_code in do_exit. Though we clobber + * it here, we check for the case in ptrace_report_death. + */ + if (!unlikely(target->flags & PF_SIGNALED)) + target->exit_code = 0; + + if (!state->have_eventmsg) + state->u.siginfo = NULL; + + if (target->state == TASK_STOPPED) { + /* + * We have to double-check for naughty de_thread + * reaping despite NOREAP, before we can get siglock. 
+ */ + read_lock(&tasklist_lock); + if (!target->exit_state) { + spin_lock_irq(&target->sighand->siglock); + if (target->state == TASK_STOPPED) + target->signal->flags &= + ~SIGNAL_STOP_STOPPED; + spin_unlock_irq(&target->sighand->siglock); + } + read_unlock(&tasklist_lock); + } } - /* All systems go.. */ - return 0; + return utrace_set_flags(target, engine, flags); } -int ptrace_attach(struct task_struct *task) +static int ptrace_traceme(void) { + struct utrace_attached_engine *engine; + struct ptrace_state *state; + struct task_struct *parent; int retval; - task_lock(task); + + engine = utrace_attach(current, (UTRACE_ATTACH_CREATE + | UTRACE_ATTACH_EXCLUSIVE + | UTRACE_ATTACH_MATCH_OPS), + &ptrace_utrace_ops, 0UL); + + if (IS_ERR(engine)) { + retval = PTR_ERR(engine); + if (retval == -EEXIST) + retval = -EPERM; + } + else { + /* + * We need to preallocate so that we can hold + * rcu_read_lock from extracting ->parent through + * ptrace_setup using it. + */ + state = kzalloc(sizeof *state, GFP_USER); + if (unlikely(state == NULL)) { + (void) utrace_detach(current, engine); + printk(KERN_ERR + "ptrace out of memory, lost child %d of %d", + current->pid, current->parent->pid); + return -ENOMEM; + } + + rcu_read_lock(); + parent = rcu_dereference(current->parent); + + task_lock(current); + retval = security_ptrace(parent, current); + task_unlock(current); + + if (retval) { + kfree(state); + (void) utrace_detach(current, engine); + } + else { + state = ptrace_setup(current, engine, parent, 0, 0, + state); + if (IS_ERR(state)) + retval = PTR_ERR(state); + } + rcu_read_unlock(); + + if (!retval) { + /* + * This can't fail because we can't die while we + * are here doing this. + */ + retval = ptrace_update(current, engine, 0, 0); + BUG_ON(retval); + } + else if (unlikely(retval == -EALREADY)) + /* + * We raced with our parent's exit, which would + * have detached us just after our attach if + * we'd won the race. Pretend we got attached + * and then detached immediately, no error. 
+ */ + retval = 0; + } + + return retval; +} + +static int ptrace_attach(struct task_struct *task) +{ + struct utrace_attached_engine *engine; + struct ptrace_state *state; + int retval; + retval = -EPERM; if (task->pid <= 1) goto bad; - if (task == current) - goto bad; - if (!task->mm) + if (task->tgid == current->tgid) goto bad; - if(((current->uid != task->euid) || - (current->uid != task->suid) || - (current->uid != task->uid) || - (current->gid != task->egid) || - (current->gid != task->sgid) || - (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) + if (!task->mm) /* kernel threads */ goto bad; - rmb(); - if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) - goto bad; - /* the same process cannot be attached many times */ - if (task->ptrace & PT_PTRACED) + + pr_debug("%d ptrace_attach %d state %lu exit_code %x\n", + current->pid, task->pid, task->state, task->exit_code); + + engine = utrace_attach(task, (UTRACE_ATTACH_CREATE + | UTRACE_ATTACH_EXCLUSIVE + | UTRACE_ATTACH_MATCH_OPS), + &ptrace_utrace_ops, 0); + if (IS_ERR(engine)) { + retval = PTR_ERR(engine); + if (retval == -EEXIST) + retval = -EPERM; goto bad; - retval = security_ptrace(current, task); + } + + pr_debug("%d ptrace_attach %d after utrace_attach: %lu exit_code %x\n", + current->pid, task->pid, task->state, task->exit_code); + + if (ptrace_may_attach(task)) { + state = ptrace_setup(task, engine, current, 0, + capable(CAP_SYS_PTRACE), NULL); + if (IS_ERR(state)) + retval = PTR_ERR(state); + else { + retval = ptrace_update(task, engine, 0, 0); + + pr_debug("%d ptrace_attach %d after ptrace_update (%d)" + " %lu exit_code %x\n", + current->pid, task->pid, retval, + task->state, task->exit_code); + + if (retval) { + /* + * It died before we enabled any callbacks. + */ + if (retval == -EALREADY) + retval = -ESRCH; + BUG_ON(retval != -ESRCH); + ptrace_state_unlink(state); + ptrace_done(state); + } + } + } if (retval) - goto bad; + (void) utrace_detach(task, engine); + else { + int stopped = 0; - /* Go */ - task->ptrace |= PT_PTRACED; - if (capable(CAP_SYS_PTRACE)) - task->ptrace |= PT_PTRACE_CAP; - task_unlock(task); + /* + * We must double-check that task has not just died and + * been reaped (after ptrace_update succeeded). + * This happens when exec (de_thread) ignores NOREAP. + * We cannot call into the signal code if it's dead. + */ + read_lock(&tasklist_lock); + if (likely(!task->exit_state)) { + force_sig_specific(SIGSTOP, task); - write_lock_irq(&tasklist_lock); - __ptrace_link(task, current); - write_unlock_irq(&tasklist_lock); + spin_lock_irq(&task->sighand->siglock); + stopped = (task->state == TASK_STOPPED); + spin_unlock_irq(&task->sighand->siglock); + } + read_unlock(&tasklist_lock); - force_sig_specific(SIGSTOP, task); - return 0; + if (stopped) { + const struct utrace_regset *regset; + + /* + * Set QUIESCE immediately, so we can allow + * ptrace requests while he's in TASK_STOPPED. + */ + retval = ptrace_update(task, engine, + UTRACE_ACTION_QUIESCE, 0); + if (retval) + BUG_ON(retval != -ESRCH); + retval = 0; + + /* + * Do now the regset 0 writeback that we do on every + * stop, since it's never been done. On register + * window machines, this makes sure the user memory + * backing the register data is up to date. + */ + regset = utrace_regset(task, engine, + utrace_native_view(task), 0); + if (regset->writeback) + (*regset->writeback)(task, regset, 1); + } + + pr_debug("%d ptrace_attach %d complete (%sstopped)" + " state %lu code %x", + current->pid, task->pid, stopped ? 
"" : "not ", + task->state, task->exit_code); + } bad: - task_unlock(task); return retval; } -int ptrace_detach(struct task_struct *child, unsigned int data) +/* + * The task might be dying or being reaped in parallel, in which case + * engine and state may no longer be valid. utrace_detach checks for us. + */ +static int ptrace_detach(struct task_struct *task, + struct utrace_attached_engine *engine, + struct ptrace_state *state) { - if ((unsigned long) data > _NSIG) - return -EIO; - /* Architecture-specific hardware disable .. */ - ptrace_disable(child); + int error; - /* .. re-parent .. */ - child->exit_code = data; +#ifdef HAVE_ARCH_PTRACE_DETACH + /* + * Some funky compatibility code in arch_ptrace may have + * needed to install special state it should clean up now. + */ + arch_ptrace_detach(task); +#endif + + /* + * Traditional ptrace behavior does wake_up_process no matter what + * in ptrace_detach. But utrace_detach will not do a wakeup if + * it's in a proper job control stop. We need it to wake up from + * TASK_STOPPED and either resume or process more signals. A + * pending stop signal will just leave it stopped again, but will + * consume the signal, and reset task->exit_code for the next wait + * call to see. This is important to userland if ptrace_do_wait + * "stole" the previous unwaited-for-ness (clearing exit_code), but + * there is a pending SIGSTOP, e.g. sent by a PTRACE_ATTACH done + * while already in job control stop. + */ + read_lock(&tasklist_lock); + if (likely(task->signal != NULL)) { + spin_lock_irq(&task->sighand->siglock); + task->signal->flags &= ~SIGNAL_STOP_STOPPED; + spin_unlock_irq(&task->sighand->siglock); + } + read_unlock(&tasklist_lock); - write_lock_irq(&tasklist_lock); - __ptrace_unlink(child); - /* .. and wake it up. */ - if (child->state != TASK_ZOMBIE) - wake_up_process(child); - write_unlock_irq(&tasklist_lock); + error = utrace_detach(task, engine); + if (!error) { + /* + * We can only get here from the ptracer itself or via + * detach_zombie from another thread in its group. + */ + BUG_ON(state->parent->tgid != current->tgid); + ptrace_state_unlink(state); + ptrace_done(state); - return 0; + /* + * Wake up any other threads that might be blocked in + * wait. Though traditional ptrace does not guarantee + * this wakeup on PTRACE_DETACH, it does prevent + * erroneous blocking in wait when another racing + * thread's wait call reap-detaches the last child. + * Without this wakeup, another thread might stay + * blocked when it should return -ECHILD. + */ + spin_lock_irq(¤t->sighand->siglock); + wake_up_interruptible(¤t->signal->wait_chldexit); + spin_unlock_irq(¤t->sighand->siglock); + } + return error; } + /* - * Access another process' address space. - * Source/target buffer must be kernel space, - * Do not walk the page table directly, use get_user_pages + * This is called when we are exiting. We must stop all our ptracing. */ +void +ptrace_exit(struct task_struct *tsk) +{ + struct list_head *pos, *n; -int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write) + /* + * Taking the task_lock after PF_EXITING is set ensures that a + * child in ptrace_traceme will not put itself on our list when + * we might already be tearing it down. 
+ */ + task_lock(tsk); + if (likely(list_empty(&tsk->ptracees))) { + task_unlock(tsk); + return; + } + task_unlock(tsk); + +restart: + rcu_read_lock(); + + list_for_each_safe_rcu(pos, n, &tsk->ptracees) { + struct ptrace_state *state = list_entry(pos, + struct ptrace_state, + entry); + int error = utrace_detach(state->task, state->engine); + BUG_ON(state->parent != tsk); + if (likely(error == 0)) { + ptrace_state_unlink(state); + ptrace_done(state); + } + else if (unlikely(error == -EALREADY)) { + /* + * It's still doing report_death callbacks. + * Just wait for it to settle down. + * Since wait_task_inactive might yield, + * we must go out of rcu_read_lock and restart. + */ + struct task_struct *p = state->task; + get_task_struct(p); + rcu_read_unlock(); + wait_task_inactive(p); + put_task_struct(p); + goto restart; + } + else + BUG_ON(error != -ESRCH); + } + + rcu_read_unlock(); + + BUG_ON(!list_empty(&tsk->ptracees)); +} + +static int +ptrace_induce_signal(struct task_struct *target, + struct utrace_attached_engine *engine, + long signr) { - struct mm_struct *mm; - struct vm_area_struct *vma; - struct page *page; - void *old_buf = buf; + struct ptrace_state *state = (struct ptrace_state *) engine->data; - mm = get_task_mm(tsk); - if (!mm) + if (signr == 0) return 0; - down_read(&mm->mmap_sem); - /* ignore errors, just check how much was sucessfully transfered */ - while (len) { - int bytes, ret, offset; - void *maddr; + if (!valid_signal(signr)) + return -EIO; - ret = get_user_pages(tsk, mm, addr, 1, - write, 1, &page, &vma); - if (ret <= 0) - break; + if (state->syscall) { + /* + * This is the traditional ptrace behavior when given + * a signal to resume from a syscall tracing stop. + */ + send_sig(signr, target, 1); + } + else if (!state->have_eventmsg && state->u.siginfo) { + siginfo_t *info = state->u.siginfo; - bytes = len; - offset = addr & (PAGE_SIZE-1); - if (bytes > PAGE_SIZE-offset) - bytes = PAGE_SIZE-offset; - - flush_cache_page(vma, addr); - - maddr = kmap(page); - if (write) { - copy_to_user_page(vma, page, addr, - maddr + offset, buf, bytes); - set_page_dirty_lock(page); - } else { - copy_from_user_page(vma, page, addr, - buf, maddr + offset, bytes); + /* Update the siginfo structure if the signal has + changed. If the debugger wanted something + specific in the siginfo structure then it should + have updated *info via PTRACE_SETSIGINFO. */ + if (signr != info->si_signo) { + info->si_signo = signr; + info->si_errno = 0; + info->si_code = SI_USER; + info->si_pid = current->pid; + info->si_uid = current->uid; } - kunmap(page); - page_cache_release(page); - len -= bytes; - buf += bytes; - addr += bytes; + + return utrace_inject_signal(target, engine, + UTRACE_ACTION_RESUME, info, NULL); } - up_read(&mm->mmap_sem); - mmput(mm); - - return buf - old_buf; + + return 0; } -int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len) +int +ptrace_regset_access(struct task_struct *target, + struct utrace_attached_engine *engine, + const struct utrace_regset_view *view, + int setno, unsigned long offset, unsigned int size, + void __user *data, int write) { - int copied = 0; + const struct utrace_regset *regset = utrace_regset(target, engine, + view, setno); + int ret; - while (len > 0) { - char buf[128]; - int this_len, retval; + if (unlikely(regset == NULL)) + return -EIO; - this_len = (len > sizeof(buf)) ? 
sizeof(buf) : len; - retval = access_process_vm(tsk, src, buf, this_len, 0); - if (!retval) { - if (copied) - break; - return -EIO; - } - if (copy_to_user(dst, buf, retval)) - return -EFAULT; - copied += retval; - src += retval; - dst += retval; - len -= retval; + if (size == (unsigned int) -1) + size = regset->size * regset->n; + + if (write) { + if (!access_ok(VERIFY_READ, data, size)) + ret = -EIO; + else + ret = (*regset->set)(target, regset, + offset, size, NULL, data); + } + else { + if (!access_ok(VERIFY_WRITE, data, size)) + ret = -EIO; + else + ret = (*regset->get)(target, regset, + offset, size, NULL, data); } - return copied; + + return ret; } -int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len) +int +ptrace_onereg_access(struct task_struct *target, + struct utrace_attached_engine *engine, + const struct utrace_regset_view *view, + int setno, unsigned long regno, + void __user *data, int write) { - int copied = 0; + const struct utrace_regset *regset = utrace_regset(target, engine, + view, setno); + unsigned int pos; + int ret; - while (len > 0) { - char buf[128]; - int this_len, retval; + if (unlikely(regset == NULL)) + return -EIO; - this_len = (len > sizeof(buf)) ? sizeof(buf) : len; - if (copy_from_user(buf, src, this_len)) - return -EFAULT; - retval = access_process_vm(tsk, dst, buf, this_len, 1); - if (!retval) { - if (copied) - break; - return -EIO; - } - copied += retval; - src += retval; - dst += retval; - len -= retval; + if (regno < regset->bias || regno >= regset->bias + regset->n) + return -EINVAL; + + pos = (regno - regset->bias) * regset->size; + + if (write) { + if (!access_ok(VERIFY_READ, data, regset->size)) + ret = -EIO; + else + ret = (*regset->set)(target, regset, pos, regset->size, + NULL, data); + } + else { + if (!access_ok(VERIFY_WRITE, data, regset->size)) + ret = -EIO; + else + ret = (*regset->get)(target, regset, pos, regset->size, + NULL, data); } - return copied; + + return ret; } -static int ptrace_setoptions(struct task_struct *child, long data) +int +ptrace_layout_access(struct task_struct *target, + struct utrace_attached_engine *engine, + const struct utrace_regset_view *view, + const struct ptrace_layout_segment layout[], + unsigned long addr, unsigned int size, + void __user *udata, void *kdata, int write) { - child->ptrace &= ~PT_TRACE_MASK; + const struct ptrace_layout_segment *seg; + int ret = -EIO; - if (data & PTRACE_O_TRACESYSGOOD) - child->ptrace |= PT_TRACESYSGOOD; + if (kdata == NULL && + !access_ok(write ? VERIFY_READ : VERIFY_WRITE, udata, size)) + return -EIO; - if (data & PTRACE_O_TRACEFORK) - child->ptrace |= PT_TRACE_FORK; + seg = layout; + do { + unsigned int pos, n; - if (data & PTRACE_O_TRACEVFORK) - child->ptrace |= PT_TRACE_VFORK; + while (addr >= seg->end && seg->end != 0) + ++seg; - if (data & PTRACE_O_TRACECLONE) - child->ptrace |= PT_TRACE_CLONE; + if (addr < seg->start || addr >= seg->end) + return -EIO; - if (data & PTRACE_O_TRACEEXEC) - child->ptrace |= PT_TRACE_EXEC; + pos = addr - seg->start + seg->offset; + n = min(size, seg->end - (unsigned int) addr); - if (data & PTRACE_O_TRACEVFORKDONE) - child->ptrace |= PT_TRACE_VFORK_DONE; + if (unlikely(seg->regset == (unsigned int) -1)) { + /* + * This is a no-op/zero-fill portion of struct user. 
+ */ + ret = 0; + if (!write && seg->offset == 0) { + if (kdata) + memset(kdata, 0, n); + else if (clear_user(udata, n)) + ret = -EFAULT; + } + } + else { + unsigned int align; + const struct utrace_regset *regset = utrace_regset( + target, engine, view, seg->regset); + if (unlikely(regset == NULL)) + return -EIO; - if (data & PTRACE_O_TRACEEXIT) - child->ptrace |= PT_TRACE_EXIT; + /* + * A ptrace compatibility layout can do a misaligned + * regset access, e.g. word access to larger data. + * An arch's compat layout can be this way only if + * it is actually ok with the regset code despite the + * regset->align setting. + */ + align = min(regset->align, size); + if ((pos & (align - 1)) + || pos >= regset->n * regset->size) + return -EIO; - return (data & ~PTRACE_O_MASK) ? -EINVAL : 0; -} + if (write) + ret = (*regset->set)(target, regset, + pos, n, kdata, udata); + else + ret = (*regset->get)(target, regset, + pos, n, kdata, udata); + } -static int ptrace_getsiginfo(struct task_struct *child, siginfo_t __user * data) -{ - if (child->last_siginfo == NULL) - return -EINVAL; - return copy_siginfo_to_user(data, child->last_siginfo); + if (kdata) + kdata += n; + else + udata += n; + addr += n; + size -= n; + } while (ret == 0 && size > 0); + + return ret; } -static int ptrace_setsiginfo(struct task_struct *child, siginfo_t __user * data) + +static int +ptrace_start(long pid, long request, + struct task_struct **childp, + struct utrace_attached_engine **enginep, + struct ptrace_state **statep) + { - if (child->last_siginfo == NULL) - return -EINVAL; - if (copy_from_user(child->last_siginfo, data, sizeof (siginfo_t)) != 0) - return -EFAULT; - return 0; + struct task_struct *child; + struct utrace_attached_engine *engine; + struct ptrace_state *state; + int ret; + + if (request == PTRACE_TRACEME) + return ptrace_traceme(); + + ret = -ESRCH; + read_lock(&tasklist_lock); + child = find_task_by_pid(pid); + if (child) + get_task_struct(child); + read_unlock(&tasklist_lock); + pr_debug("ptrace pid %ld => %p\n", pid, child); + if (!child) + goto out; + + ret = -EPERM; + if (pid == 1) /* you may not mess with init */ + goto out_tsk; + + ret = -EPERM; + if (!vx_check(vx_task_xid(child), VS_WATCH_P|VS_IDENT)) + goto out_tsk; + + if (request == PTRACE_ATTACH) { + ret = ptrace_attach(child); + goto out_tsk; + } + + rcu_read_lock(); + engine = utrace_attach(child, UTRACE_ATTACH_MATCH_OPS, + &ptrace_utrace_ops, 0); + ret = -ESRCH; + if (IS_ERR(engine) || engine == NULL) + goto out_tsk_rcu; + state = rcu_dereference((struct ptrace_state *) engine->data); + if (state == NULL || state->parent != current) + goto out_tsk_rcu; + rcu_read_unlock(); + + /* + * Traditional ptrace behavior demands that the target already be + * quiescent, but not dead. + */ + if (request != PTRACE_KILL + && !(engine->flags & UTRACE_ACTION_QUIESCE)) { + pr_debug("%d not stopped (%lu)\n", child->pid, child->state); + goto out_tsk; + } + + /* + * We do this for all requests to match traditional ptrace behavior. + * If the machine state synchronization done at context switch time + * includes e.g. writing back to user memory, we want to make sure + * that has finished before a PTRACE_PEEKDATA can fetch the results. + * On most machines, only regset data is affected by context switch + * and calling utrace_regset later on will take care of that, so + * this is superfluous. 
+ * + * To do this purely in utrace terms, we could do: + * (void) utrace_regset(child, engine, utrace_native_view(child), 0); + */ + wait_task_inactive(child); + + if (child->exit_state) + goto out_tsk; + + *childp = child; + *enginep = engine; + *statep = state; + return -EIO; + +out_tsk_rcu: + rcu_read_unlock(); +out_tsk: + put_task_struct(child); +out: + return ret; } -int ptrace_request(struct task_struct *child, long request, - long addr, long data) +static int +ptrace_common(long request, struct task_struct *child, + struct utrace_attached_engine *engine, + struct ptrace_state *state, + unsigned long addr, long data) { + unsigned long flags; int ret = -EIO; switch (request) { + case PTRACE_DETACH: + /* + * Detach a process that was attached. + */ + ret = ptrace_induce_signal(child, engine, data); + if (!ret) { + ret = ptrace_detach(child, engine, state); + if (ret == -EALREADY) /* Already a zombie. */ + ret = -ESRCH; + if (ret) + BUG_ON(ret != -ESRCH); + } + break; + + /* + * These are the operations that resume the child running. + */ + case PTRACE_KILL: + data = SIGKILL; + case PTRACE_CONT: + case PTRACE_SYSCALL: +#ifdef PTRACE_SYSEMU + case PTRACE_SYSEMU: + case PTRACE_SYSEMU_SINGLESTEP: +#endif +#ifdef PTRACE_SINGLEBLOCK + case PTRACE_SINGLEBLOCK: +# ifdef ARCH_HAS_BLOCK_STEP + if (! ARCH_HAS_BLOCK_STEP) +# endif + if (request == PTRACE_SINGLEBLOCK) + break; +#endif + case PTRACE_SINGLESTEP: +#ifdef ARCH_HAS_SINGLE_STEP + if (! ARCH_HAS_SINGLE_STEP) +#endif + if (request == PTRACE_SINGLESTEP +#ifdef PTRACE_SYSEMU_SINGLESTEP + || request == PTRACE_SYSEMU_SINGLESTEP +#endif + ) + break; + + ret = ptrace_induce_signal(child, engine, data); + if (ret) + break; + + + /* + * Reset the action flags without QUIESCE, so it resumes. + */ + flags = 0; +#ifdef PTRACE_SYSEMU + state->sysemu = (request == PTRACE_SYSEMU_SINGLESTEP + || request == PTRACE_SYSEMU); +#endif + if (request == PTRACE_SINGLESTEP +#ifdef PTRACE_SYSEMU + || request == PTRACE_SYSEMU_SINGLESTEP +#endif + ) + flags |= UTRACE_ACTION_SINGLESTEP; +#ifdef PTRACE_SINGLEBLOCK + else if (request == PTRACE_SINGLEBLOCK) + flags |= UTRACE_ACTION_BLOCKSTEP; +#endif + if (request == PTRACE_SYSCALL) + flags |= UTRACE_EVENT_SYSCALL; +#ifdef PTRACE_SYSEMU + else if (request == PTRACE_SYSEMU + || request == PTRACE_SYSEMU_SINGLESTEP) + flags |= UTRACE_EVENT(SYSCALL_ENTRY); +#endif + ret = ptrace_update(child, engine, flags, 1); + if (ret) + BUG_ON(ret != -ESRCH); + ret = 0; + break; + #ifdef PTRACE_OLDSETOPTIONS case PTRACE_OLDSETOPTIONS: #endif case PTRACE_SETOPTIONS: - ret = ptrace_setoptions(child, data); + ret = -EINVAL; + if (data & ~PTRACE_O_MASK) + break; + state->options = data; + ret = ptrace_update(child, engine, UTRACE_ACTION_QUIESCE, 1); + if (ret) + BUG_ON(ret != -ESRCH); + ret = 0; break; + } + + return ret; +} + + +asmlinkage long sys_ptrace(long request, long pid, long addr, long data) +{ + struct task_struct *child; + struct utrace_attached_engine *engine; + struct ptrace_state *state; + long ret, val; + + pr_debug("%d sys_ptrace(%ld, %ld, %lx, %lx)\n", + current->pid, request, pid, addr, data); + + ret = ptrace_start(pid, request, &child, &engine, &state); + if (ret != -EIO) + goto out; + + val = 0; + ret = arch_ptrace(&request, child, engine, addr, data, &val); + if (ret != -ENOSYS) { + if (ret == 0) { + ret = val; + force_successful_syscall_return(); + } + goto out_tsk; + } + + switch (request) { + default: + ret = ptrace_common(request, child, engine, state, addr, data); + break; + + case PTRACE_PEEKTEXT: /* read word 
at location addr. */ + case PTRACE_PEEKDATA: { + unsigned long tmp; + int copied; + + copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); + ret = -EIO; + if (copied != sizeof(tmp)) + break; + ret = put_user(tmp, (unsigned long __user *) data); + break; + } + + case PTRACE_POKETEXT: /* write the word at location addr. */ + case PTRACE_POKEDATA: + ret = 0; + if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data)) + break; + ret = -EIO; + break; + case PTRACE_GETEVENTMSG: - ret = put_user(child->ptrace_message, (unsigned long __user *) data); + ret = put_user(state->have_eventmsg + ? state->u.eventmsg : 0L, + (unsigned long __user *) data); break; case PTRACE_GETSIGINFO: - ret = ptrace_getsiginfo(child, (siginfo_t __user *) data); + ret = -EINVAL; + if (!state->have_eventmsg && state->u.siginfo) + ret = copy_siginfo_to_user((siginfo_t __user *) data, + state->u.siginfo); break; case PTRACE_SETSIGINFO: - ret = ptrace_setsiginfo(child, (siginfo_t __user *) data); + ret = -EINVAL; + if (!state->have_eventmsg && state->u.siginfo) { + ret = 0; + if (copy_from_user(state->u.siginfo, + (siginfo_t __user *) data, + sizeof(siginfo_t))) + ret = -EFAULT; + } break; + } + +out_tsk: + put_task_struct(child); +out: + pr_debug("%d ptrace -> %lx\n", current->pid, ret); + return ret; +} + + +#ifdef CONFIG_COMPAT +#include + +asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, + compat_ulong_t addr, compat_long_t cdata) +{ + const unsigned long data = (unsigned long) (compat_ulong_t) cdata; + struct task_struct *child; + struct utrace_attached_engine *engine; + struct ptrace_state *state; + compat_long_t ret, val; + + pr_debug("%d compat_sys_ptrace(%d, %d, %x, %x)\n", + current->pid, request, pid, addr, cdata); + ret = ptrace_start(pid, request, &child, &engine, &state); + if (ret != -EIO) + goto out; + + val = 0; + ret = arch_compat_ptrace(&request, child, engine, addr, cdata, &val); + if (ret != -ENOSYS) { + if (ret == 0) { + ret = val; + force_successful_syscall_return(); + } + goto out_tsk; + } + + switch (request) { default: + ret = ptrace_common(request, child, engine, state, addr, data); + break; + + case PTRACE_PEEKTEXT: /* read word at location addr. */ + case PTRACE_PEEKDATA: { + compat_ulong_t tmp; + int copied; + + copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); + ret = -EIO; + if (copied != sizeof(tmp)) + break; + ret = put_user(tmp, (compat_ulong_t __user *) data); break; } + case PTRACE_POKETEXT: /* write the word at location addr. */ + case PTRACE_POKEDATA: + ret = 0; + if (access_process_vm(child, addr, &cdata, sizeof(cdata), 1) == sizeof(cdata)) + break; + ret = -EIO; + break; + + case PTRACE_GETEVENTMSG: + ret = put_user(state->have_eventmsg + ? state->u.eventmsg : 0L, + (compat_long_t __user *) data); + break; + case PTRACE_GETSIGINFO: + ret = -EINVAL; + if (!state->have_eventmsg && state->u.siginfo) + ret = copy_siginfo_to_user32( + (struct compat_siginfo __user *) data, + state->u.siginfo); + break; + case PTRACE_SETSIGINFO: + ret = -EINVAL; + if (!state->have_eventmsg && state->u.siginfo + && copy_siginfo_from_user32( + state->u.siginfo, + (struct compat_siginfo __user *) data)) + ret = -EFAULT; + break; + } + +out_tsk: + put_task_struct(child); +out: + pr_debug("%d ptrace -> %lx\n", current->pid, (long)ret); return ret; } +#endif + + +/* + * Detach the zombie being reported for wait. 
+ */ +static inline void +detach_zombie(struct task_struct *tsk, + struct task_struct *p, struct ptrace_state *state) +{ + int detach_error; + struct utrace_attached_engine *engine; + +restart: + detach_error = 0; + rcu_read_lock(); + if (tsk == current) + engine = state->engine; + else { + /* + * We've excluded other ptrace_do_wait calls. But the + * ptracer itself might have done ptrace_detach while we + * did not have rcu_read_lock. So double-check that state + * is still valid. + */ + engine = utrace_attach( + p, (UTRACE_ATTACH_MATCH_OPS + | UTRACE_ATTACH_MATCH_DATA), + &ptrace_utrace_ops, + (unsigned long) state); + if (IS_ERR(engine) || state->parent != tsk) + detach_error = -ESRCH; + else + BUG_ON(state->engine != engine); + } + rcu_read_unlock(); + if (likely(!detach_error)) + detach_error = ptrace_detach(p, engine, state); + if (unlikely(detach_error == -EALREADY)) { + /* + * It's still doing report_death callbacks. + * Just wait for it to settle down. + */ + wait_task_inactive(p); /* Might block. */ + goto restart; + } + /* + * A failure with -ESRCH means that report_reap is + * already running and will do the cleanup, or that + * we lost a race with ptrace_detach in another + * thread or with the automatic detach in + * report_death. + */ + if (detach_error) + BUG_ON(detach_error != -ESRCH); +} + +/* + * We're called with tasklist_lock held for reading. + * If we return -ECHILD or zero, next_thread(tsk) must still be valid to use. + * If we return another error code, or a successful PID value, we + * release tasklist_lock first. + */ +int +ptrace_do_wait(struct task_struct *tsk, + pid_t pid, int options, struct siginfo __user *infop, + int __user *stat_addr, struct rusage __user *rusagep) +{ + struct ptrace_state *state; + struct task_struct *p; + int err = -ECHILD; + int exit_code, why, status; + + rcu_read_lock(); + list_for_each_entry_rcu(state, &tsk->ptracees, entry) { + p = state->task; + + if (pid > 0) { + if (p->pid != pid) + continue; + } else if (!pid) { + if (process_group(p) != process_group(current)) + continue; + } else if (pid != -1) { + if (process_group(p) != -pid) + continue; + } + if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0)) + && !(options & __WALL)) + continue; + if (security_task_wait(p)) + continue; + + /* + * This is a matching child. If we don't win now, tell + * our caller to block and repeat. From this point we + * must ensure that wait_chldexit will get a wakeup for + * any tracee stopping, dying, or being detached. + * For death, tasklist_lock guarantees this already. + */ + err = 0; + + switch (p->exit_state) { + case EXIT_ZOMBIE: + if (!likely(options & WEXITED)) + continue; + if (delay_group_leader(p)) { + struct task_struct *next = next_thread(p); + pr_debug("%d ptrace_do_wait leaving %d " + "zombie code %x " + "delay_group_leader (%d/%lu)\n", + current->pid, p->pid, p->exit_code, + next->pid, next->state); + continue; + } + exit_code = p->exit_code; + goto found; + case EXIT_DEAD: + continue; + default: + /* + * tasklist_lock holds up any transitions to + * EXIT_ZOMBIE. After releasing it we are + * guaranteed a wakeup on wait_chldexit after + * any new deaths. + */ + if (p->flags & PF_EXITING) + /* + * It's in do_exit and might have set + * p->exit_code already, but it's not quite + * dead yet. It will get to report_death + * and wakes us up when it finishes. + */ + continue; + break; + } + + /* + * This xchg atomically ensures that only one do_wait + * call can report this thread. 
Because exit_code is + * always set before do_notify wakes us up, after this + * check fails we are sure to get a wakeup if it stops. + */ + exit_code = xchg(&p->exit_code, 0); + if (exit_code) + goto found; + + // XXX should handle WCONTINUED + + pr_debug("%d ptrace_do_wait leaving %d state %lu code %x\n", + current->pid, p->pid, p->state, p->exit_code); + } + rcu_read_unlock(); + if (err == 0) + pr_debug("%d ptrace_do_wait blocking\n", current->pid); + + return err; + +found: + BUG_ON(state->parent != tsk); + rcu_read_unlock(); + + pr_debug("%d ptrace_do_wait (%d) found %d code %x (%lu/%d)\n", + current->pid, tsk->pid, p->pid, exit_code, + p->exit_state, p->exit_signal); + + /* + * If there was a group exit in progress, all threads report that + * status. Most will have SIGKILL in their own exit_code. + */ + if (p->signal->flags & SIGNAL_GROUP_EXIT) + exit_code = p->signal->group_exit_code; + + if (p->exit_state) { + if (unlikely(p->parent == tsk && p->exit_signal != -1)) + /* + * This is our natural child we were ptracing. + * When it dies it detaches (see ptrace_report_death). + * So we're seeing it here in a race. When it + * finishes detaching it will become reapable in + * the normal wait_task_zombie path instead. + */ + return 0; + if ((exit_code & 0x7f) == 0) { + why = CLD_EXITED; + status = exit_code >> 8; + } + else { + why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED; + status = exit_code & 0x7f; + } + } + else { + why = CLD_TRAPPED; + status = exit_code; + exit_code = (status << 8) | 0x7f; + } + + /* + * At this point we are committed to a successful return + * or a user error return. Release the tasklist_lock. + */ + get_task_struct(p); + read_unlock(&tasklist_lock); + + if (rusagep) + err = getrusage(p, RUSAGE_BOTH, rusagep); + if (infop) { + if (!err) + err = put_user(SIGCHLD, &infop->si_signo); + if (!err) + err = put_user(0, &infop->si_errno); + if (!err) + err = put_user((short)why, &infop->si_code); + if (!err) + err = put_user(p->pid, &infop->si_pid); + if (!err) + err = put_user(p->uid, &infop->si_uid); + if (!err) + err = put_user(status, &infop->si_status); + } + if (!err && stat_addr) + err = put_user(exit_code, stat_addr); + + if (!err) { + if (why != CLD_TRAPPED) + /* + * This was a death report. The ptracer's wait + * does an implicit detach, so the zombie reports + * to its real parent now. + */ + detach_zombie(tsk, p, state); + err = p->pid; + } + + put_task_struct(p); + + return err; +} + + +/* + * All the report callbacks (except death and reap) are subject to a race + * with ptrace_exit doing a quick detach and ptrace_done. It can do this + * even when the target is not quiescent, so a callback may already be in + * progress when it does ptrace_done. Callbacks use this function to fetch + * the struct ptrace_state while ensuring it doesn't disappear until + * put_ptrace_state is called. This just uses RCU, since state and + * anything we try to do to state->parent is safe under rcu_read_lock. 
+ */ +static struct ptrace_state * +get_ptrace_state(struct utrace_attached_engine *engine, + struct task_struct *tsk) +{ + struct ptrace_state *state; + + rcu_read_lock(); + state = rcu_dereference((struct ptrace_state *) engine->data); + if (likely(state != NULL)) + return state; + + rcu_read_unlock(); + return NULL; +} + +static inline void +put_ptrace_state(struct ptrace_state *state) +{ + rcu_read_unlock(); +} + + +static void +do_notify(struct task_struct *tsk, struct task_struct *parent, int why) +{ + struct siginfo info; + unsigned long flags; + struct sighand_struct *sighand; + int sa_mask; + + info.si_signo = SIGCHLD; + info.si_errno = 0; + info.si_pid = tsk->pid; + info.si_uid = tsk->uid; + + /* FIXME: find out whether or not this is supposed to be c*time. */ + info.si_utime = cputime_to_jiffies(tsk->utime); + info.si_stime = cputime_to_jiffies(tsk->stime); + + sa_mask = SA_NOCLDSTOP; + info.si_code = why; + info.si_status = tsk->exit_code & 0x7f; + if (why == CLD_CONTINUED) + info.si_status = SIGCONT; + else if (why == CLD_STOPPED) + info.si_status = tsk->signal->group_exit_code & 0x7f; + else if (why == CLD_EXITED) { + sa_mask = SA_NOCLDWAIT; + if (tsk->exit_code & 0x80) + info.si_code = CLD_DUMPED; + else if (tsk->exit_code & 0x7f) + info.si_code = CLD_KILLED; + else { + info.si_code = CLD_EXITED; + info.si_status = tsk->exit_code >> 8; + } + } + + read_lock(&tasklist_lock); + if (unlikely(parent->signal == NULL)) + goto out; + + sighand = parent->sighand; + spin_lock_irqsave(&sighand->siglock, flags); + if (sighand->action[SIGCHLD-1].sa.sa_handler != SIG_IGN && + !(sighand->action[SIGCHLD-1].sa.sa_flags & sa_mask)) + __group_send_sig_info(SIGCHLD, &info, parent); + /* + * Even if SIGCHLD is not generated, we must wake up wait4 calls. + */ + wake_up_interruptible_sync(&parent->signal->wait_chldexit); + spin_unlock_irqrestore(&sighand->siglock, flags); + +out: + read_unlock(&tasklist_lock); +} + +static u32 +ptrace_report(struct utrace_attached_engine *engine, + struct task_struct *tsk, + struct ptrace_state *state, + int code) +{ + const struct utrace_regset *regset; + + pr_debug("%d ptrace_report %d engine %p" + " state %p code %x parent %d (%p)\n", + current->pid, tsk->pid, engine, state, code, + state->parent->pid, state->parent); + if (!state->have_eventmsg && state->u.siginfo) { + const siginfo_t *si = state->u.siginfo; + pr_debug(" si %d code %x errno %d addr %p\n", + si->si_signo, si->si_code, si->si_errno, + si->si_addr); + } + + /* + * Set our QUIESCE flag right now, before notifying the tracer. + * We do this before setting tsk->exit_code rather than + * by using UTRACE_ACTION_NEWSTATE in our return value, to + * ensure that the tracer can't get the notification and then + * try to resume us with PTRACE_CONT before we set the flag. + */ + utrace_set_flags(tsk, engine, engine->flags | UTRACE_ACTION_QUIESCE); + + /* + * If regset 0 has a writeback call, do it now. On register window + * machines, this makes sure the user memory backing the register + * data is up to date by the time wait_task_inactive returns to + * ptrace_start in our tracer doing a PTRACE_PEEKDATA or the like. 
+ */ + regset = utrace_regset(tsk, engine, utrace_native_view(tsk), 0); + if (regset->writeback) + (*regset->writeback)(tsk, regset, 0); + + BUG_ON(code == 0); + tsk->exit_code = code; + do_notify(tsk, state->parent, CLD_TRAPPED); + + pr_debug("%d ptrace_report quiescing exit_code %x\n", + current->pid, current->exit_code); + + put_ptrace_state(state); + + return UTRACE_ACTION_RESUME; +} -void ptrace_notify(int exit_code) +static inline u32 +ptrace_event(struct utrace_attached_engine *engine, + struct task_struct *tsk, + struct ptrace_state *state, + int event) { - BUG_ON (!(current->ptrace & PT_PTRACED)); + state->syscall = 0; + return ptrace_report(engine, tsk, state, (event << 8) | SIGTRAP); +} - /* Let the debugger run. */ - current->exit_code = exit_code; - set_current_state(TASK_STOPPED); - notify_parent(current, SIGCHLD); - schedule(); +/* + * Unlike other report callbacks, this can't be called while ptrace_exit + * is doing ptrace_done in parallel, so we don't need get_ptrace_state. + */ +static u32 +ptrace_report_death(struct utrace_attached_engine *engine, + struct task_struct *tsk) +{ + struct ptrace_state *state = (struct ptrace_state *) engine->data; + + if (tsk->exit_code == 0 && unlikely(tsk->flags & PF_SIGNALED)) + /* + * This can only mean that tsk->exit_code was clobbered + * by ptrace_update or ptrace_do_wait in a race with + * an asynchronous wakeup and exit for SIGKILL. + */ + tsk->exit_code = SIGKILL; + + if (tsk->parent == state->parent && tsk->exit_signal != -1) { + /* + * This is a natural child (excluding clone siblings of a + * child group_leader), so we detach and let the normal + * reporting happen once our NOREAP action is gone. But + * first, generate a SIGCHLD for those cases where normal + * behavior won't. A ptrace'd child always generates SIGCHLD. + */ + pr_debug("ptrace %d death natural parent %d exit_code %x\n", + tsk->pid, state->parent->pid, tsk->exit_code); + if (!thread_group_empty(tsk)) + do_notify(tsk, state->parent, CLD_EXITED); + ptrace_state_unlink(state); + rcu_assign_pointer(engine->data, 0UL); + ptrace_done(state); + return UTRACE_ACTION_DETACH; + } /* - * Signals sent while we were stopped might set TIF_SIGPENDING. + * This might be a second report_death callback for a group leader + * that was delayed when its original report_death callback was made. + * Repeating do_notify is exactly what we need for that case too. + * After the wakeup, ptrace_do_wait will see delay_group_leader false. */ - spin_lock_irq(¤t->sighand->siglock); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); + pr_debug("ptrace %d death notify %d exit_code %x: ", + tsk->pid, state->parent->pid, tsk->exit_code); + do_notify(tsk, state->parent, CLD_EXITED); + pr_debug("%d notified %d\n", tsk->pid, state->parent->pid); + return UTRACE_ACTION_RESUME; +} + +/* + * We get this only in the case where our UTRACE_ACTION_NOREAP was ignored. + * That happens solely when a non-leader exec reaps the old leader. + */ +static void +ptrace_report_reap(struct utrace_attached_engine *engine, + struct task_struct *tsk) +{ + struct ptrace_state *state = get_ptrace_state(engine, tsk); + if (state != NULL) { + ptrace_state_unlink(state); + rcu_assign_pointer(engine->data, 0UL); + ptrace_done(state); + put_ptrace_state(state); + } +} + +/* + * Start tracing the child. This has to do put_ptrace_state before it can + * do allocation that might block. 
+ */ +static void +ptrace_clone_setup(struct utrace_attached_engine *engine, + struct task_struct *parent, + struct ptrace_state *state, + struct task_struct *child) +{ + struct task_struct *tracer; + struct utrace_attached_engine *child_engine; + struct ptrace_state *child_state; + int ret; + u8 options; + int cap_sys_ptrace; + + tracer = state->parent; + options = state->options; + cap_sys_ptrace = state->cap_sys_ptrace; + get_task_struct(tracer); + put_ptrace_state(state); + + child_engine = utrace_attach(child, (UTRACE_ATTACH_CREATE + | UTRACE_ATTACH_EXCLUSIVE + | UTRACE_ATTACH_MATCH_OPS), + &ptrace_utrace_ops, 0UL); + if (unlikely(IS_ERR(child_engine))) { + BUG_ON(PTR_ERR(child_engine) != -ENOMEM); + put_task_struct(tracer); + goto nomem; + } + + child_state = ptrace_setup(child, child_engine, + tracer, options, cap_sys_ptrace, NULL); + + put_task_struct(tracer); + + if (unlikely(IS_ERR(child_state))) { + (void) utrace_detach(child, child_engine); + + if (PTR_ERR(child_state) == -ENOMEM) + goto nomem; + + /* + * Our tracer has started exiting. It's + * too late to set it up tracing the child. + */ + BUG_ON(PTR_ERR(child_state) != -EALREADY); + } + else { + sigaddset(&child->pending.signal, SIGSTOP); + set_tsk_thread_flag(child, TIF_SIGPENDING); + ret = ptrace_update(child, child_engine, 0, 0); + + /* + * The child hasn't run yet, it can't have died already. + */ + BUG_ON(ret); + } + + return; + +nomem: + printk(KERN_ERR "ptrace out of memory, lost child %d of %d", + child->pid, parent->pid); +} + +static u32 +ptrace_report_clone(struct utrace_attached_engine *engine, + struct task_struct *parent, + unsigned long clone_flags, struct task_struct *child) +{ + int event, option; + struct ptrace_state *state = get_ptrace_state(engine, parent); + if (unlikely(state == NULL)) + return UTRACE_ACTION_RESUME; + + pr_debug("%d (%p) engine %p" + " ptrace_report_clone child %d (%p) fl %lx\n", + parent->pid, parent, engine, child->pid, child, clone_flags); + + event = PTRACE_EVENT_FORK; + option = PTRACE_O_TRACEFORK; + if (clone_flags & CLONE_VFORK) { + event = PTRACE_EVENT_VFORK; + option = PTRACE_O_TRACEVFORK; + } + else if ((clone_flags & CSIGNAL) != SIGCHLD) { + event = PTRACE_EVENT_CLONE; + option = PTRACE_O_TRACECLONE; + } + + if (state->options & option) { + state->have_eventmsg = 1; + state->u.eventmsg = child->pid; + } + else + event = 0; + + if (!(clone_flags & CLONE_UNTRACED) + && (event || (clone_flags & CLONE_PTRACE))) { + /* + * Have our tracer start following the child too. + */ + ptrace_clone_setup(engine, parent, state, child); + + /* + * That did put_ptrace_state, so we have to check + * again in case our tracer just started exiting. 
+ */ + state = get_ptrace_state(engine, parent); + if (unlikely(state == NULL)) + return UTRACE_ACTION_RESUME; + } + + if (event) + return ptrace_event(engine, parent, state, event); + + put_ptrace_state(state); + + return UTRACE_ACTION_RESUME; +} + + +static u32 +ptrace_report_vfork_done(struct utrace_attached_engine *engine, + struct task_struct *parent, pid_t child_pid) +{ + struct ptrace_state *state = get_ptrace_state(engine, parent); + if (unlikely(state == NULL)) + return UTRACE_ACTION_RESUME; + + state->have_eventmsg = 1; + state->u.eventmsg = child_pid; + return ptrace_event(engine, parent, state, PTRACE_EVENT_VFORK_DONE); } -EXPORT_SYMBOL(ptrace_notify); + +static u32 +ptrace_report_signal(struct utrace_attached_engine *engine, + struct task_struct *tsk, struct pt_regs *regs, + u32 action, siginfo_t *info, + const struct k_sigaction *orig_ka, + struct k_sigaction *return_ka) +{ + int signo = info == NULL ? SIGTRAP : info->si_signo; + struct ptrace_state *state = get_ptrace_state(engine, tsk); + if (unlikely(state == NULL)) + return UTRACE_ACTION_RESUME; + + state->syscall = 0; + state->have_eventmsg = 0; + state->u.siginfo = info; + return ptrace_report(engine, tsk, state, signo) | UTRACE_SIGNAL_IGN; +} + +static u32 +ptrace_report_jctl(struct utrace_attached_engine *engine, + struct task_struct *tsk, int type) +{ + struct ptrace_state *state = get_ptrace_state(engine, tsk); + if (unlikely(state == NULL)) + return UTRACE_ACTION_RESUME; + + pr_debug("ptrace %d jctl notify %d type %x exit_code %x\n", + tsk->pid, state->parent->pid, type, tsk->exit_code); + + do_notify(tsk, state->parent, type); + put_ptrace_state(state); + + return UTRACE_JCTL_NOSIGCHLD; +} + +static u32 +ptrace_report_exec(struct utrace_attached_engine *engine, + struct task_struct *tsk, + const struct linux_binprm *bprm, + struct pt_regs *regs) +{ + struct ptrace_state *state = get_ptrace_state(engine, tsk); + if (unlikely(state == NULL)) + return UTRACE_ACTION_RESUME; + + return ptrace_event(engine, tsk, state, + (state->options & PTRACE_O_TRACEEXEC) + ? PTRACE_EVENT_EXEC : 0); +} + +static u32 +ptrace_report_syscall(struct utrace_attached_engine *engine, + struct task_struct *tsk, struct pt_regs *regs, + int entry) +{ + struct ptrace_state *state = get_ptrace_state(engine, tsk); + if (unlikely(state == NULL)) + return UTRACE_ACTION_RESUME; + +#ifdef PTRACE_SYSEMU + if (entry && state->sysemu) + tracehook_abort_syscall(regs); +#endif + state->syscall = 1; + return ptrace_report(engine, tsk, state, + ((state->options & PTRACE_O_TRACESYSGOOD) + ? 
0x80 : 0) | SIGTRAP);
+}
+
+static u32
+ptrace_report_syscall_entry(struct utrace_attached_engine *engine,
+			    struct task_struct *tsk, struct pt_regs *regs)
+{
+	return ptrace_report_syscall(engine, tsk, regs, 1);
+}
+
+static u32
+ptrace_report_syscall_exit(struct utrace_attached_engine *engine,
+			   struct task_struct *tsk, struct pt_regs *regs)
+{
+	return ptrace_report_syscall(engine, tsk, regs, 0);
+}
+
+static u32
+ptrace_report_exit(struct utrace_attached_engine *engine,
+		   struct task_struct *tsk, long orig_code, long *code)
+{
+	struct ptrace_state *state = get_ptrace_state(engine, tsk);
+	if (unlikely(state == NULL))
+		return UTRACE_ACTION_RESUME;
+
+	state->have_eventmsg = 1;
+	state->u.eventmsg = *code;
+	return ptrace_event(engine, tsk, state, PTRACE_EVENT_EXIT);
+}
+
+static int
+ptrace_unsafe_exec(struct utrace_attached_engine *engine,
+		   struct task_struct *tsk)
+{
+	int unsafe = LSM_UNSAFE_PTRACE;
+	struct ptrace_state *state = get_ptrace_state(engine, tsk);
+	if (likely(state != NULL)) {
+		/*
+		 * Only call put_ptrace_state when get_ptrace_state
+		 * returned non-null; on NULL it has already dropped
+		 * the RCU read lock itself.
+		 */
+		if (state->cap_sys_ptrace)
+			unsafe = LSM_UNSAFE_PTRACE_CAP;
+		put_ptrace_state(state);
+	}
+	return unsafe;
+}
+
+static struct task_struct *
+ptrace_tracer_task(struct utrace_attached_engine *engine,
+		   struct task_struct *target)
+{
+	struct task_struct *parent = NULL;
+	struct ptrace_state *state = get_ptrace_state(engine, target);
+	if (likely(state != NULL)) {
+		parent = state->parent;
+		put_ptrace_state(state);
+	}
+	return parent;
+}
+
+static int
+ptrace_allow_access_process_vm(struct utrace_attached_engine *engine,
+			       struct task_struct *target,
+			       struct task_struct *caller)
+{
+	struct ptrace_state *state;
+	int ours = 0;
+
+	state = get_ptrace_state(engine, target);
+	if (likely(state != NULL)) {
+		ours = (((engine->flags & UTRACE_ACTION_QUIESCE)
+			 || target->state == TASK_STOPPED)
+			&& state->parent == caller);
+		put_ptrace_state(state);
+	}
+
+	return ours && security_ptrace(caller, target) == 0;
+}
+
+
+static const struct utrace_engine_ops ptrace_utrace_ops =
+{
+	.report_syscall_entry = ptrace_report_syscall_entry,
+	.report_syscall_exit = ptrace_report_syscall_exit,
+	.report_exec = ptrace_report_exec,
+	.report_jctl = ptrace_report_jctl,
+	.report_signal = ptrace_report_signal,
+	.report_vfork_done = ptrace_report_vfork_done,
+	.report_clone = ptrace_report_clone,
+	.report_exit = ptrace_report_exit,
+	.report_death = ptrace_report_death,
+	.report_reap = ptrace_report_reap,
+	.unsafe_exec = ptrace_unsafe_exec,
+	.tracer_task = ptrace_tracer_task,
+	.allow_access_process_vm = ptrace_allow_access_process_vm,
+};
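
For reference, a minimal userland sketch of the tracer side that these
paths serve: ptrace_attach() and the SIGSTOP report through
ptrace_do_wait(), PTRACE_SETOPTIONS handling in ptrace_common(), and
the 0x80 syscall-stop marker that ptrace_report_syscall() adds under
PTRACE_O_TRACESYSGOOD. Illustrative only, not part of the patch; it
uses only the standard ptrace(2) userland API, and the loop bound is
arbitrary.

#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>

int main(int argc, char **argv)
{
	pid_t pid = atoi(argv[1]);	/* PID of an existing process. */
	int status;

	/* Attach; the target is sent SIGSTOP and reports via wait. */
	if (ptrace(PTRACE_ATTACH, pid, 0, 0) < 0)
		return 1;
	if (waitpid(pid, &status, 0) < 0)
		return 1;

	/* Ask for syscall stops to be flagged with bit 0x80. */
	ptrace(PTRACE_SETOPTIONS, pid, 0, PTRACE_O_TRACESYSGOOD);

	for (int i = 0; i < 8; ++i) {
		/* Resume until the next syscall entry or exit. */
		ptrace(PTRACE_SYSCALL, pid, 0, 0);
		if (waitpid(pid, &status, 0) < 0)
			break;
		if (!WIFSTOPPED(status)
		    || WSTOPSIG(status) != (SIGTRAP | 0x80))
			break;	/* Death or a non-syscall stop. */
		printf("syscall stop %d\n", i);
	}

	ptrace(PTRACE_DETACH, pid, 0, 0);
	return 0;
}

The child-side counterpart goes through ptrace_traceme() above: a
process that wants its parent to trace it across exec does (again
illustrative only):

#include <unistd.h>
#include <sys/ptrace.h>

int main(void)
{
	/* Attach ourselves to our parent before exec. */
	ptrace(PTRACE_TRACEME, 0, 0, 0);
	/* The exec trap stops us until the tracer resumes us. */
	execl("/bin/true", "true", (char *) 0);
	return 127;	/* Only reached if exec failed. */
}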