Merge to Fedora kernel-2.6.18-1.2255_FC5-vs2.0.2.2-rc9 patched with stable patch...
[linux-2.6.git] / kernel / ptrace.c
index 8b2856a..80e780e 100644 (file)
@@ -7,6 +7,7 @@
  * to continually duplicate across every architecture.
  */
 
+#include <linux/capability.h>
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/errno.h>
@@ -16,6 +17,8 @@
 #include <linux/smp_lock.h>
 #include <linux/ptrace.h>
 #include <linux/security.h>
+#include <linux/signal.h>
+#include <linux/vs_base.h>
 
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
  *
  * Must be called with the tasklist lock write-held.
  */
-void __ptrace_link(task_t *child, task_t *new_parent)
+void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
 {
-       if (!list_empty(&child->ptrace_list))
-               BUG();
+       BUG_ON(!list_empty(&child->ptrace_list));       /* child must not already sit on a ptrace list */
        if (child->parent == new_parent)
                return;
        list_add(&child->ptrace_list, &child->parent->ptrace_children);
-       REMOVE_LINKS(child);
+       remove_parent(child);   /* takes over from REMOVE_LINKS(); presumably unhooks child from its current parent -- confirm */
        child->parent = new_parent;
-       SET_LINKS(child);
+       add_parent(child);      /* takes over from SET_LINKS(); presumably hooks child under new_parent -- confirm */
 }
  
+/*
+ * Turn a tracing stop into a normal stop now, since with no tracer there
+ * would be no way to wake it up with SIGCONT or SIGKILL.  If there was a
+ * signal sent that would resume the child, but didn't because it was in
+ * TASK_TRACED, resume it now.
+ * Requires that irqs be disabled.
+ *
+ * @child: task whose state is inspected and possibly changed.
+ *
+ * Nothing is changed unless child->state is TASK_TRACED.  The check and
+ * the state change are done under child->sighand->siglock, which is taken
+ * here with plain spin_lock() (interrupts are not touched by this function).
+ */
+void ptrace_untrace(struct task_struct *child)
+{
+       spin_lock(&child->sighand->siglock);
+       if (child->state == TASK_TRACED) {
+               if (child->signal->flags & SIGNAL_STOP_STOPPED) {
+                       child->state = TASK_STOPPED;    /* SIGNAL_STOP_STOPPED set: demote to an ordinary stop */
+               } else {
+                       signal_wake_up(child, 1);       /* otherwise kick the task so it can resume */
+               }
+       }
+       spin_unlock(&child->sighand->siglock);
+}
+
 /*
  * unptrace a task: move it back to its original parent and
  * remove it from the ptrace list.
  *
  * Must be called with the tasklist lock write-held.
  */
-void __ptrace_unlink(task_t *child)
+void __ptrace_unlink(struct task_struct *child)
 {
-       if (!child->ptrace)
-               BUG();
+       BUG_ON(!child->ptrace);         /* only a task with ptrace flags set may be unlinked */
+
        child->ptrace = 0;
-       if (list_empty(&child->ptrace_list))
-               return;
-       list_del_init(&child->ptrace_list);
-       REMOVE_LINKS(child);
-       child->parent = child->real_parent;
-       SET_LINKS(child);
+       if (!list_empty(&child->ptrace_list)) { /* on a tracer's list: hand the task back to real_parent */
+               list_del_init(&child->ptrace_list);
+               remove_parent(child);
+               child->parent = child->real_parent;
+               add_parent(child);
+       }
+
+       if (child->state == TASK_TRACED)        /* reached on both paths now that the early return is gone */
+               ptrace_untrace(child);
 }
 
 /*
@@ -62,84 +87,152 @@ void __ptrace_unlink(task_t *child)
  */
 int ptrace_check_attach(struct task_struct *child, int kill)
 {
-       if (!(child->ptrace & PT_PTRACED))
-               return -ESRCH;
-
-       if (child->parent != current)
-               return -ESRCH;
+       int ret = -ESRCH;       /* stays -ESRCH unless the locked check below succeeds */
 
-       if (!kill) {
-               if (child->state != TASK_STOPPED)
-                       return -ESRCH;
-               wait_task_inactive(child);
+       /*
+        * We take the read lock around doing both checks to close a
+        * possible race where someone else was tracing our child and
+        * detached between these two checks.  After this locked check,
+        * we are sure that this is our traced child and that can only
+        * be changed by us so it's not changing right after this.
+        *
+        * Returns 0 when child is traced by current and is either in
+        * TASK_STOPPED (converted to TASK_TRACED here) or already in
+        * TASK_TRACED, or is in any state while kill is non-zero.
+        * Returns -ESRCH in every other case.
+        *
+        * NOTE(review): the condition tests child->signal for NULL but
+        * the lock that follows is reached through child->sighand --
+        * confirm the two are always set and cleared together.
+        */
+       read_lock(&tasklist_lock);
+       if ((child->ptrace & PT_PTRACED) && child->parent == current &&
+           (!(child->ptrace & PT_ATTACHED) || child->real_parent != current)
+           && child->signal != NULL) {
+               ret = 0;
+               spin_lock_irq(&child->sighand->siglock);
+               if (child->state == TASK_STOPPED) {
+                       child->state = TASK_TRACED;     /* state is switched under siglock */
+               } else if (child->state != TASK_TRACED && !kill) {
+                       ret = -ESRCH;   /* neither stopped nor traced, and the caller did not ask for kill */
+               }
+               spin_unlock_irq(&child->sighand->siglock);
        }
+       read_unlock(&tasklist_lock);
 
        /* All systems go.. */
-       return 0;
+       return ret;
+}
+
+int __ptrace_may_attach(struct task_struct *task)
+{
+       /* May we inspect the given task?
+        * This check is used both for attaching with ptrace
+        * and for allowing access to sensitive information in /proc.
+        *
+        * ptrace_attach denies several cases that /proc allows
+        * because setting up the necessary parent/child relationship
+        * or halting the specified task is impossible.
+        *
+        * Returns 0 when current may inspect task.  Returns -EPERM when
+        * any of task's uid/euid/suid differs from current->uid or any
+        * of its gid/egid/sgid differs from current->gid, or when the
+        * task is not dumpable, unless current has CAP_SYS_PTRACE.
+        * Otherwise the result of security_ptrace() is returned.
+        *
+        * No lock is taken here; ptrace_may_attach() and ptrace_attach()
+        * both call this with task_lock(task) held.
+        */
+       int dumpable = 0;       /* remains 0 when task->mm is NULL */
+       /* Don't let security modules deny introspection */
+       if (task == current)
+               return 0;       /* returns before any credential or security_ptrace() check */
+       if (((current->uid != task->euid) ||
+            (current->uid != task->suid) ||
+            (current->uid != task->uid) ||
+            (current->gid != task->egid) ||
+            (current->gid != task->sgid) ||
+            (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
+               return -EPERM;
+       smp_rmb();      /* NOTE(review): takes the place of the rmb() removed from ptrace_attach(); the paired write is not visible here */
+       if (task->mm)
+               dumpable = task->mm->dumpable;
+       if (!dumpable && !capable(CAP_SYS_PTRACE))
+               return -EPERM;
+
+       return security_ptrace(current, task);
+}
+
+int ptrace_may_attach(struct task_struct *task)
+{
+       int err;        /* 0 or a negative errno from __ptrace_may_attach() */
+       task_lock(task);        /* the check itself runs with task_lock(task) held */
+       err = __ptrace_may_attach(task);
+       task_unlock(task);
+       return !err;    /* sense is inverted: 1 means allowed, 0 means denied */
 }
 
 int ptrace_attach(struct task_struct *task)
 {
        int retval;
-       task_lock(task);
+
        retval = -EPERM;
        if (task->pid <= 1)
-               goto bad;
-       if (task == current)
-               goto bad;
+               goto out;       /* nothing is locked yet, so bypass the unlock path */
+       if (task->tgid == current->tgid)
+               goto out;       /* refuses every task in the caller's thread group, not only current */
+
+repeat:
+       /*
+        * Nasty, nasty.
+        *
+        * We want to hold both the task-lock and the
+        * tasklist_lock for writing at the same time.
+        * But that's against the rules (tasklist_lock
+        * is taken for reading by interrupts on other
+        * CPUs that may have task_lock).
+        *
+        * So: take task_lock, disable interrupts, and only
+        * *try* for tasklist_lock.  If that fails, drop
+        * everything again, spin with cpu_relax() until
+        * write_can_lock() reports the lock as available,
+        * and start over from the top.
+        */
+       task_lock(task);
+       local_irq_disable();
+       if (!write_trylock(&tasklist_lock)) {
+               local_irq_enable();
+               task_unlock(task);
+               do {
+                       cpu_relax();
+               } while (!write_can_lock(&tasklist_lock));
+               goto repeat;
+       }
+
        if (!task->mm)
                goto bad;
-       if(((current->uid != task->euid) ||
-           (current->uid != task->suid) ||
-           (current->uid != task->uid) ||
-           (current->gid != task->egid) ||
-           (current->gid != task->sgid) ||
-           (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
-               goto bad;
-       rmb();
-       if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
-               goto bad;
        /* the same process cannot be attached many times */
        if (task->ptrace & PT_PTRACED)
                goto bad;
-       retval = security_ptrace(current, task);
+       retval = __ptrace_may_attach(task);     /* uid/gid, dumpable and security_ptrace() checks now live in this helper */
        if (retval)
                goto bad;
 
        /* Go */
-       task->ptrace |= PT_PTRACED;
+       task->ptrace |= PT_PTRACED | ((task->real_parent != current)
+                                     ? PT_ATTACHED : 0);       /* PT_ATTACHED only when the tracer is not the real parent */
        if (capable(CAP_SYS_PTRACE))
                task->ptrace |= PT_PTRACE_CAP;
-       task_unlock(task);
 
-       write_lock_irq(&tasklist_lock);
        __ptrace_link(task, current);
-       write_unlock_irq(&tasklist_lock);
 
        force_sig_specific(SIGSTOP, task);
-       return 0;
 
 bad:
+       write_unlock_irq(&tasklist_lock);       /* the success path falls through to here too, with retval == 0 */
        task_unlock(task);
+out:
        return retval;
 }
 
+static inline void __ptrace_detach(struct task_struct *child, unsigned int data)
+{
+       child->exit_code = data;        /* ptrace_detach() has already passed data through valid_signal() */
+       /* .. re-parent ..
+        * (__ptrace_unlink() hits BUG_ON() when child->ptrace is 0;
+        * ptrace_detach() tests child->ptrace before calling in here,
+        * and does so under write_lock_irq(&tasklist_lock))
+        */
+       __ptrace_unlink(child);
+       /* .. and wake it up. */
+       if (child->exit_state != EXIT_ZOMBIE)
+               wake_up_process(child);
+}
+
 int ptrace_detach(struct task_struct *child, unsigned int data)
 {
-       if ((unsigned long) data > _NSIG)
-               return  -EIO;
+       if (!valid_signal(data))
+               return -EIO;
 
        /* Architecture-specific hardware disable .. */
        ptrace_disable(child);
 
-       /* .. re-parent .. */
-       child->exit_code = data;
-
        write_lock_irq(&tasklist_lock);
-       __ptrace_unlink(child);
-       /* .. and wake it up. */
-       if (child->state != TASK_ZOMBIE)
-               wake_up_process(child);
+       /* protect against de_thread()->release_task() */
+       if (child->ptrace)
+               __ptrace_detach(child, data);
        write_unlock_irq(&tasklist_lock);
 
        return 0;
@@ -178,8 +271,6 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
                if (bytes > PAGE_SIZE-offset)
                        bytes = PAGE_SIZE-offset;
 
-               flush_cache_page(vma, addr);
-
                maddr = kmap(page);
                if (write) {
                        copy_to_user_page(vma, page, addr,
@@ -281,18 +372,45 @@ static int ptrace_setoptions(struct task_struct *child, long data)
 
 static int ptrace_getsiginfo(struct task_struct *child, siginfo_t __user * data)
 {
-       if (child->last_siginfo == NULL)
-               return -EINVAL;
-       return copy_siginfo_to_user(data, child->last_siginfo);
+       siginfo_t lastinfo;     /* local snapshot, so the copy to user space runs after both locks are dropped */
+       int error = -ESRCH;     /* returned when child->sighand is NULL */
+
+       read_lock(&tasklist_lock);
+       if (likely(child->sighand != NULL)) {
+               error = -EINVAL;        /* returned when child->last_siginfo is NULL */
+               spin_lock_irq(&child->sighand->siglock);
+               if (likely(child->last_siginfo != NULL)) {
+                       lastinfo = *child->last_siginfo;
+                       error = 0;
+               }
+               spin_unlock_irq(&child->sighand->siglock);
+       }
+       read_unlock(&tasklist_lock);
+       if (!error)
+               return copy_siginfo_to_user(data, &lastinfo);   /* result of the user copy becomes the return value */
+       return error;
 }
 
 static int ptrace_setsiginfo(struct task_struct *child, siginfo_t __user * data)
 {
-       if (child->last_siginfo == NULL)
-               return -EINVAL;
-       if (copy_from_user(child->last_siginfo, data, sizeof (siginfo_t)) != 0)
+       siginfo_t newinfo;      /* staging copy, filled from user space before any lock is taken */
+       int error = -ESRCH;     /* returned when child->sighand is NULL */
+
+       if (copy_from_user(&newinfo, data, sizeof (siginfo_t)))
                return -EFAULT;
-       return 0;
+
+       read_lock(&tasklist_lock);
+       if (likely(child->sighand != NULL)) {
+               error = -EINVAL;        /* returned when child->last_siginfo is NULL */
+               spin_lock_irq(&child->sighand->siglock);
+               if (likely(child->last_siginfo != NULL)) {
+                       *child->last_siginfo = newinfo; /* whole-struct assignment done under siglock */
+                       error = 0;
+               }
+               spin_unlock_irq(&child->sighand->siglock);
+       }
+       read_unlock(&tasklist_lock);
+       return error;
 }
 
 int ptrace_request(struct task_struct *child, long request,
@@ -323,23 +441,104 @@ int ptrace_request(struct task_struct *child, long request,
        return ret;
 }
 
-void ptrace_notify(int exit_code)
+/**
+ * ptrace_traceme  --  helper for PTRACE_TRACEME
+ *
+ * Performs checks and sets PT_PTRACED.
+ * Should be used by all ptrace implementations for PTRACE_TRACEME.
+ *
+ * Acts on current only and takes no arguments.  The flag test and
+ * update are done under task_lock(current).
+ *
+ * Returns 0 on success, -EPERM if current already has PT_PTRACED set,
+ * or the non-zero result of security_ptrace() when that hook refuses.
+ */
+int ptrace_traceme(void)
 {
-       BUG_ON (!(current->ptrace & PT_PTRACED));
-
-       /* Let the debugger run.  */
-       current->exit_code = exit_code;
-       set_current_state(TASK_STOPPED);
-       notify_parent(current, SIGCHLD);
-       schedule();
+       int ret = -EPERM;       /* kept when PT_PTRACED is already set */
 
        /*
-        * Signals sent while we were stopped might set TIF_SIGPENDING.
+        * Are we already being traced?
         */
+       task_lock(current);
+       if (!(current->ptrace & PT_PTRACED)) {
+               ret = security_ptrace(current->parent, current);        /* current->parent is passed as the tracer */
+               /*
+                * Set the ptrace bit in the process ptrace flags.
+                */
+               if (!ret)
+                       current->ptrace |= PT_PTRACED;
+       }
+       task_unlock(current);
+       return ret;
+}
+
+/**
+ * ptrace_get_task_struct  --  grab a task struct reference for ptrace
+ * @pid:       process id to grab a task_struct reference of
+ *
+ * This function is a helper for ptrace implementations.  It checks
+ * permissions and then grabs a task struct for use of the actual
+ * ptrace implementation.
+ *
+ * The only check made in this function itself is the refusal of
+ * pid 1.  The lookup and get_task_struct() are done under
+ * read_lock(&tasklist_lock); the reference taken there is left for
+ * the caller to drop (sys_ptrace() does so with put_task_struct()).
+ *
+ * Returns the task_struct for @pid or an ERR_PTR() on failure.
+ * The failure values are ERR_PTR(-EPERM) for pid 1 and
+ * ERR_PTR(-ESRCH) when find_task_by_pid() finds nothing.
+ */
+struct task_struct *ptrace_get_task_struct(pid_t pid)
+{
+       struct task_struct *child;
 
-       spin_lock_irq(&current->sighand->siglock);
-       recalc_sigpending();
-       spin_unlock_irq(&current->sighand->siglock);
+       /*
+        * Tracing init is not allowed.
+        */
+       if (pid == 1)
+               return ERR_PTR(-EPERM);
+
+       read_lock(&tasklist_lock);
+       child = find_task_by_pid(pid);
+       if (child)
+               get_task_struct(child); /* reference is taken while tasklist_lock is still held */
+       read_unlock(&tasklist_lock);
+       if (!child)
+               return ERR_PTR(-ESRCH);
+       return child;
 }
 
-EXPORT_SYMBOL(ptrace_notify);
+#ifndef __ARCH_SYS_PTRACE
+asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
+{
+       struct task_struct *child;
+       long ret;
+
+       /*
+        * This lock_kernel fixes a subtle race with suid exec
+        *
+        * Generic ptrace entry point, compiled only when the
+        * architecture does not define __ARCH_SYS_PTRACE.
+        * PTRACE_TRACEME and PTRACE_ATTACH are handled here; every
+        * other request goes through ptrace_check_attach() and is
+        * then passed to arch_ptrace().  The value returned is
+        * whatever the helper that handled the request returned, or
+        * -EPERM when the vx_check() test fails.
+        */
+       lock_kernel();
+       if (request == PTRACE_TRACEME) {
+               ret = ptrace_traceme(); /* acts on current; pid, addr and data are not used */
+               goto out;
+       }
+
+       child = ptrace_get_task_struct(pid);    /* holds a task reference on success */
+       if (IS_ERR(child)) {
+               ret = PTR_ERR(child);
+               goto out;
+       }
+
+       ret = -EPERM;
+       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))   /* from <linux/vs_base.h>; presumably a Linux-VServer context check -- confirm */
+               goto out_put_task_struct;
+
+       if (request == PTRACE_ATTACH) {
+               ret = ptrace_attach(child);
+               goto out_put_task_struct;
+       }
+
+       ret = ptrace_check_attach(child, request == PTRACE_KILL);       /* kill flag lets a child that is not stopped through */
+       if (ret < 0)
+               goto out_put_task_struct;
+
+       ret = arch_ptrace(child, request, addr, data);
+       if (ret < 0)
+               goto out_put_task_struct;       /* no effect: the label is the next statement either way */
+
+ out_put_task_struct:
+       put_task_struct(child); /* drops the reference from ptrace_get_task_struct() */
+ out:
+       unlock_kernel();
+       return ret;
+}
+#endif /* __ARCH_SYS_PTRACE */