fedora core 6 1.2949 + vserver 2.2.0
[linux-2.6.git] / kernel / ptrace.c
1 /*
2  * linux/kernel/ptrace.c
3  *
4  * (C) Copyright 1999 Linus Torvalds
5  *
6  * Common interfaces for "ptrace()" which we do not want
7  * to continually duplicate across every architecture.
8  */
9
10 #include <linux/capability.h>
11 #include <linux/module.h>
12 #include <linux/sched.h>
13 #include <linux/errno.h>
14 #include <linux/mm.h>
15 #include <linux/highmem.h>
16 #include <linux/pagemap.h>
17 #include <linux/smp_lock.h>
18 #include <linux/ptrace.h>
19 #include <linux/security.h>
20 #include <linux/signal.h>
21 #include <linux/utrace.h>
22 #include <linux/tracehook.h>
23 #include <linux/vs_context.h>
24 #include <asm/tracehook.h>
25 #include <asm/pgtable.h>
26 #include <asm/uaccess.h>
27
/*
 * Per-tracee bookkeeping for the ptrace compatibility layer built on
 * utrace.  One of these hangs off engine->data for each traced task and
 * is linked on the tracer's ->ptracees list (see ptrace_setup).
 */
struct ptrace_state
{
        struct rcu_head rcu;    /* For deferred freeing via call_rcu.  */

        /*
         * These elements are always available, even when the struct is
         * awaiting destruction at the next RCU callback point.
         */
        struct utrace_attached_engine *engine;
        struct task_struct *task; /* Target task.  */
        struct task_struct *parent; /* Whom we report to.  */
        struct list_head entry; /* Entry on parent->ptracees list.  */

        u8 options;             /* PTRACE_SETOPTIONS bits.  */
        unsigned int syscall:1; /* Reporting for syscall.  */
#ifdef PTRACE_SYSEMU
        unsigned int sysemu:1;  /* PTRACE_SYSEMU in progress. */
#endif
        unsigned int have_eventmsg:1; /* u.eventmsg valid. */
        unsigned int cap_sys_ptrace:1; /* Tracer capable.  */

        /*
         * Only one member is meaningful at a time, selected by
         * have_eventmsg: eventmsg when set, siginfo otherwise.
         */
        union
        {
                unsigned long eventmsg;
                siginfo_t *siginfo;
        } u;
};
55
/* Callback table for our utrace engines; used to match our engines in
   utrace_attach (UTRACE_ATTACH_MATCH_OPS).  */
static const struct utrace_engine_ops ptrace_utrace_ops; /* Initialized below. */
57
/*
 * Remove STATE from its tracer's ->ptracees list.  The parent's
 * task_lock serializes against ptrace_setup and ptrace_exit; readers
 * walk the list under RCU, so the entry is only unlinked here and the
 * struct is freed later via ptrace_done.
 */
static void
ptrace_state_unlink(struct ptrace_state *state)
{
        task_lock(state->parent);
        list_del_rcu(&state->entry);
        task_unlock(state->parent);
}
65
/*
 * Initialize a ptrace_state for ENGINE, tracing TARGET on behalf of
 * PARENT, and link it on PARENT's ->ptracees list.  If STATE is NULL,
 * allocate one here; callers that must not sleep at this point (see
 * ptrace_traceme) preallocate and pass it in.
 *
 * Returns the state pointer on success.  On failure returns
 * ERR_PTR(-ENOMEM) if allocation failed, or ERR_PTR(-EALREADY) if
 * PARENT is already exiting -- in that case STATE has been freed here,
 * even when it was preallocated by the caller.
 */
static struct ptrace_state *
ptrace_setup(struct task_struct *target, struct utrace_attached_engine *engine,
             struct task_struct *parent, u8 options, int cap_sys_ptrace,
             struct ptrace_state *state)
{
        if (state == NULL) {
                state = kzalloc(sizeof *state, GFP_USER);
                if (unlikely(state == NULL))
                        return ERR_PTR(-ENOMEM);
        }

        state->engine = engine;
        state->task = target;
        state->parent = parent;
        state->options = options;
        state->cap_sys_ptrace = cap_sys_ptrace;

        /*
         * Checking PF_EXITING under the parent's task_lock pairs with
         * ptrace_exit, which sets the flag before taking the lock to
         * tear the list down, so we never add to a dying list.
         */
        task_lock(parent);
        if (unlikely(parent->flags & PF_EXITING)) {
                task_unlock(parent);
                kfree(state);
                return ERR_PTR(-EALREADY);
        }
        list_add_rcu(&state->entry, &state->parent->ptracees);
        task_unlock(state->parent);

        BUG_ON(engine->data != 0);
        /*
         * Publish the state; readers fetch engine->data under
         * rcu_read_lock (see ptrace_start).
         */
        rcu_assign_pointer(engine->data, (unsigned long) state);

        return state;
}
97
/* RCU callback: final free of a ptrace_state after a grace period.  */
static void
ptrace_state_free(struct rcu_head *rhead)
{
        struct ptrace_state *state = container_of(rhead,
                                                  struct ptrace_state, rcu);
        kfree(state);
}
105
/*
 * Dispose of an already-unlinked ptrace_state.  Freeing is deferred
 * through RCU because readers may still be traversing the ->ptracees
 * list or dereferencing engine->data without locks.
 */
static void
ptrace_done(struct ptrace_state *state)
{
        INIT_RCU_HEAD(&state->rcu);
        call_rcu(&state->rcu, ptrace_state_free);
}
112
/*
 * Update the tracing engine state to match the new ptrace state.
 * FLAGS holds the caller's requested UTRACE_ACTION_*/UTRACE_EVENT bits;
 * the events ptrace always needs are OR'd in below.  FROM_STOPPED means
 * we are letting the thread resume out of a ptrace stop, so its
 * exit_code and the pending report state must be reset first.
 * Returns the result of utrace_set_flags.
 */
static int __must_check
ptrace_update(struct task_struct *target,
              struct utrace_attached_engine *engine,
              unsigned long flags, int from_stopped)
{
        struct ptrace_state *state = (struct ptrace_state *) engine->data;

        /*
         * These events are always reported.
         */
        flags |= (UTRACE_EVENT(DEATH) | UTRACE_EVENT(EXEC)
                  | UTRACE_EVENT_SIGNAL_ALL | UTRACE_EVENT(JCTL));

        /*
         * We always have to examine clone events to check for CLONE_PTRACE.
         */
        flags |= UTRACE_EVENT(CLONE);

        /*
         * PTRACE_SETOPTIONS can request more events.
         */
        if (state->options & PTRACE_O_TRACEEXIT)
                flags |= UTRACE_EVENT(EXIT);
        if (state->options & PTRACE_O_TRACEVFORKDONE)
                flags |= UTRACE_EVENT(VFORK_DONE);

        /*
         * ptrace always inhibits normal parent reaping.
         * But for a corner case we sometimes see the REAP event anyway.
         */
        flags |= UTRACE_ACTION_NOREAP | UTRACE_EVENT(REAP);

        if (from_stopped && !(flags & UTRACE_ACTION_QUIESCE)) {
                /*
                 * We're letting the thread resume from ptrace stop.
                 * If SIGKILL is waking it up, it can be racing with us here
                 * to set its own exit_code in do_exit.  Though we clobber
                 * it here, we check for the case in ptrace_report_death.
                 */
                if (!unlikely(target->flags & PF_SIGNALED))
                        target->exit_code = 0;

                /* Drop the stale siginfo pointer from the last report.  */
                if (!state->have_eventmsg)
                        state->u.siginfo = NULL;

                if (target->state == TASK_STOPPED) {
                        /*
                         * We have to double-check for naughty de_thread
                         * reaping despite NOREAP, before we can get siglock.
                         */
                        read_lock(&tasklist_lock);
                        if (!target->exit_state) {
                                spin_lock_irq(&target->sighand->siglock);
                                if (target->state == TASK_STOPPED)
                                        target->signal->flags &=
                                                ~SIGNAL_STOP_STOPPED;
                                spin_unlock_irq(&target->sighand->siglock);
                        }
                        read_unlock(&tasklist_lock);
                }
        }

        return utrace_set_flags(target, engine, flags);
}
180
181 static int ptrace_traceme(void)
182 {
183         struct utrace_attached_engine *engine;
184         struct ptrace_state *state;
185         struct task_struct *parent;
186         int retval;
187
188         engine = utrace_attach(current, (UTRACE_ATTACH_CREATE
189                                          | UTRACE_ATTACH_EXCLUSIVE
190                                          | UTRACE_ATTACH_MATCH_OPS),
191                                &ptrace_utrace_ops, 0UL);
192
193         if (IS_ERR(engine)) {
194                 retval = PTR_ERR(engine);
195                 if (retval == -EEXIST)
196                         retval = -EPERM;
197         }
198         else {
199                 /*
200                  * We need to preallocate so that we can hold
201                  * rcu_read_lock from extracting ->parent through
202                  * ptrace_setup using it.
203                  */
204                 state = kzalloc(sizeof *state, GFP_USER);
205                 if (unlikely(state == NULL)) {
206                         (void) utrace_detach(current, engine);
207                         printk(KERN_ERR
208                                "ptrace out of memory, lost child %d of %d",
209                                current->pid, current->parent->pid);
210                         return -ENOMEM;
211                 }
212
213                 rcu_read_lock();
214                 parent = rcu_dereference(current->parent);
215
216                 task_lock(current);
217                 retval = security_ptrace(parent, current);
218                 task_unlock(current);
219
220                 if (retval) {
221                         kfree(state);
222                         (void) utrace_detach(current, engine);
223                 }
224                 else {
225                         state = ptrace_setup(current, engine, parent, 0, 0,
226                                              state);
227                         if (IS_ERR(state))
228                                 retval = PTR_ERR(state);
229                 }
230                 rcu_read_unlock();
231
232                 if (!retval) {
233                         /*
234                          * This can't fail because we can't die while we
235                          * are here doing this.
236                          */
237                         retval = ptrace_update(current, engine, 0, 0);
238                         BUG_ON(retval);
239                 }
240                 else if (unlikely(retval == -EALREADY))
241                         /*
242                          * We raced with our parent's exit, which would
243                          * have detached us just after our attach if
244                          * we'd won the race.  Pretend we got attached
245                          * and then detached immediately, no error.
246                          */
247                         retval = 0;
248         }
249
250         return retval;
251 }
252
253 static int ptrace_attach(struct task_struct *task)
254 {
255         struct utrace_attached_engine *engine;
256         struct ptrace_state *state;
257         int retval;
258
259         retval = -EPERM;
260         if (task->pid <= 1)
261                 goto bad;
262         if (task->tgid == current->tgid)
263                 goto bad;
264         if (!task->mm)          /* kernel threads */
265                 goto bad;
266
267         pr_debug("%d ptrace_attach %d state %lu exit_code %x\n",
268                  current->pid, task->pid, task->state, task->exit_code);
269
270         engine = utrace_attach(task, (UTRACE_ATTACH_CREATE
271                                       | UTRACE_ATTACH_EXCLUSIVE
272                                       | UTRACE_ATTACH_MATCH_OPS),
273                                &ptrace_utrace_ops, 0);
274         if (IS_ERR(engine)) {
275                 retval = PTR_ERR(engine);
276                 if (retval == -EEXIST)
277                         retval = -EPERM;
278                 goto bad;
279         }
280
281         pr_debug("%d ptrace_attach %d after utrace_attach: %lu exit_code %x\n",
282                  current->pid, task->pid, task->state, task->exit_code);
283
284         if (ptrace_may_attach(task)) {
285                 state = ptrace_setup(task, engine, current, 0,
286                                      capable(CAP_SYS_PTRACE), NULL);
287                 if (IS_ERR(state))
288                         retval = PTR_ERR(state);
289                 else {
290                         retval = ptrace_update(task, engine, 0, 0);
291
292                         pr_debug("%d ptrace_attach %d after ptrace_update (%d)"
293                                  " %lu exit_code %x\n",
294                                  current->pid, task->pid, retval,
295                                  task->state, task->exit_code);
296
297                         if (retval) {
298                                 /*
299                                  * It died before we enabled any callbacks.
300                                  */
301                                 if (retval == -EALREADY)
302                                         retval = -ESRCH;
303                                 BUG_ON(retval != -ESRCH);
304                                 ptrace_state_unlink(state);
305                                 ptrace_done(state);
306                         }
307                 }
308         }
309         if (retval)
310                 (void) utrace_detach(task, engine);
311         else {
312                 int stopped = 0;
313
314                 /*
315                  * We must double-check that task has not just died and
316                  * been reaped (after ptrace_update succeeded).
317                  * This happens when exec (de_thread) ignores NOREAP.
318                  * We cannot call into the signal code if it's dead.
319                  */
320                 read_lock(&tasklist_lock);
321                 if (likely(!task->exit_state)) {
322                         force_sig_specific(SIGSTOP, task);
323
324                         spin_lock_irq(&task->sighand->siglock);
325                         stopped = (task->state == TASK_STOPPED);
326                         spin_unlock_irq(&task->sighand->siglock);
327                 }
328                 read_unlock(&tasklist_lock);
329
330                 if (stopped) {
331                         const struct utrace_regset *regset;
332
333                         /*
334                          * Set QUIESCE immediately, so we can allow
335                          * ptrace requests while he's in TASK_STOPPED.
336                          */
337                         retval = ptrace_update(task, engine,
338                                                UTRACE_ACTION_QUIESCE, 0);
339                         if (retval)
340                                 BUG_ON(retval != -ESRCH);
341                         retval = 0;
342
343                         /*
344                          * Do now the regset 0 writeback that we do on every
345                          * stop, since it's never been done.  On register
346                          * window machines, this makes sure the user memory
347                          * backing the register data is up to date.
348                          */
349                         regset = utrace_regset(task, engine,
350                                                utrace_native_view(task), 0);
351                         if (regset->writeback)
352                                 (*regset->writeback)(task, regset, 1);
353                 }
354
355                 pr_debug("%d ptrace_attach %d complete (%sstopped)"
356                          " state %lu code %x",
357                          current->pid, task->pid, stopped ? "" : "not ",
358                          task->state, task->exit_code);
359         }
360
361 bad:
362         return retval;
363 }
364
/*
 * The task might be dying or being reaped in parallel, in which case
 * engine and state may no longer be valid.  utrace_detach checks for us.
 *
 * Implements PTRACE_DETACH proper: clears the job-control stop state so
 * the child can resume, detaches the engine, and on success tears down
 * STATE and wakes any sibling threads blocked in wait.  Returns 0 or
 * the error from utrace_detach.
 */
static int ptrace_detach(struct task_struct *task,
                         struct utrace_attached_engine *engine,
                         struct ptrace_state *state)
{

        int error;

#ifdef HAVE_ARCH_PTRACE_DETACH
        /*
         * Some funky compatibility code in arch_ptrace may have
         * needed to install special state it should clean up now.
         */
        arch_ptrace_detach(task);
#endif

        /*
         * Traditional ptrace behavior does wake_up_process no matter what
         * in ptrace_detach.  But utrace_detach will not do a wakeup if
         * it's in a proper job control stop.  We need it to wake up from
         * TASK_STOPPED and either resume or process more signals.  A
         * pending stop signal will just leave it stopped again, but will
         * consume the signal, and reset task->exit_code for the next wait
         * call to see.  This is important to userland if ptrace_do_wait
         * "stole" the previous unwaited-for-ness (clearing exit_code), but
         * there is a pending SIGSTOP, e.g. sent by a PTRACE_ATTACH done
         * while already in job control stop.
         */
        read_lock(&tasklist_lock);
        /* NOTE(review): ->signal presumably goes NULL once the task has
           been reaped -- confirm against release_task.  */
        if (likely(task->signal != NULL)) {
                spin_lock_irq(&task->sighand->siglock);
                task->signal->flags &= ~SIGNAL_STOP_STOPPED;
                spin_unlock_irq(&task->sighand->siglock);
        }
        read_unlock(&tasklist_lock);

        error = utrace_detach(task, engine);
        if (!error) {
                /*
                 * We can only get here from the ptracer itself or via
                 * detach_zombie from another thread in its group.
                 */
                BUG_ON(state->parent->tgid != current->tgid);
                ptrace_state_unlink(state);
                ptrace_done(state);

                /*
                 * Wake up any other threads that might be blocked in
                 * wait.  Though traditional ptrace does not guarantee
                 * this wakeup on PTRACE_DETACH, it does prevent
                 * erroneous blocking in wait when another racing
                 * thread's wait call reap-detaches the last child.
                 * Without this wakeup, another thread might stay
                 * blocked when it should return -ECHILD.
                 */
                spin_lock_irq(&current->sighand->siglock);
                wake_up_interruptible(&current->signal->wait_chldexit);
                spin_unlock_irq(&current->sighand->siglock);
        }
        return error;
}
429
430
/*
 * This is called when we are exiting.  We must stop all our ptracing.
 */
void
ptrace_exit(struct task_struct *tsk)
{
        struct list_head *pos, *n;

        /*
         * Taking the task_lock after PF_EXITING is set ensures that a
         * child in ptrace_traceme will not put itself on our list when
         * we might already be tearing it down.
         */
        task_lock(tsk);
        if (likely(list_empty(&tsk->ptracees))) {
                task_unlock(tsk);
                return;
        }
        task_unlock(tsk);

restart:
        rcu_read_lock();

        /*
         * Detach every tracee.  The safe-rcu walk tolerates
         * ptrace_state_unlink deleting entries as we go.
         */
        list_for_each_safe_rcu(pos, n, &tsk->ptracees) {
                struct ptrace_state *state = list_entry(pos,
                                                        struct ptrace_state,
                                                        entry);
                int error = utrace_detach(state->task, state->engine);
                BUG_ON(state->parent != tsk);
                if (likely(error == 0)) {
                        ptrace_state_unlink(state);
                        ptrace_done(state);
                }
                else if (unlikely(error == -EALREADY)) {
                        /*
                         * It's still doing report_death callbacks.
                         * Just wait for it to settle down.
                         * Since wait_task_inactive might yield,
                         * we must go out of rcu_read_lock and restart.
                         */
                        struct task_struct *p = state->task;
                        get_task_struct(p);
                        rcu_read_unlock();
                        wait_task_inactive(p);
                        put_task_struct(p);
                        goto restart;
                }
                else
                        /* -ESRCH is the only other result we tolerate.  */
                        BUG_ON(error != -ESRCH);
        }

        rcu_read_unlock();

        BUG_ON(!list_empty(&tsk->ptracees));
}
486
/*
 * Pass SIGNR to TARGET as part of resuming it, the way the signal
 * argument to PTRACE_CONT and friends traditionally works.  SIGNR == 0
 * means no signal.  Returns 0 on success, -EIO for an invalid signal
 * number, or the error from utrace_inject_signal.
 */
static int
ptrace_induce_signal(struct task_struct *target,
                     struct utrace_attached_engine *engine,
                     long signr)
{
        struct ptrace_state *state = (struct ptrace_state *) engine->data;

        if (signr == 0)
                return 0;

        if (!valid_signal(signr))
                return -EIO;

        if (state->syscall) {
                /*
                 * This is the traditional ptrace behavior when given
                 * a signal to resume from a syscall tracing stop.
                 */
                send_sig(signr, target, 1);
        }
        else if (!state->have_eventmsg && state->u.siginfo) {
                siginfo_t *info = state->u.siginfo;

                /* Update the siginfo structure if the signal has
                   changed.  If the debugger wanted something
                   specific in the siginfo structure then it should
                   have updated *info via PTRACE_SETSIGINFO.  */
                if (signr != info->si_signo) {
                        info->si_signo = signr;
                        info->si_errno = 0;
                        info->si_code = SI_USER;
                        info->si_pid = current->pid;
                        info->si_uid = current->uid;
                }

                /* Re-deliver the (possibly rewritten) stop signal.  */
                return utrace_inject_signal(target, engine,
                                            UTRACE_ACTION_RESUME, info, NULL);
        }

        return 0;
}
528
529 int
530 ptrace_regset_access(struct task_struct *target,
531                      struct utrace_attached_engine *engine,
532                      const struct utrace_regset_view *view,
533                      int setno, unsigned long offset, unsigned int size,
534                      void __user *data, int write)
535 {
536         const struct utrace_regset *regset = utrace_regset(target, engine,
537                                                            view, setno);
538         int ret;
539
540         if (unlikely(regset == NULL))
541                 return -EIO;
542
543         if (size == (unsigned int) -1)
544                 size = regset->size * regset->n;
545
546         if (write) {
547                 if (!access_ok(VERIFY_READ, data, size))
548                         ret = -EIO;
549                 else
550                         ret = (*regset->set)(target, regset,
551                                              offset, size, NULL, data);
552         }
553         else {
554                 if (!access_ok(VERIFY_WRITE, data, size))
555                         ret = -EIO;
556                 else
557                         ret = (*regset->get)(target, regset,
558                                              offset, size, NULL, data);
559         }
560
561         return ret;
562 }
563
564 int
565 ptrace_onereg_access(struct task_struct *target,
566                      struct utrace_attached_engine *engine,
567                      const struct utrace_regset_view *view,
568                      int setno, unsigned long regno,
569                      void __user *data, int write)
570 {
571         const struct utrace_regset *regset = utrace_regset(target, engine,
572                                                            view, setno);
573         unsigned int pos;
574         int ret;
575
576         if (unlikely(regset == NULL))
577                 return -EIO;
578
579         if (regno < regset->bias || regno >= regset->bias + regset->n)
580                 return -EINVAL;
581
582         pos = (regno - regset->bias) * regset->size;
583
584         if (write) {
585                 if (!access_ok(VERIFY_READ, data, regset->size))
586                         ret = -EIO;
587                 else
588                         ret = (*regset->set)(target, regset, pos, regset->size,
589                                              NULL, data);
590         }
591         else {
592                 if (!access_ok(VERIFY_WRITE, data, regset->size))
593                         ret = -EIO;
594                 else
595                         ret = (*regset->get)(target, regset, pos, regset->size,
596                                              NULL, data);
597         }
598
599         return ret;
600 }
601
/*
 * Access a traditional ptrace "struct user"-style layout.  LAYOUT is a
 * table of segments mapping user-visible offsets onto regset positions;
 * the transfer of SIZE bytes starting at ADDR is split across segments.
 * Exactly one of UDATA and KDATA is used for the transfer (KDATA when
 * non-NULL).  Returns 0 on success or a negative error code.
 */
int
ptrace_layout_access(struct task_struct *target,
                     struct utrace_attached_engine *engine,
                     const struct utrace_regset_view *view,
                     const struct ptrace_layout_segment layout[],
                     unsigned long addr, unsigned int size,
                     void __user *udata, void *kdata, int write)
{
        const struct ptrace_layout_segment *seg;
        int ret = -EIO;

        if (kdata == NULL &&
            !access_ok(write ? VERIFY_READ : VERIFY_WRITE, udata, size))
                return -EIO;

        seg = layout;
        do {
                unsigned int pos, n;

                /*
                 * Advance to the segment containing ADDR; a zero ->end
                 * terminates the table.
                 */
                while (addr >= seg->end && seg->end != 0)
                        ++seg;

                if (addr < seg->start || addr >= seg->end)
                        return -EIO;

                /* Regset position, and transfer clipped to this segment.  */
                pos = addr - seg->start + seg->offset;
                n = min(size, seg->end - (unsigned int) addr);

                if (unlikely(seg->regset == (unsigned int) -1)) {
                        /*
                         * This is a no-op/zero-fill portion of struct user.
                         */
                        ret = 0;
                        if (!write && seg->offset == 0) {
                                if (kdata)
                                        memset(kdata, 0, n);
                                else if (clear_user(udata, n))
                                        ret = -EFAULT;
                        }
                }
                else {
                        unsigned int align;
                        const struct utrace_regset *regset = utrace_regset(
                                target, engine, view, seg->regset);
                        if (unlikely(regset == NULL))
                                return -EIO;

                        /*
                         * A ptrace compatibility layout can do a misaligned
                         * regset access, e.g. word access to larger data.
                         * An arch's compat layout can be this way only if
                         * it is actually ok with the regset code despite the
                         * regset->align setting.
                         */
                        align = min(regset->align, size);
                        if ((pos & (align - 1))
                            || pos >= regset->n * regset->size)
                                return -EIO;

                        if (write)
                                ret = (*regset->set)(target, regset,
                                                     pos, n, kdata, udata);
                        else
                                ret = (*regset->get)(target, regset,
                                                     pos, n, kdata, udata);
                }

                /* Advance whichever buffer is in use past this chunk.  */
                if (kdata)
                        kdata += n;
                else
                        udata += n;
                addr += n;
                size -= n;
        } while (ret == 0 && size > 0);

        return ret;
}
679
680
/*
 * Common entry for a ptrace request on PID.  PTRACE_TRACEME and
 * PTRACE_ATTACH are handled entirely here.  For any other request this
 * finds the child, verifies it is our ptracee and (except PTRACE_KILL)
 * already quiescent, then fills in *CHILDP, *ENGINEP, and *STATEP with
 * a task reference held and returns -EIO.  NOTE(review): -EIO appears
 * to be the "proceed to decode the request" sentinel for the callers --
 * confirm against the arch ptrace entry points.  Any other return value
 * is final and no reference is held.
 */
static int
ptrace_start(long pid, long request,
             struct task_struct **childp,
             struct utrace_attached_engine **enginep,
             struct ptrace_state **statep)

{
        struct task_struct *child;
        struct utrace_attached_engine *engine;
        struct ptrace_state *state;
        int ret;

        if (request == PTRACE_TRACEME)
                return ptrace_traceme();

        ret = -ESRCH;
        read_lock(&tasklist_lock);
        child = find_task_by_pid(pid);
        if (child)
                get_task_struct(child);
        read_unlock(&tasklist_lock);
        pr_debug("ptrace pid %ld => %p\n", pid, child);
        if (!child)
                goto out;

        ret = -EPERM;
        if (pid == 1)           /* you may not mess with init */
                goto out_tsk;

        /* vserver: the child must be visible from our context.  */
        ret = -EPERM;
        if (!vx_check(vx_task_xid(child), VS_WATCH_P|VS_IDENT))
                goto out_tsk;

        if (request == PTRACE_ATTACH) {
                ret = ptrace_attach(child);
                goto out_tsk;
        }

        /*
         * engine->data was published with rcu_assign_pointer in
         * ptrace_setup; hold rcu_read_lock while extracting it.
         */
        rcu_read_lock();
        engine = utrace_attach(child, UTRACE_ATTACH_MATCH_OPS,
                               &ptrace_utrace_ops, 0);
        ret = -ESRCH;
        if (IS_ERR(engine) || engine == NULL)
                goto out_tsk_rcu;
        state = rcu_dereference((struct ptrace_state *) engine->data);
        if (state == NULL || state->parent != current)
                goto out_tsk_rcu;
        rcu_read_unlock();

        /*
         * Traditional ptrace behavior demands that the target already be
         * quiescent, but not dead.
         */
        if (request != PTRACE_KILL
            && !(engine->flags & UTRACE_ACTION_QUIESCE)) {
                pr_debug("%d not stopped (%lu)\n", child->pid, child->state);
                goto out_tsk;
        }

        /*
         * We do this for all requests to match traditional ptrace behavior.
         * If the machine state synchronization done at context switch time
         * includes e.g. writing back to user memory, we want to make sure
         * that has finished before a PTRACE_PEEKDATA can fetch the results.
         * On most machines, only regset data is affected by context switch
         * and calling utrace_regset later on will take care of that, so
         * this is superfluous.
         *
         * To do this purely in utrace terms, we could do:
         *  (void) utrace_regset(child, engine, utrace_native_view(child), 0);
         */
        wait_task_inactive(child);

        if (child->exit_state)
                goto out_tsk;

        /* Success: hand back the child with its reference still held.  */
        *childp = child;
        *enginep = engine;
        *statep = state;
        return -EIO;

out_tsk_rcu:
        rcu_read_unlock();
out_tsk:
        put_task_struct(child);
out:
        return ret;
}
769
/*
 * Handle the ptrace requests common to all architectures, after
 * ptrace_start has resolved @child, its utrace @engine, and our @state,
 * and left the child quiescent.  Returns 0 on success, or a -errno value;
 * -EIO (the initial value of ret) means the request was not recognized
 * here.  @addr is unused by the common requests but kept for signature
 * parity with the arch_ptrace path.
 */
static int
ptrace_common(long request, struct task_struct *child,
              struct utrace_attached_engine *engine,
              struct ptrace_state *state,
              unsigned long addr, long data)
{
        unsigned long flags;
        int ret = -EIO;

        switch (request) {
        case PTRACE_DETACH:
                /*
                 * Detach a process that was attached.
                 */
                ret = ptrace_induce_signal(child, engine, data);
                if (!ret) {
                        ret = ptrace_detach(child, engine, state);
                        if (ret == -EALREADY) /* Already a zombie.  */
                                ret = -ESRCH;
                        if (ret)
                                BUG_ON(ret != -ESRCH);
                }
                break;

                /*
                 * These are the operations that resume the child running.
                 */
        case PTRACE_KILL:
                /* KILL is CONT with a forced SIGKILL; fall through.  */
                data = SIGKILL;
        case PTRACE_CONT:
        case PTRACE_SYSCALL:
#ifdef PTRACE_SYSEMU
        case PTRACE_SYSEMU:
        case PTRACE_SYSEMU_SINGLESTEP:
#endif
#ifdef PTRACE_SINGLEBLOCK
        case PTRACE_SINGLEBLOCK:
                /*
                 * Reject block-step when the arch doesn't support it.
                 * If ARCH_HAS_BLOCK_STEP is defined and nonzero the check
                 * compiles away; otherwise SINGLEBLOCK yields -EIO here.
                 */
# ifdef ARCH_HAS_BLOCK_STEP
                if (! ARCH_HAS_BLOCK_STEP)
# endif
                        if (request == PTRACE_SINGLEBLOCK)
                                break;
#endif
        case PTRACE_SINGLESTEP:
                /* Likewise reject single-step without arch support.  */
#ifdef ARCH_HAS_SINGLE_STEP
                if (! ARCH_HAS_SINGLE_STEP)
#endif
                        if (request == PTRACE_SINGLESTEP
#ifdef PTRACE_SYSEMU_SINGLESTEP
                            || request == PTRACE_SYSEMU_SINGLESTEP
#endif
                                )
                                break;

                /* Queue the continuation signal (0 = none) first.  */
                ret = ptrace_induce_signal(child, engine, data);
                if (ret)
                        break;


                /*
                 * Reset the action flags without QUIESCE, so it resumes.
                 */
                flags = 0;
#ifdef PTRACE_SYSEMU
                /* Remember whether syscalls should be emulated, not run.  */
                state->sysemu = (request == PTRACE_SYSEMU_SINGLESTEP
                                        || request == PTRACE_SYSEMU);
#endif
                if (request == PTRACE_SINGLESTEP
#ifdef PTRACE_SYSEMU
                    || request == PTRACE_SYSEMU_SINGLESTEP
#endif
                        )
                        flags |= UTRACE_ACTION_SINGLESTEP;
#ifdef PTRACE_SINGLEBLOCK
                else if (request == PTRACE_SINGLEBLOCK)
                        flags |= UTRACE_ACTION_BLOCKSTEP;
#endif
                /*
                 * SYSCALL traces both entry and exit; SYSEMU only needs
                 * the entry event to divert the syscall.
                 */
                if (request == PTRACE_SYSCALL)
                        flags |= UTRACE_EVENT_SYSCALL;
#ifdef PTRACE_SYSEMU
                else if (request == PTRACE_SYSEMU
                         || request == PTRACE_SYSEMU_SINGLESTEP)
                        flags |= UTRACE_EVENT(SYSCALL_ENTRY);
#endif
                /* -ESRCH (child died under us) is the only tolerated
                 * failure; anything else is a bug.  */
                ret = ptrace_update(child, engine, flags, 1);
                if (ret)
                        BUG_ON(ret != -ESRCH);
                ret = 0;
                break;

#ifdef PTRACE_OLDSETOPTIONS
        case PTRACE_OLDSETOPTIONS:
#endif
        case PTRACE_SETOPTIONS:
                ret = -EINVAL;
                if (data & ~PTRACE_O_MASK)
                        break;
                state->options = data;
                /* Keep the child quiescent; only the event mask changes.  */
                ret = ptrace_update(child, engine, UTRACE_ACTION_QUIESCE, 1);
                if (ret)
                        BUG_ON(ret != -ESRCH);
                ret = 0;
                break;
        }

        return ret;
}
877
878
/*
 * Architecture-independent entry point for the ptrace system call.
 * Resolves the target via ptrace_start, gives arch_ptrace first crack at
 * the request, then handles the memory-peek/poke and siginfo/eventmsg
 * requests here, deferring everything else to ptrace_common.
 */
asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
{
        struct task_struct *child;
        struct utrace_attached_engine *engine;
        struct ptrace_state *state;
        long ret, val;

        pr_debug("%d sys_ptrace(%ld, %ld, %lx, %lx)\n",
                 current->pid, request, pid, addr, data);

        /*
         * ptrace_start returns -EIO as the "proceed with generic
         * handling" code; any other value is a final result (it has
         * already released the task in that case, except on success
         * paths where it took a reference we drop at out_tsk).
         */
        ret = ptrace_start(pid, request, &child, &engine, &state);
        if (ret != -EIO)
                goto out;

        /* Arch-specific requests first; -ENOSYS means "not mine".  */
        val = 0;
        ret = arch_ptrace(&request, child, engine, addr, data, &val);
        if (ret != -ENOSYS) {
                if (ret == 0) {
                        ret = val;
                        force_successful_syscall_return();
                }
                goto out_tsk;
        }

        switch (request) {
        default:
                ret = ptrace_common(request, child, engine, state, addr, data);
                break;

        case PTRACE_PEEKTEXT: /* read word at location addr. */
        case PTRACE_PEEKDATA: {
                unsigned long tmp;
                int copied;

                copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
                ret = -EIO;
                if (copied != sizeof(tmp))
                        break;
                ret = put_user(tmp, (unsigned long __user *) data);
                break;
        }

        case PTRACE_POKETEXT: /* write the word at location addr. */
        case PTRACE_POKEDATA:
                ret = 0;
                if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data))
                        break;
                ret = -EIO;
                break;

        case PTRACE_GETEVENTMSG:
                /* Report 0 if the last stop carried siginfo instead.  */
                ret = put_user(state->have_eventmsg
                               ? state->u.eventmsg : 0L,
                               (unsigned long __user *) data);
                break;
        case PTRACE_GETSIGINFO:
                /* Only valid while stopped with a pending siginfo.  */
                ret = -EINVAL;
                if (!state->have_eventmsg && state->u.siginfo)
                        ret = copy_siginfo_to_user((siginfo_t __user *) data,
                                                   state->u.siginfo);
                break;
        case PTRACE_SETSIGINFO:
                ret = -EINVAL;
                if (!state->have_eventmsg && state->u.siginfo) {
                        ret = 0;
                        if (copy_from_user(state->u.siginfo,
                                           (siginfo_t __user *) data,
                                           sizeof(siginfo_t)))
                                ret = -EFAULT;
                }
                break;
        }

out_tsk:
        put_task_struct(child);
out:
        pr_debug("%d ptrace -> %lx\n", current->pid, ret);
        return ret;
}
958
959
960 #ifdef CONFIG_COMPAT
961 #include <linux/compat.h>
962
/*
 * 32-bit-compat entry point, mirroring sys_ptrace: same flow, but words
 * transferred to/from the tracer's address space are compat-sized, and
 * siginfo goes through the compat_siginfo translation helpers.
 */
asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
                                  compat_ulong_t addr, compat_long_t cdata)
{
        /* Zero-extended copy of cdata for ptrace_common's long arg.  */
        const unsigned long data = (unsigned long) (compat_ulong_t) cdata;
        struct task_struct *child;
        struct utrace_attached_engine *engine;
        struct ptrace_state *state;
        compat_long_t ret, val;

        pr_debug("%d compat_sys_ptrace(%d, %d, %x, %x)\n",
                 current->pid, request, pid, addr, cdata);
        /* -EIO from ptrace_start means "continue with generic handling".  */
        ret = ptrace_start(pid, request, &child, &engine, &state);
        if (ret != -EIO)
                goto out;

        val = 0;
        ret = arch_compat_ptrace(&request, child, engine, addr, cdata, &val);
        if (ret != -ENOSYS) {
                if (ret == 0) {
                        ret = val;
                        force_successful_syscall_return();
                }
                goto out_tsk;
        }

        switch (request) {
        default:
                ret = ptrace_common(request, child, engine, state, addr, data);
                break;

        case PTRACE_PEEKTEXT: /* read word at location addr. */
        case PTRACE_PEEKDATA: {
                compat_ulong_t tmp;
                int copied;

                copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
                ret = -EIO;
                if (copied != sizeof(tmp))
                        break;
                ret = put_user(tmp, (compat_ulong_t __user *) data);
                break;
        }

        case PTRACE_POKETEXT: /* write the word at location addr. */
        case PTRACE_POKEDATA:
                /* Write only the compat-sized word the tracer passed.  */
                ret = 0;
                if (access_process_vm(child, addr, &cdata, sizeof(cdata), 1) == sizeof(cdata))
                        break;
                ret = -EIO;
                break;

        case PTRACE_GETEVENTMSG:
                ret = put_user(state->have_eventmsg
                               ? state->u.eventmsg : 0L,
                               (compat_long_t __user *) data);
                break;
        case PTRACE_GETSIGINFO:
                /* Only valid while stopped with a pending siginfo.  */
                ret = -EINVAL;
                if (!state->have_eventmsg && state->u.siginfo)
                        ret = copy_siginfo_to_user32(
                                (struct compat_siginfo __user *) data,
                                state->u.siginfo);
                break;
        case PTRACE_SETSIGINFO:
                ret = -EINVAL;
                if (!state->have_eventmsg && state->u.siginfo
                    && copy_siginfo_from_user32(
                            state->u.siginfo,
                            (struct compat_siginfo __user *) data))
                        ret = -EFAULT;
                break;
        }

out_tsk:
        put_task_struct(child);
out:
        pr_debug("%d ptrace -> %lx\n", current->pid, (long)ret);
        return ret;
}
1042 #endif
1043
1044
1045 /*
1046  * Detach the zombie being reported for wait.
1047  */
/*
 * Detach @p (a zombie tracee of @tsk being reported by wait) from its
 * ptrace engine.  Called without rcu_read_lock held; may block in
 * wait_task_inactive while the tracee finishes its report_death
 * callbacks.  @state is the tracee's ptrace_state as sampled by
 * ptrace_do_wait.
 */
static inline void
detach_zombie(struct task_struct *tsk,
              struct task_struct *p, struct ptrace_state *state)
{
        int detach_error;
        struct utrace_attached_engine *engine;

restart:
        detach_error = 0;
        rcu_read_lock();
        if (tsk == current)
                engine = state->engine;
        else {
                /*
                 * We've excluded other ptrace_do_wait calls.  But the
                 * ptracer itself might have done ptrace_detach while we
                 * did not have rcu_read_lock.  So double-check that state
                 * is still valid.
                 */
                engine = utrace_attach(
                        p, (UTRACE_ATTACH_MATCH_OPS
                            | UTRACE_ATTACH_MATCH_DATA),
                        &ptrace_utrace_ops,
                        (unsigned long) state);
                if (IS_ERR(engine) || state->parent != tsk)
                        detach_error = -ESRCH;
                else
                        BUG_ON(state->engine != engine);
        }
        rcu_read_unlock();
        if (likely(!detach_error))
                detach_error = ptrace_detach(p, engine, state);
        if (unlikely(detach_error == -EALREADY)) {
                /*
                 * It's still doing report_death callbacks.
                 * Just wait for it to settle down.
                 */
                wait_task_inactive(p); /* Might block.  */
                goto restart;
        }
        /*
         * A failure with -ESRCH means that report_reap is
         * already running and will do the cleanup, or that
         * we lost a race with ptrace_detach in another
         * thread or with the automatic detach in
         * report_death.
         */
        if (detach_error)
                BUG_ON(detach_error != -ESRCH);
}
1098
1099 /*
1100  * We're called with tasklist_lock held for reading.
1101  * If we return -ECHILD or zero, next_thread(tsk) must still be valid to use.
1102  * If we return another error code, or a successful PID value, we
1103  * release tasklist_lock first.
1104  */
/*
 * Scan tsk->ptracees for a child matching @pid/@options that has
 * something to report, and fill in the usual wait4/waitid result
 * buffers.  Return values follow the contract in the comment above:
 * -ECHILD/0 keep tasklist_lock held (0 = "matching child, caller should
 * block"); a pid or other error means we dropped tasklist_lock.
 */
int
ptrace_do_wait(struct task_struct *tsk,
               pid_t pid, int options, struct siginfo __user *infop,
               int __user *stat_addr, struct rusage __user *rusagep)
{
        struct ptrace_state *state;
        struct task_struct *p;
        int err = -ECHILD;
        int exit_code, why, status;

        rcu_read_lock();
        list_for_each_entry_rcu(state, &tsk->ptracees, entry) {
                p = state->task;

                /* Standard wait pid selection: >0 exact pid, 0 our
                 * process group, <-1 the group -pid, -1 anyone.  */
                if (pid > 0) {
                        if (p->pid != pid)
                                continue;
                } else if (!pid) {
                        if (process_group(p) != process_group(current))
                                continue;
                } else if (pid != -1) {
                        if (process_group(p) != -pid)
                                continue;
                }
                /* __WCLONE selects children not reporting SIGCHLD;
                 * __WALL accepts both kinds.  */
                if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
                    && !(options & __WALL))
                        continue;
                if (security_task_wait(p))
                        continue;

                /*
                 * This is a matching child.  If we don't win now, tell
                 * our caller to block and repeat.  From this point we
                 * must ensure that wait_chldexit will get a wakeup for
                 * any tracee stopping, dying, or being detached.
                 * For death, tasklist_lock guarantees this already.
                 */
                err = 0;

                switch (p->exit_state) {
                case EXIT_ZOMBIE:
                        if (!likely(options & WEXITED))
                                continue;
                        /* A dead leader with live threads isn't
                         * reportable until the group empties.  */
                        if (delay_group_leader(p)) {
                                struct task_struct *next = next_thread(p);
                                pr_debug("%d ptrace_do_wait leaving %d "
                                         "zombie code %x "
                                         "delay_group_leader (%d/%lu)\n",
                                         current->pid, p->pid, p->exit_code,
                                         next->pid, next->state);
                                continue;
                        }
                        exit_code = p->exit_code;
                        goto found;
                case EXIT_DEAD:
                        continue;
                default:
                        /*
                         * tasklist_lock holds up any transitions to
                         * EXIT_ZOMBIE.  After releasing it we are
                         * guaranteed a wakeup on wait_chldexit after
                         * any new deaths.
                         */
                        if (p->flags & PF_EXITING)
                                /*
                                 * It's in do_exit and might have set
                                 * p->exit_code already, but it's not quite
                                 * dead yet.  It will get to report_death
                                 * and wakes us up when it finishes.
                                 */
                                continue;
                        break;
                }

                /*
                 * This xchg atomically ensures that only one do_wait
                 * call can report this thread.  Because exit_code is
                 * always set before do_notify wakes us up, after this
                 * check fails we are sure to get a wakeup if it stops.
                 */
                exit_code = xchg(&p->exit_code, 0);
                if (exit_code)
                        goto found;

                // XXX should handle WCONTINUED

                pr_debug("%d ptrace_do_wait leaving %d state %lu code %x\n",
                         current->pid, p->pid, p->state, p->exit_code);
        }
        rcu_read_unlock();
        if (err == 0)
                pr_debug("%d ptrace_do_wait blocking\n", current->pid);

        return err;

found:
        BUG_ON(state->parent != tsk);
        rcu_read_unlock();

        pr_debug("%d ptrace_do_wait (%d) found %d code %x (%lu/%d)\n",
                 current->pid, tsk->pid, p->pid, exit_code,
                 p->exit_state, p->exit_signal);

        /*
         * If there was a group exit in progress, all threads report that
         * status.  Most will have SIGKILL in their own exit_code.
         */
        if (p->signal->flags & SIGNAL_GROUP_EXIT)
                exit_code = p->signal->group_exit_code;

        if (p->exit_state) {
                if (unlikely(p->parent == tsk && p->exit_signal != -1))
                        /*
                         * This is our natural child we were ptracing.
                         * When it dies it detaches (see ptrace_report_death).
                         * So we're seeing it here in a race.  When it
                         * finishes detaching it will become reapable in
                         * the normal wait_task_zombie path instead.
                         */
                        return 0;
                /* Decode a death: clean exit vs. signal/core dump.  */
                if ((exit_code & 0x7f) == 0) {
                        why = CLD_EXITED;
                        status = exit_code >> 8;
                }
                else {
                        why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;
                        status = exit_code & 0x7f;
                }
        }
        else {
                /* Still alive: a ptrace stop, reported as wstatus
                 * (signal << 8) | 0x7f like WIFSTOPPED expects.  */
                why = CLD_TRAPPED;
                status = exit_code;
                exit_code = (status << 8) | 0x7f;
        }

        /*
         * At this point we are committed to a successful return
         * or a user error return.  Release the tasklist_lock.
         */
        get_task_struct(p);
        read_unlock(&tasklist_lock);

        if (rusagep)
                err = getrusage(p, RUSAGE_BOTH, rusagep);
        if (infop) {
                if (!err)
                        err = put_user(SIGCHLD, &infop->si_signo);
                if (!err)
                        err = put_user(0, &infop->si_errno);
                if (!err)
                        err = put_user((short)why, &infop->si_code);
                if (!err)
                        err = put_user(p->pid, &infop->si_pid);
                if (!err)
                        err = put_user(p->uid, &infop->si_uid);
                if (!err)
                        err = put_user(status, &infop->si_status);
        }
        if (!err && stat_addr)
                err = put_user(exit_code, stat_addr);

        if (!err) {
                if (why != CLD_TRAPPED)
                        /*
                         * This was a death report.  The ptracer's wait
                         * does an implicit detach, so the zombie reports
                         * to its real parent now.
                         */
                        detach_zombie(tsk, p, state);
                err = p->pid;
        }

        put_task_struct(p);

        return err;
}
1281
1282
1283 /*
1284  * All the report callbacks (except death and reap) are subject to a race
1285  * with ptrace_exit doing a quick detach and ptrace_done.  It can do this
1286  * even when the target is not quiescent, so a callback may already be in
1287  * progress when it does ptrace_done.  Callbacks use this function to fetch
1288  * the struct ptrace_state while ensuring it doesn't disappear until
1289  * put_ptrace_state is called.  This just uses RCU, since state and
1290  * anything we try to do to state->parent is safe under rcu_read_lock.
1291  */
1292 static struct ptrace_state *
1293 get_ptrace_state(struct utrace_attached_engine *engine,
1294                  struct task_struct *tsk)
1295 {
1296         struct ptrace_state *state;
1297
1298         rcu_read_lock();
1299         state = rcu_dereference((struct ptrace_state *) engine->data);
1300         if (likely(state != NULL))
1301                 return state;
1302
1303         rcu_read_unlock();
1304         return NULL;
1305 }
1306
/*
 * Drop the RCU read-side critical section taken by a successful
 * get_ptrace_state call.  @state itself is unused; it documents the
 * pairing at call sites.
 */
static inline void
put_ptrace_state(struct ptrace_state *state)
{
        rcu_read_unlock();
}
1312
1313
/*
 * Build a SIGCHLD siginfo describing @tsk's state change (@why is a
 * CLD_* code) and deliver it to @parent, honoring the parent's
 * SA_NOCLDSTOP/SA_NOCLDWAIT settings.  Always wakes the parent's
 * wait_chldexit queue even when no signal is sent, so blocked wait4
 * callers re-scan.
 */
static void
do_notify(struct task_struct *tsk, struct task_struct *parent, int why)
{
        struct siginfo info;
        unsigned long flags;
        struct sighand_struct *sighand;
        int sa_mask;

        info.si_signo = SIGCHLD;
        info.si_errno = 0;
        info.si_pid = tsk->pid;
        info.si_uid = tsk->uid;

        /* FIXME: find out whether or not this is supposed to be c*time. */
        info.si_utime = cputime_to_jiffies(tsk->utime);
        info.si_stime = cputime_to_jiffies(tsk->stime);

        sa_mask = SA_NOCLDSTOP;
        info.si_code = why;
        info.si_status = tsk->exit_code & 0x7f;
        if (why == CLD_CONTINUED)
                info.si_status = SIGCONT;
        else if (why == CLD_STOPPED)
                /* NOTE(review): stop signal taken from group_exit_code;
                 * presumably set by the group-stop path — verify.  */
                info.si_status = tsk->signal->group_exit_code & 0x7f;
        else if (why == CLD_EXITED) {
                /* Death report: refine the CLD_* code from exit_code
                 * (core dump flag 0x80, killing signal in 0x7f).  */
                sa_mask = SA_NOCLDWAIT;
                if (tsk->exit_code & 0x80)
                        info.si_code = CLD_DUMPED;
                else if (tsk->exit_code & 0x7f)
                        info.si_code = CLD_KILLED;
                else {
                        info.si_code = CLD_EXITED;
                        info.si_status = tsk->exit_code >> 8;
                }
        }

        read_lock(&tasklist_lock);
        /* Parent already released its signal struct: nothing to notify.  */
        if (unlikely(parent->signal == NULL))
                goto out;

        sighand = parent->sighand;
        spin_lock_irqsave(&sighand->siglock, flags);
        if (sighand->action[SIGCHLD-1].sa.sa_handler != SIG_IGN &&
            !(sighand->action[SIGCHLD-1].sa.sa_flags & sa_mask))
                __group_send_sig_info(SIGCHLD, &info, parent);
        /*
         * Even if SIGCHLD is not generated, we must wake up wait4 calls.
         */
        wake_up_interruptible_sync(&parent->signal->wait_chldexit);
        spin_unlock_irqrestore(&sighand->siglock, flags);

out:
        read_unlock(&tasklist_lock);
}
1368
/*
 * Common tail for all ptrace stop reports: quiesce the tracee, publish
 * @code in tsk->exit_code, and notify the tracer with CLD_TRAPPED.
 * Called on a path that holds the RCU reference from get_ptrace_state,
 * which is dropped here via put_ptrace_state.
 */
static u32
ptrace_report(struct utrace_attached_engine *engine,
              struct task_struct *tsk,
              struct ptrace_state *state,
              int code)
{
        const struct utrace_regset *regset;

        pr_debug("%d ptrace_report %d engine %p"
                 " state %p code %x parent %d (%p)\n",
                 current->pid, tsk->pid, engine, state, code,
                 state->parent->pid, state->parent);
        if (!state->have_eventmsg && state->u.siginfo) {
                const siginfo_t *si = state->u.siginfo;
                pr_debug("  si %d code %x errno %d addr %p\n",
                         si->si_signo, si->si_code, si->si_errno,
                         si->si_addr);
        }

        /*
         * Set our QUIESCE flag right now, before notifying the tracer.
         * We do this before setting tsk->exit_code rather than
         * by using UTRACE_ACTION_NEWSTATE in our return value, to
         * ensure that the tracer can't get the notification and then
         * try to resume us with PTRACE_CONT before we set the flag.
         */
        utrace_set_flags(tsk, engine, engine->flags | UTRACE_ACTION_QUIESCE);

        /*
         * If regset 0 has a writeback call, do it now.  On register window
         * machines, this makes sure the user memory backing the register
         * data is up to date by the time wait_task_inactive returns to
         * ptrace_start in our tracer doing a PTRACE_PEEKDATA or the like.
         */
        regset = utrace_regset(tsk, engine, utrace_native_view(tsk), 0);
        if (regset->writeback)
                (*regset->writeback)(tsk, regset, 0);

        /* exit_code == 0 would look like "nothing to report" to wait.  */
        BUG_ON(code == 0);
        tsk->exit_code = code;
        do_notify(tsk, state->parent, CLD_TRAPPED);

        pr_debug("%d ptrace_report quiescing exit_code %x\n",
                 current->pid, current->exit_code);

        put_ptrace_state(state);

        return UTRACE_ACTION_RESUME;
}
1418
1419 static inline u32
1420 ptrace_event(struct utrace_attached_engine *engine,
1421              struct task_struct *tsk,
1422              struct ptrace_state *state,
1423              int event)
1424 {
1425         state->syscall = 0;
1426         return ptrace_report(engine, tsk, state, (event << 8) | SIGTRAP);
1427 }
1428
1429 /*
1430  * Unlike other report callbacks, this can't be called while ptrace_exit
1431  * is doing ptrace_done in parallel, so we don't need get_ptrace_state.
1432  */
/*
 * utrace death callback.  Natural children detach here so the regular
 * wait path reaps them; everything else just notifies the tracer and
 * stays attached (NOREAP) until the tracer's wait does the detach.
 */
static u32
ptrace_report_death(struct utrace_attached_engine *engine,
                    struct task_struct *tsk)
{
        /* No get_ptrace_state: ptrace_exit can't race us here (see the
         * comment above this function's declaration context).  */
        struct ptrace_state *state = (struct ptrace_state *) engine->data;

        if (tsk->exit_code == 0 && unlikely(tsk->flags & PF_SIGNALED))
                /*
                 * This can only mean that tsk->exit_code was clobbered
                 * by ptrace_update or ptrace_do_wait in a race with
                 * an asynchronous wakeup and exit for SIGKILL.
                 */
                tsk->exit_code = SIGKILL;

        if (tsk->parent == state->parent && tsk->exit_signal != -1) {
                /*
                 * This is a natural child (excluding clone siblings of a
                 * child group_leader), so we detach and let the normal
                 * reporting happen once our NOREAP action is gone.  But
                 * first, generate a SIGCHLD for those cases where normal
                 * behavior won't.  A ptrace'd child always generates SIGCHLD.
                 */
                pr_debug("ptrace %d death natural parent %d exit_code %x\n",
                         tsk->pid, state->parent->pid, tsk->exit_code);
                if (!thread_group_empty(tsk))
                        do_notify(tsk, state->parent, CLD_EXITED);
                /* Tear down: unlink, clear the engine's pointer for RCU
                 * readers, then free via ptrace_done.  */
                ptrace_state_unlink(state);
                rcu_assign_pointer(engine->data, 0UL);
                ptrace_done(state);
                return UTRACE_ACTION_DETACH;
        }

        /*
         * This might be a second report_death callback for a group leader
         * that was delayed when its original report_death callback was made.
         * Repeating do_notify is exactly what we need for that case too.
         * After the wakeup, ptrace_do_wait will see delay_group_leader false.
         */

        pr_debug("ptrace %d death notify %d exit_code %x: ",
                 tsk->pid, state->parent->pid, tsk->exit_code);
        do_notify(tsk, state->parent, CLD_EXITED);
        pr_debug("%d notified %d\n", tsk->pid, state->parent->pid);
        return UTRACE_ACTION_RESUME;
}
1478
1479 /*
1480  * We get this only in the case where our UTRACE_ACTION_NOREAP was ignored.
1481  * That happens solely when a non-leader exec reaps the old leader.
1482  */
1483 static void
1484 ptrace_report_reap(struct utrace_attached_engine *engine,
1485                    struct task_struct *tsk)
1486 {
1487         struct ptrace_state *state = get_ptrace_state(engine, tsk);
1488         if (state != NULL) {
1489                 ptrace_state_unlink(state);
1490                 rcu_assign_pointer(engine->data, 0UL);
1491                 ptrace_done(state);
1492                 put_ptrace_state(state);
1493         }
1494 }
1495
1496 /*
1497  * Start tracing the child.  This has to do put_ptrace_state before it can
1498  * do allocation that might block.
1499  */
1500 static void
1501 ptrace_clone_setup(struct utrace_attached_engine *engine,
1502                    struct task_struct *parent,
1503                    struct ptrace_state *state,
1504                    struct task_struct *child)
1505 {
1506         struct task_struct *tracer;
1507         struct utrace_attached_engine *child_engine;
1508         struct ptrace_state *child_state;
1509         int ret;
1510         u8 options;
1511         int cap_sys_ptrace;
1512
1513         tracer = state->parent;
1514         options = state->options;
1515         cap_sys_ptrace = state->cap_sys_ptrace;
1516         get_task_struct(tracer);
1517         put_ptrace_state(state);
1518
1519         child_engine = utrace_attach(child, (UTRACE_ATTACH_CREATE
1520                                              | UTRACE_ATTACH_EXCLUSIVE
1521                                              | UTRACE_ATTACH_MATCH_OPS),
1522                                      &ptrace_utrace_ops, 0UL);
1523         if (unlikely(IS_ERR(child_engine))) {
1524                 BUG_ON(PTR_ERR(child_engine) != -ENOMEM);
1525                 put_task_struct(tracer);
1526                 goto nomem;
1527         }
1528
1529         child_state = ptrace_setup(child, child_engine,
1530                                    tracer, options, cap_sys_ptrace, NULL);
1531
1532         put_task_struct(tracer);
1533
1534         if (unlikely(IS_ERR(child_state))) {
1535                 (void) utrace_detach(child, child_engine);
1536
1537                 if (PTR_ERR(child_state) == -ENOMEM)
1538                         goto nomem;
1539
1540                 /*
1541                  * Our tracer has started exiting.  It's
1542                  * too late to set it up tracing the child.
1543                  */
1544                 BUG_ON(PTR_ERR(child_state) != -EALREADY);
1545         }
1546         else {
1547                 sigaddset(&child->pending.signal, SIGSTOP);
1548                 set_tsk_thread_flag(child, TIF_SIGPENDING);
1549                 ret = ptrace_update(child, child_engine, 0, 0);
1550
1551                 /*
1552                  * The child hasn't run yet, it can't have died already.
1553                  */
1554                 BUG_ON(ret);
1555         }
1556
1557         return;
1558
1559 nomem:
1560         printk(KERN_ERR "ptrace out of memory, lost child %d of %d",
1561                child->pid, parent->pid);
1562 }
1563
/*
 * utrace clone callback for the tracing parent.  Classifies the clone as
 * fork/vfork/clone for PTRACE_O_TRACE* option matching, auto-attaches
 * the tracer to the child when the options (or CLONE_PTRACE) ask for it,
 * and reports the corresponding PTRACE_EVENT_* stop if enabled.
 */
static u32
ptrace_report_clone(struct utrace_attached_engine *engine,
                    struct task_struct *parent,
                    unsigned long clone_flags, struct task_struct *child)
{
        int event, option;
        struct ptrace_state *state = get_ptrace_state(engine, parent);
        if (unlikely(state == NULL))
                return UTRACE_ACTION_RESUME;

        pr_debug("%d (%p) engine %p"
                 " ptrace_report_clone child %d (%p) fl %lx\n",
                 parent->pid, parent, engine, child->pid, child, clone_flags);

        /* Default: a plain fork (CSIGNAL == SIGCHLD, no CLONE_VFORK).  */
        event = PTRACE_EVENT_FORK;
        option = PTRACE_O_TRACEFORK;
        if (clone_flags & CLONE_VFORK) {
                event = PTRACE_EVENT_VFORK;
                option = PTRACE_O_TRACEVFORK;
        }
        else if ((clone_flags & CSIGNAL) != SIGCHLD) {
                event = PTRACE_EVENT_CLONE;
                option = PTRACE_O_TRACECLONE;
        }

        if (state->options & option) {
                /* Tracer asked for this event: stash the child pid for
                 * PTRACE_GETEVENTMSG.  */
                state->have_eventmsg = 1;
                state->u.eventmsg = child->pid;
        }
        else
                event = 0;

        if (!(clone_flags & CLONE_UNTRACED)
            && (event || (clone_flags & CLONE_PTRACE))) {
                /*
                 * Have our tracer start following the child too.
                 */
                ptrace_clone_setup(engine, parent, state, child);

                /*
                 * That did put_ptrace_state, so we have to check
                 * again in case our tracer just started exiting.
                 */
                state = get_ptrace_state(engine, parent);
                if (unlikely(state == NULL))
                        return UTRACE_ACTION_RESUME;
        }

        if (event)
                return ptrace_event(engine, parent, state, event);

        put_ptrace_state(state);

        return UTRACE_ACTION_RESUME;
}
1619
1620
1621 static u32
1622 ptrace_report_vfork_done(struct utrace_attached_engine *engine,
1623                          struct task_struct *parent, pid_t child_pid)
1624 {
1625         struct ptrace_state *state = get_ptrace_state(engine, parent);
1626         if (unlikely(state == NULL))
1627                 return UTRACE_ACTION_RESUME;
1628
1629         state->have_eventmsg = 1;
1630         state->u.eventmsg = child_pid;
1631         return ptrace_event(engine, parent, state, PTRACE_EVENT_VFORK_DONE);
1632 }
1633
1634
1635 static u32
1636 ptrace_report_signal(struct utrace_attached_engine *engine,
1637                      struct task_struct *tsk, struct pt_regs *regs,
1638                      u32 action, siginfo_t *info,
1639                      const struct k_sigaction *orig_ka,
1640                      struct k_sigaction *return_ka)
1641 {
1642         int signo = info == NULL ? SIGTRAP : info->si_signo;
1643         struct ptrace_state *state = get_ptrace_state(engine, tsk);
1644         if (unlikely(state == NULL))
1645                 return UTRACE_ACTION_RESUME;
1646
1647         state->syscall = 0;
1648         state->have_eventmsg = 0;
1649         state->u.siginfo = info;
1650         return ptrace_report(engine, tsk, state, signo) | UTRACE_SIGNAL_IGN;
1651 }
1652
1653 static u32
1654 ptrace_report_jctl(struct utrace_attached_engine *engine,
1655                    struct task_struct *tsk, int type)
1656 {
1657         struct ptrace_state *state = get_ptrace_state(engine, tsk);
1658         if (unlikely(state == NULL))
1659                 return UTRACE_ACTION_RESUME;
1660
1661         pr_debug("ptrace %d jctl notify %d type %x exit_code %x\n",
1662                  tsk->pid, state->parent->pid, type, tsk->exit_code);
1663
1664         do_notify(tsk, state->parent, type);
1665         put_ptrace_state(state);
1666
1667         return UTRACE_JCTL_NOSIGCHLD;
1668 }
1669
1670 static u32
1671 ptrace_report_exec(struct utrace_attached_engine *engine,
1672                    struct task_struct *tsk,
1673                    const struct linux_binprm *bprm,
1674                    struct pt_regs *regs)
1675 {
1676         struct ptrace_state *state = get_ptrace_state(engine, tsk);
1677         if (unlikely(state == NULL))
1678                 return UTRACE_ACTION_RESUME;
1679
1680         return ptrace_event(engine, tsk, state,
1681                             (state->options & PTRACE_O_TRACEEXEC)
1682                             ? PTRACE_EVENT_EXEC : 0);
1683 }
1684
1685 static u32
1686 ptrace_report_syscall(struct utrace_attached_engine *engine,
1687                       struct task_struct *tsk, struct pt_regs *regs,
1688                       int entry)
1689 {
1690         struct ptrace_state *state = get_ptrace_state(engine, tsk);
1691         if (unlikely(state == NULL))
1692                 return UTRACE_ACTION_RESUME;
1693
1694 #ifdef PTRACE_SYSEMU
1695         if (entry && state->sysemu)
1696                 tracehook_abort_syscall(regs);
1697 #endif
1698         state->syscall = 1;
1699         return ptrace_report(engine, tsk, state,
1700                              ((state->options & PTRACE_O_TRACESYSGOOD)
1701                               ? 0x80 : 0) | SIGTRAP);
1702 }
1703
1704 static u32
1705 ptrace_report_syscall_entry(struct utrace_attached_engine *engine,
1706                             struct task_struct *tsk, struct pt_regs *regs)
1707 {
1708         return ptrace_report_syscall(engine, tsk, regs, 1);
1709 }
1710
1711 static u32
1712 ptrace_report_syscall_exit(struct utrace_attached_engine *engine,
1713                             struct task_struct *tsk, struct pt_regs *regs)
1714 {
1715         return ptrace_report_syscall(engine, tsk, regs, 0);
1716 }
1717
1718 static u32
1719 ptrace_report_exit(struct utrace_attached_engine *engine,
1720                    struct task_struct *tsk, long orig_code, long *code)
1721 {
1722         struct ptrace_state *state = get_ptrace_state(engine, tsk);
1723         if (unlikely(state == NULL))
1724                 return UTRACE_ACTION_RESUME;
1725
1726         state->have_eventmsg = 1;
1727         state->u.eventmsg = *code;
1728         return ptrace_event(engine, tsk, state, PTRACE_EVENT_EXIT);
1729 }
1730
1731 static int
1732 ptrace_unsafe_exec(struct utrace_attached_engine *engine,
1733                    struct task_struct *tsk)
1734 {
1735         int unsafe = LSM_UNSAFE_PTRACE;
1736         struct ptrace_state *state = get_ptrace_state(engine, tsk);
1737         if (likely(state != NULL) && state->cap_sys_ptrace)
1738                 unsafe = LSM_UNSAFE_PTRACE_CAP;
1739         put_ptrace_state(state);
1740         return unsafe;
1741 }
1742
1743 static struct task_struct *
1744 ptrace_tracer_task(struct utrace_attached_engine *engine,
1745                    struct task_struct *target)
1746 {
1747         struct task_struct *parent = NULL;
1748         struct ptrace_state *state = get_ptrace_state(engine, target);
1749         if (likely(state != NULL)) {
1750                 parent = state->parent;
1751                 put_ptrace_state(state);
1752         }
1753         return parent;
1754 }
1755
1756 static int
1757 ptrace_allow_access_process_vm(struct utrace_attached_engine *engine,
1758                                struct task_struct *target,
1759                                struct task_struct *caller)
1760 {
1761         struct ptrace_state *state;
1762         int ours = 0;
1763
1764         state = get_ptrace_state(engine, target);
1765         if (likely(state != NULL)) {
1766                 ours = (((engine->flags & UTRACE_ACTION_QUIESCE)
1767                          || target->state == TASK_STOPPED)
1768                         && state->parent == caller);
1769                 put_ptrace_state(state);
1770         }
1771
1772         return ours && security_ptrace(caller, target) == 0;
1773 }
1774
1775
/*
 * The utrace engine callbacks that implement ptrace semantics.  This is
 * the definition matching the forward declaration near the top of the
 * file.  (report_death and report_reap are defined elsewhere in this
 * file, outside this section.)
 */
static const struct utrace_engine_ops ptrace_utrace_ops =
{
        .report_syscall_entry = ptrace_report_syscall_entry,
        .report_syscall_exit = ptrace_report_syscall_exit,
        .report_exec = ptrace_report_exec,
        .report_jctl = ptrace_report_jctl,
        .report_signal = ptrace_report_signal,
        .report_vfork_done = ptrace_report_vfork_done,
        .report_clone = ptrace_report_clone,
        .report_exit = ptrace_report_exit,
        .report_death = ptrace_report_death,
        .report_reap = ptrace_report_reap,
        .unsafe_exec = ptrace_unsafe_exec,
        .tracer_task = ptrace_tracer_task,
        .allow_access_process_vm = ptrace_allow_access_process_vm,
};