2 * linux/kernel/ptrace.c
4 * (C) Copyright 1999 Linus Torvalds
6 * Common interfaces for "ptrace()" which we do not want
7 * to continually duplicate across every architecture.
10 #include <linux/capability.h>
11 #include <linux/module.h>
12 #include <linux/sched.h>
13 #include <linux/errno.h>
15 #include <linux/highmem.h>
16 #include <linux/pagemap.h>
17 #include <linux/smp_lock.h>
18 #include <linux/ptrace.h>
19 #include <linux/security.h>
20 #include <linux/signal.h>
21 #include <linux/utrace.h>
22 #include <linux/tracehook.h>
23 #include <linux/vs_context.h>
24 #include <asm/tracehook.h>
25 #include <asm/pgtable.h>
26 #include <asm/uaccess.h>
/*
 * Per-tracee bookkeeping for the utrace-based ptrace layer.
 * NOTE(review): this listing is line-sampled; the "struct ptrace_state {"
 * opener, its closing brace, the rcu head, and the u.{siginfo,eventmsg}
 * union referenced elsewhere in this file were elided from view.
 */
33 * These elements are always available, even when the struct is
34 * awaiting destruction at the next RCU callback point.
36 struct utrace_attached_engine *engine;
37 struct task_struct *task; /* Target task. */
38 struct task_struct *parent; /* Whom we report to. */
39 struct list_head entry; /* Entry on parent->ptracees list. */
41 u8 options; /* PTRACE_SETOPTIONS bits. */
42 unsigned int syscall:1; /* Reporting for syscall. */
44 unsigned int sysemu:1; /* PTRACE_SYSEMU in progress. */
46 unsigned int have_eventmsg:1; /* u.eventmsg valid. */
47 unsigned int cap_sys_ptrace:1; /* Tracer capable. */
51 unsigned long eventmsg;
/* Engine callback table; the initializer appears later in the file. */
56 static const struct utrace_engine_ops ptrace_utrace_ops; /* Initialized below. */
/*
 * Remove STATE from its tracer's ->ptracees list under task_lock on the
 * parent.  list_del_rcu means concurrent RCU readers may still see the
 * entry until a grace period elapses (freed later via ptrace_done).
 * (Line-sampled listing: the return-type line and braces are elided.)
 */
59 ptrace_state_unlink(struct ptrace_state *state)
61 	task_lock(state->parent);
62 	list_del_rcu(&state->entry);
63 	task_unlock(state->parent);
/*
 * Allocate (if not preallocated) and initialize a ptrace_state binding
 * TARGET's ENGINE to tracer PARENT, and link it on parent->ptracees.
 * Returns the state, or ERR_PTR on failure; -EALREADY when the parent
 * is already exiting so attaching would race with its teardown.
 * NOTE(review): line-sampled; the guard deciding between the passed-in
 * STATE and the kzalloc path, the task_lock(parent) call before the
 * PF_EXITING test, and the closing return are elided — confirm against
 * the full source.
 */
66 static struct ptrace_state *
67 ptrace_setup(struct task_struct *target, struct utrace_attached_engine *engine,
68 	     struct task_struct *parent, u8 options, int cap_sys_ptrace,
69 	     struct ptrace_state *state)
72 	state = kzalloc(sizeof *state, GFP_USER);
73 	if (unlikely(state == NULL))
74 		return ERR_PTR(-ENOMEM);
77 	state->engine = engine;
79 	state->parent = parent;
80 	state->options = options;
81 	state->cap_sys_ptrace = cap_sys_ptrace;
84 	if (unlikely(parent->flags & PF_EXITING)) {
87 		return ERR_PTR(-EALREADY);
89 	list_add_rcu(&state->entry, &state->parent->ptracees);
90 	task_unlock(state->parent);
/* The engine must not already carry a state; publish ours via RCU. */
92 	BUG_ON(engine->data != 0);
93 	rcu_assign_pointer(engine->data, (unsigned long) state);
/*
 * RCU callback: recover the ptrace_state from its embedded rcu head.
 * (The kfree of STATE is elided from this line-sampled listing.)
 */
99 ptrace_state_free(struct rcu_head *rhead)
101 	struct ptrace_state *state = container_of(rhead,
102 						  struct ptrace_state, rcu);
/*
 * Schedule STATE for freeing after the current RCU grace period, so
 * lock-free readers traversing ->ptracees cannot touch freed memory.
 */
107 ptrace_done(struct ptrace_state *state)
109 	INIT_RCU_HEAD(&state->rcu);
110 	call_rcu(&state->rcu, ptrace_state_free);
/*
 * Recompute the utrace event mask for TARGET from FLAGS plus the ptrace
 * state's PTRACE_SETOPTIONS bits, and push it with utrace_set_flags.
 * FROM_STOPPED nonzero means the tracer is resuming the target out of a
 * ptrace stop, so stale stop bookkeeping must be cleared first.
 * (Line-sampled listing; some statements are elided.)
 */
114 * Update the tracing engine state to match the new ptrace state.
116 static int __must_check
117 ptrace_update(struct task_struct *target,
118 	      struct utrace_attached_engine *engine,
119 	      unsigned long flags, int from_stopped)
121 	struct ptrace_state *state = (struct ptrace_state *) engine->data;
124 	 * These events are always reported.
126 	flags |= (UTRACE_EVENT(DEATH) | UTRACE_EVENT(EXEC)
127 		  | UTRACE_EVENT_SIGNAL_ALL | UTRACE_EVENT(JCTL));
130 	 * We always have to examine clone events to check for CLONE_PTRACE.
132 	flags |= UTRACE_EVENT(CLONE);
135 	 * PTRACE_SETOPTIONS can request more events.
137 	if (state->options & PTRACE_O_TRACEEXIT)
138 		flags |= UTRACE_EVENT(EXIT);
139 	if (state->options & PTRACE_O_TRACEVFORKDONE)
140 		flags |= UTRACE_EVENT(VFORK_DONE);
143 	 * ptrace always inhibits normal parent reaping.
144 	 * But for a corner case we sometimes see the REAP event anyway.
146 	flags |= UTRACE_ACTION_NOREAP | UTRACE_EVENT(REAP);
148 	if (from_stopped && !(flags & UTRACE_ACTION_QUIESCE)) {
150 		 * We're letting the thread resume from ptrace stop.
151 		 * If SIGKILL is waking it up, it can be racing with us here
152 		 * to set its own exit_code in do_exit. Though we clobber
153 		 * it here, we check for the case in ptrace_report_death.
155 		if (!unlikely(target->flags & PF_SIGNALED))
156 			target->exit_code = 0;
/* Drop any stashed siginfo pointer once the stop is over. */
158 		if (!state->have_eventmsg)
159 			state->u.siginfo = NULL;
161 		if (target->state == TASK_STOPPED) {
163 			 * We have to double-check for naughty de_thread
164 			 * reaping despite NOREAP, before we can get siglock.
166 			read_lock(&tasklist_lock);
167 			if (!target->exit_state) {
168 				spin_lock_irq(&target->sighand->siglock);
169 				if (target->state == TASK_STOPPED)
170 					target->signal->flags &=
171 						~SIGNAL_STOP_STOPPED;
172 				spin_unlock_irq(&target->sighand->siglock);
174 			read_unlock(&tasklist_lock);
178 	return utrace_set_flags(target, engine, flags);
/*
 * Implement PTRACE_TRACEME: attach an exclusive utrace engine to the
 * current task with its real parent as the tracer.  The state is
 * preallocated so ->parent can be sampled and passed to ptrace_setup
 * without blocking under rcu_read_lock.
 * NOTE(review): line-sampled — the error-unwind paths, the retval
 * declaration, and several returns are elided; confirm against the
 * full source.
 */
181 static int ptrace_traceme(void)
183 	struct utrace_attached_engine *engine;
184 	struct ptrace_state *state;
185 	struct task_struct *parent;
188 	engine = utrace_attach(current, (UTRACE_ATTACH_CREATE
189 					 | UTRACE_ATTACH_EXCLUSIVE
190 					 | UTRACE_ATTACH_MATCH_OPS),
191 			       &ptrace_utrace_ops, 0UL);
193 	if (IS_ERR(engine)) {
194 		retval = PTR_ERR(engine);
/* -EEXIST: already traced; elided code presumably maps this to -EPERM. */
195 		if (retval == -EEXIST)
200 	 * We need to preallocate so that we can hold
201 	 * rcu_read_lock from extracting ->parent through
202 	 * ptrace_setup using it.
204 	state = kzalloc(sizeof *state, GFP_USER);
205 	if (unlikely(state == NULL)) {
206 		(void) utrace_detach(current, engine);
208 		       "ptrace out of memory, lost child %d of %d",
209 		       current->pid, current->parent->pid);
214 	parent = rcu_dereference(current->parent);
/* Security hook decides whether the parent may trace us. */
217 	retval = security_ptrace(parent, current);
218 	task_unlock(current);
222 		(void) utrace_detach(current, engine);
225 		state = ptrace_setup(current, engine, parent, 0, 0,
228 			retval = PTR_ERR(state);
234 		 * This can't fail because we can't die while we
235 		 * are here doing this.
237 		retval = ptrace_update(current, engine, 0, 0);
240 	else if (unlikely(retval == -EALREADY))
242 		 * We raced with our parent's exit, which would
243 		 * have detached us just after our attach if
244 		 * we'd won the race. Pretend we got attached
245 		 * and then detached immediately, no error.
/*
 * Implement PTRACE_ATTACH: attach an exclusive engine to TASK with the
 * current task as tracer, send it SIGSTOP, and quiesce it so ptrace
 * requests can be serviced while it is in TASK_STOPPED.
 * NOTE(review): line-sampled — early-error returns (self-attach, kernel
 * threads), the ptrace_may_attach failure path, and the final return
 * are elided; confirm against the full source.
 */
253 static int ptrace_attach(struct task_struct *task)
255 	struct utrace_attached_engine *engine;
256 	struct ptrace_state *state;
/* You cannot attach to your own thread group or to kernel threads. */
262 	if (task->tgid == current->tgid)
264 	if (!task->mm) /* kernel threads */
267 	pr_debug("%d ptrace_attach %d state %lu exit_code %x\n",
268 		 current->pid, task->pid, task->state, task->exit_code);
270 	engine = utrace_attach(task, (UTRACE_ATTACH_CREATE
271 				      | UTRACE_ATTACH_EXCLUSIVE
272 				      | UTRACE_ATTACH_MATCH_OPS),
273 			      &ptrace_utrace_ops, 0);
274 	if (IS_ERR(engine)) {
275 		retval = PTR_ERR(engine);
/* -EEXIST: someone else is already ptracing this task. */
276 		if (retval == -EEXIST)
281 	pr_debug("%d ptrace_attach %d after utrace_attach: %lu exit_code %x\n",
282 		 current->pid, task->pid, task->state, task->exit_code);
284 	if (ptrace_may_attach(task)) {
285 		state = ptrace_setup(task, engine, current, 0,
286 				     capable(CAP_SYS_PTRACE), NULL);
288 			retval = PTR_ERR(state);
290 			retval = ptrace_update(task, engine, 0, 0);
292 			pr_debug("%d ptrace_attach %d after ptrace_update (%d)"
293 				 " %lu exit_code %x\n",
294 				 current->pid, task->pid, retval,
295 				 task->state, task->exit_code);
299 			 * It died before we enabled any callbacks.
301 			if (retval == -EALREADY)
303 			BUG_ON(retval != -ESRCH);
304 			ptrace_state_unlink(state);
310 		(void) utrace_detach(task, engine);
315 	 * We must double-check that task has not just died and
316 	 * been reaped (after ptrace_update succeeded).
317 	 * This happens when exec (de_thread) ignores NOREAP.
318 	 * We cannot call into the signal code if it's dead.
320 	read_lock(&tasklist_lock);
321 	if (likely(!task->exit_state)) {
322 		force_sig_specific(SIGSTOP, task);
/* Sample whether the SIGSTOP has already taken effect. */
324 		spin_lock_irq(&task->sighand->siglock);
325 		stopped = (task->state == TASK_STOPPED);
326 		spin_unlock_irq(&task->sighand->siglock);
328 	read_unlock(&tasklist_lock);
331 		const struct utrace_regset *regset;
334 		 * Set QUIESCE immediately, so we can allow
335 		 * ptrace requests while he's in TASK_STOPPED.
337 		retval = ptrace_update(task, engine,
338 				       UTRACE_ACTION_QUIESCE, 0);
340 			BUG_ON(retval != -ESRCH);
344 		 * Do now the regset 0 writeback that we do on every
345 		 * stop, since it's never been done. On register
346 		 * window machines, this makes sure the user memory
347 		 * backing the register data is up to date.
349 		regset = utrace_regset(task, engine,
350 				       utrace_native_view(task), 0);
351 		if (regset->writeback)
352 			(*regset->writeback)(task, regset, 1);
355 	pr_debug("%d ptrace_attach %d complete (%sstopped)"
356 		 " state %lu code %x",
357 		 current->pid, task->pid, stopped ? "" : "not ",
358 		 task->state, task->exit_code);
/*
 * ptrace_detach - detach ENGINE/STATE from TASK on behalf of the tracer.
 * The task might be dying or being reaped in parallel, in which case
 * engine and state may no longer be valid. utrace_detach checks for us.
 *
 * FIX(review): lines 423-425 below contained mojibake "¤t" — the HTML
 * entity "&curren;" corruption of "&current" — which is not valid C.
 * Restored to "&current->...".
 * (Line-sampled listing; some statements are elided.)
 */
369 static int ptrace_detach(struct task_struct *task,
370 			 struct utrace_attached_engine *engine,
371 			 struct ptrace_state *state)
376 #ifdef HAVE_ARCH_PTRACE_DETACH
378 	 * Some funky compatibility code in arch_ptrace may have
379 	 * needed to install special state it should clean up now.
381 	arch_ptrace_detach(task);
385 	 * Traditional ptrace behavior does wake_up_process no matter what
386 	 * in ptrace_detach. But utrace_detach will not do a wakeup if
387 	 * it's in a proper job control stop. We need it to wake up from
388 	 * TASK_STOPPED and either resume or process more signals. A
389 	 * pending stop signal will just leave it stopped again, but will
390 	 * consume the signal, and reset task->exit_code for the next wait
391 	 * call to see. This is important to userland if ptrace_do_wait
392 	 * "stole" the previous unwaited-for-ness (clearing exit_code), but
393 	 * there is a pending SIGSTOP, e.g. sent by a PTRACE_ATTACH done
394 	 * while already in job control stop.
396 	read_lock(&tasklist_lock);
397 	if (likely(task->signal != NULL)) {
398 		spin_lock_irq(&task->sighand->siglock);
399 		task->signal->flags &= ~SIGNAL_STOP_STOPPED;
400 		spin_unlock_irq(&task->sighand->siglock);
402 	read_unlock(&tasklist_lock);
404 	error = utrace_detach(task, engine);
407 	 * We can only get here from the ptracer itself or via
408 	 * detach_zombie from another thread in its group.
410 	BUG_ON(state->parent->tgid != current->tgid);
411 	ptrace_state_unlink(state);
415 	 * Wake up any other threads that might be blocked in
416 	 * wait. Though traditional ptrace does not guarantee
417 	 * this wakeup on PTRACE_DETACH, it does prevent
418 	 * erroneous blocking in wait when another racing
419 	 * thread's wait call reap-detaches the last child.
420 	 * Without this wakeup, another thread might stay
421 	 * blocked when it should return -ECHILD.
423 	spin_lock_irq(&current->sighand->siglock);
424 	wake_up_interruptible(&current->signal->wait_chldexit);
425 	spin_unlock_irq(&current->sighand->siglock);
/*
 * Called from exit: detach every tracee on tsk->ptracees.  Runs after
 * PF_EXITING is set, so ptrace_traceme cannot add new entries once we
 * hold the task_lock.  -EALREADY from utrace_detach means the tracee is
 * mid-report_death; we wait for it to go inactive and restart the scan.
 * (Line-sampled listing: rcu_read_lock/unlock, task_lock/unlock and the
 * restart label are elided.)
 */
432 * This is called when we are exiting. We must stop all our ptracing.
435 ptrace_exit(struct task_struct *tsk)
437 	struct list_head *pos, *n;
440 	 * Taking the task_lock after PF_EXITING is set ensures that a
441 	 * child in ptrace_traceme will not put itself on our list when
442 	 * we might already be tearing it down.
445 	if (likely(list_empty(&tsk->ptracees))) {
454 	list_for_each_safe_rcu(pos, n, &tsk->ptracees) {
455 		struct ptrace_state *state = list_entry(pos,
458 		int error = utrace_detach(state->task, state->engine);
459 		BUG_ON(state->parent != tsk);
460 		if (likely(error == 0)) {
461 			ptrace_state_unlink(state);
464 		else if (unlikely(error == -EALREADY)) {
466 			 * It's still doing report_death callbacks.
467 			 * Just wait for it to settle down.
468 			 * Since wait_task_inactive might yield,
469 			 * we must go out of rcu_read_lock and restart.
471 			struct task_struct *p = state->task;
474 			wait_task_inactive(p);
479 			BUG_ON(error != -ESRCH);
/* All tracees must be gone before the tracer may finish exiting. */
484 	BUG_ON(!list_empty(&tsk->ptracees));
/*
 * Deliver SIGNR to TARGET as part of resuming it from a ptrace stop.
 * Three cases: syscall-stop uses plain send_sig (traditional behavior);
 * a stashed siginfo is re-injected (updated if the debugger changed the
 * signal number); otherwise the elided tail presumably injects a fresh
 * signal — confirm against the full source.  signr == 0 means "no
 * signal" (the valid_signal check and that early return are elided).
 */
488 ptrace_induce_signal(struct task_struct *target,
489 		     struct utrace_attached_engine *engine,
492 	struct ptrace_state *state = (struct ptrace_state *) engine->data;
497 	if (!valid_signal(signr))
500 	if (state->syscall) {
502 		 * This is the traditional ptrace behavior when given
503 		 * a signal to resume from a syscall tracing stop.
505 		send_sig(signr, target, 1);
507 	else if (!state->have_eventmsg && state->u.siginfo) {
508 		siginfo_t *info = state->u.siginfo;
510 		/* Update the siginfo structure if the signal has
511 		   changed. If the debugger wanted something
512 		   specific in the siginfo structure then it should
513 		   have updated *info via PTRACE_SETSIGINFO. */
514 		if (signr != info->si_signo) {
515 			info->si_signo = signr;
517 			info->si_code = SI_USER;
518 			info->si_pid = current->pid;
519 			info->si_uid = current->uid;
522 		return utrace_inject_signal(target, engine,
523 					    UTRACE_ACTION_RESUME, info, NULL);
/*
 * PTRACE_GETREGS/SETREGS-style access to regset SETNO of TARGET through
 * user buffer DATA.  SIZE of (unsigned int)-1 means the whole regset.
 * WRITE nonzero calls regset->set (read from user), else regset->get
 * (write to user).  (Line-sampled: the -EIO/-EFAULT error returns and
 * the final return of ret are elided.)
 */
530 ptrace_regset_access(struct task_struct *target,
531 		     struct utrace_attached_engine *engine,
532 		     const struct utrace_regset_view *view,
533 		     int setno, unsigned long offset, unsigned int size,
534 		     void __user *data, int write)
536 	const struct utrace_regset *regset = utrace_regset(target, engine,
540 	if (unlikely(regset == NULL))
543 	if (size == (unsigned int) -1)
544 		size = regset->size * regset->n;
547 		if (!access_ok(VERIFY_READ, data, size))
550 		ret = (*regset->set)(target, regset,
551 				     offset, size, NULL, data);
554 		if (!access_ok(VERIFY_WRITE, data, size))
557 		ret = (*regset->get)(target, regset,
558 				     offset, size, NULL, data);
/*
 * Access a single register REGNO in regset SETNO of TARGET via user
 * buffer DATA.  REGNO is validated against [bias, bias+n) and converted
 * to a byte position in the regset.  (Line-sampled: error returns and
 * the trailing argument lines of the get/set calls are elided.)
 */
565 ptrace_onereg_access(struct task_struct *target,
566 		     struct utrace_attached_engine *engine,
567 		     const struct utrace_regset_view *view,
568 		     int setno, unsigned long regno,
569 		     void __user *data, int write)
571 	const struct utrace_regset *regset = utrace_regset(target, engine,
576 	if (unlikely(regset == NULL))
579 	if (regno < regset->bias || regno >= regset->bias + regset->n)
582 	pos = (regno - regset->bias) * regset->size;
585 		if (!access_ok(VERIFY_READ, data, regset->size))
588 		ret = (*regset->set)(target, regset, pos, regset->size,
592 		if (!access_ok(VERIFY_WRITE, data, regset->size))
595 		ret = (*regset->get)(target, regset, pos, regset->size,
/*
 * Walk a ptrace compatibility LAYOUT (segments mapping struct-user
 * offsets onto regsets) and copy SIZE bytes at ADDR between the target's
 * registers and UDATA/KDATA.  Segments with regset == -1 are no-op /
 * zero-fill holes in struct user.  Loops until done or error.
 * (Line-sampled: the seg initialization, the advance-to-next-segment
 * bookkeeping at the bottom of the loop, and error returns are elided.)
 */
603 ptrace_layout_access(struct task_struct *target,
604 		     struct utrace_attached_engine *engine,
605 		     const struct utrace_regset_view *view,
606 		     const struct ptrace_layout_segment layout[],
607 		     unsigned long addr, unsigned int size,
608 		     void __user *udata, void *kdata, int write)
610 	const struct ptrace_layout_segment *seg;
614 	    !access_ok(write ? VERIFY_READ : VERIFY_WRITE, udata, size))
/* Find the segment containing addr; {0,0} terminates the table. */
621 		while (addr >= seg->end && seg->end != 0)
624 		if (addr < seg->start || addr >= seg->end)
627 		pos = addr - seg->start + seg->offset;
628 		n = min(size, seg->end - (unsigned int) addr);
630 		if (unlikely(seg->regset == (unsigned int) -1)) {
632 			 * This is a no-op/zero-fill portion of struct user.
635 			if (!write && seg->offset == 0) {
638 				else if (clear_user(udata, n))
644 			const struct utrace_regset *regset = utrace_regset(
645 				target, engine, view, seg->regset);
646 			if (unlikely(regset == NULL))
650 			 * A ptrace compatibility layout can do a misaligned
651 			 * regset access, e.g. word access to larger data.
652 			 * An arch's compat layout can be this way only if
653 			 * it is actually ok with the regset code despite the
654 			 * regset->align setting.
656 			align = min(regset->align, size);
657 			if ((pos & (align - 1))
658 			    || pos >= regset->n * regset->size)
662 				ret = (*regset->set)(target, regset,
663 						     pos, n, kdata, udata);
665 				ret = (*regset->get)(target, regset,
666 						     pos, n, kdata, udata);
675 	} while (ret == 0 && size > 0);
/*
 * Common front end for sys_ptrace/compat_sys_ptrace: resolve PID to a
 * task (holding a reference), handle TRACEME/ATTACH directly, otherwise
 * locate our engine+state on the child and verify the traditional
 * preconditions (we are the tracer; target quiescent unless KILL; not
 * already dead).  On success fills *childp/*enginep/*statep.
 * (Line-sampled: several error-exit paths and the success return are
 * elided.)
 */
682 ptrace_start(long pid, long request,
683 	     struct task_struct **childp,
684 	     struct utrace_attached_engine **enginep,
685 	     struct ptrace_state **statep)
688 	struct task_struct *child;
689 	struct utrace_attached_engine *engine;
690 	struct ptrace_state *state;
693 	if (request == PTRACE_TRACEME)
694 		return ptrace_traceme();
697 	read_lock(&tasklist_lock);
698 	child = find_task_by_pid(pid);
700 		get_task_struct(child);
701 	read_unlock(&tasklist_lock);
702 	pr_debug("ptrace pid %ld => %p\n", pid, child);
707 	if (pid == 1) /* you may not mess with init */
/* vserver context check: target must be visible to this context. */
711 	if (!vx_check(vx_task_xid(child), VS_WATCH_P|VS_IDENT))
714 	if (request == PTRACE_ATTACH) {
715 		ret = ptrace_attach(child);
720 	engine = utrace_attach(child, UTRACE_ATTACH_MATCH_OPS,
721 			       &ptrace_utrace_ops, 0);
723 	if (IS_ERR(engine) || engine == NULL)
725 	state = rcu_dereference((struct ptrace_state *) engine->data);
726 	if (state == NULL || state->parent != current)
731 	 * Traditional ptrace behavior demands that the target already be
732 	 * quiescent, but not dead.
734 	if (request != PTRACE_KILL
735 	    && !(engine->flags & UTRACE_ACTION_QUIESCE)) {
736 		pr_debug("%d not stopped (%lu)\n", child->pid, child->state);
741 	 * We do this for all requests to match traditional ptrace behavior.
742 	 * If the machine state synchronization done at context switch time
743 	 * includes e.g. writing back to user memory, we want to make sure
744 	 * that has finished before a PTRACE_PEEKDATA can fetch the results.
745 	 * On most machines, only regset data is affected by context switch
746 	 * and calling utrace_regset later on will take care of that, so
747 	 * this is superfluous.
749 	 * To do this purely in utrace terms, we could do:
750 	 *	(void) utrace_regset(child, engine, utrace_native_view(child), 0);
752 	wait_task_inactive(child);
754 	if (child->exit_state)
/* Error path: drop the reference taken above. */
765 	put_task_struct(child);
/*
 * Handle the machine-independent ptrace requests once ptrace_start has
 * validated the target: DETACH, the resume family (CONT/SYSCALL/KILL/
 * SINGLESTEP/SINGLEBLOCK/SYSEMU*), and SETOPTIONS.  Resumes induce the
 * requested signal, then rebuild the engine flags without QUIESCE via
 * ptrace_update so the child runs.
 * (Line-sampled: the switch statement itself, several case labels,
 * #endif lines and error returns are elided — read with care.)
 */
771 ptrace_common(long request, struct task_struct *child,
772 	      struct utrace_attached_engine *engine,
773 	      struct ptrace_state *state,
774 	      unsigned long addr, long data)
782 		 * Detach a process that was attached.
784 		ret = ptrace_induce_signal(child, engine, data);
786 			ret = ptrace_detach(child, engine, state);
787 			if (ret == -EALREADY) /* Already a zombie. */
790 				BUG_ON(ret != -ESRCH);
795 		 * These are the operations that resume the child running.
803 	case PTRACE_SYSEMU_SINGLESTEP:
805 #ifdef PTRACE_SINGLEBLOCK
806 	case PTRACE_SINGLEBLOCK:
807 # ifdef ARCH_HAS_BLOCK_STEP
808 		if (! ARCH_HAS_BLOCK_STEP)
810 		if (request == PTRACE_SINGLEBLOCK)
813 	case PTRACE_SINGLESTEP:
814 #ifdef ARCH_HAS_SINGLE_STEP
815 		if (! ARCH_HAS_SINGLE_STEP)
817 		if (request == PTRACE_SINGLESTEP
818 #ifdef PTRACE_SYSEMU_SINGLESTEP
819 		    || request == PTRACE_SYSEMU_SINGLESTEP
824 		ret = ptrace_induce_signal(child, engine, data);
830 		 * Reset the action flags without QUIESCE, so it resumes.
834 		state->sysemu = (request == PTRACE_SYSEMU_SINGLESTEP
835 				 || request == PTRACE_SYSEMU);
837 		if (request == PTRACE_SINGLESTEP
839 		    || request == PTRACE_SYSEMU_SINGLESTEP
842 			flags |= UTRACE_ACTION_SINGLESTEP;
843 #ifdef PTRACE_SINGLEBLOCK
844 		else if (request == PTRACE_SINGLEBLOCK)
845 			flags |= UTRACE_ACTION_BLOCKSTEP;
847 		if (request == PTRACE_SYSCALL)
848 			flags |= UTRACE_EVENT_SYSCALL;
850 		else if (request == PTRACE_SYSEMU
851 			 || request == PTRACE_SYSEMU_SINGLESTEP)
852 			flags |= UTRACE_EVENT(SYSCALL_ENTRY);
854 		ret = ptrace_update(child, engine, flags, 1);
856 			BUG_ON(ret != -ESRCH);
860 #ifdef PTRACE_OLDSETOPTIONS
861 	case PTRACE_OLDSETOPTIONS:
863 	case PTRACE_SETOPTIONS:
865 		if (data & ~PTRACE_O_MASK)
867 		state->options = data;
/* New options may enable extra events; keep QUIESCE since stopped. */
868 		ret = ptrace_update(child, engine, UTRACE_ACTION_QUIESCE, 1);
870 			BUG_ON(ret != -ESRCH);
/*
 * The ptrace system call entry.  Flow: ptrace_start validates PID and
 * request; arch_ptrace gets first crack (any result other than -ENOSYS
 * is final); the remaining generic requests (PEEK/POKE, EVENTMSG,
 * GET/SETSIGINFO) are handled here, and everything else falls through
 * to ptrace_common.
 * (Line-sampled: the switch header, break statements, some error
 * assignments and the out label are elided.)
 */
879 asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
881 	struct task_struct *child;
882 	struct utrace_attached_engine *engine;
883 	struct ptrace_state *state;
886 	pr_debug("%d sys_ptrace(%ld, %ld, %lx, %lx)\n",
887 		 current->pid, request, pid, addr, data);
889 	ret = ptrace_start(pid, request, &child, &engine, &state);
894 	ret = arch_ptrace(&request, child, engine, addr, data, &val);
895 	if (ret != -ENOSYS) {
898 			force_successful_syscall_return();
905 		ret = ptrace_common(request, child, engine, state, addr, data);
908 	case PTRACE_PEEKTEXT: /* read word at location addr. */
909 	case PTRACE_PEEKDATA: {
913 		copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
915 		if (copied != sizeof(tmp))
917 		ret = put_user(tmp, (unsigned long __user *) data);
921 	case PTRACE_POKETEXT: /* write the word at location addr. */
922 	case PTRACE_POKEDATA:
924 		if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data))
929 	case PTRACE_GETEVENTMSG:
930 		ret = put_user(state->have_eventmsg
931 			       ? state->u.eventmsg : 0L,
932 			       (unsigned long __user *) data);
934 	case PTRACE_GETSIGINFO:
936 		if (!state->have_eventmsg && state->u.siginfo)
937 			ret = copy_siginfo_to_user((siginfo_t __user *) data,
940 	case PTRACE_SETSIGINFO:
942 		if (!state->have_eventmsg && state->u.siginfo) {
944 			if (copy_from_user(state->u.siginfo,
945 					   (siginfo_t __user *) data,
/* Drop the reference ptrace_start took on the child. */
953 	put_task_struct(child);
955 	pr_debug("%d ptrace -> %lx\n", current->pid, ret);
/* 32-bit-compat entry point; mirrors sys_ptrace with compat-width
 * words, compat siginfo copying, and arch_compat_ptrace first.
 * (Line-sampled: the surrounding CONFIG_COMPAT conditional, switch
 * header, breaks and the out label are elided.) */
961 #include <linux/compat.h>
963 asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
964 				  compat_ulong_t addr, compat_long_t cdata)
966 	const unsigned long data = (unsigned long) (compat_ulong_t) cdata;
967 	struct task_struct *child;
968 	struct utrace_attached_engine *engine;
969 	struct ptrace_state *state;
970 	compat_long_t ret, val;
972 	pr_debug("%d compat_sys_ptrace(%d, %d, %x, %x)\n",
973 		 current->pid, request, pid, addr, cdata);
974 	ret = ptrace_start(pid, request, &child, &engine, &state);
979 	ret = arch_compat_ptrace(&request, child, engine, addr, cdata, &val);
980 	if (ret != -ENOSYS) {
983 			force_successful_syscall_return();
990 		ret = ptrace_common(request, child, engine, state, addr, data);
993 	case PTRACE_PEEKTEXT: /* read word at location addr. */
994 	case PTRACE_PEEKDATA: {
998 		copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
1000 		if (copied != sizeof(tmp))
1002 		ret = put_user(tmp, (compat_ulong_t __user *) data);
1006 	case PTRACE_POKETEXT: /* write the word at location addr. */
1007 	case PTRACE_POKEDATA:
/* Note: writes the compat-width cdata, not the widened data. */
1009 		if (access_process_vm(child, addr, &cdata, sizeof(cdata), 1) == sizeof(cdata))
1014 	case PTRACE_GETEVENTMSG:
1015 		ret = put_user(state->have_eventmsg
1016 			       ? state->u.eventmsg : 0L,
1017 			       (compat_long_t __user *) data);
1019 	case PTRACE_GETSIGINFO:
1021 		if (!state->have_eventmsg && state->u.siginfo)
1022 			ret = copy_siginfo_to_user32(
1023 				(struct compat_siginfo __user *) data,
1026 	case PTRACE_SETSIGINFO:
1028 		if (!state->have_eventmsg && state->u.siginfo
1029 		    && copy_siginfo_from_user32(
1031 			    (struct compat_siginfo __user *) data))
1037 	put_task_struct(child);
1039 	pr_debug("%d ptrace -> %lx\n", current->pid, (long)ret);
/*
 * Implicitly detach from tracee P after its death has been reported by
 * wait.  Called without rcu_read_lock held continuously, so when called
 * from a sibling thread (tsk != current's thread that attached) the
 * engine is re-looked-up by MATCH_DATA to confirm STATE is still ours.
 * -EALREADY from ptrace_detach means report_death callbacks are still
 * running; wait for the tracee to go inactive (may block) and retry —
 * the retry/label lines are elided in this line-sampled listing.
 */
1049 detach_zombie(struct task_struct *tsk,
1050 	      struct task_struct *p, struct ptrace_state *state)
1053 	struct utrace_attached_engine *engine;
1059 	engine = state->engine;
1062 	 * We've excluded other ptrace_do_wait calls. But the
1063 	 * ptracer itself might have done ptrace_detach while we
1064 	 * did not have rcu_read_lock. So double-check that state
1067 	engine = utrace_attach(
1068 		p, (UTRACE_ATTACH_MATCH_OPS
1069 		    | UTRACE_ATTACH_MATCH_DATA),
1071 		(unsigned long) state);
1072 	if (IS_ERR(engine) || state->parent != tsk)
1073 		detach_error = -ESRCH;
1075 		BUG_ON(state->engine != engine);
1078 	if (likely(!detach_error))
1079 		detach_error = ptrace_detach(p, engine, state);
1080 	if (unlikely(detach_error == -EALREADY)) {
1082 		 * It's still doing report_death callbacks.
1083 		 * Just wait for it to settle down.
1085 		wait_task_inactive(p); /* Might block. */
1089 	 * A failure with -ESRCH means that report_reap is
1090 	 * already running and will do the cleanup, or that
1091 	 * we lost a race with ptrace_detach in another
1092 	 * thread or with the automatic detach in
1096 	BUG_ON(detach_error != -ESRCH);
/*
 * wait(2) support for ptraced children: scan tsk->ptracees for a
 * matching tracee that has something to report (stop, death), atomically
 * claim its exit_code with xchg, fill in the user's siginfo/status/
 * rusage, and implicitly detach zombies so they report to their real
 * parent.  Locking contract is stated in the original comment below.
 * NOTE(review): heavily line-sampled — the pid/pgid match arms, the
 * found/blocked exit paths, the WUNTRACED handling and several gotos
 * are elided; do not infer control flow not shown.
 */
1100 * We're called with tasklist_lock held for reading.
1101 * If we return -ECHILD or zero, next_thread(tsk) must still be valid to use.
1102 * If we return another error code, or a successful PID value, we
1103 * release tasklist_lock first.
1106 ptrace_do_wait(struct task_struct *tsk,
1107 	       pid_t pid, int options, struct siginfo __user *infop,
1108 	       int __user *stat_addr, struct rusage __user *rusagep)
1110 	struct ptrace_state *state;
1111 	struct task_struct *p;
1113 	int exit_code, why, status;
1116 	list_for_each_entry_rcu(state, &tsk->ptracees, entry) {
/* pid selection: 0 = our pgrp, <-1 = that pgrp, -1 = any, >0 = exact. */
1123 			if (process_group(p) != process_group(current))
1125 		} else if (pid != -1) {
1126 			if (process_group(p) != -pid)
1129 		if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
1130 		    && !(options & __WALL))
1132 		if (security_task_wait(p))
1136 		 * This is a matching child. If we don't win now, tell
1137 		 * our caller to block and repeat. From this point we
1138 		 * must ensure that wait_chldexit will get a wakeup for
1139 		 * any tracee stopping, dying, or being detached.
1140 		 * For death, tasklist_lock guarantees this already.
1144 		switch (p->exit_state) {
1146 			if (!likely(options & WEXITED))
1148 			if (delay_group_leader(p)) {
1149 				struct task_struct *next = next_thread(p);
1150 				pr_debug("%d ptrace_do_wait leaving %d "
1152 					 "delay_group_leader (%d/%lu)\n",
1153 					 current->pid, p->pid, p->exit_code,
1154 					 next->pid, next->state);
1157 			exit_code = p->exit_code;
1163 			 * tasklist_lock holds up any transitions to
1164 			 * EXIT_ZOMBIE. After releasing it we are
1165 			 * guaranteed a wakeup on wait_chldexit after
1168 			if (p->flags & PF_EXITING)
1170 				 * It's in do_exit and might have set
1171 				 * p->exit_code already, but it's not quite
1172 				 * dead yet. It will get to report_death
1173 				 * and wakes us up when it finishes.
1180 		 * This xchg atomically ensures that only one do_wait
1181 		 * call can report this thread. Because exit_code is
1182 		 * always set before do_notify wakes us up, after this
1183 		 * check fails we are sure to get a wakeup if it stops.
1185 		exit_code = xchg(&p->exit_code, 0);
1189 		// XXX should handle WCONTINUED
1191 		pr_debug("%d ptrace_do_wait leaving %d state %lu code %x\n",
1192 			 current->pid, p->pid, p->state, p->exit_code);
1196 	pr_debug("%d ptrace_do_wait blocking\n", current->pid);
1201 	BUG_ON(state->parent != tsk);
1204 	pr_debug("%d ptrace_do_wait (%d) found %d code %x (%lu/%d)\n",
1205 		 current->pid, tsk->pid, p->pid, exit_code,
1206 		 p->exit_state, p->exit_signal);
1209 	 * If there was a group exit in progress, all threads report that
1210 	 * status. Most will have SIGKILL in their own exit_code.
1212 	if (p->signal->flags & SIGNAL_GROUP_EXIT)
1213 		exit_code = p->signal->group_exit_code;
1215 	if (p->exit_state) {
1216 		if (unlikely(p->parent == tsk && p->exit_signal != -1))
1218 			 * This is our natural child we were ptracing.
1219 			 * When it dies it detaches (see ptrace_report_death).
1220 			 * So we're seeing it here in a race. When it
1221 			 * finishes detaching it will become reapable in
1222 			 * the normal wait_task_zombie path instead.
/* Decode exit_code into (why, status) for siginfo. */
1225 		if ((exit_code & 0x7f) == 0) {
1227 			status = exit_code >> 8;
1230 			why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;
1231 			status = exit_code & 0x7f;
1237 		exit_code = (status << 8) | 0x7f;
1241 	 * At this point we are committed to a successful return
1242 	 * or a user error return. Release the tasklist_lock.
1245 	read_unlock(&tasklist_lock);
1248 		err = getrusage(p, RUSAGE_BOTH, rusagep);
1251 		err = put_user(SIGCHLD, &infop->si_signo);
1253 		err = put_user(0, &infop->si_errno);
1255 		err = put_user((short)why, &infop->si_code);
1257 		err = put_user(p->pid, &infop->si_pid);
1259 		err = put_user(p->uid, &infop->si_uid);
1261 		err = put_user(status, &infop->si_status);
1263 	if (!err && stat_addr)
1264 		err = put_user(exit_code, stat_addr);
1267 	if (why != CLD_TRAPPED)
1269 		 * This was a death report. The ptracer's wait
1270 		 * does an implicit detach, so the zombie reports
1271 		 * to its real parent now.
1273 		detach_zombie(tsk, p, state);
/*
 * RCU-safe fetch of the state hung off ENGINE; the matching release is
 * put_ptrace_state.  (The rcu_read_lock call and the NULL/return tails
 * are elided from this line-sampled listing.)
 */
1284 * All the report callbacks (except death and reap) are subject to a race
1285 * with ptrace_exit doing a quick detach and ptrace_done. It can do this
1286 * even when the target is not quiescent, so a callback may already be in
1287 * progress when it does ptrace_done. Callbacks use this function to fetch
1288 * the struct ptrace_state while ensuring it doesn't disappear until
1289 * put_ptrace_state is called. This just uses RCU, since state and
1290 * anything we try to do to state->parent is safe under rcu_read_lock.
1292 static struct ptrace_state *
1293 get_ptrace_state(struct utrace_attached_engine *engine,
1294 		 struct task_struct *tsk)
1296 	struct ptrace_state *state;
1299 	state = rcu_dereference((struct ptrace_state *) engine->data);
1300 	if (likely(state != NULL))
/* Release the RCU protection taken by get_ptrace_state; the body
 * (presumably rcu_read_unlock) is elided from this listing. */
1308 put_ptrace_state(struct ptrace_state *state)
/*
 * Build a CLD_* SIGCHLD siginfo describing TSK's state change (WHY is
 * CLD_TRAPPED/STOPPED/CONTINUED/EXITED...) and send it to PARENT unless
 * its handler ignores SIGCHLD or the relevant SA_NOCLD* flag is set;
 * always wake parent's wait_chldexit so blocked wait calls re-scan.
 * (Line-sampled: the sa_mask declaration, some branch bodies and the
 * function's closing lines are elided.)
 */
1315 do_notify(struct task_struct *tsk, struct task_struct *parent, int why)
1317 	struct siginfo info;
1318 	unsigned long flags;
1319 	struct sighand_struct *sighand;
1322 	info.si_signo = SIGCHLD;
1324 	info.si_pid = tsk->pid;
1325 	info.si_uid = tsk->uid;
1327 	/* FIXME: find out whether or not this is supposed to be c*time. */
1328 	info.si_utime = cputime_to_jiffies(tsk->utime);
1329 	info.si_stime = cputime_to_jiffies(tsk->stime);
1331 	sa_mask = SA_NOCLDSTOP;
1333 	info.si_status = tsk->exit_code & 0x7f;
1334 	if (why == CLD_CONTINUED)
1335 		info.si_status = SIGCONT;
1336 	else if (why == CLD_STOPPED)
1337 		info.si_status = tsk->signal->group_exit_code & 0x7f;
1338 	else if (why == CLD_EXITED) {
1339 		sa_mask = SA_NOCLDWAIT;
1340 		if (tsk->exit_code & 0x80)
1341 			info.si_code = CLD_DUMPED;
1342 		else if (tsk->exit_code & 0x7f)
1343 			info.si_code = CLD_KILLED;
1345 			info.si_code = CLD_EXITED;
1346 			info.si_status = tsk->exit_code >> 8;
1350 	read_lock(&tasklist_lock);
/* Parent may itself be exiting; its signal struct can be gone. */
1351 	if (unlikely(parent->signal == NULL))
1354 	sighand = parent->sighand;
1355 	spin_lock_irqsave(&sighand->siglock, flags);
1356 	if (sighand->action[SIGCHLD-1].sa.sa_handler != SIG_IGN &&
1357 	    !(sighand->action[SIGCHLD-1].sa.sa_flags & sa_mask))
1358 		__group_send_sig_info(SIGCHLD, &info, parent);
1360 	 * Even if SIGCHLD is not generated, we must wake up wait4 calls.
1362 	wake_up_interruptible_sync(&parent->signal->wait_chldexit);
1363 	spin_unlock_irqrestore(&sighand->siglock, flags);
1366 	read_unlock(&tasklist_lock);
/*
 * Core "ptrace stop" path used by the event callbacks: quiesce TSK,
 * write back regset 0 if needed, publish CODE in tsk->exit_code, and
 * notify the tracer with CLD_TRAPPED.  Returns UTRACE_ACTION_RESUME;
 * the QUIESCE flag set here is what actually holds the tracee.
 * (Line-sampled listing; some lines are elided.)
 */
1370 ptrace_report(struct utrace_attached_engine *engine,
1371 	      struct task_struct *tsk,
1372 	      struct ptrace_state *state,
1375 	const struct utrace_regset *regset;
1377 	pr_debug("%d ptrace_report %d engine %p"
1378 		 " state %p code %x parent %d (%p)\n",
1379 		 current->pid, tsk->pid, engine, state, code,
1380 		 state->parent->pid, state->parent);
1381 	if (!state->have_eventmsg && state->u.siginfo) {
1382 		const siginfo_t *si = state->u.siginfo;
1383 		pr_debug("  si %d code %x errno %d addr %p\n",
1384 			 si->si_signo, si->si_code, si->si_errno,
1389 	 * Set our QUIESCE flag right now, before notifying the tracer.
1390 	 * We do this before setting tsk->exit_code rather than
1391 	 * by using UTRACE_ACTION_NEWSTATE in our return value, to
1392 	 * ensure that the tracer can't get the notification and then
1393 	 * try to resume us with PTRACE_CONT before we set the flag.
1395 	utrace_set_flags(tsk, engine, engine->flags | UTRACE_ACTION_QUIESCE);
1398 	 * If regset 0 has a writeback call, do it now. On register window
1399 	 * machines, this makes sure the user memory backing the register
1400 	 * data is up to date by the time wait_task_inactive returns to
1401 	 * ptrace_start in our tracer doing a PTRACE_PEEKDATA or the like.
1403 	regset = utrace_regset(tsk, engine, utrace_native_view(tsk), 0);
1404 	if (regset->writeback)
1405 		(*regset->writeback)(tsk, regset, 0);
1408 	tsk->exit_code = code;
1409 	do_notify(tsk, state->parent, CLD_TRAPPED);
1411 	pr_debug("%d ptrace_report quiescing exit_code %x\n",
1412 		 current->pid, current->exit_code);
1414 	put_ptrace_state(state);
1416 	return UTRACE_ACTION_RESUME;
/*
 * Report an extended ptrace event: encode the PTRACE_EVENT_* number in
 * the high byte over SIGTRAP, matching the wait-status convention.
 * (The have_eventmsg/eventmsg setup lines are elided in this listing.)
 */
1420 ptrace_event(struct utrace_attached_engine *engine,
1421 	     struct task_struct *tsk,
1422 	     struct ptrace_state *state,
1426 	return ptrace_report(engine, tsk, state, (event << 8) | SIGTRAP);
/*
 * DEATH callback.  Repairs a SIGKILL exit_code clobbered by a race with
 * ptrace_update/ptrace_do_wait; for a natural child it detaches so the
 * normal reparent/reap path takes over (after a SIGCHLD for cases where
 * it otherwise wouldn't fire); for other tracees it notifies the tracer
 * and stays attached awaiting the implicit detach in wait.
 * (Line-sampled listing; some lines are elided.)
 */
1430 * Unlike other report callbacks, this can't be called while ptrace_exit
1431 * is doing ptrace_done in parallel, so we don't need get_ptrace_state.
1434 ptrace_report_death(struct utrace_attached_engine *engine,
1435 		    struct task_struct *tsk)
1437 	struct ptrace_state *state = (struct ptrace_state *) engine->data;
1439 	if (tsk->exit_code == 0 && unlikely(tsk->flags & PF_SIGNALED))
1441 		 * This can only mean that tsk->exit_code was clobbered
1442 		 * by ptrace_update or ptrace_do_wait in a race with
1443 		 * an asynchronous wakeup and exit for SIGKILL.
1445 		tsk->exit_code = SIGKILL;
1447 	if (tsk->parent == state->parent && tsk->exit_signal != -1) {
1449 		 * This is a natural child (excluding clone siblings of a
1450 		 * child group_leader), so we detach and let the normal
1451 		 * reporting happen once our NOREAP action is gone. But
1452 		 * first, generate a SIGCHLD for those cases where normal
1453 		 * behavior won't. A ptrace'd child always generates SIGCHLD.
1455 		pr_debug("ptrace %d death natural parent %d exit_code %x\n",
1456 			 tsk->pid, state->parent->pid, tsk->exit_code);
1457 		if (!thread_group_empty(tsk))
1458 			do_notify(tsk, state->parent, CLD_EXITED);
/* Unpublish the state before the engine detaches. */
1459 		ptrace_state_unlink(state);
1460 		rcu_assign_pointer(engine->data, 0UL);
1462 		return UTRACE_ACTION_DETACH;
1466 	 * This might be a second report_death callback for a group leader
1467 	 * that was delayed when its original report_death callback was made.
1468 	 * Repeating do_notify is exactly what we need for that case too.
1469 	 * After the wakeup, ptrace_do_wait will see delay_group_leader false.
1472 	pr_debug("ptrace %d death notify %d exit_code %x: ",
1473 		 tsk->pid, state->parent->pid, tsk->exit_code);
1474 	do_notify(tsk, state->parent, CLD_EXITED);
1475 	pr_debug("%d notified %d\n", tsk->pid, state->parent->pid);
1476 	return UTRACE_ACTION_RESUME;
/*
 * We get this only in the case where our UTRACE_ACTION_NOREAP was ignored.
 * That happens solely when a non-leader exec reaps the old leader.
 */
static void
ptrace_report_reap(struct utrace_attached_engine *engine,
		   struct task_struct *tsk)
{
	struct ptrace_state *state = get_ptrace_state(engine, tsk);
	if (state != NULL) {
		/* Drop our tracking of the reaped task and free the state. */
		ptrace_state_unlink(state);
		rcu_assign_pointer(engine->data, 0UL);
		/*
		 * NOTE(review): reconstructed from an extraction gap here,
		 * matching the identical sequence in ptrace_report_death —
		 * verify against the original source.
		 */
		ptrace_done(state);
		put_ptrace_state(state);
	}
}
1497 * Start tracing the child. This has to do put_ptrace_state before it can
1498 * do allocation that might block.
1501 ptrace_clone_setup(struct utrace_attached_engine *engine,
1502 struct task_struct *parent,
1503 struct ptrace_state *state,
1504 struct task_struct *child)
1506 struct task_struct *tracer;
1507 struct utrace_attached_engine *child_engine;
1508 struct ptrace_state *child_state;
1513 tracer = state->parent;
1514 options = state->options;
1515 cap_sys_ptrace = state->cap_sys_ptrace;
1516 get_task_struct(tracer);
1517 put_ptrace_state(state);
1519 child_engine = utrace_attach(child, (UTRACE_ATTACH_CREATE
1520 | UTRACE_ATTACH_EXCLUSIVE
1521 | UTRACE_ATTACH_MATCH_OPS),
1522 &ptrace_utrace_ops, 0UL);
1523 if (unlikely(IS_ERR(child_engine))) {
1524 BUG_ON(PTR_ERR(child_engine) != -ENOMEM);
1525 put_task_struct(tracer);
1529 child_state = ptrace_setup(child, child_engine,
1530 tracer, options, cap_sys_ptrace, NULL);
1532 put_task_struct(tracer);
1534 if (unlikely(IS_ERR(child_state))) {
1535 (void) utrace_detach(child, child_engine);
1537 if (PTR_ERR(child_state) == -ENOMEM)
1541 * Our tracer has started exiting. It's
1542 * too late to set it up tracing the child.
1544 BUG_ON(PTR_ERR(child_state) != -EALREADY);
1547 sigaddset(&child->pending.signal, SIGSTOP);
1548 set_tsk_thread_flag(child, TIF_SIGPENDING);
1549 ret = ptrace_update(child, child_engine, 0, 0);
1552 * The child hasn't run yet, it can't have died already.
1560 printk(KERN_ERR "ptrace out of memory, lost child %d of %d",
1561 child->pid, parent->pid);
/*
 * A clone/fork/vfork happened in a traced task.  Decide which (if any)
 * PTRACE_EVENT_* to report based on the clone flags and our options,
 * and attach the tracer to the new child when requested.
 */
static u32
ptrace_report_clone(struct utrace_attached_engine *engine,
		    struct task_struct *parent,
		    unsigned long clone_flags, struct task_struct *child)
{
	int event, option;
	struct ptrace_state *state = get_ptrace_state(engine, parent);
	if (unlikely(state == NULL))
		return UTRACE_ACTION_RESUME;

	pr_debug("%d (%p) engine %p"
		 " ptrace_report_clone child %d (%p) fl %lx\n",
		 parent->pid, parent, engine, child->pid, child, clone_flags);

	/*
	 * Classify the clone: vfork, plain clone (non-SIGCHLD child
	 * signal), or fork; each has its own event and enabling option.
	 */
	event = PTRACE_EVENT_FORK;
	option = PTRACE_O_TRACEFORK;
	if (clone_flags & CLONE_VFORK) {
		event = PTRACE_EVENT_VFORK;
		option = PTRACE_O_TRACEVFORK;
	}
	else if ((clone_flags & CSIGNAL) != SIGCHLD) {
		event = PTRACE_EVENT_CLONE;
		option = PTRACE_O_TRACECLONE;
	}

	if (state->options & option) {
		/* PTRACE_GETEVENTMSG will return the new child's pid. */
		state->have_eventmsg = 1;
		state->u.eventmsg = child->pid;
	}
	else
		event = 0;

	if (!(clone_flags & CLONE_UNTRACED)
	    && (event || (clone_flags & CLONE_PTRACE))) {
		/*
		 * Have our tracer start following the child too.
		 */
		ptrace_clone_setup(engine, parent, state, child);

		/*
		 * That did put_ptrace_state, so we have to check
		 * again in case our tracer just started exiting.
		 */
		state = get_ptrace_state(engine, parent);
		if (unlikely(state == NULL))
			return UTRACE_ACTION_RESUME;
	}

	if (event)
		return ptrace_event(engine, parent, state, event);

	put_ptrace_state(state);

	return UTRACE_ACTION_RESUME;
}
/*
 * The vfork child has released our parent's mm; report the
 * PTRACE_EVENT_VFORK_DONE stop with the child's pid as the event
 * message for PTRACE_GETEVENTMSG.
 */
static u32
ptrace_report_vfork_done(struct utrace_attached_engine *engine,
			 struct task_struct *parent, pid_t child_pid)
{
	struct ptrace_state *state = get_ptrace_state(engine, parent);
	if (unlikely(state == NULL))
		return UTRACE_ACTION_RESUME;

	state->have_eventmsg = 1;
	state->u.eventmsg = child_pid;
	return ptrace_event(engine, parent, state, PTRACE_EVENT_VFORK_DONE);
}
/*
 * Signal-delivery stop: report the signal number to the tracer and
 * stash the siginfo so PTRACE_GETSIGINFO can retrieve it.  A NULL
 * info (no real siginfo) is reported as SIGTRAP.
 */
static u32
ptrace_report_signal(struct utrace_attached_engine *engine,
		     struct task_struct *tsk, struct pt_regs *regs,
		     u32 action, siginfo_t *info,
		     const struct k_sigaction *orig_ka,
		     struct k_sigaction *return_ka)
{
	int signo = info == NULL ? SIGTRAP : info->si_signo;
	struct ptrace_state *state = get_ptrace_state(engine, tsk);
	if (unlikely(state == NULL))
		return UTRACE_ACTION_RESUME;

	/* This stop carries siginfo, not an event message. */
	state->syscall = 0;
	state->have_eventmsg = 0;
	state->u.siginfo = info;
	return ptrace_report(engine, tsk, state, signo) | UTRACE_SIGNAL_IGN;
}
/*
 * Job-control stop/continue in a traced task: notify our tracer
 * directly and suppress the normal SIGCHLD to the real parent.
 */
static u32
ptrace_report_jctl(struct utrace_attached_engine *engine,
		   struct task_struct *tsk, int type)
{
	struct ptrace_state *state = get_ptrace_state(engine, tsk);
	if (unlikely(state == NULL))
		return UTRACE_ACTION_RESUME;

	pr_debug("ptrace %d jctl notify %d type %x exit_code %x\n",
		 tsk->pid, state->parent->pid, type, tsk->exit_code);

	do_notify(tsk, state->parent, type);
	put_ptrace_state(state);

	return UTRACE_JCTL_NOSIGCHLD;
}
/*
 * exec completed in a traced task.  With PTRACE_O_TRACEEXEC we report
 * a PTRACE_EVENT_EXEC stop; otherwise event 0 makes ptrace_event
 * deliver the legacy plain-SIGTRAP stop after exec.
 */
static u32
ptrace_report_exec(struct utrace_attached_engine *engine,
		   struct task_struct *tsk,
		   const struct linux_binprm *bprm,
		   struct pt_regs *regs)
{
	struct ptrace_state *state = get_ptrace_state(engine, tsk);
	if (unlikely(state == NULL))
		return UTRACE_ACTION_RESUME;

	return ptrace_event(engine, tsk, state,
			    (state->options & PTRACE_O_TRACEEXEC)
			    ? PTRACE_EVENT_EXEC : 0);
}
/*
 * Common syscall-tracing stop for both entry and exit.  With
 * PTRACE_O_TRACESYSGOOD the reported stop signal is SIGTRAP|0x80 so
 * the tracer can distinguish syscall stops from real SIGTRAPs.
 */
static u32
ptrace_report_syscall(struct utrace_attached_engine *engine,
		      struct task_struct *tsk, struct pt_regs *regs,
		      int entry)
{
	struct ptrace_state *state = get_ptrace_state(engine, tsk);
	if (unlikely(state == NULL))
		return UTRACE_ACTION_RESUME;

#ifdef PTRACE_SYSEMU
	/* PTRACE_SYSEMU: skip the syscall itself, only report entry. */
	if (entry && state->sysemu)
		tracehook_abort_syscall(regs);
#endif

	/* Mark this as a syscall stop (cleared by ptrace_event). */
	state->syscall = 1;
	return ptrace_report(engine, tsk, state,
			     ((state->options & PTRACE_O_TRACESYSGOOD)
			      ? 0x80 : 0) | SIGTRAP);
}
/* Syscall-entry stop: common syscall report with entry == 1. */
static u32
ptrace_report_syscall_entry(struct utrace_attached_engine *engine,
			    struct task_struct *tsk, struct pt_regs *regs)
{
	return ptrace_report_syscall(engine, tsk, regs, 1);
}
/* Syscall-exit stop: common syscall report with entry == 0. */
static u32
ptrace_report_syscall_exit(struct utrace_attached_engine *engine,
			   struct task_struct *tsk, struct pt_regs *regs)
{
	return ptrace_report_syscall(engine, tsk, regs, 0);
}
/*
 * The task is exiting.  Report PTRACE_EVENT_EXIT with the pending exit
 * status as the event message for PTRACE_GETEVENTMSG.
 */
static u32
ptrace_report_exit(struct utrace_attached_engine *engine,
		   struct task_struct *tsk, long orig_code, long *code)
{
	struct ptrace_state *state = get_ptrace_state(engine, tsk);
	if (unlikely(state == NULL))
		return UTRACE_ACTION_RESUME;

	state->have_eventmsg = 1;
	state->u.eventmsg = *code;
	return ptrace_event(engine, tsk, state, PTRACE_EVENT_EXIT);
}
/*
 * Tell the LSM layer how this traced task's exec should be restricted:
 * a privileged (CAP_SYS_PTRACE) tracer gets the weaker restriction.
 */
static int
ptrace_unsafe_exec(struct utrace_attached_engine *engine,
		   struct task_struct *tsk)
{
	int unsafe = LSM_UNSAFE_PTRACE;
	struct ptrace_state *state = get_ptrace_state(engine, tsk);
	if (likely(state != NULL) && state->cap_sys_ptrace)
		unsafe = LSM_UNSAFE_PTRACE_CAP;
	/*
	 * NOTE(review): called even when state is NULL, unlike the other
	 * callbacks here — verify put_ptrace_state tolerates NULL.
	 */
	put_ptrace_state(state);
	return unsafe;
}
/*
 * Return the task acting as this target's ptrace parent (tracer),
 * or NULL if our state is already gone.
 */
static struct task_struct *
ptrace_tracer_task(struct utrace_attached_engine *engine,
		   struct task_struct *target)
{
	struct task_struct *parent = NULL;
	struct ptrace_state *state = get_ptrace_state(engine, target);
	if (likely(state != NULL)) {
		parent = state->parent;
		put_ptrace_state(state);
	}
	return parent;
}
/*
 * May @caller access @target's memory via ptrace?  Only its own tracer,
 * and only while the target is quiesced or stopped; the security hook
 * gets the final word.
 */
static int
ptrace_allow_access_process_vm(struct utrace_attached_engine *engine,
			       struct task_struct *target,
			       struct task_struct *caller)
{
	struct ptrace_state *state;
	/*
	 * NOTE(review): initializer reconstructed from an extraction gap;
	 * "ours" must be 0 when no state exists for the return below.
	 */
	int ours = 0;

	state = get_ptrace_state(engine, target);
	if (likely(state != NULL)) {
		ours = (((engine->flags & UTRACE_ACTION_QUIESCE)
			 || target->state == TASK_STOPPED)
			&& state->parent == caller);
		put_ptrace_state(state);
	}

	return ours && security_ptrace(caller, target) == 0;
}
/*
 * The utrace callback table wiring ptrace's reporting into the utrace
 * engine attached to each traced task (forward-declared above).
 */
static const struct utrace_engine_ops ptrace_utrace_ops =
{
	.report_syscall_entry = ptrace_report_syscall_entry,
	.report_syscall_exit = ptrace_report_syscall_exit,
	.report_exec = ptrace_report_exec,
	.report_jctl = ptrace_report_jctl,
	.report_signal = ptrace_report_signal,
	.report_vfork_done = ptrace_report_vfork_done,
	.report_clone = ptrace_report_clone,
	.report_exit = ptrace_report_exit,
	.report_death = ptrace_report_death,
	.report_reap = ptrace_report_reap,
	.unsafe_exec = ptrace_unsafe_exec,
	.tracer_task = ptrace_tracer_task,
	.allow_access_process_vm = ptrace_allow_access_process_vm,
};