/*
 * Tracing hooks
 *
 * Copyright (C) 2006, 2007 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License v.2.
 *
 * Red Hat Author: Roland McGrath.
 *
 * This file defines hook entry points called by core code where
 * user tracing/debugging support might need to do something.
 * These entry points are called tracehook_*.  Each hook declared below
 * has a detailed comment giving the context (locking et al) from
 * which it is called, and the meaning of its return value (if any).
 *
 * We also declare here tracehook_* functions providing access to low-level
 * interrogation and control of threads.  These functions must be called
 * on either the current thread or on a quiescent thread.  We say a
 * thread is "quiescent" if it is in TASK_STOPPED or TASK_TRACED state,
 * we are guaranteed it will not be woken up and return to user mode, and
 * we have called wait_task_inactive on it.
 */

#ifndef _LINUX_TRACEHOOK_H
#define _LINUX_TRACEHOOK_H	1

#include <linux/sched.h>
#include <linux/uaccess.h>
#include <linux/utrace.h>
struct linux_binprm;
struct pt_regs;


/*
 * The machine-specific asm/tracehook.h file is responsible for declaring
 * the following entry points.  These can be called only on a quiescent
 * thread, or the current thread when it is about to return to user mode.
 *
 * Single-step control.  When enabled, the next instruction or syscall exit
 * produces a SIGTRAP.  Enabling or disabling redundantly is harmless.
 *
 *	void tracehook_enable_single_step(struct task_struct *tsk);
 *	void tracehook_disable_single_step(struct task_struct *tsk);
 *	int tracehook_single_step_enabled(struct task_struct *tsk);
 *
 * If those calls are defined, #define ARCH_HAS_SINGLE_STEP to nonzero.
 * Do not #define it if these calls are never available in this kernel config.
 * If defined, the value of ARCH_HAS_SINGLE_STEP can be constant or variable.
 * It should evaluate to nonzero if the hardware is able to support
 * tracehook_enable_single_step.  If it's a variable expression, it
 * should be one that can be evaluated in modules, i.e. uses exported symbols.
 *
 * Block-step control (trap on control transfer), when available.
 * tracehook_disable_block_step will be called after
 * tracehook_enable_single_step.  When enabled, the next jump, or other
 * control transfer or syscall exit, produces a SIGTRAP.
 * Enabling or disabling redundantly is harmless.
 *
 *	void tracehook_enable_block_step(struct task_struct *tsk);
 *	void tracehook_disable_block_step(struct task_struct *tsk);
 *	int tracehook_block_step_enabled(struct task_struct *tsk);
 *
 * If those calls are defined, #define ARCH_HAS_BLOCK_STEP to nonzero.
 * Do not #define it if these calls are never available in this kernel config.
 * If defined, the value of ARCH_HAS_BLOCK_STEP can be constant or variable.
 * It should evaluate to nonzero if the hardware is able to support
 * tracehook_enable_block_step.  If it's a variable expression, it
 * should be one that can be evaluated in modules, i.e. uses exported symbols.
 *
 * Control system call tracing.  When enabled a syscall entry or exit
 * produces a call to tracehook_report_syscall, below.
 *
 *	void tracehook_enable_syscall_trace(struct task_struct *tsk);
 *	void tracehook_disable_syscall_trace(struct task_struct *tsk);
 *
 * When stopped in tracehook_report_syscall for syscall entry,
 * abort the syscall so no kernel function is called.
 * If the register state was not otherwise updated before,
 * this produces an -ENOSYS error return as for an invalid syscall number.
 *
 *	void tracehook_abort_syscall(struct pt_regs *regs);
 *
 * Return the regset view (see below) that is native for the given process.
 * For example, what it would access when it called ptrace.
 * Throughout the life of the process, this only changes at exec.
 *
 *	const struct utrace_regset_view *utrace_native_view(struct task_struct *);
 *
 ***/
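/*
 * An illustrative sketch (not part of this interface) of how a tracing
 * engine might use the step-control entry points above when resuming a
 * quiescent thread; the target and want_step names are invented for the
 * example:
 *
 *	#ifdef ARCH_HAS_SINGLE_STEP
 *		if (ARCH_HAS_SINGLE_STEP && want_step)
 *			tracehook_enable_single_step(target);
 *		else
 *			tracehook_disable_single_step(target);
 *	#endif
 *
 * The #ifdef guard reflects that the calls exist only when
 * ARCH_HAS_SINGLE_STEP is #defined at all, while the run-time test covers
 * the case where its value is a variable expression reporting whether the
 * hardware actually supports it.  Redundant enabling or disabling is
 * harmless, so no tracehook_single_step_enabled check is needed first.
 */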
/*
 * This data structure describes a machine resource we call a register set.
 * This is part of the state of an individual thread, not necessarily
 * actual CPU registers per se.  A register set consists of a number of
 * similar slots, given by ->n.  Each slot is ->size bytes, and aligned to
 * ->align bytes (which is at least ->size).
 *
 * As described above, these entry points can be called on the current
 * thread or on a quiescent thread.  The pos argument must be aligned
 * according to ->align; the count argument must be a multiple of ->size.
 * These functions are not responsible for checking for invalid arguments.
 *
 * When there is a natural value to use as an index, ->bias gives the
 * difference between the natural index and the slot index for the
 * register set.  For example, x86 GDT segment descriptors form a regset;
 * the segment selector produces a natural index, but only a subset of
 * that index space is available as a regset (the TLS slots); subtracting
 * ->bias from a segment selector index value computes the regset slot.
 */
struct utrace_regset {
	unsigned int n;		/* Number of slots (registers).  */
	unsigned int size;	/* Size in bytes of a slot (register).  */
	unsigned int align;	/* Required alignment, in bytes.  */
	unsigned int bias;	/* Bias from natural indexing.  */

	/*
	 * Return -ENODEV if not available on the hardware found.
	 * Return 0 if no interesting state in this thread.
	 * Return >0 number of ->size units of interesting state.
	 * Any get call fetching state beyond that number will
	 * see the default initialization state for this data,
	 * so a caller that knows what the default state is need
	 * not copy it all out.
	 * This call is optional; the pointer is NULL if there
	 * is no inexpensive check to yield a value < ->n.
	 */
	int (*active)(struct task_struct *, const struct utrace_regset *);

	/*
	 * Fetch and store register values.  Return 0 on success; -EIO or
	 * -ENODEV are usual failure returns.  The pos and count values are
	 * in bytes, but must be properly aligned.  If kbuf is non-null,
	 * that buffer is used and ubuf is ignored.  If kbuf is NULL, then
	 * ubuf gives a userland pointer to access directly, and an -EFAULT
	 * return value is possible.
	 */
	int (*get)(struct task_struct *, const struct utrace_regset *,
		   unsigned int pos, unsigned int count,
		   void *kbuf, void __user *ubuf);
	int (*set)(struct task_struct *, const struct utrace_regset *,
		   unsigned int pos, unsigned int count,
		   const void *kbuf, const void __user *ubuf);

	/*
	 * This call is optional; usually the pointer is NULL.
	 * When provided, there is some user memory associated
	 * with this regset's hardware, such as memory backing
	 * cached register data on register window machines; the
	 * regset's data controls what user memory is used
	 * (e.g. via the stack pointer value).
	 *
	 * Write register data back to user memory.  If the
	 * immediate flag is nonzero, it must be written to the
	 * user memory so uaccess/access_process_vm can see it
	 * when this call returns; if zero, then it must be
	 * written back by the time the task completes a context
	 * switch (as synchronized with wait_task_inactive).
	 * Return 0 on success or if there was nothing to do,
	 * -EFAULT for a memory problem (bad stack pointer or
	 * whatever), or -EIO for a hardware problem.
	 */
	int (*writeback)(struct task_struct *, const struct utrace_regset *,
			 int immediate);
};
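/*
 * As an illustrative sketch only, an arch might describe a regset of
 * general registers, each one machine word, like this; the names
 * my_arch_regsets, my_genregs_get, and my_genregs_set are invented for
 * the example and are not part of this interface:
 *
 *	static const struct utrace_regset my_arch_regsets[] = {
 *		{
 *			.n = ELF_NGREG,
 *			.size = sizeof(long), .align = sizeof(long),
 *			.get = my_genregs_get, .set = my_genregs_set,
 *		},
 *	};
 *
 * A caller reading register slot i of this regset would pass
 * pos = i * sizeof(long) and count = sizeof(long) to ->get.
 */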
/*
 * A regset view is a collection of regsets (struct utrace_regset, above).
 * This describes all the state of a thread that can be seen from a given
 * architecture/ABI environment.  More than one view might refer to the
 * same utrace_regset, or more than one regset might refer to the same
 * machine-specific state in the thread.  For example, a 32-bit thread's
 * state could be examined from the 32-bit view or from the 64-bit view.
 * Either method reaches the same thread register state, doing appropriate
 * widening or truncation.
 */
struct utrace_regset_view {
	const char *name;	/* Identifier, e.g. ELF_PLATFORM string.  */

	const struct utrace_regset *regsets;
	unsigned int n;

	/*
	 * EM_* value for which this is the native view, if any.
	 */
	u16 e_machine;
};


/*
 * These two are helpers for writing regset get/set functions in arch code.
 * Use one or more calls sequentially for each chunk of regset data stored
 * contiguously in memory.  Call with constants for start_pos and end_pos,
 * giving the range of byte positions in the regset that data corresponds
 * to; end_pos can be -1 if this chunk is at the end of the regset layout.
 * Each call updates the arguments to point past its chunk.
 */
static inline int utrace_regset_copyout(unsigned int *pos, unsigned int *count,
					void **kbuf, void __user **ubuf,
					const void *data,
					int start_pos, int end_pos)
{
	if (*count == 0)
		return 0;
	BUG_ON(*pos < start_pos);
	if (end_pos < 0 || *pos < end_pos) {
		unsigned int copy = (end_pos < 0 ? *count
				     : min(*count, end_pos - *pos));
		data += *pos - start_pos;
		if (*kbuf) {
			memcpy(*kbuf, data, copy);
			*kbuf += copy;
		} else if (copy_to_user(*ubuf, data, copy))
			return -EFAULT;
		else
			*ubuf += copy;
		*pos += copy;
		*count -= copy;
	}
	return 0;
}

static inline int utrace_regset_copyin(unsigned int *pos, unsigned int *count,
				       const void **kbuf,
				       const void __user **ubuf, void *data,
				       int start_pos, int end_pos)
{
	if (*count == 0)
		return 0;
	BUG_ON(*pos < start_pos);
	if (end_pos < 0 || *pos < end_pos) {
		unsigned int copy = (end_pos < 0 ? *count
				     : min(*count, end_pos - *pos));
		data += *pos - start_pos;
		if (*kbuf) {
			memcpy(data, *kbuf, copy);
			*kbuf += copy;
		} else if (copy_from_user(data, *ubuf, copy))
			return -EFAULT;
		else
			*ubuf += copy;
		*pos += copy;
		*count -= copy;
	}
	return 0;
}

/*
 * These two parallel the two above, but for portions of a regset layout
 * that always read as all-zero or for which writes are ignored.
 */
static inline int utrace_regset_copyout_zero(unsigned int *pos,
					     unsigned int *count,
					     void **kbuf, void __user **ubuf,
					     int start_pos, int end_pos)
{
	if (*count == 0)
		return 0;
	BUG_ON(*pos < start_pos);
	if (end_pos < 0 || *pos < end_pos) {
		unsigned int copy = (end_pos < 0 ? *count
				     : min(*count, end_pos - *pos));
		if (*kbuf) {
			memset(*kbuf, 0, copy);
			*kbuf += copy;
		} else if (clear_user(*ubuf, copy))
			return -EFAULT;
		else
			*ubuf += copy;
		*pos += copy;
		*count -= copy;
	}
	return 0;
}

static inline int utrace_regset_copyin_ignore(unsigned int *pos,
					      unsigned int *count,
					      const void **kbuf,
					      const void __user **ubuf,
					      int start_pos, int end_pos)
{
	if (*count == 0)
		return 0;
	BUG_ON(*pos < start_pos);
	if (end_pos < 0 || *pos < end_pos) {
		unsigned int copy = (end_pos < 0 ? *count
				     : min(*count, end_pos - *pos));
		if (*kbuf)
			*kbuf += copy;
		else
			*ubuf += copy;
		*pos += copy;
		*count -= copy;
	}
	return 0;
}
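/*
 * An illustrative sketch of how an arch might implement a regset ->get
 * function using the helpers above.  The names my_genregs_get,
 * my_arch_fetch_gregs, and MY_NGREGS are invented for the example and are
 * not part of this interface.  The layout assumed here is MY_NGREGS longs
 * of register data followed by padding that always reads as zero:
 *
 *	static int my_genregs_get(struct task_struct *target,
 *				  const struct utrace_regset *regset,
 *				  unsigned int pos, unsigned int count,
 *				  void *kbuf, void __user *ubuf)
 *	{
 *		long gregs[MY_NGREGS];
 *		int ret;
 *
 *		my_arch_fetch_gregs(target, gregs);
 *
 *		ret = utrace_regset_copyout(&pos, &count, &kbuf, &ubuf,
 *					    gregs, 0, sizeof(gregs));
 *		if (ret)
 *			return ret;
 *
 *		return utrace_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
 *						  sizeof(gregs), -1);
 *	}
 *
 * Each helper call consumes the part of (pos, count) that overlaps its
 * chunk and advances kbuf or ubuf, so the calls can simply be made in
 * layout order.
 */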
/***
 ***
 *** Following are entry points from core code, where the user debugging
 *** support can affect the normal behavior.  The locking situation is
 *** described for each call.
 ***
 ***/

/*
 * Called in copy_process when setting up the copied task_struct,
 * with tasklist_lock held for writing.
 */
static inline void tracehook_init_task(struct task_struct *child)
{
	utrace_init_task(child);
}

/*
 * Called from release_task, no locks held.
 * After this, there should be no tracing entanglements.
 */
static inline void tracehook_release_task(struct task_struct *p)
{
	smp_mb();
	if (tsk_utrace_struct(p) != NULL)
		utrace_release_task(p);
}

/*
 * Return nonzero to trigger a BUG_ON crash in release_task.
 * This should verify that there is no tracing-related state
 * still affecting the task_struct about to be released.
 * Called with tasklist_lock held for writing.
 */
static inline int tracehook_check_released(struct task_struct *p)
{
	return unlikely(tsk_utrace_struct(p) != NULL);
}

/*
 * do_notify_parent_cldstop calls this when it's about to generate a SIGCHLD
 * for a job control stop.  Return nonzero to prevent that signal generation.
 * Called with tasklist_lock held for reading, sometimes with irqs disabled.
 */
static inline int tracehook_notify_cldstop(struct task_struct *tsk,
					   const siginfo_t *info)
{
	return (tsk_utrace_flags(tsk) & UTRACE_ACTION_NOREAP);
}

/*
 * exit_notify calls this with tasklist_lock held for writing.
 * Return nonzero to prevent any normal SIGCHLD generation for this
 * thread's death (i.e. when it is not ignored and its thread group is
 * empty).  This call must set *noreap to 0, or to 1 to force this thread
 * to become a zombie when it would normally reap itself.
 * The *death_cookie is passed to tracehook_report_death (below).
 */
static inline int tracehook_notify_death(struct task_struct *tsk,
					 int *noreap, void **death_cookie)
{
	*death_cookie = tsk_utrace_struct(tsk);
	if (tsk_utrace_flags(tsk) & UTRACE_ACTION_NOREAP) {
		*noreap = 1;
		return 1;
	}
	*noreap = 0;
	return 0;
}

/*
 * Return zero iff tracing doesn't care to examine this fatal signal,
 * so it can short-circuit normal delivery directly to a group exit.
 * Called with tsk->sighand->siglock held.
 */
static inline int tracehook_consider_fatal_signal(struct task_struct *tsk,
						  int sig)
{
	return (tsk_utrace_flags(tsk) & (UTRACE_EVENT(SIGNAL_TERM)
					 | UTRACE_EVENT(SIGNAL_CORE)));
}

/*
 * Return zero iff tracing doesn't care to examine this ignored signal,
 * so it can short-circuit normal delivery and never even get queued.
 * Either the handler is SIG_DFL and sig's default is ignore, or it's SIG_IGN.
 * Called with tsk->sighand->siglock held.
 */
static inline int tracehook_consider_ignored_signal(struct task_struct *tsk,
						    int sig, void *handler)
{
	return (tsk_utrace_flags(tsk) & UTRACE_EVENT(SIGNAL_IGN));
}

/*
 * Called with the siglock held when computing tsk's signal_pending flag.
 * Return nonzero to force the signal_pending flag on, so that
 * tracehook_get_signal will be called before the next return to user mode.
 */
static inline int tracehook_induce_sigpending(struct task_struct *tsk)
{
	return unlikely(tsk_utrace_flags(tsk) & UTRACE_ACTION_QUIESCE);
}

/*
 * Called with the siglock held before dequeuing pending signals.
 * Return zero to check for a real pending signal normally.
 * Return -1 after releasing the siglock to repeat the check.
 * Return a signal number to induce an artificial signal delivery,
 * setting *info and *return_ka to specify its details and behavior.
 */
static inline int tracehook_get_signal(struct task_struct *tsk,
				       struct pt_regs *regs,
				       siginfo_t *info,
				       struct k_sigaction *return_ka)
{
	if (unlikely(tsk_utrace_flags(tsk)))
		return utrace_get_signal(tsk, regs, info, return_ka);
	return 0;
}
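/*
 * An illustrative sketch (not a quote of the core signal code) of how a
 * caller is expected to act on the return convention described above;
 * relock and mask stand in for the caller's own label and blocked-set:
 *
 *	signr = tracehook_get_signal(current, regs, info, return_ka);
 *	if (signr == -1)
 *		goto relock;
 *	if (signr == 0)
 *		signr = dequeue_signal(current, mask, info);
 *
 * A -1 return means the siglock was released, so the caller must retake it
 * and repeat the whole check.  A zero return falls through to the normal
 * dequeue of a real pending signal.  A positive return is the number of an
 * artificially induced signal, to be delivered using the *info and
 * *return_ka details that the hook filled in.
 */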
/*
 * Called with no locks held when about to stop for job control;
 * we are already in TASK_STOPPED state, about to call schedule.
 * Return zero if the normal SIGCHLD should be generated, which
 * will happen if last_one is true meaning this is the last thread
 * in the thread group to stop.
 */
static inline int tracehook_finish_stop(int last_one)
{
	if (tsk_utrace_flags(current) & UTRACE_EVENT(JCTL))
		return utrace_report_jctl(CLD_STOPPED);
	return 0;
}

/*
 * Return nonzero if the child's parent (current) should be prevented
 * from seeing its child in TASK_STOPPED state when it waits with WSTOPPED.
 * Called with tasklist_lock held for reading.
 */
static inline int tracehook_inhibit_wait_stopped(struct task_struct *child)
{
	return (tsk_utrace_flags(child) & UTRACE_ACTION_NOREAP);
}

/*
 * Return nonzero if the child's parent (current) should be prevented
 * from seeing its child in TASK_ZOMBIE state when it waits with WEXITED.
 * Called with tasklist_lock held for reading.
 */
static inline int tracehook_inhibit_wait_zombie(struct task_struct *child)
{
	return (tsk_utrace_flags(child) & UTRACE_ACTION_NOREAP);
}

/*
 * Return nonzero if the child's parent (current) should be prevented
 * from seeing its child resuming after job stop when it waits with
 * WCONTINUED.  Called with tasklist_lock held for reading.
 */
static inline int tracehook_inhibit_wait_continued(struct task_struct *child)
{
	return (tsk_utrace_flags(child) & UTRACE_ACTION_NOREAP);
}

/*
 * Return LSM_UNSAFE_* bits applied to an exec because of tracing.
 * Called with task_lock(tsk) held.
 */
static inline int tracehook_unsafe_exec(struct task_struct *tsk)
{
	if (tsk_utrace_flags(tsk))
		return utrace_unsafe_exec(tsk);
	return 0;
}

/*
 * Return the task_struct for the task using ptrace on this one, or NULL.
 * Must be called with rcu_read_lock held to keep the returned struct alive.
 *
 * At exec time, this may be called with task_lock(p) still held from when
 * tracehook_unsafe_exec was just called.
 *
 * The value is also used to display after "TracerPid:" in /proc/PID/status,
 * where it is called with only rcu_read_lock held.
 */
static inline struct task_struct *tracehook_tracer_task(struct task_struct *p)
{
	if (tsk_utrace_flags(p))
		return utrace_tracer_task(p);
	return NULL;
}
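/*
 * For instance, the "TracerPid:" report mentioned above amounts to a use
 * like this illustrative sketch, where tracer and tracer_pid are just
 * local variables of the example:
 *
 *	rcu_read_lock();
 *	tracer = tracehook_tracer_task(p);
 *	tracer_pid = tracer == NULL ? 0 : tracer->pid;
 *	rcu_read_unlock();
 *
 * Holding rcu_read_lock across both steps is what keeps the returned
 * task_struct alive while its pid is read.
 */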
/*
 * Return nonzero if the current task should be allowed to use
 * access_process_vm on the given task.
 */
static inline int tracehook_allow_access_process_vm(struct task_struct *tsk)
{
	if (tsk == current)
		return 1;
	if (tsk_utrace_flags(tsk))
		return utrace_allow_access_process_vm(tsk);
	return 0;
}


/***
 ***
 *** The following declarations are hook stubs where core code reports
 *** events.  These are called without locks, from the thread having the
 *** event.  In all tracehook_report_* calls, no locks are held and the
 *** thread is in a state close to returning to user mode with little
 *** baggage to unwind, except as noted below for tracehook_report_clone.
 *** It is generally OK to block in these places if you want the user
 *** thread to be suspended.
 ***
 ***/

/*
 * Thread has just become a zombie (exit_state==TASK_ZOMBIE) or is about to
 * self-reap (exit_state==EXIT_DEAD).  If normal reaping is not inhibited,
 * tsk->exit_state might be changing in parallel.  The death_cookie was
 * passed back by tracehook_notify_death (above).
 */
static inline void tracehook_report_death(struct task_struct *tsk,
					  int exit_state, void *death_cookie)
{
	smp_mb();
	if (tsk_utrace_flags(tsk) & (UTRACE_EVENT(DEATH)
				     | UTRACE_ACTION_QUIESCE))
		utrace_report_death(tsk, death_cookie);
}

/*
 * This is called when tracehook_inhibit_wait_zombie(p) returned true
 * and a previously delayed group_leader is now eligible for reaping.
 * It's called from release_task, with no locks held, and p is not current.
 */
static inline void tracehook_report_delayed_group_leader(struct task_struct *p)
{
	utrace_report_delayed_group_leader(p);
}

/*
 * exec completed; we are shortly going to return to user mode.
 * The freshly initialized register state can be seen and changed here.
 */
static inline void tracehook_report_exec(struct linux_binprm *bprm,
					 struct pt_regs *regs)
{
	if (tsk_utrace_flags(current) & UTRACE_EVENT(EXEC))
		utrace_report_exec(bprm, regs);
}

/*
 * Called from do_exit, we are about to exit.  The code returned to the
 * parent for wait can be changed here.
 */
static inline void tracehook_report_exit(long *exit_code)
{
	if (tsk_utrace_flags(current) & UTRACE_EVENT(EXIT))
		utrace_report_exit(exit_code);
}

/*
 * Called after a child is set up, but before it has been started or
 * been given its CLONE_STOPPED initial stop.  (See also tracehook_init_task.)
 * This is not a good place to block, because the child has not started yet.
 * Suspend the child here if desired, and block in
 * tracehook_report_clone_complete (below).  This must prevent the child
 * from self-reaping if tracehook_report_clone_complete uses the task_struct
 * pointer; otherwise it might have died and been released by the time
 * tracehook_report_clone_complete is called.
 */
static inline void tracehook_report_clone(unsigned long clone_flags,
					  struct task_struct *child)
{
	if (tsk_utrace_flags(current) & UTRACE_EVENT(CLONE))
		utrace_report_clone(clone_flags, child);
}

/*
 * Called after the child has started running, shortly after
 * tracehook_report_clone.  This is just before the clone/fork syscall
 * returns, or blocks for vfork child completion if
 * (clone_flags & CLONE_VFORK).  The child pointer may be invalid if a
 * self-reaping child died and tracehook_report_clone took no action to
 * prevent it from self-reaping.
 */
static inline void tracehook_report_clone_complete(unsigned long clone_flags,
						   pid_t pid,
						   struct task_struct *child)
{
	if (tsk_utrace_flags(current) & UTRACE_ACTION_QUIESCE)
		utrace_quiescent(current, NULL);
}

/*
 * Called after a CLONE_VFORK parent has waited for the child to complete.
 * The clone/vfork system call will return immediately after this.
 * The child pointer may be invalid if a self-reaping child died and
 * tracehook_report_clone took no action to prevent it from self-reaping.
 */
static inline void tracehook_report_vfork_done(struct task_struct *child,
					       pid_t child_pid)
{
	if (tsk_utrace_flags(current) & UTRACE_EVENT(VFORK_DONE))
		utrace_report_vfork_done(child_pid);
}
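/*
 * Taken together, the clone-related hooks above are expected to be reached
 * in the order sketched below on the fork path.  This is an illustrative
 * paraphrase of the contracts described above, not a quote of the core
 * code; vfork_done stands in for the parent's completion:
 *
 *	tracehook_init_task(child);
 *	...
 *	tracehook_report_clone(clone_flags, child);
 *	wake_up_new_task(child, clone_flags);
 *	tracehook_report_clone_complete(clone_flags, pid, child);
 *	if (clone_flags & CLONE_VFORK) {
 *		wait_for_completion(&vfork_done);
 *		tracehook_report_vfork_done(child, pid);
 *	}
 *	return pid;
 *
 * By tracehook_report_clone_complete time the child may already have run
 * and died, so the child pointer is only safe to use there if
 * tracehook_report_clone prevented self-reaping.
 */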
/*
 * Called for system call entry or exit.
 */
static inline void tracehook_report_syscall(struct pt_regs *regs, int is_exit)
{
	if (tsk_utrace_flags(current) & (is_exit ? UTRACE_EVENT(SYSCALL_EXIT)
					 : UTRACE_EVENT(SYSCALL_ENTRY)))
		utrace_report_syscall(regs, is_exit);
}

/*
 * Called after system call exit if single/block-stepped into the syscall.
 */
static inline void tracehook_report_syscall_step(struct pt_regs *regs)
{
}

/*
 * Called when a signal handler has been set up.
 * Register and stack state reflects the user handler about to run.
 * Signal mask changes have already been made.
 */
static inline void tracehook_report_handle_signal(int sig,
						  const struct k_sigaction *ka,
						  const sigset_t *oldset,
						  struct pt_regs *regs)
{
	struct task_struct *tsk = current;

	if ((tsk_utrace_flags(tsk) & UTRACE_EVENT_SIGNAL_ALL)
	    && (tsk_utrace_flags(tsk) & (UTRACE_ACTION_SINGLESTEP
					 | UTRACE_ACTION_BLOCKSTEP)))
		utrace_signal_handler_singlestep(tsk, regs);
}

#endif	/* _LINUX_TRACEHOOK_H */