X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fx86_64%2Fkernel%2Fptrace.c;h=2d50024c9f308b8fb5f390fb46e88d33bc843d3d;hb=43bc926fffd92024b46cafaf7350d669ba9ca884;hp=87ab1e27f00e3ba0b145ee7926041cfa5a0b254e;hpb=a8e794ca871505c8ea96cc102f4ad555c5231d7f;p=linux-2.6.git diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c index 87ab1e27f..2d50024c9 100644 --- a/arch/x86_64/kernel/ptrace.c +++ b/arch/x86_64/kernel/ptrace.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include #include @@ -34,9 +36,12 @@ * in exit.c or in signal.c. */ -/* determines which flags the user has access to. */ -/* 1 = access 0 = no access */ -#define FLAG_MASK 0x44dd5UL +/* + * Determines which flags the user has access to [1 = access, 0 = no access]. + * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), IOPL(12-13), IF(9). + * Also masks reserved bits (63-22, 15, 5, 3, 1). + */ +#define FLAG_MASK 0x54dd5UL /* set's the trap flag. */ #define TRAP_FLAG 0x100UL @@ -79,6 +84,130 @@ static inline long put_stack_long(struct task_struct *task, int offset, return 0; } +#define LDT_SEGMENT 4 + +unsigned long convert_rip_to_linear(struct task_struct *child, struct pt_regs *regs) +{ + unsigned long addr, seg; + + addr = regs->rip; + seg = regs->cs & 0xffff; + + /* + * We'll assume that the code segments in the GDT + * are all zero-based. That is largely true: the + * TLS segments are used for data, and the PNPBIOS + * and APM bios ones we just ignore here. + */ + if (seg & LDT_SEGMENT) { + u32 *desc; + unsigned long base; + + down(&child->mm->context.sem); + desc = child->mm->context.ldt + (seg & ~7); + base = (desc[0] >> 16) | ((desc[1] & 0xff) << 16) | (desc[1] & 0xff000000); + + /* 16-bit code segment? */ + if (!((desc[1] >> 22) & 1)) + addr &= 0xffff; + addr += base; + up(&child->mm->context.sem); + } + return addr; +} + +static int is_at_popf(struct task_struct *child, struct pt_regs *regs) +{ + int i, copied; + unsigned char opcode[16]; + unsigned long addr = convert_rip_to_linear(child, regs); + + copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); + for (i = 0; i < copied; i++) { + switch (opcode[i]) { + /* popf */ + case 0x9d: + return 1; + + /* CHECKME: 64 65 */ + + /* opcode and address size prefixes */ + case 0x66: case 0x67: + continue; + /* irrelevant prefixes (segment overrides and repeats) */ + case 0x26: case 0x2e: + case 0x36: case 0x3e: + case 0x64: case 0x65: + case 0xf0: case 0xf2: case 0xf3: + continue; + + /* REX prefixes */ + case 0x40 ... 0x4f: + continue; + + /* CHECKME: f0, f2, f3 */ + + /* + * pushf: NOTE! We should probably not let + * the user see the TF bit being set. But + * it's more pain than it's worth to avoid + * it, and a debugger could emulate this + * all in user space if it _really_ cares. + */ + case 0x9c: + default: + return 0; + } + } + return 0; +} + +static void set_singlestep(struct task_struct *child) +{ + struct pt_regs *regs = task_pt_regs(child); + + /* + * Always set TIF_SINGLESTEP - this guarantees that + * we single-step system calls etc.. This will also + * cause us to set TF when returning to user mode. + */ + set_tsk_thread_flag(child, TIF_SINGLESTEP); + + /* + * If TF was already set, don't do anything else + */ + if (regs->eflags & TRAP_FLAG) + return; + + /* Set TF on the kernel stack.. */ + regs->eflags |= TRAP_FLAG; + + /* + * ..but if TF is changed by the instruction we will trace, + * don't mark it as being "us" that set it, so that we + * won't clear it by hand later. + * + * AK: this is not enough, LAHF and IRET can change TF in user space too. + */ + if (is_at_popf(child, regs)) + return; + + child->ptrace |= PT_DTRACE; +} + +static void clear_singlestep(struct task_struct *child) +{ + /* Always clear TIF_SINGLESTEP... */ + clear_tsk_thread_flag(child, TIF_SINGLESTEP); + + /* But touch TF only if it was set by us.. */ + if (child->ptrace & PT_DTRACE) { + struct pt_regs *regs = task_pt_regs(child); + regs->eflags &= ~TRAP_FLAG; + child->ptrace &= ~PT_DTRACE; + } +} + /* * Called by kernel/ptrace.c when detaching.. * @@ -86,10 +215,7 @@ static inline long put_stack_long(struct task_struct *task, int offset, */ void ptrace_disable(struct task_struct *child) { - long tmp; - - tmp = get_stack_long(child, EFL_OFFSET) & ~TRAP_FLAG; - put_stack_long(child, EFL_OFFSET, tmp); + clear_singlestep(child); } static int putreg(struct task_struct *child, @@ -128,13 +254,13 @@ static int putreg(struct task_struct *child, value &= 0xffff; return 0; case offsetof(struct user_regs_struct,fs_base): - if (!((value >> 48) == 0 || (value >> 48) == 0xffff)) - return -EIO; + if (value >= TASK_SIZE_OF(child)) + return -EIO; child->thread.fs = value; return 0; case offsetof(struct user_regs_struct,gs_base): - if (!((value >> 48) == 0 || (value >> 48) == 0xffff)) - return -EIO; + if (value >= TASK_SIZE_OF(child)) + return -EIO; child->thread.gs = value; return 0; case offsetof(struct user_regs_struct, eflags): @@ -179,50 +305,11 @@ static unsigned long getreg(struct task_struct *child, unsigned long regno) } -asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data) +long arch_ptrace(struct task_struct *child, long request, long addr, long data) { - struct task_struct *child; long i, ret; unsigned ui; - /* This lock_kernel fixes a subtle race with suid exec */ - lock_kernel(); - ret = -EPERM; - if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out; - ret = security_ptrace(current->parent, current); - if (ret) - goto out; - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - ret = 0; - goto out; - } - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) - goto out; - if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) - goto out_tsk; - - ret = -EPERM; - if (pid == 1) /* you may not mess with init */ - goto out_tsk; - - if (request == PTRACE_ATTACH) { - ret = ptrace_attach(child); - goto out_tsk; - } - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - goto out_tsk; - switch (request) { /* when I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. */ @@ -234,7 +321,7 @@ asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data ret = -EIO; if (copied != sizeof(tmp)) break; - ret = put_user(tmp,(unsigned long *) data); + ret = put_user(tmp,(unsigned long __user *) data); break; } @@ -248,7 +335,7 @@ asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data break; switch (addr) { - case 0 ... sizeof(struct user_regs_struct): + case 0 ... sizeof(struct user_regs_struct) - sizeof(long): tmp = getreg(child, addr); break; case offsetof(struct user, u_debugreg[0]): @@ -273,7 +360,7 @@ asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data tmp = 0; break; } - ret = put_user(tmp,(unsigned long *) data); + ret = put_user(tmp,(unsigned long __user *) data); break; } @@ -287,33 +374,35 @@ asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data break; case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ + { + int dsize = test_tsk_thread_flag(child, TIF_IA32) ? 3 : 7; ret = -EIO; if ((addr & 7) || addr > sizeof(struct user) - 7) break; switch (addr) { - case 0 ... sizeof(struct user_regs_struct): + case 0 ... sizeof(struct user_regs_struct) - sizeof(long): ret = putreg(child, addr, data); break; /* Disallows to set a breakpoint into the vsyscall */ case offsetof(struct user, u_debugreg[0]): - if (data >= TASK_SIZE-7) break; + if (data >= TASK_SIZE_OF(child) - dsize) break; child->thread.debugreg0 = data; ret = 0; break; case offsetof(struct user, u_debugreg[1]): - if (data >= TASK_SIZE-7) break; + if (data >= TASK_SIZE_OF(child) - dsize) break; child->thread.debugreg1 = data; ret = 0; break; case offsetof(struct user, u_debugreg[2]): - if (data >= TASK_SIZE-7) break; + if (data >= TASK_SIZE_OF(child) - dsize) break; child->thread.debugreg2 = data; ret = 0; break; case offsetof(struct user, u_debugreg[3]): - if (data >= TASK_SIZE-7) break; + if (data >= TASK_SIZE_OF(child) - dsize) break; child->thread.debugreg3 = data; ret = 0; break; @@ -324,9 +413,11 @@ asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data ret = 0; break; case offsetof(struct user, u_debugreg[7]): - data &= ~DR_CONTROL_RESERVED; - for(i=0; i<4; i++) - if ((0x5454 >> ((data >> (16 + 4*i)) & 0xf)) & 1) + /* See arch/i386/kernel/ptrace.c for an explanation of + * this awkward check.*/ + data &= ~DR_CONTROL_RESERVED; + for(i=0; i<4; i++) + if ((0x5554 >> ((data >> (16 + 4*i)) & 0xf)) & 1) break; if (i == 4) { child->thread.debugreg7 = data; @@ -335,26 +426,24 @@ asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data break; } break; + } case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ - case PTRACE_CONT: { /* restart after signal. */ - long tmp; + case PTRACE_CONT: /* restart after signal. */ ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; if (request == PTRACE_SYSCALL) set_tsk_thread_flag(child,TIF_SYSCALL_TRACE); else clear_tsk_thread_flag(child,TIF_SYSCALL_TRACE); + clear_tsk_thread_flag(child, TIF_SINGLESTEP); child->exit_code = data; - /* make sure the single step bit is not set. */ - tmp = get_stack_long(child, EFL_OFFSET); - tmp &= ~TRAP_FLAG; - put_stack_long(child, EFL_OFFSET,tmp); + /* make sure the single step bit is not set. */ + clear_singlestep(child); wake_up_process(child); ret = 0; break; - } #ifdef CONFIG_IA32_EMULATION /* This makes only sense with 32bit programs. Allow a @@ -362,19 +451,20 @@ asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data don't use it against 64bit processes, use PTRACE_ARCH_PRCTL instead. */ case PTRACE_SET_THREAD_AREA: { + struct user_desc __user *p; int old; - get_user(old, &((struct user_desc *)data)->entry_number); - put_user(addr, &((struct user_desc *)data)->entry_number); - ret = do_set_thread_area(&child->thread, - (struct user_desc *)data); - put_user(old, &((struct user_desc *)data)->entry_number); + p = (struct user_desc __user *)data; + get_user(old, &p->entry_number); + put_user(addr, &p->entry_number); + ret = do_set_thread_area(&child->thread, p); + put_user(old, &p->entry_number); break; case PTRACE_GET_THREAD_AREA: - get_user(old, &((struct user_desc *)data)->entry_number); - put_user(addr, &((struct user_desc *)data)->entry_number); - ret = do_get_thread_area(&child->thread, - (struct user_desc *)data); - put_user(old, &((struct user_desc *)data)->entry_number); + p = (struct user_desc __user *)data; + get_user(old, &p->entry_number); + put_user(addr, &p->entry_number); + ret = do_get_thread_area(&child->thread, p); + put_user(old, &p->entry_number); break; } #endif @@ -390,39 +480,28 @@ asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data * perhaps it should be put in the status that it wants to * exit. */ - case PTRACE_KILL: { - long tmp; - + case PTRACE_KILL: ret = 0; - if (child->state == TASK_ZOMBIE) /* already dead */ + if (child->exit_state == EXIT_ZOMBIE) /* already dead */ break; + clear_tsk_thread_flag(child, TIF_SINGLESTEP); child->exit_code = SIGKILL; /* make sure the single step bit is not set. */ - tmp = get_stack_long(child, EFL_OFFSET) & ~TRAP_FLAG; - put_stack_long(child, EFL_OFFSET, tmp); + clear_singlestep(child); wake_up_process(child); break; - } - - case PTRACE_SINGLESTEP: { /* set the trap flag. */ - long tmp; + case PTRACE_SINGLESTEP: /* set the trap flag. */ ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; clear_tsk_thread_flag(child,TIF_SYSCALL_TRACE); - if ((child->ptrace & PT_DTRACE) == 0) { - /* Spurious delayed TF traps may occur */ - child->ptrace |= PT_DTRACE; - } - tmp = get_stack_long(child, EFL_OFFSET) | TRAP_FLAG; - put_stack_long(child, EFL_OFFSET, tmp); + set_singlestep(child); child->exit_code = data; /* give it a chance to run. */ wake_up_process(child); ret = 0; break; - } case PTRACE_DETACH: /* detach a process that was attached. */ @@ -430,51 +509,53 @@ asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data break; case PTRACE_GETREGS: { /* Get all gp regs from the child. */ - if (!access_ok(VERIFY_WRITE, (unsigned *)data, FRAME_SIZE)) { + if (!access_ok(VERIFY_WRITE, (unsigned __user *)data, + sizeof(struct user_regs_struct))) { ret = -EIO; break; } + ret = 0; for (ui = 0; ui < sizeof(struct user_regs_struct); ui += sizeof(long)) { - __put_user(getreg(child, ui),(unsigned long *) data); + ret |= __put_user(getreg(child, ui),(unsigned long __user *) data); data += sizeof(long); } - ret = 0; break; } case PTRACE_SETREGS: { /* Set all gp regs in the child. */ unsigned long tmp; - if (!access_ok(VERIFY_READ, (unsigned *)data, FRAME_SIZE)) { + if (!access_ok(VERIFY_READ, (unsigned __user *)data, + sizeof(struct user_regs_struct))) { ret = -EIO; break; } + ret = 0; for (ui = 0; ui < sizeof(struct user_regs_struct); ui += sizeof(long)) { - __get_user(tmp, (unsigned long *) data); + ret |= __get_user(tmp, (unsigned long __user *) data); putreg(child, ui, tmp); data += sizeof(long); } - ret = 0; break; } case PTRACE_GETFPREGS: { /* Get the child extended FPU state. */ - if (!access_ok(VERIFY_WRITE, (unsigned *)data, + if (!access_ok(VERIFY_WRITE, (unsigned __user *)data, sizeof(struct user_i387_struct))) { ret = -EIO; break; } - ret = get_fpregs((struct user_i387_struct *)data, child); + ret = get_fpregs((struct user_i387_struct __user *)data, child); break; } case PTRACE_SETFPREGS: { /* Set the child extended FPU state. */ - if (!access_ok(VERIFY_READ, (unsigned *)data, + if (!access_ok(VERIFY_READ, (unsigned __user *)data, sizeof(struct user_i387_struct))) { ret = -EIO; break; } - child->used_math = 1; - ret = set_fpregs(child, (struct user_i387_struct *)data); + set_stopped_child_used_math(child); + ret = set_fpregs(child, (struct user_i387_struct __user *)data); break; } @@ -482,10 +563,6 @@ asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data ret = ptrace_request(child, request, addr, data); break; } -out_tsk: - put_task_struct(child); -out: - unlock_kernel(); return ret; } @@ -514,22 +591,35 @@ static void syscall_trace(struct pt_regs *regs) asmlinkage void syscall_trace_enter(struct pt_regs *regs) { - if (unlikely(current->audit_context)) - audit_syscall_entry(current, regs->orig_rax, - regs->rdi, regs->rsi, - regs->rdx, regs->r10); + /* do the secure computing check first */ + secure_computing(regs->orig_rax); if (test_thread_flag(TIF_SYSCALL_TRACE) && (current->ptrace & PT_PTRACED)) syscall_trace(regs); + + if (unlikely(current->audit_context)) { + if (test_thread_flag(TIF_IA32)) { + audit_syscall_entry(AUDIT_ARCH_I386, + regs->orig_rax, + regs->rbx, regs->rcx, + regs->rdx, regs->rsi); + } else { + audit_syscall_entry(AUDIT_ARCH_X86_64, + regs->orig_rax, + regs->rdi, regs->rsi, + regs->rdx, regs->r10); + } + } } asmlinkage void syscall_trace_leave(struct pt_regs *regs) { if (unlikely(current->audit_context)) - audit_syscall_exit(current, regs->rax); + audit_syscall_exit(AUDITSC_RESULT(regs->rax), regs->rax); - if (test_thread_flag(TIF_SYSCALL_TRACE) + if ((test_thread_flag(TIF_SYSCALL_TRACE) + || test_thread_flag(TIF_SINGLESTEP)) && (current->ptrace & PT_PTRACED)) syscall_trace(regs); }