X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;ds=sidebyside;f=fs%2Fexec.c;h=95ae49ba1232329cfbf044951f5ebe23f98311ec;hb=6d308e497b57edb95440a32d54e316c31c2ca272;hp=bca37d6c0f72150d83f65d12ac23b114baf51a6e;hpb=c5fb35bebffd73f6c75e2947f99fceff83f187b3;p=linux-2.6.git diff --git a/fs/exec.c b/fs/exec.c index bca37d6c0..95ae49ba1 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -46,7 +47,8 @@ #include #include #include -#include +#include +#include #include #include @@ -58,6 +60,9 @@ int core_uses_pid; char core_pattern[65] = "core"; +int suid_dumpable = 0; + +EXPORT_SYMBOL(suid_dumpable); /* The maximal length of core_pattern is also specified in sysctl.c */ static struct linux_binfmt *formats; @@ -339,12 +344,14 @@ out_sig: force_sig(SIGKILL, current); } +#define EXTRA_STACK_VM_PAGES 20 /* random */ + int setup_arg_pages(struct linux_binprm *bprm, int executable_stack) { unsigned long stack_base; struct vm_area_struct *mpnt; struct mm_struct *mm = current->mm; - int i; + int i, ret; long arg_size; #ifdef CONFIG_STACK_GROWSUP @@ -376,15 +383,15 @@ int setup_arg_pages(struct linux_binprm *bprm, int executable_stack) memmove(to, to + offset, PAGE_SIZE - offset); kunmap(bprm->page[j - 1]); - /* Adjust bprm->p to point to the end of the strings. */ - bprm->p = PAGE_SIZE * i - offset; - /* Limit stack size to 1GB */ - stack_base = current->rlim[RLIMIT_STACK].rlim_max; + stack_base = current->signal->rlim[RLIMIT_STACK].rlim_max; if (stack_base > (1 << 30)) stack_base = 1 << 30; stack_base = PAGE_ALIGN(STACK_TOP - stack_base); + /* Adjust bprm->p to point to the end of the strings. */ + bprm->p = stack_base + PAGE_SIZE * i - offset; + mm->arg_start = stack_base; arg_size = i << PAGE_SHIFT; @@ -398,11 +405,13 @@ int setup_arg_pages(struct linux_binprm *bprm, int executable_stack) #else stack_base = STACK_TOP - MAX_ARG_PAGES * PAGE_SIZE; #endif - mm->arg_start = bprm->p + stack_base; + bprm->p += stack_base; + mm->arg_start = bprm->p; arg_size = STACK_TOP - (PAGE_MASK & (unsigned long) mm->arg_start); #endif - bprm->p += stack_base; + arg_size += EXTRA_STACK_VM_PAGES * PAGE_SIZE; + if (bprm->loader) bprm->loader += stack_base; bprm->exec += stack_base; @@ -424,11 +433,10 @@ int setup_arg_pages(struct linux_binprm *bprm, int executable_stack) mpnt->vm_mm = mm; #ifdef CONFIG_STACK_GROWSUP mpnt->vm_start = stack_base; - mpnt->vm_end = PAGE_MASK & - (PAGE_SIZE - 1 + (unsigned long) bprm->p); + mpnt->vm_end = stack_base + arg_size; #else - mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p; mpnt->vm_end = STACK_TOP; + mpnt->vm_start = mpnt->vm_end - arg_size; #endif /* Adjust stack execute permissions; explicitly enable * for EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X @@ -441,10 +449,14 @@ int setup_arg_pages(struct linux_binprm *bprm, int executable_stack) mpnt->vm_flags = VM_STACK_FLAGS; mpnt->vm_flags |= mm->def_flags; mpnt->vm_page_prot = protection_map[mpnt->vm_flags & 0x7]; - insert_vm_struct(mm, mpnt); - // mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; - vx_vmpages_sub(mm, mm->total_vm - - ((mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT)); + if ((ret = insert_vm_struct(mm, mpnt))) { + up_write(&mm->mmap_sem); + kmem_cache_free(vm_area_cachep, mpnt); + return ret; + } + // mm->stack_vm = mm->total_vm = vma_pages(mpnt); + vx_vmpages_sub(mm, mm->total_vm - vma_pages(mpnt)); + mm->stack_vm = mm->total_vm; } for (i = 0 ; i < MAX_ARG_PAGES ; i++) { @@ -540,12 +552,6 @@ static int exec_mmap(struct mm_struct *mm) struct task_struct *tsk; struct mm_struct * old_mm, *active_mm; - /* Add it to the list of mm's */ - spin_lock(&mmlist_lock); - list_add(&mm->mmlist, &init_mm.mmlist); - mmlist_nr++; - spin_unlock(&mmlist_lock); - /* Notify parent that we're no longer interested in the old VM */ tsk = current; old_mm = current->mm; @@ -558,6 +564,7 @@ static int exec_mmap(struct mm_struct *mm) activate_mm(active_mm, mm); task_unlock(tsk); arch_pick_mmap_layout(mm); + ckrm_task_change_mm(tsk, old_mm, mm); if (old_mm) { if (active_mm != old_mm) BUG(); mmput(old_mm); @@ -575,7 +582,7 @@ static int exec_mmap(struct mm_struct *mm) */ static inline int de_thread(struct task_struct *tsk) { - struct signal_struct *newsig, *oldsig = tsk->signal; + struct signal_struct *sig = tsk->signal; struct sighand_struct *newsighand, *oldsighand = tsk->sighand; spinlock_t *lock = &oldsighand->siglock; int count; @@ -584,43 +591,16 @@ static inline int de_thread(struct task_struct *tsk) * If we don't share sighandlers, then we aren't sharing anything * and we can just re-use it all. */ - if (atomic_read(&oldsighand->count) <= 1) + if (atomic_read(&oldsighand->count) <= 1) { + BUG_ON(atomic_read(&sig->count) != 1); + exit_itimers(sig); return 0; + } newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL); if (!newsighand) return -ENOMEM; - spin_lock_init(&newsighand->siglock); - atomic_set(&newsighand->count, 1); - memcpy(newsighand->action, oldsighand->action, sizeof(newsighand->action)); - - /* - * See if we need to allocate a new signal structure - */ - newsig = NULL; - if (atomic_read(&oldsig->count) > 1) { - newsig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); - if (!newsig) { - kmem_cache_free(sighand_cachep, newsighand); - return -ENOMEM; - } - atomic_set(&newsig->count, 1); - newsig->group_exit = 0; - newsig->group_exit_code = 0; - newsig->group_exit_task = NULL; - newsig->group_stop_count = 0; - newsig->curr_target = NULL; - init_sigpending(&newsig->shared_pending); - INIT_LIST_HEAD(&newsig->posix_timers); - - newsig->tty = oldsig->tty; - newsig->pgrp = oldsig->pgrp; - newsig->session = oldsig->session; - newsig->leader = oldsig->leader; - newsig->tty_old_pgrp = oldsig->tty_old_pgrp; - } - if (thread_group_empty(current)) goto no_thread_group; @@ -630,7 +610,7 @@ static inline int de_thread(struct task_struct *tsk) */ read_lock(&tasklist_lock); spin_lock_irq(lock); - if (oldsig->group_exit) { + if (sig->group_exit) { /* * Another group action in progress, just * return so that the signal is processed. @@ -638,11 +618,9 @@ static inline int de_thread(struct task_struct *tsk) spin_unlock_irq(lock); read_unlock(&tasklist_lock); kmem_cache_free(sighand_cachep, newsighand); - if (newsig) - kmem_cache_free(signal_cachep, newsig); return -EAGAIN; } - oldsig->group_exit = 1; + sig->group_exit = 1; zap_other_threads(current); read_unlock(&tasklist_lock); @@ -652,14 +630,16 @@ static inline int de_thread(struct task_struct *tsk) count = 2; if (current->pid == current->tgid) count = 1; - while (atomic_read(&oldsig->count) > count) { - oldsig->group_exit_task = current; - oldsig->notify_count = count; + while (atomic_read(&sig->count) > count) { + sig->group_exit_task = current; + sig->notify_count = count; __set_current_state(TASK_UNINTERRUPTIBLE); spin_unlock_irq(lock); schedule(); spin_lock_irq(lock); } + sig->group_exit_task = NULL; + sig->notify_count = 0; spin_unlock_irq(lock); /* @@ -670,14 +650,14 @@ static inline int de_thread(struct task_struct *tsk) if (current->pid != current->tgid) { struct task_struct *leader = current->group_leader, *parent; struct dentry *proc_dentry1, *proc_dentry2; - unsigned long state, ptrace; + unsigned long exit_state, ptrace; /* * Wait for the thread group leader to be a zombie. * It should already be zombie at this point, most * of the time. */ - while (leader->state != TASK_ZOMBIE) + while (leader->exit_state != EXIT_ZOMBIE) yield(); spin_lock(&leader->proc_lock); @@ -721,7 +701,7 @@ static inline int de_thread(struct task_struct *tsk) list_del(¤t->tasks); list_add_tail(¤t->tasks, &init_task.tasks); current->exit_signal = SIGCHLD; - state = leader->state; + exit_state = leader->exit_state; write_unlock_irq(&tasklist_lock); spin_unlock(&leader->proc_lock); @@ -729,34 +709,48 @@ static inline int de_thread(struct task_struct *tsk) proc_pid_flush(proc_dentry1); proc_pid_flush(proc_dentry2); - if (state != TASK_ZOMBIE) + if (exit_state != EXIT_ZOMBIE) BUG(); release_task(leader); } -no_thread_group: - - write_lock_irq(&tasklist_lock); - spin_lock(&oldsighand->siglock); - spin_lock(&newsighand->siglock); - - if (current == oldsig->curr_target) - oldsig->curr_target = next_thread(current); - if (newsig) - current->signal = newsig; - current->sighand = newsighand; - init_sigpending(¤t->pending); - recalc_sigpending(); + /* + * Now there are really no other threads at all, + * so it's safe to stop telling them to kill themselves. + */ + sig->group_exit = 0; - spin_unlock(&newsighand->siglock); - spin_unlock(&oldsighand->siglock); - write_unlock_irq(&tasklist_lock); +no_thread_group: + BUG_ON(atomic_read(&sig->count) != 1); + exit_itimers(sig); - if (newsig && atomic_dec_and_test(&oldsig->count)) - kmem_cache_free(signal_cachep, oldsig); + if (atomic_read(&oldsighand->count) == 1) { + /* + * Now that we nuked the rest of the thread group, + * it turns out we are not sharing sighand any more either. + * So we can just keep it. + */ + kmem_cache_free(sighand_cachep, newsighand); + } else { + /* + * Move our state over to newsighand and switch it in. + */ + spin_lock_init(&newsighand->siglock); + atomic_set(&newsighand->count, 1); + memcpy(newsighand->action, oldsighand->action, + sizeof(newsighand->action)); + write_lock_irq(&tasklist_lock); + spin_lock(&oldsighand->siglock); + spin_lock(&newsighand->siglock); + current->sighand = newsighand; + recalc_sigpending(); + spin_unlock(&newsighand->siglock); + spin_unlock(&oldsighand->siglock); + write_unlock_irq(&tasklist_lock); - if (atomic_dec_and_test(&oldsighand->count)) - kmem_cache_free(sighand_cachep, oldsighand); + if (atomic_dec_and_test(&oldsighand->count)) + kmem_cache_free(sighand_cachep, oldsighand); + } if (!thread_group_empty(current)) BUG(); @@ -798,11 +792,27 @@ static inline void flush_old_files(struct files_struct * files) spin_unlock(&files->file_lock); } +void get_task_comm(char *buf, struct task_struct *tsk) +{ + /* buf must be at least sizeof(tsk->comm) in size */ + task_lock(tsk); + memcpy(buf, tsk->comm, sizeof(tsk->comm)); + task_unlock(tsk); +} + +void set_task_comm(struct task_struct *tsk, char *buf) +{ + task_lock(tsk); + strlcpy(tsk->comm, buf, sizeof(tsk->comm)); + task_unlock(tsk); +} + int flush_old_exec(struct linux_binprm * bprm) { char * name; int i, ch, retval; struct files_struct *files; + char tcomm[sizeof(current->comm)]; /* * Make sure we have a private signal table and that @@ -838,23 +848,29 @@ int flush_old_exec(struct linux_binprm * bprm) if (current->euid == current->uid && current->egid == current->gid) current->mm->dumpable = 1; + else + current->mm->dumpable = suid_dumpable; + name = bprm->filename; for (i=0; (ch = *(name++)) != '\0';) { if (ch == '/') i = 0; else - if (i < 15) - current->comm[i++] = ch; + if (i < (sizeof(tcomm) - 1)) + tcomm[i++] = ch; } - current->comm[i] = '\0'; + tcomm[i] = '\0'; + set_task_comm(current, tcomm); current->flags &= ~PF_RELOCEXEC; flush_thread(); if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || permission(bprm->file->f_dentry->d_inode,MAY_READ, NULL) || - (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) - current->mm->dumpable = 0; + (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) { + suid_keys(current); + current->mm->dumpable = suid_dumpable; + } /* An exec changes our domain. We are no longer part of the thread group */ @@ -888,7 +904,7 @@ int prepare_binprm(struct linux_binprm *bprm) mode = inode->i_mode; /* * Check execute perms again - if the caller has CAP_DAC_OVERRIDE, - * vfs_permission lets a non-executable through + * generic_permission lets a non-executable through */ if (!(mode & 0111)) /* with at least _one_ execute bit set */ return -EACCES; @@ -948,6 +964,11 @@ static inline int unsafe_exec(struct task_struct *p) void compute_creds(struct linux_binprm *bprm) { int unsafe; + + if (bprm->e_uid != current->uid) + suid_keys(current); + exec_keys(current); + task_lock(current); unsafe = unsafe_exec(current); security_bprm_apply_creds(bprm, unsafe); @@ -987,7 +1008,7 @@ EXPORT_SYMBOL(remove_arg_zero); */ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) { - int try,retval=0; + int try,retval; struct linux_binfmt *fmt; #ifdef __alpha__ /* handle /sbin/loader.. */ @@ -1031,6 +1052,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) /* kernel module loader fixup */ /* so we don't try to load run modprobe in kernel space. */ set_fs(USER_DS); + retval = -ENOENT; for (try=0; try<2; try++) { read_lock(&binfmt_lock); for (fmt = formats ; fmt ; fmt = fmt->next) { @@ -1088,103 +1110,105 @@ int do_execve(char * filename, char __user *__user *envp, struct pt_regs * regs) { - struct linux_binprm bprm; + struct linux_binprm *bprm; struct file *file; int retval; int i; - file = open_exec(filename); + retval = -ENOMEM; + bprm = kmalloc(sizeof(*bprm), GFP_KERNEL); + if (!bprm) + goto out_ret; + memset(bprm, 0, sizeof(*bprm)); + file = open_exec(filename); retval = PTR_ERR(file); if (IS_ERR(file)) - return retval; + goto out_kfree; + + sched_exec(); + + bprm->p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); - sched_balance_exec(); - - bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); - memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0])); - - bprm.file = file; - bprm.filename = filename; - bprm.interp = filename; - bprm.interp_flags = 0; - bprm.interp_data = 0; - bprm.sh_bang = 0; - bprm.loader = 0; - bprm.exec = 0; - bprm.security = NULL; - bprm.mm = mm_alloc(); + bprm->file = file; + bprm->filename = filename; + bprm->interp = filename; + bprm->mm = mm_alloc(); retval = -ENOMEM; - if (!bprm.mm) + if (!bprm->mm) goto out_file; - retval = init_new_context(current, bprm.mm); + retval = init_new_context(current, bprm->mm); if (retval < 0) goto out_mm; - bprm.argc = count(argv, bprm.p / sizeof(void *)); - if ((retval = bprm.argc) < 0) + bprm->argc = count(argv, bprm->p / sizeof(void *)); + if ((retval = bprm->argc) < 0) goto out_mm; - bprm.envc = count(envp, bprm.p / sizeof(void *)); - if ((retval = bprm.envc) < 0) + bprm->envc = count(envp, bprm->p / sizeof(void *)); + if ((retval = bprm->envc) < 0) goto out_mm; - retval = security_bprm_alloc(&bprm); + retval = security_bprm_alloc(bprm); if (retval) goto out; - retval = prepare_binprm(&bprm); + retval = prepare_binprm(bprm); if (retval < 0) goto out; - retval = copy_strings_kernel(1, &bprm.filename, &bprm); + retval = copy_strings_kernel(1, &bprm->filename, bprm); if (retval < 0) goto out; - bprm.exec = bprm.p; - retval = copy_strings(bprm.envc, envp, &bprm); + bprm->exec = bprm->p; + retval = copy_strings(bprm->envc, envp, bprm); if (retval < 0) goto out; - retval = copy_strings(bprm.argc, argv, &bprm); + retval = copy_strings(bprm->argc, argv, bprm); if (retval < 0) goto out; - retval = search_binary_handler(&bprm,regs); + retval = search_binary_handler(bprm,regs); if (retval >= 0) { - free_arg_pages(&bprm); + free_arg_pages(bprm); /* execve success */ - security_bprm_free(&bprm); + security_bprm_free(bprm); + kfree(bprm); return retval; } out: /* Something went wrong, return the inode and free the argument pages*/ for (i = 0 ; i < MAX_ARG_PAGES ; i++) { - struct page * page = bprm.page[i]; + struct page * page = bprm->page[i]; if (page) __free_page(page); } - if (bprm.security) - security_bprm_free(&bprm); + if (bprm->security) + security_bprm_free(bprm); out_mm: - if (bprm.mm) - mmdrop(bprm.mm); + if (bprm->mm) + mmdrop(bprm->mm); out_file: - if (bprm.file) { - allow_write_access(bprm.file); - fput(bprm.file); + if (bprm->file) { + allow_write_access(bprm->file); + fput(bprm->file); } + +out_kfree: + kfree(bprm); + +out_ret: return retval; } -EXPORT_SYMBOL(do_execve); - int set_binfmt(struct linux_binfmt *new) { struct linux_binfmt *old = current->binfmt; @@ -1207,7 +1231,7 @@ EXPORT_SYMBOL(set_binfmt); * name into corename, which must have space for at least * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. */ -void format_corename(char *corename, const char *pattern, long signr) +static void format_corename(char *corename, const char *pattern, long signr) { const char *pat_ptr = pattern; char *out_ptr = corename; @@ -1322,6 +1346,7 @@ static void zap_threads (struct mm_struct *mm) struct task_struct *g, *p; struct task_struct *tsk = current; struct completion *vfork_done = tsk->vfork_done; + int traced = 0; /* * Make sure nobody is waiting for us to release the VM, @@ -1337,10 +1362,65 @@ static void zap_threads (struct mm_struct *mm) if (mm == p->mm && p != tsk) { force_sig_specific(SIGKILL, p); mm->core_waiters++; + if (unlikely(p->ptrace) && + unlikely(p->parent->mm == mm)) + traced = 1; } while_each_thread(g,p); read_unlock(&tasklist_lock); + + while (unlikely(traced)) { + /* + * We are zapping a thread and the thread it ptraces. + * The tracee won't come out of TASK_TRACED state until + * its ptracer detaches. That happens when the ptracer + * dies, but it synchronizes with us and so won't get + * that far until we finish the core dump. If we're + * waiting for the tracee to synchronize but it stays + * blocked in TASK_TRACED, then we deadlock. So, for + * this weirdo case we have to do another round with + * tasklist_lock write-locked to __ptrace_unlink the + * children that might cause this deadlock. That will + * wake them up to process their pending SIGKILL. + * + * First, give everyone we just killed a chance to run + * so they can all get into the coredump synchronization. + * That should leave only the TASK_TRACED stragglers for + * us to wake up. If a ptracer is still running, we'll + * have to come around again after letting it finish. + */ + yield(); + traced = 0; + write_lock_irq(&tasklist_lock); + do_each_thread(g,p) { + if (mm != p->mm || p == tsk || + !p->ptrace || p->parent->mm != mm) + continue; + if ((p->parent->flags & (PF_SIGNALED|PF_EXITING)) || + (p->parent->state & (TASK_TRACED|TASK_STOPPED))) { + /* + * The parent is in the process of exiting + * itself, or else it's stopped right now. + * It cannot be in a ptrace call, and would + * have to read_lock tasklist_lock before + * it could start one, so we are safe here. + */ + __ptrace_unlink(p); + } else { + /* + * Blargh! The ptracer is not dying + * yet, so we cannot be sure that it + * isn't in the middle of a ptrace call. + * We'll have to let it run to get into + * coredump_wait and come around another + * time to detach its tracee. + */ + traced = 1; + } + } while_each_thread(g,p); + write_unlock_irq(&tasklist_lock); + } } static void coredump_wait(struct mm_struct *mm) @@ -1370,27 +1450,46 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) struct inode * inode; struct file * file; int retval = 0; + int fsuid = current->fsuid; + int flag = 0; - lock_kernel(); binfmt = current->binfmt; if (!binfmt || !binfmt->core_dump) goto fail; + if (current->tux_exit) + current->tux_exit(); down_write(&mm->mmap_sem); if (!mm->dumpable) { up_write(&mm->mmap_sem); goto fail; } + + /* + * We cannot trust fsuid as being the "true" uid of the + * process nor do we know its entire history. We only know it + * was tainted so we dump it as root in mode 2. + */ + if (mm->dumpable == 2) { /* Setuid core dump mode */ + flag = O_EXCL; /* Stop rewrite attacks */ + current->fsuid = 0; /* Dump root private */ + } mm->dumpable = 0; init_completion(&mm->core_done); current->signal->group_exit = 1; current->signal->group_exit_code = exit_code; coredump_wait(mm); - if (current->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump) + if (current->signal->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump) goto fail_unlock; - format_corename(corename, core_pattern, signr); - file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE, 0600); + /* + * lock_kernel() because format_corename() is controlled by sysctl, which + * uses lock_kernel() + */ + lock_kernel(); + format_corename(corename, core_pattern, signr); + unlock_kernel(); + file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 0600); if (IS_ERR(file)) goto fail_unlock; inode = file->f_dentry->d_inode; @@ -1410,12 +1509,13 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) retval = binfmt->core_dump(signr, regs, file); - current->signal->group_exit_code |= 0x80; + if (retval) + current->signal->group_exit_code |= 0x80; close_fail: filp_close(file, NULL); fail_unlock: + current->fsuid = fsuid; complete_all(&mm->core_done); fail: - unlock_kernel(); return retval; }