#include "asm/ptrace.h"
#include "asm/tlbflush.h"
#include "irq_user.h"
-#include "signal_user.h"
#include "kern_util.h"
#include "user_util.h"
#include "os.h"
#include "kern.h"
#include "sigcontext.h"
-#include "time_user.h"
#include "mem_user.h"
#include "tlb.h"
#include "mode.h"
+#include "mode_kern.h"
#include "init.h"
#include "tt.h"
-void *switch_to_tt(void *prev, void *next, void *last)
+void switch_to_tt(void *prev, void *next)
{
- struct task_struct *from, *to;
+ struct task_struct *from, *to, *prev_sched;
unsigned long flags;
int err, vtalrm, alrm, prof, cpu;
char c;
- /* jailing and SMP are incompatible, so this doesn't need to be
- * made per-cpu
- */
- static int reading;
from = prev;
to = next;
- to->thread.prev_sched = from;
-
- cpu = from->thread_info->cpu;
+ cpu = task_thread_info(from)->cpu;
if(cpu == 0)
forward_interrupts(to->thread.mode.tt.extern_pid);
#ifdef CONFIG_SMP
forward_pending_sigio(to->thread.mode.tt.extern_pid);
c = 0;
- set_current(to);
- reading = 0;
+ /* Notice that here we "up" the semaphore on which "to" is waiting, and
+ * below (the read) we wait on this semaphore (which is implemented by
+ * switch_pipe) and go sleeping. Thus, after that, we have resumed in
+ * "to", and can't use any more the value of "from" (which is outdated),
+ * nor the value in "to" (since it was the task which stole us the CPU,
+ * which we don't care about). */
+
err = os_write_file(to->thread.mode.tt.switch_pipe[1], &c, sizeof(c));
if(err != sizeof(c))
panic("write of switch_pipe failed, err = %d", -err);
- reading = 1;
- if((from->state == TASK_ZOMBIE) || (from->state == TASK_DEAD))
+ if(from->thread.mode.tt.switch_pipe[0] == -1)
os_kill_process(os_getpid(), 0);
err = os_read_file(from->thread.mode.tt.switch_pipe[0], &c, sizeof(c));
if(err != sizeof(c))
panic("read of switch_pipe failed, errno = %d", -err);
- /* This works around a nasty race with 'jail'. If we are switching
- * between two threads of a threaded app and the incoming process
- * runs before the outgoing process reaches the read, and it makes
- * it all the way out to userspace, then it will have write-protected
- * the outgoing process stack. Then, when the outgoing process
- * returns from the write, it will segfault because it can no longer
- * write its own stack. So, in order to avoid that, the incoming
- * thread sits in a loop yielding until 'reading' is set. This
- * isn't entirely safe, since there may be a reschedule from a timer
- * happening between setting 'reading' and sleeping in read. But,
- * it should get a whole quantum in which to reach the read and sleep,
- * which should be enough.
+ /* If the process that we have just scheduled away from has exited,
+ * then it needs to be killed here. The reason is that, even though
+ * it will kill itself when it next runs, that may be too late. Its
+ * stack will be freed, possibly before then, and if that happens,
+ * we have a use-after-free situation. So, it gets killed here
+ * in case it has not already killed itself.
*/
-
- if(jail){
- while(!reading) sched_yield();
- }
+ prev_sched = current->thread.prev_sched;
+ if(prev_sched->thread.mode.tt.switch_pipe[0] == -1)
+ os_kill_process(prev_sched->thread.mode.tt.extern_pid, 1);
change_sig(SIGVTALRM, vtalrm);
change_sig(SIGALRM, alrm);
change_sig(SIGPROF, prof);
- arch_switch();
+ arch_switch_to_tt(prev_sched, current);
flush_tlb_all();
local_irq_restore(flags);
-
- return(current->thread.prev_sched);
}
void release_thread_tt(struct task_struct *task)
{
int pid = task->thread.mode.tt.extern_pid;
+ /*
+ * We first have to kill the other process, before
+ * closing its switch_pipe. Else it might wake up
+ * and receive "EOF" before we could kill it.
+ */
if(os_getpid() != pid)
os_kill_process(pid, 0);
+
+ os_close_file(task->thread.mode.tt.switch_pipe[0]);
+ os_close_file(task->thread.mode.tt.switch_pipe[1]);
+ /* use switch_pipe as flag: thread is released */
+ task->thread.mode.tt.switch_pipe[0] = -1;
}
-void exit_thread_tt(void)
+void suspend_new_thread(int fd)
{
- os_close_file(current->thread.mode.tt.switch_pipe[0]);
- os_close_file(current->thread.mode.tt.switch_pipe[1]);
+ int err;
+ char c;
+
+ os_stop_process(os_getpid());
+ err = os_read_file(fd, &c, sizeof(c));
+ if(err != sizeof(c))
+ panic("read failed in suspend_new_thread, err = %d", -err);
}
-void schedule_tail(task_t *prev);
+void schedule_tail(struct task_struct *prev);
static void new_thread_handler(int sig)
{
schedule_tail(current->thread.prev_sched);
current->thread.prev_sched = NULL;
- init_new_thread_signals(1);
+ init_new_thread_signals();
enable_timer();
free_page(current->thread.temp_stack);
set_cmdline("(kernel thread)");
change_sig(SIGUSR1, 1);
- change_sig(SIGVTALRM, 1);
change_sig(SIGPROF, 1);
local_irq_enable();
if(!run_kernel_thread(fn, arg, &current->thread.exec_buf))
do_exit(0);
+
+ /* XXX No set_user_mode here because a newly execed process will
+ * immediately segfault on its non-existent IP, coming straight back
+ * to the signal handler, which will call set_user_mode on its way
+ * out. This should probably change since it's confusing.
+ */
}
static int new_thread_proc(void *stack)
{
/* local_irq_disable is needed to block out signals until this thread is
- * properly scheduled. Otherwise, the tracing thread will get mighty
- * upset about any signals that arrive before that.
+ * properly scheduled. Otherwise, the tracing thread will get mighty
+ * upset about any signals that arrive before that.
* This has the complication that it sets the saved signal mask in
* the sigcontext to block signals. This gets restored when this
* thread (or a descendant, since they get a copy of this sigcontext)
* returns to userspace.
* So, this is compensated for elsewhere.
- * XXX There is still a small window until local_irq_disable() actually
- * finishes where signals are possible - shouldn't be a problem in
- * practice since SIGIO hasn't been forwarded here yet, and the
- * local_irq_disable should finish before a SIGVTALRM has time to be
+ * XXX There is still a small window until local_irq_disable() actually
+ * finishes where signals are possible - shouldn't be a problem in
+ * practice since SIGIO hasn't been forwarded here yet, and the
+ * local_irq_disable should finish before a SIGVTALRM has time to be
* delivered.
*/
local_irq_disable();
init_new_thread_stack(stack, new_thread_handler);
os_usr1_process(os_getpid());
+ change_sig(SIGUSR1, 1);
return(0);
}
init_new_thread_stack(stack, finish_fork_handler);
os_usr1_process(os_getpid());
+ change_sig(SIGUSR1, 1);
return(0);
}
clone_flags &= CLONE_VM;
p->thread.temp_stack = stack;
- new_pid = start_fork_tramp((void *) p->thread.kernel_stack, stack,
- clone_flags, tramp);
+ new_pid = start_fork_tramp(task_stack_page(p), stack, clone_flags, tramp);
if(new_pid < 0){
printk(KERN_ERR "copy_thread : clone failed - errno = %d\n",
-new_pid);
}
if(current->thread.forking){
- sc_to_sc(UPT_SC(&p->thread.regs.regs),
- UPT_SC(¤t->thread.regs.regs));
+ sc_to_sc(UPT_SC(&p->thread.regs.regs), UPT_SC(&regs->regs));
SC_SET_SYSCALL_RETURN(UPT_SC(&p->thread.regs.regs), 0);
- if(sp != 0) SC_SP(UPT_SC(&p->thread.regs.regs)) = sp;
+ if(sp != 0)
+ SC_SP(UPT_SC(&p->thread.regs.regs)) = sp;
}
p->thread.mode.tt.extern_pid = new_pid;
current->thread.request.op = OP_FORK;
current->thread.request.u.fork.pid = new_pid;
os_usr1_process(os_getpid());
- return(0);
+
+ /* Enable the signal and then disable it to ensure that it is handled
+ * here, and nowhere else.
+ */
+ change_sig(SIGUSR1, 1);
+
+ change_sig(SIGUSR1, 0);
+ err = 0;
+ return(err);
}
void reboot_tt(void)
{
current->thread.request.op = OP_REBOOT;
os_usr1_process(os_getpid());
+ change_sig(SIGUSR1, 1);
}
void halt_tt(void)
{
current->thread.request.op = OP_HALT;
os_usr1_process(os_getpid());
+ change_sig(SIGUSR1, 1);
}
void kill_off_processes_tt(void)
current->thread.request.u.cb.proc = proc;
current->thread.request.u.cb.arg = arg;
os_usr1_process(os_getpid());
+ change_sig(SIGUSR1, 1);
+
+ change_sig(SIGUSR1, 0);
}
}
pid = thread->request.u.exec.pid;
do_exec(thread->mode.tt.extern_pid, pid);
thread->mode.tt.extern_pid = pid;
- cpu_tasks[task->thread_info->cpu].pid = pid;
+ cpu_tasks[task_thread_info(task)->cpu].pid = pid;
break;
case OP_FORK:
attach_process(thread->request.u.fork.pid);
default_idle();
}
-/* Changed by jail_setup, which is a setup */
-int jail = 0;
-
-int __init jail_setup(char *line, int *add)
-{
- int ok = 1;
-
- if(jail) return(0);
-#ifdef CONFIG_SMP
- printf("'jail' may not used used in a kernel with CONFIG_SMP "
- "enabled\n");
- ok = 0;
-#endif
-#ifdef CONFIG_HOSTFS
- printf("'jail' may not used used in a kernel with CONFIG_HOSTFS "
- "enabled\n");
- ok = 0;
-#endif
-#ifdef CONFIG_MODULES
- printf("'jail' may not used used in a kernel with CONFIG_MODULES "
- "enabled\n");
- ok = 0;
-#endif
- if(!ok) exit(1);
-
- /* CAP_SYS_RAWIO controls the ability to open /dev/mem and /dev/kmem.
- * Removing it from the bounding set eliminates the ability of anything
- * to acquire it, and thus read or write kernel memory.
- */
- cap_lower(cap_bset, CAP_SYS_RAWIO);
- jail = 1;
- return(0);
-}
-
-__uml_setup("jail", jail_setup,
-"jail\n"
-" Enables the protection of kernel memory from processes.\n\n"
-);
-
-static void mprotect_kernel_mem(int w)
-{
- unsigned long start, end;
- int pages;
-
- if(!jail || (current == &init_task)) return;
-
- pages = (1 << CONFIG_KERNEL_STACK_ORDER);
-
- start = (unsigned long) current_thread + PAGE_SIZE;
- end = (unsigned long) current_thread + PAGE_SIZE * pages;
- protect_memory(uml_reserved, start - uml_reserved, 1, w, 1, 1);
- protect_memory(end, high_physmem - end, 1, w, 1, 1);
-
- start = (unsigned long) UML_ROUND_DOWN(&_stext);
- end = (unsigned long) UML_ROUND_UP(&_etext);
- protect_memory(start, end - start, 1, w, 1, 1);
-
- start = (unsigned long) UML_ROUND_DOWN(&_unprotected_end);
- end = (unsigned long) UML_ROUND_UP(&_edata);
- protect_memory(start, end - start, 1, w, 1, 1);
-
- start = (unsigned long) UML_ROUND_DOWN(&__bss_start);
- end = (unsigned long) UML_ROUND_UP(brk_start);
- protect_memory(start, end - start, 1, w, 1, 1);
-
- mprotect_kernel_vm(w);
-}
-
-void unprotect_kernel_mem(void)
-{
- mprotect_kernel_mem(1);
-}
-
-void protect_kernel_mem(void)
-{
- mprotect_kernel_mem(0);
-}
-
extern void start_kernel(void);
static int start_kernel_proc(void *unused)
init_task.thread.mode.tt.extern_pid = pid;
err = os_pipe(init_task.thread.mode.tt.switch_pipe, 1, 1);
- if(err)
- panic("Can't create switch pipe for init_task, errno = %d",
+ if(err)
+ panic("Can't create switch pipe for init_task, errno = %d",
-err);
}
-int singlestepping_tt(void *t)
-{
- struct task_struct *task = t;
-
- if(task->thread.mode.tt.singlestep_syscall)
- return(0);
- return(task->ptrace & PT_DTRACE);
-}
-
-void clear_singlestep(void *t)
-{
- struct task_struct *task = t;
-
- task->ptrace &= ~PT_DTRACE;
-}
-
int start_uml_tt(void)
{
void *sp;
int pages;
- pages = (1 << CONFIG_KERNEL_STACK_ORDER) - 2;
- sp = (void *) init_task.thread.kernel_stack + pages * PAGE_SIZE -
- sizeof(unsigned long);
+ pages = (1 << CONFIG_KERNEL_STACK_ORDER);
+ sp = task_stack_page(&init_task) +
+ pages * PAGE_SIZE - sizeof(unsigned long);
return(tracer(start_kernel_proc, sp));
}
read_unlock(&tasklist_lock);
return(0);
}
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only. This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */