2 * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
6 #include "linux/sched.h"
7 #include "linux/signal.h"
8 #include "linux/kernel.h"
9 #include "linux/interrupt.h"
10 #include "linux/ptrace.h"
11 #include "asm/system.h"
12 #include "asm/pgalloc.h"
13 #include "asm/ptrace.h"
14 #include "asm/tlbflush.h"
16 #include "signal_user.h"
17 #include "kern_util.h"
18 #include "user_util.h"
21 #include "sigcontext.h"
22 #include "time_user.h"
/*
 * Hand execution over from one task to another.
 *
 * Records the outgoing task in to->thread.prev_sched, then calls
 * forward_interrupts(), forward_ipi() (with the outgoing task's CPU's
 * ipi_pipe[0]) and forward_pending_sigio() with the incoming task's
 * thread.mode.tt.extern_pid.  SIGVTALRM, SIGALRM and SIGPROF are switched
 * off with change_sig(sig, 0) inside a local_irq_save() section and are set
 * back to the values change_sig() returned before local_irq_restore().
 *
 * The hand-off itself is a write of `c` to the incoming task's
 * switch_pipe[1] followed by a read from the outgoing task's
 * switch_pipe[0]; either one failing ends in panic().  When the outgoing
 * task's state is TASK_ZOMBIE or TASK_DEAD, os_kill_process() is called on
 * os_getpid() before the read.
 *
 * Returns current->thread.prev_sched.
 *
 * NOTE(review): the assignments of `from` and `to` (presumably from prev
 * and next) are not among the lines shown, and `last` is not referenced in
 * the lines shown - confirm against the full file.
 */
29 void *switch_to_tt(void *prev, void *next, void *last)
31 struct task_struct *from, *to;
33 int err, vtalrm, alrm, prof, cpu;
35 /* jailing and SMP are incompatible, so this doesn't need to be
43 to->thread.prev_sched = from;
45 cpu = from->thread_info->cpu;
47 forward_interrupts(to->thread.mode.tt.extern_pid);
49 forward_ipi(cpu_data[cpu].ipi_pipe[0], to->thread.mode.tt.extern_pid);
51 local_irq_save(flags);
53 vtalrm = change_sig(SIGVTALRM, 0);
54 alrm = change_sig(SIGALRM, 0);
55 prof = change_sig(SIGPROF, 0);
57 forward_pending_sigio(to->thread.mode.tt.extern_pid);
63 err = os_write_file(to->thread.mode.tt.switch_pipe[1], &c, sizeof(c));
65 panic("write of switch_pipe failed, err = %d", -err);
68 if((from->state == TASK_ZOMBIE) || (from->state == TASK_DEAD))
69 os_kill_process(os_getpid(), 0);
71 err = os_read_file(from->thread.mode.tt.switch_pipe[0], &c, sizeof(c));
73 panic("read of switch_pipe failed, errno = %d", -err);
75 /* This works around a nasty race with 'jail'. If we are switching
76 * between two threads of a threaded app and the incoming process
77 * runs before the outgoing process reaches the read, and it makes
78 * it all the way out to userspace, then it will have write-protected
79 * the outgoing process stack. Then, when the outgoing process
80 * returns from the write, it will segfault because it can no longer
81 * write its own stack. So, in order to avoid that, the incoming
82 * thread sits in a loop yielding until 'reading' is set. This
83 * isn't entirely safe, since there may be a reschedule from a timer
84 * happening between setting 'reading' and sleeping in read. But,
85 * it should get a whole quantum in which to reach the read and sleep,
86 * which should be enough.
90 while(!reading) sched_yield();
93 change_sig(SIGVTALRM, vtalrm);
94 change_sig(SIGALRM, alrm);
95 change_sig(SIGPROF, prof);
100 local_irq_restore(flags);
102 return(current->thread.prev_sched);
/*
 * Kill the host process recorded for `task` (thread.mode.tt.extern_pid)
 * with os_kill_process(), unless that pid is the caller's own os_getpid().
 */
105 void release_thread_tt(struct task_struct *task)
107 int pid = task->thread.mode.tt.extern_pid;
109 if(os_getpid() != pid)
110 os_kill_process(pid, 0);
/* Close both descriptors of the current task's switch_pipe. */
113 void exit_thread_tt(void)
115 os_close_file(current->thread.mode.tt.switch_pipe[0]);
116 os_close_file(current->thread.mode.tt.switch_pipe[1]);
/*
 * Park a freshly created thread: stop the calling host process with
 * os_stop_process(os_getpid()), then read sizeof(c) bytes from `fd`.
 * A failed read ends in panic().
 *
 * fd - descriptor to read from; callers in this file pass the current
 *      task's switch_pipe[0].
 */
119 void suspend_new_thread(int fd)
124 os_stop_process(os_getpid());
125 err = os_read_file(fd, &c, sizeof(c));
127 panic("read failed in suspend_new_thread, err = %d", -err);
130 void schedule_tail(task_t *prev);
/*
 * Signal handler that brings up a new kernel thread; new_thread_proc
 * installs it with init_new_thread_stack().
 *
 * Picks up the thread function and its argument from
 * current->thread.request.u.thread, points the task's saved sigcontext at
 * the address just past the handler's `sig` argument, and clears SIGVTALRM,
 * SIGALRM, SIGIO and SIGPROF from that sigcontext's signal mask.  It then
 * parks in suspend_new_thread() on switch_pipe[0].  Once it continues, it
 * calls schedule_tail() for a recorded prev_sched (and clears the field),
 * frees the temporary stack, enables SIGUSR1, SIGVTALRM and SIGPROF, and
 * runs fn(arg) through run_kernel_thread().
 *
 * sig - signal number; only its address is used, to locate the sigcontext.
 */
132 static void new_thread_handler(int sig)
134 unsigned long disable;
138 fn = current->thread.request.u.thread.proc;
139 arg = current->thread.request.u.thread.arg;
141 UPT_SC(&current->thread.regs.regs) = (void *) (&sig + 1);
142 disable = (1 << (SIGVTALRM - 1)) | (1 << (SIGALRM - 1)) |
143 (1 << (SIGIO - 1)) | (1 << (SIGPROF - 1));
144 SC_SIGMASK(UPT_SC(&current->thread.regs.regs)) &= ~disable;
146 suspend_new_thread(current->thread.mode.tt.switch_pipe[0]);
149 if(current->thread.prev_sched != NULL)
150 schedule_tail(current->thread.prev_sched);
151 current->thread.prev_sched = NULL;
153 init_new_thread_signals(1);
155 free_page(current->thread.temp_stack);
156 set_cmdline("(kernel thread)");
158 change_sig(SIGUSR1, 1);
159 change_sig(SIGVTALRM, 1);
160 change_sig(SIGPROF, 1);
162 if(!run_kernel_thread(fn, arg, &current->thread.exec_buf))
165 /* XXX No set_user_mode here because a newly execed process will
166 * immediately segfault on its non-existent IP, coming straight back
167 * to the signal handler, which will call set_user_mode on its way
168 * out. This should probably change since it's confusing.
/*
 * Trampoline for a new kernel thread; copy_thread_tt hands it to
 * start_fork_tramp().  Installs new_thread_handler on `stack` with
 * init_new_thread_stack(), calls os_usr1_process() on its own pid, and
 * then enables SIGUSR1 with change_sig().
 *
 * NOTE(review): the return statement is not among the lines shown.
 */
172 static int new_thread_proc(void *stack)
174 /* local_irq_disable is needed to block out signals until this thread is
175 * properly scheduled. Otherwise, the tracing thread will get mighty
176 * upset about any signals that arrive before that.
177 * This has the complication that it sets the saved signal mask in
178 * the sigcontext to block signals. This gets restored when this
179 * thread (or a descendant, since they get a copy of this sigcontext)
180 * returns to userspace.
181 * So, this is compensated for elsewhere.
182 * XXX There is still a small window until local_irq_disable() actually
183 * finishes where signals are possible - shouldn't be a problem in
184 * practice since SIGIO hasn't been forwarded here yet, and the
185 * local_irq_disable should finish before a SIGVTALRM has time to be
190 init_new_thread_stack(stack, new_thread_handler);
191 os_usr1_process(os_getpid());
192 change_sig(SIGUSR1, 1);
196 /* Signal masking - signals are blocked at the start of fork_tramp. They
197 * are re-enabled when finish_fork_handler is entered by fork_tramp hitting
198 * itself with a SIGUSR1. set_user_mode has to be run with SIGUSR1 off,
199 * so it is blocked before it's called. They are re-enabled on sigreturn
200 * despite the fact that they were blocked when the SIGUSR1 was issued because
201 * copy_thread copies the parent's sigcontext, including the signal mask
202 * onto the signal frame.
/*
 * Signal handler that completes a fork in the child; fork_tramp installs
 * it with init_new_thread_stack().
 *
 * Points the task's saved sigcontext at the address just past `sig`, parks
 * in suspend_new_thread() on switch_pipe[0], and afterwards calls
 * schedule_tail() for a recorded prev_sched (clearing the field).  It then
 * enables SIGVTALRM, calls protect_memory() over uml_reserved..high_physmem
 * when the task does not share its parent's mm, calls task_protections()
 * on current_thread, frees the temporary stack, disables SIGUSR1 and
 * finishes with set_user_mode(current).
 *
 * sig - signal number; only its address is used, to locate the sigcontext.
 */
205 void finish_fork_handler(int sig)
207 UPT_SC(&current->thread.regs.regs) = (void *) (&sig + 1);
208 suspend_new_thread(current->thread.mode.tt.switch_pipe[0]);
211 if(current->thread.prev_sched != NULL)
212 schedule_tail(current->thread.prev_sched);
213 current->thread.prev_sched = NULL;
216 change_sig(SIGVTALRM, 1);
218 if(current->mm != current->parent->mm)
219 protect_memory(uml_reserved, high_physmem - uml_reserved, 1,
221 task_protections((unsigned long) current_thread);
223 free_page(current->thread.temp_stack);
225 change_sig(SIGUSR1, 0);
226 set_user_mode(current);
/*
 * Trampoline for a forked child: installs finish_fork_handler on `stack`
 * with init_new_thread_stack(), calls os_usr1_process() on its own pid,
 * and then enables SIGUSR1 with change_sig().
 *
 * NOTE(review): the return statement is not among the lines shown.
 */
229 int fork_tramp(void *stack)
233 init_new_thread_stack(stack, finish_fork_handler);
235 os_usr1_process(os_getpid());
236 change_sig(SIGUSR1, 1);
/*
 * Set up the host-side thread for a new task `p`.
 *
 * Chooses a trampoline depending on current->thread.forking (the
 * non-forking case uses new_thread_proc and copies the parent's
 * request.u.thread into p), creates p's switch_pipe with os_pipe(),
 * allocates a temporary stack with alloc_stack(), and starts the host
 * process with start_fork_tramp(), passing only the CLONE_VM bit of
 * clone_flags.  Each of those three steps logs with printk() on failure.
 *
 * When forking, the parent's sigcontext is copied into p's with
 * sc_to_sc(), the child's syscall return value is set to 0, and the
 * child's stack pointer is set to `sp` when sp is non-zero.  The new pid is
 * stored in p->thread.mode.tt.extern_pid and posted as an OP_FORK request
 * on the current task, followed by os_usr1_process() on the caller's pid
 * and an enable/disable of SIGUSR1.
 *
 * NOTE(review): nr, stack_top and regs are not referenced in the lines
 * shown, and the error and success return paths are not among them -
 * confirm against the full file.
 */
240 int copy_thread_tt(int nr, unsigned long clone_flags, unsigned long sp,
241 unsigned long stack_top, struct task_struct * p,
242 struct pt_regs *regs)
244 int (*tramp)(void *);
248 if(current->thread.forking)
251 tramp = new_thread_proc;
252 p->thread.request.u.thread = current->thread.request.u.thread;
255 err = os_pipe(p->thread.mode.tt.switch_pipe, 1, 1);
257 printk("copy_thread : pipe failed, err = %d\n", -err);
261 stack = alloc_stack(0, 0);
263 printk(KERN_ERR "copy_thread : failed to allocate "
264 "temporary stack\n");
268 clone_flags &= CLONE_VM;
269 p->thread.temp_stack = stack;
270 new_pid = start_fork_tramp(p->thread_info, stack, clone_flags, tramp);
272 printk(KERN_ERR "copy_thread : clone failed - errno = %d\n",
277 if(current->thread.forking){
278 sc_to_sc(UPT_SC(&p->thread.regs.regs),
279 UPT_SC(&current->thread.regs.regs));
280 SC_SET_SYSCALL_RETURN(UPT_SC(&p->thread.regs.regs), 0);
281 if(sp != 0) SC_SP(UPT_SC(&p->thread.regs.regs)) = sp;
283 p->thread.mode.tt.extern_pid = new_pid;
285 current->thread.request.op = OP_FORK;
286 current->thread.request.u.fork.pid = new_pid;
287 os_usr1_process(os_getpid());
289 /* Enable the signal and then disable it to ensure that it is handled
290 * here, and nowhere else.
292 change_sig(SIGUSR1, 1);
294 change_sig(SIGUSR1, 0);
302 current->thread.request.op = OP_REBOOT;
303 os_usr1_process(os_getpid());
304 change_sig(SIGUSR1, 1);
309 current->thread.request.op = OP_HALT;
310 os_usr1_process(os_getpid());
311 change_sig(SIGUSR1, 1);
/*
 * Kill the host processes of tasks other than the caller: os_kill_process()
 * is applied to the extern_pid of each task `p` that differs from `me`,
 * and then to init_task's extern_pid under the same condition.
 *
 * NOTE(review): the loop that produces `p` and the assignment of `me`
 * (presumably os_getpid()) are not among the lines shown.
 */
314 void kill_off_processes_tt(void)
316 struct task_struct *p;
321 if(p->thread.mode.tt.extern_pid != me)
322 os_kill_process(p->thread.mode.tt.extern_pid, 0);
324 if(init_task.thread.mode.tt.extern_pid != me)
325 os_kill_process(init_task.thread.mode.tt.extern_pid, 0);
/*
 * Post a callback as an OP_CB request on the current task: stores `proc`
 * and `arg` in current->thread.request.u.cb, calls os_usr1_process() on
 * the caller's pid, then enables and afterwards disables SIGUSR1.
 *
 * proc - callback to run.
 * arg  - argument handed to proc.
 *
 * NOTE(review): a separate branch is taken when os_getpid() equals
 * tracing_pid; its body is not among the lines shown.
 */
328 void initial_thread_cb_tt(void (*proc)(void *), void *arg)
330 if(os_getpid() == tracing_pid){
334 current->thread.request.op = OP_CB;
335 current->thread.request.u.cb.proc = proc;
336 current->thread.request.u.cb.arg = arg;
337 os_usr1_process(os_getpid());
338 change_sig(SIGUSR1, 1);
340 change_sig(SIGUSR1, 0);
/*
 * Carry out the request stored in a task's thread.request, then reset
 * request.op to OP_NONE.
 *
 * The visible actions are: an exec step that calls do_exec() with the old
 * and new pids and records the new pid in extern_pid and in the task's
 * cpu_tasks[] slot; attach_process() on request.u.fork.pid; a call of
 * request.u.cb.proc with request.u.cb.arg; and tracer_panic() for an
 * unrecognised op.
 *
 * NOTE(review): the dispatch on `op`, the assignment of `task` (presumably
 * from t) and the return value are not among the lines shown, and proc_id
 * is not referenced in them.
 */
344 int do_proc_op(void *t, int proc_id)
346 struct task_struct *task;
347 struct thread_struct *thread;
351 thread = &task->thread;
352 op = thread->request.op;
358 pid = thread->request.u.exec.pid;
359 do_exec(thread->mode.tt.extern_pid, pid);
360 thread->mode.tt.extern_pid = pid;
361 cpu_tasks[task->thread_info->cpu].pid = pid;
364 attach_process(thread->request.u.fork.pid);
367 (*thread->request.u.cb.proc)(thread->request.u.cb.arg);
373 tracer_panic("Bad op in do_proc_op");
376 thread->request.op = OP_NONE;
380 void init_idle_tt(void)
385 /* Changed by jail_setup, which is a setup */
/*
 * Setup handler for the "jail" option (it is the function passed to
 * __uml_setup("jail", ...)).  Prints a message naming CONFIG_SMP,
 * CONFIG_HOSTFS or CONFIG_MODULES as a configuration jail may not be used
 * with, and lowers CAP_SYS_RAWIO in cap_bset.
 *
 * NOTE(review): the three messages read "may not used used"; this looks
 * like a typo for "may not be used".  The strings are left untouched here.
 * NOTE(review): the conditions around the CONFIG_SMP and CONFIG_HOSTFS
 * messages, any use of `line` and `add`, and the return value are not
 * among the lines shown.
 */
388 int __init jail_setup(char *line, int *add)
394 printf("'jail' may not used used in a kernel with CONFIG_SMP "
399 printf("'jail' may not used used in a kernel with CONFIG_HOSTFS "
403 #ifdef CONFIG_MODULES
404 printf("'jail' may not used used in a kernel with CONFIG_MODULES "
410 /* CAP_SYS_RAWIO controls the ability to open /dev/mem and /dev/kmem.
411 * Removing it from the bounding set eliminates the ability of anything
412 * to acquire it, and thus read or write kernel memory.
414 cap_lower(cap_bset, CAP_SYS_RAWIO);
/*
 * Ties the "jail" option string to jail_setup; the string literal that
 * follows is the option's help text.
 * NOTE(review): what __uml_setup expands to is not shown here.
 */
419 __uml_setup("jail", jail_setup,
421 " Enables the protection of kernel memory from processes.\n\n"
/*
 * Call protect_memory() with flag `w` over the kernel's memory ranges, as
 * seen from the current task.  Does nothing when `jail` is unset or when
 * the current task is init_task.
 *
 * w - passed through as the fourth argument of every protect_memory() call
 *     and to mprotect_kernel_vm(); unprotect_kernel_mem() passes 1 and
 *     protect_kernel_mem() passes 0.
 */
424 static void mprotect_kernel_mem(int w)
426 unsigned long start, end;
429 if(!jail || (current == &init_task)) return;
431 pages = (1 << CONFIG_KERNEL_STACK_ORDER);
/* Cover uml_reserved..high_physmem except the span from one page above
 * current_thread up to `pages` pages above it. */
433 start = (unsigned long) current_thread + PAGE_SIZE;
434 end = (unsigned long) current_thread + PAGE_SIZE * pages;
435 protect_memory(uml_reserved, start - uml_reserved, 1, w, 1, 1);
436 protect_memory(end, high_physmem - end, 1, w, 1, 1);
/* Range bounded by _stext and _etext. */
438 start = (unsigned long) UML_ROUND_DOWN(&_stext);
439 end = (unsigned long) UML_ROUND_UP(&_etext);
440 protect_memory(start, end - start, 1, w, 1, 1);
/* Range bounded by _unprotected_end and _edata. */
442 start = (unsigned long) UML_ROUND_DOWN(&_unprotected_end);
443 end = (unsigned long) UML_ROUND_UP(&_edata);
444 protect_memory(start, end - start, 1, w, 1, 1);
/* Range bounded by __bss_start and brk_start. */
446 start = (unsigned long) UML_ROUND_DOWN(&__bss_start);
447 end = (unsigned long) UML_ROUND_UP(brk_start);
448 protect_memory(start, end - start, 1, w, 1, 1);
450 mprotect_kernel_vm(w);
/* Run mprotect_kernel_mem() with w = 1. */
453 void unprotect_kernel_mem(void)
455 mprotect_kernel_mem(1);
/* Run mprotect_kernel_mem() with w = 0. */
458 void protect_kernel_mem(void)
460 mprotect_kernel_mem(0);
463 extern void start_kernel(void);
/*
 * Thread function that start_uml_tt() hands to tracer().  Records `pid`
 * and the current task in cpu_tasks[0], sets cpu_online_map to CPU 0 only,
 * and stops the process with os_stop_process() when `debug` is set.
 *
 * NOTE(review): where `pid` is assigned, and whatever follows the debug
 * stop (presumably the call into start_kernel()), are not among the lines
 * shown.
 */
465 static int start_kernel_proc(void *unused)
472 cpu_tasks[0].pid = pid;
473 cpu_tasks[0].task = current;
475 cpu_online_map = cpumask_of_cpu(0);
477 if(debug) os_stop_process(pid);
/*
 * Store `tracing` in the thread.mode.tt.tracing field of `task`, which is
 * treated as a struct task_struct pointer.
 */
482 void set_tracing(void *task, int tracing)
484 ((struct task_struct *) task)->thread.mode.tt.tracing = tracing;
/*
 * Return the thread.mode.tt.tracing field of `t`, which is treated as a
 * struct task_struct pointer.
 */
487 int is_tracing(void *t)
489 return (((struct task_struct *) t)->thread.mode.tt.tracing);
/*
 * Post an OP_TRACE_ON request on a task and call os_usr1_process() on the
 * caller's pid.
 *
 * t - task to act on; the current task is used when t is NULL.
 *
 * NOTE(review): the task's tracing flag is tested first, but the statement
 * guarded by that test and the return values are not among the lines
 * shown.
 */
492 int set_user_mode(void *t)
494 struct task_struct *task;
496 task = t ? t : current;
497 if(task->thread.mode.tt.tracing)
499 task->thread.request.op = OP_TRACE_ON;
500 os_usr1_process(os_getpid());
/*
 * Record `pid` as init_task's extern_pid and create init_task's
 * switch_pipe with os_pipe(); a failure to create the pipe ends in
 * panic().
 */
504 void set_init_pid(int pid)
508 init_task.thread.mode.tt.extern_pid = pid;
509 err = os_pipe(init_task.thread.mode.tt.switch_pipe, 1, 1);
511 panic("Can't create switch pipe for init_task, errno = %d",
/*
 * Report whether a task is being single-stepped: returns the PT_DTRACE bit
 * of task->ptrace.
 *
 * NOTE(review): thread.mode.tt.singlestep_syscall is tested first; the
 * statement guarded by that test is not among the lines shown.
 */
515 int singlestepping_tt(void *t)
517 struct task_struct *task = t;
519 if(task->thread.mode.tt.singlestep_syscall)
521 return(task->ptrace & PT_DTRACE);
/* Clear the PT_DTRACE bit in the ptrace field of the task `t`. */
524 void clear_singlestep(void *t)
526 struct task_struct *task = t;
528 task->ptrace &= ~PT_DTRACE;
/*
 * Start the tracer with start_kernel_proc as its thread function.  The
 * stack pointer handed over is sizeof(unsigned long) below the top of
 * init_task's stack area, which spans (1 << CONFIG_KERNEL_STACK_ORDER)
 * pages starting at init_task.thread_info.
 *
 * Returns whatever tracer() returns.
 */
531 int start_uml_tt(void)
536 pages = (1 << CONFIG_KERNEL_STACK_ORDER);
537 sp = (void *) ((unsigned long) init_task.thread_info) +
538 pages * PAGE_SIZE - sizeof(unsigned long);
539 return(tracer(start_kernel_proc, sp));
/* Return the task's thread.mode.tt.extern_pid. */
542 int external_pid_tt(struct task_struct *task)
544 return(task->thread.mode.tt.extern_pid);
/*
 * Return the task's thread.mode.tt.extern_pid (the same field that
 * external_pid_tt() returns).
 */
547 int thread_pid_tt(struct task_struct *task)
549 return(task->thread.mode.tt.extern_pid);
/*
 * Walk the process list, holding tasklist_lock for reading, looking for a
 * task whose thread.mode.tt.extern_pid equals `pid`.  The lock is dropped
 * both when a match is found and after the walk ends without one.
 *
 * NOTE(review): the return statements are not among the lines shown;
 * presumably non-zero on a match and zero otherwise.
 */
552 int is_valid_pid(int pid)
554 struct task_struct *task;
556 read_lock(&tasklist_lock);
557 for_each_process(task){
558 if(task->thread.mode.tt.extern_pid == pid){
559 read_unlock(&tasklist_lock);
563 read_unlock(&tasklist_lock);
568 * Overrides for Emacs so that we follow Linus's tabbing style.
569 * Emacs will notice this stuff at the end of the file and automatically
570 * adjust the settings for this buffer only. This must remain at the end
572 * ---------------------------------------------------------------------------
574 * c-file-style: "linux"