177cb2461be0cfd49448daaf1235e7baf1c95c8f
[linux-2.6.git] / arch / um / kernel / tt / process_kern.c
1 /* 
2  * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
3  * Licensed under the GPL
4  */
5
6 #include "linux/sched.h"
7 #include "linux/signal.h"
8 #include "linux/kernel.h"
9 #include "linux/interrupt.h"
10 #include "linux/ptrace.h"
11 #include "asm/system.h"
12 #include "asm/pgalloc.h"
13 #include "asm/ptrace.h"
14 #include "asm/tlbflush.h"
15 #include "irq_user.h"
16 #include "signal_user.h"
17 #include "kern_util.h"
18 #include "user_util.h"
19 #include "os.h"
20 #include "kern.h"
21 #include "sigcontext.h"
22 #include "time_user.h"
23 #include "mem_user.h"
24 #include "tlb.h"
25 #include "mode.h"
26 #include "init.h"
27 #include "tt.h"
28
/* Tracing-thread mode context switch.  Every task is backed by its own
 * host process, so switching tasks means waking the incoming host process
 * through its switch pipe and then blocking on our own pipe until some
 * later switch comes back to us.  Returns the task that was running
 * before this one (prev_sched), as required by the switch_to protocol.
 */
void *switch_to_tt(void *prev, void *next, void *last)
{
        struct task_struct *from, *to;
        unsigned long flags;
        int err, vtalrm, alrm, prof, cpu;
        char c;
        /* jailing and SMP are incompatible, so this doesn't need to be 
         * made per-cpu 
         */
        static int reading;

        from = prev;
        to = next;

        to->thread.prev_sched = from;

        cpu = from->thread_info->cpu;
        if(cpu == 0)
                forward_interrupts(to->thread.mode.tt.extern_pid);
#ifdef CONFIG_SMP
        forward_ipi(cpu_data[cpu].ipi_pipe[0], to->thread.mode.tt.extern_pid);
#endif
        local_irq_save(flags);

        /* Block the timer signals across the switch; their prior state is
         * saved and restored below once we run again.
         */
        vtalrm = change_sig(SIGVTALRM, 0);
        alrm = change_sig(SIGALRM, 0);
        prof = change_sig(SIGPROF, 0);

        forward_pending_sigio(to->thread.mode.tt.extern_pid);

        c = 0;
        set_current(to);

        /* Wake the incoming host process by writing a byte to its pipe. */
        reading = 0;
        err = os_write_file(to->thread.mode.tt.switch_pipe[1], &c, sizeof(c));
        if(err != sizeof(c))
                panic("write of switch_pipe failed, err = %d", -err);

        /* A dying task never runs again, so its host process can just be
         * killed instead of being put to sleep on the pipe.
         */
        reading = 1;
        if((from->state == TASK_ZOMBIE) || (from->state == TASK_DEAD))
                os_kill_process(os_getpid(), 0);

        /* Sleep until someone switches back to this task. */
        err = os_read_file(from->thread.mode.tt.switch_pipe[0], &c, sizeof(c));
        if(err != sizeof(c))
                panic("read of switch_pipe failed, errno = %d", -err);

        /* This works around a nasty race with 'jail'.  If we are switching
         * between two threads of a threaded app and the incoming process 
         * runs before the outgoing process reaches the read, and it makes
         * it all the way out to userspace, then it will have write-protected 
         * the outgoing process stack.  Then, when the outgoing process 
         * returns from the write, it will segfault because it can no longer
         * write its own stack.  So, in order to avoid that, the incoming 
         * thread sits in a loop yielding until 'reading' is set.  This 
         * isn't entirely safe, since there may be a reschedule from a timer
         * happening between setting 'reading' and sleeping in read.  But,
         * it should get a whole quantum in which to reach the read and sleep,
         * which should be enough.
         */

        if(jail){
                while(!reading) sched_yield();
        }

        /* Restore the timer signals to their pre-switch state. */
        change_sig(SIGVTALRM, vtalrm);
        change_sig(SIGALRM, alrm);
        change_sig(SIGPROF, prof);

        arch_switch();

        flush_tlb_all();
        local_irq_restore(flags);

        return(current->thread.prev_sched);
}
104
105 void release_thread_tt(struct task_struct *task)
106 {
107         int pid = task->thread.mode.tt.extern_pid;
108
109         if(os_getpid() != pid)
110                 os_kill_process(pid, 0);
111 }
112
113 void exit_thread_tt(void)
114 {
115         os_close_file(current->thread.mode.tt.switch_pipe[0]);
116         os_close_file(current->thread.mode.tt.switch_pipe[1]);
117 }
118
/* Park a newly created thread: stop its host process, then block reading
 * from the switch pipe fd until the scheduler switches to it for the
 * first time.
 */
void suspend_new_thread(int fd)
{
        char c;
        int n;

        os_stop_process(os_getpid());

        n = os_read_file(fd, &c, sizeof(c));
        if(n != sizeof(c))
                panic("read failed in suspend_new_thread, err = %d", -n);
}
129
130 void schedule_tail(task_t *prev);
131
/* SIGUSR1 handler in which a newly created kernel thread starts life.
 * It records the sigcontext location, unblocks the timer and IO signals
 * in the saved signal mask (they were blocked when new_thread_proc ran
 * local_irq_disable - see the comment there), parks until the scheduler
 * first switches to this thread, and then runs the requested function.
 */
static void new_thread_handler(int sig)
{
        unsigned long disable;
        int (*fn)(void *);
        void *arg;

        fn = current->thread.request.u.thread.proc;
        arg = current->thread.request.u.thread.arg;

        /* The sigcontext sits just above the handler's argument on the
         * signal stack.
         */
        UPT_SC(&current->thread.regs.regs) = (void *) (&sig + 1);
        disable = (1 << (SIGVTALRM - 1)) | (1 << (SIGALRM - 1)) |
                (1 << (SIGIO - 1)) | (1 << (SIGPROF - 1));
        SC_SIGMASK(UPT_SC(&current->thread.regs.regs)) &= ~disable;

        /* Sleep until this thread is scheduled for the first time. */
        suspend_new_thread(current->thread.mode.tt.switch_pipe[0]);

        force_flush_all();
        if(current->thread.prev_sched != NULL)
                schedule_tail(current->thread.prev_sched);
        current->thread.prev_sched = NULL;

        init_new_thread_signals(1);
        enable_timer();
        /* The trampoline's temporary stack is no longer needed. */
        free_page(current->thread.temp_stack);
        set_cmdline("(kernel thread)");

        change_sig(SIGUSR1, 1);
        change_sig(SIGVTALRM, 1);
        change_sig(SIGPROF, 1);
        local_irq_enable();
        if(!run_kernel_thread(fn, arg, &current->thread.exec_buf))
                do_exit(0);
        
        /* XXX No set_user_mode here because a newly execed process will
         * immediately segfault on its non-existent IP, coming straight back
         * to the signal handler, which will call set_user_mode on its way
         * out.  This should probably change since it's confusing.
         */
}
171
/* Trampoline for a new kernel thread, run on the temporary stack.  It
 * arranges for new_thread_handler to be entered by hitting this process
 * with SIGUSR1.
 */
static int new_thread_proc(void *stack)
{
        /* local_irq_disable is needed to block out signals until this thread is
         * properly scheduled.  Otherwise, the tracing thread will get mighty 
         * upset about any signals that arrive before that.  
         * This has the complication that it sets the saved signal mask in
         * the sigcontext to block signals.  This gets restored when this
         * thread (or a descendant, since they get a copy of this sigcontext)
         * returns to userspace.
         * So, this is compensated for elsewhere.
         * XXX There is still a small window until local_irq_disable() actually 
         * finishes where signals are possible - shouldn't be a problem in 
         * practice since SIGIO hasn't been forwarded here yet, and the 
         * local_irq_disable should finish before a SIGVTALRM has time to be 
         * delivered.
         */

        local_irq_disable();
        init_new_thread_stack(stack, new_thread_handler);
        os_usr1_process(os_getpid());
        change_sig(SIGUSR1, 1);
        return(0);
}
195
196 /* Signal masking - signals are blocked at the start of fork_tramp.  They
197  * are re-enabled when finish_fork_handler is entered by fork_tramp hitting
198  * itself with a SIGUSR1.  set_user_mode has to be run with SIGUSR1 off,
199  * so it is blocked before it's called.  They are re-enabled on sigreturn
200  * despite the fact that they were blocked when the SIGUSR1 was issued because
201  * copy_thread copies the parent's sigcontext, including the signal mask
202  * onto the signal frame.
203  */
204
/* SIGUSR1 handler in which a newly forked process starts life.  It parks
 * until the scheduler first switches to it, finishes setting up the new
 * address space, and drops to usermode via set_user_mode.
 */
void finish_fork_handler(int sig)
{
        /* The sigcontext sits just above the handler's argument on the
         * signal stack.
         */
        UPT_SC(&current->thread.regs.regs) = (void *) (&sig + 1);
        suspend_new_thread(current->thread.mode.tt.switch_pipe[0]);

        force_flush_all();
        if(current->thread.prev_sched != NULL)
                schedule_tail(current->thread.prev_sched);
        current->thread.prev_sched = NULL;

        enable_timer();
        change_sig(SIGVTALRM, 1);
        local_irq_enable();
        /* A fresh mm (fork rather than clone) gets its kernel-area
         * protections set up separately from the parent's.
         */
        if(current->mm != current->parent->mm)
                protect_memory(uml_reserved, high_physmem - uml_reserved, 1, 
                               1, 0, 1);
        task_protections((unsigned long) current_thread);

        /* The trampoline's temporary stack is no longer needed. */
        free_page(current->thread.temp_stack);
        local_irq_disable();
        /* set_user_mode has to run with SIGUSR1 off - see the signal
         * masking comment above this function.
         */
        change_sig(SIGUSR1, 0);
        set_user_mode(current);
}
228
/* Trampoline for a newly forked process, run on the temporary stack.
 * Signals stay blocked (via local_irq_disable) until finish_fork_handler
 * is entered by this process hitting itself with SIGUSR1 - see the signal
 * masking comment above finish_fork_handler.
 */
int fork_tramp(void *stack)
{
        local_irq_disable();
        arch_init_thread();
        init_new_thread_stack(stack, finish_fork_handler);

        os_usr1_process(os_getpid());
        change_sig(SIGUSR1, 1);
        return(0);
}
239
240 int copy_thread_tt(int nr, unsigned long clone_flags, unsigned long sp,
241                    unsigned long stack_top, struct task_struct * p, 
242                    struct pt_regs *regs)
243 {
244         int (*tramp)(void *);
245         int new_pid, err;
246         unsigned long stack;
247         
248         if(current->thread.forking)
249                 tramp = fork_tramp;
250         else {
251                 tramp = new_thread_proc;
252                 p->thread.request.u.thread = current->thread.request.u.thread;
253         }
254
255         err = os_pipe(p->thread.mode.tt.switch_pipe, 1, 1);
256         if(err < 0){
257                 printk("copy_thread : pipe failed, err = %d\n", -err);
258                 goto out;
259         }
260
261         stack = alloc_stack(0, 0);
262         if(stack == 0){
263                 printk(KERN_ERR "copy_thread : failed to allocate "
264                        "temporary stack\n");
265                 return(-ENOMEM);
266         }
267
268         clone_flags &= CLONE_VM;
269         p->thread.temp_stack = stack;
270         new_pid = start_fork_tramp(p->thread_info, stack, clone_flags, tramp);
271         if(new_pid < 0){
272                 printk(KERN_ERR "copy_thread : clone failed - errno = %d\n", 
273                        -new_pid);
274                 return(new_pid);
275         }
276
277         if(current->thread.forking){
278                 sc_to_sc(UPT_SC(&p->thread.regs.regs), 
279                          UPT_SC(&current->thread.regs.regs));
280                 SC_SET_SYSCALL_RETURN(UPT_SC(&p->thread.regs.regs), 0);
281                 if(sp != 0) SC_SP(UPT_SC(&p->thread.regs.regs)) = sp;
282         }
283         p->thread.mode.tt.extern_pid = new_pid;
284
285         current->thread.request.op = OP_FORK;
286         current->thread.request.u.fork.pid = new_pid;
287         os_usr1_process(os_getpid());
288
289         /* Enable the signal and then disable it to ensure that it is handled
290          * here, and nowhere else.
291          */
292         change_sig(SIGUSR1, 1);
293
294         change_sig(SIGUSR1, 0);
295         err = 0;
296  out:
297         return(err);
298 }
299
/* Ask the tracing thread to reboot.  The OP_REBOOT request is delivered
 * by hitting ourselves with SIGUSR1; the signal is enabled so that it is
 * handled immediately.
 */
void reboot_tt(void)
{
        current->thread.request.op = OP_REBOOT;
        os_usr1_process(os_getpid());
        change_sig(SIGUSR1, 1);
}
306
/* Ask the tracing thread to halt.  Same SIGUSR1-based delivery as
 * reboot_tt, with the OP_HALT request instead.
 */
void halt_tt(void)
{
        current->thread.request.op = OP_HALT;
        os_usr1_process(os_getpid());
        change_sig(SIGUSR1, 1);
}
313
314 void kill_off_processes_tt(void)
315 {
316         struct task_struct *p;
317         int me;
318
319         me = os_getpid();
320         for_each_process(p){
321                 if(p->thread.mode.tt.extern_pid != me) 
322                         os_kill_process(p->thread.mode.tt.extern_pid, 0);
323         }
324         if(init_task.thread.mode.tt.extern_pid != me) 
325                 os_kill_process(init_task.thread.mode.tt.extern_pid, 0);
326 }
327
328 void initial_thread_cb_tt(void (*proc)(void *), void *arg)
329 {
330         if(os_getpid() == tracing_pid){
331                 (*proc)(arg);
332         }
333         else {
334                 current->thread.request.op = OP_CB;
335                 current->thread.request.u.cb.proc = proc;
336                 current->thread.request.u.cb.arg = arg;
337                 os_usr1_process(os_getpid());
338                 change_sig(SIGUSR1, 1);
339
340                 change_sig(SIGUSR1, 0);
341         }
342 }
343
344 int do_proc_op(void *t, int proc_id)
345 {
346         struct task_struct *task;
347         struct thread_struct *thread;
348         int op, pid;
349
350         task = t;
351         thread = &task->thread;
352         op = thread->request.op;
353         switch(op){
354         case OP_NONE:
355         case OP_TRACE_ON:
356                 break;
357         case OP_EXEC:
358                 pid = thread->request.u.exec.pid;
359                 do_exec(thread->mode.tt.extern_pid, pid);
360                 thread->mode.tt.extern_pid = pid;
361                 cpu_tasks[task->thread_info->cpu].pid = pid;
362                 break;
363         case OP_FORK:
364                 attach_process(thread->request.u.fork.pid);
365                 break;
366         case OP_CB:
367                 (*thread->request.u.cb.proc)(thread->request.u.cb.arg);
368                 break;
369         case OP_REBOOT:
370         case OP_HALT:
371                 break;
372         default:
373                 tracer_panic("Bad op in do_proc_op");
374                 break;
375         }
376         thread->request.op = OP_NONE;
377         return(op);
378 }
379
/* tt-mode idle thread - just runs the generic idle loop. */
void init_idle_tt(void)
{
        default_idle();
}
384
/* Non-zero when kernel memory is to be protected from processes.
 * Changed by jail_setup, which is a setup.
 */
int jail = 0;
387
388 int __init jail_setup(char *line, int *add)
389 {
390         int ok = 1;
391
392         if(jail) return(0);
393 #ifdef CONFIG_SMP
394         printf("'jail' may not used used in a kernel with CONFIG_SMP "
395                "enabled\n");
396         ok = 0;
397 #endif
398 #ifdef CONFIG_HOSTFS
399         printf("'jail' may not used used in a kernel with CONFIG_HOSTFS "
400                "enabled\n");
401         ok = 0;
402 #endif
403 #ifdef CONFIG_MODULES
404         printf("'jail' may not used used in a kernel with CONFIG_MODULES "
405                "enabled\n");
406         ok = 0;
407 #endif  
408         if(!ok) exit(1);
409
410         /* CAP_SYS_RAWIO controls the ability to open /dev/mem and /dev/kmem.
411          * Removing it from the bounding set eliminates the ability of anything
412          * to acquire it, and thus read or write kernel memory.
413          */
414         cap_lower(cap_bset, CAP_SYS_RAWIO);
415         jail = 1;
416         return(0);
417 }
418
/* Register the "jail" command line option, handled by jail_setup. */
__uml_setup("jail", jail_setup,
"jail\n"
"    Enables the protection of kernel memory from processes.\n\n"
);
423
/* Change the host protections on kernel memory - text, data, BSS, and
 * physical memory outside the current task's kernel stack - via
 * protect_memory, then do the same for kernel vmalloc space.  Called with
 * w != 0 on kernel entry and w == 0 on return to userspace (see
 * unprotect_kernel_mem/protect_kernel_mem below).  No-op unless jailing
 * is enabled, and never applied to init_task.
 * NOTE(review): the exact meaning of protect_memory's flag arguments is
 * not visible in this file - confirm against its definition.
 */
static void mprotect_kernel_mem(int w)
{
        unsigned long start, end;
        int pages;

        if(!jail || (current == &init_task)) return;

        /* Number of pages in a kernel stack. */
        pages = (1 << CONFIG_KERNEL_STACK_ORDER);

        /* Physical memory on either side of (most of) this task's stack. */
        start = (unsigned long) current_thread + PAGE_SIZE;
        end = (unsigned long) current_thread + PAGE_SIZE * pages;
        protect_memory(uml_reserved, start - uml_reserved, 1, w, 1, 1);
        protect_memory(end, high_physmem - end, 1, w, 1, 1);

        /* Kernel text. */
        start = (unsigned long) UML_ROUND_DOWN(&_stext);
        end = (unsigned long) UML_ROUND_UP(&_etext);
        protect_memory(start, end - start, 1, w, 1, 1);

        /* Initialized data. */
        start = (unsigned long) UML_ROUND_DOWN(&_unprotected_end);
        end = (unsigned long) UML_ROUND_UP(&_edata);
        protect_memory(start, end - start, 1, w, 1, 1);

        /* BSS, up to the initial brk. */
        start = (unsigned long) UML_ROUND_DOWN(&__bss_start);
        end = (unsigned long) UML_ROUND_UP(brk_start);
        protect_memory(start, end - start, 1, w, 1, 1);

        mprotect_kernel_vm(w);
}
452
/* Make kernel memory accessible again on entry to the kernel. */
void unprotect_kernel_mem(void)
{
        mprotect_kernel_mem(1);
}
457
/* Re-protect kernel memory on return to userspace. */
void protect_kernel_mem(void)
{
        mprotect_kernel_mem(0);
}
462
463 extern void start_kernel(void);
464
465 static int start_kernel_proc(void *unused)
466 {
467         int pid;
468
469         block_signals();
470         pid = os_getpid();
471
472         cpu_tasks[0].pid = pid;
473         cpu_tasks[0].task = current;
474 #ifdef CONFIG_SMP
475         cpu_online_map = cpumask_of_cpu(0);
476 #endif
477         if(debug) os_stop_process(pid);
478         start_kernel();
479         return(0);
480 }
481
482 void set_tracing(void *task, int tracing)
483 {
484         ((struct task_struct *) task)->thread.mode.tt.tracing = tracing;
485 }
486
487 int is_tracing(void *t)
488 {
489         return (((struct task_struct *) t)->thread.mode.tt.tracing);
490 }
491
492 int set_user_mode(void *t)
493 {
494         struct task_struct *task;
495
496         task = t ? t : current;
497         if(task->thread.mode.tt.tracing) 
498                 return(1);
499         task->thread.request.op = OP_TRACE_ON;
500         os_usr1_process(os_getpid());
501         return(0);
502 }
503
504 void set_init_pid(int pid)
505 {
506         int err;
507
508         init_task.thread.mode.tt.extern_pid = pid;
509         err = os_pipe(init_task.thread.mode.tt.switch_pipe, 1, 1);
510         if(err)
511                 panic("Can't create switch pipe for init_task, errno = %d", 
512                       -err);
513 }
514
515 int singlestepping_tt(void *t)
516 {
517         struct task_struct *task = t;
518
519         if(task->thread.mode.tt.singlestep_syscall)
520                 return(0);
521         return(task->ptrace & PT_DTRACE);
522 }
523
524 void clear_singlestep(void *t)
525 {
526         struct task_struct *task = t;
527
528         task->ptrace &= ~PT_DTRACE;
529 }
530
/* Entry point for tt mode - runs start_kernel_proc under the tracer, with
 * the stack pointer one word below the top of init_task's kernel stack.
 * Note that the cast binds only to the thread_info value, so the rest of
 * the expression is GCC void-pointer (byte) arithmetic.
 */
int start_uml_tt(void)
{
        void *sp;
        int pages;

        pages = (1 << CONFIG_KERNEL_STACK_ORDER);
        sp = (void *) ((unsigned long) init_task.thread_info) + 
                pages * PAGE_SIZE - sizeof(unsigned long);
        return(tracer(start_kernel_proc, sp));
}
541
542 int external_pid_tt(struct task_struct *task)
543 {
544         return(task->thread.mode.tt.extern_pid);
545 }
546
547 int thread_pid_tt(struct task_struct *task)
548 {
549         return(task->thread.mode.tt.extern_pid);
550 }
551
552 int is_valid_pid(int pid)
553 {
554         struct task_struct *task;
555
556         read_lock(&tasklist_lock);
557         for_each_process(task){
558                 if(task->thread.mode.tt.extern_pid == pid){
559                         read_unlock(&tasklist_lock);
560                         return(1);
561                 }
562         }
563         read_unlock(&tasklist_lock);
564         return(0);
565 }
566
567 /*
568  * Overrides for Emacs so that we follow Linus's tabbing style.
569  * Emacs will notice this stuff at the end of the file and automatically
570  * adjust the settings for this buffer only.  This must remain at the end
571  * of the file.
572  * ---------------------------------------------------------------------------
573  * Local variables:
574  * c-file-style: "linux"
575  * End:
576  */