upgrade to linux 2.6.10-1.12_FC2
[linux-2.6.git] / arch / um / kernel / tt / process_kern.c
/*
 * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
 * Licensed under the GPL
 */

6 #include "linux/sched.h"
7 #include "linux/signal.h"
8 #include "linux/kernel.h"
9 #include "linux/interrupt.h"
10 #include "linux/ptrace.h"
11 #include "asm/system.h"
12 #include "asm/pgalloc.h"
13 #include "asm/ptrace.h"
14 #include "asm/tlbflush.h"
15 #include "irq_user.h"
16 #include "signal_user.h"
17 #include "kern_util.h"
18 #include "user_util.h"
19 #include "os.h"
20 #include "kern.h"
21 #include "sigcontext.h"
22 #include "time_user.h"
23 #include "mem_user.h"
24 #include "tlb.h"
25 #include "mode.h"
26 #include "init.h"
27 #include "tt.h"
28
29 void *switch_to_tt(void *prev, void *next, void *last)
30 {
31         struct task_struct *from, *to, *prev_sched;
32         unsigned long flags;
33         int err, vtalrm, alrm, prof, cpu;
34         char c;
35         /* jailing and SMP are incompatible, so this doesn't need to be 
36          * made per-cpu 
37          */
38         static int reading;
39
40         from = prev;
41         to = next;
42
43         to->thread.prev_sched = from;
44
45         cpu = from->thread_info->cpu;
46         if(cpu == 0)
47                 forward_interrupts(to->thread.mode.tt.extern_pid);
48 #ifdef CONFIG_SMP
49         forward_ipi(cpu_data[cpu].ipi_pipe[0], to->thread.mode.tt.extern_pid);
50 #endif
51         local_irq_save(flags);
52
53         vtalrm = change_sig(SIGVTALRM, 0);
54         alrm = change_sig(SIGALRM, 0);
55         prof = change_sig(SIGPROF, 0);
56
57         forward_pending_sigio(to->thread.mode.tt.extern_pid);
58
59         c = 0;
60         set_current(to);
61
62         reading = 0;
63         err = os_write_file(to->thread.mode.tt.switch_pipe[1], &c, sizeof(c));
64         if(err != sizeof(c))
65                 panic("write of switch_pipe failed, err = %d", -err);
66
67         reading = 1;
68         if((from->exit_state == EXIT_ZOMBIE) || (from->exit_state == EXIT_DEAD))
69                 os_kill_process(os_getpid(), 0);
70
71         err = os_read_file(from->thread.mode.tt.switch_pipe[0], &c, sizeof(c));
72         if(err != sizeof(c))
73                 panic("read of switch_pipe failed, errno = %d", -err);
74
75         /* If the process that we have just scheduled away from has exited,
76          * then it needs to be killed here.  The reason is that, even though
77          * it will kill itself when it next runs, that may be too late.  Its
78          * stack will be freed, possibly before then, and if that happens,
79          * we have a use-after-free situation.  So, it gets killed here
80          * in case it has not already killed itself.
81          */
82         prev_sched = current->thread.prev_sched;
83         if((prev_sched->exit_state == EXIT_ZOMBIE) ||
84            (prev_sched->exit_state == EXIT_DEAD))
85                 os_kill_ptraced_process(prev_sched->thread.mode.tt.extern_pid, 1);
86
87         /* This works around a nasty race with 'jail'.  If we are switching
88          * between two threads of a threaded app and the incoming process 
89          * runs before the outgoing process reaches the read, and it makes
90          * it all the way out to userspace, then it will have write-protected 
91          * the outgoing process stack.  Then, when the outgoing process 
92          * returns from the write, it will segfault because it can no longer
93          * write its own stack.  So, in order to avoid that, the incoming 
94          * thread sits in a loop yielding until 'reading' is set.  This 
95          * isn't entirely safe, since there may be a reschedule from a timer
96          * happening between setting 'reading' and sleeping in read.  But,
97          * it should get a whole quantum in which to reach the read and sleep,
98          * which should be enough.
99          */
100
101         if(jail){
102                 while(!reading) sched_yield();
103         }
104
105         change_sig(SIGVTALRM, vtalrm);
106         change_sig(SIGALRM, alrm);
107         change_sig(SIGPROF, prof);
108
109         arch_switch();
110
111         flush_tlb_all();
112         local_irq_restore(flags);
113
114         return(current->thread.prev_sched);
115 }
116
117 void release_thread_tt(struct task_struct *task)
118 {
119         int pid = task->thread.mode.tt.extern_pid;
120
121         if(os_getpid() != pid)
122                 os_kill_process(pid, 0);
123 }
124
125 void exit_thread_tt(void)
126 {
127         os_close_file(current->thread.mode.tt.switch_pipe[0]);
128         os_close_file(current->thread.mode.tt.switch_pipe[1]);
129 }
130
/* Park a freshly-created host process: stop it, then sleep in a read on
 * its switch pipe until the scheduler writes a wakeup byte into fd.
 */
void suspend_new_thread(int fd)
{
	char data;
	int n;

	os_stop_process(os_getpid());
	n = os_read_file(fd, &data, sizeof(data));
	if(n != sizeof(data))
		panic("read failed in suspend_new_thread, err = %d", -n);
}
141
142 void schedule_tail(task_t *prev);
143
144 static void new_thread_handler(int sig)
145 {
146         unsigned long disable;
147         int (*fn)(void *);
148         void *arg;
149
150         fn = current->thread.request.u.thread.proc;
151         arg = current->thread.request.u.thread.arg;
152
153         UPT_SC(&current->thread.regs.regs) = (void *) (&sig + 1);
154         disable = (1 << (SIGVTALRM - 1)) | (1 << (SIGALRM - 1)) |
155                 (1 << (SIGIO - 1)) | (1 << (SIGPROF - 1));
156         SC_SIGMASK(UPT_SC(&current->thread.regs.regs)) &= ~disable;
157
158         suspend_new_thread(current->thread.mode.tt.switch_pipe[0]);
159
160         force_flush_all();
161         if(current->thread.prev_sched != NULL)
162                 schedule_tail(current->thread.prev_sched);
163         current->thread.prev_sched = NULL;
164
165         init_new_thread_signals(1);
166         enable_timer();
167         free_page(current->thread.temp_stack);
168         set_cmdline("(kernel thread)");
169
170         change_sig(SIGUSR1, 1);
171         change_sig(SIGVTALRM, 1);
172         change_sig(SIGPROF, 1);
173         local_irq_enable();
174         if(!run_kernel_thread(fn, arg, &current->thread.exec_buf))
175                 do_exit(0);
176
177         /* XXX No set_user_mode here because a newly execed process will
178          * immediately segfault on its non-existent IP, coming straight back
179          * to the signal handler, which will call set_user_mode on its way
180          * out.  This should probably change since it's confusing.
181          */
182 }
183
184 static int new_thread_proc(void *stack)
185 {
186         /* local_irq_disable is needed to block out signals until this thread is
187          * properly scheduled.  Otherwise, the tracing thread will get mighty
188          * upset about any signals that arrive before that.
189          * This has the complication that it sets the saved signal mask in
190          * the sigcontext to block signals.  This gets restored when this
191          * thread (or a descendant, since they get a copy of this sigcontext)
192          * returns to userspace.
193          * So, this is compensated for elsewhere.
194          * XXX There is still a small window until local_irq_disable() actually
195          * finishes where signals are possible - shouldn't be a problem in
196          * practice since SIGIO hasn't been forwarded here yet, and the
197          * local_irq_disable should finish before a SIGVTALRM has time to be
198          * delivered.
199          */
200
201         local_irq_disable();
202         init_new_thread_stack(stack, new_thread_handler);
203         os_usr1_process(os_getpid());
204         change_sig(SIGUSR1, 1);
205         return(0);
206 }
207
/* Signal masking - signals are blocked at the start of fork_tramp.  They
 * are re-enabled when finish_fork_handler is entered by fork_tramp hitting
 * itself with a SIGUSR1.  set_user_mode has to be run with SIGUSR1 off,
 * so it is blocked before it's called.  They are re-enabled on sigreturn
 * despite the fact that they were blocked when the SIGUSR1 was issued because
 * copy_thread copies the parent's sigcontext, including the signal mask
 * onto the signal frame.
 */
216
217 void finish_fork_handler(int sig)
218 {
219         UPT_SC(&current->thread.regs.regs) = (void *) (&sig + 1);
220         suspend_new_thread(current->thread.mode.tt.switch_pipe[0]);
221
222         force_flush_all();
223         if(current->thread.prev_sched != NULL)
224                 schedule_tail(current->thread.prev_sched);
225         current->thread.prev_sched = NULL;
226
227         enable_timer();
228         change_sig(SIGVTALRM, 1);
229         local_irq_enable();
230         if(current->mm != current->parent->mm)
231                 protect_memory(uml_reserved, high_physmem - uml_reserved, 1, 
232                                1, 0, 1);
233         task_protections((unsigned long) current_thread);
234
235         free_page(current->thread.temp_stack);
236         local_irq_disable();
237         change_sig(SIGUSR1, 0);
238         set_user_mode(current);
239 }
240
241 int fork_tramp(void *stack)
242 {
243         local_irq_disable();
244         arch_init_thread();
245         init_new_thread_stack(stack, finish_fork_handler);
246
247         os_usr1_process(os_getpid());
248         change_sig(SIGUSR1, 1);
249         return(0);
250 }
251
252 int copy_thread_tt(int nr, unsigned long clone_flags, unsigned long sp,
253                    unsigned long stack_top, struct task_struct * p, 
254                    struct pt_regs *regs)
255 {
256         int (*tramp)(void *);
257         int new_pid, err;
258         unsigned long stack;
259         
260         if(current->thread.forking)
261                 tramp = fork_tramp;
262         else {
263                 tramp = new_thread_proc;
264                 p->thread.request.u.thread = current->thread.request.u.thread;
265         }
266
267         err = os_pipe(p->thread.mode.tt.switch_pipe, 1, 1);
268         if(err < 0){
269                 printk("copy_thread : pipe failed, err = %d\n", -err);
270                 return(err);
271         }
272
273         stack = alloc_stack(0, 0);
274         if(stack == 0){
275                 printk(KERN_ERR "copy_thread : failed to allocate "
276                        "temporary stack\n");
277                 return(-ENOMEM);
278         }
279
280         clone_flags &= CLONE_VM;
281         p->thread.temp_stack = stack;
282         new_pid = start_fork_tramp(p->thread_info, stack, clone_flags, tramp);
283         if(new_pid < 0){
284                 printk(KERN_ERR "copy_thread : clone failed - errno = %d\n", 
285                        -new_pid);
286                 return(new_pid);
287         }
288
289         if(current->thread.forking){
290                 sc_to_sc(UPT_SC(&p->thread.regs.regs), 
291                          UPT_SC(&current->thread.regs.regs));
292                 SC_SET_SYSCALL_RETURN(UPT_SC(&p->thread.regs.regs), 0);
293                 if(sp != 0) SC_SP(UPT_SC(&p->thread.regs.regs)) = sp;
294         }
295         p->thread.mode.tt.extern_pid = new_pid;
296
297         current->thread.request.op = OP_FORK;
298         current->thread.request.u.fork.pid = new_pid;
299         os_usr1_process(os_getpid());
300
301         /* Enable the signal and then disable it to ensure that it is handled
302          * here, and nowhere else.
303          */
304         change_sig(SIGUSR1, 1);
305
306         change_sig(SIGUSR1, 0);
307         err = 0;
308         return(err);
309 }
310
311 void reboot_tt(void)
312 {
313         current->thread.request.op = OP_REBOOT;
314         os_usr1_process(os_getpid());
315         change_sig(SIGUSR1, 1);
316 }
317
318 void halt_tt(void)
319 {
320         current->thread.request.op = OP_HALT;
321         os_usr1_process(os_getpid());
322         change_sig(SIGUSR1, 1);
323 }
324
325 void kill_off_processes_tt(void)
326 {
327         struct task_struct *p;
328         int me;
329
330         me = os_getpid();
331         for_each_process(p){
332                 if(p->thread.mode.tt.extern_pid != me) 
333                         os_kill_process(p->thread.mode.tt.extern_pid, 0);
334         }
335         if(init_task.thread.mode.tt.extern_pid != me) 
336                 os_kill_process(init_task.thread.mode.tt.extern_pid, 0);
337 }
338
339 void initial_thread_cb_tt(void (*proc)(void *), void *arg)
340 {
341         if(os_getpid() == tracing_pid){
342                 (*proc)(arg);
343         }
344         else {
345                 current->thread.request.op = OP_CB;
346                 current->thread.request.u.cb.proc = proc;
347                 current->thread.request.u.cb.arg = arg;
348                 os_usr1_process(os_getpid());
349                 change_sig(SIGUSR1, 1);
350
351                 change_sig(SIGUSR1, 0);
352         }
353 }
354
355 int do_proc_op(void *t, int proc_id)
356 {
357         struct task_struct *task;
358         struct thread_struct *thread;
359         int op, pid;
360
361         task = t;
362         thread = &task->thread;
363         op = thread->request.op;
364         switch(op){
365         case OP_NONE:
366         case OP_TRACE_ON:
367                 break;
368         case OP_EXEC:
369                 pid = thread->request.u.exec.pid;
370                 do_exec(thread->mode.tt.extern_pid, pid);
371                 thread->mode.tt.extern_pid = pid;
372                 cpu_tasks[task->thread_info->cpu].pid = pid;
373                 break;
374         case OP_FORK:
375                 attach_process(thread->request.u.fork.pid);
376                 break;
377         case OP_CB:
378                 (*thread->request.u.cb.proc)(thread->request.u.cb.arg);
379                 break;
380         case OP_REBOOT:
381         case OP_HALT:
382                 break;
383         default:
384                 tracer_panic("Bad op in do_proc_op");
385                 break;
386         }
387         thread->request.op = OP_NONE;
388         return(op);
389 }
390
/* tt-mode idle setup - nothing special to do beyond the default idle loop. */
void init_idle_tt(void)
{
	default_idle();
}
395
396 /* Changed by jail_setup, which is a setup */
397 int jail = 0;
398
399 int __init jail_setup(char *line, int *add)
400 {
401         int ok = 1;
402
403         if(jail) return(0);
404 #ifdef CONFIG_SMP
405         printf("'jail' may not used used in a kernel with CONFIG_SMP "
406                "enabled\n");
407         ok = 0;
408 #endif
409 #ifdef CONFIG_HOSTFS
410         printf("'jail' may not used used in a kernel with CONFIG_HOSTFS "
411                "enabled\n");
412         ok = 0;
413 #endif
414 #ifdef CONFIG_MODULES
415         printf("'jail' may not used used in a kernel with CONFIG_MODULES "
416                "enabled\n");
417         ok = 0;
418 #endif  
419         if(!ok) exit(1);
420
421         /* CAP_SYS_RAWIO controls the ability to open /dev/mem and /dev/kmem.
422          * Removing it from the bounding set eliminates the ability of anything
423          * to acquire it, and thus read or write kernel memory.
424          */
425         cap_lower(cap_bset, CAP_SYS_RAWIO);
426         jail = 1;
427         return(0);
428 }
429
430 __uml_setup("jail", jail_setup,
431 "jail\n"
432 "    Enables the protection of kernel memory from processes.\n\n"
433 );
434
435 static void mprotect_kernel_mem(int w)
436 {
437         unsigned long start, end;
438         int pages;
439
440         if(!jail || (current == &init_task)) return;
441
442         pages = (1 << CONFIG_KERNEL_STACK_ORDER);
443
444         start = (unsigned long) current_thread + PAGE_SIZE;
445         end = (unsigned long) current_thread + PAGE_SIZE * pages;
446         protect_memory(uml_reserved, start - uml_reserved, 1, w, 1, 1);
447         protect_memory(end, high_physmem - end, 1, w, 1, 1);
448
449         start = (unsigned long) UML_ROUND_DOWN(&_stext);
450         end = (unsigned long) UML_ROUND_UP(&_etext);
451         protect_memory(start, end - start, 1, w, 1, 1);
452
453         start = (unsigned long) UML_ROUND_DOWN(&_unprotected_end);
454         end = (unsigned long) UML_ROUND_UP(&_edata);
455         protect_memory(start, end - start, 1, w, 1, 1);
456
457         start = (unsigned long) UML_ROUND_DOWN(&__bss_start);
458         end = (unsigned long) UML_ROUND_UP(brk_start);
459         protect_memory(start, end - start, 1, w, 1, 1);
460
461         mprotect_kernel_vm(w);
462 }
463
/* Make kernel memory writable on kernel entry (jail mode). */
void unprotect_kernel_mem(void)
{
	mprotect_kernel_mem(1);
}
468
/* Write-protect kernel memory on return to userspace (jail mode). */
void protect_kernel_mem(void)
{
	mprotect_kernel_mem(0);
}
473
474 extern void start_kernel(void);
475
476 static int start_kernel_proc(void *unused)
477 {
478         int pid;
479
480         block_signals();
481         pid = os_getpid();
482
483         cpu_tasks[0].pid = pid;
484         cpu_tasks[0].task = current;
485 #ifdef CONFIG_SMP
486         cpu_online_map = cpumask_of_cpu(0);
487 #endif
488         if(debug) os_stop_process(pid);
489         start_kernel();
490         return(0);
491 }
492
493 void set_tracing(void *task, int tracing)
494 {
495         ((struct task_struct *) task)->thread.mode.tt.tracing = tracing;
496 }
497
498 int is_tracing(void *t)
499 {
500         return (((struct task_struct *) t)->thread.mode.tt.tracing);
501 }
502
503 int set_user_mode(void *t)
504 {
505         struct task_struct *task;
506
507         task = t ? t : current;
508         if(task->thread.mode.tt.tracing) 
509                 return(1);
510         task->thread.request.op = OP_TRACE_ON;
511         os_usr1_process(os_getpid());
512         return(0);
513 }
514
515 void set_init_pid(int pid)
516 {
517         int err;
518
519         init_task.thread.mode.tt.extern_pid = pid;
520         err = os_pipe(init_task.thread.mode.tt.switch_pipe, 1, 1);
521         if(err)
522                 panic("Can't create switch pipe for init_task, errno = %d",
523                       -err);
524 }
525
526 int start_uml_tt(void)
527 {
528         void *sp;
529         int pages;
530
531         pages = (1 << CONFIG_KERNEL_STACK_ORDER);
532         sp = (void *) ((unsigned long) init_task.thread_info) +
533                 pages * PAGE_SIZE - sizeof(unsigned long);
534         return(tracer(start_kernel_proc, sp));
535 }
536
537 int external_pid_tt(struct task_struct *task)
538 {
539         return(task->thread.mode.tt.extern_pid);
540 }
541
542 int thread_pid_tt(struct task_struct *task)
543 {
544         return(task->thread.mode.tt.extern_pid);
545 }
546
547 int is_valid_pid(int pid)
548 {
549         struct task_struct *task;
550
551         read_lock(&tasklist_lock);
552         for_each_process(task){
553                 if(task->thread.mode.tt.extern_pid == pid){
554                         read_unlock(&tasklist_lock);
555                         return(1);
556                 }
557         }
558         read_unlock(&tasklist_lock);
559         return(0);
560 }
561
/*
 * Overrides for Emacs so that we follow Linus's tabbing style.
 * Emacs will notice this stuff at the end of the file and automatically
 * adjust the settings for this buffer only.  This must remain at the end
 * of the file.
 * ---------------------------------------------------------------------------
 * Local variables:
 * c-file-style: "linux"
 * End:
 */