/* Source: linux-2.6.6 (ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-2.6.6.tar.bz2)
 * File:   arch/um/kernel/tt/process_kern.c
 */
1 /* 
2  * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
3  * Licensed under the GPL
4  */
5
6 #include "linux/sched.h"
7 #include "linux/signal.h"
8 #include "linux/kernel.h"
9 #include "linux/interrupt.h"
10 #include "linux/ptrace.h"
11 #include "asm/system.h"
12 #include "asm/pgalloc.h"
13 #include "asm/ptrace.h"
14 #include "asm/tlbflush.h"
15 #include "irq_user.h"
16 #include "signal_user.h"
17 #include "kern_util.h"
18 #include "user_util.h"
19 #include "os.h"
20 #include "kern.h"
21 #include "sigcontext.h"
22 #include "time_user.h"
23 #include "mem_user.h"
24 #include "tlb.h"
25 #include "mode.h"
26 #include "init.h"
27 #include "tt.h"
28
/* Tracing-thread mode context switch.  Hands the CPU from "prev" to "next"
 * by writing a byte into the incoming task's switch pipe (waking it from a
 * blocking read) and then blocking on our own pipe until we are scheduled
 * back in.  Returns the task that was running before us.
 */
void *switch_to_tt(void *prev, void *next, void *last)
{
	struct task_struct *from, *to;
	unsigned long flags;
	int err, vtalrm, alrm, prof, cpu;
	char c;
	/* jailing and SMP are incompatible, so this doesn't need to be 
	 * made per-cpu 
	 */
	static int reading;

	from = prev;
	to = next;

	/* Record who we came from so the incoming thread can return it. */
	to->thread.prev_sched = from;

	cpu = from->thread_info->cpu;
	if(cpu == 0)
		forward_interrupts(to->thread.mode.tt.extern_pid);
#ifdef CONFIG_SMP
	forward_ipi(cpu_data[cpu].ipi_pipe[0], to->thread.mode.tt.extern_pid);
#endif
	local_irq_save(flags);

	/* Disable the timer signals across the switch; their previous
	 * state is saved and restored below. */
	vtalrm = change_sig(SIGVTALRM, 0);
	alrm = change_sig(SIGALRM, 0);
	prof = change_sig(SIGPROF, 0);

	forward_pending_sigio(to->thread.mode.tt.extern_pid);

	c = 0;
	set_current(to);

	/* Wake the incoming task by writing to its switch pipe. */
	reading = 0;
	err = os_write_file(to->thread.mode.tt.switch_pipe[1], &c, sizeof(c));
	if(err != sizeof(c))
		panic("write of switch_pipe failed, errno = %d", -err);

	reading = 1;
	/* A dead task will never be switched back to, so exit the
	 * underlying host process instead of blocking on the pipe. */
	if((from->state == TASK_ZOMBIE) || (from->state == TASK_DEAD))
		os_kill_process(os_getpid(), 0);

	/* Sleep until someone writes into our own switch pipe. */
	err = os_read_file(from->thread.mode.tt.switch_pipe[0], &c, sizeof(c));
	if(err != sizeof(c))
		panic("read of switch_pipe failed, errno = %d", -err);

	/* This works around a nasty race with 'jail'.  If we are switching
	 * between two threads of a threaded app and the incoming process 
	 * runs before the outgoing process reaches the read, and it makes
	 * it all the way out to userspace, then it will have write-protected 
	 * the outgoing process stack.  Then, when the outgoing process 
	 * returns from the write, it will segfault because it can no longer
	 * write its own stack.  So, in order to avoid that, the incoming 
	 * thread sits in a loop yielding until 'reading' is set.  This 
	 * isn't entirely safe, since there may be a reschedule from a timer
	 * happening between setting 'reading' and sleeping in read.  But,
	 * it should get a whole quantum in which to reach the read and sleep,
	 * which should be enough.
	 */

	if(jail){
		while(!reading) sched_yield();
	}

	change_sig(SIGVTALRM, vtalrm);
	change_sig(SIGALRM, alrm);
	change_sig(SIGPROF, prof);

	arch_switch();

	flush_tlb_all();
	local_irq_restore(flags);

	return(current->thread.prev_sched);
}
104
105 void release_thread_tt(struct task_struct *task)
106 {
107         os_kill_process(task->thread.mode.tt.extern_pid, 0);
108 }
109
110 void exit_thread_tt(void)
111 {
112         close(current->thread.mode.tt.switch_pipe[0]);
113         close(current->thread.mode.tt.switch_pipe[1]);
114 }
115
116 void schedule_tail(task_t *prev);
117
/* Entry point for a newly created kernel thread.  Runs as a SIGUSR1
 * handler on the new thread's stack (see new_thread_proc), waits to be
 * scheduled in for the first time, finishes thread setup, and then runs
 * the requested thread function.
 */
static void new_thread_handler(int sig)
{
	int (*fn)(void *);
	void *arg;

	/* The thread function and argument were stashed in the creator's
	 * thread request (see copy_thread_tt). */
	fn = current->thread.request.u.thread.proc;
	arg = current->thread.request.u.thread.arg;
	/* The sigcontext sits just above the handler argument on the
	 * signal frame - record its address for later register access. */
	UPT_SC(&current->thread.regs.regs) = (void *) (&sig + 1);
	/* Block on the switch pipe until this task is first scheduled. */
	suspend_new_thread(current->thread.mode.tt.switch_pipe[0]);

	block_signals();
	init_new_thread_signals(1);
#ifdef CONFIG_SMP
	schedule_tail(current->thread.prev_sched);
#endif
	enable_timer();
	/* The temporary stack used during creation is no longer needed. */
	free_page(current->thread.temp_stack);
	set_cmdline("(kernel thread)");
	force_flush_all();

	current->thread.prev_sched = NULL;
	change_sig(SIGUSR1, 1);
	change_sig(SIGVTALRM, 1);
	change_sig(SIGPROF, 1);
	unblock_signals();
	if(!run_kernel_thread(fn, arg, &current->thread.exec_buf))
		do_exit(0);
}
146
147 static int new_thread_proc(void *stack)
148 {
149         init_new_thread_stack(stack, new_thread_handler);
150         os_usr1_process(os_getpid());
151         return(0);
152 }
153
/* Signal masking - signals are blocked at the start of fork_tramp.  They
 * are re-enabled when finish_fork_handler is entered by fork_tramp hitting
 * itself with a SIGUSR1.  set_user_mode has to be run with SIGUSR1 off,
 * so it is blocked before it's called.  They are re-enabled on sigreturn
 * despite the fact that they were blocked when the SIGUSR1 was issued because
 * copy_thread copies the parent's sigcontext, including the signal mask
 * onto the signal frame.
 */

/* Child-side fork completion.  Runs as a SIGUSR1 handler on the child's
 * new kernel stack (see fork_tramp), waits to be scheduled in for the
 * first time, and finally hands the child over to user mode.
 */
void finish_fork_handler(int sig)
{
	/* The sigcontext sits just above the handler argument on the
	 * signal frame. */
	UPT_SC(&current->thread.regs.regs) = (void *) (&sig + 1);
	/* Block on the switch pipe until first scheduled. */
	suspend_new_thread(current->thread.mode.tt.switch_pipe[0]);

#ifdef CONFIG_SMP	
	schedule_tail(NULL);
#endif
	enable_timer();
	change_sig(SIGVTALRM, 1);
	local_irq_enable();
	force_flush_all();
	/* NOTE(review): when the child has its own mm, this presumably
	 * re-establishes kernel memory protections in the new address
	 * space - confirm against protect_memory's parameter meanings. */
	if(current->mm != current->parent->mm)
		protect_memory(uml_reserved, high_physmem - uml_reserved, 1, 
			       1, 0, 1);
	task_protections((unsigned long) current->thread_info);

	current->thread.prev_sched = NULL;

	/* The temporary stack used during fork is no longer needed. */
	free_page(current->thread.temp_stack);
	/* SIGUSR1 must be off across set_user_mode (see comment above). */
	change_sig(SIGUSR1, 0);
	set_user_mode(current);
}
186
static int sigusr1 = SIGUSR1;

/* Host-process trampoline for a forked child: install the new kernel
 * stack, then deliver SIGUSR1 to ourselves so finish_fork_handler runs
 * on that stack.  Interrupts are disabled here; they are re-enabled in
 * finish_fork_handler (see the signal-masking comment above it).
 */
int fork_tramp(void *stack)
{
	/* NOTE(review): the signal number is read from a file-scope static
	 * rather than using SIGUSR1 directly - presumably so the value does
	 * not depend on the old stack across init_new_thread_stack; confirm. */
	int sig = sigusr1;

	local_irq_disable();
	init_new_thread_stack(stack, finish_fork_handler);

	kill(os_getpid(), sig);
	return(0);
}
199
200 int copy_thread_tt(int nr, unsigned long clone_flags, unsigned long sp,
201                    unsigned long stack_top, struct task_struct * p, 
202                    struct pt_regs *regs)
203 {
204         int (*tramp)(void *);
205         int new_pid, err;
206         unsigned long stack;
207         
208         if(current->thread.forking)
209                 tramp = fork_tramp;
210         else {
211                 tramp = new_thread_proc;
212                 p->thread.request.u.thread = current->thread.request.u.thread;
213         }
214
215         err = os_pipe(p->thread.mode.tt.switch_pipe, 1, 1);
216         if(err){
217                 printk("copy_thread : pipe failed, errno = %d\n", -err);
218                 return(err);
219         }
220
221         stack = alloc_stack(0, 0);
222         if(stack == 0){
223                 printk(KERN_ERR "copy_thread : failed to allocate "
224                        "temporary stack\n");
225                 return(-ENOMEM);
226         }
227
228         clone_flags &= CLONE_VM;
229         p->thread.temp_stack = stack;
230         new_pid = start_fork_tramp((void *) p->thread.kernel_stack, stack,
231                                    clone_flags, tramp);
232         if(new_pid < 0){
233                 printk(KERN_ERR "copy_thread : clone failed - errno = %d\n", 
234                        -new_pid);
235                 return(new_pid);
236         }
237
238         if(current->thread.forking){
239                 sc_to_sc(UPT_SC(&p->thread.regs.regs), 
240                          UPT_SC(&current->thread.regs.regs));
241                 SC_SET_SYSCALL_RETURN(UPT_SC(&p->thread.regs.regs), 0);
242                 if(sp != 0) SC_SP(UPT_SC(&p->thread.regs.regs)) = sp;
243         }
244         p->thread.mode.tt.extern_pid = new_pid;
245
246         current->thread.request.op = OP_FORK;
247         current->thread.request.u.fork.pid = new_pid;
248         os_usr1_process(os_getpid());
249         return(0);
250 }
251
252 void reboot_tt(void)
253 {
254         current->thread.request.op = OP_REBOOT;
255         os_usr1_process(os_getpid());
256 }
257
258 void halt_tt(void)
259 {
260         current->thread.request.op = OP_HALT;
261         os_usr1_process(os_getpid());
262 }
263
264 void kill_off_processes_tt(void)
265 {
266         struct task_struct *p;
267         int me;
268
269         me = os_getpid();
270         for_each_process(p){
271                 if(p->thread.mode.tt.extern_pid != me) 
272                         os_kill_process(p->thread.mode.tt.extern_pid, 0);
273         }
274         if(init_task.thread.mode.tt.extern_pid != me) 
275                 os_kill_process(init_task.thread.mode.tt.extern_pid, 0);
276 }
277
278 void initial_thread_cb_tt(void (*proc)(void *), void *arg)
279 {
280         if(os_getpid() == tracing_pid){
281                 (*proc)(arg);
282         }
283         else {
284                 current->thread.request.op = OP_CB;
285                 current->thread.request.u.cb.proc = proc;
286                 current->thread.request.u.cb.arg = arg;
287                 os_usr1_process(os_getpid());
288         }
289 }
290
291 int do_proc_op(void *t, int proc_id)
292 {
293         struct task_struct *task;
294         struct thread_struct *thread;
295         int op, pid;
296
297         task = t;
298         thread = &task->thread;
299         op = thread->request.op;
300         switch(op){
301         case OP_NONE:
302         case OP_TRACE_ON:
303                 break;
304         case OP_EXEC:
305                 pid = thread->request.u.exec.pid;
306                 do_exec(thread->mode.tt.extern_pid, pid);
307                 thread->mode.tt.extern_pid = pid;
308                 cpu_tasks[task->thread_info->cpu].pid = pid;
309                 break;
310         case OP_FORK:
311                 attach_process(thread->request.u.fork.pid);
312                 break;
313         case OP_CB:
314                 (*thread->request.u.cb.proc)(thread->request.u.cb.arg);
315                 break;
316         case OP_REBOOT:
317         case OP_HALT:
318                 break;
319         default:
320                 tracer_panic("Bad op in do_proc_op");
321                 break;
322         }
323         thread->request.op = OP_NONE;
324         return(op);
325 }
326
/* In tt mode the idle loop is just the generic one. */
void init_idle_tt(void)
{
	default_idle();
}
331
332 /* Changed by jail_setup, which is a setup */
333 int jail = 0;
334
335 int __init jail_setup(char *line, int *add)
336 {
337         int ok = 1;
338
339         if(jail) return(0);
340 #ifdef CONFIG_SMP
341         printf("'jail' may not used used in a kernel with CONFIG_SMP "
342                "enabled\n");
343         ok = 0;
344 #endif
345 #ifdef CONFIG_HOSTFS
346         printf("'jail' may not used used in a kernel with CONFIG_HOSTFS "
347                "enabled\n");
348         ok = 0;
349 #endif
350 #ifdef CONFIG_MODULES
351         printf("'jail' may not used used in a kernel with CONFIG_MODULES "
352                "enabled\n");
353         ok = 0;
354 #endif  
355         if(!ok) exit(1);
356
357         /* CAP_SYS_RAWIO controls the ability to open /dev/mem and /dev/kmem.
358          * Removing it from the bounding set eliminates the ability of anything
359          * to acquire it, and thus read or write kernel memory.
360          */
361         cap_lower(cap_bset, CAP_SYS_RAWIO);
362         jail = 1;
363         return(0);
364 }
365
366 __uml_setup("jail", jail_setup,
367 "jail\n"
368 "    Enables the protection of kernel memory from processes.\n\n"
369 );
370
/* Change the protection of kernel memory for the current process, leaving
 * a window around the current task's stack accessible.  Callers pass
 * w == 1 to unprotect and w == 0 to protect (see unprotect_kernel_mem /
 * protect_kernel_mem below).  Only active under 'jail', and never for
 * the init task.
 */
static void mprotect_kernel_mem(int w)
{
	unsigned long start, end;
	int pages;

	if(!jail || (current == &init_task)) return;

	/* Kernel stack size in pages. */
	pages = (1 << CONFIG_KERNEL_STACK_ORDER);

	/* Everything from the start of kernel memory up to this task's
	 * stack window, and from the end of the window up to the top of
	 * physical memory.  NOTE(review): the window runs from
	 * thread_info + PAGE_SIZE to current + stack size - the bounds
	 * look asymmetric; confirm they are intentional. */
	start = (unsigned long) current->thread_info + PAGE_SIZE;
	end = (unsigned long) current + PAGE_SIZE * pages;
	protect_memory(uml_reserved, start - uml_reserved, 1, w, 1, 1);
	protect_memory(end, high_physmem - end, 1, w, 1, 1);

	/* Kernel text. */
	start = (unsigned long) UML_ROUND_DOWN(&_stext);
	end = (unsigned long) UML_ROUND_UP(&_etext);
	protect_memory(start, end - start, 1, w, 1, 1);

	/* Initialized data, beyond the deliberately unprotected region. */
	start = (unsigned long) UML_ROUND_DOWN(&_unprotected_end);
	end = (unsigned long) UML_ROUND_UP(&_edata);
	protect_memory(start, end - start, 1, w, 1, 1);

	/* BSS up to the start of the brk area. */
	start = (unsigned long) UML_ROUND_DOWN(&__bss_start);
	end = (unsigned long) UML_ROUND_UP(brk_start);
	protect_memory(start, end - start, 1, w, 1, 1);

	/* And the same for kernel vm areas. */
	mprotect_kernel_vm(w);
}
399
/* Make kernel memory writable again for the current process. */
void unprotect_kernel_mem(void)
{
	mprotect_kernel_mem(1);
}
404
/* Write-protect kernel memory for the current process. */
void protect_kernel_mem(void)
{
	mprotect_kernel_mem(0);
}
409
410 extern void start_kernel(void);
411
412 static int start_kernel_proc(void *unused)
413 {
414         int pid;
415
416         block_signals();
417         pid = os_getpid();
418
419         cpu_tasks[0].pid = pid;
420         cpu_tasks[0].task = current;
421 #ifdef CONFIG_SMP
422         cpu_online_map = cpumask_of_cpu(0);
423 #endif
424         if(debug) os_stop_process(pid);
425         start_kernel();
426         return(0);
427 }
428
429 void set_tracing(void *task, int tracing)
430 {
431         ((struct task_struct *) task)->thread.mode.tt.tracing = tracing;
432 }
433
434 int is_tracing(void *t)
435 {
436         return (((struct task_struct *) t)->thread.mode.tt.tracing);
437 }
438
439 int set_user_mode(void *t)
440 {
441         struct task_struct *task;
442
443         task = t ? t : current;
444         if(task->thread.mode.tt.tracing) 
445                 return(1);
446         task->thread.request.op = OP_TRACE_ON;
447         os_usr1_process(os_getpid());
448         return(0);
449 }
450
451 void set_init_pid(int pid)
452 {
453         int err;
454
455         init_task.thread.mode.tt.extern_pid = pid;
456         err = os_pipe(init_task.thread.mode.tt.switch_pipe, 1, 1);
457         if(err) panic("Can't create switch pipe for init_task, errno = %d", 
458                       err);
459 }
460
461 int singlestepping_tt(void *t)
462 {
463         struct task_struct *task = t;
464
465         if(task->thread.mode.tt.singlestep_syscall)
466                 return(0);
467         return(task->ptrace & PT_DTRACE);
468 }
469
470 void clear_singlestep(void *t)
471 {
472         struct task_struct *task = t;
473
474         task->ptrace &= ~PT_DTRACE;
475 }
476
477 int start_uml_tt(void)
478 {
479         void *sp;
480         int pages;
481
482         pages = (1 << CONFIG_KERNEL_STACK_ORDER) - 2;
483         sp = (void *) init_task.thread.kernel_stack + pages * PAGE_SIZE - 
484                 sizeof(unsigned long);
485         return(tracer(start_kernel_proc, sp));
486 }
487
488 int external_pid_tt(struct task_struct *task)
489 {
490         return(task->thread.mode.tt.extern_pid);
491 }
492
493 int thread_pid_tt(struct task_struct *task)
494 {
495         return(task->thread.mode.tt.extern_pid);
496 }
497
498 int is_valid_pid(int pid)
499 {
500         struct task_struct *task;
501
502         read_lock(&tasklist_lock);
503         for_each_process(task){
504                 if(task->thread.mode.tt.extern_pid == pid){
505                         read_unlock(&tasklist_lock);
506                         return(1);
507                 }
508         }
509         read_unlock(&tasklist_lock);
510         return(0);
511 }
512
513 /*
514  * Overrides for Emacs so that we follow Linus's tabbing style.
515  * Emacs will notice this stuff at the end of the file and automatically
516  * adjust the settings for this buffer only.  This must remain at the end
517  * of the file.
518  * ---------------------------------------------------------------------------
519  * Local variables:
520  * c-file-style: "linux"
521  * End:
522  */