vserver 1.9.5.x5
[linux-2.6.git] / arch / um / kernel / tt / tracer.c
1 /* 
2  * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
3  * Licensed under the GPL
4  */
5
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <stdarg.h>
9 #include <unistd.h>
10 #include <signal.h>
11 #include <errno.h>
12 #include <sched.h>
13 #include <string.h>
14 #include <sys/mman.h>
15 #include <sys/ptrace.h>
16 #include <linux/ptrace.h>
17 #include <sys/time.h>
18 #include <sys/wait.h>
19 #include "user.h"
20 #include "sysdep/ptrace.h"
21 #include "sigcontext.h"
22 #include "sysdep/sigcontext.h"
23 #include "os.h"
24 #include "signal_user.h"
25 #include "user_util.h"
26 #include "mem_user.h"
27 #include "process.h"
28 #include "kern_util.h"
29 #include "chan_user.h"
30 #include "ptrace_user.h"
31 #include "mode.h"
32 #include "tt.h"
33
/* Descriptor pair filled in by os_pipe() in setup_tracer_winch().
 * tracer_winch_handler() writes one byte to tracer_winch[1] on SIGWINCH;
 * is_tracer_winch() passes tracer_winch[0] to register_winch_irq().
 */
static int tracer_winch[2];
35
36 int is_tracer_winch(int pid, int fd, void *data)
37 {
38         if(pid != tracing_pid)
39                 return(0);
40
41         register_winch_irq(tracer_winch[0], fd, -1, data);
42         return(1);
43 }
44
45 static void tracer_winch_handler(int sig)
46 {
47         int n;
48         char c = 1;
49
50         n = os_write_file(tracer_winch[1], &c, sizeof(c));
51         if(n != sizeof(c))
52                 printk("tracer_winch_handler - write failed, err = %d\n", -n);
53 }
54
55 /* Called only by the tracing thread during initialization */
56
57 static void setup_tracer_winch(void)
58 {
59         int err;
60
61         err = os_pipe(tracer_winch, 1, 1);
62         if(err < 0){
63                 printk("setup_tracer_winch : os_pipe failed, err = %d\n", -err);
64                 return;
65         }
66         signal(SIGWINCH, tracer_winch_handler);
67 }
68
69 void attach_process(int pid)
70 {
71         if((ptrace(PTRACE_ATTACH, pid, 0, 0) < 0) ||
72            (ptrace(PTRACE_CONT, pid, 0, 0) < 0))
73                 tracer_panic("OP_FORK failed to attach pid");
74         wait_for_stop(pid, SIGSTOP, PTRACE_CONT, NULL);
75         if (ptrace(PTRACE_OLDSETOPTIONS, pid, 0, (void *)PTRACE_O_TRACESYSGOOD) < 0)
76                 tracer_panic("OP_FORK: PTRACE_SETOPTIONS failed, errno = %d", errno);
77         if(ptrace(PTRACE_CONT, pid, 0, 0) < 0)
78                 tracer_panic("OP_FORK failed to continue process");
79 }
80
/* Print a printf-style message followed by a newline on stdout, then
 * park the caller in pause() forever.  This function never returns.
 */
void tracer_panic(char *format, ...)
{
        va_list args;

        va_start(args, format);
        vprintf(format, args);
        va_end(args);
        printf("\n");

        for(;;)
                pause();
}
91
92 static void tracer_segv(int sig, struct sigcontext sc)
93 {
94         printf("Tracing thread segfault at address 0x%lx, ip 0x%lx\n",
95                SC_FAULT_ADDR(&sc), SC_IP(&sc));
96         while(1)
97                 pause();
98 }
99
/* Changed early in boot, and then only read */
/* Set to 1 by uml_debug_setup(); makes tracer() set up a debugger */
int debug = 0;
/* Cleared by the "go" debug option; passed to init_ptrace_proxy() */
int debug_stop = 1;
/* Set by the "parent" debug option; tracer() then attaches to the
 * debugger's parent as well
 */
int debug_parent = 0;
/* Set by uml_honeypot_setup(); makes signal_tramp() unmap the top
 * 0x10000000 bytes below host_task_size
 */
int honeypot = 0;
105
106 static int signal_tramp(void *arg)
107 {
108         int (*proc)(void *);
109
110         if(honeypot && munmap((void *) (host_task_size - 0x10000000),
111                               0x10000000)) 
112                 panic("Unmapping stack failed");
113         if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0)
114                 panic("ptrace PTRACE_TRACEME failed");
115         os_stop_process(os_getpid());
116         change_sig(SIGWINCH, 0);
117         signal(SIGUSR1, SIG_IGN);
118         change_sig(SIGCHLD, 0);
119         signal(SIGSEGV, (__sighandler_t) sig_handler);
120         set_cmdline("(idle thread)");
121         set_init_pid(os_getpid());
122         proc = arg;
123         return((*proc)(NULL));
124 }
125
/* Handle a stop reported for a pid that is not current on any processor.
 *
 * SIGSTOP is ignored, SIGCONT/SIGTSTP are passed back with PTRACE_CONT,
 * a syscall trap (SIGTRAP + 0x80) is resumed with PTRACE_SYSCALL, and
 * anything else is fatal via tracer_panic().
 */
static void sleeping_process_signal(int pid, int sig)
{
        if(sig == SIGSTOP)
                return;

        /* These two result from UML being ^Z-ed and bg-ed.  PTRACE_CONT is
         * right because the process must be in the kernel already.
         */
        if((sig == SIGCONT) || (sig == SIGTSTP)){
                if(ptrace(PTRACE_CONT, pid, 0, sig) < 0)
                        tracer_panic("sleeping_process_signal : Failed to "
                                     "continue pid %d, signal = %d, "
                                     "errno = %d\n", pid, sig, errno);
                return;
        }

        /* This happens when the debugger (e.g. strace) is doing system call
         * tracing on the kernel.  During a context switch, the current task
         * will be set to the incoming process and the outgoing process will
         * hop into write and then read.  Since it's not the current process
         * any more, the trace of those will land here.  So, we need to just
         * PTRACE_SYSCALL it.
         */
        if(sig == (SIGTRAP + 0x80)){
                if(ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0)
                        tracer_panic("sleeping_process_signal : Failed to "
                                     "PTRACE_SYSCALL pid %d, errno = %d\n",
                                     pid, errno);
                return;
        }

        tracer_panic("sleeping process %d got unexpected "
                     "signal : %d\n", pid, sig);
}
161
/* Accessed only by the tracing thread */
/* Pid of the attached debugger, or -1; tracer() resets it to -1 when the
 * debugger exits or is killed
 */
int debugger_pid = -1;
/* Pid of the debugger's parent while tracer() is attached to it, else -1 */
int debugger_parent = -1;
int debugger_fd = -1;
/* When not -1, tracer() attaches to this pid instead of starting a proxy */
int gdb_pid = -1;

/* History of the stops seen by tracer().  Each entry holds the stopped
 * pid, the stop signal, the value read with PTRACE_PEEKUSER at
 * PT_IP_OFFSET, and the time the entry was written.
 */
struct {
        int pid;
        int signal;
        unsigned long addr;
        struct timeval time;
} signal_record[1024][32];

/* Next history position to fill, one counter per processor id; tracer()
 * wraps a counter back to 0 when it reaches 1024
 */
int signal_index[32];
/* Incremented by tracer() for each wait status that does not come from
 * the debugger or the debugger's parent
 */
int nsignals = 0;
/* Set by uml_debugtrace_setup(); makes tracer() call stop() during startup */
int debug_trace = 0;
extern int io_nsignals, io_count, intr_count;

extern void signal_usr1(int sig);

/* Pid of the tracing thread; set from os_getpid() at the top of tracer() */
int tracing_pid = -1;
183
184 int tracer(int (*init_proc)(void *), void *sp)
185 {
186         void *task = NULL;
187         unsigned long eip = 0;
188         int status, pid = 0, sig = 0, cont_type, tracing = 0, op = 0;
189         int last_index, proc_id = 0, n, err, old_tracing = 0, strace = 0;
190         int local_using_sysemu = 0;
191
192         signal(SIGPIPE, SIG_IGN);
193         setup_tracer_winch();
194         tracing_pid = os_getpid();
195         printf("tracing thread pid = %d\n", tracing_pid);
196
197         pid = clone(signal_tramp, sp, CLONE_FILES | SIGCHLD, init_proc);
198         CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED));
199         if(n < 0){
200                 printf("waitpid on idle thread failed, errno = %d\n", errno);
201                 exit(1);
202         }
203         if (ptrace(PTRACE_OLDSETOPTIONS, pid, 0, (void *)PTRACE_O_TRACESYSGOOD) < 0) {
204                 printf("Failed to PTRACE_SETOPTIONS for idle thread, errno = %d\n", errno);
205                 exit(1);
206         }
207         if((ptrace(PTRACE_CONT, pid, 0, 0) < 0)){
208                 printf("Failed to continue idle thread, errno = %d\n", errno);
209                 exit(1);
210         }
211
212         signal(SIGSEGV, (sighandler_t) tracer_segv);
213         signal(SIGUSR1, signal_usr1);
214         if(debug_trace){
215                 printf("Tracing thread pausing to be attached\n");
216                 stop();
217         }
218         if(debug){
219                 if(gdb_pid != -1) 
220                         debugger_pid = attach_debugger(pid, gdb_pid, 1);
221                 else debugger_pid = init_ptrace_proxy(pid, 1, debug_stop);
222                 if(debug_parent){
223                         debugger_parent = os_process_parent(debugger_pid);
224                         init_parent_proxy(debugger_parent);
225                         err = attach(debugger_parent);
226                         if(err){
227                                 printf("Failed to attach debugger parent %d, "
228                                        "errno = %d\n", debugger_parent, -err);
229                                 debugger_parent = -1;
230                         }
231                         else {
232                                 if(ptrace(PTRACE_SYSCALL, debugger_parent, 
233                                           0, 0) < 0){
234                                         printf("Failed to continue debugger "
235                                                "parent, errno = %d\n", errno);
236                                         debugger_parent = -1;
237                                 }
238                         }
239                 }
240         }
241         set_cmdline("(tracing thread)");
242         while(1){
243                 CATCH_EINTR(pid = waitpid(-1, &status, WUNTRACED));
244                 if(pid <= 0){
245                         if(errno != ECHILD){
246                                 printf("wait failed - errno = %d\n", errno);
247                         }
248                         continue;
249                 }
250                 if(pid == debugger_pid){
251                         int cont = 0;
252
253                         if(WIFEXITED(status) || WIFSIGNALED(status))
254                                 debugger_pid = -1;
255                         /* XXX Figure out how to deal with gdb and SMP */
256                         else cont = debugger_signal(status, cpu_tasks[0].pid);
257                         if(cont == PTRACE_SYSCALL) strace = 1;
258                         continue;
259                 }
260                 else if(pid == debugger_parent){
261                         debugger_parent_signal(status, pid);
262                         continue;
263                 }
264                 nsignals++;
265                 if(WIFEXITED(status)) ;
266 #ifdef notdef
267                 {
268                         printf("Child %d exited with status %d\n", pid, 
269                                WEXITSTATUS(status));
270                 }
271 #endif
272                 else if(WIFSIGNALED(status)){
273                         sig = WTERMSIG(status);
274                         if(sig != 9){
275                                 printf("Child %d exited with signal %d\n", pid,
276                                        sig);
277                         }
278                 }
279                 else if(WIFSTOPPED(status)){
280                         proc_id = pid_to_processor_id(pid);
281                         sig = WSTOPSIG(status);
282                         if(signal_index[proc_id] == 1024){
283                                 signal_index[proc_id] = 0;
284                                 last_index = 1023;
285                         }
286                         else last_index = signal_index[proc_id] - 1;
287                         if(((sig == SIGPROF) || (sig == SIGVTALRM) || 
288                             (sig == SIGALRM)) &&
289                            (signal_record[proc_id][last_index].signal == sig)&&
290                            (signal_record[proc_id][last_index].pid == pid))
291                                 signal_index[proc_id] = last_index;
292                         signal_record[proc_id][signal_index[proc_id]].pid = pid;
293                         gettimeofday(&signal_record[proc_id][signal_index[proc_id]].time, NULL);
294                         eip = ptrace(PTRACE_PEEKUSER, pid, PT_IP_OFFSET, 0);
295                         signal_record[proc_id][signal_index[proc_id]].addr = eip;
296                         signal_record[proc_id][signal_index[proc_id]++].signal = sig;
297                         
298                         if(proc_id == -1){
299                                 sleeping_process_signal(pid, sig);
300                                 continue;
301                         }
302
303                         task = cpu_tasks[proc_id].task;
304                         tracing = is_tracing(task);
305                         old_tracing = tracing;
306
307                         /* Assume: no syscall, when coming from user */
308                         if ( tracing )
309                                 do_sigtrap(task);
310
311                         switch(sig){
312                         case SIGUSR1:
313                                 sig = 0;
314                                 op = do_proc_op(task, proc_id);
315                                 switch(op){
316                                 /*
317                                  * This is called when entering user mode; after
318                                  * this, we start intercepting syscalls.
319                                  *
320                                  * In fact, a process is started in kernel mode,
321                                  * so with is_tracing() == 0 (and that is reset
322                                  * when executing syscalls, since UML kernel has
323                                  * the right to do syscalls);
324                                  */
325                                 case OP_TRACE_ON:
326                                         arch_leave_kernel(task, pid);
327                                         tracing = 1;
328                                         break;
329                                 case OP_REBOOT:
330                                 case OP_HALT:
331                                         unmap_physmem();
332                                         kmalloc_ok = 0;
333                                         os_kill_ptraced_process(pid, 0);
334                                         /* Now let's reap remaining zombies */
335                                         errno = 0;
336                                         do {
337                                                 waitpid(-1, &status,
338                                                         WUNTRACED);
339                                         } while (errno != ECHILD);
340                                         return(op == OP_REBOOT);
341                                 case OP_NONE:
342                                         printf("Detaching pid %d\n", pid);
343                                         detach(pid, SIGSTOP);
344                                         continue;
345                                 default:
346                                         break;
347                                 }
348                                 /* OP_EXEC switches host processes on us,
349                                  * we want to continue the new one.
350                                  */
351                                 pid = cpu_tasks[proc_id].pid;
352                                 break;
353                         case (SIGTRAP + 0x80):
354                                 if(!tracing && (debugger_pid != -1)){
355                                         child_signal(pid, status & 0x7fff);
356                                         continue;
357                                 }
358                                 tracing = 0;
359                                 /* local_using_sysemu has been already set
360                                  * below, since if we are here, is_tracing() on
361                                  * the traced task was 1, i.e. the process had
362                                  * already run through one iteration of the
363                                  * loop which executed a OP_TRACE_ON request.*/
364                                 do_syscall(task, pid, local_using_sysemu);
365                                 sig = SIGUSR2;
366                                 break;
367                         case SIGTRAP:
368                                 if(!tracing && (debugger_pid != -1)){
369                                         child_signal(pid, status);
370                                         continue;
371                                 }
372                                 tracing = 0;
373                                 break;
374                         case SIGPROF:
375                                 if(tracing) sig = 0;
376                                 break;
377                         case SIGCHLD:
378                         case SIGHUP:
379                                 sig = 0;
380                                 break;
381                         case SIGSEGV:
382                         case SIGIO:
383                         case SIGALRM:
384                         case SIGVTALRM:
385                         case SIGFPE:
386                         case SIGBUS:
387                         case SIGILL:
388                         case SIGWINCH:
389
390                         default:
391                                 tracing = 0;
392                                 break;
393                         }
394                         set_tracing(task, tracing);
395
396                         if(!tracing && old_tracing)
397                                 arch_enter_kernel(task, pid);
398
399                         if(!tracing && (debugger_pid != -1) && (sig != 0) &&
400                                 (sig != SIGALRM) && (sig != SIGVTALRM) &&
401                                 (sig != SIGSEGV) && (sig != SIGTRAP) &&
402                                 (sig != SIGUSR2) && (sig != SIGIO) &&
403                                 (sig != SIGFPE)){
404                                 child_signal(pid, status);
405                                 continue;
406                         }
407
408                         local_using_sysemu = get_using_sysemu();
409
410                         if(tracing)
411                                 cont_type = SELECT_PTRACE_OPERATION(local_using_sysemu,
412                                                                     singlestepping(task));
413                         else if((debugger_pid != -1) && strace)
414                                 cont_type = PTRACE_SYSCALL;
415                         else
416                                 cont_type = PTRACE_CONT;
417
418                         if(ptrace(cont_type, pid, 0, sig) != 0){
419                                 tracer_panic("ptrace failed to continue "
420                                              "process - errno = %d\n", 
421                                              errno);
422                         }
423                 }
424         }
425         return(0);
426 }
427
428 static int __init uml_debug_setup(char *line, int *add)
429 {
430         char *next;
431
432         debug = 1;
433         *add = 0;
434         if(*line != '=') return(0);
435         line++;
436
437         while(line != NULL){
438                 next = strchr(line, ',');
439                 if(next) *next++ = '\0';
440                 
441                 if(!strcmp(line, "go")) debug_stop = 0;
442                 else if(!strcmp(line, "parent")) debug_parent = 1;
443                 else printf("Unknown debug option : '%s'\n", line);
444
445                 line = next;
446         }
447         return(0);
448 }
449
/* Passes the "debug" switch name, its handler uml_debug_setup() and the
 * help text below to __uml_setup().
 */
__uml_setup("debug", uml_debug_setup,
"debug\n"
"    Starts up the kernel under the control of gdb. See the \n"
"    kernel debugging tutorial and the debugging session pages\n"
"    at http://user-mode-linux.sourceforge.net/ for more information.\n\n"
);
456
457 static int __init uml_debugtrace_setup(char *line, int *add)
458 {
459         debug_trace = 1;
460         return 0;
461 }
/* Passes the "debugtrace" switch name, its handler uml_debugtrace_setup()
 * and the help text below to __uml_setup().
 */
__uml_setup("debugtrace", uml_debugtrace_setup,
"debugtrace\n"
"    Causes the tracing thread to pause until it is attached by a\n"
"    debugger and continued.  This is mostly for debugging crashes\n"
"    early during boot, and should be pretty much obsoleted by\n"
"    the debug switch.\n\n"
);
469
470 static int __init uml_honeypot_setup(char *line, int *add)
471 {
472         jail_setup("", add);
473         honeypot = 1;
474         return 0;
475 }
476 __uml_setup("honeypot", uml_honeypot_setup, 
477 "honeypot\n"
478 "    This makes UML put process stacks in the same location as they are\n"
479 "    on the host, allowing expoits such as stack smashes to work against\n"
480 "    UML.  This implies 'jail'.\n\n"
481 );
482
483 /*
484  * Overrides for Emacs so that we follow Linus's tabbing style.
485  * Emacs will notice this stuff at the end of the file and automatically
486  * adjust the settings for this buffer only.  This must remain at the end
487  * of the file.
488  * ---------------------------------------------------------------------------
489  * Local variables:
490  * c-file-style: "linux"
491  * End:
492  */