1 diff -Nurb linux-2.6.22-580/arch/i386/Kconfig linux-2.6.22-590/arch/i386/Kconfig
2 --- linux-2.6.22-580/arch/i386/Kconfig  2008-03-21 18:07:23.000000000 -0400
3 +++ linux-2.6.22-590/arch/i386/Kconfig  2008-03-21 18:07:50.000000000 -0400
4 @@ -1217,6 +1217,14 @@
5  
6  source "arch/i386/oprofile/Kconfig"
7  
8 +config CHOPSTIX
9 +       bool "Chopstix (PlanetLab)"
10 +       depends on MODULES && OPROFILE
11 +       help
12 +         Chopstix allows you to monitor various events by summarizing them
13 +         in lossy data structures and transferring these data structures
14 +         into user space. If in doubt, say "N".
15 +
16  config KPROBES
17         bool "Kprobes (EXPERIMENTAL)"
18         depends on KALLSYMS && EXPERIMENTAL && MODULES
19 diff -Nurb linux-2.6.22-580/drivers/oprofile/cpu_buffer.c linux-2.6.22-590/drivers/oprofile/cpu_buffer.c
20 --- linux-2.6.22-580/drivers/oprofile/cpu_buffer.c      2007-07-08 19:32:17.000000000 -0400
21 +++ linux-2.6.22-590/drivers/oprofile/cpu_buffer.c      2008-03-21 18:07:50.000000000 -0400
22 @@ -21,6 +21,7 @@
23  #include <linux/oprofile.h>
24  #include <linux/vmalloc.h>
25  #include <linux/errno.h>
26 +#include <linux/arrays.h>
27   
28  #include "event_buffer.h"
29  #include "cpu_buffer.h"
30 @@ -143,6 +144,17 @@
31                 b->head_pos = 0;
32  }
33  
34 +#ifdef CONFIG_CHOPSTIX
35 +
36 +struct event_spec {
37 +       unsigned long pc;
38 +       unsigned long dcookie;
39 +       unsigned count;
40 +};
41 +
42 +extern void (*rec_event)(void *,unsigned int);
43 +#endif
44 +
45  static inline void
46  add_sample(struct oprofile_cpu_buffer * cpu_buf,
47             unsigned long pc, unsigned long event)
48 @@ -151,6 +163,7 @@
49         entry->eip = pc;
50         entry->event = event;
51         increment_head(cpu_buf);
52 +
53  }
54  
55  static inline void
56 @@ -237,12 +250,75 @@
57         oprofile_end_trace(cpu_buf);
58  }
59  
60 +#ifdef CONFIG_CHOPSTIX
61 +
62 +static int proc_pid_cmdline(struct task_struct *task, char * buffer)
63 +{
64 +       int res = 0;
65 +       unsigned int len;
66 +       struct mm_struct *mm = get_task_mm(task);
67 +       if (!mm)
68 +               goto out;
69 +       if (!mm->arg_end)
70 +               goto out_mm;    /* Shh! No looking before we're done */
71 +
72 +       len = mm->arg_end - mm->arg_start;
73 +
74 +       if (len > PAGE_SIZE)
75 +               len = PAGE_SIZE;
76 +
77 +       res = access_process_vm(task, mm->arg_start, buffer, len, 0);
78 +
79 +       /* If the nul at the end of args has been overwritten, then
80 +        * assume application is using setproctitle(3). */
81 +       if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) {
82 +               len = strnlen(buffer, res);
83 +               if (len < res) {
84 +                   res = len;
85 +               } else {
86 +                       len = mm->env_end - mm->env_start;
87 +                       if (len > PAGE_SIZE - res)
88 +                               len = PAGE_SIZE - res;
89 +                       res += access_process_vm(task, mm->env_start, buffer+res, len, 0);
90 +                       res = strnlen(buffer, res);
91 +               }
92 +       }
93 +out_mm:
94 +       mmput(mm);
95 +out:
96 +       return res;
97 +}
98 +#endif
99 +
100 +
101 +
102  void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
103  {
104         int is_kernel = !user_mode(regs);
105         unsigned long pc = profile_pc(regs);
106 +       int res = 0;    /* only used by the disabled proc_pid_cmdline() call below */
107  
108 +#ifdef CONFIG_CHOPSTIX
109 +       if (rec_event) {
110 +               struct event esig;
111 +               struct event_spec espec;
112 +               /*res = proc_pid_cmdline(current, espec->appname);*/
113 +               esig.task = current;
114 +               espec.pc = pc;
115 +               espec.count = 1;
116 +               esig.event_data = &espec;
117 +               esig.event_type = event; /* index in the event array currently set up */
118 +                                       /* make sure the counters are loaded in the order we want them to show up */
119 +               (*rec_event)(&esig, 1);
120 +       }
121 +       else {
122         oprofile_add_ext_sample(pc, regs, event, is_kernel);
123 +       }
124 +#else
125 +       oprofile_add_ext_sample(pc, regs, event, is_kernel);
126 +#endif
127 +
128 +
129  }
130  
131  void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
132 diff -Nurb linux-2.6.22-580/fs/exec.c linux-2.6.22-590/fs/exec.c
133 --- linux-2.6.22-580/fs/exec.c  2008-03-21 18:07:24.000000000 -0400
134 +++ linux-2.6.22-590/fs/exec.c  2008-03-21 18:07:50.000000000 -0400
135 @@ -52,6 +52,7 @@
136  #include <linux/audit.h>
137  #include <linux/signalfd.h>
138  #include <linux/vs_memory.h>
139 +#include <linux/dcookies.h>
140  
141  #include <asm/uaccess.h>
142  #include <asm/mmu_context.h>
143 @@ -488,6 +489,12 @@
144  
145         if (!err) {
146                 struct inode *inode = nd.dentry->d_inode;
147 +#ifdef CONFIG_CHOPSTIX
148 +               unsigned long cookie;
149 +               if (!nd.dentry->d_cookie)
150 +                       get_dcookie(nd.dentry, nd.mnt, &cookie);        /* make sure the executable's dentry has a dcookie */
151 +#endif
152 +
153                 file = ERR_PTR(-EACCES);
154                 if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
155                     S_ISREG(inode->i_mode)) {
156 diff -Nurb linux-2.6.22-580/include/linux/arrays.h linux-2.6.22-590/include/linux/arrays.h
157 --- linux-2.6.22-580/include/linux/arrays.h     1969-12-31 19:00:00.000000000 -0500
158 +++ linux-2.6.22-590/include/linux/arrays.h     2008-03-21 18:07:50.000000000 -0400
159 @@ -0,0 +1,35 @@
160 +#ifndef __ARRAYS_H__
161 +#define __ARRAYS_H__
162 +#include <linux/list.h>
163 +
164 +#define SAMPLING_METHOD_DEFAULT 0
165 +#define SAMPLING_METHOD_LOG 1
166 +
167 +/* Every probe has an array handler */
168 +
169 +/* XXX - Optimize this structure */
170 +
171 +struct array_handler {
172 +       struct list_head link;
173 +       unsigned int (*hash_func)(void *);
174 +       unsigned int (*sampling_func)(void *,int,void *);
175 +       unsigned short size;
176 +       unsigned int threshold;
177 +       unsigned char **expcount;
178 +       unsigned int sampling_method;
179 +       unsigned int **arrays;
180 +       unsigned int arraysize;
181 +       unsigned int num_samples[2];
182 +       void **epoch_samples; /* size-sized lists of samples */
183 +       unsigned int (*serialize)(void *, void *);
184 +       unsigned char code[5];
185 +};
186 +
187 +struct event {
188 +       struct list_head link;
189 +       void *event_data;
190 +       unsigned int count;
191 +       unsigned int event_type;
192 +       struct task_struct *task;
193 +};
194 +#endif
195 diff -Nurb linux-2.6.22-580/include/linux/sched.h linux-2.6.22-590/include/linux/sched.h
196 --- linux-2.6.22-580/include/linux/sched.h      2008-03-21 18:07:27.000000000 -0400
197 +++ linux-2.6.22-590/include/linux/sched.h      2008-03-24 15:32:53.000000000 -0400
198 @@ -850,6 +850,10 @@
199  #endif
200         unsigned long sleep_avg;
201         unsigned long long timestamp, last_ran;
202 +#ifdef CONFIG_CHOPSTIX
203 +       unsigned long last_interrupted, last_ran_j;
204 +#endif
205 +
206         unsigned long long sched_time; /* sched_clock time spent running */
207         enum sleep_type sleep_type;
208  
209 diff -Nurb linux-2.6.22-580/kernel/fork.c linux-2.6.22-590/kernel/fork.c
210 --- linux-2.6.22-580/kernel/fork.c      2008-03-21 18:07:28.000000000 -0400
211 +++ linux-2.6.22-590/kernel/fork.c      2008-03-21 18:07:50.000000000 -0400
212 @@ -197,6 +197,11 @@
213         tsk->btrace_seq = 0;
214  #endif
215         tsk->splice_pipe = NULL;
216 +
217 +#ifdef CONFIG_CHOPSTIX
218 +       tsk->last_interrupted = 0;
219 +       tsk->last_ran_j = 0;
220 +#endif
221         return tsk;
222  }
223  
224 diff -Nurb linux-2.6.22-580/kernel/sched.c linux-2.6.22-590/kernel/sched.c
225 --- linux-2.6.22-580/kernel/sched.c     2008-03-21 18:07:28.000000000 -0400
226 +++ linux-2.6.22-590/kernel/sched.c     2008-03-21 18:07:50.000000000 -0400
227 @@ -10,7 +10,7 @@
228   *  1998-11-19 Implemented schedule_timeout() and related stuff
229   *             by Andrea Arcangeli
230   *  2002-01-04 New ultra-scalable O(1) scheduler by Ingo Molnar:
231 - *             hybrid priority-list and round-robin design with
232 + *             hybrid priority-list and round-robin design with
233   *             an array-switch method of distributing timeslices
234   *             and per-CPU runqueues.  Cleanups and useful suggestions
235   *             by Davide Libenzi, preemptible kernel bits by Robert Love.
236 @@ -56,6 +56,7 @@
237  
238  #include <asm/tlb.h>
239  #include <asm/unistd.h>
240 +#include <linux/arrays.h>
241  #include <linux/vs_sched.h>
242  #include <linux/vs_cvirt.h>
243  
244 @@ -3608,6 +3609,7 @@
245  
246  #endif
247  
248 +
249  static inline int interactive_sleep(enum sleep_type sleep_type)
250  {
251         return (sleep_type == SLEEP_INTERACTIVE ||
252 @@ -3617,16 +3619,54 @@
253  /*
254   * schedule() is the main scheduler function.
255   */
256 +
257 +#ifdef CONFIG_CHOPSTIX
258 +extern void (*rec_event)(void *,unsigned int);
259 +struct event_spec {
260 +       unsigned long pc;
261 +       unsigned long dcookie;
262 +       unsigned count;
263 +       unsigned char reason;
264 +};
265 +
266 +#define top_esp                (THREAD_SIZE - sizeof(unsigned long))
267 +#define top_ebp                (THREAD_SIZE - 2*sizeof(unsigned long))
268 +/* Local copy of get_wchan() from arch/i386/kernel/process.c, minus the runnable-task guard. */
269 +static inline unsigned long my_get_wchan(struct task_struct *p)
270 +{
271 +        unsigned long ebp, esp, eip;
272 +        unsigned long stack_page;
273 +        int count = 0;
274 +        stack_page = (unsigned long)task_stack_page(p);
275 +        esp = p->thread.esp;
276 +        if (!stack_page || esp < stack_page || esp > top_esp+stack_page)
277 +                return 0;
278 +        /* include/asm-i386/system.h:switch_to() pushes ebp last. */
279 +        ebp = *(unsigned long *) esp;
280 +        do {
281 +                if (ebp < stack_page || ebp > top_ebp+stack_page)
282 +                        return 0;
283 +                eip = *(unsigned long *) (ebp+4);
284 +                if (!in_sched_functions(eip))
285 +                        return eip;
286 +                ebp = *(unsigned long *) ebp;
287 +        } while (count++ < 16);
288 +        return 0;
289 +}
290 +/* CHOPSTIX */
291 +#endif
292 +
293  asmlinkage void __sched schedule(void)
294  {
295         struct task_struct *prev, *next;
296         struct prio_array *array;
297         struct list_head *queue;
298         unsigned long long now;
299 -       unsigned long run_time;
300 +       unsigned long run_time, diff;
301         int cpu, idx, new_prio;
302         long *switch_count;
303         struct rq *rq;
304 +       int sampling_reason;
305  
306         /*
307          * Test if we are atomic.  Since do_exit() needs to call into
308 @@ -3680,6 +3720,7 @@
309         switch_count = &prev->nivcsw;
310         if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
311                 switch_count = &prev->nvcsw;
312 +
313                 if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
314                                 unlikely(signal_pending(prev))))
315                         prev->state = TASK_RUNNING;
316 @@ -3689,6 +3730,11 @@
317                                 vx_uninterruptible_inc(prev);
318                         }
319                         deactivate_task(prev, rq);
320 +#ifdef CONFIG_CHOPSTIX
321 +                       if (prev->state & TASK_INTERRUPTIBLE) {
322 +                               prev->last_interrupted = jiffies;
323 +                       }
324 +#endif
325                 }
326         }
327  
328 @@ -3763,8 +3809,45 @@
329         prev->sleep_avg -= run_time;
330         if ((long)prev->sleep_avg <= 0)
331                 prev->sleep_avg = 0;
332 +
333         prev->timestamp = prev->last_ran = now;
334 +#ifdef CONFIG_CHOPSTIX
335  
336 +       /* CHOPSTIX */
337 +
338 +       prev->last_ran_j = jiffies;
339 +       if (next->last_interrupted) {
340 +               diff = jiffies - next->last_interrupted;
341 +               next->last_interrupted = 0;
342 +               sampling_reason = 0;
343 +       }
344 +       else {
345 +               diff = jiffies - next->last_ran_j;
346 +               sampling_reason = 1;
347 +       }
348 +
349 +       if (rec_event && (diff > HZ/5)) {
350 +               struct event event;
351 +               struct event_spec espec;
352 +               unsigned long eip;
353 +               unsigned long state = next->state;
354 +
355 +               espec.reason = sampling_reason;
356 +
357 +               next->state = 0;
358 +               eip = next->thread.esp;        /* record the saved stack pointer as the sample pc */
359 +               next->state = state;
360 +
361 +               next->last_interrupted = 0;
362 +               event.event_data = &espec;
363 +               event.task = next;
364 +               espec.pc = eip;
365 +               event.event_type = 2;
366 +               /* index in the event array currently set up */
367 +               /* make sure the counters are loaded in the order we want them to show up */
368 +               (*rec_event)(&event, diff);
369 +       }
370 +#endif
371         sched_info_switch(prev, next);
372         if (likely(prev != next)) {
373                 next->timestamp = next->last_ran = now;
374 @@ -7275,3 +7358,9 @@
375  }
376  
377  #endif
378 +
379 +#ifdef CONFIG_CHOPSTIX
380 +void (*rec_event)(void *,unsigned int);
381 +EXPORT_SYMBOL(rec_event);
382 +EXPORT_SYMBOL(in_sched_functions);
383 +#endif
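The sketch below is not part of the patch. It is a minimal, hypothetical illustration of how a Chopstix consumer module might attach to the rec_event hook that kernel/sched.c defines and exports above; the module name, handler body and counter are invented for illustration, and only the rec_event signature and the struct event layout (include/linux/arrays.h) come from the patch itself.

/*
 * Hypothetical consumer sketch -- not part of this patch.
 * kernel/sched.c above exports "void (*rec_event)(void *, unsigned int)";
 * a module can install a handler there and will then receive struct event
 * records (see include/linux/arrays.h) from the scheduler and oprofile paths.
 * The handler runs in scheduler/interrupt context with locks held, so it
 * must not sleep or call printk(); a real module would also need proper
 * synchronization before clearing the hook on unload.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/arrays.h>
#include <asm/atomic.h>

extern void (*rec_event)(void *, unsigned int);

static atomic_t example_nr_events = ATOMIC_INIT(0);

static void example_rec_event(void *data, unsigned int count)
{
	struct event *ev = data;

	/* ev->task, ev->event_type and ev->event_data (a struct event_spec
	 * private to the caller) describe the sample; here we only count. */
	if (ev && ev->task)
		atomic_inc(&example_nr_events);
}

static int __init chopstix_example_init(void)
{
	rec_event = example_rec_event;
	return 0;
}

static void __exit chopstix_example_exit(void)
{
	rec_event = NULL;
	printk(KERN_INFO "chopstix example: saw %d events\n",
	       atomic_read(&example_nr_events));
}

module_init(chopstix_example_init);
module_exit(chopstix_example_exit);
MODULE_LICENSE("GPL");

A real Chopstix collector would presumably feed these events into the array_handler structures from include/linux/arrays.h (hashing, thresholds, per-epoch sample lists) rather than just counting them.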