Chopstix base patch.
Patch file: linux-2.6-590-chopstix-intern.patch (linux-2.6.git)
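What the diff below does: it adds include/linux/arrays.h (defining struct event and struct array_handler), defines and exports a rec_event function-pointer hook from kernel/sched.c, routes OProfile samples from oprofile_add_sample() to that hook whenever it is set, reports from schedule() any task that comes back on CPU after being blocked or off the runqueue for more than HZ/5 jiffies, adds last_interrupted/last_ran_j bookkeeping fields to struct task_struct, and takes a dcookie reference on the executable in fs/exec.c when a binary is opened for exec. The collector that consumes these events is not included here. As orientation only, the sketch below shows how an out-of-tree module might attach to the hook; it is not part of the patch, the module and symbol names are invented, and a real Chopstix collector would do far more than count samples.

/*
 * Illustrative sketch only -- not part of the patch.  It shows how an
 * out-of-tree collector module could attach to the rec_event hook that
 * kernel/sched.c defines and exports below.  Module, function and
 * variable names here are invented for the example.
 */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/rcupdate.h>
#include <linux/arrays.h>       /* struct event, added by this patch */
#include <asm/atomic.h>

/* Defined in kernel/sched.c and exported with EXPORT_SYMBOL by this patch. */
extern void (*rec_event)(void *, unsigned int);

/* Crude per-type sample counters; the schedule() probe uses event_type 2. */
static atomic_t chopstix_samples[8];

static void chopstix_record(void *data, unsigned int count)
{
        /*
         * Called from the probe sites (schedule(), oprofile_add_sample())
         * with a stack-allocated struct event, possibly with the runqueue
         * lock held: do not sleep and do not keep the pointer.
         */
        struct event *ev = data;

        if (ev->event_type < ARRAY_SIZE(chopstix_samples))
                atomic_add(count, &chopstix_samples[ev->event_type]);
}

static int __init chopstix_stub_init(void)
{
        rec_event = chopstix_record;    /* arm the in-kernel probes */
        return 0;
}

static void __exit chopstix_stub_exit(void)
{
        rec_event = NULL;
        synchronize_sched();            /* let in-flight probes finish before unload */
}

module_init(chopstix_stub_init);
module_exit(chopstix_stub_exit);
MODULE_LICENSE("GPL");

Note that nothing in the patch fixes the meaning of event_type: the schedule() probe hard-codes 2 and the OProfile probe passes the performance-counter index straight through, so probes and collector have to agree on that mapping out of band. The patch itself follows.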
diff -Nurb linux-2.6.22-580/drivers/oprofile/cpu_buffer.c linux-2.6.22-590/drivers/oprofile/cpu_buffer.c
--- linux-2.6.22-580/drivers/oprofile/cpu_buffer.c      2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.22-590/drivers/oprofile/cpu_buffer.c      2008-02-27 13:53:47.000000000 -0500
@@ -21,6 +21,7 @@
 #include <linux/oprofile.h>
 #include <linux/vmalloc.h>
 #include <linux/errno.h>
+#include <linux/arrays.h>
 
 #include "event_buffer.h"
 #include "cpu_buffer.h"
@@ -143,6 +144,14 @@
                b->head_pos = 0;
 }
 
+struct event_spec {
+       unsigned int pc;
+       unsigned long dcookie;
+       unsigned count;
+};
+
+extern void (*rec_event)(void *,unsigned int);
+
 static inline void
 add_sample(struct oprofile_cpu_buffer * cpu_buf,
            unsigned long pc, unsigned long event)
@@ -151,6 +160,7 @@
        entry->eip = pc;
        entry->event = event;
        increment_head(cpu_buf);
+
 }
 
 static inline void
@@ -237,12 +247,66 @@
        oprofile_end_trace(cpu_buf);
 }
 
+static int proc_pid_cmdline(struct task_struct *task, char * buffer)
+{
+       int res = 0;
+       unsigned int len;
+       struct mm_struct *mm = get_task_mm(task);
+       if (!mm)
+               goto out;
+       if (!mm->arg_end)
+               goto out_mm;    /* Shh! No looking before we're done */
+
+       len = mm->arg_end - mm->arg_start;
+
+       if (len > PAGE_SIZE)
+               len = PAGE_SIZE;
+
+       res = access_process_vm(task, mm->arg_start, buffer, len, 0);
+
+       // If the nul at the end of args has been overwritten, then
+       // assume application is using setproctitle(3).
+       if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) {
+               len = strnlen(buffer, res);
+               if (len < res) {
+                   res = len;
+               } else {
+                       len = mm->env_end - mm->env_start;
+                       if (len > PAGE_SIZE - res)
+                               len = PAGE_SIZE - res;
+                       res += access_process_vm(task, mm->env_start, buffer+res, len, 0);
+                       res = strnlen(buffer, res);
+               }
+       }
+out_mm:
+       mmput(mm);
+out:
+       return res;
+}
+
+
+
 void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
 {
        int is_kernel = !user_mode(regs);
        unsigned long pc = profile_pc(regs);
+       int res=0;
 
+       if (rec_event) {
+               struct event esig;
+               struct event_spec espec;
+               /*res = proc_pid_cmdline(current, espec->appname);*/
+               esig.task = current;
+               espec.pc=pc;
+               espec.count=1;
+               esig.event_data=&espec;
+               esig.event_type=event; /* index in the event array currently set up */
+                                       /* make sure the counters are loaded in the order we want them to show up*/
+               (*rec_event)(&esig, 1);
+       }
+       else {
        oprofile_add_ext_sample(pc, regs, event, is_kernel);
+       }
 }
 
 void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
diff -Nurb linux-2.6.22-580/drivers/oprofile/oprof.c linux-2.6.22-590/drivers/oprofile/oprof.c
--- linux-2.6.22-580/drivers/oprofile/oprof.c   2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.22-590/drivers/oprofile/oprof.c   2008-02-27 13:48:29.000000000 -0500
@@ -33,6 +33,8 @@
  */
 static int timer = 0;
 
+extern void (*rec_event)(void *,unsigned int);
+
 int oprofile_setup(void)
 {
        int err;
diff -Nurb linux-2.6.22-580/fs/exec.c linux-2.6.22-590/fs/exec.c
--- linux-2.6.22-580/fs/exec.c  2008-02-27 13:46:38.000000000 -0500
+++ linux-2.6.22-590/fs/exec.c  2008-02-27 13:49:58.000000000 -0500
@@ -52,6 +52,7 @@
 #include <linux/audit.h>
 #include <linux/signalfd.h>
 #include <linux/vs_memory.h>
+#include <linux/dcookies.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -488,6 +489,10 @@
 
        if (!err) {
                struct inode *inode = nd.dentry->d_inode;
+               unsigned long cookie;
+               if (!nd.dentry->d_cookie)
+                       get_dcookie(nd.dentry, nd.mnt, &cookie);
+
                file = ERR_PTR(-EACCES);
                if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
                    S_ISREG(inode->i_mode)) {
diff -Nurb linux-2.6.22-580/include/linux/arrays.h linux-2.6.22-590/include/linux/arrays.h
--- linux-2.6.22-580/include/linux/arrays.h     1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.22-590/include/linux/arrays.h     2008-02-27 13:48:29.000000000 -0500
@@ -0,0 +1,35 @@
+#ifndef __ARRAYS_H__
+#define __ARRAYS_H__
+#include <linux/list.h>
+
+#define SAMPLING_METHOD_DEFAULT 0
+#define SAMPLING_METHOD_LOG 1
+
+/* Every probe has an array handler */
+
+/* XXX - Optimize this structure */
+
+struct array_handler {
+       struct list_head link;
+       unsigned int (*hash_func)(void *);
+       unsigned int (*sampling_func)(void *,int,void *);
+       unsigned short size;
+       unsigned int threshold;
+       unsigned char **expcount;
+       unsigned int sampling_method;
+       unsigned int **arrays;
+       unsigned int arraysize;
+       unsigned int num_samples[2];
+       void **epoch_samples; /* size-sized lists of samples */
+       unsigned int (*serialize)(void *, void *);
+       unsigned char code[5];
+};
+
+struct event {
+       struct list_head link;
+       void *event_data;
+       unsigned int count;
+       unsigned int event_type;
+       struct task_struct *task;
+};
+#endif
diff -Nurb linux-2.6.22-580/include/linux/sched.h linux-2.6.22-590/include/linux/sched.h
--- linux-2.6.22-580/include/linux/sched.h      2008-02-27 13:46:40.000000000 -0500
+++ linux-2.6.22-590/include/linux/sched.h      2008-02-27 13:48:29.000000000 -0500
@@ -849,7 +849,7 @@
        unsigned int btrace_seq;
 #endif
        unsigned long sleep_avg;
-       unsigned long long timestamp, last_ran;
+       unsigned long long timestamp, last_ran, last_interrupted, last_ran_j;
        unsigned long long sched_time; /* sched_clock time spent running */
        enum sleep_type sleep_type;
 
diff -Nurb linux-2.6.22-580/kernel/fork.c linux-2.6.22-590/kernel/fork.c
--- linux-2.6.22-580/kernel/fork.c      2008-02-27 13:46:40.000000000 -0500
+++ linux-2.6.22-590/kernel/fork.c      2008-02-27 13:48:29.000000000 -0500
@@ -197,6 +197,8 @@
        tsk->btrace_seq = 0;
 #endif
        tsk->splice_pipe = NULL;
+       //tsk->cmdline[0]='\0';
+       tsk->last_interrupted = 0;
        return tsk;
 }
 
diff -Nurb linux-2.6.22-580/kernel/sched.c linux-2.6.22-590/kernel/sched.c
--- linux-2.6.22-580/kernel/sched.c     2008-02-27 13:46:40.000000000 -0500
+++ linux-2.6.22-590/kernel/sched.c     2008-02-27 14:08:26.000000000 -0500
@@ -56,6 +56,7 @@
 
 #include <asm/tlb.h>
 #include <asm/unistd.h>
+#include <linux/arrays.h>
 #include <linux/vs_sched.h>
 #include <linux/vs_cvirt.h>
 
@@ -3608,6 +3609,8 @@
 
 #endif
 
+extern void (*rec_event)(void *,unsigned int);
+
 static inline int interactive_sleep(enum sleep_type sleep_type)
 {
        return (sleep_type == SLEEP_INTERACTIVE ||
@@ -3617,16 +3620,51 @@
 /*
  * schedule() is the main scheduler function.
  */
+
+struct event_spec {
+       unsigned long pc;
+       unsigned long dcookie;
+       unsigned count;
+       unsigned char reason;
+};
+
+#define top_esp                (THREAD_SIZE - sizeof(unsigned long))
+#define top_ebp                (THREAD_SIZE - 2*sizeof(unsigned long))
+
+static inline unsigned long my_get_wchan(struct task_struct *p)
+{
+        unsigned long ebp, esp, eip;
+        unsigned long stack_page;
+        int count = 0;
+        stack_page = (unsigned long)task_stack_page(p);
+        esp = p->thread.esp;
+        if (!stack_page || esp < stack_page || esp > top_esp+stack_page)
+                return 0;
+        /* include/asm-i386/system.h:switch_to() pushes ebp last. */
+        ebp = *(unsigned long *) esp;
+        do {
+                if (ebp < stack_page || ebp > top_ebp+stack_page)
+                        return 0;
+                eip = *(unsigned long *) (ebp+4);
+                if (!in_sched_functions(eip))
+                        return eip;
+                ebp = *(unsigned long *) ebp;
+        } while (count++ < 16);
+        return 0;
+}
+/* CHOPSTIX */
+
 asmlinkage void __sched schedule(void)
 {
        struct task_struct *prev, *next;
        struct prio_array *array;
        struct list_head *queue;
        unsigned long long now;
-       unsigned long run_time;
+       unsigned long run_time, diff;
        int cpu, idx, new_prio;
        long *switch_count;
        struct rq *rq;
+       int sampling_reason;
 
        /*
         * Test if we are atomic.  Since do_exit() needs to call into
@@ -3680,6 +3718,7 @@
        switch_count = &prev->nivcsw;
        if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
                switch_count = &prev->nvcsw;
+
                if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
                                unlikely(signal_pending(prev))))
                        prev->state = TASK_RUNNING;
@@ -3689,6 +3728,9 @@
                                vx_uninterruptible_inc(prev);
                        }
                        deactivate_task(prev, rq);
+                       if (prev->state & TASK_INTERRUPTIBLE) {
+                               prev->last_interrupted=jiffies;
+                       }
                }
        }
 
@@ -3763,8 +3805,44 @@
        prev->sleep_avg -= run_time;
        if ((long)prev->sleep_avg <= 0)
                prev->sleep_avg = 0;
+
        prev->timestamp = prev->last_ran = now;
 
+       /* CHOPSTIX */
+
+       prev->last_ran_j = jiffies;
+       if (next->last_interrupted) {
+               diff = (jiffies-next->last_interrupted);
+               next->last_interrupted = 0;
+               sampling_reason = 0;
+       }
+       else {
+               diff = jiffies-next->last_ran_j;
+               sampling_reason = 1;
+       }
+
+       if (rec_event && (diff>HZ/5)) {
+               struct event event;
+               struct event_spec espec;
+               unsigned long eip;
+               unsigned int state = next->state;
+
+               espec.reason = sampling_reason;
+
+               next->state = 0;
+               eip = next->thread.esp;
+               next->state = state;
+
+               next->last_interrupted = 0;
+               event.event_data=&espec;
+               event.task=next;
+               espec.pc=eip;
+               event.event_type=2;
+               /* index in the event array currently set up */
+               /* make sure the counters are loaded in the order we want them to show up*/
+               (*rec_event)(&event, diff);
+       }
+
        sched_info_switch(prev, next);
        if (likely(prev != next)) {
                next->timestamp = next->last_ran = now;
@@ -7275,3 +7353,7 @@
 }
 
 #endif
+
+void (*rec_event)(void *,unsigned int);
+EXPORT_SYMBOL(rec_event);
+EXPORT_SYMBOL(in_sched_functions);