Continual garbage collection for dcookies.
[linux-2.6.git] / linux-2.6-590-chopstix-intern.patch
diff -Nurb --exclude='*.swp' --exclude=tags --exclude='*.patch' --exclude='*.diff' linux-2.6.22-580/arch/i386/Kconfig linux-2.6.22-590/arch/i386/Kconfig
--- linux-2.6.22-580/arch/i386/Kconfig  2009-02-18 09:56:02.000000000 -0500
+++ linux-2.6.22-590/arch/i386/Kconfig  2009-02-18 09:57:23.000000000 -0500
@@ -1217,6 +1217,14 @@
 
 source "arch/i386/oprofile/Kconfig"
 
+config CHOPSTIX
+	bool "Chopstix (PlanetLab)"
+	depends on MODULES && OPROFILE
+	help
+	  Chopstix allows you to monitor various events by summarizing them
+	  in lossy data structures and transferring these data structures
+	  into user space. If in doubt, say "N".
+
 config KPROBES
 	bool "Kprobes (EXPERIMENTAL)"
 	depends on KALLSYMS && EXPERIMENTAL && MODULES
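
For orientation, here is a minimal sketch (not part of this patch) of how a collector module might arm the rec_event hook that kernel/sched.c exports further down. Only rec_event, struct event, and <linux/arrays.h> come from the patch; every other name is an illustrative assumption.

	#include <linux/module.h>
	#include <linux/arrays.h>	/* declares rec_event and struct event */

	static void my_collector(void *data, unsigned int count)
	{
		struct event *ev = data;
		/* summarize ev->event_type / ev->event_data into a lossy table */
	}

	static int __init collector_init(void)
	{
		rec_event = my_collector;	/* arm the hook; probes start firing */
		return 0;
	}

	static void __exit collector_exit(void)
	{
		rec_event = NULL;		/* disarm before unload */
	}

	module_init(collector_init);
	module_exit(collector_exit);
	MODULE_LICENSE("GPL");
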
diff -Nurb --exclude='*.swp' --exclude=tags --exclude='*.patch' --exclude='*.diff' linux-2.6.22-580/arch/i386/kernel/asm-offsets.c linux-2.6.22-590/arch/i386/kernel/asm-offsets.c
--- linux-2.6.22-580/arch/i386/kernel/asm-offsets.c     2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.22-590/arch/i386/kernel/asm-offsets.c     2009-02-18 09:57:23.000000000 -0500
@@ -9,6 +9,7 @@
 #include <linux/signal.h>
 #include <linux/personality.h>
 #include <linux/suspend.h>
+#include <linux/arrays.h>
 #include <asm/ucontext.h>
 #include "sigframe.h"
 #include <asm/pgtable.h>
@@ -25,9 +26,19 @@
 #define OFFSET(sym, str, mem) \
 	DEFINE(sym, offsetof(struct str, mem));
 
+#define STACKOFFSET(sym, str, mem) \
+	DEFINE(sym, offsetof(struct str, mem) - sizeof(struct str));
+
 /* workaround for a warning with -Wmissing-prototypes */
 void foo(void);
 
+struct event_spec {
+	unsigned long pc;
+	unsigned long dcookie;
+	unsigned count;
+	unsigned int number;
+};
+
 void foo(void)
 {
 	OFFSET(SIGCONTEXT_eax, sigcontext, eax);
@@ -51,7 +62,16 @@
 	OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
 	BLANK();
 
-	OFFSET(TI_task, thread_info, task);
+	STACKOFFSET(TASK_thread, task_struct, thread);
+	STACKOFFSET(THREAD_esp, thread_struct, esp);
+	STACKOFFSET(EVENT_event_data, event, event_data);
+	STACKOFFSET(EVENT_task, event, task);
+	STACKOFFSET(EVENT_event_type, event, event_type);
+	STACKOFFSET(SPEC_number, event_spec, number);
+	DEFINE(EVENT_SIZE, sizeof(struct event));
+	DEFINE(SPEC_SIZE, sizeof(struct event_spec));
+	DEFINE(SPEC_EVENT_SIZE, sizeof(struct event_spec) + sizeof(struct event));
+
 	OFFSET(TI_exec_domain, thread_info, exec_domain);
 	OFFSET(TI_flags, thread_info, flags);
 	OFFSET(TI_status, thread_info, status);
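
Unlike OFFSET, the STACKOFFSET macro above emits offsets relative to the end of a struct rather than its start, presumably so the assembly-side probes can reach members at negative displacements from a base register that points one past the struct. A worked example, runnable as userspace C and assuming i386 sizes for the struct event that linux/arrays.h introduces later in this patch (the mock types here are illustrative):

	#include <stddef.h>
	#include <stdio.h>

	struct list_head { void *next, *prev; };	/* 8 bytes on i386 */
	struct event {
		struct list_head link;			/* offset  0 */
		void *event_data;			/* offset  8 */
		unsigned int count;			/* offset 12 */
		unsigned int event_type;		/* offset 16 */
		void *task;				/* offset 20; sizeof = 24 */
	};

	int main(void)
	{
		/* what STACKOFFSET(EVENT_event_data, event, event_data) emits */
		printf("%ld\n", (long)(offsetof(struct event, event_data)
				       - sizeof(struct event)));	/* prints -16 */
		return 0;
	}
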
diff -Nurb --exclude='*.swp' --exclude=tags --exclude='*.patch' --exclude='*.diff' linux-2.6.22-580/arch/i386/mm/fault.c linux-2.6.22-590/arch/i386/mm/fault.c
--- linux-2.6.22-580/arch/i386/mm/fault.c       2009-02-18 09:56:02.000000000 -0500
+++ linux-2.6.22-590/arch/i386/mm/fault.c       2009-02-18 09:57:23.000000000 -0500
@@ -60,6 +60,15 @@
 					  DIE_PAGE_FAULT, &args);
 }
 
+
+extern void (*rec_event)(void *, unsigned int);
+struct event_spec {
+	unsigned long pc;
+	unsigned long dcookie;
+	unsigned count;
+	unsigned char reason;
+};
+
 /*
  * Return EIP plus the CS segment base.  The segment limit is also
  * adjusted, clamped to the kernel/user address space (whichever is
@@ -296,6 +305,8 @@
  *	bit 3 == 1 means use of reserved bit detected
  *	bit 4 == 1 means fault was an instruction fetch
  */
+
+
 fastcall void __kprobes do_page_fault(struct pt_regs *regs,
 				      unsigned long error_code)
 {
diff -Nurb --exclude='*.swp' --exclude=tags --exclude='*.patch' --exclude='*.diff' linux-2.6.22-580/block/ll_rw_blk.c linux-2.6.22-590/block/ll_rw_blk.c
--- linux-2.6.22-580/block/ll_rw_blk.c  2009-02-18 09:55:48.000000000 -0500
+++ linux-2.6.22-590/block/ll_rw_blk.c  2009-02-18 09:57:23.000000000 -0500
@@ -30,6 +30,7 @@
 #include <linux/cpu.h>
 #include <linux/blktrace_api.h>
 #include <linux/fault-inject.h>
+#include <linux/arrays.h>
 
 /*
 * for max sense size
@@ -3102,6 +3103,13 @@
 
 #endif /* CONFIG_FAIL_MAKE_REQUEST */
 
+extern void (*rec_event)(void *, unsigned int);
+struct event_spec {
+	unsigned long pc;
+	unsigned long dcookie;
+	unsigned count;
+	unsigned char reason;
+};
 /**
 * generic_make_request: hand a buffer to its device driver for I/O
 * @bio:  The bio describing the location in memory and on the device.
@@ -3220,7 +3228,23 @@
 				goto end_io;
 			}
 		}
-
+#ifdef CONFIG_CHOPSTIX
+		if (rec_event) {
+			struct event event;
+			struct event_spec espec;
+			unsigned long eip;
+
+			espec.reason = 0; /* request */
+
+			eip = (unsigned long)bio->bi_end_io;
+			event.event_data = &espec;
+			espec.pc = eip;
+			event.event_type = 3;
+			/* index in the event array currently set up */
+			/* make sure the counters are loaded in the order we want them to show up */
+			(*rec_event)(&event, bio->bi_size);
+		}
+#endif
 		ret = q->make_request_fn(q, bio);
 	} while (ret);
 }
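
A hedged sketch of the consumer side of the call above: event_type 3 carries count = bytes and a reason of 0 (request submitted) or 1 (response, added in fs/bio.c below). The handler name and the mirrored spec layout are assumptions; the real collector lives in the out-of-tree Chopstix module and must simply agree on the byte layout of this file's event_spec.

	#include <linux/arrays.h>

	struct blkio_spec {		/* must mirror ll_rw_blk.c's event_spec */
		unsigned long pc;	/* bio->bi_end_io, identifies the submitter */
		unsigned long dcookie;
		unsigned count;
		unsigned char reason;	/* 0 = request, 1 = response */
	};

	static void handle_event(void *data, unsigned int count)
	{
		struct event *ev = data;

		if (ev->event_type == 3) {
			struct blkio_spec *spec = ev->event_data;
			/* attribute 'count' bytes of I/O to spec->pc,
			 * issued (reason 0) or completed (reason 1) */
		}
	}
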
diff -Nurb --exclude='*.swp' --exclude=tags --exclude='*.patch' --exclude='*.diff' linux-2.6.22-580/drivers/oprofile/cpu_buffer.c linux-2.6.22-590/drivers/oprofile/cpu_buffer.c
--- linux-2.6.22-580/drivers/oprofile/cpu_buffer.c      2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.22-590/drivers/oprofile/cpu_buffer.c      2009-02-18 09:57:23.000000000 -0500
@@ -21,6 +21,7 @@
 #include <linux/oprofile.h>
 #include <linux/vmalloc.h>
 #include <linux/errno.h>
+#include <linux/arrays.h>
  
 #include "event_buffer.h"
 #include "cpu_buffer.h"
@@ -143,6 +144,17 @@
 		b->head_pos = 0;
 }
 
+#ifdef CONFIG_CHOPSTIX
+
+struct event_spec {
+	unsigned int pc;
+	unsigned long dcookie;
+	unsigned count;
+};
+
+extern void (*rec_event)(void *, unsigned int);
+#endif
+
 static inline void
 add_sample(struct oprofile_cpu_buffer * cpu_buf,
            unsigned long pc, unsigned long event)
@@ -151,6 +163,7 @@
 	entry->eip = pc;
 	entry->event = event;
 	increment_head(cpu_buf);
+
 }
 
 static inline void
@@ -241,8 +254,28 @@
 {
 	int is_kernel = !user_mode(regs);
 	unsigned long pc = profile_pc(regs);
+	int res = 0;
 
+#ifdef CONFIG_CHOPSTIX
+	if (rec_event) {
+		struct event esig;
+		struct event_spec espec;
+		esig.task = current;
+		espec.pc = pc;
+		espec.count = 1;
+		esig.event_data = &espec;
+		esig.event_type = event; /* index in the event array currently set up */
+					 /* make sure the counters are loaded in the order we want them to show up */
+		(*rec_event)(&esig, 1);
+	}
+	else {
 	oprofile_add_ext_sample(pc, regs, event, is_kernel);
+	}
+#else
+	oprofile_add_ext_sample(pc, regs, event, is_kernel);
+#endif
+
+
 }
 
 void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
diff -Nurb --exclude='*.swp' --exclude=tags --exclude='*.patch' --exclude='*.diff' linux-2.6.22-580/fs/bio.c linux-2.6.22-590/fs/bio.c
--- linux-2.6.22-580/fs/bio.c   2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.22-590/fs/bio.c   2009-02-18 09:57:23.000000000 -0500
@@ -27,6 +27,7 @@
 #include <linux/workqueue.h>
 #include <linux/blktrace_api.h>
 #include <scsi/sg.h>		/* for struct sg_iovec */
+#include <linux/arrays.h>
 
 #define BIO_POOL_SIZE 2
 
@@ -47,6 +48,7 @@
 	struct kmem_cache *slab;
 };
 
+
 /*
 * if you change this list, also change bvec_alloc or things will
 * break badly! cannot be bigger than what you can fit into an
@@ -999,6 +1001,14 @@
 	}
 }
 
+struct event_spec {
+	unsigned long pc;
+	unsigned long dcookie;
+	unsigned count;
+	unsigned char reason;
+};
+
+extern void (*rec_event)(void *, unsigned int);
 /**
 * bio_endio - end I/O on a bio
 * @bio:	bio
@@ -1028,6 +1038,24 @@
 	bio->bi_size -= bytes_done;
 	bio->bi_sector += (bytes_done >> 9);
 
+#ifdef CONFIG_CHOPSTIX
+	if (rec_event) {
+		struct event event;
+		struct event_spec espec;
+		unsigned long eip;
+
+		espec.reason = 1; /* response */
+
+		eip = (unsigned long)bio->bi_end_io;
+		event.event_data = &espec;
+		espec.pc = eip;
+		event.event_type = 3;
+		/* index in the event array currently set up */
+		/* make sure the counters are loaded in the order we want them to show up */
+		(*rec_event)(&event, bytes_done);
+	}
+#endif
+
 	if (bio->bi_end_io)
 		bio->bi_end_io(bio, bytes_done, error);
 }
diff -Nurb --exclude='*.swp' --exclude=tags --exclude='*.patch' --exclude='*.diff' linux-2.6.22-580/fs/exec.c linux-2.6.22-590/fs/exec.c
--- linux-2.6.22-580/fs/exec.c  2009-02-18 09:56:02.000000000 -0500
+++ linux-2.6.22-590/fs/exec.c  2009-02-18 09:57:23.000000000 -0500
@@ -27,6 +27,7 @@
 #include <linux/mman.h>
 #include <linux/a.out.h>
 #include <linux/stat.h>
+#include <linux/dcookies.h>
 #include <linux/fcntl.h>
 #include <linux/smp_lock.h>
 #include <linux/init.h>
@@ -38,7 +39,7 @@
 #include <linux/binfmts.h>
 #include <linux/swap.h>
 #include <linux/utsname.h>
-#include <linux/pid_namespace.h>
+/*#include <linux/pid_namespace.h>*/
 #include <linux/module.h>
 #include <linux/namei.h>
 #include <linux/proc_fs.h>
@@ -488,6 +489,12 @@
 
 	if (!err) {
 		struct inode *inode = nd.dentry->d_inode;
+#ifdef CONFIG_CHOPSTIX
+		unsigned long cookie;
+
+		if (!nd.dentry->d_cookie)
+			get_dcookie(nd.dentry, nd.mnt, &cookie);
+#endif
 		file = ERR_PTR(-EACCES);
 		if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
 		    S_ISREG(inode->i_mode)) {
@@ -627,8 +634,10 @@
 	 * Reparenting needs write_lock on tasklist_lock,
 	 * so it is safe to do it under read_lock.
 	 */
+	/*
 	if (unlikely(tsk->group_leader == child_reaper(tsk)))
 		tsk->nsproxy->pid_ns->child_reaper = tsk;
+	*/
 
 	zap_other_threads(tsk);
 	read_unlock(&tasklist_lock);
diff -Nurb --exclude='*.swp' --exclude=tags --exclude='*.patch' --exclude='*.diff' linux-2.6.22-580/include/linux/arrays.h linux-2.6.22-590/include/linux/arrays.h
--- linux-2.6.22-580/include/linux/arrays.h     1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.22-590/include/linux/arrays.h     2009-02-18 09:57:23.000000000 -0500
@@ -0,0 +1,36 @@
+#ifndef __ARRAYS_H__
+#define __ARRAYS_H__
+#include <linux/list.h>
+
+#define SAMPLING_METHOD_DEFAULT 0
+#define SAMPLING_METHOD_LOG 1
+
+/* Every probe has an array handler */
+
+/* XXX - Optimize this structure */
+
+extern void (*rec_event)(void *, unsigned int);
+struct array_handler {
+	struct list_head link;
+	unsigned int (*hash_func)(void *);
+	unsigned int (*sampling_func)(void *, int, void *);
+	unsigned short size;
+	unsigned int threshold;
+	unsigned char **expcount;
+	unsigned int sampling_method;
+	unsigned int **arrays;
+	unsigned int arraysize;
+	unsigned int num_samples[2];
+	void **epoch_samples; /* size-sized lists of samples */
+	unsigned int (*serialize)(void *, void *);
+	unsigned char code[5];
+};
+
+struct event {
+	struct list_head link;
+	void *event_data;
+	unsigned int count;
+	unsigned int event_type;
+	struct task_struct *task;
+};
+#endif
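
The patch never shows a probe filling in the array_handler declared above; a hedged sketch follows, with field meanings inferred from the declaration alone. Every value and name here is illustrative, not taken from any Chopstix source.

	#include <linux/arrays.h>

	static unsigned int pc_hash(void *key)
	{
		return ((unsigned long)key >> 4) & 1023;	/* bucket index */
	}

	static struct array_handler example_handler = {
		.hash_func       = pc_hash,
		.size            = 1024,			/* buckets per epoch array */
		.threshold       = 100,				/* samples kept before decay */
		.sampling_method = SAMPLING_METHOD_LOG,		/* log-scale counting */
	};
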
diff -Nurb --exclude='*.swp' --exclude=tags --exclude='*.patch' --exclude='*.diff' linux-2.6.22-580/include/linux/mutex.h linux-2.6.22-590/include/linux/mutex.h
--- linux-2.6.22-580/include/linux/mutex.h      2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.22-590/include/linux/mutex.h      2009-02-18 09:57:23.000000000 -0500
@@ -53,6 +53,10 @@
 	struct thread_info	*owner;
 	const char		*name;
 	void			*magic;
+#else
+#ifdef CONFIG_CHOPSTIX
+	struct thread_info	*owner;
+#endif
 #endif
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map	dep_map;
diff -Nurb --exclude='*.swp' --exclude=tags --exclude='*.patch' --exclude='*.diff' linux-2.6.22-580/include/linux/sched.h linux-2.6.22-590/include/linux/sched.h
--- linux-2.6.22-580/include/linux/sched.h      2009-02-18 09:56:02.000000000 -0500
+++ linux-2.6.22-590/include/linux/sched.h      2009-02-18 09:57:23.000000000 -0500
@@ -850,6 +850,10 @@
 #endif
 	unsigned long sleep_avg;
 	unsigned long long timestamp, last_ran;
+#ifdef CONFIG_CHOPSTIX
+	unsigned long last_interrupted, last_ran_j;
+#endif
+
 	unsigned long long sched_time; /* sched_clock time spent running */
 	enum sleep_type sleep_type;
 
diff -Nurb --exclude='*.swp' --exclude=tags --exclude='*.patch' --exclude='*.diff' linux-2.6.22-580/kernel/mutex.c linux-2.6.22-590/kernel/mutex.c
--- linux-2.6.22-580/kernel/mutex.c     2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.22-590/kernel/mutex.c     2009-02-18 09:57:23.000000000 -0500
@@ -18,6 +18,17 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/debug_locks.h>
+#include <linux/arrays.h>
+
+#undef CONFIG_CHOPSTIX /* note: this #undef keeps the probes below compiled out */
+#ifdef CONFIG_CHOPSTIX
+struct event_spec {
+	unsigned long pc;
+	unsigned long dcookie;
+	unsigned count;
+	unsigned char reason;
+};
+#endif
 
 /*
 * In the DEBUG case we are using the "NULL fastpath" for mutexes,
@@ -43,6 +54,9 @@
 __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
 {
 	atomic_set(&lock->count, 1);
+#ifdef CONFIG_CHOPSTIX
+	lock->owner = NULL;
+#endif
 	spin_lock_init(&lock->wait_lock);
 	INIT_LIST_HEAD(&lock->wait_list);
 
@@ -88,6 +102,7 @@
 	 * The locking fastpath is the 1->0 transition from
 	 * 'unlocked' into 'locked' state.
 	 */
+
 	__mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
 }
 
@@ -168,6 +183,27 @@
 		}
 		__set_task_state(task, state);
 
+#ifdef CONFIG_CHOPSTIX
+		if (rec_event) {
+			if (lock->owner) {
+				struct event event;
+				struct event_spec espec;
+				struct task_struct *p = lock->owner->task;
+				/*spin_lock(&p->alloc_lock);*/
+				espec.reason = 0; /* lock */
+				event.event_data = &espec;
+				event.task = p;
+				espec.pc = (unsigned long)lock;
+				event.event_type = 5;
+				(*rec_event)(&event, 1);
+				/*spin_unlock(&p->alloc_lock);*/
+
+			}
+			else
+				BUG();
+		}
+#endif
+
 		/* didnt get the lock, go to sleep: */
 		spin_unlock_mutex(&lock->wait_lock, flags);
 		schedule();
@@ -177,6 +213,9 @@
 	/* got the lock - rejoice! */
 	mutex_remove_waiter(lock, &waiter, task_thread_info(task));
 	debug_mutex_set_owner(lock, task_thread_info(task));
+#ifdef CONFIG_CHOPSTIX
+	lock->owner = task_thread_info(task);
+#endif
 
 	/* set it to 0 if there are no waiters left: */
 	if (likely(list_empty(&lock->wait_list)))
@@ -202,6 +241,7 @@
 mutex_lock_nested(struct mutex *lock, unsigned int subclass)
 {
 	might_sleep();
+
 	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass);
 }
 
@@ -211,6 +251,7 @@
 mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)
 {
 	might_sleep();
+
 	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass);
 }
 
@@ -246,6 +287,23 @@
 
 		debug_mutex_wake_waiter(lock, waiter);
 
+#ifdef CONFIG_CHOPSTIX
+		if (rec_event) {
+			if (lock->owner) {
+				struct event event;
+				struct event_spec espec;
+
+				espec.reason = 1; /* unlock */
+				event.event_data = &espec;
+				event.task = lock->owner->task;
+				espec.pc = (unsigned long)lock;
+				event.event_type = 5;
+				(*rec_event)(&event, 1);
+			}
+			else
+				BUG();
+		}
+#endif
 		wake_up_process(waiter->task);
 	}
 
diff -Nurb --exclude='*.swp' --exclude=tags --exclude='*.patch' --exclude='*.diff' linux-2.6.22-580/kernel/sched.c linux-2.6.22-590/kernel/sched.c
--- linux-2.6.22-580/kernel/sched.c     2009-02-18 09:56:02.000000000 -0500
+++ linux-2.6.22-590/kernel/sched.c     2009-02-18 09:57:23.000000000 -0500
@@ -23,6 +23,7 @@
 #include <linux/nmi.h>
 #include <linux/init.h>
 #include <asm/uaccess.h>
+#include <linux/arrays.h>
 #include <linux/highmem.h>
 #include <linux/smp_lock.h>
 #include <asm/mmu_context.h>
@@ -59,6 +60,9 @@
 #include <linux/vs_sched.h>
 #include <linux/vs_cvirt.h>
 
+#define INTERRUPTIBLE   -1
+#define RUNNING         0
+
 /*
 * Scheduler clock - returns current time in nanosec units.
 * This is default implementation.
@@ -431,6 +435,7 @@
 
 repeat_lock_task:
 	rq = task_rq(p);
+
 	spin_lock(&rq->lock);
 	if (unlikely(rq != task_rq(p))) {
 		spin_unlock(&rq->lock);
@@ -1741,6 +1746,21 @@
 	 * event cannot wake it up and insert it on the runqueue either.
 	 */
+#ifdef CONFIG_CHOPSTIX
+	/*
+	 * Record the jiffy of last interruption.  Note the old state
+	 * here, before it is overwritten with TASK_RUNNING below.
+	 */
+	if (p->state & TASK_UNINTERRUPTIBLE)
+		p->last_interrupted = jiffies;
+	else if (p->state & TASK_INTERRUPTIBLE)
+		p->last_interrupted = INTERRUPTIBLE;
+	else
+		p->last_interrupted = RUNNING;
+
+	/* The jiffy of last execution */
+	p->last_ran_j = jiffies;
+#endif
 	p->state = TASK_RUNNING;
 
 	/*
 	 * Make sure we do not leak PI boosting priority to the child:
@@ -3608,6 +3628,7 @@
 
 #endif
 
+
 static inline int interactive_sleep(enum sleep_type sleep_type)
 {
 	return (sleep_type == SLEEP_INTERACTIVE ||
@@ -3617,16 +3638,28 @@
 /*
 * schedule() is the main scheduler function.
 */
+
+#ifdef CONFIG_CHOPSTIX
+extern void (*rec_event)(void *, unsigned int);
+struct event_spec {
+	unsigned long pc;
+	unsigned long dcookie;
+	unsigned count;
+	unsigned char reason;
+};
+#endif
+
 asmlinkage void __sched schedule(void)
 {
 	struct task_struct *prev, *next;
 	struct prio_array *array;
 	struct list_head *queue;
 	unsigned long long now;
-	unsigned long run_time;
+	unsigned long run_time, diff;
 	int cpu, idx, new_prio;
 	long *switch_count;
 	struct rq *rq;
+	int sampling_reason;
 
 	/*
 	 * Test if we are atomic.  Since do_exit() needs to call into
@@ -3680,6 +3713,7 @@
 	switch_count = &prev->nivcsw;
 	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
 		switch_count = &prev->nvcsw;
+
 		if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
 				unlikely(signal_pending(prev))))
 			prev->state = TASK_RUNNING;
@@ -3689,6 +3723,17 @@
 				vx_uninterruptible_inc(prev);
 			}
 			deactivate_task(prev, rq);
+#ifdef CONFIG_CHOPSTIX
+			/* An uninterruptible process just yielded.  Record the current jiffy */
+			if (prev->state & TASK_UNINTERRUPTIBLE) {
+				prev->last_interrupted = jiffies;
+			}
+			/* An interruptible process just yielded, or it got
+			 * preempted.  Mark it as interruptible */
+			else if (prev->state & TASK_INTERRUPTIBLE) {
+				prev->last_interrupted = INTERRUPTIBLE;
+			}
+#endif
 		}
 	}
 
@@ -3765,6 +3810,40 @@
 		prev->sleep_avg = 0;
 	prev->timestamp = prev->last_ran = now;
 
+#ifdef CONFIG_CHOPSTIX
+	/* Run only if the Chopstix module so decrees it */
+	if (rec_event) {
+		prev->last_ran_j = jiffies;
+		if (next->last_interrupted != INTERRUPTIBLE) {
+			if (next->last_interrupted != RUNNING) {
+				diff = jiffies - next->last_interrupted;
+				sampling_reason = 0; /* BLOCKING */
+			}
+			else {
+				diff = jiffies - next->last_ran_j;
+				sampling_reason = 1; /* PREEMPTION */
+			}
+
+			if (diff >= HZ/10) {
+				struct event event;
+				struct event_spec espec;
+				unsigned long eip;
+
+				espec.reason = sampling_reason;
+				eip = next->thread.esp & 4095;
+				event.event_data = &espec;
+				event.task = next;
+				espec.pc = eip;
+				event.event_type = 2;
+				/* index in the event array currently set up */
+				/* make sure the counters are loaded in the order we want them to show up */
+				(*rec_event)(&event, diff);
+			}
+		}
+		/* next has been elected to run */
+		next->last_interrupted = 0;
+	}
+#endif
 	sched_info_switch(prev, next);
 	if (likely(prev != next)) {
 		next->timestamp = next->last_ran = now;
@@ -4664,6 +4743,7 @@
 	get_task_struct(p);
 	read_unlock(&tasklist_lock);
 
+
 	retval = -EPERM;
 	if ((current->euid != p->euid) && (current->euid != p->uid) &&
 			!capable(CAP_SYS_NICE))
@@ -5032,6 +5112,7 @@
 	jiffies_to_timespec(p->policy == SCHED_FIFO ?
 				0 : task_timeslice(p), &t);
 	read_unlock(&tasklist_lock);
+
 	retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
 out_nounlock:
 	return retval;
@@ -7275,3 +7356,14 @@
 }
 
 #endif
+
+#ifdef CONFIG_CHOPSTIX
+void (*rec_event)(void *, unsigned int) = NULL;
+
+/* To support safe calling from asm */
+asmlinkage void rec_event_asm(struct event *event_signature_in, unsigned int count) {
+	(*rec_event)(event_signature_in, count);
+}
+EXPORT_SYMBOL(rec_event);
+EXPORT_SYMBOL(in_sched_functions);
+#endif
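
The sampling condition in schedule() above only fires for long waits; a worked example of the threshold arithmetic, written as a comment block (HZ values are illustrative, nothing in the patch fixes HZ):

	/*
	 * diff >= HZ/10 jiffies is a 100 ms threshold regardless of HZ:
	 *
	 *   HZ = 250  ->  HZ/10 = 25 jiffies  * 4 ms/jiffy = 100 ms
	 *   HZ = 1000 ->  HZ/10 = 100 jiffies * 1 ms/jiffy = 100 ms
	 *
	 * reason 0 (BLOCKING):   diff = jiffies - last_interrupted,
	 *                        time the task spent asleep
	 * reason 1 (PREEMPTION): diff = jiffies - last_ran_j,
	 *                        time the task was runnable but off the CPU
	 */
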
diff -Nurb --exclude='*.swp' --exclude=tags --exclude='*.patch' --exclude='*.diff' linux-2.6.22-580/mm/memory.c linux-2.6.22-590/mm/memory.c
--- linux-2.6.22-580/mm/memory.c        2009-02-18 09:56:03.000000000 -0500
+++ linux-2.6.22-590/mm/memory.c        2009-02-18 09:57:23.000000000 -0500
@@ -59,6 +59,7 @@
 
 #include <linux/swapops.h>
 #include <linux/elf.h>
+#include <linux/arrays.h>
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 /* use the per-pgdat data instead for discontigmem - mbligh */
@@ -2601,6 +2602,15 @@
 	return ret;
 }
 
+extern void (*rec_event)(void *, unsigned int);
+struct event_spec {
+	unsigned long pc;
+	unsigned long dcookie;
+	unsigned count;
+	unsigned char reason;
+};
+
+
 /*
 * By the time we get here, we already hold the mm semaphore
 */
@@ -2630,6 +2640,24 @@
 	if (!pte)
 		return VM_FAULT_OOM;
 
+#ifdef CONFIG_CHOPSTIX
+	if (rec_event) {
+		struct event event;
+		struct event_spec espec;
+		struct pt_regs *regs;
+		unsigned int pc;
+		regs = task_pt_regs(current);
+		pc = regs->eip & (unsigned int)~4095;
+
+		espec.reason = 0; /* alloc */
+		event.event_data = &espec;
+		event.task = current;
+		espec.pc = pc;
+		event.event_type = 5;
+		(*rec_event)(&event, 1);
+	}
+#endif
+
 	return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
 }
 
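
The probes mask the recorded program counter differently depending on the site; a short illustration of the arithmetic (addresses made up):

	/*
	 * mm/memory.c:    pc  = regs->eip & (unsigned int)~4095
	 *                 0xc01234ab & 0xfffff000 = 0xc0123000  (4 KiB page base)
	 *
	 * kernel/sched.c: eip = next->thread.esp & 4095
	 *                 0xc98f7e54 & 0x00000fff = 0x00000e54  (offset within page)
	 */
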
diff -Nurb --exclude='*.swp' --exclude=tags --exclude='*.patch' --exclude='*.diff' linux-2.6.22-580/mm/slab.c linux-2.6.22-590/mm/slab.c
--- linux-2.6.22-580/mm/slab.c  2009-02-18 09:56:03.000000000 -0500
+++ linux-2.6.22-590/mm/slab.c  2009-02-18 10:00:42.000000000 -0500
@@ -110,11 +110,13 @@
 #include	<linux/fault-inject.h>
 #include	<linux/rtmutex.h>
 #include	<linux/reciprocal_div.h>
+#include	<linux/arrays.h>
 
 #include	<asm/cacheflush.h>
 #include	<asm/tlbflush.h>
 #include	<asm/page.h>
 
+
 /*
 * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
 *		  0 for faster, smaller code (especially in the critical paths).
@@ -249,6 +251,14 @@
 	void *addr;
 };
 
+extern void (*rec_event)(void *, unsigned int);
+struct event_spec {
+	unsigned long pc;
+	unsigned long dcookie;
+	unsigned count;
+	unsigned char reason;
+};
+
 /*
 * struct array_cache
 *
@@ -3443,6 +3453,19 @@
 	local_irq_restore(save_flags);
 	objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
 	prefetchw(objp);
+#ifdef CONFIG_CHOPSTIX
+	if (rec_event && objp) {
+		struct event event;
+		struct event_spec espec;
+
+		espec.reason = 0; /* alloc */
+		event.event_data = &espec;
+		event.task = current;
+		espec.pc = (unsigned long)caller;
+		event.event_type = 5;
+		(*rec_event)(&event, cachep->buffer_size);
+	}
+#endif
 
 	return objp;
 }
@@ -3549,12 +3572,26 @@
 * Release an obj back to its cache. If the obj has a constructed state, it must
 * be in this state _before_ it is released.  Called with disabled ints.
 */
-static inline void __cache_free(struct kmem_cache *cachep, void *objp)
+static inline void __cache_free(struct kmem_cache *cachep, void *objp, void *caller)
 {
 	struct array_cache *ac = cpu_cache_get(cachep);
 
 	check_irq_off();
-	objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
+	objp = cache_free_debugcheck(cachep, objp, caller);
+#ifdef CONFIG_CHOPSTIX
+	if (rec_event && objp) {
+		struct event event;
+		struct event_spec espec;
+
+		espec.reason = 1; /* free */
+		event.event_data = &espec;
+		event.task = current;
+		espec.pc = (unsigned long)caller;
+		event.event_type = 4;
+		(*rec_event)(&event, cachep->buffer_size);
+	}
+#endif
+
 	vx_slab_free(cachep);
 
 	if (cache_free_alien(cachep, objp))
@@ -3651,16 +3688,19 @@
 			__builtin_return_address(0));
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);
-
 static __always_inline void *
 __do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
 {
 	struct kmem_cache *cachep;
+	void *ret;
+
 
 	cachep = kmem_find_general_cachep(size, flags);
 	if (unlikely(cachep == NULL))
 		return NULL;
-	return kmem_cache_alloc_node(cachep, flags, node);
+	ret = kmem_cache_alloc_node(cachep, flags, node);
+
+	return ret;
 }
 
 #ifdef CONFIG_DEBUG_SLAB
@@ -3696,6 +3736,7 @@
 					  void *caller)
 {
 	struct kmem_cache *cachep;
+	void *ret;
 
 	/* If you want to save a few bytes .text space: replace
 	 * __ with kmem_.
@@ -3705,9 +3746,10 @@
 	cachep = __find_general_cachep(size, flags);
 	if (unlikely(cachep == NULL))
 		return NULL;
-	return __cache_alloc(cachep, flags, caller);
-}
+	ret = __cache_alloc(cachep, flags, caller);
 
+	return ret;
+}
 
 #ifdef CONFIG_DEBUG_SLAB
 void *__kmalloc(size_t size, gfp_t flags)
@@ -3723,10 +3765,17 @@
 EXPORT_SYMBOL(__kmalloc_track_caller);
 
 #else
+#ifdef CONFIG_CHOPSTIX
+void *__kmalloc(size_t size, gfp_t flags)
+{
+	return __do_kmalloc(size, flags, __builtin_return_address(0));
+}
+#else
 void *__kmalloc(size_t size, gfp_t flags)
 {
 	return __do_kmalloc(size, flags, NULL);
 }
+#endif
 EXPORT_SYMBOL(__kmalloc);
 #endif
 
@@ -3792,7 +3841,7 @@
 
 	local_irq_save(flags);
 	debug_check_no_locks_freed(objp, obj_size(cachep));
-	__cache_free(cachep, objp);
+	__cache_free(cachep, objp, __builtin_return_address(0));
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(kmem_cache_free);
@@ -3817,7 +3866,7 @@
 	kfree_debugcheck(objp);
 	c = virt_to_cache(objp);
 	debug_check_no_locks_freed(objp, obj_size(c));
-	__cache_free(c, (void *)objp);
+	__cache_free(c, (void *)objp, __builtin_return_address(0));
 	local_irq_restore(flags);
 }
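
Taken together, the probes in this patch emit a small set of event_type values. A hedged consumer-side dispatch sketch, with slot meanings inferred from the call sites above rather than from any Chopstix source (the patch's own comments say the types are indices into whatever event array userspace configured):

	#include <linux/arrays.h>

	static void dispatch(struct event *ev, unsigned int count)
	{
		switch (ev->event_type) {
		case 2:	/* scheduler: task waited >= 100 ms; count = jiffies */
			break;
		case 3:	/* block layer: request/response; count = bytes */
			break;
		case 4:	/* slab: free; count = object size */
			break;
		case 5:	/* slab alloc, mutex lock/unlock, or page fault */
			break;
		}
	}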