X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=linux-2.6-590-chopstix-intern.patch;h=d9a0c9ab0eb8c33f42fcb140b63a133078e2e49c;hb=refs%2Fheads%2F22;hp=a75849a48e386b5433ee35acb6d191eb90256250;hpb=e0edb22cbd51740ad23f6befdf4d74b7646801af;p=linux-2.6.git diff --git a/linux-2.6-590-chopstix-intern.patch b/linux-2.6-590-chopstix-intern.patch index a75849a48..d9a0c9ab0 100644 --- a/linux-2.6-590-chopstix-intern.patch +++ b/linux-2.6-590-chopstix-intern.patch @@ -1,6 +1,183 @@ -diff -Nurb linux-2.6.22-580/drivers/oprofile/cpu_buffer.c linux-2.6.22-590/drivers/oprofile/cpu_buffer.c ---- linux-2.6.22-580/drivers/oprofile/cpu_buffer.c 2007-07-08 19:32:17.000000000 -0400 -+++ linux-2.6.22-590/drivers/oprofile/cpu_buffer.c 2008-02-27 13:53:47.000000000 -0500 +diff -Nurb --exclude='*.cmd' --exclude='*.orig' --exclude='*.swp' --exclude=tags --exclude='*.patch' --exclude='*.diff' --exclude='*.svn*' linux-2.6.22-590/arch/i386/Kconfig linux-2.6.22-591/arch/i386/Kconfig +--- linux-2.6.22-590/arch/i386/Kconfig 2009-03-16 20:49:42.000000000 -0400 ++++ linux-2.6.22-591/arch/i386/Kconfig 2009-03-16 20:58:59.000000000 -0400 +@@ -1217,6 +1217,14 @@ + + source "arch/i386/oprofile/Kconfig" + ++config CHOPSTIX ++ bool "Chopstix (PlanetLab)" ++ depends on MODULES && OPROFILE ++ help ++ Chopstix allows you to monitor various events by summarizing them ++ in lossy data structures and transferring these data structures ++ into user space. If in doubt, say "N". ++ + config KPROBES + bool "Kprobes (EXPERIMENTAL)" + depends on KALLSYMS && EXPERIMENTAL && MODULES +--- linux-2.6.22-590/arch/i386/kernel/asm-offsets.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/i386/kernel/asm-offsets.c 2009-03-16 20:58:59.000000000 -0400 +@@ -9,6 +9,7 @@ + #include + #include + #include ++#include + #include + #include "sigframe.h" + #include +@@ -25,9 +26,19 @@ + #define OFFSET(sym, str, mem) \ + DEFINE(sym, offsetof(struct str, mem)); + ++#define STACKOFFSET(sym, str, mem) \ ++ DEFINE(sym, offsetof(struct str, mem)-sizeof(struct str)); ++ + /* workaround for a warning with -Wmissing-prototypes */ + void foo(void); + ++struct event_spec { ++ unsigned long pc; ++ unsigned long dcookie; ++ unsigned count; ++ unsigned int number; ++}; ++ + void foo(void) + { + OFFSET(SIGCONTEXT_eax, sigcontext, eax); +@@ -51,7 +62,16 @@ + OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); + BLANK(); + +- OFFSET(TI_task, thread_info, task); ++ STACKOFFSET(TASK_thread, task_struct, thread); ++ STACKOFFSET(THREAD_esp, thread_struct, esp); ++ STACKOFFSET(EVENT_event_data, event, event_data); ++ STACKOFFSET(EVENT_task, event, task); ++ STACKOFFSET(EVENT_event_type, event, event_type); ++ STACKOFFSET(SPEC_number, event_spec, number); ++ DEFINE(EVENT_SIZE, sizeof(struct event)); ++ DEFINE(SPEC_SIZE, sizeof(struct event_spec)); ++ DEFINE(SPEC_EVENT_SIZE, sizeof(struct event_spec)+sizeof(struct event)); ++ + OFFSET(TI_exec_domain, thread_info, exec_domain); + OFFSET(TI_flags, thread_info, flags); + OFFSET(TI_status, thread_info, status); +--- linux-2.6.22-590/arch/i386/kernel/entry.S 2009-03-16 20:49:07.000000000 -0400 ++++ linux-2.6.22-591/arch/i386/kernel/entry.S 2009-03-16 20:58:59.000000000 -0400 +@@ -374,6 +374,33 @@ + cmpl $(nr_syscalls), %eax + jae syscall_badsys + syscall_call: ++ /* Move Chopstix syscall probe here */ ++ /* Save and clobber: eax, ecx, ebp */ ++ pushl %eax ++ pushl %ecx ++ pushl %ebp ++ movl %esp, %ebp ++ subl $SPEC_EVENT_SIZE, %esp ++ movl rec_event, %ecx ++ testl %ecx, %ecx ++ jz carry_on ++ # struct event is first, just below %ebp ++ movl %eax, (SPEC_number-EVENT_SIZE)(%ebp) ++ leal -SPEC_EVENT_SIZE(%ebp), %eax ++ movl %eax, EVENT_event_data(%ebp) ++ movl $6, EVENT_event_type(%ebp) ++ movl rec_event, %edx ++ movl $1, 4(%esp) ++ leal -EVENT_SIZE(%ebp), %eax ++ movl %eax, (%esp) ++ call rec_event_asm ++carry_on: ++ addl $SPEC_EVENT_SIZE, %esp ++ popl %ebp ++ popl %ecx ++ popl %eax ++ /* End chopstix */ ++ + call *sys_call_table(,%eax,4) + movl %eax,PT_EAX(%esp) # store the return value + syscall_exit: +--- linux-2.6.22-590/arch/i386/mm/fault.c 2009-03-16 20:49:42.000000000 -0400 ++++ linux-2.6.22-591/arch/i386/mm/fault.c 2009-03-16 20:58:59.000000000 -0400 +@@ -60,6 +60,15 @@ + DIE_PAGE_FAULT, &args); + } + ++ ++extern void (*rec_event)(void *,unsigned int); ++struct event_spec { ++ unsigned long pc; ++ unsigned long dcookie; ++ unsigned count; ++ unsigned char reason; ++}; ++ + /* + * Return EIP plus the CS segment base. The segment limit is also + * adjusted, clamped to the kernel/user address space (whichever is +@@ -296,6 +305,8 @@ + * bit 3 == 1 means use of reserved bit detected + * bit 4 == 1 means fault was an instruction fetch + */ ++ ++ + fastcall void __kprobes do_page_fault(struct pt_regs *regs, + unsigned long error_code) + { +--- linux-2.6.22-590/block/ll_rw_blk.c 2009-03-16 20:49:07.000000000 -0400 ++++ linux-2.6.22-591/block/ll_rw_blk.c 2009-03-16 20:58:59.000000000 -0400 +@@ -30,6 +30,7 @@ + #include + #include + #include ++#include + + /* + * for max sense size +@@ -3102,6 +3103,13 @@ + + #endif /* CONFIG_FAIL_MAKE_REQUEST */ + ++extern void (*rec_event)(void *,unsigned int); ++struct event_spec { ++ unsigned long pc; ++ unsigned long dcookie; ++ unsigned count; ++ unsigned char reason; ++}; + /** + * generic_make_request: hand a buffer to its device driver for I/O + * @bio: The bio describing the location in memory and on the device. +@@ -3220,7 +3228,23 @@ + goto end_io; + } + } +- ++#ifdef CONFIG_CHOPSTIX ++ if (rec_event) { ++ struct event event; ++ struct event_spec espec; ++ unsigned long eip; ++ ++ espec.reason = 0;/*request */ ++ ++ eip = bio->bi_end_io; ++ event.event_data=&espec; ++ espec.pc=eip; ++ event.event_type=3; ++ /* index in the event array currently set up */ ++ /* make sure the counters are loaded in the order we want them to show up*/ ++ (*rec_event)(&event, bio->bi_size); ++ } ++#endif + ret = q->make_request_fn(q, bio); + } while (ret); + } +--- linux-2.6.22-590/drivers/oprofile/cpu_buffer.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/oprofile/cpu_buffer.c 2009-03-16 20:58:59.000000000 -0400 @@ -21,6 +21,7 @@ #include #include @@ -9,10 +186,12 @@ diff -Nurb linux-2.6.22-580/drivers/oprofile/cpu_buffer.c linux-2.6.22-590/drive #include "event_buffer.h" #include "cpu_buffer.h" -@@ -143,6 +144,14 @@ +@@ -143,6 +144,17 @@ b->head_pos = 0; } ++#ifdef CONFIG_CHOPSTIX ++ +struct event_spec { + unsigned int pc; + unsigned long dcookie; @@ -20,11 +199,12 @@ diff -Nurb linux-2.6.22-580/drivers/oprofile/cpu_buffer.c linux-2.6.22-590/drive +}; + +extern void (*rec_event)(void *,unsigned int); ++#endif + static inline void add_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc, unsigned long event) -@@ -151,6 +160,7 @@ +@@ -151,6 +163,7 @@ entry->eip = pc; entry->event = event; increment_head(cpu_buf); @@ -32,59 +212,16 @@ diff -Nurb linux-2.6.22-580/drivers/oprofile/cpu_buffer.c linux-2.6.22-590/drive } static inline void -@@ -237,12 +247,66 @@ - oprofile_end_trace(cpu_buf); - } - -+static int proc_pid_cmdline(struct task_struct *task, char * buffer) -+{ -+ int res = 0; -+ unsigned int len; -+ struct mm_struct *mm = get_task_mm(task); -+ if (!mm) -+ goto out; -+ if (!mm->arg_end) -+ goto out_mm; /* Shh! No looking before we're done */ -+ -+ len = mm->arg_end - mm->arg_start; -+ -+ if (len > PAGE_SIZE) -+ len = PAGE_SIZE; -+ -+ res = access_process_vm(task, mm->arg_start, buffer, len, 0); -+ -+ // If the nul at the end of args has been overwritten, then -+ // assume application is using setproctitle(3). -+ if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) { -+ len = strnlen(buffer, res); -+ if (len < res) { -+ res = len; -+ } else { -+ len = mm->env_end - mm->env_start; -+ if (len > PAGE_SIZE - res) -+ len = PAGE_SIZE - res; -+ res += access_process_vm(task, mm->env_start, buffer+res, len, 0); -+ res = strnlen(buffer, res); -+ } -+ } -+out_mm: -+ mmput(mm); -+out: -+ return res; -+} -+ -+ -+ - void oprofile_add_sample(struct pt_regs * const regs, unsigned long event) +@@ -241,8 +254,28 @@ { int is_kernel = !user_mode(regs); unsigned long pc = profile_pc(regs); + int res=0; ++#ifdef CONFIG_CHOPSTIX + if (rec_event) { + struct event esig; + struct event_spec espec; -+ /*res = proc_pid_cmdline(current, espec->appname);*/ + esig.task = current; + espec.pc=pc; + espec.count=1; @@ -96,35 +233,119 @@ diff -Nurb linux-2.6.22-580/drivers/oprofile/cpu_buffer.c linux-2.6.22-590/drive + else { oprofile_add_ext_sample(pc, regs, event, is_kernel); + } ++#else ++ oprofile_add_ext_sample(pc, regs, event, is_kernel); ++#endif ++ ++ } void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event) -diff -Nurb linux-2.6.22-580/fs/exec.c linux-2.6.22-590/fs/exec.c ---- linux-2.6.22-580/fs/exec.c 2008-02-27 13:46:38.000000000 -0500 -+++ linux-2.6.22-590/fs/exec.c 2008-02-27 13:49:58.000000000 -0500 -@@ -52,6 +52,7 @@ - #include - #include - #include -+#include +--- linux-2.6.22-590/fs/bio.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/bio.c 2009-03-16 20:58:59.000000000 -0400 +@@ -27,6 +27,7 @@ + #include + #include + #include /* for struct sg_iovec */ ++#include - #include - #include -@@ -488,6 +489,10 @@ + #define BIO_POOL_SIZE 2 + +@@ -47,6 +48,7 @@ + struct kmem_cache *slab; + }; + ++ + /* + * if you change this list, also change bvec_alloc or things will + * break badly! cannot be bigger than what you can fit into an +@@ -999,6 +1001,14 @@ + } + } + ++struct event_spec { ++ unsigned long pc; ++ unsigned long dcookie; ++ unsigned count; ++ unsigned char reason; ++}; ++ ++extern void (*rec_event)(void *,unsigned int); + /** + * bio_endio - end I/O on a bio + * @bio: bio +@@ -1028,6 +1038,24 @@ + bio->bi_size -= bytes_done; + bio->bi_sector += (bytes_done >> 9); + ++#ifdef CONFIG_CHOPSTIX ++ if (rec_event) { ++ struct event event; ++ struct event_spec espec; ++ unsigned long eip; ++ ++ espec.reason = 1;/*response */ ++ ++ eip = bio->bi_end_io; ++ event.event_data=&espec; ++ espec.pc=eip; ++ event.event_type=3; ++ /* index in the event array currently set up */ ++ /* make sure the counters are loaded in the order we want them to show up*/ ++ (*rec_event)(&event, bytes_done); ++ } ++#endif ++ + if (bio->bi_end_io) + bio->bi_end_io(bio, bytes_done, error); + } +--- linux-2.6.22-580/fs/exec.c 2009-04-08 16:36:16.000000000 -0400 ++++ linux-2.6.22-590/fs/exec.c 2009-04-08 16:40:34.000000000 -0400 +@@ -27,6 +27,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -38,7 +39,7 @@ + #include + #include + #include +-#include ++/*#include */ + #include + #include + #include +@@ -488,6 +489,13 @@ if (!err) { struct inode *inode = nd.dentry->d_inode; ++#ifdef CONFIG_CHOPSTIX + unsigned long cookie; -+ if (!nd.dentry->d_cookie) ++ extern void (*rec_event)(void *, unsigned int); ++ if (rec_event && !nd.dentry->d_cookie) + get_dcookie(nd.dentry, nd.mnt, &cookie); ++#endif + file = ERR_PTR(-EACCES); if (!(nd.mnt->mnt_flags & MNT_NOEXEC) && S_ISREG(inode->i_mode)) { -diff -Nurb linux-2.6.22-580/include/linux/arrays.h linux-2.6.22-590/include/linux/arrays.h ---- linux-2.6.22-580/include/linux/arrays.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.22-590/include/linux/arrays.h 2008-02-27 13:48:29.000000000 -0500 -@@ -0,0 +1,35 @@ +@@ -627,8 +635,10 @@ + * Reparenting needs write_lock on tasklist_lock, + * so it is safe to do it under read_lock. + */ ++ /* + if (unlikely(tsk->group_leader == child_reaper(tsk))) + tsk->nsproxy->pid_ns->child_reaper = tsk; ++ */ + + zap_other_threads(tsk); + read_unlock(&tasklist_lock); +--- linux-2.6.22-590/include/linux/arrays.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/include/linux/arrays.h 2009-03-16 20:58:59.000000000 -0400 +@@ -0,0 +1,36 @@ +#ifndef __ARRAYS_H__ +#define __ARRAYS_H__ +#include @@ -136,6 +357,7 @@ diff -Nurb linux-2.6.22-580/include/linux/arrays.h linux-2.6.22-590/include/linu + +/* XXX - Optimize this structure */ + ++extern void (*rec_event)(void *,unsigned int); +struct array_handler { + struct list_head link; + unsigned int (*hash_func)(void *); @@ -160,33 +382,150 @@ diff -Nurb linux-2.6.22-580/include/linux/arrays.h linux-2.6.22-590/include/linu + struct task_struct *task; +}; +#endif -diff -Nurb linux-2.6.22-580/include/linux/sched.h linux-2.6.22-590/include/linux/sched.h ---- linux-2.6.22-580/include/linux/sched.h 2008-02-27 13:46:40.000000000 -0500 -+++ linux-2.6.22-590/include/linux/sched.h 2008-02-27 13:48:29.000000000 -0500 -@@ -849,7 +849,7 @@ - unsigned int btrace_seq; +--- linux-2.6.22-590/include/linux/mutex.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/mutex.h 2009-03-16 20:58:59.000000000 -0400 +@@ -53,6 +53,10 @@ + struct thread_info *owner; + const char *name; + void *magic; ++#else ++#ifdef CONFIG_CHOPSTIX ++ struct thread_info *owner; ++#endif + #endif + #ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +--- linux-2.6.22-590/include/linux/sched.h 2009-03-16 20:49:42.000000000 -0400 ++++ linux-2.6.22-591/include/linux/sched.h 2009-03-16 20:58:59.000000000 -0400 +@@ -850,6 +850,10 @@ #endif unsigned long sleep_avg; -- unsigned long long timestamp, last_ran; -+ unsigned long long timestamp, last_ran, last_interrupted, last_ran_j; + unsigned long long timestamp, last_ran; ++#ifdef CONFIG_CHOPSTIX ++ unsigned long last_interrupted, last_ran_j; ++#endif ++ unsigned long long sched_time; /* sched_clock time spent running */ enum sleep_type sleep_type; -diff -Nurb linux-2.6.22-580/kernel/fork.c linux-2.6.22-590/kernel/fork.c ---- linux-2.6.22-580/kernel/fork.c 2008-02-27 13:46:40.000000000 -0500 -+++ linux-2.6.22-590/kernel/fork.c 2008-02-27 13:48:29.000000000 -0500 -@@ -197,6 +197,8 @@ - tsk->btrace_seq = 0; - #endif - tsk->splice_pipe = NULL; -+ //tsk->cmdline[0]='\0'; -+ tsk->last_interrupted = 0; - return tsk; +--- linux-2.6.22-590/kernel/mutex.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/kernel/mutex.c 2009-03-16 20:58:59.000000000 -0400 +@@ -18,6 +18,17 @@ + #include + #include + #include ++#include ++ ++#undef CONFIG_CHOPSTIX ++#ifdef CONFIG_CHOPSTIX ++struct event_spec { ++ unsigned long pc; ++ unsigned long dcookie; ++ unsigned count; ++ unsigned char reason; ++}; ++#endif + + /* + * In the DEBUG case we are using the "NULL fastpath" for mutexes, +@@ -43,6 +54,9 @@ + __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) + { + atomic_set(&lock->count, 1); ++#ifdef CONFIG_CHOPSTIX ++ lock->owner=NULL; ++#endif + spin_lock_init(&lock->wait_lock); + INIT_LIST_HEAD(&lock->wait_list); + +@@ -88,6 +102,7 @@ + * The locking fastpath is the 1->0 transition from + * 'unlocked' into 'locked' state. + */ ++ + __mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath); } -diff -Nurb linux-2.6.22-580/kernel/sched.c linux-2.6.22-590/kernel/sched.c ---- linux-2.6.22-580/kernel/sched.c 2008-02-27 13:46:40.000000000 -0500 -+++ linux-2.6.22-590/kernel/sched.c 2008-02-27 14:08:26.000000000 -0500 +@@ -168,6 +183,27 @@ + } + __set_task_state(task, state); + ++#ifdef CONFIG_CHOPSTIX ++ if (rec_event) { ++ if (lock->owner) { ++ struct event event; ++ struct event_spec espec; ++ struct task_struct *p = lock->owner->task; ++ /*spin_lock(&p->alloc_lock);*/ ++ espec.reason = 0; /* lock */ ++ event.event_data=&espec; ++ event.task = p; ++ espec.pc=lock; ++ event.event_type=5; ++ (*rec_event)(&event, 1); ++ /*spin_unlock(&p->alloc_lock);*/ ++ ++ } ++ else ++ BUG(); ++ } ++#endif ++ + /* didnt get the lock, go to sleep: */ + spin_unlock_mutex(&lock->wait_lock, flags); + schedule(); +@@ -177,6 +213,9 @@ + /* got the lock - rejoice! */ + mutex_remove_waiter(lock, &waiter, task_thread_info(task)); + debug_mutex_set_owner(lock, task_thread_info(task)); ++#ifdef CONFIG_CHOPSTIX ++ lock->owner = task_thread_info(task); ++#endif + + /* set it to 0 if there are no waiters left: */ + if (likely(list_empty(&lock->wait_list))) +@@ -202,6 +241,7 @@ + mutex_lock_nested(struct mutex *lock, unsigned int subclass) + { + might_sleep(); ++ + __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass); + } + +@@ -211,6 +251,7 @@ + mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) + { + might_sleep(); ++ + return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass); + } + +@@ -246,6 +287,23 @@ + + debug_mutex_wake_waiter(lock, waiter); + ++#ifdef CONFIG_CHOPSTIX ++ if (rec_event) { ++ if (lock->owner) { ++ struct event event; ++ struct event_spec espec; ++ ++ espec.reason = 1; /* unlock */ ++ event.event_data=&espec; ++ event.task = lock->owner->task; ++ espec.pc=lock; ++ event.event_type=5; ++ (*rec_event)(&event, 1); ++ } ++ else ++ BUG(); ++ } ++#endif + wake_up_process(waiter->task); + } + +--- linux-2.6.22-590/kernel/sched.c 2009-03-16 20:49:42.000000000 -0400 ++++ linux-2.6.22-591/kernel/sched.c 2009-03-16 20:58:59.000000000 -0400 @@ -10,7 +10,7 @@ * 1998-11-19 Implemented schedule_timeout() and related stuff * by Andrea Arcangeli @@ -196,60 +535,76 @@ diff -Nurb linux-2.6.22-580/kernel/sched.c linux-2.6.22-590/kernel/sched.c * an array-switch method of distributing timeslices * and per-CPU runqueues. Cleanups and useful suggestions * by Davide Libenzi, preemptible kernel bits by Robert Love. -@@ -56,6 +56,7 @@ - - #include - #include +@@ -23,6 +23,7 @@ + #include + #include + #include +#include + #include + #include + #include +@@ -59,6 +60,9 @@ #include #include -@@ -3608,6 +3609,8 @@ ++#define INTERRUPTIBLE -1 ++#define RUNNING 0 ++ + /* + * Scheduler clock - returns current time in nanosec units. + * This is default implementation. +@@ -431,6 +435,7 @@ + + repeat_lock_task: + rq = task_rq(p); ++ + spin_lock(&rq->lock); + if (unlikely(rq != task_rq(p))) { + spin_unlock(&rq->lock); +@@ -1741,6 +1746,21 @@ + * event cannot wake it up and insert it on the runqueue either. + */ + p->state = TASK_RUNNING; ++#ifdef CONFIG_CHOPSTIX ++ /* The jiffy of last interruption */ ++ if (p->state & TASK_UNINTERRUPTIBLE) { ++ p->last_interrupted=jiffies; ++ } ++ else ++ if (p->state & TASK_INTERRUPTIBLE) { ++ p->last_interrupted=INTERRUPTIBLE; ++ } ++ else ++ p->last_interrupted=RUNNING; ++ ++ /* The jiffy of last execution */ ++ p->last_ran_j=jiffies; ++#endif + + /* + * Make sure we do not leak PI boosting priority to the child: +@@ -3608,6 +3628,7 @@ #endif -+extern void (*rec_event)(void *,unsigned int); + static inline int interactive_sleep(enum sleep_type sleep_type) { return (sleep_type == SLEEP_INTERACTIVE || -@@ -3617,16 +3620,51 @@ +@@ -3617,16 +3638,28 @@ /* * schedule() is the main scheduler function. */ + ++#ifdef CONFIG_CHOPSTIX ++extern void (*rec_event)(void *,unsigned int); +struct event_spec { + unsigned long pc; + unsigned long dcookie; -+ unsigned count; -+ unsigned char reason; ++ unsigned int count; ++ unsigned int reason; +}; -+ -+#define top_esp (THREAD_SIZE - sizeof(unsigned long)) -+#define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long)) -+ -+static inline unsigned long my_get_wchan(struct task_struct *p) -+{ -+ unsigned long ebp, esp, eip; -+ unsigned long stack_page; -+ int count = 0; -+ stack_page = (unsigned long)task_stack_page(p); -+ esp = p->thread.esp; -+ if (!stack_page || esp < stack_page || esp > top_esp+stack_page) -+ return 0; -+ /* include/asm-i386/system.h:switch_to() pushes ebp last. */ -+ ebp = *(unsigned long *) esp; -+ do { -+ if (ebp < stack_page || ebp > top_ebp+stack_page) -+ return 0; -+ eip = *(unsigned long *) (ebp+4); -+ if (!in_sched_functions(eip)) -+ return eip; -+ ebp = *(unsigned long *) ebp; -+ } while (count++ < 16); -+ return 0; -+} -+/* CHOPSTIX */ ++#endif + asmlinkage void __sched schedule(void) { @@ -266,7 +621,7 @@ diff -Nurb linux-2.6.22-580/kernel/sched.c linux-2.6.22-590/kernel/sched.c /* * Test if we are atomic. Since do_exit() needs to call into -@@ -3680,6 +3718,7 @@ +@@ -3680,6 +3713,7 @@ switch_count = &prev->nivcsw; if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { switch_count = &prev->nvcsw; @@ -274,88 +629,310 @@ diff -Nurb linux-2.6.22-580/kernel/sched.c linux-2.6.22-590/kernel/sched.c if (unlikely((prev->state & TASK_INTERRUPTIBLE) && unlikely(signal_pending(prev)))) prev->state = TASK_RUNNING; -@@ -3689,6 +3728,9 @@ +@@ -3689,6 +3723,17 @@ vx_uninterruptible_inc(prev); } deactivate_task(prev, rq); -+ if (prev->state & TASK_INTERRUPTIBLE) { ++#ifdef CONFIG_CHOPSTIX ++ /* An uninterruptible process just yielded. Record the current jiffie */ ++ if (prev->state & TASK_UNINTERRUPTIBLE) { + prev->last_interrupted=jiffies; + } ++ /* An interruptible process just yielded, or it got preempted. ++ * Mark it as interruptible */ ++ else if (prev->state & TASK_INTERRUPTIBLE) { ++ prev->last_interrupted=INTERRUPTIBLE; ++ } ++#endif } } -@@ -3763,8 +3805,44 @@ - prev->sleep_avg -= run_time; - if ((long)prev->sleep_avg <= 0) +@@ -3765,6 +3810,40 @@ prev->sleep_avg = 0; -+ prev->timestamp = prev->last_ran = now; -+ /* CHOPSTIX */ -+ -+ prev->last_ran_j = jiffies; -+ if (next->last_interrupted) { -+ diff = (jiffies-next->last_interrupted); -+ next->last_interrupted = 0; -+ sampling_reason = 0; -+ } -+ else { -+ diff = jiffies-next->last_ran_j; -+ sampling_reason = 1; -+ } -+ -+ if (rec_event && (diff>HZ/5)) { -+ struct event event; -+ struct event_spec espec; -+ unsigned long eip; -+ unsigned int state = next->state; -+ -+ espec.reason = sampling_reason; ++#ifdef CONFIG_CHOPSTIX ++ /* Run only if the Chopstix module so decrees it */ ++ if (rec_event) { ++ prev->last_ran_j = jiffies; ++ if (next->last_interrupted!=INTERRUPTIBLE) { ++ if (next->last_interrupted!=RUNNING) { ++ diff = (jiffies-next->last_interrupted); ++ sampling_reason = 0;/* BLOCKING */ ++ } ++ else { ++ diff = jiffies-next->last_ran_j; ++ sampling_reason = 1;/* PREEMPTION */ ++ } + -+ next->state = 0; -+ eip = next->thread.esp; -+ next->state = state; ++ if (diff >= HZ/10) { ++ struct event event; ++ struct event_spec espec; ++ struct pt_regs *regs; ++ regs = task_pt_regs(current); + -+ next->last_interrupted = 0; -+ event.event_data=&espec; -+ event.task=next; -+ espec.pc=eip; -+ event.event_type=2; -+ /* index in the event array currently set up */ -+ /* make sure the counters are loaded in the order we want them to show up*/ -+ (*rec_event)(&event, diff); ++ espec.reason = sampling_reason; ++ event.event_data=&espec; ++ event.task=next; ++ espec.pc=regs->eip; ++ event.event_type=2; ++ /* index in the event array currently set up */ ++ /* make sure the counters are loaded in the order we want them to show up*/ ++ (*rec_event)(&event, diff); ++ } ++ } ++ /* next has been elected to run */ ++ next->last_interrupted=0; + } -+ ++#endif sched_info_switch(prev, next); if (likely(prev != next)) { next->timestamp = next->last_ran = now; -@@ -7275,3 +7353,7 @@ +@@ -4664,6 +4743,7 @@ + get_task_struct(p); + read_unlock(&tasklist_lock); + ++ + retval = -EPERM; + if ((current->euid != p->euid) && (current->euid != p->uid) && + !capable(CAP_SYS_NICE)) +@@ -5032,6 +5112,7 @@ + jiffies_to_timespec(p->policy == SCHED_FIFO ? + 0 : task_timeslice(p), &t); + read_unlock(&tasklist_lock); ++ + retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; + out_nounlock: + return retval; +@@ -7275,3 +7356,20 @@ } #endif + -+void (*rec_event)(void *,unsigned int); ++#ifdef CONFIG_CHOPSTIX ++void (*rec_event)(void *,unsigned int) = NULL; ++ ++/* To support safe calling from asm */ ++asmlinkage void rec_event_asm (struct event *event_signature_in, unsigned int count) { ++ struct pt_regs *regs; ++ struct event_spec *es = event_signature_in->event_data; ++ regs = task_pt_regs(current); ++ event_signature_in->task=current; ++ es->pc=regs->eip; ++ event_signature_in->count=1; ++ (*rec_event)(event_signature_in, count); ++} +EXPORT_SYMBOL(rec_event); +EXPORT_SYMBOL(in_sched_functions); -diff -Nurb linux-2.6.22-580/kernel/sched.c.rej linux-2.6.22-590/kernel/sched.c.rej ---- linux-2.6.22-580/kernel/sched.c.rej 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.22-590/kernel/sched.c.rej 2008-02-27 13:48:29.000000000 -0500 -@@ -0,0 +1,18 @@ -+*************** -+*** 56,61 **** -+ -+ #include -+ #include -+ -+ /* -+ * Scheduler clock - returns current time in nanosec units. -+--- 56,64 ---- -+ -+ #include -+ #include -++ #include -++ -++ ++#endif +--- linux-2.6.22-590/mm/memory.c 2009-03-16 20:49:42.000000000 -0400 ++++ linux-2.6.22-591/mm/memory.c 2009-03-16 20:58:59.000000000 -0400 +@@ -59,6 +59,7 @@ + + #include + #include ++#include + + #ifndef CONFIG_NEED_MULTIPLE_NODES + /* use the per-pgdat data instead for discontigmem - mbligh */ +@@ -2601,6 +2602,15 @@ + return ret; + } + ++extern void (*rec_event)(void *,unsigned int); ++struct event_spec { ++ unsigned long pc; ++ unsigned long dcookie; ++ unsigned count; ++ unsigned char reason; ++}; ++ ++ + /* + * By the time we get here, we already hold the mm semaphore + */ +@@ -2630,6 +2640,24 @@ + if (!pte) + return VM_FAULT_OOM; + ++#ifdef CONFIG_CHOPSTIX ++ if (rec_event) { ++ struct event event; ++ struct event_spec espec; ++ struct pt_regs *regs; ++ unsigned int pc; ++ regs = task_pt_regs(current); ++ pc = regs->eip & (unsigned int) ~4095; ++ ++ espec.reason = 0; /* alloc */ ++ event.event_data=&espec; ++ event.task = current; ++ espec.pc=pc; ++ event.event_type=5; ++ (*rec_event)(&event, 1); ++ } ++#endif ++ + return handle_pte_fault(mm, vma, address, pte, pmd, write_access); + } + +--- linux-2.6.22-590/mm/slab.c 2009-03-16 20:49:42.000000000 -0400 ++++ linux-2.6.22-591/mm/slab.c 2009-03-16 21:00:27.000000000 -0400 +@@ -110,11 +110,13 @@ + #include + #include + #include ++#include + + #include + #include + #include + ++ + /* + * DEBUG - 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON. + * 0 for faster, smaller code (especially in the critical paths). +@@ -249,6 +251,14 @@ + void *addr; + }; + ++extern void (*rec_event)(void *,unsigned int); ++struct event_spec { ++ unsigned long pc; ++ unsigned long dcookie; ++ unsigned count; ++ unsigned char reason; ++}; ++ + /* + * struct array_cache + * +@@ -3443,6 +3453,19 @@ + local_irq_restore(save_flags); + objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); + prefetchw(objp); ++#ifdef CONFIG_CHOPSTIX ++ if (rec_event && objp) { ++ struct event event; ++ struct event_spec espec; ++ ++ espec.reason = 0; /* alloc */ ++ event.event_data=&espec; ++ event.task = current; ++ espec.pc=caller; ++ event.event_type=5; ++ (*rec_event)(&event, cachep->buffer_size); ++ } ++#endif + + return objp; + } +@@ -3549,12 +3572,26 @@ + * Release an obj back to its cache. If the obj has a constructed state, it must + * be in this state _before_ it is released. Called with disabled ints. + */ +-static inline void __cache_free(struct kmem_cache *cachep, void *objp) ++static inline void __cache_free(struct kmem_cache *cachep, void *objp, void *caller) + { + struct array_cache *ac = cpu_cache_get(cachep); + + check_irq_off(); +- objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0)); ++ objp = cache_free_debugcheck(cachep, objp, caller); ++ #ifdef CONFIG_CHOPSTIX ++ if (rec_event && objp) { ++ struct event event; ++ struct event_spec espec; + -+ /* -+ * Scheduler clock - returns current time in nanosec units. ++ espec.reason = 1; /* free */ ++ event.event_data=&espec; ++ event.task = current; ++ espec.pc=caller; ++ event.event_type=4; ++ (*rec_event)(&event, cachep->buffer_size); ++ } ++ #endif ++ + vx_slab_free(cachep); + + if (cache_free_alien(cachep, objp)) +@@ -3651,16 +3688,19 @@ + __builtin_return_address(0)); + } + EXPORT_SYMBOL(kmem_cache_alloc_node); +- + static __always_inline void * + __do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller) + { + struct kmem_cache *cachep; ++ void *ret; ++ + + cachep = kmem_find_general_cachep(size, flags); + if (unlikely(cachep == NULL)) + return NULL; +- return kmem_cache_alloc_node(cachep, flags, node); ++ ret = kmem_cache_alloc_node(cachep, flags, node); ++ ++ return ret; + } + + #ifdef CONFIG_DEBUG_SLAB +@@ -3696,6 +3736,7 @@ + void *caller) + { + struct kmem_cache *cachep; ++ void *ret; + + /* If you want to save a few bytes .text space: replace + * __ with kmem_. +@@ -3705,9 +3746,10 @@ + cachep = __find_general_cachep(size, flags); + if (unlikely(cachep == NULL)) + return NULL; +- return __cache_alloc(cachep, flags, caller); +-} ++ ret = __cache_alloc(cachep, flags, caller); + ++ return ret; ++} + + #ifdef CONFIG_DEBUG_SLAB + void *__kmalloc(size_t size, gfp_t flags) +@@ -3723,10 +3765,17 @@ + EXPORT_SYMBOL(__kmalloc_track_caller); + + #else ++#ifdef CONFIG_CHOPSTIX ++void *__kmalloc(size_t size, gfp_t flags) ++{ ++ return __do_kmalloc(size, flags, __builtin_return_address(0)); ++} ++#else + void *__kmalloc(size_t size, gfp_t flags) + { + return __do_kmalloc(size, flags, NULL); + } ++#endif + EXPORT_SYMBOL(__kmalloc); + #endif + +@@ -3792,7 +3841,7 @@ + + local_irq_save(flags); + debug_check_no_locks_freed(objp, obj_size(cachep)); +- __cache_free(cachep, objp); ++ __cache_free(cachep, objp,__builtin_return_address(0)); + local_irq_restore(flags); + } + EXPORT_SYMBOL(kmem_cache_free); +@@ -3817,7 +3866,7 @@ + kfree_debugcheck(objp); + c = virt_to_cache(objp); + debug_check_no_locks_freed(objp, obj_size(c)); +- __cache_free(c, (void *)objp); ++ __cache_free(c, (void *)objp,__builtin_return_address(0)); + local_irq_restore(flags); + } + EXPORT_SYMBOL(kfree); +