diff -Nurb linux-2.6.22-580/arch/i386/Kconfig linux-2.6.22-590/arch/i386/Kconfig
--- linux-2.6.22-580/arch/i386/Kconfig	2008-04-30 09:29:26.000000000 -0400
+++ linux-2.6.22-590/arch/i386/Kconfig	2008-04-30 09:29:41.000000000 -0400
@@ -1217,6 +1217,14 @@
 
 source "arch/i386/oprofile/Kconfig"
 
+config CHOPSTIX
+	bool "Chopstix (PlanetLab)"
+	depends on MODULES && OPROFILE
+	help
+	  Chopstix allows you to monitor various events by summarizing them
+	  in lossy data structures and transferring these data structures
+	  into user space. If in doubt, say "N".
+
 config KPROBES
 	bool "Kprobes (EXPERIMENTAL)"
 	depends on KALLSYMS && EXPERIMENTAL && MODULES
diff -Nurb linux-2.6.22-580/block/ll_rw_blk.c linux-2.6.22-590/block/ll_rw_blk.c
--- linux-2.6.22-580/block/ll_rw_blk.c	2008-04-30 09:29:21.000000000 -0400
+++ linux-2.6.22-590/block/ll_rw_blk.c	2008-04-30 09:29:41.000000000 -0400
@@ -30,6 +30,7 @@
 #include <linux/cpu.h>
 #include <linux/blktrace_api.h>
 #include <linux/fault-inject.h>
+#include <linux/arrays.h>
 
 /*
  * for max sense size
@@ -3102,6 +3103,13 @@
 
 #endif /* CONFIG_FAIL_MAKE_REQUEST */
 
+extern void (*rec_event)(void *,unsigned int);
+struct event_spec {
+	unsigned long pc;
+	unsigned long dcookie;
+	unsigned count;
+	unsigned char reason;
+};
 /**
  * generic_make_request: hand a buffer to its device driver for I/O
  * @bio:  The bio describing the location in memory and on the device.
@@ -3220,7 +3228,23 @@
 			goto end_io;
 		}
 	}
-
+#ifdef CONFIG_CHOPSTIX
+		if (rec_event) {
+			struct event event;
+			struct event_spec espec;
+			unsigned long eip;
+
+			espec.reason = 0;/*request */
+
+			eip = bio->bi_end_io;
+			event.event_data=&espec;
+			espec.pc=eip;
+			event.event_type=3;
+			/* index in the event array currently set up */
+			/* make sure the counters are loaded in the order we want them to show up*/
+			(*rec_event)(&event, bio->bi_size);
+		}
+#endif
 		ret = q->make_request_fn(q, bio);
 	} while (ret);
 }
diff -Nurb linux-2.6.22-580/drivers/oprofile/cpu_buffer.c linux-2.6.22-590/drivers/oprofile/cpu_buffer.c
--- linux-2.6.22-580/drivers/oprofile/cpu_buffer.c	2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.22-590/drivers/oprofile/cpu_buffer.c	2008-04-30 09:29:41.000000000 -0400
@@ -21,6 +21,7 @@
 #include <linux/oprofile.h>
 #include <linux/vmalloc.h>
 #include <linux/errno.h>
+#include <linux/arrays.h>
 
 #include "event_buffer.h"
 #include "cpu_buffer.h"
@@ -143,6 +144,17 @@
 	b->head_pos = 0;
 }
 
+#ifdef CONFIG_CHOPSTIX
+
+struct event_spec {
+	unsigned int pc;
+	unsigned long dcookie;
+	unsigned count;
+};
+
+extern void (*rec_event)(void *,unsigned int);
+#endif
+
 static inline void
 add_sample(struct oprofile_cpu_buffer * cpu_buf,
 	   unsigned long pc, unsigned long event)
@@ -151,6 +163,7 @@
 	entry->eip = pc;
 	entry->event = event;
 	increment_head(cpu_buf);
+
 }
 
 static inline void
@@ -241,8 +254,28 @@
 {
 	int is_kernel = !user_mode(regs);
 	unsigned long pc = profile_pc(regs);
+	int res=0;
+#ifdef CONFIG_CHOPSTIX
+	if (rec_event) {
+		struct event esig;
+		struct event_spec espec;
+		esig.task = current;
+		espec.pc=pc;
+		espec.count=1;
+		esig.event_data=&espec;
+		esig.event_type=event; /* index in the event array currently set up */
+		/* make sure the counters are loaded in the order we want them to show up*/
+		(*rec_event)(&esig, 1);
+	}
+	else {
 
 	oprofile_add_ext_sample(pc, regs, event, is_kernel);
+	}
+#else
+	oprofile_add_ext_sample(pc, regs, event, is_kernel);
+#endif
+
+
 }
 
 void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
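The two files instrumented above, and the bio, mutex, scheduler, and slab hunks below, all repeat the same emit pattern: fill a stack-allocated event/event_spec pair and call the rec_event hook if a collector module has installed one. Distilled into a single hypothetical helper (not part of the patch, which open-codes it at every site), the pattern is:

    /* Hypothetical helper -- the patch open-codes this at each site.
     * struct event and the rec_event hook come from <linux/arrays.h>;
     * event_spec mirrors the per-file definitions in the hunks. */
    #include <linux/sched.h>		/* current */
    #include <linux/arrays.h>

    struct event_spec {
    	unsigned long pc;
    	unsigned long dcookie;
    	unsigned count;
    	unsigned char reason;
    };

    static inline void chopstix_emit(unsigned int type, unsigned char reason,
    				     unsigned long pc, unsigned int count)
    {
    	struct event ev;		/* stack-allocated: the hook must copy it */
    	struct event_spec es;

    	if (!rec_event)			/* no collector loaded */
    		return;
    	es.reason = reason;
    	es.pc = pc;
    	es.count = 1;
    	ev.event_data = &es;
    	ev.event_type = type;		/* selects the collector's handler */
    	ev.task = current;
    	(*rec_event)(&ev, count);
    }

In this notation, the generic_make_request hunk above is chopstix_emit(3, 0, (unsigned long)bio->bi_end_io, bio->bi_size), and the bio_endio hunk below is the same call with reason 1 and bytes_done.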
diff -Nurb linux-2.6.22-580/fs/bio.c linux-2.6.22-590/fs/bio.c
--- linux-2.6.22-580/fs/bio.c	2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.22-590/fs/bio.c	2008-04-30 09:29:41.000000000 -0400
@@ -27,6 +27,7 @@
 #include <linux/mempool.h>
 #include <linux/workqueue.h>
 #include <linux/blktrace_api.h>
 #include <scsi/sg.h>		/* for struct sg_iovec */
+#include <linux/arrays.h>
 
 #define BIO_POOL_SIZE 2
@@ -47,6 +48,7 @@
 	struct kmem_cache *slab;
 };
 
+
 /*
  * if you change this list, also change bvec_alloc or things will
  * break badly! cannot be bigger than what you can fit into an
@@ -999,6 +1001,14 @@
 	}
 }
 
+struct event_spec {
+	unsigned long pc;
+	unsigned long dcookie;
+	unsigned count;
+	unsigned char reason;
+};
+
+extern void (*rec_event)(void *,unsigned int);
 /**
  * bio_endio - end I/O on a bio
  * @bio:	bio
@@ -1028,6 +1038,24 @@
 	bio->bi_size -= bytes_done;
 	bio->bi_sector += (bytes_done >> 9);
 
+#ifdef CONFIG_CHOPSTIX
+	if (rec_event) {
+		struct event event;
+		struct event_spec espec;
+		unsigned long eip;
+
+		espec.reason = 1;/*response */
+
+		eip = bio->bi_end_io;
+		event.event_data=&espec;
+		espec.pc=eip;
+		event.event_type=3;
+		/* index in the event array currently set up */
+		/* make sure the counters are loaded in the order we want them to show up*/
+		(*rec_event)(&event, bytes_done);
+	}
+#endif
+
 	if (bio->bi_end_io)
 		bio->bi_end_io(bio, bytes_done, error);
 }
diff -Nurb linux-2.6.22-580/fs/exec.c linux-2.6.22-590/fs/exec.c
--- linux-2.6.22-580/fs/exec.c	2008-04-30 09:29:26.000000000 -0400
+++ linux-2.6.22-590/fs/exec.c	2008-04-30 09:29:41.000000000 -0400
@@ -52,6 +52,7 @@
 #include <linux/cn_proc.h>
 #include <linux/audit.h>
 #include <linux/signalfd.h>
+#include <linux/dcookies.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -488,6 +489,12 @@
 
 	if (!err) {
 		struct inode *inode = nd.dentry->d_inode;
+#ifdef CONFIG_CHOPSTIX
+		unsigned long cookie;
+		if (!nd.dentry->d_cookie)
+			get_dcookie(nd.dentry, nd.mnt, &cookie);
+#endif
+
 		file = ERR_PTR(-EACCES);
 		if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
 		    S_ISREG(inode->i_mode)) {
diff -Nurb linux-2.6.22-580/include/linux/arrays.h linux-2.6.22-590/include/linux/arrays.h
--- linux-2.6.22-580/include/linux/arrays.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.22-590/include/linux/arrays.h	2008-04-30 09:29:41.000000000 -0400
@@ -0,0 +1,36 @@
+#ifndef __ARRAYS_H__
+#define __ARRAYS_H__
+#include <linux/list.h>
+
+#define SAMPLING_METHOD_DEFAULT 0
+#define SAMPLING_METHOD_LOG 1
+
+/* Every probe has an array handler */
+
+/* XXX - Optimize this structure */
+
+extern void (*rec_event)(void *,unsigned int);
+struct array_handler {
+	struct list_head link;
+	unsigned int (*hash_func)(void *);
+	unsigned int (*sampling_func)(void *,int,void *);
+	unsigned short size;
+	unsigned int threshold;
+	unsigned char **expcount;
+	unsigned int sampling_method;
+	unsigned int **arrays;
+	unsigned int arraysize;
+	unsigned int num_samples[2];
+	void **epoch_samples; /* size-sized lists of samples */
+	unsigned int (*serialize)(void *, void *);
+	unsigned char code[5];
+};
+
+struct event {
+	struct list_head link;
+	void *event_data;
+	unsigned int count;
+	unsigned int event_type;
+	struct task_struct *task;
+};
+#endif
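arrays.h is the heart of the patch: struct event is the wire format between instrumentation sites and the collector, and struct array_handler describes one lossy summary table (hash function, sampling policy, threshold, serializer). The dispatch side is not part of this patch; a plausible sketch of how a collector might fan an incoming event out to the handler registered for its type, where the handler table, its bound, and the epoch convention are all assumptions and only the field names come from the header above:

    /* Sketch only -- handlers[], MAX_EVENT_TYPES, and the epoch
     * convention are assumed; field names come from arrays.h. */
    #include <linux/arrays.h>

    #define MAX_EVENT_TYPES 16		/* assumed bound on event_type */

    static struct array_handler *handlers[MAX_EVENT_TYPES];

    static void dispatch_event(void *data, unsigned int count)
    {
    	struct event *ev = data;
    	struct array_handler *ah;
    	unsigned int slot;

    	if (ev->event_type >= MAX_EVENT_TYPES)
    		return;
    	ah = handlers[ev->event_type];
    	if (!ah)
    		return;

    	/* hash the payload into the lossy table (assumes size != 0) ... */
    	slot = ah->hash_func(ev->event_data) % ah->size;
    	/* ... and let the sampling policy decide whether to keep it */
    	if (ah->sampling_func(ev->event_data, slot, ah))
    		ah->num_samples[0]++;	/* current epoch (assumed) */
    }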
diff -Nurb linux-2.6.22-580/include/linux/mutex.h linux-2.6.22-590/include/linux/mutex.h
--- linux-2.6.22-580/include/linux/mutex.h	2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.22-590/include/linux/mutex.h	2008-04-30 09:45:43.000000000 -0400
@@ -53,6 +53,10 @@
 	struct thread_info	*owner;
 	const char 		*name;
 	void			*magic;
+#else
+#ifdef CONFIG_CHOPSTIX
+	struct thread_info	*owner;
+#endif
 #endif
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map	dep_map;
diff -Nurb linux-2.6.22-580/include/linux/sched.h linux-2.6.22-590/include/linux/sched.h
--- linux-2.6.22-580/include/linux/sched.h	2008-04-30 09:29:26.000000000 -0400
+++ linux-2.6.22-590/include/linux/sched.h	2008-04-30 09:29:41.000000000 -0400
@@ -850,6 +850,10 @@
 #endif
 	unsigned long sleep_avg;
 	unsigned long long timestamp, last_ran;
+#ifdef CONFIG_CHOPSTIX
+	unsigned long last_interrupted, last_ran_j;
+#endif
+
 	unsigned long long sched_time; /* sched_clock time spent running */
 	enum sleep_type sleep_type;
 
diff -Nurb linux-2.6.22-580/kernel/mutex.c linux-2.6.22-590/kernel/mutex.c
--- linux-2.6.22-580/kernel/mutex.c	2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.22-590/kernel/mutex.c	2008-04-30 09:29:41.000000000 -0400
@@ -18,6 +18,16 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/debug_locks.h>
+#include <linux/arrays.h>
+#undef CONFIG_CHOPSTIX
+#ifdef CONFIG_CHOPSTIX
+struct event_spec {
+	unsigned long pc;
+	unsigned long dcookie;
+	unsigned count;
+	unsigned char reason;
+};
+#endif
 
 /*
  * In the DEBUG case we are using the "NULL fastpath" for mutexes,
@@ -43,6 +53,9 @@
 __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
 {
 	atomic_set(&lock->count, 1);
+#ifdef CONFIG_CHOPSTIX
+	lock->owner=NULL;
+#endif
 	spin_lock_init(&lock->wait_lock);
 	INIT_LIST_HEAD(&lock->wait_list);
 
@@ -88,6 +101,7 @@
 	 * The locking fastpath is the 1->0 transition from
 	 * 'unlocked' into 'locked' state.
 	 */
+
 	__mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
 }
 
@@ -168,6 +182,27 @@
 	}
 	__set_task_state(task, state);
 
+#ifdef CONFIG_CHOPSTIX
+		if (rec_event) {
+			if (lock->owner) {
+				struct event event;
+				struct event_spec espec;
+				struct task_struct *p = lock->owner->task;
+				/*spin_lock(&p->alloc_lock);*/
+				espec.reason = 0; /* lock */
+				event.event_data=&espec;
+				event.task = p;
+				espec.pc=lock;
+				event.event_type=5;
+				(*rec_event)(&event, 1);
+				/*spin_unlock(&p->alloc_lock);*/
+
+			}
+			else
+				BUG();
+		}
+#endif
+
 	/* didnt get the lock, go to sleep: */
 	spin_unlock_mutex(&lock->wait_lock, flags);
 	schedule();
@@ -177,6 +212,9 @@
 	/* got the lock - rejoice! */
 	mutex_remove_waiter(lock, &waiter, task_thread_info(task));
 	debug_mutex_set_owner(lock, task_thread_info(task));
+#ifdef CONFIG_CHOPSTIX
+	lock->owner = task_thread_info(task);
+#endif
 
 	/* set it to 0 if there are no waiters left: */
 	if (likely(list_empty(&lock->wait_list)))
@@ -202,6 +240,7 @@
 mutex_lock_nested(struct mutex *lock, unsigned int subclass)
 {
 	might_sleep();
+
 	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass);
 }
 
@@ -211,6 +250,7 @@
 mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)
 {
 	might_sleep();
+
 	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass);
 }
 
@@ -246,6 +286,23 @@
 
 		debug_mutex_wake_waiter(lock, waiter);
 
+#ifdef CONFIG_CHOPSTIX
+		if (rec_event) {
+			if (lock->owner) {
+				struct event event;
+				struct event_spec espec;
+
+				espec.reason = 1; /* unlock */
+				event.event_data=&espec;
+				event.task = lock->owner->task;
+				espec.pc=lock;
+				event.event_type=5;
+				(*rec_event)(&event, 1);
+			}
+			else
+				BUG();
+		}
+#endif
 		wake_up_process(waiter->task);
 	}
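Note the #undef CONFIG_CHOPSTIX at the top of kernel/mutex.c: the mutex instrumentation is deliberately compiled out even when the option is enabled, so the event_type 5 path above is dormant (with the owner writes compiled out, lock->owner would never be maintained in non-debug builds). When enabled, both the lock and unlock sites attribute the event to the lock *holder*, not the waiter, and espec.pc carries the mutex address rather than an instruction pointer. A sketch of decoding such a record in a handler, where the layouts come from the hunks above and the printk is illustrative only:

    /* Sketch: decoding an event_type 5 (mutex) record. */
    #include <linux/kernel.h>
    #include <linux/sched.h>
    #include <linux/arrays.h>

    struct event_spec {			/* as defined for kernel/mutex.c */
    	unsigned long pc;		/* here: the mutex address */
    	unsigned long dcookie;
    	unsigned count;
    	unsigned char reason;		/* 0 = contended lock, 1 = unlock */
    };

    static void report_mutex_event(struct event *ev)
    {
    	struct event_spec *es = ev->event_data;

    	/* both sites set ev->task to the lock holder */
    	printk(KERN_DEBUG "chopstix: mutex %#lx %s by pid %d\n",
    	       es->pc, es->reason ? "released" : "contended", ev->task->pid);
    }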
diff -Nurb linux-2.6.22-580/kernel/sched.c linux-2.6.22-590/kernel/sched.c
--- linux-2.6.22-580/kernel/sched.c	2008-04-30 09:29:26.000000000 -0400
+++ linux-2.6.22-590/kernel/sched.c	2008-04-30 09:29:41.000000000 -0400
@@ -56,6 +56,7 @@
 
 #include <asm/tlb.h>
 #include <asm/unistd.h>
+#include <linux/arrays.h>
 
 #include <linux/vs_sched.h>
 #include <linux/vs_cputime.h>
@@ -431,6 +432,7 @@
 repeat_lock_task:
 	rq = task_rq(p);
+
 	spin_lock(&rq->lock);
 	if (unlikely(rq != task_rq(p))) {
 		spin_unlock(&rq->lock);
 		goto repeat_lock_task;
@@ -1741,6 +1743,10 @@
 	 * event cannot wake it up and insert it on the runqueue either.
 	 */
 	p->state = TASK_RUNNING;
+#ifdef CONFIG_CHOPSTIX
+	p->last_interrupted=0;
+	p->last_ran_j=jiffies;
+#endif
 
 	/*
 	 * Make sure we do not leak PI boosting priority to the child:
@@ -3608,6 +3614,7 @@
 
 #endif
 
+
 static inline int interactive_sleep(enum sleep_type sleep_type)
 {
 	return (sleep_type == SLEEP_INTERACTIVE ||
@@ -3617,16 +3624,28 @@
 /*
  * schedule() is the main scheduler function.
  */
+
+#ifdef CONFIG_CHOPSTIX
+extern void (*rec_event)(void *,unsigned int);
+struct event_spec {
+	unsigned long pc;
+	unsigned long dcookie;
+	unsigned count;
+	unsigned char reason;
+};
+#endif
+
 asmlinkage void __sched schedule(void)
 {
 	struct task_struct *prev, *next;
 	struct prio_array *array;
 	struct list_head *queue;
 	unsigned long long now;
-	unsigned long run_time;
+	unsigned long run_time, diff;
 	int cpu, idx, new_prio;
 	long *switch_count;
 	struct rq *rq;
+	int sampling_reason;
 
 	/*
 	 * Test if we are atomic.  Since do_exit() needs to call into
@@ -3680,6 +3699,7 @@
 	switch_count = &prev->nivcsw;
 	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
 		switch_count = &prev->nvcsw;
+
 		if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
 				unlikely(signal_pending(prev))))
 			prev->state = TASK_RUNNING;
@@ -3689,6 +3709,14 @@
 				vx_uninterruptible_inc(prev);
 			}
 			deactivate_task(prev, rq);
+#ifdef CONFIG_CHOPSTIX
+			if (prev->state & TASK_UNINTERRUPTIBLE) {
+				prev->last_interrupted=jiffies;
+			}
+			else if (prev->state & TASK_INTERRUPTIBLE) {
+				prev->last_interrupted=-1;
+			}
+#endif
 		}
 	}
 
@@ -3765,6 +3793,39 @@
 		prev->sleep_avg = 0;
 	prev->timestamp = prev->last_ran = now;
 
+#ifdef CONFIG_CHOPSTIX
+	/* Run only if the Chopstix module so decrees it */
+	if (rec_event) {
+		prev->last_ran_j = jiffies;
+		if (next->last_interrupted!=-1) {
+			if (next->last_interrupted) {
+				diff = (jiffies-next->last_interrupted);
+				sampling_reason = 0;
+			}
+			else {
+				diff = jiffies-next->last_ran_j;
+				sampling_reason = 1;
+			}
+
+			if (diff > HZ/5) {
+				struct event event;
+				struct event_spec espec;
+				unsigned long eip;
+
+				espec.reason = sampling_reason;
+				eip = next->thread.esp & 4095;
+				event.event_data=&espec;
+				event.task=next;
+				espec.pc=eip;
+				event.event_type=2;
+				/* index in the event array currently set up */
+				/* make sure the counters are loaded in the order we want them to show up*/
+				(*rec_event)(&event, diff);
+			}
+		}
+		next->last_interrupted=0;
+	}
+#endif
 	sched_info_switch(prev, next);
 	if (likely(prev != next)) {
 		next->timestamp = next->last_ran = now;
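The scheduler hunks above implement delay sampling. deactivate_task() timestamps the moment a task blocks: last_interrupted = jiffies for uninterruptible sleep, or the sentinel -1 for interruptible sleep, which is treated as voluntary and ignored. When the task is next switched in, an event_type 2 record fires if the delay exceeds HZ/5 jiffies, i.e. 200 ms regardless of the configured HZ, with reason 0 (was blocked uninterruptibly) or reason 1 (was runnable but waiting for the CPU). Two quirks are worth noting: espec.pc carries the low 12 bits of the task's saved kernel stack pointer (next->thread.esp & 4095), not an instruction address, and the delay reaches the collector in jiffies via the count argument. A handler can convert it back with the standard helper:

    /* Sketch: the count argument of an event_type 2 record is a
     * jiffies delta; HZ/5 is 200 ms at any HZ. */
    #include <linux/jiffies.h>

    static unsigned int sched_delay_ms(unsigned int count)
    {
    	return jiffies_to_msecs(count);
    }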
@@ -4664,6 +4725,7 @@
 	get_task_struct(p);
 	read_unlock(&tasklist_lock);
 
+
 	retval = -EPERM;
 	if ((current->euid != p->euid) && (current->euid != p->uid) &&
 			!capable(CAP_SYS_NICE))
@@ -5032,6 +5094,7 @@
 	jiffies_to_timespec(p->policy == SCHED_FIFO ?
 				0 : task_timeslice(p), &t);
 	read_unlock(&tasklist_lock);
+
 	retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
 out_nounlock:
 	return retval;
@@ -7275,3 +7338,9 @@
 }
 #endif
 
+
+#ifdef CONFIG_CHOPSTIX
+void (*rec_event)(void *,unsigned int);
+EXPORT_SYMBOL(rec_event);
+EXPORT_SYMBOL(in_sched_functions);
+#endif
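rec_event itself is defined and exported here, at the bottom of sched.c, as a bare function pointer: the Chopstix collector is an out-of-tree module (hence the depends on MODULES in the Kconfig hunk) that installs its handler at load time. A minimal sketch of such a module follows; the module name and handler are ours, only rec_event comes from the patch, and since the patch provides no synchronization we assume that clearing the pointer plus an RCU-sched grace period is enough to drain in-flight callers:

    /* Sketch of a collector module attaching to the exported hook. */
    #include <linux/module.h>
    #include <linux/rcupdate.h>
    #include <linux/arrays.h>

    static void my_rec_event(void *data, unsigned int count)
    {
    	struct event *ev = data;
    	/* summarize ev into the lossy arrays here; this runs in
    	 * atomic context (e.g. inside schedule()), so no sleeping */
    	(void)ev;
    }

    static int __init chopstix_attach(void)
    {
    	rec_event = my_rec_event;
    	return 0;
    }

    static void __exit chopstix_detach(void)
    {
    	rec_event = NULL;
    	synchronize_sched();	/* our addition: drain in-flight callers */
    }

    module_init(chopstix_attach);
    module_exit(chopstix_detach);
    MODULE_LICENSE("GPL");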
diff -Nurb linux-2.6.22-580/mm/slab.c linux-2.6.22-590/mm/slab.c
--- linux-2.6.22-580/mm/slab.c	2008-04-30 09:29:26.000000000 -0400
+++ linux-2.6.22-590/mm/slab.c	2008-04-30 09:29:41.000000000 -0400
@@ -110,11 +110,13 @@
 #include	<linux/fault-inject.h>
 #include	<linux/rtmutex.h>
 #include	<linux/reciprocal_div.h>
+#include	<linux/arrays.h>
 
 #include	<asm/cacheflush.h>
 #include	<asm/tlbflush.h>
 #include	<asm/page.h>
 
+
 /*
  * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
  *		  0 for faster, smaller code (especially in the critical paths).
@@ -249,6 +251,14 @@
 	void *addr;
 };
 
+extern void (*rec_event)(void *,unsigned int);
+struct event_spec {
+	unsigned long pc;
+	unsigned long dcookie;
+	unsigned count;
+	unsigned char reason;
+};
+
 /*
  * struct array_cache
  *
@@ -3443,6 +3453,19 @@
 	local_irq_restore(save_flags);
 	objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
 	prefetchw(objp);
+#ifdef CONFIG_CHOPSTIX
+	if (rec_event && objp) {
+		struct event event;
+		struct event_spec espec;
+
+		espec.reason = 0; /* alloc */
+		event.event_data=&espec;
+		event.task = current;
+		espec.pc=caller;
+		event.event_type=4;
+		(*rec_event)(&event, cachep->buffer_size);
+	}
+#endif
 
 	return objp;
 }
@@ -3549,13 +3572,26 @@
 * Release an obj back to its cache. If the obj has a constructed state, it must
 * be in this state _before_ it is released.  Called with disabled ints.
 */
-static inline void __cache_free(struct kmem_cache *cachep, void *objp)
+static inline void __cache_free(struct kmem_cache *cachep, void *objp, void *caller)
 {
 	struct array_cache *ac = cpu_cache_get(cachep);
 
 	check_irq_off();
-	objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
+	objp = cache_free_debugcheck(cachep, objp, caller);
 	vx_slab_free(cachep);
 
+#ifdef CONFIG_CHOPSTIX
+	if (rec_event && objp) {
+		struct event event;
+		struct event_spec espec;
+
+		espec.reason = 1; /* free */
+		event.event_data=&espec;
+		event.task = current;
+		espec.pc=caller;
+		event.event_type=4;
+		(*rec_event)(&event, cachep->buffer_size);
+	}
+#endif
 	if (cache_free_alien(cachep, objp))
 		return;
@@ -3651,16 +3687,19 @@
 					   __builtin_return_address(0));
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);
-
 static __always_inline void *
 __do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
 {
 	struct kmem_cache *cachep;
+	void *ret;
+
 
 	cachep = kmem_find_general_cachep(size, flags);
 	if (unlikely(cachep == NULL))
 		return NULL;
-	return kmem_cache_alloc_node(cachep, flags, node);
+	ret = kmem_cache_alloc_node(cachep, flags, node);
+
+	return ret;
 }
 
 #ifdef CONFIG_DEBUG_SLAB
@@ -3696,6 +3735,7 @@
 			  void *caller)
 {
 	struct kmem_cache *cachep;
+	void *ret;
 
 	/* If you want to save a few bytes .text space: replace
 	 * __ with kmem_.
@@ -3705,9 +3745,10 @@
 	cachep = __find_general_cachep(size, flags);
 	if (unlikely(cachep == NULL))
 		return NULL;
-	return __cache_alloc(cachep, flags, caller);
-}
+	ret = __cache_alloc(cachep, flags, caller);
 
+	return ret;
+}
 
 #ifdef CONFIG_DEBUG_SLAB
 void *__kmalloc(size_t size, gfp_t flags)
@@ -3723,10 +3764,17 @@
 EXPORT_SYMBOL(__kmalloc_track_caller);
 
 #else
+#ifdef CONFIG_CHOPSTIX
+void *__kmalloc(size_t size, gfp_t flags)
+{
+	return __do_kmalloc(size, flags, __builtin_return_address(0));
+}
+#else
 void *__kmalloc(size_t size, gfp_t flags)
 {
 	return __do_kmalloc(size, flags, NULL);
 }
+#endif
 EXPORT_SYMBOL(__kmalloc);
 #endif
 
@@ -3792,7 +3840,7 @@
 	local_irq_save(flags);
 	debug_check_no_locks_freed(objp, obj_size(cachep));
-	__cache_free(cachep, objp);
+	__cache_free(cachep, objp,__builtin_return_address(0));
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(kmem_cache_free);
@@ -3817,7 +3865,7 @@
 	kfree_debugcheck(objp);
 	c = virt_to_cache(objp);
 	debug_check_no_locks_freed(objp, obj_size(c));
-	__cache_free(c, (void *)objp);
+	__cache_free(c, (void *)objp,__builtin_return_address(0));
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(kfree);
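Taken together, the patch instruments five event sources: hardware-counter samples diverted from OProfile, scheduling delays, block I/O, slab allocation, and mutex contention. The event_type values scattered through the hunks above, collected into one reference enum (the names are ours; the patch passes bare integers, and OProfile samples reuse their counter index as the type):

    /* Reference only -- names are ours, values are the patch's. */
    enum chopstix_event_type {
    	/* low values: OProfile samples keep their counter index */
    	CHOPSTIX_EVENT_SCHED_DELAY = 2,	/* sched.c: reason 0 = blocked, 1 = runnable wait */
    	CHOPSTIX_EVENT_BLOCK_IO    = 3,	/* ll_rw_blk.c/bio.c: reason 0 = request, 1 = response */
    	CHOPSTIX_EVENT_SLAB        = 4,	/* slab.c: reason 0 = alloc, 1 = free */
    	CHOPSTIX_EVENT_MUTEX       = 5,	/* mutex.c: reason 0 = contended, 1 = unlock */
    };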