1 diff --git a/arch/Kconfig b/arch/Kconfig
2 index 1d07625..7d503e4 100644
5 @@ -13,9 +13,18 @@ config OPROFILE
10 + bool "Chopstix (PlanetLab)"
11 + depends on MODULES && OPROFILE
13 + Chopstix allows you to monitor various events by summarizing them
14 + in lossy data structures and transferring these data structures
15 + into user space. If in doubt, say "N".
23 depends on KALLSYMS && MODULES
24 diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
25 index 6649d09..5508d20 100644
26 --- a/arch/x86/kernel/asm-offsets_32.c
27 +++ b/arch/x86/kernel/asm-offsets_32.c
29 #include <linux/signal.h>
30 #include <linux/personality.h>
31 #include <linux/suspend.h>
32 +#include <linux/arrays.h>
33 #include <linux/kbuild.h>
34 #include <asm/ucontext.h>
37 #include <linux/lguest.h>
38 #include "../../../drivers/lguest/lg.h"
40 +#ifdef CONFIG_CHOPSTIX
41 +#define STACKOFFSET(sym, str, mem) \
42 + DEFINE(sym, offsetof(struct str, mem)-sizeof(struct str));
46 + unsigned long dcookie;
48 + unsigned int number;
52 /* workaround for a warning with -Wmissing-prototypes */
55 @@ -50,6 +63,18 @@ void foo(void)
56 OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
59 +#ifdef CONFIG_CHOPSTIX
60 + STACKOFFSET(TASK_thread, task_struct, thread);
61 + STACKOFFSET(THREAD_esp, thread_struct, sp);
62 + STACKOFFSET(EVENT_event_data, event, event_data);
63 + STACKOFFSET(EVENT_task, event, task);
64 + STACKOFFSET(EVENT_event_type, event, event_type);
65 + STACKOFFSET(SPEC_number, event_spec, number);
66 + DEFINE(EVENT_SIZE, sizeof(struct event));
67 + DEFINE(SPEC_SIZE, sizeof(struct event_spec));
68 + DEFINE(SPEC_EVENT_SIZE, sizeof(struct event_spec)+sizeof(struct event));
71 OFFSET(TI_task, thread_info, task);
72 OFFSET(TI_exec_domain, thread_info, exec_domain);
73 OFFSET(TI_flags, thread_info, flags);
74 diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
75 index 109792b..92a4f72 100644
76 --- a/arch/x86/kernel/entry_32.S
77 +++ b/arch/x86/kernel/entry_32.S
78 @@ -426,6 +426,34 @@ ENTRY(system_call)
79 cmpl $(nr_syscalls), %eax
82 +#ifdef CONFIG_CHOPSTIX
83 + /* Move Chopstix syscall probe here */
84 + /* Save and clobber: eax, ecx, ebp */
89 + subl $SPEC_EVENT_SIZE, %esp
90 + movl rec_event, %ecx
93 + # struct event is first, just below %ebp
94 + movl %eax, (SPEC_number-EVENT_SIZE)(%ebp)
95 + leal -SPEC_EVENT_SIZE(%ebp), %eax
96 + movl %eax, EVENT_event_data(%ebp)
97 + movl $7, EVENT_event_type(%ebp)
98 + movl rec_event, %edx
100 + leal -EVENT_SIZE(%ebp), %eax
104 + addl $SPEC_EVENT_SIZE, %esp
110 call *sys_call_table(,%eax,4)
111 movl %eax,PT_EAX(%esp) # store the return value
113 diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
114 index 3384255..cd535c7 100644
115 --- a/arch/x86/mm/fault.c
116 +++ b/arch/x86/mm/fault.c
117 @@ -79,6 +79,16 @@ static inline int notify_page_fault(struct pt_regs *regs)
121 +#ifdef CONFIG_CHOPSTIX
122 +extern void (*rec_event)(void *,unsigned int);
125 + unsigned long dcookie;
127 + unsigned char reason;
133 * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
134 diff --git a/block/blk-core.c b/block/blk-core.c
135 index 2cba5ef..7fc6c2b 100644
136 --- a/block/blk-core.c
137 +++ b/block/blk-core.c
139 #include <linux/cpu.h>
140 #include <linux/blktrace_api.h>
141 #include <linux/fault-inject.h>
142 +#include <linux/arrays.h>
146 +#ifdef CONFIG_CHOPSTIX
147 +extern void (*rec_event)(void *,unsigned int);
150 + unsigned long dcookie;
152 + unsigned char reason;
156 static int __make_request(struct request_queue *q, struct bio *bio);
159 @@ -1414,6 +1425,24 @@ end_io:
163 +#ifdef CONFIG_CHOPSTIX
165 + struct event event;
166 + struct event_spec espec;
169 + espec.reason = 0;/*request */
171 + eip = bio->bi_end_io;
172 + event.event_data=&espec;
174 + event.event_type=3;
175 + /* index in the event array currently set up */
176 + /* make sure the counters are loaded in the order we want them to show up*/
177 + (*rec_event)(&event, bio->bi_size);
181 ret = q->make_request_fn(q, bio);
184 diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
185 index 7ba78e6..ef379fb 100644
186 --- a/drivers/oprofile/cpu_buffer.c
187 +++ b/drivers/oprofile/cpu_buffer.c
189 #include <linux/oprofile.h>
190 #include <linux/vmalloc.h>
191 #include <linux/errno.h>
192 +#include <linux/arrays.h>
194 #include "event_buffer.h"
195 #include "cpu_buffer.h"
196 @@ -147,6 +148,17 @@ static void increment_head(struct oprofile_cpu_buffer * b)
200 +#ifdef CONFIG_CHOPSTIX
204 + unsigned long dcookie;
208 +extern void (*rec_event)(void *,unsigned int);
212 add_sample(struct oprofile_cpu_buffer * cpu_buf,
213 unsigned long pc, unsigned long event)
214 @@ -251,7 +263,24 @@ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
215 int is_kernel = !user_mode(regs);
216 unsigned long pc = profile_pc(regs);
218 +#ifdef CONFIG_CHOPSTIX
221 + struct event_spec espec;
222 + esig.task = current;
225 + esig.event_data=&espec;
226 + esig.event_type=event; /* index in the event array currently set up */
227 + /* make sure the counters are loaded in the order we want them to show up*/
228 + (*rec_event)(&esig, 1);
231 + oprofile_add_ext_sample(pc, regs, event, is_kernel);
234 oprofile_add_ext_sample(pc, regs, event, is_kernel);
238 void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
239 diff --git a/fs/bio.c b/fs/bio.c
240 index 3cba7ae..2f16e17 100644
244 #include <linux/workqueue.h>
245 #include <linux/blktrace_api.h>
246 #include <scsi/sg.h> /* for struct sg_iovec */
247 +#include <linux/arrays.h>
249 static struct kmem_cache *bio_slab __read_mostly;
251 @@ -44,6 +45,7 @@ static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
257 * fs_bio_set is the bio_set containing bio and iovec memory pools used by
258 * IO code that does not need private memory pools.
259 @@ -1171,6 +1173,17 @@ void bio_check_pages_dirty(struct bio *bio)
263 +#ifdef CONFIG_CHOPSTIX
266 + unsigned long dcookie;
268 + unsigned char reason;
271 +extern void (*rec_event)(void *,unsigned int);
275 * bio_endio - end I/O on a bio
277 @@ -1192,6 +1205,24 @@ void bio_endio(struct bio *bio, int error)
278 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
281 +#ifdef CONFIG_CHOPSTIX
283 + struct event event;
284 + struct event_spec espec;
287 + espec.reason = 1;/*response */
289 + eip = bio->bi_end_io;
290 + event.event_data=&espec;
292 + event.event_type=3;
293 + /* index in the event array currently set up */
294 + /* make sure the counters are loaded in the order we want them to show up*/
295 + (*rec_event)(&event, bio->bi_size);
300 bio->bi_end_io(bio, error);
302 diff --git a/fs/exec.c b/fs/exec.c
303 index e557406..19bc9d8 100644
307 #include <linux/fdtable.h>
308 #include <linux/mm.h>
309 #include <linux/stat.h>
310 +#include <linux/dcookies.h>
311 #include <linux/fcntl.h>
312 #include <linux/smp_lock.h>
313 #include <linux/swap.h>
314 @@ -698,6 +699,13 @@ struct file *open_exec(const char *name)
318 + #ifdef CONFIG_CHOPSTIX
319 + unsigned long cookie;
320 + extern void (*rec_event)(void *, unsigned int);
321 + if (rec_event && !nd.path.dentry->d_cookie)
322 + get_dcookie(&nd.path, &cookie);
328 diff --git a/include/linux/arrays.h b/include/linux/arrays.h
330 index 0000000..7641a3c
332 +++ b/include/linux/arrays.h
334 +#ifndef __ARRAYS_H__
335 +#define __ARRAYS_H__
336 +#include <linux/list.h>
338 +#define SAMPLING_METHOD_DEFAULT 0
339 +#define SAMPLING_METHOD_LOG 1
341 +#define DEFAULT_ARRAY_SIZE 2048
343 +/* Every probe has an array handler */
345 +/* XXX - Optimize this structure */
347 +extern void (*rec_event)(void *,unsigned int);
348 +struct array_handler {
349 + struct list_head link;
350 + unsigned int (*hash_func)(void *);
351 + unsigned int (*sampling_func)(void *,int,void *);
352 + unsigned short size;
353 + unsigned int threshold;
354 + unsigned char **expcount;
355 + unsigned int sampling_method;
356 + unsigned int **arrays;
357 + unsigned int arraysize;
358 + unsigned int num_samples[2];
359 + void **epoch_samples; /* size-sized lists of samples */
360 + unsigned int (*serialize)(void *, void *);
361 + unsigned char code[5];
362 + unsigned int last_threshold;
366 + struct list_head link;
368 + unsigned int count;
369 + unsigned int event_type;
370 + struct task_struct *task;
373 diff --git a/include/linux/mutex.h b/include/linux/mutex.h
374 index bc6da10..a385919 100644
375 --- a/include/linux/mutex.h
376 +++ b/include/linux/mutex.h
377 @@ -55,6 +55,9 @@ struct mutex {
381 +#ifdef CONFIG_CHOPSTIX
382 + struct thread_info *owner;
384 #ifdef CONFIG_DEBUG_LOCK_ALLOC
385 struct lockdep_map dep_map;
387 diff --git a/include/linux/sched.h b/include/linux/sched.h
388 index 891fbda..05ba57f 100644
389 --- a/include/linux/sched.h
390 +++ b/include/linux/sched.h
391 @@ -1134,6 +1134,11 @@ struct task_struct {
392 cputime_t utime, stime, utimescaled, stimescaled;
394 cputime_t prev_utime, prev_stime;
396 + #ifdef CONFIG_CHOPSTIX
397 + unsigned long last_interrupted, last_ran_j;
400 unsigned long nvcsw, nivcsw; /* context switch counts */
401 struct timespec start_time; /* monotonic time */
402 struct timespec real_start_time; /* boot based time */
403 diff --git a/kernel/mutex.c b/kernel/mutex.c
404 index 12c779d..fcc074f 100644
408 #include <linux/spinlock.h>
409 #include <linux/interrupt.h>
410 #include <linux/debug_locks.h>
411 +#include <linux/arrays.h>
413 +#ifdef CONFIG_CHOPSTIX
416 + unsigned long dcookie;
418 + unsigned char reason;
423 * In the DEBUG case we are using the "NULL fastpath" for mutexes,
424 @@ -44,6 +54,9 @@ void
425 __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
427 atomic_set(&lock->count, 1);
428 +#ifdef CONFIG_CHOPSTIX
429 + lock->owner = NULL;
431 spin_lock_init(&lock->wait_lock);
432 INIT_LIST_HEAD(&lock->wait_list);
434 @@ -177,6 +190,25 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
436 __set_task_state(task, state);
438 +#if 0 && CONFIG_CHOPSTIX
441 + struct event event;
442 + struct event_spec espec;
443 + struct task_struct *p = lock->owner->task;
445 + espec.reason = 0; /* lock */
446 + event.event_data = &espec;
449 + event.event_type = 5;
450 + (*rec_event)(&event, 1);
457 /* didnt get the lock, go to sleep: */
458 spin_unlock_mutex(&lock->wait_lock, flags);
460 @@ -189,6 +221,10 @@ done:
461 mutex_remove_waiter(lock, &waiter, task_thread_info(task));
462 debug_mutex_set_owner(lock, task_thread_info(task));
464 +#ifdef CONFIG_CHOPSTIX
465 + lock->owner = task_thread_info(task);
468 /* set it to 0 if there are no waiters left: */
469 if (likely(list_empty(&lock->wait_list)))
470 atomic_set(&lock->count, 0);
471 @@ -257,6 +293,25 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
473 debug_mutex_wake_waiter(lock, waiter);
475 +#if 0 && CONFIG_CHOPSTIX
478 + struct event event;
479 + struct event_spec espec;
480 + struct task_struct *p = lock->owner->task;
482 + espec.reason = 1; /* unlock */
483 + event.event_data = &espec;
486 + event.event_type = 5;
487 + (*rec_event)(&event, 1);
494 wake_up_process(waiter->task);
497 diff --git a/kernel/sched.c b/kernel/sched.c
498 index 2d66cdd..347ce2a 100644
502 * 1998-11-19 Implemented schedule_timeout() and related stuff
503 * by Andrea Arcangeli
504 * 2002-01-04 New ultra-scalable O(1) scheduler by Ingo Molnar:
505 - * hybrid priority-list and round-robin design with
506 + * hybrid priority-list and round-robin design with
507 * an array-switch method of distributing timeslices
508 * and per-CPU runqueues. Cleanups and useful suggestions
509 * by Davide Libenzi, preemptible kernel bits by Robert Love.
511 #include <linux/ftrace.h>
512 #include <linux/vs_sched.h>
513 #include <linux/vs_cvirt.h>
514 +#include <linux/arrays.h>
517 #include <asm/irq_regs.h>
519 #include "sched_cpupri.h"
521 +#define INTERRUPTIBLE -1
525 * Convert user-nice values [ -20 ... 0 ... 19 ]
526 * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
527 @@ -2368,6 +2372,10 @@ static void __sched_fork(struct task_struct *p)
528 INIT_HLIST_HEAD(&p->preempt_notifiers);
531 +#ifdef CONFIG_CHOPSTIX
532 + p->last_ran_j = jiffies;
533 + p->last_interrupted = INTERRUPTIBLE;
536 * We mark the process as running here, but have not actually
537 * inserted it onto the runqueue yet. This guarantees that
538 @@ -4428,6 +4436,30 @@ pick_next_task(struct rq *rq, struct task_struct *prev)
542 +#ifdef CONFIG_CHOPSTIX
543 +void (*rec_event)(void *,unsigned int) = NULL;
544 +EXPORT_SYMBOL(rec_event);
545 +EXPORT_SYMBOL(in_sched_functions);
549 + unsigned long dcookie;
550 + unsigned int count;
551 + unsigned int reason;
554 +/* To support safe calling from asm */
555 +asmlinkage void rec_event_asm (struct event *event_signature_in, unsigned int count) {
556 + struct pt_regs *regs;
557 + struct event_spec *es = event_signature_in->event_data;
558 + regs = task_pt_regs(current);
559 + event_signature_in->task=current;
561 + event_signature_in->count=1;
562 + (*rec_event)(event_signature_in, count);
567 * schedule() is the main scheduler function.
569 @@ -4482,6 +4514,54 @@ need_resched_nonpreemptible:
570 next = pick_next_task(rq, prev);
572 if (likely(prev != next)) {
574 +#ifdef CONFIG_CHOPSTIX
575 + /* Run only if the Chopstix module so decrees it */
577 + unsigned long diff;
578 + int sampling_reason;
579 + prev->last_ran_j = jiffies;
580 + if (next->last_interrupted!=INTERRUPTIBLE) {
581 + if (next->last_interrupted!=RUNNING) {
582 + diff = (jiffies-next->last_interrupted);
583 + sampling_reason = 0;/* BLOCKING */
586 + diff = jiffies-next->last_ran_j;
587 + sampling_reason = 1;/* PREEMPTION */
590 + if (diff >= HZ/10) {
591 + struct event event;
592 + struct event_spec espec;
593 + struct pt_regs *regs;
594 + regs = task_pt_regs(current);
596 + espec.reason = sampling_reason;
597 + event.event_data=&espec;
600 + event.event_type=2;
601 + /* index in the event array currently set up */
602 + /* make sure the counters are loaded in the order we want them to show up*/
603 + (*rec_event)(&event, diff);
606 + /* next has been elected to run */
607 + next->last_interrupted=0;
609 + /* An uninterruptible process just yielded. Record the current jiffy */
610 + if (prev->state & TASK_UNINTERRUPTIBLE) {
611 + prev->last_interrupted=jiffies;
613 + /* An interruptible process just yielded, or it got preempted.
614 + * Mark it as interruptible */
615 + else if (prev->state & TASK_INTERRUPTIBLE) {
616 + prev->last_interrupted=INTERRUPTIBLE;
621 sched_info_switch(prev, next);
624 @@ -5369,6 +5449,7 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask)
626 read_unlock(&tasklist_lock);
630 if ((current->euid != p->euid) && (current->euid != p->uid) &&
631 !capable(CAP_SYS_NICE))
632 diff --git a/mm/memory.c b/mm/memory.c
633 index a258b98..1c1a375 100644
638 #include <linux/swapops.h>
639 #include <linux/elf.h>
640 +#include <linux/arrays.h>
642 #include "internal.h"
644 @@ -2753,6 +2754,16 @@ out:
648 +#ifdef CONFIG_CHOPSTIX
649 +extern void (*rec_event)(void *,unsigned int);
652 + unsigned long dcookie;
654 + unsigned char reason;
659 * By the time we get here, we already hold the mm semaphore
661 @@ -2782,6 +2793,24 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
665 +#ifdef CONFIG_CHOPSTIX
667 + struct event event;
668 + struct event_spec espec;
669 + struct pt_regs *regs;
671 + regs = task_pt_regs(current);
672 + pc = regs->ip & (unsigned int) ~4095;
674 + espec.reason = 0; /* alloc */
675 + event.event_data=&espec;
676 + event.task = current;
678 + event.event_type=6;
679 + (*rec_event)(&event, 1);
683 return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
686 diff --git a/mm/slab.c b/mm/slab.c
687 index 88dd5a5..3486baa 100644
691 #include <linux/fault-inject.h>
692 #include <linux/rtmutex.h>
693 #include <linux/reciprocal_div.h>
694 +#include <linux/arrays.h>
695 #include <linux/debugobjects.h>
697 #include <asm/cacheflush.h>
698 @@ -248,6 +249,16 @@ struct slab_rcu {
702 +#ifdef CONFIG_CHOPSTIX
703 +extern void (*rec_event)(void *,unsigned int);
706 + unsigned long dcookie;
708 + unsigned char reason;
715 @@ -3469,6 +3480,19 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
716 local_irq_restore(save_flags);
717 objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
719 +#ifdef CONFIG_CHOPSTIX
720 + if (rec_event && objp) {
721 + struct event event;
722 + struct event_spec espec;
724 + espec.reason = 0; /* alloc */
725 + event.event_data=&espec;
726 + event.task = current;
728 + event.event_type=4;
729 + (*rec_event)(&event, cachep->buffer_size);
733 if (unlikely((flags & __GFP_ZERO) && objp))
734 memset(objp, 0, obj_size(cachep));
735 @@ -3578,12 +3602,26 @@ free_done:
736 * Release an obj back to its cache. If the obj has a constructed state, it must
737 * be in this state _before_ it is released. Called with disabled ints.
739 -static inline void __cache_free(struct kmem_cache *cachep, void *objp)
740 +static inline void __cache_free(struct kmem_cache *cachep, void *objp, void *caller)
742 struct array_cache *ac = cpu_cache_get(cachep);
745 - objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
746 + objp = cache_free_debugcheck(cachep, objp, caller);
747 + #ifdef CONFIG_CHOPSTIX
748 + if (rec_event && objp) {
749 + struct event event;
750 + struct event_spec espec;
752 + espec.reason = 1; /* free */
753 + event.event_data=&espec;
754 + event.task = current;
756 + event.event_type=4;
757 + (*rec_event)(&event, cachep->buffer_size);
761 vx_slab_free(cachep);
764 @@ -3741,10 +3779,17 @@ void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller)
765 EXPORT_SYMBOL(__kmalloc_track_caller);
768 +#ifdef CONFIG_CHOPSTIX
769 +void *__kmalloc(size_t size, gfp_t flags)
771 + return __do_kmalloc(size, flags, __builtin_return_address(0));
774 void *__kmalloc(size_t size, gfp_t flags)
776 return __do_kmalloc(size, flags, NULL);
779 EXPORT_SYMBOL(__kmalloc);
782 @@ -3764,7 +3809,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
783 debug_check_no_locks_freed(objp, obj_size(cachep));
784 if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
785 debug_check_no_obj_freed(objp, obj_size(cachep));
786 - __cache_free(cachep, objp);
787 + __cache_free(cachep, objp,__builtin_return_address(0));
788 local_irq_restore(flags);
790 EXPORT_SYMBOL(kmem_cache_free);
791 @@ -3790,7 +3835,7 @@ void kfree(const void *objp)
792 c = virt_to_cache(objp);
793 debug_check_no_locks_freed(objp, obj_size(c));
794 debug_check_no_obj_freed(objp, obj_size(c));
795 - __cache_free(c, (void *)objp);
796 + __cache_free(c, (void *)objp,__builtin_return_address(0));
797 local_irq_restore(flags);
799 EXPORT_SYMBOL(kfree);