1 diff --git a/arch/Kconfig b/arch/Kconfig
2 index 4e312ff..ef6a721 100644
5 @@ -43,6 +43,14 @@ config OPROFILE_EVENT_MULTIPLEX
10 + bool "Chopstix (PlanetLab)"
11 + depends on MODULES && OPROFILE
13 + Chopstix allows you to monitor various events by summarizing them
14 + in lossy data structures and transferring these data structures
15 + into user space. If in doubt, say "N".
20 diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
21 index dfdbf64..29c79b8 100644
22 --- a/arch/x86/kernel/asm-offsets_32.c
23 +++ b/arch/x86/kernel/asm-offsets_32.c
25 #include <linux/signal.h>
26 #include <linux/personality.h>
27 #include <linux/suspend.h>
28 +#include <linux/arrays.h>
29 #include <linux/kbuild.h>
30 #include <asm/ucontext.h>
31 #include <asm/sigframe.h>
33 #include <linux/lguest.h>
34 #include "../../../drivers/lguest/lg.h"
36 +#ifdef CONFIG_CHOPSTIX
37 +#define STACKOFFSET(sym, str, mem) \
38 + DEFINE(sym, offsetof(struct str, mem)-sizeof(struct str));
42 + unsigned long dcookie;
44 + unsigned int number;
48 /* workaround for a warning with -Wmissing-prototypes */
51 @@ -51,6 +64,18 @@ void foo(void)
52 OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
55 +#ifdef CONFIG_CHOPSTIX
56 + STACKOFFSET(TASK_thread, task_struct, thread);
57 + STACKOFFSET(THREAD_esp, thread_struct, sp);
58 + STACKOFFSET(EVENT_event_data, event, event_data);
59 + STACKOFFSET(EVENT_task, event, task);
60 + STACKOFFSET(EVENT_event_type, event, event_type);
61 + STACKOFFSET(SPEC_number, event_spec, number);
62 + DEFINE(EVENT_SIZE, sizeof(struct event));
63 + DEFINE(SPEC_SIZE, sizeof(struct event_spec));
64 + DEFINE(SPEC_EVENT_SIZE, sizeof(struct event_spec)+sizeof(struct event));
67 OFFSET(TI_task, thread_info, task);
68 OFFSET(TI_exec_domain, thread_info, exec_domain);
69 OFFSET(TI_flags, thread_info, flags);
70 diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
71 index c097e7d..8eff053 100644
72 --- a/arch/x86/kernel/entry_32.S
73 +++ b/arch/x86/kernel/entry_32.S
74 @@ -526,6 +526,34 @@ ENTRY(system_call)
75 cmpl $(nr_syscalls), %eax
78 +#ifdef CONFIG_CHOPSTIX
79 + /* Move Chopstix syscall probe here */
80 + /* Save and clobber: eax, ecx, ebp */
85 + subl $SPEC_EVENT_SIZE, %esp
86 + movl rec_event, %ecx
89 + # struct event is first, just below %ebp
90 + movl %eax, (SPEC_number-EVENT_SIZE)(%ebp)
91 + leal -SPEC_EVENT_SIZE(%ebp), %eax
92 + movl %eax, EVENT_event_data(%ebp)
93 + movl $7, EVENT_event_type(%ebp)
94 + movl rec_event, %edx
96 + leal -EVENT_SIZE(%ebp), %eax
100 + addl $SPEC_EVENT_SIZE, %esp
106 call *sys_call_table(,%eax,4)
107 movl %eax,PT_EAX(%esp) # store the return value
109 diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
110 index 4302583..85bf9f2 100644
111 --- a/arch/x86/mm/fault.c
112 +++ b/arch/x86/mm/fault.c
113 @@ -62,6 +62,16 @@ static inline int notify_page_fault(struct pt_regs *regs)
117 +#ifdef CONFIG_CHOPSTIX
118 +extern void (*rec_event)(void *,unsigned int);
121 + unsigned long dcookie;
123 + unsigned char reason;
130 diff --git a/block/blk-core.c b/block/blk-core.c
131 index 71da511..1cefcaa 100644
132 --- a/block/blk-core.c
133 +++ b/block/blk-core.c
135 #include <linux/writeback.h>
136 #include <linux/task_io_accounting_ops.h>
137 #include <linux/fault-inject.h>
138 +#include <linux/arrays.h>
140 #define CREATE_TRACE_POINTS
141 #include <trace/events/block.h>
145 +#ifdef CONFIG_CHOPSTIX
146 +extern void (*rec_event)(void *,unsigned int);
149 + unsigned long dcookie;
151 + unsigned char reason;
155 EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
156 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
157 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
158 @@ -1478,6 +1489,24 @@ static inline void __generic_make_request(struct bio *bio)
160 trace_block_bio_queue(q, bio);
162 +#ifdef CONFIG_CHOPSTIX
164 + struct event event;
165 + struct event_spec espec;
168 + espec.reason = 0;/*request */
170 + eip = bio->bi_end_io;
171 + event.event_data=&espec;
173 + event.event_type=3;
174 + /* index in the event array currently set up */
175 + /* make sure the counters are loaded in the order we want them to show up */
176 + (*rec_event)(&event, bio->bi_size);
180 ret = q->make_request_fn(q, bio);
183 diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
184 index a7aae24..9817d91 100644
185 --- a/drivers/oprofile/cpu_buffer.c
186 +++ b/drivers/oprofile/cpu_buffer.c
188 #include <linux/sched.h>
189 #include <linux/oprofile.h>
190 #include <linux/errno.h>
191 +#include <linux/arrays.h>
193 #include "event_buffer.h"
194 #include "cpu_buffer.h"
195 @@ -326,6 +327,17 @@ static inline void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf)
196 cpu_buf->tracing = 0;
199 +#ifdef CONFIG_CHOPSTIX
203 + unsigned long dcookie;
207 +extern void (*rec_event)(void *,unsigned int);
211 __oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
212 unsigned long event, int is_kernel)
213 @@ -360,7 +372,25 @@ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
214 int is_kernel = !user_mode(regs);
215 unsigned long pc = profile_pc(regs);
217 +#ifdef CONFIG_CHOPSTIX
220 + struct event_spec espec;
221 + esig.task = current;
224 + esig.event_data = &espec;
225 + esig.event_type = event; /* index in the event array currently set up */
226 + /* make sure the counters are loaded in the order we want them to show up */
227 + (*rec_event)(&esig, 1);
230 + __oprofile_add_ext_sample(pc, regs, event, is_kernel);
233 __oprofile_add_ext_sample(pc, regs, event, is_kernel);
239 diff --git a/fs/bio.c b/fs/bio.c
240 index e0c9e71..796767d 100644
244 #include <linux/mempool.h>
245 #include <linux/workqueue.h>
246 #include <scsi/sg.h> /* for struct sg_iovec */
247 +#include <linux/arrays.h>
249 #include <trace/events/block.h>
251 @@ -48,6 +49,7 @@ struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
257 * fs_bio_set is the bio_set containing bio and iovec memory pools used by
258 * IO code that does not need private memory pools.
259 @@ -1398,6 +1400,17 @@ void bio_check_pages_dirty(struct bio *bio)
263 +#ifdef CONFIG_CHOPSTIX
266 + unsigned long dcookie;
268 + unsigned char reason;
271 +extern void (*rec_event)(void *,unsigned int);
275 * bio_endio - end I/O on a bio
277 @@ -1419,6 +1432,24 @@ void bio_endio(struct bio *bio, int error)
278 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
281 +#ifdef CONFIG_CHOPSTIX
283 + struct event event;
284 + struct event_spec espec;
287 + espec.reason = 1;/*response */
289 + eip = bio->bi_end_io;
290 + event.event_data=&espec;
292 + event.event_type=3;
293 + /* index in the event array currently set up */
294 + /* make sure the counters are loaded in the order we want them to show up */
295 + (*rec_event)(&event, bio->bi_size);
300 bio->bi_end_io(bio, error);
302 diff --git a/fs/exec.c b/fs/exec.c
303 index 0a049b8..c2296b5 100644
307 #include <linux/fdtable.h>
308 #include <linux/mm.h>
309 #include <linux/stat.h>
310 +#include <linux/dcookies.h>
311 #include <linux/fcntl.h>
312 #include <linux/smp_lock.h>
313 #include <linux/swap.h>
314 @@ -673,6 +674,13 @@ struct file *open_exec(const char *name)
318 +#ifdef CONFIG_CHOPSTIX
319 + unsigned long cookie;
320 + extern void (*rec_event)(void *, unsigned int);
321 + if (rec_event && !(file->f_path.dentry->d_flags & DCACHE_COOKIE))
322 + get_dcookie(&file->f_path, &cookie);
328 diff --git a/include/linux/arrays.h b/include/linux/arrays.h
330 index 0000000..7641a3c
332 +++ b/include/linux/arrays.h
334 +#ifndef __ARRAYS_H__
335 +#define __ARRAYS_H__
336 +#include <linux/list.h>
338 +#define SAMPLING_METHOD_DEFAULT 0
339 +#define SAMPLING_METHOD_LOG 1
341 +#define DEFAULT_ARRAY_SIZE 2048
343 +/* Every probe has an array handler */
345 +/* XXX - Optimize this structure */
347 +extern void (*rec_event)(void *,unsigned int);
348 +struct array_handler {
349 + struct list_head link;
350 + unsigned int (*hash_func)(void *);
351 + unsigned int (*sampling_func)(void *,int,void *);
352 + unsigned short size;
353 + unsigned int threshold;
354 + unsigned char **expcount;
355 + unsigned int sampling_method;
356 + unsigned int **arrays;
357 + unsigned int arraysize;
358 + unsigned int num_samples[2];
359 + void **epoch_samples; /* size-sized lists of samples */
360 + unsigned int (*serialize)(void *, void *);
361 + unsigned char code[5];
362 + unsigned int last_threshold;
366 + struct list_head link;
368 + unsigned int count;
369 + unsigned int event_type;
370 + struct task_struct *task;
373 diff --git a/include/linux/mutex.h b/include/linux/mutex.h
374 index 878cab4..6c21914 100644
375 --- a/include/linux/mutex.h
376 +++ b/include/linux/mutex.h
377 @@ -50,7 +50,7 @@ struct mutex {
379 spinlock_t wait_lock;
380 struct list_head wait_list;
381 -#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP)
382 +#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP) || defined(CONFIG_CHOPSTIX)
383 struct thread_info *owner;
385 #ifdef CONFIG_DEBUG_MUTEXES
386 diff --git a/include/linux/sched.h b/include/linux/sched.h
387 index c9d3cae..dd62888 100644
388 --- a/include/linux/sched.h
389 +++ b/include/linux/sched.h
390 @@ -1349,6 +1349,11 @@ struct task_struct {
391 cputime_t utime, stime, utimescaled, stimescaled;
393 cputime_t prev_utime, prev_stime;
395 + #ifdef CONFIG_CHOPSTIX
396 + unsigned long last_interrupted, last_ran_j;
399 unsigned long nvcsw, nivcsw; /* context switch counts */
400 struct timespec start_time; /* monotonic time */
401 struct timespec real_start_time; /* boot based time */
402 diff --git a/kernel/mutex.c b/kernel/mutex.c
403 index 947b3ad..ae1dc67 100644
407 #include <linux/spinlock.h>
408 #include <linux/interrupt.h>
409 #include <linux/debug_locks.h>
410 +#include <linux/arrays.h>
412 +#ifdef CONFIG_CHOPSTIX
415 + unsigned long dcookie;
417 + unsigned char reason;
422 * In the DEBUG case we are using the "NULL fastpath" for mutexes,
423 @@ -49,6 +59,9 @@ void
424 __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
426 atomic_set(&lock->count, 1);
427 +#ifdef CONFIG_CHOPSTIX
428 + lock->owner = NULL;
430 spin_lock_init(&lock->wait_lock);
431 INIT_LIST_HEAD(&lock->wait_list);
432 mutex_clear_owner(lock);
433 @@ -247,6 +260,25 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
435 __set_task_state(task, state);
437 +#if 0 && CONFIG_CHOPSTIX
440 + struct event event;
441 + struct event_spec espec;
442 + struct task_struct *p = lock->owner->task;
444 + espec.reason = 0; /* lock */
445 + event.event_data = &espec;
448 + event.event_type = 5;
449 + (*rec_event)(&event, 1);
456 /* didnt get the lock, go to sleep: */
457 spin_unlock_mutex(&lock->wait_lock, flags);
458 preempt_enable_no_resched();
459 @@ -261,6 +293,10 @@ done:
460 mutex_remove_waiter(lock, &waiter, current_thread_info());
461 mutex_set_owner(lock);
463 +#ifdef CONFIG_CHOPSTIX
464 + lock->owner = task_thread_info(task);
467 /* set it to 0 if there are no waiters left: */
468 if (likely(list_empty(&lock->wait_list)))
469 atomic_set(&lock->count, 0);
470 @@ -331,6 +367,25 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
472 debug_mutex_wake_waiter(lock, waiter);
474 +#if 0 && CONFIG_CHOPSTIX
477 + struct event event;
478 + struct event_spec espec;
479 + struct task_struct *p = lock->owner->task;
481 + espec.reason = 1; /* unlock */
482 + event.event_data = &espec;
485 + event.event_type = 5;
486 + (*rec_event)(&event, 1);
493 wake_up_process(waiter->task);
496 diff --git a/kernel/sched.c b/kernel/sched.c
497 index 90b63b8..43b728e 100644
501 * 1998-11-19 Implemented schedule_timeout() and related stuff
502 * by Andrea Arcangeli
503 * 2002-01-04 New ultra-scalable O(1) scheduler by Ingo Molnar:
504 - * hybrid priority-list and round-robin design with
505 + * hybrid priority-list and round-robin design with
506 * an array-switch method of distributing timeslices
507 * and per-CPU runqueues. Cleanups and useful suggestions
508 * by Davide Libenzi, preemptible kernel bits by Robert Love.
510 #include <linux/ftrace.h>
511 #include <linux/vs_sched.h>
512 #include <linux/vs_cvirt.h>
513 +#include <linux/arrays.h>
516 #include <asm/irq_regs.h>
518 #include "sched_cpupri.h"
520 +#define INTERRUPTIBLE -1
523 #define CREATE_TRACE_POINTS
524 #include <trace/events/sched.h>
526 @@ -2742,6 +2746,10 @@ static void __sched_fork(struct task_struct *p)
527 INIT_HLIST_HEAD(&p->preempt_notifiers);
530 +#ifdef CONFIG_CHOPSTIX
531 + p->last_ran_j = jiffies;
532 + p->last_interrupted = INTERRUPTIBLE;
535 * We mark the process as running here, but have not actually
536 * inserted it onto the runqueue yet. This guarantees that
537 @@ -5659,6 +5667,30 @@ pick_next_task(struct rq *rq)
541 +#ifdef CONFIG_CHOPSTIX
542 +void (*rec_event)(void *,unsigned int) = NULL;
543 +EXPORT_SYMBOL(rec_event);
544 +EXPORT_SYMBOL(in_sched_functions);
548 + unsigned long dcookie;
549 + unsigned int count;
550 + unsigned int reason;
553 +/* To support safe calling from asm */
554 +asmlinkage void rec_event_asm (struct event *event_signature_in, unsigned int count) {
555 + struct pt_regs *regs;
556 + struct event_spec *es = event_signature_in->event_data;
557 + regs = task_pt_regs(current);
558 + event_signature_in->task=current;
560 + event_signature_in->count=1;
561 + (*rec_event)(event_signature_in, count);
566 * schedule() is the main scheduler function.
568 @@ -5706,6 +5738,54 @@ need_resched_nonpreemptible:
569 next = pick_next_task(rq);
571 if (likely(prev != next)) {
573 +#ifdef CONFIG_CHOPSTIX
574 + /* Run only if the Chopstix module so decrees it */
576 + unsigned long diff;
577 + int sampling_reason;
578 + prev->last_ran_j = jiffies;
579 + if (next->last_interrupted!=INTERRUPTIBLE) {
580 + if (next->last_interrupted!=RUNNING) {
581 + diff = (jiffies-next->last_interrupted);
582 + sampling_reason = 0;/* BLOCKING */
585 + diff = jiffies-next->last_ran_j;
586 + sampling_reason = 1;/* PREEMPTION */
589 + if (diff >= HZ/10) {
590 + struct event event;
591 + struct event_spec espec;
592 + struct pt_regs *regs;
593 + regs = task_pt_regs(current);
595 + espec.reason = sampling_reason;
596 + event.event_data=&espec;
599 + event.event_type=2;
600 + /* index in the event array currently set up */
601 + /* make sure the counters are loaded in the order we want them to show up */
602 + (*rec_event)(&event, diff);
605 + /* next has been elected to run */
606 + next->last_interrupted=0;
608 + /* An uninterruptible process just yielded. Record the current jiffy */
609 + if (prev->state & TASK_UNINTERRUPTIBLE) {
610 + prev->last_interrupted=jiffies;
612 + /* An interruptible process just yielded, or it got preempted.
613 + * Mark it as interruptible */
614 + else if (prev->state & TASK_INTERRUPTIBLE) {
615 + prev->last_interrupted=INTERRUPTIBLE;
620 sched_info_switch(prev, next);
621 perf_event_task_sched_out(prev, next, cpu);
623 diff --git a/mm/memory.c b/mm/memory.c
624 index e828063..6e88fed 100644
628 #include <linux/swapops.h>
629 #include <linux/elf.h>
630 // #include <linux/vs_memory.h>
631 +#include <linux/arrays.h>
634 #include <asm/pgalloc.h>
635 @@ -3070,6 +3071,16 @@ out:
639 +#ifdef CONFIG_CHOPSTIX
640 +extern void (*rec_event)(void *,unsigned int);
643 + unsigned long dcookie;
645 + unsigned char reason;
650 * By the time we get here, we already hold the mm semaphore
652 @@ -3115,6 +3126,24 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
656 +#ifdef CONFIG_CHOPSTIX
658 + struct event event;
659 + struct event_spec espec;
660 + struct pt_regs *regs;
662 + regs = task_pt_regs(current);
663 + pc = regs->ip & (unsigned int) ~4095;
665 + espec.reason = 0; /* alloc */
666 + event.event_data=&espec;
667 + event.task = current;
669 + event.event_type = 6;
670 + (*rec_event)(&event, 1);
674 return handle_pte_fault(mm, vma, address, pte, pmd, flags);
677 diff --git a/mm/slab.c b/mm/slab.c
678 index ad2828e..5acdf6c 100644
682 #include <linux/fault-inject.h>
683 #include <linux/rtmutex.h>
684 #include <linux/reciprocal_div.h>
685 +#include <linux/arrays.h>
686 #include <linux/debugobjects.h>
687 #include <linux/kmemcheck.h>
689 @@ -252,6 +253,16 @@ struct slab_rcu {
693 +#ifdef CONFIG_CHOPSTIX
694 +extern void (*rec_event)(void *,unsigned int);
697 + unsigned long dcookie;
699 + unsigned char reason;
706 @@ -3400,6 +3411,19 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
707 kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags,
710 +#ifdef CONFIG_CHOPSTIX
711 + if (rec_event && objp) {
712 + struct event event;
713 + struct event_spec espec;
715 + espec.reason = 0; /* alloc */
716 + event.event_data=&espec;
717 + event.task = current;
719 + event.event_type=4;
720 + (*rec_event)(&event, cachep->buffer_size);
725 kmemcheck_slab_alloc(cachep, flags, objp, obj_size(cachep));
726 @@ -3512,13 +3536,28 @@ free_done:
727 * Release an obj back to its cache. If the obj has a constructed state, it must
728 * be in this state _before_ it is released. Called with disabled ints.
730 -static inline void __cache_free(struct kmem_cache *cachep, void *objp)
731 +static inline void __cache_free(struct kmem_cache *cachep, void *objp, void *caller)
733 struct array_cache *ac = cpu_cache_get(cachep);
736 kmemleak_free_recursive(objp, cachep->flags);
737 - objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
738 + objp = cache_free_debugcheck(cachep, objp, caller);
740 +#ifdef CONFIG_CHOPSTIX
741 + if (rec_event && objp) {
742 + struct event event;
743 + struct event_spec espec;
745 + espec.reason = 1; /* free */
746 + event.event_data = &espec;
747 + event.task = current;
749 + event.event_type = 4;
750 + (*rec_event)(&event, cachep->buffer_size);
754 vx_slab_free(cachep);
756 kmemcheck_slab_free(cachep, objp, obj_size(cachep));
757 @@ -3720,10 +3759,17 @@ void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller)
758 EXPORT_SYMBOL(__kmalloc_track_caller);
761 +#ifdef CONFIG_CHOPSTIX
762 +void *__kmalloc(size_t size, gfp_t flags)
764 + return __do_kmalloc(size, flags, __builtin_return_address(0));
767 void *__kmalloc(size_t size, gfp_t flags)
769 return __do_kmalloc(size, flags, NULL);
772 EXPORT_SYMBOL(__kmalloc);
775 @@ -3743,7 +3789,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
776 debug_check_no_locks_freed(objp, obj_size(cachep));
777 if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
778 debug_check_no_obj_freed(objp, obj_size(cachep));
779 - __cache_free(cachep, objp);
780 + __cache_free(cachep, objp,__builtin_return_address(0));
781 local_irq_restore(flags);
783 trace_kmem_cache_free(_RET_IP_, objp);
784 @@ -3773,7 +3819,7 @@ void kfree(const void *objp)
785 c = virt_to_cache(objp);
786 debug_check_no_locks_freed(objp, obj_size(c));
787 debug_check_no_obj_freed(objp, obj_size(c));
788 - __cache_free(c, (void *)objp);
789 + __cache_free(c, (void *)objp,__builtin_return_address(0));
790 local_irq_restore(flags);
792 EXPORT_SYMBOL(kfree);