1 Index: linux-2.6.27.y/arch/Kconfig
2 ===================================================================
3 --- linux-2.6.27.y.orig/arch/Kconfig
4 +++ linux-2.6.27.y/arch/Kconfig
5 @@ -13,9 +13,18 @@ config OPROFILE
10 + bool "Chopstix (PlanetLab)"
11 + depends on MODULES && OPROFILE
13 + Chopstix allows you to monitor various events by summarizing them
14 + in lossy data structures and transferring these data structures
15 + into user space. If in doubt, say "N".
23 depends on KALLSYMS && MODULES
24 Index: linux-2.6.27.y/arch/x86/kernel/asm-offsets_32.c
25 ===================================================================
26 --- linux-2.6.27.y.orig/arch/x86/kernel/asm-offsets_32.c
27 +++ linux-2.6.27.y/arch/x86/kernel/asm-offsets_32.c
29 #include <linux/signal.h>
30 #include <linux/personality.h>
31 #include <linux/suspend.h>
32 +#include <linux/arrays.h>
33 #include <linux/kbuild.h>
34 #include <asm/ucontext.h>
37 #include <linux/lguest.h>
38 #include "../../../drivers/lguest/lg.h"
41 +#define STACKOFFSET(sym, str, mem) \
42 + DEFINE(sym, offsetof(struct str, mem)-sizeof(struct str));
44 /* workaround for a warning with -Wmissing-prototypes */
49 + unsigned long dcookie;
51 + unsigned int number;
56 OFFSET(IA32_SIGCONTEXT_ax, sigcontext, ax);
57 @@ -50,6 +62,16 @@ void foo(void)
58 OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
61 + STACKOFFSET(TASK_thread, task_struct, thread);
62 + STACKOFFSET(THREAD_esp, thread_struct, sp);
63 + STACKOFFSET(EVENT_event_data, event, event_data);
64 + STACKOFFSET(EVENT_task, event, task);
65 + STACKOFFSET(EVENT_event_type, event, event_type);
66 + STACKOFFSET(SPEC_number, event_spec, number);
67 + DEFINE(EVENT_SIZE, sizeof(struct event));
68 + DEFINE(SPEC_SIZE, sizeof(struct event_spec));
69 + DEFINE(SPEC_EVENT_SIZE, sizeof(struct event_spec)+sizeof(struct event));
71 OFFSET(TI_task, thread_info, task);
72 OFFSET(TI_exec_domain, thread_info, exec_domain);
73 OFFSET(TI_flags, thread_info, flags);
74 Index: linux-2.6.27.y/arch/x86/kernel/entry_32.S
75 ===================================================================
76 --- linux-2.6.27.y.orig/arch/x86/kernel/entry_32.S
77 +++ linux-2.6.27.y/arch/x86/kernel/entry_32.S
78 @@ -426,6 +426,33 @@ ENTRY(system_call)
79 cmpl $(nr_syscalls), %eax
82 + /* Move Chopstix syscall probe here */
83 + /* Save and clobber: eax, ecx, ebp */
88 + subl $SPEC_EVENT_SIZE, %esp
89 + movl rec_event, %ecx
92 + # struct event is first, just below %ebp
93 + movl %eax, (SPEC_number-EVENT_SIZE)(%ebp)
94 + leal -SPEC_EVENT_SIZE(%ebp), %eax
95 + movl %eax, EVENT_event_data(%ebp)
96 + movl $6, EVENT_event_type(%ebp)
97 + movl rec_event, %edx
99 + leal -EVENT_SIZE(%ebp), %eax
103 + addl $SPEC_EVENT_SIZE, %esp
109 call *sys_call_table(,%eax,4)
110 movl %eax,PT_EAX(%esp) # store the return value
112 Index: linux-2.6.27.y/arch/x86/mm/fault.c
113 ===================================================================
114 --- linux-2.6.27.y.orig/arch/x86/mm/fault.c
115 +++ linux-2.6.27.y/arch/x86/mm/fault.c
116 @@ -79,6 +79,15 @@ static inline int notify_page_fault(stru
121 +extern void (*rec_event)(void *,unsigned int);
124 + unsigned long dcookie;
126 + unsigned char reason;
131 * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
132 Index: linux-2.6.27.y/drivers/oprofile/cpu_buffer.c
133 ===================================================================
134 --- linux-2.6.27.y.orig/drivers/oprofile/cpu_buffer.c
135 +++ linux-2.6.27.y/drivers/oprofile/cpu_buffer.c
137 #include <linux/oprofile.h>
138 #include <linux/vmalloc.h>
139 #include <linux/errno.h>
140 +#include <linux/arrays.h>
142 #include "event_buffer.h"
143 #include "cpu_buffer.h"
144 @@ -147,6 +148,17 @@ static void increment_head(struct oprofi
148 +#ifdef CONFIG_CHOPSTIX
152 + unsigned long dcookie;
156 +extern void (*rec_event)(void *,unsigned int);
160 add_sample(struct oprofile_cpu_buffer * cpu_buf,
161 unsigned long pc, unsigned long event)
162 @@ -155,6 +167,7 @@ add_sample(struct oprofile_cpu_buffer *
164 entry->event = event;
165 increment_head(cpu_buf);
170 @@ -250,8 +263,28 @@ void oprofile_add_sample(struct pt_regs
172 int is_kernel = !user_mode(regs);
173 unsigned long pc = profile_pc(regs);
176 +#ifdef CONFIG_CHOPSTIX
179 + struct event_spec espec;
180 + esig.task = current;
183 + esig.event_data=&espec;
184 + esig.event_type=event; /* index in the event array currently set up */
185 + 		/* make sure the counters are loaded in the order we want them to show up */
186 + (*rec_event)(&esig, 1);
189 oprofile_add_ext_sample(pc, regs, event, is_kernel);
192 + oprofile_add_ext_sample(pc, regs, event, is_kernel);
198 void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
199 Index: linux-2.6.27.y/fs/bio.c
200 ===================================================================
201 --- linux-2.6.27.y.orig/fs/bio.c
202 +++ linux-2.6.27.y/fs/bio.c
204 #include <linux/workqueue.h>
205 #include <linux/blktrace_api.h>
206 #include <scsi/sg.h> /* for struct sg_iovec */
207 +#include <linux/arrays.h>
209 static struct kmem_cache *bio_slab __read_mostly;
211 @@ -44,6 +45,7 @@ static struct biovec_slab bvec_slabs[BIO
217 * fs_bio_set is the bio_set containing bio and iovec memory pools used by
218 * IO code that does not need private memory pools.
219 @@ -1171,6 +1173,14 @@ void bio_check_pages_dirty(struct bio *b
225 + unsigned long dcookie;
227 + unsigned char reason;
230 +extern void (*rec_event)(void *,unsigned int);
232 * bio_endio - end I/O on a bio
234 @@ -1192,6 +1202,24 @@ void bio_endio(struct bio *bio, int erro
235 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
240 + struct event event;
241 + struct event_spec espec;
244 + espec.reason = 1;/*response */
246 + eip = bio->bi_end_io;
247 + event.event_data=&espec;
249 + event.event_type=3;
250 + /* index in the event array currently set up */
251 + 		/* make sure the counters are loaded in the order we want them to show up */
252 + (*rec_event)(&event, bytes_done);
257 bio->bi_end_io(bio, error);
259 Index: linux-2.6.27.y/fs/exec.c
260 ===================================================================
261 --- linux-2.6.27.y.orig/fs/exec.c
262 +++ linux-2.6.27.y/fs/exec.c
264 #include <linux/fdtable.h>
265 #include <linux/mm.h>
266 #include <linux/stat.h>
267 +#include <linux/dcookies.h>
268 #include <linux/fcntl.h>
269 #include <linux/smp_lock.h>
270 #include <linux/swap.h>
271 @@ -698,6 +699,13 @@ struct file *open_exec(const char *name)
275 + #ifdef CONFIG_CHOPSTIX
276 + unsigned long cookie;
277 + extern void (*rec_event)(void *, unsigned int);
278 + if (rec_event && !nd.path.dentry->d_cookie)
279 + get_dcookie(&nd.path, &cookie);
285 Index: linux-2.6.27.y/include/linux/arrays.h
286 ===================================================================
288 +++ linux-2.6.27.y/include/linux/arrays.h
290 +#ifndef __ARRAYS_H__
291 +#define __ARRAYS_H__
292 +#include <linux/list.h>
294 +#define SAMPLING_METHOD_DEFAULT 0
295 +#define SAMPLING_METHOD_LOG 1
297 +/* Every probe has an array handler */
299 +/* XXX - Optimize this structure */
301 +extern void (*rec_event)(void *,unsigned int);
302 +struct array_handler {
303 + struct list_head link;
304 + unsigned int (*hash_func)(void *);
305 + unsigned int (*sampling_func)(void *,int,void *);
306 + unsigned short size;
307 + unsigned int threshold;
308 + unsigned char **expcount;
309 + unsigned int sampling_method;
310 + unsigned int **arrays;
311 + unsigned int arraysize;
312 + unsigned int num_samples[2];
313 + void **epoch_samples; /* size-sized lists of samples */
314 + unsigned int (*serialize)(void *, void *);
315 + unsigned char code[5];
319 + struct list_head link;
321 + unsigned int count;
322 + unsigned int event_type;
323 + struct task_struct *task;
326 Index: linux-2.6.27.y/include/linux/sched.h.rej
327 ===================================================================
329 +++ linux-2.6.27.y/include/linux/sched.h.rej
334 + unsigned long sleep_avg;
335 + unsigned long long timestamp, last_ran;
336 + unsigned long long sched_time; /* sched_clock time spent running */
337 + enum sleep_type sleep_type;
341 + unsigned long sleep_avg;
342 + unsigned long long timestamp, last_ran;
343 ++ #ifdef CONFIG_CHOPSTIX
344 ++ unsigned long last_interrupted, last_ran_j;
347 + unsigned long long sched_time; /* sched_clock time spent running */
348 + enum sleep_type sleep_type;
350 Index: linux-2.6.27.y/kernel/sched.c
351 ===================================================================
352 --- linux-2.6.27.y.orig/kernel/sched.c
353 +++ linux-2.6.27.y/kernel/sched.c
355 * 1998-11-19 Implemented schedule_timeout() and related stuff
356 * by Andrea Arcangeli
357 * 2002-01-04 New ultra-scalable O(1) scheduler by Ingo Molnar:
358 - * hybrid priority-list and round-robin design with
359 + * hybrid priority-list and round-robin design with
360 * an array-switch method of distributing timeslices
361 * and per-CPU runqueues. Cleanups and useful suggestions
362 * by Davide Libenzi, preemptible kernel bits by Robert Love.
364 #include <linux/ftrace.h>
365 #include <linux/vs_sched.h>
366 #include <linux/vs_cvirt.h>
367 +#include <linux/arrays.h>
370 #include <asm/irq_regs.h>
372 #include "sched_cpupri.h"
374 +#define INTERRUPTIBLE -1
378 * Convert user-nice values [ -20 ... 0 ... 19 ]
379 * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
380 @@ -4436,6 +4440,29 @@ pick_next_task(struct rq *rq, struct tas
384 +void (*rec_event)(void *,unsigned int) = NULL;
385 +EXPORT_SYMBOL(rec_event);
386 +#ifdef CONFIG_CHOPSTIX
390 + unsigned long dcookie;
391 + unsigned int count;
392 + unsigned int reason;
395 +/* To support safe calling from asm */
396 +asmlinkage void rec_event_asm (struct event *event_signature_in, unsigned int count) {
397 + struct pt_regs *regs;
398 + struct event_spec *es = event_signature_in->event_data;
399 + regs = task_pt_regs(current);
400 + event_signature_in->task=current;
402 + event_signature_in->count=1;
403 + (*rec_event)(event_signature_in, count);
408 * schedule() is the main scheduler function.
410 @@ -5382,6 +5409,7 @@ long sched_setaffinity(pid_t pid, const
412 read_unlock(&tasklist_lock);
416 if ((current->euid != p->euid) && (current->euid != p->uid) &&
417 !capable(CAP_SYS_NICE))
418 Index: linux-2.6.27.y/kernel/sched.c.rej
419 ===================================================================
421 +++ linux-2.6.27.y/kernel/sched.c.rej
425 + #include <linux/nmi.h>
426 + #include <linux/init.h>
427 + #include <asm/uaccess.h>
428 + #include <linux/highmem.h>
429 + #include <linux/smp_lock.h>
430 + #include <asm/mmu_context.h>
432 + #include <linux/nmi.h>
433 + #include <linux/init.h>
434 + #include <asm/uaccess.h>
435 ++ #include <linux/arrays.h>
436 + #include <linux/highmem.h>
437 + #include <linux/smp_lock.h>
438 + #include <asm/mmu_context.h>
444 + spin_lock(&rq->lock);
445 + if (unlikely(rq != task_rq(p))) {
446 + spin_unlock(&rq->lock);
452 + spin_lock(&rq->lock);
453 + if (unlikely(rq != task_rq(p))) {
454 + spin_unlock(&rq->lock);
457 + * event cannot wake it up and insert it on the runqueue either.
459 + p->state = TASK_RUNNING;
462 + * Make sure we do not leak PI boosting priority to the child:
464 + * event cannot wake it up and insert it on the runqueue either.
466 + p->state = TASK_RUNNING;
467 ++ #ifdef CONFIG_CHOPSTIX
468 ++ /* The jiffy of last interruption */
469 ++ if (p->state & TASK_UNINTERRUPTIBLE) {
470 ++ p->last_interrupted=jiffies;
473 ++ if (p->state & TASK_INTERRUPTIBLE) {
474 ++ p->last_interrupted=INTERRUPTIBLE;
477 ++ p->last_interrupted=RUNNING;
479 ++ /* The jiffy of last execution */
480 ++ p->last_ran_j=jiffies;
484 + * Make sure we do not leak PI boosting priority to the child:
490 + static inline int interactive_sleep(enum sleep_type sleep_type)
492 + return (sleep_type == SLEEP_INTERACTIVE ||
498 + static inline int interactive_sleep(enum sleep_type sleep_type)
500 + return (sleep_type == SLEEP_INTERACTIVE ||
504 + * schedule() is the main scheduler function.
506 + asmlinkage void __sched schedule(void)
508 + struct task_struct *prev, *next;
509 + struct prio_array *array;
510 + struct list_head *queue;
511 + unsigned long long now;
512 +- unsigned long run_time;
513 + int cpu, idx, new_prio;
514 + long *switch_count;
518 + * Test if we are atomic. Since do_exit() needs to call into
521 + * schedule() is the main scheduler function.
524 ++ #ifdef CONFIG_CHOPSTIX
525 ++ extern void (*rec_event)(void *,unsigned int);
526 ++ struct event_spec {
528 ++ unsigned long dcookie;
529 ++ unsigned int count;
530 ++ unsigned int reason;
534 + asmlinkage void __sched schedule(void)
536 + struct task_struct *prev, *next;
537 + struct prio_array *array;
538 + struct list_head *queue;
539 + unsigned long long now;
540 ++ unsigned long run_time, diff;
541 + int cpu, idx, new_prio;
542 + long *switch_count;
544 ++ int sampling_reason;
547 + * Test if we are atomic. Since do_exit() needs to call into
550 + switch_count = &prev->nivcsw;
551 + if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
552 + switch_count = &prev->nvcsw;
553 + if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
554 + unlikely(signal_pending(prev))))
555 + prev->state = TASK_RUNNING;
557 + switch_count = &prev->nivcsw;
558 + if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
559 + switch_count = &prev->nvcsw;
561 + if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
562 + unlikely(signal_pending(prev))))
563 + prev->state = TASK_RUNNING;
566 + vx_uninterruptible_inc(prev);
568 + deactivate_task(prev, rq);
573 + vx_uninterruptible_inc(prev);
575 + deactivate_task(prev, rq);
576 ++ #ifdef CONFIG_CHOPSTIX
577 ++ 		/* An uninterruptible process just yielded. Record the current jiffy */
578 ++ if (prev->state & TASK_UNINTERRUPTIBLE) {
579 ++ prev->last_interrupted=jiffies;
581 ++ /* An interruptible process just yielded, or it got preempted.
582 ++ * Mark it as interruptible */
583 ++ else if (prev->state & TASK_INTERRUPTIBLE) {
584 ++ prev->last_interrupted=INTERRUPTIBLE;
592 + prev->sleep_avg = 0;
593 + prev->timestamp = prev->last_ran = now;
595 + sched_info_switch(prev, next);
596 + if (likely(prev != next)) {
597 + next->timestamp = next->last_ran = now;
599 + prev->sleep_avg = 0;
600 + prev->timestamp = prev->last_ran = now;
602 ++ #ifdef CONFIG_CHOPSTIX
603 ++ /* Run only if the Chopstix module so decrees it */
605 ++ prev->last_ran_j = jiffies;
606 ++ if (next->last_interrupted!=INTERRUPTIBLE) {
607 ++ if (next->last_interrupted!=RUNNING) {
608 ++ diff = (jiffies-next->last_interrupted);
609 ++ sampling_reason = 0;/* BLOCKING */
612 ++ diff = jiffies-next->last_ran_j;
613 ++ sampling_reason = 1;/* PREEMPTION */
616 ++ if (diff >= HZ/10) {
617 ++ struct event event;
618 ++ struct event_spec espec;
619 ++ struct pt_regs *regs;
620 ++ regs = task_pt_regs(current);
622 ++ espec.reason = sampling_reason;
623 ++ event.event_data=&espec;
625 ++ espec.pc=regs->eip;
626 ++ event.event_type=2;
627 ++ /* index in the event array currently set up */
628 ++ 		/* make sure the counters are loaded in the order we want them to show up */
629 ++ (*rec_event)(&event, diff);
632 ++ /* next has been elected to run */
633 ++ next->last_interrupted=0;
636 + sched_info_switch(prev, next);
637 + if (likely(prev != next)) {
638 + next->timestamp = next->last_ran = now;
641 + jiffies_to_timespec(p->policy == SCHED_FIFO ?
642 + 0 : task_timeslice(p), &t);
643 + read_unlock(&tasklist_lock);
644 + retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
648 + jiffies_to_timespec(p->policy == SCHED_FIFO ?
649 + 0 : task_timeslice(p), &t);
650 + read_unlock(&tasklist_lock);
652 + retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
665 ++ #ifdef CONFIG_CHOPSTIX
666 ++ void (*rec_event)(void *,unsigned int) = NULL;
668 ++ /* To support safe calling from asm */
669 ++ asmlinkage void rec_event_asm (struct event *event_signature_in, unsigned int count) {
670 ++ struct pt_regs *regs;
671 ++ struct event_spec *es = event_signature_in->event_data;
672 ++ regs = task_pt_regs(current);
673 ++ event_signature_in->task=current;
675 ++ event_signature_in->count=1;
676 ++ (*rec_event)(event_signature_in, count);
678 ++ EXPORT_SYMBOL(rec_event);
679 ++ EXPORT_SYMBOL(in_sched_functions);
681 Index: linux-2.6.27.y/mm/memory.c
682 ===================================================================
683 --- linux-2.6.27.y.orig/mm/memory.c
684 +++ linux-2.6.27.y/mm/memory.c
687 #include <linux/swapops.h>
688 #include <linux/elf.h>
689 +#include <linux/arrays.h>
691 #include "internal.h"
693 @@ -2753,6 +2754,15 @@ out:
697 +extern void (*rec_event)(void *,unsigned int);
700 + unsigned long dcookie;
702 + unsigned char reason;
707 * By the time we get here, we already hold the mm semaphore
709 @@ -2782,6 +2792,24 @@ int handle_mm_fault(struct mm_struct *mm
713 +#ifdef CONFIG_CHOPSTIX
715 + struct event event;
716 + struct event_spec espec;
717 + struct pt_regs *regs;
719 + regs = task_pt_regs(current);
720 + pc = regs->ip & (unsigned int) ~4095;
722 + espec.reason = 0; /* alloc */
723 + event.event_data=&espec;
724 + event.task = current;
726 + event.event_type=5;
727 + (*rec_event)(&event, 1);
731 return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
734 Index: linux-2.6.27.y/mm/slab.c
735 ===================================================================
736 --- linux-2.6.27.y.orig/mm/slab.c
737 +++ linux-2.6.27.y/mm/slab.c
739 #include <linux/fault-inject.h>
740 #include <linux/rtmutex.h>
741 #include <linux/reciprocal_div.h>
742 +#include <linux/arrays.h>
743 #include <linux/debugobjects.h>
745 #include <asm/cacheflush.h>
746 @@ -248,6 +249,14 @@ struct slab_rcu {
750 +extern void (*rec_event)(void *,unsigned int);
753 + unsigned long dcookie;
755 + unsigned char reason;
761 @@ -3469,6 +3478,19 @@ __cache_alloc(struct kmem_cache *cachep,
762 local_irq_restore(save_flags);
763 objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
765 +#ifdef CONFIG_CHOPSTIX
766 + if (rec_event && objp) {
767 + struct event event;
768 + struct event_spec espec;
770 + espec.reason = 0; /* alloc */
771 + event.event_data=&espec;
772 + event.task = current;
774 + event.event_type=5;
775 + (*rec_event)(&event, cachep->buffer_size);
779 if (unlikely((flags & __GFP_ZERO) && objp))
780 memset(objp, 0, obj_size(cachep));
781 @@ -3578,12 +3600,26 @@ free_done:
782 * Release an obj back to its cache. If the obj has a constructed state, it must
783 * be in this state _before_ it is released. Called with disabled ints.
785 -static inline void __cache_free(struct kmem_cache *cachep, void *objp)
786 +static inline void __cache_free(struct kmem_cache *cachep, void *objp, void *caller)
788 struct array_cache *ac = cpu_cache_get(cachep);
791 - objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
792 + objp = cache_free_debugcheck(cachep, objp, caller);
793 + #ifdef CONFIG_CHOPSTIX
794 + if (rec_event && objp) {
795 + struct event event;
796 + struct event_spec espec;
798 + espec.reason = 1; /* free */
799 + event.event_data=&espec;
800 + event.task = current;
802 + event.event_type=4;
803 + (*rec_event)(&event, cachep->buffer_size);
807 vx_slab_free(cachep);
810 @@ -3714,6 +3750,7 @@ static __always_inline void *__do_kmallo
813 struct kmem_cache *cachep;
816 /* If you want to save a few bytes .text space: replace
818 @@ -3741,10 +3778,17 @@ void *__kmalloc_track_caller(size_t size
819 EXPORT_SYMBOL(__kmalloc_track_caller);
822 +#ifdef CONFIG_CHOPSTIX
823 +void *__kmalloc(size_t size, gfp_t flags)
825 + return __do_kmalloc(size, flags, __builtin_return_address(0));
828 void *__kmalloc(size_t size, gfp_t flags)
830 return __do_kmalloc(size, flags, NULL);
833 EXPORT_SYMBOL(__kmalloc);
836 @@ -3764,7 +3808,7 @@ void kmem_cache_free(struct kmem_cache *
837 debug_check_no_locks_freed(objp, obj_size(cachep));
838 if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
839 debug_check_no_obj_freed(objp, obj_size(cachep));
840 - __cache_free(cachep, objp);
841 + __cache_free(cachep, objp,__builtin_return_address(0));
842 local_irq_restore(flags);
844 EXPORT_SYMBOL(kmem_cache_free);
845 @@ -3790,7 +3834,7 @@ void kfree(const void *objp)
846 c = virt_to_cache(objp);
847 debug_check_no_locks_freed(objp, obj_size(c));
848 debug_check_no_obj_freed(objp, obj_size(c));
849 - __cache_free(c, (void *)objp);
850 + __cache_free(c, (void *)objp,__builtin_return_address(0));
851 local_irq_restore(flags);
853 EXPORT_SYMBOL(kfree);