1 diff -Nurb linux-2.6.27-590/arch/Kconfig linux-2.6.27-591/arch/Kconfig
2 --- linux-2.6.27-590/arch/Kconfig 2010-01-29 16:29:46.000000000 -0500
3 +++ linux-2.6.27-591/arch/Kconfig 2010-01-29 16:30:22.000000000 -0500
9 + bool "Chopstix (PlanetLab)"
10 + depends on MODULES && OPROFILE
12 + Chopstix allows you to monitor various events by summarizing them
13 + in lossy data structures and transferring these data structures
14 + into user space. If in doubt, say "N".
22 depends on KALLSYMS && MODULES
23 diff -Nurb linux-2.6.27-590/arch/x86/kernel/asm-offsets_32.c linux-2.6.27-591/arch/x86/kernel/asm-offsets_32.c
24 --- linux-2.6.27-590/arch/x86/kernel/asm-offsets_32.c 2008-10-09 18:13:53.000000000 -0400
25 +++ linux-2.6.27-591/arch/x86/kernel/asm-offsets_32.c 2010-01-29 16:45:48.000000000 -0500
27 #include <linux/signal.h>
28 #include <linux/personality.h>
29 #include <linux/suspend.h>
30 +#include <linux/arrays.h>
31 #include <linux/kbuild.h>
32 #include <asm/ucontext.h>
35 #include <linux/lguest.h>
36 #include "../../../drivers/lguest/lg.h"
39 +#define STACKOFFSET(sym, str, mem) \
40 + DEFINE(sym, offsetof(struct str, mem)-sizeof(struct str));
42 /* workaround for a warning with -Wmissing-prototypes */
47 + unsigned long dcookie;
49 + unsigned int number;
54 OFFSET(IA32_SIGCONTEXT_ax, sigcontext, ax);
56 OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
59 + STACKOFFSET(TASK_thread, task_struct, thread);
60 + STACKOFFSET(THREAD_esp, thread_struct, sp);
61 + STACKOFFSET(EVENT_event_data, event, event_data);
62 + STACKOFFSET(EVENT_task, event, task);
63 + STACKOFFSET(EVENT_event_type, event, event_type);
64 + STACKOFFSET(SPEC_number, event_spec, number);
65 + DEFINE(EVENT_SIZE, sizeof(struct event));
66 + DEFINE(SPEC_SIZE, sizeof(struct event_spec));
67 + DEFINE(SPEC_EVENT_SIZE, sizeof(struct event_spec)+sizeof(struct event));
69 OFFSET(TI_task, thread_info, task);
70 OFFSET(TI_exec_domain, thread_info, exec_domain);
71 OFFSET(TI_flags, thread_info, flags);
72 diff -Nurb linux-2.6.27-590/arch/x86/kernel/entry_32.S linux-2.6.27-591/arch/x86/kernel/entry_32.S
73 --- linux-2.6.27-590/arch/x86/kernel/entry_32.S 2008-10-09 18:13:53.000000000 -0400
74 +++ linux-2.6.27-591/arch/x86/kernel/entry_32.S 2010-01-29 16:30:22.000000000 -0500
76 cmpl $(nr_syscalls), %eax
79 + /* Move Chopstix syscall probe here */
80 + /* Save and clobber: eax, ecx, ebp */
85 + subl $SPEC_EVENT_SIZE, %esp
86 + movl rec_event, %ecx
89 + # struct event is first, just below %ebp
90 + movl %eax, (SPEC_number-EVENT_SIZE)(%ebp)
91 + leal -SPEC_EVENT_SIZE(%ebp), %eax
92 + movl %eax, EVENT_event_data(%ebp)
93 + movl $6, EVENT_event_type(%ebp)
94 + movl rec_event, %edx
96 + leal -EVENT_SIZE(%ebp), %eax
100 + addl $SPEC_EVENT_SIZE, %esp
106 call *sys_call_table(,%eax,4)
107 movl %eax,PT_EAX(%esp) # store the return value
109 diff -Nurb linux-2.6.27-590/arch/x86/mm/fault.c linux-2.6.27-591/arch/x86/mm/fault.c
110 --- linux-2.6.27-590/arch/x86/mm/fault.c 2010-01-29 16:29:46.000000000 -0500
111 +++ linux-2.6.27-591/arch/x86/mm/fault.c 2010-01-29 16:30:22.000000000 -0500
117 +extern void (*rec_event)(void *,unsigned int);
120 + unsigned long dcookie;
122 + unsigned char reason;
127 * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
128 diff -Nurb linux-2.6.27-590/drivers/oprofile/cpu_buffer.c linux-2.6.27-591/drivers/oprofile/cpu_buffer.c
129 --- linux-2.6.27-590/drivers/oprofile/cpu_buffer.c 2008-10-09 18:13:53.000000000 -0400
130 +++ linux-2.6.27-591/drivers/oprofile/cpu_buffer.c 2010-01-29 16:30:22.000000000 -0500
132 #include <linux/oprofile.h>
133 #include <linux/vmalloc.h>
134 #include <linux/errno.h>
135 +#include <linux/arrays.h>
137 #include "event_buffer.h"
138 #include "cpu_buffer.h"
143 +#ifdef CONFIG_CHOPSTIX
147 + unsigned long dcookie;
151 +extern void (*rec_event)(void *,unsigned int);
155 add_sample(struct oprofile_cpu_buffer * cpu_buf,
156 unsigned long pc, unsigned long event)
159 entry->event = event;
160 increment_head(cpu_buf);
167 int is_kernel = !user_mode(regs);
168 unsigned long pc = profile_pc(regs);
171 +#ifdef CONFIG_CHOPSTIX
174 + struct event_spec espec;
175 + esig.task = current;
178 + esig.event_data=&espec;
179 + esig.event_type=event; /* index in the event array currently set up */
180 + /* make sure the counters are loaded in the order we want them to show up*/
181 + (*rec_event)(&esig, 1);
184 oprofile_add_ext_sample(pc, regs, event, is_kernel);
187 + oprofile_add_ext_sample(pc, regs, event, is_kernel);
193 void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
194 diff -Nurb linux-2.6.27-590/fs/bio.c linux-2.6.27-591/fs/bio.c
195 --- linux-2.6.27-590/fs/bio.c 2008-10-09 18:13:53.000000000 -0400
196 +++ linux-2.6.27-591/fs/bio.c 2010-01-29 16:30:22.000000000 -0500
198 #include <linux/workqueue.h>
199 #include <linux/blktrace_api.h>
200 #include <scsi/sg.h> /* for struct sg_iovec */
201 +#include <linux/arrays.h>
203 static struct kmem_cache *bio_slab __read_mostly;
211 * fs_bio_set is the bio_set containing bio and iovec memory pools used by
212 * IO code that does not need private memory pools.
213 @@ -1171,6 +1173,14 @@
219 + unsigned long dcookie;
221 + unsigned char reason;
224 +extern void (*rec_event)(void *,unsigned int);
226 * bio_endio - end I/O on a bio
228 @@ -1192,6 +1202,24 @@
229 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
232 +#ifdef CONFIG_CHOPSTIX
234 + struct event event;
235 + struct event_spec espec;
238 + espec.reason = 1;/*response */
240 + eip = bio->bi_end_io;
241 + event.event_data=&espec;
243 + event.event_type=3;
244 + /* index in the event array currently set up */
245 + /* make sure the counters are loaded in the order we want them to show up*/
246 + (*rec_event)(&event, bytes_done);
251 bio->bi_end_io(bio, error);
253 diff -Nurb linux-2.6.27-590/fs/exec.c linux-2.6.27-591/fs/exec.c
254 --- linux-2.6.27-590/fs/exec.c 2010-01-29 16:29:48.000000000 -0500
255 +++ linux-2.6.27-591/fs/exec.c 2010-01-29 16:45:48.000000000 -0500
257 #include <linux/fdtable.h>
258 #include <linux/mm.h>
259 #include <linux/stat.h>
260 +#include <linux/dcookies.h>
261 #include <linux/fcntl.h>
262 #include <linux/smp_lock.h>
263 #include <linux/swap.h>
268 + #ifdef CONFIG_CHOPSTIX
269 + unsigned long cookie;
270 + extern void (*rec_event)(void *, unsigned int);
271 + if (rec_event && !nd.path.dentry->d_cookie)
272 + get_dcookie(nd.path, &cookie);
278 diff -Nurb linux-2.6.27-590/include/linux/arrays.h linux-2.6.27-591/include/linux/arrays.h
279 --- linux-2.6.27-590/include/linux/arrays.h 1969-12-31 19:00:00.000000000 -0500
280 +++ linux-2.6.27-591/include/linux/arrays.h 2010-01-29 16:30:22.000000000 -0500
282 +#ifndef __ARRAYS_H__
283 +#define __ARRAYS_H__
284 +#include <linux/list.h>
286 +#define SAMPLING_METHOD_DEFAULT 0
287 +#define SAMPLING_METHOD_LOG 1
289 +/* Every probe has an array handler */
291 +/* XXX - Optimize this structure */
293 +extern void (*rec_event)(void *,unsigned int);
294 +struct array_handler {
295 + struct list_head link;
296 + unsigned int (*hash_func)(void *);
297 + unsigned int (*sampling_func)(void *,int,void *);
298 + unsigned short size;
299 + unsigned int threshold;
300 + unsigned char **expcount;
301 + unsigned int sampling_method;
302 + unsigned int **arrays;
303 + unsigned int arraysize;
304 + unsigned int num_samples[2];
305 + void **epoch_samples; /* size-sized lists of samples */
306 + unsigned int (*serialize)(void *, void *);
307 + unsigned char code[5];
311 + struct list_head link;
313 + unsigned int count;
314 + unsigned int event_type;
315 + struct task_struct *task;
318 diff -Nurb linux-2.6.27-590/include/linux/sched.h.rej linux-2.6.27-591/include/linux/sched.h.rej
319 --- linux-2.6.27-590/include/linux/sched.h.rej 1969-12-31 19:00:00.000000000 -0500
320 +++ linux-2.6.27-591/include/linux/sched.h.rej 2010-01-29 16:30:22.000000000 -0500
325 + unsigned long sleep_avg;
326 + unsigned long long timestamp, last_ran;
327 + unsigned long long sched_time; /* sched_clock time spent running */
328 + enum sleep_type sleep_type;
332 + unsigned long sleep_avg;
333 + unsigned long long timestamp, last_ran;
334 ++ #ifdef CONFIG_CHOPSTIX
335 ++ unsigned long last_interrupted, last_ran_j;
338 + unsigned long long sched_time; /* sched_clock time spent running */
339 + enum sleep_type sleep_type;
341 diff -Nurb linux-2.6.27-590/kernel/sched.c linux-2.6.27-591/kernel/sched.c
342 --- linux-2.6.27-590/kernel/sched.c 2010-01-29 16:29:48.000000000 -0500
343 +++ linux-2.6.27-591/kernel/sched.c 2010-01-29 16:30:22.000000000 -0500
345 * 1998-11-19 Implemented schedule_timeout() and related stuff
346 * by Andrea Arcangeli
347 * 2002-01-04 New ultra-scalable O(1) scheduler by Ingo Molnar:
348 - * hybrid priority-list and round-robin design with
349 + * hybrid priority-list and round-robin design with
350 * an array-switch method of distributing timeslices
351 * and per-CPU runqueues. Cleanups and useful suggestions
352 * by Davide Libenzi, preemptible kernel bits by Robert Love.
355 #include "sched_cpupri.h"
357 +#define INTERRUPTIBLE -1
361 * Convert user-nice values [ -20 ... 0 ... 19 ]
362 * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
363 @@ -5369,6 +5372,7 @@
365 read_unlock(&tasklist_lock);
369 if ((current->euid != p->euid) && (current->euid != p->uid) &&
370 !capable(CAP_SYS_NICE))
371 diff -Nurb linux-2.6.27-590/kernel/sched.c.rej linux-2.6.27-591/kernel/sched.c.rej
372 --- linux-2.6.27-590/kernel/sched.c.rej 1969-12-31 19:00:00.000000000 -0500
373 +++ linux-2.6.27-591/kernel/sched.c.rej 2010-01-29 16:30:22.000000000 -0500
377 + #include <linux/nmi.h>
378 + #include <linux/init.h>
379 + #include <asm/uaccess.h>
380 + #include <linux/highmem.h>
381 + #include <linux/smp_lock.h>
382 + #include <asm/mmu_context.h>
384 + #include <linux/nmi.h>
385 + #include <linux/init.h>
386 + #include <asm/uaccess.h>
387 ++ #include <linux/arrays.h>
388 + #include <linux/highmem.h>
389 + #include <linux/smp_lock.h>
390 + #include <asm/mmu_context.h>
396 + spin_lock(&rq->lock);
397 + if (unlikely(rq != task_rq(p))) {
398 + spin_unlock(&rq->lock);
404 + spin_lock(&rq->lock);
405 + if (unlikely(rq != task_rq(p))) {
406 + spin_unlock(&rq->lock);
409 + * event cannot wake it up and insert it on the runqueue either.
411 + p->state = TASK_RUNNING;
414 + * Make sure we do not leak PI boosting priority to the child:
416 + * event cannot wake it up and insert it on the runqueue either.
418 + p->state = TASK_RUNNING;
419 ++ #ifdef CONFIG_CHOPSTIX
420 ++ /* The jiffy of last interruption */
421 ++ if (p->state & TASK_UNINTERRUPTIBLE) {
422 ++ p->last_interrupted=jiffies;
425 ++ if (p->state & TASK_INTERRUPTIBLE) {
426 ++ p->last_interrupted=INTERRUPTIBLE;
429 ++ p->last_interrupted=RUNNING;
431 ++ /* The jiffy of last execution */
432 ++ p->last_ran_j=jiffies;
436 + * Make sure we do not leak PI boosting priority to the child:
442 + static inline int interactive_sleep(enum sleep_type sleep_type)
444 + return (sleep_type == SLEEP_INTERACTIVE ||
450 + static inline int interactive_sleep(enum sleep_type sleep_type)
452 + return (sleep_type == SLEEP_INTERACTIVE ||
456 + * schedule() is the main scheduler function.
458 + asmlinkage void __sched schedule(void)
460 + struct task_struct *prev, *next;
461 + struct prio_array *array;
462 + struct list_head *queue;
463 + unsigned long long now;
464 +- unsigned long run_time;
465 + int cpu, idx, new_prio;
466 + long *switch_count;
470 + * Test if we are atomic. Since do_exit() needs to call into
473 + * schedule() is the main scheduler function.
476 ++ #ifdef CONFIG_CHOPSTIX
477 ++ extern void (*rec_event)(void *,unsigned int);
478 ++ struct event_spec {
480 ++ unsigned long dcookie;
481 ++ unsigned int count;
482 ++ unsigned int reason;
486 + asmlinkage void __sched schedule(void)
488 + struct task_struct *prev, *next;
489 + struct prio_array *array;
490 + struct list_head *queue;
491 + unsigned long long now;
492 ++ unsigned long run_time, diff;
493 + int cpu, idx, new_prio;
494 + long *switch_count;
496 ++ int sampling_reason;
499 + * Test if we are atomic. Since do_exit() needs to call into
502 + switch_count = &prev->nivcsw;
503 + if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
504 + switch_count = &prev->nvcsw;
505 + if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
506 + unlikely(signal_pending(prev))))
507 + prev->state = TASK_RUNNING;
509 + switch_count = &prev->nivcsw;
510 + if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
511 + switch_count = &prev->nvcsw;
513 + if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
514 + unlikely(signal_pending(prev))))
515 + prev->state = TASK_RUNNING;
518 + vx_uninterruptible_inc(prev);
520 + deactivate_task(prev, rq);
525 + vx_uninterruptible_inc(prev);
527 + deactivate_task(prev, rq);
528 ++ #ifdef CONFIG_CHOPSTIX
529 ++ /* An uninterruptible process just yielded. Record the current jiffies value */
530 ++ if (prev->state & TASK_UNINTERRUPTIBLE) {
531 ++ prev->last_interrupted=jiffies;
533 ++ /* An interruptible process just yielded, or it got preempted.
534 ++ * Mark it as interruptible */
535 ++ else if (prev->state & TASK_INTERRUPTIBLE) {
536 ++ prev->last_interrupted=INTERRUPTIBLE;
544 + prev->sleep_avg = 0;
545 + prev->timestamp = prev->last_ran = now;
547 + sched_info_switch(prev, next);
548 + if (likely(prev != next)) {
549 + next->timestamp = next->last_ran = now;
551 + prev->sleep_avg = 0;
552 + prev->timestamp = prev->last_ran = now;
554 ++ #ifdef CONFIG_CHOPSTIX
555 ++ /* Run only if the Chopstix module so decrees it */
557 ++ prev->last_ran_j = jiffies;
558 ++ if (next->last_interrupted!=INTERRUPTIBLE) {
559 ++ if (next->last_interrupted!=RUNNING) {
560 ++ diff = (jiffies-next->last_interrupted);
561 ++ sampling_reason = 0;/* BLOCKING */
564 ++ diff = jiffies-next->last_ran_j;
565 ++ sampling_reason = 1;/* PREEMPTION */
568 ++ if (diff >= HZ/10) {
569 ++ struct event event;
570 ++ struct event_spec espec;
571 ++ struct pt_regs *regs;
572 ++ regs = task_pt_regs(current);
574 ++ espec.reason = sampling_reason;
575 ++ event.event_data=&espec;
577 ++ espec.pc=regs->ip;
578 ++ event.event_type=2;
579 ++ /* index in the event array currently set up */
580 ++ /* make sure the counters are loaded in the order we want them to show up*/
581 ++ (*rec_event)(&event, diff);
584 ++ /* next has been elected to run */
585 ++ next->last_interrupted=0;
588 + sched_info_switch(prev, next);
589 + if (likely(prev != next)) {
590 + next->timestamp = next->last_ran = now;
593 + jiffies_to_timespec(p->policy == SCHED_FIFO ?
594 + 0 : task_timeslice(p), &t);
595 + read_unlock(&tasklist_lock);
596 + retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
600 + jiffies_to_timespec(p->policy == SCHED_FIFO ?
601 + 0 : task_timeslice(p), &t);
602 + read_unlock(&tasklist_lock);
604 + retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
617 ++ #ifdef CONFIG_CHOPSTIX
618 ++ void (*rec_event)(void *,unsigned int) = NULL;
620 ++ /* To support safe calling from asm */
621 ++ asmlinkage void rec_event_asm (struct event *event_signature_in, unsigned int count) {
622 ++ struct pt_regs *regs;
623 ++ struct event_spec *es = event_signature_in->event_data;
624 ++ regs = task_pt_regs(current);
625 ++ event_signature_in->task=current;
627 ++ event_signature_in->count=1;
628 ++ (*rec_event)(event_signature_in, count);
630 ++ EXPORT_SYMBOL(rec_event);
631 ++ EXPORT_SYMBOL(in_sched_functions);
633 diff -Nurb linux-2.6.27-590/mm/memory.c linux-2.6.27-591/mm/memory.c
634 --- linux-2.6.27-590/mm/memory.c 2010-01-29 16:29:48.000000000 -0500
635 +++ linux-2.6.27-591/mm/memory.c 2010-01-29 16:30:22.000000000 -0500
638 #include <linux/swapops.h>
639 #include <linux/elf.h>
640 +#include <linux/arrays.h>
642 #include "internal.h"
644 @@ -2690,6 +2691,15 @@
648 +extern void (*rec_event)(void *,unsigned int);
651 + unsigned long dcookie;
653 + unsigned char reason;
658 * By the time we get here, we already hold the mm semaphore
660 @@ -2719,6 +2729,24 @@
664 +#ifdef CONFIG_CHOPSTIX
666 + struct event event;
667 + struct event_spec espec;
668 + struct pt_regs *regs;
670 + regs = task_pt_regs(current);
671 + pc = regs->ip & (unsigned int) ~4095;
673 + espec.reason = 0; /* alloc */
674 + event.event_data=&espec;
675 + event.task = current;
677 + event.event_type=5;
678 + (*rec_event)(&event, 1);
682 return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
685 diff -Nurb linux-2.6.27-590/mm/slab.c linux-2.6.27-591/mm/slab.c
686 --- linux-2.6.27-590/mm/slab.c 2010-01-29 16:29:48.000000000 -0500
687 +++ linux-2.6.27-591/mm/slab.c 2010-01-29 16:30:22.000000000 -0500
689 #include <linux/fault-inject.h>
690 #include <linux/rtmutex.h>
691 #include <linux/reciprocal_div.h>
692 +#include <linux/arrays.h>
693 #include <linux/debugobjects.h>
695 #include <asm/cacheflush.h>
700 +extern void (*rec_event)(void *,unsigned int);
703 + unsigned long dcookie;
705 + unsigned char reason;
711 @@ -3469,6 +3478,19 @@
712 local_irq_restore(save_flags);
713 objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
715 +#ifdef CONFIG_CHOPSTIX
716 + if (rec_event && objp) {
717 + struct event event;
718 + struct event_spec espec;
720 + espec.reason = 0; /* alloc */
721 + event.event_data=&espec;
722 + event.task = current;
724 + event.event_type=5;
725 + (*rec_event)(&event, cachep->buffer_size);
729 if (unlikely((flags & __GFP_ZERO) && objp))
730 memset(objp, 0, obj_size(cachep));
731 @@ -3578,12 +3600,26 @@
732 * Release an obj back to its cache. If the obj has a constructed state, it must
733 * be in this state _before_ it is released. Called with disabled ints.
735 -static inline void __cache_free(struct kmem_cache *cachep, void *objp)
736 +static inline void __cache_free(struct kmem_cache *cachep, void *objp, void *caller)
738 struct array_cache *ac = cpu_cache_get(cachep);
741 - objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
742 + objp = cache_free_debugcheck(cachep, objp, caller);
743 + #ifdef CONFIG_CHOPSTIX
744 + if (rec_event && objp) {
745 + struct event event;
746 + struct event_spec espec;
748 + espec.reason = 1; /* free */
749 + event.event_data=&espec;
750 + event.task = current;
752 + event.event_type=4;
753 + (*rec_event)(&event, cachep->buffer_size);
757 vx_slab_free(cachep);
760 @@ -3714,6 +3750,7 @@
763 struct kmem_cache *cachep;
766 /* If you want to save a few bytes .text space: replace
768 @@ -3741,10 +3778,17 @@
769 EXPORT_SYMBOL(__kmalloc_track_caller);
772 +#ifdef CONFIG_CHOPSTIX
773 +void *__kmalloc(size_t size, gfp_t flags)
775 + return __do_kmalloc(size, flags, __builtin_return_address(0));
778 void *__kmalloc(size_t size, gfp_t flags)
780 return __do_kmalloc(size, flags, NULL);
783 EXPORT_SYMBOL(__kmalloc);
786 @@ -3764,7 +3808,7 @@
787 debug_check_no_locks_freed(objp, obj_size(cachep));
788 if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
789 debug_check_no_obj_freed(objp, obj_size(cachep));
790 - __cache_free(cachep, objp);
791 + __cache_free(cachep, objp,__builtin_return_address(0));
792 local_irq_restore(flags);
794 EXPORT_SYMBOL(kmem_cache_free);
795 @@ -3790,7 +3834,7 @@
796 c = virt_to_cache(objp);
797 debug_check_no_locks_freed(objp, obj_size(c));
798 debug_check_no_obj_freed(objp, obj_size(c));
799 - __cache_free(c, (void *)objp);
800 + __cache_free(c, (void *)objp,__builtin_return_address(0));
801 local_irq_restore(flags);
803 EXPORT_SYMBOL(kfree);