1 Index: linux-2.6.27.y/arch/Kconfig
2 ===================================================================
3 --- linux-2.6.27.y.orig/arch/Kconfig
4 +++ linux-2.6.27.y/arch/Kconfig
5 @@ -13,9 +13,18 @@ config OPROFILE
10 + bool "Chopstix (PlanetLab)"
11 + depends on MODULES && OPROFILE
13 + Chopstix allows you to monitor various events by summarizing them
14 + in lossy data structures and transferring these data structures
15 + into user space. If in doubt, say "N".
23 depends on KALLSYMS && MODULES
24 Index: linux-2.6.27.y/arch/x86/kernel/asm-offsets_32.c
25 ===================================================================
26 --- linux-2.6.27.y.orig/arch/x86/kernel/asm-offsets_32.c
27 +++ linux-2.6.27.y/arch/x86/kernel/asm-offsets_32.c
29 #include <linux/signal.h>
30 #include <linux/personality.h>
31 #include <linux/suspend.h>
32 +#include <linux/arrays.h>
33 #include <linux/kbuild.h>
34 #include <asm/ucontext.h>
37 #include <linux/lguest.h>
38 #include "../../../drivers/lguest/lg.h"
41 +#define STACKOFFSET(sym, str, mem) \
42 + DEFINE(sym, offsetof(struct str, mem)-sizeof(struct str));
44 /* workaround for a warning with -Wmissing-prototypes */
49 + unsigned long dcookie;
51 + unsigned int number;
56 OFFSET(IA32_SIGCONTEXT_ax, sigcontext, ax);
57 @@ -50,6 +62,16 @@ void foo(void)
58 OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
61 + STACKOFFSET(TASK_thread, task_struct, thread);
62 + STACKOFFSET(THREAD_esp, thread_struct, sp);
63 + STACKOFFSET(EVENT_event_data, event, event_data);
64 + STACKOFFSET(EVENT_task, event, task);
65 + STACKOFFSET(EVENT_event_type, event, event_type);
66 + STACKOFFSET(SPEC_number, event_spec, number);
67 + DEFINE(EVENT_SIZE, sizeof(struct event));
68 + DEFINE(SPEC_SIZE, sizeof(struct event_spec));
69 + DEFINE(SPEC_EVENT_SIZE, sizeof(struct event_spec)+sizeof(struct event));
71 OFFSET(TI_task, thread_info, task);
72 OFFSET(TI_exec_domain, thread_info, exec_domain);
73 OFFSET(TI_flags, thread_info, flags);
74 Index: linux-2.6.27.y/arch/x86/kernel/entry_32.S
75 ===================================================================
76 --- linux-2.6.27.y.orig/arch/x86/kernel/entry_32.S
77 +++ linux-2.6.27.y/arch/x86/kernel/entry_32.S
78 @@ -426,6 +426,33 @@ ENTRY(system_call)
79 cmpl $(nr_syscalls), %eax
82 + /* Move Chopstix syscall probe here */
83 + /* Save and clobber: eax, ecx, ebp */
88 + subl $SPEC_EVENT_SIZE, %esp
89 + movl rec_event, %ecx
92 + # struct event is first, just below %ebp
93 + movl %eax, (SPEC_number-EVENT_SIZE)(%ebp)
94 + leal -SPEC_EVENT_SIZE(%ebp), %eax
95 + movl %eax, EVENT_event_data(%ebp)
96 + movl $6, EVENT_event_type(%ebp)
97 + movl rec_event, %edx
99 + leal -EVENT_SIZE(%ebp), %eax
103 + addl $SPEC_EVENT_SIZE, %esp
109 call *sys_call_table(,%eax,4)
110 movl %eax,PT_EAX(%esp) # store the return value
112 Index: linux-2.6.27.y/arch/x86/mm/fault.c
113 ===================================================================
114 --- linux-2.6.27.y.orig/arch/x86/mm/fault.c
115 +++ linux-2.6.27.y/arch/x86/mm/fault.c
116 @@ -79,6 +79,15 @@ static inline int notify_page_fault(stru
121 +extern void (*rec_event)(void *,unsigned int);
124 + unsigned long dcookie;
126 + unsigned char reason;
131 * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
132 Index: linux-2.6.27.y/drivers/oprofile/cpu_buffer.c
133 ===================================================================
134 --- linux-2.6.27.y.orig/drivers/oprofile/cpu_buffer.c
135 +++ linux-2.6.27.y/drivers/oprofile/cpu_buffer.c
137 #include <linux/oprofile.h>
138 #include <linux/vmalloc.h>
139 #include <linux/errno.h>
140 +#include <linux/arrays.h>
142 #include "event_buffer.h"
143 #include "cpu_buffer.h"
144 @@ -147,6 +148,17 @@ static void increment_head(struct oprofi
148 +#ifdef CONFIG_CHOPSTIX
152 + unsigned long dcookie;
156 +extern void (*rec_event)(void *,unsigned int);
160 add_sample(struct oprofile_cpu_buffer * cpu_buf,
161 unsigned long pc, unsigned long event)
162 @@ -155,6 +167,7 @@ add_sample(struct oprofile_cpu_buffer *
164 entry->event = event;
165 increment_head(cpu_buf);
170 @@ -250,8 +263,28 @@ void oprofile_add_sample(struct pt_regs
172 int is_kernel = !user_mode(regs);
173 unsigned long pc = profile_pc(regs);
176 +#ifdef CONFIG_CHOPSTIX
179 + struct event_spec espec;
180 + esig.task = current;
183 + esig.event_data=&espec;
184 + esig.event_type=event; /* index in the event array currently set up */
185 + /* make sure the counters are loaded in the order we want them to show up*/
186 + (*rec_event)(&esig, 1);
189 oprofile_add_ext_sample(pc, regs, event, is_kernel);
192 + oprofile_add_ext_sample(pc, regs, event, is_kernel);
198 void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
199 Index: linux-2.6.27.y/fs/bio.c
200 ===================================================================
201 --- linux-2.6.27.y.orig/fs/bio.c
202 +++ linux-2.6.27.y/fs/bio.c
204 #include <linux/workqueue.h>
205 #include <linux/blktrace_api.h>
206 #include <scsi/sg.h> /* for struct sg_iovec */
207 +#include <linux/arrays.h>
209 static struct kmem_cache *bio_slab __read_mostly;
211 @@ -44,6 +45,7 @@ static struct biovec_slab bvec_slabs[BIO
217 * fs_bio_set is the bio_set containing bio and iovec memory pools used by
218 * IO code that does not need private memory pools.
219 @@ -1171,6 +1173,14 @@ void bio_check_pages_dirty(struct bio *b
225 + unsigned long dcookie;
227 + unsigned char reason;
230 +extern void (*rec_event)(void *,unsigned int);
232 * bio_endio - end I/O on a bio
234 @@ -1192,6 +1202,24 @@ void bio_endio(struct bio *bio, int erro
235 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
240 + struct event event;
241 + struct event_spec espec;
244 + espec.reason = 1;/*response */
246 + eip = bio->bi_end_io;
247 + event.event_data=&espec;
249 + event.event_type=3;
250 + /* index in the event array currently set up */
251 + /* make sure the counters are loaded in the order we want them to show up*/
252 + (*rec_event)(&event, bytes_done);
257 bio->bi_end_io(bio, error);
259 Index: linux-2.6.27.y/fs/exec.c
260 ===================================================================
261 --- linux-2.6.27.y.orig/fs/exec.c
262 +++ linux-2.6.27.y/fs/exec.c
264 #include <linux/fdtable.h>
265 #include <linux/mm.h>
266 #include <linux/stat.h>
267 +#include <linux/dcookies.h>
268 #include <linux/fcntl.h>
269 #include <linux/smp_lock.h>
270 #include <linux/swap.h>
271 @@ -698,6 +699,13 @@ struct file *open_exec(const char *name)
275 + #ifdef CONFIG_CHOPSTIX
276 + unsigned long cookie;
277 + extern void (*rec_event)(void *, unsigned int);
278 + if (rec_event && !nd.path.dentry->d_cookie)
279 + get_dcookie(&nd.path, &cookie);
285 Index: linux-2.6.27.y/include/linux/arrays.h
286 ===================================================================
288 +++ linux-2.6.27.y/include/linux/arrays.h
290 +#ifndef __ARRAYS_H__
291 +#define __ARRAYS_H__
292 +#include <linux/list.h>
294 +#define SAMPLING_METHOD_DEFAULT 0
295 +#define SAMPLING_METHOD_LOG 1
297 +/* Every probe has an array handler */
299 +/* XXX - Optimize this structure */
301 +extern void (*rec_event)(void *,unsigned int);
302 +struct array_handler {
303 + struct list_head link;
304 + unsigned int (*hash_func)(void *);
305 + unsigned int (*sampling_func)(void *,int,void *);
306 + unsigned short size;
307 + unsigned int threshold;
308 + unsigned char **expcount;
309 + unsigned int sampling_method;
310 + unsigned int **arrays;
311 + unsigned int arraysize;
312 + unsigned int num_samples[2];
313 + void **epoch_samples; /* size-sized lists of samples */
314 + unsigned int (*serialize)(void *, void *);
315 + unsigned char code[5];
319 + struct list_head link;
321 + unsigned int count;
322 + unsigned int event_type;
323 + struct task_struct *task;
326 Index: linux-2.6.27.y/include/linux/sched.h
327 ===================================================================
328 --- linux-2.6.27.y.orig/include/linux/sched.h
329 +++ linux-2.6.27.y/include/linux/sched.h
330 @@ -1137,6 +1137,11 @@ struct task_struct {
331 cputime_t utime, stime, utimescaled, stimescaled;
333 cputime_t prev_utime, prev_stime;
335 + #ifdef CONFIG_CHOPSTIX
336 + unsigned long last_interrupted, last_ran_j;
339 unsigned long nvcsw, nivcsw; /* context switch counts */
340 struct timespec start_time; /* monotonic time */
341 struct timespec real_start_time; /* boot based time */
342 Index: linux-2.6.27.y/kernel/sched.c
343 ===================================================================
344 --- linux-2.6.27.y.orig/kernel/sched.c
345 +++ linux-2.6.27.y/kernel/sched.c
347 * 1998-11-19 Implemented schedule_timeout() and related stuff
348 * by Andrea Arcangeli
349 * 2002-01-04 New ultra-scalable O(1) scheduler by Ingo Molnar:
350 - * hybrid priority-list and round-robin design with
351 + * hybrid priority-list and round-robin design with
352 * an array-switch method of distributing timeslices
353 * and per-CPU runqueues. Cleanups and useful suggestions
354 * by Davide Libenzi, preemptible kernel bits by Robert Love.
356 #include <linux/ftrace.h>
357 #include <linux/vs_sched.h>
358 #include <linux/vs_cvirt.h>
359 +#include <linux/arrays.h>
362 #include <asm/irq_regs.h>
364 #include "sched_cpupri.h"
366 +#define INTERRUPTIBLE -1
370 * Convert user-nice values [ -20 ... 0 ... 19 ]
371 * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
372 @@ -2376,6 +2380,10 @@ static void __sched_fork(struct task_str
373 INIT_HLIST_HEAD(&p->preempt_notifiers);
376 +#ifdef CONFIG_CHOPSTIX
377 + p->last_ran_j = jiffies;
378 + p->last_interrupted = INTERRUPTIBLE;
381 * We mark the process as running here, but have not actually
382 * inserted it onto the runqueue yet. This guarantees that
383 @@ -4436,6 +4444,29 @@ pick_next_task(struct rq *rq, struct tas
387 +void (*rec_event)(void *,unsigned int) = NULL;
388 +EXPORT_SYMBOL(rec_event);
389 +#ifdef CONFIG_CHOPSTIX
393 + unsigned long dcookie;
394 + unsigned int count;
395 + unsigned int reason;
398 +/* To support safe calling from asm */
399 +asmlinkage void rec_event_asm (struct event *event_signature_in, unsigned int count) {
400 + struct pt_regs *regs;
401 + struct event_spec *es = event_signature_in->event_data;
402 + regs = task_pt_regs(current);
403 + event_signature_in->task=current;
405 + event_signature_in->count=1;
406 + (*rec_event)(event_signature_in, count);
411 * schedule() is the main scheduler function.
413 @@ -4495,6 +4526,61 @@ need_resched_nonpreemptible:
414 next = pick_next_task(rq, prev);
416 if (likely(prev != next)) {
418 +#ifdef CONFIG_CHOPSTIX
419 + /* Run only if the Chopstix module so decrees it */
421 + unsigned long diff;
422 + int sampling_reason;
423 + prev->last_ran_j = jiffies;
424 + if (next->last_interrupted!=INTERRUPTIBLE) {
425 + if (next->last_interrupted!=RUNNING) {
426 + diff = (jiffies-next->last_interrupted);
427 + sampling_reason = 0;/* BLOCKING */
430 + diff = jiffies-next->last_ran_j;
431 + sampling_reason = 1;/* PREEMPTION */
434 + if (diff >= HZ/10) {
435 + struct event_spec {
437 + unsigned long dcookie;
438 + unsigned int count;
439 + unsigned int reason;
442 + struct event event;
443 + struct event_spec espec;
444 + struct pt_regs *regs;
445 + regs = task_pt_regs(current);
447 + espec.reason = sampling_reason;
448 + event.event_data=&espec;
451 + event.event_type=2;
452 + /* index in the event array currently set up */
453 + /* make sure the counters are loaded in the order we want them to show up*/
454 + (*rec_event)(&event, diff);
457 + /* next has been elected to run */
458 + next->last_interrupted=0;
460 + /* An uninterruptible process just yielded. Record the current jiffy */
461 + if (prev->state & TASK_UNINTERRUPTIBLE) {
462 + prev->last_interrupted=jiffies;
464 + /* An interruptible process just yielded, or it got preempted.
465 + * Mark it as interruptible */
466 + else if (prev->state & TASK_INTERRUPTIBLE) {
467 + prev->last_interrupted=INTERRUPTIBLE;
472 sched_info_switch(prev, next);
475 @@ -5382,6 +5468,7 @@ long sched_setaffinity(pid_t pid, const
477 read_unlock(&tasklist_lock);
481 if ((current->euid != p->euid) && (current->euid != p->uid) &&
482 !capable(CAP_SYS_NICE))
483 Index: linux-2.6.27.y/mm/memory.c
484 ===================================================================
485 --- linux-2.6.27.y.orig/mm/memory.c
486 +++ linux-2.6.27.y/mm/memory.c
489 #include <linux/swapops.h>
490 #include <linux/elf.h>
491 +#include <linux/arrays.h>
493 #include "internal.h"
495 @@ -2753,6 +2754,15 @@ out:
499 +extern void (*rec_event)(void *,unsigned int);
502 + unsigned long dcookie;
504 + unsigned char reason;
509 * By the time we get here, we already hold the mm semaphore
511 @@ -2782,6 +2792,24 @@ int handle_mm_fault(struct mm_struct *mm
515 +#ifdef CONFIG_CHOPSTIX
517 + struct event event;
518 + struct event_spec espec;
519 + struct pt_regs *regs;
521 + regs = task_pt_regs(current);
522 + pc = regs->ip & (unsigned int) ~4095;
524 + espec.reason = 0; /* alloc */
525 + event.event_data=&espec;
526 + event.task = current;
528 + event.event_type=5;
529 + (*rec_event)(&event, 1);
533 return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
536 Index: linux-2.6.27.y/mm/slab.c
537 ===================================================================
538 --- linux-2.6.27.y.orig/mm/slab.c
539 +++ linux-2.6.27.y/mm/slab.c
541 #include <linux/fault-inject.h>
542 #include <linux/rtmutex.h>
543 #include <linux/reciprocal_div.h>
544 +#include <linux/arrays.h>
545 #include <linux/debugobjects.h>
547 #include <asm/cacheflush.h>
548 @@ -248,6 +249,14 @@ struct slab_rcu {
552 +extern void (*rec_event)(void *,unsigned int);
555 + unsigned long dcookie;
557 + unsigned char reason;
563 @@ -3469,6 +3478,19 @@ __cache_alloc(struct kmem_cache *cachep,
564 local_irq_restore(save_flags);
565 objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
567 +#ifdef CONFIG_CHOPSTIX
568 + if (rec_event && objp) {
569 + struct event event;
570 + struct event_spec espec;
572 + espec.reason = 0; /* alloc */
573 + event.event_data=&espec;
574 + event.task = current;
576 + event.event_type=5;
577 + (*rec_event)(&event, cachep->buffer_size);
581 if (unlikely((flags & __GFP_ZERO) && objp))
582 memset(objp, 0, obj_size(cachep));
583 @@ -3578,12 +3600,26 @@ free_done:
584 * Release an obj back to its cache. If the obj has a constructed state, it must
585 * be in this state _before_ it is released. Called with disabled ints.
587 -static inline void __cache_free(struct kmem_cache *cachep, void *objp)
588 +static inline void __cache_free(struct kmem_cache *cachep, void *objp, void *caller)
590 struct array_cache *ac = cpu_cache_get(cachep);
593 - objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
594 + objp = cache_free_debugcheck(cachep, objp, caller);
595 + #ifdef CONFIG_CHOPSTIX
596 + if (rec_event && objp) {
597 + struct event event;
598 + struct event_spec espec;
600 + espec.reason = 1; /* free */
601 + event.event_data=&espec;
602 + event.task = current;
604 + event.event_type=4;
605 + (*rec_event)(&event, cachep->buffer_size);
609 vx_slab_free(cachep);
612 @@ -3714,6 +3750,7 @@ static __always_inline void *__do_kmallo
615 struct kmem_cache *cachep;
618 /* If you want to save a few bytes .text space: replace
620 @@ -3741,10 +3778,17 @@ void *__kmalloc_track_caller(size_t size
621 EXPORT_SYMBOL(__kmalloc_track_caller);
624 +#ifdef CONFIG_CHOPSTIX
625 +void *__kmalloc(size_t size, gfp_t flags)
627 + return __do_kmalloc(size, flags, __builtin_return_address(0));
630 void *__kmalloc(size_t size, gfp_t flags)
632 return __do_kmalloc(size, flags, NULL);
635 EXPORT_SYMBOL(__kmalloc);
638 @@ -3764,7 +3808,7 @@ void kmem_cache_free(struct kmem_cache *
639 debug_check_no_locks_freed(objp, obj_size(cachep));
640 if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
641 debug_check_no_obj_freed(objp, obj_size(cachep));
642 - __cache_free(cachep, objp);
643 + __cache_free(cachep, objp,__builtin_return_address(0));
644 local_irq_restore(flags);
646 EXPORT_SYMBOL(kmem_cache_free);
647 @@ -3790,7 +3834,7 @@ void kfree(const void *objp)
648 c = virt_to_cache(objp);
649 debug_check_no_locks_freed(objp, obj_size(c));
650 debug_check_no_obj_freed(objp, obj_size(c));
651 - __cache_free(c, (void *)objp);
652 + __cache_free(c, (void *)objp,__builtin_return_address(0));
653 local_irq_restore(flags);
655 EXPORT_SYMBOL(kfree);