1 diff --git a/arch/Kconfig b/arch/Kconfig
2 index 1d07625..7d503e4 100644
5 @@ -13,9 +13,18 @@ config OPROFILE
10 + bool "Chopstix (PlanetLab)"
11 + depends on MODULES && OPROFILE
13 + Chopstix allows you to monitor various events by summarizing them
14 + in lossy data structures and transferring these data structures
15 + into user space. If in doubt, say "N".
23 depends on KALLSYMS && MODULES
24 diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
25 index 6649d09..5508d20 100644
26 --- a/arch/x86/kernel/asm-offsets_32.c
27 +++ b/arch/x86/kernel/asm-offsets_32.c
29 #include <linux/signal.h>
30 #include <linux/personality.h>
31 #include <linux/suspend.h>
32 +#include <linux/arrays.h>
33 #include <linux/kbuild.h>
34 #include <asm/ucontext.h>
37 #include <linux/lguest.h>
38 #include "../../../drivers/lguest/lg.h"
40 +#ifdef CONFIG_CHOPSTIX
41 +#define STACKOFFSET(sym, str, mem) \
42 + DEFINE(sym, offsetof(struct str, mem)-sizeof(struct str));
46 + unsigned long dcookie;
48 + unsigned int number;
52 /* workaround for a warning with -Wmissing-prototypes */
55 @@ -50,6 +63,18 @@ void foo(void)
56 OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
59 +#ifdef CONFIG_CHOPSTIX
60 + STACKOFFSET(TASK_thread, task_struct, thread);
61 + STACKOFFSET(THREAD_esp, thread_struct, sp);
62 + STACKOFFSET(EVENT_event_data, event, event_data);
63 + STACKOFFSET(EVENT_task, event, task);
64 + STACKOFFSET(EVENT_event_type, event, event_type);
65 + STACKOFFSET(SPEC_number, event_spec, number);
66 + DEFINE(EVENT_SIZE, sizeof(struct event));
67 + DEFINE(SPEC_SIZE, sizeof(struct event_spec));
68 + DEFINE(SPEC_EVENT_SIZE, sizeof(struct event_spec)+sizeof(struct event));
71 OFFSET(TI_task, thread_info, task);
72 OFFSET(TI_exec_domain, thread_info, exec_domain);
73 OFFSET(TI_flags, thread_info, flags);
74 diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
75 index 109792b..ef514b0 100644
76 --- a/arch/x86/kernel/entry_32.S
77 +++ b/arch/x86/kernel/entry_32.S
78 @@ -426,6 +426,34 @@ ENTRY(system_call)
79 cmpl $(nr_syscalls), %eax
82 +#ifdef CONFIG_CHOPSTIX
83 + /* Move Chopstix syscall probe here */
84 + /* Save and clobber: eax, ecx, ebp */
89 + subl $SPEC_EVENT_SIZE, %esp
90 + movl rec_event, %ecx
93 + # struct event is first, just below %ebp
94 + movl %eax, (SPEC_number-EVENT_SIZE)(%ebp)
95 + leal -SPEC_EVENT_SIZE(%ebp), %eax
96 + movl %eax, EVENT_event_data(%ebp)
97 + movl $6, EVENT_event_type(%ebp)
98 + movl rec_event, %edx
100 + leal -EVENT_SIZE(%ebp), %eax
104 + addl $SPEC_EVENT_SIZE, %esp
110 call *sys_call_table(,%eax,4)
111 movl %eax,PT_EAX(%esp) # store the return value
113 diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
114 index 3384255..cd535c7 100644
115 --- a/arch/x86/mm/fault.c
116 +++ b/arch/x86/mm/fault.c
117 @@ -79,6 +79,16 @@ static inline int notify_page_fault(struct pt_regs *regs)
121 +#ifdef CONFIG_CHOPSTIX
122 +extern void (*rec_event)(void *,unsigned int);
125 + unsigned long dcookie;
127 + unsigned char reason;
133 * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
134 diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
135 index 7ba78e6..ef379fb 100644
136 --- a/drivers/oprofile/cpu_buffer.c
137 +++ b/drivers/oprofile/cpu_buffer.c
139 #include <linux/oprofile.h>
140 #include <linux/vmalloc.h>
141 #include <linux/errno.h>
142 +#include <linux/arrays.h>
144 #include "event_buffer.h"
145 #include "cpu_buffer.h"
146 @@ -147,6 +148,17 @@ static void increment_head(struct oprofile_cpu_buffer * b)
150 +#ifdef CONFIG_CHOPSTIX
154 + unsigned long dcookie;
158 +extern void (*rec_event)(void *,unsigned int);
162 add_sample(struct oprofile_cpu_buffer * cpu_buf,
163 unsigned long pc, unsigned long event)
164 @@ -251,7 +263,24 @@ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
165 int is_kernel = !user_mode(regs);
166 unsigned long pc = profile_pc(regs);
168 +#ifdef CONFIG_CHOPSTIX
171 + struct event_spec espec;
172 + esig.task = current;
175 + esig.event_data=&espec;
176 + esig.event_type=event; /* index in the event array currently set up */
177 + /* make sure the counters are loaded in the order we want them to show up*/
178 + (*rec_event)(&esig, 1);
181 + oprofile_add_ext_sample(pc, regs, event, is_kernel);
184 oprofile_add_ext_sample(pc, regs, event, is_kernel);
188 void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
189 diff --git a/fs/bio.c b/fs/bio.c
190 index 3cba7ae..d888eb8 100644
194 #include <linux/workqueue.h>
195 #include <linux/blktrace_api.h>
196 #include <scsi/sg.h> /* for struct sg_iovec */
197 +#include <linux/arrays.h>
199 static struct kmem_cache *bio_slab __read_mostly;
201 @@ -44,6 +45,7 @@ static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
207 * fs_bio_set is the bio_set containing bio and iovec memory pools used by
208 * IO code that does not need private memory pools.
209 @@ -1171,6 +1173,17 @@ void bio_check_pages_dirty(struct bio *bio)
213 +#ifdef CONFIG_CHOPSTIX
216 + unsigned long dcookie;
218 + unsigned char reason;
221 +extern void (*rec_event)(void *,unsigned int);
225 * bio_endio - end I/O on a bio
227 @@ -1192,6 +1205,24 @@ void bio_endio(struct bio *bio, int error)
228 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
233 + struct event event;
234 + struct event_spec espec;
237 + espec.reason = 1;/*response */
239 + eip = bio->bi_end_io;
240 + event.event_data=&espec;
242 + event.event_type=3;
243 + /* index in the event array currently set up */
244 + /* make sure the counters are loaded in the order we want them to show up*/
245 + (*rec_event)(&event, bytes_done);
250 bio->bi_end_io(bio, error);
252 diff --git a/fs/exec.c b/fs/exec.c
253 index e557406..19bc9d8 100644
257 #include <linux/fdtable.h>
258 #include <linux/mm.h>
259 #include <linux/stat.h>
260 +#include <linux/dcookies.h>
261 #include <linux/fcntl.h>
262 #include <linux/smp_lock.h>
263 #include <linux/swap.h>
264 @@ -698,6 +699,13 @@ struct file *open_exec(const char *name)
268 + #ifdef CONFIG_CHOPSTIX
269 + unsigned long cookie;
270 + extern void (*rec_event)(void *, unsigned int);
271 + if (rec_event && !nd.path.dentry->d_cookie)
272 + get_dcookie(&nd.path, &cookie);
278 diff --git a/include/linux/arrays.h b/include/linux/arrays.h
280 index 0000000..7641a3c
282 +++ b/include/linux/arrays.h
284 +#ifndef __ARRAYS_H__
285 +#define __ARRAYS_H__
286 +#include <linux/list.h>
288 +#define SAMPLING_METHOD_DEFAULT 0
289 +#define SAMPLING_METHOD_LOG 1
291 +#define DEFAULT_ARRAY_SIZE 2048
293 +/* Every probe has an array handler */
295 +/* XXX - Optimize this structure */
297 +extern void (*rec_event)(void *,unsigned int);
298 +struct array_handler {
299 + struct list_head link;
300 + unsigned int (*hash_func)(void *);
301 + unsigned int (*sampling_func)(void *,int,void *);
302 + unsigned short size;
303 + unsigned int threshold;
304 + unsigned char **expcount;
305 + unsigned int sampling_method;
306 + unsigned int **arrays;
307 + unsigned int arraysize;
308 + unsigned int num_samples[2];
309 + void **epoch_samples; /* size-sized lists of samples */
310 + unsigned int (*serialize)(void *, void *);
311 + unsigned char code[5];
312 + unsigned int last_threshold;
316 + struct list_head link;
318 + unsigned int count;
319 + unsigned int event_type;
320 + struct task_struct *task;
323 diff --git a/include/linux/sched.h b/include/linux/sched.h
324 index 891fbda..05ba57f 100644
325 --- a/include/linux/sched.h
326 +++ b/include/linux/sched.h
327 @@ -1134,6 +1134,11 @@ struct task_struct {
328 cputime_t utime, stime, utimescaled, stimescaled;
330 cputime_t prev_utime, prev_stime;
332 + #ifdef CONFIG_CHOPSTIX
333 + unsigned long last_interrupted, last_ran_j;
336 unsigned long nvcsw, nivcsw; /* context switch counts */
337 struct timespec start_time; /* monotonic time */
338 struct timespec real_start_time; /* boot based time */
339 diff --git a/kernel/sched.c b/kernel/sched.c
340 index 2d66cdd..d1971b9 100644
344 * 1998-11-19 Implemented schedule_timeout() and related stuff
345 * by Andrea Arcangeli
346 * 2002-01-04 New ultra-scalable O(1) scheduler by Ingo Molnar:
347 - * hybrid priority-list and round-robin design with
348 + *		hybrid priority-list and round-robin design with
349 * an array-switch method of distributing timeslices
350 * and per-CPU runqueues. Cleanups and useful suggestions
351 * by Davide Libenzi, preemptible kernel bits by Robert Love.
353 #include <linux/ftrace.h>
354 #include <linux/vs_sched.h>
355 #include <linux/vs_cvirt.h>
356 +#include <linux/arrays.h>
359 #include <asm/irq_regs.h>
361 #include "sched_cpupri.h"
363 +#define INTERRUPTIBLE -1
367 * Convert user-nice values [ -20 ... 0 ... 19 ]
368 * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
369 @@ -2368,6 +2372,10 @@ static void __sched_fork(struct task_struct *p)
370 INIT_HLIST_HEAD(&p->preempt_notifiers);
373 +#ifdef CONFIG_CHOPSTIX
374 + p->last_ran_j = jiffies;
375 + p->last_interrupted = INTERRUPTIBLE;
378 * We mark the process as running here, but have not actually
379 * inserted it onto the runqueue yet. This guarantees that
380 @@ -4428,6 +4436,29 @@ pick_next_task(struct rq *rq, struct task_struct *prev)
384 +#ifdef CONFIG_CHOPSTIX
385 +void (*rec_event)(void *,unsigned int) = NULL;
386 +EXPORT_SYMBOL(rec_event);
390 + unsigned long dcookie;
391 + unsigned int count;
392 + unsigned int reason;
395 +/* To support safe calling from asm */
396 +asmlinkage void rec_event_asm (struct event *event_signature_in, unsigned int count) {
397 + struct pt_regs *regs;
398 + struct event_spec *es = event_signature_in->event_data;
399 + regs = task_pt_regs(current);
400 + event_signature_in->task=current;
402 + event_signature_in->count=1;
403 + (*rec_event)(event_signature_in, count);
408 * schedule() is the main scheduler function.
410 @@ -4482,6 +4513,54 @@ need_resched_nonpreemptible:
411 next = pick_next_task(rq, prev);
413 if (likely(prev != next)) {
415 +#ifdef CONFIG_CHOPSTIX
416 + /* Run only if the Chopstix module so decrees it */
418 + unsigned long diff;
419 + int sampling_reason;
420 + prev->last_ran_j = jiffies;
421 + if (next->last_interrupted!=INTERRUPTIBLE) {
422 + if (next->last_interrupted!=RUNNING) {
423 + diff = (jiffies-next->last_interrupted);
424 + sampling_reason = 0;/* BLOCKING */
427 + diff = jiffies-next->last_ran_j;
428 + sampling_reason = 1;/* PREEMPTION */
431 + if (diff >= HZ/10) {
432 + struct event event;
433 + struct event_spec espec;
434 + struct pt_regs *regs;
435 + regs = task_pt_regs(current);
437 + espec.reason = sampling_reason;
438 + event.event_data=&espec;
441 + event.event_type=2;
442 + /* index in the event array currently set up */
443 + /* make sure the counters are loaded in the order we want them to show up*/
444 + (*rec_event)(&event, diff);
447 + /* next has been elected to run */
448 + next->last_interrupted=0;
450 + /* An uninterruptible process just yielded. Record the current jiffy */
451 + if (prev->state & TASK_UNINTERRUPTIBLE) {
452 + prev->last_interrupted=jiffies;
454 + /* An interruptible process just yielded, or it got preempted.
455 + * Mark it as interruptible */
456 + else if (prev->state & TASK_INTERRUPTIBLE) {
457 + prev->last_interrupted=INTERRUPTIBLE;
462 sched_info_switch(prev, next);
465 @@ -5369,6 +5448,7 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask)
467 read_unlock(&tasklist_lock);
471 if ((current->euid != p->euid) && (current->euid != p->uid) &&
472 !capable(CAP_SYS_NICE))
473 diff --git a/mm/memory.c b/mm/memory.c
474 index a258b98..4fd7215 100644
479 #include <linux/swapops.h>
480 #include <linux/elf.h>
481 +#include <linux/arrays.h>
483 #include "internal.h"
485 @@ -2753,6 +2754,16 @@ out:
489 +#ifdef CONFIG_CHOPSTIX
490 +extern void (*rec_event)(void *,unsigned int);
493 + unsigned long dcookie;
495 + unsigned char reason;
500 * By the time we get here, we already hold the mm semaphore
502 @@ -2782,6 +2793,24 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
506 +#ifdef CONFIG_CHOPSTIX
508 + struct event event;
509 + struct event_spec espec;
510 + struct pt_regs *regs;
512 + regs = task_pt_regs(current);
513 + pc = regs->ip & (unsigned int) ~4095;
515 + espec.reason = 0; /* alloc */
516 + event.event_data=&espec;
517 + event.task = current;
519 + event.event_type=5;
520 + (*rec_event)(&event, 1);
524 return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
527 diff --git a/mm/slab.c b/mm/slab.c
528 index 88dd5a5..7f0a7c4 100644
532 #include <linux/fault-inject.h>
533 #include <linux/rtmutex.h>
534 #include <linux/reciprocal_div.h>
535 +#include <linux/arrays.h>
536 #include <linux/debugobjects.h>
538 #include <asm/cacheflush.h>
539 @@ -248,6 +249,16 @@ struct slab_rcu {
543 +#ifdef CONFIG_CHOPSTIX
544 +extern void (*rec_event)(void *,unsigned int);
547 + unsigned long dcookie;
549 + unsigned char reason;
556 @@ -3469,6 +3480,19 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
557 local_irq_restore(save_flags);
558 objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
560 +#ifdef CONFIG_CHOPSTIX
561 + if (rec_event && objp) {
562 + struct event event;
563 + struct event_spec espec;
565 + espec.reason = 0; /* alloc */
566 + event.event_data=&espec;
567 + event.task = current;
569 + event.event_type=5;
570 + (*rec_event)(&event, cachep->buffer_size);
574 if (unlikely((flags & __GFP_ZERO) && objp))
575 memset(objp, 0, obj_size(cachep));
576 @@ -3578,12 +3602,26 @@ free_done:
577 * Release an obj back to its cache. If the obj has a constructed state, it must
578 * be in this state _before_ it is released. Called with disabled ints.
580 -static inline void __cache_free(struct kmem_cache *cachep, void *objp)
581 +static inline void __cache_free(struct kmem_cache *cachep, void *objp, void *caller)
583 struct array_cache *ac = cpu_cache_get(cachep);
586 - objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
587 + objp = cache_free_debugcheck(cachep, objp, caller);
588 + #ifdef CONFIG_CHOPSTIX
589 + if (rec_event && objp) {
590 + struct event event;
591 + struct event_spec espec;
593 + espec.reason = 1; /* free */
594 + event.event_data=&espec;
595 + event.task = current;
597 + event.event_type=4;
598 + (*rec_event)(&event, cachep->buffer_size);
602 vx_slab_free(cachep);
605 @@ -3741,10 +3779,17 @@ void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller)
606 EXPORT_SYMBOL(__kmalloc_track_caller);
609 +#ifdef CONFIG_CHOPSTIX
610 +void *__kmalloc(size_t size, gfp_t flags)
612 + return __do_kmalloc(size, flags, __builtin_return_address(0));
615 void *__kmalloc(size_t size, gfp_t flags)
617 return __do_kmalloc(size, flags, NULL);
620 EXPORT_SYMBOL(__kmalloc);
623 @@ -3764,7 +3809,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
624 debug_check_no_locks_freed(objp, obj_size(cachep));
625 if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
626 debug_check_no_obj_freed(objp, obj_size(cachep));
627 - __cache_free(cachep, objp);
628 + __cache_free(cachep, objp,__builtin_return_address(0));
629 local_irq_restore(flags);
631 EXPORT_SYMBOL(kmem_cache_free);
632 @@ -3790,7 +3835,7 @@ void kfree(const void *objp)
633 c = virt_to_cache(objp);
634 debug_check_no_locks_freed(objp, obj_size(c));
635 debug_check_no_obj_freed(objp, obj_size(c));
636 - __cache_free(c, (void *)objp);
637 + __cache_free(c, (void *)objp,__builtin_return_address(0));
638 local_irq_restore(flags);
640 EXPORT_SYMBOL(kfree);