Merge to Fedora kernel-2.6.18-1.2224_FC5 patched with stable patch-2.6.18.1-vs2.0...
[linux-2.6.git] / arch / i386 / oprofile / xenoprof.c
1 /**
2  * @file xenoprof.c
3  *
4  * @remark Copyright 2002 OProfile authors
5  * @remark Read the file COPYING
6  *
7  * @author John Levon <levon@movementarian.org>
8  *
9  * Modified by Aravind Menon and Jose Renato Santos for Xen
10  * These modifications are:
11  * Copyright (C) 2005 Hewlett-Packard Co.
12  */
13
14 #include <linux/init.h>
15 #include <linux/notifier.h>
16 #include <linux/smp.h>
17 #include <linux/oprofile.h>
18 #include <linux/sysdev.h>
19 #include <linux/slab.h>
20 #include <linux/interrupt.h>
21 #include <linux/vmalloc.h>
22 #include <asm/nmi.h>
23 #include <asm/msr.h>
24 #include <asm/apic.h>
25 #include <asm/pgtable.h>
26 #include <xen/evtchn.h>
27 #include "op_counter.h"
28
29 #include <xen/driver_util.h>
30 #include <xen/interface/xen.h>
31 #include <xen/interface/xenoprof.h>
32 #include <../../../drivers/oprofile/cpu_buffer.h>
33 #include <../../../drivers/oprofile/event_buffer.h>
34
35 #define MAX_XENOPROF_SAMPLES 16
36
37 static int xenoprof_start(void);
38 static void xenoprof_stop(void);
39
40 static int xenoprof_enabled = 0;
41 static unsigned int num_events = 0;
42 static int is_primary = 0;
43 static int active_defined;
44
45 /* sample buffers shared with Xen */
46 xenoprof_buf_t * xenoprof_buf[MAX_VIRT_CPUS];
47 /* Shared buffer area */
48 char * shared_buffer = NULL;
49 /* Number of buffers in shared area (one per VCPU) */
50 int nbuf;
51 /* Mappings of VIRQ_XENOPROF to irq number (per cpu) */
52 int ovf_irq[NR_CPUS];
53 /* cpu model type string - copied from Xen memory space on XENOPROF_init command */
54 char cpu_type[XENOPROF_CPU_TYPE_SIZE];
55
56 /* Passive sample buffers shared with Xen */
57 xenoprof_buf_t *p_xenoprof_buf[MAX_OPROF_DOMAINS][MAX_VIRT_CPUS];
58 /* Passive shared buffer area */
59 char *p_shared_buffer[MAX_OPROF_DOMAINS];
60
61 #ifdef CONFIG_PM
62
63 static int xenoprof_suspend(struct sys_device * dev, pm_message_t state)
64 {
65         if (xenoprof_enabled == 1)
66                 xenoprof_stop();
67         return 0;
68 }
69
70
71 static int xenoprof_resume(struct sys_device * dev)
72 {
73         if (xenoprof_enabled == 1)
74                 xenoprof_start();
75         return 0;
76 }
77
78
79 static struct sysdev_class oprofile_sysclass = {
80         set_kset_name("oprofile"),
81         .resume         = xenoprof_resume,
82         .suspend        = xenoprof_suspend
83 };
84
85
86 static struct sys_device device_oprofile = {
87         .id     = 0,
88         .cls    = &oprofile_sysclass,
89 };
90
91
92 static int __init init_driverfs(void)
93 {
94         int error;
95         if (!(error = sysdev_class_register(&oprofile_sysclass)))
96                 error = sysdev_register(&device_oprofile);
97         return error;
98 }
99
100
101 static void __exit exit_driverfs(void)
102 {
103         sysdev_unregister(&device_oprofile);
104         sysdev_class_unregister(&oprofile_sysclass);
105 }
106
107 #else
/* Without CONFIG_PM there is nothing to register: both calls are no-ops. */
#define init_driverfs() ((void)0)
#define exit_driverfs() ((void)0)
110 #endif /* CONFIG_PM */
111
112 unsigned long long oprofile_samples = 0;
113 unsigned long long p_oprofile_samples = 0;
114
115 unsigned int pdomains;
116 struct xenoprof_passive passive_domains[MAX_OPROF_DOMAINS];
117
118 static void xenoprof_add_pc(xenoprof_buf_t *buf, int is_passive)
119 {
120         int head, tail, size;
121
122         head = buf->event_head;
123         tail = buf->event_tail;
124         size = buf->event_size;
125
126         if (tail > head) {
127                 while (tail < size) {
128                         oprofile_add_pc(buf->event_log[tail].eip,
129                                         buf->event_log[tail].mode,
130                                         buf->event_log[tail].event);
131                         if (!is_passive)
132                                 oprofile_samples++;
133                         else
134                                 p_oprofile_samples++;
135                         tail++;
136                 }
137                 tail = 0;
138         }
139         while (tail < head) {
140                 oprofile_add_pc(buf->event_log[tail].eip,
141                                 buf->event_log[tail].mode,
142                                 buf->event_log[tail].event);
143                 if (!is_passive)
144                         oprofile_samples++;
145                 else
146                         p_oprofile_samples++;
147                 tail++;
148         }
149
150         buf->event_tail = tail;
151 }
152
153 static void xenoprof_handle_passive(void)
154 {
155         int i, j;
156         int flag_domain, flag_switch = 0;
157         
158         for (i = 0; i < pdomains; i++) {
159                 flag_domain = 0;
160                 for (j = 0; j < passive_domains[i].nbuf; j++) {
161                         xenoprof_buf_t *buf = p_xenoprof_buf[i][j];
162                         if (buf->event_head == buf->event_tail)
163                                 continue;
164                         if (!flag_domain) {
165                                 if (!oprofile_add_domain_switch(passive_domains[i].
166                                                                 domain_id))
167                                         goto done;
168                                 flag_domain = 1;
169                         }
170                         xenoprof_add_pc(buf, 1);
171                         flag_switch = 1;
172                 }
173         }
174 done:
175         if (flag_switch)
176                 oprofile_add_domain_switch(COORDINATOR_DOMAIN);
177 }
178
179 static irqreturn_t 
180 xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs * regs)
181 {
182         struct xenoprof_buf * buf;
183         int cpu;
184         static unsigned long flag;
185
186         cpu = smp_processor_id();
187         buf = xenoprof_buf[cpu];
188
189         xenoprof_add_pc(buf, 0);
190
191         if (is_primary && !test_and_set_bit(0, &flag)) {
192                 xenoprof_handle_passive();
193                 smp_mb__before_clear_bit();
194                 clear_bit(0, &flag);
195         }
196
197         return IRQ_HANDLED;
198 }
199
200
201 static void unbind_virq(void)
202 {
203         int i;
204
205         for_each_possible_cpu(i) {
206                 if (ovf_irq[i] >= 0) {
207                         unbind_from_irqhandler(ovf_irq[i], NULL);
208                         ovf_irq[i] = -1;
209                 }
210         }
211 }
212
213
214 static int bind_virq(void)
215 {
216         int i, result;
217
218         for_each_possible_cpu(i) {
219                 result = bind_virq_to_irqhandler(VIRQ_XENOPROF,
220                                                  i,
221                                                  xenoprof_ovf_interrupt,
222                                                  SA_INTERRUPT,
223                                                  "xenoprof",
224                                                  NULL);
225
226                 if (result < 0) {
227                         unbind_virq();
228                         return result;
229                 }
230
231                 ovf_irq[i] = result;
232         }
233                 
234         return 0;
235 }
236
237
238 static int map_xenoprof_buffer(int max_samples)
239 {
240         struct xenoprof_get_buffer get_buffer;
241         struct xenoprof_buf *buf;
242         int npages, ret, i;
243         struct vm_struct *area;
244
245         if ( shared_buffer )
246                 return 0;
247
248         get_buffer.max_samples = max_samples;
249
250         if ( (ret = HYPERVISOR_xenoprof_op(XENOPROF_get_buffer, &get_buffer)) )
251                 return ret;
252
253         nbuf = get_buffer.nbuf;
254         npages = (get_buffer.bufsize * nbuf - 1) / PAGE_SIZE + 1;
255
256         area = alloc_vm_area(npages * PAGE_SIZE);
257         if (area == NULL)
258                 return -ENOMEM;
259
260         if ( (ret = direct_kernel_remap_pfn_range(
261                       (unsigned long)area->addr,
262                       get_buffer.buf_maddr >> PAGE_SHIFT,
263                       npages * PAGE_SIZE, __pgprot(_KERNPG_TABLE), DOMID_SELF)) ) {
264                 vunmap(area->addr);
265                 return ret;
266         }
267
268         shared_buffer = area->addr;
269         for (i=0; i< nbuf; i++) {
270                 buf = (struct xenoprof_buf*) 
271                         &shared_buffer[i * get_buffer.bufsize];
272                 BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS);
273                 xenoprof_buf[buf->vcpu_id] = buf;
274         }
275
276         return 0;
277 }
278
279
280 static int xenoprof_setup(void)
281 {
282         int ret;
283         int i;
284
285         if ( (ret = map_xenoprof_buffer(MAX_XENOPROF_SAMPLES)) )
286                 return ret;
287
288         if ( (ret = bind_virq()) )
289                 return ret;
290
291         if (is_primary) {
292                 struct xenoprof_counter counter;
293
294                 /* Define dom0 as an active domain if not done yet */
295                 if (!active_defined) {
296                         domid_t domid;
297                         ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, NULL);
298                         if (ret)
299                                 goto err;
300                         domid = 0;
301                         ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid);
302                         if (ret)
303                                 goto err;
304                         active_defined = 1;
305                 }
306
307                 ret = HYPERVISOR_xenoprof_op(XENOPROF_reserve_counters, NULL);
308                 if (ret)
309                         goto err;
310                 for (i=0; i<num_events; i++) {
311                         counter.ind       = i;
312                         counter.count     = (uint64_t)counter_config[i].count;
313                         counter.enabled   = (uint32_t)counter_config[i].enabled;
314                         counter.event     = (uint32_t)counter_config[i].event;
315                         counter.kernel    = (uint32_t)counter_config[i].kernel;
316                         counter.user      = (uint32_t)counter_config[i].user;
317                         counter.unit_mask = (uint64_t)counter_config[i].unit_mask;
318                         HYPERVISOR_xenoprof_op(XENOPROF_counter, 
319                                                &counter);
320                 }
321                 ret = HYPERVISOR_xenoprof_op(XENOPROF_setup_events, NULL);
322
323                 if (ret)
324                         goto err;
325         }
326
327         ret = HYPERVISOR_xenoprof_op(XENOPROF_enable_virq, NULL);
328         if (ret)
329                 goto err;
330
331         xenoprof_enabled = 1;
332         return 0;
333  err:
334         unbind_virq();
335         return ret;
336 }
337
338
339 static void xenoprof_shutdown(void)
340 {
341         xenoprof_enabled = 0;
342
343         HYPERVISOR_xenoprof_op(XENOPROF_disable_virq, NULL);
344
345         if (is_primary) {
346                 HYPERVISOR_xenoprof_op(XENOPROF_release_counters, NULL);
347                 active_defined = 0;
348         }
349
350         unbind_virq();
351
352 }
353
354
355 static int xenoprof_start(void)
356 {
357         int ret = 0;
358
359         if (is_primary)
360                 ret = HYPERVISOR_xenoprof_op(XENOPROF_start, NULL);
361
362         return ret;
363 }
364
365
366 static void xenoprof_stop(void)
367 {
368         if (is_primary)
369                 HYPERVISOR_xenoprof_op(XENOPROF_stop, NULL);
370 }
371
372
373 static int xenoprof_set_active(int * active_domains,
374                                unsigned int adomains)
375 {
376         int ret = 0;
377         int i;
378         int set_dom0 = 0;
379         domid_t domid;
380
381         if (!is_primary)
382                 return 0;
383
384         if (adomains > MAX_OPROF_DOMAINS)
385                 return -E2BIG;
386
387         ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, NULL);
388         if (ret)
389                 return ret;
390
391         for (i=0; i<adomains; i++) {
392                 domid = active_domains[i];
393                 if (domid != active_domains[i]) {
394                         ret = -EINVAL;
395                         goto out;
396                 }
397                 ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid);
398                 if (ret)
399                         goto out;
400                 if (active_domains[i] == 0)
401                         set_dom0 = 1;
402         }
403         /* dom0 must always be active but may not be in the list */ 
404         if (!set_dom0) {
405                 domid = 0;
406                 ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid);
407         }
408
409 out:
410         if (ret)
411                 HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, NULL);
412         active_defined = !ret;
413         return ret;
414 }
415
416 static int xenoprof_set_passive(int * p_domains,
417                                 unsigned int pdoms)
418 {
419         int ret;
420         int i, j;
421         int npages;
422         struct xenoprof_buf *buf;
423         struct vm_struct *area;
424         pgprot_t prot = __pgprot(_KERNPG_TABLE);
425
426         if (!is_primary)
427                 return 0;
428
429         if (pdoms > MAX_OPROF_DOMAINS)
430                 return -E2BIG;
431
432         ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_passive_list, NULL);
433         if (ret)
434                 return ret;
435
436         for (i = 0; i < pdoms; i++) {
437                 passive_domains[i].domain_id = p_domains[i];
438                 passive_domains[i].max_samples = 2048;
439                 ret = HYPERVISOR_xenoprof_op(XENOPROF_set_passive,
440                                              &passive_domains[i]);
441                 if (ret)
442                         goto out;
443
444                 npages = (passive_domains[i].bufsize * passive_domains[i].nbuf - 1) / PAGE_SIZE + 1;
445
446                 area = alloc_vm_area(npages * PAGE_SIZE);
447                 if (area == NULL) {
448                         ret = -ENOMEM;
449                         goto out;
450                 }
451
452                 ret = direct_kernel_remap_pfn_range(
453                         (unsigned long)area->addr,
454                         passive_domains[i].buf_maddr >> PAGE_SHIFT,
455                         npages * PAGE_SIZE, prot, DOMID_SELF);
456                 if (ret) {
457                         vunmap(area->addr);
458                         goto out;
459                 }
460
461                 p_shared_buffer[i] = area->addr;
462
463                 for (j = 0; j < passive_domains[i].nbuf; j++) {
464                         buf = (struct xenoprof_buf *)
465                                 &p_shared_buffer[i][j * passive_domains[i].bufsize];
466                         BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS);
467                         p_xenoprof_buf[i][buf->vcpu_id] = buf;
468                 }
469
470         }
471
472         pdomains = pdoms;
473         return 0;
474
475 out:
476         for (j = 0; j < i; j++) {
477                 vunmap(p_shared_buffer[j]);
478                 p_shared_buffer[j] = NULL;
479         }
480
481         return ret;
482 }
483
484 struct op_counter_config counter_config[OP_MAX_COUNTER];
485
486 static int xenoprof_create_files(struct super_block * sb, struct dentry * root)
487 {
488         unsigned int i;
489
490         for (i = 0; i < num_events; ++i) {
491                 struct dentry * dir;
492                 char buf[2];
493  
494                 snprintf(buf, 2, "%d", i);
495                 dir = oprofilefs_mkdir(sb, root, buf);
496                 oprofilefs_create_ulong(sb, dir, "enabled",
497                                         &counter_config[i].enabled);
498                 oprofilefs_create_ulong(sb, dir, "event",
499                                         &counter_config[i].event);
500                 oprofilefs_create_ulong(sb, dir, "count",
501                                         &counter_config[i].count);
502                 oprofilefs_create_ulong(sb, dir, "unit_mask",
503                                         &counter_config[i].unit_mask);
504                 oprofilefs_create_ulong(sb, dir, "kernel",
505                                         &counter_config[i].kernel);
506                 oprofilefs_create_ulong(sb, dir, "user",
507                                         &counter_config[i].user);
508         }
509
510         return 0;
511 }
512
513
514 struct oprofile_operations xenoprof_ops = {
515         .create_files   = xenoprof_create_files,
516         .set_active     = xenoprof_set_active,
517         .set_passive    = xenoprof_set_passive,
518         .setup          = xenoprof_setup,
519         .shutdown       = xenoprof_shutdown,
520         .start          = xenoprof_start,
521         .stop           = xenoprof_stop
522 };
523
524
525 /* in order to get driverfs right */
526 static int using_xenoprof;
527
528 int __init oprofile_arch_init(struct oprofile_operations * ops)
529 {
530         struct xenoprof_init init;
531         int ret, i;
532
533         ret = HYPERVISOR_xenoprof_op(XENOPROF_init, &init);
534
535         if (!ret) {
536                 num_events = init.num_events;
537                 is_primary = init.is_primary;
538
539                 /* just in case - make sure we do not overflow event list 
540                    (i.e. counter_config list) */
541                 if (num_events > OP_MAX_COUNTER)
542                         num_events = OP_MAX_COUNTER;
543
544                 /*  cpu_type is detected by Xen */
545                 cpu_type[XENOPROF_CPU_TYPE_SIZE-1] = 0;
546                 strncpy(cpu_type, init.cpu_type, XENOPROF_CPU_TYPE_SIZE - 1);
547                 xenoprof_ops.cpu_type = cpu_type;
548
549                 init_driverfs();
550                 using_xenoprof = 1;
551                 *ops = xenoprof_ops;
552
553                 for (i=0; i<NR_CPUS; i++)
554                         ovf_irq[i] = -1;
555
556                 active_defined = 0;
557         }
558         printk(KERN_INFO "oprofile_arch_init: ret %d, events %d, "
559                "is_primary %d\n", ret, num_events, is_primary);
560         return ret;
561 }
562
563
564 void __exit oprofile_arch_exit(void)
565 {
566         int i;
567
568         if (using_xenoprof)
569                 exit_driverfs();
570
571         if (shared_buffer) {
572                 vunmap(shared_buffer);
573                 shared_buffer = NULL;
574         }
575         if (is_primary) {
576                 for (i = 0; i < pdomains; i++)
577                         if (p_shared_buffer[i]) {
578                                 vunmap(p_shared_buffer[i]);
579                                 p_shared_buffer[i] = NULL;
580                         }
581                 HYPERVISOR_xenoprof_op(XENOPROF_shutdown, NULL);
582         }
583
584 }