/**
 * @file xenoprof.c
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon <levon@movementarian.org>
 *
 * Modified by Aravind Menon and Jose Renato Santos for Xen
 * These modifications are:
 * Copyright (C) 2005 Hewlett-Packard Co.
 */

#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/oprofile.h>
#include <linux/sysdev.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/vmalloc.h>
#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/apic.h>
#include <asm/pgtable.h>
#include <xen/evtchn.h>
#include "op_counter.h"

#include <xen/interface/xen.h>
#include <xen/interface/xenoprof.h>
#include "../../../drivers/oprofile/cpu_buffer.h"
#include "../../../drivers/oprofile/event_buffer.h"

static int xenoprof_start(void);
static void xenoprof_stop(void);

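/*
 * Maps the machine-address range of the Xen sample buffers into the
 * kernel's vmalloc area (torn down later with vunmap()); only declared
 * here, the definition lives elsewhere in the Xen-patched tree.
 */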
void * vm_map_xen_pages(unsigned long maddr, int vm_size, pgprot_t prot);

static int xenoprof_enabled = 0;
static unsigned int num_events = 0;
static int is_primary = 0;
static int active_defined;

/* sample buffers shared with Xen */
xenoprof_buf_t * xenoprof_buf[MAX_VIRT_CPUS];
/* Shared buffer area */
char * shared_buffer;
/* Number of buffers in shared area (one per VCPU) */
int nbuf;
/* Mappings of VIRQ_XENOPROF to irq number (per cpu) */
int ovf_irq[NR_CPUS];
/* cpu model type string - copied from Xen memory space on XENOPROF_init command */
char cpu_type[XENOPROF_CPU_TYPE_SIZE];

/* Passive sample buffers shared with Xen */
xenoprof_buf_t *p_xenoprof_buf[MAX_OPROF_DOMAINS][MAX_VIRT_CPUS];
/* Passive shared buffer area */
char *p_shared_buffer[MAX_OPROF_DOMAINS];

#ifdef CONFIG_PM

static int xenoprof_suspend(struct sys_device * dev, pm_message_t state)
{
        if (xenoprof_enabled == 1)
                xenoprof_stop();
        return 0;
}


static int xenoprof_resume(struct sys_device * dev)
{
        if (xenoprof_enabled == 1)
                xenoprof_start();
        return 0;
}


static struct sysdev_class oprofile_sysclass = {
        set_kset_name("oprofile"),
        .resume         = xenoprof_resume,
        .suspend        = xenoprof_suspend
};


static struct sys_device device_oprofile = {
        .id     = 0,
        .cls    = &oprofile_sysclass,
};


static int __init init_driverfs(void)
{
        int error;
        if (!(error = sysdev_class_register(&oprofile_sysclass)))
                error = sysdev_register(&device_oprofile);
        return error;
}


static void __exit exit_driverfs(void)
{
        sysdev_unregister(&device_oprofile);
        sysdev_class_unregister(&oprofile_sysclass);
}

#else
#define init_driverfs() do { } while (0)
#define exit_driverfs() do { } while (0)
#endif /* CONFIG_PM */

unsigned long long oprofile_samples = 0;
unsigned long long p_oprofile_samples = 0;

unsigned int pdomains;
struct xenoprof_passive passive_domains[MAX_OPROF_DOMAINS];

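/*
 * Drain one sample buffer shared with Xen into the oprofile event buffer.
 * The buffer is a ring of event_size entries indexed by event_head
 * (advanced by Xen) and event_tail (advanced here); every sample between
 * tail and head is forwarded via oprofile_add_pc() and the tail is moved
 * past it, wrapping at event_size.
 */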
static void xenoprof_add_pc(xenoprof_buf_t *buf, int is_passive)
{
        int head, tail, size;

        head = buf->event_head;
        tail = buf->event_tail;
        size = buf->event_size;

        if (tail > head) {
                while (tail < size) {
                        oprofile_add_pc(buf->event_log[tail].eip,
                                        buf->event_log[tail].mode,
                                        buf->event_log[tail].event);
                        if (!is_passive)
                                oprofile_samples++;
                        else
                                p_oprofile_samples++;
                        tail++;
                }
                tail = 0;
        }
        while (tail < head) {
                oprofile_add_pc(buf->event_log[tail].eip,
                                buf->event_log[tail].mode,
                                buf->event_log[tail].event);
                if (!is_passive)
                        oprofile_samples++;
                else
                        p_oprofile_samples++;
                tail++;
        }

        buf->event_tail = tail;
}

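/*
 * Flush the sample buffers of all passive domains.  Before the first
 * non-empty buffer of a domain is drained, a domain-switch record is
 * emitted so its samples are attributed to that domain; once every domain
 * has been handled, switch back to the coordinator (this) domain.
 */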
static void xenoprof_handle_passive(void)
{
        int i, j;
        int flag_domain, flag_switch = 0;

        for (i = 0; i < pdomains; i++) {
                flag_domain = 0;
                for (j = 0; j < passive_domains[i].nbuf; j++) {
                        xenoprof_buf_t *buf = p_xenoprof_buf[i][j];
                        if (buf->event_head == buf->event_tail)
                                continue;
                        if (!flag_domain) {
                                if (!oprofile_add_domain_switch(passive_domains[i].domain_id))
                                        goto done;
                                flag_domain = 1;
                        }
                        xenoprof_add_pc(buf, 1);
                        flag_switch = 1;
                }
        }
 done:
        if (flag_switch)
                oprofile_add_domain_switch(COORDINATOR_DOMAIN);

}

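/*
 * VIRQ_XENOPROF handler: drain this CPU's active-domain sample buffer.
 * On the primary (coordinator) domain the passive-domain buffers are
 * drained as well; the test_and_set_bit() on 'flag' keeps only one CPU
 * at a time inside xenoprof_handle_passive().
 */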
static irqreturn_t
xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs * regs)
{
        struct xenoprof_buf * buf;
        int cpu;
        static unsigned long flag;

        cpu = smp_processor_id();
        buf = xenoprof_buf[cpu];

        xenoprof_add_pc(buf, 0);

        if (is_primary && !test_and_set_bit(0, &flag)) {
                xenoprof_handle_passive();
                clear_bit(0, &flag);
        }

        return IRQ_HANDLED;
}


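/*
 * Bind/unbind VIRQ_XENOPROF to a per-CPU interrupt handler.  ovf_irq[]
 * records the irq returned for each CPU (-1 when unbound) so a partial
 * failure in bind_virq() can be rolled back by unbind_virq().
 */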
static void unbind_virq(void)
{
        int i;

        for_each_cpu(i) {
                if (ovf_irq[i] >= 0) {
                        unbind_from_irqhandler(ovf_irq[i], NULL);
                        ovf_irq[i] = -1;
                }
        }
}


static int bind_virq(void)
{
        int i, result;

        for_each_cpu(i) {
                result = bind_virq_to_irqhandler(VIRQ_XENOPROF,
                                                 i,
                                                 xenoprof_ovf_interrupt,
                                                 SA_INTERRUPT,
                                                 "xenoprof",
                                                 NULL);

                if (result < 0) {
                        unbind_virq();
                        return result;
                }

                ovf_irq[i] = result;
        }

        return 0;
}


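/*
 * Called when the oprofile daemon starts a profiling session.  Every
 * domain binds the overflow VIRQ; in addition the primary domain makes
 * dom0 active if no active set has been defined yet, reserves the
 * hardware counters, pushes the per-counter configuration to Xen and
 * programs the events.
 */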
static int xenoprof_setup(void)
{
        int ret;
        int i;

        ret = bind_virq();
        if (ret)
                return ret;

        if (is_primary) {
                struct xenoprof_counter counter;

                /* Define dom0 as an active domain if not done yet */
                if (!active_defined) {
                        domid_t domid;
                        ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, NULL);
                        if (ret)
                                goto err;
                        domid = 0;
                        ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid);
                        if (ret)
                                goto err;
                        active_defined = 1;
                }

                ret = HYPERVISOR_xenoprof_op(XENOPROF_reserve_counters, NULL);
                if (ret)
                        goto err;
                for (i = 0; i < num_events; i++) {
                        counter.ind       = i;
                        counter.count     = (uint64_t)counter_config[i].count;
                        counter.enabled   = (uint32_t)counter_config[i].enabled;
                        counter.event     = (uint32_t)counter_config[i].event;
                        counter.kernel    = (uint32_t)counter_config[i].kernel;
                        counter.user      = (uint32_t)counter_config[i].user;
                        counter.unit_mask = (uint64_t)counter_config[i].unit_mask;
                        HYPERVISOR_xenoprof_op(XENOPROF_counter,
                                               &counter);
                }
                ret = HYPERVISOR_xenoprof_op(XENOPROF_setup_events, NULL);

                if (ret)
                        goto err;
        }

        ret = HYPERVISOR_xenoprof_op(XENOPROF_enable_virq, NULL);
        if (ret)
                goto err;

        xenoprof_enabled = 1;
        return 0;
 err:
        unbind_virq();
        return ret;
}


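/*
 * Tear down a profiling session: disable the overflow VIRQ, release the
 * hardware counters (primary domain only) and unbind the per-CPU handlers.
 */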
static void xenoprof_shutdown(void)
{
        xenoprof_enabled = 0;

        HYPERVISOR_xenoprof_op(XENOPROF_disable_virq, NULL);

        if (is_primary) {
                HYPERVISOR_xenoprof_op(XENOPROF_release_counters, NULL);
                active_defined = 0;
        }

        unbind_virq();

}


static int xenoprof_start(void)
{
        int ret = 0;

        if (is_primary)
                ret = HYPERVISOR_xenoprof_op(XENOPROF_start, NULL);

        return ret;
}


static void xenoprof_stop(void)
{
        if (is_primary)
                HYPERVISOR_xenoprof_op(XENOPROF_stop, NULL);
}


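/*
 * Replace Xen's list of active domains with the one supplied by the
 * oprofile daemon.  Each entry is checked to fit in domid_t before it is
 * passed to Xen, and dom0 is appended if the daemon did not list it,
 * since the coordinator domain must always be active.
 */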
static int xenoprof_set_active(int * active_domains,
                               unsigned int adomains)
{
        int ret = 0;
        int i;
        int set_dom0 = 0;
        domid_t domid;

        if (!is_primary)
                return 0;

        if (adomains > MAX_OPROF_DOMAINS)
                return -E2BIG;

        ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, NULL);
        if (ret)
                return ret;

        for (i = 0; i < adomains; i++) {
                domid = active_domains[i];
                if (domid != active_domains[i]) {
                        ret = -EINVAL;
                        goto out;
                }
                ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid);
                if (ret)
                        goto out;
                if (active_domains[i] == 0)
                        set_dom0 = 1;
        }
        /* dom0 must always be active but may not be in the list */
        if (!set_dom0) {
                domid = 0;
                ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid);
        }

out:
        if (ret)
                HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, NULL);
        active_defined = !ret;
        return ret;
}

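/*
 * Register the passive domains requested by the daemon and map each
 * domain's sample buffers, returned by Xen as a machine address, into
 * this kernel's address space.  On failure, buffers mapped so far are
 * unmapped again.
 */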
static int xenoprof_set_passive(int * p_domains,
                                unsigned int pdoms)
{
        int ret;
        int i, j;
        int vm_size;
        int npages;
        struct xenoprof_buf *buf;
        pgprot_t prot = __pgprot(_KERNPG_TABLE);

        if (!is_primary)
                return 0;

        if (pdoms > MAX_OPROF_DOMAINS)
                return -E2BIG;

        ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_passive_list, NULL);
        if (ret)
                return ret;

        for (i = 0; i < pdoms; i++) {
                passive_domains[i].domain_id = p_domains[i];
                passive_domains[i].max_samples = 2048;
                ret = HYPERVISOR_xenoprof_op(XENOPROF_set_passive, &passive_domains[i]);
                if (ret)
                        goto out;

                npages = (passive_domains[i].bufsize * passive_domains[i].nbuf - 1) / PAGE_SIZE + 1;
                vm_size = npages * PAGE_SIZE;

                p_shared_buffer[i] = (char *)vm_map_xen_pages(passive_domains[i].buf_maddr,
                                                              vm_size, prot);
                if (!p_shared_buffer[i]) {
                        ret = -ENOMEM;
                        goto out;
                }

                for (j = 0; j < passive_domains[i].nbuf; j++) {
                        buf = (struct xenoprof_buf *)
                                &p_shared_buffer[i][j * passive_domains[i].bufsize];
                        BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS);
                        p_xenoprof_buf[i][buf->vcpu_id] = buf;
                }

        }

        pdomains = pdoms;
        return 0;

out:
        for (j = 0; j < i; j++) {
                vunmap(p_shared_buffer[j]);
                p_shared_buffer[j] = NULL;
        }

        return ret;
}

struct op_counter_config counter_config[OP_MAX_COUNTER];

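/*
 * Populate oprofilefs with one directory per event counter, exposing the
 * usual enabled/event/count/unit_mask/kernel/user controls that the
 * daemon writes before starting a session.
 */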
static int xenoprof_create_files(struct super_block * sb, struct dentry * root)
{
        unsigned int i;

        for (i = 0; i < num_events; ++i) {
                struct dentry * dir;
                char buf[2];

                snprintf(buf, 2, "%d", i);
                dir = oprofilefs_mkdir(sb, root, buf);
                oprofilefs_create_ulong(sb, dir, "enabled",
                                        &counter_config[i].enabled);
                oprofilefs_create_ulong(sb, dir, "event",
                                        &counter_config[i].event);
                oprofilefs_create_ulong(sb, dir, "count",
                                        &counter_config[i].count);
                oprofilefs_create_ulong(sb, dir, "unit_mask",
                                        &counter_config[i].unit_mask);
                oprofilefs_create_ulong(sb, dir, "kernel",
                                        &counter_config[i].kernel);
                oprofilefs_create_ulong(sb, dir, "user",
                                        &counter_config[i].user);
        }

        return 0;
}


struct oprofile_operations xenoprof_ops = {
        .create_files   = xenoprof_create_files,
        .set_active     = xenoprof_set_active,
        .set_passive    = xenoprof_set_passive,
        .setup          = xenoprof_setup,
        .shutdown       = xenoprof_shutdown,
        .start          = xenoprof_start,
        .stop           = xenoprof_stop
};


/* in order to get driverfs right */
static int using_xenoprof;

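/*
 * Probe for Xenoprof support.  XENOPROF_init reports the number of
 * events, whether this domain is the primary profiler, the CPU type
 * string and the location of the shared sample buffers, which are then
 * mapped and indexed per VCPU.  On success the xenoprof_ops table is
 * handed back to the oprofile core.
 */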
int __init oprofile_arch_init(struct oprofile_operations * ops)
{
        struct xenoprof_init init;
        struct xenoprof_buf * buf;
        int vm_size;
        int npages;
        int ret;
        int i;

        init.max_samples = 16;
        ret = HYPERVISOR_xenoprof_op(XENOPROF_init, &init);

        if (!ret) {
                pgprot_t prot = __pgprot(_KERNPG_TABLE);

                num_events = init.num_events;
                is_primary = init.is_primary;
                nbuf = init.nbuf;

                /* just in case - make sure we do not overflow event list
                   (i.e. counter_config list) */
                if (num_events > OP_MAX_COUNTER)
                        num_events = OP_MAX_COUNTER;

                npages = (init.bufsize * nbuf - 1) / PAGE_SIZE + 1;
                vm_size = npages * PAGE_SIZE;

                shared_buffer = (char *)vm_map_xen_pages(init.buf_maddr,
                                                         vm_size, prot);
                if (!shared_buffer) {
                        ret = -ENOMEM;
                        goto out;
                }

                for (i = 0; i < nbuf; i++) {
                        buf = (struct xenoprof_buf *)
                                &shared_buffer[i * init.bufsize];
                        BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS);
                        xenoprof_buf[buf->vcpu_id] = buf;
                }

                /* cpu_type is detected by Xen */
                cpu_type[XENOPROF_CPU_TYPE_SIZE-1] = 0;
                strncpy(cpu_type, init.cpu_type, XENOPROF_CPU_TYPE_SIZE - 1);
                xenoprof_ops.cpu_type = cpu_type;

                init_driverfs();
                using_xenoprof = 1;
                *ops = xenoprof_ops;

                for (i = 0; i < NR_CPUS; i++)
                        ovf_irq[i] = -1;

                active_defined = 0;
        }
 out:
        printk(KERN_INFO "oprofile_arch_init: ret %d, events %d, "
               "is_primary %d\n", ret, num_events, is_primary);
        return ret;
}


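/*
 * Undo oprofile_arch_init(): unregister the sysdev hooks, unmap the
 * active and passive sample buffers and, on the primary domain, tell Xen
 * to shut Xenoprof down.
 */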
void __exit oprofile_arch_exit(void)
{
        int i;

        if (using_xenoprof)
                exit_driverfs();

        if (shared_buffer) {
                vunmap(shared_buffer);
                shared_buffer = NULL;
        }
        if (is_primary) {
                for (i = 0; i < pdomains; i++)
                        if (p_shared_buffer[i]) {
                                vunmap(p_shared_buffer[i]);
                                p_shared_buffer[i] = NULL;
                        }
                HYPERVISOR_xenoprof_op(XENOPROF_shutdown, NULL);
        }

}