This commit was manufactured by cvs2svn to create branch 'vserver'.
[linux-2.6.git] / arch / ia64 / sn / kernel / sn2 / sn_hwperf.c
1 /* 
2  * This file is subject to the terms and conditions of the GNU General Public
3  * License.  See the file "COPYING" in the main directory of this archive
4  * for more details.
5  *
6  * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved.
7  *
8  * SGI Altix topology and hardware performance monitoring API.
9  * Mark Goodwin <markgw@sgi.com>. 
10  *
11  * Creates /proc/sgi_sn/sn_topology (read-only) to export
12  * info about Altix nodes, routers, CPUs and NumaLink
13  * interconnection/topology.
14  *
15  * Also creates a dynamic misc device named "sn_hwperf"
16  * that supports an ioctl interface to call down into SAL
17  * to discover hw objects, topology and to read/write
18  * memory mapped registers, e.g. for performance monitoring.
19  * The "sn_hwperf" device is registered only after the procfs
20  * file is first opened, i.e. only if/when it's needed. 
21  *
22  * This API is used by SGI Performance Co-Pilot and other
23  * tools, see http://oss.sgi.com/projects/pcp
24  */
25
26 #include <linux/fs.h>
27 #include <linux/slab.h>
28 #include <linux/vmalloc.h>
29 #include <linux/seq_file.h>
30 #include <linux/miscdevice.h>
31 #include <linux/cpumask.h>
32 #include <linux/smp_lock.h>
33 #include <asm/processor.h>
34 #include <asm/topology.h>
35 #include <asm/smp.h>
36 #include <asm/semaphore.h>
37 #include <asm/segment.h>
38 #include <asm/uaccess.h>
39 #include <asm-ia64/sal.h>
40 #include <asm-ia64/sn/sn_sal.h>
41 #include <asm-ia64/sn/sn2/sn_hwperf.h>
42
43 static void *sn_hwperf_salheap = NULL;
44 static int sn_hwperf_obj_cnt = 0;
45 static nasid_t sn_hwperf_master_nasid = INVALID_NASID;
46 static int sn_hwperf_init(void);
47 static DECLARE_MUTEX(sn_hwperf_init_mutex);
48
49 static int sn_hwperf_enum_objects(int *nobj, struct sn_hwperf_object_info **ret)
50 {
51         int e;
52         u64 sz;
53         struct sn_hwperf_object_info *objbuf = NULL;
54
55         if ((e = sn_hwperf_init()) < 0) {
56                 printk("sn_hwperf_init failed: err %d\n", e);
57                 goto out;
58         }
59
60         sz = sn_hwperf_obj_cnt * sizeof(struct sn_hwperf_object_info);
61         if ((objbuf = (struct sn_hwperf_object_info *) vmalloc(sz)) == NULL) {
62                 printk("sn_hwperf_enum_objects: vmalloc(%d) failed\n", (int)sz);
63                 e = -ENOMEM;
64                 goto out;
65         }
66
67         e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, SN_HWPERF_ENUM_OBJECTS,
68                 0, sz, (u64) objbuf, 0, 0, NULL);
69         if (e != SN_HWPERF_OP_OK) {
70                 e = -EINVAL;
71                 vfree(objbuf);
72         }
73
74 out:
75         *nobj = sn_hwperf_obj_cnt;
76         *ret = objbuf;
77         return e;
78 }
79
80 static int sn_hwperf_geoid_to_cnode(char *location)
81 {
82         int cnode;
83         int mod, slot, slab;
84         int cmod, cslot, cslab;
85
86         if (sscanf(location, "%03dc%02d#%d", &mod, &slot, &slab) != 3)
87                 return -1;
88         for (cnode = 0; cnode < numnodes; cnode++) {
89                 /* XXX: need a better way than this ... */
90                 if (sscanf(NODEPDA(cnode)->hwg_node_name,
91                    "hw/module/%03dc%02d/slab/%d", &cmod, &cslot, &cslab) == 3) {
92                         if (mod == cmod && slot == cslot && slab == cslab)
93                                 break;
94                 }
95         }
96
97         return cnode < numnodes ? cnode : -1;
98 }
99
100 static int sn_hwperf_obj_to_cnode(struct sn_hwperf_object_info * obj)
101 {
102         if (!obj->sn_hwp_this_part)
103                 return -1;
104         return sn_hwperf_geoid_to_cnode(obj->location);
105 }
106
107 static int sn_hwperf_generic_ordinal(struct sn_hwperf_object_info *obj,
108                                 struct sn_hwperf_object_info *objs)
109 {
110         int ordinal;
111         struct sn_hwperf_object_info *p;
112
113         for (ordinal=0, p=objs; p != obj; p++) {
114                 if (SN_HWPERF_FOREIGN(p))
115                         continue;
116                 if (p->location[3] == obj->location[3])
117                         ordinal++;
118         }
119
120         return ordinal;
121 }
122
123 #ifndef MODULE_IOBRICK 
124 /* this will be available when ioif TIO support is added */
125 #define MODULE_IOBRICK (MODULE_OPUSBRICK+1)
126 #endif
127
128 static const char *sn_hwperf_get_brickname(struct sn_hwperf_object_info *obj,
129                                 struct sn_hwperf_object_info *objs, int *ordinal)
130 {
131         int i;
132         const char *objtype = NULL;
133
134         for (i=0; i < MAX_BRICK_TYPES; i++) {
135                 if (brick_types[i] != obj->location[3])
136                         continue;
137                 switch (i) {
138                 case MODULE_CBRICK:
139                     objtype = "node";
140                     *ordinal = sn_hwperf_obj_to_cnode(obj); /* cnodeid */
141                     break;
142
143                 case MODULE_RBRICK:
144                     objtype = "router";
145                     *ordinal = sn_hwperf_generic_ordinal(obj, objs);
146                     break;
147
148                 case MODULE_IOBRICK:
149                     objtype = "ionode";
150                     *ordinal = sn_hwperf_generic_ordinal(obj, objs);
151                     break;
152                 }
153                 break;
154         }
155
156         if (i == MAX_BRICK_TYPES) {
157                 objtype = "other";
158                 *ordinal = sn_hwperf_generic_ordinal(obj, objs);
159         }
160
161         return objtype;
162 }
163
164 static int sn_topology_show(struct seq_file *s, void *d)
165 {
166         int sz;
167         int pt;
168         int e;
169         int i;
170         int j;
171         const char *brickname;
172         int ordinal;
173         cpumask_t cpumask;
174         char slice;
175         struct cpuinfo_ia64 *c;
176         struct sn_hwperf_port_info *ptdata;
177         struct sn_hwperf_object_info *p;
178         struct sn_hwperf_object_info *obj = d;  /* this object */
179         struct sn_hwperf_object_info *objs = s->private; /* all objects */
180
181         if (obj == objs) {
182                 seq_printf(s, "# sn_topology version 1\n");
183                 seq_printf(s, "# objtype ordinal location partition"
184                         " [attribute value [, ...]]\n");
185         }
186
187         if (SN_HWPERF_FOREIGN(obj)) {
188                 /* private in another partition: not interesting */
189                 return 0;
190         }
191
192         for (i = 0; obj->name[i]; i++) {
193                 if (obj->name[i] == ' ')
194                         obj->name[i] = '_';
195         }
196
197         brickname = sn_hwperf_get_brickname(obj, objs, &ordinal);
198         seq_printf(s, "%s %d %s %s asic %s", brickname, ordinal, obj->location,
199                 obj->sn_hwp_this_part ? "local" : "shared", obj->name);
200
201         if (obj->location[3] != 'c')
202                 seq_putc(s, '\n');
203         else {
204                 seq_printf(s, ", nasid 0x%x", cnodeid_to_nasid(ordinal));
205                 for (i=0; i < numnodes; i++) {
206                         seq_printf(s, i ? ":%d" : ", dist %d",
207                                 node_distance(ordinal, i));
208                 }
209                 seq_putc(s, '\n');
210
211                 /*
212                  * CPUs on this node
213                  */
214                 cpumask = node_to_cpumask(ordinal);
215                 for_each_online_cpu(i) {
216                         if (cpu_isset(i, cpumask)) {
217                                 slice = 'a' + cpuid_to_slice(i);
218                                 c = cpu_data(i);
219                                 seq_printf(s, "cpu %d %s%c local"
220                                         " freq %luMHz, arch ia64",
221                                         i, obj->location, slice,
222                                         c->proc_freq / 1000000);
223                                 for_each_online_cpu(j) {
224                                         seq_printf(s, j ? ":%d" : ", dist %d",
225                                                 node_distance(
226                                                     cpuid_to_cnodeid(i),
227                                                     cpuid_to_cnodeid(j)));
228                                 }
229                                 seq_putc(s, '\n');
230                         }
231                 }
232         }
233
234         if (obj->ports) {
235                 /*
236                  * numalink ports
237                  */
238                 sz = obj->ports * sizeof(struct sn_hwperf_port_info);
239                 if ((ptdata = vmalloc(sz)) == NULL)
240                         return -ENOMEM;
241                 e = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
242                                       SN_HWPERF_ENUM_PORTS, obj->id, sz,
243                                       (u64) ptdata, 0, 0, NULL);
244                 if (e != SN_HWPERF_OP_OK)
245                         return -EINVAL;
246                 for (ordinal=0, p=objs; p != obj; p++) {
247                         if (!SN_HWPERF_FOREIGN(p))
248                                 ordinal += p->ports;
249                 }
250                 for (pt = 0; pt < obj->ports; pt++) {
251                         for (p = objs, i = 0; i < sn_hwperf_obj_cnt; i++, p++) {
252                                 if (ptdata[pt].conn_id == p->id) {
253                                         break;
254                                 }
255                         }
256                         if (i >= sn_hwperf_obj_cnt)
257                                 continue;
258                         seq_printf(s, "numalink %d %s-%d",
259                             ordinal+pt, obj->location, ptdata[pt].port);
260
261                         if (obj->sn_hwp_this_part && p->sn_hwp_this_part)
262                                 /* both ends local to this partition */
263                                 seq_puts(s, " local");
264                         else if (!obj->sn_hwp_this_part && !p->sn_hwp_this_part)
265                                 /* both ends of the link in foreign partiton */
266                                 seq_puts(s, " foreign");
267                         else
268                                 /* link straddles a partition */
269                                 seq_puts(s, " shared");
270
271                         /*
272                          * Unlikely, but strictly should query the LLP config
273                          * registers because an NL4R can be configured to run
274                          * NL3 protocol, even when not talking to an NL3 router.
275                          * Ditto for node-node.
276                          */
277                         seq_printf(s, " endpoint %s-%d, protocol %s\n",
278                                 p->location, ptdata[pt].conn_port,
279                                 strcmp(obj->name, "NL3Router") == 0 ||
280                                 strcmp(p->name, "NL3Router") == 0 ?
281                                 "LLP3" : "LLP4");
282                 }
283                 vfree(ptdata);
284         }
285
286         return 0;
287 }
288
289 static void *sn_topology_start(struct seq_file *s, loff_t * pos)
290 {
291         struct sn_hwperf_object_info *objs = s->private;
292
293         if (*pos < sn_hwperf_obj_cnt)
294                 return (void *)(objs + *pos);
295
296         return NULL;
297 }
298
299 static void *sn_topology_next(struct seq_file *s, void *v, loff_t * pos)
300 {
301         ++*pos;
302         return sn_topology_start(s, pos);
303 }
304
/* seq_file ->stop callback: nothing to do here. */
static void sn_topology_stop(struct seq_file *m, void *v)
{
}
309
310 /*
311  * /proc/sgi_sn/sn_topology, read-only using seq_file
312  */
313 static struct seq_operations sn_topology_seq_ops = {
314         .start = sn_topology_start,
315         .next = sn_topology_next,
316         .stop = sn_topology_stop,
317         .show = sn_topology_show
318 };
319
320 struct sn_hwperf_op_info {
321         u64 op;
322         struct sn_hwperf_ioctl_args *a;
323         void *p;
324         int *v0;
325         int ret;
326 };
327
328 static void sn_hwperf_call_sal(void *info)
329 {
330         struct sn_hwperf_op_info *op_info = info;
331         int r;
332
333         r = ia64_sn_hwperf_op(sn_hwperf_master_nasid, op_info->op,
334                       op_info->a->arg, op_info->a->sz,
335                       (u64) op_info->p, 0, 0, op_info->v0);
336         op_info->ret = r;
337 }
338
339 static int sn_hwperf_op_cpu(struct sn_hwperf_op_info *op_info)
340 {
341         u32 cpu;
342         u32 use_ipi;
343         int r = 0;
344         cpumask_t save_allowed;
345         
346         cpu = (op_info->a->arg & SN_HWPERF_ARG_CPU_MASK) >> 32;
347         use_ipi = op_info->a->arg & SN_HWPERF_ARG_USE_IPI_MASK;
348         op_info->a->arg &= SN_HWPERF_ARG_OBJID_MASK;
349
350         if (cpu != SN_HWPERF_ARG_ANY_CPU) {
351                 if (cpu >= num_online_cpus() || !cpu_online(cpu)) {
352                         r = -EINVAL;
353                         goto out;
354                 }
355         }
356
357         if (cpu == SN_HWPERF_ARG_ANY_CPU || cpu == get_cpu()) {
358                 /* don't care, or already on correct cpu */
359                 sn_hwperf_call_sal(op_info);
360         }
361         else {
362                 if (use_ipi) {
363                         /* use an interprocessor interrupt to call SAL */
364                         smp_call_function_single(cpu, sn_hwperf_call_sal,
365                                 op_info, 1, 1);
366                 }
367                 else {
368                         /* migrate the task before calling SAL */ 
369                         save_allowed = current->cpus_allowed;
370                         set_cpus_allowed(current, cpumask_of_cpu(cpu));
371                         sn_hwperf_call_sal(op_info);
372                         set_cpus_allowed(current, save_allowed);
373                 }
374         }
375         r = op_info->ret;
376
377 out:
378         return r;
379 }
380
381 /*
382  * ioctl for "sn_hwperf" misc device
383  */
384 static int
385 sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, u64 arg)
386 {
387         struct sn_hwperf_ioctl_args a;
388         struct cpuinfo_ia64 *cdata;
389         struct sn_hwperf_object_info *objs;
390         struct sn_hwperf_object_info *cpuobj;
391         struct sn_hwperf_op_info op_info;
392         void *p = NULL;
393         int nobj;
394         char slice;
395         int node;
396         int r;
397         int v0;
398         int i;
399         int j;
400
401         unlock_kernel();
402
403         /* only user requests are allowed here */
404         if ((op & SN_HWPERF_OP_MASK) < 10) {
405                 r = -EINVAL;
406                 goto error;
407         }
408         r = copy_from_user(&a, (const void *)arg,
409                 sizeof(struct sn_hwperf_ioctl_args));
410         if (r != 0) {
411                 r = -EFAULT;
412                 goto error;
413         }
414
415         /*
416          * Allocate memory to hold a kernel copy of the user buffer. The
417          * buffer contents are either copied in or out (or both) of user
418          * space depending on the flags encoded in the requested operation.
419          */
420         if (a.ptr) {
421                 p = vmalloc(a.sz);
422                 if (!p) {
423                         r = -ENOMEM;
424                         goto error;
425                 }
426         }
427
428         if (op & SN_HWPERF_OP_MEM_COPYIN) {
429                 r = copy_from_user(p, (const void *)a.ptr, a.sz);
430                 if (r != 0) {
431                         r = -EFAULT;
432                         goto error;
433                 }
434         }
435
436         switch (op) {
437         case SN_HWPERF_GET_CPU_INFO:
438                 if (a.sz == sizeof(u64)) {
439                         /* special case to get size needed */
440                         *(u64 *) p = (u64) num_online_cpus() *
441                                 sizeof(struct sn_hwperf_object_info);
442                 } else
443                 if (a.sz < num_online_cpus() * sizeof(struct sn_hwperf_object_info)) {
444                         r = -ENOMEM;
445                         goto error;
446                 } else
447                 if ((r = sn_hwperf_enum_objects(&nobj, &objs)) == 0) {
448                         memset(p, 0, a.sz);
449                         for (i = 0; i < nobj; i++) {
450                                 node = sn_hwperf_obj_to_cnode(objs + i);
451                                 for_each_online_cpu(j) {
452                                         if (node != cpu_to_node(j))
453                                                 continue;
454                                         cpuobj = (struct sn_hwperf_object_info *) p + j;
455                                         slice = 'a' + cpuid_to_slice(j);
456                                         cdata = cpu_data(j);
457                                         cpuobj->id = j;
458                                         snprintf(cpuobj->name,
459                                                  sizeof(cpuobj->name),
460                                                  "CPU %luMHz %s",
461                                                  cdata->proc_freq / 1000000,
462                                                  cdata->vendor);
463                                         snprintf(cpuobj->location,
464                                                  sizeof(cpuobj->location),
465                                                  "%s%c", objs[i].location,
466                                                  slice);
467                                 }
468                         }
469
470                         vfree(objs);
471                 }
472                 break;
473
474         case SN_HWPERF_GET_NODE_NASID:
475                 if (a.sz != sizeof(u64) ||
476                    (node = a.arg) < 0 || node >= numnodes) {
477                         r = -EINVAL;
478                         goto error;
479                 }
480                 *(u64 *)p = (u64)cnodeid_to_nasid(node);
481                 break;
482
483         case SN_HWPERF_GET_OBJ_NODE:
484                 if (a.sz != sizeof(u64) || a.arg < 0) {
485                         r = -EINVAL;
486                         goto error;
487                 }
488                 if ((r = sn_hwperf_enum_objects(&nobj, &objs)) == 0) {
489                         if (a.arg >= nobj) {
490                                 r = -EINVAL;
491                                 vfree(objs);
492                                 goto error;
493                         }
494                         if (objs[(i = a.arg)].id != a.arg) {
495                                 for (i = 0; i < nobj; i++) {
496                                         if (objs[i].id == a.arg)
497                                                 break;
498                                 }
499                         }
500                         if (i == nobj) {
501                                 r = -EINVAL;
502                                 vfree(objs);
503                                 goto error;
504                         }
505                         *(u64 *)p = (u64)sn_hwperf_obj_to_cnode(objs + i);
506                         vfree(objs);
507                 }
508                 break;
509
510         case SN_HWPERF_GET_MMRS:
511         case SN_HWPERF_SET_MMRS:
512         case SN_HWPERF_OBJECT_DISTANCE:
513                 op_info.p = p;
514                 op_info.a = &a;
515                 op_info.v0 = &v0;
516                 op_info.op = op;
517                 r = sn_hwperf_op_cpu(&op_info);
518                 break;
519
520         default:
521                 /* all other ops are a direct SAL call */
522                 r = ia64_sn_hwperf_op(sn_hwperf_master_nasid, op,
523                               a.arg, a.sz, (u64) p, 0, 0, &v0);
524                 a.v0 = v0;
525                 break;
526         }
527
528         if (op & SN_HWPERF_OP_MEM_COPYOUT) {
529                 r = copy_to_user((void *)a.ptr, p, a.sz);
530                 if (r != 0) {
531                         r = -EFAULT;
532                         goto error;
533                 }
534         }
535
536 error:
537         if (p)
538                 vfree(p);
539
540         lock_kernel();
541         return r;
542 }
543
544 static struct file_operations sn_hwperf_fops = {
545         .ioctl = sn_hwperf_ioctl,
546 };
547
548 static struct miscdevice sn_hwperf_dev = {
549         MISC_DYNAMIC_MINOR,
550         "sn_hwperf",
551         &sn_hwperf_fops
552 };
553
554 static int sn_hwperf_init(void)
555 {
556         u64 v;
557         int salr;
558         int e = 0;
559
560         /* single threaded, once-only initialization */
561         down(&sn_hwperf_init_mutex);
562         if (sn_hwperf_salheap) {
563                 up(&sn_hwperf_init_mutex);
564                 return e;
565         }
566
567         /*
568          * The PROM code needs a fixed reference node. For convenience the
569          * same node as the console I/O is used.
570          */
571         sn_hwperf_master_nasid = (nasid_t) ia64_sn_get_console_nasid();
572
573         /*
574          * Request the needed size and install the PROM scratch area.
575          * The PROM keeps various tracking bits in this memory area.
576          */
577         salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
578                                  (u64) SN_HWPERF_GET_HEAPSIZE, 0,
579                                  (u64) sizeof(u64), (u64) &v, 0, 0, NULL);
580         if (salr != SN_HWPERF_OP_OK) {
581                 e = -EINVAL;
582                 goto out;
583         }
584
585         if ((sn_hwperf_salheap = vmalloc(v)) == NULL) {
586                 e = -ENOMEM;
587                 goto out;
588         }
589         salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
590                                  SN_HWPERF_INSTALL_HEAP, 0, v,
591                                  (u64) sn_hwperf_salheap, 0, 0, NULL);
592         if (salr != SN_HWPERF_OP_OK) {
593                 e = -EINVAL;
594                 goto out;
595         }
596
597         salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
598                                  SN_HWPERF_OBJECT_COUNT, 0,
599                                  sizeof(u64), (u64) &v, 0, 0, NULL);
600         if (salr != SN_HWPERF_OP_OK) {
601                 e = -EINVAL;
602                 goto out;
603         }
604         sn_hwperf_obj_cnt = (int)v;
605
606 out:
607         if (e < 0 && sn_hwperf_salheap) {
608                 vfree(sn_hwperf_salheap);
609                 sn_hwperf_salheap = NULL;
610                 sn_hwperf_obj_cnt = 0;
611         }
612
613         if (!e) {
614                 /*
615                  * Register a dynamic misc device for ioctl. Platforms
616                  * supporting hotplug will create /dev/sn_hwperf, else
617                  * user can to look up the minor number in /proc/misc.
618                  */
619                 if ((e = misc_register(&sn_hwperf_dev)) != 0) {
620                         printk(KERN_ERR "sn_hwperf_init: misc register "
621                                "for \"sn_hwperf\" failed, err %d\n", e);
622                 }
623         }
624
625         up(&sn_hwperf_init_mutex);
626         return e;
627 }
628
629 int sn_topology_open(struct inode *inode, struct file *file)
630 {
631         int e;
632         struct seq_file *seq;
633         struct sn_hwperf_object_info *objbuf;
634         int nobj;
635
636         if ((e = sn_hwperf_enum_objects(&nobj, &objbuf)) == 0) {
637                 e = seq_open(file, &sn_topology_seq_ops);
638                 seq = file->private_data;
639                 seq->private = objbuf;
640         }
641
642         return e;
643 }
644
645 int sn_topology_release(struct inode *inode, struct file *file)
646 {
647         struct seq_file *seq = file->private_data;
648
649         if (seq->private)
650                 vfree(seq->private);
651         return seq_release(inode, file);
652 }