X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fia64%2Fsn%2Fkernel%2Fsn2%2Fsn_hwperf.c;h=33367996d72d595b180179977b4a20e6c3b929bc;hb=refs%2Fheads%2Fvserver;hp=833e700fdac93e4ada8c663c8c182b4d899cf05b;hpb=f7f1b0f1e2fbadeab12d24236000e778aa9b1ead;p=linux-2.6.git diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index 833e700fd..33367996d 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2004-2005 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2004-2006 Silicon Graphics, Inc. All rights reserved. * * SGI Altix topology and hardware performance monitoring API. * Mark Goodwin . @@ -32,11 +32,11 @@ #include #include #include +#include + #include #include -#include #include -#include #include #include #include @@ -52,6 +52,8 @@ static nasid_t sn_hwperf_master_nasid = INVALID_NASID; static int sn_hwperf_init(void); static DECLARE_MUTEX(sn_hwperf_init_mutex); +#define cnode_possible(n) ((n) < num_cnodes) + static int sn_hwperf_enum_objects(int *nobj, struct sn_hwperf_object_info **ret) { int e; @@ -59,7 +61,7 @@ static int sn_hwperf_enum_objects(int *nobj, struct sn_hwperf_object_info **ret) struct sn_hwperf_object_info *objbuf = NULL; if ((e = sn_hwperf_init()) < 0) { - printk("sn_hwperf_init failed: err %d\n", e); + printk(KERN_ERR "sn_hwperf_init failed: err %d\n", e); goto out; } @@ -111,7 +113,11 @@ static int sn_hwperf_geoid_to_cnode(char *location) if (sn_hwperf_location_to_bpos(location, &rack, &bay, &slot, &slab)) return -1; - for (cnode = 0; cnode < numionodes; cnode++) { + /* + * FIXME: replace with cleaner for_each_XXX macro which addresses + * both compute and IO nodes once ACPI3.0 is available. + */ + for (cnode = 0; cnode < num_cnodes; cnode++) { geoid = cnodeid_get_geoid(cnode); module_id = geo_module(geoid); this_rack = MODULE_GET_RACK(module_id); @@ -124,12 +130,14 @@ static int sn_hwperf_geoid_to_cnode(char *location) } } - return cnode < numionodes ? cnode : -1; + return cnode_possible(cnode) ? cnode : -1; } static int sn_hwperf_obj_to_cnode(struct sn_hwperf_object_info * obj) { - if (!obj->sn_hwp_this_part) + if (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj)) + BUG(); + if (SN_HWPERF_FOREIGN(obj)) return -1; return sn_hwperf_geoid_to_cnode(obj->location); } @@ -174,31 +182,199 @@ static const char *sn_hwperf_get_slabname(struct sn_hwperf_object_info *obj, return slabname; } -static void print_pci_topology(struct seq_file *s, - struct sn_hwperf_object_info *obj, int *ordinal, - u64 rack, u64 bay, u64 slot, u64 slab) +static void print_pci_topology(struct seq_file *s) +{ + char *p; + size_t sz; + int e; + + for (sz = PAGE_SIZE; sz < 16 * PAGE_SIZE; sz += PAGE_SIZE) { + if (!(p = kmalloc(sz, GFP_KERNEL))) + break; + e = ia64_sn_ioif_get_pci_topology(__pa(p), sz); + if (e == SALRET_OK) + seq_puts(s, p); + kfree(p); + if (e == SALRET_OK || e == SALRET_NOT_IMPLEMENTED) + break; + } +} + +static inline int sn_hwperf_has_cpus(cnodeid_t node) +{ + return node < MAX_NUMNODES && node_online(node) && nr_cpus_node(node); +} + +static inline int sn_hwperf_has_mem(cnodeid_t node) { - char *p1; - char *p2; - char *pg; - - if (!(pg = (char *)get_zeroed_page(GFP_KERNEL))) - return; /* ignore */ - if (ia64_sn_ioif_get_pci_topology(rack, bay, slot, slab, - __pa(pg), PAGE_SIZE) == SN_HWPERF_OP_OK) { - for (p1=pg; *p1 && p1 < pg + PAGE_SIZE;) { - if (!(p2 = strchr(p1, '\n'))) + return node < MAX_NUMNODES && node_online(node) && NODE_DATA(node)->node_present_pages; +} + +static struct sn_hwperf_object_info * +sn_hwperf_findobj_id(struct sn_hwperf_object_info *objbuf, + int nobj, int id) +{ + int i; + struct sn_hwperf_object_info *p = objbuf; + + for (i=0; i < nobj; i++, p++) { + if (p->id == id) + return p; + } + + return NULL; + +} + +static int sn_hwperf_get_nearest_node_objdata(struct sn_hwperf_object_info *objbuf, + int nobj, cnodeid_t node, cnodeid_t *near_mem_node, cnodeid_t *near_cpu_node) +{ + int e; + struct sn_hwperf_object_info *nodeobj = NULL; + struct sn_hwperf_object_info *op; + struct sn_hwperf_object_info *dest; + struct sn_hwperf_object_info *router; + struct sn_hwperf_port_info ptdata[16]; + int sz, i, j; + cnodeid_t c; + int found_mem = 0; + int found_cpu = 0; + + if (!cnode_possible(node)) + return -EINVAL; + + if (sn_hwperf_has_cpus(node)) { + if (near_cpu_node) + *near_cpu_node = node; + found_cpu++; + } + + if (sn_hwperf_has_mem(node)) { + if (near_mem_node) + *near_mem_node = node; + found_mem++; + } + + if (found_cpu && found_mem) + return 0; /* trivially successful */ + + /* find the argument node object */ + for (i=0, op=objbuf; i < nobj; i++, op++) { + if (!SN_HWPERF_IS_NODE(op) && !SN_HWPERF_IS_IONODE(op)) + continue; + if (node == sn_hwperf_obj_to_cnode(op)) { + nodeobj = op; + break; + } + } + if (!nodeobj) { + e = -ENOENT; + goto err; + } + + /* get it's interconnect topology */ + sz = op->ports * sizeof(struct sn_hwperf_port_info); + if (sz > sizeof(ptdata)) + BUG(); + e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + SN_HWPERF_ENUM_PORTS, nodeobj->id, sz, + (u64)&ptdata, 0, 0, NULL); + if (e != SN_HWPERF_OP_OK) { + e = -EINVAL; + goto err; + } + + /* find nearest node with cpus and nearest memory */ + for (router=NULL, j=0; j < op->ports; j++) { + dest = sn_hwperf_findobj_id(objbuf, nobj, ptdata[j].conn_id); + if (dest && SN_HWPERF_IS_ROUTER(dest)) + router = dest; + if (!dest || SN_HWPERF_FOREIGN(dest) || + !SN_HWPERF_IS_NODE(dest) || SN_HWPERF_IS_IONODE(dest)) { + continue; + } + c = sn_hwperf_obj_to_cnode(dest); + if (!found_cpu && sn_hwperf_has_cpus(c)) { + if (near_cpu_node) + *near_cpu_node = c; + found_cpu++; + } + if (!found_mem && sn_hwperf_has_mem(c)) { + if (near_mem_node) + *near_mem_node = c; + found_mem++; + } + } + + if (router && (!found_cpu || !found_mem)) { + /* search for a node connected to the same router */ + sz = router->ports * sizeof(struct sn_hwperf_port_info); + if (sz > sizeof(ptdata)) + BUG(); + e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + SN_HWPERF_ENUM_PORTS, router->id, sz, + (u64)&ptdata, 0, 0, NULL); + if (e != SN_HWPERF_OP_OK) { + e = -EINVAL; + goto err; + } + for (j=0; j < router->ports; j++) { + dest = sn_hwperf_findobj_id(objbuf, nobj, + ptdata[j].conn_id); + if (!dest || dest->id == node || + SN_HWPERF_FOREIGN(dest) || + !SN_HWPERF_IS_NODE(dest) || + SN_HWPERF_IS_IONODE(dest)) { + continue; + } + c = sn_hwperf_obj_to_cnode(dest); + if (!found_cpu && sn_hwperf_has_cpus(c)) { + if (near_cpu_node) + *near_cpu_node = c; + found_cpu++; + } + if (!found_mem && sn_hwperf_has_mem(c)) { + if (near_mem_node) + *near_mem_node = c; + found_mem++; + } + if (found_cpu && found_mem) + break; + } + } + + if (!found_cpu || !found_mem) { + /* resort to _any_ node with CPUs and memory */ + for (i=0, op=objbuf; i < nobj; i++, op++) { + if (SN_HWPERF_FOREIGN(op) || + SN_HWPERF_IS_IONODE(op) || + !SN_HWPERF_IS_NODE(op)) { + continue; + } + c = sn_hwperf_obj_to_cnode(op); + if (!found_cpu && sn_hwperf_has_cpus(c)) { + if (near_cpu_node) + *near_cpu_node = c; + found_cpu++; + } + if (!found_mem && sn_hwperf_has_mem(c)) { + if (near_mem_node) + *near_mem_node = c; + found_mem++; + } + if (found_cpu && found_mem) break; - *p2 = '\0'; - seq_printf(s, "pcibus %d %s-%s\n", - *ordinal, obj->location, p1); - (*ordinal)++; - p1 = p2 + 1; } } - free_page((unsigned long)pg); + + if (!found_cpu || !found_mem) + e = -ENODATA; + +err: + return e; } + static int sn_topology_show(struct seq_file *s, void *d) { int sz; @@ -215,7 +391,6 @@ static int sn_topology_show(struct seq_file *s, void *d) struct sn_hwperf_object_info *p; struct sn_hwperf_object_info *obj = d; /* this object */ struct sn_hwperf_object_info *objs = s->private; /* all objects */ - int rack, bay, slot, slab; u8 shubtype; u8 system_size; u8 sharing_size; @@ -225,7 +400,6 @@ static int sn_topology_show(struct seq_file *s, void *d) u8 region_size; u16 nasid_mask; int nasid_msb; - int pci_bus_ordinal = 0; if (obj == objs) { seq_printf(s, "# sn_topology version 2\n"); @@ -249,10 +423,12 @@ static int sn_topology_show(struct seq_file *s, void *d) "coherency_domain %d, " "region_size %d\n", - partid, system_utsname.nodename, + partid, utsname()->nodename, shubtype ? "shub2" : "shub1", (u64)nasid_mask << nasid_shift, nasid_msb, nasid_shift, system_size, sharing_size, coher, region_size); + + print_pci_topology(s); } if (SN_HWPERF_FOREIGN(obj)) { @@ -269,48 +445,52 @@ static int sn_topology_show(struct seq_file *s, void *d) seq_printf(s, "%s %d %s %s asic %s", slabname, ordinal, obj->location, obj->sn_hwp_this_part ? "local" : "shared", obj->name); - if (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj)) + if (ordinal < 0 || (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj))) seq_putc(s, '\n'); else { + cnodeid_t near_mem = -1; + cnodeid_t near_cpu = -1; + seq_printf(s, ", nasid 0x%x", cnodeid_to_nasid(ordinal)); - for (i=0; i < numionodes; i++) { - seq_printf(s, i ? ":%d" : ", dist %d", - node_distance(ordinal, i)); + + if (sn_hwperf_get_nearest_node_objdata(objs, sn_hwperf_obj_cnt, + ordinal, &near_mem, &near_cpu) == 0) { + seq_printf(s, ", near_mem_nodeid %d, near_cpu_nodeid %d", + near_mem, near_cpu); + } + + if (!SN_HWPERF_IS_IONODE(obj)) { + for_each_online_node(i) { + seq_printf(s, i ? ":%d" : ", dist %d", + node_distance(ordinal, i)); + } } + seq_putc(s, '\n'); /* * CPUs on this node, if any */ - cpumask = node_to_cpumask(ordinal); - for_each_online_cpu(i) { - if (cpu_isset(i, cpumask)) { - slice = 'a' + cpuid_to_slice(i); - c = cpu_data(i); - seq_printf(s, "cpu %d %s%c local" - " freq %luMHz, arch ia64", - i, obj->location, slice, - c->proc_freq / 1000000); - for_each_online_cpu(j) { - seq_printf(s, j ? ":%d" : ", dist %d", - node_distance( - cpuid_to_cnodeid(i), - cpuid_to_cnodeid(j))); + if (!SN_HWPERF_IS_IONODE(obj)) { + cpumask = node_to_cpumask(ordinal); + for_each_online_cpu(i) { + if (cpu_isset(i, cpumask)) { + slice = 'a' + cpuid_to_slice(i); + c = cpu_data(i); + seq_printf(s, "cpu %d %s%c local" + " freq %luMHz, arch ia64", + i, obj->location, slice, + c->proc_freq / 1000000); + for_each_online_cpu(j) { + seq_printf(s, j ? ":%d" : ", dist %d", + node_distance( + cpu_to_node(i), + cpu_to_node(j))); + } + seq_putc(s, '\n'); } - seq_putc(s, '\n'); } } - - /* - * PCI busses attached to this node, if any - */ - if (sn_hwperf_location_to_bpos(obj->location, - &rack, &bay, &slot, &slab)) { - /* export pci bus info */ - print_pci_topology(s, obj, &pci_bus_ordinal, - rack, bay, slot, slab); - - } } if (obj->ports) { @@ -318,7 +498,7 @@ static int sn_topology_show(struct seq_file *s, void *d) * numalink ports */ sz = obj->ports * sizeof(struct sn_hwperf_port_info); - if ((ptdata = vmalloc(sz)) == NULL) + if ((ptdata = kmalloc(sz, GFP_KERNEL)) == NULL) return -ENOMEM; e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, SN_HWPERF_ENUM_PORTS, obj->id, sz, @@ -348,7 +528,7 @@ static int sn_topology_show(struct seq_file *s, void *d) if (obj->sn_hwp_this_part && p->sn_hwp_this_part) /* both ends local to this partition */ seq_puts(s, " local"); - else if (!obj->sn_hwp_this_part && !p->sn_hwp_this_part) + else if (SN_HWPERF_FOREIGN(p)) /* both ends of the link in foreign partiton */ seq_puts(s, " foreign"); else @@ -366,7 +546,7 @@ static int sn_topology_show(struct seq_file *s, void *d) (SN_HWPERF_IS_NL3ROUTER(obj) || SN_HWPERF_IS_NL3ROUTER(p)) ? "LLP3" : "LLP4"); } - vfree(ptdata); + kfree(ptdata); } return 0; @@ -434,7 +614,7 @@ static int sn_hwperf_op_cpu(struct sn_hwperf_op_info *op_info) op_info->a->arg &= SN_HWPERF_ARG_OBJID_MASK; if (cpu != SN_HWPERF_ARG_ANY_CPU) { - if (cpu >= num_online_cpus() || !cpu_online(cpu)) { + if (cpu >= NR_CPUS || !cpu_online(cpu)) { r = -EINVAL; goto out; } @@ -572,11 +752,14 @@ sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, u64 arg) if ((r = sn_hwperf_enum_objects(&nobj, &objs)) == 0) { memset(p, 0, a.sz); for (i = 0; i < nobj; i++) { + int cpuobj_index = 0; + if (!SN_HWPERF_IS_NODE(objs + i)) + continue; node = sn_hwperf_obj_to_cnode(objs + i); for_each_online_cpu(j) { if (node != cpu_to_node(j)) continue; - cpuobj = (struct sn_hwperf_object_info *) p + j; + cpuobj = (struct sn_hwperf_object_info *) p + cpuobj_index++; slice = 'a' + cpuid_to_slice(j); cdata = cpu_data(j); cpuobj->id = j; @@ -598,7 +781,7 @@ sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, u64 arg) case SN_HWPERF_GET_NODE_NASID: if (a.sz != sizeof(u64) || - (node = a.arg) < 0 || node >= numionodes) { + (node = a.arg) < 0 || !cnode_possible(node)) { r = -EINVAL; goto error; } @@ -627,6 +810,14 @@ sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, u64 arg) vfree(objs); goto error; } + + if (!SN_HWPERF_IS_NODE(objs + i) && + !SN_HWPERF_IS_IONODE(objs + i)) { + r = -ENOENT; + vfree(objs); + goto error; + } + *(u64 *)p = (u64)sn_hwperf_obj_to_cnode(objs + i); vfree(objs); } @@ -692,6 +883,7 @@ static int sn_hwperf_init(void) /* single threaded, once-only initialization */ down(&sn_hwperf_init_mutex); + if (sn_hwperf_salheap) { up(&sn_hwperf_init_mutex); return e; @@ -742,19 +934,6 @@ out: sn_hwperf_salheap = NULL; sn_hwperf_obj_cnt = 0; } - - if (!e) { - /* - * Register a dynamic misc device for ioctl. Platforms - * supporting hotplug will create /dev/sn_hwperf, else - * user can to look up the minor number in /proc/misc. - */ - if ((e = misc_register(&sn_hwperf_dev)) != 0) { - printk(KERN_ERR "sn_hwperf_init: misc register " - "for \"sn_hwperf\" failed, err %d\n", e); - } - } - up(&sn_hwperf_init_mutex); return e; } @@ -782,3 +961,44 @@ int sn_topology_release(struct inode *inode, struct file *file) vfree(seq->private); return seq_release(inode, file); } + +int sn_hwperf_get_nearest_node(cnodeid_t node, + cnodeid_t *near_mem_node, cnodeid_t *near_cpu_node) +{ + int e; + int nobj; + struct sn_hwperf_object_info *objbuf; + + if ((e = sn_hwperf_enum_objects(&nobj, &objbuf)) == 0) { + e = sn_hwperf_get_nearest_node_objdata(objbuf, nobj, + node, near_mem_node, near_cpu_node); + vfree(objbuf); + } + + return e; +} + +static int __devinit sn_hwperf_misc_register_init(void) +{ + int e; + + if (!ia64_platform_is("sn2")) + return 0; + + sn_hwperf_init(); + + /* + * Register a dynamic misc device for hwperf ioctls. Platforms + * supporting hotplug will create /dev/sn_hwperf, else user + * can to look up the minor number in /proc/misc. + */ + if ((e = misc_register(&sn_hwperf_dev)) != 0) { + printk(KERN_ERR "sn_hwperf_misc_register_init: failed to " + "register misc device for \"%s\"\n", sn_hwperf_dev.name); + } + + return e; +} + +device_initcall(sn_hwperf_misc_register_init); /* after misc_init() */ +EXPORT_SYMBOL(sn_hwperf_get_nearest_node);