X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fia64%2Fkernel%2Fperfmon.c;h=4f1543cdafec51d8f9651df34a4476d80e86224e;hb=8e8ece46a861c84343256819eaec77e608ff9217;hp=ae2eb13f94255e4ec88e98e96730e46e301af263;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index ae2eb13f9..4f1543cda 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -5,13 +5,13 @@ * The initial version of perfmon.c was written by * Ganesh Venkitachalam, IBM Corp. * - * Then it was modified for perfmon-1.x by Stephane Eranian and + * Then it was modified for perfmon-1.x by Stephane Eranian and * David Mosberger, Hewlett Packard Co. - * + * * Version Perfmon-2.x is a rewrite of perfmon-1.x - * by Stephane Eranian, Hewlett Packard Co. + * by Stephane Eranian, Hewlett Packard Co. * - * Copyright (C) 1999-2003 Hewlett Packard Co + * Copyright (C) 1999-2003, 2005 Hewlett Packard Co * Stephane Eranian * David Mosberger-Tang * @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -37,8 +38,10 @@ #include #include #include +#include +#include +#include -#include #include #include #include @@ -86,27 +89,25 @@ #define PFM_REG_CONFIG (0x8<<4|PFM_REG_IMPL) /* configuration register */ #define PFM_REG_BUFFER (0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */ -#define PMC_IS_LAST(i) (pmu_conf.pmc_desc[i].type & PFM_REG_END) -#define PMD_IS_LAST(i) (pmu_conf.pmd_desc[i].type & PFM_REG_END) - -#define PFM_IS_DISABLED() (pmu_conf.enabled == 0) +#define PMC_IS_LAST(i) (pmu_conf->pmc_desc[i].type & PFM_REG_END) +#define PMD_IS_LAST(i) (pmu_conf->pmd_desc[i].type & PFM_REG_END) #define PMC_OVFL_NOTIFY(ctx, i) ((ctx)->ctx_pmds[i].flags & PFM_REGFL_OVFL_NOTIFY) /* i assumed unsigned */ -#define PMC_IS_IMPL(i) (i< PMU_MAX_PMCS && (pmu_conf.pmc_desc[i].type & PFM_REG_IMPL)) -#define PMD_IS_IMPL(i) (i< PMU_MAX_PMDS && (pmu_conf.pmd_desc[i].type & PFM_REG_IMPL)) +#define PMC_IS_IMPL(i) (i< PMU_MAX_PMCS && (pmu_conf->pmc_desc[i].type & PFM_REG_IMPL)) +#define PMD_IS_IMPL(i) (i< PMU_MAX_PMDS && (pmu_conf->pmd_desc[i].type & PFM_REG_IMPL)) /* XXX: these assume that register i is implemented */ -#define PMD_IS_COUNTING(i) ((pmu_conf.pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING) -#define PMC_IS_COUNTING(i) ((pmu_conf.pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING) -#define PMC_IS_MONITOR(i) ((pmu_conf.pmc_desc[i].type & PFM_REG_MONITOR) == PFM_REG_MONITOR) -#define PMC_IS_CONTROL(i) ((pmu_conf.pmc_desc[i].type & PFM_REG_CONTROL) == PFM_REG_CONTROL) +#define PMD_IS_COUNTING(i) ((pmu_conf->pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING) +#define PMC_IS_COUNTING(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING) +#define PMC_IS_MONITOR(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_MONITOR) == PFM_REG_MONITOR) +#define PMC_IS_CONTROL(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_CONTROL) == PFM_REG_CONTROL) -#define PMC_DFL_VAL(i) pmu_conf.pmc_desc[i].default_value -#define PMC_RSVD_MASK(i) pmu_conf.pmc_desc[i].reserved_mask -#define PMD_PMD_DEP(i) pmu_conf.pmd_desc[i].dep_pmd[0] -#define PMC_PMD_DEP(i) pmu_conf.pmc_desc[i].dep_pmd[0] +#define PMC_DFL_VAL(i) pmu_conf->pmc_desc[i].default_value +#define PMC_RSVD_MASK(i) pmu_conf->pmc_desc[i].reserved_mask +#define PMD_PMD_DEP(i) pmu_conf->pmd_desc[i].dep_pmd[0] +#define PMC_PMD_DEP(i) pmu_conf->pmc_desc[i].dep_pmd[0] #define PFM_NUM_IBRS IA64_NUM_DBG_REGS #define PFM_NUM_DBRS IA64_NUM_DBG_REGS @@ -133,6 +134,8 @@ #define 
PFM_CPUINFO_SET(v) pfm_get_cpu_var(pfm_syst_info) |= (v) #define PFM_CPUINFO_GET() pfm_get_cpu_var(pfm_syst_info) +#define RDEP(x) (1UL<<(x)) + /* * context protection macros * in SMP: @@ -310,6 +313,7 @@ typedef struct pfm_context { unsigned int ctx_cpu; /* cpu to which perfmon is applied (system wide) */ int ctx_fd; /* file descriptor used my this context */ + pfm_ovfl_arg_t ctx_ovfl_arg; /* argument to custom buffer format handler */ pfm_buffer_fmt_t *ctx_buf_fmt; /* buffer format callbacks */ void *ctx_smpl_hdr; /* points to sampling buffer header kernel vaddr */ @@ -374,26 +378,32 @@ typedef struct { * dep_pmd[]: a bitmask of dependent PMD registers * dep_pmc[]: a bitmask of dependent PMC registers */ +typedef int (*pfm_reg_check_t)(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); typedef struct { unsigned int type; int pm_pos; unsigned long default_value; /* power-on default value */ unsigned long reserved_mask; /* bitmask of reserved bits */ - int (*read_check)(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); - int (*write_check)(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); + pfm_reg_check_t read_check; + pfm_reg_check_t write_check; unsigned long dep_pmd[4]; unsigned long dep_pmc[4]; } pfm_reg_desc_t; /* assume cnum is a valid monitor */ -#define PMC_PM(cnum, val) (((val) >> (pmu_conf.pmc_desc[cnum].pm_pos)) & 0x1) -#define PMC_WR_FUNC(cnum) (pmu_conf.pmc_desc[cnum].write_check) -#define PMD_WR_FUNC(cnum) (pmu_conf.pmd_desc[cnum].write_check) -#define PMD_RD_FUNC(cnum) (pmu_conf.pmd_desc[cnum].read_check) +#define PMC_PM(cnum, val) (((val) >> (pmu_conf->pmc_desc[cnum].pm_pos)) & 0x1) /* * This structure is initialized at boot time and contains * a description of the PMU main characteristics. 
+ * + * If the probe function is defined, detection is based + * on its return value: + * - 0 means recognized PMU + * - anything else means not supported + * When the probe function is not defined, then the pmu_family field + * is used and it must match the host CPU family such that: + * - cpu->family & config->pmu_family != 0 */ typedef struct { unsigned long ovfl_val; /* overflow value for counters */ @@ -407,15 +417,18 @@ typedef struct { unsigned long impl_pmds[4]; /* bitmask of implemented PMDS */ char *pmu_name; /* PMU family name */ - unsigned int enabled; /* indicates if perfmon initialized properly */ unsigned int pmu_family; /* cpuid family pattern used to identify pmu */ - + unsigned int flags; /* pmu specific flags */ unsigned int num_ibrs; /* number of IBRS: computed at init time */ unsigned int num_dbrs; /* number of DBRS: computed at init time */ unsigned int num_counters; /* PMC/PMD counting pairs : computed at init time */ - + int (*probe)(void); /* customized probe routine */ unsigned int use_rr_dbregs:1; /* set if debug registers used for range restriction */ } pmu_config_t; +/* + * PMU specific flags + */ +#define PFM_PMU_IRQ_RESEND 1 /* PMU needs explicit IRQ resend */ /* * debug register related type definitions @@ -500,6 +513,8 @@ static pfm_uuid_t pfm_null_uuid = {0,}; static spinlock_t pfm_buffer_fmt_lock; static LIST_HEAD(pfm_buffer_fmt_list); +static pmu_config_t *pmu_conf; + /* sysctl() controls */ static pfm_sysctl_t pfm_sysctl; int pfm_debug_var; @@ -559,12 +574,6 @@ pfm_unreserve_page(unsigned long a) ClearPageReserved(vmalloc_to_page((void*)a)); } -static inline int -pfm_remap_page_range(struct vm_area_struct *vma, unsigned long from, unsigned long phys_addr, unsigned long size, pgprot_t prot) -{ - return remap_page_range(vma, from, phys_addr, size, prot); -} - static inline unsigned long pfm_protect_ctx_ctxsw(pfm_context_t *x) { @@ -620,20 +629,19 @@ static void pfm_lazy_save_regs (struct task_struct *ta); #endif void dump_pmu_state(const char *); +static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); -/* - * the HP simulator must be first because - * CONFIG_IA64_HP_SIM is independent of CONFIG_MCKINLEY or CONFIG_ITANIUM - */ -#if defined(CONFIG_IA64_HP_SIM) -#include "perfmon_hpsim.h" -#elif defined(CONFIG_ITANIUM) #include "perfmon_itanium.h" -#elif defined(CONFIG_MCKINLEY) #include "perfmon_mckinley.h" -#else #include "perfmon_generic.h" -#endif + +static pmu_config_t *pmu_confs[]={ + &pmu_conf_mck, + &pmu_conf_ita, + &pmu_conf_gen, /* must be last */ + NULL +}; + static int pfm_end_notify_user(pfm_context_t *ctx); @@ -702,6 +710,7 @@ pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs) for (i=0; i < nibrs; i++) { ia64_set_ibr(i, ibrs[i]); + ia64_dv_serialize_instruction(); } ia64_srlz_i(); } @@ -713,6 +722,7 @@ pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs) for (i=0; i < ndbrs; i++) { ia64_set_dbr(i, dbrs[i]); + ia64_dv_serialize_data(); } ia64_srlz_d(); } @@ -723,7 +733,7 @@ pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs) static inline unsigned long pfm_read_soft_counter(pfm_context_t *ctx, int i) { - return ctx->ctx_pmds[i].val + (ia64_get_pmd(i) & pmu_conf.ovfl_val); + return ctx->ctx_pmds[i].val + (ia64_get_pmd(i) & pmu_conf->ovfl_val); } /* @@ -732,7 +742,7 @@ pfm_read_soft_counter(pfm_context_t *ctx, int i) static inline void pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val) { - unsigned long ovfl_val = pmu_conf.ovfl_val; + unsigned long ovfl_val = 
pmu_conf->ovfl_val; ctx->ctx_pmds[i].val = val & ~ovfl_val; /* @@ -791,18 +801,6 @@ pfm_reset_msgq(pfm_context_t *ctx) DPRINT(("ctx=%p msgq reset\n", ctx)); } - -/* Here we want the physical address of the memory. - * This is used when initializing the contents of the - * area and marking the pages as reserved. - */ -static inline unsigned long -pfm_kvirt_to_pa(unsigned long adr) -{ - __u64 pa = ia64_tpa(adr); - return pa; -} - static void * pfm_rvmalloc(unsigned long size) { @@ -878,7 +876,7 @@ pfm_mask_monitoring(struct task_struct *task) DPRINT_ovfl(("masking monitoring for [%d]\n", task->pid)); - ovfl_mask = pmu_conf.ovfl_val; + ovfl_mask = pmu_conf->ovfl_val; /* * monitoring can only be masked as a result of a valid * counter overflow. In UP, it means that the PMU still @@ -953,7 +951,7 @@ pfm_restore_monitoring(struct task_struct *task) int i, is_system; is_system = ctx->ctx_fl_system; - ovfl_mask = pmu_conf.ovfl_val; + ovfl_mask = pmu_conf->ovfl_val; if (task != current) { printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task->pid, current->pid); @@ -1024,8 +1022,8 @@ pfm_restore_monitoring(struct task_struct *task) * XXX: need to optimize */ if (ctx->ctx_fl_using_dbreg) { - pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf.num_ibrs); - pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf.num_dbrs); + pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); + pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); } /* @@ -1058,7 +1056,7 @@ static inline void pfm_restore_pmds(unsigned long *pmds, unsigned long mask) { int i; - unsigned long val, ovfl_val = pmu_conf.ovfl_val; + unsigned long val, ovfl_val = pmu_conf->ovfl_val; for (i=0; mask; i++, mask>>=1) { if ((mask & 0x1) == 0) continue; @@ -1075,7 +1073,7 @@ static inline void pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx) { struct thread_struct *thread = &task->thread; - unsigned long ovfl_val = pmu_conf.ovfl_val; + unsigned long ovfl_val = pmu_conf->ovfl_val; unsigned long mask = ctx->ctx_all_pmds[0]; unsigned long val; int i; @@ -1498,15 +1496,8 @@ exit_pfm_fs(void) mntput(pfmfs_mnt); } -static loff_t -pfm_lseek(struct file *file, loff_t offset, int whence) -{ - DPRINT(("pfm_lseek called\n")); - return -ESPIPE; -} - static ssize_t -pfm_read(struct file *filp, char *buf, size_t size, loff_t *ppos) +pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos) { pfm_context_t *ctx; pfm_msg_t *msg; @@ -1531,10 +1522,6 @@ pfm_read(struct file *filp, char *buf, size_t size, loff_t *ppos) DPRINT(("message is too small ctx=%p (>=%ld)\n", ctx, sizeof(pfm_msg_t))); return -EINVAL; } - /* - * seeks are not allowed on message queues - */ - if (ppos != &filp->f_pos) return -ESPIPE; PROTECT_CTX(ctx, flags); @@ -1603,7 +1590,7 @@ abort: } static ssize_t -pfm_write(struct file *file, const char *ubuf, +pfm_write(struct file *file, const char __user *ubuf, size_t size, loff_t *ppos) { DPRINT(("pfm_write called\n")); @@ -1653,7 +1640,7 @@ pfm_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned lon } /* - * context is locked when coming here and interrupts are disabled + * interrupt cannot be masked when coming here */ static inline int pfm_do_fasync(int fd, struct file *filp, pfm_context_t *ctx, int on) @@ -1675,7 +1662,6 @@ static int pfm_fasync(int fd, struct file *filp, int on) { pfm_context_t *ctx; - unsigned long flags; int ret; if (PFM_IS_FILE(filp) == 0) { @@ -1688,19 +1674,21 @@ pfm_fasync(int fd, struct file *filp, int on) printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", current->pid); 
return -EBADF; } - - - PROTECT_CTX(ctx, flags); - + /* + * we cannot mask interrupts during this call because this may + * may go to sleep if memory is not readily avalaible. + * + * We are protected from the conetxt disappearing by the get_fd()/put_fd() + * done in caller. Serialization of this function is ensured by caller. + */ ret = pfm_do_fasync(fd, filp, ctx, on); + DPRINT(("pfm_fasync called on ctx_fd=%d on=%d async_queue=%p ret=%d\n", fd, on, ctx->ctx_async_queue, ret)); - UNPROTECT_CTX(ctx, flags); - return ret; } @@ -2012,7 +2000,7 @@ pfm_close(struct inode *inode, struct file *filp) /* * XXX: check for signals : - * - ok of explicit close + * - ok for explicit close * - not ok when coming from exit_files() */ schedule(); @@ -2127,7 +2115,7 @@ pfm_no_open(struct inode *irrelevant, struct file *dontcare) static struct file_operations pfm_file_ops = { - .llseek = pfm_lseek, + .llseek = no_llseek, .read = pfm_read, .write = pfm_write, .poll = pfm_poll, @@ -2174,9 +2162,7 @@ pfm_alloc_fd(struct file **cfile) DPRINT(("new inode ino=%ld @%p\n", inode->i_ino, inode)); - inode->i_sb = pfmfs_mnt->mnt_sb; inode->i_mode = S_IFCHR|S_IRUGO; - inode->i_sock = 0; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; @@ -2224,6 +2210,15 @@ out: static void pfm_free_fd(int fd, struct file *file) { + struct files_struct *files = current->files; + + /* + * there ie no fd_uninstall(), so we do it here + */ + spin_lock(&files->file_lock); + files->fd[fd] = NULL; + spin_unlock(&files->file_lock); + if (file) put_filp(file); put_unused_fd(fd); } @@ -2231,14 +2226,14 @@ pfm_free_fd(int fd, struct file *file) static int pfm_remap_buffer(struct vm_area_struct *vma, unsigned long buf, unsigned long addr, unsigned long size) { - unsigned long page; - DPRINT(("CPU%d buf=0x%lx addr=0x%lx size=%ld\n", smp_processor_id(), buf, addr, size)); while (size > 0) { - page = pfm_kvirt_to_pa(buf); + unsigned long pfn = ia64_tpa(buf) >> PAGE_SHIFT; + - if (pfm_remap_page_range(vma, addr, page, PAGE_SIZE, PAGE_READONLY)) return -ENOMEM; + if (remap_pfn_range(vma, addr, pfn, PAGE_SIZE, PAGE_READONLY)) + return -ENOMEM; addr += PAGE_SIZE; buf += PAGE_SIZE; @@ -2274,7 +2269,8 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon * if ((mm->total_vm << PAGE_SHIFT) + len> task->rlim[RLIMIT_AS].rlim_cur) * return -ENOMEM; */ - if (size > task->rlim[RLIMIT_MEMLOCK].rlim_cur) return -EAGAIN; + if (size > task->signal->rlim[RLIMIT_MEMLOCK].rlim_cur) + return -ENOMEM; /* * We do the easy to undo allocations first. @@ -2295,20 +2291,14 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon DPRINT(("Cannot allocate vma\n")); goto error_kmem; } + memset(vma, 0, sizeof(*vma)); + /* * partially initialize the vma for the sampling buffer - * - * The VM_DONTCOPY flag is very important as it ensures that the mapping - * will never be inherited for any child process (via fork()) which is always - * what we want. 
*/ vma->vm_mm = mm; vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED; vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */ - vma->vm_ops = NULL; - vma->vm_pgoff = 0; - vma->vm_file = NULL; - vma->vm_private_data = NULL; /* * Now we have everything we need and we can initialize @@ -2334,6 +2324,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon goto error; } vma->vm_end = vma->vm_start + size; + vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT; DPRINT(("aligned size=%ld, hdr=%p mapped @0x%lx\n", size, ctx->ctx_smpl_hdr, vma->vm_start)); @@ -2350,8 +2341,8 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon */ insert_vm_struct(mm, vma); - mm->total_vm += size >> PAGE_SHIFT; - + vx_vmpages_add(mm, size >> PAGE_SHIFT); + vm_stat_account(vma); up_write(&task->mm->mmap_sem); /* @@ -2513,12 +2504,12 @@ pfm_reset_pmu_state(pfm_context_t *ctx) * * PMC0 is treated differently. */ - ctx->ctx_all_pmcs[0] = pmu_conf.impl_pmcs[0] & ~0x1; + ctx->ctx_all_pmcs[0] = pmu_conf->impl_pmcs[0] & ~0x1; /* * bitmask of all PMDs that are accesible to this context */ - ctx->ctx_all_pmds[0] = pmu_conf.impl_pmds[0]; + ctx->ctx_all_pmds[0] = pmu_conf->impl_pmds[0]; DPRINT(("<%d> all_pmcs=0x%lx all_pmds=0x%lx\n", ctx->ctx_fd, ctx->ctx_all_pmcs[0],ctx->ctx_all_pmds[0])); @@ -2581,7 +2572,7 @@ pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task) return -EINVAL; } - if (task->state == TASK_ZOMBIE) { + if (task->exit_state == EXIT_ZOMBIE) { DPRINT(("cannot attach to zombie task [%d]\n", task->pid)); return -EBUSY; } @@ -2591,7 +2582,7 @@ pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task) */ if (task == current) return 0; - if (task->state != TASK_STOPPED) { + if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) { DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task->pid, task->state)); return -EBUSY; } @@ -2658,8 +2649,10 @@ pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg ctx = pfm_context_alloc(); if (!ctx) goto error; - req->ctx_fd = ctx->ctx_fd = pfm_alloc_fd(&filp); - if (req->ctx_fd < 0) goto error_file; + ret = pfm_alloc_fd(&filp); + if (ret < 0) goto error_file; + + req->ctx_fd = ctx->ctx_fd = ret; /* * attach context to file @@ -2858,16 +2851,17 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) unsigned long value, pmc_pm; unsigned long smpl_pmds, reset_pmds, impl_pmds; unsigned int cnum, reg_flags, flags, pmc_type; - int i, can_access_pmu = 0, is_loaded, is_system; + int i, can_access_pmu = 0, is_loaded, is_system, expert_mode; int is_monitor, is_counting, state; int ret = -EINVAL; + pfm_reg_check_t wr_func; #define PFM_CHECK_PMC_PM(x, y, z) ((x)->ctx_fl_system ^ PMC_PM(y, z)) state = ctx->ctx_state; is_loaded = state == PFM_CTX_LOADED ? 1 : 0; is_system = ctx->ctx_fl_system; task = ctx->ctx_task; - impl_pmds = pmu_conf.impl_pmds[0]; + impl_pmds = pmu_conf->impl_pmds[0]; if (state == PFM_CTX_ZOMBIE) return -EINVAL; @@ -2884,6 +2878,7 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) } can_access_pmu = GET_PMU_OWNER() == task || is_system ? 
1 : 0; } + expert_mode = pfm_sysctl.expert_mode; for (i = 0; i < count; i++, req++) { @@ -2900,8 +2895,8 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) goto error; } - pmc_type = pmu_conf.pmc_desc[cnum].type; - pmc_pm = (value >> pmu_conf.pmc_desc[cnum].pm_pos) & 0x1; + pmc_type = pmu_conf->pmc_desc[cnum].type; + pmc_pm = (value >> pmu_conf->pmc_desc[cnum].pm_pos) & 0x1; is_counting = (pmc_type & PFM_REG_COUNTING) == PFM_REG_COUNTING ? 1 : 0; is_monitor = (pmc_type & PFM_REG_MONITOR) == PFM_REG_MONITOR ? 1 : 0; @@ -2914,6 +2909,7 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) DPRINT(("pmc%u is unimplemented or no-access pmc_type=%x\n", cnum, pmc_type)); goto error; } + wr_func = pmu_conf->pmc_desc[cnum].write_check; /* * If the PMC is a monitor, then if the value is not the default: * - system-wide session: PMCx.pm=1 (privileged monitor) @@ -2962,8 +2958,8 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) /* * execute write checker, if any */ - if (pfm_sysctl.expert_mode == 0 && PMC_WR_FUNC(cnum)) { - ret = PMC_WR_FUNC(cnum)(task, ctx, cnum, &value, regs); + if (likely(expert_mode == 0 && wr_func)) { + ret = (*wr_func)(task, ctx, cnum, &value, regs); if (ret) goto error; ret = -EINVAL; } @@ -3014,7 +3010,7 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) * PMD. Clearing is done indirectly via pfm_reset_pmu_state() so there is no * possible leak here. */ - CTX_USED_PMD(ctx, pmu_conf.pmc_desc[cnum].dep_pmd[0]); + CTX_USED_PMD(ctx, pmu_conf->pmc_desc[cnum].dep_pmd[0]); /* * keep track of the monitor PMC that we are using. @@ -3061,11 +3057,12 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) #endif } - DPRINT(("pmc[%u]=0x%lx loaded=%d access_pmu=%d all_pmcs=0x%lx used_pmds=0x%lx eventid=%ld smpl_pmds=0x%lx reset_pmds=0x%lx reloads_pmcs=0x%lx used_monitors=0x%lx ovfl_regs=0x%lx\n", + DPRINT(("pmc[%u]=0x%lx ld=%d apmu=%d flags=0x%x all_pmcs=0x%lx used_pmds=0x%lx eventid=%ld smpl_pmds=0x%lx reset_pmds=0x%lx reloads_pmcs=0x%lx used_monitors=0x%lx ovfl_regs=0x%lx\n", cnum, value, is_loaded, can_access_pmu, + flags, ctx->ctx_all_pmcs[0], ctx->ctx_used_pmds[0], ctx->ctx_pmds[cnum].eventid, @@ -3096,14 +3093,15 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) unsigned long value, hw_value, ovfl_mask; unsigned int cnum; int i, can_access_pmu = 0, state; - int is_counting, is_loaded, is_system; + int is_counting, is_loaded, is_system, expert_mode; int ret = -EINVAL; + pfm_reg_check_t wr_func; state = ctx->ctx_state; is_loaded = state == PFM_CTX_LOADED ? 1 : 0; is_system = ctx->ctx_fl_system; - ovfl_mask = pmu_conf.ovfl_val; + ovfl_mask = pmu_conf->ovfl_val; task = ctx->ctx_task; if (unlikely(state == PFM_CTX_ZOMBIE)) return -EINVAL; @@ -3125,6 +3123,7 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) } can_access_pmu = GET_PMU_OWNER() == task || is_system ? 
1 : 0; } + expert_mode = pfm_sysctl.expert_mode; for (i = 0; i < count; i++, req++) { @@ -3136,14 +3135,15 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) goto abort_mission; } is_counting = PMD_IS_COUNTING(cnum); + wr_func = pmu_conf->pmd_desc[cnum].write_check; /* * execute write checker, if any */ - if (pfm_sysctl.expert_mode == 0 && PMD_WR_FUNC(cnum)) { + if (unlikely(expert_mode == 0 && wr_func)) { unsigned long v = value; - ret = PMD_WR_FUNC(cnum)(task, ctx, cnum, &v, regs); + ret = (*wr_func)(task, ctx, cnum, &v, regs); if (ret) goto abort_mission; value = v; @@ -3238,8 +3238,8 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) } } - DPRINT(("pmd[%u]=0x%lx loaded=%d access_pmu=%d, hw_value=0x%lx ctx_pmd=0x%lx short_reset=0x%lx " - "long_reset=0x%lx notify=%c used_pmds=0x%lx reset_pmds=0x%lx reload_pmds=0x%lx all_pmds=0x%lx ovfl_regs=0x%lx\n", + DPRINT(("pmd[%u]=0x%lx ld=%d apmu=%d, hw_value=0x%lx ctx_pmd=0x%lx short_reset=0x%lx " + "long_reset=0x%lx notify=%c seed=0x%lx mask=0x%lx used_pmds=0x%lx reset_pmds=0x%lx reload_pmds=0x%lx all_pmds=0x%lx ovfl_regs=0x%lx\n", cnum, value, is_loaded, @@ -3249,6 +3249,8 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) ctx->ctx_pmds[cnum].short_reset, ctx->ctx_pmds[cnum].long_reset, PMC_OVFL_NOTIFY(ctx, cnum) ? 'Y':'N', + ctx->ctx_pmds[cnum].seed, + ctx->ctx_pmds[cnum].mask, ctx->ctx_used_pmds[0], ctx->ctx_pmds[cnum].reset_pmds[0], ctx->ctx_reload_pmds[0], @@ -3289,8 +3291,9 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) pfarg_reg_t *req = (pfarg_reg_t *)arg; unsigned int cnum, reg_flags = 0; int i, can_access_pmu = 0, state; - int is_loaded, is_system, is_counting; + int is_loaded, is_system, is_counting, expert_mode; int ret = -EINVAL; + pfm_reg_check_t rd_func; /* * access is possible when loaded only for @@ -3300,7 +3303,7 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) state = ctx->ctx_state; is_loaded = state == PFM_CTX_LOADED ? 1 : 0; is_system = ctx->ctx_fl_system; - ovfl_mask = pmu_conf.ovfl_val; + ovfl_mask = pmu_conf->ovfl_val; task = ctx->ctx_task; if (state == PFM_CTX_ZOMBIE) return -EINVAL; @@ -3323,8 +3326,9 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) if (can_access_pmu) ia64_srlz_d(); } + expert_mode = pfm_sysctl.expert_mode; - DPRINT(("loaded=%d access_pmu=%d ctx_state=%d\n", + DPRINT(("ld=%d apmu=%d ctx_state=%d\n", is_loaded, can_access_pmu, state)); @@ -3369,6 +3373,7 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) */ val = is_loaded ? 
thread->pmds[cnum] : 0UL; } + rd_func = pmu_conf->pmd_desc[cnum].read_check; if (is_counting) { /* @@ -3381,9 +3386,9 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) /* * execute read checker, if any */ - if (unlikely(pfm_sysctl.expert_mode == 0 && PMD_RD_FUNC(cnum))) { + if (unlikely(expert_mode == 0 && rd_func)) { unsigned long v = val; - ret = PMD_RD_FUNC(cnum)(ctx->ctx_task, ctx, cnum, &v, regs); + ret = (*rd_func)(ctx->ctx_task, ctx, cnum, &v, regs); if (ret) goto error; val = v; ret = -EINVAL; @@ -3463,7 +3468,7 @@ pfm_use_debug_registers(struct task_struct *task) unsigned long flags; int ret = 0; - if (pmu_conf.use_rr_dbregs == 0) return 0; + if (pmu_conf->use_rr_dbregs == 0) return 0; DPRINT(("called for [%d]\n", task->pid)); @@ -3517,7 +3522,7 @@ pfm_release_debug_registers(struct task_struct *task) unsigned long flags; int ret; - if (pmu_conf.use_rr_dbregs == 0) return 0; + if (pmu_conf->use_rr_dbregs == 0) return 0; LOCK_PFS(flags); if (pfm_sessions.pfs_ptrace_use_dbregs == 0) { @@ -3720,7 +3725,7 @@ pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_ int i, can_access_pmu = 0; int is_system, is_loaded; - if (pmu_conf.use_rr_dbregs == 0) return -EINVAL; + if (pmu_conf->use_rr_dbregs == 0) return -EINVAL; state = ctx->ctx_state; is_loaded = state == PFM_CTX_LOADED ? 1 : 0; @@ -3802,14 +3807,14 @@ pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_ */ if (first_time && can_access_pmu) { DPRINT(("[%d] clearing ibrs, dbrs\n", task->pid)); - for (i=0; i < pmu_conf.num_ibrs; i++) { + for (i=0; i < pmu_conf->num_ibrs; i++) { ia64_set_ibr(i, 0UL); - ia64_srlz_i(); + ia64_dv_serialize_instruction(); } ia64_srlz_i(); - for (i=0; i < pmu_conf.num_dbrs; i++) { + for (i=0; i < pmu_conf->num_dbrs; i++) { ia64_set_dbr(i, 0UL); - ia64_srlz_d(); + ia64_dv_serialize_data(); } ia64_srlz_d(); } @@ -3856,20 +3861,25 @@ pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_ if (mode == PFM_CODE_RR) { CTX_USED_IBR(ctx, rnum); - if (can_access_pmu) ia64_set_ibr(rnum, dbreg.val); + if (can_access_pmu) { + ia64_set_ibr(rnum, dbreg.val); + ia64_dv_serialize_instruction(); + } ctx->ctx_ibrs[rnum] = dbreg.val; - DPRINT(("write ibr%u=0x%lx used_ibrs=0x%x is_loaded=%d access_pmu=%d\n", + DPRINT(("write ibr%u=0x%lx used_ibrs=0x%x ld=%d apmu=%d\n", rnum, dbreg.val, ctx->ctx_used_ibrs[0], is_loaded, can_access_pmu)); } else { CTX_USED_DBR(ctx, rnum); - if (can_access_pmu) ia64_set_dbr(rnum, dbreg.val); - + if (can_access_pmu) { + ia64_set_dbr(rnum, dbreg.val); + ia64_dv_serialize_data(); + } ctx->ctx_dbrs[rnum] = dbreg.val; - DPRINT(("write dbr%u=0x%lx used_dbrs=0x%x is_loaded=%d access_pmu=%d\n", + DPRINT(("write dbr%u=0x%lx used_dbrs=0x%x ld=%d apmu=%d\n", rnum, dbreg.val, ctx->ctx_used_dbrs[0], is_loaded, can_access_pmu)); } } @@ -3970,7 +3980,10 @@ pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) state = ctx->ctx_state; is_system = ctx->ctx_fl_system; - if (state != PFM_CTX_LOADED && state != PFM_CTX_MASKED) return -EINVAL; + /* + * context must be attached to issue the stop command (includes LOADED,MASKED,ZOMBIE) + */ + if (state == PFM_CTX_UNLOADED) return -EINVAL; /* * In system wide and when the context is loaded, access can only happen @@ -4367,8 +4380,8 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) * guaranteed safe by earlier check against DBG_VALID */ if (ctx->ctx_fl_using_dbreg) { - pfm_restore_ibrs(ctx->ctx_ibrs, 
pmu_conf.num_ibrs); - pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf.num_dbrs); + pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); + pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); } /* * set new ownership @@ -4559,31 +4572,6 @@ pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg return 0; } -static void -pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs) -{ - struct task_struct *task = ctx->ctx_task; - - ia64_psr(regs)->up = 0; - ia64_psr(regs)->sp = 1; - - if (GET_PMU_OWNER() == task) { - DPRINT(("cleared ownership for [%d]\n", ctx->ctx_task->pid)); - SET_PMU_OWNER(NULL, NULL); - } - - /* - * disconnect the task from the context and vice-versa - */ - PFM_SET_WORK_PENDING(task, 0); - - task->thread.pfm_context = NULL; - task->thread.flags &= ~IA64_THREAD_PM_VALID; - - DPRINT(("force cleanupf for [%d]\n", task->pid)); -} - - /* * called only from exit_thread(): task == current @@ -4701,21 +4689,22 @@ static int pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags) { struct task_struct *task; - int state; + int state, old_state; +recheck: state = ctx->ctx_state; + task = ctx->ctx_task; - task = PFM_CTX_TASK(ctx); if (task == NULL) { DPRINT(("context %d no task, state=%d\n", ctx->ctx_fd, state)); return 0; } DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n", - ctx->ctx_fd, - state, - task->pid, - task->state, PFM_CMD_STOPPED(cmd))); + ctx->ctx_fd, + state, + task->pid, + task->state, PFM_CMD_STOPPED(cmd))); /* * self-monitoring always ok. @@ -4727,31 +4716,63 @@ pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags) if (task == current || ctx->ctx_fl_system) return 0; /* - * context is UNLOADED, MASKED we are safe to go + * if context is UNLOADED we are safe to go */ - if (state != PFM_CTX_LOADED) return 0; + if (state == PFM_CTX_UNLOADED) return 0; - if (state == PFM_CTX_ZOMBIE) return -EINVAL; + /* + * no command can operate on a zombie context + */ + if (state == PFM_CTX_ZOMBIE) { + DPRINT(("cmd %d state zombie cannot operate on context\n", cmd)); + return -EINVAL; + } /* - * context is loaded, we must make sure the task is stopped + * context is LOADED or MASKED. Some commands may need to have + * the task stopped. + * * We could lift this restriction for UP but it would mean that * the user has no guarantee the task would not run between * two successive calls to perfmonctl(). That's probably OK. * If this user wants to ensure the task does not run, then * the task must be stopped. */ - if (PFM_CMD_STOPPED(cmd) && task->state != TASK_STOPPED) { - DPRINT(("[%d] task not in stopped state\n", task->pid)); - return -EBUSY; - } + if (PFM_CMD_STOPPED(cmd)) { + if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) { + DPRINT(("[%d] task not in stopped state\n", task->pid)); + return -EBUSY; + } + /* + * task is now stopped, wait for ctxsw out + * + * This is an interesting point in the code. + * We need to unprotect the context because + * the pfm_save_regs() routines needs to grab + * the same lock. There are danger in doing + * this because it leaves a window open for + * another task to get access to the context + * and possibly change its state. The one thing + * that is not possible is for the context to disappear + * because we are protected by the VFS layer, i.e., + * get_fd()/put_fd(). 
+ */ + old_state = state; - UNPROTECT_CTX(ctx, flags); + UNPROTECT_CTX(ctx, flags); - wait_task_inactive(task); + wait_task_inactive(task); - PROTECT_CTX(ctx, flags); + PROTECT_CTX(ctx, flags); + /* + * we must recheck to verify if state has changed + */ + if (ctx->ctx_state != old_state) { + DPRINT(("old_state=%d new_state=%d\n", old_state, ctx->ctx_state)); + goto recheck; + } + } return 0; } @@ -4759,10 +4780,8 @@ pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags) * system-call entry point (must return long) */ asmlinkage long -sys_perfmonctl (int fd, int cmd, void *arg, int count, long arg5, long arg6, long arg7, - long arg8, long stack) +sys_perfmonctl (int fd, int cmd, void __user *arg, int count) { - struct pt_regs *regs = (struct pt_regs *)&stack; struct file *file = NULL; pfm_context_t *ctx = NULL; unsigned long flags = 0UL; @@ -4777,7 +4796,7 @@ sys_perfmonctl (int fd, int cmd, void *arg, int count, long arg5, long arg6, lon /* * reject any call if perfmon was disabled at initialization */ - if (unlikely(PFM_IS_DISABLED())) return -ENOSYS; + if (unlikely(pmu_conf == NULL)) return -ENOSYS; if (unlikely(cmd < 0 || cmd >= PFM_CMD_COUNT)) { DPRINT(("invalid cmd=%d\n", cmd)); @@ -4886,7 +4905,7 @@ restart_args: if (unlikely(ret)) goto abort_locked; skip_fd: - ret = (*func)(ctx, args_k, count, regs); + ret = (*func)(ctx, args_k, count, ia64_task_regs(current)); call_made = 1; @@ -4957,26 +4976,14 @@ pfm_resume_after_ovfl(pfm_context_t *ctx, unsigned long ovfl_regs, struct pt_reg static void pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs) { - if (ctx->ctx_fl_system) { - printk(KERN_ERR "perfmon: pfm_context_force_terminate [%d] is system-wide\n", current->pid); - return; - } - /* - * we stop the whole thing, we do no need to flush - * we know we WERE masked - */ - pfm_clear_psr_up(); - ia64_psr(regs)->up = 0; - ia64_psr(regs)->sp = 1; + int ret; - /* - * disconnect the task from the context and vice-versa - */ - current->thread.pfm_context = NULL; - current->thread.flags &= ~IA64_THREAD_PM_VALID; - ctx->ctx_task = NULL; + DPRINT(("entering for [%d]\n", current->pid)); - DPRINT(("context terminated\n")); + ret = pfm_context_unload(ctx, NULL, 0, regs); + if (ret) { + printk(KERN_ERR "pfm_context_force_terminate: [%d] unloaded failed with %d\n", current->pid, ret); + } /* * and wakeup controlling task, indicating we are now disconnected @@ -5036,6 +5043,18 @@ pfm_handle_work(void) UNPROTECT_CTX(ctx, flags); + /* + * pfm_handle_work() is currently called with interrupts disabled. + * The down_interruptible call may sleep, therefore we + * must re-enable interrupts to avoid deadlocks. It is + * safe to do so because this function is called ONLY + * when returning to user level (PUStk=1), in which case + * there is no risk of kernel stack overflow due to deep + * interrupt nesting. 
+ */ + BUG_ON(flags & IA64_PSR_I); + local_irq_enable(); + DPRINT(("before block sleeping\n")); /* @@ -5046,6 +5065,12 @@ pfm_handle_work(void) DPRINT(("after block sleeping ret=%d\n", ret)); + /* + * disable interrupts to restore state we had upon entering + * this function + */ + local_irq_disable(); + PROTECT_CTX(ctx, flags); /* @@ -5160,7 +5185,7 @@ pfm_end_notify_user(pfm_context_t *ctx) static void pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs) { - pfm_ovfl_arg_t ovfl_arg; + pfm_ovfl_arg_t *ovfl_arg; unsigned long mask; unsigned long old_val, ovfl_val, new_val; unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL, smpl_pmds = 0UL, reset_pmds; @@ -5178,7 +5203,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str tstamp = ia64_get_itc(); mask = pmc0 >> PMU_FIRST_COUNTER; - ovfl_val = pmu_conf.ovfl_val; + ovfl_val = pmu_conf->ovfl_val; has_smpl = CTX_HAS_SMPL(ctx); DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s " @@ -5247,7 +5272,8 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str int j, k, ret = 0; int this_cpu = smp_processor_id(); - pmd_mask = ovfl_pmds >> PMU_FIRST_COUNTER; + pmd_mask = ovfl_pmds >> PMU_FIRST_COUNTER; + ovfl_arg = &ctx->ctx_ovfl_arg; prefetch(ctx->ctx_smpl_hdr); @@ -5257,15 +5283,15 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str if ((pmd_mask & 0x1) == 0) continue; - ovfl_arg.ovfl_pmd = (unsigned char )i; - ovfl_arg.ovfl_notify = ovfl_notify & mask ? 1 : 0; - ovfl_arg.active_set = 0; - ovfl_arg.ovfl_ctrl.val = 0; /* module must fill in all fields */ - ovfl_arg.smpl_pmds[0] = smpl_pmds = ctx->ctx_pmds[i].smpl_pmds[0]; + ovfl_arg->ovfl_pmd = (unsigned char )i; + ovfl_arg->ovfl_notify = ovfl_notify & mask ? 1 : 0; + ovfl_arg->active_set = 0; + ovfl_arg->ovfl_ctrl.val = 0; /* module must fill in all fields */ + ovfl_arg->smpl_pmds[0] = smpl_pmds = ctx->ctx_pmds[i].smpl_pmds[0]; - ovfl_arg.pmd_value = ctx->ctx_pmds[i].val; - ovfl_arg.pmd_last_reset = ctx->ctx_pmds[i].lval; - ovfl_arg.pmd_eventid = ctx->ctx_pmds[i].eventid; + ovfl_arg->pmd_value = ctx->ctx_pmds[i].val; + ovfl_arg->pmd_last_reset = ctx->ctx_pmds[i].lval; + ovfl_arg->pmd_eventid = ctx->ctx_pmds[i].eventid; /* * copy values of pmds of interest. Sampling format may copy them @@ -5274,8 +5300,8 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str if (smpl_pmds) { for(j=0, k=0; smpl_pmds; j++, smpl_pmds >>=1) { if ((smpl_pmds & 0x1) == 0) continue; - ovfl_arg.smpl_pmds_values[k++] = PMD_IS_COUNTING(j) ? pfm_read_soft_counter(ctx, j) : ia64_get_pmd(j); - DPRINT_ovfl(("smpl_pmd[%d]=pmd%u=0x%lx\n", k-1, j, ovfl_arg.smpl_pmds_values[k-1])); + ovfl_arg->smpl_pmds_values[k++] = PMD_IS_COUNTING(j) ? pfm_read_soft_counter(ctx, j) : ia64_get_pmd(j); + DPRINT_ovfl(("smpl_pmd[%d]=pmd%u=0x%lx\n", k-1, j, ovfl_arg->smpl_pmds_values[k-1])); } } @@ -5286,7 +5312,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str /* * call custom buffer format record (handler) routine */ - ret = (*ctx->ctx_buf_fmt->fmt_handler)(task, ctx->ctx_smpl_hdr, &ovfl_arg, regs, tstamp); + ret = (*ctx->ctx_buf_fmt->fmt_handler)(task, ctx->ctx_smpl_hdr, ovfl_arg, regs, tstamp); end_cycles = ia64_get_itc(); @@ -5294,13 +5320,13 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str * For those controls, we take the union because they have * an all or nothing behavior. 
*/ - ovfl_ctrl.bits.notify_user |= ovfl_arg.ovfl_ctrl.bits.notify_user; - ovfl_ctrl.bits.block_task |= ovfl_arg.ovfl_ctrl.bits.block_task; - ovfl_ctrl.bits.mask_monitoring |= ovfl_arg.ovfl_ctrl.bits.mask_monitoring; + ovfl_ctrl.bits.notify_user |= ovfl_arg->ovfl_ctrl.bits.notify_user; + ovfl_ctrl.bits.block_task |= ovfl_arg->ovfl_ctrl.bits.block_task; + ovfl_ctrl.bits.mask_monitoring |= ovfl_arg->ovfl_ctrl.bits.mask_monitoring; /* * build the bitmask of pmds to reset now */ - if (ovfl_arg.ovfl_ctrl.bits.reset_ovfl_pmds) reset_pmds |= mask; + if (ovfl_arg->ovfl_ctrl.bits.reset_ovfl_pmds) reset_pmds |= mask; pfm_stats[this_cpu].pfm_smpl_handler_cycles += end_cycles - start_cycles; } @@ -5330,9 +5356,8 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str if (ovfl_notify == 0) reset_pmds = ovfl_pmds; } - DPRINT(("ovfl_pmds=0x%lx reset_pmds=0x%lx\n", - ovfl_pmds, - reset_pmds)); + DPRINT_ovfl(("ovfl_pmds=0x%lx reset_pmds=0x%lx\n", ovfl_pmds, reset_pmds)); + /* * reset the requested PMD registers using the short reset values */ @@ -5536,111 +5561,185 @@ pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs) return IRQ_HANDLED; } +/* + * /proc/perfmon interface, for debug only + */ + +#define PFM_PROC_SHOW_HEADER ((void *)NR_CPUS+1) -/* for debug only */ -static int -pfm_proc_info(char *page) +static void * +pfm_proc_start(struct seq_file *m, loff_t *pos) { - char *p = page; - struct list_head * pos; - pfm_buffer_fmt_t * entry; - unsigned long psr, flags; - int online_cpus = 0; - int i; + if (*pos == 0) { + return PFM_PROC_SHOW_HEADER; + } - p += sprintf(p, "perfmon version : %u.%u\n", PFM_VERSION_MAJ, PFM_VERSION_MIN); - p += sprintf(p, "model : %s\n", pmu_conf.pmu_name); - p += sprintf(p, "fastctxsw : %s\n", pfm_sysctl.fastctxsw > 0 ? "Yes": "No"); - p += sprintf(p, "expert mode : %s\n", pfm_sysctl.expert_mode > 0 ? "Yes": "No"); - p += sprintf(p, "ovfl_mask : 0x%lx\n", pmu_conf.ovfl_val); - - for(i=0; i < NR_CPUS; i++) { - if (cpu_online(i) == 0) continue; - p += sprintf(p, "CPU%-2d overflow intrs : %lu\n", i, pfm_stats[i].pfm_ovfl_intr_count); - p += sprintf(p, "CPU%-2d overflow cycles : %lu\n", i, pfm_stats[i].pfm_ovfl_intr_cycles); - p += sprintf(p, "CPU%-2d overflow min : %lu\n", i, pfm_stats[i].pfm_ovfl_intr_cycles_min); - p += sprintf(p, "CPU%-2d overflow max : %lu\n", i, pfm_stats[i].pfm_ovfl_intr_cycles_max); - p += sprintf(p, "CPU%-2d smpl handler calls : %lu\n", i, pfm_stats[i].pfm_smpl_handler_calls); - p += sprintf(p, "CPU%-2d smpl handler cycles : %lu\n", i, pfm_stats[i].pfm_smpl_handler_cycles); - p += sprintf(p, "CPU%-2d spurious intrs : %lu\n", i, pfm_stats[i].pfm_spurious_ovfl_intr_count); - p += sprintf(p, "CPU%-2d replay intrs : %lu\n", i, pfm_stats[i].pfm_replay_ovfl_intr_count); - p += sprintf(p, "CPU%-2d syst_wide : %d\n" , i, pfm_get_cpu_data(pfm_syst_info, i) & PFM_CPUINFO_SYST_WIDE ? 1 : 0); - p += sprintf(p, "CPU%-2d dcr_pp : %d\n" , i, pfm_get_cpu_data(pfm_syst_info, i) & PFM_CPUINFO_DCR_PP ? 1 : 0); - p += sprintf(p, "CPU%-2d exclude idle : %d\n" , i, pfm_get_cpu_data(pfm_syst_info, i) & PFM_CPUINFO_EXCL_IDLE ? 1 : 0); - p += sprintf(p, "CPU%-2d owner : %d\n" , i, pfm_get_cpu_data(pmu_owner, i) ? 
pfm_get_cpu_data(pmu_owner, i)->pid: -1); - p += sprintf(p, "CPU%-2d context : %p\n" , i, pfm_get_cpu_data(pmu_ctx, i)); - p += sprintf(p, "CPU%-2d activations : %lu\n", i, pfm_get_cpu_data(pmu_activation_number,i)); - online_cpus++; - } - - if (online_cpus == 1) - { - psr = pfm_get_psr(); - ia64_srlz_d(); - p += sprintf(p, "CPU%-2d psr : 0x%lx\n", smp_processor_id(), psr); - p += sprintf(p, "CPU%-2d pmc0 : 0x%lx\n", smp_processor_id(), ia64_get_pmc(0)); - for(i=4; i < 8; i++) { - p += sprintf(p, "CPU%-2d pmc%u : 0x%lx\n", smp_processor_id(), i, ia64_get_pmc(i)); - p += sprintf(p, "CPU%-2d pmd%u : 0x%lx\n", smp_processor_id(), i, ia64_get_pmd(i)); - } + while (*pos <= NR_CPUS) { + if (cpu_online(*pos - 1)) { + return (void *)*pos; + } + ++*pos; } + return NULL; +} - LOCK_PFS(flags); - p += sprintf(p, "proc_sessions : %u\n" - "sys_sessions : %u\n" - "sys_use_dbregs : %u\n" - "ptrace_use_dbregs : %u\n", - pfm_sessions.pfs_task_sessions, - pfm_sessions.pfs_sys_sessions, - pfm_sessions.pfs_sys_use_dbregs, - pfm_sessions.pfs_ptrace_use_dbregs); - UNLOCK_PFS(flags); +static void * +pfm_proc_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return pfm_proc_start(m, pos); +} + +static void +pfm_proc_stop(struct seq_file *m, void *v) +{ +} + +static void +pfm_proc_show_header(struct seq_file *m) +{ + struct list_head * pos; + pfm_buffer_fmt_t * entry; + unsigned long flags; + + seq_printf(m, + "perfmon version : %u.%u\n" + "model : %s\n" + "fastctxsw : %s\n" + "expert mode : %s\n" + "ovfl_mask : 0x%lx\n" + "PMU flags : 0x%x\n", + PFM_VERSION_MAJ, PFM_VERSION_MIN, + pmu_conf->pmu_name, + pfm_sysctl.fastctxsw > 0 ? "Yes": "No", + pfm_sysctl.expert_mode > 0 ? "Yes": "No", + pmu_conf->ovfl_val, + pmu_conf->flags); + + LOCK_PFS(flags); + + seq_printf(m, + "proc_sessions : %u\n" + "sys_sessions : %u\n" + "sys_use_dbregs : %u\n" + "ptrace_use_dbregs : %u\n", + pfm_sessions.pfs_task_sessions, + pfm_sessions.pfs_sys_sessions, + pfm_sessions.pfs_sys_use_dbregs, + pfm_sessions.pfs_ptrace_use_dbregs); + + UNLOCK_PFS(flags); spin_lock(&pfm_buffer_fmt_lock); list_for_each(pos, &pfm_buffer_fmt_list) { entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list); - p += sprintf(p, "format : %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x %s\n", - entry->fmt_uuid[0], - entry->fmt_uuid[1], - entry->fmt_uuid[2], - entry->fmt_uuid[3], - entry->fmt_uuid[4], - entry->fmt_uuid[5], - entry->fmt_uuid[6], - entry->fmt_uuid[7], - entry->fmt_uuid[8], - entry->fmt_uuid[9], - entry->fmt_uuid[10], - entry->fmt_uuid[11], - entry->fmt_uuid[12], - entry->fmt_uuid[13], - entry->fmt_uuid[14], - entry->fmt_uuid[15], - entry->fmt_name); + seq_printf(m, "format : %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x %s\n", + entry->fmt_uuid[0], + entry->fmt_uuid[1], + entry->fmt_uuid[2], + entry->fmt_uuid[3], + entry->fmt_uuid[4], + entry->fmt_uuid[5], + entry->fmt_uuid[6], + entry->fmt_uuid[7], + entry->fmt_uuid[8], + entry->fmt_uuid[9], + entry->fmt_uuid[10], + entry->fmt_uuid[11], + entry->fmt_uuid[12], + entry->fmt_uuid[13], + entry->fmt_uuid[14], + entry->fmt_uuid[15], + entry->fmt_name); } spin_unlock(&pfm_buffer_fmt_lock); - return p - page; } -/* /proc interface, for debug only */ static int -perfmon_read_entry(char *page, char **start, off_t off, int count, int *eof, void *data) +pfm_proc_show(struct seq_file *m, void *v) { - int len = pfm_proc_info(page); + unsigned long psr; + unsigned int i; + int cpu; + + if (v == PFM_PROC_SHOW_HEADER) { + pfm_proc_show_header(m); 
+ return 0; + } - if (len <= off+count) *eof = 1; + /* show info for CPU (v - 1) */ + + cpu = (long)v - 1; + seq_printf(m, + "CPU%-2d overflow intrs : %lu\n" + "CPU%-2d overflow cycles : %lu\n" + "CPU%-2d overflow min : %lu\n" + "CPU%-2d overflow max : %lu\n" + "CPU%-2d smpl handler calls : %lu\n" + "CPU%-2d smpl handler cycles : %lu\n" + "CPU%-2d spurious intrs : %lu\n" + "CPU%-2d replay intrs : %lu\n" + "CPU%-2d syst_wide : %d\n" + "CPU%-2d dcr_pp : %d\n" + "CPU%-2d exclude idle : %d\n" + "CPU%-2d owner : %d\n" + "CPU%-2d context : %p\n" + "CPU%-2d activations : %lu\n", + cpu, pfm_stats[cpu].pfm_ovfl_intr_count, + cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles, + cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_min, + cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_max, + cpu, pfm_stats[cpu].pfm_smpl_handler_calls, + cpu, pfm_stats[cpu].pfm_smpl_handler_cycles, + cpu, pfm_stats[cpu].pfm_spurious_ovfl_intr_count, + cpu, pfm_stats[cpu].pfm_replay_ovfl_intr_count, + cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_SYST_WIDE ? 1 : 0, + cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_DCR_PP ? 1 : 0, + cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_EXCL_IDLE ? 1 : 0, + cpu, pfm_get_cpu_data(pmu_owner, cpu) ? pfm_get_cpu_data(pmu_owner, cpu)->pid: -1, + cpu, pfm_get_cpu_data(pmu_ctx, cpu), + cpu, pfm_get_cpu_data(pmu_activation_number, cpu)); + + if (num_online_cpus() == 1 && pfm_sysctl.debug > 0) { - *start = page + off; - len -= off; + psr = pfm_get_psr(); - if (len>count) len = count; - if (len<0) len = 0; + ia64_srlz_d(); - return len; + seq_printf(m, + "CPU%-2d psr : 0x%lx\n" + "CPU%-2d pmc0 : 0x%lx\n", + cpu, psr, + cpu, ia64_get_pmc(0)); + + for (i=0; PMC_IS_LAST(i) == 0; i++) { + if (PMC_IS_COUNTING(i) == 0) continue; + seq_printf(m, + "CPU%-2d pmc%u : 0x%lx\n" + "CPU%-2d pmd%u : 0x%lx\n", + cpu, i, ia64_get_pmc(i), + cpu, i, ia64_get_pmd(i)); + } + } + return 0; } +struct seq_operations pfm_seq_ops = { + .start = pfm_proc_start, + .next = pfm_proc_next, + .stop = pfm_proc_stop, + .show = pfm_proc_show +}; + +static int +pfm_proc_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &pfm_seq_ops); +} + + /* * we come here as soon as local_cpu_data->pfm_syst_wide is set. this happens * during pfm_enable() hence before pfm_start(). 
We cannot assume monitoring @@ -5694,6 +5793,32 @@ pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_c } #ifdef CONFIG_SMP + +static void +pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs) +{ + struct task_struct *task = ctx->ctx_task; + + ia64_psr(regs)->up = 0; + ia64_psr(regs)->sp = 1; + + if (GET_PMU_OWNER() == task) { + DPRINT(("cleared ownership for [%d]\n", ctx->ctx_task->pid)); + SET_PMU_OWNER(NULL, NULL); + } + + /* + * disconnect the task from the context and vice-versa + */ + PFM_SET_WORK_PENDING(task, 0); + + task->thread.pfm_context = NULL; + task->thread.flags &= ~IA64_THREAD_PM_VALID; + + DPRINT(("force cleanup for [%d]\n", task->pid)); +} + + /* * in 2.6, interrupts are masked when we come here and the runqueue lock is held */ @@ -5732,14 +5857,6 @@ pfm_save_regs(struct task_struct *task) return; } - /* - * sanity check - */ - if (ctx->ctx_last_activation != GET_ACTIVATION()) { - pfm_unprotect_ctx_ctxsw(ctx, flags); - return; - } - /* * save current PSR: needed because we modify it */ @@ -5899,6 +6016,7 @@ pfm_load_regs (struct task_struct *task) unsigned long pmc_mask = 0UL, pmd_mask = 0UL; unsigned long flags; u64 psr, psr_up; + int need_irq_resend; ctx = PFM_GET_CTX(task); if (unlikely(ctx == NULL)) return; @@ -5919,6 +6037,8 @@ pfm_load_regs (struct task_struct *task) flags = pfm_protect_ctx_ctxsw(ctx); psr = pfm_get_psr(); + need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND; + BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); BUG_ON(psr & IA64_PSR_I); @@ -5944,8 +6064,8 @@ pfm_load_regs (struct task_struct *task) * stale state. */ if (ctx->ctx_fl_using_dbreg) { - pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf.num_ibrs); - pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf.num_dbrs); + pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); + pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); } /* * retrieve saved psr.up @@ -6004,12 +6124,12 @@ pfm_load_regs (struct task_struct *task) ia64_set_pmc(0, t->pmcs[0]); ia64_srlz_d(); t->pmcs[0] = 0UL; -#ifndef CONFIG_MCKINLEY + /* * will replay the PMU interrupt */ - hw_resend_irq(NULL, IA64_PERFMON_VECTOR); -#endif + if (need_irq_resend) hw_resend_irq(NULL, IA64_PERFMON_VECTOR); + pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++; } @@ -6061,6 +6181,7 @@ pfm_load_regs (struct task_struct *task) struct task_struct *owner; unsigned long pmd_mask, pmc_mask; u64 psr, psr_up; + int need_irq_resend; owner = GET_PMU_OWNER(); ctx = PFM_GET_CTX(task); @@ -6079,14 +6200,15 @@ pfm_load_regs (struct task_struct *task) * (not perfmon) by the previous task. */ if (ctx->ctx_fl_using_dbreg) { - pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf.num_ibrs); - pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf.num_dbrs); + pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); + pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); } /* * retrieved saved psr.up */ psr_up = ctx->ctx_saved_psr_up; + need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND; /* * short path, our state is still there, just @@ -6143,12 +6265,11 @@ pfm_load_regs (struct task_struct *task) t->pmcs[0] = 0UL; -#ifndef CONFIG_MCKINLEY /* * will replay the PMU interrupt */ - hw_resend_irq(NULL, IA64_PERFMON_VECTOR); -#endif + if (need_irq_resend) hw_resend_irq(NULL, IA64_PERFMON_VECTOR); + pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++; } @@ -6184,15 +6305,15 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx) */ is_self = ctx->ctx_task == task ? 
1 : 0; -#ifdef CONFIG_SMP - if (task == current) { -#else /* - * in UP, the state can still be in the registers + * can access PMU is task is the owner of the PMU state on the current CPU + * or if we are running on the CPU bound to the context in system-wide mode + * (that is not necessarily the task the context is attached to in this mode). + * In system-wide we always have can_access_pmu true because a task running on an + * invalid processor is flagged earlier in the call stack (see pfm_stop). */ - if (task == current || GET_PMU_OWNER() == task) { -#endif - can_access_pmu = 1; + can_access_pmu = (GET_PMU_OWNER() == task) || (ctx->ctx_fl_system && ctx->ctx_cpu == smp_processor_id()); + if (can_access_pmu) { /* * Mark the PMU as not owned * This will cause the interrupt handler to do nothing in case an overflow @@ -6202,6 +6323,7 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx) * on. */ SET_PMU_OWNER(NULL, NULL); + DPRINT(("releasing ownership\n")); /* * read current overflow status: @@ -6222,7 +6344,7 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx) */ task->thread.pmcs[0] = 0; } - ovfl_val = pmu_conf.ovfl_val; + ovfl_val = pmu_conf->ovfl_val; /* * we save all the used pmds * we take care of overflows for counting PMDs @@ -6230,6 +6352,9 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx) * XXX: sampling situation is not taken into account here */ mask2 = ctx->ctx_used_pmds[0]; + + DPRINT(("is_self=%d ovfl_val=0x%lx mask2=0x%lx\n", is_self, ovfl_val, mask2)); + for (i = 0; mask2; i++, mask2>>=1) { /* skip non used pmds */ @@ -6268,7 +6393,7 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx) } } - DPRINT(("[%d] is_self=%d ctx_pmd[%d]=0x%lx pmd_val=0x%lx\n", task->pid, is_self, i, val, pmd_val)); + DPRINT(("[%d] ctx_pmd[%d]=0x%lx pmd_val=0x%lx\n", task->pid, i, val, pmd_val)); if (is_self) task->thread.pmds[i] = pmd_val; @@ -6287,6 +6412,36 @@ static struct irqaction perfmon_irqaction = { */ static int init_pfm_fs(void); +static int __init +pfm_probe_pmu(void) +{ + pmu_config_t **p; + int family; + + family = local_cpu_data->family; + p = pmu_confs; + + while(*p) { + if ((*p)->probe) { + if ((*p)->probe() == 0) goto found; + } else if ((*p)->pmu_family == family || (*p)->pmu_family == 0xff) { + goto found; + } + p++; + } + return -1; +found: + pmu_conf = *p; + return 0; +} + +static struct file_operations pfm_proc_fops = { + .open = pfm_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + int __init pfm_init(void) { @@ -6297,12 +6452,9 @@ pfm_init(void) PFM_VERSION_MIN, IA64_PERFMON_VECTOR); - /* - * PMU type sanity check - * XXX: maybe better to implement autodetection (but then we have a larger kernel) - */ - if (local_cpu_data->family != pmu_conf.pmu_family) { - printk(KERN_INFO "perfmon: disabled, kernel only supports %s PMU family\n", pmu_conf.pmu_name); + if (pfm_probe_pmu()) { + printk(KERN_INFO "perfmon: disabled, there is no support for processor family %d\n", + local_cpu_data->family); return -ENODEV; } @@ -6313,56 +6465,64 @@ pfm_init(void) n = 0; for (i=0; PMC_IS_LAST(i) == 0; i++) { if (PMC_IS_IMPL(i) == 0) continue; - pmu_conf.impl_pmcs[i>>6] |= 1UL << (i&63); + pmu_conf->impl_pmcs[i>>6] |= 1UL << (i&63); n++; } - pmu_conf.num_pmcs = n; + pmu_conf->num_pmcs = n; n = 0; n_counters = 0; for (i=0; PMD_IS_LAST(i) == 0; i++) { if (PMD_IS_IMPL(i) == 0) continue; - pmu_conf.impl_pmds[i>>6] |= 1UL << (i&63); + pmu_conf->impl_pmds[i>>6] |= 1UL << (i&63); n++; if (PMD_IS_COUNTING(i)) n_counters++; 
} - pmu_conf.num_pmds = n; - pmu_conf.num_counters = n_counters; + pmu_conf->num_pmds = n; + pmu_conf->num_counters = n_counters; /* * sanity checks on the number of debug registers */ - if (pmu_conf.use_rr_dbregs) { - if (pmu_conf.num_ibrs > IA64_NUM_DBG_REGS) { - printk(KERN_INFO "perfmon: unsupported number of code debug registers (%u)\n", pmu_conf.num_ibrs); + if (pmu_conf->use_rr_dbregs) { + if (pmu_conf->num_ibrs > IA64_NUM_DBG_REGS) { + printk(KERN_INFO "perfmon: unsupported number of code debug registers (%u)\n", pmu_conf->num_ibrs); + pmu_conf = NULL; return -1; } - if (pmu_conf.num_dbrs > IA64_NUM_DBG_REGS) { - printk(KERN_INFO "perfmon: unsupported number of data debug registers (%u)\n", pmu_conf.num_ibrs); + if (pmu_conf->num_dbrs > IA64_NUM_DBG_REGS) { + printk(KERN_INFO "perfmon: unsupported number of data debug registers (%u)\n", pmu_conf->num_ibrs); + pmu_conf = NULL; return -1; } } printk("perfmon: %s PMU detected, %u PMCs, %u PMDs, %u counters (%lu bits)\n", - pmu_conf.pmu_name, - pmu_conf.num_pmcs, - pmu_conf.num_pmds, - pmu_conf.num_counters, - ffz(pmu_conf.ovfl_val)); + pmu_conf->pmu_name, + pmu_conf->num_pmcs, + pmu_conf->num_pmds, + pmu_conf->num_counters, + ffz(pmu_conf->ovfl_val)); /* sanity check */ - if (pmu_conf.num_pmds >= IA64_NUM_PMD_REGS || pmu_conf.num_pmcs >= IA64_NUM_PMC_REGS) { + if (pmu_conf->num_pmds >= IA64_NUM_PMD_REGS || pmu_conf->num_pmcs >= IA64_NUM_PMC_REGS) { printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n"); + pmu_conf = NULL; return -1; } /* * create /proc/perfmon (mostly for debugging purposes) */ - perfmon_dir = create_proc_read_entry ("perfmon", 0, 0, perfmon_read_entry, NULL); + perfmon_dir = create_proc_entry("perfmon", S_IRUGO, NULL); if (perfmon_dir == NULL) { printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n"); + pmu_conf = NULL; return -1; } + /* + * install customized file operations for /proc/perfmon entry + */ + perfmon_dir->proc_fops = &pfm_proc_fops; /* * create /proc/sys/kernel/perfmon (for debugging purposes) @@ -6379,9 +6539,6 @@ pfm_init(void) for(i=0; i < NR_CPUS; i++) pfm_stats[i].pfm_ovfl_intr_cycles_min = ~0UL; - /* we are all set */ - pmu_conf.enabled = 1; - return 0; } @@ -6393,8 +6550,6 @@ __initcall(pfm_init); void pfm_init_percpu (void) { - int i; - /* * make sure no measurement is active * (may inherit programmed PMCs from EFI). @@ -6412,28 +6567,6 @@ pfm_init_percpu (void) ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR); ia64_srlz_d(); - - /* - * we first initialize the PMU to a stable state. - * the values may have been changed from their power-up - * values by software executed before the kernel took over. - * - * At this point, pmu_conf has not yet been initialized - * - * On McKinley, this code is ineffective until PMC4 is initialized - * but that's all right because we take care of pmc0 later. - * - * XXX: potential problems with pmc1. - */ - for (i=1; PMC_IS_LAST(i) == 0; i++) { - if (PMC_IS_IMPL(i) == 0) continue; - ia64_set_pmc(i, PMC_DFL_VAL(i)); - } - - for (i=0; PMD_IS_LAST(i) == 0; i++) { - if (PMD_IS_IMPL(i) == 0) continue; - ia64_set_pmd(i, 0UL); - } } /* @@ -6538,8 +6671,7 @@ pfm_inherit(struct task_struct *task, struct pt_regs *regs) } #else /* !CONFIG_PERFMON */ asmlinkage long -sys_perfmonctl (int fd, int cmd, void *arg, int count, long arg5, long arg6, long arg7, - long arg8, long stack) +sys_perfmonctl (int fd, int cmd, void *arg, int count) { return -ENOSYS; }
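
For reference, the reworked entry point above takes only the four arguments user space actually supplies (fd, cmd, arg, count) and obtains the register frame with ia64_task_regs(current) instead of reading it off the stack. A minimal user-space sketch of that calling convention follows; it is an illustration only, not part of the patch, and it assumes the ia64 perfmon-2.x headers (pfarg_context_t and PFM_CREATE_CONTEXT, typically via <perfmon.h>) plus __NR_perfmonctl from the ia64 unistd.h, since glibc ships no dedicated wrapper for this syscall.

/*
 * Illustration only: create a perfmon-2.x context through the
 * 4-argument sys_perfmonctl() entry point shown in this patch.
 * Assumes <perfmon.h> declares pfarg_context_t and PFM_CREATE_CONTEXT,
 * and that __NR_perfmonctl is defined on ia64.
 */
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <perfmon.h>

int create_pfm_context(void)
{
	pfarg_context_t ctx;

	memset(&ctx, 0, sizeof(ctx));

	/* fd is not used by PFM_CREATE_CONTEXT; count is one pfarg_context_t */
	if (syscall(__NR_perfmonctl, 0, PFM_CREATE_CONTEXT, &ctx, 1) == -1)
		return -1;

	/* the kernel hands back the new context file descriptor in ctx_fd */
	return ctx.ctx_fd;
}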