X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fia64%2Fkernel%2Fperfmon.c;h=c0a4d8f8fab377ccb0a32e011c0848689dbd16b7;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=4f1543cdafec51d8f9651df34a4476d80e86224e;hpb=8e8ece46a861c84343256819eaec77e608ff9217;p=linux-2.6.git diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 4f1543cda..c0a4d8f8f 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -11,7 +11,7 @@ * Version Perfmon-2.x is a rewrite of perfmon-1.x * by Stephane Eranian, Hewlett Packard Co. * - * Copyright (C) 1999-2003, 2005 Hewlett Packard Co + * Copyright (C) 1999-2005 Hewlett Packard Co * Stephane Eranian * David Mosberger-Tang * @@ -19,7 +19,6 @@ * http://www.hpl.hp.com/research/linux/perfmon */ -#include #include #include #include @@ -35,12 +34,14 @@ #include #include #include +#include #include #include -#include #include +#include +#include +#include #include -#include #include #include @@ -63,6 +64,9 @@ #define PFM_INVALID_ACTIVATION (~0UL) +#define PFM_NUM_PMC_REGS 64 /* PMC save area for ctxsw */ +#define PFM_NUM_PMD_REGS 64 /* PMD save area for ctxsw */ + /* * depth of message queue */ @@ -287,7 +291,7 @@ typedef struct pfm_context { unsigned long ctx_ovfl_regs[4]; /* which registers overflowed (notification) */ - struct semaphore ctx_restart_sem; /* use for blocking notification mode */ + struct completion ctx_restart_done; /* use for blocking notification mode */ unsigned long ctx_used_pmds[4]; /* bitmask of PMD used */ unsigned long ctx_all_pmds[4]; /* bitmask of all accessible PMDs */ @@ -297,14 +301,17 @@ typedef struct pfm_context { unsigned long ctx_reload_pmcs[4]; /* bitmask of force reload PMC on ctxsw in */ unsigned long ctx_used_monitors[4]; /* bitmask of monitor PMC being used */ - unsigned long ctx_pmcs[IA64_NUM_PMC_REGS]; /* saved copies of PMC values */ + unsigned long ctx_pmcs[PFM_NUM_PMC_REGS]; /* saved copies of PMC values */ unsigned int ctx_used_ibrs[1]; /* bitmask of used IBR (speedup ctxsw in) */ unsigned int ctx_used_dbrs[1]; /* bitmask of used DBR (speedup ctxsw in) */ unsigned long ctx_dbrs[IA64_NUM_DBG_REGS]; /* DBR values (cache) when not loaded */ unsigned long ctx_ibrs[IA64_NUM_DBG_REGS]; /* IBR values (cache) when not loaded */ - pfm_counter_t ctx_pmds[IA64_NUM_PMD_REGS]; /* software state for PMDS */ + pfm_counter_t ctx_pmds[PFM_NUM_PMD_REGS]; /* software state for PMDS */ + + unsigned long th_pmcs[PFM_NUM_PMC_REGS]; /* PMC thread save state */ + unsigned long th_pmds[PFM_NUM_PMD_REGS]; /* PMD thread save state */ u64 ctx_saved_psr_up; /* only contains psr.up value */ @@ -481,14 +488,6 @@ typedef struct { #define PFM_CMD_ARG_MANY -1 /* cannot be zero */ -typedef struct { - int debug; /* turn on/off debugging via syslog */ - int debug_ovfl; /* turn on/off debug printk in overflow handler */ - int fastctxsw; /* turn on/off fast (unsecure) ctxsw */ - int expert_mode; /* turn on/off value checking */ - int debug_pfm_read; -} pfm_sysctl_t; - typedef struct { unsigned long pfm_spurious_ovfl_intr_count; /* keep track of spurious ovfl interrupts */ unsigned long pfm_replay_ovfl_intr_count; /* keep track of replayed ovfl interrupts */ @@ -507,6 +506,9 @@ typedef struct { static pfm_stats_t pfm_stats[NR_CPUS]; static pfm_session_t pfm_sessions; /* global sessions information */ +static DEFINE_SPINLOCK(pfm_alt_install_check); +static pfm_intr_handler_desc_t *pfm_alt_intr_handler; + static struct proc_dir_entry *perfmon_dir; static pfm_uuid_t pfm_null_uuid = {0,}; @@ -516,8 +518,8 @@ static LIST_HEAD(pfm_buffer_fmt_list); static pmu_config_t *pmu_conf; /* sysctl() controls */ -static pfm_sysctl_t pfm_sysctl; -int pfm_debug_var; +pfm_sysctl_t pfm_sysctl; +EXPORT_SYMBOL(pfm_sysctl); static ctl_table pfm_ctl_table[]={ {1, "debug", &pfm_sysctl.debug, sizeof(int), 0666, NULL, &proc_dointvec, NULL,}, @@ -537,7 +539,6 @@ static ctl_table pfm_sysctl_root[] = { static struct ctl_table_header *pfm_sysctl_header; static int pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); -static int pfm_flush(struct file *filp); #define pfm_get_cpu_var(v) __ia64_per_cpu_var(v) #define pfm_get_cpu_data(a,b) per_cpu(a, b) @@ -581,7 +582,7 @@ pfm_protect_ctx_ctxsw(pfm_context_t *x) return 0UL; } -static inline unsigned long +static inline void pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f) { spin_unlock(&(x)->ctx_lock); @@ -600,10 +601,11 @@ pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, } -static struct super_block * -pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) +static int +pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, + struct vfsmount *mnt) { - return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC); + return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC, mnt); } static struct file_system_type pfm_fs_type = { @@ -616,6 +618,7 @@ DEFINE_PER_CPU(unsigned long, pfm_syst_info); DEFINE_PER_CPU(struct task_struct *, pmu_owner); DEFINE_PER_CPU(pfm_context_t *, pmu_ctx); DEFINE_PER_CPU(unsigned long, pmu_activation_number); +EXPORT_PER_CPU_SYMBOL_GPL(pfm_syst_info); /* forward declaration */ @@ -633,9 +636,11 @@ static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, #include "perfmon_itanium.h" #include "perfmon_mckinley.h" +#include "perfmon_montecito.h" #include "perfmon_generic.h" static pmu_config_t *pmu_confs[]={ + &pmu_conf_mont, &pmu_conf_mck, &pmu_conf_ita, &pmu_conf_gen, /* must be last */ @@ -849,9 +854,8 @@ pfm_context_alloc(void) * allocate context descriptor * must be able to free with interrupts disabled */ - ctx = kmalloc(sizeof(pfm_context_t), GFP_KERNEL); + ctx = kzalloc(sizeof(pfm_context_t), GFP_KERNEL); if (ctx) { - memset(ctx, 0, sizeof(pfm_context_t)); DPRINT(("alloc ctx @%p\n", ctx)); } return ctx; @@ -870,7 +874,6 @@ static void pfm_mask_monitoring(struct task_struct *task) { pfm_context_t *ctx = PFM_GET_CTX(task); - struct thread_struct *th = &task->thread; unsigned long mask, val, ovfl_mask; int i; @@ -891,7 +894,7 @@ pfm_mask_monitoring(struct task_struct *task) * So in both cases, the live register contains the owner's * state. We can ONLY touch the PMU registers and NOT the PSR. * - * As a consequence to this call, the thread->pmds[] array + * As a consequence to this call, the ctx->th_pmds[] array * contains stale information which must be ignored * when context is reloaded AND monitoring is active (see * pfm_restart). @@ -926,9 +929,9 @@ pfm_mask_monitoring(struct task_struct *task) mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER; for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) { if ((mask & 0x1) == 0UL) continue; - ia64_set_pmc(i, th->pmcs[i] & ~0xfUL); - th->pmcs[i] &= ~0xfUL; - DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, th->pmcs[i])); + ia64_set_pmc(i, ctx->th_pmcs[i] & ~0xfUL); + ctx->th_pmcs[i] &= ~0xfUL; + DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i])); } /* * make all of this visible @@ -945,7 +948,6 @@ static void pfm_restore_monitoring(struct task_struct *task) { pfm_context_t *ctx = PFM_GET_CTX(task); - struct thread_struct *th = &task->thread; unsigned long mask, ovfl_mask; unsigned long psr, val; int i, is_system; @@ -1011,9 +1013,9 @@ pfm_restore_monitoring(struct task_struct *task) mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER; for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) { if ((mask & 0x1) == 0UL) continue; - th->pmcs[i] = ctx->ctx_pmcs[i]; - ia64_set_pmc(i, th->pmcs[i]); - DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, th->pmcs[i])); + ctx->th_pmcs[i] = ctx->ctx_pmcs[i]; + ia64_set_pmc(i, ctx->th_pmcs[i]); + DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, ctx->th_pmcs[i])); } ia64_srlz_d(); @@ -1072,7 +1074,6 @@ pfm_restore_pmds(unsigned long *pmds, unsigned long mask) static inline void pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx) { - struct thread_struct *thread = &task->thread; unsigned long ovfl_val = pmu_conf->ovfl_val; unsigned long mask = ctx->ctx_all_pmds[0]; unsigned long val; @@ -1094,11 +1095,11 @@ pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx) ctx->ctx_pmds[i].val = val & ~ovfl_val; val &= ovfl_val; } - thread->pmds[i] = val; + ctx->th_pmds[i] = val; DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n", i, - thread->pmds[i], + ctx->th_pmds[i], ctx->ctx_pmds[i].val)); } } @@ -1109,7 +1110,6 @@ pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx) static inline void pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx) { - struct thread_struct *thread = &task->thread; unsigned long mask = ctx->ctx_all_pmcs[0]; int i; @@ -1117,8 +1117,8 @@ pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx) for (i=0; mask; i++, mask>>=1) { /* masking 0 with ovfl_val yields 0 */ - thread->pmcs[i] = ctx->ctx_pmcs[i]; - DPRINT(("pmc[%d]=0x%lx\n", i, thread->pmcs[i])); + ctx->th_pmcs[i] = ctx->ctx_pmcs[i]; + DPRINT(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i])); } } @@ -1275,6 +1275,8 @@ out: } EXPORT_SYMBOL(pfm_unregister_buffer_fmt); +extern void update_pal_halt_status(int); + static int pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu) { @@ -1321,6 +1323,11 @@ pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu) is_syswide, cpu)); + /* + * disable default_idle() to go to PAL_HALT + */ + update_pal_halt_status(0); + UNLOCK_PFS(flags); return 0; @@ -1328,7 +1335,7 @@ pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu) error_conflict: DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n", pfm_sessions.pfs_sys_session[cpu]->pid, - smp_processor_id())); + cpu)); abort: UNLOCK_PFS(flags); @@ -1376,6 +1383,12 @@ pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu) is_syswide, cpu)); + /* + * if possible, enable default_idle() to go into PAL_HALT + */ + if (pfm_sessions.pfs_task_sessions == 0 && pfm_sessions.pfs_sys_sessions == 0) + update_pal_halt_status(1); + UNLOCK_PFS(flags); return 0; @@ -1578,7 +1591,7 @@ pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos) goto abort_locked; } - DPRINT(("[%d] fd=%d type=%d\n", current->pid, msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type)); + DPRINT(("fd=%d type=%d\n", msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type)); ret = -EFAULT; if(copy_to_user(buf, msg, sizeof(pfm_msg_t)) == 0) ret = sizeof(pfm_msg_t); @@ -1702,7 +1715,7 @@ static void pfm_syswide_force_stop(void *info) { pfm_context_t *ctx = (pfm_context_t *)info; - struct pt_regs *regs = ia64_task_regs(current); + struct pt_regs *regs = task_pt_regs(current); struct task_struct *owner; unsigned long flags; int ret; @@ -1762,7 +1775,7 @@ pfm_syswide_cleanup_other_cpu(pfm_context_t *ctx) * When caller is self-monitoring, the context is unloaded. */ static int -pfm_flush(struct file *filp) +pfm_flush(struct file *filp, fl_owner_t id) { pfm_context_t *ctx; struct task_struct *task; @@ -1807,7 +1820,7 @@ pfm_flush(struct file *filp) is_system = ctx->ctx_fl_system; task = PFM_CTX_TASK(ctx); - regs = ia64_task_regs(task); + regs = task_pt_regs(task); DPRINT(("ctx_state=%d is_current=%d\n", state, @@ -1937,7 +1950,7 @@ pfm_close(struct inode *inode, struct file *filp) is_system = ctx->ctx_fl_system; task = PFM_CTX_TASK(ctx); - regs = ia64_task_regs(task); + regs = task_pt_regs(task); DPRINT(("ctx_state=%d is_current=%d\n", state, @@ -1981,7 +1994,7 @@ pfm_close(struct inode *inode, struct file *filp) /* * force task to wake up from MASKED state */ - up(&ctx->ctx_restart_sem); + complete(&ctx->ctx_restart_done); DPRINT(("waking up ctx_state=%d\n", state)); @@ -2176,13 +2189,13 @@ pfm_alloc_fd(struct file **cfile) /* * allocate a new dcache entry */ - file->f_dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this); - if (!file->f_dentry) goto out; + file->f_path.dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this); + if (!file->f_path.dentry) goto out; - file->f_dentry->d_op = &pfmfs_dentry_operations; + file->f_path.dentry->d_op = &pfmfs_dentry_operations; - d_add(file->f_dentry, inode); - file->f_vfsmnt = mntget(pfmfs_mnt); + d_add(file->f_path.dentry, inode); + file->f_path.mnt = mntget(pfmfs_mnt); file->f_mapping = inode->i_mapping; file->f_op = &pfm_file_ops; @@ -2211,15 +2224,18 @@ static void pfm_free_fd(int fd, struct file *file) { struct files_struct *files = current->files; + struct fdtable *fdt; /* * there ie no fd_uninstall(), so we do it here */ spin_lock(&files->file_lock); - files->fd[fd] = NULL; + fdt = files_fdtable(files); + rcu_assign_pointer(fdt->fd[fd], NULL); spin_unlock(&files->file_lock); - if (file) put_filp(file); + if (file) + put_filp(file); put_unused_fd(fd); } @@ -2286,7 +2302,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon DPRINT(("smpl_buf @%p\n", smpl_buf)); /* allocate vma */ - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!vma) { DPRINT(("Cannot allocate vma\n")); goto error_kmem; @@ -2342,7 +2358,8 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon insert_vm_struct(mm, vma); vx_vmpages_add(mm, size >> PAGE_SHIFT); - vm_stat_account(vma); + vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, + vma_pages(vma)); up_write(&task->mm->mmap_sem); /* @@ -2692,7 +2709,7 @@ pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg /* * init restart semaphore to locked */ - sema_init(&ctx->ctx_restart_sem, 0); + init_completion(&ctx->ctx_restart_done); /* * activation is used in SMP only @@ -2845,7 +2862,6 @@ pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset) static int pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) { - struct thread_struct *thread = NULL; struct task_struct *task; pfarg_reg_t *req = (pfarg_reg_t *)arg; unsigned long value, pmc_pm; @@ -2866,7 +2882,6 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) if (state == PFM_CTX_ZOMBIE) return -EINVAL; if (is_loaded) { - thread = &task->thread; /* * In system wide and when the context is loaded, access can only happen * when the caller is running on the CPU being monitored by the session. @@ -3021,7 +3036,7 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) * * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs(). * - * The value in thread->pmcs[] may be modified on overflow, i.e., when + * The value in th_pmcs[] may be modified on overflow, i.e., when * monitoring needs to be stopped. */ if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum); @@ -3035,7 +3050,7 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) /* * write thread state */ - if (is_system == 0) thread->pmcs[cnum] = value; + if (is_system == 0) ctx->th_pmcs[cnum] = value; /* * write hardware register if we can @@ -3087,7 +3102,6 @@ error: static int pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) { - struct thread_struct *thread = NULL; struct task_struct *task; pfarg_reg_t *req = (pfarg_reg_t *)arg; unsigned long value, hw_value, ovfl_mask; @@ -3111,7 +3125,6 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) * the owner of the local PMU. */ if (likely(is_loaded)) { - thread = &task->thread; /* * In system wide and when the context is loaded, access can only happen * when the caller is running on the CPU being monitored by the session. @@ -3219,7 +3232,7 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) /* * write thread state */ - if (is_system == 0) thread->pmds[cnum] = hw_value; + if (is_system == 0) ctx->th_pmds[cnum] = hw_value; /* * write hardware register if we can @@ -3285,7 +3298,6 @@ abort_mission: static int pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) { - struct thread_struct *thread = NULL; struct task_struct *task; unsigned long val = 0UL, lval, ovfl_mask, sval; pfarg_reg_t *req = (pfarg_reg_t *)arg; @@ -3309,7 +3321,6 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) if (state == PFM_CTX_ZOMBIE) return -EINVAL; if (likely(is_loaded)) { - thread = &task->thread; /* * In system wide and when the context is loaded, access can only happen * when the caller is running on the CPU being monitored by the session. @@ -3371,7 +3382,7 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) * if context is zombie, then task does not exist anymore. * In this case, we use the full value saved in the context (pfm_flush_regs()). */ - val = is_loaded ? thread->pmds[cnum] : 0UL; + val = is_loaded ? ctx->th_pmds[cnum] : 0UL; } rd_func = pmu_conf->pmd_desc[cnum].read_check; @@ -3673,7 +3684,7 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) */ if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) { DPRINT(("unblocking [%d] \n", task->pid)); - up(&ctx->ctx_restart_sem); + complete(&ctx->ctx_restart_done); } else { DPRINT(("[%d] armed exit trap\n", task->pid)); @@ -3697,8 +3708,6 @@ pfm_debug(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) pfm_sysctl.debug = m == 0 ? 0 : 1; - pfm_debug_var = pfm_sysctl.debug; - printk(KERN_INFO "perfmon debugging %s (timing reset)\n", pfm_sysctl.debug ? "on" : "off"); if (m == 0) { @@ -4042,7 +4051,7 @@ pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) */ ia64_psr(regs)->up = 0; } else { - tregs = ia64_task_regs(task); + tregs = task_pt_regs(task); /* * stop monitoring at the user level @@ -4124,7 +4133,7 @@ pfm_start(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) ia64_psr(regs)->up = 1; } else { - tregs = ia64_task_regs(ctx->ctx_task); + tregs = task_pt_regs(ctx->ctx_task); /* * start monitoring at the kernel level the next @@ -4214,7 +4223,7 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) DPRINT(("cannot load to [%d], invalid ctx_state=%d\n", req->load_pid, ctx->ctx_state)); - return -EINVAL; + return -EBUSY; } DPRINT(("load_pid [%d] using_dbreg=%d\n", req->load_pid, ctx->ctx_fl_using_dbreg)); @@ -4308,6 +4317,7 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) DPRINT(("before cmpxchg() old_ctx=%p new_ctx=%p\n", thread->pfm_context, ctx)); + ret = -EBUSY; old = ia64_cmpxchg(acq, &thread->pfm_context, NULL, ctx, sizeof(pfm_context_t *)); if (old != NULL) { DPRINT(("load_pid [%d] already has a context\n", req->load_pid)); @@ -4341,8 +4351,8 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) pfm_copy_pmds(task, ctx); pfm_copy_pmcs(task, ctx); - pmcs_source = thread->pmcs; - pmds_source = thread->pmds; + pmcs_source = ctx->th_pmcs; + pmds_source = ctx->th_pmds; /* * always the case for system-wide @@ -4393,7 +4403,7 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) /* * when not current, task MUST be stopped, so this is safe */ - regs = ia64_task_regs(task); + regs = task_pt_regs(task); /* force a full reload */ ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; @@ -4519,7 +4529,7 @@ pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg /* * per-task mode */ - tregs = task == current ? regs : ia64_task_regs(task); + tregs = task == current ? regs : task_pt_regs(task); if (task == current) { /* @@ -4582,7 +4592,7 @@ pfm_exit_thread(struct task_struct *task) { pfm_context_t *ctx; unsigned long flags; - struct pt_regs *regs = ia64_task_regs(task); + struct pt_regs *regs = task_pt_regs(task); int ret, state; int free_ok = 0; @@ -4716,16 +4726,26 @@ recheck: if (task == current || ctx->ctx_fl_system) return 0; /* - * if context is UNLOADED we are safe to go + * we are monitoring another thread */ - if (state == PFM_CTX_UNLOADED) return 0; - - /* - * no command can operate on a zombie context - */ - if (state == PFM_CTX_ZOMBIE) { - DPRINT(("cmd %d state zombie cannot operate on context\n", cmd)); - return -EINVAL; + switch(state) { + case PFM_CTX_UNLOADED: + /* + * if context is UNLOADED we are safe to go + */ + return 0; + case PFM_CTX_ZOMBIE: + /* + * no command can operate on a zombie context + */ + DPRINT(("cmd %d state zombie cannot operate on context\n", cmd)); + return -EINVAL; + case PFM_CTX_MASKED: + /* + * PMU state has been saved to software even though + * the thread may still be running. + */ + if (cmd != PFM_UNLOAD_CONTEXT) return 0; } /* @@ -4905,7 +4925,7 @@ restart_args: if (unlikely(ret)) goto abort_locked; skip_fd: - ret = (*func)(ctx, args_k, count, ia64_task_regs(current)); + ret = (*func)(ctx, args_k, count, task_pt_regs(current)); call_made = 1; @@ -4913,14 +4933,16 @@ abort_locked: if (likely(ctx)) { DPRINT(("context unlocked\n")); UNPROTECT_CTX(ctx, flags); - fput(file); } /* copy argument back to user, if needed */ if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT; error_args: - if (args_k) kfree(args_k); + if (file) + fput(file); + + kfree(args_k); DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret)); @@ -4998,13 +5020,21 @@ pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs) } static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds); - + /* + * pfm_handle_work() can be called with interrupts enabled + * (TIF_NEED_RESCHED) or disabled. The down_interruptible + * call may sleep, therefore we must re-enable interrupts + * to avoid deadlocks. It is safe to do so because this function + * is called ONLY when returning to user level (PUStk=1), in which case + * there is no risk of kernel stack overflow due to deep + * interrupt nesting. + */ void pfm_handle_work(void) { pfm_context_t *ctx; struct pt_regs *regs; - unsigned long flags; + unsigned long flags, dummy_flags; unsigned long ovfl_regs; unsigned int reason; int ret; @@ -5021,7 +5051,7 @@ pfm_handle_work(void) pfm_clear_task_notify(); - regs = ia64_task_regs(current); + regs = task_pt_regs(current); /* * extract reason for being here and clear @@ -5041,18 +5071,15 @@ pfm_handle_work(void) //if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking; if (reason == PFM_TRAP_REASON_RESET) goto skip_blocking; + /* + * restore interrupt mask to what it was on entry. + * Could be enabled/diasbled. + */ UNPROTECT_CTX(ctx, flags); - /* - * pfm_handle_work() is currently called with interrupts disabled. - * The down_interruptible call may sleep, therefore we - * must re-enable interrupts to avoid deadlocks. It is - * safe to do so because this function is called ONLY - * when returning to user level (PUStk=1), in which case - * there is no risk of kernel stack overflow due to deep - * interrupt nesting. - */ - BUG_ON(flags & IA64_PSR_I); + /* + * force interrupt enable because of down_interruptible() + */ local_irq_enable(); DPRINT(("before block sleeping\n")); @@ -5061,17 +5088,17 @@ pfm_handle_work(void) * may go through without blocking on SMP systems * if restart has been received already by the time we call down() */ - ret = down_interruptible(&ctx->ctx_restart_sem); + ret = wait_for_completion_interruptible(&ctx->ctx_restart_done); DPRINT(("after block sleeping ret=%d\n", ret)); /* - * disable interrupts to restore state we had upon entering - * this function + * lock context and mask interrupts again + * We save flags into a dummy because we may have + * altered interrupts mask compared to entry in this + * function. */ - local_irq_disable(); - - PROTECT_CTX(ctx, flags); + PROTECT_CTX(ctx, dummy_flags); /* * we need to read the ovfl_regs only after wake-up @@ -5097,7 +5124,9 @@ skip_blocking: ctx->ctx_ovfl_regs[0] = 0UL; nothing_to_do: - + /* + * restore flags as they were upon entry + */ UNPROTECT_CTX(ctx, flags); } @@ -5529,34 +5558,41 @@ report_spurious2: } static irqreturn_t -pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs) +pfm_interrupt_handler(int irq, void *arg) { unsigned long start_cycles, total_cycles; unsigned long min, max; int this_cpu; int ret; + struct pt_regs *regs = get_irq_regs(); this_cpu = get_cpu(); - min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min; - max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max; + if (likely(!pfm_alt_intr_handler)) { + min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min; + max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max; - start_cycles = ia64_get_itc(); + start_cycles = ia64_get_itc(); - ret = pfm_do_interrupt_handler(irq, arg, regs); + ret = pfm_do_interrupt_handler(irq, arg, regs); - total_cycles = ia64_get_itc(); + total_cycles = ia64_get_itc(); - /* - * don't measure spurious interrupts - */ - if (likely(ret == 0)) { - total_cycles -= start_cycles; + /* + * don't measure spurious interrupts + */ + if (likely(ret == 0)) { + total_cycles -= start_cycles; - if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles; - if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles; + if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles; + if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles; - pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles; + pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles; + } + } + else { + (*pfm_alt_intr_handler->handler)(irq, arg, regs); } + put_cpu_no_resched(); return IRQ_HANDLED; } @@ -5760,7 +5796,7 @@ pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_c * on every CPU, so we can rely on the pid to identify the idle task. */ if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->pid) { - regs = ia64_task_regs(task); + regs = task_pt_regs(task); ia64_psr(regs)->pp = is_ctxswin ? dcr_pp : 0; return; } @@ -5826,14 +5862,12 @@ void pfm_save_regs(struct task_struct *task) { pfm_context_t *ctx; - struct thread_struct *t; unsigned long flags; u64 psr; ctx = PFM_GET_CTX(task); if (ctx == NULL) return; - t = &task->thread; /* * we always come here with interrupts ALREADY disabled by @@ -5843,7 +5877,7 @@ pfm_save_regs(struct task_struct *task) flags = pfm_protect_ctx_ctxsw(ctx); if (ctx->ctx_state == PFM_CTX_ZOMBIE) { - struct pt_regs *regs = ia64_task_regs(task); + struct pt_regs *regs = task_pt_regs(task); pfm_clear_psr_up(); @@ -5891,19 +5925,19 @@ pfm_save_regs(struct task_struct *task) * guarantee we will be schedule at that same * CPU again. */ - pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]); + pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]); /* * save pmc0 ia64_srlz_d() done in pfm_save_pmds() * we will need it on the restore path to check * for pending overflow. */ - t->pmcs[0] = ia64_get_pmc(0); + ctx->th_pmcs[0] = ia64_get_pmc(0); /* * unfreeze PMU if had pending overflows */ - if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); + if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); /* * finally, allow context access. @@ -5948,7 +5982,6 @@ static void pfm_lazy_save_regs (struct task_struct *task) { pfm_context_t *ctx; - struct thread_struct *t; unsigned long flags; { u64 psr = pfm_get_psr(); @@ -5956,7 +5989,6 @@ pfm_lazy_save_regs (struct task_struct *task) } ctx = PFM_GET_CTX(task); - t = &task->thread; /* * we need to mask PMU overflow here to @@ -5981,19 +6013,19 @@ pfm_lazy_save_regs (struct task_struct *task) /* * save all the pmds we use */ - pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]); + pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]); /* * save pmc0 ia64_srlz_d() done in pfm_save_pmds() * it is needed to check for pended overflow * on the restore path */ - t->pmcs[0] = ia64_get_pmc(0); + ctx->th_pmcs[0] = ia64_get_pmc(0); /* * unfreeze PMU if had pending overflows */ - if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); + if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); /* * now get can unmask PMU interrupts, they will @@ -6012,7 +6044,6 @@ void pfm_load_regs (struct task_struct *task) { pfm_context_t *ctx; - struct thread_struct *t; unsigned long pmc_mask = 0UL, pmd_mask = 0UL; unsigned long flags; u64 psr, psr_up; @@ -6023,11 +6054,10 @@ pfm_load_regs (struct task_struct *task) BUG_ON(GET_PMU_OWNER()); - t = &task->thread; /* * possible on unload */ - if (unlikely((t->flags & IA64_THREAD_PM_VALID) == 0)) return; + if (unlikely((task->thread.flags & IA64_THREAD_PM_VALID) == 0)) return; /* * we always come here with interrupts ALREADY disabled by @@ -6043,7 +6073,7 @@ pfm_load_regs (struct task_struct *task) BUG_ON(psr & IA64_PSR_I); if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) { - struct pt_regs *regs = ia64_task_regs(task); + struct pt_regs *regs = task_pt_regs(task); BUG_ON(ctx->ctx_smpl_hdr); @@ -6109,26 +6139,26 @@ pfm_load_regs (struct task_struct *task) * * XXX: optimize here */ - if (pmd_mask) pfm_restore_pmds(t->pmds, pmd_mask); - if (pmc_mask) pfm_restore_pmcs(t->pmcs, pmc_mask); + if (pmd_mask) pfm_restore_pmds(ctx->th_pmds, pmd_mask); + if (pmc_mask) pfm_restore_pmcs(ctx->th_pmcs, pmc_mask); /* * check for pending overflow at the time the state * was saved. */ - if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) { + if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) { /* * reload pmc0 with the overflow information * On McKinley PMU, this will trigger a PMU interrupt */ - ia64_set_pmc(0, t->pmcs[0]); + ia64_set_pmc(0, ctx->th_pmcs[0]); ia64_srlz_d(); - t->pmcs[0] = 0UL; + ctx->th_pmcs[0] = 0UL; /* * will replay the PMU interrupt */ - if (need_irq_resend) hw_resend_irq(NULL, IA64_PERFMON_VECTOR); + if (need_irq_resend) ia64_resend_irq(IA64_PERFMON_VECTOR); pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++; } @@ -6176,7 +6206,6 @@ pfm_load_regs (struct task_struct *task) void pfm_load_regs (struct task_struct *task) { - struct thread_struct *t; pfm_context_t *ctx; struct task_struct *owner; unsigned long pmd_mask, pmc_mask; @@ -6185,7 +6214,6 @@ pfm_load_regs (struct task_struct *task) owner = GET_PMU_OWNER(); ctx = PFM_GET_CTX(task); - t = &task->thread; psr = pfm_get_psr(); BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); @@ -6248,27 +6276,27 @@ pfm_load_regs (struct task_struct *task) */ pmc_mask = ctx->ctx_all_pmcs[0]; - pfm_restore_pmds(t->pmds, pmd_mask); - pfm_restore_pmcs(t->pmcs, pmc_mask); + pfm_restore_pmds(ctx->th_pmds, pmd_mask); + pfm_restore_pmcs(ctx->th_pmcs, pmc_mask); /* * check for pending overflow at the time the state * was saved. */ - if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) { + if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) { /* * reload pmc0 with the overflow information * On McKinley PMU, this will trigger a PMU interrupt */ - ia64_set_pmc(0, t->pmcs[0]); + ia64_set_pmc(0, ctx->th_pmcs[0]); ia64_srlz_d(); - t->pmcs[0] = 0UL; + ctx->th_pmcs[0] = 0UL; /* * will replay the PMU interrupt */ - if (need_irq_resend) hw_resend_irq(NULL, IA64_PERFMON_VECTOR); + if (need_irq_resend) ia64_resend_irq(IA64_PERFMON_VECTOR); pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++; } @@ -6338,11 +6366,11 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx) */ pfm_unfreeze_pmu(); } else { - pmc0 = task->thread.pmcs[0]; + pmc0 = ctx->th_pmcs[0]; /* * clear whatever overflow status bits there were */ - task->thread.pmcs[0] = 0; + ctx->th_pmcs[0] = 0; } ovfl_val = pmu_conf->ovfl_val; /* @@ -6363,7 +6391,7 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx) /* * can access PMU always true in system wide mode */ - val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : task->thread.pmds[i]; + val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : ctx->th_pmds[i]; if (PMD_IS_COUNTING(i)) { DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n", @@ -6395,7 +6423,7 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx) DPRINT(("[%d] ctx_pmd[%d]=0x%lx pmd_val=0x%lx\n", task->pid, i, val, pmd_val)); - if (is_self) task->thread.pmds[i] = pmd_val; + if (is_self) ctx->th_pmds[i] = pmd_val; ctx->ctx_pmds[i].val = val; } @@ -6403,10 +6431,145 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx) static struct irqaction perfmon_irqaction = { .handler = pfm_interrupt_handler, - .flags = SA_INTERRUPT, + .flags = IRQF_DISABLED, .name = "perfmon" }; +static void +pfm_alt_save_pmu_state(void *data) +{ + struct pt_regs *regs; + + regs = task_pt_regs(current); + + DPRINT(("called\n")); + + /* + * should not be necessary but + * let's take not risk + */ + pfm_clear_psr_up(); + pfm_clear_psr_pp(); + ia64_psr(regs)->pp = 0; + + /* + * This call is required + * May cause a spurious interrupt on some processors + */ + pfm_freeze_pmu(); + + ia64_srlz_d(); +} + +void +pfm_alt_restore_pmu_state(void *data) +{ + struct pt_regs *regs; + + regs = task_pt_regs(current); + + DPRINT(("called\n")); + + /* + * put PMU back in state expected + * by perfmon + */ + pfm_clear_psr_up(); + pfm_clear_psr_pp(); + ia64_psr(regs)->pp = 0; + + /* + * perfmon runs with PMU unfrozen at all times + */ + pfm_unfreeze_pmu(); + + ia64_srlz_d(); +} + +int +pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) +{ + int ret, i; + int reserve_cpu; + + /* some sanity checks */ + if (hdl == NULL || hdl->handler == NULL) return -EINVAL; + + /* do the easy test first */ + if (pfm_alt_intr_handler) return -EBUSY; + + /* one at a time in the install or remove, just fail the others */ + if (!spin_trylock(&pfm_alt_install_check)) { + return -EBUSY; + } + + /* reserve our session */ + for_each_online_cpu(reserve_cpu) { + ret = pfm_reserve_session(NULL, 1, reserve_cpu); + if (ret) goto cleanup_reserve; + } + + /* save the current system wide pmu states */ + ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 0, 1); + if (ret) { + DPRINT(("on_each_cpu() failed: %d\n", ret)); + goto cleanup_reserve; + } + + /* officially change to the alternate interrupt handler */ + pfm_alt_intr_handler = hdl; + + spin_unlock(&pfm_alt_install_check); + + return 0; + +cleanup_reserve: + for_each_online_cpu(i) { + /* don't unreserve more than we reserved */ + if (i >= reserve_cpu) break; + + pfm_unreserve_session(NULL, 1, i); + } + + spin_unlock(&pfm_alt_install_check); + + return ret; +} +EXPORT_SYMBOL_GPL(pfm_install_alt_pmu_interrupt); + +int +pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) +{ + int i; + int ret; + + if (hdl == NULL) return -EINVAL; + + /* cannot remove someone else's handler! */ + if (pfm_alt_intr_handler != hdl) return -EINVAL; + + /* one at a time in the install or remove, just fail the others */ + if (!spin_trylock(&pfm_alt_install_check)) { + return -EBUSY; + } + + pfm_alt_intr_handler = NULL; + + ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 0, 1); + if (ret) { + DPRINT(("on_each_cpu() failed: %d\n", ret)); + } + + for_each_online_cpu(i) { + pfm_unreserve_session(NULL, 1, i); + } + + spin_unlock(&pfm_alt_install_check); + + return 0; +} +EXPORT_SYMBOL_GPL(pfm_remove_alt_pmu_interrupt); + /* * perfmon initialization routine, called from the initcall() table */ @@ -6504,7 +6667,7 @@ pfm_init(void) ffz(pmu_conf->ovfl_val)); /* sanity check */ - if (pmu_conf->num_pmds >= IA64_NUM_PMD_REGS || pmu_conf->num_pmcs >= IA64_NUM_PMC_REGS) { + if (pmu_conf->num_pmds >= PFM_NUM_PMD_REGS || pmu_conf->num_pmcs >= PFM_NUM_PMC_REGS) { printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n"); pmu_conf = NULL; return -1; @@ -6550,6 +6713,7 @@ __initcall(pfm_init); void pfm_init_percpu (void) { + static int first_time=1; /* * make sure no measurement is active * (may inherit programmed PMCs from EFI). @@ -6562,8 +6726,10 @@ pfm_init_percpu (void) */ pfm_unfreeze_pmu(); - if (smp_processor_id() == 0) + if (first_time) { register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction); + first_time=0; + } ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR); ia64_srlz_d(); @@ -6576,7 +6742,6 @@ void dump_pmu_state(const char *from) { struct task_struct *task; - struct thread_struct *t; struct pt_regs *regs; pfm_context_t *ctx; unsigned long psr, dcr, info, flags; @@ -6585,7 +6750,7 @@ dump_pmu_state(const char *from) local_irq_save(flags); this_cpu = smp_processor_id(); - regs = ia64_task_regs(current); + regs = task_pt_regs(current); info = PFM_CPUINFO_GET(); dcr = ia64_getreg(_IA64_REG_CR_DCR); @@ -6621,16 +6786,14 @@ dump_pmu_state(const char *from) ia64_psr(regs)->up = 0; ia64_psr(regs)->pp = 0; - t = ¤t->thread; - for (i=1; PMC_IS_LAST(i) == 0; i++) { if (PMC_IS_IMPL(i) == 0) continue; - printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, t->pmcs[i]); + printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, ctx->th_pmcs[i]); } for (i=1; PMD_IS_LAST(i) == 0; i++) { if (PMD_IS_IMPL(i) == 0) continue; - printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, t->pmds[i]); + printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, ctx->th_pmds[i]); } if (ctx) {