X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fia64%2Fkernel%2Fperfmon.c;h=249b590c73ccfe30e687504cc2b02fc632de2279;hb=43bc926fffd92024b46cafaf7350d669ba9ca884;hp=4f1543cdafec51d8f9651df34a4476d80e86224e;hpb=8e8ece46a861c84343256819eaec77e608ff9217;p=linux-2.6.git

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 4f1543cda..249b590c7 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -11,7 +11,7 @@
  * Version Perfmon-2.x is a rewrite of perfmon-1.x
  * by Stephane Eranian, Hewlett Packard Co.
  *
- * Copyright (C) 1999-2003, 2005 Hewlett Packard Co
+ * Copyright (C) 1999-2005 Hewlett Packard Co
  *               Stephane Eranian <eranian@hpl.hp.com>
  *               David Mosberger-Tang <davidm@hpl.hp.com>
  *
@@ -37,8 +37,10 @@
 #include <linux/vfs.h>
 #include <linux/pagemap.h>
 #include <linux/mount.h>
-#include <linux/version.h>
 #include <linux/bitops.h>
+#include <linux/capability.h>
+#include <linux/rcupdate.h>
+#include <linux/completion.h>
 
 #include <asm/errno.h>
 #include <asm/intrinsics.h>
@@ -287,7 +289,7 @@ typedef struct pfm_context {
 	unsigned long		ctx_ovfl_regs[4];	/* which registers overflowed (notification) */
 
-	struct semaphore	ctx_restart_sem;	/* use for blocking notification mode */
+	struct completion	ctx_restart_done;	/* use for blocking notification mode */
 
 	unsigned long		ctx_used_pmds[4];	/* bitmask of PMD used */
 	unsigned long		ctx_all_pmds[4];	/* bitmask of all accessible PMDs */
 
@@ -481,14 +483,6 @@ typedef struct {
 
 #define PFM_CMD_ARG_MANY	-1 /* cannot be zero */
 
-typedef struct {
-	int	debug;		/* turn on/off debugging via syslog */
-	int	debug_ovfl;	/* turn on/off debug printk in overflow handler */
-	int	fastctxsw;	/* turn on/off fast (unsecure) ctxsw */
-	int	expert_mode;	/* turn on/off value checking */
-	int	debug_pfm_read;
-} pfm_sysctl_t;
-
 typedef struct {
 	unsigned long pfm_spurious_ovfl_intr_count;	/* keep track of spurious ovfl interrupts */
 	unsigned long pfm_replay_ovfl_intr_count;	/* keep track of replayed ovfl interrupts */
@@ -507,6 +501,9 @@ typedef struct {
 static pfm_stats_t		pfm_stats[NR_CPUS];
 static pfm_session_t		pfm_sessions;	/* global sessions information */
 
+static DEFINE_SPINLOCK(pfm_alt_install_check);
+static pfm_intr_handler_desc_t *pfm_alt_intr_handler;
+
 static struct proc_dir_entry	*perfmon_dir;
 static pfm_uuid_t		pfm_null_uuid = {0,};
 
@@ -516,8 +513,8 @@ static LIST_HEAD(pfm_buffer_fmt_list);
 static pmu_config_t		*pmu_conf;
 
 /* sysctl() controls */
-static pfm_sysctl_t pfm_sysctl;
-int pfm_debug_var;
+pfm_sysctl_t pfm_sysctl;
+EXPORT_SYMBOL(pfm_sysctl);
 
 static ctl_table pfm_ctl_table[]={
 	{1, "debug", &pfm_sysctl.debug, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
@@ -581,7 +578,7 @@ pfm_protect_ctx_ctxsw(pfm_context_t *x)
 	return 0UL;
 }
 
-static inline unsigned long
+static inline void
 pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f)
 {
 	spin_unlock(&(x)->ctx_lock);
@@ -616,6 +613,7 @@
 DEFINE_PER_CPU(unsigned long, pfm_syst_info);
 DEFINE_PER_CPU(struct task_struct *, pmu_owner);
 DEFINE_PER_CPU(pfm_context_t *, pmu_ctx);
 DEFINE_PER_CPU(unsigned long, pmu_activation_number);
+EXPORT_PER_CPU_SYMBOL_GPL(pfm_syst_info);
 
 /* forward declaration */
@@ -633,9 +631,11 @@ static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count,
 
 #include "perfmon_itanium.h"
 #include "perfmon_mckinley.h"
+#include "perfmon_montecito.h"
 #include "perfmon_generic.h"
 
 static pmu_config_t *pmu_confs[]={
+	&pmu_conf_mont,
 	&pmu_conf_mck,
 	&pmu_conf_ita,
 	&pmu_conf_gen, /* must be last */
@@ -1275,6 +1275,8 @@ out:
 }
 EXPORT_SYMBOL(pfm_unregister_buffer_fmt);
 
+extern void update_pal_halt_status(int);
+
 static int
 pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
 {
@@ -1321,6 +1323,11 @@ pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
 		is_syswide,
 		cpu));
 
+	/*
+	 * disable default_idle() to go to PAL_HALT
+	 */
+	update_pal_halt_status(0);
+
 	UNLOCK_PFS(flags);
 
 	return 0;
@@ -1328,7 +1335,7 @@ pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
 error_conflict:
 	DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n",
 		pfm_sessions.pfs_sys_session[cpu]->pid,
-		smp_processor_id()));
+		cpu));
 abort:
 	UNLOCK_PFS(flags);
 
@@ -1376,6 +1383,12 @@ pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu)
 		is_syswide,
 		cpu));
 
+	/*
+	 * if possible, enable default_idle() to go into PAL_HALT
+	 */
+	if (pfm_sessions.pfs_task_sessions == 0 && pfm_sessions.pfs_sys_sessions == 0)
+		update_pal_halt_status(1);
+
 	UNLOCK_PFS(flags);
 
 	return 0;
@@ -1578,7 +1591,7 @@ pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
 		goto abort_locked;
 	}
 
-	DPRINT(("[%d] fd=%d type=%d\n", current->pid, msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type));
+	DPRINT(("fd=%d type=%d\n", msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type));
 
 	ret = -EFAULT;
 	if(copy_to_user(buf, msg, sizeof(pfm_msg_t)) == 0) ret = sizeof(pfm_msg_t);
@@ -1702,7 +1715,7 @@ static void
 pfm_syswide_force_stop(void *info)
 {
 	pfm_context_t *ctx = (pfm_context_t *)info;
-	struct pt_regs *regs = ia64_task_regs(current);
+	struct pt_regs *regs = task_pt_regs(current);
 	struct task_struct *owner;
 	unsigned long flags;
 	int ret;
@@ -1807,7 +1820,7 @@ pfm_flush(struct file *filp)
 	is_system = ctx->ctx_fl_system;
 
 	task = PFM_CTX_TASK(ctx);
-	regs = ia64_task_regs(task);
+	regs = task_pt_regs(task);
 
 	DPRINT(("ctx_state=%d is_current=%d\n",
 		state,
@@ -1937,7 +1950,7 @@ pfm_close(struct inode *inode, struct file *filp)
 	is_system = ctx->ctx_fl_system;
 
 	task = PFM_CTX_TASK(ctx);
-	regs = ia64_task_regs(task);
+	regs = task_pt_regs(task);
 
 	DPRINT(("ctx_state=%d is_current=%d\n",
 		state,
@@ -1981,7 +1994,7 @@ pfm_close(struct inode *inode, struct file *filp)
 		/*
 		 * force task to wake up from MASKED state
 		 */
-		up(&ctx->ctx_restart_sem);
+		complete(&ctx->ctx_restart_done);
 
 		DPRINT(("waking up ctx_state=%d\n", state));
 
@@ -2211,15 +2224,18 @@ static void
 pfm_free_fd(int fd, struct file *file)
 {
 	struct files_struct *files = current->files;
+	struct fdtable *fdt;
 
 	/*
	 * there ie no fd_uninstall(), so we do it here
	 */
 	spin_lock(&files->file_lock);
-	files->fd[fd] = NULL;
+	fdt = files_fdtable(files);
+	rcu_assign_pointer(fdt->fd[fd], NULL);
 	spin_unlock(&files->file_lock);
 
-	if (file) put_filp(file);
+	if (file)
+		put_filp(file);
 	put_unused_fd(fd);
 }
 
@@ -2342,7 +2358,8 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon
 	insert_vm_struct(mm, vma);
 
 	vx_vmpages_add(mm, size >> PAGE_SHIFT);
-	vm_stat_account(vma);
+	vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
+			vma_pages(vma));
 	up_write(&task->mm->mmap_sem);
 
 	/*
@@ -2692,7 +2709,7 @@ pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
 	/*
 	 * init restart semaphore to locked
 	 */
-	sema_init(&ctx->ctx_restart_sem, 0);
+	init_completion(&ctx->ctx_restart_done);
 
 	/*
 	 * activation is used in SMP only
@@ -3673,7 +3690,7 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 	 */
 	if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) {
 		DPRINT(("unblocking [%d] \n", task->pid));
-		up(&ctx->ctx_restart_sem);
+		complete(&ctx->ctx_restart_done);
 	} else {
 		DPRINT(("[%d] armed exit trap\n", task->pid));
 
@@ -3697,8 +3714,6 @@ pfm_debug(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 
 	pfm_sysctl.debug = m == 0 ? 0 : 1;
 
-	pfm_debug_var = pfm_sysctl.debug;
-
 	printk(KERN_INFO "perfmon debugging %s (timing reset)\n", pfm_sysctl.debug ? "on" : "off");
 
 	if (m == 0) {
@@ -4042,7 +4057,7 @@ pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 		 */
 		ia64_psr(regs)->up = 0;
 	} else {
-		tregs = ia64_task_regs(task);
+		tregs = task_pt_regs(task);
 
 		/*
 		 * stop monitoring at the user level
@@ -4124,7 +4139,7 @@ pfm_start(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 		ia64_psr(regs)->up = 1;
 
 	} else {
-		tregs = ia64_task_regs(ctx->ctx_task);
+		tregs = task_pt_regs(ctx->ctx_task);
 
 		/*
 		 * start monitoring at the kernel level the next
@@ -4214,7 +4229,7 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 		DPRINT(("cannot load to [%d], invalid ctx_state=%d\n",
 			req->load_pid,
 			ctx->ctx_state));
-		return -EINVAL;
+		return -EBUSY;
 	}
 
 	DPRINT(("load_pid [%d] using_dbreg=%d\n", req->load_pid, ctx->ctx_fl_using_dbreg));
@@ -4308,6 +4323,7 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 	DPRINT(("before cmpxchg() old_ctx=%p new_ctx=%p\n",
 		thread->pfm_context, ctx));
 
+	ret = -EBUSY;
 	old = ia64_cmpxchg(acq, &thread->pfm_context, NULL, ctx, sizeof(pfm_context_t *));
 	if (old != NULL) {
 		DPRINT(("load_pid [%d] already has a context\n", req->load_pid));
@@ -4393,7 +4409,7 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 		/*
 		 * when not current, task MUST be stopped, so this is safe
 		 */
-		regs = ia64_task_regs(task);
+		regs = task_pt_regs(task);
 
 		/* force a full reload */
 		ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
@@ -4519,7 +4535,7 @@ pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
 	/*
 	 * per-task mode
 	 */
-	tregs = task == current ? regs : ia64_task_regs(task);
+	tregs = task == current ? regs : task_pt_regs(task);
 
 	if (task == current) {
 		/*
@@ -4582,7 +4598,7 @@ pfm_exit_thread(struct task_struct *task)
 {
 	pfm_context_t *ctx;
 	unsigned long flags;
-	struct pt_regs *regs = ia64_task_regs(task);
+	struct pt_regs *regs = task_pt_regs(task);
 	int ret, state;
 	int free_ok = 0;
 
@@ -4716,16 +4732,26 @@ recheck:
 	if (task == current || ctx->ctx_fl_system) return 0;
 
 	/*
-	 * if context is UNLOADED we are safe to go
+	 * we are monitoring another thread
 	 */
-	if (state == PFM_CTX_UNLOADED) return 0;
-
-	/*
-	 * no command can operate on a zombie context
-	 */
-	if (state == PFM_CTX_ZOMBIE) {
-		DPRINT(("cmd %d state zombie cannot operate on context\n", cmd));
-		return -EINVAL;
+	switch(state) {
+		case PFM_CTX_UNLOADED:
+			/*
+			 * if context is UNLOADED we are safe to go
+			 */
+			return 0;
+		case PFM_CTX_ZOMBIE:
+			/*
+			 * no command can operate on a zombie context
+			 */
+			DPRINT(("cmd %d state zombie cannot operate on context\n", cmd));
+			return -EINVAL;
+		case PFM_CTX_MASKED:
+			/*
+			 * PMU state has been saved to software even though
+			 * the thread may still be running.
+			 */
+			if (cmd != PFM_UNLOAD_CONTEXT) return 0;
 	}
 
 	/*
@@ -4905,7 +4931,7 @@ restart_args:
 	if (unlikely(ret)) goto abort_locked;
 
 skip_fd:
-	ret = (*func)(ctx, args_k, count, ia64_task_regs(current));
+	ret = (*func)(ctx, args_k, count, task_pt_regs(current));
 
 	call_made = 1;
 
@@ -4920,7 +4946,7 @@ abort_locked:
 	if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT;
 
 error_args:
-	if (args_k) kfree(args_k);
+	kfree(args_k);
 
 	DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret));
 
@@ -4998,13 +5024,21 @@ pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs)
 }
 
 static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds);
-
+/*
+ * pfm_handle_work() can be called with interrupts enabled
+ * (TIF_NEED_RESCHED) or disabled. The down_interruptible
+ * call may sleep, therefore we must re-enable interrupts
+ * to avoid deadlocks. It is safe to do so because this function
+ * is called ONLY when returning to user level (PUStk=1), in which case
+ * there is no risk of kernel stack overflow due to deep
+ * interrupt nesting.
+ */
 void
 pfm_handle_work(void)
 {
 	pfm_context_t *ctx;
 	struct pt_regs *regs;
-	unsigned long flags;
+	unsigned long flags, dummy_flags;
 	unsigned long ovfl_regs;
 	unsigned int reason;
 	int ret;
@@ -5021,7 +5055,7 @@ pfm_handle_work(void)
 
 	pfm_clear_task_notify();
 
-	regs = ia64_task_regs(current);
+	regs = task_pt_regs(current);
 
 	/*
 	 * extract reason for being here and clear
@@ -5041,18 +5075,15 @@ pfm_handle_work(void)
 	//if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking;
 	if (reason == PFM_TRAP_REASON_RESET) goto skip_blocking;
 
+	/*
+	 * restore interrupt mask to what it was on entry.
+	 * Could be enabled/diasbled.
+	 */
 	UNPROTECT_CTX(ctx, flags);
 
-	/*
-	 * pfm_handle_work() is currently called with interrupts disabled.
-	 * The down_interruptible call may sleep, therefore we
-	 * must re-enable interrupts to avoid deadlocks. It is
-	 * safe to do so because this function is called ONLY
-	 * when returning to user level (PUStk=1), in which case
-	 * there is no risk of kernel stack overflow due to deep
-	 * interrupt nesting.
-	 */
-	BUG_ON(flags & IA64_PSR_I);
+	/*
+	 * force interrupt enable because of down_interruptible()
+	 */
 	local_irq_enable();
 
 	DPRINT(("before block sleeping\n"));
@@ -5061,17 +5092,17 @@
 	 * may go through without blocking on SMP systems
 	 * if restart has been received already by the time we call down()
 	 */
-	ret = down_interruptible(&ctx->ctx_restart_sem);
+	ret = wait_for_completion_interruptible(&ctx->ctx_restart_done);
 
 	DPRINT(("after block sleeping ret=%d\n", ret));
 
 	/*
-	 * disable interrupts to restore state we had upon entering
-	 * this function
+	 * lock context and mask interrupts again
+	 * We save flags into a dummy because we may have
+	 * altered interrupts mask compared to entry in this
+	 * function.
 	 */
-	local_irq_disable();
-
-	PROTECT_CTX(ctx, flags);
+	PROTECT_CTX(ctx, dummy_flags);
 
 	/*
 	 * we need to read the ovfl_regs only after wake-up
@@ -5097,7 +5128,9 @@ skip_blocking:
 	ctx->ctx_ovfl_regs[0] = 0UL;
 
 nothing_to_do:
-
+	/*
+	 * restore flags as they were upon entry
+	 */
 	UNPROTECT_CTX(ctx, flags);
 }
 
@@ -5537,26 +5570,32 @@ pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
 	int ret;
 
 	this_cpu = get_cpu();
-	min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min;
-	max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max;
+	if (likely(!pfm_alt_intr_handler)) {
+		min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min;
+		max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max;
 
-	start_cycles = ia64_get_itc();
+		start_cycles = ia64_get_itc();
 
-	ret = pfm_do_interrupt_handler(irq, arg, regs);
+		ret = pfm_do_interrupt_handler(irq, arg, regs);
 
-	total_cycles = ia64_get_itc();
+		total_cycles = ia64_get_itc();
 
-	/*
-	 * don't measure spurious interrupts
-	 */
-	if (likely(ret == 0)) {
-		total_cycles -= start_cycles;
+		/*
+		 * don't measure spurious interrupts
+		 */
+		if (likely(ret == 0)) {
+			total_cycles -= start_cycles;
 
-		if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles;
-		if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles;
+			if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles;
+			if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles;
 
-		pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles;
+			pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles;
+		}
 	}
+	else {
+		(*pfm_alt_intr_handler->handler)(irq, arg, regs);
+	}
+
 	put_cpu_no_resched();
 	return IRQ_HANDLED;
 }
@@ -5760,7 +5799,7 @@ pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_c
 	 * on every CPU, so we can rely on the pid to identify the idle task.
 	 */
 	if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->pid) {
-		regs = ia64_task_regs(task);
+		regs = task_pt_regs(task);
 		ia64_psr(regs)->pp = is_ctxswin ? dcr_pp : 0;
 		return;
 	}
 
@@ -5843,7 +5882,7 @@ pfm_save_regs(struct task_struct *task)
 	flags = pfm_protect_ctx_ctxsw(ctx);
 
 	if (ctx->ctx_state == PFM_CTX_ZOMBIE) {
-		struct pt_regs *regs = ia64_task_regs(task);
+		struct pt_regs *regs = task_pt_regs(task);
 
 		pfm_clear_psr_up();
 
@@ -6043,7 +6082,7 @@ pfm_load_regs (struct task_struct *task)
 	BUG_ON(psr & IA64_PSR_I);
 
 	if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) {
-		struct pt_regs *regs = ia64_task_regs(task);
+		struct pt_regs *regs = task_pt_regs(task);
 
 		BUG_ON(ctx->ctx_smpl_hdr);
 
@@ -6407,6 +6446,141 @@ static struct irqaction perfmon_irqaction = {
 	.name = "perfmon"
 };
 
+static void
+pfm_alt_save_pmu_state(void *data)
+{
+	struct pt_regs *regs;
+
+	regs = task_pt_regs(current);
+
+	DPRINT(("called\n"));
+
+	/*
+	 * should not be necessary but
+	 * let's take not risk
+	 */
+	pfm_clear_psr_up();
+	pfm_clear_psr_pp();
+	ia64_psr(regs)->pp = 0;
+
+	/*
+	 * This call is required
+	 * May cause a spurious interrupt on some processors
+	 */
+	pfm_freeze_pmu();
+
+	ia64_srlz_d();
+}
+
+void
+pfm_alt_restore_pmu_state(void *data)
+{
+	struct pt_regs *regs;
+
+	regs = task_pt_regs(current);
+
+	DPRINT(("called\n"));
+
+	/*
+	 * put PMU back in state expected
+	 * by perfmon
+	 */
+	pfm_clear_psr_up();
+	pfm_clear_psr_pp();
+	ia64_psr(regs)->pp = 0;
+
+	/*
+	 * perfmon runs with PMU unfrozen at all times
+	 */
+	pfm_unfreeze_pmu();
+
+	ia64_srlz_d();
+}
+
+int
+pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
+{
+	int ret, i;
+	int reserve_cpu;
+
+	/* some sanity checks */
+	if (hdl == NULL || hdl->handler == NULL) return -EINVAL;
+
+	/* do the easy test first */
+	if (pfm_alt_intr_handler) return -EBUSY;
+
+	/* one at a time in the install or remove, just fail the others */
+	if (!spin_trylock(&pfm_alt_install_check)) {
+		return -EBUSY;
+	}
+
+	/* reserve our session */
+	for_each_online_cpu(reserve_cpu) {
+		ret = pfm_reserve_session(NULL, 1, reserve_cpu);
+		if (ret) goto cleanup_reserve;
+	}
+
+	/* save the current system wide pmu states */
+	ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 0, 1);
+	if (ret) {
+		DPRINT(("on_each_cpu() failed: %d\n", ret));
+		goto cleanup_reserve;
+	}
+
+	/* officially change to the alternate interrupt handler */
+	pfm_alt_intr_handler = hdl;
+
+	spin_unlock(&pfm_alt_install_check);
+
+	return 0;
+
+cleanup_reserve:
+	for_each_online_cpu(i) {
+		/* don't unreserve more than we reserved */
+		if (i >= reserve_cpu) break;
+
+		pfm_unreserve_session(NULL, 1, i);
+	}
+
+	spin_unlock(&pfm_alt_install_check);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pfm_install_alt_pmu_interrupt);
+
+int
+pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
+{
+	int i;
+	int ret;
+
+	if (hdl == NULL) return -EINVAL;
+
+	/* cannot remove someone else's handler! */
+	if (pfm_alt_intr_handler != hdl) return -EINVAL;
+
+	/* one at a time in the install or remove, just fail the others */
+	if (!spin_trylock(&pfm_alt_install_check)) {
+		return -EBUSY;
+	}
+
+	pfm_alt_intr_handler = NULL;
+
+	ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 0, 1);
+	if (ret) {
+		DPRINT(("on_each_cpu() failed: %d\n", ret));
+	}
+
+	for_each_online_cpu(i) {
+		pfm_unreserve_session(NULL, 1, i);
+	}
+
+	spin_unlock(&pfm_alt_install_check);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pfm_remove_alt_pmu_interrupt);
+
 /*
  * perfmon initialization routine, called from the initcall() table
  */
@@ -6550,6 +6724,7 @@ __initcall(pfm_init);
 void
 pfm_init_percpu (void)
 {
+	static int first_time=1;
 	/*
 	 * make sure no measurement is active
 	 * (may inherit programmed PMCs from EFI).
@@ -6562,8 +6737,10 @@ pfm_init_percpu (void)
 	 */
 	pfm_unfreeze_pmu();
 
-	if (smp_processor_id() == 0)
+	if (first_time) {
 		register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
+		first_time=0;
+	}
 
 	ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR);
 	ia64_srlz_d();
@@ -6585,7 +6762,7 @@ dump_pmu_state(const char *from)
 	local_irq_save(flags);
 
 	this_cpu = smp_processor_id();
-	regs = ia64_task_regs(current);
+	regs = task_pt_regs(current);
 	info = PFM_CPUINFO_GET();
 	dcr = ia64_getreg(_IA64_REG_CR_DCR);
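
The ctx_restart_sem hunks above are the stock conversion of a zero-initialized ("locked") semaphore used for one-shot blocking into a completion: sema_init(sem, 0)/down_interruptible()/up() become init_completion()/wait_for_completion_interruptible()/complete(), which cannot underflow and state the wait-for-event intent directly. A minimal sketch of the pattern; the demo_* names are illustrative, not part of perfmon.c:

	#include <linux/completion.h>

	struct demo_ctx {
		struct completion restart_done;	/* was: struct semaphore restart_sem */
	};

	static void demo_ctx_init(struct demo_ctx *ctx)
	{
		init_completion(&ctx->restart_done);	/* was: sema_init(&sem, 0) */
	}

	/* monitored task, blocking on its way back to user mode */
	static int demo_block(struct demo_ctx *ctx)
	{
		/* was: down_interruptible(&sem) */
		return wait_for_completion_interruptible(&ctx->restart_done);
	}

	/* controlling task issuing a restart, or close() forcing a wakeup */
	static void demo_restart(struct demo_ctx *ctx)
	{
		complete(&ctx->restart_done);	/* was: up(&sem) */
	}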
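
The flags/dummy_flags split in pfm_handle_work() exists because the function can be entered with interrupts either on or off: the entry mask is captured once, interrupts are forced on so the blocking call may sleep, and the post-wakeup re-lock saves into a throwaway word so the entry mask survives to the final unlock. A compressed sketch, assuming PROTECT_CTX()/UNPROTECT_CTX() reduce to spin_lock_irqsave()/spin_unlock_irqrestore() on the context lock as they do in this file; again the demo_* names are illustrative:

	#include <linux/completion.h>
	#include <linux/spinlock.h>

	struct demo_ctx2 {
		spinlock_t lock;
		struct completion restart_done;
	};

	static void demo_handle_work(struct demo_ctx2 *ctx)
	{
		unsigned long flags, dummy_flags;

		spin_lock_irqsave(&ctx->lock, flags);	/* flags = entry IRQ mask */
		/* ... examine and clear the trap reason under the lock ... */
		spin_unlock_irqrestore(&ctx->lock, flags);

		local_irq_enable();	/* sleeping with IRQs masked could deadlock */
		wait_for_completion_interruptible(&ctx->restart_done);

		/* IRQ mask no longer matches entry: park it in a dummy */
		spin_lock_irqsave(&ctx->lock, dummy_flags);
		/* ... read the overflow state under the lock ... */
		spin_unlock_irqrestore(&ctx->lock, flags);	/* back to entry mask */
	}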
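
The pfm_free_fd() hunk tracks the fdtable conversion of files_struct: the fd array now hangs off an RCU-managed struct fdtable, so it must be re-fetched under file_lock (the table may have been resized and swapped) and written through rcu_assign_pointer() so lockless readers see the store ordered after the table pointer. A sketch of this open-coded uninstall, assuming a 2.6-era tree where files_fdtable() is reachable via <linux/file.h>:

	#include <linux/file.h>
	#include <linux/rcupdate.h>
	#include <linux/sched.h>
	#include <linux/spinlock.h>

	/* illustrative stand-in for the fd_uninstall() the comment wishes for */
	static void demo_fd_uninstall(unsigned int fd)
	{
		struct files_struct *files = current->files;
		struct fdtable *fdt;

		spin_lock(&files->file_lock);
		fdt = files_fdtable(files);		/* re-fetch: table can move */
		rcu_assign_pointer(fdt->fd[fd], NULL);	/* barrier for RCU readers */
		spin_unlock(&files->file_lock);
	}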
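
Finally, pfm_install_alt_pmu_interrupt() stacks three defensive idioms: an unlocked fast-path test, spin_trylock() so a racing install or remove fails immediately with -EBUSY instead of sleeping, and an unwind loop that releases only the CPUs actually reserved before the failure. The control-flow skeleton with the perfmon bookkeeping stubbed out (demo_* names are placeholders):

	#include <linux/cpumask.h>
	#include <linux/errno.h>
	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(demo_install_check);
	static void *demo_handler;	/* stands in for pfm_alt_intr_handler */

	static int demo_reserve_cpu(int cpu) { return 0; }	/* stub */
	static void demo_unreserve_cpu(int cpu) { }		/* stub */

	static int demo_install(void *hdl)
	{
		int ret, i, reserve_cpu;

		if (hdl == NULL)
			return -EINVAL;
		if (demo_handler)			/* cheap unlocked test */
			return -EBUSY;
		if (!spin_trylock(&demo_install_check))	/* never block a racer */
			return -EBUSY;

		for_each_online_cpu(reserve_cpu) {
			ret = demo_reserve_cpu(reserve_cpu);
			if (ret)
				goto cleanup;
		}

		demo_handler = hdl;	/* published before the releasing unlock */
		spin_unlock(&demo_install_check);
		return 0;

	cleanup:
		/* unwind only what was reserved before the failure */
		for_each_online_cpu(i) {
			if (i >= reserve_cpu)
				break;
			demo_unreserve_cpu(i);
		}
		spin_unlock(&demo_install_check);
		return ret;
	}

The same trylock guards the remove path, so install and remove serialize against each other without ever sleeping under the check.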