X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fia64%2Fkernel%2Fperfmon.c;h=c0a4d8f8fab377ccb0a32e011c0848689dbd16b7;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=4f1543cdafec51d8f9651df34a4476d80e86224e;hpb=8e8ece46a861c84343256819eaec77e608ff9217;p=linux-2.6.git

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 4f1543cda..c0a4d8f8f 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -11,7 +11,7 @@
  * Version Perfmon-2.x is a rewrite of perfmon-1.x
  * by Stephane Eranian, Hewlett Packard Co.
  *
- * Copyright (C) 1999-2003, 2005  Hewlett Packard Co
+ * Copyright (C) 1999-2005  Hewlett Packard Co
  *               Stephane Eranian <eranian@hpl.hp.com>
  *               David Mosberger-Tang <davidm@hpl.hp.com>
  *
@@ -19,7 +19,6 @@
  * 	http://www.hpl.hp.com/research/linux/perfmon
  */
 
-#include <linux/config.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
@@ -35,12 +34,14 @@
 #include <linux/file.h>
 #include <linux/poll.h>
 #include <linux/vfs.h>
+#include <linux/smp.h>
 #include <linux/pagemap.h>
 #include <linux/mount.h>
-#include <linux/version.h>
 #include <linux/bitops.h>
+#include <linux/capability.h>
+#include <linux/rcupdate.h>
+#include <linux/completion.h>
 #include <linux/vs_memory.h>
-#include <linux/vs_cvirt.h>
 
 #include <asm/errno.h>
 #include <asm/intrinsics.h>
@@ -63,6 +64,9 @@
 
 #define PFM_INVALID_ACTIVATION	(~0UL)
 
+#define PFM_NUM_PMC_REGS	64	/* PMC save area for ctxsw */
+#define PFM_NUM_PMD_REGS	64	/* PMD save area for ctxsw */
+
 /*
  * depth of message queue
  */
@@ -287,7 +291,7 @@ typedef struct pfm_context {
 
 	unsigned long		ctx_ovfl_regs[4];	/* which registers overflowed (notification) */
 
-	struct semaphore	ctx_restart_sem;   	/* use for blocking notification mode */
+	struct completion	ctx_restart_done;  	/* use for blocking notification mode */
 
 	unsigned long		ctx_used_pmds[4];	/* bitmask of PMD used            */
 	unsigned long		ctx_all_pmds[4];	/* bitmask of all accessible PMDs */
@@ -297,14 +301,17 @@ typedef struct pfm_context {
 	unsigned long		ctx_reload_pmcs[4];	/* bitmask of force reload PMC on ctxsw in */
 	unsigned long		ctx_used_monitors[4];	/* bitmask of monitor PMC being used */
 
-	unsigned long		ctx_pmcs[IA64_NUM_PMC_REGS];	/*  saved copies of PMC values */
+	unsigned long		ctx_pmcs[PFM_NUM_PMC_REGS];	/*  saved copies of PMC values */
 
 	unsigned int		ctx_used_ibrs[1];		/* bitmask of used IBR (speedup ctxsw in) */
 	unsigned int		ctx_used_dbrs[1];		/* bitmask of used DBR (speedup ctxsw in) */
 	unsigned long		ctx_dbrs[IA64_NUM_DBG_REGS];	/* DBR values (cache) when not loaded */
 	unsigned long		ctx_ibrs[IA64_NUM_DBG_REGS];	/* IBR values (cache) when not loaded */
 
-	pfm_counter_t		ctx_pmds[IA64_NUM_PMD_REGS]; /* software state for PMDS */
+	pfm_counter_t		ctx_pmds[PFM_NUM_PMD_REGS]; /* software state for PMDS */
+
+	unsigned long		th_pmcs[PFM_NUM_PMC_REGS];	/* PMC thread save state */
+	unsigned long		th_pmds[PFM_NUM_PMD_REGS];	/* PMD thread save state */
 
 	u64			ctx_saved_psr_up;	/* only contains psr.up value */
 
@@ -481,14 +488,6 @@ typedef struct {
 
 #define PFM_CMD_ARG_MANY	-1 /* cannot be zero */
 
-typedef struct {
-	int	debug;		/* turn on/off debugging via syslog */
-	int	debug_ovfl;	/* turn on/off debug printk in overflow handler */
-	int	fastctxsw;	/* turn on/off fast (unsecure) ctxsw */
-	int	expert_mode;	/* turn on/off value checking */
-	int 	debug_pfm_read;
-} pfm_sysctl_t;
-
 typedef struct {
 	unsigned long pfm_spurious_ovfl_intr_count;	/* keep track of spurious ovfl interrupts */
 	unsigned long pfm_replay_ovfl_intr_count;	/* keep track of replayed ovfl interrupts */
@@ -507,6 +506,9 @@ typedef struct {
 static pfm_stats_t		pfm_stats[NR_CPUS];
 static pfm_session_t		pfm_sessions;	/* global sessions information */
 
+static DEFINE_SPINLOCK(pfm_alt_install_check);
+static pfm_intr_handler_desc_t  *pfm_alt_intr_handler;
+
 static struct proc_dir_entry 	*perfmon_dir;
 static pfm_uuid_t		pfm_null_uuid = {0,};
 
@@ -516,8 +518,8 @@ static LIST_HEAD(pfm_buffer_fmt_list);
 static pmu_config_t		*pmu_conf;
 
 /* sysctl() controls */
-static pfm_sysctl_t pfm_sysctl;
-int pfm_debug_var;
+pfm_sysctl_t pfm_sysctl;
+EXPORT_SYMBOL(pfm_sysctl);
 
 static ctl_table pfm_ctl_table[]={
 	{1, "debug", &pfm_sysctl.debug, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
@@ -537,7 +539,6 @@ static ctl_table pfm_sysctl_root[] = {
 static struct ctl_table_header *pfm_sysctl_header;
 
 static int pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
-static int pfm_flush(struct file *filp);
 
 #define pfm_get_cpu_var(v)		__ia64_per_cpu_var(v)
 #define pfm_get_cpu_data(a,b)		per_cpu(a, b)
@@ -581,7 +582,7 @@ pfm_protect_ctx_ctxsw(pfm_context_t *x)
 	return 0UL;
 }
 
-static inline unsigned long
+static inline void
 pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f)
 {
 	spin_unlock(&(x)->ctx_lock);
@@ -600,10 +601,11 @@ pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 }
 
 
-static struct super_block *
-pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data)
+static int
+pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data,
+	     struct vfsmount *mnt)
 {
-	return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC);
+	return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC, mnt);
 }
 
 static struct file_system_type pfm_fs_type = {
@@ -616,6 +618,7 @@ DEFINE_PER_CPU(unsigned long, pfm_syst_info);
 DEFINE_PER_CPU(struct task_struct *, pmu_owner);
 DEFINE_PER_CPU(pfm_context_t  *, pmu_ctx);
 DEFINE_PER_CPU(unsigned long, pmu_activation_number);
+EXPORT_PER_CPU_SYMBOL_GPL(pfm_syst_info);
 
 
 /* forward declaration */
@@ -633,9 +636,11 @@ static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count,
 
 #include "perfmon_itanium.h"
 #include "perfmon_mckinley.h"
+#include "perfmon_montecito.h"
 #include "perfmon_generic.h"
 
 static pmu_config_t *pmu_confs[]={
+	&pmu_conf_mont,
 	&pmu_conf_mck,
 	&pmu_conf_ita,
 	&pmu_conf_gen, /* must be last */
@@ -849,9 +854,8 @@ pfm_context_alloc(void)
 	 * allocate context descriptor 
 	 * must be able to free with interrupts disabled
 	 */
-	ctx = kmalloc(sizeof(pfm_context_t), GFP_KERNEL);
+	ctx = kzalloc(sizeof(pfm_context_t), GFP_KERNEL);
 	if (ctx) {
-		memset(ctx, 0, sizeof(pfm_context_t));
 		DPRINT(("alloc ctx @%p\n", ctx));
 	}
 	return ctx;
@@ -870,7 +874,6 @@ static void
 pfm_mask_monitoring(struct task_struct *task)
 {
 	pfm_context_t *ctx = PFM_GET_CTX(task);
-	struct thread_struct *th = &task->thread;
 	unsigned long mask, val, ovfl_mask;
 	int i;
 
@@ -891,7 +894,7 @@ pfm_mask_monitoring(struct task_struct *task)
 	 * So in both cases, the live register contains the owner's
 	 * state. We can ONLY touch the PMU registers and NOT the PSR.
 	 *
-	 * As a consequence to this call, the thread->pmds[] array
+	 * As a consequence to this call, the ctx->th_pmds[] array
 	 * contains stale information which must be ignored
 	 * when context is reloaded AND monitoring is active (see
 	 * pfm_restart).
@@ -926,9 +929,9 @@ pfm_mask_monitoring(struct task_struct *task)
 	mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
 	for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
 		if ((mask & 0x1) == 0UL) continue;
-		ia64_set_pmc(i, th->pmcs[i] & ~0xfUL);
-		th->pmcs[i] &= ~0xfUL;
-		DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, th->pmcs[i]));
+		ia64_set_pmc(i, ctx->th_pmcs[i] & ~0xfUL);
+		ctx->th_pmcs[i] &= ~0xfUL;
+		DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i]));
 	}
 	/*
 	 * make all of this visible
@@ -945,7 +948,6 @@ static void
 pfm_restore_monitoring(struct task_struct *task)
 {
 	pfm_context_t *ctx = PFM_GET_CTX(task);
-	struct thread_struct *th = &task->thread;
 	unsigned long mask, ovfl_mask;
 	unsigned long psr, val;
 	int i, is_system;
@@ -1011,9 +1013,9 @@ pfm_restore_monitoring(struct task_struct *task)
 	mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
 	for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
 		if ((mask & 0x1) == 0UL) continue;
-		th->pmcs[i] = ctx->ctx_pmcs[i];
-		ia64_set_pmc(i, th->pmcs[i]);
-		DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, th->pmcs[i]));
+		ctx->th_pmcs[i] = ctx->ctx_pmcs[i];
+		ia64_set_pmc(i, ctx->th_pmcs[i]);
+		DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, ctx->th_pmcs[i]));
 	}
 	ia64_srlz_d();
 
@@ -1072,7 +1074,6 @@ pfm_restore_pmds(unsigned long *pmds, unsigned long mask)
 static inline void
 pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx)
 {
-	struct thread_struct *thread = &task->thread;
 	unsigned long ovfl_val = pmu_conf->ovfl_val;
 	unsigned long mask = ctx->ctx_all_pmds[0];
 	unsigned long val;
@@ -1094,11 +1095,11 @@ pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx)
 			ctx->ctx_pmds[i].val = val & ~ovfl_val;
 			 val &= ovfl_val;
 		}
-		thread->pmds[i] = val;
+		ctx->th_pmds[i] = val;
 
 		DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n",
 			i,
-			thread->pmds[i],
+			ctx->th_pmds[i],
 			ctx->ctx_pmds[i].val));
 	}
 }
@@ -1109,7 +1110,6 @@ pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx)
 static inline void
 pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx)
 {
-	struct thread_struct *thread = &task->thread;
 	unsigned long mask = ctx->ctx_all_pmcs[0];
 	int i;
 
@@ -1117,8 +1117,8 @@ pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx)
 
 	for (i=0; mask; i++, mask>>=1) {
 		/* masking 0 with ovfl_val yields 0 */
-		thread->pmcs[i] = ctx->ctx_pmcs[i];
-		DPRINT(("pmc[%d]=0x%lx\n", i, thread->pmcs[i]));
+		ctx->th_pmcs[i] = ctx->ctx_pmcs[i];
+		DPRINT(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i]));
 	}
 }
 
@@ -1275,6 +1275,8 @@ out:
 }
 EXPORT_SYMBOL(pfm_unregister_buffer_fmt);
 
+extern void update_pal_halt_status(int);
+
 static int
 pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
 {
@@ -1321,6 +1323,11 @@ pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
 		is_syswide,
 		cpu));
 
+	/*
+	 * disable default_idle() to go to PAL_HALT
+	 */
+	update_pal_halt_status(0);
+
 	UNLOCK_PFS(flags);
 
 	return 0;
@@ -1328,7 +1335,7 @@ pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
 error_conflict:
 	DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n",
   		pfm_sessions.pfs_sys_session[cpu]->pid,
-		smp_processor_id()));
+		cpu));
 abort:
 	UNLOCK_PFS(flags);
 
@@ -1376,6 +1383,12 @@ pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu)
 		is_syswide,
 		cpu));
 
+	/*
+	 * if possible, enable default_idle() to go into PAL_HALT
+	 */
+	if (pfm_sessions.pfs_task_sessions == 0 && pfm_sessions.pfs_sys_sessions == 0)
+		update_pal_halt_status(1);
+
 	UNLOCK_PFS(flags);
 
 	return 0;
@@ -1578,7 +1591,7 @@ pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
 		goto abort_locked;
 	}
 
-	DPRINT(("[%d] fd=%d type=%d\n", current->pid, msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type));
+	DPRINT(("fd=%d type=%d\n", msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type));
 
 	ret = -EFAULT;
   	if(copy_to_user(buf, msg, sizeof(pfm_msg_t)) == 0) ret = sizeof(pfm_msg_t);
@@ -1702,7 +1715,7 @@ static void
 pfm_syswide_force_stop(void *info)
 {
 	pfm_context_t   *ctx = (pfm_context_t *)info;
-	struct pt_regs *regs = ia64_task_regs(current);
+	struct pt_regs *regs = task_pt_regs(current);
 	struct task_struct *owner;
 	unsigned long flags;
 	int ret;
@@ -1762,7 +1775,7 @@ pfm_syswide_cleanup_other_cpu(pfm_context_t *ctx)
  * When caller is self-monitoring, the context is unloaded.
  */
 static int
-pfm_flush(struct file *filp)
+pfm_flush(struct file *filp, fl_owner_t id)
 {
 	pfm_context_t *ctx;
 	struct task_struct *task;
@@ -1807,7 +1820,7 @@ pfm_flush(struct file *filp)
 	is_system = ctx->ctx_fl_system;
 
 	task = PFM_CTX_TASK(ctx);
-	regs = ia64_task_regs(task);
+	regs = task_pt_regs(task);
 
 	DPRINT(("ctx_state=%d is_current=%d\n",
 		state,
@@ -1937,7 +1950,7 @@ pfm_close(struct inode *inode, struct file *filp)
 	is_system = ctx->ctx_fl_system;
 
 	task = PFM_CTX_TASK(ctx);
-	regs = ia64_task_regs(task);
+	regs = task_pt_regs(task);
 
 	DPRINT(("ctx_state=%d is_current=%d\n", 
 		state,
@@ -1981,7 +1994,7 @@ pfm_close(struct inode *inode, struct file *filp)
 		/*
 		 * force task to wake up from MASKED state
 		 */
-		up(&ctx->ctx_restart_sem);
+		complete(&ctx->ctx_restart_done);
 
 		DPRINT(("waking up ctx_state=%d\n", state));
 
@@ -2176,13 +2189,13 @@ pfm_alloc_fd(struct file **cfile)
 	/*
 	 * allocate a new dcache entry
 	 */
-	file->f_dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this);
-	if (!file->f_dentry) goto out;
+	file->f_path.dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this);
+	if (!file->f_path.dentry) goto out;
 
-	file->f_dentry->d_op = &pfmfs_dentry_operations;
+	file->f_path.dentry->d_op = &pfmfs_dentry_operations;
 
-	d_add(file->f_dentry, inode);
-	file->f_vfsmnt = mntget(pfmfs_mnt);
+	d_add(file->f_path.dentry, inode);
+	file->f_path.mnt = mntget(pfmfs_mnt);
 	file->f_mapping = inode->i_mapping;
 
 	file->f_op    = &pfm_file_ops;
@@ -2211,15 +2224,18 @@ static void
 pfm_free_fd(int fd, struct file *file)
 {
 	struct files_struct *files = current->files;
+	struct fdtable *fdt;
 
 	/* 
 	 * there ie no fd_uninstall(), so we do it here
 	 */
 	spin_lock(&files->file_lock);
-        files->fd[fd] = NULL;
+	fdt = files_fdtable(files);
+	rcu_assign_pointer(fdt->fd[fd], NULL);
 	spin_unlock(&files->file_lock);
 
-	if (file) put_filp(file);
+	if (file)
+		put_filp(file);
 	put_unused_fd(fd);
 }
 
@@ -2286,7 +2302,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon
 	DPRINT(("smpl_buf @%p\n", smpl_buf));
 
 	/* allocate vma */
-	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 	if (!vma) {
 		DPRINT(("Cannot allocate vma\n"));
 		goto error_kmem;
@@ -2342,7 +2358,8 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon
 	insert_vm_struct(mm, vma);
 
 	vx_vmpages_add(mm, size >> PAGE_SHIFT);
-	vm_stat_account(vma);
+	vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
+							vma_pages(vma));
 	up_write(&task->mm->mmap_sem);
 
 	/*
@@ -2692,7 +2709,7 @@ pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
 	/*
 	 * init restart semaphore to locked
 	 */
-	sema_init(&ctx->ctx_restart_sem, 0);
+	init_completion(&ctx->ctx_restart_done);
 
 	/*
 	 * activation is used in SMP only
@@ -2845,7 +2862,6 @@ pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset)
 static int
 pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 {
-	struct thread_struct *thread = NULL;
 	struct task_struct *task;
 	pfarg_reg_t *req = (pfarg_reg_t *)arg;
 	unsigned long value, pmc_pm;
@@ -2866,7 +2882,6 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 	if (state == PFM_CTX_ZOMBIE) return -EINVAL;
 
 	if (is_loaded) {
-		thread = &task->thread;
 		/*
 		 * In system wide and when the context is loaded, access can only happen
 		 * when the caller is running on the CPU being monitored by the session.
@@ -3021,7 +3036,7 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 		 *
 		 * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs().
 		 *
-		 * The value in thread->pmcs[] may be modified on overflow, i.e.,  when
+		 * The value in th_pmcs[] may be modified on overflow, i.e.,  when
 		 * monitoring needs to be stopped.
 		 */
 		if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum);
@@ -3035,7 +3050,7 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 			/*
 			 * write thread state
 			 */
-			if (is_system == 0) thread->pmcs[cnum] = value;
+			if (is_system == 0) ctx->th_pmcs[cnum] = value;
 
 			/*
 			 * write hardware register if we can
@@ -3087,7 +3102,6 @@ error:
 static int
 pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 {
-	struct thread_struct *thread = NULL;
 	struct task_struct *task;
 	pfarg_reg_t *req = (pfarg_reg_t *)arg;
 	unsigned long value, hw_value, ovfl_mask;
@@ -3111,7 +3125,6 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 	 * the owner of the local PMU.
 	 */
 	if (likely(is_loaded)) {
-		thread = &task->thread;
 		/*
 		 * In system wide and when the context is loaded, access can only happen
 		 * when the caller is running on the CPU being monitored by the session.
@@ -3219,7 +3232,7 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 			/*
 		 	 * write thread state
 		 	 */
-			if (is_system == 0) thread->pmds[cnum] = hw_value;
+			if (is_system == 0) ctx->th_pmds[cnum] = hw_value;
 
 			/*
 			 * write hardware register if we can
@@ -3285,7 +3298,6 @@ abort_mission:
 static int
 pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 {
-	struct thread_struct *thread = NULL;
 	struct task_struct *task;
 	unsigned long val = 0UL, lval, ovfl_mask, sval;
 	pfarg_reg_t *req = (pfarg_reg_t *)arg;
@@ -3309,7 +3321,6 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 	if (state == PFM_CTX_ZOMBIE) return -EINVAL;
 
 	if (likely(is_loaded)) {
-		thread = &task->thread;
 		/*
 		 * In system wide and when the context is loaded, access can only happen
 		 * when the caller is running on the CPU being monitored by the session.
@@ -3371,7 +3382,7 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 			 * if context is zombie, then task does not exist anymore.
 			 * In this case, we use the full value saved in the context (pfm_flush_regs()).
 			 */
-			val = is_loaded ? thread->pmds[cnum] : 0UL;
+			val = is_loaded ? ctx->th_pmds[cnum] : 0UL;
 		}
 		rd_func = pmu_conf->pmd_desc[cnum].read_check;
 
@@ -3673,7 +3684,7 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 	 */
 	if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) {
 		DPRINT(("unblocking [%d] \n", task->pid));
-		up(&ctx->ctx_restart_sem);
+		complete(&ctx->ctx_restart_done);
 	} else {
 		DPRINT(("[%d] armed exit trap\n", task->pid));
 
@@ -3697,8 +3708,6 @@ pfm_debug(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 
 	pfm_sysctl.debug = m == 0 ? 0 : 1;
 
-	pfm_debug_var = pfm_sysctl.debug;
-
 	printk(KERN_INFO "perfmon debugging %s (timing reset)\n", pfm_sysctl.debug ? "on" : "off");
 
 	if (m == 0) {
@@ -4042,7 +4051,7 @@ pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 	 	 */
 		ia64_psr(regs)->up = 0;
 	} else {
-		tregs = ia64_task_regs(task);
+		tregs = task_pt_regs(task);
 
 		/*
 	 	 * stop monitoring at the user level
@@ -4124,7 +4133,7 @@ pfm_start(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 		ia64_psr(regs)->up = 1;
 
 	} else {
-		tregs = ia64_task_regs(ctx->ctx_task);
+		tregs = task_pt_regs(ctx->ctx_task);
 
 		/*
 		 * start monitoring at the kernel level the next
@@ -4214,7 +4223,7 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 		DPRINT(("cannot load to [%d], invalid ctx_state=%d\n",
 			req->load_pid,
 			ctx->ctx_state));
-		return -EINVAL;
+		return -EBUSY;
 	}
 
 	DPRINT(("load_pid [%d] using_dbreg=%d\n", req->load_pid, ctx->ctx_fl_using_dbreg));
@@ -4308,6 +4317,7 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 	DPRINT(("before cmpxchg() old_ctx=%p new_ctx=%p\n",
 		thread->pfm_context, ctx));
 
+	ret = -EBUSY;
 	old = ia64_cmpxchg(acq, &thread->pfm_context, NULL, ctx, sizeof(pfm_context_t *));
 	if (old != NULL) {
 		DPRINT(("load_pid [%d] already has a context\n", req->load_pid));
@@ -4341,8 +4351,8 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 	pfm_copy_pmds(task, ctx);
 	pfm_copy_pmcs(task, ctx);
 
-	pmcs_source = thread->pmcs;
-	pmds_source = thread->pmds;
+	pmcs_source = ctx->th_pmcs;
+	pmds_source = ctx->th_pmds;
 
 	/*
 	 * always the case for system-wide
@@ -4393,7 +4403,7 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 		/*
 		 * when not current, task MUST be stopped, so this is safe
 		 */
-		regs = ia64_task_regs(task);
+		regs = task_pt_regs(task);
 
 		/* force a full reload */
 		ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
@@ -4519,7 +4529,7 @@ pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
 	/*
 	 * per-task mode
 	 */
-	tregs = task == current ? regs : ia64_task_regs(task);
+	tregs = task == current ? regs : task_pt_regs(task);
 
 	if (task == current) {
 		/*
@@ -4582,7 +4592,7 @@ pfm_exit_thread(struct task_struct *task)
 {
 	pfm_context_t *ctx;
 	unsigned long flags;
-	struct pt_regs *regs = ia64_task_regs(task);
+	struct pt_regs *regs = task_pt_regs(task);
 	int ret, state;
 	int free_ok = 0;
 
@@ -4716,16 +4726,26 @@ recheck:
 	if (task == current || ctx->ctx_fl_system) return 0;
 
 	/*
-	 * if context is UNLOADED we are safe to go
+	 * we are monitoring another thread
 	 */
-	if (state == PFM_CTX_UNLOADED) return 0;
-
-	/*
-	 * no command can operate on a zombie context
-	 */
-	if (state == PFM_CTX_ZOMBIE) {
-		DPRINT(("cmd %d state zombie cannot operate on context\n", cmd));
-		return -EINVAL;
+	switch(state) {
+		case PFM_CTX_UNLOADED:
+			/*
+			 * if context is UNLOADED we are safe to go
+			 */
+			return 0;
+		case PFM_CTX_ZOMBIE:
+			/*
+			 * no command can operate on a zombie context
+			 */
+			DPRINT(("cmd %d state zombie cannot operate on context\n", cmd));
+			return -EINVAL;
+		case PFM_CTX_MASKED:
+			/*
+			 * PMU state has been saved to software even though
+			 * the thread may still be running.
+			 */
+			if (cmd != PFM_UNLOAD_CONTEXT) return 0;
 	}
 
 	/*
@@ -4905,7 +4925,7 @@ restart_args:
 	if (unlikely(ret)) goto abort_locked;
 
 skip_fd:
-	ret = (*func)(ctx, args_k, count, ia64_task_regs(current));
+	ret = (*func)(ctx, args_k, count, task_pt_regs(current));
 
 	call_made = 1;
 
@@ -4913,14 +4933,16 @@ abort_locked:
 	if (likely(ctx)) {
 		DPRINT(("context unlocked\n"));
 		UNPROTECT_CTX(ctx, flags);
-		fput(file);
 	}
 
 	/* copy argument back to user, if needed */
 	if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT;
 
 error_args:
-	if (args_k) kfree(args_k);
+	if (file)
+		fput(file);
+
+	kfree(args_k);
 
 	DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret));
 
@@ -4998,13 +5020,21 @@ pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs)
 }
 
 static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds);
-
+ /*
+  * pfm_handle_work() can be called with interrupts enabled
+  * (TIF_NEED_RESCHED) or disabled. The down_interruptible
+  * call may sleep, therefore we must re-enable interrupts
+  * to avoid deadlocks. It is safe to do so because this function
+  * is called ONLY when returning to user level (PUStk=1), in which case
+  * there is no risk of kernel stack overflow due to deep
+  * interrupt nesting.
+  */
 void
 pfm_handle_work(void)
 {
 	pfm_context_t *ctx;
 	struct pt_regs *regs;
-	unsigned long flags;
+	unsigned long flags, dummy_flags;
 	unsigned long ovfl_regs;
 	unsigned int reason;
 	int ret;
@@ -5021,7 +5051,7 @@ pfm_handle_work(void)
 
 	pfm_clear_task_notify();
 
-	regs = ia64_task_regs(current);
+	regs = task_pt_regs(current);
 
 	/*
 	 * extract reason for being here and clear
@@ -5041,18 +5071,15 @@ pfm_handle_work(void)
 	//if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking;
 	if (reason == PFM_TRAP_REASON_RESET) goto skip_blocking;
 
+	/*
+	 * restore interrupt mask to what it was on entry.
+	 * Could be enabled/diasbled.
+	 */
 	UNPROTECT_CTX(ctx, flags);
 
-	 /*
-	  * pfm_handle_work() is currently called with interrupts disabled.
-	  * The down_interruptible call may sleep, therefore we
-	  * must re-enable interrupts to avoid deadlocks. It is
-	  * safe to do so because this function is called ONLY
-	  * when returning to user level (PUStk=1), in which case
-	  * there is no risk of kernel stack overflow due to deep
-	  * interrupt nesting.
-	  */
-	BUG_ON(flags & IA64_PSR_I);
+	/*
+	 * force interrupt enable because of down_interruptible()
+	 */
 	local_irq_enable();
 
 	DPRINT(("before block sleeping\n"));
@@ -5061,17 +5088,17 @@ pfm_handle_work(void)
 	 * may go through without blocking on SMP systems
 	 * if restart has been received already by the time we call down()
 	 */
-	ret = down_interruptible(&ctx->ctx_restart_sem);
+	ret = wait_for_completion_interruptible(&ctx->ctx_restart_done);
 
 	DPRINT(("after block sleeping ret=%d\n", ret));
 
 	/*
-	 * disable interrupts to restore state we had upon entering
-	 * this function
+	 * lock context and mask interrupts again
+	 * We save flags into a dummy because we may have
+	 * altered interrupts mask compared to entry in this
+	 * function.
 	 */
-	local_irq_disable();
-
-	PROTECT_CTX(ctx, flags);
+	PROTECT_CTX(ctx, dummy_flags);
 
 	/*
 	 * we need to read the ovfl_regs only after wake-up
@@ -5097,7 +5124,9 @@ skip_blocking:
 	ctx->ctx_ovfl_regs[0] = 0UL;
 
 nothing_to_do:
-
+	/*
+	 * restore flags as they were upon entry
+	 */
 	UNPROTECT_CTX(ctx, flags);
 }
 
@@ -5529,34 +5558,41 @@ report_spurious2:
 }
 
 static irqreturn_t
-pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
+pfm_interrupt_handler(int irq, void *arg)
 {
 	unsigned long start_cycles, total_cycles;
 	unsigned long min, max;
 	int this_cpu;
 	int ret;
+	struct pt_regs *regs = get_irq_regs();
 
 	this_cpu = get_cpu();
-	min      = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min;
-	max      = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max;
+	if (likely(!pfm_alt_intr_handler)) {
+		min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min;
+		max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max;
 
-	start_cycles = ia64_get_itc();
+		start_cycles = ia64_get_itc();
 
-	ret = pfm_do_interrupt_handler(irq, arg, regs);
+		ret = pfm_do_interrupt_handler(irq, arg, regs);
 
-	total_cycles = ia64_get_itc();
+		total_cycles = ia64_get_itc();
 
-	/*
-	 * don't measure spurious interrupts
-	 */
-	if (likely(ret == 0)) {
-		total_cycles -= start_cycles;
+		/*
+		 * don't measure spurious interrupts
+		 */
+		if (likely(ret == 0)) {
+			total_cycles -= start_cycles;
 
-		if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles;
-		if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles;
+			if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles;
+			if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles;
 
-		pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles;
+			pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles;
+		}
+	}
+	else {
+		(*pfm_alt_intr_handler->handler)(irq, arg, regs);
 	}
+
 	put_cpu_no_resched();
 	return IRQ_HANDLED;
 }
@@ -5760,7 +5796,7 @@ pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_c
 	 * on every CPU, so we can rely on the pid to identify the idle task.
 	 */
 	if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->pid) {
-		regs = ia64_task_regs(task);
+		regs = task_pt_regs(task);
 		ia64_psr(regs)->pp = is_ctxswin ? dcr_pp : 0;
 		return;
 	}
@@ -5826,14 +5862,12 @@ void
 pfm_save_regs(struct task_struct *task)
 {
 	pfm_context_t *ctx;
-	struct thread_struct *t;
 	unsigned long flags;
 	u64 psr;
 
 
 	ctx = PFM_GET_CTX(task);
 	if (ctx == NULL) return;
-	t = &task->thread;
 
 	/*
  	 * we always come here with interrupts ALREADY disabled by
@@ -5843,7 +5877,7 @@ pfm_save_regs(struct task_struct *task)
 	flags = pfm_protect_ctx_ctxsw(ctx);
 
 	if (ctx->ctx_state == PFM_CTX_ZOMBIE) {
-		struct pt_regs *regs = ia64_task_regs(task);
+		struct pt_regs *regs = task_pt_regs(task);
 
 		pfm_clear_psr_up();
 
@@ -5891,19 +5925,19 @@ pfm_save_regs(struct task_struct *task)
 	 * guarantee we will be schedule at that same
 	 * CPU again.
 	 */
-	pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]);
+	pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]);
 
 	/*
 	 * save pmc0 ia64_srlz_d() done in pfm_save_pmds()
 	 * we will need it on the restore path to check
 	 * for pending overflow.
 	 */
-	t->pmcs[0] = ia64_get_pmc(0);
+	ctx->th_pmcs[0] = ia64_get_pmc(0);
 
 	/*
 	 * unfreeze PMU if had pending overflows
 	 */
-	if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
+	if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
 
 	/*
 	 * finally, allow context access.
@@ -5948,7 +5982,6 @@ static void
 pfm_lazy_save_regs (struct task_struct *task)
 {
 	pfm_context_t *ctx;
-	struct thread_struct *t;
 	unsigned long flags;
 
 	{ u64 psr  = pfm_get_psr();
@@ -5956,7 +5989,6 @@ pfm_lazy_save_regs (struct task_struct *task)
 	}
 
 	ctx = PFM_GET_CTX(task);
-	t   = &task->thread;
 
 	/*
 	 * we need to mask PMU overflow here to
@@ -5981,19 +6013,19 @@ pfm_lazy_save_regs (struct task_struct *task)
 	/*
 	 * save all the pmds we use
 	 */
-	pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]);
+	pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]);
 
 	/*
 	 * save pmc0 ia64_srlz_d() done in pfm_save_pmds()
 	 * it is needed to check for pended overflow
 	 * on the restore path
 	 */
-	t->pmcs[0] = ia64_get_pmc(0);
+	ctx->th_pmcs[0] = ia64_get_pmc(0);
 
 	/*
 	 * unfreeze PMU if had pending overflows
 	 */
-	if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
+	if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
 
 	/*
 	 * now get can unmask PMU interrupts, they will
@@ -6012,7 +6044,6 @@ void
 pfm_load_regs (struct task_struct *task)
 {
 	pfm_context_t *ctx;
-	struct thread_struct *t;
 	unsigned long pmc_mask = 0UL, pmd_mask = 0UL;
 	unsigned long flags;
 	u64 psr, psr_up;
@@ -6023,11 +6054,10 @@ pfm_load_regs (struct task_struct *task)
 
 	BUG_ON(GET_PMU_OWNER());
 
-	t     = &task->thread;
 	/*
 	 * possible on unload
 	 */
-	if (unlikely((t->flags & IA64_THREAD_PM_VALID) == 0)) return;
+	if (unlikely((task->thread.flags & IA64_THREAD_PM_VALID) == 0)) return;
 
 	/*
  	 * we always come here with interrupts ALREADY disabled by
@@ -6043,7 +6073,7 @@ pfm_load_regs (struct task_struct *task)
 	BUG_ON(psr & IA64_PSR_I);
 
 	if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) {
-		struct pt_regs *regs = ia64_task_regs(task);
+		struct pt_regs *regs = task_pt_regs(task);
 
 		BUG_ON(ctx->ctx_smpl_hdr);
 
@@ -6109,26 +6139,26 @@ pfm_load_regs (struct task_struct *task)
 	 *
 	 * XXX: optimize here
 	 */
-	if (pmd_mask) pfm_restore_pmds(t->pmds, pmd_mask);
-	if (pmc_mask) pfm_restore_pmcs(t->pmcs, pmc_mask);
+	if (pmd_mask) pfm_restore_pmds(ctx->th_pmds, pmd_mask);
+	if (pmc_mask) pfm_restore_pmcs(ctx->th_pmcs, pmc_mask);
 
 	/*
 	 * check for pending overflow at the time the state
 	 * was saved.
 	 */
-	if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) {
+	if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) {
 		/*
 		 * reload pmc0 with the overflow information
 		 * On McKinley PMU, this will trigger a PMU interrupt
 		 */
-		ia64_set_pmc(0, t->pmcs[0]);
+		ia64_set_pmc(0, ctx->th_pmcs[0]);
 		ia64_srlz_d();
-		t->pmcs[0] = 0UL;
+		ctx->th_pmcs[0] = 0UL;
 
 		/*
 		 * will replay the PMU interrupt
 		 */
-		if (need_irq_resend) hw_resend_irq(NULL, IA64_PERFMON_VECTOR);
+		if (need_irq_resend) ia64_resend_irq(IA64_PERFMON_VECTOR);
 
 		pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++;
 	}
@@ -6176,7 +6206,6 @@ pfm_load_regs (struct task_struct *task)
 void
 pfm_load_regs (struct task_struct *task)
 {
-	struct thread_struct *t;
 	pfm_context_t *ctx;
 	struct task_struct *owner;
 	unsigned long pmd_mask, pmc_mask;
@@ -6185,7 +6214,6 @@ pfm_load_regs (struct task_struct *task)
 
 	owner = GET_PMU_OWNER();
 	ctx   = PFM_GET_CTX(task);
-	t     = &task->thread;
 	psr   = pfm_get_psr();
 
 	BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
@@ -6248,27 +6276,27 @@ pfm_load_regs (struct task_struct *task)
 	 */
 	pmc_mask = ctx->ctx_all_pmcs[0];
 
-	pfm_restore_pmds(t->pmds, pmd_mask);
-	pfm_restore_pmcs(t->pmcs, pmc_mask);
+	pfm_restore_pmds(ctx->th_pmds, pmd_mask);
+	pfm_restore_pmcs(ctx->th_pmcs, pmc_mask);
 
 	/*
 	 * check for pending overflow at the time the state
 	 * was saved.
 	 */
-	if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) {
+	if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) {
 		/*
 		 * reload pmc0 with the overflow information
 		 * On McKinley PMU, this will trigger a PMU interrupt
 		 */
-		ia64_set_pmc(0, t->pmcs[0]);
+		ia64_set_pmc(0, ctx->th_pmcs[0]);
 		ia64_srlz_d();
 
-		t->pmcs[0] = 0UL;
+		ctx->th_pmcs[0] = 0UL;
 
 		/*
 		 * will replay the PMU interrupt
 		 */
-		if (need_irq_resend) hw_resend_irq(NULL, IA64_PERFMON_VECTOR);
+		if (need_irq_resend) ia64_resend_irq(IA64_PERFMON_VECTOR);
 
 		pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++;
 	}
@@ -6338,11 +6366,11 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
 		 */
 		pfm_unfreeze_pmu();
 	} else {
-		pmc0 = task->thread.pmcs[0];
+		pmc0 = ctx->th_pmcs[0];
 		/*
 		 * clear whatever overflow status bits there were
 		 */
-		task->thread.pmcs[0] = 0;
+		ctx->th_pmcs[0] = 0;
 	}
 	ovfl_val = pmu_conf->ovfl_val;
 	/*
@@ -6363,7 +6391,7 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
 		/*
 		 * can access PMU always true in system wide mode
 		 */
-		val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : task->thread.pmds[i];
+		val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : ctx->th_pmds[i];
 
 		if (PMD_IS_COUNTING(i)) {
 			DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n",
@@ -6395,7 +6423,7 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
 
 		DPRINT(("[%d] ctx_pmd[%d]=0x%lx  pmd_val=0x%lx\n", task->pid, i, val, pmd_val));
 
-		if (is_self) task->thread.pmds[i] = pmd_val;
+		if (is_self) ctx->th_pmds[i] = pmd_val;
 
 		ctx->ctx_pmds[i].val = val;
 	}
@@ -6403,10 +6431,145 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
 
 static struct irqaction perfmon_irqaction = {
 	.handler = pfm_interrupt_handler,
-	.flags   = SA_INTERRUPT,
+	.flags   = IRQF_DISABLED,
 	.name    = "perfmon"
 };
 
+static void
+pfm_alt_save_pmu_state(void *data)
+{
+	struct pt_regs *regs;
+
+	regs = task_pt_regs(current);
+
+	DPRINT(("called\n"));
+
+	/*
+	 * should not be necessary but
+	 * let's take not risk
+	 */
+	pfm_clear_psr_up();
+	pfm_clear_psr_pp();
+	ia64_psr(regs)->pp = 0;
+
+	/*
+	 * This call is required
+	 * May cause a spurious interrupt on some processors
+	 */
+	pfm_freeze_pmu();
+
+	ia64_srlz_d();
+}
+
+void
+pfm_alt_restore_pmu_state(void *data)
+{
+	struct pt_regs *regs;
+
+	regs = task_pt_regs(current);
+
+	DPRINT(("called\n"));
+
+	/*
+	 * put PMU back in state expected
+	 * by perfmon
+	 */
+	pfm_clear_psr_up();
+	pfm_clear_psr_pp();
+	ia64_psr(regs)->pp = 0;
+
+	/*
+	 * perfmon runs with PMU unfrozen at all times
+	 */
+	pfm_unfreeze_pmu();
+
+	ia64_srlz_d();
+}
+
+int
+pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
+{
+	int ret, i;
+	int reserve_cpu;
+
+	/* some sanity checks */
+	if (hdl == NULL || hdl->handler == NULL) return -EINVAL;
+
+	/* do the easy test first */
+	if (pfm_alt_intr_handler) return -EBUSY;
+
+	/* one at a time in the install or remove, just fail the others */
+	if (!spin_trylock(&pfm_alt_install_check)) {
+		return -EBUSY;
+	}
+
+	/* reserve our session */
+	for_each_online_cpu(reserve_cpu) {
+		ret = pfm_reserve_session(NULL, 1, reserve_cpu);
+		if (ret) goto cleanup_reserve;
+	}
+
+	/* save the current system wide pmu states */
+	ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 0, 1);
+	if (ret) {
+		DPRINT(("on_each_cpu() failed: %d\n", ret));
+		goto cleanup_reserve;
+	}
+
+	/* officially change to the alternate interrupt handler */
+	pfm_alt_intr_handler = hdl;
+
+	spin_unlock(&pfm_alt_install_check);
+
+	return 0;
+
+cleanup_reserve:
+	for_each_online_cpu(i) {
+		/* don't unreserve more than we reserved */
+		if (i >= reserve_cpu) break;
+
+		pfm_unreserve_session(NULL, 1, i);
+	}
+
+	spin_unlock(&pfm_alt_install_check);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pfm_install_alt_pmu_interrupt);
+
+int
+pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
+{
+	int i;
+	int ret;
+
+	if (hdl == NULL) return -EINVAL;
+
+	/* cannot remove someone else's handler! */
+	if (pfm_alt_intr_handler != hdl) return -EINVAL;
+
+	/* one at a time in the install or remove, just fail the others */
+	if (!spin_trylock(&pfm_alt_install_check)) {
+		return -EBUSY;
+	}
+
+	pfm_alt_intr_handler = NULL;
+
+	ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 0, 1);
+	if (ret) {
+		DPRINT(("on_each_cpu() failed: %d\n", ret));
+	}
+
+	for_each_online_cpu(i) {
+		pfm_unreserve_session(NULL, 1, i);
+	}
+
+	spin_unlock(&pfm_alt_install_check);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pfm_remove_alt_pmu_interrupt);
+
 /*
  * perfmon initialization routine, called from the initcall() table
  */
@@ -6504,7 +6667,7 @@ pfm_init(void)
 	       ffz(pmu_conf->ovfl_val));
 
 	/* sanity check */
-	if (pmu_conf->num_pmds >= IA64_NUM_PMD_REGS || pmu_conf->num_pmcs >= IA64_NUM_PMC_REGS) {
+	if (pmu_conf->num_pmds >= PFM_NUM_PMD_REGS || pmu_conf->num_pmcs >= PFM_NUM_PMC_REGS) {
 		printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n");
 		pmu_conf = NULL;
 		return -1;
@@ -6550,6 +6713,7 @@ __initcall(pfm_init);
 void
 pfm_init_percpu (void)
 {
+	static int first_time=1;
 	/*
 	 * make sure no measurement is active
 	 * (may inherit programmed PMCs from EFI).
@@ -6562,8 +6726,10 @@ pfm_init_percpu (void)
 	 */
 	pfm_unfreeze_pmu();
 
-	if (smp_processor_id() == 0)
+	if (first_time) {
 		register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
+		first_time=0;
+	}
 
 	ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR);
 	ia64_srlz_d();
@@ -6576,7 +6742,6 @@ void
 dump_pmu_state(const char *from)
 {
 	struct task_struct *task;
-	struct thread_struct *t;
 	struct pt_regs *regs;
 	pfm_context_t *ctx;
 	unsigned long psr, dcr, info, flags;
@@ -6585,7 +6750,7 @@ dump_pmu_state(const char *from)
 	local_irq_save(flags);
 
 	this_cpu = smp_processor_id();
-	regs     = ia64_task_regs(current);
+	regs     = task_pt_regs(current);
 	info     = PFM_CPUINFO_GET();
 	dcr      = ia64_getreg(_IA64_REG_CR_DCR);
 
@@ -6621,16 +6786,14 @@ dump_pmu_state(const char *from)
 	ia64_psr(regs)->up = 0;
 	ia64_psr(regs)->pp = 0;
 
-	t = &current->thread;
-
 	for (i=1; PMC_IS_LAST(i) == 0; i++) {
 		if (PMC_IS_IMPL(i) == 0) continue;
-		printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, t->pmcs[i]);
+		printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, ctx->th_pmcs[i]);
 	}
 
 	for (i=1; PMD_IS_LAST(i) == 0; i++) {
 		if (PMD_IS_IMPL(i) == 0) continue;
-		printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, t->pmds[i]);
+		printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, ctx->th_pmds[i]);
 	}
 
 	if (ctx) {