#include <linux/pagemap.h>
#include <linux/mount.h>
#include <linux/version.h>
+#include <linux/vs_memory.h>
+#include <linux/vs_cvirt.h>
#include <asm/bitops.h>
#include <asm/errno.h>
unsigned int ctx_cpu; /* cpu to which perfmon is applied (system wide) */
int ctx_fd; /* file descriptor used my this context */
+ pfm_ovfl_arg_t ctx_ovfl_arg; /* argument to custom buffer format handler */
pfm_buffer_fmt_t *ctx_buf_fmt; /* buffer format callbacks */
void *ctx_smpl_hdr; /* points to sampling buffer header kernel vaddr */
mntput(pfmfs_mnt);
}
-static loff_t
-pfm_lseek(struct file *file, loff_t offset, int whence)
-{
- DPRINT(("pfm_lseek called\n"));
- return -ESPIPE;
-}
-
static ssize_t
-pfm_read(struct file *filp, char *buf, size_t size, loff_t *ppos)
+pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
{
pfm_context_t *ctx;
pfm_msg_t *msg;
DPRINT(("message is too small ctx=%p (>=%ld)\n", ctx, sizeof(pfm_msg_t)));
return -EINVAL;
}
- /*
- * seeks are not allowed on message queues
- */
- if (ppos != &filp->f_pos) return -ESPIPE;
PROTECT_CTX(ctx, flags);
}
static ssize_t
-pfm_write(struct file *file, const char *ubuf,
+pfm_write(struct file *file, const char __user *ubuf,
size_t size, loff_t *ppos)
{
DPRINT(("pfm_write called\n"));
pfm_fasync(int fd, struct file *filp, int on)
{
pfm_context_t *ctx;
- unsigned long flags;
int ret;
if (PFM_IS_FILE(filp) == 0) {
printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", current->pid);
return -EBADF;
}
-
-
- PROTECT_CTX(ctx, flags);
-
+ /*
+ * we cannot mask interrupts during this call because this
+ * may go to sleep if memory is not readily available.
+ *
+ * We are protected from the context disappearing by the get_fd()/put_fd()
+ * done in caller. Serialization of this function is ensured by caller.
+ */
ret = pfm_do_fasync(fd, filp, ctx, on);
+
DPRINT(("pfm_fasync called on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
fd,
on,
ctx->ctx_async_queue, ret));
- UNPROTECT_CTX(ctx, flags);
-
return ret;
}
static struct file_operations pfm_file_ops = {
- .llseek = pfm_lseek,
+ .llseek = no_llseek,
.read = pfm_read,
.write = pfm_write,
.poll = pfm_poll,
static void
pfm_free_fd(int fd, struct file *file)
{
+ struct files_struct *files = current->files;
+
+ /*
+ * there is no fd_uninstall(), so we do it here:
+ * the slot for fd in the current task's file table (current->files)
+ * is reset to NULL while holding files->file_lock. Only after that
+ * do we call put_filp() on file (when it is non-NULL) and
+ * put_unused_fd() on fd.
+ */
+ spin_lock(&files->file_lock);
+ files->fd[fd] = NULL;
+ spin_unlock(&files->file_lock);
+
if (file) put_filp(file);
put_unused_fd(fd);
}
* if ((mm->total_vm << PAGE_SHIFT) + len> task->rlim[RLIMIT_AS].rlim_cur)
* return -ENOMEM;
*/
- if (size > task->rlim[RLIMIT_MEMLOCK].rlim_cur) return -EAGAIN;
+ if (size > task->rlim[RLIMIT_MEMLOCK].rlim_cur) return -ENOMEM;
/*
* We do the easy to undo allocations first.
// mm->total_vm += size >> PAGE_SHIFT;
vx_vmpages_add(mm, size >> PAGE_SHIFT);
-
+ vm_stat_account(vma);
up_write(&task->mm->mmap_sem);
/*
*/
if (task == current) return 0;
- if (task->state != TASK_STOPPED) {
+ if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) {
DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task->pid, task->state));
return -EBUSY;
}
ctx = pfm_context_alloc();
if (!ctx) goto error;
- req->ctx_fd = ctx->ctx_fd = pfm_alloc_fd(&filp);
- if (req->ctx_fd < 0) goto error_file;
+ ret = pfm_alloc_fd(&filp);
+ if (ret < 0) goto error_file;
+
+ req->ctx_fd = ctx->ctx_fd = ret;
/*
* attach context to file
state = ctx->ctx_state;
is_system = ctx->ctx_fl_system;
- if (state != PFM_CTX_LOADED && state != PFM_CTX_MASKED) return -EINVAL;
+ /*
+ * context must be attached to issue the stop command (includes LOADED,MASKED,ZOMBIE)
+ */
+ if (state == PFM_CTX_UNLOADED) return -EINVAL;
/*
* In system wide and when the context is loaded, access can only happen
return 0;
}
-static void
-pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs)
-{
- struct task_struct *task = ctx->ctx_task;
-
- ia64_psr(regs)->up = 0;
- ia64_psr(regs)->sp = 1;
-
- if (GET_PMU_OWNER() == task) {
- DPRINT(("cleared ownership for [%d]\n", ctx->ctx_task->pid));
- SET_PMU_OWNER(NULL, NULL);
- }
-
- /*
- * disconnect the task from the context and vice-versa
- */
- PFM_SET_WORK_PENDING(task, 0);
-
- task->thread.pfm_context = NULL;
- task->thread.flags &= ~IA64_THREAD_PM_VALID;
-
- DPRINT(("force cleanupf for [%d]\n", task->pid));
-}
-
-
/*
* called only from exit_thread(): task == current
pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags)
{
struct task_struct *task;
- int state;
+ int state, old_state;
+recheck:
state = ctx->ctx_state;
+ task = ctx->ctx_task;
- task = PFM_CTX_TASK(ctx);
if (task == NULL) {
DPRINT(("context %d no task, state=%d\n", ctx->ctx_fd, state));
return 0;
}
DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n",
- ctx->ctx_fd,
- state,
- task->pid,
- task->state, PFM_CMD_STOPPED(cmd)));
+ ctx->ctx_fd,
+ state,
+ task->pid,
+ task->state, PFM_CMD_STOPPED(cmd)));
/*
* self-monitoring always ok.
if (task == current || ctx->ctx_fl_system) return 0;
/*
- * context is UNLOADED, MASKED we are safe to go
+ * if context is UNLOADED we are safe to go
*/
- if (state != PFM_CTX_LOADED) return 0;
+ if (state == PFM_CTX_UNLOADED) return 0;
- if (state == PFM_CTX_ZOMBIE) return -EINVAL;
+ /*
+ * no command can operate on a zombie context
+ */
+ if (state == PFM_CTX_ZOMBIE) {
+ DPRINT(("cmd %d state zombie cannot operate on context\n", cmd));
+ return -EINVAL;
+ }
/*
- * context is loaded, we must make sure the task is stopped
+ * context is LOADED or MASKED. Some commands may need to have
+ * the task stopped.
+ *
* We could lift this restriction for UP but it would mean that
* the user has no guarantee the task would not run between
* two successive calls to perfmonctl(). That's probably OK.
* If this user wants to ensure the task does not run, then
* the task must be stopped.
*/
- if (PFM_CMD_STOPPED(cmd) && task->state != TASK_STOPPED) {
- DPRINT(("[%d] task not in stopped state\n", task->pid));
- return -EBUSY;
- }
+ if (PFM_CMD_STOPPED(cmd)) {
+ if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) {
+ DPRINT(("[%d] task not in stopped state\n", task->pid));
+ return -EBUSY;
+ }
+ /*
+ * task is now stopped, wait for ctxsw out
+ *
+ * This is an interesting point in the code.
+ * We need to unprotect the context because
+ * the pfm_save_regs() routine needs to grab
+ * the same lock. There is danger in doing
+ * this because it leaves a window open for
+ * another task to get access to the context
+ * and possibly change its state. The one thing
+ * that is not possible is for the context to disappear
+ * because we are protected by the VFS layer, i.e.,
+ * get_fd()/put_fd().
+ */
+ old_state = state;
- UNPROTECT_CTX(ctx, flags);
+ UNPROTECT_CTX(ctx, flags);
- wait_task_inactive(task);
+ wait_task_inactive(task);
- PROTECT_CTX(ctx, flags);
+ PROTECT_CTX(ctx, flags);
+ /*
+ * we must recheck to verify if state has changed
+ */
+ if (ctx->ctx_state != old_state) {
+ DPRINT(("old_state=%d new_state=%d\n", old_state, ctx->ctx_state));
+ goto recheck;
+ }
+ }
return 0;
}
* system-call entry point (must return long)
*/
asmlinkage long
-sys_perfmonctl (int fd, int cmd, void *arg, int count, long arg5, long arg6, long arg7,
+sys_perfmonctl (int fd, int cmd, void __user *arg, int count, long arg5, long arg6, long arg7,
long arg8, long stack)
{
struct pt_regs *regs = (struct pt_regs *)&stack;
static void
pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs)
{
- pfm_ovfl_arg_t ovfl_arg;
+ pfm_ovfl_arg_t *ovfl_arg;
unsigned long mask;
unsigned long old_val, ovfl_val, new_val;
unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL, smpl_pmds = 0UL, reset_pmds;
int j, k, ret = 0;
int this_cpu = smp_processor_id();
- pmd_mask = ovfl_pmds >> PMU_FIRST_COUNTER;
+ pmd_mask = ovfl_pmds >> PMU_FIRST_COUNTER;
+ ovfl_arg = &ctx->ctx_ovfl_arg;
prefetch(ctx->ctx_smpl_hdr);
if ((pmd_mask & 0x1) == 0) continue;
- ovfl_arg.ovfl_pmd = (unsigned char )i;
- ovfl_arg.ovfl_notify = ovfl_notify & mask ? 1 : 0;
- ovfl_arg.active_set = 0;
- ovfl_arg.ovfl_ctrl.val = 0; /* module must fill in all fields */
- ovfl_arg.smpl_pmds[0] = smpl_pmds = ctx->ctx_pmds[i].smpl_pmds[0];
+ ovfl_arg->ovfl_pmd = (unsigned char )i;
+ ovfl_arg->ovfl_notify = ovfl_notify & mask ? 1 : 0;
+ ovfl_arg->active_set = 0;
+ ovfl_arg->ovfl_ctrl.val = 0; /* module must fill in all fields */
+ ovfl_arg->smpl_pmds[0] = smpl_pmds = ctx->ctx_pmds[i].smpl_pmds[0];
- ovfl_arg.pmd_value = ctx->ctx_pmds[i].val;
- ovfl_arg.pmd_last_reset = ctx->ctx_pmds[i].lval;
- ovfl_arg.pmd_eventid = ctx->ctx_pmds[i].eventid;
+ ovfl_arg->pmd_value = ctx->ctx_pmds[i].val;
+ ovfl_arg->pmd_last_reset = ctx->ctx_pmds[i].lval;
+ ovfl_arg->pmd_eventid = ctx->ctx_pmds[i].eventid;
/*
* copy values of pmds of interest. Sampling format may copy them
if (smpl_pmds) {
for(j=0, k=0; smpl_pmds; j++, smpl_pmds >>=1) {
if ((smpl_pmds & 0x1) == 0) continue;
- ovfl_arg.smpl_pmds_values[k++] = PMD_IS_COUNTING(j) ? pfm_read_soft_counter(ctx, j) : ia64_get_pmd(j);
- DPRINT_ovfl(("smpl_pmd[%d]=pmd%u=0x%lx\n", k-1, j, ovfl_arg.smpl_pmds_values[k-1]));
+ ovfl_arg->smpl_pmds_values[k++] = PMD_IS_COUNTING(j) ? pfm_read_soft_counter(ctx, j) : ia64_get_pmd(j);
+ DPRINT_ovfl(("smpl_pmd[%d]=pmd%u=0x%lx\n", k-1, j, ovfl_arg->smpl_pmds_values[k-1]));
}
}
/*
* call custom buffer format record (handler) routine
*/
- ret = (*ctx->ctx_buf_fmt->fmt_handler)(task, ctx->ctx_smpl_hdr, &ovfl_arg, regs, tstamp);
+ ret = (*ctx->ctx_buf_fmt->fmt_handler)(task, ctx->ctx_smpl_hdr, ovfl_arg, regs, tstamp);
end_cycles = ia64_get_itc();
* For those controls, we take the union because they have
* an all or nothing behavior.
*/
- ovfl_ctrl.bits.notify_user |= ovfl_arg.ovfl_ctrl.bits.notify_user;
- ovfl_ctrl.bits.block_task |= ovfl_arg.ovfl_ctrl.bits.block_task;
- ovfl_ctrl.bits.mask_monitoring |= ovfl_arg.ovfl_ctrl.bits.mask_monitoring;
+ ovfl_ctrl.bits.notify_user |= ovfl_arg->ovfl_ctrl.bits.notify_user;
+ ovfl_ctrl.bits.block_task |= ovfl_arg->ovfl_ctrl.bits.block_task;
+ ovfl_ctrl.bits.mask_monitoring |= ovfl_arg->ovfl_ctrl.bits.mask_monitoring;
/*
* build the bitmask of pmds to reset now
*/
- if (ovfl_arg.ovfl_ctrl.bits.reset_ovfl_pmds) reset_pmds |= mask;
+ if (ovfl_arg->ovfl_ctrl.bits.reset_ovfl_pmds) reset_pmds |= mask;
pfm_stats[this_cpu].pfm_smpl_handler_cycles += end_cycles - start_cycles;
}
}
#ifdef CONFIG_SMP
+
+static void
+pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs)
+{
+ struct task_struct *task = ctx->ctx_task;
+
+ ia64_psr(regs)->up = 0;
+ ia64_psr(regs)->sp = 1;
+
+ if (GET_PMU_OWNER() == task) {
+ DPRINT(("cleared ownership for [%d]\n", ctx->ctx_task->pid));
+ SET_PMU_OWNER(NULL, NULL);
+ }
+
+ /*
+ * disconnect the task from the context and vice-versa:
+ * by this point the psr.up and psr.sp bits in regs have been
+ * set to 0 and 1 respectively, and PMU ownership has been
+ * released if (and only if) this task was the current owner.
+ * What remains is the task side: PFM_SET_WORK_PENDING(task, 0)
+ * (presumably clears the pending-work indication, confirm against
+ * the macro), the thread's pfm_context pointer is set to NULL and
+ * IA64_THREAD_PM_VALID is cleared from the thread flags.
+ *
+ * NOTE(review): ctx->ctx_task itself is not modified in this
+ * function; if the context side must also be detached, that has
+ * to happen in the caller.
+ */
+ PFM_SET_WORK_PENDING(task, 0);
+
+ task->thread.pfm_context = NULL;
+ task->thread.flags &= ~IA64_THREAD_PM_VALID;
+
+ DPRINT(("force cleanup for [%d]\n", task->pid));
+}
+
+
/*
* in 2.6, interrupts are masked when we come here and the runqueue lock is held
*/
*/
is_self = ctx->ctx_task == task ? 1 : 0;
-#ifdef CONFIG_SMP
- if (task == current) {
-#else
/*
- * in UP, the state can still be in the registers
+ * can access PMU if task is the owner of the PMU state on the current CPU
+ * or if we are running on the CPU bound to the context in system-wide mode
+ * (that is not necessarily the task the context is attached to in this mode).
+ * In system-wide we always have can_access_pmu true because a task running on an
+ * invalid processor is flagged earlier in the call stack (see pfm_stop).
*/
- if (task == current || GET_PMU_OWNER() == task) {
-#endif
- can_access_pmu = 1;
+ can_access_pmu = (GET_PMU_OWNER() == task) || (ctx->ctx_fl_system && ctx->ctx_cpu == smp_processor_id());
+ if (can_access_pmu) {
/*
* Mark the PMU as not owned
* This will cause the interrupt handler to do nothing in case an overflow
* on.
*/
SET_PMU_OWNER(NULL, NULL);
+ DPRINT(("releasing ownership\n"));
/*
* read current overflow status: