VServer 1.9.2 (patch-2.6.8.1-vs1.9.2.diff)
[linux-2.6.git] / arch / ia64 / kernel / perfmon.c
index 6ea20f2..2f71b39 100644 (file)
@@ -311,6 +311,7 @@ typedef struct pfm_context {
        unsigned int            ctx_cpu;                /* cpu to which perfmon is applied (system wide) */
 
        int                     ctx_fd;                 /* file descriptor used my this context */
+       pfm_ovfl_arg_t          ctx_ovfl_arg;           /* argument to custom buffer format handler */
 
        pfm_buffer_fmt_t        *ctx_buf_fmt;           /* buffer format callbacks */
        void                    *ctx_smpl_hdr;          /* points to sampling buffer header kernel vaddr */
@@ -1511,13 +1512,6 @@ exit_pfm_fs(void)
        mntput(pfmfs_mnt);
 }
 
-static loff_t
-pfm_lseek(struct file *file, loff_t offset, int whence)
-{
-       DPRINT(("pfm_lseek called\n"));
-       return -ESPIPE;
-}
-
 static ssize_t
 pfm_read(struct file *filp, char *buf, size_t size, loff_t *ppos)
 {
@@ -1544,10 +1538,6 @@ pfm_read(struct file *filp, char *buf, size_t size, loff_t *ppos)
                DPRINT(("message is too small ctx=%p (>=%ld)\n", ctx, sizeof(pfm_msg_t)));
                return -EINVAL;
        }
-       /*
-        * seeks are not allowed on message queues
-        */
-       if (ppos != &filp->f_pos) return -ESPIPE;
 
        PROTECT_CTX(ctx, flags);
 
@@ -2140,7 +2130,7 @@ pfm_no_open(struct inode *irrelevant, struct file *dontcare)
 
 
 static struct file_operations pfm_file_ops = {
-       .llseek   = pfm_lseek,
+       .llseek   = no_llseek,
        .read     = pfm_read,
        .write    = pfm_write,
        .poll     = pfm_poll,
@@ -4585,31 +4575,6 @@ pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
        return 0;
 }
 
-static void
-pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs)
-{
-       struct task_struct *task = ctx->ctx_task;
-
-       ia64_psr(regs)->up = 0;
-       ia64_psr(regs)->sp = 1;
-
-       if (GET_PMU_OWNER() == task) {
-               DPRINT(("cleared ownership for [%d]\n", ctx->ctx_task->pid));
-               SET_PMU_OWNER(NULL, NULL);
-       }
-
-       /*
-        * disconnect the task from the context and vice-versa
-        */
-       PFM_SET_WORK_PENDING(task, 0);
-
-       task->thread.pfm_context  = NULL;
-       task->thread.flags       &= ~IA64_THREAD_PM_VALID;
-
-       DPRINT(("force cleanupf for [%d]\n",  task->pid));
-}
-
-
 
 /*
  * called only from exit_thread(): task == current
@@ -4727,21 +4692,22 @@ static int
 pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags)
 {
        struct task_struct *task;
-       int state;
+       int state, old_state;
 
+recheck:
        state = ctx->ctx_state;
+       task  = ctx->ctx_task;
 
-       task = PFM_CTX_TASK(ctx);
        if (task == NULL) {
                DPRINT(("context %d no task, state=%d\n", ctx->ctx_fd, state));
                return 0;
        }
 
        DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n",
-                               ctx->ctx_fd,
-                               state,
-                               task->pid,
-                               task->state, PFM_CMD_STOPPED(cmd)));
+               ctx->ctx_fd,
+               state,
+               task->pid,
+               task->state, PFM_CMD_STOPPED(cmd)));
 
        /*
         * self-monitoring always ok.
@@ -4753,31 +4719,63 @@ pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags)
        if (task == current || ctx->ctx_fl_system) return 0;
 
        /*
-        * context is UNLOADED, MASKED we are safe to go
+        * if context is UNLOADED we are safe to go
         */
-       if (state != PFM_CTX_LOADED) return 0;
+       if (state == PFM_CTX_UNLOADED) return 0;
 
-       if (state == PFM_CTX_ZOMBIE) return -EINVAL;
+       /*
+        * no command can operate on a zombie context
+        */
+       if (state == PFM_CTX_ZOMBIE) {
+               DPRINT(("cmd %d state zombie cannot operate on context\n", cmd));
+               return -EINVAL;
+       }
 
        /*
-        * context is loaded, we must make sure the task is stopped
+        * context is LOADED or MASKED. Some commands may need to have 
+        * the task stopped.
+        *
         * We could lift this restriction for UP but it would mean that
         * the user has no guarantee the task would not run between
         * two successive calls to perfmonctl(). That's probably OK.
         * If this user wants to ensure the task does not run, then
         * the task must be stopped.
         */
-       if (PFM_CMD_STOPPED(cmd) && task->state != TASK_STOPPED) {
-               DPRINT(("[%d] task not in stopped state\n", task->pid));
-               return -EBUSY;
-       }
+       if (PFM_CMD_STOPPED(cmd)) {
+               if (task->state != TASK_STOPPED) {
+                       DPRINT(("[%d] task not in stopped state\n", task->pid));
+                       return -EBUSY;
+               }
+               /*
+                * task is now stopped, wait for ctxsw out
+                *
+                * This is an interesting point in the code.
+                * We need to unprotect the context because
+                * the pfm_save_regs() routines needs to grab
+                * the same lock. There are danger in doing
+                * this because it leaves a window open for
+                * another task to get access to the context
+                * and possibly change its state. The one thing
+                * that is not possible is for the context to disappear
+                * because we are protected by the VFS layer, i.e.,
+                * get_fd()/put_fd().
+                */
+               old_state = state;
 
-       UNPROTECT_CTX(ctx, flags);
+               UNPROTECT_CTX(ctx, flags);
 
-       wait_task_inactive(task);
+               wait_task_inactive(task);
 
-       PROTECT_CTX(ctx, flags);
+               PROTECT_CTX(ctx, flags);
 
+               /*
+                * we must recheck to verify if state has changed
+                */
+               if (ctx->ctx_state != old_state) {
+                       DPRINT(("old_state=%d new_state=%d\n", old_state, ctx->ctx_state));
+                       goto recheck;
+               }
+       }
        return 0;
 }
 
@@ -5186,7 +5184,7 @@ pfm_end_notify_user(pfm_context_t *ctx)
 static void
 pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs)
 {
-       pfm_ovfl_arg_t ovfl_arg;
+       pfm_ovfl_arg_t *ovfl_arg;
        unsigned long mask;
        unsigned long old_val, ovfl_val, new_val;
        unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL, smpl_pmds = 0UL, reset_pmds;
@@ -5273,7 +5271,8 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
                int j, k, ret = 0;
                int this_cpu = smp_processor_id();
 
-               pmd_mask   = ovfl_pmds >> PMU_FIRST_COUNTER;
+               pmd_mask = ovfl_pmds >> PMU_FIRST_COUNTER;
+               ovfl_arg = &ctx->ctx_ovfl_arg;
 
                prefetch(ctx->ctx_smpl_hdr);
 
@@ -5283,15 +5282,15 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
 
                        if ((pmd_mask & 0x1) == 0) continue;
 
-                       ovfl_arg.ovfl_pmd      = (unsigned char )i;
-                       ovfl_arg.ovfl_notify   = ovfl_notify & mask ? 1 : 0;
-                       ovfl_arg.active_set    = 0;
-                       ovfl_arg.ovfl_ctrl.val = 0; /* module must fill in all fields */
-                       ovfl_arg.smpl_pmds[0]  = smpl_pmds = ctx->ctx_pmds[i].smpl_pmds[0];
+                       ovfl_arg->ovfl_pmd      = (unsigned char )i;
+                       ovfl_arg->ovfl_notify   = ovfl_notify & mask ? 1 : 0;
+                       ovfl_arg->active_set    = 0;
+                       ovfl_arg->ovfl_ctrl.val = 0; /* module must fill in all fields */
+                       ovfl_arg->smpl_pmds[0]  = smpl_pmds = ctx->ctx_pmds[i].smpl_pmds[0];
 
-                       ovfl_arg.pmd_value      = ctx->ctx_pmds[i].val;
-                       ovfl_arg.pmd_last_reset = ctx->ctx_pmds[i].lval;
-                       ovfl_arg.pmd_eventid    = ctx->ctx_pmds[i].eventid;
+                       ovfl_arg->pmd_value      = ctx->ctx_pmds[i].val;
+                       ovfl_arg->pmd_last_reset = ctx->ctx_pmds[i].lval;
+                       ovfl_arg->pmd_eventid    = ctx->ctx_pmds[i].eventid;
 
                        /*
                         * copy values of pmds of interest. Sampling format may copy them
@@ -5300,8 +5299,8 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
                        if (smpl_pmds) {
                                for(j=0, k=0; smpl_pmds; j++, smpl_pmds >>=1) {
                                        if ((smpl_pmds & 0x1) == 0) continue;
-                                       ovfl_arg.smpl_pmds_values[k++] = PMD_IS_COUNTING(j) ?  pfm_read_soft_counter(ctx, j) : ia64_get_pmd(j);
-                                       DPRINT_ovfl(("smpl_pmd[%d]=pmd%u=0x%lx\n", k-1, j, ovfl_arg.smpl_pmds_values[k-1]));
+                                       ovfl_arg->smpl_pmds_values[k++] = PMD_IS_COUNTING(j) ?  pfm_read_soft_counter(ctx, j) : ia64_get_pmd(j);
+                                       DPRINT_ovfl(("smpl_pmd[%d]=pmd%u=0x%lx\n", k-1, j, ovfl_arg->smpl_pmds_values[k-1]));
                                }
                        }
 
@@ -5312,7 +5311,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
                        /*
                         * call custom buffer format record (handler) routine
                         */
-                       ret = (*ctx->ctx_buf_fmt->fmt_handler)(task, ctx->ctx_smpl_hdr, &ovfl_arg, regs, tstamp);
+                       ret = (*ctx->ctx_buf_fmt->fmt_handler)(task, ctx->ctx_smpl_hdr, ovfl_arg, regs, tstamp);
 
                        end_cycles = ia64_get_itc();
 
@@ -5320,13 +5319,13 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
                         * For those controls, we take the union because they have
                         * an all or nothing behavior.
                         */
-                       ovfl_ctrl.bits.notify_user     |= ovfl_arg.ovfl_ctrl.bits.notify_user;
-                       ovfl_ctrl.bits.block_task      |= ovfl_arg.ovfl_ctrl.bits.block_task;
-                       ovfl_ctrl.bits.mask_monitoring |= ovfl_arg.ovfl_ctrl.bits.mask_monitoring;
+                       ovfl_ctrl.bits.notify_user     |= ovfl_arg->ovfl_ctrl.bits.notify_user;
+                       ovfl_ctrl.bits.block_task      |= ovfl_arg->ovfl_ctrl.bits.block_task;
+                       ovfl_ctrl.bits.mask_monitoring |= ovfl_arg->ovfl_ctrl.bits.mask_monitoring;
                        /*
                         * build the bitmask of pmds to reset now
                         */
-                       if (ovfl_arg.ovfl_ctrl.bits.reset_ovfl_pmds) reset_pmds |= mask;
+                       if (ovfl_arg->ovfl_ctrl.bits.reset_ovfl_pmds) reset_pmds |= mask;
 
                        pfm_stats[this_cpu].pfm_smpl_handler_cycles += end_cycles - start_cycles;
                }
@@ -5794,6 +5793,32 @@ pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_c
 }
 
 #ifdef CONFIG_SMP
+
+static void
+pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs)
+{
+       struct task_struct *task = ctx->ctx_task;
+
+       ia64_psr(regs)->up = 0;
+       ia64_psr(regs)->sp = 1;
+
+       if (GET_PMU_OWNER() == task) {
+               DPRINT(("cleared ownership for [%d]\n", ctx->ctx_task->pid));
+               SET_PMU_OWNER(NULL, NULL);
+       }
+
+       /*
+        * disconnect the task from the context and vice-versa
+        */
+       PFM_SET_WORK_PENDING(task, 0);
+
+       task->thread.pfm_context  = NULL;
+       task->thread.flags       &= ~IA64_THREAD_PM_VALID;
+
+       DPRINT(("force cleanup for [%d]\n",  task->pid));
+}
+
+
 /*
  * in 2.6, interrupts are masked when we come here and the runqueue lock is held
  */