fedora core 6 1.2949 + vserver 2.2.0
[linux-2.6.git] / fs / proc / array.c
index 4a2cce7..2407c83 100644 (file)
@@ -52,7 +52,6 @@
  *                      :  base.c too.
  */
 
-#include <linux/config.h>
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/time.h>
 #include <linux/highmem.h>
 #include <linux/file.h>
 #include <linux/times.h>
-#include <linux/ninline.h>
+#include <linux/cpuset.h>
+#include <linux/tracehook.h>
+#include <linux/rcupdate.h>
+#include <linux/delayacct.h>
+#include <linux/vs_context.h>
+#include <linux/vs_network.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/io.h>
 #include <asm/processor.h>
+#include "internal.h"
 
 /* Gcc optimizes away "strlen(x)" for constant x */
 #define ADDBUF(buffer, string) \
@@ -89,10 +94,13 @@ static inline char * task_name(struct task_struct *p, char * buf)
 {
        int i;
        char * name;
+       char tcomm[sizeof(p->comm)];
+
+       get_task_comm(tcomm, p);
 
        ADDBUF(buf, "Name:\t");
-       name = p->comm;
-       i = sizeof(p->comm);
+       name = tcomm;
+       i = sizeof(tcomm);
        do {
                unsigned char c = *name;
                name++;
@@ -128,19 +136,22 @@ static const char *task_state_array[] = {
        "S (sleeping)",         /*  1 */
        "D (disk sleep)",       /*  2 */
        "T (stopped)",          /*  4 */
-       "Z (zombie)",           /*  8 */
-       "X (dead)",             /* 16 */
-       "H (on hold)"           /* 32 */
+       "T (tracing stop)",     /*  8 */
+       "H (on hold)",          /* 16 */
+       "Z (zombie)",           /* 32 */
+       "X (dead)",             /* 64 */
 };
 
 static inline const char * get_task_state(struct task_struct *tsk)
 {
-       unsigned int state = tsk->state & (TASK_RUNNING |
-                                          TASK_INTERRUPTIBLE |
-                                          TASK_UNINTERRUPTIBLE |
-                                          TASK_ZOMBIE |
-                                          TASK_STOPPED |
-                                          TASK_ONHOLD);
+       unsigned int state = (tsk->state & (TASK_RUNNING |
+                                           TASK_INTERRUPTIBLE |
+                                           TASK_UNINTERRUPTIBLE |
+                                           TASK_STOPPED |
+                                          TASK_TRACED |
+                                          TASK_ONHOLD)) |
+                       (tsk->exit_state & (EXIT_ZOMBIE |
+                                           EXIT_DEAD));
        const char **p = &task_state_array[0];
 
        while (state) {
@@ -152,11 +163,20 @@ static inline const char * get_task_state(struct task_struct *tsk)
 
 static inline char * task_state(struct task_struct *p, char *buffer)
 {
+       struct task_struct *tracer;
+       pid_t pid, ptgid, tracer_pid, tgid;
+       struct group_info *group_info;
        int g;
-       pid_t ppid;
+       struct fdtable *fdt = NULL;
+
+       rcu_read_lock();
+       tracer = tracehook_tracer_task(p);
+       tracer_pid = tracer == NULL ? 0 : tracer->pid;
+       tgid = vx_map_tgid(p->tgid);
+       pid = vx_map_pid(p->pid);
+       ptgid = vx_map_pid(pid_alive(p) ?
+               rcu_dereference(p->parent)->tgid : 0);
 
-       read_lock(&tasklist_lock);
-       ppid = vx_map_tgid(current->vx_info, p->real_parent->pid);
        buffer += sprintf(buffer,
                "State:\t%s\n"
                "SleepAVG:\t%lu%%\n"
@@ -168,23 +188,27 @@ static inline char * task_state(struct task_struct *p, char *buffer)
                "Gid:\t%d\t%d\t%d\t%d\n",
                get_task_state(p),
                (p->sleep_avg/1024)*100/(1020000000/1024),
-               p->tgid,
-               p->pid, p->pid ? ppid : 0,
-               p->pid && p->ptrace ? p->parent->pid : 0,
+               tgid, pid, (pid > 1) ? ptgid : 0,
+               tracer_pid,
                p->uid, p->euid, p->suid, p->fsuid,
                p->gid, p->egid, p->sgid, p->fsgid);
-       read_unlock(&tasklist_lock);
+
        task_lock(p);
+       if (p->files)
+               fdt = files_fdtable(p->files);
        buffer += sprintf(buffer,
                "FDSize:\t%d\n"
                "Groups:\t",
-               p->files ? p->files->max_fds : 0);
+               fdt ? fdt->max_fds : 0);
+       rcu_read_unlock();
+
+       group_info = p->group_info;
+       get_group_info(group_info);
        task_unlock(p);
 
-       get_group_info(p->group_info);
-       for (g = 0; g < min(p->group_info->ngroups,NGROUPS_SMALL); g++)
-               buffer += sprintf(buffer, "%d ", GROUP_AT(p->group_info,g));
-       put_group_info(p->group_info);
+       for (g = 0; g < min(group_info->ngroups,NGROUPS_SMALL); g++)
+               buffer += sprintf(buffer, "%d ", GROUP_AT(group_info,g));
+       put_group_info(group_info);
 
        buffer += sprintf(buffer, "\n");
        return buffer;
@@ -232,8 +256,11 @@ static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign,
 
 static inline char * task_sig(struct task_struct *p, char *buffer)
 {
+       unsigned long flags;
        sigset_t pending, shpending, blocked, ignored, caught;
        int num_threads = 0;
+       unsigned long qsize = 0;
+       unsigned long qlim = 0;
 
        sigemptyset(&pending);
        sigemptyset(&shpending);
@@ -241,20 +268,21 @@ static inline char * task_sig(struct task_struct *p, char *buffer)
        sigemptyset(&ignored);
        sigemptyset(&caught);
 
-       /* Gather all the data with the appropriate locks held */
-       read_lock(&tasklist_lock);
-       if (p->sighand) {
-               spin_lock_irq(&p->sighand->siglock);
+       rcu_read_lock();
+       if (lock_task_sighand(p, &flags)) {
                pending = p->pending.signal;
                shpending = p->signal->shared_pending.signal;
                blocked = p->blocked;
                collect_sigign_sigcatch(p, &ignored, &caught);
                num_threads = atomic_read(&p->signal->count);
-               spin_unlock_irq(&p->sighand->siglock);
+               qsize = atomic_read(&p->user->sigpending);
+               qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur;
+               unlock_task_sighand(p, &flags);
        }
-       read_unlock(&tasklist_lock);
+       rcu_read_unlock();
 
        buffer += sprintf(buffer, "Threads:\t%d\n", num_threads);
+       buffer += sprintf(buffer, "SigQ:\t%lu/%lu\n", qsize, qlim);
 
        /* render them all */
        buffer = render_sigset_t("SigPnd:\t", &pending, buffer);
@@ -268,20 +296,24 @@ static inline char * task_sig(struct task_struct *p, char *buffer)
 
 static inline char *task_cap(struct task_struct *p, char *buffer)
 {
-    return buffer + sprintf(buffer, "CapInh:\t%016x\n"
-                           "CapPrm:\t%016x\n"
-                           "CapEff:\t%016x\n",
-                           cap_t(p->cap_inheritable),
-                           cap_t(p->cap_permitted),
-                           cap_t(p->cap_effective));
+       struct vx_info *vxi = p->vx_info;       /* NOTE(review): read without task_get_vx_info()/put_vx_info(), unlike proc_pid_status() - confirm safe */
+
+       return buffer + sprintf(buffer,         /* append the three capability sets as hex; returns the new end of buffer */
+               "CapInh:\t%016x\n"
+               "CapPrm:\t%016x\n"
+               "CapEff:\t%016x\n",
+               (unsigned)vx_info_mbcap(vxi, p->cap_inheritable),       /* cast: %x takes an unsigned int */
+               (unsigned)vx_info_mbcap(vxi, p->cap_permitted),
+               (unsigned)vx_info_mbcap(vxi, p->cap_effective));
 }
 
-extern char *task_mem(struct mm_struct *, char *);
 int proc_pid_status(struct task_struct *task, char * buffer)
 {
        char * orig = buffer;
-#ifdef CONFIG_VSERVER_LEGACY           
+#ifdef CONFIG_VSERVER_LEGACY
        struct vx_info *vxi;
+#endif
+#ifdef CONFIG_VSERVER_LEGACYNET
        struct nx_info *nxi;
 #endif
        struct mm_struct *mm = get_task_mm(task);
@@ -295,13 +327,16 @@ int proc_pid_status(struct task_struct *task, char * buffer)
        }
        buffer = task_sig(task, buffer);
        buffer = task_cap(task, buffer);
+       buffer = cpuset_task_status_allowed(task, buffer);
 
-#ifdef CONFIG_VSERVER_LEGACY           
+       if (task_vx_flags(task, VXF_HIDE_VINFO, 0))
+               goto skip;
+#ifdef CONFIG_VSERVER_LEGACY
        buffer += sprintf (buffer,"s_context: %d\n", vx_task_xid(task));
        vxi = task_get_vx_info(task);
        if (vxi) {
                buffer += sprintf (buffer,"ctxflags: %08llx\n"
-                       ,vxi->vx_flags);
+                       ,(unsigned long long)vxi->vx_flags);
                buffer += sprintf (buffer,"initpid: %d\n"
                        ,vxi->vx_initpid);
        } else {
@@ -309,6 +344,10 @@ int proc_pid_status(struct task_struct *task, char * buffer)
                buffer += sprintf (buffer,"initpid: none\n");
        }
        put_vx_info(vxi);
+#else
+       buffer += sprintf (buffer,"VxID: %d\n", vx_task_xid(task));
+#endif
+#ifdef CONFIG_VSERVER_LEGACYNET
        nxi = task_get_nx_info(task);
        if (nxi) {
                int i;
@@ -322,97 +361,139 @@ int proc_pid_status(struct task_struct *task, char * buffer)
                *buffer++ = '\n';
                buffer += sprintf (buffer,"ipv4root_bcast: %08x\n"
                        ,nxi->v4_bcast);
-               buffer += sprintf (buffer,"ipv4root_refcnt: %d\n"
-                       ,atomic_read(&nxi->nx_refcount));
        } else {
                buffer += sprintf (buffer,"ipv4root: 0\n");
                buffer += sprintf (buffer,"ipv4root_bcast: 0\n");
        }
        put_nx_info(nxi);
 #endif
-#if defined(CONFIG_ARCH_S390)
+skip:
+#if defined(CONFIG_S390)
        buffer = task_show_regs(task, buffer);
 #endif
        return buffer - orig;
 }
 
-extern unsigned long task_vsize(struct mm_struct *);
-int proc_pid_stat(struct task_struct *task, char * buffer)
+static int do_task_stat(struct task_struct *task, char * buffer, int whole)
 {
-       unsigned long vsize, eip, esp, wchan;
+       unsigned long vsize, eip, esp, wchan = ~0UL;
        long priority, nice;
-       unsigned long long bias_jiffies;
        int tty_pgrp = -1, tty_nr = 0;
        sigset_t sigign, sigcatch;
        char state;
        int res;
-       pid_t ppid, pgid = -1, sid = -1;
+       pid_t pid = 0, ppid = 0, pgid = -1, sid = -1;
        int num_threads = 0;
        struct mm_struct *mm;
        unsigned long long start_time;
+       unsigned long cmin_flt = 0, cmaj_flt = 0;
+       unsigned long  min_flt = 0,  maj_flt = 0;
+       cputime_t cutime, cstime, utime, stime;
+       unsigned long rsslim = 0;
+       char tcomm[sizeof(task->comm)];
+       unsigned long flags;
 
        state = *get_task_state(task);
        vsize = eip = esp = 0;
-       bias_jiffies = INITIAL_JIFFIES;
-
-       task_lock(task);
-       if (__vx_task_flags(task, VXF_VIRT_UPTIME, 0)) {
-               bias_jiffies = task->vx_info->cvirt.bias_jiffies;
-               /* hmm, do we need that? */
-               if (bias_jiffies > task->start_time)
-                       bias_jiffies = task->start_time;
-       }
-
-       mm = task->mm;
-       if(mm)
-               mm = mmgrab(mm);
-       task_unlock(task);
+       mm = get_task_mm(task);
        if (mm) {
-               down_read(&mm->mmap_sem);
                vsize = task_vsize(mm);
                eip = KSTK_EIP(task);
                esp = KSTK_ESP(task);
-               up_read(&mm->mmap_sem);
        }
 
-       wchan = get_wchan(task);
+       get_task_comm(tcomm, task);
 
        sigemptyset(&sigign);
        sigemptyset(&sigcatch);
-       read_lock(&tasklist_lock);
-       if (task->sighand) {
-               spin_lock_irq(&task->sighand->siglock);
-               num_threads = atomic_read(&task->signal->count);
+       cutime = cstime = utime = stime = cputime_zero;
+
+       rcu_read_lock();
+       if (lock_task_sighand(task, &flags)) {
+               struct signal_struct *sig = task->signal;
+
+               if (sig->tty) {
+                       tty_pgrp = sig->tty->pgrp;
+                       tty_nr = new_encode_dev(tty_devnum(sig->tty));
+               }
+
+               num_threads = atomic_read(&sig->count);
                collect_sigign_sigcatch(task, &sigign, &sigcatch);
-               spin_unlock_irq(&task->sighand->siglock);
-       }
-       if (task->signal) {
-               if (task->signal->tty) {
-                       tty_pgrp = task->signal->tty->pgrp;
-                       tty_nr = new_encode_dev(tty_devnum(task->signal->tty));
+
+               cmin_flt = sig->cmin_flt;
+               cmaj_flt = sig->cmaj_flt;
+               cutime = sig->cutime;
+               cstime = sig->cstime;
+               rsslim = sig->rlim[RLIMIT_RSS].rlim_cur;
+
+               /* add up live thread stats at the group level */
+               if (whole) {
+                       struct task_struct *t = task;
+                       do {
+                               min_flt += t->min_flt;
+                               maj_flt += t->maj_flt;
+                               utime = cputime_add(utime, t->utime);
+                               stime = cputime_add(stime, t->stime);
+                               t = next_thread(t);
+                       } while (t != task);
+
+                       min_flt += sig->min_flt;
+                       maj_flt += sig->maj_flt;
+                       utime = cputime_add(utime, sig->utime);
+                       stime = cputime_add(stime, sig->stime);
                }
-               pgid = process_group(task);
-               sid = task->signal->session;
+
+               sid = signal_session(sig);
+               pid = vx_info_map_pid(task->vx_info, task->pid);
+               pgid = vx_info_map_pid(task->vx_info, process_group(task));
+               ppid = (pid > 1) ? vx_info_map_tgid(task->vx_info,
+                       rcu_dereference(task->parent)->tgid) : 0;
+
+               unlock_task_sighand(task, &flags);
+       }
+       rcu_read_unlock();
+
+       if (!whole || num_threads<2) {
+               wchan = 0;
+               if (current->uid == task->uid || current->euid == task->uid ||
+                               capable(CAP_SYS_NICE))
+                       wchan = get_wchan(task);
+       }
+       if (!whole) {
+               min_flt = task->min_flt;
+               maj_flt = task->maj_flt;
+               utime = task->utime;
+               stime = task->stime;
        }
-       read_unlock(&tasklist_lock);
 
        /* scale priority and nice values from timeslices to -20..20 */
        /* to make it look like a "normal" Unix priority/nice value  */
        priority = task_prio(task);
        nice = task_nice(task);
 
-       read_lock(&tasklist_lock);
-       ppid = task->pid ? task->real_parent->pid : 0;
-       read_unlock(&tasklist_lock);
-
        /* Temporary variable needed for gcc-2.96 */
-       start_time = jiffies_64_to_clock_t(task->start_time - bias_jiffies);
+       /* convert timespec -> nsec */
+       start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC
+                               + task->start_time.tv_nsec;
+       /* convert nsec -> ticks */
+       start_time = nsec_to_clock_t(start_time);
+
+       /* fixup start time for virt uptime */
+       if (vx_flags(VXF_VIRT_UPTIME, 0)) {
+               unsigned long long bias =
+                       current->vx_info->cvirt.bias_clock;
+
+               if (start_time > bias)
+                       start_time -= bias;
+               else
+                       start_time = 0;
+       }
 
        res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
-%lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \
-%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n",
-               task->pid,
-               task->comm,
+%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
+%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu %llu\n",
+               pid,
+               tcomm,
                state,
                ppid,
                pgid,
@@ -420,22 +501,21 @@ int proc_pid_stat(struct task_struct *task, char * buffer)
                tty_nr,
                tty_pgrp,
                task->flags,
-               task->min_flt,
-               task->cmin_flt,
-               task->maj_flt,
-               task->cmaj_flt,
-               jiffies_to_clock_t(task->utime),
-               jiffies_to_clock_t(task->stime),
-               jiffies_to_clock_t(task->cutime),
-               jiffies_to_clock_t(task->cstime),
+               min_flt,
+               cmin_flt,
+               maj_flt,
+               cmaj_flt,
+               cputime_to_clock_t(utime),
+               cputime_to_clock_t(stime),
+               cputime_to_clock_t(cutime),
+               cputime_to_clock_t(cstime),
                priority,
                nice,
                num_threads,
-               jiffies_to_clock_t(task->it_real_value),
                start_time,
                vsize,
-               mm ? mm->rss : 0, /* you might want to shift this left 3 */
-               task->rlim[RLIMIT_RSS].rlim_cur,
+               mm ? get_mm_rss(mm) : 0,
+               rsslim,
                mm ? mm->start_code : 0,
                mm ? mm->end_code : 0,
                mm ? mm->start_stack : 0,
@@ -455,23 +535,30 @@ int proc_pid_stat(struct task_struct *task, char * buffer)
                task->exit_signal,
                task_cpu(task),
                task->rt_priority,
-               task->policy);
+               task->policy,
+               (unsigned long long)delayacct_blkio_ticks(task));
        if(mm)
                mmput(mm);
        return res;
 }
 
-extern int task_statm(struct mm_struct *, int *, int *, int *, int *);
+int proc_tid_stat(struct task_struct *task, char * buffer)
+{
+       return do_task_stat(task, buffer, 0);   /* whole == 0: fault counts and CPU times of this task only */
+}
+
+int proc_tgid_stat(struct task_struct *task, char * buffer)
+{
+       return do_task_stat(task, buffer, 1);   /* whole == 1: fault counts and CPU times summed over the thread group */
+}
+
 int proc_pid_statm(struct task_struct *task, char *buffer)
 {
        int size = 0, resident = 0, shared = 0, text = 0, lib = 0, data = 0;
        struct mm_struct *mm = get_task_mm(task);
        
        if (mm) {
-               down_read(&mm->mmap_sem);
                size = task_statm(mm, &shared, &text, &data, &resident);
-               up_read(&mm->mmap_sem);
-
                mmput(mm);
        }