Add changes from the Linux-2.6 tree.
[linux-2.6.git] / fs / proc / array.c
index 454c280..2407c83 100644 (file)
@@ -52,7 +52,6 @@
  *                      :  base.c too.
  */
 
-#include <linux/config.h>
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/time.h>
 #include <linux/highmem.h>
 #include <linux/file.h>
 #include <linux/times.h>
-#include <linux/vs_base.h>
+#include <linux/cpuset.h>
+#include <linux/tracehook.h>
+#include <linux/rcupdate.h>
+#include <linux/delayacct.h>
 #include <linux/vs_context.h>
 #include <linux/vs_network.h>
-#include <linux/vs_cvirt.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/io.h>
 #include <asm/processor.h>
+#include "internal.h"
 
 /* Gcc optimizes away "strlen(x)" for constant x */
 #define ADDBUF(buffer, string) \
@@ -135,9 +137,9 @@ static const char *task_state_array[] = {
        "D (disk sleep)",       /*  2 */
        "T (stopped)",          /*  4 */
        "T (tracing stop)",     /*  8 */
-       "Z (zombie)",           /* 16 */
-       "X (dead)",             /* 32 */
-       "H (on hold)"           /* 64 */
+       "H (on hold)",          /* 16 */
+       "Z (zombie)",           /* 32 */
+       "X (dead)",             /* 64 */
 };
 
 static inline const char * get_task_state(struct task_struct *tsk)
@@ -146,8 +148,8 @@ static inline const char * get_task_state(struct task_struct *tsk)
                                            TASK_INTERRUPTIBLE |
                                            TASK_UNINTERRUPTIBLE |
                                            TASK_STOPPED |
-                                           TASK_TRACED |
-                                           TASK_ONHOLD)) |
+                                          TASK_TRACED |
+                                          TASK_ONHOLD)) |
                        (tsk->exit_state & (EXIT_ZOMBIE |
                                            EXIT_DEAD));
        const char **p = &task_state_array[0];
@@ -161,15 +163,20 @@ static inline const char * get_task_state(struct task_struct *tsk)
 
 static inline char * task_state(struct task_struct *p, char *buffer)
 {
+       struct task_struct *tracer;
+       pid_t pid, ptgid, tracer_pid, tgid;
        struct group_info *group_info;
        int g;
-       pid_t pid, ppid, tppid, tgid;
+       struct fdtable *fdt = NULL;
 
-       read_lock(&tasklist_lock);
+       rcu_read_lock();
+       tracer = tracehook_tracer_task(p);
+       tracer_pid = tracer == NULL ? 0 : tracer->pid;
        tgid = vx_map_tgid(p->tgid);
        pid = vx_map_pid(p->pid);
-       ppid = vx_map_pid(p->real_parent->pid);
-       tppid = vx_map_pid(p->parent->pid);
+       ptgid = vx_map_pid(pid_alive(p) ?
+               rcu_dereference(p->parent)->tgid : 0);
+
        buffer += sprintf(buffer,
                "State:\t%s\n"
                "SleepAVG:\t%lu%%\n"
@@ -181,16 +188,19 @@ static inline char * task_state(struct task_struct *p, char *buffer)
                "Gid:\t%d\t%d\t%d\t%d\n",
                get_task_state(p),
                (p->sleep_avg/1024)*100/(1020000000/1024),
-               tgid, pid, (pid > 1) ? ppid : 0,
-               p->pid && p->ptrace ? tppid : 0,
+               tgid, pid, (pid > 1) ? ptgid : 0,
+               tracer_pid,
                p->uid, p->euid, p->suid, p->fsuid,
                p->gid, p->egid, p->sgid, p->fsgid);
-       read_unlock(&tasklist_lock);
+
        task_lock(p);
+       if (p->files)
+               fdt = files_fdtable(p->files);
        buffer += sprintf(buffer,
                "FDSize:\t%d\n"
                "Groups:\t",
-               p->files ? p->files->max_fds : 0);
+               fdt ? fdt->max_fds : 0);
+       rcu_read_unlock();
 
        group_info = p->group_info;
        get_group_info(group_info);
@@ -246,8 +256,11 @@ static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign,
 
 static inline char * task_sig(struct task_struct *p, char *buffer)
 {
+       unsigned long flags;
        sigset_t pending, shpending, blocked, ignored, caught;
        int num_threads = 0;
+       unsigned long qsize = 0;
+       unsigned long qlim = 0;
 
        sigemptyset(&pending);
        sigemptyset(&shpending);
@@ -255,20 +268,21 @@ static inline char * task_sig(struct task_struct *p, char *buffer)
        sigemptyset(&ignored);
        sigemptyset(&caught);
 
-       /* Gather all the data with the appropriate locks held */
-       read_lock(&tasklist_lock);
-       if (p->sighand) {
-               spin_lock_irq(&p->sighand->siglock);
+       rcu_read_lock();
+       if (lock_task_sighand(p, &flags)) {
                pending = p->pending.signal;
                shpending = p->signal->shared_pending.signal;
                blocked = p->blocked;
                collect_sigign_sigcatch(p, &ignored, &caught);
                num_threads = atomic_read(&p->signal->count);
-               spin_unlock_irq(&p->sighand->siglock);
+               qsize = atomic_read(&p->user->sigpending);
+               qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur;
+               unlock_task_sighand(p, &flags);
        }
-       read_unlock(&tasklist_lock);
+       rcu_read_unlock();
 
        buffer += sprintf(buffer, "Threads:\t%d\n", num_threads);
+       buffer += sprintf(buffer, "SigQ:\t%lu/%lu\n", qsize, qlim);
 
        /* render them all */
        buffer = render_sigset_t("SigPnd:\t", &pending, buffer);
@@ -282,12 +296,15 @@ static inline char * task_sig(struct task_struct *p, char *buffer)
 
 static inline char *task_cap(struct task_struct *p, char *buffer)
 {
-    return buffer + sprintf(buffer, "CapInh:\t%016x\n"
-                           "CapPrm:\t%016x\n"
-                           "CapEff:\t%016x\n",
-                           cap_t(p->cap_inheritable),
-                           cap_t(p->cap_permitted),
-                           cap_t(p->cap_effective));
+       struct vx_info *vxi = p->vx_info;
+
+       return buffer + sprintf(buffer,
+               "CapInh:\t%016x\n"
+               "CapPrm:\t%016x\n"
+               "CapEff:\t%016x\n",
+               (unsigned)vx_info_mbcap(vxi, p->cap_inheritable),
+               (unsigned)vx_info_mbcap(vxi, p->cap_permitted),
+               (unsigned)vx_info_mbcap(vxi, p->cap_effective));
 }
 
 int proc_pid_status(struct task_struct *task, char * buffer)
@@ -295,6 +312,8 @@ int proc_pid_status(struct task_struct *task, char * buffer)
        char * orig = buffer;
 #ifdef CONFIG_VSERVER_LEGACY
        struct vx_info *vxi;
+#endif
+#ifdef CONFIG_VSERVER_LEGACYNET
        struct nx_info *nxi;
 #endif
        struct mm_struct *mm = get_task_mm(task);
@@ -308,7 +327,10 @@ int proc_pid_status(struct task_struct *task, char * buffer)
        }
        buffer = task_sig(task, buffer);
        buffer = task_cap(task, buffer);
+       buffer = cpuset_task_status_allowed(task, buffer);
 
+       if (task_vx_flags(task, VXF_HIDE_VINFO, 0))
+               goto skip;
 #ifdef CONFIG_VSERVER_LEGACY
        buffer += sprintf (buffer,"s_context: %d\n", vx_task_xid(task));
        vxi = task_get_vx_info(task);
@@ -322,6 +344,10 @@ int proc_pid_status(struct task_struct *task, char * buffer)
                buffer += sprintf (buffer,"initpid: none\n");
        }
        put_vx_info(vxi);
+#else
+       buffer += sprintf (buffer,"VxID: %d\n", vx_task_xid(task));
+#endif
+#ifdef CONFIG_VSERVER_LEGACYNET
        nxi = task_get_nx_info(task);
        if (nxi) {
                int i;
@@ -341,27 +367,31 @@ int proc_pid_status(struct task_struct *task, char * buffer)
        }
        put_nx_info(nxi);
 #endif
-#if defined(CONFIG_ARCH_S390)
+skip:
+#if defined(CONFIG_S390)
        buffer = task_show_regs(task, buffer);
 #endif
        return buffer - orig;
 }
 
-int proc_pid_stat(struct task_struct *task, char * buffer)
+static int do_task_stat(struct task_struct *task, char * buffer, int whole)
 {
-       unsigned long vsize, eip, esp, wchan;
+       unsigned long vsize, eip, esp, wchan = ~0UL;
        long priority, nice;
-       unsigned long long bias_uptime = 0;
        int tty_pgrp = -1, tty_nr = 0;
        sigset_t sigign, sigcatch;
        char state;
        int res;
-       pid_t pid, ppid, pgid = -1, sid = -1;
+       pid_t pid = 0, ppid = 0, pgid = -1, sid = -1;
        int num_threads = 0;
        struct mm_struct *mm;
        unsigned long long start_time;
-       unsigned long cmin_flt = 0, cmaj_flt = 0, cutime = 0, cstime = 0;
+       unsigned long cmin_flt = 0, cmaj_flt = 0;
+       unsigned long  min_flt = 0,  maj_flt = 0;
+       cputime_t cutime, cstime, utime, stime;
+       unsigned long rsslim = 0;
        char tcomm[sizeof(task->comm)];
+       unsigned long flags;
 
        state = *get_task_state(task);
        vsize = eip = esp = 0;
@@ -373,60 +403,95 @@ int proc_pid_stat(struct task_struct *task, char * buffer)
        }
 
        get_task_comm(tcomm, task);
-       wchan = 0;
-       if (current->uid == task->uid || current->euid == task->uid ||
-                                                       capable(CAP_SYS_NICE))
-               wchan = get_wchan(task);
 
        sigemptyset(&sigign);
        sigemptyset(&sigcatch);
-       read_lock(&tasklist_lock);
-       if (task->sighand) {
-               spin_lock_irq(&task->sighand->siglock);
-               num_threads = atomic_read(&task->signal->count);
+       cutime = cstime = utime = stime = cputime_zero;
+
+       rcu_read_lock();
+       if (lock_task_sighand(task, &flags)) {
+               struct signal_struct *sig = task->signal;
+
+               if (sig->tty) {
+                       tty_pgrp = sig->tty->pgrp;
+                       tty_nr = new_encode_dev(tty_devnum(sig->tty));
+               }
+
+               num_threads = atomic_read(&sig->count);
                collect_sigign_sigcatch(task, &sigign, &sigcatch);
-               spin_unlock_irq(&task->sighand->siglock);
-       }
-       if (task->signal) {
-               if (task->signal->tty) {
-                       tty_pgrp = task->signal->tty->pgrp;
-                       tty_nr = new_encode_dev(tty_devnum(task->signal->tty));
+
+               cmin_flt = sig->cmin_flt;
+               cmaj_flt = sig->cmaj_flt;
+               cutime = sig->cutime;
+               cstime = sig->cstime;
+               rsslim = sig->rlim[RLIMIT_RSS].rlim_cur;
+
+               /* whole-group view: sum each thread's counters, then add sig's totals */
+               if (whole) {
+                       struct task_struct *t = task;
+                       do {
+                               min_flt += t->min_flt;
+                               maj_flt += t->maj_flt;
+                               utime = cputime_add(utime, t->utime);
+                               stime = cputime_add(stime, t->stime);
+                               t = next_thread(t);
+                       } while (t != task);
+
+                       min_flt += sig->min_flt;
+                       maj_flt += sig->maj_flt;
+                       utime = cputime_add(utime, sig->utime);
+                       stime = cputime_add(stime, sig->stime);
                }
-               pgid = process_group(task);
-               sid = task->signal->session;
-               cmin_flt = task->signal->cmin_flt;
-               cmaj_flt = task->signal->cmaj_flt;
-               cutime = task->signal->cutime;
-               cstime = task->signal->cstime;
+
+               sid = signal_session(sig);
+               pid = vx_info_map_pid(task->vx_info, task->pid);
+               pgid = vx_info_map_pid(task->vx_info, process_group(task));
+               ppid = (pid > 1) ? vx_info_map_tgid(task->vx_info,
+                       rcu_dereference(task->parent)->tgid) : 0;
+
+               unlock_task_sighand(task, &flags);
        }
-       if (task_vx_flags(task, VXF_VIRT_UPTIME, 0)) {
-               bias_uptime = task->vx_info->cvirt.bias_uptime.tv_sec * NSEC_PER_SEC
-                       + task->vx_info->cvirt.bias_uptime.tv_nsec;
+       rcu_read_unlock();
+
+       if (!whole || num_threads<2) {
+               wchan = 0;
+               if (current->uid == task->uid || current->euid == task->uid ||
+                               capable(CAP_SYS_NICE))
+                       wchan = get_wchan(task);
+       }
+       if (!whole) {
+               min_flt = task->min_flt;
+               maj_flt = task->maj_flt;
+               utime = task->utime;
+               stime = task->stime;
        }
-       read_unlock(&tasklist_lock);
 
        /* scale priority and nice values from timeslices to -20..20 */
        /* to make it look like a "normal" Unix priority/nice value  */
        priority = task_prio(task);
        nice = task_nice(task);
 
-       read_lock(&tasklist_lock);
-       pid = vx_info_map_pid(task->vx_info, task->pid);
-       ppid = (!(pid > 1)) ? 0 :
-               vx_info_map_pid(task->vx_info, task->real_parent->pid);
-       pgid = vx_info_map_pid(task->vx_info, pgid);
-       read_unlock(&tasklist_lock);
-
        /* Temporary variable needed for gcc-2.96 */
        /* convert timespec -> nsec*/
        start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC
                                + task->start_time.tv_nsec;
        /* convert nsec -> ticks */
-       start_time = nsec_to_clock_t(start_time - bias_uptime);
+       start_time = nsec_to_clock_t(start_time);
+
+       /* with VXF_VIRT_UPTIME, subtract current's cvirt.bias_clock (floor at 0) */
+       if (vx_flags(VXF_VIRT_UPTIME, 0)) {
+               unsigned long long bias =
+                       current->vx_info->cvirt.bias_clock;
+
+               if (start_time > bias)
+                       start_time -= bias;
+               else
+                       start_time = 0;
+       }
 
        res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
-%lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \
-%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n",
+%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
+%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu %llu\n",
                pid,
                tcomm,
                state,
@@ -436,22 +501,21 @@ int proc_pid_stat(struct task_struct *task, char * buffer)
                tty_nr,
                tty_pgrp,
                task->flags,
-               task->min_flt,
+               min_flt,
                cmin_flt,
-               task->maj_flt,
+               maj_flt,
                cmaj_flt,
-               jiffies_to_clock_t(task->utime),
-               jiffies_to_clock_t(task->stime),
-               jiffies_to_clock_t(cutime),
-               jiffies_to_clock_t(cstime),
+               cputime_to_clock_t(utime),
+               cputime_to_clock_t(stime),
+               cputime_to_clock_t(cutime),
+               cputime_to_clock_t(cstime),
                priority,
                nice,
                num_threads,
-               jiffies_to_clock_t(task->it_real_value),
                start_time,
                vsize,
-               mm ? mm->rss : 0, /* you might want to shift this left 3 */
-               task->rlim[RLIMIT_RSS].rlim_cur,
+               mm ? get_mm_rss(mm) : 0,
+               rsslim,
                mm ? mm->start_code : 0,
                mm ? mm->end_code : 0,
                mm ? mm->start_stack : 0,
@@ -471,12 +535,23 @@ int proc_pid_stat(struct task_struct *task, char * buffer)
                task->exit_signal,
                task_cpu(task),
                task->rt_priority,
-               task->policy);
+               task->policy,
+               (unsigned long long)delayacct_blkio_ticks(task));
        if(mm)
                mmput(mm);
        return res;
 }
 
+int proc_tid_stat(struct task_struct *task, char * buffer)
+{
+       return do_task_stat(task, buffer, 0);
+}
+
+int proc_tgid_stat(struct task_struct *task, char * buffer)
+{
+       return do_task_stat(task, buffer, 1);
+}
+
 int proc_pid_statm(struct task_struct *task, char *buffer)
 {
        int size = 0, resident = 0, shared = 0, text = 0, lib = 0, data = 0;
@@ -490,21 +565,3 @@ int proc_pid_statm(struct task_struct *task, char *buffer)
        return sprintf(buffer,"%d %d %d %d %d %d %d\n",
                       size, resident, shared, text, lib, data, 0);
 }
-
-
-int proc_pid_delay(struct task_struct *task, char * buffer)
-{
-       int res;
-
-       res  = sprintf(buffer,"%u %llu %llu %u %llu %u %llu\n",
-                      get_delay(task,runs),
-                      (unsigned long long)get_delay(task,runcpu_total),
-                      (unsigned long long)get_delay(task,waitcpu_total),
-                      get_delay(task,num_iowaits),
-                      (unsigned long long)get_delay(task,iowait_total),
-                      get_delay(task,num_memwaits),
-                      (unsigned long long)get_delay(task,mem_iowait_total)
-               );
-       return res;
-}
-