#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/vs_network.h>
-#include <linux/vs_cvirt.h>
+#include "internal.h"
/*
* For hysterical raisins we keep the same inumbers as in the old procfs.
PROC_TGID_MAPS,
PROC_TGID_MOUNTS,
PROC_TGID_WCHAN,
+#ifdef CONFIG_SCHEDSTATS
+ PROC_TGID_SCHEDSTAT,
+#endif
#ifdef CONFIG_SECURITY
PROC_TGID_ATTR,
PROC_TGID_ATTR_CURRENT,
#endif
PROC_TGID_VX_INFO,
PROC_TGID_IP_INFO,
+#ifdef CONFIG_AUDITSYSCALL
+ PROC_TGID_LOGINUID,
+#endif
PROC_TGID_FD_DIR,
+ PROC_TGID_OOM_SCORE,
+ PROC_TGID_OOM_ADJUST,
PROC_TID_INO,
PROC_TID_STATUS,
PROC_TID_MEM,
PROC_TID_MAPS,
PROC_TID_MOUNTS,
PROC_TID_WCHAN,
+#ifdef CONFIG_SCHEDSTATS
+ PROC_TID_SCHEDSTAT,
+#endif
#ifdef CONFIG_SECURITY
PROC_TID_ATTR,
PROC_TID_ATTR_CURRENT,
#endif
PROC_TID_VX_INFO,
PROC_TID_IP_INFO,
+#ifdef CONFIG_AUDITSYSCALL
+ PROC_TID_LOGINUID,
+#endif
PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */
+ PROC_TID_OOM_SCORE,
+ PROC_TID_OOM_ADJUST,
};
struct pid_entry {
#endif
#ifdef CONFIG_KALLSYMS
E(PROC_TGID_WCHAN, "wchan", S_IFREG|S_IRUGO),
+#endif
+#ifdef CONFIG_SCHEDSTATS
+ E(PROC_TGID_SCHEDSTAT, "schedstat", S_IFREG|S_IRUGO),
#endif
E(PROC_TGID_VX_INFO, "vinfo", S_IFREG|S_IRUGO),
E(PROC_TGID_IP_INFO, "ninfo", S_IFREG|S_IRUGO),
+ E(PROC_TGID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO),
+ E(PROC_TGID_OOM_ADJUST,"oom_adj", S_IFREG|S_IRUGO|S_IWUSR),
+#ifdef CONFIG_AUDITSYSCALL
+ E(PROC_TGID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO),
+#endif
{0,0,NULL,0}
};
static struct pid_entry tid_base_stuff[] = {
#endif
#ifdef CONFIG_KALLSYMS
E(PROC_TID_WCHAN, "wchan", S_IFREG|S_IRUGO),
+#endif
+#ifdef CONFIG_SCHEDSTATS
+ E(PROC_TID_SCHEDSTAT, "schedstat",S_IFREG|S_IRUGO),
#endif
E(PROC_TID_VX_INFO, "vinfo", S_IFREG|S_IRUGO),
E(PROC_TID_IP_INFO, "ninfo", S_IFREG|S_IRUGO),
+ E(PROC_TID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO),
+ E(PROC_TID_OOM_ADJUST, "oom_adj", S_IFREG|S_IRUGO|S_IWUSR),
+#ifdef CONFIG_AUDITSYSCALL
+ E(PROC_TID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO),
+#endif
{0,0,NULL,0}
};
#undef E
-static inline struct task_struct *proc_task(struct inode *inode)
-{
- return PROC_I(inode)->task;
-}
-
-static inline int proc_type(struct inode *inode)
-{
- return PROC_I(inode)->type;
-}
-
-int proc_pid_stat(struct task_struct*,char*);
-int proc_pid_status(struct task_struct*,char*);
-int proc_pid_statm(struct task_struct*,char*);
-int proc_pid_cpu(struct task_struct*,char*);
-
static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
{
struct task_struct *task = proc_task(inode);
return -ENOENT;
}
-static int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
-{
- struct vm_area_struct * vma;
- int result = -ENOENT;
- struct task_struct *task = proc_task(inode);
- struct mm_struct * mm = get_task_mm(task);
-
- if (!mm)
- goto out;
- down_read(&mm->mmap_sem);
- vma = mm->mmap;
- while (vma) {
- if ((vma->vm_flags & VM_EXECUTABLE) &&
- vma->vm_file) {
- *mnt = mntget(vma->vm_file->f_vfsmnt);
- *dentry = dget(vma->vm_file->f_dentry);
- result = 0;
- break;
- }
- vma = vma->vm_next;
- }
- up_read(&mm->mmap_sem);
- mmput(mm);
-out:
- return result;
-}
-
static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
{
struct fs_struct *fs;
#define MAY_PTRACE(task) \
(task == current || \
(task->parent == current && \
- (task->ptrace & PT_PTRACED) && task->state == TASK_STOPPED && \
+ (task->ptrace & PT_PTRACED) && \
+ (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \
security_ptrace(current,task) == 0))
static int may_ptrace_attach(struct task_struct *task)
struct mm_struct *mm = get_task_mm(task);
if (!mm)
goto out;
+ if (!mm->arg_end)
+ goto out_mm; /* Shh! No looking before we're done */
len = mm->arg_end - mm->arg_start;
// If the nul at the end of args has been overwritten, then
// assume application is using setproctitle(3).
- if (res > 0 && buffer[res-1] != '\0') {
+ if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) {
len = strnlen(buffer, res);
if (len < res) {
res = len;
res = strnlen(buffer, res);
}
}
+out_mm:
mmput(mm);
-
out:
return res;
}
char *modname;
const char *sym_name;
unsigned long wchan, size, offset;
- char namebuf[128];
+ char namebuf[KSYM_NAME_LEN+1];
wchan = get_wchan(task);
}
#endif /* CONFIG_KALLSYMS */
+#ifdef CONFIG_SCHEDSTATS
+/*
+ * Backs /proc/PID/schedstat.
+ *
+ * Writes the task's sched_info.cpu_time, sched_info.run_delay and
+ * sched_info.pcnt into @buffer as a single space-separated line and
+ * returns whatever sprintf() reports for that line.
+ */
+static int proc_pid_schedstat(struct task_struct *task, char *buffer)
+{
+	int written;
+
+	written = sprintf(buffer, "%lu %lu %lu\n",
+			  task->sched_info.cpu_time,
+			  task->sched_info.run_delay,
+			  task->sched_info.pcnt);
+	return written;
+}
+#endif
+
+/* Scoring function of the OOM killer; only its prototype lives here. */
+unsigned long badness(struct task_struct *p, unsigned long uptime);
+
+/*
+ * Formats badness() of @task, evaluated against the seconds part of the
+ * current monotonic clock, as a decimal line in @buffer.  Returns the
+ * sprintf() result.
+ */
+static int proc_oom_score(struct task_struct *task, char *buffer)
+{
+	struct timespec now;
+	unsigned long score;
+
+	do_posix_clock_monotonic_gettime(&now);
+	score = badness(task, now.tv_sec);
+
+	return sprintf(buffer, "%lu\n", score);
+}
+
/************************************************************************/
/* Here the fs part begins */
/************************************************************************/
static int proc_permission(struct inode *inode, int mask, struct nameidata *nd)
{
- if (vfs_permission(inode, mask) != 0)
+ if (generic_permission(inode, mask, NULL) != 0) /* NOTE(review): NULL is presumably "no ACL-check callback" - confirm against generic_permission() */
return -EACCES;
return proc_check_root(inode);
}
struct inode * inode = file->f_dentry->d_inode;
unsigned long page;
ssize_t length;
- ssize_t end;
struct task_struct *task = proc_task(inode);
if (count > PROC_BLOCK_SIZE)
length = PROC_I(inode)->op.proc_read(task, (char*)page);
- if (length < 0) {
- free_page(page);
- return length;
- }
- /* Static 4kB (or whatever) block capacity */
- if (*ppos >= length) {
- free_page(page);
- return 0;
- }
- if (count + *ppos > length)
- count = length - *ppos;
- end = count + *ppos;
- if (copy_to_user(buf, (char *) page + *ppos, count))
- count = -EFAULT;
- else
- *ppos = end;
+ if (length >= 0)
+ length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
free_page(page);
- return count;
+ return length;
}
static struct file_operations proc_info_file_operations = {
.open = mem_open,
};
+/*
+ * read() handler for the oom_adj file.
+ *
+ * Renders the task's oomkilladj as "<value>\n" and hands the text to
+ * simple_read_from_buffer(), the same helper the other read handlers in
+ * this file use, so offset handling, short reads and the user-space copy
+ * are done in one well-tested place.
+ *
+ * @file:  open file; the target task is taken from its inode
+ * @buf:   user-space destination
+ * @count: maximum number of bytes to return
+ * @ppos:  file position, advanced by the number of bytes copied
+ *
+ * Returns the number of bytes copied, 0 at end of text, or the negative
+ * error reported by simple_read_from_buffer().
+ */
+static ssize_t oom_adjust_read(struct file *file, char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	struct task_struct *task = proc_task(file->f_dentry->d_inode);
+	char buffer[8];
+	size_t len;
+
+	/* Bounded formatting: never write past the small stack buffer. */
+	len = scnprintf(buffer, sizeof(buffer), "%i\n", task->oomkilladj);
+
+	return simple_read_from_buffer(buf, count, ppos, buffer, len);
+}
+
+/*
+ * write() handler for the oom_adj file.
+ *
+ * Parses a signed integer (any base accepted by simple_strtol with base
+ * 0) from at most 6 bytes of user input and stores it in the task's
+ * oomkilladj.  An optional trailing newline is consumed.
+ *
+ * The task is only modified after the input has been fully validated:
+ * input from which no number could be parsed fails with -EIO and leaves
+ * oomkilladj untouched.
+ *
+ * @file:  open file; the target task is taken from its inode
+ * @buf:   user-space source
+ * @count: number of bytes offered by the caller
+ * @ppos:  unused
+ *
+ * Returns the number of bytes consumed, -EPERM without CAP_SYS_RESOURCE,
+ * -EFAULT on a bad user pointer, -EIO if nothing was parsed, or -EINVAL
+ * if the value is outside [-16, 15].
+ */
+static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
+				 size_t count, loff_t *ppos)
+{
+	struct task_struct *task = proc_task(file->f_dentry->d_inode);
+	char buffer[8], *end;
+	int oom_adjust;
+
+	if (!capable(CAP_SYS_RESOURCE))
+		return -EPERM;
+
+	/* Zero-fill so the (at most 6) copied bytes are NUL-terminated. */
+	memset(buffer, 0, sizeof(buffer));
+	if (count > 6)
+		count = 6;
+	if (copy_from_user(buffer, buf, count))
+		return -EFAULT;
+
+	oom_adjust = simple_strtol(buffer, &end, 0);
+	/* Nothing parsed: bail out before touching the task. */
+	if (end == buffer)
+		return -EIO;
+	if (oom_adjust < -16 || oom_adjust > 15)
+		return -EINVAL;
+	if (*end == '\n')
+		end++;
+
+	task->oomkilladj = oom_adjust;
+	return end - buffer;
+}
+
+/*
+ * File operations installed on the oom_adj entries.  Uses C99 designated
+ * initializers like the other operation tables in this file instead of
+ * the obsolete GNU "field:" form.
+ */
+static struct file_operations proc_oom_adjust_operations = {
+	.read		= oom_adjust_read,
+	.write		= oom_adjust_write,
+};
+
static struct inode_operations proc_mem_inode_operations = {
.permission = proc_permission,
};
+#ifdef CONFIG_AUDITSYSCALL
+#define TMPBUFLEN 21
+/*
+ * read() handler for the loginuid file: formats the value returned by
+ * audit_get_loginuid() for the task's audit context as an unsigned
+ * decimal (no trailing newline) and serves it through
+ * simple_read_from_buffer().
+ */
+static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
+				  size_t count, loff_t *ppos)
+{
+	struct task_struct *tsk = proc_task(file->f_dentry->d_inode);
+	char text[TMPBUFLEN];
+	ssize_t text_len;
+
+	text_len = scnprintf(text, TMPBUFLEN, "%u",
+			     audit_get_loginuid(tsk->audit_context));
+
+	return simple_read_from_buffer(buf, count, ppos, text, text_len);
+}
+
+/*
+ * write() handler for the loginuid file.
+ *
+ * Parses an unsigned decimal uid from the user buffer and passes it to
+ * audit_set_loginuid() for the task's audit context.
+ *
+ * The input is copied into a scratch page that the allocator does not
+ * clear, so the copy is explicitly NUL-terminated before parsing and the
+ * accepted length is capped at PAGE_SIZE - 1 to leave room for the
+ * terminator.  Without this the parser could consume stale bytes that
+ * follow the user data, or run past the end of the page.
+ *
+ * @file:  open file; the target task is taken from its inode
+ * @buf:   user-space source
+ * @count: number of bytes offered by the caller
+ * @ppos:  must be 0; partial writes are rejected
+ *
+ * Returns the number of bytes accepted on success, -EPERM if the caller
+ * lacks CAP_AUDIT_CONTROL or is not the target task, -EINVAL for a
+ * non-zero offset or unparsable input, -ENOMEM if no page is available,
+ * -EFAULT on a bad user pointer, or the error from audit_set_loginuid().
+ */
+static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
+				   size_t count, loff_t *ppos)
+{
+	struct inode * inode = file->f_dentry->d_inode;
+	char *page, *tmp;
+	ssize_t length;
+	struct task_struct *task = proc_task(inode);
+	uid_t loginuid;
+
+	if (!capable(CAP_AUDIT_CONTROL))
+		return -EPERM;
+
+	/* Only the task itself may change its loginuid. */
+	if (current != task)
+		return -EPERM;
+
+	/* Leave room for the terminating NUL written below. */
+	if (count >= PAGE_SIZE)
+		count = PAGE_SIZE - 1;
+
+	if (*ppos != 0) {
+		/* No partial writes. */
+		return -EINVAL;
+	}
+	page = (char*)__get_free_page(GFP_USER);
+	if (!page)
+		return -ENOMEM;
+	length = -EFAULT;
+	if (copy_from_user(page, buf, count))
+		goto out_free_page;
+
+	/* The page is not zeroed: terminate before handing it to the parser. */
+	page[count] = '\0';
+	loginuid = simple_strtoul(page, &tmp, 10);
+	if (tmp == page) {
+		length = -EINVAL;
+		goto out_free_page;
+	}
+	length = audit_set_loginuid(task->audit_context, loginuid);
+	if (likely(length == 0))
+		length = count;
+
+out_free_page:
+	free_page((unsigned long) page);
+	return length;
+}
+
+/* File operations installed on the loginuid entries. */
+static struct file_operations proc_loginuid_operations = {
+	.write		= proc_loginuid_write,
+	.read		= proc_loginuid_read,
+};
+#endif
+
static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
{
struct inode *inode = dentry->d_inode;
.follow_link = proc_pid_follow_link
};
-static int pid_alive(struct task_struct *p)
-{
- BUG_ON(p->pids[PIDTYPE_PID].pidptr != &p->pids[PIDTYPE_PID].pid);
- return atomic_read(&p->pids[PIDTYPE_PID].pid.count);
-}
-
#define NUMBUF 10
static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
struct inode *inode = dentry->d_inode;
struct pid_entry *p;
ino_t ino;
- int ret;
+ int ret, hide;
ret = -ENOENT;
if (!pid_alive(proc_task(inode)))
goto out;
}
p = ents + i;
+ hide = vx_flags(VXF_INFO_HIDE, 0);
while (p->name) {
+ if (hide) {
+ switch (p->type) {
+ case PROC_TGID_VX_INFO:
+ case PROC_TGID_IP_INFO:
+ goto skip;
+ }
+ }
if (filldir(dirent, p->name, p->len, filp->f_pos,
fake_ino(pid, p->type), p->mode >> 12) < 0)
goto out;
filp->f_pos++;
+ skip:
p++;
}
}
struct inode *inode = dentry->d_inode;
struct task_struct *task = proc_task(inode);
- if (!vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT))
+ if (!vx_check(vx_task_xid(task), VX_IDENT))
goto out_drop;
/* discard wrong fakeinit */
struct inode * inode = file->f_dentry->d_inode;
unsigned long page;
ssize_t length;
- ssize_t end;
struct task_struct *task = proc_task(inode);
if (count > PAGE_SIZE)
length = security_getprocattr(task,
(char*)file->f_dentry->d_name.name,
(void*)page, count);
- if (length < 0) {
- free_page(page);
- return length;
- }
- /* Static 4kB (or whatever) block capacity */
- if (*ppos >= length) {
- free_page(page);
- return 0;
- }
- if (count + *ppos > length)
- count = length - *ppos;
- end = count + *ppos;
- if (copy_to_user(buf, (char *) page + *ppos, count))
- count = -EFAULT;
- else
- *ppos = end;
+ if (length >= 0)
+ length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
free_page(page);
- return count;
+ return length;
}
static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
static struct inode_operations proc_tgid_attr_inode_operations;
#endif
+extern int proc_pid_vx_info(struct task_struct *, char *);
+extern int proc_pid_nx_info(struct task_struct *, char *);
+
/* SMP-safe */
static struct dentry *proc_pident_lookup(struct inode *dir,
struct dentry *dentry,
ei->op.proc_read = proc_pid_status;
break;
case PROC_TID_STAT:
+ inode->i_fop = &proc_info_file_operations;
+ ei->op.proc_read = proc_tid_stat;
+ break;
case PROC_TGID_STAT:
inode->i_fop = &proc_info_file_operations;
- ei->op.proc_read = proc_pid_stat;
+ ei->op.proc_read = proc_tgid_stat;
break;
case PROC_TID_CMDLINE:
case PROC_TGID_CMDLINE:
inode->i_fop = &proc_info_file_operations;
ei->op.proc_read = proc_pid_wchan;
break;
+#endif
+#ifdef CONFIG_SCHEDSTATS
+ case PROC_TID_SCHEDSTAT:
+ case PROC_TGID_SCHEDSTAT:
+ inode->i_fop = &proc_info_file_operations;
+ ei->op.proc_read = proc_pid_schedstat;
+ break;
+#endif
+ case PROC_TID_OOM_SCORE:
+ case PROC_TGID_OOM_SCORE:
+ inode->i_fop = &proc_info_file_operations;
+ ei->op.proc_read = proc_oom_score;
+ break;
+ case PROC_TID_OOM_ADJUST:
+ case PROC_TGID_OOM_ADJUST:
+ inode->i_fop = &proc_oom_adjust_operations;
+ break;
+#ifdef CONFIG_AUDITSYSCALL
+ case PROC_TID_LOGINUID:
+ case PROC_TGID_LOGINUID:
+ inode->i_fop = &proc_loginuid_operations;
+ break;
#endif
case PROC_TID_VX_INFO:
case PROC_TGID_VX_INFO:
+ if (task_vx_flags(task, VXF_INFO_HIDE, 0))
+ goto out_noent;
inode->i_fop = &proc_info_file_operations;
ei->op.proc_read = proc_pid_vx_info;
break;
case PROC_TID_IP_INFO:
case PROC_TGID_IP_INFO:
+ if (task_vx_flags(task, VXF_INFO_HIDE, 0))
+ goto out_noent;
inode->i_fop = &proc_info_file_operations;
ei->op.proc_read = proc_pid_nx_info;
break;
default:
printk("procfs: impossible type (%d)",p->type);
- iput(inode);
- return ERR_PTR(-EINVAL);
+ error = -EINVAL;
+ goto out_put;
}
dentry->d_op = &pid_dentry_operations;
d_add(dentry, inode);
return NULL;
+out_noent:
+ error=-ENOENT;
+out_put:
+ iput(inode);
out:
return ERR_PTR(error);
}
int buflen)
{
char tmp[30];
- sprintf(tmp, "%d", current->tgid);
+ sprintf(tmp, "%d", vx_map_tgid(current->tgid));
return vfs_readlink(dentry,buffer,buflen,tmp);
}
static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
{
char tmp[30];
- sprintf(tmp, "%d", current->tgid);
+ sprintf(tmp, "%d", vx_map_tgid(current->tgid)); /* NOTE(review): vx_map_tgid() presumably translates the real tgid into the id visible in the caller's context - confirm */
return vfs_follow_link(nd,tmp);
}
void proc_pid_flush(struct dentry *proc_dentry)
{
+ might_sleep(); /* NOTE(review): presumably asserts a sleepable context because the dcache calls below may block - confirm */
if(proc_dentry != NULL) {
shrink_dcache_parent(proc_dentry);
dput(proc_dentry);
}
}
+#define VXF_FAKE_INIT (VXF_INFO_INIT|VXF_STATE_INIT)
+
+/*
+ * Decides whether @task, listed under the number @pid, is shown to the
+ * caller.  Returns 1 if visible, 0 otherwise.
+ *
+ * pid 1 is reported visible whenever
+ * vx_flags(VXF_FAKE_INIT, VXF_FAKE_INIT) is false; in every other case
+ * the answer is whether vx_check() accepts the task's xid under
+ * VX_WATCH|VX_IDENT.
+ */
+static inline int proc_pid_visible(struct task_struct *task, int pid)
+{
+	if (pid == 1 && !vx_flags(VXF_FAKE_INIT, VXF_FAKE_INIT))
+		return 1;
+
+	return vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT) ? 1 : 0;
+}
+
/* SMP-safe */
struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
{
d_add(dentry, inode);
return NULL;
}
- tgid = vx_rmap_tgid(current->vx_info, name_to_int(dentry));
+ tgid = name_to_int(dentry);
if (tgid == ~0U)
goto out;
if (!task)
goto out;
- inode = NULL;
- if (vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT))
- inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO);
+ /* check for context visibility */
+ if (!proc_pid_visible(task, tgid))
+ goto out_drop_task;
+
+ inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO);
+ if (!inode)
+ goto out_drop_task;
- if (!inode) {
- put_task_struct(task);
- goto out;
- }
inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
inode->i_op = &proc_tgid_base_inode_operations;
inode->i_fop = &proc_tgid_base_operations;
goto out;
}
return NULL;
+out_drop_task:
+ put_task_struct(task);
out:
return ERR_PTR(-ENOENT);
}
struct inode *inode;
unsigned tid;
- tid = vx_rmap_tgid(current->vx_info, name_to_int(dentry));
+ tid = name_to_int(dentry);
if (tid == ~0U)
goto out;
-
-/* handle fakeinit */
+ if (vx_current_initpid(tid))
+ goto out;
read_lock(&tasklist_lock);
task = find_task_by_pid(tid);
if (leader->tgid != task->tgid)
goto out_drop_task;
- inode = NULL;
- if (vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT))
- inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO);
+ /* check for context visibility */
+ if (!proc_pid_visible(task, tid))
+ goto out_drop_task;
+ inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO);
if (!inode)
goto out_drop_task;
+
inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
inode->i_op = &proc_tid_base_inode_operations;
inode->i_fop = &proc_tid_base_operations;
read_lock(&tasklist_lock);
p = NULL;
if (version) {
- p = find_task_by_pid(version);
- if (!thread_group_leader(p))
+ p = find_task_by_real_pid(version);
+ if (p && !thread_group_leader(p))
p = NULL;
}
if (!pid_alive(p))
continue;
- if (!vx_check(vx_task_xid(p), VX_WATCH|VX_IDENT))
+ /* check for context visibility */
+ if (!proc_pid_visible(p, tgid))
continue;
if (--index >= 0)
continue;
- tgids[nr_tgids] = vx_map_tgid(current->vx_info, tgid);
+ tgids[nr_tgids] = vx_map_tgid(tgid);
nr_tgids++;
if (nr_tgids >= PROC_MAXPIDS)
break;
if (pid_alive(task)) do {
int tid = task->pid;
- if (!vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT))
+ /* check for context visibility */
+ if (!proc_pid_visible(task, tid))
continue;
if (--index >= 0)
continue;
- tids[nr_tids] = vx_map_tgid(current->vx_info, tid);
+ tids[nr_tids] = vx_map_pid(tid);
nr_tids++;
if (nr_tids >= PROC_MAXPIDS)
break;
char buf[PROC_NUMBUF];
unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
unsigned int nr_tgids, i;
+ int next_tgid;
if (!nr) {
ino_t ino = fake_ino(0,PROC_TGID_INO);
nr++;
}
- /*
- * f_version caches the last tgid which was returned from readdir
+ /* f_version caches the tgid value that the last readdir call couldn't
+ * return. lseek aka telldir automagically resets f_version to 0.
*/
- nr_tgids = get_tgid_list(nr, filp->f_version, tgid_array);
+ next_tgid = filp->f_version;
+ filp->f_version = 0;
+ for (;;) {
+ nr_tgids = get_tgid_list(nr, next_tgid, tgid_array);
+ if (!nr_tgids) {
+ /* no more entries ! */
+ break;
+ }
+ next_tgid = 0;
- for (i = 0; i < nr_tgids; i++) {
- int tgid = tgid_array[i];
- ino_t ino = fake_ino(tgid,PROC_TGID_INO);
- unsigned long j = PROC_NUMBUF;
+ /* do not use the last found pid, reserve it for next_tgid */
+ if (nr_tgids == PROC_MAXPIDS) {
+ nr_tgids--;
+ next_tgid = tgid_array[nr_tgids];
+ }
- do
- buf[--j] = '0' + (tgid % 10);
- while ((tgid /= 10) != 0);
+ for (i=0;i<nr_tgids;i++) {
+ int tgid = tgid_array[i];
+ ino_t ino = fake_ino(tgid,PROC_TGID_INO);
+ unsigned long j = PROC_NUMBUF;
- if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) {
- filp->f_version = tgid;
- break;
+ do
+ buf[--j] = '0' + (tgid % 10);
+ while ((tgid /= 10) != 0);
+
+ if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) {
+ /* returning this tgid failed, save it as the first
+ * pid for the next readdir call */
+ filp->f_version = tgid_array[i];
+ goto out;
+ }
+ filp->f_pos++;
+ nr++;
}
- filp->f_pos++;
}
+out:
return 0;
}