X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Fproc%2Fbase.c;h=f6e0c627497a49061035efcb128c8b49ebbf85af;hb=4e76c8a9fa413ccc09d3f7f664183dcce3555d57;hp=7ff742cecadcec4ee91ac3e669c2083bb3f71e40;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git diff --git a/fs/proc/base.c b/fs/proc/base.c index 7ff742cec..f6e0c6274 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -11,6 +11,40 @@ * go into icache. We cache the reference to task_struct upon lookup too. * Eventually it should become a filesystem in its own. We don't use the * rest of procfs anymore. + * + * + * Changelog: + * 17-Jan-2005 + * Allan Bezerra + * Bruna Moreira + * Edjard Mota + * Ilias Biris + * Mauricio Lin + * + * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT + * + * A new process specific entry (smaps) included in /proc. It shows the + * size of rss for each memory area. The maps entry lacks information + * about physical memory size (rss) for each mapped file, i.e., + * rss information for executables and library files. + * This additional information is useful for any tools that need to know + * about physical memory consumption for a process specific library. + * + * Changelog: + * 21-Feb-2005 + * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT + * Pud inclusion in the page table walking. + * + * ChangeLog: + * 10-Mar-2005 + * 10LE Instituto Nokia de Tecnologia - INdT: + * A better way to walks through the page table as suggested by Hugh Dickins. + * + * Simo Piiroinen : + * Smaps information related to shared, private, clean and dirty pages. + * + * Paul Mundt : + * Overall revision about smaps. */ #include @@ -21,6 +55,7 @@ #include #include #include +#include #include #include #include @@ -28,10 +63,18 @@ #include #include #include +#include #include #include #include #include +#include +#include +#include +#include +#include +#include +#include "internal.h" /* * For hysterical raisins we keep the same inumbers as in the old procfs. @@ -48,6 +91,9 @@ enum pid_directory_inos { PROC_TGID_TASK, PROC_TGID_STATUS, PROC_TGID_MEM, +#ifdef CONFIG_SECCOMP + PROC_TGID_SECCOMP, +#endif PROC_TGID_CWD, PROC_TGID_ROOT, PROC_TGID_EXE, @@ -58,8 +104,19 @@ enum pid_directory_inos { PROC_TGID_STAT, PROC_TGID_STATM, PROC_TGID_MAPS, + PROC_TGID_NUMA_MAPS, PROC_TGID_MOUNTS, + PROC_TGID_MOUNTSTATS, PROC_TGID_WCHAN, +#ifdef CONFIG_MMU + PROC_TGID_SMAPS, +#endif +#ifdef CONFIG_SCHEDSTATS + PROC_TGID_SCHEDSTAT, +#endif +#ifdef CONFIG_CPUSETS + PROC_TGID_CPUSET, +#endif #ifdef CONFIG_SECURITY PROC_TGID_ATTR, PROC_TGID_ATTR_CURRENT, @@ -67,10 +124,19 @@ enum pid_directory_inos { PROC_TGID_ATTR_EXEC, PROC_TGID_ATTR_FSCREATE, #endif - PROC_TGID_FD_DIR, + PROC_TGID_VX_INFO, + PROC_TGID_IP_INFO, +#ifdef CONFIG_AUDITSYSCALL + PROC_TGID_LOGINUID, +#endif + PROC_TGID_OOM_SCORE, + PROC_TGID_OOM_ADJUST, PROC_TID_INO, PROC_TID_STATUS, PROC_TID_MEM, +#ifdef CONFIG_SECCOMP + PROC_TID_SECCOMP, +#endif PROC_TID_CWD, PROC_TID_ROOT, PROC_TID_EXE, @@ -81,8 +147,19 @@ enum pid_directory_inos { PROC_TID_STAT, PROC_TID_STATM, PROC_TID_MAPS, + PROC_TID_NUMA_MAPS, PROC_TID_MOUNTS, + PROC_TID_MOUNTSTATS, PROC_TID_WCHAN, +#ifdef CONFIG_MMU + PROC_TID_SMAPS, +#endif +#ifdef CONFIG_SCHEDSTATS + PROC_TID_SCHEDSTAT, +#endif +#ifdef CONFIG_CPUSETS + PROC_TID_CPUSET, +#endif #ifdef CONFIG_SECURITY PROC_TID_ATTR, PROC_TID_ATTR_CURRENT, @@ -90,6 +167,15 @@ enum pid_directory_inos { PROC_TID_ATTR_EXEC, PROC_TID_ATTR_FSCREATE, #endif + PROC_TID_VX_INFO, + PROC_TID_IP_INFO, +#ifdef CONFIG_AUDITSYSCALL + PROC_TID_LOGINUID, +#endif + PROC_TID_OOM_SCORE, + PROC_TID_OOM_ADJUST, + + /* Add new entries before this */ PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ }; @@ -111,17 +197,40 @@ static struct pid_entry tgid_base_stuff[] = { E(PROC_TGID_CMDLINE, "cmdline", S_IFREG|S_IRUGO), E(PROC_TGID_STAT, "stat", S_IFREG|S_IRUGO), E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO), - E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO), + E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUSR), +#ifdef CONFIG_NUMA + E(PROC_TGID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO), +#endif E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), +#ifdef CONFIG_SECCOMP + E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), +#endif E(PROC_TGID_CWD, "cwd", S_IFLNK|S_IRWXUGO), E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO), E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO), E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO), + E(PROC_TGID_MOUNTSTATS, "mountstats", S_IFREG|S_IRUSR), +#ifdef CONFIG_MMU + E(PROC_TGID_SMAPS, "smaps", S_IFREG|S_IRUSR), +#endif #ifdef CONFIG_SECURITY E(PROC_TGID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), #endif #ifdef CONFIG_KALLSYMS E(PROC_TGID_WCHAN, "wchan", S_IFREG|S_IRUGO), +#endif +#ifdef CONFIG_SCHEDSTATS + E(PROC_TGID_SCHEDSTAT, "schedstat", S_IFREG|S_IRUGO), +#endif +#ifdef CONFIG_CPUSETS + E(PROC_TGID_CPUSET, "cpuset", S_IFREG|S_IRUGO), +#endif + E(PROC_TGID_VX_INFO, "vinfo", S_IFREG|S_IRUGO), + E(PROC_TGID_IP_INFO, "ninfo", S_IFREG|S_IRUGO), + E(PROC_TGID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO), + E(PROC_TGID_OOM_ADJUST,"oom_adj", S_IFREG|S_IRUGO|S_IWUSR), +#ifdef CONFIG_AUDITSYSCALL + E(PROC_TGID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO), #endif {0,0,NULL,0} }; @@ -133,17 +242,39 @@ static struct pid_entry tid_base_stuff[] = { E(PROC_TID_CMDLINE, "cmdline", S_IFREG|S_IRUGO), E(PROC_TID_STAT, "stat", S_IFREG|S_IRUGO), E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO), - E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO), + E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUSR), +#ifdef CONFIG_NUMA + E(PROC_TID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO), +#endif E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), +#ifdef CONFIG_SECCOMP + E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), +#endif E(PROC_TID_CWD, "cwd", S_IFLNK|S_IRWXUGO), E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO), E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO), E(PROC_TID_MOUNTS, "mounts", S_IFREG|S_IRUGO), +#ifdef CONFIG_MMU + E(PROC_TID_SMAPS, "smaps", S_IFREG|S_IRUSR), +#endif #ifdef CONFIG_SECURITY E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), #endif #ifdef CONFIG_KALLSYMS E(PROC_TID_WCHAN, "wchan", S_IFREG|S_IRUGO), +#endif +#ifdef CONFIG_SCHEDSTATS + E(PROC_TID_SCHEDSTAT, "schedstat",S_IFREG|S_IRUGO), +#endif +#ifdef CONFIG_CPUSETS + E(PROC_TID_CPUSET, "cpuset", S_IFREG|S_IRUGO), +#endif + E(PROC_TID_VX_INFO, "vinfo", S_IFREG|S_IRUGO), + E(PROC_TID_IP_INFO, "ninfo", S_IFREG|S_IRUGO), + E(PROC_TID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO), + E(PROC_TID_OOM_ADJUST, "oom_adj", S_IFREG|S_IRUGO|S_IWUSR), +#ifdef CONFIG_AUDITSYSCALL + E(PROC_TID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO), #endif {0,0,NULL,0} }; @@ -167,21 +298,6 @@ static struct pid_entry tid_attr_stuff[] = { #undef E -static inline struct task_struct *proc_task(struct inode *inode) -{ - return PROC_I(inode)->task; -} - -static inline int proc_type(struct inode *inode) -{ - return PROC_I(inode)->type; -} - -int proc_pid_stat(struct task_struct*,char*); -int proc_pid_status(struct task_struct*,char*); -int proc_pid_statm(struct task_struct*,char*); -int proc_pid_cpu(struct task_struct*,char*); - static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) { struct task_struct *task = proc_task(inode); @@ -191,6 +307,10 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm files = get_files_struct(task); if (files) { + /* + * We are not taking a ref to the file structure, so we must + * hold ->file_lock. + */ spin_lock(&files->file_lock); file = fcheck_files(files, fd); if (file) { @@ -206,42 +326,21 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm return -ENOENT; } -static int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +static struct fs_struct *get_fs_struct(struct task_struct *task) { - struct vm_area_struct * vma; - int result = -ENOENT; - struct task_struct *task = proc_task(inode); - struct mm_struct * mm = get_task_mm(task); - - if (!mm) - goto out; - down_read(&mm->mmap_sem); - vma = mm->mmap; - while (vma) { - if ((vma->vm_flags & VM_EXECUTABLE) && - vma->vm_file) { - *mnt = mntget(vma->vm_file->f_vfsmnt); - *dentry = dget(vma->vm_file->f_dentry); - result = 0; - break; - } - vma = vma->vm_next; - } - up_read(&mm->mmap_sem); - mmput(mm); -out: - return result; + struct fs_struct *fs; + task_lock(task); + fs = task->fs; + if(fs) + atomic_inc(&fs->count); + task_unlock(task); + return fs; } static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) { - struct fs_struct *fs; + struct fs_struct *fs = get_fs_struct(proc_task(inode)); int result = -ENOENT; - task_lock(proc_task(inode)); - fs = proc_task(inode)->fs; - if(fs) - atomic_inc(&fs->count); - task_unlock(proc_task(inode)); if (fs) { read_lock(&fs->lock); *mnt = mntget(fs->pwdmnt); @@ -254,14 +353,56 @@ static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfs } static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +{ + struct fs_struct *fs = get_fs_struct(proc_task(inode)); + int result = -ENOENT; + if (fs) { + read_lock(&fs->lock); + *mnt = mntget(fs->rootmnt); + *dentry = dget(fs->root); + read_unlock(&fs->lock); + result = 0; + put_fs_struct(fs); + } + return result; +} + + +/* Same as proc_root_link, but this addionally tries to get fs from other + * threads in the group */ +static int proc_task_root_link(struct inode *inode, struct dentry **dentry, + struct vfsmount **mnt) { struct fs_struct *fs; int result = -ENOENT; - task_lock(proc_task(inode)); - fs = proc_task(inode)->fs; - if(fs) + struct task_struct *leader = proc_task(inode); + + task_lock(leader); + fs = leader->fs; + if (fs) { atomic_inc(&fs->count); - task_unlock(proc_task(inode)); + task_unlock(leader); + } else { + /* Try to get fs from other threads */ + task_unlock(leader); + read_lock(&tasklist_lock); + if (pid_alive(leader)) { + struct task_struct *task = leader; + + while ((task = next_thread(task)) != leader) { + task_lock(task); + fs = task->fs; + if (fs) { + atomic_inc(&fs->count); + task_unlock(task); + break; + } + task_unlock(task); + } + } + read_unlock(&tasklist_lock); + } + if (fs) { read_lock(&fs->lock); *mnt = mntget(fs->rootmnt); @@ -273,37 +414,32 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf return result; } + #define MAY_PTRACE(task) \ (task == current || \ (task->parent == current && \ - (task->ptrace & PT_PTRACED) && task->state == TASK_STOPPED && \ + (task->ptrace & PT_PTRACED) && \ + (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \ security_ptrace(current,task) == 0)) -static int may_ptrace_attach(struct task_struct *task) +struct mm_struct *mm_for_maps(struct task_struct *task) { - int retval = 0; - + struct mm_struct *mm = get_task_mm(task); + if (!mm) + return NULL; + down_read(&mm->mmap_sem); task_lock(task); - - if (!task->mm) - goto out; - if (((current->uid != task->euid) || - (current->uid != task->suid) || - (current->uid != task->uid) || - (current->gid != task->egid) || - (current->gid != task->sgid) || - (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) - goto out; - rmb(); - if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) + if (task->mm != mm) goto out; - if (security_ptrace(current, task)) + if (task->mm != current->mm && __ptrace_may_attach(task)) goto out; - - retval = 1; + task_unlock(task); + return mm; out: task_unlock(task); - return retval; + up_read(&mm->mmap_sem); + mmput(mm); + return NULL; } static int proc_pid_environ(struct task_struct *task, char * buffer) @@ -315,7 +451,7 @@ static int proc_pid_environ(struct task_struct *task, char * buffer) if (len > PAGE_SIZE) len = PAGE_SIZE; res = access_process_vm(task, mm->env_start, buffer, len, 0); - if (!may_ptrace_attach(task)) + if (!ptrace_may_attach(task)) res = -ESRCH; mmput(mm); } @@ -329,6 +465,8 @@ static int proc_pid_cmdline(struct task_struct *task, char * buffer) struct mm_struct *mm = get_task_mm(task); if (!mm) goto out; + if (!mm->arg_end) + goto out_mm; /* Shh! No looking before we're done */ len = mm->arg_end - mm->arg_start; @@ -339,7 +477,7 @@ static int proc_pid_cmdline(struct task_struct *task, char * buffer) // If the nul at the end of args has been overwritten, then // assume application is using setproctitle(3). - if (res > 0 && buffer[res-1] != '\0') { + if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) { len = strnlen(buffer, res); if (len < res) { res = len; @@ -351,8 +489,8 @@ static int proc_pid_cmdline(struct task_struct *task, char * buffer) res = strnlen(buffer, res); } } +out_mm: mmput(mm); - out: return res; } @@ -386,7 +524,7 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer) char *modname; const char *sym_name; unsigned long wchan, size, offset; - char namebuf[128]; + char namebuf[KSYM_NAME_LEN+1]; wchan = get_wchan(task); @@ -397,20 +535,52 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer) } #endif /* CONFIG_KALLSYMS */ +#ifdef CONFIG_SCHEDSTATS +/* + * Provides /proc/PID/schedstat + */ +static int proc_pid_schedstat(struct task_struct *task, char *buffer) +{ + return sprintf(buffer, "%lu %lu %lu\n", + task->sched_info.cpu_time, + task->sched_info.run_delay, + task->sched_info.pcnt); +} +#endif + +/* The badness from the OOM killer */ +unsigned long badness(struct task_struct *p, unsigned long uptime); +static int proc_oom_score(struct task_struct *task, char *buffer) +{ + unsigned long points; + struct timespec uptime; + + do_posix_clock_monotonic_gettime(&uptime); + points = badness(task, uptime.tv_sec); + return sprintf(buffer, "%lu\n", points); +} + /************************************************************************/ /* Here the fs part begins */ /************************************************************************/ /* permission checks */ -static int proc_check_root(struct inode *inode) +/* If the process being read is separated by chroot from the reading process, + * don't let the reader access the threads. + * + * note: this does dput(root) and mntput(vfsmnt) on exit. + */ +static int proc_check_chroot(struct dentry *root, struct vfsmount *vfsmnt) { - struct dentry *de, *base, *root; - struct vfsmount *our_vfsmnt, *vfsmnt, *mnt; + struct dentry *de, *base; + struct vfsmount *our_vfsmnt, *mnt; int res = 0; - if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */ - return -ENOENT; + /* context admin override */ + if (capable(CAP_CONTEXT)) + goto override; + read_lock(¤t->fs->lock); our_vfsmnt = mntget(current->fs->rootmnt); base = dget(current->fs->root); @@ -420,11 +590,11 @@ static int proc_check_root(struct inode *inode) de = root; mnt = vfsmnt; - while (vfsmnt != our_vfsmnt) { - if (vfsmnt == vfsmnt->mnt_parent) + while (mnt != our_vfsmnt) { + if (mnt == mnt->mnt_parent) goto out; - de = vfsmnt->mnt_mountpoint; - vfsmnt = vfsmnt->mnt_parent; + de = mnt->mnt_mountpoint; + mnt = mnt->mnt_parent; } if (!is_subdir(de, base)) @@ -434,8 +604,9 @@ static int proc_check_root(struct inode *inode) exit: dput(base); mntput(our_vfsmnt); +override: dput(root); - mntput(mnt); + mntput(vfsmnt); return res; out: spin_unlock(&vfsmount_lock); @@ -443,13 +614,58 @@ out: goto exit; } +static int proc_check_root(struct inode *inode) +{ + struct dentry *root; + struct vfsmount *vfsmnt; + + if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */ + return -ENOENT; + return proc_check_chroot(root, vfsmnt); +} + static int proc_permission(struct inode *inode, int mask, struct nameidata *nd) { - if (vfs_permission(inode, mask) != 0) + if (generic_permission(inode, mask, NULL) != 0) return -EACCES; return proc_check_root(inode); } +static int proc_setattr(struct dentry *dentry, struct iattr *attr) +{ + int error; + struct inode *inode = dentry->d_inode; + + if (attr->ia_valid & ATTR_MODE) + return -EPERM; + + error = inode_change_ok(inode, attr); + if (!error) { + error = security_inode_setattr(dentry, attr); + if (!error) + error = inode_setattr(inode, attr); + } + return error; +} + +static struct inode_operations proc_def_inode_operations = { + .setattr = proc_setattr, +}; + +static int proc_task_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + struct dentry *root; + struct vfsmount *vfsmnt; + + if (generic_permission(inode, mask, NULL) != 0) + return -EACCES; + + if (proc_task_root_link(inode, &root, &vfsmnt)) + return -ENOENT; + + return proc_check_chroot(root, vfsmnt); +} + extern struct seq_operations proc_pid_maps_op; static int maps_open(struct inode *inode, struct file *file) { @@ -469,11 +685,124 @@ static struct file_operations proc_maps_operations = { .release = seq_release, }; +#ifdef CONFIG_NUMA +extern struct seq_operations proc_pid_numa_maps_op; +static int numa_maps_open(struct inode *inode, struct file *file) +{ + struct task_struct *task = proc_task(inode); + int ret = seq_open(file, &proc_pid_numa_maps_op); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = task; + } + return ret; +} + +static struct file_operations proc_numa_maps_operations = { + .open = numa_maps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; +#endif + +#ifdef CONFIG_MMU +extern struct seq_operations proc_pid_smaps_op; +static int smaps_open(struct inode *inode, struct file *file) +{ + struct task_struct *task = proc_task(inode); + int ret = seq_open(file, &proc_pid_smaps_op); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = task; + } + return ret; +} + +static struct file_operations proc_smaps_operations = { + .open = smaps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; +#endif + extern struct seq_operations mounts_op; +struct proc_mounts { + struct seq_file m; + int event; +}; + static int mounts_open(struct inode *inode, struct file *file) { struct task_struct *task = proc_task(inode); - int ret = seq_open(file, &mounts_op); + struct namespace *namespace; + struct proc_mounts *p; + int ret = -EINVAL; + + task_lock(task); + namespace = task->namespace; + if (namespace) + get_namespace(namespace); + task_unlock(task); + + if (namespace) { + ret = -ENOMEM; + p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL); + if (p) { + file->private_data = &p->m; + ret = seq_open(file, &mounts_op); + if (!ret) { + p->m.private = namespace; + p->event = namespace->event; + return 0; + } + kfree(p); + } + put_namespace(namespace); + } + return ret; +} + +static int mounts_release(struct inode *inode, struct file *file) +{ + struct seq_file *m = file->private_data; + struct namespace *namespace = m->private; + put_namespace(namespace); + return seq_release(inode, file); +} + +static unsigned mounts_poll(struct file *file, poll_table *wait) +{ + struct proc_mounts *p = file->private_data; + struct namespace *ns = p->m.private; + unsigned res = 0; + + poll_wait(file, &ns->poll, wait); + + spin_lock(&vfsmount_lock); + if (p->event != ns->event) { + p->event = ns->event; + res = POLLERR; + } + spin_unlock(&vfsmount_lock); + + return res; +} + +static struct file_operations proc_mounts_operations = { + .open = mounts_open, + .read = seq_read, + .llseek = seq_lseek, + .release = mounts_release, + .poll = mounts_poll, +}; + +extern struct seq_operations mountstats_op; +static int mountstats_open(struct inode *inode, struct file *file) +{ + struct task_struct *task = proc_task(inode); + int ret = seq_open(file, &mountstats_op); if (!ret) { struct seq_file *m = file->private_data; @@ -494,16 +823,8 @@ static int mounts_open(struct inode *inode, struct file *file) return ret; } -static int mounts_release(struct inode *inode, struct file *file) -{ - struct seq_file *m = file->private_data; - struct namespace *namespace = m->private; - put_namespace(namespace); - return seq_release(inode, file); -} - -static struct file_operations proc_mounts_operations = { - .open = mounts_open, +static struct file_operations proc_mountstats_operations = { + .open = mountstats_open, .read = seq_read, .llseek = seq_lseek, .release = mounts_release, @@ -511,13 +832,12 @@ static struct file_operations proc_mounts_operations = { #define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */ -static ssize_t proc_info_read(struct file * file, char * buf, +static ssize_t proc_info_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { struct inode * inode = file->f_dentry->d_inode; unsigned long page; ssize_t length; - ssize_t end; struct task_struct *task = proc_task(inode); if (count > PROC_BLOCK_SIZE) @@ -527,24 +847,10 @@ static ssize_t proc_info_read(struct file * file, char * buf, length = PROC_I(inode)->op.proc_read(task, (char*)page); - if (length < 0) { - free_page(page); - return length; - } - /* Static 4kB (or whatever) block capacity */ - if (*ppos >= length) { - free_page(page); - return 0; - } - if (count + *ppos > length) - count = length - *ppos; - end = count + *ppos; - if (copy_to_user(buf, (char *) page + *ppos, count)) - count = -EFAULT; - else - *ppos = end; + if (length >= 0) + length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); free_page(page); - return count; + return length; } static struct file_operations proc_info_file_operations = { @@ -557,7 +863,7 @@ static int mem_open(struct inode* inode, struct file* file) return 0; } -static ssize_t mem_read(struct file * file, char * buf, +static ssize_t mem_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { struct task_struct *task = proc_task(file->f_dentry->d_inode); @@ -566,7 +872,7 @@ static ssize_t mem_read(struct file * file, char * buf, int ret = -ESRCH; struct mm_struct *mm; - if (!MAY_PTRACE(task) || !may_ptrace_attach(task)) + if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) goto out; ret = -ENOMEM; @@ -592,7 +898,7 @@ static ssize_t mem_read(struct file * file, char * buf, this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; retval = access_process_vm(task, src, page, this_len, 0); - if (!retval || !MAY_PTRACE(task) || !may_ptrace_attach(task)) { + if (!retval || !MAY_PTRACE(task) || !ptrace_may_attach(task)) { if (!ret) ret = -EIO; break; @@ -630,7 +936,7 @@ static ssize_t mem_write(struct file * file, const char * buf, struct task_struct *task = proc_task(file->f_dentry->d_inode); unsigned long dst = *ppos; - if (!MAY_PTRACE(task) || !may_ptrace_attach(task)) + if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) return -ESRCH; page = (char *)__get_free_page(GFP_USER); @@ -685,11 +991,182 @@ static struct file_operations proc_mem_operations = { .open = mem_open, }; +static ssize_t oom_adjust_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task = proc_task(file->f_dentry->d_inode); + char buffer[8]; + size_t len; + int oom_adjust = task->oomkilladj; + loff_t __ppos = *ppos; + + len = sprintf(buffer, "%i\n", oom_adjust); + if (__ppos >= len) + return 0; + if (count > len-__ppos) + count = len-__ppos; + if (copy_to_user(buf, buffer + __ppos, count)) + return -EFAULT; + *ppos = __ppos + count; + return count; +} + +static ssize_t oom_adjust_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task = proc_task(file->f_dentry->d_inode); + char buffer[8], *end; + int oom_adjust; + + if (!capable(CAP_SYS_RESOURCE)) + return -EPERM; + memset(buffer, 0, 8); + if (count > 6) + count = 6; + if (copy_from_user(buffer, buf, count)) + return -EFAULT; + oom_adjust = simple_strtol(buffer, &end, 0); + if ((oom_adjust < -16 || oom_adjust > 15) && oom_adjust != OOM_DISABLE) + return -EINVAL; + if (*end == '\n') + end++; + task->oomkilladj = oom_adjust; + if (end - buffer == 0) + return -EIO; + return end - buffer; +} + +static struct file_operations proc_oom_adjust_operations = { + .read = oom_adjust_read, + .write = oom_adjust_write, +}; + static struct inode_operations proc_mem_inode_operations = { .permission = proc_permission, + .setattr = proc_setattr, }; -static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) +#ifdef CONFIG_AUDITSYSCALL +#define TMPBUFLEN 21 +static ssize_t proc_loginuid_read(struct file * file, char __user * buf, + size_t count, loff_t *ppos) +{ + struct inode * inode = file->f_dentry->d_inode; + struct task_struct *task = proc_task(inode); + ssize_t length; + char tmpbuf[TMPBUFLEN]; + + length = scnprintf(tmpbuf, TMPBUFLEN, "%u", + audit_get_loginuid(task->audit_context)); + return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); +} + +static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, + size_t count, loff_t *ppos) +{ + struct inode * inode = file->f_dentry->d_inode; + char *page, *tmp; + ssize_t length; + struct task_struct *task = proc_task(inode); + uid_t loginuid; + + if (!capable(CAP_AUDIT_CONTROL)) + return -EPERM; + + if (current != task) + return -EPERM; + + if (count > PAGE_SIZE) + count = PAGE_SIZE; + + if (*ppos != 0) { + /* No partial writes. */ + return -EINVAL; + } + page = (char*)__get_free_page(GFP_USER); + if (!page) + return -ENOMEM; + length = -EFAULT; + if (copy_from_user(page, buf, count)) + goto out_free_page; + + loginuid = simple_strtoul(page, &tmp, 10); + if (tmp == page) { + length = -EINVAL; + goto out_free_page; + + } + length = audit_set_loginuid(task, loginuid); + if (likely(length == 0)) + length = count; + +out_free_page: + free_page((unsigned long) page); + return length; +} + +static struct file_operations proc_loginuid_operations = { + .read = proc_loginuid_read, + .write = proc_loginuid_write, +}; +#endif + +#ifdef CONFIG_SECCOMP +static ssize_t seccomp_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *tsk = proc_task(file->f_dentry->d_inode); + char __buf[20]; + loff_t __ppos = *ppos; + size_t len; + + /* no need to print the trailing zero, so use only len */ + len = sprintf(__buf, "%u\n", tsk->seccomp.mode); + if (__ppos >= len) + return 0; + if (count > len - __ppos) + count = len - __ppos; + if (copy_to_user(buf, __buf + __ppos, count)) + return -EFAULT; + *ppos = __ppos + count; + return count; +} + +static ssize_t seccomp_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *tsk = proc_task(file->f_dentry->d_inode); + char __buf[20], *end; + unsigned int seccomp_mode; + + /* can set it only once to be even more secure */ + if (unlikely(tsk->seccomp.mode)) + return -EPERM; + + memset(__buf, 0, sizeof(__buf)); + count = min(count, sizeof(__buf) - 1); + if (copy_from_user(__buf, buf, count)) + return -EFAULT; + seccomp_mode = simple_strtoul(__buf, &end, 0); + if (*end == '\n') + end++; + if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { + tsk->seccomp.mode = seccomp_mode; + set_tsk_thread_flag(tsk, TIF_SECCOMP); + } else + return -EINVAL; + if (unlikely(!(end - __buf))) + return -EIO; + return end - __buf; +} + +static struct file_operations proc_seccomp_operations = { + .read = seccomp_read, + .write = seccomp_write, +}; +#endif /* CONFIG_SECCOMP */ + +static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; int error = -EACCES; @@ -706,11 +1183,11 @@ static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt); nd->last_type = LAST_BIND; out: - return error; + return ERR_PTR(error); } static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt, - char *buffer, int buflen) + char __user *buffer, int buflen) { struct inode * inode; char *tmp = (char*)__get_free_page(GFP_KERNEL), *path; @@ -735,7 +1212,7 @@ static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt, return len; } -static int proc_pid_readlink(struct dentry * dentry, char * buffer, int buflen) +static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen) { int error = -EACCES; struct inode *inode = dentry->d_inode; @@ -764,15 +1241,10 @@ out: static struct inode_operations proc_pid_link_inode_operations = { .readlink = proc_pid_readlink, - .follow_link = proc_pid_follow_link + .follow_link = proc_pid_follow_link, + .setattr = proc_setattr, }; -static int pid_alive(struct task_struct *p) -{ - BUG_ON(p->pids[PIDTYPE_PID].pidptr != &p->pids[PIDTYPE_PID].pid); - return atomic_read(&p->pids[PIDTYPE_PID].pid.count); -} - #define NUMBUF 10 static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) @@ -783,6 +1255,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) int retval; char buf[NUMBUF]; struct files_struct * files; + struct fdtable *fdt; retval = -ENOENT; if (!pid_alive(p)) @@ -805,15 +1278,16 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) files = get_files_struct(p); if (!files) goto out; - spin_lock(&files->file_lock); + rcu_read_lock(); + fdt = files_fdtable(files); for (fd = filp->f_pos-2; - fd < files->max_fds; + fd < fdt->max_fds; fd++, filp->f_pos++) { unsigned int i,j; if (!fcheck_files(files, fd)) continue; - spin_unlock(&files->file_lock); + rcu_read_unlock(); j = NUMBUF; i = fd; @@ -825,12 +1299,12 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) ino = fake_ino(tid, PROC_TID_FD_DIR + fd); if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { - spin_lock(&files->file_lock); + rcu_read_lock(); break; } - spin_lock(&files->file_lock); + rcu_read_lock(); } - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); } out: @@ -847,7 +1321,7 @@ static int proc_pident_readdir(struct file *filp, struct inode *inode = dentry->d_inode; struct pid_entry *p; ino_t ino; - int ret; + int ret, hide; ret = -ENOENT; if (!pid_alive(proc_task(inode))) @@ -878,11 +1352,20 @@ static int proc_pident_readdir(struct file *filp, goto out; } p = ents + i; + hide = vx_flags(VXF_INFO_HIDE, 0); while (p->name) { + if (hide) { + switch (p->type) { + case PROC_TGID_VX_INFO: + case PROC_TGID_IP_INFO: + goto skip; + } + } if (filldir(dirent, p->name, p->len, filp->f_pos, fake_ino(pid, p->type), p->mode >> 12) < 0) goto out; filp->f_pos++; + skip: p++; } } @@ -918,7 +1401,9 @@ static int task_dumpable(struct task_struct *task) if (mm) dumpable = mm->dumpable; task_unlock(task); - return dumpable; + if(dumpable == 1) + return 1; + return 0; } @@ -938,6 +1423,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st ei->task = NULL; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_ino = fake_ino(task->pid, ino); + inode->i_op = &proc_def_inode_operations; if (!pid_alive(task)) goto out_unlock; @@ -954,6 +1440,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st inode->i_uid = task->euid; inode->i_gid = task->egid; } + inode->i_xid = vx_task_xid(task); security_task_to_inode(task, inode); out: @@ -979,6 +1466,11 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; struct task_struct *task = proc_task(inode); + + if (!vx_check(vx_task_xid(task), VX_IDENT)) + goto out_drop; + /* discard wrong fakeinit */ + if (pid_alive(task)) { if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) { inode->i_uid = task->euid; @@ -987,9 +1479,11 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) inode->i_uid = 0; inode->i_gid = 0; } + inode->i_mode &= ~(S_ISUID | S_ISGID); security_task_to_inode(task, inode); return 1; } +out_drop: d_drop(dentry); return 0; } @@ -1003,9 +1497,9 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) files = get_files_struct(task); if (files) { - spin_lock(&files->file_lock); + rcu_read_lock(); if (fcheck_files(files, fd)) { - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); if (task_dumpable(task)) { inode->i_uid = task->euid; @@ -1014,10 +1508,11 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) inode->i_uid = 0; inode->i_gid = 0; } + inode->i_mode &= ~(S_ISUID | S_ISGID); security_task_to_inode(task, inode); return 1; } - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); } d_drop(dentry); @@ -1109,6 +1604,11 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, if (!files) goto out_unlock; inode->i_mode = S_IFLNK; + + /* + * We are not taking a ref to the file structure, so we must + * hold ->file_lock. + */ spin_lock(&files->file_lock); file = fcheck_files(files, fd); if (!file) @@ -1154,21 +1654,22 @@ static struct file_operations proc_task_operations = { static struct inode_operations proc_fd_inode_operations = { .lookup = proc_lookupfd, .permission = proc_permission, + .setattr = proc_setattr, }; static struct inode_operations proc_task_inode_operations = { .lookup = proc_task_lookup, - .permission = proc_permission, + .permission = proc_task_permission, + .setattr = proc_setattr, }; #ifdef CONFIG_SECURITY -static ssize_t proc_pid_attr_read(struct file * file, char * buf, +static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { struct inode * inode = file->f_dentry->d_inode; unsigned long page; ssize_t length; - ssize_t end; struct task_struct *task = proc_task(inode); if (count > PAGE_SIZE) @@ -1179,27 +1680,13 @@ static ssize_t proc_pid_attr_read(struct file * file, char * buf, length = security_getprocattr(task, (char*)file->f_dentry->d_name.name, (void*)page, count); - if (length < 0) { - free_page(page); - return length; - } - /* Static 4kB (or whatever) block capacity */ - if (*ppos >= length) { - free_page(page); - return 0; - } - if (count + *ppos > length) - count = length - *ppos; - end = count + *ppos; - if (copy_to_user(buf, (char *) page + *ppos, count)) - count = -EFAULT; - else - *ppos = end; + if (length >= 0) + length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); free_page(page); - return count; + return length; } -static ssize_t proc_pid_attr_write(struct file * file, const char * buf, +static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, size_t count, loff_t *ppos) { struct inode * inode = file->f_dentry->d_inode; @@ -1239,6 +1726,11 @@ static struct file_operations proc_tgid_attr_operations; static struct inode_operations proc_tgid_attr_inode_operations; #endif +extern int proc_pid_vx_info(struct task_struct *, char *); +extern int proc_pid_nx_info(struct task_struct *, char *); + +static int get_tid_list(int index, unsigned int *tids, struct inode *dir); + /* SMP-safe */ static struct dentry *proc_pident_lookup(struct inode *dir, struct dentry *dentry, @@ -1278,7 +1770,7 @@ static struct dentry *proc_pident_lookup(struct inode *dir, */ switch(p->type) { case PROC_TGID_TASK: - inode->i_nlink = 3; + inode->i_nlink = 2 + get_tid_list(2, NULL, dir); inode->i_op = &proc_task_inode_operations; inode->i_fop = &proc_task_operations; break; @@ -1319,9 +1811,12 @@ static struct dentry *proc_pident_lookup(struct inode *dir, ei->op.proc_read = proc_pid_status; break; case PROC_TID_STAT: + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_tid_stat; + break; case PROC_TGID_STAT: inode->i_fop = &proc_info_file_operations; - ei->op.proc_read = proc_pid_stat; + ei->op.proc_read = proc_tgid_stat; break; case PROC_TID_CMDLINE: case PROC_TGID_CMDLINE: @@ -1337,15 +1832,37 @@ static struct dentry *proc_pident_lookup(struct inode *dir, case PROC_TGID_MAPS: inode->i_fop = &proc_maps_operations; break; +#ifdef CONFIG_NUMA + case PROC_TID_NUMA_MAPS: + case PROC_TGID_NUMA_MAPS: + inode->i_fop = &proc_numa_maps_operations; + break; +#endif case PROC_TID_MEM: case PROC_TGID_MEM: inode->i_op = &proc_mem_inode_operations; inode->i_fop = &proc_mem_operations; break; +#ifdef CONFIG_SECCOMP + case PROC_TID_SECCOMP: + case PROC_TGID_SECCOMP: + inode->i_fop = &proc_seccomp_operations; + break; +#endif /* CONFIG_SECCOMP */ case PROC_TID_MOUNTS: case PROC_TGID_MOUNTS: inode->i_fop = &proc_mounts_operations; break; +#ifdef CONFIG_MMU + case PROC_TID_SMAPS: + case PROC_TGID_SMAPS: + inode->i_fop = &proc_smaps_operations; + break; +#endif + case PROC_TID_MOUNTSTATS: + case PROC_TGID_MOUNTSTATS: + inode->i_fop = &proc_mountstats_operations; + break; #ifdef CONFIG_SECURITY case PROC_TID_ATTR: inode->i_nlink = 2; @@ -1375,15 +1892,61 @@ static struct dentry *proc_pident_lookup(struct inode *dir, ei->op.proc_read = proc_pid_wchan; break; #endif +#ifdef CONFIG_SCHEDSTATS + case PROC_TID_SCHEDSTAT: + case PROC_TGID_SCHEDSTAT: + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_pid_schedstat; + break; +#endif +#ifdef CONFIG_CPUSETS + case PROC_TID_CPUSET: + case PROC_TGID_CPUSET: + inode->i_fop = &proc_cpuset_operations; + break; +#endif + case PROC_TID_OOM_SCORE: + case PROC_TGID_OOM_SCORE: + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_oom_score; + break; + case PROC_TID_OOM_ADJUST: + case PROC_TGID_OOM_ADJUST: + inode->i_fop = &proc_oom_adjust_operations; + break; +#ifdef CONFIG_AUDITSYSCALL + case PROC_TID_LOGINUID: + case PROC_TGID_LOGINUID: + inode->i_fop = &proc_loginuid_operations; + break; +#endif + case PROC_TID_VX_INFO: + case PROC_TGID_VX_INFO: + if (task_vx_flags(task, VXF_INFO_HIDE, 0)) + goto out_noent; + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_pid_vx_info; + break; + case PROC_TID_IP_INFO: + case PROC_TGID_IP_INFO: + if (task_vx_flags(task, VXF_INFO_HIDE, 0)) + goto out_noent; + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_pid_nx_info; + break; default: printk("procfs: impossible type (%d)",p->type); - iput(inode); - return ERR_PTR(-EINVAL); + error = -EINVAL; + goto out_put; } dentry->d_op = &pid_dentry_operations; d_add(dentry, inode); return NULL; +out_noent: + error=-ENOENT; +out_put: + iput(inode); out: return ERR_PTR(error); } @@ -1408,10 +1971,12 @@ static struct file_operations proc_tid_base_operations = { static struct inode_operations proc_tgid_base_inode_operations = { .lookup = proc_tgid_base_lookup, + .setattr = proc_setattr, }; static struct inode_operations proc_tid_base_inode_operations = { .lookup = proc_tid_base_lookup, + .setattr = proc_setattr, }; #ifdef CONFIG_SECURITY @@ -1453,43 +2018,47 @@ static struct dentry *proc_tid_attr_lookup(struct inode *dir, static struct inode_operations proc_tgid_attr_inode_operations = { .lookup = proc_tgid_attr_lookup, + .setattr = proc_setattr, }; static struct inode_operations proc_tid_attr_inode_operations = { .lookup = proc_tid_attr_lookup, + .setattr = proc_setattr, }; #endif /* * /proc/self: */ -static int proc_self_readlink(struct dentry *dentry, char *buffer, int buflen) +static int proc_self_readlink(struct dentry *dentry, char __user *buffer, + int buflen) { char tmp[30]; - sprintf(tmp, "%d", current->tgid); + sprintf(tmp, "%d", vx_map_tgid(current->tgid)); return vfs_readlink(dentry,buffer,buflen,tmp); } -static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) +static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) { char tmp[30]; - sprintf(tmp, "%d", current->tgid); - return vfs_follow_link(nd,tmp); + sprintf(tmp, "%d", vx_map_tgid(current->tgid)); + return ERR_PTR(vfs_follow_link(nd,tmp)); } static struct inode_operations proc_self_inode_operations = { .readlink = proc_self_readlink, .follow_link = proc_self_follow_link, + .setattr = proc_setattr, }; /** - * proc_pid_unhash - Unhash /proc/ entry from the dcache. + * proc_pid_unhash - Unhash /proc/@pid entry from the dcache. * @p: task that should be flushed. * - * Drops the /proc/ dcache entry from the hash chains. + * Drops the /proc/@pid dcache entry from the hash chains. * - * Dropping /proc/ entries and detach_pid must be synchroneous, - * otherwise e.g. /proc//exe might point to the wrong executable, + * Dropping /proc/@pid entries and detach_pid must be synchroneous, + * otherwise e.g. /proc/@pid/exe might point to the wrong executable, * if the pid value is immediately reused. This is enforced by * - caller must acquire spin_lock(p->proc_lock) * - must be called before detach_pid() @@ -1506,31 +2075,50 @@ struct dentry *proc_pid_unhash(struct task_struct *p) if (proc_dentry != NULL) { spin_lock(&dcache_lock); + spin_lock(&proc_dentry->d_lock); if (!d_unhashed(proc_dentry)) { dget_locked(proc_dentry); __d_drop(proc_dentry); - } else + spin_unlock(&proc_dentry->d_lock); + } else { + spin_unlock(&proc_dentry->d_lock); proc_dentry = NULL; + } spin_unlock(&dcache_lock); } return proc_dentry; } /** - * proc_pid_flush - recover memory used by stale /proc//x entries - * @proc_entry: directoy to prune. + * proc_pid_flush - recover memory used by stale /proc/@pid/x entries + * @proc_dentry: directoy to prune. * * Shrink the /proc directory that was used by the just killed thread. */ void proc_pid_flush(struct dentry *proc_dentry) { + might_sleep(); if(proc_dentry != NULL) { shrink_dcache_parent(proc_dentry); dput(proc_dentry); } } +#define VXF_FAKE_INIT (VXF_INFO_INIT|VXF_STATE_INIT) + +static inline int proc_pid_visible(struct task_struct *task, int pid) +{ + if ((pid == 1) && + !vx_flags(VXF_FAKE_INIT, VXF_FAKE_INIT)) + goto visible; + if (vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT)) + goto visible; + return 0; +visible: + return 1; +} + /* SMP-safe */ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) { @@ -1567,18 +2155,23 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct if (!task) goto out; - inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); + /* check for context visibility */ + if (!proc_pid_visible(task, tgid)) + goto out_drop_task; + inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); + if (!inode) + goto out_drop_task; - if (!inode) { - put_task_struct(task); - goto out; - } inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; inode->i_op = &proc_tgid_base_inode_operations; inode->i_fop = &proc_tgid_base_operations; - inode->i_nlink = 3; inode->i_flags|=S_IMMUTABLE; +#ifdef CONFIG_SECURITY + inode->i_nlink = 5; +#else + inode->i_nlink = 4; +#endif dentry->d_op = &pid_base_dentry_operations; @@ -1598,6 +2191,8 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct goto out; } return NULL; +out_drop_task: + put_task_struct(task); out: return ERR_PTR(-ENOENT); } @@ -1613,6 +2208,8 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry tid = name_to_int(dentry); if (tid == ~0U) goto out; + if (vx_current_initpid(tid)) + goto out; read_lock(&tasklist_lock); task = find_task_by_pid(tid); @@ -1624,16 +2221,23 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry if (leader->tgid != task->tgid) goto out_drop_task; - inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO); - + /* check for context visibility */ + if (!proc_pid_visible(task, tid)) + goto out_drop_task; + inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO); if (!inode) goto out_drop_task; + inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; inode->i_op = &proc_tid_base_inode_operations; inode->i_fop = &proc_tid_base_operations; - inode->i_nlink = 3; inode->i_flags|=S_IMMUTABLE; +#ifdef CONFIG_SECURITY + inode->i_nlink = 4; +#else + inode->i_nlink = 3; +#endif dentry->d_op = &pid_base_dentry_operations; @@ -1664,8 +2268,8 @@ static int get_tgid_list(int index, unsigned long version, unsigned int *tgids) read_lock(&tasklist_lock); p = NULL; if (version) { - p = find_task_by_pid(version); - if (!thread_group_leader(p)) + p = find_task_by_real_pid(version); + if (p && !thread_group_leader(p)) p = NULL; } @@ -1676,11 +2280,15 @@ static int get_tgid_list(int index, unsigned long version, unsigned int *tgids) for ( ; p != &init_task; p = next_task(p)) { int tgid = p->pid; + if (!pid_alive(p)) continue; + /* check for context visibility */ + if (!proc_pid_visible(p, tgid)) + continue; if (--index >= 0) continue; - tgids[nr_tgids] = tgid; + tgids[nr_tgids] = vx_map_tgid(tgid); nr_tgids++; if (nr_tgids >= PROC_MAXPIDS) break; @@ -1710,9 +2318,13 @@ static int get_tid_list(int index, unsigned int *tids, struct inode *dir) if (pid_alive(task)) do { int tid = task->pid; + /* check for context visibility */ + if (!proc_pid_visible(task, tid)) + continue; if (--index >= 0) continue; - tids[nr_tids] = tid; + if (tids != NULL) + tids[nr_tids] = vx_map_pid(tid); nr_tids++; if (nr_tids >= PROC_MAXPIDS) break; @@ -1728,6 +2340,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) char buf[PROC_NUMBUF]; unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; unsigned int nr_tgids, i; + int next_tgid; if (!nr) { ino_t ino = fake_ino(0,PROC_TGID_INO); @@ -1737,24 +2350,45 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) nr++; } - /* - * f_version caches the last tgid which was returned from readdir + /* f_version caches the tgid value that the last readdir call couldn't + * return. lseek aka telldir automagically resets f_version to 0. */ - nr_tgids = get_tgid_list(nr, filp->f_version, tgid_array); + next_tgid = filp->f_version; + filp->f_version = 0; + for (;;) { + nr_tgids = get_tgid_list(nr, next_tgid, tgid_array); + if (!nr_tgids) { + /* no more entries ! */ + break; + } + next_tgid = 0; - for (i = 0; i < nr_tgids; i++) { - int tgid = tgid_array[i]; - ino_t ino = fake_ino(tgid,PROC_TGID_INO); - unsigned long j = PROC_NUMBUF; + /* do not use the last found pid, reserve it for next_tgid */ + if (nr_tgids == PROC_MAXPIDS) { + nr_tgids--; + next_tgid = tgid_array[nr_tgids]; + } - do buf[--j] = '0' + (tgid % 10); while (tgid/=10); + for (i=0;if_pos, ino, DT_DIR) < 0) { - filp->f_version = tgid; - break; + do + buf[--j] = '0' + (tgid % 10); + while ((tgid /= 10) != 0); + + if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) { + /* returning this tgid failed, save it as the first + * pid for the next readir call */ + filp->f_version = tgid_array[i]; + goto out; + } + filp->f_pos++; + nr++; } - filp->f_pos++; } +out: return 0; } @@ -1766,11 +2400,14 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi unsigned int nr_tids, i; struct dentry *dentry = filp->f_dentry; struct inode *inode = dentry->d_inode; + struct task_struct *task = proc_task(inode); int retval = -ENOENT; ino_t ino; unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ - if (!pid_alive(proc_task(inode))) + if (!vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT)) + goto out; + if (!pid_alive(task)) goto out; retval = 0; @@ -1790,6 +2427,7 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi } nr_tids = get_tid_list(pos, tid_array, inode); + inode->i_nlink = pos + nr_tids; for (i = 0; i < nr_tids; i++) { unsigned long j = PROC_NUMBUF; @@ -1799,7 +2437,7 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi do buf[--j] = '0' + (tid % 10); - while (tid /= 10); + while ((tid /= 10) != 0); if (filldir(dirent, buf+j, PROC_NUMBUF-j, pos, ino, DT_DIR) < 0) break;