patch-2_6_7-vs1_9_1_12
[linux-2.6.git] / fs / proc / base.c
1 /*
2  *  linux/fs/proc/base.c
3  *
4  *  Copyright (C) 1991, 1992 Linus Torvalds
5  *
6  *  proc base directory handling functions
7  *
8  *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
9  *  Instead of using magical inumbers to determine the kind of object
10  *  we allocate and fill in-core inodes upon lookup. They don't even
11  *  go into icache. We cache the reference to task_struct upon lookup too.
12  *  Eventually it should become a filesystem in its own right. We don't use
13  *  the rest of procfs anymore.
14  */
15
16 #include <asm/uaccess.h>
17
18 #include <linux/config.h>
19 #include <linux/errno.h>
20 #include <linux/time.h>
21 #include <linux/proc_fs.h>
22 #include <linux/stat.h>
23 #include <linux/init.h>
24 #include <linux/file.h>
25 #include <linux/string.h>
26 #include <linux/seq_file.h>
27 #include <linux/namei.h>
28 #include <linux/namespace.h>
29 #include <linux/mm.h>
30 #include <linux/smp_lock.h>
31 #include <linux/kallsyms.h>
32 #include <linux/mount.h>
33 #include <linux/security.h>
34 #include <linux/ptrace.h>
35 #include <linux/vs_network.h>
36 #include <linux/vs_cvirt.h>
37
38 /*
39  * For hysterical raisins we keep the same inumbers as in the old procfs.
40  * Feel free to change the macro below - just keep the range distinct from
41  * inumbers of the rest of procfs (currently those are in 0x0000--0xffff).
42  * As soon as we get a separate superblock we will be able to forget
43  * about magical ranges too.
44  */
45
/*
 * Build a fake inode number for a per-process entry: the pid shifted left
 * by 16 bits, OR-ed with the entry's ino value (one of the
 * pid_directory_inos values, or PROC_TID_FD_DIR + fd for fd links).
 */
46 #define fake_ino(pid,ino) (((pid)<<16)|(ino))
47
/*
 * Type identifiers for the entries below a per-process (PROC_TGID_*) or
 * per-thread (PROC_TID_*) directory.  A value is stored as the `type' of a
 * pid_entry and of the proc inode, and is the low part passed to fake_ino().
 *
 * The *_ATTR* values only exist when CONFIG_SECURITY is set, so the numeric
 * values of the entries after them depend on the configuration.
 *
 * PROC_TID_FD_DIR is pinned to 0x8000: file descriptor links use
 * PROC_TID_FD_DIR + fd as their type, and proc_fd_link() subtracts
 * PROC_TID_FD_DIR again to recover the descriptor number.
 */
48 enum pid_directory_inos {
49         PROC_TGID_INO = 2,
50         PROC_TGID_TASK,
51         PROC_TGID_STATUS,
52         PROC_TGID_MEM,
53         PROC_TGID_CWD,
54         PROC_TGID_ROOT,
55         PROC_TGID_EXE,
56         PROC_TGID_FD,
57         PROC_TGID_ENVIRON,
58         PROC_TGID_AUXV,
59         PROC_TGID_CMDLINE,
60         PROC_TGID_STAT,
61         PROC_TGID_STATM,
62         PROC_TGID_MAPS,
63         PROC_TGID_MOUNTS,
64         PROC_TGID_WCHAN,
65 #ifdef CONFIG_SECURITY
66         PROC_TGID_ATTR,
67         PROC_TGID_ATTR_CURRENT,
68         PROC_TGID_ATTR_PREV,
69         PROC_TGID_ATTR_EXEC,
70         PROC_TGID_ATTR_FSCREATE,
71 #endif
72         PROC_TGID_VX_INFO,
73         PROC_TGID_IP_INFO,
74         PROC_TGID_FD_DIR,
75         PROC_TID_INO,
76         PROC_TID_STATUS,
77         PROC_TID_MEM,
78         PROC_TID_CWD,
79         PROC_TID_ROOT,
80         PROC_TID_EXE,
81         PROC_TID_FD,
82         PROC_TID_ENVIRON,
83         PROC_TID_AUXV,
84         PROC_TID_CMDLINE,
85         PROC_TID_STAT,
86         PROC_TID_STATM,
87         PROC_TID_MAPS,
88         PROC_TID_MOUNTS,
89         PROC_TID_WCHAN,
90 #ifdef CONFIG_SECURITY
91         PROC_TID_ATTR,
92         PROC_TID_ATTR_CURRENT,
93         PROC_TID_ATTR_PREV,
94         PROC_TID_ATTR_EXEC,
95         PROC_TID_ATTR_FSCREATE,
96 #endif
97         PROC_TID_VX_INFO,
98         PROC_TID_IP_INFO,
99         PROC_TID_FD_DIR = 0x8000,       /* 0x8000-0xffff */
100 };
101
/*
 * One named entry of a per-process directory table.
 *
 *   type - pid_directory_inos value identifying the entry
 *   len  - length of name, without the terminating NUL
 *   name - entry name; a NULL name terminates a table
 *   mode - file type and permission bits (S_IF* | S_I*)
 */
102 struct pid_entry {
103         int type;
104         int len;
105         char *name;
106         mode_t mode;
107 };
108
/*
 * Build a pid_entry initializer: type, name length (sizeof the name minus
 * its terminating NUL), name, mode.  The name must be a string literal so
 * that sizeof yields its length.
 */
109 #define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)}
110
/*
 * Entries of a thread-group (per-process) directory.  "attr" is only
 * present with CONFIG_SECURITY and "wchan" only with CONFIG_KALLSYMS.
 * The table ends with an all-zero entry (NULL name).
 */
111 static struct pid_entry tgid_base_stuff[] = {
112         E(PROC_TGID_TASK,      "task",    S_IFDIR|S_IRUGO|S_IXUGO),
113         E(PROC_TGID_FD,        "fd",      S_IFDIR|S_IRUSR|S_IXUSR),
114         E(PROC_TGID_ENVIRON,   "environ", S_IFREG|S_IRUSR),
115         E(PROC_TGID_AUXV,      "auxv",    S_IFREG|S_IRUSR),
116         E(PROC_TGID_STATUS,    "status",  S_IFREG|S_IRUGO),
117         E(PROC_TGID_CMDLINE,   "cmdline", S_IFREG|S_IRUGO),
118         E(PROC_TGID_STAT,      "stat",    S_IFREG|S_IRUGO),
119         E(PROC_TGID_STATM,     "statm",   S_IFREG|S_IRUGO),
120         E(PROC_TGID_MAPS,      "maps",    S_IFREG|S_IRUGO),
121         E(PROC_TGID_MEM,       "mem",     S_IFREG|S_IRUSR|S_IWUSR),
122         E(PROC_TGID_CWD,       "cwd",     S_IFLNK|S_IRWXUGO),
123         E(PROC_TGID_ROOT,      "root",    S_IFLNK|S_IRWXUGO),
124         E(PROC_TGID_EXE,       "exe",     S_IFLNK|S_IRWXUGO),
125         E(PROC_TGID_MOUNTS,    "mounts",  S_IFREG|S_IRUGO),
126 #ifdef CONFIG_SECURITY
127         E(PROC_TGID_ATTR,      "attr",    S_IFDIR|S_IRUGO|S_IXUGO),
128 #endif
129 #ifdef CONFIG_KALLSYMS
130         E(PROC_TGID_WCHAN,     "wchan",   S_IFREG|S_IRUGO),
131 #endif
132         E(PROC_TGID_VX_INFO,   "vinfo",   S_IFREG|S_IRUGO),
133         E(PROC_TGID_IP_INFO,   "ninfo",   S_IFREG|S_IRUGO),
134         {0,0,NULL,0}
135 };
/*
 * Entries of a per-thread directory.  Same set as tgid_base_stuff except
 * that there is no "task" entry.  The table ends with an all-zero entry.
 */
136 static struct pid_entry tid_base_stuff[] = {
137         E(PROC_TID_FD,         "fd",      S_IFDIR|S_IRUSR|S_IXUSR),
138         E(PROC_TID_ENVIRON,    "environ", S_IFREG|S_IRUSR),
139         E(PROC_TID_AUXV,       "auxv",    S_IFREG|S_IRUSR),
140         E(PROC_TID_STATUS,     "status",  S_IFREG|S_IRUGO),
141         E(PROC_TID_CMDLINE,    "cmdline", S_IFREG|S_IRUGO),
142         E(PROC_TID_STAT,       "stat",    S_IFREG|S_IRUGO),
143         E(PROC_TID_STATM,      "statm",   S_IFREG|S_IRUGO),
144         E(PROC_TID_MAPS,       "maps",    S_IFREG|S_IRUGO),
145         E(PROC_TID_MEM,        "mem",     S_IFREG|S_IRUSR|S_IWUSR),
146         E(PROC_TID_CWD,        "cwd",     S_IFLNK|S_IRWXUGO),
147         E(PROC_TID_ROOT,       "root",    S_IFLNK|S_IRWXUGO),
148         E(PROC_TID_EXE,        "exe",     S_IFLNK|S_IRWXUGO),
149         E(PROC_TID_MOUNTS,     "mounts",  S_IFREG|S_IRUGO),
150 #ifdef CONFIG_SECURITY
151         E(PROC_TID_ATTR,       "attr",    S_IFDIR|S_IRUGO|S_IXUGO),
152 #endif
153 #ifdef CONFIG_KALLSYMS
154         E(PROC_TID_WCHAN,      "wchan",   S_IFREG|S_IRUGO),
155 #endif
156         E(PROC_TID_VX_INFO,    "vinfo",   S_IFREG|S_IRUGO),
157         E(PROC_TID_IP_INFO,    "ninfo",   S_IFREG|S_IRUGO),
158         {0,0,NULL,0}
159 };
160
161 #ifdef CONFIG_SECURITY
/*
 * Entries of the "attr" directory of a thread group.  "prev" is read-only;
 * the other entries are also writable.  Ends with an all-zero entry.
 */
162 static struct pid_entry tgid_attr_stuff[] = {
163         E(PROC_TGID_ATTR_CURRENT,  "current",  S_IFREG|S_IRUGO|S_IWUGO),
164         E(PROC_TGID_ATTR_PREV,     "prev",     S_IFREG|S_IRUGO),
165         E(PROC_TGID_ATTR_EXEC,     "exec",     S_IFREG|S_IRUGO|S_IWUGO),
166         E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO),
167         {0,0,NULL,0}
168 };
/*
 * Entries of the "attr" directory of a single thread; mirrors
 * tgid_attr_stuff with the PROC_TID_* types.  Ends with an all-zero entry.
 */
169 static struct pid_entry tid_attr_stuff[] = {
170         E(PROC_TID_ATTR_CURRENT,   "current",  S_IFREG|S_IRUGO|S_IWUGO),
171         E(PROC_TID_ATTR_PREV,      "prev",     S_IFREG|S_IRUGO),
172         E(PROC_TID_ATTR_EXEC,      "exec",     S_IFREG|S_IRUGO|S_IWUGO),
173         E(PROC_TID_ATTR_FSCREATE,  "fscreate", S_IFREG|S_IRUGO|S_IWUGO),
174         {0,0,NULL,0}
175 };
176 #endif
177
178 #undef E
179
180 static inline struct task_struct *proc_task(struct inode *inode)
181 {
182         return PROC_I(inode)->task;
183 }
184
185 static inline int proc_type(struct inode *inode)
186 {
187         return PROC_I(inode)->type;
188 }
189
/*
 * Content generators defined outside this part of the file.  Each takes the
 * task and an output buffer and returns an int -- presumably the number of
 * bytes written, like the generators below (TODO confirm at the definitions).
 */
190 int proc_pid_stat(struct task_struct*,char*);
191 int proc_pid_status(struct task_struct*,char*);
192 int proc_pid_statm(struct task_struct*,char*);
193 int proc_pid_cpu(struct task_struct*,char*);
194
195 static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
196 {
197         struct task_struct *task = proc_task(inode);
198         struct files_struct *files;
199         struct file *file;
200         int fd = proc_type(inode) - PROC_TID_FD_DIR;
201
202         files = get_files_struct(task);
203         if (files) {
204                 spin_lock(&files->file_lock);
205                 file = fcheck_files(files, fd);
206                 if (file) {
207                         *mnt = mntget(file->f_vfsmnt);
208                         *dentry = dget(file->f_dentry);
209                         spin_unlock(&files->file_lock);
210                         put_files_struct(files);
211                         return 0;
212                 }
213                 spin_unlock(&files->file_lock);
214                 put_files_struct(files);
215         }
216         return -ENOENT;
217 }
218
219 static int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
220 {
221         struct vm_area_struct * vma;
222         int result = -ENOENT;
223         struct task_struct *task = proc_task(inode);
224         struct mm_struct * mm = get_task_mm(task);
225
226         if (!mm)
227                 goto out;
228         down_read(&mm->mmap_sem);
229         vma = mm->mmap;
230         while (vma) {
231                 if ((vma->vm_flags & VM_EXECUTABLE) && 
232                     vma->vm_file) {
233                         *mnt = mntget(vma->vm_file->f_vfsmnt);
234                         *dentry = dget(vma->vm_file->f_dentry);
235                         result = 0;
236                         break;
237                 }
238                 vma = vma->vm_next;
239         }
240         up_read(&mm->mmap_sem);
241         mmput(mm);
242 out:
243         return result;
244 }
245
246 static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
247 {
248         struct fs_struct *fs;
249         int result = -ENOENT;
250         task_lock(proc_task(inode));
251         fs = proc_task(inode)->fs;
252         if(fs)
253                 atomic_inc(&fs->count);
254         task_unlock(proc_task(inode));
255         if (fs) {
256                 read_lock(&fs->lock);
257                 *mnt = mntget(fs->pwdmnt);
258                 *dentry = dget(fs->pwd);
259                 read_unlock(&fs->lock);
260                 result = 0;
261                 put_fs_struct(fs);
262         }
263         return result;
264 }
265
266 static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
267 {
268         struct fs_struct *fs;
269         int result = -ENOENT;
270         task_lock(proc_task(inode));
271         fs = proc_task(inode)->fs;
272         if(fs)
273                 atomic_inc(&fs->count);
274         task_unlock(proc_task(inode));
275         if (fs) {
276                 read_lock(&fs->lock);
277                 *mnt = mntget(fs->rootmnt);
278                 *dentry = dget(fs->root);
279                 read_unlock(&fs->lock);
280                 result = 0;
281                 put_fs_struct(fs);
282         }
283         return result;
284 }
285
/*
 * True when current may access @task's memory through /proc: either @task
 * is current itself, or @task is a child of current that is being ptraced,
 * is in TASK_STOPPED state, and security_ptrace() returns 0 for the pair.
 *
 * The argument is parenthesized so that any pointer expression can be
 * passed; note that it is still evaluated more than once.
 */
#define MAY_PTRACE(task) \
        ((task) == current || \
        ((task)->parent == current && \
        ((task)->ptrace & PT_PTRACED) &&  (task)->state == TASK_STOPPED && \
         security_ptrace(current,(task)) == 0))
291
/*
 * Decide whether current would be allowed to attach to @task.
 *
 * All checks run with @task locked.  Returns 1 when access is allowed and
 * 0 otherwise:
 *   - @task must have an mm;
 *   - current's uid/gid must match all of @task's real, effective and saved
 *     ids, unless current has CAP_SYS_PTRACE;
 *   - @task's mm must be dumpable, unless current has CAP_SYS_PTRACE;
 *   - security_ptrace() must return 0.
 */
292 static int may_ptrace_attach(struct task_struct *task)
293 {
294         int retval = 0;
295
296         task_lock(task);
297
298         if (!task->mm)
299                 goto out;
300         if (((current->uid != task->euid) ||
301              (current->uid != task->suid) ||
302              (current->uid != task->uid) ||
303              (current->gid != task->egid) ||
304              (current->gid != task->sgid) ||
305              (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
306                 goto out;
        /* read barrier between the credential checks and the dumpable read */
307         rmb();
308         if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
309                 goto out;
310         if (security_ptrace(current, task))
311                 goto out;
312
313         retval = 1;
314 out:
315         task_unlock(task);
316         return retval;
317 }
318
319 static int proc_pid_environ(struct task_struct *task, char * buffer)
320 {
321         int res = 0;
322         struct mm_struct *mm = get_task_mm(task);
323         if (mm) {
324                 unsigned int len = mm->env_end - mm->env_start;
325                 if (len > PAGE_SIZE)
326                         len = PAGE_SIZE;
327                 res = access_process_vm(task, mm->env_start, buffer, len, 0);
328                 if (!may_ptrace_attach(task))
329                         res = -ESRCH;
330                 mmput(mm);
331         }
332         return res;
333 }
334
335 static int proc_pid_cmdline(struct task_struct *task, char * buffer)
336 {
337         int res = 0;
338         unsigned int len;
339         struct mm_struct *mm = get_task_mm(task);
340         if (!mm)
341                 goto out;
342
343         len = mm->arg_end - mm->arg_start;
344  
345         if (len > PAGE_SIZE)
346                 len = PAGE_SIZE;
347  
348         res = access_process_vm(task, mm->arg_start, buffer, len, 0);
349
350         // If the nul at the end of args has been overwritten, then
351         // assume application is using setproctitle(3).
352         if (res > 0 && buffer[res-1] != '\0') {
353                 len = strnlen(buffer, res);
354                 if (len < res) {
355                     res = len;
356                 } else {
357                         len = mm->env_end - mm->env_start;
358                         if (len > PAGE_SIZE - res)
359                                 len = PAGE_SIZE - res;
360                         res += access_process_vm(task, mm->env_start, buffer+res, len, 0);
361                         res = strnlen(buffer, res);
362                 }
363         }
364         mmput(mm);
365
366 out:
367         return res;
368 }
369
370 static int proc_pid_auxv(struct task_struct *task, char *buffer)
371 {
372         int res = 0;
373         struct mm_struct *mm = get_task_mm(task);
374         if (mm) {
375                 unsigned int nwords = 0;
376                 do
377                         nwords += 2;
378                 while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
379                 res = nwords * sizeof(mm->saved_auxv[0]);
380                 if (res > PAGE_SIZE)
381                         res = PAGE_SIZE;
382                 memcpy(buffer, mm->saved_auxv, res);
383                 mmput(mm);
384         }
385         return res;
386 }
387
388
389 #ifdef CONFIG_KALLSYMS
/*
 * Back end of the "wchan" file: one value per file.
 *
 * Looks up the address returned by get_wchan() through kallsyms and writes
 * the symbol name into @buffer; when no symbol is found the raw address is
 * written in decimal instead.  Returns the number of characters written.
 */
static int proc_pid_wchan(struct task_struct *task, char *buffer)
{
        char namebuf[128];
        char *modname;
        unsigned long size, offset;
        unsigned long wchan = get_wchan(task);
        const char *sym_name;

        sym_name = kallsyms_lookup(wchan, &size, &offset, &modname, namebuf);
        if (!sym_name)
                return sprintf(buffer, "%lu", wchan);

        return sprintf(buffer, "%s", sym_name);
}
408 #endif /* CONFIG_KALLSYMS */
409
410 /************************************************************************/
411 /*                       Here the fs part begins                        */
412 /************************************************************************/
413
414 /* permission checks */
415
416 static int proc_check_root(struct inode *inode)
417 {
418         struct dentry *de, *base, *root;
419         struct vfsmount *our_vfsmnt, *vfsmnt, *mnt;
420         int res = 0;
421
422         if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */
423                 return -ENOENT;
424         read_lock(&current->fs->lock);
425         our_vfsmnt = mntget(current->fs->rootmnt);
426         base = dget(current->fs->root);
427         read_unlock(&current->fs->lock);
428
429         spin_lock(&vfsmount_lock);
430         de = root;
431         mnt = vfsmnt;
432
433         while (vfsmnt != our_vfsmnt) {
434                 if (vfsmnt == vfsmnt->mnt_parent)
435                         goto out;
436                 de = vfsmnt->mnt_mountpoint;
437                 vfsmnt = vfsmnt->mnt_parent;
438         }
439
440         if (!is_subdir(de, base))
441                 goto out;
442         spin_unlock(&vfsmount_lock);
443
444 exit:
445         dput(base);
446         mntput(our_vfsmnt);
447         dput(root);
448         mntput(mnt);
449         return res;
450 out:
451         spin_unlock(&vfsmount_lock);
452         res = -EACCES;
453         goto exit;
454 }
455
456 static int proc_permission(struct inode *inode, int mask, struct nameidata *nd)
457 {
458         if (vfs_permission(inode, mask) != 0)
459                 return -EACCES;
460         return proc_check_root(inode);
461 }
462
463 extern struct seq_operations proc_pid_maps_op;
464 static int maps_open(struct inode *inode, struct file *file)
465 {
466         struct task_struct *task = proc_task(inode);
467         int ret = seq_open(file, &proc_pid_maps_op);
468         if (!ret) {
469                 struct seq_file *m = file->private_data;
470                 m->private = task;
471         }
472         return ret;
473 }
474
/* File operations of the "maps" file: seq_file based, opened by maps_open(). */
475 static struct file_operations proc_maps_operations = {
476         .open           = maps_open,
477         .read           = seq_read,
478         .llseek         = seq_lseek,
479         .release        = seq_release,
480 };
481
482 extern struct seq_operations mounts_op;
483 static int mounts_open(struct inode *inode, struct file *file)
484 {
485         struct task_struct *task = proc_task(inode);
486         int ret = seq_open(file, &mounts_op);
487
488         if (!ret) {
489                 struct seq_file *m = file->private_data;
490                 struct namespace *namespace;
491                 task_lock(task);
492                 namespace = task->namespace;
493                 if (namespace)
494                         get_namespace(namespace);
495                 task_unlock(task);
496
497                 if (namespace)
498                         m->private = namespace;
499                 else {
500                         seq_release(inode, file);
501                         ret = -EINVAL;
502                 }
503         }
504         return ret;
505 }
506
507 static int mounts_release(struct inode *inode, struct file *file)
508 {
509         struct seq_file *m = file->private_data;
510         struct namespace *namespace = m->private;
511         put_namespace(namespace);
512         return seq_release(inode, file);
513 }
514
/*
 * File operations of the "mounts" file: seq_file based; open and release
 * additionally take and drop a namespace reference.
 */
515 static struct file_operations proc_mounts_operations = {
516         .open           = mounts_open,
517         .read           = seq_read,
518         .llseek         = seq_lseek,
519         .release        = mounts_release,
520 };
521
522 #define PROC_BLOCK_SIZE (3*1024)                /* 4K page size but our output routines use some slack for overruns */
523
524 static ssize_t proc_info_read(struct file * file, char __user * buf,
525                           size_t count, loff_t *ppos)
526 {
527         struct inode * inode = file->f_dentry->d_inode;
528         unsigned long page;
529         ssize_t length;
530         ssize_t end;
531         struct task_struct *task = proc_task(inode);
532
533         if (count > PROC_BLOCK_SIZE)
534                 count = PROC_BLOCK_SIZE;
535         if (!(page = __get_free_page(GFP_KERNEL)))
536                 return -ENOMEM;
537
538         length = PROC_I(inode)->op.proc_read(task, (char*)page);
539
540         if (length < 0) {
541                 free_page(page);
542                 return length;
543         }
544         /* Static 4kB (or whatever) block capacity */
545         if (*ppos >= length) {
546                 free_page(page);
547                 return 0;
548         }
549         if (count + *ppos > length)
550                 count = length - *ppos;
551         end = count + *ppos;
552         if (copy_to_user(buf, (char *) page + *ppos, count))
553                 count = -EFAULT;
554         else
555                 *ppos = end;
556         free_page(page);
557         return count;
558 }
559
/* File operations of the one-page info files: read only, no seek/open hooks. */
560 static struct file_operations proc_info_file_operations = {
561         .read           = proc_info_read,
562 };
563
564 static int mem_open(struct inode* inode, struct file* file)
565 {
566         file->private_data = (void*)((long)current->self_exec_id);
567         return 0;
568 }
569
570 static ssize_t mem_read(struct file * file, char __user * buf,
571                         size_t count, loff_t *ppos)
572 {
573         struct task_struct *task = proc_task(file->f_dentry->d_inode);
574         char *page;
575         unsigned long src = *ppos;
576         int ret = -ESRCH;
577         struct mm_struct *mm;
578
579         if (!MAY_PTRACE(task) || !may_ptrace_attach(task))
580                 goto out;
581
582         ret = -ENOMEM;
583         page = (char *)__get_free_page(GFP_USER);
584         if (!page)
585                 goto out;
586
587         ret = 0;
588  
589         mm = get_task_mm(task);
590         if (!mm)
591                 goto out_free;
592
593         ret = -EIO;
594  
595         if (file->private_data != (void*)((long)current->self_exec_id))
596                 goto out_put;
597
598         ret = 0;
599  
600         while (count > 0) {
601                 int this_len, retval;
602
603                 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
604                 retval = access_process_vm(task, src, page, this_len, 0);
605                 if (!retval || !MAY_PTRACE(task) || !may_ptrace_attach(task)) {
606                         if (!ret)
607                                 ret = -EIO;
608                         break;
609                 }
610
611                 if (copy_to_user(buf, page, retval)) {
612                         ret = -EFAULT;
613                         break;
614                 }
615  
616                 ret += retval;
617                 src += retval;
618                 buf += retval;
619                 count -= retval;
620         }
621         *ppos = src;
622
623 out_put:
624         mmput(mm);
625 out_free:
626         free_page((unsigned long) page);
627 out:
628         return ret;
629 }
630
631 #define mem_write NULL
632
633 #ifndef mem_write
634 /* This is a security hazard */
635 static ssize_t mem_write(struct file * file, const char * buf,
636                          size_t count, loff_t *ppos)
637 {
638         int copied = 0;
639         char *page;
640         struct task_struct *task = proc_task(file->f_dentry->d_inode);
641         unsigned long dst = *ppos;
642
643         if (!MAY_PTRACE(task) || !may_ptrace_attach(task))
644                 return -ESRCH;
645
646         page = (char *)__get_free_page(GFP_USER);
647         if (!page)
648                 return -ENOMEM;
649
650         while (count > 0) {
651                 int this_len, retval;
652
653                 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
654                 if (copy_from_user(page, buf, this_len)) {
655                         copied = -EFAULT;
656                         break;
657                 }
658                 retval = access_process_vm(task, dst, page, this_len, 1);
659                 if (!retval) {
660                         if (!copied)
661                                 copied = -EIO;
662                         break;
663                 }
664                 copied += retval;
665                 buf += retval;
666                 dst += retval;
667                 count -= retval;                        
668         }
669         *ppos = dst;
670         free_page((unsigned long) page);
671         return copied;
672 }
673 #endif
674
675 static loff_t mem_lseek(struct file * file, loff_t offset, int orig)
676 {
677         switch (orig) {
678         case 0:
679                 file->f_pos = offset;
680                 break;
681         case 1:
682                 file->f_pos += offset;
683                 break;
684         default:
685                 return -EINVAL;
686         }
687         force_successful_syscall_return();
688         return file->f_pos;
689 }
690
/*
 * File operations of the "mem" file.  mem_write is #defined to NULL above,
 * so no write method is installed.
 */
691 static struct file_operations proc_mem_operations = {
692         .llseek         = mem_lseek,
693         .read           = mem_read,
694         .write          = mem_write,
695         .open           = mem_open,
696 };
697
/* Inode operations of the "mem" file: only the permission hook is set. */
698 static struct inode_operations proc_mem_inode_operations = {
699         .permission     = proc_permission,
700 };
701
702 static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
703 {
704         struct inode *inode = dentry->d_inode;
705         int error = -EACCES;
706
707         /* We don't need a base pointer in the /proc filesystem */
708         path_release(nd);
709
710         if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE))
711                 goto out;
712         error = proc_check_root(inode);
713         if (error)
714                 goto out;
715
716         error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt);
717         nd->last_type = LAST_BIND;
718 out:
719         return error;
720 }
721
722 static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt,
723                             char __user *buffer, int buflen)
724 {
725         struct inode * inode;
726         char *tmp = (char*)__get_free_page(GFP_KERNEL), *path;
727         int len;
728
729         if (!tmp)
730                 return -ENOMEM;
731                 
732         inode = dentry->d_inode;
733         path = d_path(dentry, mnt, tmp, PAGE_SIZE);
734         len = PTR_ERR(path);
735         if (IS_ERR(path))
736                 goto out;
737         len = tmp + PAGE_SIZE - 1 - path;
738
739         if (len > buflen)
740                 len = buflen;
741         if (copy_to_user(buffer, path, len))
742                 len = -EFAULT;
743  out:
744         free_page((unsigned long)tmp);
745         return len;
746 }
747
748 static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
749 {
750         int error = -EACCES;
751         struct inode *inode = dentry->d_inode;
752         struct dentry *de;
753         struct vfsmount *mnt = NULL;
754
755         lock_kernel();
756
757         if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE))
758                 goto out;
759         error = proc_check_root(inode);
760         if (error)
761                 goto out;
762
763         error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt);
764         if (error)
765                 goto out;
766
767         error = do_proc_readlink(de, mnt, buffer, buflen);
768         dput(de);
769         mntput(mnt);
770 out:
771         unlock_kernel();
772         return error;
773 }
774
/* Inode operations of the per-process symbolic links. */
775 static struct inode_operations proc_pid_link_inode_operations = {
776         .readlink       = proc_pid_readlink,
777         .follow_link    = proc_pid_follow_link
778 };
779
780 static int pid_alive(struct task_struct *p)
781 {
782         BUG_ON(p->pids[PIDTYPE_PID].pidptr != &p->pids[PIDTYPE_PID].pid);
783         return atomic_read(&p->pids[PIDTYPE_PID].pid.count);
784 }
785
/*
 * Size of the scratch buffer used to format a number as decimal digits;
 * proc_readfd() fills it from the end and does not NUL-terminate it.
 */
786 #define NUMBUF 10
787
/*
 * readdir for a task's "fd" directory.
 *
 * Emits ".", "..", and then one DT_LNK entry per open file descriptor of
 * the task, named by the descriptor number in decimal.  Directory position
 * fd + 2 corresponds to descriptor fd.
 *
 * Returns -ENOENT when the task is no longer alive, 0 otherwise (also when
 * filldir stops the listing early or the task has no files_struct).
 */
788 static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
789 {
790         struct inode *inode = filp->f_dentry->d_inode;
791         struct task_struct *p = proc_task(inode);
792         unsigned int fd, tid, ino;
793         int retval;
794         char buf[NUMBUF];
795         struct files_struct * files;
796
797         retval = -ENOENT;
798         if (!pid_alive(p))
799                 goto out;
800         retval = 0;
801         tid = p->pid;
802
803         fd = filp->f_pos;
        /* cases 0 and 1 deliberately fall through to the next case */
804         switch (fd) {
805                 case 0:
806                         if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
807                                 goto out;
808                         filp->f_pos++;
809                 case 1:
810                         ino = fake_ino(tid, PROC_TID_INO);
811                         if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
812                                 goto out;
813                         filp->f_pos++;
814                 default:
815                         files = get_files_struct(p);
816                         if (!files)
817                                 goto out;
818                         spin_lock(&files->file_lock);
819                         for (fd = filp->f_pos-2;
820                              fd < files->max_fds;
821                              fd++, filp->f_pos++) {
822                                 unsigned int i,j;
823
824                                 if (!fcheck_files(files, fd))
825                                         continue;
                                /* file_lock is dropped around filldir and re-taken afterwards */
826                                 spin_unlock(&files->file_lock);
827
                                /* format fd as decimal digits, filling buf from the end */
828                                 j = NUMBUF;
829                                 i = fd;
830                                 do {
831                                         j--;
832                                         buf[j] = '0' + (i % 10);
833                                         i /= 10;
834                                 } while (i);
835
836                                 ino = fake_ino(tid, PROC_TID_FD_DIR + fd);
837                                 if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) {
                                        /* re-take the lock so the unlock after the loop balances */
838                                         spin_lock(&files->file_lock);
839                                         break;
840                                 }
841                                 spin_lock(&files->file_lock);
842                         }
843                         spin_unlock(&files->file_lock);
844                         put_files_struct(files);
845         }
846 out:
847         return retval;
848 }
849
850 static int proc_pident_readdir(struct file *filp,
851                 void *dirent, filldir_t filldir,
852                 struct pid_entry *ents, unsigned int nents)
853 {
854         int i;
855         int pid;
856         struct dentry *dentry = filp->f_dentry;
857         struct inode *inode = dentry->d_inode;
858         struct pid_entry *p;
859         ino_t ino;
860         int ret;
861
862         ret = -ENOENT;
863         if (!pid_alive(proc_task(inode)))
864                 goto out;
865
866         ret = 0;
867         pid = proc_task(inode)->pid;
868         i = filp->f_pos;
869         switch (i) {
870         case 0:
871                 ino = inode->i_ino;
872                 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
873                         goto out;
874                 i++;
875                 filp->f_pos++;
876                 /* fall through */
877         case 1:
878                 ino = parent_ino(dentry);
879                 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
880                         goto out;
881                 i++;
882                 filp->f_pos++;
883                 /* fall through */
884         default:
885                 i -= 2;
886                 if (i >= nents) {
887                         ret = 1;
888                         goto out;
889                 }
890                 p = ents + i;
891                 while (p->name) {
892                         if (filldir(dirent, p->name, p->len, filp->f_pos,
893                                     fake_ino(pid, p->type), p->mode >> 12) < 0)
894                                 goto out;
895                         filp->f_pos++;
896                         p++;
897                 }
898         }
899
900         ret = 1;
901 out:
902         return ret;
903 }
904
905 static int proc_tgid_base_readdir(struct file * filp,
906                              void * dirent, filldir_t filldir)
907 {
908         return proc_pident_readdir(filp,dirent,filldir,
909                                    tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff));
910 }
911
912 static int proc_tid_base_readdir(struct file * filp,
913                              void * dirent, filldir_t filldir)
914 {
915         return proc_pident_readdir(filp,dirent,filldir,
916                                    tid_base_stuff,ARRAY_SIZE(tid_base_stuff));
917 }
918
919 /* building an inode */
920
921 static int task_dumpable(struct task_struct *task)
922 {
923         int dumpable = 0;
924         struct mm_struct *mm;
925
926         task_lock(task);
927         mm = task->mm;
928         if (mm)
929                 dumpable = mm->dumpable;
930         task_unlock(task);
931         return dumpable;
932 }
933
934
935 static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, int ino)
936 {
937         struct inode * inode;
938         struct proc_inode *ei;
939
940         /* We need a new inode */
941         
942         inode = new_inode(sb);
943         if (!inode)
944                 goto out;
945
946         /* Common stuff */
947         ei = PROC_I(inode);
948         ei->task = NULL;
949         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
950         inode->i_ino = fake_ino(task->pid, ino);
951
952         if (!pid_alive(task))
953                 goto out_unlock;
954
955         /*
956          * grab the reference to task.
957          */
958         get_task_struct(task);
959         ei->task = task;
960         ei->type = ino;
961         inode->i_uid = 0;
962         inode->i_gid = 0;
963         if (ino == PROC_TGID_INO || ino == PROC_TID_INO || task_dumpable(task)) {
964                 inode->i_uid = task->euid;
965                 inode->i_gid = task->egid;
966         }
967         inode->i_xid = vx_task_xid(task);
968         security_task_to_inode(task, inode);
969
970 out:
971         return inode;
972
973 out_unlock:
974         ei->pde = NULL;
975         iput(inode);
976         return NULL;
977 }
978
979 /* dentry stuff */
980
981 /*
982  *      Exceptional case: normally we are not allowed to unhash a busy
983  * directory. In this case, however, we can do it - no aliasing problems
984  * due to the way we treat inodes.
985  *
986  * Rewrite the inode's ownerships here because the owning task may have
987  * performed a setuid(), etc.
988  */
989 static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
990 {
991         struct inode *inode = dentry->d_inode;
992         struct task_struct *task = proc_task(inode);
993
994         if (!vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT))
995                 goto out_drop;
996         /* discard wrong fakeinit */
997
998         if (pid_alive(task)) {
999                 if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) {
1000                         inode->i_uid = task->euid;
1001                         inode->i_gid = task->egid;
1002                 } else {
1003                         inode->i_uid = 0;
1004                         inode->i_gid = 0;
1005                 }
1006                 security_task_to_inode(task, inode);
1007                 return 1;
1008         }
1009 out_drop:
1010         d_drop(dentry);
1011         return 0;
1012 }
1013
1014 static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1015 {
1016         struct inode *inode = dentry->d_inode;
1017         struct task_struct *task = proc_task(inode);
1018         int fd = proc_type(inode) - PROC_TID_FD_DIR;
1019         struct files_struct *files;
1020
1021         files = get_files_struct(task);
1022         if (files) {
1023                 spin_lock(&files->file_lock);
1024                 if (fcheck_files(files, fd)) {
1025                         spin_unlock(&files->file_lock);
1026                         put_files_struct(files);
1027                         if (task_dumpable(task)) {
1028                                 inode->i_uid = task->euid;
1029                                 inode->i_gid = task->egid;
1030                         } else {
1031                                 inode->i_uid = 0;
1032                                 inode->i_gid = 0;
1033                         }
1034                         security_task_to_inode(task, inode);
1035                         return 1;
1036                 }
1037                 spin_unlock(&files->file_lock);
1038                 put_files_struct(files);
1039         }
1040         d_drop(dentry);
1041         return 0;
1042 }
1043
1044 static void pid_base_iput(struct dentry *dentry, struct inode *inode)
1045 {
1046         struct task_struct *task = proc_task(inode);
1047         spin_lock(&task->proc_lock);
1048         if (task->proc_dentry == dentry)
1049                 task->proc_dentry = NULL;
1050         spin_unlock(&task->proc_lock);
1051         iput(inode);
1052 }
1053
1054 static int pid_delete_dentry(struct dentry * dentry)
1055 {
1056         /* Is the task we represent dead?
1057          * If so, then don't put the dentry on the lru list,
1058          * kill it immediately.
1059          */
1060         return !pid_alive(proc_task(dentry->d_inode));
1061 }
1062
1063 static struct dentry_operations tid_fd_dentry_operations =
1064 {
1065         .d_revalidate   = tid_fd_revalidate,
1066         .d_delete       = pid_delete_dentry,
1067 };
1068
1069 static struct dentry_operations pid_dentry_operations =
1070 {
1071         .d_revalidate   = pid_revalidate,
1072         .d_delete       = pid_delete_dentry,
1073 };
1074
1075 static struct dentry_operations pid_base_dentry_operations =
1076 {
1077         .d_revalidate   = pid_revalidate,
1078         .d_iput         = pid_base_iput,
1079         .d_delete       = pid_delete_dentry,
1080 };
1081
1082 /* Lookups */
1083
1084 static unsigned name_to_int(struct dentry *dentry)
1085 {
1086         const char *name = dentry->d_name.name;
1087         int len = dentry->d_name.len;
1088         unsigned n = 0;
1089
1090         if (len > 1 && *name == '0')
1091                 goto out;
1092         while (len-- > 0) {
1093                 unsigned c = *name++ - '0';
1094                 if (c > 9)
1095                         goto out;
1096                 if (n >= (~0U-9)/10)
1097                         goto out;
1098                 n *= 10;
1099                 n += c;
1100         }
1101         return n;
1102 out:
1103         return ~0U;
1104 }
1105
1106 /* SMP-safe */
1107 static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
1108 {
1109         struct task_struct *task = proc_task(dir);
1110         unsigned fd = name_to_int(dentry);
1111         struct file * file;
1112         struct files_struct * files;
1113         struct inode *inode;
1114         struct proc_inode *ei;
1115
1116         if (fd == ~0U)
1117                 goto out;
1118         if (!pid_alive(task))
1119                 goto out;
1120
1121         inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd);
1122         if (!inode)
1123                 goto out;
1124         ei = PROC_I(inode);
1125         files = get_files_struct(task);
1126         if (!files)
1127                 goto out_unlock;
1128         inode->i_mode = S_IFLNK;
1129         spin_lock(&files->file_lock);
1130         file = fcheck_files(files, fd);
1131         if (!file)
1132                 goto out_unlock2;
1133         if (file->f_mode & 1)
1134                 inode->i_mode |= S_IRUSR | S_IXUSR;
1135         if (file->f_mode & 2)
1136                 inode->i_mode |= S_IWUSR | S_IXUSR;
1137         spin_unlock(&files->file_lock);
1138         put_files_struct(files);
1139         inode->i_op = &proc_pid_link_inode_operations;
1140         inode->i_size = 64;
1141         ei->op.proc_get_link = proc_fd_link;
1142         dentry->d_op = &tid_fd_dentry_operations;
1143         d_add(dentry, inode);
1144         return NULL;
1145
1146 out_unlock2:
1147         spin_unlock(&files->file_lock);
1148         put_files_struct(files);
1149 out_unlock:
1150         iput(inode);
1151 out:
1152         return ERR_PTR(-ENOENT);
1153 }
1154
1155 static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir);
1156 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd);
1157
1158 static struct file_operations proc_fd_operations = {
1159         .read           = generic_read_dir,
1160         .readdir        = proc_readfd,
1161 };
1162
1163 static struct file_operations proc_task_operations = {
1164         .read           = generic_read_dir,
1165         .readdir        = proc_task_readdir,
1166 };
1167
1168 /*
1169  * proc directories can do almost nothing..
1170  */
1171 static struct inode_operations proc_fd_inode_operations = {
1172         .lookup         = proc_lookupfd,
1173         .permission     = proc_permission,
1174 };
1175
1176 static struct inode_operations proc_task_inode_operations = {
1177         .lookup         = proc_task_lookup,
1178         .permission     = proc_permission,
1179 };
1180
1181 #ifdef CONFIG_SECURITY
1182 static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
1183                                   size_t count, loff_t *ppos)
1184 {
1185         struct inode * inode = file->f_dentry->d_inode;
1186         unsigned long page;
1187         ssize_t length;
1188         ssize_t end;
1189         struct task_struct *task = proc_task(inode);
1190
1191         if (count > PAGE_SIZE)
1192                 count = PAGE_SIZE;
1193         if (!(page = __get_free_page(GFP_KERNEL)))
1194                 return -ENOMEM;
1195
1196         length = security_getprocattr(task, 
1197                                       (char*)file->f_dentry->d_name.name, 
1198                                       (void*)page, count);
1199         if (length < 0) {
1200                 free_page(page);
1201                 return length;
1202         }
1203         /* Static 4kB (or whatever) block capacity */
1204         if (*ppos >= length) {
1205                 free_page(page);
1206                 return 0;
1207         }
1208         if (count + *ppos > length)
1209                 count = length - *ppos;
1210         end = count + *ppos;
1211         if (copy_to_user(buf, (char *) page + *ppos, count))
1212                 count = -EFAULT;
1213         else
1214                 *ppos = end;
1215         free_page(page);
1216         return count;
1217 }
1218
1219 static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
1220                                    size_t count, loff_t *ppos)
1221
1222         struct inode * inode = file->f_dentry->d_inode;
1223         char *page; 
1224         ssize_t length; 
1225         struct task_struct *task = proc_task(inode); 
1226
1227         if (count > PAGE_SIZE) 
1228                 count = PAGE_SIZE; 
1229         if (*ppos != 0) {
1230                 /* No partial writes. */
1231                 return -EINVAL;
1232         }
1233         page = (char*)__get_free_page(GFP_USER); 
1234         if (!page) 
1235                 return -ENOMEM;
1236         length = -EFAULT; 
1237         if (copy_from_user(page, buf, count)) 
1238                 goto out;
1239
1240         length = security_setprocattr(task, 
1241                                       (char*)file->f_dentry->d_name.name, 
1242                                       (void*)page, count);
1243 out:
1244         free_page((unsigned long) page);
1245         return length;
1246
1247
1248 static struct file_operations proc_pid_attr_operations = {
1249         .read           = proc_pid_attr_read,
1250         .write          = proc_pid_attr_write,
1251 };
1252
1253 static struct file_operations proc_tid_attr_operations;
1254 static struct inode_operations proc_tid_attr_inode_operations;
1255 static struct file_operations proc_tgid_attr_operations;
1256 static struct inode_operations proc_tgid_attr_inode_operations;
1257 #endif
1258
1259 /* SMP-safe */
1260 static struct dentry *proc_pident_lookup(struct inode *dir, 
1261                                          struct dentry *dentry,
1262                                          struct pid_entry *ents)
1263 {
1264         struct inode *inode;
1265         int error;
1266         struct task_struct *task = proc_task(dir);
1267         struct pid_entry *p;
1268         struct proc_inode *ei;
1269
1270         error = -ENOENT;
1271         inode = NULL;
1272
1273         if (!pid_alive(task))
1274                 goto out;
1275
1276         for (p = ents; p->name; p++) {
1277                 if (p->len != dentry->d_name.len)
1278                         continue;
1279                 if (!memcmp(dentry->d_name.name, p->name, p->len))
1280                         break;
1281         }
1282         if (!p->name)
1283                 goto out;
1284
1285         error = -EINVAL;
1286         inode = proc_pid_make_inode(dir->i_sb, task, p->type);
1287         if (!inode)
1288                 goto out;
1289
1290         ei = PROC_I(inode);
1291         inode->i_mode = p->mode;
1292         /*
1293          * Yes, it does not scale. And it should not. Don't add
1294          * new entries into /proc/<tgid>/ without very good reasons.
1295          */
1296         switch(p->type) {
1297                 case PROC_TGID_TASK:
1298                         inode->i_nlink = 3;
1299                         inode->i_op = &proc_task_inode_operations;
1300                         inode->i_fop = &proc_task_operations;
1301                         break;
1302                 case PROC_TID_FD:
1303                 case PROC_TGID_FD:
1304                         inode->i_nlink = 2;
1305                         inode->i_op = &proc_fd_inode_operations;
1306                         inode->i_fop = &proc_fd_operations;
1307                         break;
1308                 case PROC_TID_EXE:
1309                 case PROC_TGID_EXE:
1310                         inode->i_op = &proc_pid_link_inode_operations;
1311                         ei->op.proc_get_link = proc_exe_link;
1312                         break;
1313                 case PROC_TID_CWD:
1314                 case PROC_TGID_CWD:
1315                         inode->i_op = &proc_pid_link_inode_operations;
1316                         ei->op.proc_get_link = proc_cwd_link;
1317                         break;
1318                 case PROC_TID_ROOT:
1319                 case PROC_TGID_ROOT:
1320                         inode->i_op = &proc_pid_link_inode_operations;
1321                         ei->op.proc_get_link = proc_root_link;
1322                         break;
1323                 case PROC_TID_ENVIRON:
1324                 case PROC_TGID_ENVIRON:
1325                         inode->i_fop = &proc_info_file_operations;
1326                         ei->op.proc_read = proc_pid_environ;
1327                         break;
1328                 case PROC_TID_AUXV:
1329                 case PROC_TGID_AUXV:
1330                         inode->i_fop = &proc_info_file_operations;
1331                         ei->op.proc_read = proc_pid_auxv;
1332                         break;
1333                 case PROC_TID_STATUS:
1334                 case PROC_TGID_STATUS:
1335                         inode->i_fop = &proc_info_file_operations;
1336                         ei->op.proc_read = proc_pid_status;
1337                         break;
1338                 case PROC_TID_STAT:
1339                 case PROC_TGID_STAT:
1340                         inode->i_fop = &proc_info_file_operations;
1341                         ei->op.proc_read = proc_pid_stat;
1342                         break;
1343                 case PROC_TID_CMDLINE:
1344                 case PROC_TGID_CMDLINE:
1345                         inode->i_fop = &proc_info_file_operations;
1346                         ei->op.proc_read = proc_pid_cmdline;
1347                         break;
1348                 case PROC_TID_STATM:
1349                 case PROC_TGID_STATM:
1350                         inode->i_fop = &proc_info_file_operations;
1351                         ei->op.proc_read = proc_pid_statm;
1352                         break;
1353                 case PROC_TID_MAPS:
1354                 case PROC_TGID_MAPS:
1355                         inode->i_fop = &proc_maps_operations;
1356                         break;
1357                 case PROC_TID_MEM:
1358                 case PROC_TGID_MEM:
1359                         inode->i_op = &proc_mem_inode_operations;
1360                         inode->i_fop = &proc_mem_operations;
1361                         break;
1362                 case PROC_TID_MOUNTS:
1363                 case PROC_TGID_MOUNTS:
1364                         inode->i_fop = &proc_mounts_operations;
1365                         break;
1366 #ifdef CONFIG_SECURITY
1367                 case PROC_TID_ATTR:
1368                         inode->i_nlink = 2;
1369                         inode->i_op = &proc_tid_attr_inode_operations;
1370                         inode->i_fop = &proc_tid_attr_operations;
1371                         break;
1372                 case PROC_TGID_ATTR:
1373                         inode->i_nlink = 2;
1374                         inode->i_op = &proc_tgid_attr_inode_operations;
1375                         inode->i_fop = &proc_tgid_attr_operations;
1376                         break;
1377                 case PROC_TID_ATTR_CURRENT:
1378                 case PROC_TGID_ATTR_CURRENT:
1379                 case PROC_TID_ATTR_PREV:
1380                 case PROC_TGID_ATTR_PREV:
1381                 case PROC_TID_ATTR_EXEC:
1382                 case PROC_TGID_ATTR_EXEC:
1383                 case PROC_TID_ATTR_FSCREATE:
1384                 case PROC_TGID_ATTR_FSCREATE:
1385                         inode->i_fop = &proc_pid_attr_operations;
1386                         break;
1387 #endif
1388 #ifdef CONFIG_KALLSYMS
1389                 case PROC_TID_WCHAN:
1390                 case PROC_TGID_WCHAN:
1391                         inode->i_fop = &proc_info_file_operations;
1392                         ei->op.proc_read = proc_pid_wchan;
1393                         break;
1394 #endif
1395                 case PROC_TID_VX_INFO:
1396                 case PROC_TGID_VX_INFO:
1397                         inode->i_fop = &proc_info_file_operations;
1398                         ei->op.proc_read = proc_pid_vx_info;
1399                         break;
1400                 case PROC_TID_IP_INFO:
1401                 case PROC_TGID_IP_INFO:
1402                         inode->i_fop = &proc_info_file_operations;
1403                         ei->op.proc_read = proc_pid_nx_info;
1404                         break;
1405                 default:
1406                         printk("procfs: impossible type (%d)",p->type);
1407                         iput(inode);
1408                         return ERR_PTR(-EINVAL);
1409         }
1410         dentry->d_op = &pid_dentry_operations;
1411         d_add(dentry, inode);
1412         return NULL;
1413
1414 out:
1415         return ERR_PTR(error);
1416 }
1417
1418 static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
1419         return proc_pident_lookup(dir, dentry, tgid_base_stuff);
1420 }
1421
1422 static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
1423         return proc_pident_lookup(dir, dentry, tid_base_stuff);
1424 }
1425
1426 static struct file_operations proc_tgid_base_operations = {
1427         .read           = generic_read_dir,
1428         .readdir        = proc_tgid_base_readdir,
1429 };
1430
1431 static struct file_operations proc_tid_base_operations = {
1432         .read           = generic_read_dir,
1433         .readdir        = proc_tid_base_readdir,
1434 };
1435
1436 static struct inode_operations proc_tgid_base_inode_operations = {
1437         .lookup         = proc_tgid_base_lookup,
1438 };
1439
1440 static struct inode_operations proc_tid_base_inode_operations = {
1441         .lookup         = proc_tid_base_lookup,
1442 };
1443
1444 #ifdef CONFIG_SECURITY
1445 static int proc_tgid_attr_readdir(struct file * filp,
1446                              void * dirent, filldir_t filldir)
1447 {
1448         return proc_pident_readdir(filp,dirent,filldir,
1449                                    tgid_attr_stuff,ARRAY_SIZE(tgid_attr_stuff));
1450 }
1451
1452 static int proc_tid_attr_readdir(struct file * filp,
1453                              void * dirent, filldir_t filldir)
1454 {
1455         return proc_pident_readdir(filp,dirent,filldir,
1456                                    tid_attr_stuff,ARRAY_SIZE(tid_attr_stuff));
1457 }
1458
1459 static struct file_operations proc_tgid_attr_operations = {
1460         .read           = generic_read_dir,
1461         .readdir        = proc_tgid_attr_readdir,
1462 };
1463
1464 static struct file_operations proc_tid_attr_operations = {
1465         .read           = generic_read_dir,
1466         .readdir        = proc_tid_attr_readdir,
1467 };
1468
1469 static struct dentry *proc_tgid_attr_lookup(struct inode *dir,
1470                                 struct dentry *dentry, struct nameidata *nd)
1471 {
1472         return proc_pident_lookup(dir, dentry, tgid_attr_stuff);
1473 }
1474
1475 static struct dentry *proc_tid_attr_lookup(struct inode *dir,
1476                                 struct dentry *dentry, struct nameidata *nd)
1477 {
1478         return proc_pident_lookup(dir, dentry, tid_attr_stuff);
1479 }
1480
1481 static struct inode_operations proc_tgid_attr_inode_operations = {
1482         .lookup         = proc_tgid_attr_lookup,
1483 };
1484
1485 static struct inode_operations proc_tid_attr_inode_operations = {
1486         .lookup         = proc_tid_attr_lookup,
1487 };
1488 #endif
1489
1490 /*
1491  * /proc/self:
1492  */
1493 static int proc_self_readlink(struct dentry *dentry, char *buffer, int buflen)
1494 {
1495         char tmp[30];
1496         sprintf(tmp, "%d", current->tgid);
1497         return vfs_readlink(dentry,buffer,buflen,tmp);
1498 }
1499
1500 static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
1501 {
1502         char tmp[30];
1503         sprintf(tmp, "%d", current->tgid);
1504         return vfs_follow_link(nd,tmp);
1505 }       
1506
1507 static struct inode_operations proc_self_inode_operations = {
1508         .readlink       = proc_self_readlink,
1509         .follow_link    = proc_self_follow_link,
1510 };
1511
1512 /**
1513  * proc_pid_unhash -  Unhash /proc/<pid> entry from the dcache.
1514  * @p: task that should be flushed.
1515  *
1516  * Drops the /proc/<pid> dcache entry from the hash chains.
1517  *
1518  * Dropping /proc/<pid> entries and detach_pid must be synchroneous,
1519  * otherwise e.g. /proc/<pid>/exe might point to the wrong executable,
1520  * if the pid value is immediately reused. This is enforced by
1521  * - caller must acquire spin_lock(p->proc_lock)
1522  * - must be called before detach_pid()
1523  * - proc_pid_lookup acquires proc_lock, and checks that
1524  *   the target is not dead by looking at the attach count
1525  *   of PIDTYPE_PID.
1526  */
1527
1528 struct dentry *proc_pid_unhash(struct task_struct *p)
1529 {
1530         struct dentry *proc_dentry;
1531
1532         proc_dentry = p->proc_dentry;
1533         if (proc_dentry != NULL) {
1534
1535                 spin_lock(&dcache_lock);
1536                 if (!d_unhashed(proc_dentry)) {
1537                         dget_locked(proc_dentry);
1538                         __d_drop(proc_dentry);
1539                 } else
1540                         proc_dentry = NULL;
1541                 spin_unlock(&dcache_lock);
1542         }
1543         return proc_dentry;
1544 }
1545
1546 /**
1547  * proc_pid_flush - recover memory used by stale /proc/<pid>/x entries
1548  * @proc_entry: directoy to prune.
1549  *
1550  * Shrink the /proc directory that was used by the just killed thread.
1551  */
1552         
1553 void proc_pid_flush(struct dentry *proc_dentry)
1554 {
1555         if(proc_dentry != NULL) {
1556                 shrink_dcache_parent(proc_dentry);
1557                 dput(proc_dentry);
1558         }
1559 }
1560
1561 /* SMP-safe */
1562 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
1563 {
1564         struct task_struct *task;
1565         struct inode *inode;
1566         struct proc_inode *ei;
1567         unsigned tgid;
1568         int died;
1569
1570         if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) {
1571                 inode = new_inode(dir->i_sb);
1572                 if (!inode)
1573                         return ERR_PTR(-ENOMEM);
1574                 ei = PROC_I(inode);
1575                 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1576                 inode->i_ino = fake_ino(0, PROC_TGID_INO);
1577                 ei->pde = NULL;
1578                 inode->i_mode = S_IFLNK|S_IRWXUGO;
1579                 inode->i_uid = inode->i_gid = 0;
1580                 inode->i_size = 64;
1581                 inode->i_op = &proc_self_inode_operations;
1582                 d_add(dentry, inode);
1583                 return NULL;
1584         }
1585         tgid = vx_rmap_tgid(current->vx_info, name_to_int(dentry));
1586         if (tgid == ~0U)
1587                 goto out;
1588
1589         read_lock(&tasklist_lock);
1590         task = find_task_by_pid(tgid);
1591         if (task)
1592                 get_task_struct(task);
1593         read_unlock(&tasklist_lock);
1594         if (!task)
1595                 goto out;
1596
1597         inode = NULL;
1598         if (vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT))
1599                 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO);
1600
1601         if (!inode) {
1602                 put_task_struct(task);
1603                 goto out;
1604         }
1605         inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
1606         inode->i_op = &proc_tgid_base_inode_operations;
1607         inode->i_fop = &proc_tgid_base_operations;
1608         inode->i_nlink = 3;
1609         inode->i_flags|=S_IMMUTABLE;
1610
1611         dentry->d_op = &pid_base_dentry_operations;
1612
1613         died = 0;
1614         d_add(dentry, inode);
1615         spin_lock(&task->proc_lock);
1616         task->proc_dentry = dentry;
1617         if (!pid_alive(task)) {
1618                 dentry = proc_pid_unhash(task);
1619                 died = 1;
1620         }
1621         spin_unlock(&task->proc_lock);
1622
1623         put_task_struct(task);
1624         if (died) {
1625                 proc_pid_flush(dentry);
1626                 goto out;
1627         }
1628         return NULL;
1629 out:
1630         return ERR_PTR(-ENOENT);
1631 }
1632
1633 /* SMP-safe */
1634 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
1635 {
1636         struct task_struct *task;
1637         struct task_struct *leader = proc_task(dir);
1638         struct inode *inode;
1639         unsigned tid;
1640
1641         tid = vx_rmap_tgid(current->vx_info, name_to_int(dentry));
1642         if (tid == ~0U)
1643                 goto out;
1644
1645 /*      handle fakeinit */
1646
1647         read_lock(&tasklist_lock);
1648         task = find_task_by_pid(tid);
1649         if (task)
1650                 get_task_struct(task);
1651         read_unlock(&tasklist_lock);
1652         if (!task)
1653                 goto out;
1654         if (leader->tgid != task->tgid)
1655                 goto out_drop_task;
1656
1657         inode = NULL;
1658         if (vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT))
1659                 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO);
1660
1661         if (!inode)
1662                 goto out_drop_task;
1663         inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
1664         inode->i_op = &proc_tid_base_inode_operations;
1665         inode->i_fop = &proc_tid_base_operations;
1666         inode->i_nlink = 3;
1667         inode->i_flags|=S_IMMUTABLE;
1668
1669         dentry->d_op = &pid_base_dentry_operations;
1670
1671         d_add(dentry, inode);
1672
1673         put_task_struct(task);
1674         return NULL;
1675 out_drop_task:
1676         put_task_struct(task);
1677 out:
1678         return ERR_PTR(-ENOENT);
1679 }
1680
1681 #define PROC_NUMBUF 10
1682 #define PROC_MAXPIDS 20
1683
1684 /*
1685  * Get a few tgid's to return for filldir - we need to hold the
1686  * tasklist lock while doing this, and we must release it before
1687  * we actually do the filldir itself, so we use a temp buffer..
1688  */
1689 static int get_tgid_list(int index, unsigned long version, unsigned int *tgids)
1690 {
1691         struct task_struct *p;
1692         int nr_tgids = 0;
1693
1694         index--;
1695         read_lock(&tasklist_lock);
1696         p = NULL;
1697         if (version) {
1698                 p = find_task_by_pid(version);
1699                 if (!thread_group_leader(p))
1700                         p = NULL;
1701         }
1702
1703         if (p)
1704                 index = 0;
1705         else
1706                 p = next_task(&init_task);
1707
1708         for ( ; p != &init_task; p = next_task(p)) {
1709                 int tgid = p->pid;
1710
1711                 if (!pid_alive(p))
1712                         continue;
1713                 if (!vx_check(vx_task_xid(p), VX_WATCH|VX_IDENT))
1714                         continue;
1715                 if (--index >= 0)
1716                         continue;
1717                 tgids[nr_tgids] = vx_map_tgid(current->vx_info, tgid);
1718                 nr_tgids++;
1719                 if (nr_tgids >= PROC_MAXPIDS)
1720                         break;
1721         }
1722         read_unlock(&tasklist_lock);
1723         return nr_tgids;
1724 }
1725
1726 /*
1727  * Get a few tid's to return for filldir - we need to hold the
1728  * tasklist lock while doing this, and we must release it before
1729  * we actually do the filldir itself, so we use a temp buffer..
1730  */
1731 static int get_tid_list(int index, unsigned int *tids, struct inode *dir)
1732 {
1733         struct task_struct *leader_task = proc_task(dir);
1734         struct task_struct *task = leader_task;
1735         int nr_tids = 0;
1736
1737         index -= 2;
1738         read_lock(&tasklist_lock);
1739         /*
1740          * The starting point task (leader_task) might be an already
1741          * unlinked task, which cannot be used to access the task-list
1742          * via next_thread().
1743          */
1744         if (pid_alive(task)) do {
1745                 int tid = task->pid;
1746
1747                 if (!vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT))
1748                         continue;
1749                 if (--index >= 0)
1750                         continue;
1751                 tids[nr_tids] = vx_map_tgid(current->vx_info, tid);
1752                 nr_tids++;
1753                 if (nr_tids >= PROC_MAXPIDS)
1754                         break;
1755         } while ((task = next_thread(task)) != leader_task);
1756         read_unlock(&tasklist_lock);
1757         return nr_tids;
1758 }
1759
1760 /* for the /proc/ directory itself, after non-process stuff has been done */
1761 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
1762 {
1763         unsigned int tgid_array[PROC_MAXPIDS];
1764         char buf[PROC_NUMBUF];
1765         unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
1766         unsigned int nr_tgids, i;
1767
1768         if (!nr) {
1769                 ino_t ino = fake_ino(0,PROC_TGID_INO);
1770                 if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0)
1771                         return 0;
1772                 filp->f_pos++;
1773                 nr++;
1774         }
1775
1776         /*
1777          * f_version caches the last tgid which was returned from readdir
1778          */
1779         nr_tgids = get_tgid_list(nr, filp->f_version, tgid_array);
1780
1781         for (i = 0; i < nr_tgids; i++) {
1782                 int tgid = tgid_array[i];
1783                 ino_t ino = fake_ino(tgid,PROC_TGID_INO);
1784                 unsigned long j = PROC_NUMBUF;
1785
1786                 do buf[--j] = '0' + (tgid % 10); while (tgid/=10);
1787
1788                 if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) {
1789                         filp->f_version = tgid;
1790                         break;
1791                 }
1792                 filp->f_pos++;
1793         }
1794         return 0;
1795 }
1796
1797 /* for the /proc/TGID/task/ directories */
1798 static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir)
1799 {
1800         unsigned int tid_array[PROC_MAXPIDS];
1801         char buf[PROC_NUMBUF];
1802         unsigned int nr_tids, i;
1803         struct dentry *dentry = filp->f_dentry;
1804         struct inode *inode = dentry->d_inode;
1805         struct task_struct *task = proc_task(inode);
1806         int retval = -ENOENT;
1807         ino_t ino;
1808         unsigned long pos = filp->f_pos;  /* avoiding "long long" filp->f_pos */
1809
1810         if (!vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT))
1811                 goto out;
1812         if (!pid_alive(task))
1813                 goto out;
1814         retval = 0;
1815
1816         switch (pos) {
1817         case 0:
1818                 ino = inode->i_ino;
1819                 if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0)
1820                         goto out;
1821                 pos++;
1822                 /* fall through */
1823         case 1:
1824                 ino = parent_ino(dentry);
1825                 if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0)
1826                         goto out;
1827                 pos++;
1828                 /* fall through */
1829         }
1830
1831         nr_tids = get_tid_list(pos, tid_array, inode);
1832
1833         for (i = 0; i < nr_tids; i++) {
1834                 unsigned long j = PROC_NUMBUF;
1835                 int tid = tid_array[i];
1836
1837                 ino = fake_ino(tid,PROC_TID_INO);
1838
1839                 do
1840                         buf[--j] = '0' + (tid % 10);
1841                 while (tid /= 10);
1842
1843                 if (filldir(dirent, buf+j, PROC_NUMBUF-j, pos, ino, DT_DIR) < 0)
1844                         break;
1845                 pos++;
1846         }
1847 out:
1848         filp->f_pos = pos;
1849         return retval;
1850 }