ckrm E17 memory controller
[linux-2.6.git] / fs / proc / base.c
1 /*
2  *  linux/fs/proc/base.c
3  *
4  *  Copyright (C) 1991, 1992 Linus Torvalds
5  *
6  *  proc base directory handling functions
7  *
8  *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
9  *  Instead of using magical inumbers to determine the kind of object
10  *  we allocate and fill in-core inodes upon lookup. They don't even
11  *  go into icache. We cache the reference to task_struct upon lookup too.
12  *  Eventually it should become a filesystem in its own. We don't use the
13  *  rest of procfs anymore.
14  */
15
16 #include <asm/uaccess.h>
17
18 #include <linux/config.h>
19 #include <linux/errno.h>
20 #include <linux/time.h>
21 #include <linux/proc_fs.h>
22 #include <linux/stat.h>
23 #include <linux/init.h>
24 #include <linux/file.h>
25 #include <linux/string.h>
26 #include <linux/seq_file.h>
27 #include <linux/namei.h>
28 #include <linux/namespace.h>
29 #include <linux/mm.h>
30 #include <linux/smp_lock.h>
31 #include <linux/kallsyms.h>
32 #include <linux/mount.h>
33 #include <linux/security.h>
34 #include <linux/ptrace.h>
35
36 /*
37  * For hysterical raisins we keep the same inumbers as in the old procfs.
38  * Feel free to change the macro below - just keep the range distinct from
39  * inumbers of the rest of procfs (currently those are in 0x0000--0xffff).
40  * As soon as we'll get a separate superblock we will be able to forget
41  * about magical ranges too.
42  */
43
/* Compose an inode number: pid shifted left by 16, OR-ed with the entry type. */
#define fake_ino(pid, ino) (((pid) << 16) | (ino))
45
/*
 * Entry types for the per-process (TGID) and per-thread (TID) directories.
 * The value is stored as the inode's type and forms the low bits of
 * fake_ino().  Values are assigned sequentially starting at 2, so the
 * order below (including the #ifdef sections) determines the numbers.
 */
enum pid_directory_inos {
        /* per-process entries */
        PROC_TGID_INO = 2,
        PROC_TGID_TASK,
        PROC_TGID_STATUS,
        PROC_TGID_MEM,
        PROC_TGID_CWD,
        PROC_TGID_ROOT,
        PROC_TGID_EXE,
        PROC_TGID_FD,
        PROC_TGID_ENVIRON,
        PROC_TGID_AUXV,
        PROC_TGID_CMDLINE,
        PROC_TGID_STAT,
        PROC_TGID_STATM,
        PROC_TGID_MAPS,
        PROC_TGID_MOUNTS,
        PROC_TGID_WCHAN,
#ifdef CONFIG_SCHEDSTATS
        PROC_TGID_SCHEDSTAT,
#endif
#ifdef CONFIG_SECURITY
        PROC_TGID_ATTR,
        PROC_TGID_ATTR_CURRENT,
        PROC_TGID_ATTR_PREV,
        PROC_TGID_ATTR_EXEC,
        PROC_TGID_ATTR_FSCREATE,
#endif
        PROC_TGID_FD_DIR,

        /* per-thread entries */
        PROC_TID_INO,
        PROC_TID_STATUS,
        PROC_TID_MEM,
        PROC_TID_CWD,
        PROC_TID_ROOT,
        PROC_TID_EXE,
        PROC_TID_FD,
        PROC_TID_ENVIRON,
        PROC_TID_AUXV,
        PROC_TID_CMDLINE,
        PROC_TID_STAT,
        PROC_TID_STATM,
        PROC_TID_MAPS,
        PROC_TID_MOUNTS,
        PROC_TID_WCHAN,
#ifdef CONFIG_SCHEDSTATS
        PROC_TID_SCHEDSTAT,
#endif
#ifdef CONFIG_SECURITY
        PROC_TID_ATTR,
        PROC_TID_ATTR_CURRENT,
        PROC_TID_ATTR_PREV,
        PROC_TID_ATTR_EXEC,
        PROC_TID_ATTR_FSCREATE,
#endif
#ifdef CONFIG_DELAY_ACCT
        PROC_TID_DELAY_ACCT,
        PROC_TGID_DELAY_ACCT,
#endif

        /* fd links use PROC_TID_FD_DIR + fd as their type */
        PROC_TID_FD_DIR = 0x8000,       /* 0x8000-0xffff */
};
105
/* One row of a per-task directory table; a NULL name terminates the table. */
struct pid_entry {
        int type;       /* one of enum pid_directory_inos */
        int len;        /* length of name, without the trailing NUL */
        char *name;     /* entry name */
        mode_t mode;    /* file type and permission bits */
};

/* Table-row initializer; the name length is derived from the string literal. */
#define E(type, name, mode) { (type), sizeof(name) - 1, (name), (mode) }
114
115 static struct pid_entry tgid_base_stuff[] = {
116         E(PROC_TGID_TASK,      "task",    S_IFDIR|S_IRUGO|S_IXUGO),
117         E(PROC_TGID_FD,        "fd",      S_IFDIR|S_IRUSR|S_IXUSR),
118         E(PROC_TGID_ENVIRON,   "environ", S_IFREG|S_IRUSR),
119         E(PROC_TGID_AUXV,      "auxv",    S_IFREG|S_IRUSR),
120         E(PROC_TGID_STATUS,    "status",  S_IFREG|S_IRUGO),
121         E(PROC_TGID_CMDLINE,   "cmdline", S_IFREG|S_IRUGO),
122         E(PROC_TGID_STAT,      "stat",    S_IFREG|S_IRUGO),
123         E(PROC_TGID_STATM,     "statm",   S_IFREG|S_IRUGO),
124         E(PROC_TGID_MAPS,      "maps",    S_IFREG|S_IRUGO),
125         E(PROC_TGID_MEM,       "mem",     S_IFREG|S_IRUSR|S_IWUSR),
126         E(PROC_TGID_CWD,       "cwd",     S_IFLNK|S_IRWXUGO),
127         E(PROC_TGID_ROOT,      "root",    S_IFLNK|S_IRWXUGO),
128         E(PROC_TGID_EXE,       "exe",     S_IFLNK|S_IRWXUGO),
129         E(PROC_TGID_MOUNTS,    "mounts",  S_IFREG|S_IRUGO),
130 #ifdef CONFIG_SECURITY
131         E(PROC_TGID_ATTR,      "attr",    S_IFDIR|S_IRUGO|S_IXUGO),
132 #endif
133 #ifdef CONFIG_DELAY_ACCT
134         E(PROC_TGID_DELAY_ACCT,"delay",   S_IFREG|S_IRUGO),
135 #endif
136 #ifdef CONFIG_KALLSYMS
137         E(PROC_TGID_WCHAN,     "wchan",   S_IFREG|S_IRUGO),
138 #endif
139 #ifdef CONFIG_SCHEDSTATS
140         E(PROC_TGID_SCHEDSTAT, "schedstat", S_IFREG|S_IRUGO),
141 #endif
142         {0,0,NULL,0}
143 };
144 static struct pid_entry tid_base_stuff[] = {
145         E(PROC_TID_FD,         "fd",      S_IFDIR|S_IRUSR|S_IXUSR),
146         E(PROC_TID_ENVIRON,    "environ", S_IFREG|S_IRUSR),
147         E(PROC_TID_AUXV,       "auxv",    S_IFREG|S_IRUSR),
148         E(PROC_TID_STATUS,     "status",  S_IFREG|S_IRUGO),
149         E(PROC_TID_CMDLINE,    "cmdline", S_IFREG|S_IRUGO),
150         E(PROC_TID_STAT,       "stat",    S_IFREG|S_IRUGO),
151         E(PROC_TID_STATM,      "statm",   S_IFREG|S_IRUGO),
152         E(PROC_TID_MAPS,       "maps",    S_IFREG|S_IRUGO),
153         E(PROC_TID_MEM,        "mem",     S_IFREG|S_IRUSR|S_IWUSR),
154         E(PROC_TID_CWD,        "cwd",     S_IFLNK|S_IRWXUGO),
155         E(PROC_TID_ROOT,       "root",    S_IFLNK|S_IRWXUGO),
156         E(PROC_TID_EXE,        "exe",     S_IFLNK|S_IRWXUGO),
157         E(PROC_TID_MOUNTS,     "mounts",  S_IFREG|S_IRUGO),
158 #ifdef CONFIG_SECURITY
159         E(PROC_TID_ATTR,       "attr",    S_IFDIR|S_IRUGO|S_IXUGO),
160 #endif
161 #ifdef CONFIG_DELAY_ACCT
162         E(PROC_TGID_DELAY_ACCT,"delay",   S_IFREG|S_IRUGO),
163 #endif
164 #ifdef CONFIG_KALLSYMS
165         E(PROC_TID_WCHAN,      "wchan",   S_IFREG|S_IRUGO),
166 #endif
167 #ifdef CONFIG_SCHEDSTATS
168         E(PROC_TID_SCHEDSTAT, "schedstat",S_IFREG|S_IRUGO),
169 #endif
170         {0,0,NULL,0}
171 };
172
#ifdef CONFIG_SECURITY
/* Contents of the per-process "attr" directory. */
static struct pid_entry tgid_attr_stuff[] = {
        E(PROC_TGID_ATTR_CURRENT,  "current",  S_IFREG | S_IRUGO | S_IWUGO),
        E(PROC_TGID_ATTR_PREV,     "prev",     S_IFREG | S_IRUGO),
        E(PROC_TGID_ATTR_EXEC,     "exec",     S_IFREG | S_IRUGO | S_IWUGO),
        E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG | S_IRUGO | S_IWUGO),
        { 0, 0, NULL, 0 }
};

/* Contents of the per-thread "attr" directory. */
static struct pid_entry tid_attr_stuff[] = {
        E(PROC_TID_ATTR_CURRENT,   "current",  S_IFREG | S_IRUGO | S_IWUGO),
        E(PROC_TID_ATTR_PREV,      "prev",     S_IFREG | S_IRUGO),
        E(PROC_TID_ATTR_EXEC,      "exec",     S_IFREG | S_IRUGO | S_IWUGO),
        E(PROC_TID_ATTR_FSCREATE,  "fscreate", S_IFREG | S_IRUGO | S_IWUGO),
        { 0, 0, NULL, 0 }
};
#endif
189
190 #undef E
191
192 static inline struct task_struct *proc_task(struct inode *inode)
193 {
194         return PROC_I(inode)->task;
195 }
196
197 static inline int proc_type(struct inode *inode)
198 {
199         return PROC_I(inode)->type;
200 }
201
/*
 * Text generators defined outside this file.  Each takes a task and an output
 * buffer and returns an int, matching the other generators in this file.
 */
int proc_tid_stat(struct task_struct *task, char *buffer);
int proc_tgid_stat(struct task_struct *task, char *buffer);
int proc_pid_status(struct task_struct *task, char *buffer);
int proc_pid_statm(struct task_struct *task, char *buffer);
int proc_pid_delay(struct task_struct *task, char *buffer);
207
208 static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
209 {
210         struct task_struct *task = proc_task(inode);
211         struct files_struct *files;
212         struct file *file;
213         int fd = proc_type(inode) - PROC_TID_FD_DIR;
214
215         files = get_files_struct(task);
216         if (files) {
217                 spin_lock(&files->file_lock);
218                 file = fcheck_files(files, fd);
219                 if (file) {
220                         *mnt = mntget(file->f_vfsmnt);
221                         *dentry = dget(file->f_dentry);
222                         spin_unlock(&files->file_lock);
223                         put_files_struct(files);
224                         return 0;
225                 }
226                 spin_unlock(&files->file_lock);
227                 put_files_struct(files);
228         }
229         return -ENOENT;
230 }
231
232 static int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
233 {
234         struct vm_area_struct * vma;
235         int result = -ENOENT;
236         struct task_struct *task = proc_task(inode);
237         struct mm_struct * mm = get_task_mm(task);
238
239         if (!mm)
240                 goto out;
241         down_read(&mm->mmap_sem);
242         vma = mm->mmap;
243         while (vma) {
244                 if ((vma->vm_flags & VM_EXECUTABLE) && 
245                     vma->vm_file) {
246                         *mnt = mntget(vma->vm_file->f_vfsmnt);
247                         *dentry = dget(vma->vm_file->f_dentry);
248                         result = 0;
249                         break;
250                 }
251                 vma = vma->vm_next;
252         }
253         up_read(&mm->mmap_sem);
254         mmput(mm);
255 out:
256         return result;
257 }
258
259 static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
260 {
261         struct fs_struct *fs;
262         int result = -ENOENT;
263         task_lock(proc_task(inode));
264         fs = proc_task(inode)->fs;
265         if(fs)
266                 atomic_inc(&fs->count);
267         task_unlock(proc_task(inode));
268         if (fs) {
269                 read_lock(&fs->lock);
270                 *mnt = mntget(fs->pwdmnt);
271                 *dentry = dget(fs->pwd);
272                 read_unlock(&fs->lock);
273                 result = 0;
274                 put_fs_struct(fs);
275         }
276         return result;
277 }
278
279 static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
280 {
281         struct fs_struct *fs;
282         int result = -ENOENT;
283         task_lock(proc_task(inode));
284         fs = proc_task(inode)->fs;
285         if(fs)
286                 atomic_inc(&fs->count);
287         task_unlock(proc_task(inode));
288         if (fs) {
289                 read_lock(&fs->lock);
290                 *mnt = mntget(fs->rootmnt);
291                 *dentry = dget(fs->root);
292                 read_unlock(&fs->lock);
293                 result = 0;
294                 put_fs_struct(fs);
295         }
296         return result;
297 }
298
/*
 * True when @task is current itself, or when current is its parent, the task
 * has PT_PTRACED set, is in TASK_STOPPED or TASK_TRACED state, and
 * security_ptrace() returns 0.
 *
 * The argument is parenthesized at every use so any pointer expression can be
 * passed.  It is still evaluated more than once, so it must be free of side
 * effects.
 */
#define MAY_PTRACE(task) \
        ((task) == current || \
        ((task)->parent == current && \
        ((task)->ptrace & PT_PTRACED) && \
         ((task)->state == TASK_STOPPED || (task)->state == TASK_TRACED) && \
         security_ptrace(current, (task)) == 0))
305
306 static int may_ptrace_attach(struct task_struct *task)
307 {
308         int retval = 0;
309
310         task_lock(task);
311
312         if (!task->mm)
313                 goto out;
314         if (((current->uid != task->euid) ||
315              (current->uid != task->suid) ||
316              (current->uid != task->uid) ||
317              (current->gid != task->egid) ||
318              (current->gid != task->sgid) ||
319              (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
320                 goto out;
321         rmb();
322         if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
323                 goto out;
324         if (security_ptrace(current, task))
325                 goto out;
326
327         retval = 1;
328 out:
329         task_unlock(task);
330         return retval;
331 }
332
333 static int proc_pid_environ(struct task_struct *task, char * buffer)
334 {
335         int res = 0;
336         struct mm_struct *mm = get_task_mm(task);
337         if (mm) {
338                 unsigned int len = mm->env_end - mm->env_start;
339                 if (len > PAGE_SIZE)
340                         len = PAGE_SIZE;
341                 res = access_process_vm(task, mm->env_start, buffer, len, 0);
342                 if (!may_ptrace_attach(task))
343                         res = -ESRCH;
344                 mmput(mm);
345         }
346         return res;
347 }
348
349 static int proc_pid_cmdline(struct task_struct *task, char * buffer)
350 {
351         int res = 0;
352         unsigned int len;
353         struct mm_struct *mm = get_task_mm(task);
354         if (!mm)
355                 goto out;
356         if (!mm->arg_end)
357                 goto out_mm;    /* Shh! No looking before we're done */
358
359         len = mm->arg_end - mm->arg_start;
360  
361         if (len > PAGE_SIZE)
362                 len = PAGE_SIZE;
363  
364         res = access_process_vm(task, mm->arg_start, buffer, len, 0);
365
366         // If the nul at the end of args has been overwritten, then
367         // assume application is using setproctitle(3).
368         if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) {
369                 len = strnlen(buffer, res);
370                 if (len < res) {
371                     res = len;
372                 } else {
373                         len = mm->env_end - mm->env_start;
374                         if (len > PAGE_SIZE - res)
375                                 len = PAGE_SIZE - res;
376                         res += access_process_vm(task, mm->env_start, buffer+res, len, 0);
377                         res = strnlen(buffer, res);
378                 }
379         }
380 out_mm:
381         mmput(mm);
382 out:
383         return res;
384 }
385
386 static int proc_pid_auxv(struct task_struct *task, char *buffer)
387 {
388         int res = 0;
389         struct mm_struct *mm = get_task_mm(task);
390         if (mm) {
391                 unsigned int nwords = 0;
392                 do
393                         nwords += 2;
394                 while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
395                 res = nwords * sizeof(mm->saved_auxv[0]);
396                 if (res > PAGE_SIZE)
397                         res = PAGE_SIZE;
398                 memcpy(buffer, mm->saved_auxv, res);
399                 mmput(mm);
400         }
401         return res;
402 }
403
404
405 #ifdef CONFIG_KALLSYMS
/*
 * Provides a wchan file via kallsyms in a proper one-value-per-file format.
 * Writes the symbol name resolved for get_wchan(task) into the buffer; when
 * the lookup yields no name, writes the address in decimal instead.
 * Returns the sprintf() result.
 */
static int proc_pid_wchan(struct task_struct *task, char *buffer)
{
        unsigned long wchan, size, offset;
        const char *sym_name;
        char *modname;
        char namebuf[128];

        wchan = get_wchan(task);
        sym_name = kallsyms_lookup(wchan, &size, &offset, &modname, namebuf);

        if (!sym_name)
                return sprintf(buffer, "%lu", wchan);

        return sprintf(buffer, "%s", sym_name);
}
424 #endif /* CONFIG_KALLSYMS */
425
426 #ifdef CONFIG_SCHEDSTATS
427 /*
428  * Provides /proc/PID/schedstat
429  */
430 static int proc_pid_schedstat(struct task_struct *task, char *buffer)
431 {
432         return sprintf(buffer, "%lu %lu %lu\n",
433                         task->sched_info.cpu_time,
434                         task->sched_info.run_delay,
435                         task->sched_info.pcnt);
436 }
437 #endif
438
439 /************************************************************************/
440 /*                       Here the fs part begins                        */
441 /************************************************************************/
442
443 /* permission checks */
444
445 static int proc_check_root(struct inode *inode)
446 {
447         struct dentry *de, *base, *root;
448         struct vfsmount *our_vfsmnt, *vfsmnt, *mnt;
449         int res = 0;
450
451         if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */
452                 return -ENOENT;
453         read_lock(&current->fs->lock);
454         our_vfsmnt = mntget(current->fs->rootmnt);
455         base = dget(current->fs->root);
456         read_unlock(&current->fs->lock);
457
458         spin_lock(&vfsmount_lock);
459         de = root;
460         mnt = vfsmnt;
461
462         while (vfsmnt != our_vfsmnt) {
463                 if (vfsmnt == vfsmnt->mnt_parent)
464                         goto out;
465                 de = vfsmnt->mnt_mountpoint;
466                 vfsmnt = vfsmnt->mnt_parent;
467         }
468
469         if (!is_subdir(de, base))
470                 goto out;
471         spin_unlock(&vfsmount_lock);
472
473 exit:
474         dput(base);
475         mntput(our_vfsmnt);
476         dput(root);
477         mntput(mnt);
478         return res;
479 out:
480         spin_unlock(&vfsmount_lock);
481         res = -EACCES;
482         goto exit;
483 }
484
485 static int proc_permission(struct inode *inode, int mask, struct nameidata *nd)
486 {
487         if (generic_permission(inode, mask, NULL) != 0)
488                 return -EACCES;
489         return proc_check_root(inode);
490 }
491
492 extern struct seq_operations proc_pid_maps_op;
493 static int maps_open(struct inode *inode, struct file *file)
494 {
495         struct task_struct *task = proc_task(inode);
496         int ret = seq_open(file, &proc_pid_maps_op);
497         if (!ret) {
498                 struct seq_file *m = file->private_data;
499                 m->private = task;
500         }
501         return ret;
502 }
503
504 static struct file_operations proc_maps_operations = {
505         .open           = maps_open,
506         .read           = seq_read,
507         .llseek         = seq_lseek,
508         .release        = seq_release,
509 };
510
511 extern struct seq_operations mounts_op;
512 static int mounts_open(struct inode *inode, struct file *file)
513 {
514         struct task_struct *task = proc_task(inode);
515         int ret = seq_open(file, &mounts_op);
516
517         if (!ret) {
518                 struct seq_file *m = file->private_data;
519                 struct namespace *namespace;
520                 task_lock(task);
521                 namespace = task->namespace;
522                 if (namespace)
523                         get_namespace(namespace);
524                 task_unlock(task);
525
526                 if (namespace)
527                         m->private = namespace;
528                 else {
529                         seq_release(inode, file);
530                         ret = -EINVAL;
531                 }
532         }
533         return ret;
534 }
535
536 static int mounts_release(struct inode *inode, struct file *file)
537 {
538         struct seq_file *m = file->private_data;
539         struct namespace *namespace = m->private;
540         put_namespace(namespace);
541         return seq_release(inode, file);
542 }
543
544 static struct file_operations proc_mounts_operations = {
545         .open           = mounts_open,
546         .read           = seq_read,
547         .llseek         = seq_lseek,
548         .release        = mounts_release,
549 };
550
551 #define PROC_BLOCK_SIZE (3*1024)                /* 4K page size but our output routines use some slack for overruns */
552
553 static ssize_t proc_info_read(struct file * file, char __user * buf,
554                           size_t count, loff_t *ppos)
555 {
556         struct inode * inode = file->f_dentry->d_inode;
557         unsigned long page;
558         ssize_t length;
559         struct task_struct *task = proc_task(inode);
560
561         if (count > PROC_BLOCK_SIZE)
562                 count = PROC_BLOCK_SIZE;
563         if (!(page = __get_free_page(GFP_KERNEL)))
564                 return -ENOMEM;
565
566         length = PROC_I(inode)->op.proc_read(task, (char*)page);
567
568         if (length >= 0)
569                 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
570         free_page(page);
571         return length;
572 }
573
574 static struct file_operations proc_info_file_operations = {
575         .read           = proc_info_read,
576 };
577
578 static int mem_open(struct inode* inode, struct file* file)
579 {
580         file->private_data = (void*)((long)current->self_exec_id);
581         return 0;
582 }
583
584 static ssize_t mem_read(struct file * file, char __user * buf,
585                         size_t count, loff_t *ppos)
586 {
587         struct task_struct *task = proc_task(file->f_dentry->d_inode);
588         char *page;
589         unsigned long src = *ppos;
590         int ret = -ESRCH;
591         struct mm_struct *mm;
592
593         if (!MAY_PTRACE(task) || !may_ptrace_attach(task))
594                 goto out;
595
596         ret = -ENOMEM;
597         page = (char *)__get_free_page(GFP_USER);
598         if (!page)
599                 goto out;
600
601         ret = 0;
602  
603         mm = get_task_mm(task);
604         if (!mm)
605                 goto out_free;
606
607         ret = -EIO;
608  
609         if (file->private_data != (void*)((long)current->self_exec_id))
610                 goto out_put;
611
612         ret = 0;
613  
614         while (count > 0) {
615                 int this_len, retval;
616
617                 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
618                 retval = access_process_vm(task, src, page, this_len, 0);
619                 if (!retval || !MAY_PTRACE(task) || !may_ptrace_attach(task)) {
620                         if (!ret)
621                                 ret = -EIO;
622                         break;
623                 }
624
625                 if (copy_to_user(buf, page, retval)) {
626                         ret = -EFAULT;
627                         break;
628                 }
629  
630                 ret += retval;
631                 src += retval;
632                 buf += retval;
633                 count -= retval;
634         }
635         *ppos = src;
636
637 out_put:
638         mmput(mm);
639 out_free:
640         free_page((unsigned long) page);
641 out:
642         return ret;
643 }
644
/*
 * Writing to the "mem" file is disabled: mem_write is defined as NULL, so the
 * implementation below is compiled out by the #ifndef.
 */
#define mem_write NULL

#ifndef mem_write
/* This is a security hazard */
static ssize_t mem_write(struct file * file, const char * buf,
                         size_t count, loff_t *ppos)
{
        struct task_struct *task = proc_task(file->f_dentry->d_inode);
        unsigned long dst = *ppos;
        char *page;
        int copied = 0;

        if (!MAY_PTRACE(task) || !may_ptrace_attach(task))
                return -ESRCH;

        page = (char *)__get_free_page(GFP_USER);
        if (!page)
                return -ENOMEM;

        /* Move the data through the bounce page, at most PAGE_SIZE at a time. */
        while (count > 0) {
                int chunk, done;

                chunk = (count > PAGE_SIZE) ? PAGE_SIZE : count;
                if (copy_from_user(page, buf, chunk)) {
                        copied = -EFAULT;
                        break;
                }

                done = access_process_vm(task, dst, page, chunk, 1);
                if (!done) {
                        /* only report an error if nothing was written yet */
                        if (!copied)
                                copied = -EIO;
                        break;
                }

                copied += done;
                buf += done;
                dst += done;
                count -= done;
        }
        *ppos = dst;

        free_page((unsigned long) page);
        return copied;
}
#endif
688
689 static loff_t mem_lseek(struct file * file, loff_t offset, int orig)
690 {
691         switch (orig) {
692         case 0:
693                 file->f_pos = offset;
694                 break;
695         case 1:
696                 file->f_pos += offset;
697                 break;
698         default:
699                 return -EINVAL;
700         }
701         force_successful_syscall_return();
702         return file->f_pos;
703 }
704
705 static struct file_operations proc_mem_operations = {
706         .llseek         = mem_lseek,
707         .read           = mem_read,
708         .write          = mem_write,
709         .open           = mem_open,
710 };
711
712 static struct inode_operations proc_mem_inode_operations = {
713         .permission     = proc_permission,
714 };
715
716 static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
717 {
718         struct inode *inode = dentry->d_inode;
719         int error = -EACCES;
720
721         /* We don't need a base pointer in the /proc filesystem */
722         path_release(nd);
723
724         if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE))
725                 goto out;
726         error = proc_check_root(inode);
727         if (error)
728                 goto out;
729
730         error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt);
731         nd->last_type = LAST_BIND;
732 out:
733         return error;
734 }
735
736 static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt,
737                             char __user *buffer, int buflen)
738 {
739         struct inode * inode;
740         char *tmp = (char*)__get_free_page(GFP_KERNEL), *path;
741         int len;
742
743         if (!tmp)
744                 return -ENOMEM;
745                 
746         inode = dentry->d_inode;
747         path = d_path(dentry, mnt, tmp, PAGE_SIZE);
748         len = PTR_ERR(path);
749         if (IS_ERR(path))
750                 goto out;
751         len = tmp + PAGE_SIZE - 1 - path;
752
753         if (len > buflen)
754                 len = buflen;
755         if (copy_to_user(buffer, path, len))
756                 len = -EFAULT;
757  out:
758         free_page((unsigned long)tmp);
759         return len;
760 }
761
762 static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
763 {
764         int error = -EACCES;
765         struct inode *inode = dentry->d_inode;
766         struct dentry *de;
767         struct vfsmount *mnt = NULL;
768
769         lock_kernel();
770
771         if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE))
772                 goto out;
773         error = proc_check_root(inode);
774         if (error)
775                 goto out;
776
777         error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt);
778         if (error)
779                 goto out;
780
781         error = do_proc_readlink(de, mnt, buffer, buflen);
782         dput(de);
783         mntput(mnt);
784 out:
785         unlock_kernel();
786         return error;
787 }
788
789 static struct inode_operations proc_pid_link_inode_operations = {
790         .readlink       = proc_pid_readlink,
791         .follow_link    = proc_pid_follow_link
792 };
793
794 #define NUMBUF 10
795
796 static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
797 {
798         struct inode *inode = filp->f_dentry->d_inode;
799         struct task_struct *p = proc_task(inode);
800         unsigned int fd, tid, ino;
801         int retval;
802         char buf[NUMBUF];
803         struct files_struct * files;
804
805         retval = -ENOENT;
806         if (!pid_alive(p))
807                 goto out;
808         retval = 0;
809         tid = p->pid;
810
811         fd = filp->f_pos;
812         switch (fd) {
813                 case 0:
814                         if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
815                                 goto out;
816                         filp->f_pos++;
817                 case 1:
818                         ino = fake_ino(tid, PROC_TID_INO);
819                         if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
820                                 goto out;
821                         filp->f_pos++;
822                 default:
823                         files = get_files_struct(p);
824                         if (!files)
825                                 goto out;
826                         spin_lock(&files->file_lock);
827                         for (fd = filp->f_pos-2;
828                              fd < files->max_fds;
829                              fd++, filp->f_pos++) {
830                                 unsigned int i,j;
831
832                                 if (!fcheck_files(files, fd))
833                                         continue;
834                                 spin_unlock(&files->file_lock);
835
836                                 j = NUMBUF;
837                                 i = fd;
838                                 do {
839                                         j--;
840                                         buf[j] = '0' + (i % 10);
841                                         i /= 10;
842                                 } while (i);
843
844                                 ino = fake_ino(tid, PROC_TID_FD_DIR + fd);
845                                 if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) {
846                                         spin_lock(&files->file_lock);
847                                         break;
848                                 }
849                                 spin_lock(&files->file_lock);
850                         }
851                         spin_unlock(&files->file_lock);
852                         put_files_struct(files);
853         }
854 out:
855         return retval;
856 }
857
858 static int proc_pident_readdir(struct file *filp,
859                 void *dirent, filldir_t filldir,
860                 struct pid_entry *ents, unsigned int nents)
861 {
862         int i;
863         int pid;
864         struct dentry *dentry = filp->f_dentry;
865         struct inode *inode = dentry->d_inode;
866         struct pid_entry *p;
867         ino_t ino;
868         int ret;
869
870         ret = -ENOENT;
871         if (!pid_alive(proc_task(inode)))
872                 goto out;
873
874         ret = 0;
875         pid = proc_task(inode)->pid;
876         i = filp->f_pos;
877         switch (i) {
878         case 0:
879                 ino = inode->i_ino;
880                 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
881                         goto out;
882                 i++;
883                 filp->f_pos++;
884                 /* fall through */
885         case 1:
886                 ino = parent_ino(dentry);
887                 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
888                         goto out;
889                 i++;
890                 filp->f_pos++;
891                 /* fall through */
892         default:
893                 i -= 2;
894                 if (i >= nents) {
895                         ret = 1;
896                         goto out;
897                 }
898                 p = ents + i;
899                 while (p->name) {
900                         if (filldir(dirent, p->name, p->len, filp->f_pos,
901                                     fake_ino(pid, p->type), p->mode >> 12) < 0)
902                                 goto out;
903                         filp->f_pos++;
904                         p++;
905                 }
906         }
907
908         ret = 1;
909 out:
910         return ret;
911 }
912
913 static int proc_tgid_base_readdir(struct file * filp,
914                              void * dirent, filldir_t filldir)
915 {
916         return proc_pident_readdir(filp,dirent,filldir,
917                                    tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff));
918 }
919
920 static int proc_tid_base_readdir(struct file * filp,
921                              void * dirent, filldir_t filldir)
922 {
923         return proc_pident_readdir(filp,dirent,filldir,
924                                    tid_base_stuff,ARRAY_SIZE(tid_base_stuff));
925 }
926
927 /* building an inode */
928
929 static int task_dumpable(struct task_struct *task)
930 {
931         int dumpable = 0;
932         struct mm_struct *mm;
933
934         task_lock(task);
935         mm = task->mm;
936         if (mm)
937                 dumpable = mm->dumpable;
938         task_unlock(task);
939         return dumpable;
940 }
941
942
943 static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, int ino)
944 {
945         struct inode * inode;
946         struct proc_inode *ei;
947
948         /* We need a new inode */
949         
950         inode = new_inode(sb);
951         if (!inode)
952                 goto out;
953
954         /* Common stuff */
955         ei = PROC_I(inode);
956         ei->task = NULL;
957         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
958         inode->i_ino = fake_ino(task->pid, ino);
959
960         if (!pid_alive(task))
961                 goto out_unlock;
962
963         /*
964          * grab the reference to task.
965          */
966         get_task_struct(task);
967         ei->task = task;
968         ei->type = ino;
969         inode->i_uid = 0;
970         inode->i_gid = 0;
971         if (ino == PROC_TGID_INO || ino == PROC_TID_INO || task_dumpable(task)) {
972                 inode->i_uid = task->euid;
973                 inode->i_gid = task->egid;
974         }
975         security_task_to_inode(task, inode);
976
977 out:
978         return inode;
979
980 out_unlock:
981         ei->pde = NULL;
982         iput(inode);
983         return NULL;
984 }
985
986 /* dentry stuff */
987
988 /*
989  *      Exceptional case: normally we are not allowed to unhash a busy
990  * directory. In this case, however, we can do it - no aliasing problems
991  * due to the way we treat inodes.
992  *
993  * Rewrite the inode's ownerships here because the owning task may have
994  * performed a setuid(), etc.
995  */
996 static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
997 {
998         struct inode *inode = dentry->d_inode;
999         struct task_struct *task = proc_task(inode);
1000         if (pid_alive(task)) {
1001                 if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) {
1002                         inode->i_uid = task->euid;
1003                         inode->i_gid = task->egid;
1004                 } else {
1005                         inode->i_uid = 0;
1006                         inode->i_gid = 0;
1007                 }
1008                 security_task_to_inode(task, inode);
1009                 return 1;
1010         }
1011         d_drop(dentry);
1012         return 0;
1013 }
1014
1015 static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1016 {
1017         struct inode *inode = dentry->d_inode;
1018         struct task_struct *task = proc_task(inode);
1019         int fd = proc_type(inode) - PROC_TID_FD_DIR;
1020         struct files_struct *files;
1021
1022         files = get_files_struct(task);
1023         if (files) {
1024                 spin_lock(&files->file_lock);
1025                 if (fcheck_files(files, fd)) {
1026                         spin_unlock(&files->file_lock);
1027                         put_files_struct(files);
1028                         if (task_dumpable(task)) {
1029                                 inode->i_uid = task->euid;
1030                                 inode->i_gid = task->egid;
1031                         } else {
1032                                 inode->i_uid = 0;
1033                                 inode->i_gid = 0;
1034                         }
1035                         security_task_to_inode(task, inode);
1036                         return 1;
1037                 }
1038                 spin_unlock(&files->file_lock);
1039                 put_files_struct(files);
1040         }
1041         d_drop(dentry);
1042         return 0;
1043 }
1044
1045 static void pid_base_iput(struct dentry *dentry, struct inode *inode)
1046 {
1047         struct task_struct *task = proc_task(inode);
1048         spin_lock(&task->proc_lock);
1049         if (task->proc_dentry == dentry)
1050                 task->proc_dentry = NULL;
1051         spin_unlock(&task->proc_lock);
1052         iput(inode);
1053 }
1054
1055 static int pid_delete_dentry(struct dentry * dentry)
1056 {
1057         /* Is the task we represent dead?
1058          * If so, then don't put the dentry on the lru list,
1059          * kill it immediately.
1060          */
1061         return !pid_alive(proc_task(dentry->d_inode));
1062 }
1063
1064 static struct dentry_operations tid_fd_dentry_operations =
1065 {
1066         .d_revalidate   = tid_fd_revalidate,
1067         .d_delete       = pid_delete_dentry,
1068 };
1069
1070 static struct dentry_operations pid_dentry_operations =
1071 {
1072         .d_revalidate   = pid_revalidate,
1073         .d_delete       = pid_delete_dentry,
1074 };
1075
1076 static struct dentry_operations pid_base_dentry_operations =
1077 {
1078         .d_revalidate   = pid_revalidate,
1079         .d_iput         = pid_base_iput,
1080         .d_delete       = pid_delete_dentry,
1081 };
1082
1083 /* Lookups */
1084
1085 static unsigned name_to_int(struct dentry *dentry)
1086 {
1087         const char *name = dentry->d_name.name;
1088         int len = dentry->d_name.len;
1089         unsigned n = 0;
1090
1091         if (len > 1 && *name == '0')
1092                 goto out;
1093         while (len-- > 0) {
1094                 unsigned c = *name++ - '0';
1095                 if (c > 9)
1096                         goto out;
1097                 if (n >= (~0U-9)/10)
1098                         goto out;
1099                 n *= 10;
1100                 n += c;
1101         }
1102         return n;
1103 out:
1104         return ~0U;
1105 }
1106
1107 /* SMP-safe */
/*
 * Lookup method of the fd directory: the name is parsed as a descriptor
 * number and, if that descriptor is in use in the task's files_struct, a
 * symlink inode for it is created and attached to @dentry.
 *
 * Returns NULL after d_add() on success, or ERR_PTR(-ENOENT) when the name
 * is not a number, the task is not alive, no inode or files_struct could be
 * obtained, or the descriptor is not in use.
 */
1108 static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
1109 {
1110         struct task_struct *task = proc_task(dir);
1111         unsigned fd = name_to_int(dentry);
1112         struct file * file;
1113         struct files_struct * files;
1114         struct inode *inode;
1115         struct proc_inode *ei;
1116
1117         if (fd == ~0U)
1118                 goto out;
1119         if (!pid_alive(task))
1120                 goto out;
1121
             /* The descriptor number is encoded in the inode type. */
1122         inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd);
1123         if (!inode)
1124                 goto out;
1125         ei = PROC_I(inode);
1126         files = get_files_struct(task);
1127         if (!files)
1128                 goto out_unlock;
1129         inode->i_mode = S_IFLNK;
             /* The descriptor table is inspected under file_lock. */
1130         spin_lock(&files->file_lock);
1131         file = fcheck_files(files, fd);
1132         if (!file)
1133                 goto out_unlock2;
             /*
              * Derive the link's user permission bits from f_mode bits 1 and 2
              * (presumably the read and write open modes - confirm).
              */
1134         if (file->f_mode & 1)
1135                 inode->i_mode |= S_IRUSR | S_IXUSR;
1136         if (file->f_mode & 2)
1137                 inode->i_mode |= S_IWUSR | S_IXUSR;
1138         spin_unlock(&files->file_lock);
1139         put_files_struct(files);
1140         inode->i_op = &proc_pid_link_inode_operations;
1141         inode->i_size = 64;
1142         ei->op.proc_get_link = proc_fd_link;
1143         dentry->d_op = &tid_fd_dentry_operations;
1144         d_add(dentry, inode);
1145         return NULL;
1146
             /* Error unwinding: each label undoes one more acquisition. */
1147 out_unlock2:
1148         spin_unlock(&files->file_lock);
1149         put_files_struct(files);
1150 out_unlock:
1151         iput(inode);
1152 out:
1153         return ERR_PTR(-ENOENT);
1154 }
1155
1156 static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir);
1157 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd);
1158
1159 static struct file_operations proc_fd_operations = {
1160         .read           = generic_read_dir,
1161         .readdir        = proc_readfd,
1162 };
1163
1164 static struct file_operations proc_task_operations = {
1165         .read           = generic_read_dir,
1166         .readdir        = proc_task_readdir,
1167 };
1168
1169 /*
1170  * proc directories can do almost nothing..
1171  */
1172 static struct inode_operations proc_fd_inode_operations = {
1173         .lookup         = proc_lookupfd,
1174         .permission     = proc_permission,
1175 };
1176
1177 static struct inode_operations proc_task_inode_operations = {
1178         .lookup         = proc_task_lookup,
1179         .permission     = proc_permission,
1180 };
1181
1182 #ifdef CONFIG_SECURITY
1183 static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
1184                                   size_t count, loff_t *ppos)
1185 {
1186         struct inode * inode = file->f_dentry->d_inode;
1187         unsigned long page;
1188         ssize_t length;
1189         struct task_struct *task = proc_task(inode);
1190
1191         if (count > PAGE_SIZE)
1192                 count = PAGE_SIZE;
1193         if (!(page = __get_free_page(GFP_KERNEL)))
1194                 return -ENOMEM;
1195
1196         length = security_getprocattr(task, 
1197                                       (char*)file->f_dentry->d_name.name, 
1198                                       (void*)page, count);
1199         if (length >= 0)
1200                 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
1201         free_page(page);
1202         return length;
1203 }
1204
1205 static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
1206                                    size_t count, loff_t *ppos)
1207
1208         struct inode * inode = file->f_dentry->d_inode;
1209         char *page; 
1210         ssize_t length; 
1211         struct task_struct *task = proc_task(inode); 
1212
1213         if (count > PAGE_SIZE) 
1214                 count = PAGE_SIZE; 
1215         if (*ppos != 0) {
1216                 /* No partial writes. */
1217                 return -EINVAL;
1218         }
1219         page = (char*)__get_free_page(GFP_USER); 
1220         if (!page) 
1221                 return -ENOMEM;
1222         length = -EFAULT; 
1223         if (copy_from_user(page, buf, count)) 
1224                 goto out;
1225
1226         length = security_setprocattr(task, 
1227                                       (char*)file->f_dentry->d_name.name, 
1228                                       (void*)page, count);
1229 out:
1230         free_page((unsigned long) page);
1231         return length;
1232
1233
1234 static struct file_operations proc_pid_attr_operations = {
1235         .read           = proc_pid_attr_read,
1236         .write          = proc_pid_attr_write,
1237 };
1238
1239 static struct file_operations proc_tid_attr_operations;
1240 static struct inode_operations proc_tid_attr_inode_operations;
1241 static struct file_operations proc_tgid_attr_operations;
1242 static struct inode_operations proc_tgid_attr_inode_operations;
1243 #endif
1244
1245 /* SMP-safe */
/*
 * Look up the name of @dentry in the table @ents (terminated by an entry
 * whose name is NULL) and, on a match, create the inode for that entry of
 * the task behind @dir and attach it to @dentry.
 *
 * Returns NULL after d_add() on success; ERR_PTR(-ENOENT) if the task is
 * not alive or the name is not in the table; ERR_PTR(-EINVAL) if no inode
 * could be made or the entry's type has no case in the switch below.
 */
1246 static struct dentry *proc_pident_lookup(struct inode *dir, 
1247                                          struct dentry *dentry,
1248                                          struct pid_entry *ents)
1249 {
1250         struct inode *inode;
1251         int error;
1252         struct task_struct *task = proc_task(dir);
1253         struct pid_entry *p;
1254         struct proc_inode *ei;
1255
1256         error = -ENOENT;
1257         inode = NULL;
1258
1259         if (!pid_alive(task))
1260                 goto out;
1261
             /* Linear scan: compare lengths first, then the bytes. */
1262         for (p = ents; p->name; p++) {
1263                 if (p->len != dentry->d_name.len)
1264                         continue;
1265                 if (!memcmp(dentry->d_name.name, p->name, p->len))
1266                         break;
1267         }
             /* Reached the terminating entry: no such name. */
1268         if (!p->name)
1269                 goto out;
1270
1271         error = -EINVAL;
1272         inode = proc_pid_make_inode(dir->i_sb, task, p->type);
1273         if (!inode)
1274                 goto out;
1275
1276         ei = PROC_I(inode);
1277         inode->i_mode = p->mode;
1278         /*
1279          * Yes, it does not scale. And it should not. Don't add
1280          * new entries into /proc/<tgid>/ without very good reasons.
1281          */
             /* Select the inode/file methods (and link count) per entry type. */
1282         switch(p->type) {
1283                 case PROC_TGID_TASK:
1284                         inode->i_nlink = 3;
1285                         inode->i_op = &proc_task_inode_operations;
1286                         inode->i_fop = &proc_task_operations;
1287                         break;
1288                 case PROC_TID_FD:
1289                 case PROC_TGID_FD:
1290                         inode->i_nlink = 2;
1291                         inode->i_op = &proc_fd_inode_operations;
1292                         inode->i_fop = &proc_fd_operations;
1293                         break;
1294                 case PROC_TID_EXE:
1295                 case PROC_TGID_EXE:
1296                         inode->i_op = &proc_pid_link_inode_operations;
1297                         ei->op.proc_get_link = proc_exe_link;
1298                         break;
1299                 case PROC_TID_CWD:
1300                 case PROC_TGID_CWD:
1301                         inode->i_op = &proc_pid_link_inode_operations;
1302                         ei->op.proc_get_link = proc_cwd_link;
1303                         break;
1304                 case PROC_TID_ROOT:
1305                 case PROC_TGID_ROOT:
1306                         inode->i_op = &proc_pid_link_inode_operations;
1307                         ei->op.proc_get_link = proc_root_link;
1308                         break;
1309                 case PROC_TID_ENVIRON:
1310                 case PROC_TGID_ENVIRON:
1311                         inode->i_fop = &proc_info_file_operations;
1312                         ei->op.proc_read = proc_pid_environ;
1313                         break;
1314                 case PROC_TID_AUXV:
1315                 case PROC_TGID_AUXV:
1316                         inode->i_fop = &proc_info_file_operations;
1317                         ei->op.proc_read = proc_pid_auxv;
1318                         break;
1319                 case PROC_TID_STATUS:
1320                 case PROC_TGID_STATUS:
1321                         inode->i_fop = &proc_info_file_operations;
1322                         ei->op.proc_read = proc_pid_status;
1323                         break;
                     /* stat is the one info file with distinct tid/tgid readers. */
1324                 case PROC_TID_STAT:
1325                         inode->i_fop = &proc_info_file_operations;
1326                         ei->op.proc_read = proc_tid_stat;
1327                         break;
1328                 case PROC_TGID_STAT:
1329                         inode->i_fop = &proc_info_file_operations;
1330                         ei->op.proc_read = proc_tgid_stat;
1331                         break;
1332                 case PROC_TID_CMDLINE:
1333                 case PROC_TGID_CMDLINE:
1334                         inode->i_fop = &proc_info_file_operations;
1335                         ei->op.proc_read = proc_pid_cmdline;
1336                         break;
1337                 case PROC_TID_STATM:
1338                 case PROC_TGID_STATM:
1339                         inode->i_fop = &proc_info_file_operations;
1340                         ei->op.proc_read = proc_pid_statm;
1341                         break;
1342                 case PROC_TID_MAPS:
1343                 case PROC_TGID_MAPS:
1344                         inode->i_fop = &proc_maps_operations;
1345                         break;
1346                 case PROC_TID_MEM:
1347                 case PROC_TGID_MEM:
1348                         inode->i_op = &proc_mem_inode_operations;
1349                         inode->i_fop = &proc_mem_operations;
1350                         break;
1351                 case PROC_TID_MOUNTS:
1352                 case PROC_TGID_MOUNTS:
1353                         inode->i_fop = &proc_mounts_operations;
1354                         break;
1355 #ifdef CONFIG_SECURITY
1356                 case PROC_TID_ATTR:
1357                         inode->i_nlink = 2;
1358                         inode->i_op = &proc_tid_attr_inode_operations;
1359                         inode->i_fop = &proc_tid_attr_operations;
1360                         break;
1361                 case PROC_TGID_ATTR:
1362                         inode->i_nlink = 2;
1363                         inode->i_op = &proc_tgid_attr_inode_operations;
1364                         inode->i_fop = &proc_tgid_attr_operations;
1365                         break;
1366                 case PROC_TID_ATTR_CURRENT:
1367                 case PROC_TGID_ATTR_CURRENT:
1368                 case PROC_TID_ATTR_PREV:
1369                 case PROC_TGID_ATTR_PREV:
1370                 case PROC_TID_ATTR_EXEC:
1371                 case PROC_TGID_ATTR_EXEC:
1372                 case PROC_TID_ATTR_FSCREATE:
1373                 case PROC_TGID_ATTR_FSCREATE:
1374                         inode->i_fop = &proc_pid_attr_operations;
1375                         break;
1376 #endif
1377 #ifdef CONFIG_KALLSYMS
1378                 case PROC_TID_WCHAN:
1379                 case PROC_TGID_WCHAN:
1380                         inode->i_fop = &proc_info_file_operations;
1381                         ei->op.proc_read = proc_pid_wchan;
1382                         break;
1383 #endif
1384 #ifdef CONFIG_DELAY_ACCT
1385                 case PROC_TID_DELAY_ACCT:
1386                 case PROC_TGID_DELAY_ACCT:
1387                         inode->i_fop = &proc_info_file_operations;
1388                         ei->op.proc_read = proc_pid_delay;
1389                         break;
1390 #endif
1391 #ifdef CONFIG_SCHEDSTATS
1392                 case PROC_TID_SCHEDSTAT:
1393                 case PROC_TGID_SCHEDSTAT:
1394                         inode->i_fop = &proc_info_file_operations;
1395                         ei->op.proc_read = proc_pid_schedstat;
1396                         break;
1397 #endif
                     /* A table entry without a case above: log it and drop the inode. */
1398                 default:
1399                         printk("procfs: impossible type (%d)",p->type);
1400                         iput(inode);
1401                         return ERR_PTR(-EINVAL);
1402         }
1403         dentry->d_op = &pid_dentry_operations;
1404         d_add(dentry, inode);
1405         return NULL;
1406
1407 out:
1408         return ERR_PTR(error);
1409 }
1410
1411 static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
1412         return proc_pident_lookup(dir, dentry, tgid_base_stuff);
1413 }
1414
1415 static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
1416         return proc_pident_lookup(dir, dentry, tid_base_stuff);
1417 }
1418
1419 static struct file_operations proc_tgid_base_operations = {
1420         .read           = generic_read_dir,
1421         .readdir        = proc_tgid_base_readdir,
1422 };
1423
1424 static struct file_operations proc_tid_base_operations = {
1425         .read           = generic_read_dir,
1426         .readdir        = proc_tid_base_readdir,
1427 };
1428
1429 static struct inode_operations proc_tgid_base_inode_operations = {
1430         .lookup         = proc_tgid_base_lookup,
1431 };
1432
1433 static struct inode_operations proc_tid_base_inode_operations = {
1434         .lookup         = proc_tid_base_lookup,
1435 };
1436
1437 #ifdef CONFIG_SECURITY
1438 static int proc_tgid_attr_readdir(struct file * filp,
1439                              void * dirent, filldir_t filldir)
1440 {
1441         return proc_pident_readdir(filp,dirent,filldir,
1442                                    tgid_attr_stuff,ARRAY_SIZE(tgid_attr_stuff));
1443 }
1444
1445 static int proc_tid_attr_readdir(struct file * filp,
1446                              void * dirent, filldir_t filldir)
1447 {
1448         return proc_pident_readdir(filp,dirent,filldir,
1449                                    tid_attr_stuff,ARRAY_SIZE(tid_attr_stuff));
1450 }
1451
1452 static struct file_operations proc_tgid_attr_operations = {
1453         .read           = generic_read_dir,
1454         .readdir        = proc_tgid_attr_readdir,
1455 };
1456
1457 static struct file_operations proc_tid_attr_operations = {
1458         .read           = generic_read_dir,
1459         .readdir        = proc_tid_attr_readdir,
1460 };
1461
1462 static struct dentry *proc_tgid_attr_lookup(struct inode *dir,
1463                                 struct dentry *dentry, struct nameidata *nd)
1464 {
1465         return proc_pident_lookup(dir, dentry, tgid_attr_stuff);
1466 }
1467
1468 static struct dentry *proc_tid_attr_lookup(struct inode *dir,
1469                                 struct dentry *dentry, struct nameidata *nd)
1470 {
1471         return proc_pident_lookup(dir, dentry, tid_attr_stuff);
1472 }
1473
1474 static struct inode_operations proc_tgid_attr_inode_operations = {
1475         .lookup         = proc_tgid_attr_lookup,
1476 };
1477
1478 static struct inode_operations proc_tid_attr_inode_operations = {
1479         .lookup         = proc_tid_attr_lookup,
1480 };
1481 #endif
1482
1483 /*
1484  * /proc/self:
1485  */
1486 static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
1487                               int buflen)
1488 {
1489         char tmp[30];
1490         sprintf(tmp, "%d", current->tgid);
1491         return vfs_readlink(dentry,buffer,buflen,tmp);
1492 }
1493
1494 static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
1495 {
1496         char tmp[30];
1497         sprintf(tmp, "%d", current->tgid);
1498         return vfs_follow_link(nd,tmp);
1499 }       
1500
1501 static struct inode_operations proc_self_inode_operations = {
1502         .readlink       = proc_self_readlink,
1503         .follow_link    = proc_self_follow_link,
1504 };
1505
1506 /**
1507  * proc_pid_unhash -  Unhash /proc/<pid> entry from the dcache.
1508  * @p: task that should be flushed.
1509  *
1510  * Drops the /proc/<pid> dcache entry from the hash chains.
1511  *
1512  * Dropping /proc/<pid> entries and detach_pid must be synchronous,
1513  * otherwise e.g. /proc/<pid>/exe might point to the wrong executable,
1514  * if the pid value is immediately reused. This is enforced by
1515  * - caller must acquire spin_lock(p->proc_lock)
1516  * - must be called before detach_pid()
1517  * - proc_pid_lookup acquires proc_lock, and checks that
1518  *   the target is not dead by looking at the attach count
1519  *   of PIDTYPE_PID.
1520  */
1521
1522 struct dentry *proc_pid_unhash(struct task_struct *p)
1523 {
1524         struct dentry *proc_dentry;
1525
1526         proc_dentry = p->proc_dentry;
1527         if (proc_dentry != NULL) {
1528
1529                 spin_lock(&dcache_lock);
1530                 if (!d_unhashed(proc_dentry)) {
1531                         dget_locked(proc_dentry);
1532                         __d_drop(proc_dentry);
1533                 } else
1534                         proc_dentry = NULL;
1535                 spin_unlock(&dcache_lock);
1536         }
1537         return proc_dentry;
1538 }
1539
1540 /**
1541  * proc_pid_flush - recover memory used by stale /proc/<pid>/x entries
1542  * @proc_dentry: directory to prune.
1543  *
1544  * Shrink the /proc directory that was used by the just killed thread.
1545  */
1546         
1547 void proc_pid_flush(struct dentry *proc_dentry)
1548 {
1549         might_sleep();
1550         if(proc_dentry != NULL) {
1551                 shrink_dcache_parent(proc_dentry);
1552                 dput(proc_dentry);
1553         }
1554 }
1555
1556 /* SMP-safe */
/*
 * Lookup for a name that is either "self" or a decimal number.
 *
 * "self" gets a symlink inode owned by uid/gid 0 that uses
 * proc_self_inode_operations.  A number is resolved with find_task_by_pid();
 * if a task is found it gets a directory inode using the tgid base methods,
 * and the dentry is recorded in task->proc_dentry.
 *
 * Returns NULL on success, ERR_PTR(-ENOMEM) if the "self" inode cannot be
 * allocated, and ERR_PTR(-ENOENT) in every other failure case.
 */
1557 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
1558 {
1559         struct task_struct *task;
1560         struct inode *inode;
1561         struct proc_inode *ei;
1562         unsigned tgid;
1563         int died;
1564
1565         if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) {
1566                 inode = new_inode(dir->i_sb);
1567                 if (!inode)
1568                         return ERR_PTR(-ENOMEM);
1569                 ei = PROC_I(inode);
1570                 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1571                 inode->i_ino = fake_ino(0, PROC_TGID_INO);
1572                 ei->pde = NULL;
1573                 inode->i_mode = S_IFLNK|S_IRWXUGO;
1574                 inode->i_uid = inode->i_gid = 0;
1575                 inode->i_size = 64;
1576                 inode->i_op = &proc_self_inode_operations;
1577                 d_add(dentry, inode);
1578                 return NULL;
1579         }
1580         tgid = name_to_int(dentry);
1581         if (tgid == ~0U)
1582                 goto out;
1583
             /* Pin the task while tasklist_lock is held so it can be used after unlock. */
1584         read_lock(&tasklist_lock);
1585         task = find_task_by_pid(tgid);
1586         if (task)
1587                 get_task_struct(task);
1588         read_unlock(&tasklist_lock);
1589         if (!task)
1590                 goto out;
1591
1592         inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO);
1593
1594
1595         if (!inode) {
1596                 put_task_struct(task);
1597                 goto out;
1598         }
1599         inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
1600         inode->i_op = &proc_tgid_base_inode_operations;
1601         inode->i_fop = &proc_tgid_base_operations;
1602         inode->i_nlink = 3;
1603         inode->i_flags|=S_IMMUTABLE;
1604
1605         dentry->d_op = &pid_base_dentry_operations;
1606
1607         died = 0;
1608         d_add(dentry, inode);
             /*
              * Publish the dentry in the task under proc_lock, then re-check
              * liveness: if the task is no longer alive, unhash the dentry
              * again and flush it once the lock has been released.
              */
1609         spin_lock(&task->proc_lock);
1610         task->proc_dentry = dentry;
1611         if (!pid_alive(task)) {
1612                 dentry = proc_pid_unhash(task);
1613                 died = 1;
1614         }
1615         spin_unlock(&task->proc_lock);
1616
             /* Drop the reference taken under tasklist_lock above. */
1617         put_task_struct(task);
1618         if (died) {
1619                 proc_pid_flush(dentry);
1620                 goto out;
1621         }
1622         return NULL;
1623 out:
1624         return ERR_PTR(-ENOENT);
1625 }
1626
1627 /* SMP-safe */
1628 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
1629 {
1630         struct task_struct *task;
1631         struct task_struct *leader = proc_task(dir);
1632         struct inode *inode;
1633         unsigned tid;
1634
1635         tid = name_to_int(dentry);
1636         if (tid == ~0U)
1637                 goto out;
1638
1639         read_lock(&tasklist_lock);
1640         task = find_task_by_pid(tid);
1641         if (task)
1642                 get_task_struct(task);
1643         read_unlock(&tasklist_lock);
1644         if (!task)
1645                 goto out;
1646         if (leader->tgid != task->tgid)
1647                 goto out_drop_task;
1648
1649         inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO);
1650
1651
1652         if (!inode)
1653                 goto out_drop_task;
1654         inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
1655         inode->i_op = &proc_tid_base_inode_operations;
1656         inode->i_fop = &proc_tid_base_operations;
1657         inode->i_nlink = 3;
1658         inode->i_flags|=S_IMMUTABLE;
1659
1660         dentry->d_op = &pid_base_dentry_operations;
1661
1662         d_add(dentry, inode);
1663
1664         put_task_struct(task);
1665         return NULL;
1666 out_drop_task:
1667         put_task_struct(task);
1668 out:
1669         return ERR_PTR(-ENOENT);
1670 }
1671
/* Size of the on-stack buffer in which the readdir code formats one pid as decimal text. */
1672 #define PROC_NUMBUF 10
/* Upper bound on the number of ids get_tgid_list()/get_tid_list() collect per call. */
1673 #define PROC_MAXPIDS 20
1674
1675 /*
1676  * Get a few tgid's to return for filldir - we need to hold the
1677  * tasklist lock while doing this, and we must release it before
1678  * we actually do the filldir itself, so we use a temp buffer..
1679  */
1680 static int get_tgid_list(int index, unsigned long version, unsigned int *tgids)
1681 {
1682         struct task_struct *p;
1683         int nr_tgids = 0;
1684
1685         index--;
1686         read_lock(&tasklist_lock);
1687         p = NULL;
1688         if (version) {
1689                 p = find_task_by_pid(version);
1690                 if (p && !thread_group_leader(p))
1691                         p = NULL;
1692         }
1693
1694         if (p)
1695                 index = 0;
1696         else
1697                 p = next_task(&init_task);
1698
1699         for ( ; p != &init_task; p = next_task(p)) {
1700                 int tgid = p->pid;
1701                 if (!pid_alive(p))
1702                         continue;
1703                 if (--index >= 0)
1704                         continue;
1705                 tgids[nr_tgids] = tgid;
1706                 nr_tgids++;
1707                 if (nr_tgids >= PROC_MAXPIDS)
1708                         break;
1709         }
1710         read_unlock(&tasklist_lock);
1711         return nr_tgids;
1712 }
1713
1714 /*
1715  * Get a few tid's to return for filldir - we need to hold the
1716  * tasklist lock while doing this, and we must release it before
1717  * we actually do the filldir itself, so we use a temp buffer..
1718  */
1719 static int get_tid_list(int index, unsigned int *tids, struct inode *dir)
1720 {
1721         struct task_struct *leader_task = proc_task(dir);
1722         struct task_struct *task = leader_task;
1723         int nr_tids = 0;
1724
1725         index -= 2;
1726         read_lock(&tasklist_lock);
1727         /*
1728          * The starting point task (leader_task) might be an already
1729          * unlinked task, which cannot be used to access the task-list
1730          * via next_thread().
1731          */
1732         if (pid_alive(task)) do {
1733                 int tid = task->pid;
1734
1735                 if (--index >= 0)
1736                         continue;
1737                 tids[nr_tids] = tid;
1738                 nr_tids++;
1739                 if (nr_tids >= PROC_MAXPIDS)
1740                         break;
1741         } while ((task = next_thread(task)) != leader_task);
1742         read_unlock(&tasklist_lock);
1743         return nr_tids;
1744 }
1745
1746 /* for the /proc/ directory itself, after non-process stuff has been done */
/*
 * Emit the "self" entry followed by one directory entry per tgid returned
 * by get_tgid_list(), which is called repeatedly with batches of at most
 * PROC_MAXPIDS ids.  filp->f_pos is advanced once per emitted entry.
 *
 * Always returns 0; when filldir() refuses an entry the function stops
 * early and, for tgid entries, remembers that tgid in filp->f_version.
 */
1747 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
1748 {
1749         unsigned int tgid_array[PROC_MAXPIDS];
1750         char buf[PROC_NUMBUF];
1751         unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
1752         unsigned int nr_tgids, i;
1753         int next_tgid;
1754
             /* Position 0 of the process part is the "self" symlink. */
1755         if (!nr) {
1756                 ino_t ino = fake_ino(0,PROC_TGID_INO);
1757                 if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0)
1758                         return 0;
1759                 filp->f_pos++;
1760                 nr++;
1761         }
1762
1763         /* f_version caches the tgid value that the last readdir call couldn't
1764          * return. lseek aka telldir automagically resets f_version to 0.
1765          */
1766         next_tgid = filp->f_version;
1767         filp->f_version = 0;
1768         for (;;) {
1769                 nr_tgids = get_tgid_list(nr, next_tgid, tgid_array);
1770                 if (!nr_tgids) {
1771                         /* no more entries ! */
1772                         break;
1773                 }
1774                 next_tgid = 0;
1775
1776                 /* do not use the last found pid, reserve it for next_tgid */
1777                 if (nr_tgids == PROC_MAXPIDS) {
1778                         nr_tgids--;
1779                         next_tgid = tgid_array[nr_tgids];
1780                 }
1781
1782                 for (i=0;i<nr_tgids;i++) {
1783                         int tgid = tgid_array[i];
1784                         ino_t ino = fake_ino(tgid,PROC_TGID_INO);
1785                         unsigned long j = PROC_NUMBUF;
1786
                             /* Format tgid as decimal digits, filling buf from its end. */
1787                         do
1788                                 buf[--j] = '0' + (tgid % 10);
1789                         while ((tgid /= 10) != 0);
1790
1791                         if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) {
1792                                 /* returning this tgid failed, save it as the first
1793                                  * pid for the next readdir call */
1794                                 filp->f_version = tgid_array[i];
1795                                 goto out;
1796                         }
1797                         filp->f_pos++;
1798                         nr++;
1799                 }
1800         }
1801 out:
1802         return 0;
1803 }
1804
1805 /* for the /proc/TGID/task/ directories */
1806 static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir)
1807 {
1808         unsigned int tid_array[PROC_MAXPIDS];
1809         char buf[PROC_NUMBUF];
1810         unsigned int nr_tids, i;
1811         struct dentry *dentry = filp->f_dentry;
1812         struct inode *inode = dentry->d_inode;
1813         int retval = -ENOENT;
1814         ino_t ino;
1815         unsigned long pos = filp->f_pos;  /* avoiding "long long" filp->f_pos */
1816
1817         if (!pid_alive(proc_task(inode)))
1818                 goto out;
1819         retval = 0;
1820
1821         switch (pos) {
1822         case 0:
1823                 ino = inode->i_ino;
1824                 if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0)
1825                         goto out;
1826                 pos++;
1827                 /* fall through */
1828         case 1:
1829                 ino = parent_ino(dentry);
1830                 if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0)
1831                         goto out;
1832                 pos++;
1833                 /* fall through */
1834         }
1835
1836         nr_tids = get_tid_list(pos, tid_array, inode);
1837
1838         for (i = 0; i < nr_tids; i++) {
1839                 unsigned long j = PROC_NUMBUF;
1840                 int tid = tid_array[i];
1841
1842                 ino = fake_ino(tid,PROC_TID_INO);
1843
1844                 do
1845                         buf[--j] = '0' + (tid % 10);
1846                 while ((tid /= 10) != 0);
1847
1848                 if (filldir(dirent, buf+j, PROC_NUMBUF-j, pos, ino, DT_DIR) < 0)
1849                         break;
1850                 pos++;
1851         }
1852 out:
1853         filp->f_pos = pos;
1854         return retval;
1855 }