This stack check implementation leverages the compiler's profiling (gcc -p)
[linux-2.6.git] / fs / namei.c
1 /*
2  *  linux/fs/namei.c
3  *
4  *  Copyright (C) 1991, 1992  Linus Torvalds
5  */
6
7 /*
8  * Some corrections by tytso.
9  */
10
11 /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
12  * lookup logic.
13  */
14 /* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
15  */
16
17 #include <linux/init.h>
18 #include <linux/module.h>
19 #include <linux/slab.h>
20 #include <linux/fs.h>
21 #include <linux/namei.h>
22 #include <linux/quotaops.h>
23 #include <linux/pagemap.h>
24 #include <linux/dnotify.h>
25 #include <linux/smp_lock.h>
26 #include <linux/personality.h>
27 #include <linux/security.h>
28 #include <linux/mount.h>
29 #include <linux/audit.h>
30 #include <linux/vs_base.h>
31
32 #include <asm/namei.h>
33 #include <asm/uaccess.h>
34
/*
 * ACC_MODE(x): index the 4-byte table "\000\004\002\006" with
 * (x) & O_ACCMODE and yield the selected byte (0, 4, 2 or 6 for
 * indices 0..3).  NOTE(review): presumably the result is a MAY_* style
 * permission mask and callers pre-adjust x; confirm against the users
 * of this macro.
 */
35 #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
36
37 /* [Feb-1997 T. Schoebel-Theuer]
38  * Fundamental changes in the pathname lookup mechanisms (namei)
39  * were necessary because of omirr.  The reason is that omirr needs
40  * to know the _real_ pathname, not the user-supplied one, in case
41  * of symlinks (and also when transname replacements occur).
42  *
43  * The new code replaces the old recursive symlink resolution with
44  * an iterative one (in case of non-nested symlink chains).  It does
45  * this with calls to <fs>_follow_link().
46  * As a side effect, dir_namei(), _namei() and follow_link() are now 
47  * replaced with a single function lookup_dentry() that can handle all 
48  * the special cases of the former code.
49  *
50  * With the new dcache, the pathname is stored at each inode, at least as
51  * long as the refcount of the inode is positive.  As a side effect, the
52  * size of the dcache depends on the inode cache and thus is dynamic.
53  *
54  * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
55  * resolution to correspond with current state of the code.
56  *
57  * Note that the symlink resolution is not *completely* iterative.
58  * There is still a significant amount of tail- and mid- recursion in
59  * the algorithm.  Also, note that <fs>_readlink() is not used in
60  * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
61  * may return different results than <fs>_follow_link().  Many virtual
62  * filesystems (including /proc) exhibit this behavior.
63  */
64
65 /* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
66  * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
67  * and the name already exists in form of a symlink, try to create the new
68  * name indicated by the symlink. The old code always complained that the
69  * name already exists, due to not following the symlink even if its target
70  * is nonexistent.  The new semantics affects also mknod() and link() when
71  * the name is a symlink pointing to a non-existent name.
72  *
73  * I don't know which semantics is the right one, since I have no access
74  * to standards. But I found by trial that HP-UX 9.0 has the full "new"
75  * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
76  * "old" one. Personally, I think the new semantics is much more logical.
77  * Note that "ln old new" where "new" is a symlink pointing to a non-existing
78  * file does succeed in both HP-UX and SunOS, but not in Solaris
79  * and in the old Linux semantics.
80  */
81
82 /* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
83  * semantics.  See the comments in "open_namei" and "do_link" below.
84  *
85  * [10-Sep-98 Alan Modra] Another symlink change.
86  */
87
88 /* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
89  *      inside the path - always follow.
90  *      in the last component in creation/removal/renaming - never follow.
91  *      if LOOKUP_FOLLOW passed - follow.
92  *      if the pathname has trailing slashes - follow.
93  *      otherwise - don't follow.
94  * (applied in that order).
95  *
96  * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT
97  * restored for 2.4. This is the last surviving part of old 4.2BSD bug.
98  * During the 2.4 we need to fix the userland stuff depending on it -
99  * hopefully we will be able to get rid of that wart in 2.5. So far only
100  * XEmacs seems to be relying on it...
101  */
102 /*
103  * [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland)
104  * implemented.  Let's see if raised priority of ->s_vfs_rename_sem gives
105  * any extra contention...
106  */
107
108 /* In order to reduce some races, while at the same time doing additional
109  * checking and hopefully speeding things up, we copy filenames to the
110  * kernel data space before using them..
111  *
112  * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
113  * PATH_MAX includes the nul terminator --RR.
114  */
115 static inline int do_getname(const char __user *filename, char *page)
116 {
117         int retval;
118         unsigned long len = PATH_MAX;
119
120         if ((unsigned long) filename >= TASK_SIZE) {
121                 if (!segment_eq(get_fs(), KERNEL_DS))
122                         return -EFAULT;
123         } else if (TASK_SIZE - (unsigned long) filename < PATH_MAX)
124                 len = TASK_SIZE - (unsigned long) filename;
125
126         retval = strncpy_from_user((char *)page, filename, len);
127         if (retval > 0) {
128                 if (retval < len)
129                         return 0;
130                 return -ENAMETOOLONG;
131         } else if (!retval)
132                 retval = -ENOENT;
133         return retval;
134 }
135
136 char * getname(const char __user * filename)
137 {
138         char *tmp, *result;
139
140         result = ERR_PTR(-ENOMEM);
141         tmp = __getname();
142         if (tmp)  {
143                 int retval = do_getname(filename, tmp);
144
145                 result = tmp;
146                 if (retval < 0) {
147                         __putname(tmp);
148                         result = ERR_PTR(retval);
149                 }
150         }
151         if (unlikely(current->audit_context) && !IS_ERR(result) && result)
152                 audit_getname(result);
153         return result;
154 }
155
156 /*
157  *      vfs_permission()
158  *
159  * is used to check for read/write/execute permissions on a file.
160  * We use "fsuid" for this, letting us set arbitrary permissions
161  * for filesystem access without changing the "normal" uids which
162  * are used for other things..
163  */
164 int vfs_permission(struct inode * inode, int mask)
165 {
166         umode_t                 mode = inode->i_mode;
167
168         /* Prevent vservers from escaping chroot() barriers */
169         if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN))
170                 return -EACCES;
171
172         if (mask & MAY_WRITE) {
173                 /*
174                  * Nobody gets write access to a read-only fs.
175                  */
176                 if (IS_RDONLY(inode) &&
177                     (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
178                         return -EROFS;
179
180                 /*
181                  * Nobody gets write access to an immutable file.
182                  */
183                 if (IS_IMMUTABLE(inode))
184                         return -EACCES;
185         }
186
187         if (current->fsuid == inode->i_uid)
188                 mode >>= 6;
189         else if (in_group_p(inode->i_gid))
190                 mode >>= 3;
191
192         /*
193          * If the DACs are ok we don't need any capability check.
194          */
195         if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask))
196                 return 0;
197
198         /*
199          * Read/write DACs are always overridable.
200          * Executable DACs are overridable if at least one exec bit is set.
201          */
202         if (!(mask & MAY_EXEC) ||
203             (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
204                 if (capable(CAP_DAC_OVERRIDE))
205                         return 0;
206
207         /*
208          * Searching includes executable on directories, else just read.
209          */
210         if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
211                 if (capable(CAP_DAC_READ_SEARCH))
212                         return 0;
213
214         return -EACCES;
215 }
216
217 int permission(struct inode * inode,int mask, struct nameidata *nd)
218 {
219         int retval;
220         int submask;
221         umode_t mode = inode->i_mode;
222
223         /* Ordinary permission routines do not understand MAY_APPEND. */
224         submask = mask & ~MAY_APPEND;
225
226         if (nd && (mask & MAY_WRITE) && MNT_IS_RDONLY(nd->mnt) &&
227                 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
228                 return -EROFS;
229
230         if (inode->i_op && inode->i_op->permission)
231                 retval = inode->i_op->permission(inode, submask, nd);
232         else
233                 retval = vfs_permission(inode, submask);
234         if (retval)
235                 return retval;
236
237         return security_inode_permission(inode, mask, nd);
238 }
239
240 /*
241  * get_write_access() gets write permission for a file.
242  * put_write_access() releases this write permission.
243  * This is used for regular files.
244  * We cannot support write (and maybe mmap read-write shared) accesses and
245  * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
246  * can have the following values:
247  * 0: no writers, no VM_DENYWRITE mappings
248  * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
249  * > 0: (i_writecount) users are writing to the file.
250  *
251  * Normally we operate on that counter with atomic_{inc,dec} and it's safe
252  * except for the cases where we don't hold i_writecount yet. Then we need to
253  * use {get,deny}_write_access() - these functions check the sign and refuse
254  * to do the change if sign is wrong. Exclusion between them is provided by
255  * the inode->i_lock spinlock.
256  */
257
258 int get_write_access(struct inode * inode)
259 {
260         spin_lock(&inode->i_lock);
261         if (atomic_read(&inode->i_writecount) < 0) {
262                 spin_unlock(&inode->i_lock);
263                 return -ETXTBSY;
264         }
265         atomic_inc(&inode->i_writecount);
266         spin_unlock(&inode->i_lock);
267
268         return 0;
269 }
270
271 int deny_write_access(struct file * file)
272 {
273         struct inode *inode = file->f_dentry->d_inode;
274
275         spin_lock(&inode->i_lock);
276         if (atomic_read(&inode->i_writecount) > 0) {
277                 spin_unlock(&inode->i_lock);
278                 return -ETXTBSY;
279         }
280         atomic_dec(&inode->i_writecount);
281         spin_unlock(&inode->i_lock);
282
283         return 0;
284 }
285
286 void path_release(struct nameidata *nd)
287 {
288         dput(nd->dentry);
289         mntput(nd->mnt);
290 }
291
292 /*
293  * umount() mustn't call path_release()/mntput() as that would clear
294  * mnt_expiry_mark
295  */
296 void path_release_on_umount(struct nameidata *nd)
297 {
298         dput(nd->dentry);
299         _mntput(nd->mnt);
300 }
301
302 /*
303  * Internal lookup() using the new generic dcache.
304  * SMP-safe
305  */
306 static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd)
307 {
308         struct dentry * dentry = __d_lookup(parent, name);
309
310         /* lockess __d_lookup may fail due to concurrent d_move() 
311          * in some unrelated directory, so try with d_lookup
312          */
313         if (!dentry)
314                 dentry = d_lookup(parent, name);
315
316         if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
317                 if (!dentry->d_op->d_revalidate(dentry, nd) && !d_invalidate(dentry)) {
318                         dput(dentry);
319                         dentry = NULL;
320                 }
321         }
322         return dentry;
323 }
324
325 /*
326  * Short-cut version of permission(), for calling by
327  * path_walk(), when dcache lock is held.  Combines parts
328  * of permission() and vfs_permission(), and tests ONLY for
329  * MAY_EXEC permission.
330  *
331  * If appropriate, check DAC only.  If not appropriate, or
332  * short-cut DAC fails, then call permission() to do more
333  * complete permission check.
334  */
335 static inline int exec_permission_lite(struct inode *inode,
336                                        struct nameidata *nd)
337 {
338         umode_t mode = inode->i_mode;
339
340         if (inode->i_op && inode->i_op->permission)
341                 return -EAGAIN;
342
343         if (current->fsuid == inode->i_uid)
344                 mode >>= 6;
345         else if (in_group_p(inode->i_gid))
346                 mode >>= 3;
347
348         if (mode & MAY_EXEC)
349                 goto ok;
350
351         if ((inode->i_mode & S_IXUGO) && capable(CAP_DAC_OVERRIDE))
352                 goto ok;
353
354         if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_OVERRIDE))
355                 goto ok;
356
357         if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_READ_SEARCH))
358                 goto ok;
359
360         return -EACCES;
361 ok:
362         return security_inode_permission(inode, MAY_EXEC, nd);
363 }
364
365 /*
366  * This is called when everything else fails, and we actually have
367  * to go to the low-level filesystem to find out what we should do..
368  *
369  * We get the directory semaphore, and after getting that we also
370  * make sure that nobody added the entry to the dcache in the meantime..
371  * SMP-safe
372  */
373 static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd)
374 {
375         struct dentry * result;
376         struct inode *dir = parent->d_inode;
377
378         down(&dir->i_sem);
379         /*
380          * First re-do the cached lookup just in case it was created
381          * while we waited for the directory semaphore..
382          *
383          * FIXME! This could use version numbering or similar to
384          * avoid unnecessary cache lookups.
385          *
386          * The "dcache_lock" is purely to protect the RCU list walker
387          * from concurrent renames at this point (we mustn't get false
388          * negatives from the RCU list walk here, unlike the optimistic
389          * fast walk).
390          *
391          * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup
392          */
393         result = d_lookup(parent, name);
394         if (!result) {
395                 struct dentry * dentry = d_alloc(parent, name);
396                 result = ERR_PTR(-ENOMEM);
397                 if (dentry) {
398                         result = dir->i_op->lookup(dir, dentry, nd);
399                         if (result)
400                                 dput(dentry);
401                         else
402                                 result = dentry;
403                 }
404                 up(&dir->i_sem);
405                 return result;
406         }
407
408         /*
409          * Uhhuh! Nasty case: the cache was re-populated while
410          * we waited on the semaphore. Need to revalidate.
411          */
412         up(&dir->i_sem);
413         if (result->d_op && result->d_op->d_revalidate) {
414                 if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) {
415                         dput(result);
416                         result = ERR_PTR(-ENOENT);
417                 }
418         }
419         return result;
420 }
421
422 static int __emul_lookup_dentry(const char *, struct nameidata *);
423
424 /* SMP-safe */
/*
 * Point nd->mnt / nd->dentry at the starting directory for a walk of
 * 'name' from the root.
 *
 * When the task has an alternate root (current->fs->altroot) and
 * LOOKUP_NOALT is not set in nd->flags, the alternate root is tried first
 * through __emul_lookup_dentry().
 *
 * Returns 0 when __emul_lookup_dentry() returned non-zero (documented at
 * its definition as "everything is done"), and 1 when nd has been set to
 * the task's ordinary root and the caller still has to walk 'name'.
 */
425 static inline int
426 walk_init_root(const char *name, struct nameidata *nd)
427 {
428         read_lock(&current->fs->lock);
429         if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
430                 nd->mnt = mntget(current->fs->altrootmnt);
431                 nd->dentry = dget(current->fs->altroot);
            /* fs->lock is released across the nested lookup ... */
432                 read_unlock(&current->fs->lock);
433                 if (__emul_lookup_dentry(name,nd))
434                         return 0;
            /* ... and taken again before the ordinary root is read. */
435                 read_lock(&current->fs->lock);
436         }
437         nd->mnt = mntget(current->fs->rootmnt);
438         nd->dentry = dget(current->fs->root);
439         read_unlock(&current->fs->lock);
440         return 1;
441 }
442
443 static inline int __vfs_follow_link(struct nameidata *nd, const char *link)
444 {
445         int res = 0;
446         char *name;
447         if (IS_ERR(link))
448                 goto fail;
449
450         if (*link == '/') {
451                 path_release(nd);
452                 if (!walk_init_root(link, nd))
453                         /* weird __emul_prefix() stuff did it */
454                         goto out;
455         }
456         res = link_path_walk(link, nd);
457 out:
458         if (nd->depth || res || nd->last_type!=LAST_NORM)
459                 return res;
460         /*
461          * If it is an iterative symlinks resolution in open_namei() we
462          * have to copy the last component. And all that crap because of
463          * bloody create() on broken symlinks. Furrfu...
464          */
465         name = __getname();
466         if (unlikely(!name)) {
467                 path_release(nd);
468                 return -ENOMEM;
469         }
470         strcpy(name, nd->last.name);
471         nd->last.name = name;
472         return 0;
473 fail:
474         path_release(nd);
475         return PTR_ERR(link);
476 }
477
478 /*
479  * This limits recursive symlink follows to 8, while
480  * limiting consecutive symlinks to 40.
481  *
482  * Without that kind of total limit, nasty chains of consecutive
483  * symlinks can cause almost arbitrarily long lookups. 
484  */
485 static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
486 {
487         int err = -ELOOP;
488         if (current->link_count >= MAX_NESTED_LINKS)
489                 goto loop;
490         if (current->total_link_count >= 40)
491                 goto loop;
492         BUG_ON(nd->depth >= MAX_NESTED_LINKS);
493         cond_resched();
494         err = security_inode_follow_link(dentry, nd);
495         if (err)
496                 goto loop;
497         current->link_count++;
498         current->total_link_count++;
499         nd->depth++;
500         touch_atime(nd->mnt, dentry);
501         nd_set_link(nd, NULL);
502         err = dentry->d_inode->i_op->follow_link(dentry, nd);
503         if (!err) {
504                 char *s = nd_get_link(nd);
505                 if (s)
506                         err = __vfs_follow_link(nd, s);
507                 if (dentry->d_inode->i_op->put_link)
508                         dentry->d_inode->i_op->put_link(dentry, nd);
509         }
510         current->link_count--;
511         nd->depth--;
512         return err;
513 loop:
514         path_release(nd);
515         return err;
516 }
517
518 int follow_up(struct vfsmount **mnt, struct dentry **dentry)
519 {
520         struct vfsmount *parent;
521         struct dentry *mountpoint;
522         spin_lock(&vfsmount_lock);
523         parent=(*mnt)->mnt_parent;
524         if (parent == *mnt) {
525                 spin_unlock(&vfsmount_lock);
526                 return 0;
527         }
528         mntget(parent);
529         mountpoint=dget((*mnt)->mnt_mountpoint);
530         spin_unlock(&vfsmount_lock);
531         dput(*dentry);
532         *dentry = mountpoint;
533         mntput(*mnt);
534         *mnt = parent;
535         return 1;
536 }
537
538 /* no need for dcache_lock, as serialization is taken care in
539  * namespace.c
540  */
541 static int follow_mount(struct vfsmount **mnt, struct dentry **dentry)
542 {
543         int res = 0;
544         while (d_mountpoint(*dentry)) {
545                 struct vfsmount *mounted = lookup_mnt(*mnt, *dentry);
546                 if (!mounted)
547                         break;
548                 mntput(*mnt);
549                 *mnt = mounted;
550                 dput(*dentry);
551                 *dentry = dget(mounted->mnt_root);
552                 res = 1;
553         }
554         return res;
555 }
556
557 /* no need for dcache_lock, as serialization is taken care in
558  * namespace.c
559  */
560 static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry)
561 {
562         struct vfsmount *mounted;
563
564         mounted = lookup_mnt(*mnt, *dentry);
565         if (mounted) {
566                 mntput(*mnt);
567                 *mnt = mounted;
568                 dput(*dentry);
569                 *dentry = dget(mounted->mnt_root);
570                 return 1;
571         }
572         return 0;
573 }
574
/*
 * Non-inline entry point for __follow_down(); returns its result
 * unchanged.
 */
int follow_down(struct vfsmount **mnt, struct dentry **dentry)
{
	int crossed = __follow_down(mnt, dentry);

	return crossed;
}
579  
/*
 * Move *mnt / *dentry one step up for a ".." component.
 *
 * - At the task's root (current->fs->root on current->fs->rootmnt)
 *   nothing moves.
 * - Inside a mount (not at its mnt_root) the dentry is replaced by its
 *   d_parent.
 * - At the root of a mount the pair is switched to the mountpoint in the
 *   parent mount and the loop runs again; a mount that is its own parent
 *   ends the loop without moving.
 *
 * follow_mount() is always applied to the resulting position.
 */
580 static inline void follow_dotdot(struct vfsmount **mnt, struct dentry **dentry)
581 {
582         while(1) {
583                 struct vfsmount *parent;
584                 struct dentry *old = *dentry;
585
            /* current->fs->lock is held only for the root comparison. */
586                 read_lock(&current->fs->lock);
587                 if (*dentry == current->fs->root &&
588                     *mnt == current->fs->rootmnt) {
589                         read_unlock(&current->fs->lock);
590                         break;
591                 }
592                 read_unlock(&current->fs->lock);
            /* d_parent is read and pinned under dcache_lock. */
593                 spin_lock(&dcache_lock);
594                 if (*dentry != (*mnt)->mnt_root) {
595                         *dentry = dget((*dentry)->d_parent);
596                         spin_unlock(&dcache_lock);
597                         dput(old);
598                         break;
599                 }
600                 spin_unlock(&dcache_lock);
            /* At a mount root: step to the parent mount under vfsmount_lock. */
601                 spin_lock(&vfsmount_lock);
602                 parent = (*mnt)->mnt_parent;
603                 if (parent == *mnt) {
604                         spin_unlock(&vfsmount_lock);
605                         break;
606                 }
607                 mntget(parent);
608                 *dentry = dget((*mnt)->mnt_mountpoint);
609                 spin_unlock(&vfsmount_lock);
610                 dput(old);
611                 mntput(*mnt);
612                 *mnt = parent;
613         }
614         follow_mount(mnt, dentry);
615 }
616
/*
 * A (vfsmount, dentry) pair; do_lookup() fills one in to describe the
 * result of looking up a single component.
 */
617 struct path {
618         struct vfsmount *mnt;
619         struct dentry *dentry;
620 };
621
622 /*
623  *  It's more convoluted than I'd like it to be, but... it's still fairly
624  *  small and for now I'd prefer to have fast path as straight as possible.
625  *  It _is_ time-critical.
626  */
627 static int do_lookup(struct nameidata *nd, struct qstr *name,
628                      struct path *path)
629 {
630         struct vfsmount *mnt = nd->mnt;
631         struct dentry *dentry = __d_lookup(nd->dentry, name);
632
633         if (!dentry)
634                 goto need_lookup;
635         if (dentry->d_op && dentry->d_op->d_revalidate)
636                 goto need_revalidate;
637 done:
638         path->mnt = mnt;
639         path->dentry = dentry;
640         return 0;
641
642 need_lookup:
643         dentry = real_lookup(nd->dentry, name, nd);
644         if (IS_ERR(dentry))
645                 goto fail;
646         goto done;
647
648 need_revalidate:
649         if (dentry->d_op->d_revalidate(dentry, nd))
650                 goto done;
651         if (d_invalidate(dentry))
652                 goto done;
653         dput(dentry);
654         goto need_lookup;
655
656 fail:
657         return PTR_ERR(dentry);
658 }
659
660 /*
661  * Name resolution.
662  *
663  * This is the basic name resolution function, turning a pathname
664  * into the final dentry.
665  *
666  * We expect 'base' to be positive and a directory.
667  */
/*
 * Walk 'name' one component at a time, starting from nd->mnt/nd->dentry
 * and updating them as the walk proceeds.
 *
 * Returns 0 on success.  Every failure that leaves the loop through
 * 'break' (or 'out_dput') calls path_release(nd) before returning the
 * error.  The 'return_err' exit, taken when do_follow_link() fails,
 * returns without calling path_release() here.
 * NOTE(review): presumably do_follow_link() has already dropped nd's
 * references on every failure path - confirm.
 *
 * With LOOKUP_PARENT the last component is not looked up; it is stored
 * in nd->last and classified in nd->last_type instead.
 */
668 int fastcall link_path_walk(const char * name, struct nameidata *nd)
669 {
670         struct path next;
671         struct inode *inode;
672         int err, atomic;
673         unsigned int lookup_flags = nd->flags;
674
675         atomic = (lookup_flags & LOOKUP_ATOMIC);
676
            /* Skip leading slashes; an empty remainder needs no lookup. */
677         while (*name=='/')
678                 name++;
679         if (!*name)
680                 goto return_reval;
681
682         inode = nd->dentry->d_inode;
            /* A non-zero nd->depth replaces the flags with LOOKUP_FOLLOW alone. */
683         if (nd->depth)
684                 lookup_flags = LOOKUP_FOLLOW;
685
686         /* At this point we know we have a real path component. */
687         for(;;) {
688                 unsigned long hash;
689                 struct qstr this;
690                 unsigned int c;
691
                    /* Fast MAY_EXEC check first; -EAGAIN asks for the full one. */
692                 err = exec_permission_lite(inode, nd);
693                 if (err == -EAGAIN) { 
694                         err = permission(inode, MAY_EXEC, nd);
695                 }
696                 if (err)
697                         break;
698
699                 this.name = name;
700                 c = *(const unsigned char *)name;
701
                    /* Hash the component up to the next '/' or the terminating NUL. */
702                 hash = init_name_hash();
703                 do {
704                         name++;
705                         hash = partial_name_hash(c, hash);
706                         c = *(const unsigned char *)name;
707                 } while (c && (c != '/'));
708                 this.len = name - (const char *) this.name;
709                 this.hash = end_name_hash(hash);
710
711                 /* remove trailing slashes? */
712                 if (!c)
713                         goto last_component;
714                 while (*++name == '/');
715                 if (!*name)
716                         goto last_with_slashes;
717
718                 /*
719                  * "." and ".." are special - ".." especially so because it has
720                  * to be able to know about the current root directory and
721                  * parent relationships.
722                  */
723                 if (this.name[0] == '.') switch (this.len) {
724                         default:
725                                 break;
726                         case 2: 
727                                 if (this.name[1] != '.')
728                                         break;
729                                 follow_dotdot(&nd->mnt, &nd->dentry);
730                                 inode = nd->dentry->d_inode;
731                                 /* fallthrough */
732                         case 1:
733                                 continue;
734                 }
735                 /*
736                  * See if the low-level filesystem might want
737                  * to use its own hash..
738                  */
739                 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
740                         err = nd->dentry->d_op->d_hash(nd->dentry, &this);
741                         if (err < 0)
742                                 break;
743                 }
                    /* A LOOKUP_ATOMIC walk stops here with -EWOULDBLOCKIO; do_lookup() is not called. */
744                 err = -EWOULDBLOCKIO;
745                 if (atomic)
746                         break;
747                 nd->flags |= LOOKUP_CONTINUE;
748                 /* This does the actual lookups.. */
749                 err = do_lookup(nd, &this, &next);
750                 if (err)
751                         break;
752                 /* Check mountpoints.. */
753                 follow_mount(&next.mnt, &next.dentry);
754
755                 err = -ENOENT;
756                 inode = next.dentry->d_inode;
757                 if (!inode)
758                         goto out_dput;
759                 err = -ENOTDIR; 
760                 if (!inode->i_op)
761                         goto out_dput;
762
763                 if (inode->i_op->follow_link) {
                            /* Extra mount reference is held across the symlink follow. */
764                         mntget(next.mnt);
765                         err = do_follow_link(next.dentry, nd);
766                         dput(next.dentry);
767                         mntput(next.mnt);
768                         if (err)
769                                 goto return_err;
770                         err = -ENOENT;
771                         inode = nd->dentry->d_inode;
772                         if (!inode)
773                                 break;
774                         err = -ENOTDIR; 
775                         if (!inode->i_op)
776                                 break;
777                 } else {
                            /* Not a symlink: step nd onto the looked-up entry. */
778                         dput(nd->dentry);
779                         nd->mnt = next.mnt;
780                         nd->dentry = next.dentry;
781                 }
782                 err = -ENOTDIR; 
783                 if (!inode->i_op->lookup)
784                         break;
785                 continue;
786                 /* here ends the main loop */
787
788 last_with_slashes:
789                 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
790 last_component:
791                 nd->flags &= ~LOOKUP_CONTINUE;
792                 if (lookup_flags & LOOKUP_PARENT)
793                         goto lookup_parent;
794                 if (this.name[0] == '.') switch (this.len) {
795                         default:
796                                 break;
797                         case 2: 
798                                 if (this.name[1] != '.')
799                                         break;
800                                 follow_dotdot(&nd->mnt, &nd->dentry);
801                                 inode = nd->dentry->d_inode;
802                                 /* fallthrough */
803                         case 1:
804                                 goto return_reval;
805                 }
806                 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
807                         err = nd->dentry->d_op->d_hash(nd->dentry, &this);
808                         if (err < 0)
809                                 break;
810                 }
811                 err = -EWOULDBLOCKIO;
812                 if (atomic)
813                         break;
814                 err = do_lookup(nd, &this, &next);
815                 if (err)
816                         break;
817                 follow_mount(&next.mnt, &next.dentry);
818                 inode = next.dentry->d_inode;
                    /* The final component is followed only when LOOKUP_FOLLOW is set. */
819                 if ((lookup_flags & LOOKUP_FOLLOW)
820                     && inode && inode->i_op && inode->i_op->follow_link) {
821                         mntget(next.mnt);
822                         err = do_follow_link(next.dentry, nd);
823                         dput(next.dentry);
824                         mntput(next.mnt);
825                         if (err)
826                                 goto return_err;
827                         inode = nd->dentry->d_inode;
828                 } else {
829                         dput(nd->dentry);
830                         nd->mnt = next.mnt;
831                         nd->dentry = next.dentry;
832                 }
833                 err = -ENOENT;
834                 if (!inode)
835                         break;
836                 if (lookup_flags & LOOKUP_DIRECTORY) {
837                         err = -ENOTDIR; 
838                         if (!inode->i_op || !inode->i_op->lookup)
839                                 break;
840                 }
841                 goto return_base;
842 lookup_parent:
                    /* LOOKUP_PARENT: record the last component instead of resolving it. */
843                 nd->last = this;
844                 nd->last_type = LAST_NORM;
845                 if (this.name[0] != '.')
846                         goto return_base;
847                 if (this.len == 1)
848                         nd->last_type = LAST_DOT;
849                 else if (this.len == 2 && this.name[1] == '.')
850                         nd->last_type = LAST_DOTDOT;
851                 else
852                         goto return_base;
853 return_reval:
854                 /*
855                  * We bypassed the ordinary revalidation routines.
856                  * We may need to check the cached dentry for staleness.
857                  */
858                 if (nd->dentry && nd->dentry->d_sb &&
859                     (nd->dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
860                         err = -ESTALE;
861                         /* Note: we do not d_invalidate() */
                            /* NOTE(review): d_op is dereferenced without a NULL test; presumably FS_REVAL_DOT filesystems always set it - confirm. */
862                         if (!nd->dentry->d_op->d_revalidate(nd->dentry, nd))
863                                 break;
864                 }
865 return_base:
866                 return 0;
867 out_dput:
868                 dput(next.dentry);
869                 break;
870         }
871         path_release(nd);
872 return_err:
873         return err;
874 }
875
876 int fastcall path_walk(const char * name, struct nameidata *nd)
877 {
878         current->total_link_count = 0;
879         return link_path_walk(name, nd);
880 }
881
882 /* SMP-safe */
883 /* returns 1 if everything is done */
884 static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
885 {
886         if (path_walk(name, nd))
887                 return 0;               /* something went wrong... */
888
889         if (!nd->dentry->d_inode || S_ISDIR(nd->dentry->d_inode->i_mode)) {
890                 struct dentry *old_dentry = nd->dentry;
891                 struct vfsmount *old_mnt = nd->mnt;
892                 struct qstr last = nd->last;
893                 int last_type = nd->last_type;
894                 /*
895                  * NAME was not found in alternate root or it's a directory.  Try to find
896                  * it in the normal root:
897                  */
898                 nd->last_type = LAST_ROOT;
899                 read_lock(&current->fs->lock);
900                 nd->mnt = mntget(current->fs->rootmnt);
901                 nd->dentry = dget(current->fs->root);
902                 read_unlock(&current->fs->lock);
903                 if (path_walk(name, nd) == 0) {
904                         if (nd->dentry->d_inode) {
905                                 dput(old_dentry);
906                                 mntput(old_mnt);
907                                 return 1;
908                         }
909                         path_release(nd);
910                 }
911                 nd->dentry = old_dentry;
912                 nd->mnt = old_mnt;
913                 nd->last = last;
914                 nd->last_type = last_type;
915         }
916         return 1;
917 }
918
919 void set_fs_altroot(void)
920 {
921         char *emul = __emul_prefix();
922         struct nameidata nd;
923         struct vfsmount *mnt = NULL, *oldmnt;
924         struct dentry *dentry = NULL, *olddentry;
925         int err;
926
927         if (!emul)
928                 goto set_it;
929         err = path_lookup(emul, LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_NOALT, &nd);
930         if (!err) {
931                 mnt = nd.mnt;
932                 dentry = nd.dentry;
933         }
934 set_it:
935         write_lock(&current->fs->lock);
936         oldmnt = current->fs->altrootmnt;
937         olddentry = current->fs->altroot;
938         current->fs->altrootmnt = mnt;
939         current->fs->altroot = dentry;
940         write_unlock(&current->fs->lock);
941         if (olddentry) {
942                 dput(olddentry);
943                 mntput(oldmnt);
944         }
945 }
946
947 int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata *nd)
948 {
949         int retval;
950
951         nd->last_type = LAST_ROOT; /* if there are only slashes... */
952         nd->flags = flags;
953         nd->depth = 0;
954
955         read_lock(&current->fs->lock);
956         if (*name=='/') {
957                 if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
958                         nd->mnt = mntget(current->fs->altrootmnt);
959                         nd->dentry = dget(current->fs->altroot);
960                         read_unlock(&current->fs->lock);
961                         if (__emul_lookup_dentry(name,nd))
962                                 return 0;
963                         read_lock(&current->fs->lock);
964                 }
965                 nd->mnt = mntget(current->fs->rootmnt);
966                 nd->dentry = dget(current->fs->root);
967         } else {
968                 nd->mnt = mntget(current->fs->pwdmnt);
969                 nd->dentry = dget(current->fs->pwd);
970         }
971         read_unlock(&current->fs->lock);
972         current->total_link_count = 0;
973         retval = link_path_walk(name, nd);
974         if (unlikely(current->audit_context
975                      && nd && nd->dentry && nd->dentry->d_inode))
976                 audit_inode(name,
977                             nd->dentry->d_inode->i_ino,
978                             nd->dentry->d_inode->i_rdev);
979         return retval;
980 }
981
982 /*
983  * Restricted form of lookup. Doesn't follow links, single-component only,
984  * needs parent already locked. Doesn't follow mounts.
985  * SMP-safe.
986  */
987 static struct dentry * __lookup_hash(struct qstr *name, struct dentry * base, struct nameidata *nd)
988 {
989         struct dentry * dentry;
990         struct inode *inode;
991         int err;
992
993         inode = base->d_inode;
994         err = permission(inode, MAY_EXEC, nd);
995         dentry = ERR_PTR(err);
996         if (err)
997                 goto out;
998
999         /*
1000          * See if the low-level filesystem might want
1001          * to use its own hash..
1002          */
1003         if (base->d_op && base->d_op->d_hash) {
1004                 err = base->d_op->d_hash(base, name);
1005                 dentry = ERR_PTR(err);
1006                 if (err < 0)
1007                         goto out;
1008         }
1009
1010         dentry = cached_lookup(base, name, nd);
1011         if (!dentry) {
1012                 struct dentry *new = d_alloc(base, name);
1013                 dentry = ERR_PTR(-ENOMEM);
1014                 if (!new)
1015                         goto out;
1016                 dentry = inode->i_op->lookup(inode, new, nd);
1017                 if (!dentry)
1018                         dentry = new;
1019                 else
1020                         dput(new);
1021         }
1022 out:
1023         return dentry;
1024 }
1025
1026 struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
1027 {
1028         return __lookup_hash(name, base, NULL);
1029 }
1030
1031 /* SMP-safe */
1032 struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
1033 {
1034         unsigned long hash;
1035         struct qstr this;
1036         unsigned int c;
1037
1038         this.name = name;
1039         this.len = len;
1040         if (!len)
1041                 goto access;
1042
1043         hash = init_name_hash();
1044         while (len--) {
1045                 c = *(const unsigned char *)name++;
1046                 if (c == '/' || c == '\0')
1047                         goto access;
1048                 hash = partial_name_hash(c, hash);
1049         }
1050         this.hash = end_name_hash(hash);
1051
1052         return lookup_hash(&this, base);
1053 access:
1054         return ERR_PTR(-EACCES);
1055 }
1056
1057 /*
1058  *      namei()
1059  *
1060  * is used by most simple commands to get the inode of a specified name.
1061  * Open, link etc use their own routines, but this is enough for things
1062  * like 'chmod' etc.
1063  *
1064  * namei exists in two versions: namei/lnamei. The only difference is
1065  * that namei follows links, while lnamei does not.
1066  * SMP-safe
1067  */
1068 int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd)
1069 {
1070         char *tmp = getname(name);
1071         int err = PTR_ERR(tmp);
1072
1073         if (!IS_ERR(tmp)) {
1074                 err = path_lookup(tmp, flags, nd);
1075                 putname(tmp);
1076         }
1077         return err;
1078 }
1079
1080 /*
1081  * It's inline, so penalty for filesystems that don't use sticky bit is
1082  * minimal.
1083  */
1084 static inline int check_sticky(struct inode *dir, struct inode *inode)
1085 {
1086         if (!(dir->i_mode & S_ISVTX))
1087                 return 0;
1088         if (inode->i_uid == current->fsuid)
1089                 return 0;
1090         if (dir->i_uid == current->fsuid)
1091                 return 0;
1092         return !capable(CAP_FOWNER);
1093 }
1094
1095 /*
1096  *      Check whether we can remove a link victim from directory dir, check
1097  *  whether the type of victim is right.
1098  *  1. We can't do it if dir is read-only (done in permission())
1099  *  2. We should have write and exec permissions on dir
1100  *  3. We can't remove anything from append-only dir
1101  *  4. We can't do anything with immutable dir (done in permission())
1102  *  5. If the sticky bit on dir is set we should either
1103  *      a. be owner of dir, or
1104  *      b. be owner of victim, or
1105  *      c. have CAP_FOWNER capability
1106  *  6. If the victim is append-only or immutable we can't do antyhing with
1107  *     links pointing to it.
1108  *  7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
1109  *  8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
1110  *  9. We can't remove a root or mountpoint.
1111  * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
1112  *     nfs_async_unlink().
1113  */
1114 static inline int may_delete(struct inode *dir,struct dentry *victim,int isdir)
1115 {
1116         int error;
1117         if (!victim->d_inode)
1118                 return -ENOENT;
1119         if (victim->d_parent->d_inode != dir)
1120                 BUG();
1121                         
1122         error = permission(dir,MAY_WRITE | MAY_EXEC, NULL);
1123         if (error)
1124                 return error;
1125         if (IS_APPEND(dir))
1126                 return -EPERM;
1127         if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
1128                 IS_IXORUNLINK(victim->d_inode))
1129                 return -EPERM;
1130         if (isdir) {
1131                 if (!S_ISDIR(victim->d_inode->i_mode))
1132                         return -ENOTDIR;
1133                 if (IS_ROOT(victim))
1134                         return -EBUSY;
1135         } else if (S_ISDIR(victim->d_inode->i_mode))
1136                 return -EISDIR;
1137         if (IS_DEADDIR(dir))
1138                 return -ENOENT;
1139         if (victim->d_flags & DCACHE_NFSFS_RENAMED)
1140                 return -EBUSY;
1141         return 0;
1142 }
1143
1144 /*      Check whether we can create an object with dentry child in directory
1145  *  dir.
1146  *  1. We can't do it if child already exists (open has special treatment for
1147  *     this case, but since we are inlined it's OK)
1148  *  2. We can't do it if dir is read-only (done in permission())
1149  *  3. We should have write and exec permissions on dir
1150  *  4. We can't do it if dir is immutable (done in permission())
1151  */
1152 static inline int may_create(struct inode *dir, struct dentry *child,
1153                              struct nameidata *nd)
1154 {
1155         if (child->d_inode)
1156                 return -EEXIST;
1157         if (IS_DEADDIR(dir))
1158                 return -ENOENT;
1159         return permission(dir,MAY_WRITE | MAY_EXEC, nd);
1160 }
1161
1162 static inline int mnt_may_create(struct vfsmount *mnt, struct inode *dir, struct dentry *child) {
1163        if (child->d_inode)
1164                return -EEXIST;
1165        if (IS_DEADDIR(dir))
1166                return -ENOENT;
1167        if (mnt->mnt_flags & MNT_RDONLY)
1168                return -EROFS;
1169        return 0;
1170 }
1171
1172 static inline int mnt_may_unlink(struct vfsmount *mnt, struct inode *dir, struct dentry *child) {
1173        if (!child->d_inode)
1174                return -ENOENT;
1175        if (mnt->mnt_flags & MNT_RDONLY)
1176                return -EROFS;
1177        return 0;
1178 }
1179
1180 /* 
1181  * Special case: O_CREAT|O_EXCL implies O_NOFOLLOW for security
1182  * reasons.
1183  *
1184  * O_DIRECTORY translates into forcing a directory lookup.
1185  */
1186 static inline int lookup_flags(unsigned int f)
1187 {
1188         unsigned long retval = LOOKUP_FOLLOW;
1189
1190         if (f & O_NOFOLLOW)
1191                 retval &= ~LOOKUP_FOLLOW;
1192         
1193         if ((f & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
1194                 retval &= ~LOOKUP_FOLLOW;
1195         
1196         if (f & O_DIRECTORY)
1197                 retval |= LOOKUP_DIRECTORY;
1198         if (f & O_ATOMICLOOKUP)
1199                 retval |= LOOKUP_ATOMIC;
1200
1201         return retval;
1202 }
1203
1204 /*
1205  * p1 and p2 should be directories on the same fs.
1206  */
1207 struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
1208 {
1209         struct dentry *p;
1210
1211         if (p1 == p2) {
1212                 down(&p1->d_inode->i_sem);
1213                 return NULL;
1214         }
1215
1216         down(&p1->d_inode->i_sb->s_vfs_rename_sem);
1217
1218         for (p = p1; p->d_parent != p; p = p->d_parent) {
1219                 if (p->d_parent == p2) {
1220                         down(&p2->d_inode->i_sem);
1221                         down(&p1->d_inode->i_sem);
1222                         return p;
1223                 }
1224         }
1225
1226         for (p = p2; p->d_parent != p; p = p->d_parent) {
1227                 if (p->d_parent == p1) {
1228                         down(&p1->d_inode->i_sem);
1229                         down(&p2->d_inode->i_sem);
1230                         return p;
1231                 }
1232         }
1233
1234         down(&p1->d_inode->i_sem);
1235         down(&p2->d_inode->i_sem);
1236         return NULL;
1237 }
1238
1239 void unlock_rename(struct dentry *p1, struct dentry *p2)
1240 {
1241         up(&p1->d_inode->i_sem);
1242         if (p1 != p2) {
1243                 up(&p2->d_inode->i_sem);
1244                 up(&p1->d_inode->i_sb->s_vfs_rename_sem);
1245         }
1246 }
1247
1248 int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
1249                 struct nameidata *nd)
1250 {
1251         int error = may_create(dir, dentry, nd);
1252
1253         if (error)
1254                 return error;
1255
1256         if (!dir->i_op || !dir->i_op->create)
1257                 return -EACCES; /* shouldn't it be ENOSYS? */
1258         mode &= S_IALLUGO;
1259         mode |= S_IFREG;
1260         error = security_inode_create(dir, dentry, mode);
1261         if (error)
1262                 return error;
1263         DQUOT_INIT(dir);
1264         error = dir->i_op->create(dir, dentry, mode, nd);
1265         if (!error) {
1266                 inode_dir_notify(dir, DN_CREATE);
1267                 security_inode_post_create(dir, dentry, mode);
1268         }
1269         return error;
1270 }
1271
1272 int may_open(struct nameidata *nd, int acc_mode, int flag)
1273 {
1274         struct dentry *dentry = nd->dentry;
1275         struct inode *inode = dentry->d_inode;
1276         int error;
1277
1278         if (!inode)
1279                 return -ENOENT;
1280
1281         if (S_ISLNK(inode->i_mode))
1282                 return -ELOOP;
1283         
1284         if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
1285                 return -EISDIR;
1286
1287         error = permission(inode, acc_mode, nd);
1288         if (error)
1289                 return error;
1290
1291         /*
1292          * FIFO's, sockets and device files are special: they don't
1293          * actually live on the filesystem itself, and as such you
1294          * can write to them even if the filesystem is read-only.
1295          */
1296         if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
1297                 flag &= ~O_TRUNC;
1298         } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
1299                 if (nd->mnt->mnt_flags & MNT_NODEV)
1300                         return -EACCES;
1301
1302                 flag &= ~O_TRUNC;
1303         } else if ((IS_RDONLY(inode) || (nd && MNT_IS_RDONLY(nd->mnt)))
1304                 && (flag & FMODE_WRITE))
1305                 return -EROFS;
1306         /*
1307          * An append-only file must be opened in append mode for writing.
1308          */
1309         if (IS_APPEND(inode)) {
1310                 if  ((flag & FMODE_WRITE) && !(flag & O_APPEND))
1311                         return -EPERM;
1312                 if (flag & O_TRUNC)
1313                         return -EPERM;
1314         }
1315
1316         /* O_NOATIME can only be set by the owner or superuser */
1317         if (flag & O_NOATIME)
1318                 if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
1319                         return -EPERM;
1320
1321         /*
1322          * Ensure there are no outstanding leases on the file.
1323          */
1324         error = break_lease(inode, flag);
1325         if (error)
1326                 return error;
1327
1328         if (flag & O_TRUNC) {
1329                 error = get_write_access(inode);
1330                 if (error)
1331                         return error;
1332
1333                 /*
1334                  * Refuse to truncate files with mandatory locks held on them.
1335                  */
1336                 error = locks_verify_locked(inode);
1337                 if (!error) {
1338                         DQUOT_INIT(inode);
1339                         
1340                         error = do_truncate(dentry, 0);
1341                 }
1342                 put_write_access(inode);
1343                 if (error)
1344                         return error;
1345         } else
1346                 if (flag & FMODE_WRITE)
1347                         DQUOT_INIT(inode);
1348
1349         return 0;
1350 }
1351
1352 /*
1353  *      open_namei()
1354  *
1355  * namei for open - this is in fact almost the whole open-routine.
1356  *
1357  * Note that the low bits of "flag" aren't the same as in the open
1358  * system call - they are 00 - no permissions needed
1359  *                        01 - read permission needed
1360  *                        10 - write permission needed
1361  *                        11 - read/write permissions needed
1362  * which is a lot more logical, and also allows the "no perm" needed
1363  * for symlinks (where the permissions are checked later).
1364  * SMP-safe
      *
      * Returns 0 once may_open() has accepted the object that nd->mnt and
      * nd->dentry now refer to; nd is left populated for the caller in that
      * case.  Otherwise returns a negative errno.  The "exit" and
      * "exit_dput" paths call path_release(nd) before returning.
1365  */
1366 int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
1367 {
1368         int acc_mode, error = 0;
1369         struct dentry *dentry;
1370         struct dentry *dir;
             /* number of trailing symlinks followed so far via do_link */
1371         int count = 0;
1372
             /* map the low access bits of flag to a permission mask (ACC_MODE table) */
1373         acc_mode = ACC_MODE(flag);
1374
1375         /* Allow the LSM permission hook to distinguish append 
1376            access from general write access. */
1377         if (flag & O_APPEND)
1378                 acc_mode |= MAY_APPEND;
1379
1380         /* Fill in the open() intent data */
1381         nd->intent.open.flags = flag;
1382         nd->intent.open.create_mode = mode;
1383
1384         /*
1385          * The simplest case - just a plain lookup.
1386          */
1387         if (!(flag & O_CREAT)) {
1388                 error = path_lookup(pathname, lookup_flags(flag)|LOOKUP_OPEN, nd);
1389                 if (error)
1390                         return error;
1391                 goto ok;
1392         }
1393
1394         /*
1395          * Create - we need to know the parent.
1396          */
1397         error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd);
1398         if (error)
1399                 return error;
1400
1401         /*
1402          * We have the parent and last component. First of all, check
1403          * that we are not asked to creat(2) an obvious directory - that
1404          * will not do.
1405          */
1406         error = -EISDIR;
             /*
              * A non-NUL byte right after the last component means more text
              * follows it (presumably trailing slashes - confirm against
              * link_path_walk()).
              */
1407         if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
1408                 goto exit;
1409
1410         dir = nd->dentry;
1411         nd->flags &= ~LOOKUP_PARENT;
1412         down(&dir->d_inode->i_sem);
1413         dentry = __lookup_hash(&nd->last, nd->dentry, nd);
1414
             /*
              * Reached with dir->d_inode->i_sem held and dentry holding the
              * __lookup_hash() result for the last component (possibly an
              * ERR_PTR); also re-entered from do_link below.
              */
1415 do_last:
1416         error = PTR_ERR(dentry);
1417         if (IS_ERR(dentry)) {
1418                 up(&dir->d_inode->i_sem);
1419                 goto exit;
1420         }
1421
1422         /* Negative dentry, just create the file */
1423         if (!dentry->d_inode) {
                     /* the umask is applied here unless the directory is IS_POSIXACL */
1424                 if (!IS_POSIXACL(dir->d_inode))
1425                         mode &= ~current->fs->umask;
1426                 error = vfs_create(dir->d_inode, dentry, mode, nd);
1427                 up(&dir->d_inode->i_sem);
                     /* nd moves from the parent to the new child; the parent ref is dropped */
1428                 dput(nd->dentry);
1429                 nd->dentry = dentry;
1430                 if (error)
1431                         goto exit;
1432                 /* Don't check for write permission, don't truncate */
1433                 acc_mode = 0;
1434                 flag &= ~O_TRUNC;
1435                 goto ok;
1436         }
1437
1438         /*
1439          * It already exists.
1440          */
1441         up(&dir->d_inode->i_sem);
1442
1443         error = -EEXIST;
1444         if (flag & O_EXCL)
1445                 goto exit_dput;
1446
1447         if (d_mountpoint(dentry)) {
1448                 error = -ELOOP;
1449                 if (flag & O_NOFOLLOW)
1450                         goto exit_dput;
                     /* keep descending while the result is itself a mountpoint */
1451                 while (__follow_down(&nd->mnt,&dentry) && d_mountpoint(dentry));
1452         }
1453         error = -ENOENT;
1454         if (!dentry->d_inode)
1455                 goto exit_dput;
             /* an inode with ->follow_link is handled as a symlink in do_link */
1456         if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
1457                 goto do_link;
1458
1459         dput(nd->dentry);
1460         nd->dentry = dentry;
1461         error = -EISDIR;
1462         if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
1463                 goto exit;
1464 ok:
1465         error = may_open(nd, acc_mode, flag);
1466         if (error)
1467                 goto exit;
1468         return 0;
1469
             /* error exits: exit_dput additionally drops the looked-up child dentry */
1470 exit_dput:
1471         dput(dentry);
1472 exit:
1473         path_release(nd);
1474         return error;
1475
1476 do_link:
1477         error = -ELOOP;
1478         if (flag & O_NOFOLLOW)
1479                 goto exit_dput;
1480         /*
1481          * This is subtle. Instead of calling do_follow_link() we do the
1482          * thing by hands. The reason is that this way we have zero link_count
1483          * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
1484          * After that we have the parent and last component, i.e.
1485          * we are in the same situation as after the first path_walk().
1486          * Well, almost - if the last component is normal we get its copy
1487          * stored in nd->last.name and we will have to putname() it when we
1488          * are done. Procfs-like symlinks just set LAST_BIND.
1489          */
1490         nd->flags |= LOOKUP_PARENT;
1491         error = security_inode_follow_link(dentry, nd);
1492         if (error)
1493                 goto exit_dput;
1494         touch_atime(nd->mnt, dentry);
1495         nd_set_link(nd, NULL);
1496         error = dentry->d_inode->i_op->follow_link(dentry, nd);
1497         if (!error) {
1498                 char *s = nd_get_link(nd);
1499                 if (s)
1500                         error = __vfs_follow_link(nd, s);
1501                 if (dentry->d_inode->i_op->put_link)
1502                         dentry->d_inode->i_op->put_link(dentry, nd);
1503         }
1504         dput(dentry);
             /*
              * NOTE(review): this returns without path_release(nd); presumably
              * the failing follow path has already released nd - confirm.
              */
1505         if (error)
1506                 return error;
1507         nd->flags &= ~LOOKUP_PARENT;
1508         if (nd->last_type == LAST_BIND) {
1509                 dentry = nd->dentry;
1510                 goto ok;
1511         }
1512         error = -EISDIR;
1513         if (nd->last_type != LAST_NORM)
1514                 goto exit;
1515         if (nd->last.name[nd->last.len]) {
1516                 putname(nd->last.name);
1517                 goto exit;
1518         }
1519         error = -ELOOP;
             /* give up with -ELOOP once count has reached 32 */
1520         if (count++==32) {
1521                 putname(nd->last.name);
1522                 goto exit;
1523         }
1524         dir = nd->dentry;
1525         down(&dir->d_inode->i_sem);
1526         dentry = __lookup_hash(&nd->last, nd->dentry, nd);
             /* the copied last-component name is no longer needed after the lookup */
1527         putname(nd->last.name);
1528         goto do_last;
1529 }
1530
1531 /**
1532  * lookup_create - lookup a dentry, creating it if it doesn't exist
1533  * @nd: nameidata info
1534  * @is_dir: directory flag
1535  *
1536  * Simple function to lookup and return a dentry and create it
1537  * if it doesn't exist.  Is SMP-safe.
1538  */
1539 struct dentry *lookup_create(struct nameidata *nd, int is_dir)
1540 {
1541         struct dentry *dentry;
1542         int error;
1543
1544         down(&nd->dentry->d_inode->i_sem);
1545         error = -EEXIST;
1546         if (nd->last_type != LAST_NORM)
1547                 goto out;
1548         nd->flags &= ~LOOKUP_PARENT;
1549         dentry = lookup_hash(&nd->last, nd->dentry);
1550         if (IS_ERR(dentry))
1551                 goto ret;
1552         error = mnt_may_create(nd->mnt, nd->dentry->d_inode, dentry);
1553         if (error)
1554                 goto fail;
1555         error = -ENOENT;
1556         if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
1557                 goto fail;
1558 ret:
1559         return dentry;
1560 fail:
1561         dput(dentry);
1562 out:
1563         return ERR_PTR(error);
1564 }
1565
1566 int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1567 {
1568         int error = may_create(dir, dentry, NULL);
1569
1570         if (error)
1571                 return error;
1572
1573         if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
1574                 return -EPERM;
1575
1576         if (!dir->i_op || !dir->i_op->mknod)
1577                 return -EPERM;
1578
1579         error = security_inode_mknod(dir, dentry, mode, dev);
1580         if (error)
1581                 return error;
1582
1583         DQUOT_INIT(dir);
1584         error = dir->i_op->mknod(dir, dentry, mode, dev);
1585         if (!error) {
1586                 inode_dir_notify(dir, DN_CREATE);
1587                 security_inode_post_mknod(dir, dentry, mode, dev);
1588         }
1589         return error;
1590 }
1591
1592 asmlinkage long sys_mknod(const char __user * filename, int mode, unsigned dev)
1593 {
1594         int error = 0;
1595         char * tmp;
1596         struct dentry * dentry;
1597         struct nameidata nd;
1598
1599         if (S_ISDIR(mode))
1600                 return -EPERM;
1601         tmp = getname(filename);
1602         if (IS_ERR(tmp))
1603                 return PTR_ERR(tmp);
1604
1605         error = path_lookup(tmp, LOOKUP_PARENT, &nd);
1606         if (error)
1607                 goto out;
1608         dentry = lookup_create(&nd, 0);
1609         error = PTR_ERR(dentry);
1610
1611         if (!IS_POSIXACL(nd.dentry->d_inode))
1612                 mode &= ~current->fs->umask;
1613         if (!IS_ERR(dentry)) {
1614                 switch (mode & S_IFMT) {
1615                 case 0: case S_IFREG:
1616                         error = vfs_create(nd.dentry->d_inode,dentry,mode,&nd);
1617                         break;
1618                 case S_IFCHR: case S_IFBLK:
1619                         error = vfs_mknod(nd.dentry->d_inode,dentry,mode,
1620                                         new_decode_dev(dev));
1621                         break;
1622                 case S_IFIFO: case S_IFSOCK:
1623                         error = vfs_mknod(nd.dentry->d_inode,dentry,mode,0);
1624                         break;
1625                 case S_IFDIR:
1626                         error = -EPERM;
1627                         break;
1628                 default:
1629                         error = -EINVAL;
1630                 }
1631                 dput(dentry);
1632         }
1633         up(&nd.dentry->d_inode->i_sem);
1634         path_release(&nd);
1635 out:
1636         putname(tmp);
1637
1638         return error;
1639 }
1640
1641 int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1642 {
1643         int error = may_create(dir, dentry, NULL);
1644
1645         if (error)
1646                 return error;
1647
1648         if (!dir->i_op || !dir->i_op->mkdir)
1649                 return -EPERM;
1650
1651         mode &= (S_IRWXUGO|S_ISVTX);
1652         error = security_inode_mkdir(dir, dentry, mode);
1653         if (error)
1654                 return error;
1655
1656         DQUOT_INIT(dir);
1657         error = dir->i_op->mkdir(dir, dentry, mode);
1658         if (!error) {
1659                 inode_dir_notify(dir, DN_CREATE);
1660                 security_inode_post_mkdir(dir,dentry, mode);
1661         }
1662         return error;
1663 }
1664
1665 asmlinkage long sys_mkdir(const char __user * pathname, int mode)
1666 {
1667         int error = 0;
1668         char * tmp;
1669
1670         tmp = getname(pathname);
1671         error = PTR_ERR(tmp);
1672         if (!IS_ERR(tmp)) {
1673                 struct dentry *dentry;
1674                 struct nameidata nd;
1675
1676                 error = path_lookup(tmp, LOOKUP_PARENT, &nd);
1677                 if (error)
1678                         goto out;
1679                 dentry = lookup_create(&nd, 1);
1680                 error = PTR_ERR(dentry);
1681                 if (!IS_ERR(dentry)) {
1682                         if (!IS_POSIXACL(nd.dentry->d_inode))
1683                                 mode &= ~current->fs->umask;
1684                         error = vfs_mkdir(nd.dentry->d_inode, dentry, mode);
1685                         dput(dentry);
1686                 }
1687                 up(&nd.dentry->d_inode->i_sem);
1688                 path_release(&nd);
1689 out:
1690                 putname(tmp);
1691         }
1692
1693         return error;
1694 }
1695
1696 /*
1697  * We try to drop the dentry early: we should have
1698  * a usage count of 2 if we're the only user of this
1699  * dentry, and if that is true (possibly after pruning
1700  * the dcache), then we drop the dentry now.
1701  *
1702  * A low-level filesystem can, if it choses, legally
1703  * do a
1704  *
1705  *      if (!d_unhashed(dentry))
1706  *              return -EBUSY;
1707  *
1708  * if it cannot handle the case of removing a directory
1709  * that is still in use by something else..
1710  */
1711 static void d_unhash(struct dentry *dentry)
1712 {
1713         dget(dentry);
1714         spin_lock(&dcache_lock);
1715         switch (atomic_read(&dentry->d_count)) {
1716         default:
1717                 spin_unlock(&dcache_lock);
1718                 shrink_dcache_parent(dentry);
1719                 spin_lock(&dcache_lock);
1720                 if (atomic_read(&dentry->d_count) != 2)
1721                         break;
1722         case 2:
1723                 __d_drop(dentry);
1724         }
1725         spin_unlock(&dcache_lock);
1726 }
1727
1728 int vfs_rmdir(struct inode *dir, struct dentry *dentry)
1729 {
1730         int error = may_delete(dir, dentry, 1);
1731
1732         if (error)
1733                 return error;
1734
1735         if (!dir->i_op || !dir->i_op->rmdir)
1736                 return -EPERM;
1737
1738         DQUOT_INIT(dir);
1739
1740         down(&dentry->d_inode->i_sem);
1741         d_unhash(dentry);
1742         if (d_mountpoint(dentry))
1743                 error = -EBUSY;
1744         else {
1745                 error = security_inode_rmdir(dir, dentry);
1746                 if (!error) {
1747                         error = dir->i_op->rmdir(dir, dentry);
1748                         if (!error)
1749                                 dentry->d_inode->i_flags |= S_DEAD;
1750                 }
1751         }
1752         up(&dentry->d_inode->i_sem);
1753         if (!error) {
1754                 inode_dir_notify(dir, DN_DELETE);
1755                 d_delete(dentry);
1756         }
1757         dput(dentry);
1758
1759         return error;
1760 }
1761
1762 asmlinkage long sys_rmdir(const char __user * pathname)
1763 {
1764         int error = 0;
1765         char * name;
1766         struct dentry *dentry;
1767         struct nameidata nd;
1768
1769         name = getname(pathname);
1770         if(IS_ERR(name))
1771                 return PTR_ERR(name);
1772
1773         error = path_lookup(name, LOOKUP_PARENT, &nd);
1774         if (error)
1775                 goto exit;
1776
1777         switch(nd.last_type) {
1778                 case LAST_DOTDOT:
1779                         error = -ENOTEMPTY;
1780                         goto exit1;
1781                 case LAST_DOT:
1782                         error = -EINVAL;
1783                         goto exit1;
1784                 case LAST_ROOT:
1785                         error = -EBUSY;
1786                         goto exit1;
1787         }
1788         down(&nd.dentry->d_inode->i_sem);
1789         dentry = lookup_hash(&nd.last, nd.dentry);
1790         error = PTR_ERR(dentry);
1791         if (!IS_ERR(dentry)) {
1792                 error = mnt_may_unlink(nd.mnt, nd.dentry->d_inode, dentry);
1793                 if (error)
1794                         goto exit2;
1795                 error = vfs_rmdir(nd.dentry->d_inode, dentry);
1796         exit2:
1797                 dput(dentry);
1798         }
1799         up(&nd.dentry->d_inode->i_sem);
1800 exit1:
1801         path_release(&nd);
1802 exit:
1803         putname(name);
1804         return error;
1805 }
1806
1807 int vfs_unlink(struct inode *dir, struct dentry *dentry)
1808 {
1809         int error = may_delete(dir, dentry, 0);
1810
1811         if (error)
1812                 return error;
1813
1814         if (!dir->i_op || !dir->i_op->unlink)
1815                 return -EPERM;
1816
1817         DQUOT_INIT(dir);
1818
1819         down(&dentry->d_inode->i_sem);
1820         if (d_mountpoint(dentry))
1821                 error = -EBUSY;
1822         else {
1823                 error = security_inode_unlink(dir, dentry);
1824                 if (!error)
1825                         error = dir->i_op->unlink(dir, dentry);
1826         }
1827         up(&dentry->d_inode->i_sem);
1828
1829         /* We don't d_delete() NFS sillyrenamed files--they still exist. */
1830         if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
1831                 d_delete(dentry);
1832                 inode_dir_notify(dir, DN_DELETE);
1833         }
1834         return error;
1835 }
1836
1837 /*
1838  * Make sure that the actual truncation of the file will occur outside its
1839  * directory's i_sem.  Truncate can take a long time if there is a lot of
1840  * writeout happening, and we don't want to prevent access to the directory
1841  * while waiting on the I/O.
1842  */
1843 asmlinkage long sys_unlink(const char __user * pathname)
1844 {
1845         int error = 0;
1846         char * name;
1847         struct dentry *dentry;
1848         struct nameidata nd;
1849         struct inode *inode = NULL;
1850
1851         name = getname(pathname);
1852         if(IS_ERR(name))
1853                 return PTR_ERR(name);
1854
1855         error = path_lookup(name, LOOKUP_PARENT, &nd);
1856         if (error)
1857                 goto exit;
1858         error = -EISDIR;
1859         if (nd.last_type != LAST_NORM)
1860                 goto exit1;
1861         down(&nd.dentry->d_inode->i_sem);
1862         dentry = lookup_hash(&nd.last, nd.dentry);
1863         error = PTR_ERR(dentry);
1864         if (!IS_ERR(dentry)) {
1865                 /* Why not before? Because we want correct error value */
1866                 if (nd.last.name[nd.last.len])
1867                         goto slashes;
1868                 error = mnt_may_unlink(nd.mnt, nd.dentry->d_inode, dentry);
1869                 if (error)
1870                         goto exit2;
1871                 inode = dentry->d_inode;
1872                 if (inode)
1873                         atomic_inc(&inode->i_count);
1874                 error = vfs_unlink(nd.dentry->d_inode, dentry);
1875         exit2:
1876                 dput(dentry);
1877         }
1878         up(&nd.dentry->d_inode->i_sem);
1879 exit1:
1880         path_release(&nd);
1881 exit:
1882         putname(name);
1883
1884         if (inode)
1885                 iput(inode);    /* truncate the inode here */
1886         return error;
1887
1888 slashes:
1889         error = !dentry->d_inode ? -ENOENT :
1890                 S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
1891         goto exit2;
1892 }
1893
1894 int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode)
1895 {
1896         int error = may_create(dir, dentry, NULL);
1897
1898         if (error)
1899                 return error;
1900
1901         if (!dir->i_op || !dir->i_op->symlink)
1902                 return -EPERM;
1903
1904         error = security_inode_symlink(dir, dentry, oldname);
1905         if (error)
1906                 return error;
1907
1908         DQUOT_INIT(dir);
1909         error = dir->i_op->symlink(dir, dentry, oldname);
1910         if (!error) {
1911                 inode_dir_notify(dir, DN_CREATE);
1912                 security_inode_post_symlink(dir, dentry, oldname);
1913         }
1914         return error;
1915 }
1916
1917 asmlinkage long sys_symlink(const char __user * oldname, const char __user * newname)
1918 {
1919         int error = 0;
1920         char * from;
1921         char * to;
1922
1923         from = getname(oldname);
1924         if(IS_ERR(from))
1925                 return PTR_ERR(from);
1926         to = getname(newname);
1927         error = PTR_ERR(to);
1928         if (!IS_ERR(to)) {
1929                 struct dentry *dentry;
1930                 struct nameidata nd;
1931
1932                 error = path_lookup(to, LOOKUP_PARENT, &nd);
1933                 if (error)
1934                         goto out;
1935                 dentry = lookup_create(&nd, 0);
1936                 error = PTR_ERR(dentry);
1937                 if (!IS_ERR(dentry)) {
1938                         error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO);
1939                         dput(dentry);
1940                 }
1941                 up(&nd.dentry->d_inode->i_sem);
1942                 path_release(&nd);
1943 out:
1944                 putname(to);
1945         }
1946         putname(from);
1947         return error;
1948 }
1949
1950 int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
1951 {
1952         struct inode *inode = old_dentry->d_inode;
1953         int error;
1954
1955         if (!inode)
1956                 return -ENOENT;
1957
1958         error = may_create(dir, new_dentry, NULL);
1959         if (error)
1960                 return error;
1961
1962         if (dir->i_sb != inode->i_sb)
1963                 return -EXDEV;
1964
1965         /*
1966          * A link to an append-only or immutable file cannot be created.
1967          */
1968         if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
1969                 return -EPERM;
1970         if (!dir->i_op || !dir->i_op->link)
1971                 return -EPERM;
1972         if (S_ISDIR(old_dentry->d_inode->i_mode))
1973                 return -EPERM;
1974
1975         error = security_inode_link(old_dentry, dir, new_dentry);
1976         if (error)
1977                 return error;
1978
1979         down(&old_dentry->d_inode->i_sem);
1980         DQUOT_INIT(dir);
1981         error = dir->i_op->link(old_dentry, dir, new_dentry);
1982         up(&old_dentry->d_inode->i_sem);
1983         if (!error) {
1984                 inode_dir_notify(dir, DN_CREATE);
1985                 security_inode_post_link(old_dentry, dir, new_dentry);
1986         }
1987         return error;
1988 }
1989
1990 /*
1991  * Hardlinks are often used in delicate situations.  We avoid
1992  * security-related surprises by not following symlinks on the
1993  * newname.  --KAB
1994  *
1995  * We don't follow them on the oldname either to be compatible
1996  * with linux 2.0, and to avoid hard-linking to directories
1997  * and other special files.  --ADM
1998  */
1999 asmlinkage long sys_link(const char __user * oldname, const char __user * newname)
2000 {
2001         struct dentry *new_dentry;
2002         struct nameidata nd, old_nd;
2003         int error;
2004         char * to;
2005
2006         to = getname(newname);
2007         if (IS_ERR(to))
2008                 return PTR_ERR(to);
2009
2010         error = __user_walk(oldname, 0, &old_nd);
2011         if (error)
2012                 goto exit;
2013         error = path_lookup(to, LOOKUP_PARENT, &nd);
2014         if (error)
2015                 goto out;
2016         /*
2017          * We allow hard-links to be created to a bind-mount as long
2018          * as the bind-mount is not read-only.  Checking for cross-dev
2019          * links is subsumed by the superblock check in vfs_link().
2020          */
2021         error = -EROFS;
2022         if (MNT_IS_RDONLY(old_nd.mnt))
2023                 goto out_release;
2024         new_dentry = lookup_create(&nd, 0);
2025         error = PTR_ERR(new_dentry);
2026         if (!IS_ERR(new_dentry)) {
2027                 error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
2028                 dput(new_dentry);
2029         }
2030         up(&nd.dentry->d_inode->i_sem);
2031 out_release:
2032         path_release(&nd);
2033 out:
2034         path_release(&old_nd);
2035 exit:
2036         putname(to);
2037
2038         return error;
2039 }
2040
2041 /*
2042  * The worst of all namespace operations - renaming directory. "Perverted"
2043  * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
2044  * Problems:
2045  *      a) we can get into loop creation. Check is done in is_subdir().
2046  *      b) race potential - two innocent renames can create a loop together.
2047  *         That's where 4.4 screws up. Current fix: serialization on
2048  *         sb->s_vfs_rename_sem. We might be more accurate, but that's another
2049  *         story.
2050  *      c) we have to lock _three_ objects - parents and victim (if it exists).
2051  *         And that - after we got ->i_sem on parents (until then we don't know
2052  *         whether the target exists).  Solution: try to be smart with locking
2053  *         order for inodes.  We rely on the fact that tree topology may change
2054  *         only under ->s_vfs_rename_sem _and_ that parent of the object we
2055  *         move will be locked.  Thus we can rank directories by the tree
2056  *         (ancestors first) and rank all non-directories after them.
2057  *         That works since everybody except rename does "lock parent, lookup,
2058  *         lock child" and rename is under ->s_vfs_rename_sem.
2059  *         HOWEVER, it relies on the assumption that any object with ->lookup()
2060  *         has no more than 1 dentry.  If "hybrid" objects will ever appear,
2061  *         we'd better make sure that there's no link(2) for them.
2062  *      d) some filesystems don't support opened-but-unlinked directories,
2063  *         either because of layout or because they are not ready to deal with
2064  *         all cases correctly. The latter will be fixed (taking this sort of
2065  *         stuff into VFS), but the former is not going away. Solution: the same
2066  *         trick as in rmdir().
2067  *      e) conversion from fhandle to dentry may come in the wrong moment - when
2068  *         we are removing the target. Solution: we will have to grab ->i_sem
2069  *         in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
2070  *         ->i_sem on parents, which works but leads to some truly excessive
2071  *         locking].
2072  */
2073 int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
2074                struct inode *new_dir, struct dentry *new_dentry)
2075 {
2076         int error = 0;
2077         struct inode *target;
2078
2079         /*
2080          * If we are going to change the parent - check write permissions,
2081          * we'll need to flip '..'.
2082          */
2083         if (new_dir != old_dir) {
2084                 error = permission(old_dentry->d_inode, MAY_WRITE, NULL);
2085                 if (error)
2086                         return error;
2087         }
2088
2089         error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
2090         if (error)
2091                 return error;
2092
2093         target = new_dentry->d_inode;
2094         if (target) {
2095                 down(&target->i_sem);
2096                 d_unhash(new_dentry);
2097         }
2098         if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
2099                 error = -EBUSY;
2100         else 
2101                 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
2102         if (target) {
2103                 if (!error)
2104                         target->i_flags |= S_DEAD;
2105                 up(&target->i_sem);
2106                 if (d_unhashed(new_dentry))
2107                         d_rehash(new_dentry);
2108                 dput(new_dentry);
2109         }
2110         if (!error) {
2111                 d_move(old_dentry,new_dentry);
2112                 security_inode_post_rename(old_dir, old_dentry,
2113                                            new_dir, new_dentry);
2114         }
2115         return error;
2116 }
2117
2118 int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
2119                struct inode *new_dir, struct dentry *new_dentry)
2120 {
2121         struct inode *target;
2122         int error;
2123
2124         error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
2125         if (error)
2126                 return error;
2127
2128         dget(new_dentry);
2129         target = new_dentry->d_inode;
2130         if (target)
2131                 down(&target->i_sem);
2132         if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
2133                 error = -EBUSY;
2134         else
2135                 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
2136         if (!error) {
2137                 /* The following d_move() should become unconditional */
2138                 if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME))
2139                         d_move(old_dentry, new_dentry);
2140                 security_inode_post_rename(old_dir, old_dentry, new_dir, new_dentry);
2141         }
2142         if (target)
2143                 up(&target->i_sem);
2144         dput(new_dentry);
2145         return error;
2146 }
2147
2148 int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
2149                struct inode *new_dir, struct dentry *new_dentry)
2150 {
2151         int error;
2152         int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
2153
2154         if (old_dentry->d_inode == new_dentry->d_inode)
2155                 return 0;
2156  
2157         error = may_delete(old_dir, old_dentry, is_dir);
2158         if (error)
2159                 return error;
2160
2161         if (!new_dentry->d_inode)
2162                 error = may_create(new_dir, new_dentry, NULL);
2163         else
2164                 error = may_delete(new_dir, new_dentry, is_dir);
2165         if (error)
2166                 return error;
2167
2168         if (!old_dir->i_op || !old_dir->i_op->rename)
2169                 return -EPERM;
2170
2171         DQUOT_INIT(old_dir);
2172         DQUOT_INIT(new_dir);
2173
2174         if (is_dir)
2175                 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
2176         else
2177                 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
2178         if (!error) {
2179                 if (old_dir == new_dir)
2180                         inode_dir_notify(old_dir, DN_RENAME);
2181                 else {
2182                         inode_dir_notify(old_dir, DN_DELETE);
2183                         inode_dir_notify(new_dir, DN_CREATE);
2184                 }
2185         }
2186         return error;
2187 }
2188
2189 static inline int do_rename(const char * oldname, const char * newname)
2190 {
2191         int error = 0;
2192         struct dentry * old_dir, * new_dir;
2193         struct dentry * old_dentry, *new_dentry;
2194         struct dentry * trap;
2195         struct nameidata oldnd, newnd;
2196
2197         error = path_lookup(oldname, LOOKUP_PARENT, &oldnd);
2198         if (error)
2199                 goto exit;
2200
2201         error = path_lookup(newname, LOOKUP_PARENT, &newnd);
2202         if (error)
2203                 goto exit1;
2204
2205         error = -EXDEV;
2206         if (oldnd.mnt != newnd.mnt)
2207                 goto exit2;
2208
2209         old_dir = oldnd.dentry;
2210         error = -EBUSY;
2211         if (oldnd.last_type != LAST_NORM)
2212                 goto exit2;
2213
2214         new_dir = newnd.dentry;
2215         if (newnd.last_type != LAST_NORM)
2216                 goto exit2;
2217
2218         trap = lock_rename(new_dir, old_dir);
2219
2220         old_dentry = lookup_hash(&oldnd.last, old_dir);
2221         error = PTR_ERR(old_dentry);
2222         if (IS_ERR(old_dentry))
2223                 goto exit3;
2224         /* source must exist */
2225         error = -ENOENT;
2226         if (!old_dentry->d_inode)
2227                 goto exit4;
2228         /* unless the source is a directory trailing slashes give -ENOTDIR */
2229         if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
2230                 error = -ENOTDIR;
2231                 if (oldnd.last.name[oldnd.last.len])
2232                         goto exit4;
2233                 if (newnd.last.name[newnd.last.len])
2234                         goto exit4;
2235         }
2236         /* source should not be ancestor of target */
2237         error = -EINVAL;
2238         if (old_dentry == trap)
2239                 goto exit4;
2240         error = -EROFS;
2241         if (MNT_IS_RDONLY(newnd.mnt))
2242                 goto exit4;
2243         new_dentry = lookup_hash(&newnd.last, new_dir);
2244         error = PTR_ERR(new_dentry);
2245         if (IS_ERR(new_dentry))
2246                 goto exit4;
2247         /* target should not be an ancestor of source */
2248         error = -ENOTEMPTY;
2249         if (new_dentry == trap)
2250                 goto exit5;
2251
2252         error = vfs_rename(old_dir->d_inode, old_dentry,
2253                                    new_dir->d_inode, new_dentry);
2254 exit5:
2255         dput(new_dentry);
2256 exit4:
2257         dput(old_dentry);
2258 exit3:
2259         unlock_rename(new_dir, old_dir);
2260 exit2:
2261         path_release(&newnd);
2262 exit1:
2263         path_release(&oldnd);
2264 exit:
2265         return error;
2266 }
2267
2268 asmlinkage long sys_rename(const char __user * oldname, const char __user * newname)
2269 {
2270         int error;
2271         char * from;
2272         char * to;
2273
2274         from = getname(oldname);
2275         if(IS_ERR(from))
2276                 return PTR_ERR(from);
2277         to = getname(newname);
2278         error = PTR_ERR(to);
2279         if (!IS_ERR(to)) {
2280                 error = do_rename(from,to);
2281                 putname(to);
2282         }
2283         putname(from);
2284         return error;
2285 }
2286
2287 int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link)
2288 {
2289         int len;
2290
2291         len = PTR_ERR(link);
2292         if (IS_ERR(link))
2293                 goto out;
2294
2295         len = strlen(link);
2296         if (len > (unsigned) buflen)
2297                 len = buflen;
2298         if (copy_to_user(buffer, link, len))
2299                 len = -EFAULT;
2300 out:
2301         return len;
2302 }
2303
2304 /*
2305  * A helper for ->readlink().  This should be used *ONLY* for symlinks that
2306  * have ->follow_link() touching nd only in nd_set_link().  Using (or not
2307  * using) it for any given inode is up to filesystem.
2308  */
2309 int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
2310 {
2311         struct nameidata nd;
2312         int res;
2313         nd.depth = 0;
2314         res = dentry->d_inode->i_op->follow_link(dentry, &nd);
2315         if (!res) {
2316                 res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd));
2317                 if (dentry->d_inode->i_op->put_link)
2318                         dentry->d_inode->i_op->put_link(dentry, &nd);
2319         }
2320         return res;
2321 }
2322
/*
 * Exported entry point that simply forwards to __vfs_follow_link()
 * and returns its result.
 */
int vfs_follow_link(struct nameidata *nd, const char *link)
{
        int res = __vfs_follow_link(nd, link);

        return res;
}
2327
2328 /* get the link contents into pagecache */
2329 static char *page_getlink(struct dentry * dentry, struct page **ppage)
2330 {
2331         struct page * page;
2332         struct address_space *mapping = dentry->d_inode->i_mapping;
2333         page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage,
2334                                 NULL);
2335         if (IS_ERR(page))
2336                 goto sync_fail;
2337         wait_on_page_locked(page);
2338         if (!PageUptodate(page))
2339                 goto async_fail;
2340         *ppage = page;
2341         return kmap(page);
2342
2343 async_fail:
2344         page_cache_release(page);
2345         return ERR_PTR(-EIO);
2346
2347 sync_fail:
2348         return (char*)page;
2349 }
2350
2351 int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
2352 {
2353         struct page *page = NULL;
2354         char *s = page_getlink(dentry, &page);
2355         int res = vfs_readlink(dentry,buffer,buflen,s);
2356         if (page) {
2357                 kunmap(page);
2358                 page_cache_release(page);
2359         }
2360         return res;
2361 }
2362
/*
 * ->follow_link() that only records the mapped link body (or the
 * ERR_PTR() from page_getlink()) in @nd via nd_set_link().  Always
 * returns 0.  The page is neither unmapped nor released here;
 * page_put_link() does that.
 */
int page_follow_link_light(struct dentry *dentry, struct nameidata *nd)
{
        struct page *page;
        char *link;

        link = page_getlink(dentry, &page);
        nd_set_link(nd, link);
        return 0;
}
2369
2370 void page_put_link(struct dentry *dentry, struct nameidata *nd)
2371 {
2372         if (!IS_ERR(nd_get_link(nd))) {
2373                 struct page *page;
2374                 page = find_get_page(dentry->d_inode->i_mapping, 0);
2375                 if (!page)
2376                         BUG();
2377                 kunmap(page);
2378                 page_cache_release(page);
2379                 page_cache_release(page);
2380         }
2381 }
2382
2383 int page_follow_link(struct dentry *dentry, struct nameidata *nd)
2384 {
2385         struct page *page = NULL;
2386         char *s = page_getlink(dentry, &page);
2387         int res = __vfs_follow_link(nd, s);
2388         if (page) {
2389                 kunmap(page);
2390                 page_cache_release(page);
2391         }
2392         return res;
2393 }
2394
2395 int page_symlink(struct inode *inode, const char *symname, int len)
2396 {
2397         struct address_space *mapping = inode->i_mapping;
2398         struct page *page = grab_cache_page(mapping, 0);
2399         int err = -ENOMEM;
2400         char *kaddr;
2401
2402         if (!page)
2403                 goto fail;
2404         err = mapping->a_ops->prepare_write(NULL, page, 0, len-1);
2405         if (err)
2406                 goto fail_map;
2407         kaddr = kmap_atomic(page, KM_USER0);
2408         memcpy(kaddr, symname, len-1);
2409         kunmap_atomic(kaddr, KM_USER0);
2410         mapping->a_ops->commit_write(NULL, page, 0, len-1);
2411         /*
2412          * Notice that we are _not_ going to block here - end of page is
2413          * unmapped, so this will only try to map the rest of page, see
2414          * that it is unmapped (typically even will not look into inode -
2415          * ->i_size will be enough for everything) and zero it out.
2416          * OTOH it's obviously correct and should make the page up-to-date.
2417          */
2418         if (!PageUptodate(page)) {
2419                 err = mapping->a_ops->readpage(NULL, page);
2420                 wait_on_page_locked(page);
2421         } else {
2422                 unlock_page(page);
2423         }
2424         page_cache_release(page);
2425         if (err < 0)
2426                 goto fail;
2427         mark_inode_dirty(inode);
2428         return 0;
2429 fail_map:
2430         unlock_page(page);
2431         page_cache_release(page);
2432 fail:
2433         return err;
2434 }
2435
2436 struct inode_operations page_symlink_inode_operations = {
2437         .readlink       = generic_readlink,
2438         .follow_link    = page_follow_link_light,
2439         .put_link       = page_put_link,
2440 };
2441
/* Symbols made available to modules, kept in alphabetical order. */
EXPORT_SYMBOL(__user_walk);
EXPORT_SYMBOL(follow_down);
EXPORT_SYMBOL(follow_up);
EXPORT_SYMBOL(generic_readlink);
EXPORT_SYMBOL(get_write_access); /* binfmt_aout */
EXPORT_SYMBOL(getname);
EXPORT_SYMBOL(lock_rename);
EXPORT_SYMBOL(lookup_create);
EXPORT_SYMBOL(lookup_hash);
EXPORT_SYMBOL(lookup_one_len);
EXPORT_SYMBOL(page_follow_link);
EXPORT_SYMBOL(page_follow_link_light);
EXPORT_SYMBOL(page_put_link);
EXPORT_SYMBOL(page_readlink);
EXPORT_SYMBOL(page_symlink);
EXPORT_SYMBOL(page_symlink_inode_operations);
EXPORT_SYMBOL(path_lookup);
EXPORT_SYMBOL(path_release);
EXPORT_SYMBOL(path_walk);
EXPORT_SYMBOL(permission);
EXPORT_SYMBOL(unlock_rename);
EXPORT_SYMBOL(vfs_create);
EXPORT_SYMBOL(vfs_follow_link);
EXPORT_SYMBOL(vfs_link);
EXPORT_SYMBOL(vfs_mkdir);
EXPORT_SYMBOL(vfs_mknod);
EXPORT_SYMBOL(vfs_permission);
EXPORT_SYMBOL(vfs_readlink);
EXPORT_SYMBOL(vfs_rename);
EXPORT_SYMBOL(vfs_rmdir);
EXPORT_SYMBOL(vfs_symlink);
EXPORT_SYMBOL(vfs_unlink);