df5b8f852f81a417c587447807b2c3e2a03ca4f2
[linux-2.6.git] / fs / namei.c
1 /*
2  *  linux/fs/namei.c
3  *
4  *  Copyright (C) 1991, 1992  Linus Torvalds
5  */
6
7 /*
8  * Some corrections by tytso.
9  */
10
11 /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
12  * lookup logic.
13  */
14 /* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
15  */
16
17 #include <linux/init.h>
18 #include <linux/module.h>
19 #include <linux/slab.h>
20 #include <linux/fs.h>
21 #include <linux/namei.h>
22 #include <linux/quotaops.h>
23 #include <linux/pagemap.h>
24 #include <linux/dnotify.h>
25 #include <linux/smp_lock.h>
26 #include <linux/personality.h>
27 #include <linux/security.h>
28 #include <linux/mount.h>
29 #include <linux/audit.h>
30 #include <asm/namei.h>
31 #include <asm/uaccess.h>
32 /* Index a 4-entry table (0, 04, 02, 06) with the O_ACCMODE bits of x. */
33 #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
34
35 /* [Feb-1997 T. Schoebel-Theuer]
36  * Fundamental changes in the pathname lookup mechanisms (namei)
37  * were necessary because of omirr.  The reason is that omirr needs
38  * to know the _real_ pathname, not the user-supplied one, in case
39  * of symlinks (and also when transname replacements occur).
40  *
41  * The new code replaces the old recursive symlink resolution with
42  * an iterative one (in case of non-nested symlink chains).  It does
43  * this with calls to <fs>_follow_link().
44  * As a side effect, dir_namei(), _namei() and follow_link() are now 
45  * replaced with a single function lookup_dentry() that can handle all 
46  * the special cases of the former code.
47  *
48  * With the new dcache, the pathname is stored at each inode, at least as
49  * long as the refcount of the inode is positive.  As a side effect, the
50  * size of the dcache depends on the inode cache and thus is dynamic.
51  *
52  * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
53  * resolution to correspond with current state of the code.
54  *
55  * Note that the symlink resolution is not *completely* iterative.
56  * There is still a significant amount of tail- and mid- recursion in
57  * the algorithm.  Also, note that <fs>_readlink() is not used in
58  * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
59  * may return different results than <fs>_follow_link().  Many virtual
60  * filesystems (including /proc) exhibit this behavior.
61  */
62
63 /* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
64  * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
65  * and the name already exists in form of a symlink, try to create the new
66  * name indicated by the symlink. The old code always complained that the
67  * name already exists, due to not following the symlink even if its target
68  * is nonexistent.  The new semantics affects also mknod() and link() when
69  * the name is a symlink pointing to a non-existent name.
70  *
71  * I don't know which semantics is the right one, since I have no access
72  * to standards. But I found by trial that HP-UX 9.0 has the full "new"
73  * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
74  * "old" one. Personally, I think the new semantics is much more logical.
75  * Note that "ln old new" where "new" is a symlink pointing to a non-existing
76  * file does succeed in both HP-UX and SunOS, but not in Solaris
77  * and in the old Linux semantics.
78  */
79
80 /* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
81  * semantics.  See the comments in "open_namei" and "do_link" below.
82  *
83  * [10-Sep-98 Alan Modra] Another symlink change.
84  */
85
86 /* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
87  *      inside the path - always follow.
88  *      in the last component in creation/removal/renaming - never follow.
89  *      if LOOKUP_FOLLOW passed - follow.
90  *      if the pathname has trailing slashes - follow.
91  *      otherwise - don't follow.
92  * (applied in that order).
93  *
94  * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT
95  * restored for 2.4. This is the last surviving part of old 4.2BSD bug.
96  * During the 2.4 we need to fix the userland stuff depending on it -
97  * hopefully we will be able to get rid of that wart in 2.5. So far only
98  * XEmacs seems to be relying on it...
99  */
100 /*
101  * [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland)
102  * implemented.  Let's see if raised priority of ->s_vfs_rename_sem gives
103  * any extra contention...
104  */
105
106 /* In order to reduce some races, while at the same time doing additional
107  * checking and hopefully speeding things up, we copy filenames to the
108  * kernel data space before using them..
109  *
110  * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
111  * PATH_MAX includes the nul terminator --RR.
112  */
113 static inline int do_getname(const char __user *filename, char *page)
114 {
115         int retval;
116         unsigned long len = PATH_MAX;
117
118         if ((unsigned long) filename >= TASK_SIZE) {
119                 if (!segment_eq(get_fs(), KERNEL_DS))
120                         return -EFAULT;
121         } else if (TASK_SIZE - (unsigned long) filename < PATH_MAX)
122                 len = TASK_SIZE - (unsigned long) filename;
123
124         retval = strncpy_from_user((char *)page, filename, len);
125         if (retval > 0) {
126                 if (retval < len)
127                         return 0;
128                 return -ENAMETOOLONG;
129         } else if (!retval)
130                 retval = -ENOENT;
131         return retval;
132 }
133
134 char * getname(const char __user * filename)
135 {
136         char *tmp, *result;
137
138         result = ERR_PTR(-ENOMEM);
139         tmp = __getname();
140         if (tmp)  {
141                 int retval = do_getname(filename, tmp);
142
143                 result = tmp;
144                 if (retval < 0) {
145                         __putname(tmp);
146                         result = ERR_PTR(retval);
147                 }
148         }
149         if (unlikely(current->audit_context) && !IS_ERR(result) && result)
150                 audit_getname(result);
151         return result;
152 }
153
154 /*
155  *      vfs_permission()
156  *
157  * is used to check for read/write/execute permissions on a file.
158  * We use "fsuid" for this, letting us set arbitrary permissions
159  * for filesystem access without changing the "normal" uids which
160  * are used for other things..
161  */
162 int vfs_permission(struct inode * inode, int mask)
163 {
164         umode_t                 mode = inode->i_mode;
165
166         if (mask & MAY_WRITE) {
167                 /*
168                  * Nobody gets write access to a read-only fs.
169                  */
170                 if (IS_RDONLY(inode) &&
171                     (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
172                         return -EROFS;
173
174                 /*
175                  * Nobody gets write access to an immutable file.
176                  */
177                 if (IS_IMMUTABLE(inode))
178                         return -EACCES;
179         }
180
181         if (current->fsuid == inode->i_uid)
182                 mode >>= 6;
183         else if (in_group_p(inode->i_gid))
184                 mode >>= 3;
185
186         /*
187          * If the DACs are ok we don't need any capability check.
188          */
189         if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask))
190                 return 0;
191
192         /*
193          * Read/write DACs are always overridable.
194          * Executable DACs are overridable if at least one exec bit is set.
195          */
196         if (!(mask & MAY_EXEC) ||
197             (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
198                 if (capable(CAP_DAC_OVERRIDE))
199                         return 0;
200
201         /*
202          * Searching includes executable on directories, else just read.
203          */
204         if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
205                 if (capable(CAP_DAC_READ_SEARCH))
206                         return 0;
207
208         return -EACCES;
209 }
210
211 int permission(struct inode * inode,int mask, struct nameidata *nd)
212 {
213         int retval;
214         int submask;
215
216         /* Ordinary permission routines do not understand MAY_APPEND. */
217         submask = mask & ~MAY_APPEND;
218
219         if (inode->i_op && inode->i_op->permission)
220                 retval = inode->i_op->permission(inode, submask, nd);
221         else
222                 retval = vfs_permission(inode, submask);
223         if (retval)
224                 return retval;
225
226         return security_inode_permission(inode, mask, nd);
227 }
228
229 /*
230  * get_write_access() gets write permission for a file.
231  * put_write_access() releases this write permission.
232  * This is used for regular files.
233  * We cannot support write (and maybe mmap read-write shared) accesses and
234  * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
235  * can have the following values:
236  * 0: no writers, no VM_DENYWRITE mappings
237  * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
238  * > 0: (i_writecount) users are writing to the file.
239  *
240  * Normally we operate on that counter with atomic_{inc,dec} and it's safe
241  * except for the cases where we don't hold i_writecount yet. Then we need to
242  * use {get,deny}_write_access() - these functions check the sign and refuse
243  * to do the change if sign is wrong. Exclusion between them is provided by
244  * the inode->i_lock spinlock.
245  */
246
247 int get_write_access(struct inode * inode)
248 {
249         spin_lock(&inode->i_lock);
250         if (atomic_read(&inode->i_writecount) < 0) {
251                 spin_unlock(&inode->i_lock);
252                 return -ETXTBSY;
253         }
254         atomic_inc(&inode->i_writecount);
255         spin_unlock(&inode->i_lock);
256
257         return 0;
258 }
259
260 int deny_write_access(struct file * file)
261 {
262         struct inode *inode = file->f_dentry->d_inode;
263
264         spin_lock(&inode->i_lock);
265         if (atomic_read(&inode->i_writecount) > 0) {
266                 spin_unlock(&inode->i_lock);
267                 return -ETXTBSY;
268         }
269         atomic_dec(&inode->i_writecount);
270         spin_unlock(&inode->i_lock);
271
272         return 0;
273 }
274
275 void path_release(struct nameidata *nd)
276 {
277         dput(nd->dentry);
278         mntput(nd->mnt);
279 }
280
281 /*
282  * Internal lookup() using the new generic dcache.
283  * SMP-safe
284  */
285 static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd)
286 {
287         struct dentry * dentry = __d_lookup(parent, name);
288
289         /* lockess __d_lookup may fail due to concurrent d_move() 
290          * in some unrelated directory, so try with d_lookup
291          */
292         if (!dentry)
293                 dentry = d_lookup(parent, name);
294
295         if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
296                 if (!dentry->d_op->d_revalidate(dentry, nd) && !d_invalidate(dentry)) {
297                         dput(dentry);
298                         dentry = NULL;
299                 }
300         }
301         return dentry;
302 }
303
304 /*
305  * Short-cut version of permission(), for calling by
306  * path_walk(), when dcache lock is held.  Combines parts
307  * of permission() and vfs_permission(), and tests ONLY for
308  * MAY_EXEC permission.
309  *
310  * If appropriate, check DAC only.  If not appropriate, or
311  * short-cut DAC fails, then call permission() to do more
312  * complete permission check.
313  */
314 static inline int exec_permission_lite(struct inode *inode,
315                                        struct nameidata *nd)
316 {
317         umode_t mode = inode->i_mode;
318
319         if ((inode->i_op && inode->i_op->permission))
320                 return -EAGAIN;
321
322         if (current->fsuid == inode->i_uid)
323                 mode >>= 6;
324         else if (in_group_p(inode->i_gid))
325                 mode >>= 3;
326
327         if (mode & MAY_EXEC)
328                 goto ok;
329
330         if ((inode->i_mode & S_IXUGO) && capable(CAP_DAC_OVERRIDE))
331                 goto ok;
332
333         if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_READ_SEARCH))
334                 goto ok;
335
336         return -EACCES;
337 ok:
338         return security_inode_permission(inode, MAY_EXEC, nd);
339 }
340
341 /*
342  * This is called when everything else fails, and we actually have
343  * to go to the low-level filesystem to find out what we should do..
344  *
345  * We get the directory semaphore, and after getting that we also
346  * make sure that nobody added the entry to the dcache in the meantime..
347  * SMP-safe
348  */
349 static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd)
350 {
351         struct dentry * result;
352         struct inode *dir = parent->d_inode;
353
354         down(&dir->i_sem);
355         /*
356          * First re-do the cached lookup just in case it was created
357          * while we waited for the directory semaphore..
358          *
359          * FIXME! This could use version numbering or similar to
360          * avoid unnecessary cache lookups.
361          *
362          * The "dcache_lock" is purely to protect the RCU list walker
363          * from concurrent renames at this point (we mustn't get false
364          * negatives from the RCU list walk here, unlike the optimistic
365          * fast walk).
366          *
367          * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup
368          */
369         result = d_lookup(parent, name);
370         if (!result) {
371                 struct dentry * dentry = d_alloc(parent, name);
372                 result = ERR_PTR(-ENOMEM);
373                 if (dentry) {
374                         result = dir->i_op->lookup(dir, dentry, nd);
375                         if (result)
376                                 dput(dentry);
377                         else
378                                 result = dentry;
379                 }
380                 up(&dir->i_sem);
381                 return result;
382         }
383
384         /*
385          * Uhhuh! Nasty case: the cache was re-populated while
386          * we waited on the semaphore. Need to revalidate.
387          */
388         up(&dir->i_sem);
389         if (result->d_op && result->d_op->d_revalidate) {
390                 if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) {
391                         dput(result);
392                         result = ERR_PTR(-ENOENT);
393                 }
394         }
395         return result;
396 }
397
398 inline void nd_set_link(struct nameidata *nd, char *path)
399 {
400         nd->saved_names[current->link_count] = path;
401 }
402
403 inline char *nd_get_link(struct nameidata *nd)
404 {
405         return nd->saved_names[current->link_count];
406 }
407
408 EXPORT_SYMBOL(nd_set_link);
409 EXPORT_SYMBOL(nd_get_link);
410
411 static inline int __vfs_follow_link(struct nameidata *, const char *);
412
413 /*
414  * This limits recursive symlink follows to 8, while
415  * limiting consecutive symlinks to 40.
416  *
417  * Without that kind of total limit, nasty chains of consecutive
418  * symlinks can cause almost arbitrarily long lookups. 
419  */
420 static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
421 {
422         int err = -ELOOP;
423         if (current->link_count >= MAX_NESTED_LINKS)
424                 goto loop;
425         if (current->total_link_count >= 40)
426                 goto loop;
427         cond_resched();
428         err = security_inode_follow_link(dentry, nd);
429         if (err)
430                 goto loop;
431         current->link_count++;
432         current->total_link_count++;
433         touch_atime(nd->mnt, dentry);
434         nd_set_link(nd, NULL);
435         err = dentry->d_inode->i_op->follow_link(dentry, nd);
436         if (!err) {
437                 char *s = nd_get_link(nd);
438                 if (s)
439                         err = __vfs_follow_link(nd, s);
440                 if (dentry->d_inode->i_op->put_link)
441                         dentry->d_inode->i_op->put_link(dentry, nd);
442         }
443         current->link_count--;
444         return err;
445 loop:
446         path_release(nd);
447         return err;
448 }
449
450 int follow_up(struct vfsmount **mnt, struct dentry **dentry)
451 {
452         struct vfsmount *parent;
453         struct dentry *mountpoint;
454         spin_lock(&vfsmount_lock);
455         parent=(*mnt)->mnt_parent;
456         if (parent == *mnt) {
457                 spin_unlock(&vfsmount_lock);
458                 return 0;
459         }
460         mntget(parent);
461         mountpoint=dget((*mnt)->mnt_mountpoint);
462         spin_unlock(&vfsmount_lock);
463         dput(*dentry);
464         *dentry = mountpoint;
465         mntput(*mnt);
466         *mnt = parent;
467         return 1;
468 }
469
470 /* no need for dcache_lock, as serialization is taken care in
471  * namespace.c
472  */
473 static int follow_mount(struct vfsmount **mnt, struct dentry **dentry)
474 {
475         int res = 0;
476         while (d_mountpoint(*dentry)) {
477                 struct vfsmount *mounted = lookup_mnt(*mnt, *dentry);
478                 if (!mounted)
479                         break;
480                 mntput(*mnt);
481                 *mnt = mounted;
482                 dput(*dentry);
483                 *dentry = dget(mounted->mnt_root);
484                 res = 1;
485         }
486         return res;
487 }
488
489 /* no need for dcache_lock, as serialization is taken care in
490  * namespace.c
491  */
492 static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry)
493 {
494         struct vfsmount *mounted;
495
496         mounted = lookup_mnt(*mnt, *dentry);
497         if (mounted) {
498                 mntput(*mnt);
499                 *mnt = mounted;
500                 dput(*dentry);
501                 *dentry = dget(mounted->mnt_root);
502                 return 1;
503         }
504         return 0;
505 }
506
/* Non-inline entry point for __follow_down(); same arguments and result. */
int follow_down(struct vfsmount **mnt, struct dentry **dentry)
{
	int crossed = __follow_down(mnt, dentry);

	return crossed;
}
511  
512 static inline void follow_dotdot(struct vfsmount **mnt, struct dentry **dentry)
513 {
514         while(1) {
515                 struct vfsmount *parent;
516                 struct dentry *old = *dentry;
517
518                 read_lock(&current->fs->lock);
519                 if (*dentry == current->fs->root &&
520                     *mnt == current->fs->rootmnt) {
521                         read_unlock(&current->fs->lock);
522                         break;
523                 }
524                 read_unlock(&current->fs->lock);
525                 spin_lock(&dcache_lock);
526                 if (*dentry != (*mnt)->mnt_root) {
527                         *dentry = dget((*dentry)->d_parent);
528                         spin_unlock(&dcache_lock);
529                         dput(old);
530                         break;
531                 }
532                 spin_unlock(&dcache_lock);
533                 spin_lock(&vfsmount_lock);
534                 parent = (*mnt)->mnt_parent;
535                 if (parent == *mnt) {
536                         spin_unlock(&vfsmount_lock);
537                         break;
538                 }
539                 mntget(parent);
540                 *dentry = dget((*mnt)->mnt_mountpoint);
541                 spin_unlock(&vfsmount_lock);
542                 dput(old);
543                 mntput(*mnt);
544                 *mnt = parent;
545         }
546         follow_mount(mnt, dentry);
547 }
548 /* A (vfsmount, dentry) pair, used here as the output of do_lookup(). */
549 struct path {
550         struct vfsmount *mnt;
551         struct dentry *dentry;
552 };
553
554 /*
555  *  It's more convoluted than I'd like it to be, but... it's still fairly
556  *  small and for now I'd prefer to have fast path as straight as possible.
557  *  It _is_ time-critical.
558  */
559 static int do_lookup(struct nameidata *nd, struct qstr *name,
560                      struct path *path)
561 {
562         struct vfsmount *mnt = nd->mnt;
563         struct dentry *dentry = __d_lookup(nd->dentry, name);
564
565         if (!dentry)
566                 goto need_lookup;
567         if (dentry->d_op && dentry->d_op->d_revalidate)
568                 goto need_revalidate;
569 done:
570         path->mnt = mnt;
571         path->dentry = dentry;
572         return 0;
573
574 need_lookup:
575         dentry = real_lookup(nd->dentry, name, nd);
576         if (IS_ERR(dentry))
577                 goto fail;
578         goto done;
579
580 need_revalidate:
581         if (dentry->d_op->d_revalidate(dentry, nd))
582                 goto done;
583         if (d_invalidate(dentry))
584                 goto done;
585         dput(dentry);
586         goto need_lookup;
587
588 fail:
589         return PTR_ERR(dentry);
590 }
591
592 /*
593  * Name resolution.
594  *
595  * This is the basic name resolution function, turning a pathname
596  * into the final dentry.  Returns 0 with the result left in @nd, or a
597  * negative error; every 'break' out of the loop calls path_release(nd).
598  * We expect 'base' to be positive and a directory.
599  */
600 int fastcall link_path_walk(const char * name, struct nameidata *nd)
601 {
602         struct path next;
603         struct inode *inode;
604         int err, atomic;
605         unsigned int lookup_flags = nd->flags;
606         /* LOOKUP_ATOMIC: bail out with -EWOULDBLOCKIO before any do_lookup() */
607         atomic = (lookup_flags & LOOKUP_ATOMIC);
608         /* Skip leading slashes; if nothing is left only revalidation remains. */
609         while (*name=='/')
610                 name++;
611         if (!*name)
612                 goto return_reval;
613         /* nonzero link_count: presumably a nested walk from do_follow_link() */
614         inode = nd->dentry->d_inode;
615         if (current->link_count)
616                 lookup_flags = LOOKUP_FOLLOW;
617
618         /* At this point we know we have a real path component. */
619         for(;;) {
620                 unsigned long hash;
621                 struct qstr this;
622                 unsigned int c;
623                 /* MAY_EXEC on the directory being searched; short-cut check first */
624                 err = exec_permission_lite(inode, nd);
625                 if (err == -EAGAIN) { 
626                         err = permission(inode, MAY_EXEC, nd);
627                 }
628                 if (err)
629                         break;
630                 /* Scan one component up to '/' or NUL, hashing it as we go. */
631                 this.name = name;
632                 c = *(const unsigned char *)name;
633
634                 hash = init_name_hash();
635                 do {
636                         name++;
637                         hash = partial_name_hash(c, hash);
638                         c = *(const unsigned char *)name;
639                 } while (c && (c != '/'));
640                 this.len = name - (const char *) this.name;
641                 this.hash = end_name_hash(hash);
642
643                 /* remove trailing slashes? */
644                 if (!c)
645                         goto last_component;
646                 while (*++name == '/');
647                 if (!*name)
648                         goto last_with_slashes;
649
650                 /*
651                  * "." and ".." are special - ".." especially so because it has
652                  * to be able to know about the current root directory and
653                  * parent relationships.
654                  */
655                 if (this.name[0] == '.') switch (this.len) {
656                         default:
657                                 break;
658                         case 2: 
659                                 if (this.name[1] != '.')
660                                         break;
661                                 follow_dotdot(&nd->mnt, &nd->dentry);
662                                 inode = nd->dentry->d_inode;
663                                 /* fallthrough */
664                         case 1:
665                                 continue;
666                 }
667                 /*
668                  * See if the low-level filesystem might want
669                  * to use its own hash..
670                  */
671                 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
672                         err = nd->dentry->d_op->d_hash(nd->dentry, &this);
673                         if (err < 0)
674                                 break;
675                 }
676                 err = -EWOULDBLOCKIO;
677                 if (atomic)
678                         break;
679                 nd->flags |= LOOKUP_CONTINUE;
680                 /* This does the actual lookups.. */
681                 err = do_lookup(nd, &this, &next);
682                 if (err)
683                         break;
684                 /* Check mountpoints.. */
685                 follow_mount(&next.mnt, &next.dentry);
686                 /* An intermediate component must be positive and have inode ops. */
687                 err = -ENOENT;
688                 inode = next.dentry->d_inode;
689                 if (!inode)
690                         goto out_dput;
691                 err = -ENOTDIR; 
692                 if (!inode->i_op)
693                         goto out_dput;
694                 /* A symlink in the middle of the path is always followed. */
695                 if (inode->i_op->follow_link) {
696                         mntget(next.mnt);
697                         err = do_follow_link(next.dentry, nd);
698                         dput(next.dentry);
699                         mntput(next.mnt);
700                         if (err)
701                                 goto return_err;
702                         err = -ENOENT;
703                         inode = nd->dentry->d_inode;
704                         if (!inode)
705                                 break;
706                         err = -ENOTDIR; 
707                         if (!inode->i_op)
708                                 break;
709                 } else {
710                         dput(nd->dentry);
711                         nd->mnt = next.mnt;
712                         nd->dentry = next.dentry;
713                 }
714                 err = -ENOTDIR; 
715                 if (!inode->i_op->lookup)
716                         break;
717                 continue;
718                 /* here ends the main loop */
719
720 last_with_slashes:      /* name ended in '/': follow links, require a directory */
721                 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
722 last_component:         /* final component of the name */
723                 nd->flags &= ~LOOKUP_CONTINUE;
724                 if (lookup_flags & LOOKUP_PARENT)
725                         goto lookup_parent;
726                 if (this.name[0] == '.') switch (this.len) {
727                         default:
728                                 break;
729                         case 2: 
730                                 if (this.name[1] != '.')
731                                         break;
732                                 follow_dotdot(&nd->mnt, &nd->dentry);
733                                 inode = nd->dentry->d_inode;
734                                 /* fallthrough */
735                         case 1:
736                                 goto return_reval;
737                 }
738                 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
739                         err = nd->dentry->d_op->d_hash(nd->dentry, &this);
740                         if (err < 0)
741                                 break;
742                 }
743                 err = -EWOULDBLOCKIO;
744                 if (atomic)
745                         break;
746                 err = do_lookup(nd, &this, &next);
747                 if (err)
748                         break;
749                 follow_mount(&next.mnt, &next.dentry);
750                 inode = next.dentry->d_inode;
751                 if ((lookup_flags & LOOKUP_FOLLOW)
752                     && inode && inode->i_op && inode->i_op->follow_link) {
753                         mntget(next.mnt);
754                         err = do_follow_link(next.dentry, nd);
755                         dput(next.dentry);
756                         mntput(next.mnt);
757                         if (err)
758                                 goto return_err;
759                         inode = nd->dentry->d_inode;
760                 } else {
761                         dput(nd->dentry);
762                         nd->mnt = next.mnt;
763                         nd->dentry = next.dentry;
764                 }
765                 err = -ENOENT;
766                 if (!inode)
767                         break;
768                 if (lookup_flags & LOOKUP_DIRECTORY) {
769                         err = -ENOTDIR; 
770                         if (!inode->i_op || !inode->i_op->lookup)
771                                 break;
772                 }
773                 goto return_base;
774 lookup_parent:          /* LOOKUP_PARENT: record the last component, do not look it up */
775                 nd->last = this;
776                 nd->last_type = LAST_NORM;
777                 if (this.name[0] != '.')
778                         goto return_base;
779                 if (this.len == 1)
780                         nd->last_type = LAST_DOT;
781                 else if (this.len == 2 && this.name[1] == '.')
782                         nd->last_type = LAST_DOTDOT;
783                 else
784                         goto return_base;
785 return_reval:
786                 /*
787                  * We bypassed the ordinary revalidation routines.
788                  * We may need to check the cached dentry for staleness.
789                  */
790                 if (nd->dentry && nd->dentry->d_sb &&
791                     (nd->dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
792                         err = -ESTALE;
793                         /* Note: we do not d_invalidate() */
794                         if (!nd->dentry->d_op->d_revalidate(nd->dentry, nd))
795                                 break;
796                 }
797 return_base:            /* success: the result is in nd */
798                 return 0;
799 out_dput:               /* drop next.dentry, then leave the loop */
800                 dput(next.dentry);
801                 break;
802         }
803         path_release(nd);       /* every 'break' above lands here */
804 return_err:             /* reached directly when do_follow_link() failed; no path_release() here */
805         return err;
806 }
807
808 int fastcall path_walk(const char * name, struct nameidata *nd)
809 {
810         current->total_link_count = 0;
811         return link_path_walk(name, nd);
812 }
813
814 /* SMP-safe */
815 /* returns 1 if everything is done */
816 static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
817 {
818         if (path_walk(name, nd))
819                 return 0;               /* something went wrong... */
820
821         if (!nd->dentry->d_inode || S_ISDIR(nd->dentry->d_inode->i_mode)) {
822                 struct nameidata nd_root;
823                 /*
824                  * NAME was not found in alternate root or it's a directory.  Try to find
825                  * it in the normal root:
826                  */
827                 nd_root.last_type = LAST_ROOT;
828                 nd_root.flags = nd->flags;
829                 memcpy(&nd_root.intent, &nd->intent, sizeof(nd_root.intent));
830                 read_lock(&current->fs->lock);
831                 nd_root.mnt = mntget(current->fs->rootmnt);
832                 nd_root.dentry = dget(current->fs->root);
833                 read_unlock(&current->fs->lock);
834                 if (path_walk(name, &nd_root))
835                         return 1;
836                 if (nd_root.dentry->d_inode) {
837                         path_release(nd);
838                         nd->dentry = nd_root.dentry;
839                         nd->mnt = nd_root.mnt;
840                         nd->last = nd_root.last;
841                         return 1;
842                 }
843                 path_release(&nd_root);
844         }
845         return 1;
846 }
847
848 void set_fs_altroot(void)
849 {
850         char *emul = __emul_prefix();
851         struct nameidata nd;
852         struct vfsmount *mnt = NULL, *oldmnt;
853         struct dentry *dentry = NULL, *olddentry;
854         int err;
855
856         if (!emul)
857                 goto set_it;
858         err = path_lookup(emul, LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_NOALT, &nd);
859         if (!err) {
860                 mnt = nd.mnt;
861                 dentry = nd.dentry;
862         }
863 set_it:
864         write_lock(&current->fs->lock);
865         oldmnt = current->fs->altrootmnt;
866         olddentry = current->fs->altroot;
867         current->fs->altrootmnt = mnt;
868         current->fs->altroot = dentry;
869         write_unlock(&current->fs->lock);
870         if (olddentry) {
871                 dput(olddentry);
872                 mntput(oldmnt);
873         }
874 }
875
876 /* SMP-safe */
877 static inline int
878 walk_init_root(const char *name, struct nameidata *nd)
879 {
880         read_lock(&current->fs->lock);
881         if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
882                 nd->mnt = mntget(current->fs->altrootmnt);
883                 nd->dentry = dget(current->fs->altroot);
884                 read_unlock(&current->fs->lock);
885                 if (__emul_lookup_dentry(name,nd))
886                         return 0;
887                 read_lock(&current->fs->lock);
888         }
889         nd->mnt = mntget(current->fs->rootmnt);
890         nd->dentry = dget(current->fs->root);
891         read_unlock(&current->fs->lock);
892         return 1;
893 }
894
895 int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata *nd)
896 {
897         int retval;
898
899         nd->last_type = LAST_ROOT; /* if there are only slashes... */
900         nd->flags = flags;
901
902         read_lock(&current->fs->lock);
903         if (*name=='/') {
904                 if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
905                         nd->mnt = mntget(current->fs->altrootmnt);
906                         nd->dentry = dget(current->fs->altroot);
907                         read_unlock(&current->fs->lock);
908                         if (__emul_lookup_dentry(name,nd))
909                                 return 0;
910                         read_lock(&current->fs->lock);
911                 }
912                 nd->mnt = mntget(current->fs->rootmnt);
913                 nd->dentry = dget(current->fs->root);
914         }
915         else{
916                 nd->mnt = mntget(current->fs->pwdmnt);
917                 nd->dentry = dget(current->fs->pwd);
918         }
919         read_unlock(&current->fs->lock);
920         current->total_link_count = 0;
921         retval = link_path_walk(name, nd);
922         if (unlikely(current->audit_context
923                      && nd && nd->dentry && nd->dentry->d_inode))
924                 audit_inode(name,
925                             nd->dentry->d_inode->i_ino,
926                             nd->dentry->d_inode->i_rdev);
927         return retval;
928 }
929
930 /*
931  * Restricted form of lookup. Doesn't follow links, single-component only,
932  * needs parent already locked. Doesn't follow mounts.
933  * SMP-safe.
934  */
935 static struct dentry * __lookup_hash(struct qstr *name, struct dentry * base, struct nameidata *nd)
936 {
937         struct dentry * dentry;
938         struct inode *inode;
939         int err;
940
941         inode = base->d_inode;
942         err = permission(inode, MAY_EXEC, nd);
943         dentry = ERR_PTR(err);
944         if (err)
945                 goto out;
946
947         /*
948          * See if the low-level filesystem might want
949          * to use its own hash..
950          */
951         if (base->d_op && base->d_op->d_hash) {
952                 err = base->d_op->d_hash(base, name);
953                 dentry = ERR_PTR(err);
954                 if (err < 0)
955                         goto out;
956         }
957
958         dentry = cached_lookup(base, name, nd);
959         if (!dentry) {
960                 struct dentry *new = d_alloc(base, name);
961                 dentry = ERR_PTR(-ENOMEM);
962                 if (!new)
963                         goto out;
964                 dentry = inode->i_op->lookup(inode, new, nd);
965                 if (!dentry)
966                         dentry = new;
967                 else
968                         dput(new);
969         }
970 out:
971         return dentry;
972 }
973
974 struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
975 {
976         return __lookup_hash(name, base, NULL);
977 }
978
979 /* SMP-safe */
980 struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
981 {
982         unsigned long hash;
983         struct qstr this;
984         unsigned int c;
985
986         this.name = name;
987         this.len = len;
988         if (!len)
989                 goto access;
990
991         hash = init_name_hash();
992         while (len--) {
993                 c = *(const unsigned char *)name++;
994                 if (c == '/' || c == '\0')
995                         goto access;
996                 hash = partial_name_hash(c, hash);
997         }
998         this.hash = end_name_hash(hash);
999
1000         return lookup_hash(&this, base);
1001 access:
1002         return ERR_PTR(-EACCES);
1003 }
1004
1005 /*
1006  *      namei()
1007  *
1008  * is used by most simple commands to get the inode of a specified name.
1009  * Open, link etc use their own routines, but this is enough for things
1010  * like 'chmod' etc.
1011  *
1012  * namei exists in two versions: namei/lnamei. The only difference is
1013  * that namei follows links, while lnamei does not.
1014  * SMP-safe
1015  */
1016 int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd)
1017 {
1018         char *tmp = getname(name);
1019         int err = PTR_ERR(tmp);
1020
1021         if (!IS_ERR(tmp)) {
1022                 err = path_lookup(tmp, flags, nd);
1023                 putname(tmp);
1024         }
1025         return err;
1026 }
1027
1028 /*
1029  * It's inline, so penalty for filesystems that don't use sticky bit is
1030  * minimal.
1031  */
1032 static inline int check_sticky(struct inode *dir, struct inode *inode)
1033 {
1034         if (!(dir->i_mode & S_ISVTX))
1035                 return 0;
1036         if (inode->i_uid == current->fsuid)
1037                 return 0;
1038         if (dir->i_uid == current->fsuid)
1039                 return 0;
1040         return !capable(CAP_FOWNER);
1041 }
1042
1043 /*
1044  *      Check whether we can remove a link victim from directory dir, check
1045  *  whether the type of victim is right.
1046  *  1. We can't do it if dir is read-only (done in permission())
1047  *  2. We should have write and exec permissions on dir
1048  *  3. We can't remove anything from append-only dir
1049  *  4. We can't do anything with immutable dir (done in permission())
1050  *  5. If the sticky bit on dir is set we should either
1051  *      a. be owner of dir, or
1052  *      b. be owner of victim, or
1053  *      c. have CAP_FOWNER capability
1054  *  6. If the victim is append-only or immutable we can't do antyhing with
1055  *     links pointing to it.
1056  *  7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
1057  *  8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
1058  *  9. We can't remove a root or mountpoint.
1059  * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
1060  *     nfs_async_unlink().
1061  */
1062 static inline int may_delete(struct inode *dir,struct dentry *victim,int isdir)
1063 {
1064         int error;
1065         if (!victim->d_inode)
1066                 return -ENOENT;
1067         if (victim->d_parent->d_inode != dir)
1068                 BUG();
1069                         
1070         error = permission(dir,MAY_WRITE | MAY_EXEC, NULL);
1071         if (error)
1072                 return error;
1073         if (IS_APPEND(dir))
1074                 return -EPERM;
1075         if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
1076             IS_IMMUTABLE(victim->d_inode))
1077                 return -EPERM;
1078         if (isdir) {
1079                 if (!S_ISDIR(victim->d_inode->i_mode))
1080                         return -ENOTDIR;
1081                 if (IS_ROOT(victim))
1082                         return -EBUSY;
1083         } else if (S_ISDIR(victim->d_inode->i_mode))
1084                 return -EISDIR;
1085         if (IS_DEADDIR(dir))
1086                 return -ENOENT;
1087         if (victim->d_flags & DCACHE_NFSFS_RENAMED)
1088                 return -EBUSY;
1089         return 0;
1090 }
1091
1092 /*      Check whether we can create an object with dentry child in directory
1093  *  dir.
1094  *  1. We can't do it if child already exists (open has special treatment for
1095  *     this case, but since we are inlined it's OK)
1096  *  2. We can't do it if dir is read-only (done in permission())
1097  *  3. We should have write and exec permissions on dir
1098  *  4. We can't do it if dir is immutable (done in permission())
1099  */
1100 static inline int may_create(struct inode *dir, struct dentry *child,
1101                              struct nameidata *nd)
1102 {
1103         if (child->d_inode)
1104                 return -EEXIST;
1105         if (IS_DEADDIR(dir))
1106                 return -ENOENT;
1107         return permission(dir,MAY_WRITE | MAY_EXEC, nd);
1108 }
1109
1110 /* 
1111  * Special case: O_CREAT|O_EXCL implies O_NOFOLLOW for security
1112  * reasons.
1113  *
1114  * O_DIRECTORY translates into forcing a directory lookup.
1115  */
1116 static inline int lookup_flags(unsigned int f)
1117 {
1118         unsigned long retval = LOOKUP_FOLLOW;
1119
1120         if (f & O_NOFOLLOW)
1121                 retval &= ~LOOKUP_FOLLOW;
1122         
1123         if ((f & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
1124                 retval &= ~LOOKUP_FOLLOW;
1125         
1126         if (f & O_DIRECTORY)
1127                 retval |= LOOKUP_DIRECTORY;
1128         if (f & O_ATOMICLOOKUP)
1129                 retval |= LOOKUP_ATOMIC;
1130
1131         return retval;
1132 }
1133
1134 /*
1135  * p1 and p2 should be directories on the same fs.
1136  */
1137 struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
1138 {
1139         struct dentry *p;
1140
1141         if (p1 == p2) {
1142                 down(&p1->d_inode->i_sem);
1143                 return NULL;
1144         }
1145
1146         down(&p1->d_inode->i_sb->s_vfs_rename_sem);
1147
1148         for (p = p1; p->d_parent != p; p = p->d_parent) {
1149                 if (p->d_parent == p2) {
1150                         down(&p2->d_inode->i_sem);
1151                         down(&p1->d_inode->i_sem);
1152                         return p;
1153                 }
1154         }
1155
1156         for (p = p2; p->d_parent != p; p = p->d_parent) {
1157                 if (p->d_parent == p1) {
1158                         down(&p1->d_inode->i_sem);
1159                         down(&p2->d_inode->i_sem);
1160                         return p;
1161                 }
1162         }
1163
1164         down(&p1->d_inode->i_sem);
1165         down(&p2->d_inode->i_sem);
1166         return NULL;
1167 }
1168
1169 void unlock_rename(struct dentry *p1, struct dentry *p2)
1170 {
1171         up(&p1->d_inode->i_sem);
1172         if (p1 != p2) {
1173                 up(&p2->d_inode->i_sem);
1174                 up(&p1->d_inode->i_sb->s_vfs_rename_sem);
1175         }
1176 }
1177
1178 int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
1179                 struct nameidata *nd)
1180 {
1181         int error = may_create(dir, dentry, nd);
1182
1183         if (error)
1184                 return error;
1185
1186         if (!dir->i_op || !dir->i_op->create)
1187                 return -EACCES; /* shouldn't it be ENOSYS? */
1188         mode &= S_IALLUGO;
1189         mode |= S_IFREG;
1190         error = security_inode_create(dir, dentry, mode);
1191         if (error)
1192                 return error;
1193         DQUOT_INIT(dir);
1194         error = dir->i_op->create(dir, dentry, mode, nd);
1195         if (!error) {
1196                 inode_dir_notify(dir, DN_CREATE);
1197                 security_inode_post_create(dir, dentry, mode);
1198         }
1199         return error;
1200 }
1201
1202 int may_open(struct nameidata *nd, int acc_mode, int flag)
1203 {
1204         struct dentry *dentry = nd->dentry;
1205         struct inode *inode = dentry->d_inode;
1206         int error;
1207
1208         if (!inode)
1209                 return -ENOENT;
1210
1211         if (S_ISLNK(inode->i_mode))
1212                 return -ELOOP;
1213         
1214         if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
1215                 return -EISDIR;
1216
1217         error = permission(inode, acc_mode, nd);
1218         if (error)
1219                 return error;
1220
1221         /*
1222          * FIFO's, sockets and device files are special: they don't
1223          * actually live on the filesystem itself, and as such you
1224          * can write to them even if the filesystem is read-only.
1225          */
1226         if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
1227                 flag &= ~O_TRUNC;
1228         } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
1229                 if (nd->mnt->mnt_flags & MNT_NODEV)
1230                         return -EACCES;
1231
1232                 flag &= ~O_TRUNC;
1233         } else if (IS_RDONLY(inode) && (flag & FMODE_WRITE))
1234                 return -EROFS;
1235         /*
1236          * An append-only file must be opened in append mode for writing.
1237          */
1238         if (IS_APPEND(inode)) {
1239                 if  ((flag & FMODE_WRITE) && !(flag & O_APPEND))
1240                         return -EPERM;
1241                 if (flag & O_TRUNC)
1242                         return -EPERM;
1243         }
1244
1245         /*
1246          * Ensure there are no outstanding leases on the file.
1247          */
1248         error = break_lease(inode, flag);
1249         if (error)
1250                 return error;
1251
1252         if (flag & O_TRUNC) {
1253                 error = get_write_access(inode);
1254                 if (error)
1255                         return error;
1256
1257                 /*
1258                  * Refuse to truncate files with mandatory locks held on them.
1259                  */
1260                 error = locks_verify_locked(inode);
1261                 if (!error) {
1262                         DQUOT_INIT(inode);
1263                         
1264                         error = do_truncate(dentry, 0);
1265                 }
1266                 put_write_access(inode);
1267                 if (error)
1268                         return error;
1269         } else
1270                 if (flag & FMODE_WRITE)
1271                         DQUOT_INIT(inode);
1272
1273         return 0;
1274 }
1275
1276 /*
1277  *      open_namei()
1278  *
1279  * namei for open - this is in fact almost the whole open-routine.
1280  *
1281  * Note that the low bits of "flag" aren't the same as in the open
1282  * system call - they are 00 - no permissions needed
1283  *                        01 - read permission needed
1284  *                        10 - write permission needed
1285  *                        11 - read/write permissions needed
1286  * which is a lot more logical, and also allows the "no perm" needed
1287  * for symlinks (where the permissions are checked later).
1288  * SMP-safe
1289  */
1290 int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
1291 {
1292         int acc_mode, error = 0;
1293         struct dentry *dentry;
1294         struct dentry *dir;
1295         int count = 0;
1296
1297         acc_mode = ACC_MODE(flag);
1298
1299         /* Allow the LSM permission hook to distinguish append 
1300            access from general write access. */
1301         if (flag & O_APPEND)
1302                 acc_mode |= MAY_APPEND;
1303
1304         /* Fill in the open() intent data */
1305         nd->intent.open.flags = flag;
1306         nd->intent.open.create_mode = mode;
1307
1308         /*
1309          * The simplest case - just a plain lookup.
1310          */
1311         if (!(flag & O_CREAT)) {
1312                 error = path_lookup(pathname, lookup_flags(flag)|LOOKUP_OPEN, nd);
1313                 if (error)
1314                         return error;
1315                 goto ok;
1316         }
1317
1318         /*
1319          * Create - we need to know the parent.
1320          */
1321         error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd);
1322         if (error)
1323                 return error;
1324
1325         /*
1326          * We have the parent and last component. First of all, check
1327          * that we are not asked to creat(2) an obvious directory - that
1328          * will not do.
1329          */
1330         error = -EISDIR;
1331         if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
1332                 goto exit;
1333
1334         dir = nd->dentry;
1335         nd->flags &= ~LOOKUP_PARENT;
1336         down(&dir->d_inode->i_sem);
1337         dentry = __lookup_hash(&nd->last, nd->dentry, nd);
1338
1339 do_last:
1340         error = PTR_ERR(dentry);
1341         if (IS_ERR(dentry)) {
1342                 up(&dir->d_inode->i_sem);
1343                 goto exit;
1344         }
1345
1346         /* Negative dentry, just create the file */
1347         if (!dentry->d_inode) {
1348                 if (!IS_POSIXACL(dir->d_inode))
1349                         mode &= ~current->fs->umask;
1350                 error = vfs_create(dir->d_inode, dentry, mode, nd);
1351                 up(&dir->d_inode->i_sem);
1352                 dput(nd->dentry);
1353                 nd->dentry = dentry;
1354                 if (error)
1355                         goto exit;
1356                 /* Don't check for write permission, don't truncate */
1357                 acc_mode = 0;
1358                 flag &= ~O_TRUNC;
1359                 goto ok;
1360         }
1361
1362         /*
1363          * It already exists.
1364          */
1365         up(&dir->d_inode->i_sem);
1366
1367         error = -EEXIST;
1368         if (flag & O_EXCL)
1369                 goto exit_dput;
1370
1371         if (d_mountpoint(dentry)) {
1372                 error = -ELOOP;
1373                 if (flag & O_NOFOLLOW)
1374                         goto exit_dput;
1375                 while (__follow_down(&nd->mnt,&dentry) && d_mountpoint(dentry));
1376         }
1377         error = -ENOENT;
1378         if (!dentry->d_inode)
1379                 goto exit_dput;
1380         if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
1381                 goto do_link;
1382
1383         dput(nd->dentry);
1384         nd->dentry = dentry;
1385         error = -EISDIR;
1386         if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
1387                 goto exit;
1388 ok:
1389         error = may_open(nd, acc_mode, flag);
1390         if (error)
1391                 goto exit;
1392         return 0;
1393
1394 exit_dput:
1395         dput(dentry);
1396 exit:
1397         path_release(nd);
1398         return error;
1399
1400 do_link:
1401         error = -ELOOP;
1402         if (flag & O_NOFOLLOW)
1403                 goto exit_dput;
1404         /*
1405          * This is subtle. Instead of calling do_follow_link() we do the
1406          * thing by hands. The reason is that this way we have zero link_count
1407          * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
1408          * After that we have the parent and last component, i.e.
1409          * we are in the same situation as after the first path_walk().
1410          * Well, almost - if the last component is normal we get its copy
1411          * stored in nd->last.name and we will have to putname() it when we
1412          * are done. Procfs-like symlinks just set LAST_BIND.
1413          */
1414         nd->flags |= LOOKUP_PARENT;
1415         error = security_inode_follow_link(dentry, nd);
1416         if (error)
1417                 goto exit_dput;
1418         touch_atime(nd->mnt, dentry);
1419         nd_set_link(nd, NULL);
1420         error = dentry->d_inode->i_op->follow_link(dentry, nd);
1421         if (!error) {
1422                 char *s = nd_get_link(nd);
1423                 if (s)
1424                         error = __vfs_follow_link(nd, s);
1425                 if (dentry->d_inode->i_op->put_link)
1426                         dentry->d_inode->i_op->put_link(dentry, nd);
1427         }
1428         dput(dentry);
1429         if (error)
1430                 return error;
1431         nd->flags &= ~LOOKUP_PARENT;
1432         if (nd->last_type == LAST_BIND) {
1433                 dentry = nd->dentry;
1434                 goto ok;
1435         }
1436         error = -EISDIR;
1437         if (nd->last_type != LAST_NORM)
1438                 goto exit;
1439         if (nd->last.name[nd->last.len]) {
1440                 putname(nd->last.name);
1441                 goto exit;
1442         }
1443         error = -ELOOP;
1444         if (count++==32) {
1445                 putname(nd->last.name);
1446                 goto exit;
1447         }
1448         dir = nd->dentry;
1449         down(&dir->d_inode->i_sem);
1450         dentry = __lookup_hash(&nd->last, nd->dentry, nd);
1451         putname(nd->last.name);
1452         goto do_last;
1453 }
1454
1455 /**
1456  * lookup_create - lookup a dentry, creating it if it doesn't exist
1457  * @nd: nameidata info
1458  * @is_dir: directory flag
1459  *
1460  * Simple function to lookup and return a dentry and create it
1461  * if it doesn't exist.  Is SMP-safe.
1462  */
1463 struct dentry *lookup_create(struct nameidata *nd, int is_dir)
1464 {
1465         struct dentry *dentry;
1466
1467         down(&nd->dentry->d_inode->i_sem);
1468         dentry = ERR_PTR(-EEXIST);
1469         if (nd->last_type != LAST_NORM)
1470                 goto fail;
1471         nd->flags &= ~LOOKUP_PARENT;
1472         dentry = lookup_hash(&nd->last, nd->dentry);
1473         if (IS_ERR(dentry))
1474                 goto fail;
1475         if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
1476                 goto enoent;
1477         return dentry;
1478 enoent:
1479         dput(dentry);
1480         dentry = ERR_PTR(-ENOENT);
1481 fail:
1482         return dentry;
1483 }
1484
1485 int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1486 {
1487         int error = may_create(dir, dentry, NULL);
1488
1489         if (error)
1490                 return error;
1491
1492         if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
1493                 return -EPERM;
1494
1495         if (!dir->i_op || !dir->i_op->mknod)
1496                 return -EPERM;
1497
1498         error = security_inode_mknod(dir, dentry, mode, dev);
1499         if (error)
1500                 return error;
1501
1502         DQUOT_INIT(dir);
1503         error = dir->i_op->mknod(dir, dentry, mode, dev);
1504         if (!error) {
1505                 inode_dir_notify(dir, DN_CREATE);
1506                 security_inode_post_mknod(dir, dentry, mode, dev);
1507         }
1508         return error;
1509 }
1510
1511 asmlinkage long sys_mknod(const char __user * filename, int mode, unsigned dev)
1512 {
1513         int error = 0;
1514         char * tmp;
1515         struct dentry * dentry;
1516         struct nameidata nd;
1517
1518         if (S_ISDIR(mode))
1519                 return -EPERM;
1520         tmp = getname(filename);
1521         if (IS_ERR(tmp))
1522                 return PTR_ERR(tmp);
1523
1524         error = path_lookup(tmp, LOOKUP_PARENT, &nd);
1525         if (error)
1526                 goto out;
1527         dentry = lookup_create(&nd, 0);
1528         error = PTR_ERR(dentry);
1529
1530         if (!IS_POSIXACL(nd.dentry->d_inode))
1531                 mode &= ~current->fs->umask;
1532         if (!IS_ERR(dentry)) {
1533                 switch (mode & S_IFMT) {
1534                 case 0: case S_IFREG:
1535                         error = vfs_create(nd.dentry->d_inode,dentry,mode,&nd);
1536                         break;
1537                 case S_IFCHR: case S_IFBLK:
1538                         error = vfs_mknod(nd.dentry->d_inode,dentry,mode,
1539                                         new_decode_dev(dev));
1540                         break;
1541                 case S_IFIFO: case S_IFSOCK:
1542                         error = vfs_mknod(nd.dentry->d_inode,dentry,mode,0);
1543                         break;
1544                 case S_IFDIR:
1545                         error = -EPERM;
1546                         break;
1547                 default:
1548                         error = -EINVAL;
1549                 }
1550                 dput(dentry);
1551         }
1552         up(&nd.dentry->d_inode->i_sem);
1553         path_release(&nd);
1554 out:
1555         putname(tmp);
1556
1557         return error;
1558 }
1559
1560 int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1561 {
1562         int error = may_create(dir, dentry, NULL);
1563
1564         if (error)
1565                 return error;
1566
1567         if (!dir->i_op || !dir->i_op->mkdir)
1568                 return -EPERM;
1569
1570         mode &= (S_IRWXUGO|S_ISVTX);
1571         error = security_inode_mkdir(dir, dentry, mode);
1572         if (error)
1573                 return error;
1574
1575         DQUOT_INIT(dir);
1576         error = dir->i_op->mkdir(dir, dentry, mode);
1577         if (!error) {
1578                 inode_dir_notify(dir, DN_CREATE);
1579                 security_inode_post_mkdir(dir,dentry, mode);
1580         }
1581         return error;
1582 }
1583
1584 asmlinkage long sys_mkdir(const char __user * pathname, int mode)
1585 {
1586         int error = 0;
1587         char * tmp;
1588
1589         tmp = getname(pathname);
1590         error = PTR_ERR(tmp);
1591         if (!IS_ERR(tmp)) {
1592                 struct dentry *dentry;
1593                 struct nameidata nd;
1594
1595                 error = path_lookup(tmp, LOOKUP_PARENT, &nd);
1596                 if (error)
1597                         goto out;
1598                 dentry = lookup_create(&nd, 1);
1599                 error = PTR_ERR(dentry);
1600                 if (!IS_ERR(dentry)) {
1601                         if (!IS_POSIXACL(nd.dentry->d_inode))
1602                                 mode &= ~current->fs->umask;
1603                         error = vfs_mkdir(nd.dentry->d_inode, dentry, mode);
1604                         dput(dentry);
1605                 }
1606                 up(&nd.dentry->d_inode->i_sem);
1607                 path_release(&nd);
1608 out:
1609                 putname(tmp);
1610         }
1611
1612         return error;
1613 }
1614
1615 /*
1616  * We try to drop the dentry early: we should have
1617  * a usage count of 2 if we're the only user of this
1618  * dentry, and if that is true (possibly after pruning
1619  * the dcache), then we drop the dentry now.
1620  *
1621  * A low-level filesystem can, if it choses, legally
1622  * do a
1623  *
1624  *      if (!d_unhashed(dentry))
1625  *              return -EBUSY;
1626  *
1627  * if it cannot handle the case of removing a directory
1628  * that is still in use by something else..
1629  */
1630 static void d_unhash(struct dentry *dentry)
1631 {
1632         dget(dentry);
1633         spin_lock(&dcache_lock);
1634         switch (atomic_read(&dentry->d_count)) {
1635         default:
1636                 spin_unlock(&dcache_lock);
1637                 shrink_dcache_parent(dentry);
1638                 spin_lock(&dcache_lock);
1639                 if (atomic_read(&dentry->d_count) != 2)
1640                         break;
1641         case 2:
1642                 __d_drop(dentry);
1643         }
1644         spin_unlock(&dcache_lock);
1645 }
1646
1647 int vfs_rmdir(struct inode *dir, struct dentry *dentry)
1648 {
1649         int error = may_delete(dir, dentry, 1);
1650
1651         if (error)
1652                 return error;
1653
1654         if (!dir->i_op || !dir->i_op->rmdir)
1655                 return -EPERM;
1656
1657         DQUOT_INIT(dir);
1658
1659         down(&dentry->d_inode->i_sem);
1660         d_unhash(dentry);
1661         if (d_mountpoint(dentry))
1662                 error = -EBUSY;
1663         else {
1664                 error = security_inode_rmdir(dir, dentry);
1665                 if (!error) {
1666                         error = dir->i_op->rmdir(dir, dentry);
1667                         if (!error)
1668                                 dentry->d_inode->i_flags |= S_DEAD;
1669                 }
1670         }
1671         up(&dentry->d_inode->i_sem);
1672         if (!error) {
1673                 inode_dir_notify(dir, DN_DELETE);
1674                 d_delete(dentry);
1675         }
1676         dput(dentry);
1677
1678         return error;
1679 }
1680
1681 asmlinkage long sys_rmdir(const char __user * pathname)
1682 {
1683         int error = 0;
1684         char * name;
1685         struct dentry *dentry;
1686         struct nameidata nd;
1687
1688         name = getname(pathname);
1689         if(IS_ERR(name))
1690                 return PTR_ERR(name);
1691
1692         error = path_lookup(name, LOOKUP_PARENT, &nd);
1693         if (error)
1694                 goto exit;
1695
1696         switch(nd.last_type) {
1697                 case LAST_DOTDOT:
1698                         error = -ENOTEMPTY;
1699                         goto exit1;
1700                 case LAST_DOT:
1701                         error = -EINVAL;
1702                         goto exit1;
1703                 case LAST_ROOT:
1704                         error = -EBUSY;
1705                         goto exit1;
1706         }
1707         down(&nd.dentry->d_inode->i_sem);
1708         dentry = lookup_hash(&nd.last, nd.dentry);
1709         error = PTR_ERR(dentry);
1710         if (!IS_ERR(dentry)) {
1711                 error = vfs_rmdir(nd.dentry->d_inode, dentry);
1712                 dput(dentry);
1713         }
1714         up(&nd.dentry->d_inode->i_sem);
1715 exit1:
1716         path_release(&nd);
1717 exit:
1718         putname(name);
1719         return error;
1720 }
1721
1722 int vfs_unlink(struct inode *dir, struct dentry *dentry)
1723 {
1724         int error = may_delete(dir, dentry, 0);
1725
1726         if (error)
1727                 return error;
1728
1729         if (!dir->i_op || !dir->i_op->unlink)
1730                 return -EPERM;
1731
1732         DQUOT_INIT(dir);
1733
1734         down(&dentry->d_inode->i_sem);
1735         if (d_mountpoint(dentry))
1736                 error = -EBUSY;
1737         else {
1738                 error = security_inode_unlink(dir, dentry);
1739                 if (!error)
1740                         error = dir->i_op->unlink(dir, dentry);
1741         }
1742         up(&dentry->d_inode->i_sem);
1743
1744         /* We don't d_delete() NFS sillyrenamed files--they still exist. */
1745         if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
1746                 d_delete(dentry);
1747                 inode_dir_notify(dir, DN_DELETE);
1748         }
1749         return error;
1750 }
1751
1752 /*
1753  * Make sure that the actual truncation of the file will occur outside its
1754  * directory's i_sem.  Truncate can take a long time if there is a lot of
1755  * writeout happening, and we don't want to prevent access to the directory
1756  * while waiting on the I/O.
1757  */
1758 asmlinkage long sys_unlink(const char __user * pathname)
1759 {
1760         int error = 0;
1761         char * name;
1762         struct dentry *dentry;
1763         struct nameidata nd;
1764         struct inode *inode = NULL;
1765
1766         name = getname(pathname);
1767         if(IS_ERR(name))
1768                 return PTR_ERR(name);
1769
1770         error = path_lookup(name, LOOKUP_PARENT, &nd);
1771         if (error)
1772                 goto exit;
1773         error = -EISDIR;
1774         if (nd.last_type != LAST_NORM)
1775                 goto exit1;
1776         down(&nd.dentry->d_inode->i_sem);
1777         dentry = lookup_hash(&nd.last, nd.dentry);
1778         error = PTR_ERR(dentry);
1779         if (!IS_ERR(dentry)) {
1780                 /* Why not before? Because we want correct error value */
1781                 if (nd.last.name[nd.last.len])
1782                         goto slashes;
1783                 inode = dentry->d_inode;
1784                 if (inode)
1785                         atomic_inc(&inode->i_count);
1786                 error = vfs_unlink(nd.dentry->d_inode, dentry);
1787         exit2:
1788                 dput(dentry);
1789         }
1790         up(&nd.dentry->d_inode->i_sem);
1791 exit1:
1792         path_release(&nd);
1793 exit:
1794         putname(name);
1795
1796         if (inode)
1797                 iput(inode);    /* truncate the inode here */
1798         return error;
1799
1800 slashes:
1801         error = !dentry->d_inode ? -ENOENT :
1802                 S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
1803         goto exit2;
1804 }
1805
1806 int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode)
1807 {
1808         int error = may_create(dir, dentry, NULL);
1809
1810         if (error)
1811                 return error;
1812
1813         if (!dir->i_op || !dir->i_op->symlink)
1814                 return -EPERM;
1815
1816         error = security_inode_symlink(dir, dentry, oldname);
1817         if (error)
1818                 return error;
1819
1820         DQUOT_INIT(dir);
1821         error = dir->i_op->symlink(dir, dentry, oldname);
1822         if (!error) {
1823                 inode_dir_notify(dir, DN_CREATE);
1824                 security_inode_post_symlink(dir, dentry, oldname);
1825         }
1826         return error;
1827 }
1828
1829 asmlinkage long sys_symlink(const char __user * oldname, const char __user * newname)
1830 {
1831         int error = 0;
1832         char * from;
1833         char * to;
1834
1835         from = getname(oldname);
1836         if(IS_ERR(from))
1837                 return PTR_ERR(from);
1838         to = getname(newname);
1839         error = PTR_ERR(to);
1840         if (!IS_ERR(to)) {
1841                 struct dentry *dentry;
1842                 struct nameidata nd;
1843
1844                 error = path_lookup(to, LOOKUP_PARENT, &nd);
1845                 if (error)
1846                         goto out;
1847                 dentry = lookup_create(&nd, 0);
1848                 error = PTR_ERR(dentry);
1849                 if (!IS_ERR(dentry)) {
1850                         error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO);
1851                         dput(dentry);
1852                 }
1853                 up(&nd.dentry->d_inode->i_sem);
1854                 path_release(&nd);
1855 out:
1856                 putname(to);
1857         }
1858         putname(from);
1859         return error;
1860 }
1861
1862 int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
1863 {
1864         struct inode *inode = old_dentry->d_inode;
1865         int error;
1866
1867         if (!inode)
1868                 return -ENOENT;
1869
1870         error = may_create(dir, new_dentry, NULL);
1871         if (error)
1872                 return error;
1873
1874         if (dir->i_sb != inode->i_sb)
1875                 return -EXDEV;
1876
1877         /*
1878          * A link to an append-only or immutable file cannot be created.
1879          */
1880         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1881                 return -EPERM;
1882         if (!dir->i_op || !dir->i_op->link)
1883                 return -EPERM;
1884         if (S_ISDIR(old_dentry->d_inode->i_mode))
1885                 return -EPERM;
1886
1887         error = security_inode_link(old_dentry, dir, new_dentry);
1888         if (error)
1889                 return error;
1890
1891         down(&old_dentry->d_inode->i_sem);
1892         DQUOT_INIT(dir);
1893         error = dir->i_op->link(old_dentry, dir, new_dentry);
1894         up(&old_dentry->d_inode->i_sem);
1895         if (!error) {
1896                 inode_dir_notify(dir, DN_CREATE);
1897                 security_inode_post_link(old_dentry, dir, new_dentry);
1898         }
1899         return error;
1900 }
1901
1902 /*
1903  * Hardlinks are often used in delicate situations.  We avoid
1904  * security-related surprises by not following symlinks on the
1905  * newname.  --KAB
1906  *
1907  * We don't follow them on the oldname either to be compatible
1908  * with linux 2.0, and to avoid hard-linking to directories
1909  * and other special files.  --ADM
1910  */
1911 asmlinkage long sys_link(const char __user * oldname, const char __user * newname)
1912 {
1913         struct dentry *new_dentry;
1914         struct nameidata nd, old_nd;
1915         int error;
1916         char * to;
1917
1918         to = getname(newname);
1919         if (IS_ERR(to))
1920                 return PTR_ERR(to);
1921
1922         error = __user_walk(oldname, 0, &old_nd);
1923         if (error)
1924                 goto exit;
1925         error = path_lookup(to, LOOKUP_PARENT, &nd);
1926         if (error)
1927                 goto out;
1928         error = -EXDEV;
1929         if (old_nd.mnt != nd.mnt)
1930                 goto out_release;
1931         new_dentry = lookup_create(&nd, 0);
1932         error = PTR_ERR(new_dentry);
1933         if (!IS_ERR(new_dentry)) {
1934                 error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
1935                 dput(new_dentry);
1936         }
1937         up(&nd.dentry->d_inode->i_sem);
1938 out_release:
1939         path_release(&nd);
1940 out:
1941         path_release(&old_nd);
1942 exit:
1943         putname(to);
1944
1945         return error;
1946 }
1947
1948 /*
1949  * The worst of all namespace operations - renaming directory. "Perverted"
1950  * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
1951  * Problems:
1952  *      a) we can get into loop creation. Check is done in is_subdir().
1953  *      b) race potential - two innocent renames can create a loop together.
1954  *         That's where 4.4 screws up. Current fix: serialization on
1955  *         sb->s_vfs_rename_sem. We might be more accurate, but that's another
1956  *         story.
1957  *      c) we have to lock _three_ objects - parents and victim (if it exists).
1958  *         And that - after we got ->i_sem on parents (until then we don't know
1959  *         whether the target exists).  Solution: try to be smart with locking
1960  *         order for inodes.  We rely on the fact that tree topology may change
1961  *         only under ->s_vfs_rename_sem _and_ that parent of the object we
1962  *         move will be locked.  Thus we can rank directories by the tree
1963  *         (ancestors first) and rank all non-directories after them.
1964  *         That works since everybody except rename does "lock parent, lookup,
1965  *         lock child" and rename is under ->s_vfs_rename_sem.
1966  *         HOWEVER, it relies on the assumption that any object with ->lookup()
1967  *         has no more than 1 dentry.  If "hybrid" objects will ever appear,
1968  *         we'd better make sure that there's no link(2) for them.
1969  *      d) some filesystems don't support opened-but-unlinked directories,
1970  *         either because of layout or because they are not ready to deal with
1971  *         all cases correctly. The latter will be fixed (taking this sort of
1972  *         stuff into VFS), but the former is not going away. Solution: the same
1973  *         trick as in rmdir().
1974  *      e) conversion from fhandle to dentry may come in the wrong moment - when
1975  *         we are removing the target. Solution: we will have to grab ->i_sem
1976  *         in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
 *         ->i_sem on parents, which works but leads to some truly excessive
1978  *         locking].
1979  */
1980 int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
1981                struct inode *new_dir, struct dentry *new_dentry)
1982 {
1983         int error = 0;
1984         struct inode *target;
1985
1986         /*
1987          * If we are going to change the parent - check write permissions,
1988          * we'll need to flip '..'.
1989          */
1990         if (new_dir != old_dir) {
1991                 error = permission(old_dentry->d_inode, MAY_WRITE, NULL);
1992                 if (error)
1993                         return error;
1994         }
1995
1996         error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
1997         if (error)
1998                 return error;
1999
2000         target = new_dentry->d_inode;
2001         if (target) {
2002                 down(&target->i_sem);
2003                 d_unhash(new_dentry);
2004         }
2005         if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
2006                 error = -EBUSY;
2007         else 
2008                 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
2009         if (target) {
2010                 if (!error)
2011                         target->i_flags |= S_DEAD;
2012                 up(&target->i_sem);
2013                 if (d_unhashed(new_dentry))
2014                         d_rehash(new_dentry);
2015                 dput(new_dentry);
2016         }
2017         if (!error) {
2018                 d_move(old_dentry,new_dentry);
2019                 security_inode_post_rename(old_dir, old_dentry,
2020                                            new_dir, new_dentry);
2021         }
2022         return error;
2023 }
2024
2025 int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
2026                struct inode *new_dir, struct dentry *new_dentry)
2027 {
2028         struct inode *target;
2029         int error;
2030
2031         error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
2032         if (error)
2033                 return error;
2034
2035         dget(new_dentry);
2036         target = new_dentry->d_inode;
2037         if (target)
2038                 down(&target->i_sem);
2039         if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
2040                 error = -EBUSY;
2041         else
2042                 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
2043         if (!error) {
2044                 /* The following d_move() should become unconditional */
2045                 if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME))
2046                         d_move(old_dentry, new_dentry);
2047                 security_inode_post_rename(old_dir, old_dentry, new_dir, new_dentry);
2048         }
2049         if (target)
2050                 up(&target->i_sem);
2051         dput(new_dentry);
2052         return error;
2053 }
2054
2055 int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
2056                struct inode *new_dir, struct dentry *new_dentry)
2057 {
2058         int error;
2059         int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
2060
2061         if (old_dentry->d_inode == new_dentry->d_inode)
2062                 return 0;
2063  
2064         error = may_delete(old_dir, old_dentry, is_dir);
2065         if (error)
2066                 return error;
2067
2068         if (!new_dentry->d_inode)
2069                 error = may_create(new_dir, new_dentry, NULL);
2070         else
2071                 error = may_delete(new_dir, new_dentry, is_dir);
2072         if (error)
2073                 return error;
2074
2075         if (!old_dir->i_op || !old_dir->i_op->rename)
2076                 return -EPERM;
2077
2078         DQUOT_INIT(old_dir);
2079         DQUOT_INIT(new_dir);
2080
2081         if (is_dir)
2082                 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
2083         else
2084                 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
2085         if (!error) {
2086                 if (old_dir == new_dir)
2087                         inode_dir_notify(old_dir, DN_RENAME);
2088                 else {
2089                         inode_dir_notify(old_dir, DN_DELETE);
2090                         inode_dir_notify(new_dir, DN_CREATE);
2091                 }
2092         }
2093         return error;
2094 }
2095
2096 static inline int do_rename(const char * oldname, const char * newname)
2097 {
2098         int error = 0;
2099         struct dentry * old_dir, * new_dir;
2100         struct dentry * old_dentry, *new_dentry;
2101         struct dentry * trap;
2102         struct nameidata oldnd, newnd;
2103
2104         error = path_lookup(oldname, LOOKUP_PARENT, &oldnd);
2105         if (error)
2106                 goto exit;
2107
2108         error = path_lookup(newname, LOOKUP_PARENT, &newnd);
2109         if (error)
2110                 goto exit1;
2111
2112         error = -EXDEV;
2113         if (oldnd.mnt != newnd.mnt)
2114                 goto exit2;
2115
2116         old_dir = oldnd.dentry;
2117         error = -EBUSY;
2118         if (oldnd.last_type != LAST_NORM)
2119                 goto exit2;
2120
2121         new_dir = newnd.dentry;
2122         if (newnd.last_type != LAST_NORM)
2123                 goto exit2;
2124
2125         trap = lock_rename(new_dir, old_dir);
2126
2127         old_dentry = lookup_hash(&oldnd.last, old_dir);
2128         error = PTR_ERR(old_dentry);
2129         if (IS_ERR(old_dentry))
2130                 goto exit3;
2131         /* source must exist */
2132         error = -ENOENT;
2133         if (!old_dentry->d_inode)
2134                 goto exit4;
2135         /* unless the source is a directory trailing slashes give -ENOTDIR */
2136         if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
2137                 error = -ENOTDIR;
2138                 if (oldnd.last.name[oldnd.last.len])
2139                         goto exit4;
2140                 if (newnd.last.name[newnd.last.len])
2141                         goto exit4;
2142         }
2143         /* source should not be ancestor of target */
2144         error = -EINVAL;
2145         if (old_dentry == trap)
2146                 goto exit4;
2147         new_dentry = lookup_hash(&newnd.last, new_dir);
2148         error = PTR_ERR(new_dentry);
2149         if (IS_ERR(new_dentry))
2150                 goto exit4;
2151         /* target should not be an ancestor of source */
2152         error = -ENOTEMPTY;
2153         if (new_dentry == trap)
2154                 goto exit5;
2155
2156         error = vfs_rename(old_dir->d_inode, old_dentry,
2157                                    new_dir->d_inode, new_dentry);
2158 exit5:
2159         dput(new_dentry);
2160 exit4:
2161         dput(old_dentry);
2162 exit3:
2163         unlock_rename(new_dir, old_dir);
2164 exit2:
2165         path_release(&newnd);
2166 exit1:
2167         path_release(&oldnd);
2168 exit:
2169         return error;
2170 }
2171
2172 asmlinkage long sys_rename(const char __user * oldname, const char __user * newname)
2173 {
2174         int error;
2175         char * from;
2176         char * to;
2177
2178         from = getname(oldname);
2179         if(IS_ERR(from))
2180                 return PTR_ERR(from);
2181         to = getname(newname);
2182         error = PTR_ERR(to);
2183         if (!IS_ERR(to)) {
2184                 error = do_rename(from,to);
2185                 putname(to);
2186         }
2187         putname(from);
2188         return error;
2189 }
2190
2191 int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link)
2192 {
2193         int len;
2194
2195         len = PTR_ERR(link);
2196         if (IS_ERR(link))
2197                 goto out;
2198
2199         len = strlen(link);
2200         if (len > (unsigned) buflen)
2201                 len = buflen;
2202         if (copy_to_user(buffer, link, len))
2203                 len = -EFAULT;
2204 out:
2205         return len;
2206 }
2207
2208 /*
2209  * A helper for ->readlink().  This should be used *ONLY* for symlinks that
2210  * have ->follow_link() touching nd only in nd_set_link().  Using (or not
2211  * using) it for any given inode is up to filesystem.
2212  */
2213 int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
2214 {
2215         struct nameidata nd;
2216         int res = dentry->d_inode->i_op->follow_link(dentry, &nd);
2217         if (!res) {
2218                 res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd));
2219                 if (dentry->d_inode->i_op->put_link)
2220                         dentry->d_inode->i_op->put_link(dentry, &nd);
2221         }
2222         return res;
2223 }
2224
2225 static inline int
2226 __vfs_follow_link(struct nameidata *nd, const char *link)
2227 {
2228         int res = 0;
2229         char *name;
2230         if (IS_ERR(link))
2231                 goto fail;
2232
2233         if (*link == '/') {
2234                 path_release(nd);
2235                 if (!walk_init_root(link, nd))
2236                         /* weird __emul_prefix() stuff did it */
2237                         goto out;
2238         }
2239         res = link_path_walk(link, nd);
2240 out:
2241         if (current->link_count || res || nd->last_type!=LAST_NORM)
2242                 return res;
2243         /*
2244          * If it is an iterative symlinks resolution in open_namei() we
2245          * have to copy the last component. And all that crap because of
2246          * bloody create() on broken symlinks. Furrfu...
2247          */
2248         name = __getname();
2249         if (unlikely(!name)) {
2250                 path_release(nd);
2251                 return -ENOMEM;
2252         }
2253         strcpy(name, nd->last.name);
2254         nd->last.name = name;
2255         return 0;
2256 fail:
2257         path_release(nd);
2258         return PTR_ERR(link);
2259 }
2260
/*
 * Exported, out-of-line entry point for __vfs_follow_link(); same
 * arguments, same return value.
 */
int vfs_follow_link(struct nameidata *nd, const char *link)
{
	int res = __vfs_follow_link(nd, link);

	return res;
}
2265
2266 /* get the link contents into pagecache */
2267 static char *page_getlink(struct dentry * dentry, struct page **ppage)
2268 {
2269         struct page * page;
2270         struct address_space *mapping = dentry->d_inode->i_mapping;
2271         page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage,
2272                                 NULL);
2273         if (IS_ERR(page))
2274                 goto sync_fail;
2275         wait_on_page_locked(page);
2276         if (!PageUptodate(page))
2277                 goto async_fail;
2278         *ppage = page;
2279         return kmap(page);
2280
2281 async_fail:
2282         page_cache_release(page);
2283         return ERR_PTR(-EIO);
2284
2285 sync_fail:
2286         return (char*)page;
2287 }
2288
2289 int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
2290 {
2291         struct page *page = NULL;
2292         char *s = page_getlink(dentry, &page);
2293         int res = vfs_readlink(dentry,buffer,buflen,s);
2294         if (page) {
2295                 kunmap(page);
2296                 page_cache_release(page);
2297         }
2298         return res;
2299 }
2300
/*
 * ->follow_link() for page-cache symlinks: fetch the link body with
 * page_getlink() and record it in @nd via nd_set_link().
 *
 * Returns 0 on success, or the error encoded by page_getlink().  On
 * success the page stays mapped and referenced; nothing is released here.
 */
int page_follow_link_light(struct dentry *dentry, struct nameidata *nd)
{
	struct page *page;
	char *body = page_getlink(dentry, &page);

	if (IS_ERR(body))
		return PTR_ERR(body);

	nd_set_link(nd, body);
	return 0;
}
2311
2312 void page_put_link(struct dentry *dentry, struct nameidata *nd)
2313 {
2314         if (!IS_ERR(nd_get_link(nd))) {
2315                 struct page *page;
2316                 page = find_get_page(dentry->d_inode->i_mapping, 0);
2317                 if (!page)
2318                         BUG();
2319                 kunmap(page);
2320                 page_cache_release(page);
2321                 page_cache_release(page);
2322         }
2323 }
2324
2325 int page_follow_link(struct dentry *dentry, struct nameidata *nd)
2326 {
2327         struct page *page = NULL;
2328         char *s = page_getlink(dentry, &page);
2329         int res = __vfs_follow_link(nd, s);
2330         if (page) {
2331                 kunmap(page);
2332                 page_cache_release(page);
2333         }
2334         return res;
2335 }
2336
2337 int page_symlink(struct inode *inode, const char *symname, int len)
2338 {
2339         struct address_space *mapping = inode->i_mapping;
2340         struct page *page = grab_cache_page(mapping, 0);
2341         int err = -ENOMEM;
2342         char *kaddr;
2343
2344         if (!page)
2345                 goto fail;
2346         err = mapping->a_ops->prepare_write(NULL, page, 0, len-1);
2347         if (err)
2348                 goto fail_map;
2349         kaddr = kmap_atomic(page, KM_USER0);
2350         memcpy(kaddr, symname, len-1);
2351         kunmap_atomic(kaddr, KM_USER0);
2352         mapping->a_ops->commit_write(NULL, page, 0, len-1);
2353         /*
2354          * Notice that we are _not_ going to block here - end of page is
2355          * unmapped, so this will only try to map the rest of page, see
2356          * that it is unmapped (typically even will not look into inode -
2357          * ->i_size will be enough for everything) and zero it out.
2358          * OTOH it's obviously correct and should make the page up-to-date.
2359          */
2360         if (!PageUptodate(page)) {
2361                 err = mapping->a_ops->readpage(NULL, page);
2362                 wait_on_page_locked(page);
2363         } else {
2364                 unlock_page(page);
2365         }
2366         page_cache_release(page);
2367         if (err < 0)
2368                 goto fail;
2369         mark_inode_dirty(inode);
2370         return 0;
2371 fail_map:
2372         unlock_page(page);
2373         page_cache_release(page);
2374 fail:
2375         return err;
2376 }
2377
/*
 * Ready-made inode operations for symlinks that keep their body in the
 * page cache: readlink goes through generic_readlink(), which in turn
 * uses the follow_link/put_link pair given here.
 */
struct inode_operations page_symlink_inode_operations = {
        .readlink       = generic_readlink,
        .follow_link    = page_follow_link_light,
        .put_link       = page_put_link,
};
2383
/* Symbols from this file made available via EXPORT_SYMBOL(). */
EXPORT_SYMBOL(__user_walk);
EXPORT_SYMBOL(follow_down);
EXPORT_SYMBOL(follow_up);
EXPORT_SYMBOL(get_write_access); /* binfmt_aout */
EXPORT_SYMBOL(getname);
EXPORT_SYMBOL(lock_rename);
EXPORT_SYMBOL(lookup_create);
EXPORT_SYMBOL(lookup_hash);
EXPORT_SYMBOL(lookup_one_len);
EXPORT_SYMBOL(page_follow_link);
EXPORT_SYMBOL(page_follow_link_light);
EXPORT_SYMBOL(page_put_link);
EXPORT_SYMBOL(page_readlink);
EXPORT_SYMBOL(page_symlink);
EXPORT_SYMBOL(page_symlink_inode_operations);
EXPORT_SYMBOL(path_lookup);
EXPORT_SYMBOL(path_release);
EXPORT_SYMBOL(path_walk);
EXPORT_SYMBOL(permission);
EXPORT_SYMBOL(unlock_rename);
EXPORT_SYMBOL(vfs_create);
EXPORT_SYMBOL(vfs_follow_link);
EXPORT_SYMBOL(vfs_link);
EXPORT_SYMBOL(vfs_mkdir);
EXPORT_SYMBOL(vfs_mknod);
EXPORT_SYMBOL(vfs_permission);
EXPORT_SYMBOL(vfs_readlink);
EXPORT_SYMBOL(vfs_rename);
EXPORT_SYMBOL(vfs_rmdir);
EXPORT_SYMBOL(vfs_symlink);
EXPORT_SYMBOL(vfs_unlink);
EXPORT_SYMBOL(generic_readlink);