fs/namei.c

   1 /*
   2  *  linux/fs/namei.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6
   7 /*
   8  * Some corrections by tytso.
   9  */
  10
  11 /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
  12  * lookup logic.
  13  */
  14 /* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
  15  */
  16
  17 #include <linux/init.h>
  18 #include <linux/module.h>
  19 #include <linux/slab.h>
  20 #include <linux/fs.h>
  21 #include <linux/namei.h>
  22 #include <linux/quotaops.h>
  23 #include <linux/pagemap.h>
  24 #include <linux/dnotify.h>
  25 #include <linux/smp_lock.h>
  26 #include <linux/personality.h>
  27 #include <linux/security.h>
  28 #include <linux/mount.h>
  29 #include <linux/audit.h>
  30 #include <asm/namei.h>
  31 #include <asm/uaccess.h>
  32
  33 #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
     /*
      * ACC_MODE(x) masks x with O_ACCMODE and uses the result to index a
      * four-entry byte table holding 0, 4, 2 and 6.
      * NOTE(review): the table values presumably correspond to
      * MAY_READ/MAY_WRITE combinations - confirm against <linux/fs.h>.
      */
  34
  35 /* [Feb-1997 T. Schoebel-Theuer]
  36  * Fundamental changes in the pathname lookup mechanisms (namei)
  37  * were necessary because of omirr.  The reason is that omirr needs
  38  * to know the _real_ pathname, not the user-supplied one, in case
  39  * of symlinks (and also when transname replacements occur).
  40  *
  41  * The new code replaces the old recursive symlink resolution with
  42  * an iterative one (in case of non-nested symlink chains).  It does
  43  * this with calls to <fs>_follow_link().
  44  * As a side effect, dir_namei(), _namei() and follow_link() are now
  45  * replaced with a single function lookup_dentry() that can handle all
  46  * the special cases of the former code.
  47  *
  48  * With the new dcache, the pathname is stored at each inode, at least as
  49  * long as the refcount of the inode is positive.  As a side effect, the
  50  * size of the dcache depends on the inode cache and thus is dynamic.
  51  *
  52  * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
  53  * resolution to correspond with current state of the code.
  54  *
  55  * Note that the symlink resolution is not *completely* iterative.
  56  * There is still a significant amount of tail- and mid- recursion in
  57  * the algorithm.  Also, note that <fs>_readlink() is not used in
  58  * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
  59  * may return different results than <fs>_follow_link().  Many virtual
  60  * filesystems (including /proc) exhibit this behavior.
  61  */
  62
  63 /* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
  64  * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
  65  * and the name already exists in form of a symlink, try to create the new
  66  * name indicated by the symlink. The old code always complained that the
  67  * name already exists, due to not following the symlink even if its target
  68  * is nonexistent.  The new semantics affects also mknod() and link() when
  69  * the name is a symlink pointing to a non-existent name.
  70  *
  71  * I don't know which semantics is the right one, since I have no access
  72  * to standards. But I found by trial that HP-UX 9.0 has the full "new"
  73  * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
  74  * "old" one. Personally, I think the new semantics is much more logical.
  75  * Note that "ln old new" where "new" is a symlink pointing to a non-existing
  76  * file does succeed in both HP-UX and SunOs, but not in Solaris
  77  * and in the old Linux semantics.
  78  */
  79
  80 /* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
  81  * semantics.  See the comments in "open_namei" and "do_link" below.
  82  *
  83  * [10-Sep-98 Alan Modra] Another symlink change.
  84  */
  85
  86 /* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
  87  *      inside the path - always follow.
  88  *      in the last component in creation/removal/renaming - never follow.
  89  *      if LOOKUP_FOLLOW passed - follow.
  90  *      if the pathname has trailing slashes - follow.
  91  *      otherwise - don't follow.
  92  * (applied in that order).
  93  *
  94  * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT
  95  * restored for 2.4. This is the last surviving part of old 4.2BSD bug.
  96  * During the 2.4 we need to fix the userland stuff depending on it -
  97  * hopefully we will be able to get rid of that wart in 2.5. So far only
  98  * XEmacs seems to be relying on it...
  99  */
 100 /*
 101  * [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland)
 102  * implemented.  Let's see if raised priority of ->s_vfs_rename_sem gives
 103  * any extra contention...
 104  */
 105
 106 /* In order to reduce some races, while at the same time doing additional
 107  * checking and hopefully speeding things up, we copy filenames to the
 108  * kernel data space before using them..
 109  *
 110  * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
 111  * PATH_MAX includes the nul terminator --RR.
 112  */
 113 static inline int do_getname(const char __user *filename, char *page)
 114 {
 115         int retval;
 116         unsigned long len = PATH_MAX;
 117
 118         if ((unsigned long) filename >= TASK_SIZE) {
 119                 if (!segment_eq(get_fs(), KERNEL_DS))
 120                         return -EFAULT;
 121         } else if (TASK_SIZE - (unsigned long) filename < PATH_MAX)
 122                 len = TASK_SIZE - (unsigned long) filename;
 123
 124         retval = strncpy_from_user((char *)page, filename, len);
 125         if (retval > 0) {
 126                 if (retval < len)
 127                         return 0;
 128                 return -ENAMETOOLONG;
 129         } else if (!retval)
 130                 retval = -ENOENT;
 131         return retval;
 132 }
 133
 134 char * getname(const char __user * filename)
 135 {
 136         char *tmp, *result;
 137
 138         result = ERR_PTR(-ENOMEM);
 139         tmp = __getname();
 140         if (tmp)  {
 141                 int retval = do_getname(filename, tmp);
 142
 143                 result = tmp;
 144                 if (retval < 0) {
 145                         __putname(tmp);
 146                         result = ERR_PTR(retval);
 147                 }
 148         }
 149         if (unlikely(current->audit_context) && !IS_ERR(result) && result)
 150                 audit_getname(result);
 151         return result;
 152 }
 153
 154 /*
 155  *      vfs_permission()
 156  *
 157  * is used to check for read/write/execute permissions on a file.
 158  * We use "fsuid" for this, letting us set arbitrary permissions
 159  * for filesystem access without changing the "normal" uids which
 160  * are used for other things..
 161  */
 162 int vfs_permission(struct inode * inode, int mask)
 163 {
 164         umode_t                 mode = inode->i_mode;
 165
 166         if (mask & MAY_WRITE) {
 167                 /*
 168                  * Nobody gets write access to a read-only fs.
 169                  */
 170                 if (IS_RDONLY(inode) &&
 171                     (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
 172                         return -EROFS;
 173
 174                 /*
 175                  * Nobody gets write access to an immutable file.
 176                  */
 177                 if (IS_IMMUTABLE(inode))
 178                         return -EACCES;
 179         }
 180
 181         if (current->fsuid == inode->i_uid)
 182                 mode >>= 6;
 183         else if (in_group_p(inode->i_gid))
 184                 mode >>= 3;
 185
 186         /*
 187          * If the DACs are ok we don't need any capability check.
 188          */
 189         if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask))
 190                 return 0;
 191
 192         /*
 193          * Read/write DACs are always overridable.
 194          * Executable DACs are overridable if at least one exec bit is set.
 195          */
 196         if (!(mask & MAY_EXEC) ||
 197             (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
 198                 if (capable(CAP_DAC_OVERRIDE))
 199                         return 0;
 200
 201         /*
 202          * Searching includes executable on directories, else just read.
 203          */
 204         if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
 205                 if (capable(CAP_DAC_READ_SEARCH))
 206                         return 0;
 207
 208         return -EACCES;
 209 }
 210
 211 int permission(struct inode * inode,int mask, struct nameidata *nd)
 212 {
 213         int retval;
 214         int submask;
 215
 216         /* Ordinary permission routines do not understand MAY_APPEND. */
 217         submask = mask & ~MAY_APPEND;
 218
 219         if (inode->i_op && inode->i_op->permission)
 220                 retval = inode->i_op->permission(inode, submask, nd);
 221         else
 222                 retval = vfs_permission(inode, submask);
 223         if (retval)
 224                 return retval;
 225
 226         return security_inode_permission(inode, mask, nd);
 227 }
 228
 229 /*
 230  * get_write_access() gets write permission for a file.
 231  * put_write_access() releases this write permission.
 232  * This is used for regular files.
 233  * We cannot support write (and maybe mmap read-write shared) accesses and
 234  * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
 235  * can have the following values:
 236  * 0: no writers, no VM_DENYWRITE mappings
 237  * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
 238  * > 0: (i_writecount) users are writing to the file.
 239  *
 240  * Normally we operate on that counter with atomic_{inc,dec} and it's safe
 241  * except for the cases where we don't hold i_writecount yet. Then we need to
 242  * use {get,deny}_write_access() - these functions check the sign and refuse
 243  * to do the change if sign is wrong. Exclusion between them is provided by
 244  * the inode->i_lock spinlock.
 245  */
 246
 247 int get_write_access(struct inode * inode)
 248 {
 249         spin_lock(&inode->i_lock);
 250         if (atomic_read(&inode->i_writecount) < 0) {
 251                 spin_unlock(&inode->i_lock);
 252                 return -ETXTBSY;
 253         }
 254         atomic_inc(&inode->i_writecount);
 255         spin_unlock(&inode->i_lock);
 256
 257         return 0;
 258 }
 259
 260 int deny_write_access(struct file * file)
 261 {
 262         struct inode *inode = file->f_dentry->d_inode;
 263
 264         spin_lock(&inode->i_lock);
 265         if (atomic_read(&inode->i_writecount) > 0) {
 266                 spin_unlock(&inode->i_lock);
 267                 return -ETXTBSY;
 268         }
 269         atomic_dec(&inode->i_writecount);
 270         spin_unlock(&inode->i_lock);
 271
 272         return 0;
 273 }
 274
 275 void path_release(struct nameidata *nd)
 276 {
 277         dput(nd->dentry);
 278         mntput(nd->mnt);
 279 }
 280
 281 /*
 282  * Internal lookup() using the new generic dcache.
 283  * SMP-safe
 284  */
 285 static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd)
 286 {
 287         struct dentry * dentry = __d_lookup(parent, name);
 288
 289         /* lockess __d_lookup may fail due to concurrent d_move()
 290          * in some unrelated directory, so try with d_lookup
 291          */
 292         if (!dentry)
 293                 dentry = d_lookup(parent, name);
 294
 295         if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
 296                 if (!dentry->d_op->d_revalidate(dentry, nd) && !d_invalidate(dentry)) {
 297                         dput(dentry);
 298                         dentry = NULL;
 299                 }
 300         }
 301         return dentry;
 302 }
 303
 304 /*
 305  * Short-cut version of permission(), for calling by
 306  * path_walk(), when dcache lock is held.  Combines parts
 307  * of permission() and vfs_permission(), and tests ONLY for
 308  * MAY_EXEC permission.
 309  *
 310  * If appropriate, check DAC only.  If not appropriate, or
 311  * short-cut DAC fails, then call permission() to do more
 312  * complete permission check.
 313  */
 314 static inline int exec_permission_lite(struct inode *inode,
 315                                        struct nameidata *nd)
 316 {
 317         umode_t mode = inode->i_mode;
 318
 319         if ((inode->i_op && inode->i_op->permission))
 320                 return -EAGAIN;
 321
 322         if (current->fsuid == inode->i_uid)
 323                 mode >>= 6;
 324         else if (in_group_p(inode->i_gid))
 325                 mode >>= 3;
 326
 327         if (mode & MAY_EXEC)
 328                 goto ok;
 329
 330         if ((inode->i_mode & S_IXUGO) && capable(CAP_DAC_OVERRIDE))
 331                 goto ok;
 332
 333         if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_READ_SEARCH))
 334                 goto ok;
 335
 336         return -EACCES;
 337 ok:
 338         return security_inode_permission(inode, MAY_EXEC, nd);
 339 }
 340
 341 /*
 342  * This is called when everything else fails, and we actually have
 343  * to go to the low-level filesystem to find out what we should do..
 344  *
 345  * We get the directory semaphore, and after getting that we also
 346  * make sure that nobody added the entry to the dcache in the meantime..
 347  * SMP-safe
 348  */
 349 static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd)
 350 {
 351         struct dentry * result;
 352         struct inode *dir = parent->d_inode;
 353
 354         down(&dir->i_sem);
 355         /*
 356          * First re-do the cached lookup just in case it was created
 357          * while we waited for the directory semaphore..
 358          *
 359          * FIXME! This could use version numbering or similar to
 360          * avoid unnecessary cache lookups.
 361          *
 362          * The "dcache_lock" is purely to protect the RCU list walker
 363          * from concurrent renames at this point (we mustn't get false
 364          * negatives from the RCU list walk here, unlike the optimistic
 365          * fast walk).
 366          *
 367          * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup
 368          */
 369         result = d_lookup(parent, name);
 370         if (!result) {
 371                 struct dentry * dentry = d_alloc(parent, name);
 372                 result = ERR_PTR(-ENOMEM);
 373                 if (dentry) {
 374                         result = dir->i_op->lookup(dir, dentry, nd);
 375                         if (result)
 376                                 dput(dentry);
 377                         else
 378                                 result = dentry;
 379                 }
 380                 up(&dir->i_sem);
 381                 return result;
 382         }
 383
 384         /*
 385          * Uhhuh! Nasty case: the cache was re-populated while
 386          * we waited on the semaphore. Need to revalidate.
 387          */
 388         up(&dir->i_sem);
 389         if (result->d_op && result->d_op->d_revalidate) {
 390                 if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) {
 391                         dput(result);
 392                         result = ERR_PTR(-ENOENT);
 393                 }
 394         }
 395         return result;
 396 }
 397
 398 inline void nd_set_link(struct nameidata *nd, char *path)
 399 {
 400         nd->saved_names[current->link_count] = path;
 401 }
 402
 403 inline char *nd_get_link(struct nameidata *nd)
 404 {
 405         return nd->saved_names[current->link_count];
 406 }
 407
 408 EXPORT_SYMBOL(nd_set_link);
 409 EXPORT_SYMBOL(nd_get_link);
 410
 411 static inline int __vfs_follow_link(struct nameidata *, const char *);
 412
 413 /*
 414  * This limits recursive symlink follows to 8, while
 415  * limiting consecutive symlinks to 40.
 416  *
 417  * Without that kind of total limit, nasty chains of consecutive
 418  * symlinks can cause almost arbitrarily long lookups.
 419  */
 420 static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
 421 {
 422         int err = -ELOOP;
 423         if (current->link_count >= MAX_NESTED_LINKS)
 424                 goto loop;
 425         if (current->total_link_count >= 40)
 426                 goto loop;
 427         cond_resched();
 428         err = security_inode_follow_link(dentry, nd);
 429         if (err)
 430                 goto loop;
 431         current->link_count++;
 432         current->total_link_count++;
 433         touch_atime(nd->mnt, dentry);
 434         nd_set_link(nd, NULL);
 435         err = dentry->d_inode->i_op->follow_link(dentry, nd);
 436         if (!err) {
 437                 char *s = nd_get_link(nd);
 438                 if (s)
 439                         err = __vfs_follow_link(nd, s);
 440                 if (dentry->d_inode->i_op->put_link)
 441                         dentry->d_inode->i_op->put_link(dentry, nd);
 442         }
 443         current->link_count--;
 444         return err;
 445 loop:
 446         path_release(nd);
 447         return err;
 448 }
 449
 450 int follow_up(struct vfsmount **mnt, struct dentry **dentry)
 451 {
 452         struct vfsmount *parent;
 453         struct dentry *mountpoint;
 454         spin_lock(&vfsmount_lock);
 455         parent=(*mnt)->mnt_parent;
 456         if (parent == *mnt) {
 457                 spin_unlock(&vfsmount_lock);
 458                 return 0;
 459         }
 460         mntget(parent);
 461         mountpoint=dget((*mnt)->mnt_mountpoint);
 462         spin_unlock(&vfsmount_lock);
 463         dput(*dentry);
 464         *dentry = mountpoint;
 465         mntput(*mnt);
 466         *mnt = parent;
 467         return 1;
 468 }
 469
 470 /* no need for dcache_lock, as serialization is taken care in
 471  * namespace.c
 472  */
 473 static int follow_mount(struct vfsmount **mnt, struct dentry **dentry)
 474 {
 475         int res = 0;
 476         while (d_mountpoint(*dentry)) {
 477                 struct vfsmount *mounted = lookup_mnt(*mnt, *dentry);
 478                 if (!mounted)
 479                         break;
 480                 mntput(*mnt);
 481                 *mnt = mounted;
 482                 dput(*dentry);
 483                 *dentry = dget(mounted->mnt_root);
 484                 res = 1;
 485         }
 486         return res;
 487 }
 488
 489 /* no need for dcache_lock, as serialization is taken care in
 490  * namespace.c
 491  */
 492 static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry)
 493 {
 494         struct vfsmount *mounted;
 495
 496         mounted = lookup_mnt(*mnt, *dentry);
 497         if (mounted) {
 498                 mntput(*mnt);
 499                 *mnt = mounted;
 500                 dput(*dentry);
 501                 *dentry = dget(mounted->mnt_root);
 502                 return 1;
 503         }
 504         return 0;
 505 }
 506
 /* Non-inline entry point: forwards to __follow_down() and returns its result. */
 int follow_down(struct vfsmount **mnt, struct dentry **dentry)
 {
         int crossed = __follow_down(mnt, dentry);

         return crossed;
 }
 511
 512 static inline void follow_dotdot(struct vfsmount **mnt, struct dentry **dentry)
 513 {
 514         while(1) {
 515                 struct vfsmount *parent;
 516                 struct dentry *old = *dentry;
 517
 518                 read_lock(&current->fs->lock);
 519                 if (*dentry == current->fs->root &&
 520                     *mnt == current->fs->rootmnt) {
 521                         read_unlock(&current->fs->lock);
 522                         break;
 523                 }
 524                 read_unlock(&current->fs->lock);
 525                 spin_lock(&dcache_lock);
 526                 if (*dentry != (*mnt)->mnt_root) {
 527                         *dentry = dget((*dentry)->d_parent);
 528                         spin_unlock(&dcache_lock);
 529                         dput(old);
 530                         break;
 531                 }
 532                 spin_unlock(&dcache_lock);
 533                 spin_lock(&vfsmount_lock);
 534                 parent = (*mnt)->mnt_parent;
 535                 if (parent == *mnt) {
 536                         spin_unlock(&vfsmount_lock);
 537                         break;
 538                 }
 539                 mntget(parent);
 540                 *dentry = dget((*mnt)->mnt_mountpoint);
 541                 spin_unlock(&vfsmount_lock);
 542                 dput(old);
 543                 mntput(*mnt);
 544                 *mnt = parent;
 545         }
 546         follow_mount(mnt, dentry);
 547 }
 548
 549 struct path {      /* (mount, dentry) pair filled in by do_lookup() */
 550         struct vfsmount *mnt;      /* mount the lookup started from (nd->mnt) */
 551         struct dentry *dentry;     /* dentry found for the component */
 552 };
 553
 554 /*
 555  *  It's more convoluted than I'd like it to be, but... it's still fairly
 556  *  small and for now I'd prefer to have fast path as straight as possible.
 557  *  It _is_ time-critical.
 558  */
 559 static int do_lookup(struct nameidata *nd, struct qstr *name,
 560                      struct path *path)
 561 {
 562         struct vfsmount *mnt = nd->mnt;
 563         struct dentry *dentry = __d_lookup(nd->dentry, name);
 564
 565         if (!dentry)
 566                 goto need_lookup;
 567         if (dentry->d_op && dentry->d_op->d_revalidate)
 568                 goto need_revalidate;
 569 done:
 570         path->mnt = mnt;
 571         path->dentry = dentry;
 572         return 0;
 573
 574 need_lookup:
 575         dentry = real_lookup(nd->dentry, name, nd);
 576         if (IS_ERR(dentry))
 577                 goto fail;
 578         goto done;
 579
 580 need_revalidate:
 581         if (dentry->d_op->d_revalidate(dentry, nd))
 582                 goto done;
 583         if (d_invalidate(dentry))
 584                 goto done;
 585         dput(dentry);
 586         goto need_lookup;
 587
 588 fail:
 589         return PTR_ERR(dentry);
 590 }
 591
 592 /*
 593  * Name resolution.
 594  *
 595  * This is the basic name resolution function, turning a pathname
 596  * into the final dentry.
 597  *
 598  * We expect 'base' to be positive and a directory.
      *
      * On success 0 is returned and the result is left in nd->mnt/nd->dentry.
      * For LOOKUP_PARENT walks the last component is not looked up; it is
      * recorded in nd->last and nd->last_type instead.
      *
      * On failure a negative errno is returned.  Errors that leave the loop
      * with "break" release nd via path_release(); a failed
      * do_follow_link() returns through return_err without it
      * (NOTE(review): presumably nd has already been released on that
      * path - confirm against __vfs_follow_link()).
      *
      * With LOOKUP_ATOMIC set the walk fails with -EWOULDBLOCKIO as soon as
      * a component would need do_lookup().
 599  */
 600 int fastcall link_path_walk(const char * name, struct nameidata *nd)
 601 {
 602         struct path next;
 603         struct inode *inode;
 604         int err, atomic;
 605         unsigned int lookup_flags = nd->flags;
 606
 607         atomic = (lookup_flags & LOOKUP_ATOMIC);
 608
 609         while (*name=='/')
 610                 name++;
             /* Nothing left after the leading slashes: only revalidate. */
 611         if (!*name)
 612                 goto return_reval;
 613
 614         inode = nd->dentry->d_inode;
             /*
              * A non-zero link_count means we were entered while following a
              * symlink: the caller's flags are replaced by plain LOOKUP_FOLLOW.
              */
 615         if (current->link_count)
 616                 lookup_flags = LOOKUP_FOLLOW;
 617
 618         /* At this point we know we have a real path component. */
 619         for(;;) {
 620                 unsigned long hash;
 621                 struct qstr this;
 622                 unsigned int c;
 623
                     /* Search permission on the current directory; fall back
                      * to the full permission() when the fast check says -EAGAIN. */
 624                 err = exec_permission_lite(inode, nd);
 625                 if (err == -EAGAIN) {
 626                         err = permission(inode, MAY_EXEC, nd);
 627                 }
 628                 if (err)
 629                         break;
 630
 631                 this.name = name;
 632                 c = *(const unsigned char *)name;
 633
                     /* Hash the component while scanning for its end. */
 634                 hash = init_name_hash();
 635                 do {
 636                         name++;
 637                         hash = partial_name_hash(c, hash);
 638                         c = *(const unsigned char *)name;
 639                 } while (c && (c != '/'));
 640                 this.len = name - (const char *) this.name;
 641                 this.hash = end_name_hash(hash);
 642
 643                 /* remove trailing slashes? */
 644                 if (!c)
 645                         goto last_component;
 646                 while (*++name == '/');
 647                 if (!*name)
 648                         goto last_with_slashes;
 649
 650                 /*
 651                  * "." and ".." are special - ".." especially so because it has
 652                  * to be able to know about the current root directory and
 653                  * parent relationships.
 654                  */
 655                 if (this.name[0] == '.') switch (this.len) {
 656                         default:
 657                                 break;
 658                         case 2:
 659                                 if (this.name[1] != '.')
 660                                         break;
 661                                 follow_dotdot(&nd->mnt, &nd->dentry);
 662                                 inode = nd->dentry->d_inode;
 663                                 /* fallthrough */
 664                         case 1:
 665                                 continue;
 666                 }
 667                 /*
 668                  * See if the low-level filesystem might want
 669                  * to use its own hash..
 670                  */
 671                 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
 672                         err = nd->dentry->d_op->d_hash(nd->dentry, &this);
 673                         if (err < 0)
 674                                 break;
 675                 }
                     /* LOOKUP_ATOMIC walks give up here instead of calling do_lookup(). */
 676                 err = -EWOULDBLOCKIO;
 677                 if (atomic)
 678                         break;
 679                 nd->flags |= LOOKUP_CONTINUE;
 680                 /* This does the actual lookups.. */
 681                 err = do_lookup(nd, &this, &next);
 682                 if (err)
 683                         break;
 684                 /* Check mountpoints.. */
 685                 follow_mount(&next.mnt, &next.dentry);
 686
 687                 err = -ENOENT;
 688                 inode = next.dentry->d_inode;
 689                 if (!inode)
 690                         goto out_dput;
 691                 err = -ENOTDIR;
 692                 if (!inode->i_op)
 693                         goto out_dput;
 694
 695                 if (inode->i_op->follow_link) {
                             /* Hold an extra mount reference across the follow;
                              * it and next.dentry are dropped right afterwards. */
 696                         mntget(next.mnt);
 697                         err = do_follow_link(next.dentry, nd);
 698                         dput(next.dentry);
 699                         mntput(next.mnt);
 700                         if (err)
 701                                 goto return_err;
 702                         err = -ENOENT;
 703                         inode = nd->dentry->d_inode;
 704                         if (!inode)
 705                                 break;
 706                         err = -ENOTDIR;
 707                         if (!inode->i_op)
 708                                 break;
 709                 } else {
                             /* Ordinary component: step nd onto the new dentry. */
 710                         dput(nd->dentry);
 711                         nd->mnt = next.mnt;
 712                         nd->dentry = next.dentry;
 713                 }
                     /* More components follow, so this one must support ->lookup(). */
 714                 err = -ENOTDIR;
 715                 if (!inode->i_op->lookup)
 716                         break;
 717                 continue;
 718                 /* here ends the main loop */
 719
 720 last_with_slashes:
 721                 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
 722 last_component:
 723                 nd->flags &= ~LOOKUP_CONTINUE;
 724                 if (lookup_flags & LOOKUP_PARENT)
 725                         goto lookup_parent;
 726                 if (this.name[0] == '.') switch (this.len) {
 727                         default:
 728                                 break;
 729                         case 2:
 730                                 if (this.name[1] != '.')
 731                                         break;
 732                                 follow_dotdot(&nd->mnt, &nd->dentry);
 733                                 inode = nd->dentry->d_inode;
 734                                 /* fallthrough */
 735                         case 1:
 736                                 goto return_reval;
 737                 }
 738                 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
 739                         err = nd->dentry->d_op->d_hash(nd->dentry, &this);
 740                         if (err < 0)
 741                                 break;
 742                 }
                     /* Same LOOKUP_ATOMIC bail-out as in the main loop. */
 743                 err = -EWOULDBLOCKIO;
 744                 if (atomic)
 745                         break;
 746                 err = do_lookup(nd, &this, &next);
 747                 if (err)
 748                         break;
 749                 follow_mount(&next.mnt, &next.dentry);
 750                 inode = next.dentry->d_inode;
                     /* The final component is followed only when LOOKUP_FOLLOW is set. */
 751                 if ((lookup_flags & LOOKUP_FOLLOW)
 752                     && inode && inode->i_op && inode->i_op->follow_link) {
 753                         mntget(next.mnt);
 754                         err = do_follow_link(next.dentry, nd);
 755                         dput(next.dentry);
 756                         mntput(next.mnt);
 757                         if (err)
 758                                 goto return_err;
 759                         inode = nd->dentry->d_inode;
 760                 } else {
 761                         dput(nd->dentry);
 762                         nd->mnt = next.mnt;
 763                         nd->dentry = next.dentry;
 764                 }
 765                 err = -ENOENT;
 766                 if (!inode)
 767                         break;
 768                 if (lookup_flags & LOOKUP_DIRECTORY) {
 769                         err = -ENOTDIR;
 770                         if (!inode->i_op || !inode->i_op->lookup)
 771                                 break;
 772                 }
 773                 goto return_base;
 774 lookup_parent:
                     /* Record the last component for the caller instead of resolving it. */
 775                 nd->last = this;
 776                 nd->last_type = LAST_NORM;
 777                 if (this.name[0] != '.')
 778                         goto return_base;
 779                 if (this.len == 1)
 780                         nd->last_type = LAST_DOT;
 781                 else if (this.len == 2 && this.name[1] == '.')
 782                         nd->last_type = LAST_DOTDOT;
 783                 else
 784                         goto return_base;
 785 return_reval:
 786                 /*
 787                  * We bypassed the ordinary revalidation routines.
 788                  * We may need to check the cached dentry for staleness.
 789                  */
 790                 if (nd->dentry && nd->dentry->d_sb &&
 791                     (nd->dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
 792                         err = -ESTALE;
 793                         /* Note: we do not d_invalidate() */
 794                         if (!nd->dentry->d_op->d_revalidate(nd->dentry, nd))
 795                                 break;
 796                 }
 797 return_base:
 798                 return 0;
 799 out_dput:
 800                 dput(next.dentry);
 801                 break;
 802         }
             /* Every "break" above lands here: drop nd's references. */
 803         path_release(nd);
 804 return_err:
 805         return err;
 806 }
 807
 808 int fastcall path_walk(const char * name, struct nameidata *nd)
 809 {
 810         current->total_link_count = 0;
 811         return link_path_walk(name, nd);
 812 }
 813
 814 /* SMP-safe */
 815 /* returns 1 if everything is done */
 816 static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
 817 {
 818         if (path_walk(name, nd))
 819                 return 0;               /* something went wrong... */
 820
 821         if (!nd->dentry->d_inode || S_ISDIR(nd->dentry->d_inode->i_mode)) {
 822                 struct nameidata nd_root;
 823                 /*
 824                  * NAME was not found in alternate root or it's a directory.  Try to find
 825                  * it in the normal root:
 826                  */
 827                 nd_root.last_type = LAST_ROOT;
 828                 nd_root.flags = nd->flags;
 829                 memcpy(&nd_root.intent, &nd->intent, sizeof(nd_root.intent));
 830                 read_lock(&current->fs->lock);
 831                 nd_root.mnt = mntget(current->fs->rootmnt);
 832                 nd_root.dentry = dget(current->fs->root);
 833                 read_unlock(&current->fs->lock);
 834                 if (path_walk(name, &nd_root))
 835                         return 1;
 836                 if (nd_root.dentry->d_inode) {
 837                         path_release(nd);
 838                         nd->dentry = nd_root.dentry;
 839                         nd->mnt = nd_root.mnt;
 840                         nd->last = nd_root.last;
 841                         return 1;
 842                 }
 843                 path_release(&nd_root);
 844         }
 845         return 1;
 846 }
 847
 848 void set_fs_altroot(void)
 849 {
 850         char *emul = __emul_prefix();
 851         struct nameidata nd;
 852         struct vfsmount *mnt = NULL, *oldmnt;
 853         struct dentry *dentry = NULL, *olddentry;
 854         int err;
 855
 856         if (!emul)
 857                 goto set_it;
 858         err = path_lookup(emul, LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_NOALT, &nd);
 859         if (!err) {
 860                 mnt = nd.mnt;
 861                 dentry = nd.dentry;
 862         }
 863 set_it:
 864         write_lock(&current->fs->lock);
 865         oldmnt = current->fs->altrootmnt;
 866         olddentry = current->fs->altroot;
 867         current->fs->altrootmnt = mnt;
 868         current->fs->altroot = dentry;
 869         write_unlock(&current->fs->lock);
 870         if (olddentry) {
 871                 dput(olddentry);
 872                 mntput(oldmnt);
 873         }
 874 }
 875
 876 /* SMP-safe */
 877 static inline int
 878 walk_init_root(const char *name, struct nameidata *nd)
 879 {
 880         read_lock(&current->fs->lock);
 881         if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
 882                 nd->mnt = mntget(current->fs->altrootmnt);
 883                 nd->dentry = dget(current->fs->altroot);
 884                 read_unlock(&current->fs->lock);
 885                 if (__emul_lookup_dentry(name,nd))
 886                         return 0;
 887                 read_lock(&current->fs->lock);
 888         }
 889         nd->mnt = mntget(current->fs->rootmnt);
 890         nd->dentry = dget(current->fs->root);
 891         read_unlock(&current->fs->lock);
 892         return 1;
 893 }
 894
 895 int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata *nd)
 896 {
 897         int retval;
 898
 899         nd->last_type = LAST_ROOT; /* if there are only slashes... */
 900         nd->flags = flags;
 901
 902         read_lock(&current->fs->lock);
 903         if (*name=='/') {
 904                 if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
 905                         nd->mnt = mntget(current->fs->altrootmnt);
 906                         nd->dentry = dget(current->fs->altroot);
 907                         read_unlock(&current->fs->lock);
 908                         if (__emul_lookup_dentry(name,nd))
 909                                 return 0;
 910                         read_lock(&current->fs->lock);
 911                 }
 912                 nd->mnt = mntget(current->fs->rootmnt);
 913                 nd->dentry = dget(current->fs->root);
 914         }
 915         else{
 916                 nd->mnt = mntget(current->fs->pwdmnt);
 917                 nd->dentry = dget(current->fs->pwd);
 918         }
 919         read_unlock(&current->fs->lock);
 920         current->total_link_count = 0;
 921         retval = link_path_walk(name, nd);
 922         if (unlikely(current->audit_context
 923                      && nd && nd->dentry && nd->dentry->d_inode))
 924                 audit_inode(name,
 925                             nd->dentry->d_inode->i_ino,
 926                             nd->dentry->d_inode->i_rdev);
 927         return retval;
 928 }
 929
 930 /*
 931  * Restricted form of lookup. Doesn't follow links, single-component only,
 932  * needs parent already locked. Doesn't follow mounts.
 933  * SMP-safe.
 934  */
 935 static struct dentry * __lookup_hash(struct qstr *name, struct dentry * base, struct nameidata *nd)
 936 {
 937         struct dentry * dentry;
 938         struct inode *inode;
 939         int err;
 940
 941         inode = base->d_inode;
 942         err = permission(inode, MAY_EXEC, nd);
 943         dentry = ERR_PTR(err);
 944         if (err)
 945                 goto out;
 946
 947         /*
 948          * See if the low-level filesystem might want
 949          * to use its own hash..
 950          */
 951         if (base->d_op && base->d_op->d_hash) {
 952                 err = base->d_op->d_hash(base, name);
 953                 dentry = ERR_PTR(err);
 954                 if (err < 0)
 955                         goto out;
 956         }
 957
 958         dentry = cached_lookup(base, name, nd);
 959         if (!dentry) {
 960                 struct dentry *new = d_alloc(base, name);
 961                 dentry = ERR_PTR(-ENOMEM);
 962                 if (!new)
 963                         goto out;
 964                 dentry = inode->i_op->lookup(inode, new, nd);
 965                 if (!dentry)
 966                         dentry = new;
 967                 else
 968                         dput(new);
 969         }
 970 out:
 971         return dentry;
 972 }
 973
 974 struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
 975 {
 976         return __lookup_hash(name, base, NULL);
 977 }
 978
 979 /* SMP-safe */
 980 struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
 981 {
 982         unsigned long hash;
 983         struct qstr this;
 984         unsigned int c;
 985
 986         this.name = name;
 987         this.len = len;
 988         if (!len)
 989                 goto access;
 990
 991         hash = init_name_hash();
 992         while (len--) {
 993                 c = *(const unsigned char *)name++;
 994                 if (c == '/' || c == '\0')
 995                         goto access;
 996                 hash = partial_name_hash(c, hash);
 997         }
 998         this.hash = end_name_hash(hash);
 999
1000         return lookup_hash(&this, base);
1001 access:
1002         return ERR_PTR(-EACCES);
1003 }
1004
1005 /*
1006  *      namei()
1007  *
1008  * is used by most simple commands to get the inode of a specified name.
1009  * Open, link etc use their own routines, but this is enough for things
1010  * like 'chmod' etc.
1011  *
1012  * namei exists in two versions: namei/lnamei. The only difference is
1013  * that namei follows links, while lnamei does not.
1014  * SMP-safe
1015  */
1016 int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd)
1017 {
1018         char *tmp = getname(name);
1019         int err = PTR_ERR(tmp);
1020
1021         if (!IS_ERR(tmp)) {
1022                 err = path_lookup(tmp, flags, nd);
1023                 putname(tmp);
1024         }
1025         return err;
1026 }
1027
1028 /*
1029  * It's inline, so penalty for filesystems that don't use sticky bit is
1030  * minimal.
1031  */
1032 static inline int check_sticky(struct inode *dir, struct inode *inode)
1033 {
1034         if (!(dir->i_mode & S_ISVTX))
1035                 return 0;
1036         if (inode->i_uid == current->fsuid)
1037                 return 0;
1038         if (dir->i_uid == current->fsuid)
1039                 return 0;
1040         return !capable(CAP_FOWNER);
1041 }
1042
1043 /*
1044  *      Check whether we can remove a link victim from directory dir, check
1045  *  whether the type of victim is right.
1046  *  1. We can't do it if dir is read-only (done in permission())
1047  *  2. We should have write and exec permissions on dir
1048  *  3. We can't remove anything from append-only dir
1049  *  4. We can't do anything with immutable dir (done in permission())
1050  *  5. If the sticky bit on dir is set we should either
1051  *      a. be owner of dir, or
1052  *      b. be owner of victim, or
1053  *      c. have CAP_FOWNER capability
1054  *  6. If the victim is append-only or immutable we can't do antyhing with
1055  *     links pointing to it.
1056  *  7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
1057  *  8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
1058  *  9. We can't remove a root or mountpoint.
1059  * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
1060  *     nfs_async_unlink().
1061  */
1062 static inline int may_delete(struct inode *dir,struct dentry *victim,int isdir)
1063 {
1064         int error;
1065         if (!victim->d_inode)
1066                 return -ENOENT;
1067         if (victim->d_parent->d_inode != dir)
1068                 BUG();
1069
1070         error = permission(dir,MAY_WRITE | MAY_EXEC, NULL);
1071         if (error)
1072                 return error;
1073         if (IS_APPEND(dir))
1074                 return -EPERM;
1075         if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
1076             IS_IMMUTABLE(victim->d_inode))
1077                 return -EPERM;
1078         if (isdir) {
1079                 if (!S_ISDIR(victim->d_inode->i_mode))
1080                         return -ENOTDIR;
1081                 if (IS_ROOT(victim))
1082                         return -EBUSY;
1083         } else if (S_ISDIR(victim->d_inode->i_mode))
1084                 return -EISDIR;
1085         if (IS_DEADDIR(dir))
1086                 return -ENOENT;
1087         if (victim->d_flags & DCACHE_NFSFS_RENAMED)
1088                 return -EBUSY;
1089         return 0;
1090 }
1091
1092 /*      Check whether we can create an object with dentry child in directory
1093  *  dir.
1094  *  1. We can't do it if child already exists (open has special treatment for
1095  *     this case, but since we are inlined it's OK)
1096  *  2. We can't do it if dir is read-only (done in permission())
1097  *  3. We should have write and exec permissions on dir
1098  *  4. We can't do it if dir is immutable (done in permission())
1099  */
1100 static inline int may_create(struct inode *dir, struct dentry *child,
1101                              struct nameidata *nd)
1102 {
1103         if (child->d_inode)
1104                 return -EEXIST;
1105         if (IS_DEADDIR(dir))
1106                 return -ENOENT;
1107         return permission(dir,MAY_WRITE | MAY_EXEC, nd);
1108 }
1109
1110 /*
1111  * Special case: O_CREAT|O_EXCL implies O_NOFOLLOW for security
1112  * reasons.
1113  *
1114  * O_DIRECTORY translates into forcing a directory lookup.
1115  */
1116 static inline int lookup_flags(unsigned int f)
1117 {
1118         unsigned long retval = LOOKUP_FOLLOW;
1119
1120         if (f & O_NOFOLLOW)
1121                 retval &= ~LOOKUP_FOLLOW;
1122
1123         if ((f & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
1124                 retval &= ~LOOKUP_FOLLOW;
1125
1126         if (f & O_DIRECTORY)
1127                 retval |= LOOKUP_DIRECTORY;
1128         if (f & O_ATOMICLOOKUP)
1129                 retval |= LOOKUP_ATOMIC;
1130
1131         return retval;
1132 }
1133
1134 /*
1135  * p1 and p2 should be directories on the same fs.
1136  */
1137 struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
1138 {
1139         struct dentry *p;
1140
1141         if (p1 == p2) {
1142                 down(&p1->d_inode->i_sem);
1143                 return NULL;
1144         }
1145
1146         down(&p1->d_inode->i_sb->s_vfs_rename_sem);
1147
1148         for (p = p1; p->d_parent != p; p = p->d_parent) {
1149                 if (p->d_parent == p2) {
1150                         down(&p2->d_inode->i_sem);
1151                         down(&p1->d_inode->i_sem);
1152                         return p;
1153                 }
1154         }
1155
1156         for (p = p2; p->d_parent != p; p = p->d_parent) {
1157                 if (p->d_parent == p1) {
1158                         down(&p1->d_inode->i_sem);
1159                         down(&p2->d_inode->i_sem);
1160                         return p;
1161                 }
1162         }
1163
1164         down(&p1->d_inode->i_sem);
1165         down(&p2->d_inode->i_sem);
1166         return NULL;
1167 }
1168
1169 void unlock_rename(struct dentry *p1, struct dentry *p2)
1170 {
1171         up(&p1->d_inode->i_sem);
1172         if (p1 != p2) {
1173                 up(&p2->d_inode->i_sem);
1174                 up(&p1->d_inode->i_sb->s_vfs_rename_sem);
1175         }
1176 }
1177
1178 int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
1179                 struct nameidata *nd)
1180 {
1181         int error = may_create(dir, dentry, nd);
1182
1183         if (error)
1184                 return error;
1185
1186         if (!dir->i_op || !dir->i_op->create)
1187                 return -EACCES; /* shouldn't it be ENOSYS? */
1188         mode &= S_IALLUGO;
1189         mode |= S_IFREG;
1190         error = security_inode_create(dir, dentry, mode);
1191         if (error)
1192                 return error;
1193         DQUOT_INIT(dir);
1194         error = dir->i_op->create(dir, dentry, mode, nd);
1195         if (!error) {
1196                 inode_dir_notify(dir, DN_CREATE);
1197                 security_inode_post_create(dir, dentry, mode);
1198         }
1199         return error;
1200 }
1201
1202 int may_open(struct nameidata *nd, int acc_mode, int flag)
1203 {
1204         struct dentry *dentry = nd->dentry;
1205         struct inode *inode = dentry->d_inode;
1206         int error;
1207
1208         if (!inode)
1209                 return -ENOENT;
1210
1211         if (S_ISLNK(inode->i_mode))
1212                 return -ELOOP;
1213
1214         if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
1215                 return -EISDIR;
1216
1217         error = permission(inode, acc_mode, nd);
1218         if (error)
1219                 return error;
1220
1221         /*
1222          * FIFO's, sockets and device files are special: they don't
1223          * actually live on the filesystem itself, and as such you
1224          * can write to them even if the filesystem is read-only.
1225          */
1226         if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
1227                 flag &= ~O_TRUNC;
1228         } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
1229                 if (nd->mnt->mnt_flags & MNT_NODEV)
1230                         return -EACCES;
1231
1232                 flag &= ~O_TRUNC;
1233         } else if (IS_RDONLY(inode) && (flag & FMODE_WRITE))
1234                 return -EROFS;
1235         /*
1236          * An append-only file must be opened in append mode for writing.
1237          */
1238         if (IS_APPEND(inode)) {
1239                 if  ((flag & FMODE_WRITE) && !(flag & O_APPEND))
1240                         return -EPERM;
1241                 if (flag & O_TRUNC)
1242                         return -EPERM;
1243         }
1244
1245         /*
1246          * Ensure there are no outstanding leases on the file.
1247          */
1248         error = break_lease(inode, flag);
1249         if (error)
1250                 return error;
1251
1252         if (flag & O_TRUNC) {
1253                 error = get_write_access(inode);
1254                 if (error)
1255                         return error;
1256
1257                 /*
1258                  * Refuse to truncate files with mandatory locks held on them.
1259                  */
1260                 error = locks_verify_locked(inode);
1261                 if (!error) {
1262                         DQUOT_INIT(inode);
1263
1264                         error = do_truncate(dentry, 0);
1265                 }
1266                 put_write_access(inode);
1267                 if (error)
1268                         return error;
1269         } else
1270                 if (flag & FMODE_WRITE)
1271                         DQUOT_INIT(inode);
1272
1273         return 0;
1274 }
1275
1276 /*
1277  *      open_namei()
1278  *
1279  * namei for open - this is in fact almost the whole open-routine.
1280  *
1281  * Note that the low bits of "flag" aren't the same as in the open
1282  * system call - they are 00 - no permissions needed
1283  *                        01 - read permission needed
1284  *                        10 - write permission needed
1285  *                        11 - read/write permissions needed
1286  * which is a lot more logical, and also allows the "no perm" needed
1287  * for symlinks (where the permissions are checked later).
1288  * SMP-safe
1289  */
 1290 int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
 1291 {
              /*
               * Resolve pathname for an open described by flag/mode and leave
               * the result in nd.  Returns 0 on success, otherwise a negative
               * errno.  Failures that leave through the exit/exit_dput labels
               * drop the nd references with path_release().
               */
 1292         int acc_mode, error = 0;
 1293         struct dentry *dentry;
 1294         struct dentry *dir;
              /* Number of symlinks followed for the last component; see do_link. */
 1295         int count = 0;
 1296
 1297         acc_mode = ACC_MODE(flag);
 1298
 1299         /* Allow the LSM permission hook to distinguish append
 1300            access from general write access. */
 1301         if (flag & O_APPEND)
 1302                 acc_mode |= MAY_APPEND;
 1303
 1304         /* Fill in the open() intent data */
 1305         nd->intent.open.flags = flag;
 1306         nd->intent.open.create_mode = mode;
 1307
 1308         /*
 1309          * The simplest case - just a plain lookup.
 1310          */
 1311         if (!(flag & O_CREAT)) {
 1312                 error = path_lookup(pathname, lookup_flags(flag)|LOOKUP_OPEN, nd);
 1313                 if (error)
 1314                         return error;
 1315                 goto ok;
 1316         }
 1317
 1318         /*
 1319          * Create - we need to know the parent.
 1320          */
 1321         error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd);
 1322         if (error)
 1323                 return error;
 1324
 1325         /*
 1326          * We have the parent and last component. First of all, check
 1327          * that we are not asked to creat(2) an obvious directory - that
 1328          * will not do.
 1329          */
              /*
               * A non-NUL byte right after the last component means the name did
               * not end there (sys_unlink() labels the same condition "slashes").
               */
 1330         error = -EISDIR;
 1331         if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
 1332                 goto exit;
 1333
 1334         dir = nd->dentry;
 1335         nd->flags &= ~LOOKUP_PARENT;
 1336         down(&dir->d_inode->i_sem);
 1337         dentry = __lookup_hash(&nd->last, nd->dentry, nd);
 1338
      /*
       * do_last: dentry is the lookup result for the last component in dir;
       * both ways of getting here take dir->d_inode->i_sem first.
       */
 1339 do_last:
 1340         error = PTR_ERR(dentry);
 1341         if (IS_ERR(dentry)) {
 1342                 up(&dir->d_inode->i_sem);
 1343                 goto exit;
 1344         }
 1345
 1346         /* Negative dentry, just create the file */
 1347         if (!dentry->d_inode) {
 1348                 if (!IS_POSIXACL(dir->d_inode))
 1349                         mode &= ~current->fs->umask;
 1350                 error = vfs_create(dir->d_inode, dentry, mode, nd);
 1351                 up(&dir->d_inode->i_sem);
                      /*
                       * Swap the parent reference in nd for the looked-up dentry,
                       * even on error, so that the exit path releases it.
                       */
 1352                 dput(nd->dentry);
 1353                 nd->dentry = dentry;
 1354                 if (error)
 1355                         goto exit;
 1356                 /* Don't check for write permission, don't truncate */
 1357                 acc_mode = 0;
 1358                 flag &= ~O_TRUNC;
 1359                 goto ok;
 1360         }
 1361
 1362         /*
 1363          * It already exists.
 1364          */
 1365         up(&dir->d_inode->i_sem);
 1366
 1367         error = -EEXIST;
 1368         if (flag & O_EXCL)
 1369                 goto exit_dput;
 1370
              /* With O_NOFOLLOW a mountpoint is refused with -ELOOP rather than crossed. */
 1371         if (d_mountpoint(dentry)) {
 1372                 error = -ELOOP;
 1373                 if (flag & O_NOFOLLOW)
 1374                         goto exit_dput;
 1375                 while (__follow_down(&nd->mnt,&dentry) && d_mountpoint(dentry));
 1376         }
 1377         error = -ENOENT;
 1378         if (!dentry->d_inode)
 1379                 goto exit_dput;
              /* An inode with a ->follow_link method is handled as a symlink. */
 1380         if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
 1381                 goto do_link;
 1382
 1383         dput(nd->dentry);
 1384         nd->dentry = dentry;
 1385         error = -EISDIR;
 1386         if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
 1387                 goto exit;
      /* ok: nd->dentry is the object to open; may_open() does the final checks. */
 1388 ok:
 1389         error = may_open(nd, acc_mode, flag);
 1390         if (error)
 1391                 goto exit;
 1392         return 0;
 1393
      /* exit_dput: drop the last-component dentry, then fall into exit. */
 1394 exit_dput:
 1395         dput(dentry);
 1396 exit:
 1397         path_release(nd);
 1398         return error;
 1399
 1400 do_link:
 1401         error = -ELOOP;
 1402         if (flag & O_NOFOLLOW)
 1403                 goto exit_dput;
 1404         /*
 1405          * This is subtle. Instead of calling do_follow_link() we do the
 1406          * thing by hands. The reason is that this way we have zero link_count
 1407          * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
 1408          * After that we have the parent and last component, i.e.
 1409          * we are in the same situation as after the first path_walk().
 1410          * Well, almost - if the last component is normal we get its copy
 1411          * stored in nd->last.name and we will have to putname() it when we
 1412          * are done. Procfs-like symlinks just set LAST_BIND.
 1413          */
 1414         nd->flags |= LOOKUP_PARENT;
 1415         error = security_inode_follow_link(dentry, nd);
 1416         if (error)
 1417                 goto exit_dput;
 1418         touch_atime(nd->mnt, dentry);
 1419         nd_set_link(nd, NULL);
 1420         error = dentry->d_inode->i_op->follow_link(dentry, nd);
 1421         if (!error) {
 1422                 char *s = nd_get_link(nd);
 1423                 if (s)
 1424                         error = __vfs_follow_link(nd, s);
 1425                 if (dentry->d_inode->i_op->put_link)
 1426                         dentry->d_inode->i_op->put_link(dentry, nd);
 1427         }
 1428         dput(dentry);
              /*
               * NOTE(review): this returns without path_release(nd); presumably a
               * failing ->follow_link/__vfs_follow_link has already released nd -
               * confirm against those helpers.
               */
 1429         if (error)
 1430                 return error;
 1431         nd->flags &= ~LOOKUP_PARENT;
 1432         if (nd->last_type == LAST_BIND) {
 1433                 dentry = nd->dentry;
 1434                 goto ok;
 1435         }
 1436         error = -EISDIR;
 1437         if (nd->last_type != LAST_NORM)
 1438                 goto exit;
 1439         if (nd->last.name[nd->last.len]) {
 1440                 putname(nd->last.name);
 1441                 goto exit;
 1442         }
              /* -ELOOP once count is already 32, i.e. on the 33rd symlink followed here. */
 1443         error = -ELOOP;
 1444         if (count++==32) {
 1445                 putname(nd->last.name);
 1446                 goto exit;
 1447         }
              /* Same setup as before the first do_last: lock the parent, look up the name. */
 1448         dir = nd->dentry;
 1449         down(&dir->d_inode->i_sem);
 1450         dentry = __lookup_hash(&nd->last, nd->dentry, nd);
 1451         putname(nd->last.name);
 1452         goto do_last;
 1453 }
1454
1455 /**
1456  * lookup_create - lookup a dentry, creating it if it doesn't exist
1457  * @nd: nameidata info
1458  * @is_dir: directory flag
1459  *
1460  * Simple function to lookup and return a dentry and create it
1461  * if it doesn't exist.  Is SMP-safe.
1462  */
1463 struct dentry *lookup_create(struct nameidata *nd, int is_dir)
1464 {
1465         struct dentry *dentry;
1466
1467         down(&nd->dentry->d_inode->i_sem);
1468         dentry = ERR_PTR(-EEXIST);
1469         if (nd->last_type != LAST_NORM)
1470                 goto fail;
1471         nd->flags &= ~LOOKUP_PARENT;
1472         dentry = lookup_hash(&nd->last, nd->dentry);
1473         if (IS_ERR(dentry))
1474                 goto fail;
1475         if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
1476                 goto enoent;
1477         return dentry;
1478 enoent:
1479         dput(dentry);
1480         dentry = ERR_PTR(-ENOENT);
1481 fail:
1482         return dentry;
1483 }
1484
1485 int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1486 {
1487         int error = may_create(dir, dentry, NULL);
1488
1489         if (error)
1490                 return error;
1491
1492         if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
1493                 return -EPERM;
1494
1495         if (!dir->i_op || !dir->i_op->mknod)
1496                 return -EPERM;
1497
1498         error = security_inode_mknod(dir, dentry, mode, dev);
1499         if (error)
1500                 return error;
1501
1502         DQUOT_INIT(dir);
1503         error = dir->i_op->mknod(dir, dentry, mode, dev);
1504         if (!error) {
1505                 inode_dir_notify(dir, DN_CREATE);
1506                 security_inode_post_mknod(dir, dentry, mode, dev);
1507         }
1508         return error;
1509 }
1510
1511 asmlinkage long sys_mknod(const char __user * filename, int mode, unsigned dev)
1512 {
1513         int error = 0;
1514         char * tmp;
1515         struct dentry * dentry;
1516         struct nameidata nd;
1517
1518         if (S_ISDIR(mode))
1519                 return -EPERM;
1520         tmp = getname(filename);
1521         if (IS_ERR(tmp))
1522                 return PTR_ERR(tmp);
1523
1524         error = path_lookup(tmp, LOOKUP_PARENT, &nd);
1525         if (error)
1526                 goto out;
1527         dentry = lookup_create(&nd, 0);
1528         error = PTR_ERR(dentry);
1529
1530         if (!IS_POSIXACL(nd.dentry->d_inode))
1531                 mode &= ~current->fs->umask;
1532         if (!IS_ERR(dentry)) {
1533                 switch (mode & S_IFMT) {
1534                 case 0: case S_IFREG:
1535                         error = vfs_create(nd.dentry->d_inode,dentry,mode,&nd);
1536                         break;
1537                 case S_IFCHR: case S_IFBLK:
1538                         error = vfs_mknod(nd.dentry->d_inode,dentry,mode,
1539                                         new_decode_dev(dev));
1540                         break;
1541                 case S_IFIFO: case S_IFSOCK:
1542                         error = vfs_mknod(nd.dentry->d_inode,dentry,mode,0);
1543                         break;
1544                 case S_IFDIR:
1545                         error = -EPERM;
1546                         break;
1547                 default:
1548                         error = -EINVAL;
1549                 }
1550                 dput(dentry);
1551         }
1552         up(&nd.dentry->d_inode->i_sem);
1553         path_release(&nd);
1554 out:
1555         putname(tmp);
1556
1557         return error;
1558 }
1559
1560 int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1561 {
1562         int error = may_create(dir, dentry, NULL);
1563
1564         if (error)
1565                 return error;
1566
1567         if (!dir->i_op || !dir->i_op->mkdir)
1568                 return -EPERM;
1569
1570         mode &= (S_IRWXUGO|S_ISVTX);
1571         error = security_inode_mkdir(dir, dentry, mode);
1572         if (error)
1573                 return error;
1574
1575         DQUOT_INIT(dir);
1576         error = dir->i_op->mkdir(dir, dentry, mode);
1577         if (!error) {
1578                 inode_dir_notify(dir, DN_CREATE);
1579                 security_inode_post_mkdir(dir,dentry, mode);
1580         }
1581         return error;
1582 }
1583
1584 asmlinkage long sys_mkdir(const char __user * pathname, int mode)
1585 {
1586         int error = 0;
1587         char * tmp;
1588
1589         tmp = getname(pathname);
1590         error = PTR_ERR(tmp);
1591         if (!IS_ERR(tmp)) {
1592                 struct dentry *dentry;
1593                 struct nameidata nd;
1594
1595                 error = path_lookup(tmp, LOOKUP_PARENT, &nd);
1596                 if (error)
1597                         goto out;
1598                 dentry = lookup_create(&nd, 1);
1599                 error = PTR_ERR(dentry);
1600                 if (!IS_ERR(dentry)) {
1601                         if (!IS_POSIXACL(nd.dentry->d_inode))
1602                                 mode &= ~current->fs->umask;
1603                         error = vfs_mkdir(nd.dentry->d_inode, dentry, mode);
1604                         dput(dentry);
1605                 }
1606                 up(&nd.dentry->d_inode->i_sem);
1607                 path_release(&nd);
1608 out:
1609                 putname(tmp);
1610         }
1611
1612         return error;
1613 }
1614
1615 /*
1616  * We try to drop the dentry early: we should have
1617  * a usage count of 2 if we're the only user of this
1618  * dentry, and if that is true (possibly after pruning
1619  * the dcache), then we drop the dentry now.
1620  *
1621  * A low-level filesystem can, if it choses, legally
1622  * do a
1623  *
1624  *      if (!d_unhashed(dentry))
1625  *              return -EBUSY;
1626  *
1627  * if it cannot handle the case of removing a directory
1628  * that is still in use by something else..
1629  */
1630 static void d_unhash(struct dentry *dentry)
1631 {
1632         dget(dentry);
1633         spin_lock(&dcache_lock);
1634         switch (atomic_read(&dentry->d_count)) {
1635         default:
1636                 spin_unlock(&dcache_lock);
1637                 shrink_dcache_parent(dentry);
1638                 spin_lock(&dcache_lock);
1639                 if (atomic_read(&dentry->d_count) != 2)
1640                         break;
1641         case 2:
1642                 __d_drop(dentry);
1643         }
1644         spin_unlock(&dcache_lock);
1645 }
1646
1647 int vfs_rmdir(struct inode *dir, struct dentry *dentry)
1648 {
1649         int error = may_delete(dir, dentry, 1);
1650
1651         if (error)
1652                 return error;
1653
1654         if (!dir->i_op || !dir->i_op->rmdir)
1655                 return -EPERM;
1656
1657         DQUOT_INIT(dir);
1658
1659         down(&dentry->d_inode->i_sem);
1660         d_unhash(dentry);
1661         if (d_mountpoint(dentry))
1662                 error = -EBUSY;
1663         else {
1664                 error = security_inode_rmdir(dir, dentry);
1665                 if (!error) {
1666                         error = dir->i_op->rmdir(dir, dentry);
1667                         if (!error)
1668                                 dentry->d_inode->i_flags |= S_DEAD;
1669                 }
1670         }
1671         up(&dentry->d_inode->i_sem);
1672         if (!error) {
1673                 inode_dir_notify(dir, DN_DELETE);
1674                 d_delete(dentry);
1675         }
1676         dput(dentry);
1677
1678         return error;
1679 }
1680
1681 asmlinkage long sys_rmdir(const char __user * pathname)
1682 {
1683         int error = 0;
1684         char * name;
1685         struct dentry *dentry;
1686         struct nameidata nd;
1687
1688         name = getname(pathname);
1689         if(IS_ERR(name))
1690                 return PTR_ERR(name);
1691
1692         error = path_lookup(name, LOOKUP_PARENT, &nd);
1693         if (error)
1694                 goto exit;
1695
1696         switch(nd.last_type) {
1697                 case LAST_DOTDOT:
1698                         error = -ENOTEMPTY;
1699                         goto exit1;
1700                 case LAST_DOT:
1701                         error = -EINVAL;
1702                         goto exit1;
1703                 case LAST_ROOT:
1704                         error = -EBUSY;
1705                         goto exit1;
1706         }
1707         down(&nd.dentry->d_inode->i_sem);
1708         dentry = lookup_hash(&nd.last, nd.dentry);
1709         error = PTR_ERR(dentry);
1710         if (!IS_ERR(dentry)) {
1711                 error = vfs_rmdir(nd.dentry->d_inode, dentry);
1712                 dput(dentry);
1713         }
1714         up(&nd.dentry->d_inode->i_sem);
1715 exit1:
1716         path_release(&nd);
1717 exit:
1718         putname(name);
1719         return error;
1720 }
1721
1722 int vfs_unlink(struct inode *dir, struct dentry *dentry)
1723 {
1724         int error = may_delete(dir, dentry, 0);
1725
1726         if (error)
1727                 return error;
1728
1729         if (!dir->i_op || !dir->i_op->unlink)
1730                 return -EPERM;
1731
1732         DQUOT_INIT(dir);
1733
1734         down(&dentry->d_inode->i_sem);
1735         if (d_mountpoint(dentry))
1736                 error = -EBUSY;
1737         else {
1738                 error = security_inode_unlink(dir, dentry);
1739                 if (!error)
1740                         error = dir->i_op->unlink(dir, dentry);
1741         }
1742         up(&dentry->d_inode->i_sem);
1743
1744         /* We don't d_delete() NFS sillyrenamed files--they still exist. */
1745         if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
1746                 d_delete(dentry);
1747                 inode_dir_notify(dir, DN_DELETE);
1748         }
1749         return error;
1750 }
1751
1752 /*
1753  * Make sure that the actual truncation of the file will occur outside its
1754  * directory's i_sem.  Truncate can take a long time if there is a lot of
1755  * writeout happening, and we don't want to prevent access to the directory
1756  * while waiting on the I/O.
1757  */
1758 asmlinkage long sys_unlink(const char __user * pathname)
1759 {
1760         int error = 0;
1761         char * name;
1762         struct dentry *dentry;
1763         struct nameidata nd;
1764         struct inode *inode = NULL;
1765
1766         name = getname(pathname);
1767         if(IS_ERR(name))
1768                 return PTR_ERR(name);
1769
1770         error = path_lookup(name, LOOKUP_PARENT, &nd);
1771         if (error)
1772                 goto exit;
1773         error = -EISDIR;
1774         if (nd.last_type != LAST_NORM)
1775                 goto exit1;
1776         down(&nd.dentry->d_inode->i_sem);
1777         dentry = lookup_hash(&nd.last, nd.dentry);
1778         error = PTR_ERR(dentry);
1779         if (!IS_ERR(dentry)) {
1780                 /* Why not before? Because we want correct error value */
1781                 if (nd.last.name[nd.last.len])
1782                         goto slashes;
1783                 inode = dentry->d_inode;
1784                 if (inode)
1785                         atomic_inc(&inode->i_count);
1786                 error = vfs_unlink(nd.dentry->d_inode, dentry);
1787         exit2:
1788                 dput(dentry);
1789         }
1790         up(&nd.dentry->d_inode->i_sem);
1791 exit1:
1792         path_release(&nd);
1793 exit:
1794         putname(name);
1795
1796         if (inode)
1797                 iput(inode);    /* truncate the inode here */
1798         return error;
1799
1800 slashes:
1801         error = !dentry->d_inode ? -ENOENT :
1802                 S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
1803         goto exit2;
1804 }
1805
1806 int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode)
1807 {
1808         int error = may_create(dir, dentry, NULL);
1809
1810         if (error)
1811                 return error;
1812
1813         if (!dir->i_op || !dir->i_op->symlink)
1814                 return -EPERM;
1815
1816         error = security_inode_symlink(dir, dentry, oldname);
1817         if (error)
1818                 return error;
1819
1820         DQUOT_INIT(dir);
1821         error = dir->i_op->symlink(dir, dentry, oldname);
1822         if (!error) {
1823                 inode_dir_notify(dir, DN_CREATE);
1824                 security_inode_post_symlink(dir, dentry, oldname);
1825         }
1826         return error;
1827 }
1828
1829 asmlinkage long sys_symlink(const char __user * oldname, const char __user * newname)
1830 {
1831         int error = 0;
1832         char * from;
1833         char * to;
1834
1835         from = getname(oldname);
1836         if(IS_ERR(from))
1837                 return PTR_ERR(from);
1838         to = getname(newname);
1839         error = PTR_ERR(to);
1840         if (!IS_ERR(to)) {
1841                 struct dentry *dentry;
1842                 struct nameidata nd;
1843
1844                 error = path_lookup(to, LOOKUP_PARENT, &nd);
1845                 if (error)
1846                         goto out;
1847                 dentry = lookup_create(&nd, 0);
1848                 error = PTR_ERR(dentry);
1849                 if (!IS_ERR(dentry)) {
1850                         error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO);
1851                         dput(dentry);
1852                 }
1853                 up(&nd.dentry->d_inode->i_sem);
1854                 path_release(&nd);
1855 out:
1856                 putname(to);
1857         }
1858         putname(from);
1859         return error;
1860 }
1861
1862 int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
1863 {
1864         struct inode *inode = old_dentry->d_inode;
1865         int error;
1866
1867         if (!inode)
1868                 return -ENOENT;
1869
1870         error = may_create(dir, new_dentry, NULL);
1871         if (error)
1872                 return error;
1873
1874         if (dir->i_sb != inode->i_sb)
1875                 return -EXDEV;
1876
1877         /*
1878          * A link to an append-only or immutable file cannot be created.
1879          */
1880         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1881                 return -EPERM;
1882         if (!dir->i_op || !dir->i_op->link)
1883                 return -EPERM;
1884         if (S_ISDIR(old_dentry->d_inode->i_mode))
1885                 return -EPERM;
1886
1887         error = security_inode_link(old_dentry, dir, new_dentry);
1888         if (error)
1889                 return error;
1890
1891         down(&old_dentry->d_inode->i_sem);
1892         DQUOT_INIT(dir);
1893         error = dir->i_op->link(old_dentry, dir, new_dentry);
1894         up(&old_dentry->d_inode->i_sem);
1895         if (!error) {
1896                 inode_dir_notify(dir, DN_CREATE);
1897                 security_inode_post_link(old_dentry, dir, new_dentry);
1898         }
1899         return error;
1900 }
1901
1902 /*
1903  * Hardlinks are often used in delicate situations.  We avoid
1904  * security-related surprises by not following symlinks on the
1905  * newname.  --KAB
1906  *
1907  * We don't follow them on the oldname either to be compatible
1908  * with linux 2.0, and to avoid hard-linking to directories
1909  * and other special files.  --ADM
1910  */
1911 asmlinkage long sys_link(const char __user * oldname, const char __user * newname)
1912 {
1913         struct dentry *new_dentry;
1914         struct nameidata nd, old_nd;
1915         int error;
1916         char * to;
1917
1918         to = getname(newname);
1919         if (IS_ERR(to))
1920                 return PTR_ERR(to);
1921
1922         error = __user_walk(oldname, 0, &old_nd);
1923         if (error)
1924                 goto exit;
1925         error = path_lookup(to, LOOKUP_PARENT, &nd);
1926         if (error)
1927                 goto out;
1928         error = -EXDEV;
1929         if (old_nd.mnt != nd.mnt)
1930                 goto out_release;
1931         new_dentry = lookup_create(&nd, 0);
1932         error = PTR_ERR(new_dentry);
1933         if (!IS_ERR(new_dentry)) {
1934                 error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
1935                 dput(new_dentry);
1936         }
1937         up(&nd.dentry->d_inode->i_sem);
1938 out_release:
1939         path_release(&nd);
1940 out:
1941         path_release(&old_nd);
1942 exit:
1943         putname(to);
1944
1945         return error;
1946 }
1947
1948 /*
1949  * The worst of all namespace operations - renaming directory. "Perverted"
1950  * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
1951  * Problems:
1952  *      a) we can get into loop creation. Check is done in is_subdir().
1953  *      b) race potential - two innocent renames can create a loop together.
1954  *         That's where 4.4 screws up. Current fix: serialization on
1955  *         sb->s_vfs_rename_sem. We might be more accurate, but that's another
1956  *         story.
1957  *      c) we have to lock _three_ objects - parents and victim (if it exists).
1958  *         And that - after we got ->i_sem on parents (until then we don't know
1959  *         whether the target exists).  Solution: try to be smart with locking
1960  *         order for inodes.  We rely on the fact that tree topology may change
1961  *         only under ->s_vfs_rename_sem _and_ that parent of the object we
1962  *         move will be locked.  Thus we can rank directories by the tree
1963  *         (ancestors first) and rank all non-directories after them.
1964  *         That works since everybody except rename does "lock parent, lookup,
1965  *         lock child" and rename is under ->s_vfs_rename_sem.
1966  *         HOWEVER, it relies on the assumption that any object with ->lookup()
1967  *         has no more than 1 dentry.  If "hybrid" objects will ever appear,
1968  *         we'd better make sure that there's no link(2) for them.
1969  *      d) some filesystems don't support opened-but-unlinked directories,
1970  *         either because of layout or because they are not ready to deal with
1971  *         all cases correctly. The latter will be fixed (taking this sort of
1972  *         stuff into VFS), but the former is not going away. Solution: the same
1973  *         trick as in rmdir().
1974  *      e) conversion from fhandle to dentry may come in the wrong moment - when
1975  *         we are removing the target. Solution: we will have to grab ->i_sem
1976  *         in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
1977  *         ->i_sem on parents, which works but leads to some truely excessive
1978  *         locking].
1979  */
1980 int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
1981                struct inode *new_dir, struct dentry *new_dentry)
1982 {
1983         int error = 0;
1984         struct inode *target;
1985
1986         /*
1987          * If we are going to change the parent - check write permissions,
1988          * we'll need to flip '..'.
1989          */
1990         if (new_dir != old_dir) {
1991                 error = permission(old_dentry->d_inode, MAY_WRITE, NULL);
1992                 if (error)
1993                         return error;
1994         }
1995
1996         error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
1997         if (error)
1998                 return error;
1999
2000         target = new_dentry->d_inode;
2001         if (target) {
2002                 down(&target->i_sem);
2003                 d_unhash(new_dentry);
2004         }
2005         if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
2006                 error = -EBUSY;
2007         else
2008                 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
2009         if (target) {
2010                 if (!error)
2011                         target->i_flags |= S_DEAD;
2012                 up(&target->i_sem);
2013                 if (d_unhashed(new_dentry))
2014                         d_rehash(new_dentry);
2015                 dput(new_dentry);
2016         }
2017         if (!error) {
2018                 d_move(old_dentry,new_dentry);
2019                 security_inode_post_rename(old_dir, old_dentry,
2020                                            new_dir, new_dentry);
2021         }
2022         return error;
2023 }
2024
2025 int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
2026                struct inode *new_dir, struct dentry *new_dentry)
2027 {
2028         struct inode *target;
2029         int error;
2030
2031         error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
2032         if (error)
2033                 return error;
2034
2035         dget(new_dentry);
2036         target = new_dentry->d_inode;
2037         if (target)
2038                 down(&target->i_sem);
2039         if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
2040                 error = -EBUSY;
2041         else
2042                 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
2043         if (!error) {
2044                 /* The following d_move() should become unconditional */
2045                 if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME))
2046                         d_move(old_dentry, new_dentry);
2047                 security_inode_post_rename(old_dir, old_dentry, new_dir, new_dentry);
2048         }
2049         if (target)
2050                 up(&target->i_sem);
2051         dput(new_dentry);
2052         return error;
2053 }
2054
2055 int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
2056                struct inode *new_dir, struct dentry *new_dentry)
2057 {
2058         int error;
2059         int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
2060
2061         if (old_dentry->d_inode == new_dentry->d_inode)
2062                 return 0;
2063
2064         error = may_delete(old_dir, old_dentry, is_dir);
2065         if (error)
2066                 return error;
2067
2068         if (!new_dentry->d_inode)
2069                 error = may_create(new_dir, new_dentry, NULL);
2070         else
2071                 error = may_delete(new_dir, new_dentry, is_dir);
2072         if (error)
2073                 return error;
2074
2075         if (!old_dir->i_op || !old_dir->i_op->rename)
2076                 return -EPERM;
2077
2078         DQUOT_INIT(old_dir);
2079         DQUOT_INIT(new_dir);
2080
2081         if (is_dir)
2082                 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
2083         else
2084                 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
2085         if (!error) {
2086                 if (old_dir == new_dir)
2087                         inode_dir_notify(old_dir, DN_RENAME);
2088                 else {
2089                         inode_dir_notify(old_dir, DN_DELETE);
2090                         inode_dir_notify(new_dir, DN_CREATE);
2091                 }
2092         }
2093         return error;
2094 }
2095
2096 static inline int do_rename(const char * oldname, const char * newname)
2097 {
2098         int error = 0;
2099         struct dentry * old_dir, * new_dir;
2100         struct dentry * old_dentry, *new_dentry;
2101         struct dentry * trap;
2102         struct nameidata oldnd, newnd;
2103
2104         error = path_lookup(oldname, LOOKUP_PARENT, &oldnd);
2105         if (error)
2106                 goto exit;
2107
2108         error = path_lookup(newname, LOOKUP_PARENT, &newnd);
2109         if (error)
2110                 goto exit1;
2111
2112         error = -EXDEV;
2113         if (oldnd.mnt != newnd.mnt)
2114                 goto exit2;
2115
2116         old_dir = oldnd.dentry;
2117         error = -EBUSY;
2118         if (oldnd.last_type != LAST_NORM)
2119                 goto exit2;
2120
2121         new_dir = newnd.dentry;
2122         if (newnd.last_type != LAST_NORM)
2123                 goto exit2;
2124
2125         trap = lock_rename(new_dir, old_dir);
2126
2127         old_dentry = lookup_hash(&oldnd.last, old_dir);
2128         error = PTR_ERR(old_dentry);
2129         if (IS_ERR(old_dentry))
2130                 goto exit3;
2131         /* source must exist */
2132         error = -ENOENT;
2133         if (!old_dentry->d_inode)
2134                 goto exit4;
2135         /* unless the source is a directory trailing slashes give -ENOTDIR */
2136         if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
2137                 error = -ENOTDIR;
2138                 if (oldnd.last.name[oldnd.last.len])
2139                         goto exit4;
2140                 if (newnd.last.name[newnd.last.len])
2141                         goto exit4;
2142         }
2143         /* source should not be ancestor of target */
2144         error = -EINVAL;
2145         if (old_dentry == trap)
2146                 goto exit4;
2147         new_dentry = lookup_hash(&newnd.last, new_dir);
2148         error = PTR_ERR(new_dentry);
2149         if (IS_ERR(new_dentry))
2150                 goto exit4;
2151         /* target should not be an ancestor of source */
2152         error = -ENOTEMPTY;
2153         if (new_dentry == trap)
2154                 goto exit5;
2155
2156         error = vfs_rename(old_dir->d_inode, old_dentry,
2157                                    new_dir->d_inode, new_dentry);
2158 exit5:
2159         dput(new_dentry);
2160 exit4:
2161         dput(old_dentry);
2162 exit3:
2163         unlock_rename(new_dir, old_dir);
2164 exit2:
2165         path_release(&newnd);
2166 exit1:
2167         path_release(&oldnd);
2168 exit:
2169         return error;
2170 }
2171
2172 asmlinkage long sys_rename(const char __user * oldname, const char __user * newname)
2173 {
2174         int error;
2175         char * from;
2176         char * to;
2177
2178         from = getname(oldname);
2179         if(IS_ERR(from))
2180                 return PTR_ERR(from);
2181         to = getname(newname);
2182         error = PTR_ERR(to);
2183         if (!IS_ERR(to)) {
2184                 error = do_rename(from,to);
2185                 putname(to);
2186         }
2187         putname(from);
2188         return error;
2189 }
2190
2191 int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link)
2192 {
2193         int len;
2194
2195         len = PTR_ERR(link);
2196         if (IS_ERR(link))
2197                 goto out;
2198
2199         len = strlen(link);
2200         if (len > (unsigned) buflen)
2201                 len = buflen;
2202         if (copy_to_user(buffer, link, len))
2203                 len = -EFAULT;
2204 out:
2205         return len;
2206 }
2207
2208 /*
2209  * A helper for ->readlink().  This should be used *ONLY* for symlinks that
2210  * have ->follow_link() touching nd only in nd_set_link().  Using (or not
2211  * using) it for any given inode is up to filesystem.
2212  */
2213 int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
2214 {
2215         struct nameidata nd;
2216         int res = dentry->d_inode->i_op->follow_link(dentry, &nd);
2217         if (!res) {
2218                 res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd));
2219                 if (dentry->d_inode->i_op->put_link)
2220                         dentry->d_inode->i_op->put_link(dentry, &nd);
2221         }
2222         return res;
2223 }
2224
2225 static inline int
2226 __vfs_follow_link(struct nameidata *nd, const char *link)
2227 {
2228         int res = 0;
2229         char *name;
2230         if (IS_ERR(link))
2231                 goto fail;
2232
2233         if (*link == '/') {
2234                 path_release(nd);
2235                 if (!walk_init_root(link, nd))
2236                         /* weird __emul_prefix() stuff did it */
2237                         goto out;
2238         }
2239         res = link_path_walk(link, nd);
2240 out:
2241         if (current->link_count || res || nd->last_type!=LAST_NORM)
2242                 return res;
2243         /*
2244          * If it is an iterative symlinks resolution in open_namei() we
2245          * have to copy the last component. And all that crap because of
2246          * bloody create() on broken symlinks. Furrfu...
2247          */
2248         name = __getname();
2249         if (unlikely(!name)) {
2250                 path_release(nd);
2251                 return -ENOMEM;
2252         }
2253         strcpy(name, nd->last.name);
2254         nd->last.name = name;
2255         return 0;
2256 fail:
2257         path_release(nd);
2258         return PTR_ERR(link);
2259 }
2260
/*
 * Exported, out-of-line entry point for __vfs_follow_link(): follow the
 * symlink body @link from the position described by @nd.
 *
 * Returns 0 on success or a negative errno.
 */
int vfs_follow_link(struct nameidata *nd, const char *link)
{
        int rc = __vfs_follow_link(nd, link);

        return rc;
}
2265
2266 /* get the link contents into pagecache */
2267 static char *page_getlink(struct dentry * dentry, struct page **ppage)
2268 {
2269         struct page * page;
2270         struct address_space *mapping = dentry->d_inode->i_mapping;
2271         page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage,
2272                                 NULL);
2273         if (IS_ERR(page))
2274                 goto sync_fail;
2275         wait_on_page_locked(page);
2276         if (!PageUptodate(page))
2277                 goto async_fail;
2278         *ppage = page;
2279         return kmap(page);
2280
2281 async_fail:
2282         page_cache_release(page);
2283         return ERR_PTR(-EIO);
2284
2285 sync_fail:
2286         return (char*)page;
2287 }
2288
2289 int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
2290 {
2291         struct page *page = NULL;
2292         char *s = page_getlink(dentry, &page);
2293         int res = vfs_readlink(dentry,buffer,buflen,s);
2294         if (page) {
2295                 kunmap(page);
2296                 page_cache_release(page);
2297         }
2298         return res;
2299 }
2300
/*
 * ->follow_link() for page-cache symlinks: map the link body and record
 * it in @nd with nd_set_link().  The page stays mapped and referenced on
 * success; page_put_link() in this file undoes both.
 *
 * Returns 0 on success or the error from page_getlink().
 */
int page_follow_link_light(struct dentry *dentry, struct nameidata *nd)
{
        struct page *pg;
        char *body = page_getlink(dentry, &pg);

        if (IS_ERR(body))
                return PTR_ERR(body);

        nd_set_link(nd, body);
        return 0;
}
2311
2312 void page_put_link(struct dentry *dentry, struct nameidata *nd)
2313 {
2314         if (!IS_ERR(nd_get_link(nd))) {
2315                 struct page *page;
2316                 page = find_get_page(dentry->d_inode->i_mapping, 0);
2317                 if (!page)
2318                         BUG();
2319                 kunmap(page);
2320                 page_cache_release(page);
2321                 page_cache_release(page);
2322         }
2323 }
2324
2325 int page_follow_link(struct dentry *dentry, struct nameidata *nd)
2326 {
2327         struct page *page = NULL;
2328         char *s = page_getlink(dentry, &page);
2329         int res = __vfs_follow_link(nd, s);
2330         if (page) {
2331                 kunmap(page);
2332                 page_cache_release(page);
2333         }
2334         return res;
2335 }
2336
2337 int page_symlink(struct inode *inode, const char *symname, int len)
2338 {
2339         struct address_space *mapping = inode->i_mapping;
2340         struct page *page = grab_cache_page(mapping, 0);
2341         int err = -ENOMEM;
2342         char *kaddr;
2343
2344         if (!page)
2345                 goto fail;
2346         err = mapping->a_ops->prepare_write(NULL, page, 0, len-1);
2347         if (err)
2348                 goto fail_map;
2349         kaddr = kmap_atomic(page, KM_USER0);
2350         memcpy(kaddr, symname, len-1);
2351         kunmap_atomic(kaddr, KM_USER0);
2352         mapping->a_ops->commit_write(NULL, page, 0, len-1);
2353         /*
2354          * Notice that we are _not_ going to block here - end of page is
2355          * unmapped, so this will only try to map the rest of page, see
2356          * that it is unmapped (typically even will not look into inode -
2357          * ->i_size will be enough for everything) and zero it out.
2358          * OTOH it's obviously correct and should make the page up-to-date.
2359          */
2360         if (!PageUptodate(page)) {
2361                 err = mapping->a_ops->readpage(NULL, page);
2362                 wait_on_page_locked(page);
2363         } else {
2364                 unlock_page(page);
2365         }
2366         page_cache_release(page);
2367         if (err < 0)
2368                 goto fail;
2369         mark_inode_dirty(inode);
2370         return 0;
2371 fail_map:
2372         unlock_page(page);
2373         page_cache_release(page);
2374 fail:
2375         return err;
2376 }
2377
2378 struct inode_operations page_symlink_inode_operations = {
2379         .readlink       = generic_readlink,
2380         .follow_link    = page_follow_link_light,
2381         .put_link       = page_put_link,
2382 };
2383
/* Symbols exported from this file, kept in alphabetical order. */
EXPORT_SYMBOL(__user_walk);
EXPORT_SYMBOL(follow_down);
EXPORT_SYMBOL(follow_up);
EXPORT_SYMBOL(generic_readlink);
EXPORT_SYMBOL(get_write_access); /* binfmt_aout */
EXPORT_SYMBOL(getname);
EXPORT_SYMBOL(lock_rename);
EXPORT_SYMBOL(lookup_create);
EXPORT_SYMBOL(lookup_hash);
EXPORT_SYMBOL(lookup_one_len);
EXPORT_SYMBOL(page_follow_link);
EXPORT_SYMBOL(page_follow_link_light);
EXPORT_SYMBOL(page_put_link);
EXPORT_SYMBOL(page_readlink);
EXPORT_SYMBOL(page_symlink);
EXPORT_SYMBOL(page_symlink_inode_operations);
EXPORT_SYMBOL(path_lookup);
EXPORT_SYMBOL(path_release);
EXPORT_SYMBOL(path_walk);
EXPORT_SYMBOL(permission);
EXPORT_SYMBOL(unlock_rename);
EXPORT_SYMBOL(vfs_create);
EXPORT_SYMBOL(vfs_follow_link);
EXPORT_SYMBOL(vfs_link);
EXPORT_SYMBOL(vfs_mkdir);
EXPORT_SYMBOL(vfs_mknod);
EXPORT_SYMBOL(vfs_permission);
EXPORT_SYMBOL(vfs_readlink);
EXPORT_SYMBOL(vfs_rename);
EXPORT_SYMBOL(vfs_rmdir);
EXPORT_SYMBOL(vfs_symlink);
EXPORT_SYMBOL(vfs_unlink);