fs/namei.c

   1 /*
   2  *  linux/fs/namei.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6
   7 /*
   8  * Some corrections by tytso.
   9  */
  10
  11 /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
  12  * lookup logic.
  13  */
  14 /* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
  15  */
  16
  17 #include <linux/init.h>
  18 #include <linux/module.h>
  19 #include <linux/slab.h>
  20 #include <linux/fs.h>
  21 #include <linux/namei.h>
  22 #include <linux/quotaops.h>
  23 #include <linux/pagemap.h>
  24 #include <linux/fsnotify.h>
  25 #include <linux/smp_lock.h>
  26 #include <linux/personality.h>
  27 #include <linux/security.h>
  28 #include <linux/syscalls.h>
  29 #include <linux/mount.h>
  30 #include <linux/audit.h>
  31 #include <linux/capability.h>
  32 #include <linux/file.h>
  33 #include <linux/fcntl.h>
  34 #include <linux/namei.h>
  35 #include <linux/proc_fs.h>
  36 #include <linux/vs_base.h>
  37 #include <linux/vserver/inode.h>
  38 #include <linux/vserver/debug.h>
  39 #include <asm/namei.h>
  40 #include <asm/uaccess.h>
  41
/*
 * ACC_MODE - index a small lookup table with the O_ACCMODE bits of @x.
 * The string literal supplies the values 0, 4, 2 and 6 for indices 0
 * through 3.
 * NOTE(review): presumably these are MAY_* permission masks for the
 * corresponding open mode - confirm against the users of this macro.
 */
#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
  43
  44 /* [Feb-1997 T. Schoebel-Theuer]
  45  * Fundamental changes in the pathname lookup mechanisms (namei)
  46  * were necessary because of omirr.  The reason is that omirr needs
  47  * to know the _real_ pathname, not the user-supplied one, in case
  48  * of symlinks (and also when transname replacements occur).
  49  *
  50  * The new code replaces the old recursive symlink resolution with
  51  * an iterative one (in case of non-nested symlink chains).  It does
  52  * this with calls to <fs>_follow_link().
  53  * As a side effect, dir_namei(), _namei() and follow_link() are now
  54  * replaced with a single function lookup_dentry() that can handle all
  55  * the special cases of the former code.
  56  *
  57  * With the new dcache, the pathname is stored at each inode, at least as
  58  * long as the refcount of the inode is positive.  As a side effect, the
  59  * size of the dcache depends on the inode cache and thus is dynamic.
  60  *
  61  * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
  62  * resolution to correspond with current state of the code.
  63  *
  64  * Note that the symlink resolution is not *completely* iterative.
  65  * There is still a significant amount of tail- and mid- recursion in
  66  * the algorithm.  Also, note that <fs>_readlink() is not used in
  67  * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
  68  * may return different results than <fs>_follow_link().  Many virtual
  69  * filesystems (including /proc) exhibit this behavior.
  70  */
  71
  72 /* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
  73  * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
  74  * and the name already exists in form of a symlink, try to create the new
  75  * name indicated by the symlink. The old code always complained that the
  76  * name already exists, due to not following the symlink even if its target
  77  * is nonexistent.  The new semantics affects also mknod() and link() when
   78  * the name is a symlink pointing to a non-existent name.
  79  *
  80  * I don't know which semantics is the right one, since I have no access
  81  * to standards. But I found by trial that HP-UX 9.0 has the full "new"
  82  * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
  83  * "old" one. Personally, I think the new semantics is much more logical.
  84  * Note that "ln old new" where "new" is a symlink pointing to a non-existing
  85  * file does succeed in both HP-UX and SunOs, but not in Solaris
  86  * and in the old Linux semantics.
  87  */
  88
  89 /* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
  90  * semantics.  See the comments in "open_namei" and "do_link" below.
  91  *
  92  * [10-Sep-98 Alan Modra] Another symlink change.
  93  */
  94
  95 /* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
  96  *      inside the path - always follow.
  97  *      in the last component in creation/removal/renaming - never follow.
  98  *      if LOOKUP_FOLLOW passed - follow.
  99  *      if the pathname has trailing slashes - follow.
 100  *      otherwise - don't follow.
 101  * (applied in that order).
 102  *
 103  * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT
 104  * restored for 2.4. This is the last surviving part of old 4.2BSD bug.
 105  * During the 2.4 we need to fix the userland stuff depending on it -
 106  * hopefully we will be able to get rid of that wart in 2.5. So far only
 107  * XEmacs seems to be relying on it...
 108  */
 109 /*
 110  * [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland)
 111  * implemented.  Let's see if raised priority of ->s_vfs_rename_mutex gives
 112  * any extra contention...
 113  */
 114
 115 /* In order to reduce some races, while at the same time doing additional
 116  * checking and hopefully speeding things up, we copy filenames to the
 117  * kernel data space before using them..
 118  *
 119  * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
 120  * PATH_MAX includes the nul terminator --RR.
 121  */
 122 static int do_getname(const char __user *filename, char *page)
 123 {
 124         int retval;
 125         unsigned long len = PATH_MAX;
 126
 127         if (!segment_eq(get_fs(), KERNEL_DS)) {
 128                 if ((unsigned long) filename >= TASK_SIZE)
 129                         return -EFAULT;
 130                 if (TASK_SIZE - (unsigned long) filename < PATH_MAX)
 131                         len = TASK_SIZE - (unsigned long) filename;
 132         }
 133
 134         retval = strncpy_from_user(page, filename, len);
 135         if (retval > 0) {
 136                 if (retval < len)
 137                         return 0;
 138                 return -ENAMETOOLONG;
 139         } else if (!retval)
 140                 retval = -ENOENT;
 141         return retval;
 142 }
 143
 144 char * getname(const char __user * filename)
 145 {
 146         char *tmp, *result;
 147
 148         result = ERR_PTR(-ENOMEM);
 149         tmp = __getname();
 150         if (tmp)  {
 151                 int retval = do_getname(filename, tmp);
 152
 153                 result = tmp;
 154                 if (retval < 0) {
 155                         __putname(tmp);
 156                         result = ERR_PTR(retval);
 157                 }
 158         }
 159         audit_getname(result);
 160         return result;
 161 }
 162
 163 #ifdef CONFIG_AUDITSYSCALL
 164 void putname(const char *name)
 165 {
 166         if (unlikely(current->audit_context))
 167                 audit_putname(name);
 168         else
 169                 __putname(name);
 170 }
 171 EXPORT_SYMBOL(putname);
 172 #endif
 173
 174
 175 /**
 176  * generic_permission  -  check for access rights on a Posix-like filesystem
 177  * @inode:      inode to check access rights for
 178  * @mask:       right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
 179  * @check_acl:  optional callback to check for Posix ACLs
 180  *
 181  * Used to check for read/write/execute permissions on a file.
 182  * We use "fsuid" for this, letting us set arbitrary permissions
 183  * for filesystem access without changing the "normal" uids which
 184  * are used for other things..
 185  */
 186 int generic_permission(struct inode *inode, int mask,
 187                 int (*check_acl)(struct inode *inode, int mask))
 188 {
 189         umode_t                 mode = inode->i_mode;
 190
 191         if (current->fsuid == inode->i_uid)
 192                 mode >>= 6;
 193         else {
 194                 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) {
 195                         int error = check_acl(inode, mask);
 196                         if (error == -EACCES)
 197                                 goto check_capabilities;
 198                         else if (error != -EAGAIN)
 199                                 return error;
 200                 }
 201
 202                 if (in_group_p(inode->i_gid))
 203                         mode >>= 3;
 204         }
 205
 206         /*
 207          * If the DACs are ok we don't need any capability check.
 208          */
 209         if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask))
 210                 return 0;
 211
 212  check_capabilities:
 213         /*
 214          * Read/write DACs are always overridable.
 215          * Executable DACs are overridable if at least one exec bit is set.
 216          */
 217         if (!(mask & MAY_EXEC) ||
 218             (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
 219                 if (capable(CAP_DAC_OVERRIDE))
 220                         return 0;
 221
 222         /*
 223          * Searching includes executable on directories, else just read.
 224          */
 225         if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
 226                 if (capable(CAP_DAC_READ_SEARCH))
 227                         return 0;
 228
 229         return -EACCES;
 230 }
 231
 232 static inline int vx_barrier(struct inode *inode)
 233 {
 234         if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN)) {
 235                 vxwprintk(1, "xid=%d did hit the barrier.",
 236                         vx_current_xid());
 237                 return 1;
 238         }
 239         return 0;
 240 }
 241
 242 static inline int xid_permission(struct inode *inode, int mask, struct nameidata *nd)
 243 {
 244         if (vx_barrier(inode))
 245                 return -EACCES;
 246         if (inode->i_xid == 0)
 247                 return 0;
 248 #ifdef CONFIG_VSERVER_FILESHARING
 249         /* MEF: PlanetLab FS module assumes that any file that can be
 250          * named (e.g., via a cross mount) is not hidden from another
 251          * context or the admin context.
 252          */
 253         if (vx_check(inode->i_xid,VX_STATIC|VX_DYNAMIC))
 254                 return 0;
 255 #endif
 256         if (vx_check(inode->i_xid, VX_ADMIN|VX_WATCH|VX_IDENT))
 257                 return 0;
 258
 259         vxwprintk(1, "xid=%d denied access to %p[#%d,%lu] »%s«.",
 260                 vx_current_xid(), inode, inode->i_xid, inode->i_ino,
 261                 vxd_cond_path(nd));
 262         return -EACCES;
 263 }
 264
 265 int permission(struct inode *inode, int mask, struct nameidata *nd)
 266 {
 267         int retval, submask;
 268
 269         if (mask & MAY_WRITE) {
 270                 umode_t mode = inode->i_mode;
 271
 272                 /*
 273                  * Nobody gets write access to a read-only fs.
 274                  */
 275                 if ((IS_RDONLY(inode) || (nd && MNT_IS_RDONLY(nd->mnt))) &&
 276                     (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
 277                         return -EROFS;
 278
 279                 /*
 280                  * Nobody gets write access to an immutable file.
 281                  */
 282                 if (IS_IMMUTABLE(inode))
 283                         return -EACCES;
 284         }
 285
 286
 287         /* Ordinary permission routines do not understand MAY_APPEND. */
 288         submask = mask & ~MAY_APPEND;
 289         if ((retval = xid_permission(inode, mask, nd)))
 290                 return retval;
 291         if (inode->i_op && inode->i_op->permission)
 292                 retval = inode->i_op->permission(inode, submask, nd);
 293         else
 294                 retval = generic_permission(inode, submask, NULL);
 295         if (retval)
 296                 return retval;
 297
 298         return security_inode_permission(inode, mask, nd);
 299 }
 300
 301 /**
 302  * vfs_permission  -  check for access rights to a given path
 303  * @nd:         lookup result that describes the path
 304  * @mask:       right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
 305  *
 306  * Used to check for read/write/execute permissions on a path.
 307  * We use "fsuid" for this, letting us set arbitrary permissions
 308  * for filesystem access without changing the "normal" uids which
 309  * are used for other things.
 310  */
 311 int vfs_permission(struct nameidata *nd, int mask)
 312 {
 313         return permission(nd->dentry->d_inode, mask, nd);
 314 }
 315
 316 /**
 317  * file_permission  -  check for additional access rights to a given file
 318  * @file:       file to check access rights for
 319  * @mask:       right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
 320  *
 321  * Used to check for read/write/execute permissions on an already opened
 322  * file.
 323  *
 324  * Note:
 325  *      Do not use this function in new code.  All access checks should
 326  *      be done using vfs_permission().
 327  */
 328 int file_permission(struct file *file, int mask)
 329 {
 330         return permission(file->f_dentry->d_inode, mask, NULL);
 331 }
 332
 333 /*
 334  * get_write_access() gets write permission for a file.
 335  * put_write_access() releases this write permission.
 336  * This is used for regular files.
 337  * We cannot support write (and maybe mmap read-write shared) accesses and
 338  * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
 339  * can have the following values:
 340  * 0: no writers, no VM_DENYWRITE mappings
 341  * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
 342  * > 0: (i_writecount) users are writing to the file.
 343  *
 344  * Normally we operate on that counter with atomic_{inc,dec} and it's safe
 345  * except for the cases where we don't hold i_writecount yet. Then we need to
 346  * use {get,deny}_write_access() - these functions check the sign and refuse
 347  * to do the change if sign is wrong. Exclusion between them is provided by
 348  * the inode->i_lock spinlock.
 349  */
 350
 351 int get_write_access(struct inode * inode)
 352 {
 353         spin_lock(&inode->i_lock);
 354         if (atomic_read(&inode->i_writecount) < 0) {
 355                 spin_unlock(&inode->i_lock);
 356                 return -ETXTBSY;
 357         }
 358         atomic_inc(&inode->i_writecount);
 359         spin_unlock(&inode->i_lock);
 360
 361         return 0;
 362 }
 363
 364 int deny_write_access(struct file * file)
 365 {
 366         struct inode *inode = file->f_dentry->d_inode;
 367
 368         spin_lock(&inode->i_lock);
 369         if (atomic_read(&inode->i_writecount) > 0) {
 370                 spin_unlock(&inode->i_lock);
 371                 return -ETXTBSY;
 372         }
 373         atomic_dec(&inode->i_writecount);
 374         spin_unlock(&inode->i_lock);
 375
 376         return 0;
 377 }
 378
 379 void path_release(struct nameidata *nd)
 380 {
 381         dput(nd->dentry);
 382         mntput(nd->mnt);
 383 }
 384
 385 /*
 386  * umount() mustn't call path_release()/mntput() as that would clear
 387  * mnt_expiry_mark
 388  */
 389 void path_release_on_umount(struct nameidata *nd)
 390 {
 391         dput(nd->dentry);
 392         mntput_no_expire(nd->mnt);
 393 }
 394
 395 /**
 396  * release_open_intent - free up open intent resources
 397  * @nd: pointer to nameidata
 398  */
 399 void release_open_intent(struct nameidata *nd)
 400 {
 401         if (nd->intent.open.file->f_dentry == NULL)
 402                 put_filp(nd->intent.open.file);
 403         else
 404                 fput(nd->intent.open.file);
 405 }
 406
 407 /*
 408  * Internal lookup() using the new generic dcache.
 409  * SMP-safe
 410  */
 411 static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd)
 412 {
 413         struct dentry * dentry = __d_lookup(parent, name);
 414
 415         /* lockess __d_lookup may fail due to concurrent d_move()
 416          * in some unrelated directory, so try with d_lookup
 417          */
 418         if (!dentry)
 419                 dentry = d_lookup(parent, name);
 420
 421         if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
 422                 if (!dentry->d_op->d_revalidate(dentry, nd) && !d_invalidate(dentry)) {
 423                         dput(dentry);
 424                         dentry = NULL;
 425                 }
 426         }
 427         return dentry;
 428 }
 429
 430 /*
 431  * Short-cut version of permission(), for calling by
 432  * path_walk(), when dcache lock is held.  Combines parts
 433  * of permission() and generic_permission(), and tests ONLY for
 434  * MAY_EXEC permission.
 435  *
 436  * If appropriate, check DAC only.  If not appropriate, or
 437  * short-cut DAC fails, then call permission() to do more
 438  * complete permission check.
 439  */
 440 static int exec_permission_lite(struct inode *inode,
 441                                        struct nameidata *nd)
 442 {
 443         umode_t mode = inode->i_mode;
 444
 445         if (vx_barrier(inode))
 446                 return -EACCES;
 447         if (inode->i_op && inode->i_op->permission)
 448                 return -EAGAIN;
 449
 450         if (current->fsuid == inode->i_uid)
 451                 mode >>= 6;
 452         else if (in_group_p(inode->i_gid))
 453                 mode >>= 3;
 454
 455         if (mode & MAY_EXEC)
 456                 goto ok;
 457
 458         if ((inode->i_mode & S_IXUGO) && capable(CAP_DAC_OVERRIDE))
 459                 goto ok;
 460
 461         if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_OVERRIDE))
 462                 goto ok;
 463
 464         if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_READ_SEARCH))
 465                 goto ok;
 466
 467         return -EACCES;
 468 ok:
 469         return security_inode_permission(inode, MAY_EXEC, nd);
 470 }
 471
 472 /*
 473  * This is called when everything else fails, and we actually have
 474  * to go to the low-level filesystem to find out what we should do..
 475  *
 476  * We get the directory semaphore, and after getting that we also
 477  * make sure that nobody added the entry to the dcache in the meantime..
 478  * SMP-safe
 479  */
 480 static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd)
 481 {
 482         struct dentry * result;
 483         struct inode *dir = parent->d_inode;
 484
 485         mutex_lock(&dir->i_mutex);
 486         /*
 487          * First re-do the cached lookup just in case it was created
 488          * while we waited for the directory semaphore..
 489          *
 490          * FIXME! This could use version numbering or similar to
 491          * avoid unnecessary cache lookups.
 492          *
 493          * The "dcache_lock" is purely to protect the RCU list walker
 494          * from concurrent renames at this point (we mustn't get false
 495          * negatives from the RCU list walk here, unlike the optimistic
 496          * fast walk).
 497          *
 498          * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup
 499          */
 500         result = d_lookup(parent, name);
 501         if (!result) {
 502                 struct dentry * dentry = d_alloc(parent, name);
 503                 result = ERR_PTR(-ENOMEM);
 504                 if (dentry) {
 505                         result = dir->i_op->lookup(dir, dentry, nd);
 506                         if (result)
 507                                 dput(dentry);
 508                         else
 509                                 result = dentry;
 510                 }
 511                 mutex_unlock(&dir->i_mutex);
 512                 return result;
 513         }
 514
 515         /*
 516          * Uhhuh! Nasty case: the cache was re-populated while
 517          * we waited on the semaphore. Need to revalidate.
 518          */
 519         mutex_unlock(&dir->i_mutex);
 520         if (result->d_op && result->d_op->d_revalidate) {
 521                 if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) {
 522                         dput(result);
 523                         result = ERR_PTR(-ENOENT);
 524                 }
 525         }
 526         return result;
 527 }
 528
/* Forward declaration: walk_init_root() calls this before it is defined. */
static int __emul_lookup_dentry(const char *, struct nameidata *);

/*
 * walk_init_root - point @nd at the directory an absolute path starts from.
 *
 * If the current task has an alternate root and LOOKUP_NOALT is not set in
 * nd->flags, @nd is first set to the alternate root and
 * __emul_lookup_dentry() is tried on @name; a non-zero result from it ends
 * the function with return value 0.  In every other case @nd is set to the
 * task's regular root and 1 is returned.
 *
 * The references stored in @nd are taken with mntget()/dget() while
 * current->fs->lock is read-held.
 *
 * SMP-safe
 */
static __always_inline int
walk_init_root(const char *name, struct nameidata *nd)
{
        read_lock(&current->fs->lock);
        if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
                nd->mnt = mntget(current->fs->altrootmnt);
                nd->dentry = dget(current->fs->altroot);
                /* the lock is dropped around the emulation lookup */
                read_unlock(&current->fs->lock);
                if (__emul_lookup_dentry(name,nd))
                        return 0;
                /*
                 * NOTE(review): nd->mnt/nd->dentry are overwritten below
                 * without being released here; presumably
                 * __emul_lookup_dentry() has already dropped the alternate
                 * root references when it returns 0 - confirm.
                 */
                read_lock(&current->fs->lock);
        }
        nd->mnt = mntget(current->fs->rootmnt);
        nd->dentry = dget(current->fs->root);
        read_unlock(&current->fs->lock);
        return 1;
}
 549
 550 static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
 551 {
 552         int res = 0;
 553         char *name;
 554         if (IS_ERR(link))
 555                 goto fail;
 556
 557         if (*link == '/') {
 558                 path_release(nd);
 559                 if (!walk_init_root(link, nd))
 560                         /* weird __emul_prefix() stuff did it */
 561                         goto out;
 562         }
 563         res = link_path_walk(link, nd);
 564 out:
 565         if (nd->depth || res || nd->last_type!=LAST_NORM)
 566                 return res;
 567         /*
 568          * If it is an iterative symlinks resolution in open_namei() we
 569          * have to copy the last component. And all that crap because of
 570          * bloody create() on broken symlinks. Furrfu...
 571          */
 572         name = __getname();
 573         if (unlikely(!name)) {
 574                 path_release(nd);
 575                 return -ENOMEM;
 576         }
 577         strcpy(name, nd->last.name);
 578         nd->last.name = name;
 579         return 0;
 580 fail:
 581         path_release(nd);
 582         return PTR_ERR(link);
 583 }
 584
/*
 * A (vfsmount, dentry) pair describing one location reached during a
 * path walk, kept separately from the nameidata being updated.
 */
struct path {
        struct vfsmount *mnt;
        struct dentry *dentry;
};
 589
 590 static inline void dput_path(struct path *path, struct nameidata *nd)
 591 {
 592         dput(path->dentry);
 593         if (path->mnt != nd->mnt)
 594                 mntput(path->mnt);
 595 }
 596
 597 static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
 598 {
 599         dput(nd->dentry);
 600         if (nd->mnt != path->mnt)
 601                 mntput(nd->mnt);
 602         nd->mnt = path->mnt;
 603         nd->dentry = path->dentry;
 604 }
 605
/*
 * __do_follow_link - resolve the symlink at @path on behalf of @nd.
 *
 * Calls the inode's ->follow_link() method.  If the method left a link
 * body in @nd (read back with nd_get_link()), that string is walked with
 * __vfs_follow_link().  ->put_link(), when the inode provides it, is then
 * called with the cookie that ->follow_link() returned.
 *
 * Returns 0 on success or a negative errno, taken either from an error
 * pointer returned by ->follow_link() or from __vfs_follow_link().
 */
static __always_inline int __do_follow_link(struct path *path, struct nameidata *nd)
{
        int error;
        void *cookie;
        struct dentry *dentry = path->dentry;

        touch_atime(path->mnt, dentry);
        /* start with no link body recorded in nd */
        nd_set_link(nd, NULL);

        if (path->mnt != nd->mnt) {
                /*
                 * The link sits on a different mount: nd takes over the
                 * pointers of path, so an extra dentry reference is taken
                 * for the dput() at the end of this function.
                 */
                path_to_nameidata(path, nd);
                dget(dentry);
        }
        /* hold the mount until the matching mntput() below */
        mntget(path->mnt);
        cookie = dentry->d_inode->i_op->follow_link(dentry, nd);
        error = PTR_ERR(cookie);
        if (!IS_ERR(cookie)) {
                char *s = nd_get_link(nd);
                error = 0;
                /* no link body recorded: nothing to walk here */
                if (s)
                        error = __vfs_follow_link(nd, s);
                if (dentry->d_inode->i_op->put_link)
                        dentry->d_inode->i_op->put_link(dentry, nd, cookie);
        }
        dput(dentry);
        mntput(path->mnt);

        return error;
}
 635
 636 /*
 637  * This limits recursive symlink follows to 8, while
 638  * limiting consecutive symlinks to 40.
 639  *
 640  * Without that kind of total limit, nasty chains of consecutive
 641  * symlinks can cause almost arbitrarily long lookups.
 642  */
 643 static inline int do_follow_link(struct path *path, struct nameidata *nd)
 644 {
 645         int err = -ELOOP;
 646         if (current->link_count >= MAX_NESTED_LINKS)
 647                 goto loop;
 648         if (current->total_link_count >= 40)
 649                 goto loop;
 650         BUG_ON(nd->depth >= MAX_NESTED_LINKS);
 651         cond_resched();
 652         err = security_inode_follow_link(path->dentry, nd);
 653         if (err)
 654                 goto loop;
 655         current->link_count++;
 656         current->total_link_count++;
 657         nd->depth++;
 658         err = __do_follow_link(path, nd);
 659         current->link_count--;
 660         nd->depth--;
 661         return err;
 662 loop:
 663         dput_path(path, nd);
 664         path_release(nd);
 665         return err;
 666 }
 667
 668 int follow_up(struct vfsmount **mnt, struct dentry **dentry)
 669 {
 670         struct vfsmount *parent;
 671         struct dentry *mountpoint;
 672         spin_lock(&vfsmount_lock);
 673         parent=(*mnt)->mnt_parent;
 674         if (parent == *mnt) {
 675                 spin_unlock(&vfsmount_lock);
 676                 return 0;
 677         }
 678         mntget(parent);
 679         mountpoint=dget((*mnt)->mnt_mountpoint);
 680         spin_unlock(&vfsmount_lock);
 681         dput(*dentry);
 682         *dentry = mountpoint;
 683         mntput(*mnt);
 684         *mnt = parent;
 685         return 1;
 686 }
 687
 688 /* no need for dcache_lock, as serialization is taken care in
 689  * namespace.c
 690  */
 691 static int __follow_mount(struct path *path)
 692 {
 693         int res = 0;
 694         while (d_mountpoint(path->dentry)) {
 695                 struct vfsmount *mounted = lookup_mnt(path->mnt, path->dentry);
 696                 if (!mounted)
 697                         break;
 698                 dput(path->dentry);
 699                 if (res)
 700                         mntput(path->mnt);
 701                 path->mnt = mounted;
 702                 path->dentry = dget(mounted->mnt_root);
 703                 res = 1;
 704         }
 705         return res;
 706 }
 707
 708 static void follow_mount(struct vfsmount **mnt, struct dentry **dentry)
 709 {
 710         while (d_mountpoint(*dentry)) {
 711                 struct vfsmount *mounted = lookup_mnt(*mnt, *dentry);
 712                 if (!mounted)
 713                         break;
 714                 dput(*dentry);
 715                 mntput(*mnt);
 716                 *mnt = mounted;
 717                 *dentry = dget(mounted->mnt_root);
 718         }
 719 }
 720
 721 /* no need for dcache_lock, as serialization is taken care in
 722  * namespace.c
 723  */
 724 int follow_down(struct vfsmount **mnt, struct dentry **dentry)
 725 {
 726         struct vfsmount *mounted;
 727
 728         mounted = lookup_mnt(*mnt, *dentry);
 729         if (mounted) {
 730                 dput(*dentry);
 731                 mntput(*mnt);
 732                 *mnt = mounted;
 733                 *dentry = dget(mounted->mnt_root);
 734                 return 1;
 735         }
 736         return 0;
 737 }
 738
/*
 * follow_dotdot - move @nd one level up, as for a ".." path component.
 *
 * Nothing happens when @nd already is the current task's root.  Otherwise
 * @nd moves to the parent dentry; when it stands on the root of a mount it
 * first climbs to the mountpoint in the parent mount and tries again.
 * After moving (or when the topmost mount is reached) follow_mount() is
 * applied to the result.
 */
static __always_inline void follow_dotdot(struct nameidata *nd)
{
        while(1) {
                struct vfsmount *parent;
                struct dentry *old = nd->dentry;

                /* never step above the task's root directory */
                read_lock(&current->fs->lock);
                if (nd->dentry == current->fs->root &&
                    nd->mnt == current->fs->rootmnt) {
                        read_unlock(&current->fs->lock);
                        /* for sane '/' avoid follow_mount() */
                        return;
                }
                read_unlock(&current->fs->lock);
                /* not at the root of this mount: plain step to the parent */
                spin_lock(&dcache_lock);
                if (nd->dentry != nd->mnt->mnt_root) {
                        nd->dentry = dget(nd->dentry->d_parent);
                        spin_unlock(&dcache_lock);
                        dput(old);
                        break;
                }
                spin_unlock(&dcache_lock);
                /* at the root of a mount: climb to its mountpoint, if any */
                spin_lock(&vfsmount_lock);
                parent = nd->mnt->mnt_parent;
                if (parent == nd->mnt) {
                        /* this mount is its own parent: nowhere left to go */
                        spin_unlock(&vfsmount_lock);
                        break;
                }
                mntget(parent);
                nd->dentry = dget(nd->mnt->mnt_mountpoint);
                spin_unlock(&vfsmount_lock);
                dput(old);
                mntput(nd->mnt);
                nd->mnt = parent;
        }
        follow_mount(&nd->mnt, &nd->dentry);
}
 776
 777 /*
 778  *  It's more convoluted than I'd like it to be, but... it's still fairly
 779  *  small and for now I'd prefer to have fast path as straight as possible.
 780  *  It _is_ time-critical.
 781  */
 782 static int do_lookup(struct nameidata *nd, struct qstr *name,
 783                      struct path *path, int atomic)
 784 {
 785         struct vfsmount *mnt = nd->mnt;
 786         struct dentry *dentry = __d_lookup(nd->dentry, name);
 787         struct inode *inode;
 788
 789         if (!dentry)
 790                 goto need_lookup;
 791         if (dentry->d_op && dentry->d_op->d_revalidate)
 792                 goto need_revalidate;
 793         inode = dentry->d_inode;
 794         if (!inode)
 795                 goto done;
 796 #ifdef CONFIG_VSERVER_FILESHARING
 797         /* MEF: PlanetLab FS module assumes that any file that can be
 798          * named (e.g., via a cross mount) is not hidden from another
 799          * context or the admin context.
 800          */
 801         if (vx_check(inode->i_xid,VX_STATIC|VX_DYNAMIC|VX_ADMIN)) {
 802                 /* do nothing */
 803         }
 804         else /* do the following check */
 805 #endif
 806         if (!vx_check(inode->i_xid, VX_WATCH|VX_ADMIN|VX_HOSTID|VX_IDENT))
 807                 goto hidden;
 808         if (inode->i_sb->s_magic == PROC_SUPER_MAGIC) {
 809                 struct proc_dir_entry *de = PDE(inode);
 810
 811                 if (de && !vx_hide_check(0, de->vx_flags))
 812                         goto hidden;
 813         }
 814 done:
 815         path->mnt = mnt;
 816         path->dentry = dentry;
 817         __follow_mount(path);
 818         return 0;
 819 hidden:
 820         vxwprintk(1, "xid=%d did lookup hidden %p[#%d,%lu] »%s«.",
 821                 vx_current_xid(), inode, inode->i_xid, inode->i_ino,
 822                 vxd_path(dentry, mnt));
 823         dput(dentry);
 824         return -ENOENT;
 825
 826 need_lookup:
 827         if (atomic)
 828                 return -EWOULDBLOCKIO;
 829         dentry = real_lookup(nd->dentry, name, nd);
 830         if (IS_ERR(dentry))
 831                 goto fail;
 832         goto done;
 833
 834 need_revalidate:
 835         if (atomic)
 836                 return -EWOULDBLOCKIO;
 837         if (dentry->d_op->d_revalidate(dentry, nd))
 838                 goto done;
 839         if (d_invalidate(dentry))
 840                 goto done;
 841         dput(dentry);
 842         goto need_lookup;
 843
 844 fail:
 845         return PTR_ERR(dentry);
 846 }
 847
/*
 * Name resolution.
 * This is the basic name resolution function, turning a pathname into
 * the final dentry. We expect 'base' to be positive and a directory.
 *
 * The walk starts at nd->dentry and consumes 'name' one '/'-separated
 * component at a time.  LOOKUP_ATOMIC in nd->flags is forwarded to
 * do_lookup() as its 'atomic' argument.  With LOOKUP_PARENT the last
 * component is not looked up; it is recorded in nd->last and
 * nd->last_type instead.
 *
 * Returns 0 and nd will have valid dentry and mnt on success.
 * Returns error and drops reference to input namei data on failure.
 */
static fastcall int __link_path_walk(const char * name, struct nameidata *nd)
{
        struct path next;
        struct inode *inode;
        int err, atomic;
        unsigned int lookup_flags = nd->flags;

        /* non-zero when LOOKUP_ATOMIC was requested; passed to do_lookup() */
        atomic = (lookup_flags & LOOKUP_ATOMIC);

        /* skip leading slashes; nothing left means there is nothing to walk */
        while (*name=='/')
                name++;
        if (!*name)
                goto return_reval;

        inode = nd->dentry->d_inode;
        /*
         * With a non-zero nd->depth the caller's flags are replaced: only
         * LOOKUP_CONTINUE is kept and LOOKUP_FOLLOW is forced.
         * NOTE(review): presumably the nested-symlink case - confirm.
         */
        if (nd->depth)
                lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);

        /* At this point we know we have a real path component. */
        for(;;) {
                unsigned long hash;
                struct qstr this;
                unsigned int c;

                nd->flags |= LOOKUP_CONTINUE;
                /* try the cheap check first, fall back to the full one on -EAGAIN */
                err = exec_permission_lite(inode, nd);
                if (err == -EAGAIN)
                        err = vfs_permission(nd, MAY_EXEC);
                if (err)
                        break;

                this.name = name;
                c = *(const unsigned char *)name;

                /* hash the component while advancing name to its end ('/' or NUL) */
                hash = init_name_hash();
                do {
                        name++;
                        hash = partial_name_hash(c, hash);
                        c = *(const unsigned char *)name;
                } while (c && (c != '/'));
                this.len = name - (const char *) this.name;
                this.hash = end_name_hash(hash);

                /* remove trailing slashes? */
                if (!c)
                        goto last_component;
                while (*++name == '/');
                if (!*name)
                        goto last_with_slashes;

                /*
                 * "." and ".." are special - ".." especially so because it has
                 * to be able to know about the current root directory and
                 * parent relationships.
                 */
                if (this.name[0] == '.') switch (this.len) {
                        default:
                                break;
                        case 2:
                                if (this.name[1] != '.')
                                        break;
                                follow_dotdot(nd);
                                inode = nd->dentry->d_inode;
                                /* fallthrough */
                        case 1:
                                continue;
                }
                /*
                 * See if the low-level filesystem might want
                 * to use its own hash..
                 */
                if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
                        err = nd->dentry->d_op->d_hash(nd->dentry, &this);
                        if (err < 0)
                                break;
                }
                /* This does the actual lookups.. */
                err = do_lookup(nd, &this, &next, atomic);
                if (err)
                        break;

                /* an intermediate component must exist and have inode operations */
                err = -ENOENT;
                inode = next.dentry->d_inode;
                if (!inode)
                        goto out_dput;
                err = -ENOTDIR;
                if (!inode->i_op)
                        goto out_dput;

                if (inode->i_op->follow_link) {
                        err = do_follow_link(&next, nd);
                        /*
                         * This exit skips path_release(nd).
                         * NOTE(review): presumably do_follow_link() has already
                         * dropped nd's references on failure - confirm.
                         */
                        if (err)
                                goto return_err;
                        /* re-check whatever nd points at after following the link */
                        err = -ENOENT;
                        inode = nd->dentry->d_inode;
                        if (!inode)
                                break;
                        err = -ENOTDIR;
                        if (!inode->i_op)
                                break;
                } else
                        path_to_nameidata(&next, nd);
                /* more components follow, so this one must support ->lookup */
                err = -ENOTDIR;
                if (!inode->i_op->lookup)
                        break;
                continue;
                /* here ends the main loop */

last_with_slashes:
                /* trailing slash(es): follow links and insist on a directory */
                lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
last_component:
                /* Clear LOOKUP_CONTINUE iff it was previously unset */
                nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
                if (lookup_flags & LOOKUP_PARENT)
                        goto lookup_parent;
                if (this.name[0] == '.') switch (this.len) {
                        default:
                                break;
                        case 2:
                                if (this.name[1] != '.')
                                        break;
                                follow_dotdot(nd);
                                inode = nd->dentry->d_inode;
                                /* fallthrough */
                        case 1:
                                goto return_reval;
                }
                if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
                        err = nd->dentry->d_op->d_hash(nd->dentry, &this);
                        if (err < 0)
                                break;
                }
                err = do_lookup(nd, &this, &next, atomic);
                if (err)
                        break;
                inode = next.dentry->d_inode;
                /* the final component is followed only when LOOKUP_FOLLOW is set */
                if ((lookup_flags & LOOKUP_FOLLOW)
                    && inode && inode->i_op && inode->i_op->follow_link) {
                        err = do_follow_link(&next, nd);
                        if (err)
                                goto return_err;
                        inode = nd->dentry->d_inode;
                } else
                        path_to_nameidata(&next, nd);
                err = -ENOENT;
                if (!inode)
                        break;
                if (lookup_flags & LOOKUP_DIRECTORY) {
                        err = -ENOTDIR;
                        if (!inode->i_op || !inode->i_op->lookup)
                                break;
                }
                goto return_base;
lookup_parent:
                /* record the last component and classify it; no lookup is done */
                nd->last = this;
                nd->last_type = LAST_NORM;
                if (this.name[0] != '.')
                        goto return_base;
                if (this.len == 1)
                        nd->last_type = LAST_DOT;
                else if (this.len == 2 && this.name[1] == '.')
                        nd->last_type = LAST_DOTDOT;
                else
                        goto return_base;
return_reval:
                /*
                 * We bypassed the ordinary revalidation routines.
                 * We may need to check the cached dentry for staleness.
                 */
                if (nd->dentry && nd->dentry->d_sb &&
                    (nd->dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
                        err = -ESTALE;
                        /* Note: we do not d_invalidate() */
                        if (!nd->dentry->d_op->d_revalidate(nd->dentry, nd))
                                break;
                }
return_base:
                return 0;
out_dput:
                dput_path(&next, nd);
                break;
        }
        /* every 'break' above lands here: drop nd's references, report err */
        path_release(nd);
return_err:
        return err;
}
1042
1043 /*
1044  * Wrapper to retry pathname resolution whenever the underlying
1045  * file system returns an ESTALE.
1046  *
1047  * Retry the whole path once, forcing real lookup requests
1048  * instead of relying on the dcache.
1049  */
1050 int fastcall link_path_walk(const char *name, struct nameidata *nd)
1051 {
1052         struct nameidata save = *nd;
1053         int result;
1054
1055         /* make sure the stuff we saved doesn't go away */
1056         dget(save.dentry);
1057         mntget(save.mnt);
1058
1059         result = __link_path_walk(name, nd);
1060         if (result == -ESTALE) {
1061                 *nd = save;
1062                 dget(nd->dentry);
1063                 mntget(nd->mnt);
1064                 nd->flags |= LOOKUP_REVAL;
1065                 result = __link_path_walk(name, nd);
1066         }
1067
1068         dput(save.dentry);
1069         mntput(save.mnt);
1070
1071         return result;
1072 }
1073
1074 int fastcall path_walk(const char * name, struct nameidata *nd)
1075 {
1076         current->total_link_count = 0;
1077         return link_path_walk(name, nd);
1078 }
1079
1080 /*
1081  * SMP-safe: Returns 1 and nd will have valid dentry and mnt, if
1082  * everything is done. Returns 0 and drops input nd, if lookup failed;
1083  */
1084 static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
1085 {
1086         if (path_walk(name, nd))
1087                 return 0;               /* something went wrong... */
1088
1089         if (!nd->dentry->d_inode || S_ISDIR(nd->dentry->d_inode->i_mode)) {
1090                 struct dentry *old_dentry = nd->dentry;
1091                 struct vfsmount *old_mnt = nd->mnt;
1092                 struct qstr last = nd->last;
1093                 int last_type = nd->last_type;
1094                 /*
1095                  * NAME was not found in alternate root or it's a directory.  Try to find
1096                  * it in the normal root:
1097                  */
1098                 nd->last_type = LAST_ROOT;
1099                 read_lock(&current->fs->lock);
1100                 nd->mnt = mntget(current->fs->rootmnt);
1101                 nd->dentry = dget(current->fs->root);
1102                 read_unlock(&current->fs->lock);
1103                 if (path_walk(name, nd) == 0) {
1104                         if (nd->dentry->d_inode) {
1105                                 dput(old_dentry);
1106                                 mntput(old_mnt);
1107                                 return 1;
1108                         }
1109                         path_release(nd);
1110                 }
1111                 nd->dentry = old_dentry;
1112                 nd->mnt = old_mnt;
1113                 nd->last = last;
1114                 nd->last_type = last_type;
1115         }
1116         return 1;
1117 }
1118
1119 void set_fs_altroot(void)
1120 {
1121         char *emul = __emul_prefix();
1122         struct nameidata nd;
1123         struct vfsmount *mnt = NULL, *oldmnt;
1124         struct dentry *dentry = NULL, *olddentry;
1125         int err;
1126
1127         if (!emul)
1128                 goto set_it;
1129         err = path_lookup(emul, LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_NOALT, &nd);
1130         if (!err) {
1131                 mnt = nd.mnt;
1132                 dentry = nd.dentry;
1133         }
1134 set_it:
1135         write_lock(&current->fs->lock);
1136         oldmnt = current->fs->altrootmnt;
1137         olddentry = current->fs->altroot;
1138         current->fs->altrootmnt = mnt;
1139         current->fs->altroot = dentry;
1140         write_unlock(&current->fs->lock);
1141         if (olddentry) {
1142                 dput(olddentry);
1143                 mntput(oldmnt);
1144         }
1145 }
1146
1147 /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
1148 static int fastcall do_path_lookup(int dfd, const char *name,
1149                                 unsigned int flags, struct nameidata *nd)
1150 {
1151         int retval = 0;
1152         int fput_needed;
1153         struct file *file;
1154
1155         nd->last_type = LAST_ROOT; /* if there are only slashes... */
1156         nd->flags = flags;
1157         nd->depth = 0;
1158
1159         if (*name=='/') {
1160                 read_lock(&current->fs->lock);
1161                 if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
1162                         nd->mnt = mntget(current->fs->altrootmnt);
1163                         nd->dentry = dget(current->fs->altroot);
1164                         read_unlock(&current->fs->lock);
1165                         if (__emul_lookup_dentry(name,nd))
1166                                 goto out; /* found in altroot */
1167                         read_lock(&current->fs->lock);
1168                 }
1169                 nd->mnt = mntget(current->fs->rootmnt);
1170                 nd->dentry = dget(current->fs->root);
1171                 read_unlock(&current->fs->lock);
1172         } else if (dfd == AT_FDCWD) {
1173                 read_lock(&current->fs->lock);
1174                 nd->mnt = mntget(current->fs->pwdmnt);
1175                 nd->dentry = dget(current->fs->pwd);
1176                 read_unlock(&current->fs->lock);
1177         } else {
1178                 struct dentry *dentry;
1179
1180                 file = fget_light(dfd, &fput_needed);
1181                 retval = -EBADF;
1182                 if (!file)
1183                         goto out_fail;
1184
1185                 dentry = file->f_dentry;
1186
1187                 retval = -ENOTDIR;
1188                 if (!S_ISDIR(dentry->d_inode->i_mode))
1189                         goto fput_fail;
1190
1191                 retval = file_permission(file, MAY_EXEC);
1192                 if (retval)
1193                         goto fput_fail;
1194
1195                 nd->mnt = mntget(file->f_vfsmnt);
1196                 nd->dentry = dget(dentry);
1197
1198                 fput_light(file, fput_needed);
1199         }
1200         current->total_link_count = 0;
1201         retval = link_path_walk(name, nd);
1202 out:
1203         if (likely(retval == 0)) {
1204                 if (unlikely(current->audit_context && nd && nd->dentry &&
1205                                 nd->dentry->d_inode))
1206                 audit_inode(name, nd->dentry->d_inode, flags);
1207         }
1208 out_fail:
1209         return retval;
1210
1211 fput_fail:
1212         fput_light(file, fput_needed);
1213         goto out_fail;
1214 }
1215
1216 int fastcall path_lookup(const char *name, unsigned int flags,
1217                         struct nameidata *nd)
1218 {
1219         return do_path_lookup(AT_FDCWD, name, flags, nd);
1220 }
1221
1222 static int __path_lookup_intent_open(int dfd, const char *name,
1223                 unsigned int lookup_flags, struct nameidata *nd,
1224                 int open_flags, int create_mode)
1225 {
1226         struct file *filp = get_empty_filp();
1227         int err;
1228
1229         if (filp == NULL)
1230                 return -ENFILE;
1231         nd->intent.open.file = filp;
1232         nd->intent.open.flags = open_flags;
1233         nd->intent.open.create_mode = create_mode;
1234         err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd);
1235         if (IS_ERR(nd->intent.open.file)) {
1236                 if (err == 0) {
1237                         err = PTR_ERR(nd->intent.open.file);
1238                         path_release(nd);
1239                 }
1240         } else if (err != 0)
1241                 release_open_intent(nd);
1242         return err;
1243 }
1244
/**
 * path_lookup_open - lookup a file path with open intent
 * @dfd: the directory to use as base, or AT_FDCWD
 * @name: pointer to file name
 * @lookup_flags: lookup intent flags
 * @nd: pointer to nameidata
 * @open_flags: open intent flags
 *
 * Same as __path_lookup_intent_open() with a create mode of 0.
 */
int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags,
                struct nameidata *nd, int open_flags)
{
        int err;

        err = __path_lookup_intent_open(dfd, name, lookup_flags, nd,
                                        open_flags, 0);
        return err;
}
1259
1260 /**
1261  * path_lookup_create - lookup a file path with open + create intent
1262  * @dfd: the directory to use as base, or AT_FDCWD
1263  * @name: pointer to file name
1264  * @lookup_flags: lookup intent flags
1265  * @nd: pointer to nameidata
1266  * @open_flags: open intent flags
1267  * @create_mode: create intent flags
1268  */
1269 static int path_lookup_create(int dfd, const char *name,
1270                               unsigned int lookup_flags, struct nameidata *nd,
1271                               int open_flags, int create_mode)
1272 {
1273         return __path_lookup_intent_open(dfd, name, lookup_flags|LOOKUP_CREATE,
1274                         nd, open_flags, create_mode);
1275 }
1276
1277 int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags,
1278                 struct nameidata *nd, int open_flags)
1279 {
1280         char *tmp = getname(name);
1281         int err = PTR_ERR(tmp);
1282
1283         if (!IS_ERR(tmp)) {
1284                 err = __path_lookup_intent_open(AT_FDCWD, tmp, lookup_flags, nd, open_flags, 0);
1285                 putname(tmp);
1286         }
1287         return err;
1288 }
1289
1290 /*
1291  * Restricted form of lookup. Doesn't follow links, single-component only,
1292  * needs parent already locked. Doesn't follow mounts.
1293  * SMP-safe.
1294  */
1295 static struct dentry * __lookup_hash(struct qstr *name, struct dentry * base, struct nameidata *nd)
1296 {
1297         struct dentry * dentry;
1298         struct inode *inode;
1299         int err;
1300
1301         inode = base->d_inode;
1302         err = permission(inode, MAY_EXEC, nd);
1303         dentry = ERR_PTR(err);
1304         if (err)
1305                 goto out;
1306
1307         /*
1308          * See if the low-level filesystem might want
1309          * to use its own hash..
1310          */
1311         if (base->d_op && base->d_op->d_hash) {
1312                 err = base->d_op->d_hash(base, name);
1313                 dentry = ERR_PTR(err);
1314                 if (err < 0)
1315                         goto out;
1316         }
1317
1318         dentry = cached_lookup(base, name, nd);
1319         if (!dentry) {
1320                 struct dentry *new = d_alloc(base, name);
1321                 dentry = ERR_PTR(-ENOMEM);
1322                 if (!new)
1323                         goto out;
1324                 dentry = inode->i_op->lookup(inode, new, nd);
1325                 if (!dentry)
1326                         dentry = new;
1327                 else
1328                         dput(new);
1329         }
1330 out:
1331         return dentry;
1332 }
1333
1334 static struct dentry *lookup_hash(struct nameidata *nd)
1335 {
1336         return __lookup_hash(&nd->last, nd->dentry, nd);
1337 }
1338
1339 /* SMP-safe */
1340 struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
1341 {
1342         unsigned long hash;
1343         struct qstr this;
1344         unsigned int c;
1345
1346         this.name = name;
1347         this.len = len;
1348         if (!len)
1349                 goto access;
1350
1351         hash = init_name_hash();
1352         while (len--) {
1353                 c = *(const unsigned char *)name++;
1354                 if (c == '/' || c == '\0')
1355                         goto access;
1356                 hash = partial_name_hash(c, hash);
1357         }
1358         this.hash = end_name_hash(hash);
1359
1360         return __lookup_hash(&this, base, NULL);
1361 access:
1362         return ERR_PTR(-EACCES);
1363 }
1364
1365 /*
1366  *      namei()
1367  *
1368  * is used by most simple commands to get the inode of a specified name.
1369  * Open, link etc use their own routines, but this is enough for things
1370  * like 'chmod' etc.
1371  *
1372  * namei exists in two versions: namei/lnamei. The only difference is
1373  * that namei follows links, while lnamei does not.
1374  * SMP-safe
1375  */
1376 int fastcall __user_walk_fd(int dfd, const char __user *name, unsigned flags,
1377                             struct nameidata *nd)
1378 {
1379         char *tmp = getname(name);
1380         int err = PTR_ERR(tmp);
1381
1382         if (!IS_ERR(tmp)) {
1383                 err = do_path_lookup(dfd, tmp, flags, nd);
1384                 putname(tmp);
1385         }
1386         return err;
1387 }
1388
1389 int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd)
1390 {
1391         return __user_walk_fd(AT_FDCWD, name, flags, nd);
1392 }
1393
1394 /*
1395  * It's inline, so penalty for filesystems that don't use sticky bit is
1396  * minimal.
1397  */
1398 static inline int check_sticky(struct inode *dir, struct inode *inode)
1399 {
1400         if (!(dir->i_mode & S_ISVTX))
1401                 return 0;
1402         if (inode->i_uid == current->fsuid)
1403                 return 0;
1404         if (dir->i_uid == current->fsuid)
1405                 return 0;
1406         return !capable(CAP_FOWNER);
1407 }
1408
1409 /*
1410  *      Check whether we can remove a link victim from directory dir, check
1411  *  whether the type of victim is right.
1412  *  1. We can't do it if dir is read-only (done in permission())
1413  *  2. We should have write and exec permissions on dir
1414  *  3. We can't remove anything from append-only dir
1415  *  4. We can't do anything with immutable dir (done in permission())
1416  *  5. If the sticky bit on dir is set we should either
1417  *      a. be owner of dir, or
1418  *      b. be owner of victim, or
1419  *      c. have CAP_FOWNER capability
1420  *  6. If the victim is append-only or immutable we can't do antyhing with
1421  *     links pointing to it.
1422  *  7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
1423  *  8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
1424  *  9. We can't remove a root or mountpoint.
1425  * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
1426  *     nfs_async_unlink().
1427  */
1428 static int may_delete(struct inode *dir, struct dentry *victim,
1429         int isdir, struct nameidata *nd)
1430 {
1431         int error;
1432
1433         if (!victim->d_inode)
1434                 return -ENOENT;
1435
1436         BUG_ON(victim->d_parent->d_inode != dir);
1437         audit_inode_child(victim->d_name.name, victim->d_inode, dir->i_ino);
1438
1439         error = permission(dir,MAY_WRITE | MAY_EXEC, nd);
1440         if (error)
1441                 return error;
1442         if (IS_APPEND(dir))
1443                 return -EPERM;
1444         if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
1445                 IS_IXORUNLINK(victim->d_inode))
1446                 return -EPERM;
1447         if (isdir) {
1448                 if (!S_ISDIR(victim->d_inode->i_mode))
1449                         return -ENOTDIR;
1450                 if (IS_ROOT(victim))
1451                         return -EBUSY;
1452         } else if (S_ISDIR(victim->d_inode->i_mode))
1453                 return -EISDIR;
1454         if (IS_DEADDIR(dir))
1455                 return -ENOENT;
1456         if (victim->d_flags & DCACHE_NFSFS_RENAMED)
1457                 return -EBUSY;
1458         return 0;
1459 }
1460
1461 /*      Check whether we can create an object with dentry child in directory
1462  *  dir.
1463  *  1. We can't do it if child already exists (open has special treatment for
1464  *     this case, but since we are inlined it's OK)
1465  *  2. We can't do it if dir is read-only (done in permission())
1466  *  3. We should have write and exec permissions on dir
1467  *  4. We can't do it if dir is immutable (done in permission())
1468  */
1469 static inline int may_create(struct inode *dir, struct dentry *child,
1470                              struct nameidata *nd)
1471 {
1472         if (child->d_inode)
1473                 return -EEXIST;
1474         if (IS_DEADDIR(dir))
1475                 return -ENOENT;
1476         return permission(dir,MAY_WRITE | MAY_EXEC, nd);
1477 }
1478
1479 /*
1480  * O_DIRECTORY translates into forcing a directory lookup.
1481  */
1482 static inline int lookup_flags(unsigned int f)
1483 {
1484         unsigned long retval = LOOKUP_FOLLOW;
1485
1486         if (f & O_NOFOLLOW)
1487                 retval &= ~LOOKUP_FOLLOW;
1488
1489         if (f & O_DIRECTORY)
1490                 retval |= LOOKUP_DIRECTORY;
1491         if (f & O_ATOMICLOOKUP)
1492                 retval |= LOOKUP_ATOMIC;
1493
1494         return retval;
1495 }
1496
1497 /*
1498  * p1 and p2 should be directories on the same fs.
1499  */
1500 struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
1501 {
1502         struct dentry *p;
1503
1504         if (p1 == p2) {
1505                 mutex_lock(&p1->d_inode->i_mutex);
1506                 return NULL;
1507         }
1508
1509         mutex_lock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
1510
1511         for (p = p1; p->d_parent != p; p = p->d_parent) {
1512                 if (p->d_parent == p2) {
1513                         mutex_lock(&p2->d_inode->i_mutex);
1514                         mutex_lock(&p1->d_inode->i_mutex);
1515                         return p;
1516                 }
1517         }
1518
1519         for (p = p2; p->d_parent != p; p = p->d_parent) {
1520                 if (p->d_parent == p1) {
1521                         mutex_lock(&p1->d_inode->i_mutex);
1522                         mutex_lock(&p2->d_inode->i_mutex);
1523                         return p;
1524                 }
1525         }
1526
1527         mutex_lock(&p1->d_inode->i_mutex);
1528         mutex_lock(&p2->d_inode->i_mutex);
1529         return NULL;
1530 }
1531
1532 void unlock_rename(struct dentry *p1, struct dentry *p2)
1533 {
1534         mutex_unlock(&p1->d_inode->i_mutex);
1535         if (p1 != p2) {
1536                 mutex_unlock(&p2->d_inode->i_mutex);
1537                 mutex_unlock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
1538         }
1539 }
1540
1541 int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
1542                 struct nameidata *nd)
1543 {
1544         int error = may_create(dir, dentry, nd);
1545
1546         if (error)
1547                 return error;
1548
1549         if (!dir->i_op || !dir->i_op->create)
1550                 return -EACCES; /* shouldn't it be ENOSYS? */
1551         mode &= S_IALLUGO;
1552         mode |= S_IFREG;
1553         error = security_inode_create(dir, dentry, mode);
1554         if (error)
1555                 return error;
1556         DQUOT_INIT(dir);
1557         error = dir->i_op->create(dir, dentry, mode, nd);
1558         if (!error)
1559                 fsnotify_create(dir, dentry);
1560         return error;
1561 }
1562
1563 int may_open(struct nameidata *nd, int acc_mode, int flag)
1564 {
1565         struct dentry *dentry = nd->dentry;
1566         struct inode *inode = dentry->d_inode;
1567         int error;
1568
1569         if (!inode)
1570                 return -ENOENT;
1571
1572         if (S_ISLNK(inode->i_mode))
1573                 return -ELOOP;
1574
1575         if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
1576                 return -EISDIR;
1577
1578         error = vfs_permission(nd, acc_mode);
1579         if (error)
1580                 return error;
1581
1582         /*
1583          * FIFO's, sockets and device files are special: they don't
1584          * actually live on the filesystem itself, and as such you
1585          * can write to them even if the filesystem is read-only.
1586          */
1587         if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
1588                 flag &= ~O_TRUNC;
1589         } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
1590                 if (nd->mnt->mnt_flags & MNT_NODEV)
1591                         return -EACCES;
1592
1593                 flag &= ~O_TRUNC;
1594         } else if ((IS_RDONLY(inode) || MNT_IS_RDONLY(nd->mnt))
1595                 && (flag & FMODE_WRITE))
1596                 return -EROFS;
1597         /*
1598          * An append-only file must be opened in append mode for writing.
1599          */
1600         if (IS_APPEND(inode)) {
1601                 if  ((flag & FMODE_WRITE) && !(flag & O_APPEND))
1602                         return -EPERM;
1603                 if (flag & O_TRUNC)
1604                         return -EPERM;
1605         }
1606
1607         /* O_NOATIME can only be set by the owner or superuser */
1608         if (flag & O_NOATIME)
1609                 if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
1610                         return -EPERM;
1611
1612         /*
1613          * Ensure there are no outstanding leases on the file.
1614          */
1615         error = break_lease(inode, flag);
1616         if (error)
1617                 return error;
1618
1619         if (flag & O_TRUNC) {
1620                 error = get_write_access(inode);
1621                 if (error)
1622                         return error;
1623
1624                 /*
1625                  * Refuse to truncate files with mandatory locks held on them.
1626                  */
1627                 error = locks_verify_locked(inode);
1628                 if (!error) {
1629                         DQUOT_INIT(inode);
1630
1631                         error = do_truncate(dentry, 0, ATTR_MTIME|ATTR_CTIME, NULL);
1632                 }
1633                 put_write_access(inode);
1634                 if (error)
1635                         return error;
1636         } else
1637                 if (flag & FMODE_WRITE)
1638                         DQUOT_INIT(inode);
1639
1640         return 0;
1641 }
1642
1643 /*
1644  *      open_namei()
1645  *
1646  * namei for open - this is in fact almost the whole open-routine.
1647  *
1648  * Note that the low bits of "flag" aren't the same as in the open
1649  * system call - they are 00 - no permissions needed
1650  *                        01 - read permission needed
1651  *                        10 - write permission needed
1652  *                        11 - read/write permissions needed
1653  * which is a lot more logical, and also allows the "no perm" needed
1654  * for symlinks (where the permissions are checked later).
1655  * SMP-safe
1656  */
1657 int open_namei(int dfd, const char *pathname, int flag,
1658                 int mode, struct nameidata *nd)
1659 {
1660         int acc_mode, error;
1661         struct path path;
1662         struct dentry *dir;
             /* Trailing symlinks followed so far in do_link (capped at 32). */
1663         int count = 0;
1664
             /* Map the low access bits of flag onto a permission mask. */
1665         acc_mode = ACC_MODE(flag);
1666
1667         /* O_TRUNC implies we need access checks for write permissions */
1668         if (flag & O_TRUNC)
1669                 acc_mode |= MAY_WRITE;
1670
1671         /* Allow the LSM permission hook to distinguish append
1672            access from general write access. */
1673         if (flag & O_APPEND)
1674                 acc_mode |= MAY_APPEND;
1675
1676         /*
1677          * The simplest case - just a plain lookup.
1678          */
1679         if (!(flag & O_CREAT)) {
1680                 error = path_lookup_open(dfd, pathname, lookup_flags(flag),
1681                                          nd, flag);
1682                 if (error)
1683                         return error;
1684                 goto ok;
1685         }
1686
1687         /*
1688          * Create - we need to know the parent.
1689          */
1690         error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode);
1691         if (error)
1692                 return error;
1693
1694         /*
1695          * We have the parent and last component. First of all, check
1696          * that we are not asked to creat(2) an obvious directory - that
1697          * will not do.
1698          */
1699         error = -EISDIR;
1700         if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
1701                 goto exit;
1702
             /*
              * Look the last component up in the parent with the parent's
              * i_mutex held; the mutex stays held on entry to do_last.
              */
1703         dir = nd->dentry;
1704         nd->flags &= ~LOOKUP_PARENT;
1705         mutex_lock(&dir->d_inode->i_mutex);
1706         path.dentry = lookup_hash(nd);
1707         path.mnt = nd->mnt;
1708
             /*
              * Reached both by falling through from above and by the jump at
              * the end of do_link.  In both cases dir->d_inode->i_mutex is held
              * and path.dentry is the (possibly ERR_PTR) result of lookup_hash().
              */
1709 do_last:
1710         error = PTR_ERR(path.dentry);
1711         if (IS_ERR(path.dentry)) {
1712                 mutex_unlock(&dir->d_inode->i_mutex);
1713                 goto exit;
1714         }
1715
             /* The open intent may already carry an error pointer; report it. */
1716         if (IS_ERR(nd->intent.open.file)) {
1717                 mutex_unlock(&dir->d_inode->i_mutex);
1718                 error = PTR_ERR(nd->intent.open.file);
1719                 goto exit_dput;
1720         }
1721
1722         /* Negative dentry, just create the file */
1723         if (!path.dentry->d_inode) {
1724                 if (!IS_POSIXACL(dir->d_inode))
1725                         mode &= ~current->fs->umask;
1726                 error = vfs_create(dir->d_inode, path.dentry, mode, nd);
1727                 mutex_unlock(&dir->d_inode->i_mutex);
                     /*
                      * Make nd refer to the looked-up dentry instead of the
                      * parent: drop the parent reference and hand over
                      * path.dentry.  This is done even if vfs_create() failed,
                      * so the exit path releases the right dentry.
                      */
1728                 dput(nd->dentry);
1729                 nd->dentry = path.dentry;
1730                 if (error)
1731                         goto exit;
1732                 /* Don't check for write permission, don't truncate */
1733                 acc_mode = 0;
1734                 flag &= ~O_TRUNC;
1735                 goto ok;
1736         }
1737
1738         /*
1739          * It already exists.
1740          */
1741         mutex_unlock(&dir->d_inode->i_mutex);
1742
1743         error = -EEXIST;
1744         if (flag & O_EXCL)
1745                 goto exit_dput;
1746
             /* With O_NOFOLLOW, refuse (-ELOOP) if a mount was crossed here. */
1747         if (__follow_mount(&path)) {
1748                 error = -ELOOP;
1749                 if (flag & O_NOFOLLOW)
1750                         goto exit_dput;
1751         }
1752         error = -ENOENT;
1753         if (!path.dentry->d_inode)
1754                 goto exit_dput;
             /* An inode with a ->follow_link method is handled in do_link. */
1755         if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link)
1756                 goto do_link;
1757
1758         path_to_nameidata(&path, nd);
1759         error = -EISDIR;
1760         if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
1761                 goto exit;
             /* Success path: the final permission/flag checks are in may_open(). */
1762 ok:
1763         error = may_open(nd, acc_mode, flag);
1764         if (error)
1765                 goto exit;
1766         return 0;
1767
             /* Failure with path still separate from nd: drop path first. */
1768 exit_dput:
1769         dput_path(&path, nd);
             /*
              * Common failure exit: release the open intent (unless
              * intent.open.file is itself an error pointer) and release nd.
              */
1770 exit:
1771         if (!IS_ERR(nd->intent.open.file))
1772                 release_open_intent(nd);
1773         path_release(nd);
1774         return error;
1775
1776 do_link:
1777         error = -ELOOP;
1778         if (flag & O_NOFOLLOW)
1779                 goto exit_dput;
1780         /*
1781          * This is subtle. Instead of calling do_follow_link() we do the
1782          * thing by hand. The reason is that this way we have zero link_count
1783          * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
1784          * After that we have the parent and last component, i.e.
1785          * we are in the same situation as after the first path_walk().
1786          * Well, almost - if the last component is normal we get its copy
1787          * stored in nd->last.name and we will have to putname() it when we
1788          * are done. Procfs-like symlinks just set LAST_BIND.
1789          */
1790         nd->flags |= LOOKUP_PARENT;
1791         error = security_inode_follow_link(path.dentry, nd);
1792         if (error)
1793                 goto exit_dput;
1794         error = __do_follow_link(&path, nd);
1795         if (error) {
1796                 /* Does someone understand code flow here? Or it is only
1797                  * me so stupid? Anathema to whoever designed this non-sense
1798                  * with "intent.open".
1799                  */
                     /*
                      * NOTE(review): unlike the exit label, this return skips
                      * path_release(nd); presumably __do_follow_link() has
                      * already released nd on failure -- confirm.
                      */
1800                 release_open_intent(nd);
1801                 return error;
1802         }
1803         nd->flags &= ~LOOKUP_PARENT;
1804         if (nd->last_type == LAST_BIND)
1805                 goto ok;
1806         error = -EISDIR;
1807         if (nd->last_type != LAST_NORM)
1808                 goto exit;
             /* Trailing characters after the last component: -EISDIR. */
1809         if (nd->last.name[nd->last.len]) {
1810                 __putname(nd->last.name);
1811                 goto exit;
1812         }
             /* Give up with -ELOOP once 32 links have already been followed. */
1813         error = -ELOOP;
1814         if (count++==32) {
1815                 __putname(nd->last.name);
1816                 goto exit;
1817         }
             /*
              * Same setup as before the first pass through do_last: lock the
              * new parent, look up the last component, then free the copied
              * name since it is no longer needed.
              */
1818         dir = nd->dentry;
1819         mutex_lock(&dir->d_inode->i_mutex);
1820         path.dentry = lookup_hash(nd);
1821         path.mnt = nd->mnt;
1822         __putname(nd->last.name);
1823         goto do_last;
1824 }
1825
1826 /**
1827  * lookup_create - look up the last component in preparation for creating it
1828  * @nd: nameidata holding the parent (nd->dentry) and last component (nd->last)
1829  * @is_dir: non-zero if the caller is about to create a directory
1830  *
1831  * Locks the parent's i_mutex and looks up the last component with
1832  * lookup_hash().  Nothing is created in the filesystem here.  Is SMP-safe.
1833  *
1834  * Returns with nd->dentry->d_inode->i_mutex locked.
      * The mutex is held on return in the failure cases too; the callers
      * in this file unlock it unconditionally.
      *
      * Returns the dentry on success, otherwise an ERR_PTR():
      *   -EEXIST if the last component is not LAST_NORM,
      *   -ENOENT for a negative dentry named with a trailing slash when
      *           @is_dir is zero,
      *   or whatever error lookup_hash() produced.
1835  */
1836 struct dentry *lookup_create(struct nameidata *nd, int is_dir)
1837 {
1838         struct dentry *dentry = ERR_PTR(-EEXIST);
1839
1840         mutex_lock(&nd->dentry->d_inode->i_mutex);
1841         /*
1842          * Yucky last component or no last component at all?
1843          * (foo/., foo/.., /////)
1844          */
1845         if (nd->last_type != LAST_NORM)
1846                 goto fail;
1847         nd->flags &= ~LOOKUP_PARENT;
1848
1849         /*
1850          * Do the final lookup.
1851          */
1852         dentry = lookup_hash(nd);
1853         if (IS_ERR(dentry))
1854                 goto fail;
1855
1856         /*
1857          * Special case - lookup gave negative, but... we had foo/bar/
1858          * From the vfs_mknod() POV we just have a negative dentry -
1859          * all is fine. Let's be bastards - you had / on the end, you've
1860          * been asking for (non-existent) directory. -ENOENT for you.
1861          */
1862         if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
1863                 goto enoent;
1864         return dentry;
1865 enoent:
             /* Drop the reference obtained from lookup_hash() before failing. */
1866         dput(dentry);
1867         dentry = ERR_PTR(-ENOENT);
1868 fail:
1869         return dentry;
1870 }
1871 EXPORT_SYMBOL_GPL(lookup_create);
1872
     /*
      * vfs_mknod - create a filesystem node for dentry in directory dir.
      *
      * Performs, in order: the may_create() check, the CAP_MKNOD requirement
      * for character and block devices, the check that the directory has a
      * ->mknod method, and the security_inode_mknod() hook.  It then calls
      * the filesystem's ->mknod and, on success, fsnotify_create().
      *
      * Returns 0 on success or a negative errno; -EPERM is returned both for
      * a missing capability and for a directory without ->mknod.
      */
1873 int vfs_mknod(struct inode *dir, struct dentry *dentry,
1874         int mode, dev_t dev, struct nameidata *nd)
1875 {
1876         int error = may_create(dir, dentry, nd);
1877
1878         if (error)
1879                 return error;
1880
1881         if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
1882                 return -EPERM;
1883
1884         if (!dir->i_op || !dir->i_op->mknod)
1885                 return -EPERM;
1886
1887         error = security_inode_mknod(dir, dentry, mode, dev);
1888         if (error)
1889                 return error;
1890
1891         DQUOT_INIT(dir);
1892         error = dir->i_op->mknod(dir, dentry, mode, dev);
1893         if (!error)
1894                 fsnotify_create(dir, dentry);
1895         return error;
1896 }
1897
     /*
      * sys_mknodat - mknodat(2) entry point.
      *
      * Copies the user path, looks up its parent relative to dfd, and then
      * dispatches on the file type in mode: regular files go to vfs_create(),
      * device nodes, FIFOs and sockets go to vfs_mknod().  Directories are
      * refused with -EPERM, unknown types with -EINVAL.
      *
      * Returns 0 on success or a negative errno.
      */
1898 asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
1899                                 unsigned dev)
1900 {
1901         int error = 0;
1902         char * tmp;
1903         struct dentry * dentry;
1904         struct nameidata nd;
1905
1906         if (S_ISDIR(mode))
1907                 return -EPERM;
1908         tmp = getname(filename);
1909         if (IS_ERR(tmp))
1910                 return PTR_ERR(tmp);
1911
1912         error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd);
1913         if (error)
1914                 goto out;
             /* lookup_create() returns with the parent's i_mutex held. */
1915         dentry = lookup_create(&nd, 0);
1916         error = PTR_ERR(dentry);
1917
             /*
              * The umask is applied before the IS_ERR() test; mode is only
              * used inside the success branch below.
              */
1918         if (!IS_POSIXACL(nd.dentry->d_inode))
1919                 mode &= ~current->fs->umask;
1920         if (!IS_ERR(dentry)) {
1921                 switch (mode & S_IFMT) {
1922                 case 0: case S_IFREG:
1923                         error = vfs_create(nd.dentry->d_inode,dentry,mode,&nd);
1924                         break;
1925                 case S_IFCHR: case S_IFBLK:
1926                         error = vfs_mknod(nd.dentry->d_inode, dentry, mode,
1927                                         new_decode_dev(dev), &nd);
1928                         break;
                     /* FIFOs and sockets carry no device number: pass 0. */
1929                 case S_IFIFO: case S_IFSOCK:
1930                         error = vfs_mknod(nd.dentry->d_inode, dentry, mode,
1931                                         0, &nd);
1932                         break;
1933                 case S_IFDIR:
1934                         error = -EPERM;
1935                         break;
1936                 default:
1937                         error = -EINVAL;
1938                 }
1939                 dput(dentry);
1940         }
             /* Unlock unconditionally: the mutex is held even on lookup failure. */
1941         mutex_unlock(&nd.dentry->d_inode->i_mutex);
1942         path_release(&nd);
1943 out:
1944         putname(tmp);
1945
1946         return error;
1947 }
1948
     /* mknod(2): sys_mknodat() with the path resolved relative to AT_FDCWD. */
1949 asmlinkage long sys_mknod(const char __user *filename, int mode, unsigned dev)
1950 {
1951         return sys_mknodat(AT_FDCWD, filename, mode, dev);
1952 }
1953
     /*
      * vfs_mkdir - create directory dentry inside directory dir.
      *
      * Checks may_create() and that the parent has a ->mkdir method
      * (-EPERM otherwise), restricts mode to the permission and sticky bits,
      * consults security_inode_mkdir(), then calls the filesystem's ->mkdir.
      * fsnotify_mkdir() is called on success.
      *
      * Returns 0 on success or a negative errno.
      */
1954 int vfs_mkdir(struct inode *dir, struct dentry *dentry,
1955         int mode, struct nameidata *nd)
1956 {
1957         int error = may_create(dir, dentry, nd);
1958
1959         if (error)
1960                 return error;
1961
1962         if (!dir->i_op || !dir->i_op->mkdir)
1963                 return -EPERM;
1964
             /* Keep only rwx for user/group/other plus the sticky bit. */
1965         mode &= (S_IRWXUGO|S_ISVTX);
1966         error = security_inode_mkdir(dir, dentry, mode);
1967         if (error)
1968                 return error;
1969
1970         DQUOT_INIT(dir);
1971         error = dir->i_op->mkdir(dir, dentry, mode);
1972         if (!error)
1973                 fsnotify_mkdir(dir, dentry);
1974         return error;
1975 }
1976
     /*
      * sys_mkdirat - mkdirat(2) entry point.
      *
      * Copies the user path, looks up its parent relative to dfd, applies the
      * process umask unless the parent uses POSIX ACLs, and calls vfs_mkdir().
      *
      * Returns 0 on success or a negative errno.
      */
1977 asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode)
1978 {
1979         int error = 0;
1980         char * tmp;
1981
1982         tmp = getname(pathname);
1983         error = PTR_ERR(tmp);
1984         if (!IS_ERR(tmp)) {
1985                 struct dentry *dentry;
1986                 struct nameidata nd;
1987
1988                 error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd);
1989                 if (error)
1990                         goto out;
                     /* is_dir = 1: a trailing slash on the new name is accepted. */
1991                 dentry = lookup_create(&nd, 1);
1992                 error = PTR_ERR(dentry);
1993                 if (!IS_ERR(dentry)) {
1994                         if (!IS_POSIXACL(nd.dentry->d_inode))
1995                                 mode &= ~current->fs->umask;
1996                         error = vfs_mkdir(nd.dentry->d_inode, dentry,
1997                                 mode, &nd);
1998                         dput(dentry);
1999                 }
                     /* lookup_create() left the parent locked, even on failure. */
2000                 mutex_unlock(&nd.dentry->d_inode->i_mutex);
2001                 path_release(&nd);
2002 out:
2003                 putname(tmp);
2004         }
2005
2006         return error;
2007 }
2008
     /* mkdir(2): sys_mkdirat() with the path resolved relative to AT_FDCWD. */
2009 asmlinkage long sys_mkdir(const char __user *pathname, int mode)
2010 {
2011         return sys_mkdirat(AT_FDCWD, pathname, mode);
2012 }
2013
2014 /*
2015  * We try to drop the dentry early: we should have
2016  * a usage count of 2 if we're the only user of this
2017  * dentry, and if that is true (possibly after pruning
2018  * the dcache), then we drop the dentry now.
2019  *
2020  * A low-level filesystem can, if it chooses, legally
2021  * do a
2022  *
2023  *      if (!d_unhashed(dentry))
2024  *              return -EBUSY;
2025  *
2026  * if it cannot handle the case of removing a directory
2027  * that is still in use by something else.
      *
      * Note that the reference taken by dget() below is still held when
      * this function returns; the callers in this file (vfs_rmdir() and
      * vfs_rename_dir()) drop it with dput().
2028  */
2029 void dentry_unhash(struct dentry *dentry)
2030 {
2031         dget(dentry);
             /*
              * NOTE(review): this test follows the dget() above, so the
              * count presumably cannot be zero here -- confirm.
              */
2032         if (atomic_read(&dentry->d_count))
2033                 shrink_dcache_parent(dentry);
2034         spin_lock(&dcache_lock);
2035         spin_lock(&dentry->d_lock);
             /* 2 == the caller's reference plus the one taken by dget() above. */
2036         if (atomic_read(&dentry->d_count) == 2)
2037                 __d_drop(dentry);
2038         spin_unlock(&dentry->d_lock);
2039         spin_unlock(&dcache_lock);
2040 }
2041
     /*
      * vfs_rmdir - remove directory dentry from directory dir.
      *
      * Checks may_delete() and that the parent has a ->rmdir method (-EPERM
      * otherwise).  With the victim's i_mutex held it tries to unhash the
      * dentry, refuses mount points with -EBUSY, consults
      * security_inode_rmdir() and calls the filesystem's ->rmdir.  On success
      * the inode is flagged S_DEAD and the dentry is passed to d_delete().
      *
      * Returns 0 on success or a negative errno.
      */
2042 int vfs_rmdir(struct inode *dir, struct dentry *dentry,
2043         struct nameidata *nd)
2044 {
2045         int error = may_delete(dir, dentry, 1, nd);
2046
2047         if (error)
2048                 return error;
2049
2050         if (!dir->i_op || !dir->i_op->rmdir)
2051                 return -EPERM;
2052
2053         DQUOT_INIT(dir);
2054
2055         mutex_lock(&dentry->d_inode->i_mutex);
             /* Takes an extra reference on dentry; dropped by the dput() below. */
2056         dentry_unhash(dentry);
2057         if (d_mountpoint(dentry))
2058                 error = -EBUSY;
2059         else {
2060                 error = security_inode_rmdir(dir, dentry);
2061                 if (!error) {
2062                         error = dir->i_op->rmdir(dir, dentry);
2063                         if (!error)
2064                                 dentry->d_inode->i_flags |= S_DEAD;
2065                 }
2066         }
2067         mutex_unlock(&dentry->d_inode->i_mutex);
2068         if (!error) {
2069                 d_delete(dentry);
2070         }
2071         dput(dentry);
2072
2073         return error;
2074 }
2075
     /*
      * do_rmdir - common implementation of rmdir(2) and
      * unlinkat(2) with AT_REMOVEDIR.
      *
      * Looks up the parent of pathname relative to dfd, rejects "..", "."
      * and the root as last component, then looks the victim up under the
      * parent's i_mutex and hands it to vfs_rmdir().
      *
      * Returns 0 on success or a negative errno.
      */
2076 static long do_rmdir(int dfd, const char __user *pathname)
2077 {
2078         int error = 0;
2079         char * name;
2080         struct dentry *dentry;
2081         struct nameidata nd;
2082
2083         name = getname(pathname);
2084         if(IS_ERR(name))
2085                 return PTR_ERR(name);
2086
2087         error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd);
2088         if (error)
2089                 goto exit;
2090
             /* Last components that can never be removed, each with its own errno. */
2091         switch(nd.last_type) {
2092                 case LAST_DOTDOT:
2093                         error = -ENOTEMPTY;
2094                         goto exit1;
2095                 case LAST_DOT:
2096                         error = -EINVAL;
2097                         goto exit1;
2098                 case LAST_ROOT:
2099                         error = -EBUSY;
2100                         goto exit1;
2101         }
2102         mutex_lock(&nd.dentry->d_inode->i_mutex);
2103         dentry = lookup_hash(&nd);
2104         error = PTR_ERR(dentry);
2105         if (!IS_ERR(dentry)) {
2106                 error = vfs_rmdir(nd.dentry->d_inode, dentry, &nd);
2107                 dput(dentry);
2108         }
2109         mutex_unlock(&nd.dentry->d_inode->i_mutex);
2110 exit1:
2111         path_release(&nd);
2112 exit:
2113         putname(name);
2114         return error;
2115 }
2116
     /* rmdir(2): do_rmdir() with the path resolved relative to AT_FDCWD. */
2117 asmlinkage long sys_rmdir(const char __user *pathname)
2118 {
2119         return do_rmdir(AT_FDCWD, pathname);
2120 }
2121
     /*
      * vfs_unlink - remove the non-directory entry dentry from directory dir.
      *
      * Checks may_delete() and that the parent has an ->unlink method (-EPERM
      * otherwise).  With the victim's i_mutex held it refuses mount points
      * with -EBUSY, consults security_inode_unlink() and calls the
      * filesystem's ->unlink.
      *
      * Returns 0 on success or a negative errno.
      */
2122 int vfs_unlink(struct inode *dir, struct dentry *dentry,
2123         struct nameidata *nd)
2124 {
2125         int error = may_delete(dir, dentry, 0, nd);
2126
2127         if (error)
2128                 return error;
2129
2130         if (!dir->i_op || !dir->i_op->unlink)
2131                 return -EPERM;
2132
2133         DQUOT_INIT(dir);
2134
2135         mutex_lock(&dentry->d_inode->i_mutex);
2136         if (d_mountpoint(dentry))
2137                 error = -EBUSY;
2138         else {
2139                 error = security_inode_unlink(dir, dentry);
2140                 if (!error)
2141                         error = dir->i_op->unlink(dir, dentry);
2142         }
2143         mutex_unlock(&dentry->d_inode->i_mutex);
2144
2145         /* We don't d_delete() NFS sillyrenamed files--they still exist. */
2146         if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
2147                 d_delete(dentry);
2148         }
2149
2150         return error;
2151 }
2152
2153 /*
2154  * Make sure that the actual truncation of the file will occur outside its
2155  * directory's i_mutex.  Truncate can take a long time if there is a lot of
2156  * writeout happening, and we don't want to prevent access to the directory
2157  * while waiting on the I/O.
      *
      * To that end an extra reference on the victim's inode is taken before
      * vfs_unlink() and only dropped, with iput(), after the parent's i_mutex
      * has been released.
      *
      * Returns 0 on success or a negative errno; a last component other than
      * LAST_NORM yields -EISDIR.
2158  */
2159 static long do_unlinkat(int dfd, const char __user *pathname)
2160 {
2161         int error = 0;
2162         char * name;
2163         struct dentry *dentry;
2164         struct nameidata nd;
2165         struct inode *inode = NULL;
2166
2167         name = getname(pathname);
2168         if(IS_ERR(name))
2169                 return PTR_ERR(name);
2170
2171         error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd);
2172         if (error)
2173                 goto exit;
2174         error = -EISDIR;
2175         if (nd.last_type != LAST_NORM)
2176                 goto exit1;
2177         mutex_lock(&nd.dentry->d_inode->i_mutex);
2178         dentry = lookup_hash(&nd);
2179         error = PTR_ERR(dentry);
2180         if (!IS_ERR(dentry)) {
2181                 /* Why not before? Because we want correct error value */
2182                 if (nd.last.name[nd.last.len])
2183                         goto slashes;
2184                 inode = dentry->d_inode;
2185                 if (inode)
2186                         atomic_inc(&inode->i_count);
2187                 error = vfs_unlink(nd.dentry->d_inode, dentry, &nd);
             /* Also the re-entry point for the slashes path at the bottom. */
2188         exit2:
2189                 dput(dentry);
2190         }
2191         mutex_unlock(&nd.dentry->d_inode->i_mutex);
2192         if (inode)
2193                 iput(inode);    /* truncate the inode here */
2194 exit1:
2195         path_release(&nd);
2196 exit:
2197         putname(name);
2198         return error;
2199
             /*
              * Trailing slash after the last component: choose the errno from
              * what the name refers to (nothing, a directory, or something
              * else), then rejoin the normal cleanup.
              */
2200 slashes:
2201         error = !dentry->d_inode ? -ENOENT :
2202                 S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
2203         goto exit2;
2204 }
2205
     /*
      * sys_unlinkat - unlinkat(2) entry point.
      *
      * AT_REMOVEDIR is the only flag accepted; any other bit yields -EINVAL.
      * With AT_REMOVEDIR the call is handled by do_rmdir(), otherwise by
      * do_unlinkat().
      */
2206 asmlinkage long sys_unlinkat(int dfd, const char __user *pathname, int flag)
2207 {
2208         if ((flag & ~AT_REMOVEDIR) != 0)
2209                 return -EINVAL;
2210
2211         if (flag & AT_REMOVEDIR)
2212                 return do_rmdir(dfd, pathname);
2213
2214         return do_unlinkat(dfd, pathname);
2215 }
2216
     /* unlink(2): do_unlinkat() with the path resolved relative to AT_FDCWD. */
2217 asmlinkage long sys_unlink(const char __user *pathname)
2218 {
2219         return do_unlinkat(AT_FDCWD, pathname);
2220 }
2221
     /*
      * vfs_symlink - create symbolic link dentry in directory dir,
      * pointing at oldname.
      *
      * Checks may_create() and that the parent has a ->symlink method (-EPERM
      * otherwise), consults security_inode_symlink(), then calls the
      * filesystem's ->symlink.  fsnotify_create() is called on success.
      *
      * The mode argument is not used anywhere in this function.
      *
      * Returns 0 on success or a negative errno.
      */
2222 int vfs_symlink(struct inode *dir, struct dentry *dentry,
2223         const char *oldname, int mode, struct nameidata *nd)
2224 {
2225         int error = may_create(dir, dentry, nd);
2226
2227         if (error)
2228                 return error;
2229
2230         if (!dir->i_op || !dir->i_op->symlink)
2231                 return -EPERM;
2232
2233         error = security_inode_symlink(dir, dentry, oldname);
2234         if (error)
2235                 return error;
2236
2237         DQUOT_INIT(dir);
2238         error = dir->i_op->symlink(dir, dentry, oldname);
2239         if (!error)
2240                 fsnotify_create(dir, dentry);
2241         return error;
2242 }
2243
     /*
      * sys_symlinkat - symlinkat(2) entry point.
      *
      * Copies both user strings, looks up the parent of newname relative to
      * newdfd, and calls vfs_symlink() with the copied oldname as the link
      * contents.
      *
      * Returns 0 on success or a negative errno.
      */
2244 asmlinkage long sys_symlinkat(const char __user *oldname,
2245                               int newdfd, const char __user *newname)
2246 {
2247         int error = 0;
2248         char * from;
2249         char * to;
2250
2251         from = getname(oldname);
2252         if(IS_ERR(from))
2253                 return PTR_ERR(from);
2254         to = getname(newname);
2255         error = PTR_ERR(to);
2256         if (!IS_ERR(to)) {
2257                 struct dentry *dentry;
2258                 struct nameidata nd;
2259
2260                 error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd);
2261                 if (error)
2262                         goto out;
2263                 dentry = lookup_create(&nd, 0);
2264                 error = PTR_ERR(dentry);
2265                 if (!IS_ERR(dentry)) {
2266                         error = vfs_symlink(nd.dentry->d_inode, dentry,
2267                                 from, S_IALLUGO, &nd);
2268                         dput(dentry);
2269                 }
                     /* lookup_create() left the parent locked, even on failure. */
2270                 mutex_unlock(&nd.dentry->d_inode->i_mutex);
2271                 path_release(&nd);
2272 out:
2273                 putname(to);
2274         }
2275         putname(from);
2276         return error;
2277 }
2278
     /* symlink(2): sys_symlinkat() with newname resolved relative to AT_FDCWD. */
2279 asmlinkage long sys_symlink(const char __user *oldname, const char __user *newname)
2280 {
2281         return sys_symlinkat(oldname, AT_FDCWD, newname);
2282 }
2283
     /*
      * vfs_link - create new_dentry in directory dir as another name for
      * the inode behind old_dentry.
      *
      * Fails with -ENOENT if old_dentry is negative, -EXDEV if the two are on
      * different superblocks, and -EPERM if the inode is append-only or
      * matches IS_IXORUNLINK(), if the parent has no ->link method, or if
      * the source is a directory.  may_create() and security_inode_link()
      * are consulted as well.  The filesystem's ->link runs with the source
      * inode's i_mutex held; fsnotify_create() is called on success.
      *
      * Returns 0 on success or a negative errno.
      */
2284 int vfs_link(struct dentry *old_dentry, struct inode *dir,
2285         struct dentry *new_dentry, struct nameidata *nd)
2286 {
2287         struct inode *inode = old_dentry->d_inode;
2288         int error;
2289
2290         if (!inode)
2291                 return -ENOENT;
2292
2293         error = may_create(dir, new_dentry, nd);
2294         if (error)
2295                 return error;
2296
2297         if (dir->i_sb != inode->i_sb)
2298                 return -EXDEV;
2299
2300         /*
2301          * A link to an append-only or immutable file cannot be created.
2302          */
2303         if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
2304                 return -EPERM;
2305         if (!dir->i_op || !dir->i_op->link)
2306                 return -EPERM;
2307         if (S_ISDIR(old_dentry->d_inode->i_mode))
2308                 return -EPERM;
2309
2310         error = security_inode_link(old_dentry, dir, new_dentry);
2311         if (error)
2312                 return error;
2313
2314         mutex_lock(&old_dentry->d_inode->i_mutex);
2315         DQUOT_INIT(dir);
2316         error = dir->i_op->link(old_dentry, dir, new_dentry);
2317         mutex_unlock(&old_dentry->d_inode->i_mutex);
2318         if (!error)
2319                 fsnotify_create(dir, new_dentry);
2320         return error;
2321 }
2322
2323 /*
2324  * Hardlinks are often used in delicate situations.  We avoid
2325  * security-related surprises by not following symlinks on the
2326  * newname.  --KAB
2327  *
2328  * We don't follow them on the oldname either to be compatible
2329  * with linux 2.0, and to avoid hard-linking to directories
2330  * and other special files.  --ADM
      *
      * flags must be 0; any other value yields -EINVAL.
      * Returns 0 on success or a negative errno.
2331  */
2332 asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
2333                            int newdfd, const char __user *newname,
2334                            int flags)
2335 {
2336         struct dentry *new_dentry;
2337         struct nameidata nd, old_nd;
2338         int error;
2339         char * to;
2340
2341         if (flags != 0)
2342                 return -EINVAL;
2343
2344         to = getname(newname);
2345         if (IS_ERR(to))
2346                 return PTR_ERR(to);
2347
             /* Resolve the existing name with lookup flags 0. */
2348         error = __user_walk_fd(olddfd, oldname, 0, &old_nd);
2349         if (error)
2350                 goto exit;
2351         error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd);
2352         if (error)
2353                 goto out;
2354         /*
2355          * We allow hard-links to be created to a bind-mount as long
2356          * as the bind-mount is not read-only.  Checking for cross-dev
2357          * links is subsumed by the superblock check in vfs_link().
2358          */
             /*
              * NOTE(review): the read-only test is applied to the mount of
              * the existing name (old_nd.mnt), not to nd.mnt where the new
              * entry is created -- confirm this is the intended mount.
              */
2359         error = -EROFS;
2360         if (MNT_IS_RDONLY(old_nd.mnt))
2361                 goto out_release;
2362         new_dentry = lookup_create(&nd, 0);
2363         error = PTR_ERR(new_dentry);
2364         if (!IS_ERR(new_dentry)) {
2365                 error = vfs_link(old_nd.dentry, nd.dentry->d_inode,
2366                         new_dentry, &nd);
2367                 dput(new_dentry);
2368         }
             /* lookup_create() left the parent locked, even on failure. */
2369         mutex_unlock(&nd.dentry->d_inode->i_mutex);
2370 out_release:
2371         path_release(&nd);
2372 out:
2373         path_release(&old_nd);
2374 exit:
2375         putname(to);
2376
2377         return error;
2378 }
2379
     /* link(2): sys_linkat() with both paths relative to AT_FDCWD and flags 0. */
2380 asmlinkage long sys_link(const char __user *oldname, const char __user *newname)
2381 {
2382         return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
2383 }
2384
2385 /*
2386  * The worst of all namespace operations - renaming directory. "Perverted"
2387  * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
2388  * Problems:
2389  *      a) we can get into loop creation. Check is done in is_subdir().
2390  *      b) race potential - two innocent renames can create a loop together.
2391  *         That's where 4.4 screws up. Current fix: serialization on
2392  *         sb->s_vfs_rename_mutex. We might be more accurate, but that's another
2393  *         story.
2394  *      c) we have to lock _three_ objects - parents and victim (if it exists).
2395  *         And that - after we got ->i_mutex on parents (until then we don't know
2396  *         whether the target exists).  Solution: try to be smart with locking
2397  *         order for inodes.  We rely on the fact that tree topology may change
2398  *         only under ->s_vfs_rename_mutex _and_ that parent of the object we
2399  *         move will be locked.  Thus we can rank directories by the tree
2400  *         (ancestors first) and rank all non-directories after them.
2401  *         That works since everybody except rename does "lock parent, lookup,
2402  *         lock child" and rename is under ->s_vfs_rename_mutex.
2403  *         HOWEVER, it relies on the assumption that any object with ->lookup()
2404  *         has no more than 1 dentry.  If "hybrid" objects will ever appear,
2405  *         we'd better make sure that there's no link(2) for them.
2406  *      d) some filesystems don't support opened-but-unlinked directories,
2407  *         either because of layout or because they are not ready to deal with
2408  *         all cases correctly. The latter will be fixed (taking this sort of
2409  *         stuff into VFS), but the former is not going away. Solution: the same
2410  *         trick as in rmdir().
2411  *      e) conversion from fhandle to dentry may come in the wrong moment - when
2412  *         we are removing the target. Solution: we will have to grab ->i_mutex
2413  *         in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
2414  *         ->i_mutex on parents, which works but leads to some truly excessive
2415  *         locking].
2416  */
2417 static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
2418                           struct inode *new_dir, struct dentry *new_dentry)
2419 {
2420         int error = 0;
2421         struct inode *target;
2422
2423         /*
2424          * If we are going to change the parent - check write permissions,
2425          * we'll need to flip '..'.
2426          */
2427         if (new_dir != old_dir) {
2428                 error = permission(old_dentry->d_inode, MAY_WRITE, NULL);
2429                 if (error)
2430                         return error;
2431         }
2432
2433         error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
2434         if (error)
2435                 return error;
2436
             /*
              * An existing target is locked and, if possible, unhashed.
              * dentry_unhash() also takes a reference on new_dentry, which is
              * dropped by the dput() further down.
              */
2437         target = new_dentry->d_inode;
2438         if (target) {
2439                 mutex_lock(&target->i_mutex);
2440                 dentry_unhash(new_dentry);
2441         }
2442         if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
2443                 error = -EBUSY;
2444         else
2445                 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
2446         if (target) {
                     /* The replaced directory is dead once the rename succeeded. */
2447                 if (!error)
2448                         target->i_flags |= S_DEAD;
2449                 mutex_unlock(&target->i_mutex);
                     /* Rehash if still unhashed, whether or not the rename worked. */
2450                 if (d_unhashed(new_dentry))
2451                         d_rehash(new_dentry);
2452                 dput(new_dentry);
2453         }
2454         if (!error)
2455                 d_move(old_dentry,new_dentry);
2456         return error;
2457 }
2458
     /*
      * vfs_rename_other - rename step used by vfs_rename() when the source
      * is not a directory.
      *
      * After the security_inode_rename() hook, an existing target inode is
      * locked for the duration of the filesystem's ->rename call.  Mount
      * points on either side yield -EBUSY.  On success the dcache is updated
      * with d_move() unless the filesystem type sets FS_ODD_RENAME.
      *
      * Returns 0 on success or a negative errno.
      */
2459 static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
2460                             struct inode *new_dir, struct dentry *new_dentry)
2461 {
2462         struct inode *target;
2463         int error;
2464
2465         error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
2466         if (error)
2467                 return error;
2468
             /* Hold an extra reference on new_dentry until the final dput(). */
2469         dget(new_dentry);
2470         target = new_dentry->d_inode;
2471         if (target)
2472                 mutex_lock(&target->i_mutex);
2473         if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
2474                 error = -EBUSY;
2475         else
2476                 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
2477         if (!error) {
2478                 /* The following d_move() should become unconditional */
2479                 if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME))
2480                         d_move(old_dentry, new_dentry);
2481         }
2482         if (target)
2483                 mutex_unlock(&target->i_mutex);
2484         dput(new_dentry);
2485         return error;
2486 }
2487
     /*
      * vfs_rename - rename old_dentry (in old_dir) to new_dentry (in new_dir).
      *
      * Renaming an object onto itself (both dentries share one inode) is a
      * successful no-op.  Otherwise the source must pass may_delete(), and the
      * target must pass may_create() if it is negative or may_delete() if it
      * exists.  The work is then delegated to vfs_rename_dir() or
      * vfs_rename_other() depending on whether the source is a directory,
      * and fsnotify_move() is called on success.
      *
      * Returns 0 on success or a negative errno; -EPERM if old_dir has no
      * ->rename method.
      */
2488 int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
2489                struct inode *new_dir, struct dentry *new_dentry)
2490 {
2491         int error;
2492         int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
2493         const char *old_name;
2494
2495         if (old_dentry->d_inode == new_dentry->d_inode)
2496                 return 0;
2497
2498         error = may_delete(old_dir, old_dentry, is_dir, NULL);
2499         if (error)
2500                 return error;
2501
2502         if (!new_dentry->d_inode)
2503                 error = may_create(new_dir, new_dentry, NULL);
2504         else
2505                 error = may_delete(new_dir, new_dentry, is_dir, NULL);
2506         if (error)
2507                 return error;
2508
2509         if (!old_dir->i_op || !old_dir->i_op->rename)
2510                 return -EPERM;
2511
2512         DQUOT_INIT(old_dir);
2513         DQUOT_INIT(new_dir);
2514
             /* Capture the name before the rename; freed again below. */
2515         old_name = fsnotify_oldname_init(old_dentry->d_name.name);
2516
2517         if (is_dir)
2518                 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
2519         else
2520                 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
2521         if (!error) {
                     /* After the rename, old_dentry's name is read as the new name. */
2522                 const char *new_name = old_dentry->d_name.name;
2523                 fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir,
2524                               new_dentry->d_inode, old_dentry->d_inode);
2525         }
2526         fsnotify_oldname_free(old_name);
2527
2528         return error;
2529 }
2530
     /*
      * do_rename - rename oldname to newname; both are kernel-space strings.
      *
      * Looks up both parents, requires them to be on the same vfsmount
      * (-EXDEV otherwise) and both last components to be LAST_NORM (-EBUSY
      * otherwise).  Both parents are then locked with lock_rename(), both
      * last components are looked up, and vfs_rename() does the work.
      *
      * The exitN labels unwind in reverse order of acquisition.
      * Returns 0 on success or a negative errno.
      */
2531 static int do_rename(int olddfd, const char *oldname,
2532                         int newdfd, const char *newname)
2533 {
2534         int error = 0;
2535         struct dentry * old_dir, * new_dir;
2536         struct dentry * old_dentry, *new_dentry;
2537         struct dentry * trap;
2538         struct nameidata oldnd, newnd;
2539
2540         error = do_path_lookup(olddfd, oldname, LOOKUP_PARENT, &oldnd);
2541         if (error)
2542                 goto exit;
2543
2544         error = do_path_lookup(newdfd, newname, LOOKUP_PARENT, &newnd);
2545         if (error)
2546                 goto exit1;
2547
2548         error = -EXDEV;
2549         if (oldnd.mnt != newnd.mnt)
2550                 goto exit2;
2551
             /* -EBUSY covers both last_type checks below. */
2552         old_dir = oldnd.dentry;
2553         error = -EBUSY;
2554         if (oldnd.last_type != LAST_NORM)
2555                 goto exit2;
2556
2557         new_dir = newnd.dentry;
2558         if (newnd.last_type != LAST_NORM)
2559                 goto exit2;
2560
             /* trap is compared against both looked-up dentries below. */
2561         trap = lock_rename(new_dir, old_dir);
2562
2563         old_dentry = lookup_hash(&oldnd);
2564         error = PTR_ERR(old_dentry);
2565         if (IS_ERR(old_dentry))
2566                 goto exit3;
2567         /* source must exist */
2568         error = -ENOENT;
2569         if (!old_dentry->d_inode)
2570                 goto exit4;
2571         /* unless the source is a directory trailing slashes give -ENOTDIR */
2572         if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
2573                 error = -ENOTDIR;
2574                 if (oldnd.last.name[oldnd.last.len])
2575                         goto exit4;
2576                 if (newnd.last.name[newnd.last.len])
2577                         goto exit4;
2578         }
2579         /* source should not be ancestor of target */
2580         error = -EINVAL;
2581         if (old_dentry == trap)
2582                 goto exit4;
             /* Refuse the rename when the mount is read-only. */
2583         error = -EROFS;
2584         if (MNT_IS_RDONLY(newnd.mnt))
2585                 goto exit4;
2586         new_dentry = lookup_hash(&newnd);
2587         error = PTR_ERR(new_dentry);
2588         if (IS_ERR(new_dentry))
2589                 goto exit4;
2590         /* target should not be an ancestor of source */
2591         error = -ENOTEMPTY;
2592         if (new_dentry == trap)
2593                 goto exit5;
2594
2595         error = vfs_rename(old_dir->d_inode, old_dentry,
2596                                    new_dir->d_inode, new_dentry);
2597 exit5:
2598         dput(new_dentry);
2599 exit4:
2600         dput(old_dentry);
2601 exit3:
2602         unlock_rename(new_dir, old_dir);
2603 exit2:
2604         path_release(&newnd);
2605 exit1:
2606         path_release(&oldnd);
2607 exit:
2608         return error;
2609 }
2610
2611 asmlinkage long sys_renameat(int olddfd, const char __user *oldname,
2612                              int newdfd, const char __user *newname)
2613 {
2614         int error;
2615         char * from;
2616         char * to;
2617
2618         from = getname(oldname);
2619         if(IS_ERR(from))
2620                 return PTR_ERR(from);
2621         to = getname(newname);
2622         error = PTR_ERR(to);
2623         if (!IS_ERR(to)) {
2624                 error = do_rename(olddfd, from, newdfd, to);
2625                 putname(to);
2626         }
2627         putname(from);
2628         return error;
2629 }
2630
2631 asmlinkage long sys_rename(const char __user *oldname, const char __user *newname)
2632 {
2633         return sys_renameat(AT_FDCWD, oldname, AT_FDCWD, newname);
2634 }
2635
2636 int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link)
2637 {
2638         int len;
2639
2640         len = PTR_ERR(link);
2641         if (IS_ERR(link))
2642                 goto out;
2643
2644         len = strlen(link);
2645         if (len > (unsigned) buflen)
2646                 len = buflen;
2647         if (copy_to_user(buffer, link, len))
2648                 len = -EFAULT;
2649 out:
2650         return len;
2651 }
2652
2653 /*
2654  * A helper for ->readlink().  This should be used *ONLY* for symlinks that
2655  * have ->follow_link() touching nd only in nd_set_link().  Using (or not
2656  * using) it for any given inode is up to filesystem.
2657  */
2658 int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
2659 {
2660         struct nameidata nd;
2661         void *cookie;
2662
2663         nd.depth = 0;
2664         cookie = dentry->d_inode->i_op->follow_link(dentry, &nd);
2665         if (!IS_ERR(cookie)) {
2666                 int res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd));
2667                 if (dentry->d_inode->i_op->put_link)
2668                         dentry->d_inode->i_op->put_link(dentry, &nd, cookie);
2669                 cookie = ERR_PTR(res);
2670         }
2671         return PTR_ERR(cookie);
2672 }
2673
/*
 * Exported entry point that simply forwards to __vfs_follow_link() and
 * returns its result.
 */
int vfs_follow_link(struct nameidata *nd, const char *link)
{
	int err = __vfs_follow_link(nd, link);

	return err;
}
2678
2679 /* get the link contents into pagecache */
2680 static char *page_getlink(struct dentry * dentry, struct page **ppage)
2681 {
2682         struct page * page;
2683         struct address_space *mapping = dentry->d_inode->i_mapping;
2684         page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage,
2685                                 NULL);
2686         if (IS_ERR(page))
2687                 goto sync_fail;
2688         wait_on_page_locked(page);
2689         if (!PageUptodate(page))
2690                 goto async_fail;
2691         *ppage = page;
2692         return kmap(page);
2693
2694 async_fail:
2695         page_cache_release(page);
2696         return ERR_PTR(-EIO);
2697
2698 sync_fail:
2699         return (char*)page;
2700 }
2701
2702 int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
2703 {
2704         struct page *page = NULL;
2705         char *s = page_getlink(dentry, &page);
2706         int res = vfs_readlink(dentry,buffer,buflen,s);
2707         if (page) {
2708                 kunmap(page);
2709                 page_cache_release(page);
2710         }
2711         return res;
2712 }
2713
2714 void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd)
2715 {
2716         struct page *page = NULL;
2717         nd_set_link(nd, page_getlink(dentry, &page));
2718         return page;
2719 }
2720
/*
 * ->put_link() implementation: @cookie is treated as a struct page
 * pointer; when it is non-NULL the page is kunmap()ed and released.
 * @dentry and @nd are not used.
 */
void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
{
	struct page *page = cookie;

	if (!page)
		return;

	kunmap(page);
	page_cache_release(page);
}
2730
2731 int __page_symlink(struct inode *inode, const char *symname, int len,
2732                 gfp_t gfp_mask)
2733 {
2734         struct address_space *mapping = inode->i_mapping;
2735         struct page *page;
2736         int err = -ENOMEM;
2737         char *kaddr;
2738
2739 retry:
2740         page = find_or_create_page(mapping, 0, gfp_mask);
2741         if (!page)
2742                 goto fail;
2743         err = mapping->a_ops->prepare_write(NULL, page, 0, len-1);
2744         if (err == AOP_TRUNCATED_PAGE) {
2745                 page_cache_release(page);
2746                 goto retry;
2747         }
2748         if (err)
2749                 goto fail_map;
2750         kaddr = kmap_atomic(page, KM_USER0);
2751         memcpy(kaddr, symname, len-1);
2752         kunmap_atomic(kaddr, KM_USER0);
2753         err = mapping->a_ops->commit_write(NULL, page, 0, len-1);
2754         if (err == AOP_TRUNCATED_PAGE) {
2755                 page_cache_release(page);
2756                 goto retry;
2757         }
2758         if (err)
2759                 goto fail_map;
2760         /*
2761          * Notice that we are _not_ going to block here - end of page is
2762          * unmapped, so this will only try to map the rest of page, see
2763          * that it is unmapped (typically even will not look into inode -
2764          * ->i_size will be enough for everything) and zero it out.
2765          * OTOH it's obviously correct and should make the page up-to-date.
2766          */
2767         if (!PageUptodate(page)) {
2768                 err = mapping->a_ops->readpage(NULL, page);
2769                 if (err != AOP_TRUNCATED_PAGE)
2770                         wait_on_page_locked(page);
2771         } else {
2772                 unlock_page(page);
2773         }
2774         page_cache_release(page);
2775         if (err < 0)
2776                 goto fail;
2777         mark_inode_dirty(inode);
2778         return 0;
2779 fail_map:
2780         unlock_page(page);
2781         page_cache_release(page);
2782 fail:
2783         return err;
2784 }
2785
2786 int page_symlink(struct inode *inode, const char *symname, int len)
2787 {
2788         return __page_symlink(inode, symname, len,
2789                         mapping_gfp_mask(inode->i_mapping));
2790 }
2791
2792 struct inode_operations page_symlink_inode_operations = {
2793         .readlink       = generic_readlink,
2794         .follow_link    = page_follow_link_light,
2795         .put_link       = page_put_link,
2796 };
2797
/* Symbols exported from this file, kept in alphabetical order. */
EXPORT_SYMBOL(__page_symlink);
EXPORT_SYMBOL(__user_walk);
EXPORT_SYMBOL(__user_walk_fd);
EXPORT_SYMBOL(dentry_unhash);
EXPORT_SYMBOL(file_permission);
EXPORT_SYMBOL(follow_down);
EXPORT_SYMBOL(follow_up);
EXPORT_SYMBOL(generic_permission);
EXPORT_SYMBOL(generic_readlink);
EXPORT_SYMBOL(get_write_access); /* binfmt_aout */
EXPORT_SYMBOL(getname);
EXPORT_SYMBOL(lock_rename);
EXPORT_SYMBOL(lookup_one_len);
EXPORT_SYMBOL(page_follow_link_light);
EXPORT_SYMBOL(page_put_link);
EXPORT_SYMBOL(page_readlink);
EXPORT_SYMBOL(page_symlink);
EXPORT_SYMBOL(page_symlink_inode_operations);
EXPORT_SYMBOL(path_lookup);
EXPORT_SYMBOL(path_release);
EXPORT_SYMBOL(path_walk);
EXPORT_SYMBOL(permission);
EXPORT_SYMBOL(unlock_rename);
EXPORT_SYMBOL(vfs_create);
EXPORT_SYMBOL(vfs_follow_link);
EXPORT_SYMBOL(vfs_link);
EXPORT_SYMBOL(vfs_mkdir);
EXPORT_SYMBOL(vfs_mknod);
EXPORT_SYMBOL(vfs_permission);
EXPORT_SYMBOL(vfs_readlink);
EXPORT_SYMBOL(vfs_rename);
EXPORT_SYMBOL(vfs_rmdir);
EXPORT_SYMBOL(vfs_symlink);
EXPORT_SYMBOL(vfs_unlink);