vserver 1.9.3
[linux-2.6.git] / fs / namei.c
index 45b138d..aac4502 100644 (file)
@@ -27,6 +27,8 @@
 #include <linux/security.h>
 #include <linux/mount.h>
 #include <linux/audit.h>
+#include <linux/vs_base.h>
+
 #include <asm/namei.h>
 #include <asm/uaccess.h>
 
@@ -163,9 +165,6 @@ int vfs_permission(struct inode * inode, int mask)
 {
        umode_t                 mode = inode->i_mode;
 
-       if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN|VX_WATCH))
-               return -EACCES;
-
        if (mask & MAY_WRITE) {
                /*
                 * Nobody gets write access to a read-only fs.
@@ -211,12 +210,17 @@ int vfs_permission(struct inode * inode, int mask)
        return -EACCES;
 }
 
-static inline int xid_permission(struct inode *inode)
+static inline int xid_permission(struct inode *inode, int mask, struct nameidata *nd)
 {
        if (inode->i_xid == 0)
                return 0;
        if (vx_check(inode->i_xid, VX_ADMIN|VX_WATCH|VX_IDENT))
                return 0;
+/*
+       printk("VSW: xid=%d denied access to %p[#%d,%lu] »%*s«.\n",
+               vx_current_xid(), inode, inode->i_xid, inode->i_ino,
+               nd->dentry->d_name.len, nd->dentry->d_name.name);
+*/
        return -EACCES;
 }
 
@@ -228,7 +232,7 @@ int permission(struct inode * inode,int mask, struct nameidata *nd)
        /* Ordinary permission routines do not understand MAY_APPEND. */
        submask = mask & ~MAY_APPEND;
 
-       if ((retval = xid_permission(inode)))
+       if ((retval = xid_permission(inode, mask, nd)))
                return retval;
        if (inode->i_op && inode->i_op->permission)
                retval = inode->i_op->permission(inode, submask, nd);
@@ -292,6 +296,16 @@ void path_release(struct nameidata *nd)
        mntput(nd->mnt);
 }
 
+/*
+ * umount() mustn't call path_release()/mntput() as that would clear
+ * mnt_expiry_mark
+ */
+void path_release_on_umount(struct nameidata *nd)
+{
+       dput(nd->dentry);
+       _mntput(nd->mnt);
+}
+
 /*
  * Internal lookup() using the new generic dcache.
  * SMP-safe
@@ -330,7 +344,7 @@ static inline int exec_permission_lite(struct inode *inode,
 {
        umode_t mode = inode->i_mode;
 
-       if ((inode->i_op && inode->i_op->permission))
+       if (inode->i_op && inode->i_op->permission)
                return -EAGAIN;
 
        if (current->fsuid == inode->i_uid)
@@ -344,6 +358,9 @@ static inline int exec_permission_lite(struct inode *inode,
        if ((inode->i_mode & S_IXUGO) && capable(CAP_DAC_OVERRIDE))
                goto ok;
 
+       if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_OVERRIDE))
+               goto ok;
+
        if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_READ_SEARCH))
                goto ok;
 
@@ -409,6 +426,62 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
        return result;
 }
 
+static int __emul_lookup_dentry(const char *, struct nameidata *);
+
+/* SMP-safe */
+static inline int
+walk_init_root(const char *name, struct nameidata *nd)
+{
+       read_lock(&current->fs->lock);
+       if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
+               nd->mnt = mntget(current->fs->altrootmnt);
+               nd->dentry = dget(current->fs->altroot);
+               read_unlock(&current->fs->lock);
+               if (__emul_lookup_dentry(name,nd))
+                       return 0;
+               read_lock(&current->fs->lock);
+       }
+       nd->mnt = mntget(current->fs->rootmnt);
+       nd->dentry = dget(current->fs->root);
+       read_unlock(&current->fs->lock);
+       return 1;
+}
+
+static inline int __vfs_follow_link(struct nameidata *nd, const char *link)
+{
+       int res = 0;
+       char *name;
+       if (IS_ERR(link))
+               goto fail;
+
+       if (*link == '/') {
+               path_release(nd);
+               if (!walk_init_root(link, nd))
+                       /* weird __emul_prefix() stuff did it */
+                       goto out;
+       }
+       res = link_path_walk(link, nd);
+out:
+       if (nd->depth || res || nd->last_type!=LAST_NORM)
+               return res;
+       /*
+        * If it is an iterative symlinks resolution in open_namei() we
+        * have to copy the last component. And all that crap because of
+        * bloody create() on broken symlinks. Furrfu...
+        */
+       name = __getname();
+       if (unlikely(!name)) {
+               path_release(nd);
+               return -ENOMEM;
+       }
+       strcpy(name, nd->last.name);
+       nd->last.name = name;
+       return 0;
+fail:
+       path_release(nd);
+       return PTR_ERR(link);
+}
+
 /*
  * This limits recursive symlink follows to 8, while
  * limiting consecutive symlinks to 40.
@@ -419,19 +492,30 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
 static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
        int err = -ELOOP;
-       if (current->link_count >= 5)
+       if (current->link_count >= MAX_NESTED_LINKS)
                goto loop;
        if (current->total_link_count >= 40)
                goto loop;
+       BUG_ON(nd->depth >= MAX_NESTED_LINKS);
        cond_resched();
        err = security_inode_follow_link(dentry, nd);
        if (err)
                goto loop;
        current->link_count++;
        current->total_link_count++;
+       nd->depth++;
        touch_atime(nd->mnt, dentry);
+       nd_set_link(nd, NULL);
        err = dentry->d_inode->i_op->follow_link(dentry, nd);
+       if (!err) {
+               char *s = nd_get_link(nd);
+               if (s)
+                       err = __vfs_follow_link(nd, s);
+               if (dentry->d_inode->i_op->put_link)
+                       dentry->d_inode->i_op->put_link(dentry, nd);
+       }
        current->link_count--;
+       nd->depth--;
        return err;
 loop:
        path_release(nd);
@@ -601,7 +685,7 @@ int fastcall link_path_walk(const char * name, struct nameidata *nd)
                goto return_reval;
 
        inode = nd->dentry->d_inode;
-       if (current->link_count)
+       if (nd->depth)
                lookup_flags = LOOKUP_FOLLOW;
 
        /* At this point we know we have a real path component. */
@@ -802,28 +886,31 @@ static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
                return 0;               /* something went wrong... */
 
        if (!nd->dentry->d_inode || S_ISDIR(nd->dentry->d_inode->i_mode)) {
-               struct nameidata nd_root;
+               struct dentry *old_dentry = nd->dentry;
+               struct vfsmount *old_mnt = nd->mnt;
+               struct qstr last = nd->last;
+               int last_type = nd->last_type;
                /*
                 * NAME was not found in alternate root or it's a directory.  Try to find
                 * it in the normal root:
                 */
-               nd_root.last_type = LAST_ROOT;
-               nd_root.flags = nd->flags;
-               memcpy(&nd_root.intent, &nd->intent, sizeof(nd_root.intent));
+               nd->last_type = LAST_ROOT;
                read_lock(&current->fs->lock);
-               nd_root.mnt = mntget(current->fs->rootmnt);
-               nd_root.dentry = dget(current->fs->root);
+               nd->mnt = mntget(current->fs->rootmnt);
+               nd->dentry = dget(current->fs->root);
                read_unlock(&current->fs->lock);
-               if (path_walk(name, &nd_root))
-                       return 1;
-               if (nd_root.dentry->d_inode) {
+               if (path_walk(name, nd) == 0) {
+                       if (nd->dentry->d_inode) {
+                               dput(old_dentry);
+                               mntput(old_mnt);
+                               return 1;
+                       }
                        path_release(nd);
-                       nd->dentry = nd_root.dentry;
-                       nd->mnt = nd_root.mnt;
-                       nd->last = nd_root.last;
-                       return 1;
                }
-               path_release(&nd_root);
+               nd->dentry = old_dentry;
+               nd->mnt = old_mnt;
+               nd->last = last;
+               nd->last_type = last_type;
        }
        return 1;
 }
@@ -856,31 +943,13 @@ set_it:
        }
 }
 
-/* SMP-safe */
-static inline int
-walk_init_root(const char *name, struct nameidata *nd)
-{
-       read_lock(&current->fs->lock);
-       if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
-               nd->mnt = mntget(current->fs->altrootmnt);
-               nd->dentry = dget(current->fs->altroot);
-               read_unlock(&current->fs->lock);
-               if (__emul_lookup_dentry(name,nd))
-                       return 0;
-               read_lock(&current->fs->lock);
-       }
-       nd->mnt = mntget(current->fs->rootmnt);
-       nd->dentry = dget(current->fs->root);
-       read_unlock(&current->fs->lock);
-       return 1;
-}
-
 int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata *nd)
 {
        int retval;
 
        nd->last_type = LAST_ROOT; /* if there are only slashes... */
        nd->flags = flags;
+       nd->depth = 0;
 
        read_lock(&current->fs->lock);
        if (*name=='/') {
@@ -894,8 +963,7 @@ int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata
                }
                nd->mnt = mntget(current->fs->rootmnt);
                nd->dentry = dget(current->fs->root);
-       }
-       else{
+       } else {
                nd->mnt = mntget(current->fs->pwdmnt);
                nd->dentry = dget(current->fs->pwd);
        }
@@ -1045,8 +1113,12 @@ static inline int check_sticky(struct inode *dir, struct inode *inode)
 static inline int may_delete(struct inode *dir,struct dentry *victim,int isdir)
 {
        int error;
-       if (!victim->d_inode || victim->d_parent->d_inode != dir)
+
+       if (!victim->d_inode)
                return -ENOENT;
+
+       BUG_ON(victim->d_parent->d_inode != dir);
+
        error = permission(dir,MAY_WRITE | MAY_EXEC, NULL);
        if (error)
                return error;
@@ -1220,6 +1292,11 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
                        return -EPERM;
        }
 
+       /* O_NOATIME can only be set by the owner or superuser */
+       if (flag & O_NOATIME)
+               if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
+                       return -EPERM;
+
        /*
         * Ensure there are no outstanding leases on the file.
         */
@@ -1394,7 +1471,15 @@ do_link:
        if (error)
                goto exit_dput;
        touch_atime(nd->mnt, dentry);
+       nd_set_link(nd, NULL);
        error = dentry->d_inode->i_op->follow_link(dentry, nd);
+       if (!error) {
+               char *s = nd_get_link(nd);
+               if (s)
+                       error = __vfs_follow_link(nd, s);
+               if (dentry->d_inode->i_op->put_link)
+                       dentry->d_inode->i_op->put_link(dentry, nd);
+       }
        dput(dentry);
        if (error)
                return error;
@@ -1597,7 +1682,7 @@ out:
  * if it cannot handle the case of removing a directory
  * that is still in use by something else..
  */
-static void d_unhash(struct dentry *dentry)
+void dentry_unhash(struct dentry *dentry)
 {
        dget(dentry);
        spin_lock(&dcache_lock);
@@ -1627,7 +1712,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
        DQUOT_INIT(dir);
 
        down(&dentry->d_inode->i_sem);
-       d_unhash(dentry);
+       dentry_unhash(dentry);
        if (d_mountpoint(dentry))
                error = -EBUSY;
        else {
@@ -1758,13 +1843,12 @@ asmlinkage long sys_unlink(const char __user * pathname)
                dput(dentry);
        }
        up(&nd.dentry->d_inode->i_sem);
+       if (inode)
+               iput(inode);    /* truncate the inode here */
 exit1:
        path_release(&nd);
 exit:
        putname(name);
-
-       if (inode)
-               iput(inode);    /* truncate the inode here */
        return error;
 
 slashes:
@@ -1773,7 +1857,7 @@ slashes:
        goto exit2;
 }
 
-int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
+int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode)
 {
        int error = may_create(dir, dentry, NULL);
 
@@ -1817,7 +1901,7 @@ asmlinkage long sys_symlink(const char __user * oldname, const char __user * new
                dentry = lookup_create(&nd, 0);
                error = PTR_ERR(dentry);
                if (!IS_ERR(dentry)) {
-                       error = vfs_symlink(nd.dentry->d_inode, dentry, from);
+                       error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO);
                        dput(dentry);
                }
                up(&nd.dentry->d_inode->i_sem);
@@ -1970,7 +2054,7 @@ int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
        target = new_dentry->d_inode;
        if (target) {
                down(&target->i_sem);
-               d_unhash(new_dentry);
+               dentry_unhash(new_dentry);
        }
        if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
                error = -EBUSY;
@@ -2175,40 +2259,23 @@ out:
        return len;
 }
 
-static inline int
-__vfs_follow_link(struct nameidata *nd, const char *link)
+/*
+ * A helper for ->readlink().  This should be used *ONLY* for symlinks that
+ * have ->follow_link() touching nd only in nd_set_link().  Using (or not
+ * using) it for any given inode is up to filesystem.
+ */
+int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
 {
-       int res = 0;
-       char *name;
-       if (IS_ERR(link))
-               goto fail;
-
-       if (*link == '/') {
-               path_release(nd);
-               if (!walk_init_root(link, nd))
-                       /* weird __emul_prefix() stuff did it */
-                       goto out;
-       }
-       res = link_path_walk(link, nd);
-out:
-       if (current->link_count || res || nd->last_type!=LAST_NORM)
-               return res;
-       /*
-        * If it is an iterative symlinks resolution in open_namei() we
-        * have to copy the last component. And all that crap because of
-        * bloody create() on broken symlinks. Furrfu...
-        */
-       name = __getname();
-       if (unlikely(!name)) {
-               path_release(nd);
-               return -ENOMEM;
+       struct nameidata nd;
+       int res;
+       nd.depth = 0;
+       res = dentry->d_inode->i_op->follow_link(dentry, &nd);
+       if (!res) {
+               res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd));
+               if (dentry->d_inode->i_op->put_link)
+                       dentry->d_inode->i_op->put_link(dentry, &nd);
        }
-       strcpy(name, nd->last.name);
-       nd->last.name = name;
-       return 0;
-fail:
-       path_release(nd);
-       return PTR_ERR(link);
+       return res;
 }
 
 int vfs_follow_link(struct nameidata *nd, const char *link)
@@ -2251,6 +2318,26 @@ int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
        return res;
 }
 
+int page_follow_link_light(struct dentry *dentry, struct nameidata *nd)
+{
+       struct page *page;
+       nd_set_link(nd, page_getlink(dentry, &page));
+       return 0;
+}
+
+void page_put_link(struct dentry *dentry, struct nameidata *nd)
+{
+       if (!IS_ERR(nd_get_link(nd))) {
+               struct page *page;
+               page = find_get_page(dentry->d_inode->i_mapping, 0);
+               if (!page)
+                       BUG();
+               kunmap(page);
+               page_cache_release(page);
+               page_cache_release(page);
+       }
+}
+
 int page_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
        struct page *page = NULL;
@@ -2305,8 +2392,9 @@ fail:
 }
 
 struct inode_operations page_symlink_inode_operations = {
-       .readlink       = page_readlink,
-       .follow_link    = page_follow_link,
+       .readlink       = generic_readlink,
+       .follow_link    = page_follow_link_light,
+       .put_link       = page_put_link,
 };
 
 EXPORT_SYMBOL(__user_walk);
@@ -2319,6 +2407,8 @@ EXPORT_SYMBOL(lookup_create);
 EXPORT_SYMBOL(lookup_hash);
 EXPORT_SYMBOL(lookup_one_len);
 EXPORT_SYMBOL(page_follow_link);
+EXPORT_SYMBOL(page_follow_link_light);
+EXPORT_SYMBOL(page_put_link);
 EXPORT_SYMBOL(page_readlink);
 EXPORT_SYMBOL(page_symlink);
 EXPORT_SYMBOL(page_symlink_inode_operations);
@@ -2338,3 +2428,5 @@ EXPORT_SYMBOL(vfs_rename);
 EXPORT_SYMBOL(vfs_rmdir);
 EXPORT_SYMBOL(vfs_symlink);
 EXPORT_SYMBOL(vfs_unlink);
+EXPORT_SYMBOL(dentry_unhash);
+EXPORT_SYMBOL(generic_readlink);