This commit was manufactured by cvs2svn to create tag
[linux-2.6.git] / fs / namei.c
index 9828b94..44262f3 100644 (file)
@@ -27,6 +27,8 @@
 #include <linux/security.h>
 #include <linux/mount.h>
 #include <linux/audit.h>
+#include <linux/vs_base.h>
+
 #include <asm/namei.h>
 #include <asm/uaccess.h>
 
@@ -208,6 +210,20 @@ int vfs_permission(struct inode * inode, int mask)
        return -EACCES;
 }
 
+static inline int xid_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+       if (inode->i_xid == 0)          /* inode carries xid 0: no check needed */
+               return 0;
+       if (vx_check(inode->i_xid, VX_ADMIN|VX_WATCH|VX_IDENT))
+               return 0;               /* vx_check() passed for this xid */
+/*
+ * Anything else is refused; mask and nd are currently unused.  A disabled
+ * debug printk used to sit here: if it is revived, print the name with
+ * "%.*s" and guard nd, since may_delete() reaches this with nd == NULL.
+ */
+       return -EACCES;
+}
+
 int permission(struct inode * inode,int mask, struct nameidata *nd)
 {
        int retval;
@@ -216,6 +232,8 @@ int permission(struct inode * inode,int mask, struct nameidata *nd)
        /* Ordinary permission routines do not understand MAY_APPEND. */
        submask = mask & ~MAY_APPEND;
 
+       if ((retval = xid_permission(inode, mask, nd)))
+               return retval;
        if (inode->i_op && inode->i_op->permission)
                retval = inode->i_op->permission(inode, submask, nd);
        else
@@ -278,6 +296,16 @@ void path_release(struct nameidata *nd)
        mntput(nd->mnt);
 }
 
+/*
+ * Drops nd's dentry and, via _mntput(), its mount: umount() mustn't call
+ * path_release()/mntput() as that would clear mnt_expiry_mark.
+ */
+void path_release_on_umount(struct nameidata *nd)
+{
+       dput(nd->dentry);
+       _mntput(nd->mnt);       /* deliberately not mntput(), see above */
+}
+
 /*
  * Internal lookup() using the new generic dcache.
  * SMP-safe
@@ -316,7 +344,7 @@ static inline int exec_permission_lite(struct inode *inode,
 {
        umode_t mode = inode->i_mode;
 
-       if ((inode->i_op && inode->i_op->permission))
+       if (inode->i_op && inode->i_op->permission)
                return -EAGAIN;
 
        if (current->fsuid == inode->i_uid)
@@ -330,6 +358,9 @@ static inline int exec_permission_lite(struct inode *inode,
        if ((inode->i_mode & S_IXUGO) && capable(CAP_DAC_OVERRIDE))
                goto ok;
 
+       if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_OVERRIDE))
+               goto ok;
+
        if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_READ_SEARCH))
                goto ok;
 
@@ -395,6 +426,62 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
        return result;
 }
 
+static int __emul_lookup_dentry(const char *, struct nameidata *);
+
+/* SMP-safe: current->fs is only read under current->fs->lock */
+static inline int
+walk_init_root(const char *name, struct nameidata *nd)
+{
+       read_lock(&current->fs->lock);
+       if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
+               nd->mnt = mntget(current->fs->altrootmnt);
+               nd->dentry = dget(current->fs->altroot);
+               read_unlock(&current->fs->lock);        /* dropped across the lookup */
+               if (__emul_lookup_dentry(name,nd))
+                       return 0;       /* altroot lookup took over; callers skip their own walk */
+               read_lock(&current->fs->lock);  /* re-taken for the fallback below */
+       }
+       nd->mnt = mntget(current->fs->rootmnt);
+       nd->dentry = dget(current->fs->root);
+       read_unlock(&current->fs->lock);
+       return 1;                       /* nd now starts at current->fs->root */
+}
+
+static inline int __vfs_follow_link(struct nameidata *nd, const char *link)
+{
+       int res = 0;
+       char *name;
+       if (IS_ERR(link))               /* link is an error pointer, not a path */
+               goto fail;
+
+       if (*link == '/') {             /* absolute target: restart from a root */
+               path_release(nd);
+               if (!walk_init_root(link, nd))
+                       /* weird __emul_prefix() stuff did it */
+                       goto out;
+       }
+       res = link_path_walk(link, nd);
+out:
+       if (nd->depth || res || nd->last_type!=LAST_NORM)
+               return res;
+       /*
+        * Only reached with depth 0, no error and a LAST_NORM last component:
+        * the iterative symlink resolution in open_namei().  Copy the last
+        * component; needed because of create() on broken symlinks.
+        */
+       name = __getname();
+       if (unlikely(!name)) {
+               path_release(nd);
+               return -ENOMEM;
+       }
+       strcpy(name, nd->last.name);    /* NOTE(review): unbounded copy; assumes the component fits a __getname() buffer -- confirm */
+       nd->last.name = name;           /* buffer is not freed here; presumably the caller's job -- confirm */
+       return 0;
+fail:
+       path_release(nd);
+       return PTR_ERR(link);           /* errno carried inside the link pointer */
+}
+
 /*
  * This limits recursive symlink follows to 8, while
  * limiting consecutive symlinks to 40.
@@ -405,19 +492,30 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
 static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
        int err = -ELOOP;
-       if (current->link_count >= 5)
+       if (current->link_count >= MAX_NESTED_LINKS)
                goto loop;
        if (current->total_link_count >= 40)
                goto loop;
+       BUG_ON(nd->depth >= MAX_NESTED_LINKS);
        cond_resched();
        err = security_inode_follow_link(dentry, nd);
        if (err)
                goto loop;
        current->link_count++;
        current->total_link_count++;
+       nd->depth++;
        touch_atime(nd->mnt, dentry);
+       nd_set_link(nd, NULL);
        err = dentry->d_inode->i_op->follow_link(dentry, nd);
+       if (!err) {
+               char *s = nd_get_link(nd);
+               if (s)
+                       err = __vfs_follow_link(nd, s);
+               if (dentry->d_inode->i_op->put_link)
+                       dentry->d_inode->i_op->put_link(dentry, nd);
+       }
        current->link_count--;
+       nd->depth--;
        return err;
 loop:
        path_release(nd);
@@ -578,16 +676,18 @@ int fastcall link_path_walk(const char * name, struct nameidata *nd)
 {
        struct path next;
        struct inode *inode;
-       int err;
+       int err, atomic;
        unsigned int lookup_flags = nd->flags;
-       
+
+       atomic = (lookup_flags & LOOKUP_ATOMIC);
+
        while (*name=='/')
                name++;
        if (!*name)
                goto return_reval;
 
        inode = nd->dentry->d_inode;
-       if (current->link_count)
+       if (nd->depth)
                lookup_flags = LOOKUP_FOLLOW;
 
        /* At this point we know we have a real path component. */
@@ -648,6 +748,9 @@ int fastcall link_path_walk(const char * name, struct nameidata *nd)
                        if (err < 0)
                                break;
                }
+               err = -EWOULDBLOCKIO;
+               if (atomic)
+                       break;
                nd->flags |= LOOKUP_CONTINUE;
                /* This does the actual lookups.. */
                err = do_lookup(nd, &this, &next);
@@ -712,6 +815,9 @@ last_component:
                        if (err < 0)
                                break;
                }
+               err = -EWOULDBLOCKIO;
+               if (atomic)
+                       break;
                err = do_lookup(nd, &this, &next);
                if (err)
                        break;
@@ -788,28 +894,31 @@ static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
                return 0;               /* something went wrong... */
 
        if (!nd->dentry->d_inode || S_ISDIR(nd->dentry->d_inode->i_mode)) {
-               struct nameidata nd_root;
+               struct dentry *old_dentry = nd->dentry;
+               struct vfsmount *old_mnt = nd->mnt;
+               struct qstr last = nd->last;
+               int last_type = nd->last_type;
                /*
                 * NAME was not found in alternate root or it's a directory.  Try to find
                 * it in the normal root:
                 */
-               nd_root.last_type = LAST_ROOT;
-               nd_root.flags = nd->flags;
-               memcpy(&nd_root.intent, &nd->intent, sizeof(nd_root.intent));
+               nd->last_type = LAST_ROOT;
                read_lock(&current->fs->lock);
-               nd_root.mnt = mntget(current->fs->rootmnt);
-               nd_root.dentry = dget(current->fs->root);
+               nd->mnt = mntget(current->fs->rootmnt);
+               nd->dentry = dget(current->fs->root);
                read_unlock(&current->fs->lock);
-               if (path_walk(name, &nd_root))
-                       return 1;
-               if (nd_root.dentry->d_inode) {
+               if (path_walk(name, nd) == 0) {
+                       if (nd->dentry->d_inode) {
+                               dput(old_dentry);
+                               mntput(old_mnt);
+                               return 1;
+                       }
                        path_release(nd);
-                       nd->dentry = nd_root.dentry;
-                       nd->mnt = nd_root.mnt;
-                       nd->last = nd_root.last;
-                       return 1;
                }
-               path_release(&nd_root);
+               nd->dentry = old_dentry;
+               nd->mnt = old_mnt;
+               nd->last = last;
+               nd->last_type = last_type;
        }
        return 1;
 }
@@ -842,31 +951,13 @@ set_it:
        }
 }
 
-/* SMP-safe */
-static inline int
-walk_init_root(const char *name, struct nameidata *nd)
-{
-       read_lock(&current->fs->lock);
-       if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
-               nd->mnt = mntget(current->fs->altrootmnt);
-               nd->dentry = dget(current->fs->altroot);
-               read_unlock(&current->fs->lock);
-               if (__emul_lookup_dentry(name,nd))
-                       return 0;
-               read_lock(&current->fs->lock);
-       }
-       nd->mnt = mntget(current->fs->rootmnt);
-       nd->dentry = dget(current->fs->root);
-       read_unlock(&current->fs->lock);
-       return 1;
-}
-
 int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata *nd)
 {
        int retval;
 
        nd->last_type = LAST_ROOT; /* if there are only slashes... */
        nd->flags = flags;
+       nd->depth = 0;
 
        read_lock(&current->fs->lock);
        if (*name=='/') {
@@ -880,8 +971,7 @@ int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata
                }
                nd->mnt = mntget(current->fs->rootmnt);
                nd->dentry = dget(current->fs->root);
-       }
-       else{
+       } else {
                nd->mnt = mntget(current->fs->pwdmnt);
                nd->dentry = dget(current->fs->pwd);
        }
@@ -1031,15 +1121,18 @@ static inline int check_sticky(struct inode *dir, struct inode *inode)
 static inline int may_delete(struct inode *dir,struct dentry *victim,int isdir)
 {
        int error;
-       if (!victim->d_inode || victim->d_parent->d_inode != dir)
+       if (!victim->d_inode)
                return -ENOENT;
+       if (victim->d_parent->d_inode != dir)
+               BUG();
+                       
        error = permission(dir,MAY_WRITE | MAY_EXEC, NULL);
        if (error)
                return error;
        if (IS_APPEND(dir))
                return -EPERM;
        if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
-           IS_IMMUTABLE(victim->d_inode))
+               IS_IXORUNLINK(victim->d_inode))
                return -EPERM;
        if (isdir) {
                if (!S_ISDIR(victim->d_inode->i_mode))
@@ -1091,6 +1184,8 @@ static inline int lookup_flags(unsigned int f)
        
        if (f & O_DIRECTORY)
                retval |= LOOKUP_DIRECTORY;
+       if (f & O_ATOMICLOOKUP)
+               retval |= LOOKUP_ATOMIC;
 
        return retval;
 }
@@ -1206,6 +1301,11 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
                        return -EPERM;
        }
 
+       /* O_NOATIME can only be set by the owner or superuser */
+       if (flag & O_NOATIME)
+               if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
+                       return -EPERM;
+
        /*
         * Ensure there are no outstanding leases on the file.
         */
@@ -1380,7 +1480,15 @@ do_link:
        if (error)
                goto exit_dput;
        touch_atime(nd->mnt, dentry);
+       nd_set_link(nd, NULL);
        error = dentry->d_inode->i_op->follow_link(dentry, nd);
+       if (!error) {
+               char *s = nd_get_link(nd);
+               if (s)
+                       error = __vfs_follow_link(nd, s);
+               if (dentry->d_inode->i_op->put_link)
+                       dentry->d_inode->i_op->put_link(dentry, nd);
+       }
        dput(dentry);
        if (error)
                return error;
@@ -1833,7 +1941,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
        /*
         * A link to an append-only or immutable file cannot be created.
         */
-       if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+       if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
                return -EPERM;
        if (!dir->i_op || !dir->i_op->link)
                return -EPERM;
@@ -2161,40 +2269,23 @@ out:
        return len;
 }
 
-static inline int
-__vfs_follow_link(struct nameidata *nd, const char *link)
+/*
+ * A helper for ->readlink().  This should be used *ONLY* for symlinks whose
+ * ->follow_link() touches nd only through nd_set_link(): nd is handed over
+ * with nothing but ->depth set.  Whether to use it is up to the filesystem.
+ */
+int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
 {
-       int res = 0;
-       char *name;
-       if (IS_ERR(link))
-               goto fail;
-
-       if (*link == '/') {
-               path_release(nd);
-               if (!walk_init_root(link, nd))
-                       /* weird __emul_prefix() stuff did it */
-                       goto out;
-       }
-       res = link_path_walk(link, nd);
-out:
-       if (current->link_count || res || nd->last_type!=LAST_NORM)
-               return res;
-       /*
-        * If it is an iterative symlinks resolution in open_namei() we
-        * have to copy the last component. And all that crap because of
-        * bloody create() on broken symlinks. Furrfu...
-        */
-       name = __getname();
-       if (unlikely(!name)) {
-               path_release(nd);
-               return -ENOMEM;
+       struct nameidata nd;            /* scratch; only ->depth is initialised */
+       int res;
+       nd.depth = 0;
+       res = dentry->d_inode->i_op->follow_link(dentry, &nd);
+       if (!res) {                     /* follow_link succeeded */
+               res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd));
+               if (dentry->d_inode->i_op->put_link)    /* hook is optional */
+                       dentry->d_inode->i_op->put_link(dentry, &nd);
         }
-       strcpy(name, nd->last.name);
-       nd->last.name = name;
-       return 0;
-fail:
-       path_release(nd);
-       return PTR_ERR(link);
+       return res;                     /* follow_link error, else vfs_readlink() result */
 }
 
 int vfs_follow_link(struct nameidata *nd, const char *link)
@@ -2237,6 +2328,26 @@ int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
        return res;
 }
 
+int page_follow_link_light(struct dentry *dentry, struct nameidata *nd)
+{
+       struct page *page;      /* set by page_getlink(); not released in this function */
+       nd_set_link(nd, page_getlink(dentry, &page));   /* value may be an error pointer; users test IS_ERR() */
+       return 0;               /* always 0: a failure travels as the stored link value */
+}
+
+void page_put_link(struct dentry *dentry, struct nameidata *nd)
+{
+       struct page *link_page;
+
+       if (IS_ERR(nd_get_link(nd)))    /* stored link is an error pointer: nothing to undo */
+               return;
+       link_page = find_get_page(dentry->d_inode->i_mapping, 0);
+       BUG_ON(!link_page);             /* page 0 of the mapping is expected to be cached */
+       kunmap(link_page);
+       page_cache_release(link_page);  /* drops the reference from find_get_page() above */
+       page_cache_release(link_page);  /* presumably the one held since page_getlink() -- confirm */
+}
+
 int page_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
        struct page *page = NULL;
@@ -2291,8 +2402,9 @@ fail:
 }
 
 struct inode_operations page_symlink_inode_operations = {
-       .readlink       = page_readlink,
-       .follow_link    = page_follow_link,
+       .readlink       = generic_readlink,     /* calls ->follow_link(), then ->put_link() */
+       .follow_link    = page_follow_link_light,       /* only records the link via nd_set_link() */
+       .put_link       = page_put_link,        /* kunmap()s and releases page 0 afterwards */
 };
 
 EXPORT_SYMBOL(__user_walk);
@@ -2305,6 +2417,8 @@ EXPORT_SYMBOL(lookup_create);
 EXPORT_SYMBOL(lookup_hash);
 EXPORT_SYMBOL(lookup_one_len);
 EXPORT_SYMBOL(page_follow_link);
+EXPORT_SYMBOL(page_follow_link_light);
+EXPORT_SYMBOL(page_put_link);
 EXPORT_SYMBOL(page_readlink);
 EXPORT_SYMBOL(page_symlink);
 EXPORT_SYMBOL(page_symlink_inode_operations);
@@ -2324,3 +2438,4 @@ EXPORT_SYMBOL(vfs_rename);
 EXPORT_SYMBOL(vfs_rmdir);
 EXPORT_SYMBOL(vfs_symlink);
 EXPORT_SYMBOL(vfs_unlink);
+EXPORT_SYMBOL(generic_readlink);