VServer 1.9.2 (patch-2.6.8.1-vs1.9.2.diff)
[linux-2.6.git] / fs / namei.c
index 763e958..8a1943d 100644 (file)
@@ -165,9 +165,6 @@ int vfs_permission(struct inode * inode, int mask)
 {
        umode_t                 mode = inode->i_mode;
 
-       if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN|VX_WATCH))
-               return -EACCES;
-
        if (mask & MAY_WRITE) {
                /*
                 * Nobody gets write access to a read-only fs.
@@ -213,12 +210,17 @@ int vfs_permission(struct inode * inode, int mask)
        return -EACCES;
 }
 
-static inline int xid_permission(struct inode *inode)
+static inline int xid_permission(struct inode *inode, int mask, struct nameidata *nd)
 {
        if (inode->i_xid == 0)
                return 0;
        if (vx_check(inode->i_xid, VX_ADMIN|VX_WATCH|VX_IDENT))
                return 0;
+/*
+       printk("VSW: xid=%d denied access to %p[#%d,%lu] »%*s«.\n",
+               vx_current_xid(), inode, inode->i_xid, inode->i_ino,
+               nd->dentry->d_name.len, nd->dentry->d_name.name);
+*/
        return -EACCES;
 }
 
@@ -230,7 +232,7 @@ int permission(struct inode * inode,int mask, struct nameidata *nd)
        /* Ordinary permission routines do not understand MAY_APPEND. */
        submask = mask & ~MAY_APPEND;
 
-       if ((retval = xid_permission(inode)))
+       if ((retval = xid_permission(inode, mask, nd)))
                return retval;
        if (inode->i_op && inode->i_op->permission)
                retval = inode->i_op->permission(inode, submask, nd);
@@ -294,6 +296,16 @@ void path_release(struct nameidata *nd)
        mntput(nd->mnt);
 }
 
+/*
+ * umount() mustn't call path_release()/mntput() as that would clear
+ * mnt_expiry_mark
+ */
+void path_release_on_umount(struct nameidata *nd)
+{
+       dput(nd->dentry);
+       _mntput(nd->mnt);
+}
+
 /*
  * Internal lookup() using the new generic dcache.
  * SMP-safe
@@ -332,7 +344,7 @@ static inline int exec_permission_lite(struct inode *inode,
 {
        umode_t mode = inode->i_mode;
 
-       if ((inode->i_op && inode->i_op->permission))
+       if (inode->i_op && inode->i_op->permission)
                return -EAGAIN;
 
        if (current->fsuid == inode->i_uid)
@@ -346,6 +358,9 @@ static inline int exec_permission_lite(struct inode *inode,
        if ((inode->i_mode & S_IXUGO) && capable(CAP_DAC_OVERRIDE))
                goto ok;
 
+       if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_OVERRIDE))
+               goto ok;
+
        if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_READ_SEARCH))
                goto ok;
 
@@ -411,6 +426,62 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
        return result;
 }
 
+static int __emul_lookup_dentry(const char *, struct nameidata *);
+
+/* SMP-safe */
+static inline int
+walk_init_root(const char *name, struct nameidata *nd)
+{
+       read_lock(&current->fs->lock);
+       if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
+               nd->mnt = mntget(current->fs->altrootmnt);
+               nd->dentry = dget(current->fs->altroot);
+               read_unlock(&current->fs->lock);
+               if (__emul_lookup_dentry(name,nd))
+                       return 0;
+               read_lock(&current->fs->lock);
+       }
+       nd->mnt = mntget(current->fs->rootmnt);
+       nd->dentry = dget(current->fs->root);
+       read_unlock(&current->fs->lock);
+       return 1;
+}
+
+static inline int __vfs_follow_link(struct nameidata *nd, const char *link)
+{
+       int res = 0;
+       char *name;
+       if (IS_ERR(link))
+               goto fail;
+
+       if (*link == '/') {
+               path_release(nd);
+               if (!walk_init_root(link, nd))
+                       /* weird __emul_prefix() stuff did it */
+                       goto out;
+       }
+       res = link_path_walk(link, nd);
+out:
+       if (nd->depth || res || nd->last_type!=LAST_NORM)
+               return res;
+       /*
+        * If it is an iterative symlinks resolution in open_namei() we
+        * have to copy the last component. And all that crap because of
+        * bloody create() on broken symlinks. Furrfu...
+        */
+       name = __getname();
+       if (unlikely(!name)) {
+               path_release(nd);
+               return -ENOMEM;
+       }
+       strcpy(name, nd->last.name);
+       nd->last.name = name;
+       return 0;
+fail:
+       path_release(nd);
+       return PTR_ERR(link);
+}
+
 /*
  * This limits recursive symlink follows to 8, while
  * limiting consecutive symlinks to 40.
@@ -421,19 +492,30 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
 static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
        int err = -ELOOP;
-       if (current->link_count >= 5)
+       if (current->link_count >= MAX_NESTED_LINKS)
                goto loop;
        if (current->total_link_count >= 40)
                goto loop;
+       BUG_ON(nd->depth >= MAX_NESTED_LINKS);
        cond_resched();
        err = security_inode_follow_link(dentry, nd);
        if (err)
                goto loop;
        current->link_count++;
        current->total_link_count++;
+       nd->depth++;
        touch_atime(nd->mnt, dentry);
+       nd_set_link(nd, NULL);
        err = dentry->d_inode->i_op->follow_link(dentry, nd);
+       if (!err) {
+               char *s = nd_get_link(nd);
+               if (s)
+                       err = __vfs_follow_link(nd, s);
+               if (dentry->d_inode->i_op->put_link)
+                       dentry->d_inode->i_op->put_link(dentry, nd);
+       }
        current->link_count--;
+       nd->depth--;
        return err;
 loop:
        path_release(nd);
@@ -603,7 +685,7 @@ int fastcall link_path_walk(const char * name, struct nameidata *nd)
                goto return_reval;
 
        inode = nd->dentry->d_inode;
-       if (current->link_count)
+       if (nd->depth)
                lookup_flags = LOOKUP_FOLLOW;
 
        /* At this point we know we have a real path component. */
@@ -811,6 +893,7 @@ static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
                 */
                nd_root.last_type = LAST_ROOT;
                nd_root.flags = nd->flags;
+               nd_root.depth = 0;
                memcpy(&nd_root.intent, &nd->intent, sizeof(nd_root.intent));
                read_lock(&current->fs->lock);
                nd_root.mnt = mntget(current->fs->rootmnt);
@@ -858,31 +941,13 @@ set_it:
        }
 }
 
-/* SMP-safe */
-static inline int
-walk_init_root(const char *name, struct nameidata *nd)
-{
-       read_lock(&current->fs->lock);
-       if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
-               nd->mnt = mntget(current->fs->altrootmnt);
-               nd->dentry = dget(current->fs->altroot);
-               read_unlock(&current->fs->lock);
-               if (__emul_lookup_dentry(name,nd))
-                       return 0;
-               read_lock(&current->fs->lock);
-       }
-       nd->mnt = mntget(current->fs->rootmnt);
-       nd->dentry = dget(current->fs->root);
-       read_unlock(&current->fs->lock);
-       return 1;
-}
-
 int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata *nd)
 {
        int retval;
 
        nd->last_type = LAST_ROOT; /* if there are only slashes... */
        nd->flags = flags;
+       nd->depth = 0;
 
        read_lock(&current->fs->lock);
        if (*name=='/') {
@@ -1222,6 +1287,11 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
                        return -EPERM;
        }
 
+       /* O_NOATIME can only be set by the owner or superuser */
+       if (flag & O_NOATIME)
+               if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
+                       return -EPERM;
+
        /*
         * Ensure there are no outstanding leases on the file.
         */
@@ -1396,7 +1466,15 @@ do_link:
        if (error)
                goto exit_dput;
        touch_atime(nd->mnt, dentry);
+       nd_set_link(nd, NULL);
        error = dentry->d_inode->i_op->follow_link(dentry, nd);
+       if (!error) {
+               char *s = nd_get_link(nd);
+               if (s)
+                       error = __vfs_follow_link(nd, s);
+               if (dentry->d_inode->i_op->put_link)
+                       dentry->d_inode->i_op->put_link(dentry, nd);
+       }
        dput(dentry);
        if (error)
                return error;
@@ -2177,40 +2255,23 @@ out:
        return len;
 }
 
-static inline int
-__vfs_follow_link(struct nameidata *nd, const char *link)
+/*
+ * A helper for ->readlink().  This should be used *ONLY* for symlinks that
+ * have ->follow_link() touching nd only in nd_set_link().  Using (or not
+ * using) it for any given inode is up to filesystem.
+ */
+int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
 {
-       int res = 0;
-       char *name;
-       if (IS_ERR(link))
-               goto fail;
-
-       if (*link == '/') {
-               path_release(nd);
-               if (!walk_init_root(link, nd))
-                       /* weird __emul_prefix() stuff did it */
-                       goto out;
-       }
-       res = link_path_walk(link, nd);
-out:
-       if (current->link_count || res || nd->last_type!=LAST_NORM)
-               return res;
-       /*
-        * If it is an iterative symlinks resolution in open_namei() we
-        * have to copy the last component. And all that crap because of
-        * bloody create() on broken symlinks. Furrfu...
-        */
-       name = __getname();
-       if (unlikely(!name)) {
-               path_release(nd);
-               return -ENOMEM;
+       struct nameidata nd;
+       int res;
+       nd.depth = 0;
+       res = dentry->d_inode->i_op->follow_link(dentry, &nd);
+       if (!res) {
+               res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd));
+               if (dentry->d_inode->i_op->put_link)
+                       dentry->d_inode->i_op->put_link(dentry, &nd);
        }
-       strcpy(name, nd->last.name);
-       nd->last.name = name;
-       return 0;
-fail:
-       path_release(nd);
-       return PTR_ERR(link);
+       return res;
 }
 
 int vfs_follow_link(struct nameidata *nd, const char *link)
@@ -2253,6 +2314,30 @@ int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
        return res;
 }
 
+int page_follow_link_light(struct dentry *dentry, struct nameidata *nd)
+{
+       struct page *page;
+       char *s = page_getlink(dentry, &page);
+       if (!IS_ERR(s)) {
+               nd_set_link(nd, s);
+               s = NULL;
+       }
+       return PTR_ERR(s);
+}
+
+void page_put_link(struct dentry *dentry, struct nameidata *nd)
+{
+       if (!IS_ERR(nd_get_link(nd))) {
+               struct page *page;
+               page = find_get_page(dentry->d_inode->i_mapping, 0);
+               if (!page)
+                       BUG();
+               kunmap(page);
+               page_cache_release(page);
+               page_cache_release(page);
+       }
+}
+
 int page_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
        struct page *page = NULL;
@@ -2307,8 +2392,9 @@ fail:
 }
 
 struct inode_operations page_symlink_inode_operations = {
-       .readlink       = page_readlink,
-       .follow_link    = page_follow_link,
+       .readlink       = generic_readlink,
+       .follow_link    = page_follow_link_light,
+       .put_link       = page_put_link,
 };
 
 EXPORT_SYMBOL(__user_walk);
@@ -2321,6 +2407,8 @@ EXPORT_SYMBOL(lookup_create);
 EXPORT_SYMBOL(lookup_hash);
 EXPORT_SYMBOL(lookup_one_len);
 EXPORT_SYMBOL(page_follow_link);
+EXPORT_SYMBOL(page_follow_link_light);
+EXPORT_SYMBOL(page_put_link);
 EXPORT_SYMBOL(page_readlink);
 EXPORT_SYMBOL(page_symlink);
 EXPORT_SYMBOL(page_symlink_inode_operations);
@@ -2340,3 +2428,4 @@ EXPORT_SYMBOL(vfs_rename);
 EXPORT_SYMBOL(vfs_rmdir);
 EXPORT_SYMBOL(vfs_symlink);
 EXPORT_SYMBOL(vfs_unlink);
+EXPORT_SYMBOL(generic_readlink);