- fix DEBUGP() macro; newer gcc versions refuse to concatenate __FILE__
[linux-2.6.git] / fs / namei.c
index c2d43bb..656430d 100644 (file)
@@ -27,6 +27,8 @@
 #include <linux/security.h>
 #include <linux/mount.h>
 #include <linux/audit.h>
+#include <linux/vs_base.h>
+
 #include <asm/namei.h>
 #include <asm/uaccess.h>
 
@@ -163,7 +165,8 @@ int vfs_permission(struct inode * inode, int mask)
 {
        umode_t                 mode = inode->i_mode;
 
-       if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN|VX_WATCH))
+       /* Prevent vservers from escaping chroot() barriers */
+       if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN))
                return -EACCES;
 
        if (mask & MAY_WRITE) {
@@ -211,25 +214,19 @@ int vfs_permission(struct inode * inode, int mask)
        return -EACCES;
 }
 
-static inline int xid_permission(struct inode *inode)
-{
-       if (inode->i_xid == 0)
-               return 0;
-       if (vx_check(inode->i_xid, VX_ADMIN|VX_WATCH|VX_IDENT))
-               return 0;
-       return -EACCES;
-}
-
 int permission(struct inode * inode,int mask, struct nameidata *nd)
 {
        int retval;
        int submask;
+       umode_t mode = inode->i_mode;
 
        /* Ordinary permission routines do not understand MAY_APPEND. */
        submask = mask & ~MAY_APPEND;
 
-       if ((retval = xid_permission(inode)))
-               return retval;
+       if (nd && (mask & MAY_WRITE) && MNT_IS_RDONLY(nd->mnt) &&
+               (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
+               return -EROFS;
+
        if (inode->i_op && inode->i_op->permission)
                retval = inode->i_op->permission(inode, submask, nd);
        else
@@ -292,6 +289,16 @@ void path_release(struct nameidata *nd)
        mntput(nd->mnt);
 }
 
+/*
+ * umount() mustn't call path_release()/mntput() as that would clear
+ * mnt_expiry_mark
+ */
+void path_release_on_umount(struct nameidata *nd)
+{
+       dput(nd->dentry);
+       _mntput(nd->mnt);
+}
+
 /*
  * Internal lookup() using the new generic dcache.
  * SMP-safe
@@ -330,7 +337,7 @@ static inline int exec_permission_lite(struct inode *inode,
 {
        umode_t mode = inode->i_mode;
 
-       if ((inode->i_op && inode->i_op->permission))
+       if (inode->i_op && inode->i_op->permission)
                return -EAGAIN;
 
        if (current->fsuid == inode->i_uid)
@@ -344,6 +351,9 @@ static inline int exec_permission_lite(struct inode *inode,
        if ((inode->i_mode & S_IXUGO) && capable(CAP_DAC_OVERRIDE))
                goto ok;
 
+       if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_OVERRIDE))
+               goto ok;
+
        if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_READ_SEARCH))
                goto ok;
 
@@ -409,20 +419,61 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
        return result;
 }
 
-inline void nd_set_link(struct nameidata *nd, char *path)
-{
-       nd->saved_names[current->link_count] = path;
-}
+static int __emul_lookup_dentry(const char *, struct nameidata *);
 
-inline char *nd_get_link(struct nameidata *nd)
+/* SMP-safe */
+static inline int
+walk_init_root(const char *name, struct nameidata *nd)
 {
-       return nd->saved_names[current->link_count];
+       read_lock(&current->fs->lock);
+       if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
+               nd->mnt = mntget(current->fs->altrootmnt);
+               nd->dentry = dget(current->fs->altroot);
+               read_unlock(&current->fs->lock);
+               if (__emul_lookup_dentry(name,nd))
+                       return 0;
+               read_lock(&current->fs->lock);
+       }
+       nd->mnt = mntget(current->fs->rootmnt);
+       nd->dentry = dget(current->fs->root);
+       read_unlock(&current->fs->lock);
+       return 1;
 }
 
-EXPORT_SYMBOL(nd_set_link);
-EXPORT_SYMBOL(nd_get_link);
+static inline int __vfs_follow_link(struct nameidata *nd, const char *link)
+{
+       int res = 0;
+       char *name;
+       if (IS_ERR(link))
+               goto fail;
 
-static inline int __vfs_follow_link(struct nameidata *, const char *);
+       if (*link == '/') {
+               path_release(nd);
+               if (!walk_init_root(link, nd))
+                       /* weird __emul_prefix() stuff did it */
+                       goto out;
+       }
+       res = link_path_walk(link, nd);
+out:
+       if (nd->depth || res || nd->last_type!=LAST_NORM)
+               return res;
+       /*
+        * If it is an iterative symlinks resolution in open_namei() we
+        * have to copy the last component. And all that crap because of
+        * bloody create() on broken symlinks. Furrfu...
+        */
+       name = __getname();
+       if (unlikely(!name)) {
+               path_release(nd);
+               return -ENOMEM;
+       }
+       strcpy(name, nd->last.name);
+       nd->last.name = name;
+       return 0;
+fail:
+       path_release(nd);
+       return PTR_ERR(link);
+}
 
 /*
  * This limits recursive symlink follows to 8, while
@@ -438,12 +489,14 @@ static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
                goto loop;
        if (current->total_link_count >= 40)
                goto loop;
+       BUG_ON(nd->depth >= MAX_NESTED_LINKS);
        cond_resched();
        err = security_inode_follow_link(dentry, nd);
        if (err)
                goto loop;
        current->link_count++;
        current->total_link_count++;
+       nd->depth++;
        touch_atime(nd->mnt, dentry);
        nd_set_link(nd, NULL);
        err = dentry->d_inode->i_op->follow_link(dentry, nd);
@@ -455,6 +508,7 @@ static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
                        dentry->d_inode->i_op->put_link(dentry, nd);
        }
        current->link_count--;
+       nd->depth--;
        return err;
 loop:
        path_release(nd);
@@ -626,7 +680,7 @@ int fastcall link_path_walk(const char * name, struct nameidata *nd)
                goto return_reval;
 
        inode = nd->dentry->d_inode;
-       if (current->link_count)
+       if (nd->depth)
                lookup_flags = LOOKUP_FOLLOW;
 
        /* At this point we know we have a real path component. */
@@ -833,28 +887,31 @@ static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
                return 0;               /* something went wrong... */
 
        if (!nd->dentry->d_inode || S_ISDIR(nd->dentry->d_inode->i_mode)) {
-               struct nameidata nd_root;
+               struct dentry *old_dentry = nd->dentry;
+               struct vfsmount *old_mnt = nd->mnt;
+               struct qstr last = nd->last;
+               int last_type = nd->last_type;
                /*
                 * NAME was not found in alternate root or it's a directory.  Try to find
                 * it in the normal root:
                 */
-               nd_root.last_type = LAST_ROOT;
-               nd_root.flags = nd->flags;
-               memcpy(&nd_root.intent, &nd->intent, sizeof(nd_root.intent));
+               nd->last_type = LAST_ROOT;
                read_lock(&current->fs->lock);
-               nd_root.mnt = mntget(current->fs->rootmnt);
-               nd_root.dentry = dget(current->fs->root);
+               nd->mnt = mntget(current->fs->rootmnt);
+               nd->dentry = dget(current->fs->root);
                read_unlock(&current->fs->lock);
-               if (path_walk(name, &nd_root))
-                       return 1;
-               if (nd_root.dentry->d_inode) {
+               if (path_walk(name, nd) == 0) {
+                       if (nd->dentry->d_inode) {
+                               dput(old_dentry);
+                               mntput(old_mnt);
+                               return 1;
+                       }
                        path_release(nd);
-                       nd->dentry = nd_root.dentry;
-                       nd->mnt = nd_root.mnt;
-                       nd->last = nd_root.last;
-                       return 1;
                }
-               path_release(&nd_root);
+               nd->dentry = old_dentry;
+               nd->mnt = old_mnt;
+               nd->last = last;
+               nd->last_type = last_type;
        }
        return 1;
 }
@@ -887,31 +944,13 @@ set_it:
        }
 }
 
-/* SMP-safe */
-static inline int
-walk_init_root(const char *name, struct nameidata *nd)
-{
-       read_lock(&current->fs->lock);
-       if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
-               nd->mnt = mntget(current->fs->altrootmnt);
-               nd->dentry = dget(current->fs->altroot);
-               read_unlock(&current->fs->lock);
-               if (__emul_lookup_dentry(name,nd))
-                       return 0;
-               read_lock(&current->fs->lock);
-       }
-       nd->mnt = mntget(current->fs->rootmnt);
-       nd->dentry = dget(current->fs->root);
-       read_unlock(&current->fs->lock);
-       return 1;
-}
-
 int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata *nd)
 {
        int retval;
 
        nd->last_type = LAST_ROOT; /* if there are only slashes... */
        nd->flags = flags;
+       nd->depth = 0;
 
        read_lock(&current->fs->lock);
        if (*name=='/') {
@@ -925,8 +964,7 @@ int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata
                }
                nd->mnt = mntget(current->fs->rootmnt);
                nd->dentry = dget(current->fs->root);
-       }
-       else{
+       } else {
                nd->mnt = mntget(current->fs->pwdmnt);
                nd->dentry = dget(current->fs->pwd);
        }
@@ -1121,6 +1159,24 @@ static inline int may_create(struct inode *dir, struct dentry *child,
        return permission(dir,MAY_WRITE | MAY_EXEC, nd);
 }
 
+static inline int mnt_may_create(struct vfsmount *mnt, struct inode *dir, struct dentry *child) {
+       if (child->d_inode)
+               return -EEXIST;
+       if (IS_DEADDIR(dir))
+               return -ENOENT;
+       if (mnt->mnt_flags & MNT_RDONLY)
+               return -EROFS;
+       return 0;
+}
+
+static inline int mnt_may_unlink(struct vfsmount *mnt, struct inode *dir, struct dentry *child) {
+       if (!child->d_inode)
+               return -ENOENT;
+       if (mnt->mnt_flags & MNT_RDONLY)
+               return -EROFS;
+       return 0;
+}
+
 /* 
  * Special case: O_CREAT|O_EXCL implies O_NOFOLLOW for security
  * reasons.
@@ -1244,7 +1300,8 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
                        return -EACCES;
 
                flag &= ~O_TRUNC;
-       } else if (IS_RDONLY(inode) && (flag & FMODE_WRITE))
+       } else if ((IS_RDONLY(inode) || (nd && MNT_IS_RDONLY(nd->mnt)))
+               && (flag & FMODE_WRITE))
                return -EROFS;
        /*
         * An append-only file must be opened in append mode for writing.
@@ -1256,6 +1313,11 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
                        return -EPERM;
        }
 
+       /* O_NOATIME can only be set by the owner or superuser */
+       if (flag & O_NOATIME)
+               if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
+                       return -EPERM;
+
        /*
         * Ensure there are no outstanding leases on the file.
         */
@@ -1477,23 +1539,28 @@ do_link:
 struct dentry *lookup_create(struct nameidata *nd, int is_dir)
 {
        struct dentry *dentry;
+       int error;
 
        down(&nd->dentry->d_inode->i_sem);
-       dentry = ERR_PTR(-EEXIST);
+       error = -EEXIST;
        if (nd->last_type != LAST_NORM)
-               goto fail;
+               goto out;
        nd->flags &= ~LOOKUP_PARENT;
        dentry = lookup_hash(&nd->last, nd->dentry);
        if (IS_ERR(dentry))
+               goto ret;
+       error = mnt_may_create(nd->mnt, nd->dentry->d_inode, dentry);
+       if (error)
                goto fail;
+       error = -ENOENT;
        if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
-               goto enoent;
+               goto fail;
+ret:
        return dentry;
-enoent:
-       dput(dentry);
-       dentry = ERR_PTR(-ENOENT);
 fail:
-       return dentry;
+       dput(dentry);
+out:
+       return ERR_PTR(error);
 }
 
 int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
@@ -1722,7 +1789,11 @@ asmlinkage long sys_rmdir(const char __user * pathname)
        dentry = lookup_hash(&nd.last, nd.dentry);
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
+               error = mnt_may_unlink(nd.mnt, nd.dentry->d_inode, dentry);
+               if (error)
+                       goto exit2;
                error = vfs_rmdir(nd.dentry->d_inode, dentry);
+       exit2:
                dput(dentry);
        }
        up(&nd.dentry->d_inode->i_sem);
@@ -1794,6 +1865,9 @@ asmlinkage long sys_unlink(const char __user * pathname)
                /* Why not before? Because we want correct error value */
                if (nd.last.name[nd.last.len])
                        goto slashes;
+               error = mnt_may_unlink(nd.mnt, nd.dentry->d_inode, dentry);
+               if (error)
+                       goto exit2;
                inode = dentry->d_inode;
                if (inode)
                        atomic_inc(&inode->i_count);
@@ -1939,8 +2013,13 @@ asmlinkage long sys_link(const char __user * oldname, const char __user * newnam
        error = path_lookup(to, LOOKUP_PARENT, &nd);
        if (error)
                goto out;
-       error = -EXDEV;
-       if (old_nd.mnt != nd.mnt)
+       /*
+        * We allow hard-links to be created to a bind-mount as long
+        * as the bind-mount is not read-only.  Checking for cross-dev
+        * links is subsumed by the superblock check in vfs_link().
+        */
+       error = -EROFS;
+       if (MNT_IS_RDONLY(old_nd.mnt))
                goto out_release;
        new_dentry = lookup_create(&nd, 0);
        error = PTR_ERR(new_dentry);
@@ -2158,6 +2237,9 @@ static inline int do_rename(const char * oldname, const char * newname)
        error = -EINVAL;
        if (old_dentry == trap)
                goto exit4;
+       error = -EROFS;
+       if (MNT_IS_RDONLY(newnd.mnt))
+               goto exit4;
        new_dentry = lookup_hash(&newnd.last, new_dir);
        error = PTR_ERR(new_dentry);
        if (IS_ERR(new_dentry))
@@ -2227,7 +2309,9 @@ out:
 int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
 {
        struct nameidata nd;
-       int res = dentry->d_inode->i_op->follow_link(dentry, &nd);
+       int res;
+       nd.depth = 0;
+       res = dentry->d_inode->i_op->follow_link(dentry, &nd);
        if (!res) {
                res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd));
                if (dentry->d_inode->i_op->put_link)
@@ -2236,42 +2320,6 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
        return res;
 }
 
-static inline int
-__vfs_follow_link(struct nameidata *nd, const char *link)
-{
-       int res = 0;
-       char *name;
-       if (IS_ERR(link))
-               goto fail;
-
-       if (*link == '/') {
-               path_release(nd);
-               if (!walk_init_root(link, nd))
-                       /* weird __emul_prefix() stuff did it */
-                       goto out;
-       }
-       res = link_path_walk(link, nd);
-out:
-       if (current->link_count || res || nd->last_type!=LAST_NORM)
-               return res;
-       /*
-        * If it is an iterative symlinks resolution in open_namei() we
-        * have to copy the last component. And all that crap because of
-        * bloody create() on broken symlinks. Furrfu...
-        */
-       name = __getname();
-       if (unlikely(!name)) {
-               path_release(nd);
-               return -ENOMEM;
-       }
-       strcpy(name, nd->last.name);
-       nd->last.name = name;
-       return 0;
-fail:
-       path_release(nd);
-       return PTR_ERR(link);
-}
-
 int vfs_follow_link(struct nameidata *nd, const char *link)
 {
        return __vfs_follow_link(nd, link);
@@ -2315,12 +2363,8 @@ int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
 int page_follow_link_light(struct dentry *dentry, struct nameidata *nd)
 {
        struct page *page;
-       char *s = page_getlink(dentry, &page);
-       if (!IS_ERR(s)) {
-               nd_set_link(nd, s);
-               s = NULL;
-       }
-       return PTR_ERR(s);
+       nd_set_link(nd, page_getlink(dentry, &page));
+       return 0;
 }
 
 void page_put_link(struct dentry *dentry, struct nameidata *nd)