X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Fnamei.c;h=bef7d99fb799b8affb9c1b947180d85488329422;hb=6c6294c4656749c1eeed12df7ae48e2bf5a394b3;hp=763e9584024112354e1027b816bdff1f9b807e81;hpb=9213980e6a70d8473e0ffd4b39ab5b6caaba9ff5;p=linux-2.6.git diff --git a/fs/namei.c b/fs/namei.c index 763e95840..bef7d99fb 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -25,9 +25,12 @@ #include #include #include +#include #include #include -#include +#include +#include +#include #include #include @@ -117,13 +120,14 @@ static inline int do_getname(const char __user *filename, char *page) int retval; unsigned long len = PATH_MAX; - if ((unsigned long) filename >= TASK_SIZE) { - if (!segment_eq(get_fs(), KERNEL_DS)) + if (!segment_eq(get_fs(), KERNEL_DS)) { + if ((unsigned long) filename >= TASK_SIZE) return -EFAULT; - } else if (TASK_SIZE - (unsigned long) filename < PATH_MAX) - len = TASK_SIZE - (unsigned long) filename; + if (TASK_SIZE - (unsigned long) filename < PATH_MAX) + len = TASK_SIZE - (unsigned long) filename; + } - retval = strncpy_from_user((char *)page, filename, len); + retval = strncpy_from_user(page, filename, len); if (retval > 0) { if (retval < len) return 0; @@ -153,40 +157,36 @@ char * getname(const char __user * filename) return result; } -/* - * vfs_permission() +/** + * generic_permission - check for access rights on a Posix-like filesystem + * @inode: inode to check access rights for + * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) + * @check_acl: optional callback to check for Posix ACLs * - * is used to check for read/write/execute permissions on a file. + * Used to check for read/write/execute permissions on a file. * We use "fsuid" for this, letting us set arbitrary permissions * for filesystem access without changing the "normal" uids which * are used for other things.. 
*/ -int vfs_permission(struct inode * inode, int mask) +int generic_permission(struct inode *inode, int mask, + int (*check_acl)(struct inode *inode, int mask)) { umode_t mode = inode->i_mode; - if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN|VX_WATCH)) - return -EACCES; - - if (mask & MAY_WRITE) { - /* - * Nobody gets write access to a read-only fs. - */ - if (IS_RDONLY(inode) && - (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) - return -EROFS; - - /* - * Nobody gets write access to an immutable file. - */ - if (IS_IMMUTABLE(inode)) - return -EACCES; - } - if (current->fsuid == inode->i_uid) mode >>= 6; - else if (in_group_p(inode->i_gid)) - mode >>= 3; + else { + if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { + int error = check_acl(inode, mask); + if (error == -EACCES) + goto check_capabilities; + else if (error != -EAGAIN) + return error; + } + + if (in_group_p(inode->i_gid)) + mode >>= 3; + } /* * If the DACs are ok we don't need any capability check. @@ -194,6 +194,7 @@ int vfs_permission(struct inode * inode, int mask) if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)) return 0; + check_capabilities: /* * Read/write DACs are always overridable. * Executable DACs are overridable if at least one exec bit is set. 
@@ -213,29 +214,54 @@ int vfs_permission(struct inode * inode, int mask) return -EACCES; } -static inline int xid_permission(struct inode *inode) +static inline int xid_permission(struct inode *inode, int mask, struct nameidata *nd) { + if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN)) { + vxwprintk(1, "xid=%d did hit the barrier.", + vx_current_xid()); + return -EACCES; + } if (inode->i_xid == 0) return 0; if (vx_check(inode->i_xid, VX_ADMIN|VX_WATCH|VX_IDENT)) return 0; + + vxwprintk(1, "xid=%d denied access to %p[#%d,%lu] »%s«.", + vx_current_xid(), inode, inode->i_xid, inode->i_ino, + vxd_path(nd->dentry, nd->mnt)); return -EACCES; } -int permission(struct inode * inode,int mask, struct nameidata *nd) +int permission(struct inode *inode, int mask, struct nameidata *nd) { - int retval; - int submask; + int retval, submask; + + if (mask & MAY_WRITE) { + umode_t mode = inode->i_mode; + + /* + * Nobody gets write access to a read-only fs. + */ + if (IS_RDONLY(inode) && + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) + return -EROFS; + + /* + * Nobody gets write access to an immutable file. + */ + if (IS_IMMUTABLE(inode)) + return -EACCES; + } + /* Ordinary permission routines do not understand MAY_APPEND. */ submask = mask & ~MAY_APPEND; - - if ((retval = xid_permission(inode))) + if ((retval = xid_permission(inode, mask, nd))) return retval; if (inode->i_op && inode->i_op->permission) retval = inode->i_op->permission(inode, submask, nd); else - retval = vfs_permission(inode, submask); + retval = generic_permission(inode, submask, NULL); if (retval) return retval; @@ -294,6 +320,16 @@ void path_release(struct nameidata *nd) mntput(nd->mnt); } +/* + * umount() mustn't call path_release()/mntput() as that would clear + * mnt_expiry_mark + */ +void path_release_on_umount(struct nameidata *nd) +{ + dput(nd->dentry); + _mntput(nd->mnt); +} + /* * Internal lookup() using the new generic dcache. 
* SMP-safe @@ -320,7 +356,7 @@ static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, /* * Short-cut version of permission(), for calling by * path_walk(), when dcache lock is held. Combines parts - * of permission() and vfs_permission(), and tests ONLY for + * of permission() and generic_permission(), and tests ONLY for * MAY_EXEC permission. * * If appropriate, check DAC only. If not appropriate, or @@ -332,7 +368,7 @@ static inline int exec_permission_lite(struct inode *inode, { umode_t mode = inode->i_mode; - if ((inode->i_op && inode->i_op->permission)) + if (inode->i_op && inode->i_op->permission) return -EAGAIN; if (current->fsuid == inode->i_uid) @@ -346,6 +382,9 @@ static inline int exec_permission_lite(struct inode *inode, if ((inode->i_mode & S_IXUGO) && capable(CAP_DAC_OVERRIDE)) goto ok; + if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_OVERRIDE)) + goto ok; + if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_READ_SEARCH)) goto ok; @@ -411,6 +450,80 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s return result; } +static int __emul_lookup_dentry(const char *, struct nameidata *); + +/* SMP-safe */ +static inline int +walk_init_root(const char *name, struct nameidata *nd) +{ + read_lock(&current->fs->lock); + if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) { + nd->mnt = mntget(current->fs->altrootmnt); + nd->dentry = dget(current->fs->altroot); + read_unlock(&current->fs->lock); + if (__emul_lookup_dentry(name,nd)) + return 0; + read_lock(&current->fs->lock); + } + nd->mnt = mntget(current->fs->rootmnt); + nd->dentry = dget(current->fs->root); + read_unlock(&current->fs->lock); + return 1; +} + +static inline int __vfs_follow_link(struct nameidata *nd, const char *link) +{ + int res = 0; + char *name; + if (IS_ERR(link)) + goto fail; + + if (*link == '/') { + path_release(nd); + if (!walk_init_root(link, nd)) + /* weird __emul_prefix() stuff did it */ + goto out; + } + res = link_path_walk(link, nd); +out: + if 
(nd->depth || res || nd->last_type!=LAST_NORM) + return res; + /* + * If it is an iterative symlinks resolution in open_namei() we + * have to copy the last component. And all that crap because of + * bloody create() on broken symlinks. Furrfu... + */ + name = __getname(); + if (unlikely(!name)) { + path_release(nd); + return -ENOMEM; + } + strcpy(name, nd->last.name); + nd->last.name = name; + return 0; +fail: + path_release(nd); + return PTR_ERR(link); +} + +static inline int __do_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + int error; + + touch_atime(nd->mnt, dentry); + nd_set_link(nd, NULL); + error = dentry->d_inode->i_op->follow_link(dentry, nd); + if (!error) { + char *s = nd_get_link(nd); + if (s) + error = __vfs_follow_link(nd, s); + if (dentry->d_inode->i_op->put_link) + dentry->d_inode->i_op->put_link(dentry, nd); + } + + return error; +} + /* * This limits recursive symlink follows to 8, while * limiting consecutive symlinks to 40. @@ -421,19 +534,21 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd) { int err = -ELOOP; - if (current->link_count >= 5) + if (current->link_count >= MAX_NESTED_LINKS) goto loop; if (current->total_link_count >= 40) goto loop; + BUG_ON(nd->depth >= MAX_NESTED_LINKS); cond_resched(); err = security_inode_follow_link(dentry, nd); if (err) goto loop; current->link_count++; current->total_link_count++; - touch_atime(nd->mnt, dentry); - err = dentry->d_inode->i_op->follow_link(dentry, nd); + nd->depth++; + err = __do_follow_link(dentry, nd); current->link_count--; + nd->depth--; return err; loop: path_release(nd); @@ -512,7 +627,7 @@ static inline void follow_dotdot(struct vfsmount **mnt, struct dentry **dentry) if (*dentry == current->fs->root && *mnt == current->fs->rootmnt) { read_unlock(&current->fs->lock); - break; + return; } read_unlock(&current->fs->lock); spin_lock(&dcache_lock); @@ -554,15 +669,33 @@ 
static int do_lookup(struct nameidata *nd, struct qstr *name, { struct vfsmount *mnt = nd->mnt; struct dentry *dentry = __d_lookup(nd->dentry, name); + struct inode *inode; if (!dentry) goto need_lookup; if (dentry->d_op && dentry->d_op->d_revalidate) goto need_revalidate; + inode = dentry->d_inode; + if (!inode) + goto done; + if (!vx_check(inode->i_xid, VX_WATCH|VX_ADMIN|VX_HOSTID|VX_IDENT)) + goto hidden; + if (inode->i_sb->s_magic == PROC_SUPER_MAGIC) { + struct proc_dir_entry *de = PDE(inode); + + if (de && !vx_hide_check(0, de->vx_flags)) + goto hidden; + } done: path->mnt = mnt; path->dentry = dentry; return 0; +hidden: + vxwprintk(1, "xid=%d did lookup hidden %p[#%d,%lu] »%s«.", + vx_current_xid(), inode, inode->i_xid, inode->i_ino, + vxd_path(dentry, mnt)); + dput(dentry); + return -ENOENT; need_lookup: dentry = real_lookup(nd->dentry, name, nd); @@ -603,7 +736,7 @@ int fastcall link_path_walk(const char * name, struct nameidata *nd) goto return_reval; inode = nd->dentry->d_inode; - if (current->link_count) + if (nd->depth) lookup_flags = LOOKUP_FOLLOW; /* At this point we know we have a real path component. */ @@ -804,28 +937,31 @@ static int __emul_lookup_dentry(const char *name, struct nameidata *nd) return 0; /* something went wrong... */ if (!nd->dentry->d_inode || S_ISDIR(nd->dentry->d_inode->i_mode)) { - struct nameidata nd_root; + struct dentry *old_dentry = nd->dentry; + struct vfsmount *old_mnt = nd->mnt; + struct qstr last = nd->last; + int last_type = nd->last_type; /* * NAME was not found in alternate root or it's a directory. 
Try to find * it in the normal root: */ - nd_root.last_type = LAST_ROOT; - nd_root.flags = nd->flags; - memcpy(&nd_root.intent, &nd->intent, sizeof(nd_root.intent)); + nd->last_type = LAST_ROOT; read_lock(&current->fs->lock); - nd_root.mnt = mntget(current->fs->rootmnt); - nd_root.dentry = dget(current->fs->root); + nd->mnt = mntget(current->fs->rootmnt); + nd->dentry = dget(current->fs->root); read_unlock(&current->fs->lock); - if (path_walk(name, &nd_root)) - return 1; - if (nd_root.dentry->d_inode) { + if (path_walk(name, nd) == 0) { + if (nd->dentry->d_inode) { + dput(old_dentry); + mntput(old_mnt); + return 1; + } path_release(nd); - nd->dentry = nd_root.dentry; - nd->mnt = nd_root.mnt; - nd->last = nd_root.last; - return 1; } - path_release(&nd_root); + nd->dentry = old_dentry; + nd->mnt = old_mnt; + nd->last = last; + nd->last_type = last_type; } return 1; } @@ -858,31 +994,13 @@ set_it: } } -/* SMP-safe */ -static inline int -walk_init_root(const char *name, struct nameidata *nd) -{ - read_lock(&current->fs->lock); - if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) { - nd->mnt = mntget(current->fs->altrootmnt); - nd->dentry = dget(current->fs->altroot); - read_unlock(&current->fs->lock); - if (__emul_lookup_dentry(name,nd)) - return 0; - read_lock(&current->fs->lock); - } - nd->mnt = mntget(current->fs->rootmnt); - nd->dentry = dget(current->fs->root); - read_unlock(&current->fs->lock); - return 1; -} - int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata *nd) { int retval; nd->last_type = LAST_ROOT; /* if there are only slashes... 
*/ nd->flags = flags; + nd->depth = 0; read_lock(&current->fs->lock); if (*name=='/') { @@ -896,8 +1014,7 @@ int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata } nd->mnt = mntget(current->fs->rootmnt); nd->dentry = dget(current->fs->root); - } - else{ + } else { nd->mnt = mntget(current->fs->pwdmnt); nd->dentry = dget(current->fs->pwd); } @@ -1047,8 +1164,12 @@ static inline int check_sticky(struct inode *dir, struct inode *inode) static inline int may_delete(struct inode *dir,struct dentry *victim,int isdir) { int error; - if (!victim->d_inode || victim->d_parent->d_inode != dir) + + if (!victim->d_inode) return -ENOENT; + + BUG_ON(victim->d_parent->d_inode != dir); + error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); if (error) return error; @@ -1222,6 +1343,11 @@ int may_open(struct nameidata *nd, int acc_mode, int flag) return -EPERM; } + /* O_NOATIME can only be set by the owner or superuser */ + if (flag & O_NOATIME) + if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER)) + return -EPERM; + /* * Ensure there are no outstanding leases on the file. */ @@ -1395,8 +1521,7 @@ do_link: error = security_inode_follow_link(dentry, nd); if (error) goto exit_dput; - touch_atime(nd->mnt, dentry); - error = dentry->d_inode->i_op->follow_link(dentry, nd); + error = __do_follow_link(dentry, nd); dput(dentry); if (error) return error; @@ -1599,7 +1724,7 @@ out: * if it cannot handle the case of removing a directory * that is still in use by something else.. 
*/ -static void d_unhash(struct dentry *dentry) +void dentry_unhash(struct dentry *dentry) { dget(dentry); spin_lock(&dcache_lock); @@ -1629,7 +1754,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) DQUOT_INIT(dir); down(&dentry->d_inode->i_sem); - d_unhash(dentry); + dentry_unhash(dentry); if (d_mountpoint(dentry)) error = -EBUSY; else { @@ -1760,13 +1885,12 @@ asmlinkage long sys_unlink(const char __user * pathname) dput(dentry); } up(&nd.dentry->d_inode->i_sem); + if (inode) + iput(inode); /* truncate the inode here */ exit1: path_release(&nd); exit: putname(name); - - if (inode) - iput(inode); /* truncate the inode here */ return error; slashes: @@ -1972,7 +2096,7 @@ int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, target = new_dentry->d_inode; if (target) { down(&target->i_sem); - d_unhash(new_dentry); + dentry_unhash(new_dentry); } if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) error = -EBUSY; @@ -2177,40 +2301,23 @@ out: return len; } -static inline int -__vfs_follow_link(struct nameidata *nd, const char *link) +/* + * A helper for ->readlink(). This should be used *ONLY* for symlinks that + * have ->follow_link() touching nd only in nd_set_link(). Using (or not + * using) it for any given inode is up to filesystem. + */ +int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) { - int res = 0; - char *name; - if (IS_ERR(link)) - goto fail; - - if (*link == '/') { - path_release(nd); - if (!walk_init_root(link, nd)) - /* weird __emul_prefix() stuff did it */ - goto out; - } - res = link_path_walk(link, nd); -out: - if (current->link_count || res || nd->last_type!=LAST_NORM) - return res; - /* - * If it is an iterative symlinks resolution in open_namei() we - * have to copy the last component. And all that crap because of - * bloody create() on broken symlinks. Furrfu... 
- */ - name = __getname(); - if (unlikely(!name)) { - path_release(nd); - return -ENOMEM; + struct nameidata nd; + int res; + nd.depth = 0; + res = dentry->d_inode->i_op->follow_link(dentry, &nd); + if (!res) { + res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); + if (dentry->d_inode->i_op->put_link) + dentry->d_inode->i_op->put_link(dentry, &nd); } - strcpy(name, nd->last.name); - nd->last.name = name; - return 0; -fail: - path_release(nd); - return PTR_ERR(link); + return res; } int vfs_follow_link(struct nameidata *nd, const char *link) @@ -2253,16 +2360,24 @@ int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) return res; } -int page_follow_link(struct dentry *dentry, struct nameidata *nd) +int page_follow_link_light(struct dentry *dentry, struct nameidata *nd) { - struct page *page = NULL; - char *s = page_getlink(dentry, &page); - int res = __vfs_follow_link(nd, s); - if (page) { + struct page *page; + nd_set_link(nd, page_getlink(dentry, &page)); + return 0; +} + +void page_put_link(struct dentry *dentry, struct nameidata *nd) +{ + if (!IS_ERR(nd_get_link(nd))) { + struct page *page; + page = find_get_page(dentry->d_inode->i_mapping, 0); + if (!page) + BUG(); kunmap(page); page_cache_release(page); + page_cache_release(page); } - return res; } int page_symlink(struct inode *inode, const char *symname, int len) @@ -2307,8 +2422,9 @@ fail: } struct inode_operations page_symlink_inode_operations = { - .readlink = page_readlink, - .follow_link = page_follow_link, + .readlink = generic_readlink, + .follow_link = page_follow_link_light, + .put_link = page_put_link, }; EXPORT_SYMBOL(__user_walk); @@ -2317,10 +2433,10 @@ EXPORT_SYMBOL(follow_up); EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ EXPORT_SYMBOL(getname); EXPORT_SYMBOL(lock_rename); -EXPORT_SYMBOL(lookup_create); EXPORT_SYMBOL(lookup_hash); EXPORT_SYMBOL(lookup_one_len); -EXPORT_SYMBOL(page_follow_link); +EXPORT_SYMBOL(page_follow_link_light); 
+EXPORT_SYMBOL(page_put_link); EXPORT_SYMBOL(page_readlink); EXPORT_SYMBOL(page_symlink); EXPORT_SYMBOL(page_symlink_inode_operations); @@ -2334,9 +2450,11 @@ EXPORT_SYMBOL(vfs_follow_link); EXPORT_SYMBOL(vfs_link); EXPORT_SYMBOL(vfs_mkdir); EXPORT_SYMBOL(vfs_mknod); -EXPORT_SYMBOL(vfs_permission); +EXPORT_SYMBOL(generic_permission); EXPORT_SYMBOL(vfs_readlink); EXPORT_SYMBOL(vfs_rename); EXPORT_SYMBOL(vfs_rmdir); EXPORT_SYMBOL(vfs_symlink); EXPORT_SYMBOL(vfs_unlink); +EXPORT_SYMBOL(dentry_unhash); +EXPORT_SYMBOL(generic_readlink);