X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Fnamespace.c;h=9b7d73b2be3e59d826ea8d2db8f8f98bb81c88d9;hb=9bf4aaab3e101692164d49b7ca357651eb691cb6;hp=9bd40b9ed65c3530519b3d1f15bdc027f5541130;hpb=db216c3d5e4c040e557a50f8f5d35d5c415e8c1c;p=linux-2.6.git diff --git a/fs/namespace.c b/fs/namespace.c index 9bd40b9ed..9b7d73b2b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -62,6 +62,7 @@ struct vfsmount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_child); INIT_LIST_HEAD(&mnt->mnt_mounts); INIT_LIST_HEAD(&mnt->mnt_list); + INIT_LIST_HEAD(&mnt->mnt_fslink); if (name) { int size = strlen(name)+1; char *newname = kmalloc(size, GFP_KERNEL); @@ -108,13 +109,9 @@ struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) EXPORT_SYMBOL(lookup_mnt); -static int check_mnt(struct vfsmount *mnt) +static inline int check_mnt(struct vfsmount *mnt) { - spin_lock(&vfsmount_lock); - while (mnt->mnt_parent != mnt) - mnt = mnt->mnt_parent; - spin_unlock(&vfsmount_lock); - return mnt == current->namespace->root; + return mnt->mnt_namespace == current->namespace; } static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd) @@ -166,6 +163,14 @@ clone_mnt(struct vfsmount *old, struct dentry *root) mnt->mnt_root = dget(root); mnt->mnt_mountpoint = mnt->mnt_root; mnt->mnt_parent = mnt; + mnt->mnt_namespace = old->mnt_namespace; + + /* stick the duplicate mount on the same expiry list + * as the original if that was on one */ + spin_lock(&vfsmount_lock); + if (!list_empty(&old->mnt_fslink)) + list_add(&mnt->mnt_fslink, &old->mnt_fslink); + spin_unlock(&vfsmount_lock); } return mnt; } @@ -226,6 +231,7 @@ static int show_vfsmnt(struct seq_file *m, void *v) { MS_MANDLOCK, ",mand" }, { MS_NOATIME, ",noatime" }, { MS_NODIRATIME, ",nodiratime" }, + { MS_TAGXID, ",tagxid" }, { 0, NULL } }; static struct proc_fs_info mnt_info[] = { @@ -343,6 +349,7 @@ static inline void __umount_tree(struct vfsmount *mnt, struct list_head *kill) while 
(!list_empty(kill)) { mnt = list_entry(kill->next, struct vfsmount, mnt_list); list_del_init(&mnt->mnt_list); + list_del_init(&mnt->mnt_fslink); if (mnt->mnt_parent == mnt) { spin_unlock(&vfsmount_lock); } else { @@ -391,6 +398,24 @@ static int do_umount(struct vfsmount *mnt, int flags) if (retval) return retval; + /* + * Allow userspace to request a mountpoint be expired rather than + * unmounting unconditionally. Unmount only happens if: + * (1) the mark is already set (the mark is cleared by mntput()) + * (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount] + */ + if (flags & MNT_EXPIRE) { + if (mnt == current->fs->rootmnt || + flags & (MNT_FORCE | MNT_DETACH)) + return -EINVAL; + + if (atomic_read(&mnt->mnt_count) != 2) + return -EBUSY; + + if (!xchg(&mnt->mnt_expiry_mark, 1)) + return -EAGAIN; + } + /* * If we may have to abort operations to get out of this * mount, and they will themselves hold resources we must @@ -423,7 +448,7 @@ static int do_umount(struct vfsmount *mnt, int flags) down_write(&sb->s_umount); if (!(sb->s_flags & MS_RDONLY)) { lock_kernel(); - retval = do_remount_sb(sb, MS_RDONLY, 0, 0); + retval = do_remount_sb(sb, MS_RDONLY, NULL, 0); unlock_kernel(); } up_write(&sb->s_umount); @@ -484,7 +509,7 @@ asmlinkage long sys_umount(char __user * name, int flags) retval = do_umount(nd.mnt, flags); dput_and_out: - path_release(&nd); + path_release_on_umount(&nd); out: return retval; } @@ -643,6 +668,11 @@ static int do_loopback(struct nameidata *nd, char *old_name, int recurse) } if (mnt) { + /* stop bind mounts from expiring */ + spin_lock(&vfsmount_lock); + list_del_init(&mnt->mnt_fslink); + spin_unlock(&vfsmount_lock); + err = graft_tree(mnt, nd); if (err) { spin_lock(&vfsmount_lock); @@ -663,7 +693,8 @@ static int do_loopback(struct nameidata *nd, char *old_name, int recurse) * on it - tough luck. 
*/ -static int do_remount(struct nameidata *nd,int flags,int mnt_flags,void *data) +static int do_remount(struct nameidata *nd, int flags, int mnt_flags, + void *data) { int err; struct super_block * sb = nd->mnt->mnt_sb; @@ -735,6 +766,10 @@ static int do_move_mount(struct nameidata *nd, char *old_name) detach_mnt(old_nd.mnt, &parent_nd); attach_mnt(old_nd.mnt, nd); + + /* if the mount is moved, it should no longer be expire + * automatically */ + list_del_init(&old_nd.mnt->mnt_fslink); out2: spin_unlock(&vfsmount_lock); out1: @@ -747,11 +782,14 @@ out: return err; } -static int do_add_mount(struct nameidata *nd, char *type, int flags, +/* + * create a new mount for userspace and request it to be added into the + * namespace's tree + */ +static int do_new_mount(struct nameidata *nd, char *type, int flags, int mnt_flags, char *name, void *data) { struct vfsmount *mnt; - int err; if (!type || !memchr(type, 0, PAGE_SIZE)) return -EINVAL; @@ -761,9 +799,20 @@ static int do_add_mount(struct nameidata *nd, char *type, int flags, return -EPERM; mnt = do_kern_mount(type, flags, name, data); - err = PTR_ERR(mnt); if (IS_ERR(mnt)) - goto out; + return PTR_ERR(mnt); + + return do_add_mount(mnt, nd, mnt_flags, NULL); +} + +/* + * add a mount into a namespace's mount tree + * - provide the option of adding the new mount to an expiration list + */ +int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, + int mnt_flags, struct list_head *fslist) +{ + int err; down_write(&current->namespace->sem); /* Something was mounted here while we slept */ @@ -775,22 +824,138 @@ static int do_add_mount(struct nameidata *nd, char *type, int flags, /* Refuse the same filesystem on the same mount point */ err = -EBUSY; - if (nd->mnt->mnt_sb == mnt->mnt_sb && nd->mnt->mnt_root == nd->dentry) + if (nd->mnt->mnt_sb == newmnt->mnt_sb && + nd->mnt->mnt_root == nd->dentry) goto unlock; err = -EINVAL; - if (S_ISLNK(mnt->mnt_root->d_inode->i_mode)) + if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode)) 
goto unlock; - mnt->mnt_flags = mnt_flags; - err = graft_tree(mnt, nd); + newmnt->mnt_flags = mnt_flags; + err = graft_tree(newmnt, nd); + + if (err == 0 && fslist) { + /* add to the specified expiration list */ + spin_lock(&vfsmount_lock); + list_add_tail(&newmnt->mnt_fslink, fslist); + spin_unlock(&vfsmount_lock); + } + unlock: up_write(&current->namespace->sem); - mntput(mnt); -out: + mntput(newmnt); return err; } +EXPORT_SYMBOL_GPL(do_add_mount); + +/* + * process a list of expirable mountpoints with the intent of discarding any + * mountpoints that aren't in use and haven't been touched since last we came + * here + */ +void mark_mounts_for_expiry(struct list_head *mounts) +{ + struct namespace *namespace; + struct vfsmount *mnt, *next; + LIST_HEAD(graveyard); + + if (list_empty(mounts)) + return; + + spin_lock(&vfsmount_lock); + + /* extract from the expiration list every vfsmount that matches the + * following criteria: + * - only referenced by its parent vfsmount + * - still marked for expiry (marked on the last call here; marks are + * cleared by mntput()) + */ + list_for_each_entry_safe(mnt, next, mounts, mnt_fslink) { + if (!xchg(&mnt->mnt_expiry_mark, 1) || + atomic_read(&mnt->mnt_count) != 1) + continue; + + mntget(mnt); + list_move(&mnt->mnt_fslink, &graveyard); + } + + /* + * go through the vfsmounts we've just consigned to the graveyard to + * - check that they're still dead + * - delete the vfsmount from the appropriate namespace under lock + * - dispose of the corpse + */ + while (!list_empty(&graveyard)) { + mnt = list_entry(graveyard.next, struct vfsmount, mnt_fslink); + list_del_init(&mnt->mnt_fslink); + + /* don't do anything if the namespace is dead - all the + * vfsmounts from it are going away anyway */ + namespace = mnt->mnt_namespace; + if (!namespace || atomic_read(&namespace->count) <= 0) + continue; + get_namespace(namespace); + + spin_unlock(&vfsmount_lock); + down_write(&namespace->sem); + spin_lock(&vfsmount_lock); + + /* check that it is 
still dead: the count should now be 2 - as + * contributed by the vfsmount parent and the mntget above */ + if (atomic_read(&mnt->mnt_count) == 2) { + struct vfsmount *xdmnt; + struct dentry *xdentry; + + /* delete from the namespace */ + list_del_init(&mnt->mnt_list); + list_del_init(&mnt->mnt_child); + list_del_init(&mnt->mnt_hash); + mnt->mnt_mountpoint->d_mounted--; + + xdentry = mnt->mnt_mountpoint; + mnt->mnt_mountpoint = mnt->mnt_root; + xdmnt = mnt->mnt_parent; + mnt->mnt_parent = mnt; + + spin_unlock(&vfsmount_lock); + + mntput(xdmnt); + dput(xdentry); + + /* now lay it to rest if this was the last ref on the + * superblock */ + if (atomic_read(&mnt->mnt_sb->s_active) == 1) { + /* last instance - try to be smart */ + lock_kernel(); + DQUOT_OFF(mnt->mnt_sb); + acct_auto_close(mnt->mnt_sb); + unlock_kernel(); + } + + mntput(mnt); + } else { + /* someone brought it back to life whilst we didn't + * have any locks held so return it to the expiration + * list */ + list_add_tail(&mnt->mnt_fslink, mounts); + spin_unlock(&vfsmount_lock); + } + + up_write(&namespace->sem); + + mntput(mnt); + put_namespace(namespace); + + spin_lock(&vfsmount_lock); + } + + spin_unlock(&vfsmount_lock); +} + +EXPORT_SYMBOL_GPL(mark_mounts_for_expiry); + int copy_mount_options (const void __user *data, unsigned long *where) { int i; @@ -888,7 +1053,7 @@ long do_mount(char * dev_name, char * dir_name, char *type_page, else if (flags & MS_MOVE) retval = do_move_mount(&nd, dev_name); else - retval = do_add_mount(&nd, type_page, flags, mnt_flags, + retval = do_new_mount(&nd, type_page, flags, mnt_flags, dev_name, data_page); dput_out: path_release(&nd); @@ -901,6 +1066,7 @@ int copy_namespace(int flags, struct task_struct *tsk) struct namespace *new_ns; struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL; struct fs_struct *fs = tsk->fs; + struct vfsmount *p, *q; if (!namespace) return 0; @@ -935,14 +1101,16 @@ int copy_namespace(int flags, struct task_struct *tsk) 
list_add_tail(&new_ns->list, &new_ns->root->mnt_list); spin_unlock(&vfsmount_lock); - /* Second pass: switch the tsk->fs->* elements */ - if (fs) { - struct vfsmount *p, *q; - write_lock(&fs->lock); - - p = namespace->root; - q = new_ns->root; - while (p) { + /* + * Second pass: switch the tsk->fs->* elements and mark new vfsmounts + * as belonging to new namespace. We have already acquired a private + * fs_struct, so tsk->fs->lock is not needed. + */ + p = namespace->root; + q = new_ns->root; + while (p) { + q->mnt_namespace = new_ns; + if (fs) { if (p == fs->rootmnt) { rootmnt = p; fs->rootmnt = mntget(q); @@ -955,10 +1123,9 @@ int copy_namespace(int flags, struct task_struct *tsk) altrootmnt = p; fs->altrootmnt = mntget(q); } - p = next_mnt(p, namespace->root); - q = next_mnt(q, new_ns->root); } - write_unlock(&fs->lock); + p = next_mnt(p, namespace->root); + q = next_mnt(q, new_ns->root); } up_write(&tsk->namespace->sem); @@ -1213,6 +1380,7 @@ static void __init init_mount_tree(void) init_rwsem(&namespace->sem); list_add(&mnt->mnt_list, &namespace->list); namespace->root = mnt; + mnt->mnt_namespace = namespace; init_task.namespace = namespace; read_lock(&tasklist_lock); @@ -1280,8 +1448,15 @@ void __init mnt_init(unsigned long mempages) void __put_namespace(struct namespace *namespace) { + struct vfsmount *mnt; + down_write(&namespace->sem); spin_lock(&vfsmount_lock); + + list_for_each_entry(mnt, &namespace->list, mnt_list) { + mnt->mnt_namespace = NULL; + } + umount_tree(namespace->root); spin_unlock(&vfsmount_lock); up_write(&namespace->sem);