X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Fnamespace.c;h=c1fcd7434a3d81cfaed4e809756a913e5fa7842e;hb=8e8ece46a861c84343256819eaec77e608ff9217;hp=fb0a3ab5893dee184e57bfa63040bd8f315d7655;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git diff --git a/fs/namespace.c b/fs/namespace.c index fb0a3ab58..c1fcd7434 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include @@ -21,7 +22,11 @@ #include #include #include +#include +#include + #include +#include extern int __init init_rootfs(void); @@ -35,7 +40,8 @@ static inline int sysfs_init(void) #endif /* spinlock for vfsmount related operations, inplace of dcache_lock */ -spinlock_t vfsmount_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; + __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); + static struct list_head *mount_hashtable; static int hash_mask, hash_bits; static kmem_cache_t *mnt_cache; @@ -58,6 +64,7 @@ struct vfsmount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_child); INIT_LIST_HEAD(&mnt->mnt_mounts); INIT_LIST_HEAD(&mnt->mnt_list); + INIT_LIST_HEAD(&mnt->mnt_fslink); if (name) { int size = strlen(name)+1; char *newname = kmalloc(size, GFP_KERNEL); @@ -102,15 +109,9 @@ struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) return found; } -EXPORT_SYMBOL(lookup_mnt); - -static int check_mnt(struct vfsmount *mnt) +static inline int check_mnt(struct vfsmount *mnt) { - spin_lock(&vfsmount_lock); - while (mnt->mnt_parent != mnt) - mnt = mnt->mnt_parent; - spin_unlock(&vfsmount_lock); - return mnt == current->namespace->root; + return mnt->mnt_namespace == current->namespace; } static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd) @@ -162,6 +163,15 @@ clone_mnt(struct vfsmount *old, struct dentry *root) mnt->mnt_root = dget(root); mnt->mnt_mountpoint = mnt->mnt_root; mnt->mnt_parent = mnt; + mnt->mnt_namespace = old->mnt_namespace; + mnt->mnt_xid = old->mnt_xid; + + /* stick 
the duplicate mount on the same expiry list + * as the original if that was on one */ + spin_lock(&vfsmount_lock); + if (!list_empty(&old->mnt_fslink)) + list_add(&mnt->mnt_fslink, &old->mnt_fslink); + spin_unlock(&vfsmount_lock); } return mnt; } @@ -209,6 +219,34 @@ static inline void mangle(struct seq_file *m, const char *s) seq_escape(m, s, " \t\n\\"); } +static int mnt_is_reachable(struct vfsmount *mnt) +{ + struct vfsmount *root_mnt; + struct dentry *root, *point; + int ret; + + if (!mnt) + return 1; + if (mnt == mnt->mnt_namespace->root) + return 1; + + spin_lock(&dcache_lock); + root_mnt = current->fs->rootmnt; + root = current->fs->root; + point = root; + + while ((mnt != mnt->mnt_parent) && (mnt != root_mnt)) { + point = mnt->mnt_mountpoint; + mnt = mnt->mnt_parent; + } + + ret = (mnt == root_mnt) && is_subdir(point, root); + + spin_unlock(&dcache_lock); + + return ret; +} + static int show_vfsmnt(struct seq_file *m, void *v) { struct vfsmount *mnt = v; @@ -222,6 +260,7 @@ static int show_vfsmnt(struct seq_file *m, void *v) { MS_MANDLOCK, ",mand" }, { MS_NOATIME, ",noatime" }, { MS_NODIRATIME, ",nodiratime" }, + { MS_TAGXID, ",tagxid" }, { 0, NULL } }; static struct proc_fs_info mnt_info[] = { @@ -230,12 +269,25 @@ static int show_vfsmnt(struct seq_file *m, void *v) { MNT_NOEXEC, ",noexec" }, { 0, NULL } }; + struct proc_fs_info *fs_infop; + unsigned long s_flags = mnt->mnt_sb->s_flags; + int mnt_flags = mnt->mnt_flags; - mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); - seq_putc(m, ' '); - seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); - seq_putc(m, ' '); + if (vx_flags(VXF_HIDE_MOUNT, 0)) + return 0; + if (!mnt_is_reachable(mnt)) + return 0; + + if (!vx_check(0, VX_ADMIN|VX_WATCH) && + mnt == current->fs->rootmnt) { + seq_puts(m, "/dev/root / "); + } else { + mangle(m, mnt->mnt_devname ? 
mnt->mnt_devname : "none"); + seq_putc(m, ' '); + seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); + seq_putc(m, ' '); + } mangle(m, mnt->mnt_sb->s_type->name); seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw"); for (fs_infop = fs_info; fs_infop->flag; fs_infop++) { @@ -246,6 +298,8 @@ static int show_vfsmnt(struct seq_file *m, void *v) if (mnt->mnt_flags & fs_infop->flag) seq_puts(m, fs_infop->str); } + if (mnt->mnt_flags & MNT_XID) + seq_printf(m, ",xid=%d", mnt->mnt_xid); if (mnt->mnt_sb->s_op->show_options) err = mnt->mnt_sb->s_op->show_options(m, mnt); seq_puts(m, " 0 0\n"); @@ -259,7 +313,65 @@ struct seq_operations mounts_op = { .show = show_vfsmnt }; -/* +/** + * may_umount_tree - check if a mount tree is busy + * @mnt: root of mount tree + * + * This is called to check if a tree of mounts has any + * open files, pwds, chroots or sub mounts that are + * busy. + */ +int may_umount_tree(struct vfsmount *mnt) +{ + struct list_head *next; + struct vfsmount *this_parent = mnt; + int actual_refs; + int minimum_refs; + + spin_lock(&vfsmount_lock); + actual_refs = atomic_read(&mnt->mnt_count); + minimum_refs = 2; +repeat: + next = this_parent->mnt_mounts.next; +resume: + while (next != &this_parent->mnt_mounts) { + struct vfsmount *p = list_entry(next, struct vfsmount, mnt_child); + + next = next->next; + + actual_refs += atomic_read(&p->mnt_count); + minimum_refs += 2; + + if (!list_empty(&p->mnt_mounts)) { + this_parent = p; + goto repeat; + } + } + + if (this_parent != mnt) { + next = this_parent->mnt_child.next; + this_parent = this_parent->mnt_parent; + goto resume; + } + spin_unlock(&vfsmount_lock); + + if (actual_refs > minimum_refs) + return -EBUSY; + + return 0; +} + +EXPORT_SYMBOL(may_umount_tree); + +/** + * may_umount - check if a mount point is busy + * @mnt: root of mount + * + * This is called to check if a mount point has any + * open files, pwds, chroots or sub mounts. 
If the + * mount has sub mounts this will return busy + * regardless of whether the sub mounts are busy. + * * Doesn't take quota and stuff into account. IOW, in some cases it will * give false negatives. The main reason why it's here is that we need * a non-destructive way to look for easily umountable filesystems. @@ -273,19 +385,14 @@ int may_umount(struct vfsmount *mnt) EXPORT_SYMBOL(may_umount); -void umount_tree(struct vfsmount *mnt) +static inline void __umount_list(struct list_head *kill) { - struct vfsmount *p; - LIST_HEAD(kill); - - for (p = mnt; p; p = next_mnt(p, mnt)) { - list_del(&p->mnt_list); - list_add(&p->mnt_list, &kill); - } + struct vfsmount *mnt; - while (!list_empty(&kill)) { - mnt = list_entry(kill.next, struct vfsmount, mnt_list); + while (!list_empty(kill)) { + mnt = list_entry(kill->next, struct vfsmount, mnt_list); list_del_init(&mnt->mnt_list); + list_del_init(&mnt->mnt_fslink); if (mnt->mnt_parent == mnt) { spin_unlock(&vfsmount_lock); } else { @@ -299,6 +406,32 @@ void umount_tree(struct vfsmount *mnt) } } +void umount_tree(struct vfsmount *mnt) +{ + struct vfsmount *p; + LIST_HEAD(kill); + + for (p = mnt; p; p = next_mnt(p, mnt)) { + list_del(&p->mnt_list); + list_add(&p->mnt_list, &kill); + } + __umount_list(&kill); +} + +void umount_unused(struct vfsmount *mnt, struct fs_struct *fs) +{ + struct vfsmount *p; + LIST_HEAD(kill); + + for (p = mnt; p; p = next_mnt(p, mnt)) { + if (p == fs->rootmnt || p == fs->pwdmnt) + continue; + list_del(&p->mnt_list); + list_add(&p->mnt_list, &kill); + } + __umount_list(&kill); +} + static int do_umount(struct vfsmount *mnt, int flags) { struct super_block * sb = mnt->mnt_sb; @@ -308,6 +441,24 @@ static int do_umount(struct vfsmount *mnt, int flags) if (retval) return retval; + /* + * Allow userspace to request a mountpoint be expired rather than + * unmounting unconditionally. 
Unmount only happens if: + * (1) the mark is already set (the mark is cleared by mntput()) + * (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount] + */ + if (flags & MNT_EXPIRE) { + if (mnt == current->fs->rootmnt || + flags & (MNT_FORCE | MNT_DETACH)) + return -EINVAL; + + if (atomic_read(&mnt->mnt_count) != 2) + return -EBUSY; + + if (!xchg(&mnt->mnt_expiry_mark, 1)) + return -EAGAIN; + } + /* * If we may have to abort operations to get out of this * mount, and they will themselves hold resources we must @@ -340,7 +491,8 @@ static int do_umount(struct vfsmount *mnt, int flags) down_write(&sb->s_umount); if (!(sb->s_flags & MS_RDONLY)) { lock_kernel(); - retval = do_remount_sb(sb, MS_RDONLY, 0, 0); + DQUOT_OFF(sb); + retval = do_remount_sb(sb, MS_RDONLY, NULL, 0); unlock_kernel(); } up_write(&sb->s_umount); @@ -396,16 +548,18 @@ asmlinkage long sys_umount(char __user * name, int flags) goto dput_and_out; retval = -EPERM; - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SECURE_MOUNT)) goto dput_and_out; retval = do_umount(nd.mnt, flags); dput_and_out: - path_release(&nd); + path_release_on_umount(&nd); out: return retval; } +#ifdef __ARCH_WANT_SYS_OLDUMOUNT + /* * The 2.0 compatible umount. No flags. */ @@ -415,10 +569,14 @@ asmlinkage long sys_oldumount(char __user * name) return sys_umount(name,0); } +#endif + static int mount_is_safe(struct nameidata *nd) { if (capable(CAP_SYS_ADMIN)) return 0; + if (vx_ccaps(VXC_SECURE_MOUNT)) + return 0; return -EPERM; #ifdef notyet if (S_ISLNK(nd->dentry->d_inode->i_mode)) @@ -530,11 +688,12 @@ out_unlock: /* * do loopback mount. 
*/ -static int do_loopback(struct nameidata *nd, char *old_name, int recurse) +static int do_loopback(struct nameidata *nd, char *old_name, xid_t xid, int flags) { struct nameidata old_nd; struct vfsmount *mnt = NULL; int err = mount_is_safe(nd); + int recurse = flags & MS_REC; if (err) return err; if (!old_name || !*old_name) @@ -554,6 +713,15 @@ static int do_loopback(struct nameidata *nd, char *old_name, int recurse) } if (mnt) { + /* stop bind mounts from expiring */ + spin_lock(&vfsmount_lock); + list_del_init(&mnt->mnt_fslink); + spin_unlock(&vfsmount_lock); + + if (flags & MS_XID) { + mnt->mnt_xid = xid; + mnt->mnt_flags |= MNT_XID; + } err = graft_tree(mnt, nd); if (err) { spin_lock(&vfsmount_lock); @@ -574,12 +742,13 @@ static int do_loopback(struct nameidata *nd, char *old_name, int recurse) * on it - tough luck. */ -static int do_remount(struct nameidata *nd,int flags,int mnt_flags,void *data) +static int do_remount(struct nameidata *nd, int flags, int mnt_flags, + void *data, xid_t xid) { int err; struct super_block * sb = nd->mnt->mnt_sb; - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SECURE_REMOUNT)) return -EPERM; if (!check_mnt(nd->mnt)) @@ -588,10 +757,15 @@ static int do_remount(struct nameidata *nd,int flags,int mnt_flags,void *data) if (nd->dentry != nd->mnt->mnt_root) return -EINVAL; + if (vx_ccaps(VXC_SECURE_REMOUNT)) + mnt_flags |= MNT_NODEV; down_write(&sb->s_umount); err = do_remount_sb(sb, flags, data, 0); - if (!err) + if (!err) { nd->mnt->mnt_flags=mnt_flags; + if (flags & MS_XID) + nd->mnt->mnt_xid = xid; + } up_write(&sb->s_umount); if (!err) security_sb_post_remount(nd->mnt, flags, data); @@ -603,7 +777,7 @@ static int do_move_mount(struct nameidata *nd, char *old_name) struct nameidata old_nd, parent_nd; struct vfsmount *p; int err = 0; - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SECURE_MOUNT)) return -EPERM; if (!old_name || !*old_name) return -EINVAL; @@ -646,6 
+820,10 @@ static int do_move_mount(struct nameidata *nd, char *old_name) detach_mnt(old_nd.mnt, &parent_nd); attach_mnt(old_nd.mnt, nd); + + /* if the mount is moved, it should no longer be expire + * automatically */ + list_del_init(&old_nd.mnt->mnt_fslink); out2: spin_unlock(&vfsmount_lock); out1: @@ -658,23 +836,37 @@ out: return err; } -static int do_add_mount(struct nameidata *nd, char *type, int flags, +/* + * create a new mount for userspace and request it to be added into the + * namespace's tree + */ +static int do_new_mount(struct nameidata *nd, char *type, int flags, int mnt_flags, char *name, void *data) { struct vfsmount *mnt; - int err; if (!type || !memchr(type, 0, PAGE_SIZE)) return -EINVAL; /* we need capabilities... */ - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SECURE_MOUNT)) return -EPERM; mnt = do_kern_mount(type, flags, name, data); - err = PTR_ERR(mnt); if (IS_ERR(mnt)) - goto out; + return PTR_ERR(mnt); + + return do_add_mount(mnt, nd, mnt_flags, NULL); +} + +/* + * add a mount into a namespace's mount tree + * - provide the option of adding the new mount to an expiration list + */ +int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, + int mnt_flags, struct list_head *fslist) +{ + int err; down_write(&current->namespace->sem); /* Something was mounted here while we slept */ @@ -686,23 +878,167 @@ static int do_add_mount(struct nameidata *nd, char *type, int flags, /* Refuse the same filesystem on the same mount point */ err = -EBUSY; - if (nd->mnt->mnt_sb == mnt->mnt_sb && nd->mnt->mnt_root == nd->dentry) + if (nd->mnt->mnt_sb == newmnt->mnt_sb && + nd->mnt->mnt_root == nd->dentry) goto unlock; err = -EINVAL; - if (S_ISLNK(mnt->mnt_root->d_inode->i_mode)) + if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode)) goto unlock; - mnt->mnt_flags = mnt_flags; - err = graft_tree(mnt, nd); + newmnt->mnt_flags = mnt_flags; + err = graft_tree(newmnt, nd); + + if (err == 0 && fslist) { + /* add to the specified
expiration list */ + spin_lock(&vfsmount_lock); + list_add_tail(&newmnt->mnt_fslink, fslist); + spin_unlock(&vfsmount_lock); + } + unlock: up_write(&current->namespace->sem); - mntput(mnt); -out: + mntput(newmnt); return err; } -int copy_mount_options (const void __user *data, unsigned long *where) +EXPORT_SYMBOL_GPL(do_add_mount); + +/* + * process a list of expirable mountpoints with the intent of discarding any + * mountpoints that aren't in use and haven't been touched since last we came + * here + */ +void mark_mounts_for_expiry(struct list_head *mounts) +{ + struct namespace *namespace; + struct vfsmount *mnt, *next; + LIST_HEAD(graveyard); + + if (list_empty(mounts)) + return; + + spin_lock(&vfsmount_lock); + + /* extract from the expiration list every vfsmount that matches the + * following criteria: + * - only referenced by its parent vfsmount + * - still marked for expiry (marked on the last call here; marks are + * cleared by mntput()) + */ + list_for_each_entry_safe(mnt, next, mounts, mnt_fslink) { + if (!xchg(&mnt->mnt_expiry_mark, 1) || + atomic_read(&mnt->mnt_count) != 1) + continue; + + mntget(mnt); + list_move(&mnt->mnt_fslink, &graveyard); + } + + /* + * go through the vfsmounts we've just consigned to the graveyard to + * - check that they're still dead + * - delete the vfsmount from the appropriate namespace under lock + * - dispose of the corpse + */ + while (!list_empty(&graveyard)) { + mnt = list_entry(graveyard.next, struct vfsmount, mnt_fslink); + list_del_init(&mnt->mnt_fslink); + + /* don't do anything if the namespace is dead - all the + * vfsmounts from it are going away anyway */ + namespace = mnt->mnt_namespace; + if (!namespace || atomic_read(&namespace->count) <= 0) + continue; + get_namespace(namespace); + + spin_unlock(&vfsmount_lock); + down_write(&namespace->sem); + spin_lock(&vfsmount_lock); + + /* check that it is still dead: the count should now be 2 - as + * contributed by the vfsmount parent and the mntget above */ + if
(atomic_read(&mnt->mnt_count) == 2) { + struct vfsmount *xdmnt; + struct dentry *xdentry; + + /* delete from the namespace */ + list_del_init(&mnt->mnt_list); + list_del_init(&mnt->mnt_child); + list_del_init(&mnt->mnt_hash); + mnt->mnt_mountpoint->d_mounted--; + + xdentry = mnt->mnt_mountpoint; + mnt->mnt_mountpoint = mnt->mnt_root; + xdmnt = mnt->mnt_parent; + mnt->mnt_parent = mnt; + + spin_unlock(&vfsmount_lock); + + mntput(xdmnt); + dput(xdentry); + + /* now lay it to rest if this was the last ref on the + * superblock */ + if (atomic_read(&mnt->mnt_sb->s_active) == 1) { + /* last instance - try to be smart */ + lock_kernel(); + DQUOT_OFF(mnt->mnt_sb); + acct_auto_close(mnt->mnt_sb); + unlock_kernel(); + } + + mntput(mnt); + } else { + /* someone brought it back to life whilst we didn't + * have any locks held so return it to the expiration + * list */ + list_add_tail(&mnt->mnt_fslink, mounts); + spin_unlock(&vfsmount_lock); + } + + up_write(&namespace->sem); + + mntput(mnt); + put_namespace(namespace); + + spin_lock(&vfsmount_lock); + } + + spin_unlock(&vfsmount_lock); +} + +EXPORT_SYMBOL_GPL(mark_mounts_for_expiry); + +/* + * Some copy_from_user() implementations do not return the exact number of + * bytes remaining to copy on a fault. But copy_mount_options() requires that. + * Note that this function differs from copy_from_user() in that it will oops + * on bad values of `to', rather than returning a short copy. 
+ */ +static long +exact_copy_from_user(void *to, const void __user *from, unsigned long n) +{ + char *t = to; + const char __user *f = from; + char c; + + if (!access_ok(VERIFY_READ, from, n)) + return n; + + while (n) { + if (__get_user(c, f)) { + memset(t, 0, n); + break; + } + *t++ = c; + f++; + n--; + } + return n; +} + +int copy_mount_options(const void __user *data, unsigned long *where) { int i; unsigned long page; @@ -724,7 +1060,7 @@ int copy_mount_options (const void __user *data, unsigned long *where) if (size > PAGE_SIZE) size = PAGE_SIZE; - i = size - copy_from_user((void *)page, data, size); + i = size - exact_copy_from_user((void *)page, data, size); if (!i) { free_page(page); return -EFAULT; @@ -755,6 +1091,7 @@ long do_mount(char * dev_name, char * dir_name, char *type_page, struct nameidata nd; int retval = 0; int mnt_flags = 0; + xid_t xid = 0; /* Discard magic */ if ((flags & MS_MGC_MSK) == MS_MGC_VAL) @@ -770,6 +1107,14 @@ long do_mount(char * dev_name, char * dir_name, char *type_page, if (data_page) ((char *)data_page)[PAGE_SIZE - 1] = 0; + retval = vx_parse_xid(data_page, &xid, 1); + if (retval) { + mnt_flags |= MNT_XID; + /* bind and re-mounts get xid flag */ + if (flags & (MS_BIND|MS_REMOUNT)) + flags |= MS_XID; + } + /* Separate the per-mountpoint flags */ if (flags & MS_NOSUID) mnt_flags |= MNT_NOSUID; @@ -779,6 +1124,9 @@ long do_mount(char * dev_name, char * dir_name, char *type_page, mnt_flags |= MNT_NOEXEC; flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_ACTIVE); + if (vx_ccaps(VXC_SECURE_MOUNT)) + mnt_flags |= MNT_NODEV; + /* ... 
and get the mountpoint */ retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd); if (retval) @@ -790,13 +1138,13 @@ long do_mount(char * dev_name, char * dir_name, char *type_page, if (flags & MS_REMOUNT) retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags, - data_page); + data_page, xid); else if (flags & MS_BIND) - retval = do_loopback(&nd, dev_name, flags & MS_REC); + retval = do_loopback(&nd, dev_name, xid, flags); else if (flags & MS_MOVE) retval = do_move_mount(&nd, dev_name); else - retval = do_add_mount(&nd, type_page, flags, mnt_flags, + retval = do_new_mount(&nd, type_page, flags, mnt_flags, dev_name, data_page); dput_out: path_release(&nd); @@ -809,6 +1157,7 @@ int copy_namespace(int flags, struct task_struct *tsk) struct namespace *new_ns; struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL; struct fs_struct *fs = tsk->fs; + struct vfsmount *p, *q; if (!namespace) return 0; @@ -818,7 +1167,7 @@ int copy_namespace(int flags, struct task_struct *tsk) if (!(flags & CLONE_NEWNS)) return 0; - if (!capable(CAP_SYS_ADMIN)) { + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SECURE_MOUNT)) { put_namespace(namespace); return -EPERM; } @@ -843,14 +1192,16 @@ int copy_namespace(int flags, struct task_struct *tsk) list_add_tail(&new_ns->list, &new_ns->root->mnt_list); spin_unlock(&vfsmount_lock); - /* Second pass: switch the tsk->fs->* elements */ - if (fs) { - struct vfsmount *p, *q; - write_lock(&fs->lock); - - p = namespace->root; - q = new_ns->root; - while (p) { + /* + * Second pass: switch the tsk->fs->* elements and mark new vfsmounts + * as belonging to new namespace. We have already acquired a private + * fs_struct, so tsk->fs->lock is not needed. 
+ */ + p = namespace->root; + q = new_ns->root; + while (p) { + q->mnt_namespace = new_ns; + if (fs) { if (p == fs->rootmnt) { rootmnt = p; fs->rootmnt = mntget(q); @@ -863,10 +1214,9 @@ int copy_namespace(int flags, struct task_struct *tsk) altrootmnt = p; fs->altrootmnt = mntget(q); } - p = next_mnt(p, namespace->root); - q = next_mnt(q, new_ns->root); } - write_unlock(&fs->lock); + p = next_mnt(p, namespace->root); + q = next_mnt(q, new_ns->root); } up_write(&tsk->namespace->sem); @@ -950,8 +1300,6 @@ void set_fs_root(struct fs_struct *fs, struct vfsmount *mnt, } } -EXPORT_SYMBOL(set_fs_root); - /* * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values. * It can block. Requires the big lock held. @@ -975,8 +1323,6 @@ void set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt, } } -EXPORT_SYMBOL(set_fs_pwd); - static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd) { struct task_struct *g, *p; @@ -1121,6 +1467,7 @@ static void __init init_mount_tree(void) init_rwsem(&namespace->sem); list_add(&mnt->mnt_list, &namespace->list); namespace->root = mnt; + mnt->mnt_namespace = namespace; init_task.namespace = namespace; read_lock(&tasklist_lock); @@ -1142,9 +1489,7 @@ void __init mnt_init(unsigned long mempages) int i; mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount), - 0, SLAB_HWCACHE_ALIGN, NULL, NULL); - if (!mnt_cache) - panic("Cannot create vfsmount cache"); + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); order = 0; mount_hashtable = (struct list_head *) @@ -1190,8 +1535,15 @@ void __init mnt_init(unsigned long mempages) void __put_namespace(struct namespace *namespace) { + struct vfsmount *mnt; + down_write(&namespace->sem); spin_lock(&vfsmount_lock); + + list_for_each_entry(mnt, &namespace->list, mnt_list) { + mnt->mnt_namespace = NULL; + } + umount_tree(namespace->root); spin_unlock(&vfsmount_lock); up_write(&namespace->sem);