X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Fsuper.c;h=d32503631810f5eb79f25963605305472fe5d8f2;hb=43bc926fffd92024b46cafaf7350d669ba9ca884;hp=8d9bff178bcac6ea625e79b2aa7a95d78153bba0;hpb=9213980e6a70d8473e0ffd4b39ab5b6caaba9ff5;p=linux-2.6.git diff --git a/fs/super.c b/fs/super.c index 8d9bff178..d32503631 100644 --- a/fs/super.c +++ b/fs/super.c @@ -32,9 +32,14 @@ #include /* for fsync_super() */ #include #include +#include #include #include /* for the emergency remount stuff */ #include +#include +#include +#include +#include #include @@ -43,7 +48,7 @@ void put_filesystem(struct file_system_type *fs); struct file_system_type *get_fs_type(const char *name); LIST_HEAD(super_blocks); -spinlock_t sb_lock = SPIN_LOCK_UNLOCKED; +DEFINE_SPINLOCK(sb_lock); /** * alloc_super - create new superblock @@ -53,11 +58,10 @@ spinlock_t sb_lock = SPIN_LOCK_UNLOCKED; */ static struct super_block *alloc_super(void) { - struct super_block *s = kmalloc(sizeof(struct super_block), GFP_USER); + struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); static struct super_operations default_op; if (s) { - memset(s, 0, sizeof(struct super_block)); if (security_sb_alloc(s)) { kfree(s); s = NULL; @@ -68,20 +72,22 @@ static struct super_block *alloc_super(void) INIT_LIST_HEAD(&s->s_files); INIT_LIST_HEAD(&s->s_instances); INIT_HLIST_HEAD(&s->s_anon); + INIT_LIST_HEAD(&s->s_inodes); init_rwsem(&s->s_umount); - sema_init(&s->s_lock, 1); + mutex_init(&s->s_lock); down_write(&s->s_umount); s->s_count = S_BIAS; atomic_set(&s->s_active, 1); - sema_init(&s->s_vfs_rename_sem,1); - sema_init(&s->s_dquot.dqio_sem, 1); - sema_init(&s->s_dquot.dqonoff_sem, 1); + mutex_init(&s->s_vfs_rename_mutex); + mutex_init(&s->s_dquot.dqio_mutex); + mutex_init(&s->s_dquot.dqonoff_mutex); init_rwsem(&s->s_dquot.dqptr_sem); init_waitqueue_head(&s->s_wait_unfrozen); s->s_maxbytes = MAX_NON_LFS; s->dq_op = sb_dquot_ops; s->s_qcop = sb_quotactl_ops; s->s_op = &default_op; + s->s_time_gran = 1000000000; } out: return s; @@ -116,9 +122,30 @@ int __put_super(struct super_block *sb) return ret; } +/* + * Drop a superblock's refcount. + * Returns non-zero if the superblock is about to be destroyed and + * at least is already removed from super_blocks list, so if we are + * making a loop through super blocks then we need to restart. + * The caller must hold sb_lock. + */ +int __put_super_and_need_restart(struct super_block *sb) +{ + /* check for race with generic_shutdown_super() */ + if (list_empty(&sb->s_list)) { + /* super block is removed, need to restart... */ + __put_super(sb); + return 1; + } + /* can't be the last, since s_list is still in use */ + sb->s_count--; + BUG_ON(sb->s_count == 0); + return 0; +} + /** * put_super - drop a temporary reference to superblock - * @s: superblock in question + * @sb: superblock in question * * Drops a temporary reference, frees superblock if there's no * references left. @@ -146,6 +173,7 @@ void deactivate_super(struct super_block *s) if (atomic_dec_and_lock(&s->s_active, &sb_lock)) { s->s_count -= S_BIAS-1; spin_unlock(&sb_lock); + DQUOT_OFF(s); down_write(&s->s_umount); fs->kill_sb(s); put_filesystem(fs); @@ -209,10 +237,10 @@ void generic_shutdown_super(struct super_block *sb) dput(root); fsync_super(sb); lock_super(sb); - lock_kernel(); sb->s_flags &= ~MS_ACTIVE; /* bad name - it should be evict_inodes() */ invalidate_inodes(sb); + lock_kernel(); if (sop->write_super && sb->s_dirt) sop->write_super(sb); @@ -221,15 +249,17 @@ void generic_shutdown_super(struct super_block *sb) /* Forget any remaining inodes */ if (invalidate_inodes(sb)) { - printk("VFS: Busy inodes after unmount. " - "Self-destruct in 5 seconds. Have a nice day...\n"); + printk("VFS: Busy inodes after unmount of %s. " + "Self-destruct in 5 seconds. Have a nice day...\n", + sb->s_id); } unlock_kernel(); unlock_super(sb); } spin_lock(&sb_lock); - list_del(&sb->s_list); + /* should be initialized for __put_super_and_need_restart() */ + list_del_init(&sb->s_list); list_del(&sb->s_instances); spin_unlock(&sb_lock); up_write(&sb->s_umount); @@ -282,7 +312,7 @@ retry: } s->s_type = type; strlcpy(s->s_id, type->name, sizeof(s->s_id)); - list_add(&s->s_list, super_blocks.prev); + list_add_tail(&s->s_list, &super_blocks); list_add(&s->s_instances, &type->fs_supers); spin_unlock(&sb_lock); get_filesystem(type); @@ -315,20 +345,22 @@ static inline void write_super(struct super_block *sb) */ void sync_supers(void) { - struct super_block * sb; -restart: + struct super_block *sb; + spin_lock(&sb_lock); - sb = sb_entry(super_blocks.next); - while (sb != sb_entry(&super_blocks)) +restart: + list_for_each_entry(sb, &super_blocks, s_list) { if (sb->s_dirt) { sb->s_count++; spin_unlock(&sb_lock); down_read(&sb->s_umount); write_super(sb); - drop_super(sb); - goto restart; - } else - sb = sb_entry(sb->s_list.next); + up_read(&sb->s_umount); + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto restart; + } + } spin_unlock(&sb_lock); } @@ -351,24 +383,20 @@ restart: void sync_filesystems(int wait) { struct super_block *sb; - static DECLARE_MUTEX(mutex); + static DEFINE_MUTEX(mutex); - down(&mutex); /* Could be down_interruptible */ + mutex_lock(&mutex); /* Could be down_interruptible */ spin_lock(&sb_lock); - for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks); - sb = sb_entry(sb->s_list.next)) { + list_for_each_entry(sb, &super_blocks, s_list) { if (!sb->s_op->sync_fs) continue; if (sb->s_flags & MS_RDONLY) continue; sb->s_need_sync_fs = 1; } - spin_unlock(&sb_lock); restart: - spin_lock(&sb_lock); - for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks); - sb = sb_entry(sb->s_list.next)) { + list_for_each_entry(sb, &super_blocks, s_list) { if (!sb->s_need_sync_fs) continue; sb->s_need_sync_fs = 0; @@ -379,11 +407,14 @@ restart: down_read(&sb->s_umount); if (sb->s_root && (wait || sb->s_dirt)) sb->s_op->sync_fs(sb, wait); - drop_super(sb); - goto restart; + up_read(&sb->s_umount); + /* restart only when sb is no longer on the list */ + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto restart; } spin_unlock(&sb_lock); - up(&mutex); + mutex_unlock(&mutex); } /** @@ -396,21 +427,25 @@ restart: struct super_block * get_super(struct block_device *bdev) { - struct list_head *p; + struct super_block *sb; + if (!bdev) return NULL; -rescan: + spin_lock(&sb_lock); - list_for_each(p, &super_blocks) { - struct super_block *s = sb_entry(p); - if (s->s_bdev == bdev) { - s->s_count++; +rescan: + list_for_each_entry(sb, &super_blocks, s_list) { + if (sb->s_bdev == bdev) { + sb->s_count++; spin_unlock(&sb_lock); - down_read(&s->s_umount); - if (s->s_root) - return s; - drop_super(s); - goto rescan; + down_read(&sb->s_umount); + if (sb->s_root) + return sb; + up_read(&sb->s_umount); + /* restart only when sb is no longer on the list */ + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto rescan; } } spin_unlock(&sb_lock); @@ -421,28 +456,28 @@ EXPORT_SYMBOL(get_super); struct super_block * user_get_super(dev_t dev) { - struct list_head *p; + struct super_block *sb; -rescan: spin_lock(&sb_lock); - list_for_each(p, &super_blocks) { - struct super_block *s = sb_entry(p); - if (s->s_dev == dev) { - s->s_count++; +rescan: + list_for_each_entry(sb, &super_blocks, s_list) { + if (sb->s_dev == dev) { + sb->s_count++; spin_unlock(&sb_lock); - down_read(&s->s_umount); - if (s->s_root) - return s; - drop_super(s); - goto rescan; + down_read(&sb->s_umount); + if (sb->s_root) + return sb; + up_read(&sb->s_umount); + /* restart only when sb is no longer on the list */ + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto rescan; } } spin_unlock(&sb_lock); return NULL; } -EXPORT_SYMBOL(user_get_super); - asmlinkage long sys_ustat(unsigned dev, struct ustat __user * ubuf) { struct super_block *s; @@ -480,7 +515,7 @@ static void mark_files_ro(struct super_block *sb) struct file *f; file_list_lock(); - list_for_each_entry(f, &sb->s_files, f_list) { + list_for_each_entry(f, &sb->s_files, f_u.fu_list) { if (S_ISREG(f->f_dentry->d_inode->i_mode) && file_count(f)) f->f_mode &= ~FMODE_WRITE; } @@ -564,19 +599,24 @@ void emergency_remount(void) */ static struct idr unnamed_dev_idr; -static spinlock_t unnamed_dev_lock = SPIN_LOCK_UNLOCKED;/* protects the above */ +static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */ int set_anon_super(struct super_block *s, void *data) { int dev; + int error; - spin_lock(&unnamed_dev_lock); - if (idr_pre_get(&unnamed_dev_idr, GFP_ATOMIC) == 0) { - spin_unlock(&unnamed_dev_lock); + retry: + if (idr_pre_get(&unnamed_dev_idr, GFP_ATOMIC) == 0) return -ENOMEM; - } - dev = idr_get_new(&unnamed_dev_idr, NULL); + spin_lock(&unnamed_dev_lock); + error = idr_get_new(&unnamed_dev_idr, NULL, &dev); spin_unlock(&unnamed_dev_lock); + if (error == -EAGAIN) + /* We raced and lost with another CPU. */ + goto retry; + else if (error) + return -EAGAIN; if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) { spin_lock(&unnamed_dev_lock); @@ -628,6 +668,16 @@ static int test_bdev_super(struct super_block *s, void *data) return (void *)s->s_bdev == data; } +static void bdev_uevent(struct block_device *bdev, enum kobject_action action) +{ + if (bdev->bd_disk) { + if (bdev->bd_part) + kobject_uevent(&bdev->bd_part->kobj, action); + else + kobject_uevent(&bdev->bd_disk->kobj, action); + } +} + struct super_block *get_sb_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)) @@ -645,9 +695,9 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type, * will protect the lockfs code from trying to start a snapshot * while we are mounting */ - down(&bdev->bd_mount_sem); + mutex_lock(&bdev->bd_mount_mutex); s = sget(fs_type, test_bdev_super, set_bdev_super, bdev); - up(&bdev->bd_mount_sem); + mutex_unlock(&bdev->bd_mount_mutex); if (IS_ERR(s)) goto out; @@ -663,15 +713,16 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type, s->s_flags = flags; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); - s->s_old_blocksize = block_size(bdev); - sb_set_blocksize(s, s->s_old_blocksize); - error = fill_super(s, data, flags & MS_VERBOSE ? 1 : 0); + sb_set_blocksize(s, block_size(bdev)); + error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { up_write(&s->s_umount); deactivate_super(s); s = ERR_PTR(error); - } else + } else { s->s_flags |= MS_ACTIVE; + bdev_uevent(bdev, KOBJ_MOUNT); + } } return s; @@ -686,8 +737,10 @@ EXPORT_SYMBOL(get_sb_bdev); void kill_block_super(struct super_block *sb) { struct block_device *bdev = sb->s_bdev; + + bdev_uevent(bdev, KOBJ_UMOUNT); generic_shutdown_super(sb); - set_blocksize(bdev, sb->s_old_blocksize); + sync_blockdev(bdev); close_bdev_excl(bdev); } @@ -705,7 +758,7 @@ struct super_block *get_sb_nodev(struct file_system_type *fs_type, s->s_flags = flags; - error = fill_super(s, data, flags & MS_VERBOSE ? 1 : 0); + error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { up_write(&s->s_umount); deactivate_super(s); @@ -734,7 +787,7 @@ struct super_block *get_sb_single(struct file_system_type *fs_type, return s; if (!s->s_root) { s->s_flags = flags; - error = fill_super(s, data, flags & MS_VERBOSE ? 1 : 0); + error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { up_write(&s->s_umount); deactivate_super(s); @@ -752,7 +805,7 @@ struct vfsmount * do_kern_mount(const char *fstype, int flags, const char *name, void *data) { struct file_system_type *type = get_fs_type(fstype); - struct super_block *sb = ERR_PTR(-ENOMEM); + struct super_block *sb; struct vfsmount *mnt; int error; char *secdata = NULL; @@ -760,6 +813,12 @@ do_kern_mount(const char *fstype, int flags, const char *name, void *data) if (!type) return ERR_PTR(-ENODEV); + sb = ERR_PTR(-EPERM); + if ((type->fs_flags & FS_BINARY_MOUNTDATA) && + !vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT)) + goto out; + + sb = ERR_PTR(-ENOMEM); mnt = alloc_vfsmnt(name); if (!mnt) goto out; @@ -781,6 +840,13 @@ do_kern_mount(const char *fstype, int flags, const char *name, void *data) sb = type->get_sb(type, flags, name, data); if (IS_ERR(sb)) goto out_free_secdata; + + error = -EPERM; + if (!capable(CAP_SYS_ADMIN) && !sb->s_bdev && + (sb->s_magic != PROC_SUPER_MAGIC) && + (sb->s_magic != DEVPTS_SUPER_MAGIC)) + goto out_sb; + error = security_sb_kern_mount(sb, secdata); if (error) goto out_sb; @@ -789,6 +855,7 @@ do_kern_mount(const char *fstype, int flags, const char *name, void *data) mnt->mnt_mountpoint = sb->s_root; mnt->mnt_parent = mnt; up_write(&sb->s_umount); + free_secdata(secdata); put_filesystem(type); return mnt; out_sb: @@ -804,6 +871,8 @@ out: return (struct vfsmount *)sb; } +EXPORT_SYMBOL_GPL(do_kern_mount); + struct vfsmount *kern_mount(struct file_system_type *type) { return do_kern_mount(type->name, 0, type->name, NULL);