Fedora kernel-2.6.17-1.2142_FC4 patched with stable patch-2.6.17.4-vs2.0.2-rc26.diff
[linux-2.6.git] / fs / super.c
index 8d9bff1..d325036 100644 (file)
 #include <linux/buffer_head.h>         /* for fsync_super() */
 #include <linux/mount.h>
 #include <linux/security.h>
+#include <linux/syscalls.h>
 #include <linux/vfs.h>
 #include <linux/writeback.h>           /* for the emergency remount stuff */
 #include <linux/idr.h>
+#include <linux/kobject.h>
+#include <linux/mutex.h>
+#include <linux/devpts_fs.h>
+#include <linux/proc_fs.h>
 #include <asm/uaccess.h>
 
 
@@ -43,7 +48,7 @@ void put_filesystem(struct file_system_type *fs);
 struct file_system_type *get_fs_type(const char *name);
 
 LIST_HEAD(super_blocks);
-spinlock_t sb_lock = SPIN_LOCK_UNLOCKED;
+DEFINE_SPINLOCK(sb_lock);
 
 /**
  *     alloc_super     -       create new superblock
@@ -53,11 +58,10 @@ spinlock_t sb_lock = SPIN_LOCK_UNLOCKED;
  */
 static struct super_block *alloc_super(void)
 {
-       struct super_block *s = kmalloc(sizeof(struct super_block),  GFP_USER);
+       struct super_block *s = kzalloc(sizeof(struct super_block),  GFP_USER);
        static struct super_operations default_op;
 
        if (s) {
-               memset(s, 0, sizeof(struct super_block));
                if (security_sb_alloc(s)) {
                        kfree(s);
                        s = NULL;
@@ -68,20 +72,22 @@ static struct super_block *alloc_super(void)
                INIT_LIST_HEAD(&s->s_files);
                INIT_LIST_HEAD(&s->s_instances);
                INIT_HLIST_HEAD(&s->s_anon);
+               INIT_LIST_HEAD(&s->s_inodes);
                init_rwsem(&s->s_umount);
-               sema_init(&s->s_lock, 1);
+               mutex_init(&s->s_lock);
                down_write(&s->s_umount);
                s->s_count = S_BIAS;
                atomic_set(&s->s_active, 1);
-               sema_init(&s->s_vfs_rename_sem,1);
-               sema_init(&s->s_dquot.dqio_sem, 1);
-               sema_init(&s->s_dquot.dqonoff_sem, 1);
+               mutex_init(&s->s_vfs_rename_mutex);
+               mutex_init(&s->s_dquot.dqio_mutex);
+               mutex_init(&s->s_dquot.dqonoff_mutex);
                init_rwsem(&s->s_dquot.dqptr_sem);
                init_waitqueue_head(&s->s_wait_unfrozen);
                s->s_maxbytes = MAX_NON_LFS;
                s->dq_op = sb_dquot_ops;
                s->s_qcop = sb_quotactl_ops;
                s->s_op = &default_op;
+               s->s_time_gran = 1000000000;
        }
 out:
        return s;
@@ -116,9 +122,30 @@ int __put_super(struct super_block *sb)
        return ret;
 }
 
+/*
+ * Drop a reference to a superblock.
+ * Returns non-zero if the superblock has already been removed from the
+ * super_blocks list (its s_list is empty), in which case a caller that is
+ * iterating over super_blocks must restart its walk.
+ * The caller must hold sb_lock.
+ */
+int __put_super_and_need_restart(struct super_block *sb)
+{
+       /* check for race with generic_shutdown_super() */
+       if (list_empty(&sb->s_list)) {
+               /* sb is off the list: put it and tell the caller to restart */
+               __put_super(sb);
+               return 1;
+       }
+       /* cannot be the last reference, since sb is still on the list */
+       sb->s_count--;
+       BUG_ON(sb->s_count == 0);
+       return 0;
+}
+
 /**
  *     put_super       -       drop a temporary reference to superblock
- *     @s: superblock in question
+ *     @sb: superblock in question
  *
  *     Drops a temporary reference, frees superblock if there's no
  *     references left.
@@ -146,6 +173,7 @@ void deactivate_super(struct super_block *s)
        if (atomic_dec_and_lock(&s->s_active, &sb_lock)) {
                s->s_count -= S_BIAS-1;
                spin_unlock(&sb_lock);
+               DQUOT_OFF(s);
                down_write(&s->s_umount);
                fs->kill_sb(s);
                put_filesystem(fs);
@@ -209,10 +237,10 @@ void generic_shutdown_super(struct super_block *sb)
                dput(root);
                fsync_super(sb);
                lock_super(sb);
-               lock_kernel();
                sb->s_flags &= ~MS_ACTIVE;
                /* bad name - it should be evict_inodes() */
                invalidate_inodes(sb);
+               lock_kernel();
 
                if (sop->write_super && sb->s_dirt)
                        sop->write_super(sb);
@@ -221,15 +249,17 @@ void generic_shutdown_super(struct super_block *sb)
 
                /* Forget any remaining inodes */
                if (invalidate_inodes(sb)) {
-                       printk("VFS: Busy inodes after unmount. "
-                          "Self-destruct in 5 seconds.  Have a nice day...\n");
+                       printk("VFS: Busy inodes after unmount of %s. "
+                          "Self-destruct in 5 seconds.  Have a nice day...\n",
+                          sb->s_id);
                }
 
                unlock_kernel();
                unlock_super(sb);
        }
        spin_lock(&sb_lock);
-       list_del(&sb->s_list);
+       /* emptied so that __put_super_and_need_restart() can detect removal */
+       list_del_init(&sb->s_list);
        list_del(&sb->s_instances);
        spin_unlock(&sb_lock);
        up_write(&sb->s_umount);
@@ -282,7 +312,7 @@ retry:
        }
        s->s_type = type;
        strlcpy(s->s_id, type->name, sizeof(s->s_id));
-       list_add(&s->s_list, super_blocks.prev);
+       list_add_tail(&s->s_list, &super_blocks);
        list_add(&s->s_instances, &type->fs_supers);
        spin_unlock(&sb_lock);
        get_filesystem(type);
@@ -315,20 +345,22 @@ static inline void write_super(struct super_block *sb)
  */
 void sync_supers(void)
 {
-       struct super_block * sb;
-restart:
+       struct super_block *sb;
+
        spin_lock(&sb_lock);
-       sb = sb_entry(super_blocks.next);
-       while (sb != sb_entry(&super_blocks))
+restart:       /* always reached with sb_lock held */
+       list_for_each_entry(sb, &super_blocks, s_list) {
                if (sb->s_dirt) {
                        sb->s_count++;
                        spin_unlock(&sb_lock);
                        down_read(&sb->s_umount);
                        write_super(sb);
-                       drop_super(sb);
-                       goto restart;
-               } else
-                       sb = sb_entry(sb->s_list.next);
+                       up_read(&sb->s_umount);
+                       spin_lock(&sb_lock);    /* retake before dropping s_count */
+                       if (__put_super_and_need_restart(sb))
+                               goto restart;   /* sb left the list; rescan */
+               }
+       }
        spin_unlock(&sb_lock);
 }
 
@@ -351,24 +383,20 @@ restart:
 void sync_filesystems(int wait)
 {
        struct super_block *sb;
-       static DECLARE_MUTEX(mutex);
+       static DEFINE_MUTEX(mutex);
 
-       down(&mutex);           /* Could be down_interruptible */
+       mutex_lock(&mutex);             /* Could be mutex_lock_interruptible */
        spin_lock(&sb_lock);
-       for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks);
-                       sb = sb_entry(sb->s_list.next)) {
+       list_for_each_entry(sb, &super_blocks, s_list) {
                if (!sb->s_op->sync_fs)
                        continue;
                if (sb->s_flags & MS_RDONLY)
                        continue;
                sb->s_need_sync_fs = 1;
        }
-       spin_unlock(&sb_lock);
 
 restart:
-       spin_lock(&sb_lock);
-       for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks);
-                       sb = sb_entry(sb->s_list.next)) {
+       list_for_each_entry(sb, &super_blocks, s_list) {
                if (!sb->s_need_sync_fs)
                        continue;
                sb->s_need_sync_fs = 0;
@@ -379,11 +407,14 @@ restart:
                down_read(&sb->s_umount);
                if (sb->s_root && (wait || sb->s_dirt))
                        sb->s_op->sync_fs(sb, wait);
-               drop_super(sb);
-               goto restart;
+               up_read(&sb->s_umount);
+               /* restart only when sb is no longer on the list */
+               spin_lock(&sb_lock);
+               if (__put_super_and_need_restart(sb))
+                       goto restart;
        }
        spin_unlock(&sb_lock);
-       up(&mutex);
+       mutex_unlock(&mutex);
 }
 
 /**
@@ -396,21 +427,25 @@ restart:
 
 struct super_block * get_super(struct block_device *bdev)
 {
-       struct list_head *p;
+       struct super_block *sb;
+
        if (!bdev)
                return NULL;
-rescan:
+
        spin_lock(&sb_lock);
-       list_for_each(p, &super_blocks) {
-               struct super_block *s = sb_entry(p);
-               if (s->s_bdev == bdev) {
-                       s->s_count++;
+rescan:
+       list_for_each_entry(sb, &super_blocks, s_list) {
+               if (sb->s_bdev == bdev) {
+                       sb->s_count++;
                        spin_unlock(&sb_lock);
-                       down_read(&s->s_umount);
-                       if (s->s_root)
-                               return s;
-                       drop_super(s);
-                       goto rescan;
+                       down_read(&sb->s_umount);
+                       if (sb->s_root)
+                               return sb;
+                       up_read(&sb->s_umount);
+                       /* restart only when sb is no longer on the list */
+                       spin_lock(&sb_lock);
+                       if (__put_super_and_need_restart(sb))
+                               goto rescan;
                }
        }
        spin_unlock(&sb_lock);
@@ -421,28 +456,28 @@ EXPORT_SYMBOL(get_super);
  
 struct super_block * user_get_super(dev_t dev)
 {
-       struct list_head *p;
+       struct super_block *sb;
 
-rescan:
        spin_lock(&sb_lock);
-       list_for_each(p, &super_blocks) {
-               struct super_block *s = sb_entry(p);
-               if (s->s_dev ==  dev) {
-                       s->s_count++;
+rescan:        /* always reached with sb_lock held */
+       list_for_each_entry(sb, &super_blocks, s_list) {
+               if (sb->s_dev ==  dev) {
+                       sb->s_count++;
                        spin_unlock(&sb_lock);
-                       down_read(&s->s_umount);
-                       if (s->s_root)
-                               return s;
-                       drop_super(s);
-                       goto rescan;
+                       down_read(&sb->s_umount);
+                       if (sb->s_root)
+                               return sb;      /* s_count raised, s_umount read-held */
+                       up_read(&sb->s_umount);
+                       /* restart only when sb is no longer on the list */
+                       spin_lock(&sb_lock);
+                       if (__put_super_and_need_restart(sb))
+                               goto rescan;
                }
        }
        spin_unlock(&sb_lock);
        return NULL;
 }
 
-EXPORT_SYMBOL(user_get_super);
-
 asmlinkage long sys_ustat(unsigned dev, struct ustat __user * ubuf)
 {
         struct super_block *s;
@@ -480,7 +515,7 @@ static void mark_files_ro(struct super_block *sb)
        struct file *f;
 
        file_list_lock();
-       list_for_each_entry(f, &sb->s_files, f_list) {
+       list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
                if (S_ISREG(f->f_dentry->d_inode->i_mode) && file_count(f))
                        f->f_mode &= ~FMODE_WRITE;
        }
@@ -564,19 +599,24 @@ void emergency_remount(void)
  */
 
 static struct idr unnamed_dev_idr;
-static spinlock_t unnamed_dev_lock = SPIN_LOCK_UNLOCKED;/* protects the above */
+static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */
 
 int set_anon_super(struct super_block *s, void *data)
 {
        int dev;
+       int error;
 
-       spin_lock(&unnamed_dev_lock);
-       if (idr_pre_get(&unnamed_dev_idr, GFP_ATOMIC) == 0) {
-               spin_unlock(&unnamed_dev_lock);
+ retry:
+       if (idr_pre_get(&unnamed_dev_idr, GFP_ATOMIC) == 0)
                return -ENOMEM;
-       }
-       dev = idr_get_new(&unnamed_dev_idr, NULL);
+       spin_lock(&unnamed_dev_lock);
+       error = idr_get_new(&unnamed_dev_idr, NULL, &dev);
        spin_unlock(&unnamed_dev_lock);
+       if (error == -EAGAIN)
+               /* We raced and lost with another CPU. */
+               goto retry;
+       else if (error)
+               return -EAGAIN;
 
        if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) {
                spin_lock(&unnamed_dev_lock);
@@ -628,6 +668,16 @@ static int test_bdev_super(struct super_block *s, void *data)
        return (void *)s->s_bdev == data;
 }
 
+static void bdev_uevent(struct block_device *bdev, enum kobject_action action)
+{
+       if (bdev->bd_disk) {    /* no event is sent when bd_disk is unset */
+               if (bdev->bd_part)      /* bd_part set: event on its kobject */
+                       kobject_uevent(&bdev->bd_part->kobj, action);
+               else                    /* otherwise: event on bd_disk's kobject */
+                       kobject_uevent(&bdev->bd_disk->kobj, action);
+       }
+}
+
 struct super_block *get_sb_bdev(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *data,
        int (*fill_super)(struct super_block *, void *, int))
@@ -645,9 +695,9 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type,
         * will protect the lockfs code from trying to start a snapshot
         * while we are mounting
         */
-       down(&bdev->bd_mount_sem);
+       mutex_lock(&bdev->bd_mount_mutex);
        s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
-       up(&bdev->bd_mount_sem);
+       mutex_unlock(&bdev->bd_mount_mutex);
        if (IS_ERR(s))
                goto out;
 
@@ -663,15 +713,16 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type,
 
                s->s_flags = flags;
                strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
-               s->s_old_blocksize = block_size(bdev);
-               sb_set_blocksize(s, s->s_old_blocksize);
-               error = fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
+               sb_set_blocksize(s, block_size(bdev));
+               error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
                if (error) {
                        up_write(&s->s_umount);
                        deactivate_super(s);
                        s = ERR_PTR(error);
-               } else
+               } else {
                        s->s_flags |= MS_ACTIVE;
+                       bdev_uevent(bdev, KOBJ_MOUNT);
+               }
        }
 
        return s;
@@ -686,8 +737,10 @@ EXPORT_SYMBOL(get_sb_bdev);
 void kill_block_super(struct super_block *sb)
 {
        struct block_device *bdev = sb->s_bdev;
+
+       bdev_uevent(bdev, KOBJ_UMOUNT); /* sent before the shutdown below */
        generic_shutdown_super(sb);
-       set_blocksize(bdev, sb->s_old_blocksize);
+       sync_blockdev(bdev);
        close_bdev_excl(bdev);
 }
 
@@ -705,7 +758,7 @@ struct super_block *get_sb_nodev(struct file_system_type *fs_type,
 
        s->s_flags = flags;
 
-       error = fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
+       error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
        if (error) {
                up_write(&s->s_umount);
                deactivate_super(s);
@@ -734,7 +787,7 @@ struct super_block *get_sb_single(struct file_system_type *fs_type,
                return s;
        if (!s->s_root) {
                s->s_flags = flags;
-               error = fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
+               error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
                if (error) {
                        up_write(&s->s_umount);
                        deactivate_super(s);
@@ -752,7 +805,7 @@ struct vfsmount *
 do_kern_mount(const char *fstype, int flags, const char *name, void *data)
 {
        struct file_system_type *type = get_fs_type(fstype);
-       struct super_block *sb = ERR_PTR(-ENOMEM);
+       struct super_block *sb;
        struct vfsmount *mnt;
        int error;
        char *secdata = NULL;
@@ -760,6 +813,12 @@ do_kern_mount(const char *fstype, int flags, const char *name, void *data)
        if (!type)
                return ERR_PTR(-ENODEV);
 
+       sb = ERR_PTR(-EPERM);
+       if ((type->fs_flags & FS_BINARY_MOUNTDATA) &&
+               !vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT))
+               goto out;
+
+       sb = ERR_PTR(-ENOMEM);
        mnt = alloc_vfsmnt(name);
        if (!mnt)
                goto out;
@@ -781,6 +840,13 @@ do_kern_mount(const char *fstype, int flags, const char *name, void *data)
        sb = type->get_sb(type, flags, name, data);
        if (IS_ERR(sb))
                goto out_free_secdata;
+
+       error = -EPERM;
+       if (!capable(CAP_SYS_ADMIN) && !sb->s_bdev &&
+               (sb->s_magic != PROC_SUPER_MAGIC) &&
+               (sb->s_magic != DEVPTS_SUPER_MAGIC))
+               goto out_sb;
+
        error = security_sb_kern_mount(sb, secdata);
        if (error)
                goto out_sb;
@@ -789,6 +855,7 @@ do_kern_mount(const char *fstype, int flags, const char *name, void *data)
        mnt->mnt_mountpoint = sb->s_root;
        mnt->mnt_parent = mnt;
        up_write(&sb->s_umount);
+       free_secdata(secdata);
        put_filesystem(type);
        return mnt;
 out_sb:
@@ -804,6 +871,8 @@ out:
        return (struct vfsmount *)sb;
 }
 
+EXPORT_SYMBOL_GPL(do_kern_mount);
+
 struct vfsmount *kern_mount(struct file_system_type *type)
 {
        return do_kern_mount(type->name, 0, type->name, NULL);