* New SMP locking.
* Jan Kara, <jack@suse.cz>, 10/2002
*
- * Added journalled quota support
+ * Added journalled quota support, fixed lock inversion problems
* Jan Kara, <jack@suse.cz>, 2003,2004
*
* (C) Copyright 1994 - 1997 Marco van Wieringen
#include <linux/proc_fs.h>
#include <linux/security.h>
#include <linux/kmod.h>
-#include <linux/pagemap.h>
+#include <linux/namei.h>
+#include <linux/buffer_head.h>
#include <asm/uaccess.h>
*
* Any operation working on dquots via inode pointers must hold dqptr_sem. If
* operation is just reading pointers from inode (or not using them at all) the
- * read lock is enough. If pointers are altered function must hold write lock.
- * If operation is holding reference to dquot in other way (e.g. quotactl ops)
- * it must be guarded by dqonoff_sem.
+ * read lock is enough. If pointers are altered, the function must hold the
+ * write lock (these locking rules also apply to the S_NOQUOTA flag in the
+ * inode - note that altering the flag also requires i_sem). If an operation
+ * holds a reference to a dquot in some other way (e.g. quotactl ops), it must
+ * be guarded by dqonoff_sem.
* This locking assures that:
* a) update/access to dquot pointers in inode is serialized
* b) everyone is guarded against invalidate_dquots()
* operations on dquots don't hold dq_lock as they copy data under dq_data_lock
* spinlock to internal buffers before writing.
*
- * Lock ordering (including journal_lock) is following:
- * dqonoff_sem > journal_lock > dqptr_sem > dquot->dq_lock > dqio_sem
+ * Lock ordering (including related VFS locks) is the following:
+ * i_sem > dqonoff_sem > iprune_sem > journal_lock > dqptr_sem >
+ * dquot->dq_lock > dqio_sem
+ * i_sem on quota files is special (it's below dqio_sem)
*/
-spinlock_t dq_list_lock = SPIN_LOCK_UNLOCKED;
-spinlock_t dq_data_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(dq_list_lock);
+DEFINE_SPINLOCK(dq_data_lock);
static char *quotatypes[] = INITQFNAMES;
static struct quota_format_type *quota_formats; /* List of registered formats */
static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES;
+/* SLAB cache for dquot structures */
+static kmem_cache_t *dquot_cachep;
+
int register_quota_format(struct quota_format_type *fmt)
{
spin_lock(&dq_list_lock);
* on all three lists, depending on its current state.
*
* All dquots are placed to the end of inuse_list when first created, and this
- * list is used for the sync and invalidate operations, which must look
- * at every dquot.
+ * list is used for the invalidate operation, which must look at every dquot.
*
* Unused dquots (dq_count == 0) are added to the free_dquots list when freed,
* and this list is searched whenever we need an available dquot. Dquots are
static LIST_HEAD(inuse_list);
static LIST_HEAD(free_dquots);
-unsigned int dq_hash_bits, dq_hash_mask;
+static unsigned int dq_hash_bits, dq_hash_mask;
static struct hlist_head *dquot_hash;
struct dqstats dqstats;
static void dqput(struct dquot *dquot);
-static inline int const hashfn(struct super_block *sb, unsigned int id, int type)
+static inline unsigned int /* value is already masked with dq_hash_mask */
+hashfn(const struct super_block *sb, unsigned int id, int type)
{
- unsigned long tmp = (((unsigned long)sb>>L1_CACHE_SHIFT) ^ id) * (MAXQUOTAS - type);
+ unsigned long tmp; /* sb address, id and quota type mixed into one word */
+
+ tmp = (((unsigned long)sb>>L1_CACHE_SHIFT) ^ id) * (MAXQUOTAS - type);
return (tmp + (tmp >> dq_hash_bits)) & dq_hash_mask;
}
int remove_inode_dquot_ref(struct inode *inode, int type, struct list_head *tofree_head)
{
struct dquot *dquot = inode->i_dquot[type];
- int cnt;
inode->i_dquot[type] = NODQUOT;
- /* any other quota in use? */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (inode->i_dquot[cnt] != NODQUOT)
- goto put_it;
- }
- inode->i_flags &= ~S_QUOTA;
-put_it:
if (dquot != NODQUOT) {
if (dqput_blocks(dquot)) {
#ifdef __DQUOT_PARANOIA
}
}
-/* Function in inode.c - remove pointers to dquots in icache */
-extern void remove_dquot_ref(struct super_block *, int, struct list_head *);
-
/* Gather all references from inodes and drop them */
static void drop_dquot_ref(struct super_block *sb, int type)
{
LIST_HEAD(tofree_head);
+ /* We need to be guarded against prune_icache to reach all the
+ * inodes - otherwise some can be on the local list of prune_icache */
+ down(&iprune_sem); /* taken before dqptr_sem, as in the documented lock order */
down_write(&sb_dqopt(sb)->dqptr_sem);
remove_dquot_ref(sb, type, &tofree_head);
up_write(&sb_dqopt(sb)->dqptr_sem);
+ up(&iprune_sem); /* released after dqptr_sem, i.e. in reverse order of acquisition */
put_dquot_list(&tofree_head);
}
dquot->dq_dqb.dqb_curinodes -= number;
else
dquot->dq_dqb.dqb_curinodes = 0;
- if (dquot->dq_dqb.dqb_curinodes < dquot->dq_dqb.dqb_isoftlimit)
+ if (dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit)
dquot->dq_dqb.dqb_itime = (time_t) 0;
clear_bit(DQ_INODES_B, &dquot->dq_flags);
}
dquot->dq_dqb.dqb_curspace -= number;
else
dquot->dq_dqb.dqb_curspace = 0;
- if (toqb(dquot->dq_dqb.dqb_curspace) < dquot->dq_dqb.dqb_bsoftlimit)
+ if (toqb(dquot->dq_dqb.dqb_curspace) <= dquot->dq_dqb.dqb_bsoftlimit)
dquot->dq_dqb.dqb_btime = (time_t) 0;
clear_bit(DQ_BLKS_B, &dquot->dq_flags);
}
+static int flag_print_warnings = 1;
+
static inline int need_print_warning(struct dquot *dquot)
{
+ if (!flag_print_warnings)
+ return 0;
+
switch (dquot->dq_type) {
case USRQUOTA:
return current->fsuid == dquot->dq_id;
if (!need_print_warning(dquot) || (flag && test_and_set_bit(flag, &dquot->dq_flags)))
return;
+
tty_write_message(current->signal->tty, dquot->dq_sb->s_id);
if (warntype == ISOFTWARN || warntype == BSOFTWARN)
tty_write_message(current->signal->tty, ": warning, ");
tty_write_message(current->signal->tty, quotatypes[dquot->dq_type]);
switch (warntype) {
case IHARDWARN:
- msg = " file limit reached.\n";
+ msg = " file limit reached.\r\n";
break;
case ISOFTLONGWARN:
- msg = " file quota exceeded too long.\n";
+ msg = " file quota exceeded too long.\r\n";
break;
case ISOFTWARN:
- msg = " file quota exceeded.\n";
+ msg = " file quota exceeded.\r\n";
break;
case BHARDWARN:
- msg = " block limit reached.\n";
+ msg = " block limit reached.\r\n";
break;
case BSOFTLONGWARN:
- msg = " block quota exceeded too long.\n";
+ msg = " block quota exceeded too long.\r\n";
break;
case BSOFTWARN:
- msg = " block quota exceeded.\n";
+ msg = " block quota exceeded.\r\n";
break;
}
tty_write_message(current->signal->tty, msg);
break;
}
inode->i_dquot[cnt] = dqget(inode->i_sb, id, cnt);
- if (inode->i_dquot[cnt])
- inode->i_flags |= S_QUOTA;
}
}
out_err:
int cnt;
down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
- inode->i_flags &= ~S_QUOTA;
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (inode->i_dquot[cnt] != NODQUOT) {
dqput(inode->i_dquot[cnt]);
{
int cnt;
struct quota_info *dqopt = sb_dqopt(sb);
+ struct inode *toputinode[MAXQUOTAS];
+ struct vfsmount *toputmnt[MAXQUOTAS];
/* We need to serialize quota_off() for device */
down(&dqopt->dqonoff_sem);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ toputinode[cnt] = NULL;
+ toputmnt[cnt] = NULL;
if (type != -1 && cnt != type)
continue;
if (!sb_has_quota_enabled(sb, cnt))
dqopt->ops[cnt]->free_file_info(sb, cnt);
put_quota_format(dqopt->info[cnt].dqi_format);
- fput(dqopt->files[cnt]);
+ toputinode[cnt] = dqopt->files[cnt];
+ toputmnt[cnt] = dqopt->mnt[cnt];
dqopt->files[cnt] = NULL;
+ dqopt->mnt[cnt] = NULL;
dqopt->info[cnt].dqi_flags = 0;
dqopt->info[cnt].dqi_igrace = 0;
dqopt->info[cnt].dqi_bgrace = 0;
dqopt->ops[cnt] = NULL;
}
up(&dqopt->dqonoff_sem);
+ /* Sync the superblock so that buffers with quota data are written to
+ * disk (and so userspace sees correct data afterwards).
+ * The reference to vfsmnt we are still holding protects us from
+ * umount (we don't have it only when quotas are turned on/off for
+ * journal replay but in that case we are guarded by the fs anyway). */
+ if (sb->s_op->sync_fs)
+ sb->s_op->sync_fs(sb, 1);
+ sync_blockdev(sb->s_bdev);
+ /* Now the quota files are just ordinary files and we can set the
+ * inode flags back. Moreover we discard the pagecache so that
+ * userspace sees the writes we did bypassing the pagecache. We
+ * must also discard the blockdev buffers so that we see the
+ * changes done by userspace on the next quotaon() */
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ if (toputinode[cnt]) {
+ down(&dqopt->dqonoff_sem);
+ /* If quota was reenabled in the meantime, we have
+ * nothing to do */
+ if (!sb_has_quota_enabled(sb, cnt)) {
+ down(&toputinode[cnt]->i_sem);
+ toputinode[cnt]->i_flags &= ~(S_IMMUTABLE |
+ S_NOATIME | S_NOQUOTA);
+ truncate_inode_pages(&toputinode[cnt]->i_data, 0);
+ up(&toputinode[cnt]->i_sem);
+ mark_inode_dirty(toputinode[cnt]);
+ iput(toputinode[cnt]);
+ }
+ up(&dqopt->dqonoff_sem);
+ /* We don't hold the reference when we turned on quotas
+ * just for the journal replay... */
+ if (toputmnt[cnt])
+ mntput(toputmnt[cnt]);
+ }
+ if (sb->s_bdev)
+ invalidate_bdev(sb->s_bdev, 0);
return 0;
}
* Turn quotas on on a device
*/
-/* Helper function when we already have file open */
-static int vfs_quota_on_file(struct file *f, int type, int format_id)
+/* Helper function when we already have the inode */
+static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
{
struct quota_format_type *fmt = find_quota_format(format_id);
- struct inode *inode;
- struct super_block *sb = f->f_dentry->d_sb;
+ struct super_block *sb = inode->i_sb;
struct quota_info *dqopt = sb_dqopt(sb);
- struct dquot *to_drop[MAXQUOTAS];
- int error, cnt;
- unsigned int oldflags;
+ int error;
+ int oldflags = -1;
if (!fmt)
return -ESRCH;
- error = -EIO;
- if (!f->f_op || !f->f_op->read || !f->f_op->write)
+ if (!S_ISREG(inode->i_mode)) {
+ error = -EACCES;
+ goto out_fmt;
+ }
+ if (IS_RDONLY(inode)) {
+ error = -EROFS;
goto out_fmt;
- inode = f->f_dentry->d_inode;
- error = -EACCES;
- if (!S_ISREG(inode->i_mode))
+ }
+ if (!sb->s_op->quota_write || !sb->s_op->quota_read) {
+ error = -EINVAL;
goto out_fmt;
+ }
+ /* As we bypass the pagecache we must now flush the inode so that
+ * we see all the changes from userspace... */
+ write_inode_now(inode, 1);
+ /* And now flush the block cache so that kernel sees the changes */
+ invalidate_bdev(sb->s_bdev, 0);
+ down(&inode->i_sem);
down(&dqopt->dqonoff_sem);
if (sb_has_quota_enabled(sb, type)) {
error = -EBUSY;
goto out_lock;
}
- oldflags = inode->i_flags;
- dqopt->files[type] = f;
- error = -EINVAL;
- if (!fmt->qf_ops->check_quota_file(sb, type))
- goto out_file_init;
/* We don't want quota and atime on quota files (deadlocks possible)
- * We also need to set GFP mask differently because we cannot recurse
- * into filesystem when allocating page for quota inode */
+ * Also nobody should write to the file - we use special IO operations
+ * which ignore the immutable bit. */
down_write(&dqopt->dqptr_sem);
- inode->i_flags |= S_NOQUOTA | S_NOATIME;
-
- /*
- * We write to quota files deep within filesystem code. We don't want
- * the VFS to reenter filesystem code when it tries to allocate a
- * pagecache page for the quota file write. So clear __GFP_FS in
- * the quota file's allocation flags.
- */
- mapping_set_gfp_mask(inode->i_mapping,
- mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
-
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- to_drop[cnt] = inode->i_dquot[cnt];
- inode->i_dquot[cnt] = NODQUOT;
- }
- inode->i_flags &= ~S_QUOTA;
+ oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | S_NOQUOTA);
+ inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE;
up_write(&dqopt->dqptr_sem);
- /* We must put dquots outside of dqptr_sem because we may need to
- * start transaction for dquot_release() */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (to_drop[cnt])
- dqput(to_drop[cnt]);
- }
+
+ error = -EIO;
+ dqopt->files[type] = igrab(inode);
+ if (!dqopt->files[type])
+ goto out_lock;
+ error = -EINVAL;
+ if (!fmt->qf_ops->check_quota_file(sb, type))
+ goto out_file_init;
dqopt->ops[type] = fmt->qf_ops;
dqopt->info[type].dqi_format = fmt;
goto out_file_init;
}
up(&dqopt->dqio_sem);
+ up(&inode->i_sem);
set_enable_flags(dqopt, type);
add_dquot_ref(sb, type);
return 0;
out_file_init:
- inode->i_flags = oldflags;
dqopt->files[type] = NULL;
+ iput(inode);
out_lock:
- up_write(&dqopt->dqptr_sem);
up(&dqopt->dqonoff_sem);
+ if (oldflags != -1) {
+ down_write(&dqopt->dqptr_sem);
+ /* Set the flags back (in the case of accidental quotaon()
+ * on a wrong file we don't want to mess up the flags) */
+ inode->i_flags &= ~(S_NOATIME | S_NOQUOTA | S_IMMUTABLE);
+ inode->i_flags |= oldflags;
+ up_write(&dqopt->dqptr_sem);
+ }
+ up(&inode->i_sem);
out_fmt:
put_quota_format(fmt);
/* Actual function called from quotactl() */
int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path)
{
- struct file *f;
+ struct nameidata nd;
int error;
- f = filp_open(path, O_RDWR, 0600);
- if (IS_ERR(f))
- return PTR_ERR(f);
- error = security_quota_on(f);
+ error = path_lookup(path, LOOKUP_FOLLOW, &nd); /* fills nd with the dentry and vfsmount used below */
+ if (error < 0)
+ return error;
+ error = security_quota_on(nd.dentry);
if (error)
- goto out_f;
- error = vfs_quota_on_file(f, type, format_id);
- if (!error)
- return 0;
-out_f:
- filp_close(f, NULL);
+ goto out_path;
+ /* Quota file not on the same filesystem? */
+ if (nd.mnt->mnt_sb != sb)
+ error = -EXDEV;
+ else {
+ error = vfs_quota_on_inode(nd.dentry->d_inode, type, format_id);
+ if (!error)
+ sb_dqopt(sb)->mnt[type] = mntget(nd.mnt); /* reference is dropped by the mntput() in quota-off */
+ }
+out_path:
+ path_release(&nd); /* reached on both the success and the error paths */
return error;
}
/*
- * Function used by filesystems when filp_open() would fail (filesystem is
- * being mounted now). We will use a private file structure. Caller is
- * responsible that it's IO functions won't need vfsmnt structure or
- * some dentry tricks...
+ * This function is used when a filesystem needs to initialize quotas
+ * at mount time.
*/
int vfs_quota_on_mount(int type, int format_id, struct dentry *dentry)
{
- struct file *f;
int error;
- dget(dentry); /* Get a reference for struct file */
- f = dentry_open(dentry, NULL, O_RDWR);
- if (IS_ERR(f)) {
- error = PTR_ERR(f);
- goto out_dentry;
- }
- error = vfs_quota_on_file(f, type, format_id);
- if (!error)
- return 0;
- fput(f);
-out_dentry:
- dput(dentry);
- return error;
+ error = security_quota_on(dentry);
+ if (error)
+ return error;
+ return vfs_quota_on_inode(dentry->d_inode, type, format_id); /* unlike vfs_quota_on(), mnt[type] is not set here */
}
/* Generic routine for getting common part of quota structure */
.mode = 0444,
.proc_handler = &proc_dointvec,
},
+ {
+ .ctl_name = FS_DQ_WARNINGS,
+ .procname = "warnings",
+ .data = &flag_print_warnings,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
{ .ctl_name = 0 },
};
{ .ctl_name = 0 },
};
-/* SLAB cache for dquot structures */
-kmem_cache_t *dquot_cachep;
-
static int __init dquot_init(void)
{
int i;
EXPORT_SYMBOL(register_quota_format);
EXPORT_SYMBOL(unregister_quota_format);
EXPORT_SYMBOL(dqstats);
-EXPORT_SYMBOL(dq_list_lock);
EXPORT_SYMBOL(dq_data_lock);
EXPORT_SYMBOL(vfs_quota_on);
EXPORT_SYMBOL(vfs_quota_on_mount);