X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Ffs-writeback.c;h=892643dc9af10a1a6fbf7c858fd430ab209a5724;hb=7172c64a7cee4dfa95864f49c914f7ea8cf497c8;hp=2f91937edc16aeb6d3fb39b39dab3129e962f236;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 2f91937ed..892643dc9 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -75,8 +75,23 @@ void __mark_inode_dirty(struct inode *inode, int flags) if ((inode->i_state & flags) == flags) return; - if (unlikely(block_dump)) - printk("%s(%d): dirtied file\n", current->comm, current->pid); + if (unlikely(block_dump)) { + struct dentry *dentry = NULL; + const char *name = "?"; + + if (!list_empty(&inode->i_dentry)) { + dentry = list_entry(inode->i_dentry.next, + struct dentry, d_alias); + if (dentry && dentry->d_name.name) + name = (const char *) dentry->d_name.name; + } + + if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) + printk(KERN_DEBUG + "%s(%d): dirtied inode %lu (%s) on %s\n", + current->comm, current->pid, inode->i_ino, + name, inode->i_sb->s_id); + } spin_lock(&inode_lock); if ((inode->i_state & flags) != flags) { @@ -118,10 +133,11 @@ out: EXPORT_SYMBOL(__mark_inode_dirty); -static void write_inode(struct inode *inode, int sync) +static int write_inode(struct inode *inode, int sync) { if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) - inode->i_sb->s_op->write_inode(inode, sync); + return inode->i_sb->s_op->write_inode(inode, sync); + return 0; } /* @@ -155,8 +171,11 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) ret = do_writepages(mapping, wbc); /* Don't write the inode if only I_DIRTY_PAGES was set */ - if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) - write_inode(inode, wait); + if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { + int err = write_inode(inode, wait); + if (ret == 0) + ret = err; + } if (wait) { int err = filemap_fdatawait(mapping); @@ -198,8 +217,9 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) } else if (inode->i_state & I_DIRTY) { /* * Someone redirtied the inode while were writing back - * the pages: nothing to do. + * the pages. */ + list_move(&inode->i_list, &sb->s_dirty); } else if (atomic_read(&inode->i_count)) { /* * The inode is clean, inuse @@ -217,12 +237,20 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) } /* - * Write out an inode's dirty pages. Called under inode_lock. + * Write out an inode's dirty pages. Called under inode_lock. Either the + * caller has ref on the inode (either via __iget or via syscall against an fd) + * or the inode has I_WILL_FREE set (via generic_forget_inode) */ static int -__writeback_single_inode(struct inode *inode, - struct writeback_control *wbc) +__writeback_single_inode(struct inode *inode, struct writeback_control *wbc) { + wait_queue_head_t *wqh; + + if (!atomic_read(&inode->i_count)) + WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); + else + WARN_ON(inode->i_state & I_WILL_FREE); + if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_LOCK)) { list_move(&inode->i_list, &inode->i_sb->s_dirty); return 0; @@ -231,12 +259,16 @@ __writeback_single_inode(struct inode *inode, /* * It's a data-integrity sync. We must wait. */ - while (inode->i_state & I_LOCK) { - __iget(inode); - spin_unlock(&inode_lock); - __wait_on_inode(inode); - iput(inode); - spin_lock(&inode_lock); + if (inode->i_state & I_LOCK) { + DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LOCK); + + wqh = bit_waitqueue(&inode->i_state, __I_LOCK); + do { + spin_unlock(&inode_lock); + __wait_on_bit(wqh, &wq, inode_wait, + TASK_UNINTERRUPTIBLE); + spin_lock(&inode_lock); + } while (inode->i_state & I_LOCK); } return __sync_single_inode(inode, wbc); } @@ -286,18 +318,19 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) struct backing_dev_info *bdi = mapping->backing_dev_info; long pages_skipped; - if (bdi->memory_backed) { + if (!bdi_cap_writeback_dirty(bdi)) { + list_move(&inode->i_list, &sb->s_dirty); if (sb == blockdev_superblock) { /* * Dirty memory-backed blockdev: the ramdisk - * driver does this. + * driver does this. Skip just this inode */ - list_move(&inode->i_list, &sb->s_dirty); continue; } /* - * Assume that all inodes on this superblock are memory - * backed. Skip the superblock. + * Dirty memory-backed inode against a filesystem other + * than the kernel-internal bdev filesystem. Skip the + * entire superblock. */ break; } @@ -349,6 +382,7 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) } spin_unlock(&inode_lock); iput(inode); + cond_resched(); spin_lock(&inode_lock); if (wbc->nr_to_write <= 0) break; @@ -380,20 +414,36 @@ writeback_inodes(struct writeback_control *wbc) { struct super_block *sb; - spin_lock(&inode_lock); + might_sleep(); spin_lock(&sb_lock); +restart: sb = sb_entry(super_blocks.prev); for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) { if (!list_empty(&sb->s_dirty) || !list_empty(&sb->s_io)) { + /* we're making our own get_super here */ + sb->s_count++; spin_unlock(&sb_lock); - sync_sb_inodes(sb, wbc); + /* + * If we can't get the readlock, there's no sense in + * waiting around, most of the time the FS is going to + * be unmounted by the time it is released. + */ + if (down_read_trylock(&sb->s_umount)) { + if (sb->s_root) { + spin_lock(&inode_lock); + sync_sb_inodes(sb, wbc); + spin_unlock(&inode_lock); + } + up_read(&sb->s_umount); + } spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto restart; } if (wbc->nr_to_write <= 0) break; } spin_unlock(&sb_lock); - spin_unlock(&inode_lock); } /* @@ -409,18 +459,17 @@ writeback_inodes(struct writeback_control *wbc) */ void sync_inodes_sb(struct super_block *sb, int wait) { - struct page_state ps; struct writeback_control wbc = { - .bdi = NULL, .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_HOLD, - .older_than_this = NULL, - .nr_to_write = 0, + .range_start = 0, + .range_end = LLONG_MAX, }; + unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); + unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); - get_page_state(&ps); - wbc.nr_to_write = ps.nr_dirty + ps.nr_unstable + + wbc.nr_to_write = nr_dirty + nr_unstable + (inodes_stat.nr_inodes - inodes_stat.nr_unused) + - ps.nr_dirty + ps.nr_unstable; + nr_dirty + nr_unstable; wbc.nr_to_write += wbc.nr_to_write / 2; /* Bit more for luck */ spin_lock(&inode_lock); sync_sb_inodes(sb, &wbc); @@ -441,34 +490,9 @@ static void set_sb_syncing(int val) spin_unlock(&sb_lock); } -/* - * Find a superblock with inodes that need to be synced - */ -static struct super_block *get_super_to_sync(void) -{ - struct super_block *sb; -restart: - spin_lock(&sb_lock); - sb = sb_entry(super_blocks.prev); - for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) { - if (sb->s_syncing) - continue; - sb->s_syncing = 1; - sb->s_count++; - spin_unlock(&sb_lock); - down_read(&sb->s_umount); - if (!sb->s_root) { - drop_super(sb); - goto restart; - } - return sb; - } - spin_unlock(&sb_lock); - return NULL; -} - /** - * sync_inodes + * sync_inodes - writes all inodes to disk + * @wait: wait for completion * * sync_inodes() goes through each super block's dirty inode list, writes the * inodes out, waits on the writeout and puts the inodes back on the normal @@ -485,47 +509,72 @@ restart: * outstanding dirty inodes, the writeback goes block-at-a-time within the * filesystem's write_inode(). This is extremely slow. */ -void sync_inodes(int wait) +static void __sync_inodes(int wait) { struct super_block *sb; - set_sb_syncing(0); - while ((sb = get_super_to_sync()) != NULL) { - sync_inodes_sb(sb, 0); - sync_blockdev(sb->s_bdev); - drop_super(sb); + spin_lock(&sb_lock); +restart: + list_for_each_entry(sb, &super_blocks, s_list) { + if (sb->s_syncing) + continue; + sb->s_syncing = 1; + sb->s_count++; + spin_unlock(&sb_lock); + down_read(&sb->s_umount); + if (sb->s_root) { + sync_inodes_sb(sb, wait); + sync_blockdev(sb->s_bdev); + } + up_read(&sb->s_umount); + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto restart; } + spin_unlock(&sb_lock); +} + +void sync_inodes(int wait) +{ + set_sb_syncing(0); + __sync_inodes(0); + if (wait) { set_sb_syncing(0); - while ((sb = get_super_to_sync()) != NULL) { - sync_inodes_sb(sb, 1); - sync_blockdev(sb->s_bdev); - drop_super(sb); - } + __sync_inodes(1); } } /** - * write_inode_now - write an inode to disk - * @inode: inode to write to disk - * @sync: whether the write should be synchronous or not + * write_inode_now - write an inode to disk + * @inode: inode to write to disk + * @sync: whether the write should be synchronous or not + * + * This function commits an inode to disk immediately if it is dirty. This is + * primarily needed by knfsd. * - * This function commits an inode to disk immediately if it is - * dirty. This is primarily needed by knfsd. + * The caller must either have a ref on the inode or must have set I_WILL_FREE. */ - -void write_inode_now(struct inode *inode, int sync) +int write_inode_now(struct inode *inode, int sync) { + int ret; struct writeback_control wbc = { .nr_to_write = LONG_MAX, .sync_mode = WB_SYNC_ALL, + .range_start = 0, + .range_end = LLONG_MAX, }; + if (!mapping_cap_writeback_dirty(inode->i_mapping)) + wbc.nr_to_write = 0; + + might_sleep(); spin_lock(&inode_lock); - __writeback_single_inode(inode, &wbc); + ret = __writeback_single_inode(inode, &wbc); spin_unlock(&inode_lock); if (sync) wait_on_inode(inode); + return ret; } EXPORT_SYMBOL(write_inode_now); @@ -554,13 +603,14 @@ EXPORT_SYMBOL(sync_inode); /** * generic_osync_inode - flush all dirty data for a given inode to disk * @inode: inode to write + * @mapping: the address_space that should be flushed * @what: what to write and wait upon * * This can be called by file_write functions for files which have the * O_SYNC flag set, to flush dirty writes to disk. * * @what is a bitmask, specifying which part of the inode's data should be - * written and waited upon: + * written and waited upon. * * OSYNC_DATA: i_mapping's dirty data * OSYNC_METADATA: the buffers at i_mapping->private_list @@ -573,7 +623,6 @@ int generic_osync_inode(struct inode *inode, struct address_space *mapping, int int need_write_inode_now = 0; int err2; - current->flags |= PF_SYNCWRITE; if (what & OSYNC_DATA) err = filemap_fdatawrite(mapping); if (what & (OSYNC_METADATA|OSYNC_DATA)) { @@ -586,7 +635,6 @@ int generic_osync_inode(struct inode *inode, struct address_space *mapping, int if (!err) err = err2; } - current->flags &= ~PF_SYNCWRITE; spin_lock(&inode_lock); if ((inode->i_state & I_DIRTY) && @@ -594,8 +642,11 @@ int generic_osync_inode(struct inode *inode, struct address_space *mapping, int need_write_inode_now = 1; spin_unlock(&inode_lock); - if (need_write_inode_now) - write_inode_now(inode, 1); + if (need_write_inode_now) { + err2 = write_inode_now(inode, 1); + if (!err) + err = err2; + } else wait_on_inode(inode); @@ -623,8 +674,9 @@ int writeback_acquire(struct backing_dev_info *bdi) /** * writeback_in_progress: determine whether there is writeback in progress - * against a backing device. * @bdi: the device's backing_dev_info structure. + * + * Determine whether there is writeback in progress against a backing device. */ int writeback_in_progress(struct backing_dev_info *bdi) {