X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Ffs-writeback.c;h=a4b142a6a2c7f4c870a322bf9d3c9d00be1e8a5f;hb=a2f44b27303a5353859d77a3e96a1d3f33f56ab7;hp=8e050fa58218cdac75ad6a49771a224e3d7b7c61;hpb=cace1c4618b6c6442b7dc973e935e7f3268e4aa7;p=linux-2.6.git diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 8e050fa58..a4b142a6a 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -22,8 +22,7 @@ #include #include #include - -extern struct super_block *blockdev_superblock; +#include "internal.h" /** * __mark_inode_dirty - internal function @@ -230,7 +229,6 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) * The inode is clean, unused */ list_move(&inode->i_list, &inode_unused); - inodes_stat.nr_unused++; } } wake_up_inode(inode); @@ -238,17 +236,34 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) } /* - * Write out an inode's dirty pages. Called under inode_lock. + * Write out an inode's dirty pages. Called under inode_lock. Either the + * caller has ref on the inode (either via __iget or via syscall against an fd) + * or the inode has I_WILL_FREE set (via generic_forget_inode) */ static int -__writeback_single_inode(struct inode *inode, - struct writeback_control *wbc) +__writeback_single_inode(struct inode *inode, struct writeback_control *wbc) { wait_queue_head_t *wqh; + if (!atomic_read(&inode->i_count)) + WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); + else + WARN_ON(inode->i_state & I_WILL_FREE); + if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_LOCK)) { + struct address_space *mapping = inode->i_mapping; + int ret; + list_move(&inode->i_list, &inode->i_sb->s_dirty); - return 0; + + /* + * Even if we don't actually write the inode itself here, + * we can at least start some of the data writeout.. + */ + spin_unlock(&inode_lock); + ret = do_writepages(mapping, wbc); + spin_lock(&inode_lock); + return ret; } /* @@ -259,11 +274,9 @@ __writeback_single_inode(struct inode *inode, wqh = bit_waitqueue(&inode->i_state, __I_LOCK); do { - __iget(inode); spin_unlock(&inode_lock); __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); - iput(inode); spin_lock(&inode_lock); } while (inode->i_state & I_LOCK); } @@ -317,7 +330,7 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) if (!bdi_cap_writeback_dirty(bdi)) { list_move(&inode->i_list, &sb->s_dirty); - if (sb == blockdev_superblock) { + if (sb_is_blkdev_sb(sb)) { /* * Dirty memory-backed blockdev: the ramdisk * driver does this. Skip just this inode @@ -334,14 +347,14 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) if (wbc->nonblocking && bdi_write_congested(bdi)) { wbc->encountered_congestion = 1; - if (sb != blockdev_superblock) + if (!sb_is_blkdev_sb(sb)) break; /* Skip a congested fs */ list_move(&inode->i_list, &sb->s_dirty); continue; /* Skip a congested blockdev */ } if (wbc->bdi && bdi != wbc->bdi) { - if (sb != blockdev_superblock) + if (!sb_is_blkdev_sb(sb)) break; /* fs has the wrong queue */ list_move(&inode->i_list, &sb->s_dirty); continue; /* blockdev has wrong queue */ @@ -378,8 +391,8 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) list_move(&inode->i_list, &sb->s_dirty); } spin_unlock(&inode_lock); - cond_resched(); iput(inode); + cond_resched(); spin_lock(&inode_lock); if (wbc->nr_to_write <= 0) break; @@ -458,9 +471,11 @@ void sync_inodes_sb(struct super_block *sb, int wait) { struct writeback_control wbc = { .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_HOLD, + .range_start = 0, + .range_end = LLONG_MAX, }; - unsigned long nr_dirty = read_page_state(nr_dirty); - unsigned long nr_unstable = read_page_state(nr_unstable); + unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); + unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); wbc.nr_to_write = nr_dirty + nr_unstable + (inodes_stat.nr_inodes - inodes_stat.nr_unused) + @@ -485,32 +500,6 @@ static void set_sb_syncing(int val) spin_unlock(&sb_lock); } -/* - * Find a superblock with inodes that need to be synced - */ -static struct super_block *get_super_to_sync(void) -{ - struct super_block *sb; -restart: - spin_lock(&sb_lock); - sb = sb_entry(super_blocks.prev); - for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) { - if (sb->s_syncing) - continue; - sb->s_syncing = 1; - sb->s_count++; - spin_unlock(&sb_lock); - down_read(&sb->s_umount); - if (!sb->s_root) { - drop_super(sb); - goto restart; - } - return sb; - } - spin_unlock(&sb_lock); - return NULL; -} - /** * sync_inodes - writes all inodes to disk * @wait: wait for completion @@ -530,45 +519,64 @@ restart: * outstanding dirty inodes, the writeback goes block-at-a-time within the * filesystem's write_inode(). This is extremely slow. */ -void sync_inodes(int wait) +static void __sync_inodes(int wait) { struct super_block *sb; - set_sb_syncing(0); - while ((sb = get_super_to_sync()) != NULL) { - sync_inodes_sb(sb, 0); - sync_blockdev(sb->s_bdev); - drop_super(sb); + spin_lock(&sb_lock); +restart: + list_for_each_entry(sb, &super_blocks, s_list) { + if (sb->s_syncing) + continue; + sb->s_syncing = 1; + sb->s_count++; + spin_unlock(&sb_lock); + down_read(&sb->s_umount); + if (sb->s_root) { + sync_inodes_sb(sb, wait); + sync_blockdev(sb->s_bdev); + } + up_read(&sb->s_umount); + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto restart; } + spin_unlock(&sb_lock); +} + +void sync_inodes(int wait) +{ + set_sb_syncing(0); + __sync_inodes(0); + if (wait) { set_sb_syncing(0); - while ((sb = get_super_to_sync()) != NULL) { - sync_inodes_sb(sb, 1); - sync_blockdev(sb->s_bdev); - drop_super(sb); - } + __sync_inodes(1); } } /** - * write_inode_now - write an inode to disk - * @inode: inode to write to disk - * @sync: whether the write should be synchronous or not + * write_inode_now - write an inode to disk + * @inode: inode to write to disk + * @sync: whether the write should be synchronous or not + * + * This function commits an inode to disk immediately if it is dirty. This is + * primarily needed by knfsd. * - * This function commits an inode to disk immediately if it is - * dirty. This is primarily needed by knfsd. + * The caller must either have a ref on the inode or must have set I_WILL_FREE. */ - int write_inode_now(struct inode *inode, int sync) { int ret; struct writeback_control wbc = { .nr_to_write = LONG_MAX, .sync_mode = WB_SYNC_ALL, + .range_start = 0, + .range_end = LLONG_MAX, }; if (!mapping_cap_writeback_dirty(inode->i_mapping)) - return 0; + wbc.nr_to_write = 0; might_sleep(); spin_lock(&inode_lock); @@ -612,7 +620,7 @@ EXPORT_SYMBOL(sync_inode); * O_SYNC flag set, to flush dirty writes to disk. * * @what is a bitmask, specifying which part of the inode's data should be - * written and waited upon: + * written and waited upon. * * OSYNC_DATA: i_mapping's dirty data * OSYNC_METADATA: the buffers at i_mapping->private_list @@ -625,7 +633,6 @@ int generic_osync_inode(struct inode *inode, struct address_space *mapping, int int need_write_inode_now = 0; int err2; - current->flags |= PF_SYNCWRITE; if (what & OSYNC_DATA) err = filemap_fdatawrite(mapping); if (what & (OSYNC_METADATA|OSYNC_DATA)) { @@ -638,7 +645,6 @@ int generic_osync_inode(struct inode *inode, struct address_space *mapping, int if (!err) err = err2; } - current->flags &= ~PF_SYNCWRITE; spin_lock(&inode_lock); if ((inode->i_state & I_DIRTY) && @@ -678,8 +684,9 @@ int writeback_acquire(struct backing_dev_info *bdi) /** * writeback_in_progress: determine whether there is writeback in progress - * against a backing device. * @bdi: the device's backing_dev_info structure. + * + * Determine whether there is writeback in progress against a backing device. */ int writeback_in_progress(struct backing_dev_info *bdi) {