X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;ds=sidebyside;f=fs%2Fbuffer.c;fp=fs%2Fbuffer.c;h=8a17ebb9742340a1ba0c36ebadbebd4320300d0d;hb=64ba3f394c830ec48a1c31b53dcae312c56f1604;hp=a61beb1b1e228391f2b58a4bfb21425e82fae2ed;hpb=be1e6109ac94a859551f8e1774eb9a8469fe055c;p=linux-2.6.git

diff --git a/fs/buffer.c b/fs/buffer.c
index a61beb1b1..8a17ebb97 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -18,6 +18,7 @@
  * async buffer flushing, 1999 Andrea Arcangeli
  */

+#include
 #include
 #include
 #include
@@ -159,7 +160,12 @@ int sync_blockdev(struct block_device *bdev)
 }
 EXPORT_SYMBOL(sync_blockdev);

-static void __fsync_super(struct super_block *sb)
+/*
+ * Write out and wait upon all dirty data associated with this
+ * superblock.  Filesystem data as well as the underlying block
+ * device.  Takes the superblock lock.
+ */
+int fsync_super(struct super_block *sb)
 {
 	sync_inodes_sb(sb, 0);
 	DQUOT_SYNC(sb);
@@ -171,21 +177,10 @@ static void __fsync_super(struct super_block *sb)
 		sb->s_op->sync_fs(sb, 1);
 	sync_blockdev(sb->s_bdev);
 	sync_inodes_sb(sb, 1);
-}
-
-/*
- * Write out and wait upon all dirty data associated with this
- * superblock.  Filesystem data as well as the underlying block
- * device.  Takes the superblock lock.
- */
-int fsync_super(struct super_block *sb)
-{
-	__fsync_super(sb);
 	return sync_blockdev(sb->s_bdev);
 }

-EXPORT_SYMBOL(fsync_super);
-
 /*
  * Write out and wait upon all dirty data associated with this
  * device.  Filesystem data as well as the underlying block
@@ -206,7 +201,7 @@ int fsync_bdev(struct block_device *bdev)
  * freeze_bdev -- lock a filesystem and force it into a consistent state
  * @bdev:	blockdevice to lock
  *
- * This takes the block device bd_mount_mutex to make sure no new mounts
+ * This takes the block device bd_mount_sem to make sure no new mounts
  * happen on bdev until thaw_bdev() is called.
  * If a superblock is found on this device, we take the s_umount semaphore
  * on it to make sure nobody unmounts until the snapshot creation is done.
@@ -215,13 +210,25 @@ struct super_block *freeze_bdev(struct block_device *bdev)
 {
 	struct super_block *sb;

-	mutex_lock(&bdev->bd_mount_mutex);
+	down(&bdev->bd_mount_sem);
 	sb = get_super(bdev);
 	if (sb && !(sb->s_flags & MS_RDONLY)) {
 		sb->s_frozen = SB_FREEZE_WRITE;
 		smp_wmb();

-		__fsync_super(sb);
+		sync_inodes_sb(sb, 0);
+		DQUOT_SYNC(sb);
+
+		lock_super(sb);
+		if (sb->s_dirt && sb->s_op->write_super)
+			sb->s_op->write_super(sb);
+		unlock_super(sb);
+
+		if (sb->s_op->sync_fs)
+			sb->s_op->sync_fs(sb, 1);
+
+		sync_blockdev(sb->s_bdev);
+		sync_inodes_sb(sb, 1);

 		sb->s_frozen = SB_FREEZE_TRANS;
 		smp_wmb();
@@ -257,7 +264,7 @@ void thaw_bdev(struct block_device *bdev, struct super_block *sb)
 		drop_super(sb);
 	}

-	mutex_unlock(&bdev->bd_mount_mutex);
+	up(&bdev->bd_mount_sem);
 }
 EXPORT_SYMBOL(thaw_bdev);

@@ -320,23 +327,31 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
 	return ret;
 }

-long do_fsync(struct file *file, int datasync)
+static long do_fsync(unsigned int fd, int datasync)
 {
-	int ret;
-	int err;
-	struct address_space *mapping = file->f_mapping;
+	struct file * file;
+	struct address_space *mapping;
+	int ret, err;

+	ret = -EBADF;
+	file = fget(fd);
+	if (!file)
+		goto out;
+
+	ret = -EINVAL;
 	if (!file->f_op || !file->f_op->fsync) {
 		/* Why?  We can still call filemap_fdatawrite */
-		ret = -EINVAL;
-		goto out;
+		goto out_putf;
 	}

+	mapping = file->f_mapping;
+
+	current->flags |= PF_SYNCWRITE;
 	ret = filemap_fdatawrite(mapping);

 	/*
-	 * We need to protect against concurrent writers, which could cause
-	 * livelocks in fsync_buffers_list().
+	 * We need to protect against concurrent writers,
+	 * which could cause livelocks in fsync_buffers_list
 	 */
 	mutex_lock(&mapping->host->i_mutex);
 	err = file->f_op->fsync(file, file->f_dentry, datasync);
@@ -346,31 +361,22 @@ long do_fsync(struct file *file, int datasync)
 		err = filemap_fdatawait(mapping);
 	if (!ret)
 		ret = err;
-out:
-	return ret;
-}
-
-static long __do_fsync(unsigned int fd, int datasync)
-{
-	struct file *file;
-	int ret = -EBADF;
+	current->flags &= ~PF_SYNCWRITE;

-	file = fget(fd);
-	if (file) {
-		ret = do_fsync(file, datasync);
-		fput(file);
-	}
+out_putf:
+	fput(file);
+out:
 	return ret;
 }

 asmlinkage long sys_fsync(unsigned int fd)
 {
-	return __do_fsync(fd, 0);
+	return do_fsync(fd, 0);
 }

 asmlinkage long sys_fdatasync(unsigned int fd)
 {
-	return __do_fsync(fd, 1);
+	return do_fsync(fd, 1);
 }

 /*
@@ -425,10 +431,8 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
 	if (all_mapped) {
 		printk("__find_get_block_slow() failed. "
 			"block=%llu, b_blocknr=%llu\n",
-			(unsigned long long)block,
-			(unsigned long long)bh->b_blocknr);
-		printk("b_state=0x%08lx, b_size=%zu\n",
-			bh->b_state, bh->b_size);
+			(unsigned long long)block, (unsigned long long)bh->b_blocknr);
+		printk("b_state=0x%08lx, b_size=%u\n", bh->b_state, bh->b_size);
 		printk("device blocksize: %d\n", 1 << bd_inode->i_blkbits);
 	}
 out_unlock:
@@ -472,18 +476,13 @@ out:
    pass does the actual I/O. */
 void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers)
 {
-	struct address_space *mapping = bdev->bd_inode->i_mapping;
-
-	if (mapping->nrpages == 0)
-		return;
-
 	invalidate_bh_lrus();
 	/*
 	 * FIXME: what about destroy_dirty_buffers?
 	 * We really want to use invalidate_inode_pages2() for
 	 * that, but not until that's cleaned up.
 	 */
-	invalidate_inode_pages(mapping);
+	invalidate_inode_pages(bdev->bd_inode->i_mapping);
 }

 /*
@@ -497,7 +496,7 @@ static void free_more_memory(void)
 	wakeup_pdflush(1024);
 	yield();

-	for_each_online_pgdat(pgdat) {
+	for_each_pgdat(pgdat) {
 		zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones;
 		if (*zones)
 			try_to_free_pages(zones, GFP_NOFS);
@@ -570,7 +569,7 @@ still_busy:
  * Completion handler for block_write_full_page() - pages which are unlocked
  * during I/O, and which have PageWriteback cleared upon I/O completion.
  */
-static void end_buffer_async_write(struct buffer_head *bh, int uptodate)
+void end_buffer_async_write(struct buffer_head *bh, int uptodate)
 {
 	char b[BDEVNAME_SIZE];
 	unsigned long flags;
@@ -802,7 +801,8 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
 	if (!mapping->assoc_mapping) {
 		mapping->assoc_mapping = buffer_mapping;
 	} else {
-		BUG_ON(mapping->assoc_mapping != buffer_mapping);
+		if (mapping->assoc_mapping != buffer_mapping)
+			BUG();
 	}
 	if (list_empty(&bh->b_assoc_buffers)) {
 		spin_lock(&buffer_mapping->private_lock);
@@ -840,10 +840,7 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
  */
 int __set_page_dirty_buffers(struct page *page)
 {
-	struct address_space * const mapping = page_mapping(page);
-
-	if (unlikely(!mapping))
-		return !TestSetPageDirty(page);
+	struct address_space * const mapping = page->mapping;

 	spin_lock(&mapping->private_lock);
 	if (page_has_buffers(page)) {
@@ -861,15 +858,15 @@ int __set_page_dirty_buffers(struct page *page)
 		write_lock_irq(&mapping->tree_lock);
 		if (page->mapping) {	/* Race with truncate? */
 			if (mapping_cap_account_dirty(mapping))
-				__inc_zone_page_state(page, NR_FILE_DIRTY);
+				inc_page_state(nr_dirty);
 			radix_tree_tag_set(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_DIRTY);
 		}
 		write_unlock_irq(&mapping->tree_lock);
 		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-		return 1;
 	}
+	return 0;
 }
 EXPORT_SYMBOL(__set_page_dirty_buffers);

@@ -1122,7 +1119,8 @@ grow_dev_page(struct block_device *bdev, sector_t block,
 	if (!page)
 		return NULL;

-	BUG_ON(!PageLocked(page));
+	if (!PageLocked(page))
+		BUG();

 	if (page_has_buffers(page)) {
 		bh = page_buffers(page);
@@ -1546,7 +1544,8 @@ void set_bh_page(struct buffer_head *bh,
 		struct page *page, unsigned long offset)
 {
 	bh->b_page = page;
-	BUG_ON(offset >= PAGE_SIZE);
+	if (offset >= PAGE_SIZE)
+		BUG();
 	if (PageHighMem(page))
 		/*
 		 * This catches illegal uses and preserves the offset:
@@ -1616,10 +1615,11 @@ EXPORT_SYMBOL(try_to_release_page);
  * point.  Because the caller is about to free (and possibly reuse) those
  * blocks on-disk.
  */
-void block_invalidatepage(struct page *page, unsigned long offset)
+int block_invalidatepage(struct page *page, unsigned long offset)
 {
 	struct buffer_head *head, *bh, *next;
 	unsigned int curr_off = 0;
+	int ret = 1;

 	BUG_ON(!PageLocked(page));
 	if (!page_has_buffers(page))
@@ -1646,18 +1646,19 @@ void block_invalidatepage(struct page *page, unsigned long offset)
 	 * so real IO is not possible anymore.
 	 */
 	if (offset == 0)
-		try_to_release_page(page, 0);
+		ret = try_to_release_page(page, 0);
 out:
-	return;
+	return ret;
 }
 EXPORT_SYMBOL(block_invalidatepage);

-void do_invalidatepage(struct page *page, unsigned long offset)
+int do_invalidatepage(struct page *page, unsigned long offset)
 {
-	void (*invalidatepage)(struct page *, unsigned long);
-	invalidatepage = page->mapping->a_ops->invalidatepage ? :
-		block_invalidatepage;
-	(*invalidatepage)(page, offset);
+	int (*invalidatepage)(struct page *, unsigned long);
+	invalidatepage = page->mapping->a_ops->invalidatepage;
+	if (invalidatepage == NULL)
+		invalidatepage = block_invalidatepage;
+	return (*invalidatepage)(page, offset);
 }

 /*
@@ -1759,7 +1760,6 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	sector_t block;
 	sector_t last_block;
 	struct buffer_head *bh, *head;
-	const unsigned blocksize = 1 << inode->i_blkbits;
 	int nr_underway = 0;

 	BUG_ON(!PageLocked(page));
@@ -1767,7 +1767,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;

 	if (!page_has_buffers(page)) {
-		create_empty_buffers(page, blocksize,
+		create_empty_buffers(page, 1 << inode->i_blkbits,
 					(1 << BH_Dirty)|(1 << BH_Uptodate));
 	}

@@ -1802,7 +1802,6 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 			clear_buffer_dirty(bh);
 			set_buffer_uptodate(bh);
 		} else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
-			WARN_ON(bh->b_size != blocksize);
 			err = get_block(inode, block, bh, 1);
 			if (err)
 				goto recover;
@@ -1956,7 +1955,6 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
 		if (buffer_new(bh))
 			clear_buffer_new(bh);
 		if (!buffer_mapped(bh)) {
-			WARN_ON(bh->b_size != blocksize);
 			err = get_block(inode, block, bh, 1);
 			if (err)
 				break;
@@ -2112,7 +2110,6 @@ int block_read_full_page(struct page *page, get_block_t *get_block)

 		fully_mapped = 0;
 		if (iblock < lblock) {
-			WARN_ON(bh->b_size != blocksize);
 			err = get_block(inode, iblock, bh, 0);
 			if (err)
 				SetPageError(page);
@@ -2434,7 +2431,6 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
 			create = 1;
 			if (block_start >= to)
 				create = 0;
-			map_bh.b_size = blocksize;
 			ret = get_block(inode, block_in_file + block_in_page,
 					&map_bh, create);
 			if (ret)
@@ -2624,7 +2620,7 @@ int nobh_truncate_page(struct address_space *mapping, loff_t from)
 	unsigned offset = from & (PAGE_CACHE_SIZE-1);
 	unsigned to;
 	struct page *page;
-	const struct address_space_operations *a_ops = mapping->a_ops;
+	struct address_space_operations *a_ops = mapping->a_ops;
 	char *kaddr;
 	int ret = 0;

@@ -2695,7 +2691,6 @@ int block_truncate_page(struct address_space *mapping,

 	err = 0;
 	if (!buffer_mapped(bh)) {
-		WARN_ON(bh->b_size != blocksize);
 		err = get_block(inode, iblock, bh, 0);
 		if (err)
 			goto unlock;
@@ -2782,7 +2777,6 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
 	struct inode *inode = mapping->host;
 	tmp.b_state = 0;
 	tmp.b_blocknr = 0;
-	tmp.b_size = 1 << inode->i_blkbits;
 	get_block(inode, block, &tmp, 0);
 	return tmp.b_blocknr;
 }
@@ -3009,7 +3003,6 @@ int try_to_free_buffers(struct page *page)

 	spin_lock(&mapping->private_lock);
 	ret = drop_buffers(page, &buffers_to_free);
-	spin_unlock(&mapping->private_lock);
 	if (ret) {
 		/*
 		 * If the filesystem writes its buffers by hand (eg ext3)
@@ -3021,6 +3014,7 @@ int try_to_free_buffers(struct page *page)
 		 */
 		clear_page_dirty(page);
 	}
+	spin_unlock(&mapping->private_lock);
 out:
 	if (buffers_to_free) {
 		struct buffer_head *bh = buffers_to_free;
@@ -3035,7 +3029,7 @@ out:
 }
 EXPORT_SYMBOL(try_to_free_buffers);

-void block_sync_page(struct page *page)
+int block_sync_page(struct page *page)
 {
 	struct address_space *mapping;

@@ -3043,6 +3037,7 @@ void block_sync_page(struct page *page)
 	mapping = page_mapping(page);
 	if (mapping)
 		blk_run_backing_dev(mapping->backing_dev_info, page);
+	return 0;
 }

 /*
@@ -3072,6 +3067,68 @@ asmlinkage long sys_bdflush(int func, long data)
 	return 0;
 }

+/*
+ * Migration function for pages with buffers. This function can only be used
+ * if the underlying filesystem guarantees that no other references to "page"
+ * exist.
+ */
+#ifdef CONFIG_MIGRATION
+int buffer_migrate_page(struct page *newpage, struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	struct buffer_head *bh, *head;
+	int rc;
+
+	if (!mapping)
+		return -EAGAIN;
+
+	if (!page_has_buffers(page))
+		return migrate_page(newpage, page);
+
+	head = page_buffers(page);
+
+	rc = migrate_page_remove_references(newpage, page, 3);
+	if (rc)
+		return rc;
+
+	bh = head;
+	do {
+		get_bh(bh);
+		lock_buffer(bh);
+		bh = bh->b_this_page;
+
+	} while (bh != head);
+
+	ClearPagePrivate(page);
+	set_page_private(newpage, page_private(page));
+	set_page_private(page, 0);
+	put_page(page);
+	get_page(newpage);
+
+	bh = head;
+	do {
+		set_bh_page(bh, newpage, bh_offset(bh));
+		bh = bh->b_this_page;
+
+	} while (bh != head);
+
+	SetPagePrivate(newpage);
+
+	migrate_page_copy(newpage, page);
+
+	bh = head;
+	do {
+		unlock_buffer(bh);
+		put_bh(bh);
+		bh = bh->b_this_page;
+
+	} while (bh != head);
+
+	return 0;
+}
+EXPORT_SYMBOL(buffer_migrate_page);
+#endif
+
 /*
  * Buffer-head allocation
  */
@@ -3100,7 +3157,7 @@ static void recalc_bh_state(void)
 	if (__get_cpu_var(bh_accounting).ratelimit++ < 4096)
 		return;
 	__get_cpu_var(bh_accounting).ratelimit = 0;
-	for_each_online_cpu(i)
+	for_each_cpu(i)
 		tot += per_cpu(bh_accounting, i).nr;
 	buffer_heads_over_limit = (tot > max_buffer_heads);
 }
@@ -3149,9 +3206,6 @@ static void buffer_exit_cpu(int cpu)
 		brelse(b->bhs[i]);
 		b->bhs[i] = NULL;
 	}
-	get_cpu_var(bh_accounting).nr += per_cpu(bh_accounting, cpu).nr;
-	per_cpu(bh_accounting, cpu).nr = 0;
-	put_cpu_var(bh_accounting);
 }

 static int buffer_cpu_notify(struct notifier_block *self,
@@ -3168,11 +3222,8 @@ void __init buffer_init(void)
 	int nrpages;

 	bh_cachep = kmem_cache_create("buffer_head",
-			sizeof(struct buffer_head), 0,
-			(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
-			SLAB_MEM_SPREAD),
-			init_buffer_head,
-			NULL);
+			sizeof(struct buffer_head), 0,
+			SLAB_RECLAIM_ACCOUNT|SLAB_PANIC, init_buffer_head, NULL);

 	/*
 	 * Limit the bh occupancy to 10% of ZONE_NORMAL
@@ -3192,6 +3243,7 @@ EXPORT_SYMBOL(block_sync_page);
 EXPORT_SYMBOL(block_truncate_page);
 EXPORT_SYMBOL(block_write_full_page);
 EXPORT_SYMBOL(cont_prepare_write);
+EXPORT_SYMBOL(end_buffer_async_write);
 EXPORT_SYMBOL(end_buffer_read_sync);
 EXPORT_SYMBOL(end_buffer_write_sync);
 EXPORT_SYMBOL(file_fsync);
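
---
Editorial note (not part of the patch): the freeze_bdev() comment in the hunk at -206/-215 above describes taking bd_mount_sem so that no new mounts happen until thaw_bdev() is called, with the superblock synced and frozen in between. The following is a minimal, hedged sketch of how a hypothetical snapshot driver might bracket its work with that pair. Only the two signatures, get_super()'s possible NULL return, and the freeze/thaw semantics come from the diff itself; the function name, the header choice, and the snapshot step are illustrative assumptions.

#include <linux/fs.h>	/* assumed location of the freeze_bdev()/thaw_bdev() declarations */

/* Hypothetical example, not part of the patch. */
static int example_snapshot_bdev(struct block_device *bdev)
{
	struct super_block *sb;

	/*
	 * Takes bd_mount_sem so no new mounts can appear, then syncs and
	 * freezes any filesystem mounted on bdev (per the patch above).
	 * The return value comes from get_super(bdev) and may be NULL if
	 * nothing is mounted.
	 */
	sb = freeze_bdev(bdev);

	/* ... copy the now-quiescent device here (hypothetical step) ... */

	/* Unfreezes the filesystem (if any) and releases bd_mount_sem. */
	thaw_bdev(bdev, sb);
	return 0;
}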