X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Fmpage.c;h=692a3e578fc8f1102823548dbc5d49a1a1850953;hb=refs%2Fheads%2Fvserver;hp=71c7ca3a455de3687f1147152cf0f76b87f3a1f9;hpb=9bf4aaab3e101692164d49b7ca357651eb691cb6;p=linux-2.6.git diff --git a/fs/mpage.c b/fs/mpage.c index 71c7ca3a4..692a3e578 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -79,15 +79,18 @@ static int mpage_end_io_write(struct bio *bio, unsigned int bytes_done, int err) if (--bvec >= bio->bi_io_vec) prefetchw(&bvec->bv_page->flags); - if (!uptodate) + if (!uptodate){ SetPageError(page); + if (page->mapping) + set_bit(AS_EIO, &page->mapping->flags); + } end_page_writeback(page); } while (bvec >= bio->bi_io_vec); bio_put(bio); return 0; } -struct bio *mpage_bio_submit(int rw, struct bio *bio) +static struct bio *mpage_bio_submit(int rw, struct bio *bio) { bio->bi_end_io = mpage_end_io_read; if (rw == WRITE) @@ -98,7 +101,8 @@ struct bio *mpage_bio_submit(int rw, struct bio *bio) static struct bio * mpage_alloc(struct block_device *bdev, - sector_t first_sector, int nr_vecs, int gfp_flags) + sector_t first_sector, int nr_vecs, + gfp_t gfp_flags) { struct bio *bio; @@ -159,55 +163,19 @@ map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block) } while (page_bh != head); } -/** - * mpage_readpages - populate an address space with some pages, and - * start reads against them. - * - * @mapping: the address_space - * @pages: The address of a list_head which contains the target pages. These - * pages have their ->index populated and are otherwise uninitialised. - * - * The page at @pages->prev has the lowest file offset, and reads should be - * issued in @pages->prev to @pages->next order. - * - * @nr_pages: The number of pages at *@pages - * @get_block: The filesystem's block mapper function. - * - * This function walks the pages and the blocks within each page, building and - * emitting large BIOs. 
- * - * If anything unusual happens, such as: - * - * - encountering a page which has buffers - * - encountering a page which has a non-hole after a hole - * - encountering a page with non-contiguous blocks - * - * then this code just gives up and calls the buffer_head-based read function. - * It does handle a page which has holes at the end - that is a common case: - * the end-of-file on blocksize < PAGE_CACHE_SIZE setups. - * - * BH_Boundary explanation: - * - * There is a problem. The mpage read code assembles several pages, gets all - * their disk mappings, and then submits them all. That's fine, but obtaining - * the disk mappings may require I/O. Reads of indirect blocks, for example. - * - * So an mpage read of the first 16 blocks of an ext2 file will cause I/O to be - * submitted in the following order: - * 12 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 16 - * because the indirect block has to be read to get the mappings of blocks - * 13,14,15,16. Obviously, this impacts performance. - * - * So what we do it to allow the filesystem's get_block() function to set - * BH_Boundary when it maps block 11. BH_Boundary says: mapping of the block - * after this one will require I/O against a block which is probably close to - * this one. So you should push what I/O you have currently accumulated. +/* + * This is the worker routine which does all the work of mapping the disk + * blocks and constructs largest possible bios, submits them for IO if the + * blocks are not contiguous on the disk. * - * This all causes the disk requests to be issued in the correct order. + * We pass a buffer_head back and forth and use its buffer_mapped() flag to + * represent the validity of its disk mapping and to decide when to do the next + * get_block() call. 
*/ static struct bio * do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, - sector_t *last_block_in_bio, get_block_t get_block) + sector_t *last_block_in_bio, struct buffer_head *map_bh, + unsigned long *first_logical_block, get_block_t get_block) { struct inode *inode = page->mapping->host; const unsigned blkbits = inode->i_blkbits; @@ -215,33 +183,72 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, const unsigned blocksize = 1 << blkbits; sector_t block_in_file; sector_t last_block; + sector_t last_block_in_file; sector_t blocks[MAX_BUF_PER_PAGE]; unsigned page_block; unsigned first_hole = blocks_per_page; struct block_device *bdev = NULL; - struct buffer_head bh; int length; int fully_mapped = 1; + unsigned nblocks; + unsigned relative_block; if (page_has_buffers(page)) goto confused; - block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits); - last_block = (i_size_read(inode) + blocksize - 1) >> blkbits; + block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits); + last_block = block_in_file + nr_pages * blocks_per_page; + last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits; + if (last_block > last_block_in_file) + last_block = last_block_in_file; + page_block = 0; + + /* + * Map blocks using the result from the previous get_blocks call first. 
+ */ + nblocks = map_bh->b_size >> blkbits; + if (buffer_mapped(map_bh) && block_in_file > *first_logical_block && + block_in_file < (*first_logical_block + nblocks)) { + unsigned map_offset = block_in_file - *first_logical_block; + unsigned last = nblocks - map_offset; + + for (relative_block = 0; ; relative_block++) { + if (relative_block == last) { + clear_buffer_mapped(map_bh); + break; + } + if (page_block == blocks_per_page) + break; + blocks[page_block] = map_bh->b_blocknr + map_offset + + relative_block; + page_block++; + block_in_file++; + } + bdev = map_bh->b_bdev; + } + + /* + * Then do more get_blocks calls until we are done with this page. + */ + map_bh->b_page = page; + while (page_block < blocks_per_page) { + map_bh->b_state = 0; + map_bh->b_size = 0; - bh.b_page = page; - for (page_block = 0; page_block < blocks_per_page; - page_block++, block_in_file++) { - bh.b_state = 0; if (block_in_file < last_block) { - if (get_block(inode, block_in_file, &bh, 0)) + map_bh->b_size = (last_block-block_in_file) << blkbits; + if (get_block(inode, block_in_file, map_bh, 0)) goto confused; + *first_logical_block = block_in_file; } - if (!buffer_mapped(&bh)) { + if (!buffer_mapped(map_bh)) { fully_mapped = 0; if (first_hole == blocks_per_page) first_hole = page_block; + page_block++; + block_in_file++; + clear_buffer_mapped(map_bh); continue; } @@ -251,8 +258,8 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, * we just collected from get_block into the page's buffers * so readpage doesn't have to repeat the get_block call */ - if (buffer_uptodate(&bh)) { - map_buffer_to_page(page, &bh, page_block); + if (buffer_uptodate(map_bh)) { + map_buffer_to_page(page, map_bh, page_block); goto confused; } @@ -260,10 +267,20 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, goto confused; /* hole -> non-hole */ /* Contiguous blocks? 
*/ - if (page_block && blocks[page_block-1] != bh.b_blocknr-1) + if (page_block && blocks[page_block-1] != map_bh->b_blocknr-1) goto confused; - blocks[page_block] = bh.b_blocknr; - bdev = bh.b_bdev; + nblocks = map_bh->b_size >> blkbits; + for (relative_block = 0; ; relative_block++) { + if (relative_block == nblocks) { + clear_buffer_mapped(map_bh); + break; + } else if (page_block == blocks_per_page) + break; + blocks[page_block] = map_bh->b_blocknr+relative_block; + page_block++; + block_in_file++; + } + bdev = map_bh->b_bdev; } if (first_hole != blocks_per_page) { @@ -290,7 +307,8 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, alloc_new: if (bio == NULL) { bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9), - nr_pages, GFP_KERNEL); + min_t(int, nr_pages, bio_get_nr_vecs(bdev)), + GFP_KERNEL); if (bio == NULL) goto confused; } @@ -301,7 +319,7 @@ alloc_new: goto alloc_new; } - if (buffer_boundary(&bh) || (first_hole != blocks_per_page)) + if (buffer_boundary(map_bh) || (first_hole != blocks_per_page)) bio = mpage_bio_submit(READ, bio); else *last_block_in_bio = blocks[blocks_per_page - 1]; @@ -318,6 +336,52 @@ confused: goto out; } +/** + * mpage_readpages - populate an address space with some pages, and + * start reads against them. + * + * @mapping: the address_space + * @pages: The address of a list_head which contains the target pages. These + * pages have their ->index populated and are otherwise uninitialised. + * + * The page at @pages->prev has the lowest file offset, and reads should be + * issued in @pages->prev to @pages->next order. + * + * @nr_pages: The number of pages at *@pages + * @get_block: The filesystem's block mapper function. + * + * This function walks the pages and the blocks within each page, building and + * emitting large BIOs. 
+ * + * If anything unusual happens, such as: + * + * - encountering a page which has buffers + * - encountering a page which has a non-hole after a hole + * - encountering a page with non-contiguous blocks + * + * then this code just gives up and calls the buffer_head-based read function. + * It does handle a page which has holes at the end - that is a common case: + * the end-of-file on blocksize < PAGE_CACHE_SIZE setups. + * + * BH_Boundary explanation: + * + * There is a problem. The mpage read code assembles several pages, gets all + * their disk mappings, and then submits them all. That's fine, but obtaining + * the disk mappings may require I/O. Reads of indirect blocks, for example. + * + * So an mpage read of the first 16 blocks of an ext2 file will cause I/O to be + * submitted in the following order: + * 12 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 16 + * because the indirect block has to be read to get the mappings of blocks + * 13,14,15,16. Obviously, this impacts performance. + * + * So what we do is to allow the filesystem's get_block() function to set + * BH_Boundary when it maps block 11. BH_Boundary says: mapping of the block + * after this one will require I/O against a block which is probably close to + * this one. So you should push what I/O you have currently accumulated. + * + * This all causes the disk requests to be issued in the correct order. 
+ */ int mpage_readpages(struct address_space *mapping, struct list_head *pages, unsigned nr_pages, get_block_t get_block) @@ -326,7 +390,10 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages, unsigned page_idx; sector_t last_block_in_bio = 0; struct pagevec lru_pvec; + struct buffer_head map_bh; + unsigned long first_logical_block = 0; + clear_buffer_mapped(&map_bh); pagevec_init(&lru_pvec, 0); for (page_idx = 0; page_idx < nr_pages; page_idx++) { struct page *page = list_entry(pages->prev, struct page, lru); @@ -337,7 +404,9 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages, page->index, GFP_KERNEL)) { bio = do_mpage_readpage(bio, page, nr_pages - page_idx, - &last_block_in_bio, get_block); + &last_block_in_bio, &map_bh, + &first_logical_block, + get_block); if (!pagevec_add(&lru_pvec, page)) __pagevec_lru_add(&lru_pvec); } else { @@ -359,9 +428,12 @@ int mpage_readpage(struct page *page, get_block_t get_block) { struct bio *bio = NULL; sector_t last_block_in_bio = 0; + struct buffer_head map_bh; + unsigned long first_logical_block = 0; - bio = do_mpage_readpage(bio, page, 1, - &last_block_in_bio, get_block); + clear_buffer_mapped(&map_bh); + bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio, + &map_bh, &first_logical_block, get_block); if (bio) mpage_bio_submit(READ, bio); return 0; @@ -385,8 +457,9 @@ EXPORT_SYMBOL(mpage_readpage); * just allocate full-size (16-page) BIOs. 
*/ static struct bio * -mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block, - sector_t *last_block_in_bio, int *ret, struct writeback_control *wbc) +__mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block, + sector_t *last_block_in_bio, int *ret, struct writeback_control *wbc, + writepage_t writepage_fn) { struct address_space *mapping = page->mapping; struct inode *inode = page->mapping->host; @@ -460,12 +533,13 @@ mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block, * The page has no buffers: map it to disk */ BUG_ON(!PageUptodate(page)); - block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits); + block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits); last_block = (i_size - 1) >> blkbits; map_bh.b_page = page; for (page_block = 0; page_block < blocks_per_page; ) { map_bh.b_state = 0; + map_bh.b_size = 1 << blkbits; if (get_block(inode, block_in_file, &map_bh, 1)) goto confused; if (buffer_new(&map_bh)) @@ -579,7 +653,13 @@ alloc_new: confused: if (bio) bio = mpage_bio_submit(WRITE, bio); - *ret = page->mapping->a_ops->writepage(page, wbc); + + if (writepage_fn) { + *ret = (*writepage_fn)(page, wbc); + } else { + *ret = -EAGAIN; + goto out; + } /* * The caller has a ref on the inode, so *mapping is stable */ @@ -613,6 +693,8 @@ out: * the call was made get new I/O started against them. If wbc->sync_mode is * WB_SYNC_ALL then we were called for data integrity and we must wait for * existing IO to complete. + * + * If you fix this you should check generic_writepages() also! 
*/ int mpage_writepages(struct address_space *mapping, @@ -627,7 +709,9 @@ mpage_writepages(struct address_space *mapping, struct pagevec pvec; int nr_pages; pgoff_t index; + pgoff_t end; /* Inclusive */ int scanned = 0; + int range_whole = 0; if (wbc->nonblocking && bdi_write_congested(bdi)) { wbc->encountered_congestion = 1; @@ -639,15 +723,21 @@ mpage_writepages(struct address_space *mapping, writepage = mapping->a_ops->writepage; pagevec_init(&pvec, 0); - if (wbc->sync_mode == WB_SYNC_NONE) { + if (wbc->range_cyclic) { index = mapping->writeback_index; /* Start from prev offset */ + end = -1; } else { - index = 0; /* whole-file sweep */ + index = wbc->range_start >> PAGE_CACHE_SHIFT; + end = wbc->range_end >> PAGE_CACHE_SHIFT; + if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) + range_whole = 1; scanned = 1; } retry: - while (!done && (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_DIRTY, PAGEVEC_SIZE))) { + while (!done && (index <= end) && + (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { unsigned i; scanned = 1; @@ -664,10 +754,21 @@ retry: lock_page(page); + if (unlikely(page->mapping != mapping)) { + unlock_page(page); + continue; + } + + if (!wbc->range_cyclic && page->index > end) { + done = 1; + unlock_page(page); + continue; + } + if (wbc->sync_mode != WB_SYNC_NONE) wait_on_page_writeback(page); - if (page->mapping != mapping || PageWriteback(page) || + if (PageWriteback(page) || !clear_page_dirty_for_io(page)) { unlock_page(page); continue; @@ -684,9 +785,12 @@ retry: &mapping->flags); } } else { - bio = mpage_writepage(bio, page, get_block, - &last_block_in_bio, &ret, wbc); + bio = __mpage_writepage(bio, page, get_block, + &last_block_in_bio, &ret, wbc, + page->mapping->a_ops->writepage); } + if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) + unlock_page(page); if (ret || (--(wbc->nr_to_write) <= 0)) done = 1; if (wbc->nonblocking && 
bdi_write_congested(bdi)) { @@ -706,9 +810,26 @@ retry: index = 0; goto retry; } - mapping->writeback_index = index; + if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) + mapping->writeback_index = index; if (bio) mpage_bio_submit(WRITE, bio); return ret; } EXPORT_SYMBOL(mpage_writepages); + +int mpage_writepage(struct page *page, get_block_t get_block, + struct writeback_control *wbc) +{ + int ret = 0; + struct bio *bio; + sector_t last_block_in_bio = 0; + + bio = __mpage_writepage(NULL, page, get_block, + &last_block_in_bio, &ret, wbc, NULL); + if (bio) + mpage_bio_submit(WRITE, bio); + + return ret; +} +EXPORT_SYMBOL(mpage_writepage);