X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Focfs2%2Fbuffer_head_io.c;h=c9037414f4f65fad9f0b812b9ba75f7c9823cc6d;hb=refs%2Fheads%2Fvserver;hp=bae3d7548beae59961a436033fd69a99da391a57;hpb=76828883507a47dae78837ab5dec5a5b4513c667;p=linux-2.6.git diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index bae3d7548..c9037414f 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -97,8 +97,11 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, int i, ignore_cache = 0; struct buffer_head *bh; - mlog_entry("(block=(%"MLFu64"), nr=(%d), flags=%d, inode=%p)\n", - block, nr, flags, inode); + mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n", + (unsigned long long)block, nr, flags, inode); + + BUG_ON((flags & OCFS2_BH_READAHEAD) && + (!inode || !(flags & OCFS2_BH_CACHED))); if (osb == NULL || osb->sb == NULL || bhs == NULL) { status = -EINVAL; @@ -140,12 +143,36 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, bh = bhs[i]; ignore_cache = 0; + /* There are three read-ahead cases here which we need to + * be concerned with. All three assume a buffer has + * previously been submitted with OCFS2_BH_READAHEAD + * and it hasn't yet completed I/O. + * + * 1) The current request is sync to disk. This rarely + * happens these days, and never when performance + * matters - the code can just wait on the buffer + * lock and re-submit. + * + * 2) The current request is cached, but not + * readahead. ocfs2_buffer_uptodate() will return + * false anyway, so we'll wind up waiting on the + * buffer lock to do I/O. We re-check the request + * with after getting the lock to avoid a re-submit. + * + * 3) The current request is readahead (and so must + * also be a caching one). We short circuit if the + * buffer is locked (under I/O) and if it's in the + * uptodate cache. The re-check from #2 catches the + * case that the previous read-ahead completes just + * before our is-it-in-flight check. + */ + if (flags & OCFS2_BH_CACHED && !ocfs2_buffer_uptodate(inode, bh)) { mlog(ML_UPTODATE, - "bh (%llu), inode %"MLFu64" not uptodate\n", + "bh (%llu), inode %llu not uptodate\n", (unsigned long long)bh->b_blocknr, - OCFS2_I(inode)->ip_blkno); + (unsigned long long)OCFS2_I(inode)->ip_blkno); ignore_cache = 1; } @@ -169,6 +196,14 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, continue; } + /* A read-ahead request was made - if the + * buffer is already under read-ahead from a + * previously submitted request than we are + * done here. */ + if ((flags & OCFS2_BH_READAHEAD) + && ocfs2_buffer_read_ahead(inode, bh)) + continue; + lock_buffer(bh); if (buffer_jbd(bh)) { #ifdef CATCH_BH_JBD_RACES @@ -181,13 +216,22 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, continue; #endif } + + /* Re-check ocfs2_buffer_uptodate() as a + * previously read-ahead buffer may have + * completed I/O while we were waiting for the + * buffer lock. */ + if ((flags & OCFS2_BH_CACHED) + && !(flags & OCFS2_BH_READAHEAD) + && ocfs2_buffer_uptodate(inode, bh)) { + unlock_buffer(bh); + continue; + } + clear_buffer_uptodate(bh); get_bh(bh); /* for end_buffer_read_sync() */ bh->b_end_io = end_buffer_read_sync; - if (flags & OCFS2_BH_READAHEAD) - submit_bh(READA, bh); - else - submit_bh(READ, bh); + submit_bh(READ, bh); continue; } } @@ -197,33 +241,39 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, for (i = (nr - 1); i >= 0; i--) { bh = bhs[i]; - /* We know this can't have changed as we hold the - * inode sem. Avoid doing any work on the bh if the - * journal has it. */ - if (!buffer_jbd(bh)) - wait_on_buffer(bh); - - if (!buffer_uptodate(bh)) { - /* Status won't be cleared from here on out, - * so we can safely record this and loop back - * to cleanup the other buffers. Don't need to - * remove the clustered uptodate information - * for this bh as it's not marked locally - * uptodate. */ - status = -EIO; - brelse(bh); - bhs[i] = NULL; - continue; + if (!(flags & OCFS2_BH_READAHEAD)) { + /* We know this can't have changed as we hold the + * inode sem. Avoid doing any work on the bh if the + * journal has it. */ + if (!buffer_jbd(bh)) + wait_on_buffer(bh); + + if (!buffer_uptodate(bh)) { + /* Status won't be cleared from here on out, + * so we can safely record this and loop back + * to cleanup the other buffers. Don't need to + * remove the clustered uptodate information + * for this bh as it's not marked locally + * uptodate. */ + status = -EIO; + brelse(bh); + bhs[i] = NULL; + continue; + } } + /* Always set the buffer in the cache, even if it was + * a forced read, or read-ahead which hasn't yet + * completed. */ if (inode) ocfs2_set_buffer_uptodate(inode, bh); } if (inode) mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); - mlog(ML_BH_IO, "block=(%"MLFu64"), nr=(%d), cached=%s\n", block, nr, - (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes"); + mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", + (unsigned long long)block, nr, + (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes", flags); bail: