X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Fjbd%2Fcommit.c;h=002ad2bbc76992b6acda52def147799886147693;hb=43bc926fffd92024b46cafaf7350d669ba9ca884;hp=a540c7f03da69f0f4f1dd73872bfbc2477f48267;hpb=9bf4aaab3e101692164d49b7ca357651eb691cb6;p=linux-2.6.git diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index a540c7f03..002ad2bbc 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -93,6 +93,73 @@ static int inverted_lock(journal_t *journal, struct buffer_head *bh) return 1; } +/* Write the commit record for commit_transaction to the journal. + * + * Done it all: now write the commit record. We should have + * cleaned up our previous buffers by now, so if we are in abort + * mode we can now just skip the rest of the journal write + * entirely. + * + * The record is built in the buffer returned by + * journal_get_descriptor_buffer() and pushed out with + * sync_dirty_buffer(). If JFS_BARRIER is set in journal->j_flags the + * buffer is marked ordered before the write; when that write comes back + * with -EOPNOTSUPP the flag is cleared under j_state_lock and the write + * is issued once more without the ordered bit. + * + * Returns 1 if the journal needs to be aborted (no descriptor buffer was + * obtained, or the last write returned -EIO) and 0 in every other case, + * including when the journal was already aborted on entry and nothing + * was written. + */ +static int journal_write_commit_record(journal_t *journal, + transaction_t *commit_transaction) +{ + struct journal_head *descriptor; + struct buffer_head *bh; + int i, ret; + int barrier_done = 0; /* becomes 1 once this write has been flagged as ordered */ + + if (is_journal_aborted(journal)) + return 0; /* already aborted: skip the commit block, request no new abort */ + + descriptor = journal_get_descriptor_buffer(journal); + if (!descriptor) + return 1; /* no buffer to build the commit block in: report that an abort is needed */ + + bh = jh2bh(descriptor); + + /* AKPM: buglet - add `i' to tmp! + * NOTE(review): tmp is taken from bh->b_data without the offset i, so + * every pass of this loop stores the same three header fields at the + * start of the buffer. + */ + for (i = 0; i < bh->b_size; i += 512) { + journal_header_t *tmp = (journal_header_t*)bh->b_data; + tmp->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER); + tmp->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK); + tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); + } + + JBUFFER_TRACE(descriptor, "write commit block"); + set_buffer_dirty(bh); + /* Only flag the buffer as ordered when the journal currently has barriers enabled */ if (journal->j_flags & JFS_BARRIER) { + set_buffer_ordered(bh); + barrier_done = 1; + } + ret = sync_dirty_buffer(bh); + /* is it possible for another commit to fail at roughly + * the same time as this one? 
If so, we don't want to + * trust the barrier flag in the super, but instead want + * to remember if we sent a barrier request + */ + if (ret == -EOPNOTSUPP && barrier_done) { + char b[BDEVNAME_SIZE]; + + printk(KERN_WARNING + "JBD: barrier-based sync failed on %s - " + "disabling barriers\n", + bdevname(journal->j_dev, b)); + spin_lock(&journal->j_state_lock); + journal->j_flags &= ~JFS_BARRIER; + spin_unlock(&journal->j_state_lock); + + /* And try again, without the barrier */ + clear_buffer_ordered(bh); + set_buffer_uptodate(bh); + set_buffer_dirty(bh); + ret = sync_dirty_buffer(bh); + } + put_bh(bh); /* One for getblk() */ + journal_put_journal_head(descriptor); + + return (ret == -EIO); +} + /* * journal_commit_transaction * @@ -103,7 +170,7 @@ void journal_commit_transaction(journal_t *journal) { transaction_t *commit_transaction; struct journal_head *jh, *new_jh, *descriptor; - struct buffer_head *wbuf[64]; + struct buffer_head **wbuf = journal->j_wbuf; int bufs; int flags; int err; @@ -194,10 +261,8 @@ void journal_commit_transaction(journal_t *journal) struct buffer_head *bh = jh2bh(jh); jbd_lock_bh_state(bh); - if (jh->b_committed_data) { - kfree(jh->b_committed_data); - jh->b_committed_data = NULL; - } + kfree(jh->b_committed_data); + jh->b_committed_data = NULL; jbd_unlock_bh_state(bh); } journal_refile_buffer(journal, jh); @@ -228,6 +293,22 @@ void journal_commit_transaction(journal_t *journal) jbd_debug (3, "JBD: commit phase 2\n"); + /* + * First, drop modified flag: all accesses to the buffers + * will be tracked for a new trasaction only -bzzz + */ + spin_lock(&journal->j_list_lock); + if (commit_transaction->t_buffers) { + new_jh = jh = commit_transaction->t_buffers->b_tnext; + do { + J_ASSERT_JH(new_jh, new_jh->b_modified == 1 || + new_jh->b_modified == 0); + new_jh->b_modified = 0; + new_jh = new_jh->b_tnext; + } while (new_jh != jh); + } + spin_unlock(&journal->j_list_lock); + /* * Now start flushing things to disk, in the order they appear * 
on the transaction lists. Data blocks go first. @@ -258,11 +339,11 @@ write_out_data: BUFFER_TRACE(bh, "locked"); if (!inverted_lock(journal, bh)) goto write_out_data; - __journal_unfile_buffer(jh); + __journal_temp_unlink_buffer(jh); __journal_file_buffer(jh, commit_transaction, BJ_Locked); jbd_unlock_bh_state(bh); - if (need_resched()) { + if (lock_need_resched(&journal->j_list_lock)) { spin_unlock(&journal->j_list_lock); goto write_out_data; } @@ -271,11 +352,11 @@ write_out_data: BUFFER_TRACE(bh, "start journal writeout"); get_bh(bh); wbuf[bufs++] = bh; - if (bufs == ARRAY_SIZE(wbuf)) { + if (bufs == journal->j_wbufsize) { jbd_debug(2, "submit %d writes\n", bufs); spin_unlock(&journal->j_list_lock); - ll_rw_block(WRITE, bufs, wbuf); + ll_rw_block(SWRITE, bufs, wbuf); journal_brelse_array(wbuf, bufs); bufs = 0; goto write_out_data; @@ -288,7 +369,7 @@ write_out_data: jbd_unlock_bh_state(bh); journal_remove_journal_head(bh); put_bh(bh); - if (need_resched()) { + if (lock_need_resched(&journal->j_list_lock)) { spin_unlock(&journal->j_list_lock); goto write_out_data; } @@ -298,7 +379,7 @@ write_out_data: if (bufs) { spin_unlock(&journal->j_list_lock); - ll_rw_block(WRITE, bufs, wbuf); + ll_rw_block(SWRITE, bufs, wbuf); journal_brelse_array(wbuf, bufs); spin_lock(&journal->j_list_lock); } @@ -333,14 +414,13 @@ write_out_data: jbd_unlock_bh_state(bh); } put_bh(bh); - if (need_resched()) { - spin_unlock(&journal->j_list_lock); - cond_resched(); - spin_lock(&journal->j_list_lock); - } + cond_resched_lock(&journal->j_list_lock); } spin_unlock(&journal->j_list_lock); + if (err) + __journal_abort_hard(journal); + journal_write_revoke_records(journal, commit_transaction); jbd_debug(3, "JBD: commit phase 2\n"); @@ -405,9 +485,9 @@ write_out_data: jbd_debug(4, "JBD: got buffer %llu (%p)\n", (unsigned long long)bh->b_blocknr, bh->b_data); header = (journal_header_t *)&bh->b_data[0]; - header->h_magic = htonl(JFS_MAGIC_NUMBER); - header->h_blocktype = 
htonl(JFS_DESCRIPTOR_BLOCK); - header->h_sequence = htonl(commit_transaction->t_tid); + header->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER); + header->h_blocktype = cpu_to_be32(JFS_DESCRIPTOR_BLOCK); + header->h_sequence = cpu_to_be32(commit_transaction->t_tid); tagp = &bh->b_data[sizeof(journal_header_t)]; space_left = bh->b_size - sizeof(journal_header_t); @@ -473,8 +553,8 @@ write_out_data: tag_flag |= JFS_FLAG_SAME_UUID; tag = (journal_block_tag_t *) tagp; - tag->t_blocknr = htonl(jh2bh(jh)->b_blocknr); - tag->t_flags = htonl(tag_flag); + tag->t_blocknr = cpu_to_be32(jh2bh(jh)->b_blocknr); + tag->t_flags = cpu_to_be32(tag_flag); tagp += sizeof(journal_block_tag_t); space_left -= sizeof(journal_block_tag_t); @@ -488,7 +568,7 @@ write_out_data: /* If there's no more to do, or if the descriptor is full, let the IO rip! */ - if (bufs == ARRAY_SIZE(wbuf) || + if (bufs == journal->j_wbufsize || commit_transaction->t_buffers == NULL || space_left < sizeof(journal_block_tag_t) + 16) { @@ -498,7 +578,7 @@ write_out_data: submitting the IOs. "tag" still points to the last tag we set up. 
*/ - tag->t_flags |= htonl(JFS_FLAG_LAST_TAG); + tag->t_flags |= cpu_to_be32(JFS_FLAG_LAST_TAG); start_journal_io: for (i = 0; i < bufs; i++) { @@ -545,6 +625,8 @@ wait_for_iobuf: wait_on_buffer(bh); goto wait_for_iobuf; } + if (cond_resched()) + goto wait_for_iobuf; if (unlikely(!buffer_uptodate(bh))) err = -EIO; @@ -579,7 +661,7 @@ wait_for_iobuf: journal_file_buffer(jh, commit_transaction, BJ_Forget); /* Wake up any transactions which were waiting for this IO to complete */ - wake_up_buffer(bh); + wake_up_bit(&bh->b_state, BH_Unshadow); JBUFFER_TRACE(jh, "brelse shadowed buffer"); __brelse(bh); } @@ -599,6 +681,8 @@ wait_for_iobuf: wait_on_buffer(bh); goto wait_for_ctlbuf; } + if (cond_resched()) + goto wait_for_ctlbuf; if (unlikely(!buffer_uptodate(bh))) err = -EIO; @@ -613,75 +697,17 @@ wait_for_iobuf: jbd_debug(3, "JBD: commit phase 6\n"); - if (is_journal_aborted(journal)) - goto skip_commit; + if (journal_write_commit_record(journal, commit_transaction)) + err = -EIO; - /* Done it all: now write the commit record. We should have - * cleaned up our previous buffers by now, so if we are in abort - * mode we can now just skip the rest of the journal write - * entirely. */ - - descriptor = journal_get_descriptor_buffer(journal); - if (!descriptor) { + if (err) __journal_abort_hard(journal); - goto skip_commit; - } - - /* AKPM: buglet - add `i' to tmp! */ - for (i = 0; i < jh2bh(descriptor)->b_size; i += 512) { - journal_header_t *tmp = - (journal_header_t*)jh2bh(descriptor)->b_data; - tmp->h_magic = htonl(JFS_MAGIC_NUMBER); - tmp->h_blocktype = htonl(JFS_COMMIT_BLOCK); - tmp->h_sequence = htonl(commit_transaction->t_tid); - } - - JBUFFER_TRACE(descriptor, "write commit block"); - { - struct buffer_head *bh = jh2bh(descriptor); - - set_buffer_dirty(bh); - sync_dirty_buffer(bh); - if (unlikely(!buffer_uptodate(bh))) - err = -EIO; - put_bh(bh); /* One for getblk() */ - journal_put_journal_head(descriptor); - } /* End of a transaction! 
Finally, we can do checkpoint processing: any buffers committed as a result of this transaction can be removed from any checkpoint list it was on before. */ -skip_commit: /* The journal should be unlocked by now. */ - - if (err) - __journal_abort_hard(journal); - - /* - * Call any callbacks that had been registered for handles in this - * transaction. It is up to the callback to free any allocated - * memory. - * - * The spinlocking (t_jcb_lock) here is surely unnecessary... - */ - spin_lock(&commit_transaction->t_jcb_lock); - if (!list_empty(&commit_transaction->t_jcb)) { - struct list_head *p, *n; - int error = is_journal_aborted(journal); - - list_for_each_safe(p, n, &commit_transaction->t_jcb) { - struct journal_callback *jcb; - - jcb = list_entry(p, struct journal_callback, jcb_list); - list_del(p); - spin_unlock(&commit_transaction->t_jcb_lock); - jcb->jcb_func(jcb, error); - spin_lock(&commit_transaction->t_jcb_lock); - } - } - spin_unlock(&commit_transaction->t_jcb_lock); - jbd_debug(3, "JBD: commit phase 7\n"); J_ASSERT(commit_transaction->t_sync_datalist == NULL); @@ -691,11 +717,18 @@ skip_commit: /* The journal should be unlocked by now. */ J_ASSERT(commit_transaction->t_shadow_list == NULL); J_ASSERT(commit_transaction->t_log_list == NULL); +restart_loop: + /* + * As there are other places (journal_unmap_buffer()) adding buffers + * to this list we have to be careful and hold the j_list_lock. + */ + spin_lock(&journal->j_list_lock); while (commit_transaction->t_forget) { transaction_t *cp_transaction; struct buffer_head *bh; jh = commit_transaction->t_forget; + spin_unlock(&journal->j_list_lock); bh = jh2bh(jh); jbd_lock_bh_state(bh); J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || @@ -763,7 +796,25 @@ skip_commit: /* The journal should be unlocked by now. 
*/ journal_remove_journal_head(bh); /* needs a brelse */ release_buffer_page(bh); } + cond_resched_lock(&journal->j_list_lock); + } + spin_unlock(&journal->j_list_lock); + /* + * This is a bit sleazy. We borrow j_list_lock to protect + * journal->j_committing_transaction in __journal_remove_checkpoint. + * Really, __journal_remove_checkpoint should be using j_state_lock but + * it's a bit hassle to hold that across __journal_remove_checkpoint + */ + spin_lock(&journal->j_state_lock); + spin_lock(&journal->j_list_lock); + /* + * Now recheck if some buffers did not get attached to the transaction + * while the lock was dropped... + */ + if (commit_transaction->t_forget) { spin_unlock(&journal->j_list_lock); + spin_unlock(&journal->j_state_lock); + goto restart_loop; } /* Done with this transaction! */ @@ -772,14 +823,6 @@ skip_commit: /* The journal should be unlocked by now. */ J_ASSERT(commit_transaction->t_state == T_COMMIT); - /* - * This is a bit sleazy. We borrow j_list_lock to protect - * journal->j_committing_transaction in __journal_remove_checkpoint. - * Really, __jornal_remove_checkpoint should be using j_state_lock but - * it's a bit hassle to hold that across __journal_remove_checkpoint - */ - spin_lock(&journal->j_state_lock); - spin_lock(&journal->j_list_lock); commit_transaction->t_state = T_FINISHED; J_ASSERT(commit_transaction == journal->j_committing_transaction); journal->j_commit_sequence = commit_transaction->t_tid;