diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 733e500a3..3eb8be626 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -53,8 +53,8 @@ get_transaction(journal_t *journal, transaction_t *transaction)
 	spin_lock_init(&transaction->t_handle_lock);
 
 	/* Set up the commit timer for the new transaction. */
-	journal->j_commit_timer->expires = transaction->t_expires;
-	add_timer(journal->j_commit_timer);
+	journal->j_commit_timer.expires = transaction->t_expires;
+	add_timer(&journal->j_commit_timer);
 
 	J_ASSERT(journal->j_running_transaction == NULL);
 	journal->j_running_transaction = transaction;
@@ -227,7 +227,7 @@ repeat_locked:
 	spin_unlock(&transaction->t_handle_lock);
 	spin_unlock(&journal->j_state_lock);
 out:
-	if (new_transaction)
+	if (unlikely(new_transaction))	/* It's usually NULL */
 		kfree(new_transaction);
 	return ret;
 }
@@ -456,7 +456,7 @@ void journal_lock_updates(journal_t *journal)
 	 * to make sure that we serialise special journal-locked operations
 	 * too.
 	 */
-	down(&journal->j_barrier);
+	mutex_lock(&journal->j_barrier);
 }
 
 /**
@@ -471,7 +471,7 @@ void journal_unlock_updates (journal_t *journal)
 {
 	J_ASSERT(journal->j_barrier_count != 0);
 
-	up(&journal->j_barrier);
+	mutex_unlock(&journal->j_barrier);
 	spin_lock(&journal->j_state_lock);
 	--journal->j_barrier_count;
 	spin_unlock(&journal->j_state_lock);
@@ -490,23 +490,21 @@ void journal_unlock_updates (journal_t *journal)
  */
 static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
 {
-	struct buffer_head *bh = jh2bh(jh);
 	int jlist;
 
-	if (buffer_dirty(bh)) {
-		/* If this buffer is one which might reasonably be dirty
-		 * --- ie. data, or not part of this journal --- then
-		 * we're OK to leave it alone, but otherwise we need to
-		 * move the dirty bit to the journal's own internal
-		 * JBDDirty bit. */
-		jlist = jh->b_jlist;
-
-		if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
-		    jlist == BJ_Shadow || jlist == BJ_Forget) {
-			if (test_clear_buffer_dirty(jh2bh(jh))) {
-				set_bit(BH_JBDDirty, &jh2bh(jh)->b_state);
-			}
-		}
+	/* If this buffer is one which might reasonably be dirty
+	 * --- ie. data, or not part of this journal --- then
+	 * we're OK to leave it alone, but otherwise we need to
+	 * move the dirty bit to the journal's own internal
+	 * JBDDirty bit. */
+	jlist = jh->b_jlist;
+
+	if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
+	    jlist == BJ_Shadow || jlist == BJ_Forget) {
+		struct buffer_head *bh = jh2bh(jh);
+
+		if (test_clear_buffer_dirty(bh))
+			set_buffer_jbddirty(bh);
 	}
 }
 
@@ -522,7 +520,7 @@ static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
  */
 static int
 do_get_write_access(handle_t *handle, struct journal_head *jh,
-			int force_copy, int *credits)
+			int force_copy)
 {
 	struct buffer_head *bh;
 	transaction_t	*transaction;
@@ -574,9 +572,14 @@ repeat:
 			if (jh->b_next_transaction)
 				J_ASSERT_JH(jh, jh->b_next_transaction ==
 							transaction);
-
-			JBUFFER_TRACE(jh, "Unexpected dirty buffer");
-			jbd_unexpected_dirty_buffer(jh);
-		}
+		}
+		/*
+		 * In any case we need to clean the dirty flag and we must
+		 * do it under the buffer lock to be sure we don't race
+		 * with running write-out.
+		 */
+		JBUFFER_TRACE(jh, "Unexpected dirty buffer");
+		jbd_unexpected_dirty_buffer(jh);
 	}
 
 	unlock_buffer(bh);
@@ -604,11 +607,6 @@ repeat:
 		JBUFFER_TRACE(jh, "has frozen data");
 		J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
 		jh->b_next_transaction = transaction;
-
-		J_ASSERT_JH(jh, handle->h_buffer_credits > 0);
-		handle->h_buffer_credits--;
-		if (credits)
-			(*credits)++;
 		goto done;
 	}
 
@@ -668,8 +666,9 @@ repeat:
 			if (!frozen_buffer) {
 				JBUFFER_TRACE(jh, "allocate memory for buffer");
 				jbd_unlock_bh_state(bh);
-				frozen_buffer = jbd_kmalloc(jh2bh(jh)->b_size,
-							    GFP_NOFS);
+				frozen_buffer =
+					jbd_slab_alloc(jh2bh(jh)->b_size,
+						       GFP_NOFS);
 				if (!frozen_buffer) {
 					printk(KERN_EMERG
 					       "%s: OOM for frozen_buffer\n",
@@ -688,10 +687,6 @@ repeat:
 			jh->b_next_transaction = transaction;
 	}
 
-	J_ASSERT(handle->h_buffer_credits > 0);
-	handle->h_buffer_credits--;
-	if (credits)
-		(*credits)++;
 
 	/*
 	 * Finally, if the buffer is not journaled right now, we need to make
@@ -731,8 +726,8 @@ done:
 		journal_cancel_revoke(handle, jh);
 
 out:
-	if (frozen_buffer)
-		kfree(frozen_buffer);
+	if (unlikely(frozen_buffer))	/* It's usually NULL */
+		jbd_slab_free(frozen_buffer, bh->b_size);
 
 	JBUFFER_TRACE(jh, "exit");
 	return error;
@@ -742,6 +737,7 @@ out:
  * int journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
  * @handle: transaction to add buffer modifications to
  * @bh: bh to be used for metadata writes
+ * @credits: variable that will receive credits for the buffer
  *
  * Returns an error code or 0 on success.
  *
@@ -749,8 +745,7 @@ out:
  * because we're write()ing a buffer which is also part of a shared mapping.
  */
 
-int journal_get_write_access(handle_t *handle,
-			struct buffer_head *bh, int *credits)
+int journal_get_write_access(handle_t *handle, struct buffer_head *bh)
 {
 	struct journal_head *jh = journal_add_journal_head(bh);
 	int rc;
@@ -758,7 +753,7 @@ int journal_get_write_access(handle_t *handle,
 	/* We do not want to get caught playing with fields which the
 	 * log thread also manipulates. Make sure that the buffer
 	 * completes any outstanding IO before proceeding. */
-	rc = do_get_write_access(handle, jh, 0, credits);
+	rc = do_get_write_access(handle, jh, 0);
 	journal_put_journal_head(jh);
 	return rc;
 }
@@ -814,9 +809,6 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
 	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
 	J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
 
-	J_ASSERT_JH(jh, handle->h_buffer_credits > 0);
-	handle->h_buffer_credits--;
-
 	if (jh->b_transaction == NULL) {
 		jh->b_transaction = transaction;
 		JBUFFER_TRACE(jh, "file as BJ_Reserved");
@@ -869,8 +861,7 @@ out:
  *
  * Returns error number or 0 on success.
  */
-int journal_get_undo_access(handle_t *handle, struct buffer_head *bh,
-				int *credits)
+int journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
 {
 	int err;
 	struct journal_head *jh = journal_add_journal_head(bh);
@@ -883,13 +874,13 @@ int journal_get_undo_access(handle_t *handle, struct buffer_head *bh,
 	 * make sure that obtaining the committed_data is done
 	 * atomically wrt. completion of any outstanding commits.
 	 */
-	err = do_get_write_access(handle, jh, 1, credits);
+	err = do_get_write_access(handle, jh, 1);
 	if (err)
 		goto out;
 
 repeat:
 	if (!jh->b_committed_data) {
-		committed_data = jbd_kmalloc(jh2bh(jh)->b_size, GFP_NOFS);
+		committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS);
 		if (!committed_data) {
 			printk(KERN_EMERG "%s: No memory for committed data\n",
 				__FUNCTION__);
@@ -915,8 +906,8 @@ repeat:
 	jbd_unlock_bh_state(bh);
 out:
 	journal_put_journal_head(jh);
-	if (committed_data)
-		kfree(committed_data);
+	if (unlikely(committed_data))
+		jbd_slab_free(committed_data, bh->b_size);
 	return err;
 }
 
@@ -976,6 +967,14 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 	 */
 	jbd_lock_bh_state(bh);
 	spin_lock(&journal->j_list_lock);
+
+	/* Now that we have bh_state locked, are we really still mapped? */
+	if (!buffer_mapped(bh)) {
+		JBUFFER_TRACE(jh, "unmapped, bailing out");
+		// printk("caught an unmapped buffer\n");
+		goto no_journal;
+	}
+
 	if (jh->b_transaction) {
 		JBUFFER_TRACE(jh, "has transaction");
 		if (jh->b_transaction != handle->h_transaction) {
@@ -1037,6 +1036,11 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 			sync_dirty_buffer(bh);
 			jbd_lock_bh_state(bh);
 			spin_lock(&journal->j_list_lock);
+			/* Since we dropped the lock... */
+			if (!buffer_mapped(bh)) {
+				JBUFFER_TRACE(jh, "Got unmapped");
+				goto no_journal;
+			}
 			/* The buffer may become locked again at any time if it is
 			redirtied */
 		}
@@ -1044,7 +1048,12 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 		/* journal_clean_data_list() may have got there first */
 		if (jh->b_transaction != NULL) {
 			JBUFFER_TRACE(jh, "unfile from commit");
-			__journal_unfile_buffer(jh);
+			__journal_temp_unlink_buffer(jh);
+			/* It still points to the committing
+			 * transaction; move it to this one so
+			 * that the refile assert checks are
+			 * happy. */
+			jh->b_transaction = handle->h_transaction;
 		}
 		/* The buffer will be refiled below */
 
@@ -1058,7 +1067,8 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 	if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) {
 		JBUFFER_TRACE(jh, "not on correct data list: unfile");
 		J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow);
-		__journal_unfile_buffer(jh);
+		__journal_temp_unlink_buffer(jh);
+		jh->b_transaction = handle->h_transaction;
 		JBUFFER_TRACE(jh, "file as data");
 		__journal_file_buffer(jh, handle->h_transaction,
 						BJ_SyncData);
@@ -1111,6 +1121,17 @@ int journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 
 	jbd_lock_bh_state(bh);
 
+	if (jh->b_modified == 0) {
+		/*
+		 * This buffer's got modified and becoming part
+		 * of the transaction. This needs to be done
+		 * once a transaction -bzzz
+		 */
+		jh->b_modified = 1;
+		J_ASSERT_JH(jh, handle->h_buffer_credits > 0);
+		handle->h_buffer_credits--;
+	}
+
 	/*
 	 * fastpath, to avoid expensive locking. If this buffer is already
 	 * on the running transaction's metadata list there is nothing to do.
@@ -1161,24 +1182,11 @@ out:
  * journal_release_buffer: undo a get_write_access without any buffer
  * updates, if the update decided in the end that it didn't need access.
  *
- * The caller passes in the number of credits which should be put back for
- * this buffer (zero or one).
- *
- * We leave the buffer attached to t_reserved_list because even though this
- * handle doesn't want it, some other concurrent handle may want to journal
- * this buffer. If that handle is curently in between get_write_access() and
- * journal_dirty_metadata() then it expects the buffer to be reserved. If
- * we were to rip it off t_reserved_list here, the other handle will explode
- * when journal_dirty_metadata is presented with a non-reserved buffer.
- *
- * If nobody really wants to journal this buffer then it will be thrown
- * away at the start of commit.
  */
 void
-journal_release_buffer(handle_t *handle, struct buffer_head *bh, int credits)
+journal_release_buffer(handle_t *handle, struct buffer_head *bh)
 {
 	BUFFER_TRACE(bh, "entry");
-	handle->h_buffer_credits += credits;
 }
 
 /**
@@ -1203,8 +1211,9 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
 	transaction_t *transaction = handle->h_transaction;
 	journal_t *journal = transaction->t_journal;
 	struct journal_head *jh;
+	int drop_reserve = 0;
 	int err = 0;
-	
+
 	BUFFER_TRACE(bh, "entry");
 
 	jbd_lock_bh_state(bh);
@@ -1222,6 +1231,12 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
 		goto not_jbd;
 	}
 
+	/*
+	 * The buffer's going from the transaction, we must drop
+	 * all references -bzzz
+	 */
+	jh->b_modified = 0;
+
 	if (jh->b_transaction == handle->h_transaction) {
 		J_ASSERT_JH(jh, !jh->b_frozen_data);
 
@@ -1233,7 +1248,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
 
 		JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
 
-		__journal_unfile_buffer(jh);
+		drop_reserve = 1;
 
 		/*
 		 * We are no longer going to journal this buffer.
@@ -1248,15 +1263,17 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
 		 */
 
 		if (jh->b_cp_transaction) {
+			__journal_temp_unlink_buffer(jh);
 			__journal_file_buffer(jh, transaction, BJ_Forget);
 		} else {
+			__journal_unfile_buffer(jh);
 			journal_remove_journal_head(bh);
 			__brelse(bh);
 			if (!buffer_jbd(bh)) {
 				spin_unlock(&journal->j_list_lock);
 				jbd_unlock_bh_state(bh);
 				__bforget(bh);
-				return 0;
+				goto drop;
 			}
 		}
 	} else if (jh->b_transaction) {
@@ -1271,6 +1288,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
 		if (jh->b_next_transaction) {
 			J_ASSERT(jh->b_next_transaction == transaction);
 			jh->b_next_transaction = NULL;
+			drop_reserve = 1;
 		}
 	}
 
@@ -1278,6 +1296,11 @@ not_jbd:
 	spin_unlock(&journal->j_list_lock);
 	jbd_unlock_bh_state(bh);
 	__brelse(bh);
+drop:
+	if (drop_reserve) {
+		/* no need to reserve log space for this block -bzzz */
+		handle->h_buffer_credits++;
+	}
 	return err;
 }
 
@@ -1302,6 +1325,7 @@ int journal_stop(handle_t *handle)
 	transaction_t *transaction = handle->h_transaction;
 	journal_t *journal = transaction->t_journal;
 	int old_handle_count, err;
+	pid_t pid;
 
 	J_ASSERT(transaction->t_updates > 0);
 	J_ASSERT(journal_current_handle() == handle);
@@ -1327,12 +1351,18 @@ int journal_stop(handle_t *handle)
 	 * It doesn't cost much - we're about to run a commit and sleep
 	 * on IO anyway. Speeds up many-threaded, many-dir operations
 	 * by 30x or more...
+	 *
+	 * But don't do this if this process was the most recent one to
+	 * perform a synchronous write. We do this to detect the case where a
+	 * single process is doing a stream of sync writes. No point in waiting
+	 * for joiners in that case.
 	 */
-	if (handle->h_sync) {
+	pid = current->pid;
+	if (handle->h_sync && journal->j_last_sync_writer != pid) {
+		journal->j_last_sync_writer = pid;
 		do {
 			old_handle_count = transaction->t_handle_count;
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			schedule_timeout(1);
+			schedule_timeout_uninterruptible(1);
 		} while (old_handle_count != transaction->t_handle_count);
 	}
 
@@ -1469,7 +1499,7 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
  *
  * Called under j_list_lock. The journal may not be locked.
  */
-void __journal_unfile_buffer(struct journal_head *jh)
+void __journal_temp_unlink_buffer(struct journal_head *jh)
 {
 	struct journal_head **list = NULL;
 	transaction_t *transaction;
@@ -1486,7 +1516,7 @@ void __journal_unfile_buffer(struct journal_head *jh)
 
 	switch (jh->b_jlist) {
 	case BJ_None:
-		goto out;
+		return;
 	case BJ_SyncData:
 		list = &transaction->t_sync_datalist;
 		break;
@@ -1519,7 +1549,11 @@ void __journal_unfile_buffer(struct journal_head *jh)
 	jh->b_jlist = BJ_None;
 	if (test_clear_buffer_jbddirty(bh))
 		mark_buffer_dirty(bh);	/* Expose it to the VM */
-out:
+}
+
+void __journal_unfile_buffer(struct journal_head *jh)
+{
+	__journal_temp_unlink_buffer(jh);
 	jh->b_transaction = NULL;
 }
 
@@ -1578,7 +1612,7 @@ out:
  * int journal_try_to_free_buffers() - try to free page buffers.
  * @journal: journal for operation
 * @page: to try and free
- * @gfp_mask: 'IO' mode for try_to_free_buffers()
+ * @unused_gfp_mask: unused
 *
 *
 * For all the buffers on this page,
@@ -1609,7 +1643,7 @@ out:
 * while the data is part of a transaction. Yes?
 */
 int journal_try_to_free_buffers(journal_t *journal,
-				struct page *page, int unused_gfp_mask)
+				struct page *page, gfp_t unused_gfp_mask)
 {
 	struct buffer_head *head;
 	struct buffer_head *bh;
@@ -1730,6 +1764,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
 	int ret;
 
 	BUFFER_TRACE(bh, "entry");
+	//J_ASSERT_BH(bh, PageLocked(bh->b_page));
 
 	/*
 	 * It is safe to proceed here without the j_list_lock because the
@@ -1775,10 +1810,10 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
 			JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
 			ret = __dispose_buffer(jh,
 					journal->j_running_transaction);
+			journal_put_journal_head(jh);
 			spin_unlock(&journal->j_list_lock);
 			jbd_unlock_bh_state(bh);
 			spin_unlock(&journal->j_state_lock);
-			journal_put_journal_head(jh);
 			return ret;
 		} else {
 			/* There is no currently-running transaction. So the
@@ -1789,10 +1824,10 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
 				JBUFFER_TRACE(jh, "give to committing trans");
 				ret = __dispose_buffer(jh,
 					journal->j_committing_transaction);
+				journal_put_journal_head(jh);
 				spin_unlock(&journal->j_list_lock);
 				jbd_unlock_bh_state(bh);
 				spin_unlock(&journal->j_state_lock);
-				journal_put_journal_head(jh);
 				return ret;
 			} else {
 				/* The orphan record's transaction has
@@ -1802,7 +1837,18 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
 			}
 		}
 	} else if (transaction == journal->j_committing_transaction) {
-		/* If it is committing, we simply cannot touch it.  We
+		if (jh->b_jlist == BJ_Locked) {
+			JBUFFER_TRACE(jh, "on committing BJ_Locked");
+			/*
+			 * The buffer is on the committing transaction's locked
+			 * list. We have the buffer locked, so I/O has
+			 * completed. So we can nail the buffer now.
+			 */
+			may_free = __dispose_buffer(jh, transaction);
+			goto zap_buffer;
+		}
+		/*
+		 * If it is committing, we simply cannot touch it. We
 		 * can remove it's next_transaction pointer from the
 		 * running transaction if that is set, but nothing
 		 * else. */
@@ -1813,10 +1859,10 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
 				journal->j_running_transaction);
 			jh->b_next_transaction = NULL;
 		}
+		journal_put_journal_head(jh);
 		spin_unlock(&journal->j_list_lock);
 		jbd_unlock_bh_state(bh);
 		spin_unlock(&journal->j_state_lock);
-		journal_put_journal_head(jh);
 		return 0;
 	} else {
 		/* Good, the buffer belongs to the running transaction.
@@ -1826,6 +1872,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) * i_size already for this truncate so recovery will not * expose the disk blocks we are discarding here.) */ J_ASSERT_JH(jh, transaction == journal->j_running_transaction); + JBUFFER_TRACE(jh, "on running transaction"); may_free = __dispose_buffer(jh, transaction); } @@ -1846,16 +1893,15 @@ zap_buffer_unlocked: } /** - * int journal_invalidatepage() + * void journal_invalidatepage() * @journal: journal to use for flush... * @page: page to flush * @offset: length of page to invalidate. * * Reap page buffers containing data after offset in page. * - * Return non-zero if the page's buffers were successfully reaped. */ -int journal_invalidatepage(journal_t *journal, +void journal_invalidatepage(journal_t *journal, struct page *page, unsigned long offset) { @@ -1866,7 +1912,7 @@ int journal_invalidatepage(journal_t *journal, if (!PageLocked(page)) BUG(); if (!page_has_buffers(page)) - return 1; + return; /* We will potentially be playing with lists other than just the * data lists (especially for journaled data mode), so be @@ -1877,7 +1923,6 @@ int journal_invalidatepage(journal_t *journal, unsigned int next_off = curr_off + bh->b_size; next = bh->b_this_page; - /* AKPM: doing lock_buffer here may be overly paranoid */ if (offset <= curr_off) { /* This block is wholly outside the truncation point */ lock_buffer(bh); @@ -1890,11 +1935,9 @@ int journal_invalidatepage(journal_t *journal, } while (bh != head); if (!offset) { - if (!may_free || !try_to_free_buffers(page)) - return 0; - J_ASSERT(!page_has_buffers(page)); + if (may_free && try_to_free_buffers(page)) + J_ASSERT(!page_has_buffers(page)); } - return 1; } /* @@ -1929,7 +1972,7 @@ void __journal_file_buffer(struct journal_head *jh, } if (jh->b_transaction) - __journal_unfile_buffer(jh); + __journal_temp_unlink_buffer(jh); jh->b_transaction = transaction; switch (jlist) { @@ -2012,10 +2055,11 @@ void __journal_refile_buffer(struct journal_head *jh) */ was_dirty = test_clear_buffer_jbddirty(bh); - __journal_unfile_buffer(jh); + __journal_temp_unlink_buffer(jh); jh->b_transaction = jh->b_next_transaction; jh->b_next_transaction = NULL; - __journal_file_buffer(jh, jh->b_transaction, BJ_Metadata); + __journal_file_buffer(jh, jh->b_transaction, + was_dirty ? BJ_Metadata : BJ_Reserved); J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); if (was_dirty)
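
Porting note for jbd callers (a sketch, not part of the patch above): the visible interface change is that journal_get_write_access(), journal_get_undo_access() and journal_release_buffer() no longer thread an "int *credits" out-parameter. A handle credit is instead charged once per buffer, the first time journal_dirty_metadata() finds jh->b_modified == 0, and journal_forget() hands it back via drop_reserve. A minimal caller under the new interface might look as follows; update_metadata_block() is a hypothetical helper name, not something this patch adds.

static int update_metadata_block(handle_t *handle, struct buffer_head *bh)
{
	int err;

	/* Declare intent to modify; the old credits out-parameter is gone,
	 * so there is nothing to hand back on the error path either. */
	err = journal_get_write_access(handle, bh);
	if (err)
		return err;

	/* ... modify bh->b_data under the handle here ... */

	/* The first journal_dirty_metadata() on this buffer in this
	 * transaction sets jh->b_modified and consumes one unit of
	 * handle->h_buffer_credits; repeated calls are free. */
	return journal_dirty_metadata(handle, bh);
}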