X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Fjbd%2Ftransaction.c;h=cceaf57e37781304df0b553ca2f8e9957fd65ecb;hb=refs%2Fheads%2Fvserver;hp=18a678ce2591a96cd97c04fb0e5b454e80e718e4;hpb=9bf4aaab3e101692164d49b7ca357651eb691cb6;p=linux-2.6.git diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 18a678ce2..cceaf57e3 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -1,6 +1,6 @@ /* * linux/fs/transaction.c - * + * * Written by Stephen C. Tweedie , 1998 * * Copyright 1998 Red Hat corp --- All Rights Reserved @@ -10,7 +10,7 @@ * option, any later version, incorporated herein by reference. * * Generic filesystem transaction handling code; part of the ext2fs - * journaling system. + * journaling system. * * This file manages transactions (compound commits managed by the * journaling code) and handles (individual atomic operations by the @@ -27,6 +27,8 @@ #include #include +static void __journal_temp_unlink_buffer(struct journal_head *jh); + /* * get_transaction: obtain a new transaction_t object. * @@ -50,13 +52,11 @@ get_transaction(journal_t *journal, transaction_t *transaction) transaction->t_state = T_RUNNING; transaction->t_tid = journal->j_transaction_sequence++; transaction->t_expires = jiffies + journal->j_commit_interval; - INIT_LIST_HEAD(&transaction->t_jcb); spin_lock_init(&transaction->t_handle_lock); - spin_lock_init(&transaction->t_jcb_lock); /* Set up the commit timer for the new transaction. */ - journal->j_commit_timer->expires = transaction->t_expires; - add_timer(journal->j_commit_timer); + journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); + add_timer(&journal->j_commit_timer); J_ASSERT(journal->j_running_transaction == NULL); journal->j_running_transaction = transaction; @@ -76,7 +76,7 @@ get_transaction(journal_t *journal, transaction_t *transaction) * start_this_handle: Given a handle, deal with any locking or stalling * needed to make sure that there is enough journal space for the handle * to begin. Attach the handle to a transaction and set up the - * transaction's buffer credits. + * transaction's buffer credits. */ static int start_this_handle(journal_t *journal, handle_t *handle) @@ -119,7 +119,7 @@ repeat_locked: if (is_journal_aborted(journal) || (journal->j_errno != 0 && !(journal->j_flags & JFS_ACK_ERR))) { spin_unlock(&journal->j_state_lock); - ret = -EROFS; + ret = -EROFS; goto out; } @@ -184,7 +184,7 @@ repeat_locked: goto repeat; } - /* + /* * The commit code assumes that it can get enough log space * without forcing a checkpoint. This is *critical* for * correctness: a checkpoint of a buffer which is also @@ -193,7 +193,7 @@ repeat_locked: * * We must therefore ensure the necessary space in the journal * *before* starting to dirty potentially checkpointed buffers - * in the new transaction. + * in the new transaction. * * The worst part is, any transaction currently committing can * reduce the free space arbitrarily. Be careful to account for @@ -229,7 +229,7 @@ repeat_locked: spin_unlock(&transaction->t_handle_lock); spin_unlock(&journal->j_state_lock); out: - if (new_transaction) + if (unlikely(new_transaction)) /* It's usually NULL */ kfree(new_transaction); return ret; } @@ -243,19 +243,18 @@ static handle_t *new_handle(int nblocks) memset(handle, 0, sizeof(*handle)); handle->h_buffer_credits = nblocks; handle->h_ref = 1; - INIT_LIST_HEAD(&handle->h_jcb); return handle; } /** - * handle_t *journal_start() - Obtain a new handle. + * handle_t *journal_start() - Obtain a new handle. * @journal: Journal to start transaction on. * @nblocks: number of block buffer we might modify * * We make sure that the transaction can guarantee at least nblocks of * modified buffers in the log. We block until the log can guarantee - * that much space. + * that much space. * * This function is visible to journal users (like ext3fs), so is not * called with the journal already locked. @@ -295,11 +294,11 @@ handle_t *journal_start(journal_t *journal, int nblocks) * int journal_extend() - extend buffer credits. * @handle: handle to 'extend' * @nblocks: nr blocks to try to extend by. - * + * * Some transactions, such as large extends and truncates, can be done * atomically all at once or in several stages. The operation requests * a credit for a number of buffer modications in advance, but can - * extend its credit if it needs more. + * extend its credit if it needs more. * * journal_extend tries to give the running handle more buffer credits. * It does not guarantee that allocation - this is a best-effort only. @@ -366,7 +365,7 @@ out: * int journal_restart() - restart a handle . * @handle: handle to restart * @nblocks: nr credits requested - * + * * Restart a handle for a multi-transaction filesystem * operation. * @@ -459,13 +458,13 @@ void journal_lock_updates(journal_t *journal) * to make sure that we serialise special journal-locked operations * too. */ - down(&journal->j_barrier); + mutex_lock(&journal->j_barrier); } /** * void journal_unlock_updates (journal_t* journal) - release barrier * @journal: Journal to release the barrier on. - * + * * Release a transaction barrier obtained with journal_lock_updates(). * * Should be called without the journal lock held. @@ -474,7 +473,7 @@ void journal_unlock_updates (journal_t *journal) { J_ASSERT(journal->j_barrier_count != 0); - up(&journal->j_barrier); + mutex_unlock(&journal->j_barrier); spin_lock(&journal->j_state_lock); --journal->j_barrier_count; spin_unlock(&journal->j_state_lock); @@ -493,23 +492,21 @@ void journal_unlock_updates (journal_t *journal) */ static void jbd_unexpected_dirty_buffer(struct journal_head *jh) { - struct buffer_head *bh = jh2bh(jh); int jlist; - if (buffer_dirty(bh)) { - /* If this buffer is one which might reasonably be dirty - * --- ie. data, or not part of this journal --- then - * we're OK to leave it alone, but otherwise we need to - * move the dirty bit to the journal's own internal - * JBDDirty bit. */ - jlist = jh->b_jlist; - - if (jlist == BJ_Metadata || jlist == BJ_Reserved || - jlist == BJ_Shadow || jlist == BJ_Forget) { - if (test_clear_buffer_dirty(jh2bh(jh))) { - set_bit(BH_JBDDirty, &jh2bh(jh)->b_state); - } - } + /* If this buffer is one which might reasonably be dirty + * --- ie. data, or not part of this journal --- then + * we're OK to leave it alone, but otherwise we need to + * move the dirty bit to the journal's own internal + * JBDDirty bit. */ + jlist = jh->b_jlist; + + if (jlist == BJ_Metadata || jlist == BJ_Reserved || + jlist == BJ_Shadow || jlist == BJ_Forget) { + struct buffer_head *bh = jh2bh(jh); + + if (test_clear_buffer_dirty(bh)) + set_buffer_jbddirty(bh); } } @@ -525,7 +522,7 @@ static void jbd_unexpected_dirty_buffer(struct journal_head *jh) */ static int do_get_write_access(handle_t *handle, struct journal_head *jh, - int force_copy, int *credits) + int force_copy) { struct buffer_head *bh; transaction_t *transaction; @@ -552,8 +549,8 @@ repeat: jbd_lock_bh_state(bh); /* We now hold the buffer lock so it is safe to query the buffer - * state. Is the buffer dirty? - * + * state. Is the buffer dirty? + * * If so, there are two possibilities. The buffer may be * non-journaled, and undergoing a quite legitimate writeback. * Otherwise, it is journaled, and we don't expect dirty buffers @@ -571,16 +568,21 @@ repeat: */ if (jh->b_transaction) { J_ASSERT_JH(jh, - jh->b_transaction == transaction || + jh->b_transaction == transaction || jh->b_transaction == journal->j_committing_transaction); if (jh->b_next_transaction) J_ASSERT_JH(jh, jh->b_next_transaction == transaction); - JBUFFER_TRACE(jh, "Unexpected dirty buffer"); - jbd_unexpected_dirty_buffer(jh); - } - } + } + /* + * In any case we need to clean the dirty flag and we must + * do it under the buffer lock to be sure we don't race + * with running write-out. + */ + JBUFFER_TRACE(jh, "Unexpected dirty buffer"); + jbd_unexpected_dirty_buffer(jh); + } unlock_buffer(bh); @@ -607,11 +609,6 @@ repeat: JBUFFER_TRACE(jh, "has frozen data"); J_ASSERT_JH(jh, jh->b_next_transaction == NULL); jh->b_next_transaction = transaction; - - J_ASSERT_JH(jh, handle->h_buffer_credits > 0); - handle->h_buffer_credits--; - if (credits) - (*credits)++; goto done; } @@ -633,21 +630,22 @@ repeat: * disk then we cannot do copy-out here. */ if (jh->b_jlist == BJ_Shadow) { + DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Unshadow); wait_queue_head_t *wqh; - DEFINE_WAIT(wait); + + wqh = bit_waitqueue(&bh->b_state, BH_Unshadow); JBUFFER_TRACE(jh, "on shadow: sleep"); jbd_unlock_bh_state(bh); /* commit wakes up all shadow buffers after IO */ - wqh = bh_waitq_head(bh); for ( ; ; ) { - prepare_to_wait(wqh, &wait, + prepare_to_wait(wqh, &wait.wait, TASK_UNINTERRUPTIBLE); if (jh->b_jlist != BJ_Shadow) break; schedule(); } - finish_wait(wqh, &wait); + finish_wait(wqh, &wait.wait); goto repeat; } @@ -657,7 +655,7 @@ repeat: * buffer had better remain locked during the kmalloc, * but that should be true --- we hold the journal lock * still and the buffer is already on the BUF_JOURNAL - * list so won't be flushed. + * list so won't be flushed. * * Subtle point, though: if this is a get_undo_access, * then we will be relying on the frozen_data to contain @@ -670,8 +668,9 @@ repeat: if (!frozen_buffer) { JBUFFER_TRACE(jh, "allocate memory for buffer"); jbd_unlock_bh_state(bh); - frozen_buffer = jbd_kmalloc(jh2bh(jh)->b_size, - GFP_NOFS); + frozen_buffer = + jbd_slab_alloc(jh2bh(jh)->b_size, + GFP_NOFS); if (!frozen_buffer) { printk(KERN_EMERG "%s: OOM for frozen_buffer\n", @@ -690,10 +689,6 @@ repeat: jh->b_next_transaction = transaction; } - J_ASSERT(handle->h_buffer_credits > 0); - handle->h_buffer_credits--; - if (credits) - (*credits)++; /* * Finally, if the buffer is not journaled right now, we need to make @@ -733,8 +728,8 @@ done: journal_cancel_revoke(handle, jh); out: - if (frozen_buffer) - kfree(frozen_buffer); + if (unlikely(frozen_buffer)) /* It's usually NULL */ + jbd_slab_free(frozen_buffer, bh->b_size); JBUFFER_TRACE(jh, "exit"); return error; @@ -744,6 +739,7 @@ out: * int journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update. * @handle: transaction to add buffer modifications to * @bh: bh to be used for metadata writes + * @credits: variable that will receive credits for the buffer * * Returns an error code or 0 on success. * @@ -751,8 +747,7 @@ out: * because we're write()ing a buffer which is also part of a shared mapping. */ -int journal_get_write_access(handle_t *handle, - struct buffer_head *bh, int *credits) +int journal_get_write_access(handle_t *handle, struct buffer_head *bh) { struct journal_head *jh = journal_add_journal_head(bh); int rc; @@ -760,7 +755,7 @@ int journal_get_write_access(handle_t *handle, /* We do not want to get caught playing with fields which the * log thread also manipulates. Make sure that the buffer * completes any outstanding IO before proceeding. */ - rc = do_get_write_access(handle, jh, 0, credits); + rc = do_get_write_access(handle, jh, 0); journal_put_journal_head(jh); return rc; } @@ -772,8 +767,8 @@ int journal_get_write_access(handle_t *handle, * manually rather than reading off disk), then we need to keep the * buffer_head locked until it has been completely filled with new * data. In this case, we should be able to make the assertion that - * the bh is not already part of an existing transaction. - * + * the bh is not already part of an existing transaction. + * * The buffer should already be locked by the caller by this point. * There is no lock ranking violation: it was a newly created, * unlocked buffer beforehand. */ @@ -785,7 +780,7 @@ int journal_get_write_access(handle_t *handle, * * Call this if you create a new bh. */ -int journal_get_create_access(handle_t *handle, struct buffer_head *bh) +int journal_get_create_access(handle_t *handle, struct buffer_head *bh) { transaction_t *transaction = handle->h_transaction; journal_t *journal = transaction->t_journal; @@ -816,9 +811,6 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh) J_ASSERT_JH(jh, jh->b_next_transaction == NULL); J_ASSERT_JH(jh, buffer_locked(jh2bh(jh))); - J_ASSERT_JH(jh, handle->h_buffer_credits > 0); - handle->h_buffer_credits--; - if (jh->b_transaction == NULL) { jh->b_transaction = transaction; JBUFFER_TRACE(jh, "file as BJ_Reserved"); @@ -857,13 +849,13 @@ out: * do not reuse freed space until the deallocation has been committed, * since if we overwrote that space we would make the delete * un-rewindable in case of a crash. - * + * * To deal with that, journal_get_undo_access requests write access to a * buffer for parts of non-rewindable operations such as delete * operations on the bitmaps. The journaling code must keep a copy of * the buffer's contents prior to the undo_access call until such time * as we know that the buffer has definitely been committed to disk. - * + * * We never need to know which transaction the committed data is part * of, buffers touched here are guaranteed to be dirtied later and so * will be committed to a new transaction in due course, at which point @@ -871,8 +863,7 @@ out: * * Returns error number or 0 on success. */ -int journal_get_undo_access(handle_t *handle, struct buffer_head *bh, - int *credits) +int journal_get_undo_access(handle_t *handle, struct buffer_head *bh) { int err; struct journal_head *jh = journal_add_journal_head(bh); @@ -885,13 +876,13 @@ int journal_get_undo_access(handle_t *handle, struct buffer_head *bh, * make sure that obtaining the committed_data is done * atomically wrt. completion of any outstanding commits. */ - err = do_get_write_access(handle, jh, 1, credits); + err = do_get_write_access(handle, jh, 1); if (err) goto out; repeat: if (!jh->b_committed_data) { - committed_data = jbd_kmalloc(jh2bh(jh)->b_size, GFP_NOFS); + committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS); if (!committed_data) { printk(KERN_EMERG "%s: No memory for committed data\n", __FUNCTION__); @@ -917,18 +908,18 @@ repeat: jbd_unlock_bh_state(bh); out: journal_put_journal_head(jh); - if (committed_data) - kfree(committed_data); + if (unlikely(committed_data)) + jbd_slab_free(committed_data, bh->b_size); return err; } -/** +/** * int journal_dirty_data() - mark a buffer as containing dirty data which * needs to be flushed before we can commit the - * current transaction. + * current transaction. * @handle: transaction * @bh: bufferhead to mark - * + * * The buffer is placed on the transaction's data list and is marked as * belonging to the transaction. * @@ -957,15 +948,15 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) /* * What if the buffer is already part of a running transaction? - * + * * There are two cases: * 1) It is part of the current running transaction. Refile it, * just in case we have allocated it as metadata, deallocated - * it, then reallocated it as data. + * it, then reallocated it as data. * 2) It is part of the previous, still-committing transaction. * If all we want to do is to guarantee that the buffer will be * written to disk before this new transaction commits, then - * being sure that the *previous* transaction has this same + * being sure that the *previous* transaction has this same * property is sufficient for us! Just leave it on its old * transaction. * @@ -978,6 +969,13 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) */ jbd_lock_bh_state(bh); spin_lock(&journal->j_list_lock); + + /* Now that we have bh_state locked, are we really still mapped? */ + if (!buffer_mapped(bh)) { + JBUFFER_TRACE(jh, "unmapped buffer, bailing out"); + goto no_journal; + } + if (jh->b_transaction) { JBUFFER_TRACE(jh, "has transaction"); if (jh->b_transaction != handle->h_transaction) { @@ -1039,6 +1037,11 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) sync_dirty_buffer(bh); jbd_lock_bh_state(bh); spin_lock(&journal->j_list_lock); + /* Since we dropped the lock... */ + if (!buffer_mapped(bh)) { + JBUFFER_TRACE(jh, "buffer got unmapped"); + goto no_journal; + } /* The buffer may become locked again at any time if it is redirtied */ } @@ -1046,7 +1049,12 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) /* journal_clean_data_list() may have got there first */ if (jh->b_transaction != NULL) { JBUFFER_TRACE(jh, "unfile from commit"); - __journal_unfile_buffer(jh); + __journal_temp_unlink_buffer(jh); + /* It still points to the committing + * transaction; move it to this one so + * that the refile assert checks are + * happy. */ + jh->b_transaction = handle->h_transaction; } /* The buffer will be refiled below */ @@ -1060,7 +1068,8 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) { JBUFFER_TRACE(jh, "not on correct data list: unfile"); J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow); - __journal_unfile_buffer(jh); + __journal_temp_unlink_buffer(jh); + jh->b_transaction = handle->h_transaction; JBUFFER_TRACE(jh, "file as data"); __journal_file_buffer(jh, handle->h_transaction, BJ_SyncData); @@ -1081,18 +1090,18 @@ no_journal: return 0; } -/** +/** * int journal_dirty_metadata() - mark a buffer as containing dirty metadata * @handle: transaction to add buffer to. - * @bh: buffer to mark - * + * @bh: buffer to mark + * * mark dirty metadata which needs to be journaled as part of the current * transaction. * * The buffer is placed on the transaction's metadata list and is marked - * as belonging to the transaction. + * as belonging to the transaction. * - * Returns error number or 0 on success. + * Returns error number or 0 on success. * * Special care needs to be taken if the buffer already belongs to the * current committing transaction (in which case we should have frozen @@ -1113,6 +1122,17 @@ int journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) jbd_lock_bh_state(bh); + if (jh->b_modified == 0) { + /* + * This buffer's got modified and becoming part + * of the transaction. This needs to be done + * once a transaction -bzzz + */ + jh->b_modified = 1; + J_ASSERT_JH(jh, handle->h_buffer_credits > 0); + handle->h_buffer_credits--; + } + /* * fastpath, to avoid expensive locking. If this buffer is already * on the running transaction's metadata list there is nothing to do. @@ -1129,11 +1149,11 @@ int journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) set_buffer_jbddirty(bh); - /* + /* * Metadata already on the current transaction list doesn't * need to be filed. Metadata on another transaction's list must * be committing, and will be refiled once the commit completes: - * leave it alone for now. + * leave it alone for now. */ if (jh->b_transaction != transaction) { JBUFFER_TRACE(jh, "already on other transaction"); @@ -1159,52 +1179,41 @@ out: return 0; } -/* +/* * journal_release_buffer: undo a get_write_access without any buffer * updates, if the update decided in the end that it didn't need access. * - * The caller passes in the number of credits which should be put back for - * this buffer (zero or one). - * - * We leave the buffer attached to t_reserved_list because even though this - * handle doesn't want it, some other concurrent handle may want to journal - * this buffer. If that handle is curently in between get_write_access() and - * journal_dirty_metadata() then it expects the buffer to be reserved. If - * we were to rip it off t_reserved_list here, the other handle will explode - * when journal_dirty_metadata is presented with a non-reserved buffer. - * - * If nobody really wants to journal this buffer then it will be thrown - * away at the start of commit. */ void -journal_release_buffer(handle_t *handle, struct buffer_head *bh, int credits) +journal_release_buffer(handle_t *handle, struct buffer_head *bh) { BUFFER_TRACE(bh, "entry"); - handle->h_buffer_credits += credits; } -/** +/** * void journal_forget() - bforget() for potentially-journaled buffers. * @handle: transaction handle * @bh: bh to 'forget' * * We can only do the bforget if there are no commits pending against the * buffer. If the buffer is dirty in the current running transaction we - * can safely unlink it. + * can safely unlink it. * * bh may not be a journalled buffer at all - it may be a non-JBD * buffer which came off the hashtable. Check for this. * * Decrements bh->b_count by one. - * + * * Allow this call even if the handle has aborted --- it may be part of * the caller's cleanup after an abort. */ -void journal_forget(handle_t *handle, struct buffer_head *bh) +int journal_forget (handle_t *handle, struct buffer_head *bh) { transaction_t *transaction = handle->h_transaction; journal_t *journal = transaction->t_journal; struct journal_head *jh; + int drop_reserve = 0; + int err = 0; BUFFER_TRACE(bh, "entry"); @@ -1215,6 +1224,20 @@ void journal_forget(handle_t *handle, struct buffer_head *bh) goto not_jbd; jh = bh2jh(bh); + /* Critical error: attempting to delete a bitmap buffer, maybe? + * Don't do any jbd operations, and return an error. */ + if (!J_EXPECT_JH(jh, !jh->b_committed_data, + "inconsistent data on disk")) { + err = -EIO; + goto not_jbd; + } + + /* + * The buffer's going from the transaction, we must drop + * all references -bzzz + */ + jh->b_modified = 0; + if (jh->b_transaction == handle->h_transaction) { J_ASSERT_JH(jh, !jh->b_frozen_data); @@ -1225,11 +1248,10 @@ void journal_forget(handle_t *handle, struct buffer_head *bh) clear_buffer_jbddirty(bh); JBUFFER_TRACE(jh, "belongs to current transaction: unfile"); - J_ASSERT_JH(jh, !jh->b_committed_data); - __journal_unfile_buffer(jh); + drop_reserve = 1; - /* + /* * We are no longer going to journal this buffer. * However, the commit of this transaction is still * important to the buffer: the delete that we are now @@ -1238,23 +1260,25 @@ void journal_forget(handle_t *handle, struct buffer_head *bh) * * So, if we have a checkpoint on the buffer, we should * now refile the buffer on our BJ_Forget list so that - * we know to remove the checkpoint after we commit. + * we know to remove the checkpoint after we commit. */ if (jh->b_cp_transaction) { + __journal_temp_unlink_buffer(jh); __journal_file_buffer(jh, transaction, BJ_Forget); } else { + __journal_unfile_buffer(jh); journal_remove_journal_head(bh); __brelse(bh); if (!buffer_jbd(bh)) { spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); __bforget(bh); - return; + goto drop; } } } else if (jh->b_transaction) { - J_ASSERT_JH(jh, (jh->b_transaction == + J_ASSERT_JH(jh, (jh->b_transaction == journal->j_committing_transaction)); /* However, if the buffer is still owned by a prior * (committing) transaction, we can't drop it yet... */ @@ -1265,6 +1289,7 @@ void journal_forget(handle_t *handle, struct buffer_head *bh) if (jh->b_next_transaction) { J_ASSERT(jh->b_next_transaction == transaction); jh->b_next_transaction = NULL; + drop_reserve = 1; } } @@ -1272,43 +1297,18 @@ not_jbd: spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); __brelse(bh); - return; -} - -/** - * void journal_callback_set() - Register a callback function for this handle. - * @handle: handle to attach the callback to. - * @func: function to callback. - * @jcb: structure with additional information required by func() , and - * some space for jbd internal information. - * - * The function will be - * called when the transaction that this handle is part of has been - * committed to disk with the original callback data struct and the - * error status of the journal as parameters. There is no guarantee of - * ordering between handles within a single transaction, nor between - * callbacks registered on the same handle. - * - * The caller is responsible for allocating the journal_callback struct. - * This is to allow the caller to add as much extra data to the callback - * as needed, but reduce the overhead of multiple allocations. The caller - * allocated struct must start with a struct journal_callback at offset 0, - * and has the caller-specific data afterwards. - */ -void journal_callback_set(handle_t *handle, - void (*func)(struct journal_callback *jcb, int error), - struct journal_callback *jcb) -{ - spin_lock(&handle->h_transaction->t_jcb_lock); - list_add_tail(&jcb->jcb_list, &handle->h_jcb); - spin_unlock(&handle->h_transaction->t_jcb_lock); - jcb->jcb_func = func; +drop: + if (drop_reserve) { + /* no need to reserve log space for this block -bzzz */ + handle->h_buffer_credits++; + } + return err; } /** * int journal_stop() - complete a transaction * @handle: tranaction to complete. - * + * * All done for a particular handle. * * There is not much action needed here. We just return any remaining @@ -1317,7 +1317,7 @@ void journal_callback_set(handle_t *handle, * filesystem is marked for synchronous update. * * journal_stop itself will not usually return an error, but it may - * do so in unusual circumstances. In particular, expect it to + * do so in unusual circumstances. In particular, expect it to * return -EIO if a journal_abort has been executed since the * transaction began. */ @@ -1326,14 +1326,16 @@ int journal_stop(handle_t *handle) transaction_t *transaction = handle->h_transaction; journal_t *journal = transaction->t_journal; int old_handle_count, err; + pid_t pid; - J_ASSERT(transaction->t_updates > 0); J_ASSERT(journal_current_handle() == handle); if (is_handle_aborted(handle)) err = -EIO; - else + else { + J_ASSERT(transaction->t_updates > 0); err = 0; + } if (--handle->h_ref > 0) { jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1, @@ -1351,12 +1353,18 @@ int journal_stop(handle_t *handle) * It doesn't cost much - we're about to run a commit and sleep * on IO anyway. Speeds up many-threaded, many-dir operations * by 30x or more... + * + * But don't do this if this process was the most recent one to + * perform a synchronous write. We do this to detect the case where a + * single process is doing a stream of sync writes. No point in waiting + * for joiners in that case. */ - if (handle->h_sync) { + pid = current->pid; + if (handle->h_sync && journal->j_last_sync_writer != pid) { + journal->j_last_sync_writer = pid; do { old_handle_count = transaction->t_handle_count; - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(1); + schedule_timeout_uninterruptible(1); } while (old_handle_count != transaction->t_handle_count); } @@ -1371,11 +1379,6 @@ int journal_stop(handle_t *handle) wake_up(&journal->j_wait_transaction_locked); } - /* Move callbacks from the handle to the transaction. */ - spin_lock(&transaction->t_jcb_lock); - list_splice(&handle->h_jcb, &transaction->t_jcb); - spin_unlock(&transaction->t_jcb_lock); - /* * If the handle is marked SYNC, we need to set another commit * going! We also want to force a commit if the current @@ -1385,7 +1388,7 @@ int journal_stop(handle_t *handle) if (handle->h_sync || transaction->t_outstanding_credits > journal->j_max_transaction_buffers || - time_after_eq(jiffies, transaction->t_expires)) { + time_after_eq(jiffies, transaction->t_expires)) { /* Do this even for aborted journals: an abort still * completes the commit thread, it just doesn't write * anything to disk. */ @@ -1400,7 +1403,7 @@ int journal_stop(handle_t *handle) /* * Special case: JFS_SYNC synchronous updates require us - * to wait for the commit to complete. + * to wait for the commit to complete. */ if (handle->h_sync && !(current->flags & PF_MEMALLOC)) err = log_wait_commit(journal, tid); @@ -1451,7 +1454,7 @@ int journal_force_commit(journal_t *journal) * jbd_lock_bh_state(jh2bh(jh)) is held. */ -static inline void +static inline void __blist_add_buffer(struct journal_head **list, struct journal_head *jh) { if (!*list) { @@ -1466,7 +1469,7 @@ __blist_add_buffer(struct journal_head **list, struct journal_head *jh) } } -/* +/* * Remove a buffer from a transaction list, given the transaction's list * head pointer. * @@ -1487,7 +1490,7 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh) jh->b_tnext->b_tprev = jh->b_tprev; } -/* +/* * Remove a buffer from the appropriate transaction list. * * Note that this function can *change* the value of @@ -1498,7 +1501,7 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh) * * Called under j_list_lock. The journal may not be locked. */ -void __journal_unfile_buffer(struct journal_head *jh) +static void __journal_temp_unlink_buffer(struct journal_head *jh) { struct journal_head **list = NULL; transaction_t *transaction; @@ -1515,7 +1518,7 @@ void __journal_unfile_buffer(struct journal_head *jh) switch (jh->b_jlist) { case BJ_None: - goto out; + return; case BJ_SyncData: list = &transaction->t_sync_datalist; break; @@ -1548,7 +1551,11 @@ void __journal_unfile_buffer(struct journal_head *jh) jh->b_jlist = BJ_None; if (test_clear_buffer_jbddirty(bh)) mark_buffer_dirty(bh); /* Expose it to the VM */ -out: +} + +void __journal_unfile_buffer(struct journal_head *jh) +{ + __journal_temp_unlink_buffer(jh); jh->b_transaction = NULL; } @@ -1603,17 +1610,17 @@ out: } -/** +/** * int journal_try_to_free_buffers() - try to free page buffers. * @journal: journal for operation * @page: to try and free - * @gfp_mask: 'IO' mode for try_to_free_buffers() + * @unused_gfp_mask: unused + * * - * * For all the buffers on this page, * if they are fully written out ordered data, move them onto BUF_CLEAN * so try_to_free_buffers() can reap them. - * + * * This function returns non-zero if we wish try_to_free_buffers() * to be called. We do this if the page is releasable by try_to_free_buffers(). * We also do it if the page has locked or dirty buffers and the caller wants @@ -1637,8 +1644,8 @@ out: * cannot happen because we never reallocate freed data as metadata * while the data is part of a transaction. Yes? */ -int journal_try_to_free_buffers(journal_t *journal, - struct page *page, int unused_gfp_mask) +int journal_try_to_free_buffers(journal_t *journal, + struct page *page, gfp_t unused_gfp_mask) { struct buffer_head *head; struct buffer_head *bh; @@ -1705,7 +1712,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) } /* - * journal_invalidatepage + * journal_invalidatepage * * This code is tricky. It has a number of cases to deal with. * @@ -1713,15 +1720,15 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) * * i_size must be updated on disk before we start calling invalidatepage on the * data. - * + * * This is done in ext3 by defining an ext3_setattr method which * updates i_size before truncate gets going. By maintaining this * invariant, we can be sure that it is safe to throw away any buffers * attached to the current transaction: once the transaction commits, * we know that the data will not be needed. - * + * * Note however that we can *not* throw away data belonging to the - * previous, committing transaction! + * previous, committing transaction! * * Any disk blocks which *are* part of the previous, committing * transaction (and which therefore cannot be discarded immediately) are @@ -1740,7 +1747,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) * don't make guarantees about the order in which data hits disk --- in * particular we don't guarantee that new dirty data is flushed before * transaction commit --- so it is always safe just to discard data - * immediately in that mode. --sct + * immediately in that mode. --sct */ /* @@ -1804,10 +1811,10 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); ret = __dispose_buffer(jh, journal->j_running_transaction); + journal_put_journal_head(jh); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); spin_unlock(&journal->j_state_lock); - journal_put_journal_head(jh); return ret; } else { /* There is no currently-running transaction. So the @@ -1818,10 +1825,10 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) JBUFFER_TRACE(jh, "give to committing trans"); ret = __dispose_buffer(jh, journal->j_committing_transaction); + journal_put_journal_head(jh); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); spin_unlock(&journal->j_state_lock); - journal_put_journal_head(jh); return ret; } else { /* The orphan record's transaction has @@ -1831,21 +1838,31 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) } } } else if (transaction == journal->j_committing_transaction) { - /* If it is committing, we simply cannot touch it. We + JBUFFER_TRACE(jh, "on committing transaction"); + if (jh->b_jlist == BJ_Locked) { + /* + * The buffer is on the committing transaction's locked + * list. We have the buffer locked, so I/O has + * completed. So we can nail the buffer now. + */ + may_free = __dispose_buffer(jh, transaction); + goto zap_buffer; + } + /* + * If it is committing, we simply cannot touch it. We * can remove it's next_transaction pointer from the * running transaction if that is set, but nothing * else. */ - JBUFFER_TRACE(jh, "on committing transaction"); set_buffer_freed(bh); if (jh->b_next_transaction) { J_ASSERT(jh->b_next_transaction == journal->j_running_transaction); jh->b_next_transaction = NULL; } + journal_put_journal_head(jh); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); spin_unlock(&journal->j_state_lock); - journal_put_journal_head(jh); return 0; } else { /* Good, the buffer belongs to the running transaction. @@ -1855,6 +1872,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) * i_size already for this truncate so recovery will not * expose the disk blocks we are discarding here.) */ J_ASSERT_JH(jh, transaction == journal->j_running_transaction); + JBUFFER_TRACE(jh, "on running transaction"); may_free = __dispose_buffer(jh, transaction); } @@ -1874,18 +1892,17 @@ zap_buffer_unlocked: return may_free; } -/** - * int journal_invalidatepage() - * @journal: journal to use for flush... +/** + * void journal_invalidatepage() + * @journal: journal to use for flush... * @page: page to flush * @offset: length of page to invalidate. * * Reap page buffers containing data after offset in page. * - * Return non-zero if the page's buffers were successfully reaped. */ -int journal_invalidatepage(journal_t *journal, - struct page *page, +void journal_invalidatepage(journal_t *journal, + struct page *page, unsigned long offset) { struct buffer_head *head, *bh, *next; @@ -1895,7 +1912,7 @@ int journal_invalidatepage(journal_t *journal, if (!PageLocked(page)) BUG(); if (!page_has_buffers(page)) - return 1; + return; /* We will potentially be playing with lists other than just the * data lists (especially for journaled data mode), so be @@ -1906,9 +1923,8 @@ int journal_invalidatepage(journal_t *journal, unsigned int next_off = curr_off + bh->b_size; next = bh->b_this_page; - /* AKPM: doing lock_buffer here may be overly paranoid */ if (offset <= curr_off) { - /* This block is wholly outside the truncation point */ + /* This block is wholly outside the truncation point */ lock_buffer(bh); may_free &= journal_unmap_buffer(journal, bh); unlock_buffer(bh); @@ -1919,15 +1935,13 @@ int journal_invalidatepage(journal_t *journal, } while (bh != head); if (!offset) { - if (!may_free || !try_to_free_buffers(page)) - return 0; - J_ASSERT(!page_has_buffers(page)); + if (may_free && try_to_free_buffers(page)) + J_ASSERT(!page_has_buffers(page)); } - return 1; } -/* - * File a buffer on the given transaction list. +/* + * File a buffer on the given transaction list. */ void __journal_file_buffer(struct journal_head *jh, transaction_t *transaction, int jlist) @@ -1950,7 +1964,7 @@ void __journal_file_buffer(struct journal_head *jh, * with __jbd_unexpected_dirty_buffer()'s handling of dirty * state. */ - if (jlist == BJ_Metadata || jlist == BJ_Reserved || + if (jlist == BJ_Metadata || jlist == BJ_Reserved || jlist == BJ_Shadow || jlist == BJ_Forget) { if (test_clear_buffer_dirty(bh) || test_clear_buffer_jbddirty(bh)) @@ -1958,7 +1972,7 @@ void __journal_file_buffer(struct journal_head *jh, } if (jh->b_transaction) - __journal_unfile_buffer(jh); + __journal_temp_unlink_buffer(jh); jh->b_transaction = transaction; switch (jlist) { @@ -2010,7 +2024,7 @@ void journal_file_buffer(struct journal_head *jh, jbd_unlock_bh_state(jh2bh(jh)); } -/* +/* * Remove a buffer from its current buffer list in preparation for * dropping it from its current transaction entirely. If the buffer has * already started to be used by a subsequent transaction, refile the @@ -2041,10 +2055,11 @@ void __journal_refile_buffer(struct journal_head *jh) */ was_dirty = test_clear_buffer_jbddirty(bh); - __journal_unfile_buffer(jh); + __journal_temp_unlink_buffer(jh); jh->b_transaction = jh->b_next_transaction; jh->b_next_transaction = NULL; - __journal_file_buffer(jh, jh->b_transaction, BJ_Metadata); + __journal_file_buffer(jh, jh->b_transaction, + was_dirty ? BJ_Metadata : BJ_Reserved); J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); if (was_dirty) @@ -2061,7 +2076,7 @@ void __journal_refile_buffer(struct journal_head *jh) * to the caller to remove the journal_head if necessary. For the * unlocked journal_refile_buffer call, the caller isn't going to be * doing anything else to the buffer so we need to do the cleanup - * ourselves to avoid a jh leak. + * ourselves to avoid a jh leak. * * *** The journal_head may be freed by this call! *** */