Merge to Fedora kernel-2.6.18-1.2224_FC5 patched with stable patch-2.6.18.1-vs2.0...
[linux-2.6.git] / fs / jbd / commit.c
index 002ad2b..e998a60 100644 (file)
@@ -160,6 +160,117 @@ static int journal_write_commit_record(journal_t *journal,
        return (ret == -EIO);
 }
 
+/* Start write IO on wbuf[0..bufs-1]; submit_bh() uses up our safety ref. */
+void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
+{
+       struct buffer_head **end = wbuf + bufs;
+
+       for (; wbuf < end; wbuf++) {
+               (*wbuf)->b_end_io = end_buffer_write_sync;
+               submit_bh(WRITE, *wbuf);
+       }
+}
+
+/*
+ * Submit all data buffers on commit_transaction->t_sync_datalist to disk.
+ * Dirty buffers are moved to BJ_Locked and written out through
+ * journal->j_wbuf.  Clean buffers are unfiled, unless their IO is still in
+ * flight: those go to BJ_Locked so the t_locked_list wait covers them.
+ * Called without j_list_lock; takes and releases it internally.
+ * Whenever we unlock the journal and sleep, things can get added onto
+ * ->t_sync_datalist, so we keep looping back to write_out_data until we
+ * *know* that the list is empty.  Even in abort mode, flush it all out.
+ */
+static void journal_submit_data_buffers(journal_t *journal,
+                               transaction_t *commit_transaction)
+{
+       struct journal_head *jh;
+       struct buffer_head *bh;
+       int locked;
+       int bufs = 0;
+       struct buffer_head **wbuf = journal->j_wbuf;
+
+write_out_data:
+       cond_resched();
+       spin_lock(&journal->j_list_lock);
+
+       while (commit_transaction->t_sync_datalist) {
+               jh = commit_transaction->t_sync_datalist;
+               bh = jh2bh(jh);
+               locked = 0;
+
+               /* Pin bh so it cannot disappear while we drop locks */
+               get_bh(bh);
+               /* A dirty buffer needs IO, hence the buffer lock: trylock
+                * first, else drop j_list_lock and block in lock_buffer(). */
+               if (buffer_dirty(bh)) {
+                       if (test_set_buffer_locked(bh)) {
+                               BUFFER_TRACE(bh, "needs blocking lock");
+                               spin_unlock(&journal->j_list_lock);
+                               /* Write out all data to prevent deadlocks */
+                               journal_do_submit_data(wbuf, bufs);
+                               bufs = 0;
+                               lock_buffer(bh);
+                               spin_lock(&journal->j_list_lock);
+                       }
+                       locked = 1;
+               }
+               /* We have to get bh_state lock. Again out of order, sigh. */
+               if (!inverted_lock(journal, bh)) {
+                       jbd_lock_bh_state(bh);
+                       spin_lock(&journal->j_list_lock);
+               }
+               /* Someone already cleaned up the buffer? */
+               if (!buffer_jbd(bh)
+                       || jh->b_transaction != commit_transaction
+                       || jh->b_jlist != BJ_SyncData) {
+                       jbd_unlock_bh_state(bh);
+                       if (locked)
+                               unlock_buffer(bh);
+                       BUFFER_TRACE(bh, "already cleaned up");
+                       put_bh(bh);
+                       continue;
+               }
+               if (locked && test_clear_buffer_dirty(bh)) {
+                       BUFFER_TRACE(bh, "needs writeout, adding to array");
+                       wbuf[bufs++] = bh;
+                       __journal_file_buffer(jh, commit_transaction,
+                                               BJ_Locked);
+                       jbd_unlock_bh_state(bh);
+                       if (bufs == journal->j_wbufsize) {
+                               spin_unlock(&journal->j_list_lock);
+                               journal_do_submit_data(wbuf, bufs);
+                               bufs = 0;
+                               goto write_out_data;
+                       }
+               } else if (!locked && buffer_locked(bh)) {
+                       /* Clean but IO still in flight: the data must be on
+                        * disk before we commit, so wait for it on BJ_Locked. */
+                       __journal_file_buffer(jh, commit_transaction,
+                                               BJ_Locked);
+                       jbd_unlock_bh_state(bh);
+                       put_bh(bh);
+               } else {
+                       BUFFER_TRACE(bh, "writeout complete: unfile");
+                       __journal_unfile_buffer(jh);
+                       jbd_unlock_bh_state(bh);
+                       if (locked)
+                               unlock_buffer(bh);
+                       journal_remove_journal_head(bh);
+                       /* Drop our pin and journal_remove_journal_head()'s */
+                       put_bh(bh);
+                       put_bh(bh);
+               }
+
+               if (lock_need_resched(&journal->j_list_lock)) {
+                       spin_unlock(&journal->j_list_lock);
+                       goto write_out_data;
+               }
+       }
+       spin_unlock(&journal->j_list_lock);
+       journal_do_submit_data(wbuf, bufs);
+}
+
 /*
  * journal_commit_transaction
  *
@@ -261,7 +372,7 @@ void journal_commit_transaction(journal_t *journal)
                        struct buffer_head *bh = jh2bh(jh);
 
                        jbd_lock_bh_state(bh);
-                       kfree(jh->b_committed_data);
+                       jbd_slab_free(jh->b_committed_data, bh->b_size);
                        jh->b_committed_data = NULL;
                        jbd_unlock_bh_state(bh);
                }
@@ -313,80 +424,13 @@ void journal_commit_transaction(journal_t *journal)
         * Now start flushing things to disk, in the order they appear
         * on the transaction lists.  Data blocks go first.
         */
-
        err = 0;
-       /*
-        * Whenever we unlock the journal and sleep, things can get added
-        * onto ->t_sync_datalist, so we have to keep looping back to
-        * write_out_data until we *know* that the list is empty.
-        */
-       bufs = 0;
-       /*
-        * Cleanup any flushed data buffers from the data list.  Even in
-        * abort mode, we want to flush this out as soon as possible.
-        */
-write_out_data:
-       cond_resched();
-       spin_lock(&journal->j_list_lock);
-
-       while (commit_transaction->t_sync_datalist) {
-               struct buffer_head *bh;
-
-               jh = commit_transaction->t_sync_datalist;
-               commit_transaction->t_sync_datalist = jh->b_tnext;
-               bh = jh2bh(jh);
-               if (buffer_locked(bh)) {
-                       BUFFER_TRACE(bh, "locked");
-                       if (!inverted_lock(journal, bh))
-                               goto write_out_data;
-                       __journal_temp_unlink_buffer(jh);
-                       __journal_file_buffer(jh, commit_transaction,
-                                               BJ_Locked);
-                       jbd_unlock_bh_state(bh);
-                       if (lock_need_resched(&journal->j_list_lock)) {
-                               spin_unlock(&journal->j_list_lock);
-                               goto write_out_data;
-                       }
-               } else {
-                       if (buffer_dirty(bh)) {
-                               BUFFER_TRACE(bh, "start journal writeout");
-                               get_bh(bh);
-                               wbuf[bufs++] = bh;
-                               if (bufs == journal->j_wbufsize) {
-                                       jbd_debug(2, "submit %d writes\n",
-                                                       bufs);
-                                       spin_unlock(&journal->j_list_lock);
-                                       ll_rw_block(SWRITE, bufs, wbuf);
-                                       journal_brelse_array(wbuf, bufs);
-                                       bufs = 0;
-                                       goto write_out_data;
-                               }
-                       } else {
-                               BUFFER_TRACE(bh, "writeout complete: unfile");
-                               if (!inverted_lock(journal, bh))
-                                       goto write_out_data;
-                               __journal_unfile_buffer(jh);
-                               jbd_unlock_bh_state(bh);
-                               journal_remove_journal_head(bh);
-                               put_bh(bh);
-                               if (lock_need_resched(&journal->j_list_lock)) {
-                                       spin_unlock(&journal->j_list_lock);
-                                       goto write_out_data;
-                               }
-                       }
-               }
-       }
-
-       if (bufs) {
-               spin_unlock(&journal->j_list_lock);
-               ll_rw_block(SWRITE, bufs, wbuf);
-               journal_brelse_array(wbuf, bufs);
-               spin_lock(&journal->j_list_lock);
-       }
+       journal_submit_data_buffers(journal, commit_transaction);
 
        /*
         * Wait for all previously submitted IO to complete.
         */
+       spin_lock(&journal->j_list_lock);
        while (commit_transaction->t_locked_list) {
                struct buffer_head *bh;
 
@@ -745,14 +789,14 @@ restart_loop:
                 * Otherwise, we can just throw away the frozen data now.
                 */
                if (jh->b_committed_data) {
-                       kfree(jh->b_committed_data);
+                       jbd_slab_free(jh->b_committed_data, bh->b_size);
                        jh->b_committed_data = NULL;
                        if (jh->b_frozen_data) {
                                jh->b_committed_data = jh->b_frozen_data;
                                jh->b_frozen_data = NULL;
                        }
                } else if (jh->b_frozen_data) {
-                       kfree(jh->b_frozen_data);
+                       jbd_slab_free(jh->b_frozen_data, bh->b_size);
                        jh->b_frozen_data = NULL;
                }
 
@@ -790,11 +834,22 @@ restart_loop:
                        jbd_unlock_bh_state(bh);
                } else {
                        J_ASSERT_BH(bh, !buffer_dirty(bh));
-                       J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
-                       __journal_unfile_buffer(jh);
-                       jbd_unlock_bh_state(bh);
-                       journal_remove_journal_head(bh);  /* needs a brelse */
-                       release_buffer_page(bh);
+                       /* The buffer on BJ_Forget list and not jbddirty means
+                        * it has been freed by this transaction and hence it
+                        * could not have been reallocated until this
+                        * transaction has committed. *BUT* it could be
+                        * reallocated once we have written all the data to
+                        * disk and before we process the buffer on BJ_Forget
+                        * list. */
+                       JBUFFER_TRACE(jh, "refile or unfile freed buffer");
+                       __journal_refile_buffer(jh);
+                       if (!jh->b_transaction) {
+                               jbd_unlock_bh_state(bh);
+                                /* needs a brelse */
+                               journal_remove_journal_head(bh);
+                               release_buffer_page(bh);
+                       } else
+                               jbd_unlock_bh_state(bh);
                }
                cond_resched_lock(&journal->j_list_lock);
        }