bump release number

[linux-2.6.git] / fs / jbd / transaction.c
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c

index 77b7662..3eb8be6 100644 (file)
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -53,8 +53,8 @@ get_transaction(journal_t *journal, transaction_t *transaction)
         spin_lock_init(&transaction->t_handle_lock);
  
         /* Set up the commit timer for the new transaction. */
-       journal->j_commit_timer->expires = transaction->t_expires;
-       add_timer(journal->j_commit_timer);
+       journal->j_commit_timer.expires = transaction->t_expires;
+       add_timer(&journal->j_commit_timer);
  
         J_ASSERT(journal->j_running_transaction == NULL);
         journal->j_running_transaction = transaction;
@@ -227,7 +227,7 @@ repeat_locked:
         spin_unlock(&transaction->t_handle_lock);
         spin_unlock(&journal->j_state_lock);
  out:
-       if (new_transaction)
+       if (unlikely(new_transaction))          /* It's usually NULL */
                 kfree(new_transaction);
         return ret;
  }
@@ -456,7 +456,7 @@ void journal_lock_updates(journal_t *journal)
          * to make sure that we serialise special journal-locked operations
          * too.
          */
-       down(&journal->j_barrier);
+       mutex_lock(&journal->j_barrier);
  }
  
  /**
@@ -471,7 +471,7 @@ void journal_unlock_updates (journal_t *journal)
  {
         J_ASSERT(journal->j_barrier_count != 0);
  
-       up(&journal->j_barrier);
+       mutex_unlock(&journal->j_barrier);
         spin_lock(&journal->j_state_lock);
         --journal->j_barrier_count;
         spin_unlock(&journal->j_state_lock);
@@ -490,23 +490,21 @@ void journal_unlock_updates (journal_t *journal)
   */
  static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
  {
-       struct buffer_head *bh = jh2bh(jh);
         int jlist;
  
-       if (buffer_dirty(bh)) {
-               /* If this buffer is one which might reasonably be dirty
-                * --- ie. data, or not part of this journal --- then
-                * we're OK to leave it alone, but otherwise we need to
-                * move the dirty bit to the journal's own internal
-                * JBDDirty bit. */
-               jlist = jh->b_jlist;
-
-               if (jlist == BJ_Metadata || jlist == BJ_Reserved || 
-                   jlist == BJ_Shadow || jlist == BJ_Forget) {
-                       if (test_clear_buffer_dirty(jh2bh(jh))) {
-                               set_bit(BH_JBDDirty, &jh2bh(jh)->b_state);
-                       }
-               }
+       /* If this buffer is one which might reasonably be dirty
+        * --- i.e. data, or not part of this journal --- then
+        * we're OK to leave it alone, but otherwise we need to
+        * move the dirty bit to the journal's own internal
+        * JBDDirty bit. */
+       jlist = jh->b_jlist;
+
+       if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
+           jlist == BJ_Shadow || jlist == BJ_Forget) {
+               struct buffer_head *bh = jh2bh(jh);
+
+               if (test_clear_buffer_dirty(bh))
+                       set_buffer_jbddirty(bh);
         }
  }
  
@@ -574,9 +572,14 @@ repeat:
                         if (jh->b_next_transaction)
                                 J_ASSERT_JH(jh, jh->b_next_transaction ==
                                                         transaction);
-                       JBUFFER_TRACE(jh, "Unexpected dirty buffer");
-                       jbd_unexpected_dirty_buffer(jh);
-               }
+               }
+               /*
+                * In any case we need to clean the dirty flag and we must
+                * do it under the buffer lock to be sure we don't race
+                * with running write-out.
+                */
+               JBUFFER_TRACE(jh, "Unexpected dirty buffer");
+               jbd_unexpected_dirty_buffer(jh);
         }
  
         unlock_buffer(bh);
@@ -663,8 +666,9 @@ repeat:
                         if (!frozen_buffer) {
                                 JBUFFER_TRACE(jh, "allocate memory for buffer");
                                 jbd_unlock_bh_state(bh);
-                               frozen_buffer = jbd_kmalloc(jh2bh(jh)->b_size,
-                                                           GFP_NOFS);
+                               frozen_buffer =
+                                       jbd_slab_alloc(jh2bh(jh)->b_size,
+                                                        GFP_NOFS);
                                 if (!frozen_buffer) {
                                         printk(KERN_EMERG
                                                "%s: OOM for frozen_buffer\n",
@@ -722,8 +726,8 @@ done:
         journal_cancel_revoke(handle, jh);
  
  out:
-       if (frozen_buffer)
-               kfree(frozen_buffer);
+       if (unlikely(frozen_buffer))    /* It's usually NULL */
+               jbd_slab_free(frozen_buffer, bh->b_size);
  
         JBUFFER_TRACE(jh, "exit");
         return error;
@@ -876,7 +880,7 @@ int journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
  
  repeat:
         if (!jh->b_committed_data) {
-               committed_data = jbd_kmalloc(jh2bh(jh)->b_size, GFP_NOFS);
+               committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS);
                 if (!committed_data) {
                         printk(KERN_EMERG "%s: No memory for committed data\n",
                                 __FUNCTION__);
@@ -902,8 +906,8 @@ repeat:
         jbd_unlock_bh_state(bh);
  out:
         journal_put_journal_head(jh);
-       if (committed_data)
-               kfree(committed_data);
+       if (unlikely(committed_data))
+               jbd_slab_free(committed_data, bh->b_size);
         return err;
  }
  
@@ -963,6 +967,14 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
          */
         jbd_lock_bh_state(bh);
         spin_lock(&journal->j_list_lock);
+
+       /* Now that we have bh_state locked, are we really still mapped? */
+       if (!buffer_mapped(bh)) {
+               JBUFFER_TRACE(jh, "unmapped, bailing out");
+               // printk("caught an unmapped buffer\n");
+               goto no_journal;
+       }
+               
         if (jh->b_transaction) {
                 JBUFFER_TRACE(jh, "has transaction");
                 if (jh->b_transaction != handle->h_transaction) {
@@ -1024,6 +1036,11 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
                                 sync_dirty_buffer(bh);
                                 jbd_lock_bh_state(bh);
                                 spin_lock(&journal->j_list_lock);
+                               /* Since we dropped the lock... */
+                               if (!buffer_mapped(bh)) {
+                                       JBUFFER_TRACE(jh, "Got unmapped");
+                                       goto no_journal;
+                               }
                                 /* The buffer may become locked again at any
                                    time if it is redirtied */
                         }
@@ -1308,6 +1325,7 @@ int journal_stop(handle_t *handle)
         transaction_t *transaction = handle->h_transaction;
         journal_t *journal = transaction->t_journal;
         int old_handle_count, err;
+       pid_t pid;
  
         J_ASSERT(transaction->t_updates > 0);
         J_ASSERT(journal_current_handle() == handle);
@@ -1333,12 +1351,18 @@ int journal_stop(handle_t *handle)
          * It doesn't cost much - we're about to run a commit and sleep
          * on IO anyway.  Speeds up many-threaded, many-dir operations
          * by 30x or more...
+        *
+        * But don't do this if this process was the most recent one to
+        * perform a synchronous write.  We do this to detect the case where a
+        * single process is doing a stream of sync writes.  No point in waiting
+        * for joiners in that case.
          */
-       if (handle->h_sync) {
+       pid = current->pid;
+       if (handle->h_sync && journal->j_last_sync_writer != pid) {
+               journal->j_last_sync_writer = pid;
                 do {
                         old_handle_count = transaction->t_handle_count;
-                       set_current_state(TASK_UNINTERRUPTIBLE);
-                       schedule_timeout(1);
+                       schedule_timeout_uninterruptible(1);
                 } while (old_handle_count != transaction->t_handle_count);
         }
  
@@ -1619,7 +1643,7 @@ out:
   * while the data is part of a transaction.  Yes?
   */
  int journal_try_to_free_buffers(journal_t *journal, 
-                               struct page *page, int unused_gfp_mask)
+                               struct page *page, gfp_t unused_gfp_mask)
  {
         struct buffer_head *head;
         struct buffer_head *bh;
@@ -1740,6 +1764,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
         int ret;
  
         BUFFER_TRACE(bh, "entry");
+       //J_ASSERT_BH(bh, PageLocked(bh->b_page));
  
         /*
          * It is safe to proceed here without the j_list_lock because the
@@ -1813,6 +1838,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
                 }
         } else if (transaction == journal->j_committing_transaction) {
                 if (jh->b_jlist == BJ_Locked) {
+                       JBUFFER_TRACE(jh, "on committing BJ_Locked");
                         /*
                          * The buffer is on the committing transaction's locked
                          * list.  We have the buffer locked, so I/O has
@@ -1846,6 +1872,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
                  * i_size already for this truncate so recovery will not
                  * expose the disk blocks we are discarding here.) */
                 J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
+               JBUFFER_TRACE(jh, "on running transaction");
                 may_free = __dispose_buffer(jh, transaction);
         }
  
@@ -1866,16 +1893,15 @@ zap_buffer_unlocked:
  }
  
  /** 
- * int journal_invalidatepage() 
+ * void journal_invalidatepage()
   * @journal: journal to use for flush... 
   * @page:    page to flush
   * @offset:  length of page to invalidate.
   *
   * Reap page buffers containing data after offset in page.
   *
- * Return non-zero if the page's buffers were successfully reaped.
   */
-int journal_invalidatepage(journal_t *journal, 
+void journal_invalidatepage(journal_t *journal,
                       struct page *page, 
                       unsigned long offset)
  {
@@ -1886,7 +1912,7 @@ int journal_invalidatepage(journal_t *journal,
         if (!PageLocked(page))
                 BUG();
         if (!page_has_buffers(page))
-               return 1;
+               return;
  
         /* We will potentially be playing with lists other than just the
          * data lists (especially for journaled data mode), so be
@@ -1909,11 +1935,9 @@ int journal_invalidatepage(journal_t *journal,
         } while (bh != head);
  
         if (!offset) {
-               if (!may_free || !try_to_free_buffers(page))
-                       return 0;
-               J_ASSERT(!page_has_buffers(page));
+               if (may_free && try_to_free_buffers(page))
+                       J_ASSERT(!page_has_buffers(page));
         }
-       return 1;
  }
  
  /* 
@@ -2034,7 +2058,8 @@ void __journal_refile_buffer(struct journal_head *jh)
         __journal_temp_unlink_buffer(jh);
         jh->b_transaction = jh->b_next_transaction;
         jh->b_next_transaction = NULL;
-       __journal_file_buffer(jh, jh->b_transaction, BJ_Metadata);
+       __journal_file_buffer(jh, jh->b_transaction,
+                               was_dirty ? BJ_Metadata : BJ_Reserved);
         J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
  
         if (was_dirty)