bump release number
[linux-2.6.git] / fs / jbd / transaction.c
index 733e500..3eb8be6 100644 (file)
@@ -53,8 +53,8 @@ get_transaction(journal_t *journal, transaction_t *transaction)
        spin_lock_init(&transaction->t_handle_lock);
 
        /* Set up the commit timer for the new transaction. */
-       journal->j_commit_timer->expires = transaction->t_expires;
-       add_timer(journal->j_commit_timer);
+       journal->j_commit_timer.expires = transaction->t_expires;
+       add_timer(&journal->j_commit_timer);
 
        J_ASSERT(journal->j_running_transaction == NULL);
        journal->j_running_transaction = transaction;
@@ -227,7 +227,7 @@ repeat_locked:
        spin_unlock(&transaction->t_handle_lock);
        spin_unlock(&journal->j_state_lock);
 out:
-       if (new_transaction)
+       if (unlikely(new_transaction))          /* It's usually NULL */
                kfree(new_transaction);
        return ret;
 }
@@ -456,7 +456,7 @@ void journal_lock_updates(journal_t *journal)
         * to make sure that we serialise special journal-locked operations
         * too.
         */
-       down(&journal->j_barrier);
+       mutex_lock(&journal->j_barrier);
 }
 
 /**
@@ -471,7 +471,7 @@ void journal_unlock_updates (journal_t *journal)
 {
        J_ASSERT(journal->j_barrier_count != 0);
 
-       up(&journal->j_barrier);
+       mutex_unlock(&journal->j_barrier);
        spin_lock(&journal->j_state_lock);
        --journal->j_barrier_count;
        spin_unlock(&journal->j_state_lock);
@@ -490,23 +490,21 @@ void journal_unlock_updates (journal_t *journal)
  */
 static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
 {
-       struct buffer_head *bh = jh2bh(jh);
        int jlist;
 
-       if (buffer_dirty(bh)) {
-               /* If this buffer is one which might reasonably be dirty
-                * --- ie. data, or not part of this journal --- then
-                * we're OK to leave it alone, but otherwise we need to
-                * move the dirty bit to the journal's own internal
-                * JBDDirty bit. */
-               jlist = jh->b_jlist;
-
-               if (jlist == BJ_Metadata || jlist == BJ_Reserved || 
-                   jlist == BJ_Shadow || jlist == BJ_Forget) {
-                       if (test_clear_buffer_dirty(jh2bh(jh))) {
-                               set_bit(BH_JBDDirty, &jh2bh(jh)->b_state);
-                       }
-               }
+       /* If this buffer is one which might reasonably be dirty
+        * --- ie. data, or not part of this journal --- then
+        * we're OK to leave it alone, but otherwise we need to
+        * move the dirty bit to the journal's own internal
+        * JBDDirty bit. */
+       jlist = jh->b_jlist;
+
+       if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
+           jlist == BJ_Shadow || jlist == BJ_Forget) {
+               struct buffer_head *bh = jh2bh(jh);
+
+               if (test_clear_buffer_dirty(bh))
+                       set_buffer_jbddirty(bh);
        }
 }
 
@@ -522,7 +520,7 @@ static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
  */
 static int
 do_get_write_access(handle_t *handle, struct journal_head *jh,
-                       int force_copy, int *credits) 
+                       int force_copy)
 {
        struct buffer_head *bh;
        transaction_t *transaction;
@@ -574,9 +572,14 @@ repeat:
                        if (jh->b_next_transaction)
                                J_ASSERT_JH(jh, jh->b_next_transaction ==
                                                        transaction);
-                       JBUFFER_TRACE(jh, "Unexpected dirty buffer");
-                       jbd_unexpected_dirty_buffer(jh);
-               }
+               }
+               /*
+                * In any case we need to clean the dirty flag and we must
+                * do it under the buffer lock to be sure we don't race
+                * with running write-out.
+                */
+               JBUFFER_TRACE(jh, "Unexpected dirty buffer");
+               jbd_unexpected_dirty_buffer(jh);
        }
 
        unlock_buffer(bh);
@@ -604,11 +607,6 @@ repeat:
                JBUFFER_TRACE(jh, "has frozen data");
                J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
                jh->b_next_transaction = transaction;
-
-               J_ASSERT_JH(jh, handle->h_buffer_credits > 0);
-               handle->h_buffer_credits--;
-               if (credits)
-                       (*credits)++;
                goto done;
        }
 
@@ -668,8 +666,9 @@ repeat:
                        if (!frozen_buffer) {
                                JBUFFER_TRACE(jh, "allocate memory for buffer");
                                jbd_unlock_bh_state(bh);
-                               frozen_buffer = jbd_kmalloc(jh2bh(jh)->b_size,
-                                                           GFP_NOFS);
+                               frozen_buffer =
+                                       jbd_slab_alloc(jh2bh(jh)->b_size,
+                                                        GFP_NOFS);
                                if (!frozen_buffer) {
                                        printk(KERN_EMERG
                                               "%s: OOM for frozen_buffer\n",
@@ -688,10 +687,6 @@ repeat:
                jh->b_next_transaction = transaction;
        }
 
-       J_ASSERT(handle->h_buffer_credits > 0);
-       handle->h_buffer_credits--;
-       if (credits)
-               (*credits)++;
 
        /*
         * Finally, if the buffer is not journaled right now, we need to make
@@ -731,8 +726,8 @@ done:
        journal_cancel_revoke(handle, jh);
 
 out:
-       if (frozen_buffer)
-               kfree(frozen_buffer);
+       if (unlikely(frozen_buffer))    /* It's usually NULL */
+               jbd_slab_free(frozen_buffer, bh->b_size);
 
        JBUFFER_TRACE(jh, "exit");
        return error;
@@ -742,6 +737,7 @@ out:
  * int journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
  * @handle: transaction to add buffer modifications to
  * @bh:     bh to be used for metadata writes
+ * @credits: variable that will receive credits for the buffer
  *
  * Returns an error code or 0 on success.
  *
@@ -749,8 +745,7 @@ out:
  * because we're write()ing a buffer which is also part of a shared mapping.
  */
 
-int journal_get_write_access(handle_t *handle,
-                       struct buffer_head *bh, int *credits)
+int journal_get_write_access(handle_t *handle, struct buffer_head *bh)
 {
        struct journal_head *jh = journal_add_journal_head(bh);
        int rc;
@@ -758,7 +753,7 @@ int journal_get_write_access(handle_t *handle,
        /* We do not want to get caught playing with fields which the
         * log thread also manipulates.  Make sure that the buffer
         * completes any outstanding IO before proceeding. */
-       rc = do_get_write_access(handle, jh, 0, credits);
+       rc = do_get_write_access(handle, jh, 0);
        journal_put_journal_head(jh);
        return rc;
 }
@@ -814,9 +809,6 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
        J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
        J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
 
-       J_ASSERT_JH(jh, handle->h_buffer_credits > 0);
-       handle->h_buffer_credits--;
-
        if (jh->b_transaction == NULL) {
                jh->b_transaction = transaction;
                JBUFFER_TRACE(jh, "file as BJ_Reserved");
@@ -869,8 +861,7 @@ out:
  *
  * Returns error number or 0 on success.
  */
-int journal_get_undo_access(handle_t *handle, struct buffer_head *bh,
-                               int *credits)
+int journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
 {
        int err;
        struct journal_head *jh = journal_add_journal_head(bh);
@@ -883,13 +874,13 @@ int journal_get_undo_access(handle_t *handle, struct buffer_head *bh,
         * make sure that obtaining the committed_data is done
         * atomically wrt. completion of any outstanding commits.
         */
-       err = do_get_write_access(handle, jh, 1, credits);
+       err = do_get_write_access(handle, jh, 1);
        if (err)
                goto out;
 
 repeat:
        if (!jh->b_committed_data) {
-               committed_data = jbd_kmalloc(jh2bh(jh)->b_size, GFP_NOFS);
+               committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS);
                if (!committed_data) {
                        printk(KERN_EMERG "%s: No memory for committed data\n",
                                __FUNCTION__);
@@ -915,8 +906,8 @@ repeat:
        jbd_unlock_bh_state(bh);
 out:
        journal_put_journal_head(jh);
-       if (committed_data)
-               kfree(committed_data);
+       if (unlikely(committed_data))
+               jbd_slab_free(committed_data, bh->b_size);
        return err;
 }
 
@@ -976,6 +967,14 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
         */
        jbd_lock_bh_state(bh);
        spin_lock(&journal->j_list_lock);
+
+       /* Now that we have bh_state locked, are we really still mapped? */
+       if (!buffer_mapped(bh)) {
+               JBUFFER_TRACE(jh, "unmapped, bailing out");
+               // printk("caught an unmapped buffer\n");
+               goto no_journal;
+       }
+               
        if (jh->b_transaction) {
                JBUFFER_TRACE(jh, "has transaction");
                if (jh->b_transaction != handle->h_transaction) {
@@ -1037,6 +1036,11 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
                                sync_dirty_buffer(bh);
                                jbd_lock_bh_state(bh);
                                spin_lock(&journal->j_list_lock);
+                               /* Since we dropped the lock... */
+                               if (!buffer_mapped(bh)) {
+                                       JBUFFER_TRACE(jh, "Got unmapped");
+                                       goto no_journal;
+                               }
                                /* The buffer may become locked again at any
                                   time if it is redirtied */
                        }
@@ -1044,7 +1048,12 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
                        /* journal_clean_data_list() may have got there first */
                        if (jh->b_transaction != NULL) {
                                JBUFFER_TRACE(jh, "unfile from commit");
-                               __journal_unfile_buffer(jh);
+                               __journal_temp_unlink_buffer(jh);
+                               /* It still points to the committing
+                                * transaction; move it to this one so
+                                * that the refile assert checks are
+                                * happy. */
+                               jh->b_transaction = handle->h_transaction;
                        }
                        /* The buffer will be refiled below */
 
@@ -1058,7 +1067,8 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
                if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) {
                        JBUFFER_TRACE(jh, "not on correct data list: unfile");
                        J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow);
-                       __journal_unfile_buffer(jh);
+                       __journal_temp_unlink_buffer(jh);
+                       jh->b_transaction = handle->h_transaction;
                        JBUFFER_TRACE(jh, "file as data");
                        __journal_file_buffer(jh, handle->h_transaction,
                                                BJ_SyncData);
@@ -1111,6 +1121,17 @@ int journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 
        jbd_lock_bh_state(bh);
 
+       if (jh->b_modified == 0) {
+               /*
+                * This buffer's got modified and becoming part
+                * of the transaction. This needs to be done
+                * once a transaction -bzzz
+                */
+               jh->b_modified = 1;
+               J_ASSERT_JH(jh, handle->h_buffer_credits > 0);
+               handle->h_buffer_credits--;
+       }
+
        /*
         * fastpath, to avoid expensive locking.  If this buffer is already
         * on the running transaction's metadata list there is nothing to do.
@@ -1161,24 +1182,11 @@ out:
  * journal_release_buffer: undo a get_write_access without any buffer
  * updates, if the update decided in the end that it didn't need access.
  *
- * The caller passes in the number of credits which should be put back for
- * this buffer (zero or one).
- *
- * We leave the buffer attached to t_reserved_list because even though this
- * handle doesn't want it, some other concurrent handle may want to journal
- * this buffer.  If that handle is curently in between get_write_access() and
- * journal_dirty_metadata() then it expects the buffer to be reserved.  If
- * we were to rip it off t_reserved_list here, the other handle will explode
- * when journal_dirty_metadata is presented with a non-reserved buffer.
- *
- * If nobody really wants to journal this buffer then it will be thrown
- * away at the start of commit.
  */
 void
-journal_release_buffer(handle_t *handle, struct buffer_head *bh, int credits)
+journal_release_buffer(handle_t *handle, struct buffer_head *bh)
 {
        BUFFER_TRACE(bh, "entry");
-       handle->h_buffer_credits += credits;
 }
 
 /** 
@@ -1203,8 +1211,9 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
        transaction_t *transaction = handle->h_transaction;
        journal_t *journal = transaction->t_journal;
        struct journal_head *jh;
+       int drop_reserve = 0;
        int err = 0;
-       
+
        BUFFER_TRACE(bh, "entry");
 
        jbd_lock_bh_state(bh);
@@ -1222,6 +1231,12 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
                goto not_jbd;
        }
 
+       /*
+        * The buffer's going from the transaction, we must drop
+        * all references -bzzz
+        */
+       jh->b_modified = 0;
+
        if (jh->b_transaction == handle->h_transaction) {
                J_ASSERT_JH(jh, !jh->b_frozen_data);
 
@@ -1233,7 +1248,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
 
                JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
 
-               __journal_unfile_buffer(jh);
+               drop_reserve = 1;
 
                /* 
                 * We are no longer going to journal this buffer.
@@ -1248,15 +1263,17 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
                 */
 
                if (jh->b_cp_transaction) {
+                       __journal_temp_unlink_buffer(jh);
                        __journal_file_buffer(jh, transaction, BJ_Forget);
                } else {
+                       __journal_unfile_buffer(jh);
                        journal_remove_journal_head(bh);
                        __brelse(bh);
                        if (!buffer_jbd(bh)) {
                                spin_unlock(&journal->j_list_lock);
                                jbd_unlock_bh_state(bh);
                                __bforget(bh);
-                               return 0;
+                               goto drop;
                        }
                }
        } else if (jh->b_transaction) {
@@ -1271,6 +1288,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
                if (jh->b_next_transaction) {
                        J_ASSERT(jh->b_next_transaction == transaction);
                        jh->b_next_transaction = NULL;
+                       drop_reserve = 1;
                }
        }
 
@@ -1278,6 +1296,11 @@ not_jbd:
        spin_unlock(&journal->j_list_lock);
        jbd_unlock_bh_state(bh);
        __brelse(bh);
+drop:
+       if (drop_reserve) {
+               /* no need to reserve log space for this block -bzzz */
+               handle->h_buffer_credits++;
+       }
        return err;
 }
 
@@ -1302,6 +1325,7 @@ int journal_stop(handle_t *handle)
        transaction_t *transaction = handle->h_transaction;
        journal_t *journal = transaction->t_journal;
        int old_handle_count, err;
+       pid_t pid;
 
        J_ASSERT(transaction->t_updates > 0);
        J_ASSERT(journal_current_handle() == handle);
@@ -1327,12 +1351,18 @@ int journal_stop(handle_t *handle)
         * It doesn't cost much - we're about to run a commit and sleep
         * on IO anyway.  Speeds up many-threaded, many-dir operations
         * by 30x or more...
+        *
+        * But don't do this if this process was the most recent one to
+        * perform a synchronous write.  We do this to detect the case where a
+        * single process is doing a stream of sync writes.  No point in waiting
+        * for joiners in that case.
         */
-       if (handle->h_sync) {
+       pid = current->pid;
+       if (handle->h_sync && journal->j_last_sync_writer != pid) {
+               journal->j_last_sync_writer = pid;
                do {
                        old_handle_count = transaction->t_handle_count;
-                       set_current_state(TASK_UNINTERRUPTIBLE);
-                       schedule_timeout(1);
+                       schedule_timeout_uninterruptible(1);
                } while (old_handle_count != transaction->t_handle_count);
        }
 
@@ -1469,7 +1499,7 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
  *
  * Called under j_list_lock.  The journal may not be locked.
  */
-void __journal_unfile_buffer(struct journal_head *jh)
+void __journal_temp_unlink_buffer(struct journal_head *jh)
 {
        struct journal_head **list = NULL;
        transaction_t *transaction;
@@ -1486,7 +1516,7 @@ void __journal_unfile_buffer(struct journal_head *jh)
 
        switch (jh->b_jlist) {
        case BJ_None:
-               goto out;
+               return;
        case BJ_SyncData:
                list = &transaction->t_sync_datalist;
                break;
@@ -1519,7 +1549,11 @@ void __journal_unfile_buffer(struct journal_head *jh)
        jh->b_jlist = BJ_None;
        if (test_clear_buffer_jbddirty(bh))
                mark_buffer_dirty(bh);  /* Expose it to the VM */
-out:
+}
+
+void __journal_unfile_buffer(struct journal_head *jh)
+{
+       __journal_temp_unlink_buffer(jh);
        jh->b_transaction = NULL;
 }
 
@@ -1578,7 +1612,7 @@ out:
  * int journal_try_to_free_buffers() - try to free page buffers.
  * @journal: journal for operation
  * @page: to try and free
- * @gfp_mask: 'IO' mode for try_to_free_buffers()
+ * @unused_gfp_mask: unused
  *
  * 
  * For all the buffers on this page,
@@ -1609,7 +1643,7 @@ out:
  * while the data is part of a transaction.  Yes?
  */
 int journal_try_to_free_buffers(journal_t *journal, 
-                               struct page *page, int unused_gfp_mask)
+                               struct page *page, gfp_t unused_gfp_mask)
 {
        struct buffer_head *head;
        struct buffer_head *bh;
@@ -1730,6 +1764,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
        int ret;
 
        BUFFER_TRACE(bh, "entry");
+       //J_ASSERT_BH(bh, PageLocked(bh->b_page));
 
        /*
         * It is safe to proceed here without the j_list_lock because the
@@ -1775,10 +1810,10 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
                        JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
                        ret = __dispose_buffer(jh,
                                        journal->j_running_transaction);
+                       journal_put_journal_head(jh);
                        spin_unlock(&journal->j_list_lock);
                        jbd_unlock_bh_state(bh);
                        spin_unlock(&journal->j_state_lock);
-                       journal_put_journal_head(jh);
                        return ret;
                } else {
                        /* There is no currently-running transaction. So the
@@ -1789,10 +1824,10 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
                                JBUFFER_TRACE(jh, "give to committing trans");
                                ret = __dispose_buffer(jh,
                                        journal->j_committing_transaction);
+                               journal_put_journal_head(jh);
                                spin_unlock(&journal->j_list_lock);
                                jbd_unlock_bh_state(bh);
                                spin_unlock(&journal->j_state_lock);
-                               journal_put_journal_head(jh);
                                return ret;
                        } else {
                                /* The orphan record's transaction has
@@ -1802,7 +1837,18 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
                        }
                }
        } else if (transaction == journal->j_committing_transaction) {
-               /* If it is committing, we simply cannot touch it.  We
+               if (jh->b_jlist == BJ_Locked) {
+                       JBUFFER_TRACE(jh, "on committing BJ_Locked");
+                       /*
+                        * The buffer is on the committing transaction's locked
+                        * list.  We have the buffer locked, so I/O has
+                        * completed.  So we can nail the buffer now.
+                        */
+                       may_free = __dispose_buffer(jh, transaction);
+                       goto zap_buffer;
+               }
+               /*
+                * If it is committing, we simply cannot touch it.  We
                 * can remove it's next_transaction pointer from the
                 * running transaction if that is set, but nothing
                 * else. */
@@ -1813,10 +1859,10 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
                                        journal->j_running_transaction);
                        jh->b_next_transaction = NULL;
                }
+               journal_put_journal_head(jh);
                spin_unlock(&journal->j_list_lock);
                jbd_unlock_bh_state(bh);
                spin_unlock(&journal->j_state_lock);
-               journal_put_journal_head(jh);
                return 0;
        } else {
                /* Good, the buffer belongs to the running transaction.
@@ -1826,6 +1872,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
                 * i_size already for this truncate so recovery will not
                 * expose the disk blocks we are discarding here.) */
                J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
+               JBUFFER_TRACE(jh, "on running transaction");
                may_free = __dispose_buffer(jh, transaction);
        }
 
@@ -1846,16 +1893,15 @@ zap_buffer_unlocked:
 }
 
 /** 
- * int journal_invalidatepage() 
+ * void journal_invalidatepage()
  * @journal: journal to use for flush... 
  * @page:    page to flush
  * @offset:  length of page to invalidate.
  *
  * Reap page buffers containing data after offset in page.
  *
- * Return non-zero if the page's buffers were successfully reaped.
  */
-int journal_invalidatepage(journal_t *journal, 
+void journal_invalidatepage(journal_t *journal,
                      struct page *page, 
                      unsigned long offset)
 {
@@ -1866,7 +1912,7 @@ int journal_invalidatepage(journal_t *journal,
        if (!PageLocked(page))
                BUG();
        if (!page_has_buffers(page))
-               return 1;
+               return;
 
        /* We will potentially be playing with lists other than just the
         * data lists (especially for journaled data mode), so be
@@ -1877,7 +1923,6 @@ int journal_invalidatepage(journal_t *journal,
                unsigned int next_off = curr_off + bh->b_size;
                next = bh->b_this_page;
 
-               /* AKPM: doing lock_buffer here may be overly paranoid */
                if (offset <= curr_off) {
                        /* This block is wholly outside the truncation point */
                        lock_buffer(bh);
@@ -1890,11 +1935,9 @@ int journal_invalidatepage(journal_t *journal,
        } while (bh != head);
 
        if (!offset) {
-               if (!may_free || !try_to_free_buffers(page))
-                       return 0;
-               J_ASSERT(!page_has_buffers(page));
+               if (may_free && try_to_free_buffers(page))
+                       J_ASSERT(!page_has_buffers(page));
        }
-       return 1;
 }
 
 /* 
@@ -1929,7 +1972,7 @@ void __journal_file_buffer(struct journal_head *jh,
        }
 
        if (jh->b_transaction)
-               __journal_unfile_buffer(jh);
+               __journal_temp_unlink_buffer(jh);
        jh->b_transaction = transaction;
 
        switch (jlist) {
@@ -2012,10 +2055,11 @@ void __journal_refile_buffer(struct journal_head *jh)
         */
 
        was_dirty = test_clear_buffer_jbddirty(bh);
-       __journal_unfile_buffer(jh);
+       __journal_temp_unlink_buffer(jh);
        jh->b_transaction = jh->b_next_transaction;
        jh->b_next_transaction = NULL;
-       __journal_file_buffer(jh, jh->b_transaction, BJ_Metadata);
+       __journal_file_buffer(jh, jh->b_transaction,
+                               was_dirty ? BJ_Metadata : BJ_Reserved);
        J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
 
        if (was_dirty)