vserver 1.9.5.x5
[linux-2.6.git] / fs / jbd / commit.c
index 1c029b2..aa5f224 100644 (file)
@@ -262,7 +262,7 @@ write_out_data:
                        __journal_file_buffer(jh, commit_transaction,
                                                BJ_Locked);
                        jbd_unlock_bh_state(bh);
-                       if (need_resched()) {
+                       if (lock_need_resched(&journal->j_list_lock)) {
                                spin_unlock(&journal->j_list_lock);
                                goto write_out_data;
                        }
@@ -288,7 +288,7 @@ write_out_data:
                                jbd_unlock_bh_state(bh);
                                journal_remove_journal_head(bh);
                                put_bh(bh);
-                               if (need_resched()) {
+                               if (lock_need_resched(&journal->j_list_lock)) {
                                        spin_unlock(&journal->j_list_lock);
                                        goto write_out_data;
                                }
@@ -333,14 +333,13 @@ write_out_data:
                        jbd_unlock_bh_state(bh);
                }
                put_bh(bh);
-               if (need_resched()) {
-                       spin_unlock(&journal->j_list_lock);
-                       cond_resched();
-                       spin_lock(&journal->j_list_lock);
-               }
+               cond_resched_lock(&journal->j_list_lock);
        }
        spin_unlock(&journal->j_list_lock);
 
+       if (err)
+               __journal_abort_hard(journal);
+
        journal_write_revoke_records(journal, commit_transaction);
 
        jbd_debug(3, "JBD: commit phase 2\n");
@@ -362,7 +361,7 @@ write_out_data:
         */
        commit_transaction->t_state = T_COMMIT;
 
-       descriptor = 0;
+       descriptor = NULL;
        bufs = 0;
        while (commit_transaction->t_buffers) {
 
@@ -405,14 +404,15 @@ write_out_data:
                        jbd_debug(4, "JBD: got buffer %llu (%p)\n",
                                (unsigned long long)bh->b_blocknr, bh->b_data);
                        header = (journal_header_t *)&bh->b_data[0];
-                       header->h_magic     = htonl(JFS_MAGIC_NUMBER);
-                       header->h_blocktype = htonl(JFS_DESCRIPTOR_BLOCK);
-                       header->h_sequence  = htonl(commit_transaction->t_tid);
+                       header->h_magic     = cpu_to_be32(JFS_MAGIC_NUMBER);
+                       header->h_blocktype = cpu_to_be32(JFS_DESCRIPTOR_BLOCK);
+                       header->h_sequence  = cpu_to_be32(commit_transaction->t_tid);
 
                        tagp = &bh->b_data[sizeof(journal_header_t)];
                        space_left = bh->b_size - sizeof(journal_header_t);
                        first_tag = 1;
-                       set_bit(BH_JWrite, &bh->b_state);
+                       set_buffer_jwrite(bh);
+                       set_buffer_dirty(bh);
                        wbuf[bufs++] = bh;
 
                        /* Record it so that we can wait for IO
@@ -472,8 +472,8 @@ write_out_data:
                        tag_flag |= JFS_FLAG_SAME_UUID;
 
                tag = (journal_block_tag_t *) tagp;
-               tag->t_blocknr = htonl(jh2bh(jh)->b_blocknr);
-               tag->t_flags = htonl(tag_flag);
+               tag->t_blocknr = cpu_to_be32(jh2bh(jh)->b_blocknr);
+               tag->t_flags = cpu_to_be32(tag_flag);
                tagp += sizeof(journal_block_tag_t);
                space_left -= sizeof(journal_block_tag_t);
 
@@ -497,12 +497,12 @@ write_out_data:
                            submitting the IOs.  "tag" still points to
                            the last tag we set up. */
 
-                       tag->t_flags |= htonl(JFS_FLAG_LAST_TAG);
+                       tag->t_flags |= cpu_to_be32(JFS_FLAG_LAST_TAG);
 
 start_journal_io:
                        for (i = 0; i < bufs; i++) {
                                struct buffer_head *bh = wbuf[i];
-                               set_buffer_locked(bh);
+                               lock_buffer(bh);
                                clear_buffer_dirty(bh);
                                set_buffer_uptodate(bh);
                                bh->b_end_io = journal_end_buffer_io_sync;
@@ -544,6 +544,8 @@ wait_for_iobuf:
                        wait_on_buffer(bh);
                        goto wait_for_iobuf;
                }
+               if (cond_resched())
+                       goto wait_for_iobuf;
 
                if (unlikely(!buffer_uptodate(bh)))
                        err = -EIO;
@@ -578,7 +580,7 @@ wait_for_iobuf:
                journal_file_buffer(jh, commit_transaction, BJ_Forget);
                /* Wake up any transactions which were waiting for this
                   IO to complete */
-               wake_up_buffer(bh);
+               wake_up_bit(&bh->b_state, BH_Unshadow);
                JBUFFER_TRACE(jh, "brelse shadowed buffer");
                __brelse(bh);
        }
@@ -598,6 +600,8 @@ wait_for_iobuf:
                        wait_on_buffer(bh);
                        goto wait_for_ctlbuf;
                }
+               if (cond_resched())
+                       goto wait_for_ctlbuf;
 
                if (unlikely(!buffer_uptodate(bh)))
                        err = -EIO;
@@ -630,17 +634,46 @@ wait_for_iobuf:
        for (i = 0; i < jh2bh(descriptor)->b_size; i += 512) {
                journal_header_t *tmp =
                        (journal_header_t*)jh2bh(descriptor)->b_data;
-               tmp->h_magic = htonl(JFS_MAGIC_NUMBER);
-               tmp->h_blocktype = htonl(JFS_COMMIT_BLOCK);
-               tmp->h_sequence = htonl(commit_transaction->t_tid);
+               tmp->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
+               tmp->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK);
+               tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
        }
 
        JBUFFER_TRACE(descriptor, "write commit block");
        {
                struct buffer_head *bh = jh2bh(descriptor);
-               set_buffer_uptodate(bh);
-               sync_dirty_buffer(bh);
-               if (unlikely(!buffer_uptodate(bh)))
+               int ret;
+               int barrier_done = 0;
+
+               set_buffer_dirty(bh);
+               if (journal->j_flags & JFS_BARRIER) {
+                       set_buffer_ordered(bh);
+                       barrier_done = 1;
+               }
+               ret = sync_dirty_buffer(bh);
+               /* Is it possible for another commit to fail at roughly
+                * the same time as this one?  If so, we don't want to
+                * trust the barrier flag in the super, but instead want
+                * to remember whether we sent a barrier request.
+                */
+               if (ret == -EOPNOTSUPP && barrier_done) {
+                       char b[BDEVNAME_SIZE];
+
+                       printk(KERN_WARNING
+                               "JBD: barrier-based sync failed on %s - "
+                               "disabling barriers\n",
+                               bdevname(journal->j_dev, b));
+                       spin_lock(&journal->j_state_lock);
+                       journal->j_flags &= ~JFS_BARRIER;
+                       spin_unlock(&journal->j_state_lock);
+
+                       /* And try again, without the barrier */
+                       clear_buffer_ordered(bh);
+                       set_buffer_uptodate(bh);
+                       set_buffer_dirty(bh);
+                       ret = sync_dirty_buffer(bh);
+               }
+               if (unlikely(ret == -EIO))
                        err = -EIO;
                put_bh(bh);             /* One for getblk() */
                journal_put_journal_head(descriptor);
@@ -656,30 +689,6 @@ skip_commit: /* The journal should be unlocked by now. */
        if (err)
                __journal_abort_hard(journal);
 
-       /*
-        * Call any callbacks that had been registered for handles in this
-        * transaction.  It is up to the callback to free any allocated
-        * memory.
-        *
-        * The spinlocking (t_jcb_lock) here is surely unnecessary...
-        */
-       spin_lock(&commit_transaction->t_jcb_lock);
-       if (!list_empty(&commit_transaction->t_jcb)) {
-               struct list_head *p, *n;
-               int error = is_journal_aborted(journal);
-
-               list_for_each_safe(p, n, &commit_transaction->t_jcb) {
-                       struct journal_callback *jcb;
-
-                       jcb = list_entry(p, struct journal_callback, jcb_list);
-                       list_del(p);
-                       spin_unlock(&commit_transaction->t_jcb_lock);
-                       jcb->jcb_func(jcb, error);
-                       spin_lock(&commit_transaction->t_jcb_lock);
-               }
-       }
-       spin_unlock(&commit_transaction->t_jcb_lock);
-
        jbd_debug(3, "JBD: commit phase 7\n");
 
        J_ASSERT(commit_transaction->t_sync_datalist == NULL);
@@ -689,6 +698,7 @@ skip_commit: /* The journal should be unlocked by now. */
        J_ASSERT(commit_transaction->t_shadow_list == NULL);
        J_ASSERT(commit_transaction->t_log_list == NULL);
 
+restart_loop:
        while (commit_transaction->t_forget) {
                transaction_t *cp_transaction;
                struct buffer_head *bh;
@@ -762,6 +772,8 @@ skip_commit: /* The journal should be unlocked by now. */
                        release_buffer_page(bh);
                }
                spin_unlock(&journal->j_list_lock);
+               if (cond_resched())
+                       goto restart_loop;
        }
 
        /* Done with this transaction! */