vserver 1.9.5.x5

[linux-2.6.git] / fs / jbd / commit.c
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c

index 1c029b2..aa5f224 100644 (file)
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -262,7 +262,7 @@ write_out_data:
                         __journal_file_buffer(jh, commit_transaction,
                                                 BJ_Locked);
                         jbd_unlock_bh_state(bh);
-                       if (need_resched()) {
+                       if (lock_need_resched(&journal->j_list_lock)) {
                                 spin_unlock(&journal->j_list_lock);
                                 goto write_out_data;
                         }
@@ -288,7 +288,7 @@ write_out_data:
                                 jbd_unlock_bh_state(bh);
                                 journal_remove_journal_head(bh);
                                 put_bh(bh);
-                               if (need_resched()) {
+                               if (lock_need_resched(&journal->j_list_lock)) {
                                         spin_unlock(&journal->j_list_lock);
                                         goto write_out_data;
                                 }
@@ -333,14 +333,13 @@ write_out_data:
                         jbd_unlock_bh_state(bh);
                 }
                 put_bh(bh);
-               if (need_resched()) {
-                       spin_unlock(&journal->j_list_lock);
-                       cond_resched();
-                       spin_lock(&journal->j_list_lock);
-               }
+               cond_resched_lock(&journal->j_list_lock);
         }
         spin_unlock(&journal->j_list_lock);
  
+       if (err)
+               __journal_abort_hard(journal);
+
         journal_write_revoke_records(journal, commit_transaction);
  
         jbd_debug(3, "JBD: commit phase 2\n");
@@ -362,7 +361,7 @@ write_out_data:
          */
         commit_transaction->t_state = T_COMMIT;
  
-       descriptor = 0;
+       descriptor = NULL;
         bufs = 0;
         while (commit_transaction->t_buffers) {
  
@@ -405,14 +404,15 @@ write_out_data:
                         jbd_debug(4, "JBD: got buffer %llu (%p)\n",
                                 (unsigned long long)bh->b_blocknr, bh->b_data);
                         header = (journal_header_t *)&bh->b_data[0];
-                       header->h_magic     = htonl(JFS_MAGIC_NUMBER);
-                       header->h_blocktype = htonl(JFS_DESCRIPTOR_BLOCK);
-                       header->h_sequence  = htonl(commit_transaction->t_tid);
+                       header->h_magic     = cpu_to_be32(JFS_MAGIC_NUMBER);
+                       header->h_blocktype = cpu_to_be32(JFS_DESCRIPTOR_BLOCK);
+                       header->h_sequence  = cpu_to_be32(commit_transaction->t_tid);
  
                         tagp = &bh->b_data[sizeof(journal_header_t)];
                         space_left = bh->b_size - sizeof(journal_header_t);
                         first_tag = 1;
-                       set_bit(BH_JWrite, &bh->b_state);
+                       set_buffer_jwrite(bh);
+                       set_buffer_dirty(bh);
                         wbuf[bufs++] = bh;
  
                         /* Record it so that we can wait for IO
@@ -472,8 +472,8 @@ write_out_data:
                         tag_flag |= JFS_FLAG_SAME_UUID;
  
                 tag = (journal_block_tag_t *) tagp;
-               tag->t_blocknr = htonl(jh2bh(jh)->b_blocknr);
-               tag->t_flags = htonl(tag_flag);
+               tag->t_blocknr = cpu_to_be32(jh2bh(jh)->b_blocknr);
+               tag->t_flags = cpu_to_be32(tag_flag);
                 tagp += sizeof(journal_block_tag_t);
                 space_left -= sizeof(journal_block_tag_t);
  
@@ -497,12 +497,12 @@ write_out_data:
                             submitting the IOs.  "tag" still points to
                             the last tag we set up. */
  
-                       tag->t_flags |= htonl(JFS_FLAG_LAST_TAG);
+                       tag->t_flags |= cpu_to_be32(JFS_FLAG_LAST_TAG);
  
  start_journal_io:
                         for (i = 0; i < bufs; i++) {
                                 struct buffer_head *bh = wbuf[i];
-                               set_buffer_locked(bh);
+                               lock_buffer(bh);
                                 clear_buffer_dirty(bh);
                                 set_buffer_uptodate(bh);
                                 bh->b_end_io = journal_end_buffer_io_sync;
@@ -544,6 +544,8 @@ wait_for_iobuf:
                         wait_on_buffer(bh);
                         goto wait_for_iobuf;
                 }
+               if (cond_resched())
+                       goto wait_for_iobuf;
  
                 if (unlikely(!buffer_uptodate(bh)))
                         err = -EIO;
@@ -578,7 +580,7 @@ wait_for_iobuf:
                 journal_file_buffer(jh, commit_transaction, BJ_Forget);
                 /* Wake up any transactions which were waiting for this
                    IO to complete */
-               wake_up_buffer(bh);
+               wake_up_bit(&bh->b_state, BH_Unshadow);
                 JBUFFER_TRACE(jh, "brelse shadowed buffer");
                 __brelse(bh);
         }
@@ -598,6 +600,8 @@ wait_for_iobuf:
                         wait_on_buffer(bh);
                         goto wait_for_ctlbuf;
                 }
+               if (cond_resched())
+                       goto wait_for_ctlbuf;
  
                 if (unlikely(!buffer_uptodate(bh)))
                         err = -EIO;
@@ -630,17 +634,46 @@ wait_for_iobuf:
         for (i = 0; i < jh2bh(descriptor)->b_size; i += 512) {
                 journal_header_t *tmp =
                         (journal_header_t*)jh2bh(descriptor)->b_data;
-               tmp->h_magic = htonl(JFS_MAGIC_NUMBER);
-               tmp->h_blocktype = htonl(JFS_COMMIT_BLOCK);
-               tmp->h_sequence = htonl(commit_transaction->t_tid);
+               tmp->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
+               tmp->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK);
+               tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
         }
  
         JBUFFER_TRACE(descriptor, "write commit block");
         {
                 struct buffer_head *bh = jh2bh(descriptor);
-               set_buffer_uptodate(bh);
-               sync_dirty_buffer(bh);
-               if (unlikely(!buffer_uptodate(bh)))
+               int ret;
+               int barrier_done = 0;
+
+               set_buffer_dirty(bh);
+               if (journal->j_flags & JFS_BARRIER) {
+                       set_buffer_ordered(bh);
+                       barrier_done = 1;
+               }
+               ret = sync_dirty_buffer(bh);
+               /* Is it possible for another commit to fail at roughly
+                * the same time as this one?  If so, we don't want to
+                * trust the barrier flag in the super, but instead want
+                * to remember whether we sent a barrier request.
+                */
+               if (ret == -EOPNOTSUPP && barrier_done) {
+                       char b[BDEVNAME_SIZE];
+
+                       printk(KERN_WARNING
+                               "JBD: barrier-based sync failed on %s - "
+                               "disabling barriers\n",
+                               bdevname(journal->j_dev, b));
+                       spin_lock(&journal->j_state_lock);
+                       journal->j_flags &= ~JFS_BARRIER;
+                       spin_unlock(&journal->j_state_lock);
+
+                       /* And try again, without the barrier */
+                       clear_buffer_ordered(bh);
+                       set_buffer_uptodate(bh);
+                       set_buffer_dirty(bh);
+                       ret = sync_dirty_buffer(bh);
+               }
+               if (unlikely(ret == -EIO))
                         err = -EIO;
                 put_bh(bh);             /* One for getblk() */
                 journal_put_journal_head(descriptor);
@@ -656,30 +689,6 @@ skip_commit: /* The journal should be unlocked by now. */
         if (err)
                 __journal_abort_hard(journal);
  
-       /*
-        * Call any callbacks that had been registered for handles in this
-        * transaction.  It is up to the callback to free any allocated
-        * memory.
-        *
-        * The spinlocking (t_jcb_lock) here is surely unnecessary...
-        */
-       spin_lock(&commit_transaction->t_jcb_lock);
-       if (!list_empty(&commit_transaction->t_jcb)) {
-               struct list_head *p, *n;
-               int error = is_journal_aborted(journal);
-
-               list_for_each_safe(p, n, &commit_transaction->t_jcb) {
-                       struct journal_callback *jcb;
-
-                       jcb = list_entry(p, struct journal_callback, jcb_list);
-                       list_del(p);
-                       spin_unlock(&commit_transaction->t_jcb_lock);
-                       jcb->jcb_func(jcb, error);
-                       spin_lock(&commit_transaction->t_jcb_lock);
-               }
-       }
-       spin_unlock(&commit_transaction->t_jcb_lock);
-
         jbd_debug(3, "JBD: commit phase 7\n");
  
         J_ASSERT(commit_transaction->t_sync_datalist == NULL);
@@ -689,6 +698,7 @@ skip_commit: /* The journal should be unlocked by now. */
         J_ASSERT(commit_transaction->t_shadow_list == NULL);
         J_ASSERT(commit_transaction->t_log_list == NULL);
  
+restart_loop:
         while (commit_transaction->t_forget) {
                 transaction_t *cp_transaction;
                 struct buffer_head *bh;
@@ -762,6 +772,8 @@ skip_commit: /* The journal should be unlocked by now. */
                         release_buffer_page(bh);
                 }
                 spin_unlock(&journal->j_list_lock);
+               if (cond_resched())
+                       goto restart_loop;
         }
  
         /* Done with this transaction! */