From 972baef97e68e5446115107c96d56c5967940376 Mon Sep 17 00:00:00 2001
From: =?utf8?q?S=2E=C3=87a=C4=9Flar=20Onur?= <caglar@cs.princeton.edu>
Date: Fri, 26 Mar 2010 18:52:07 +0000
Subject: [PATCH] This patch backports the following upstream commits in order
 to solve some of the EXT3 FS related problems:

* orphan_list_check_on_destroy_inode.patch - http://github.com/caglar10ur/linux-2.6.27.y/commit/56ccd891a32e6409700786737953906426512ff7
http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=a6c15c2b0fbfd5c0a84f5f0e1e3f20f85d2b8692

* don-t-read-inode-block-if-buf-has-write-error.patch - http://github.com/caglar10ur/linux-2.6.27.y/commit/1e7d951fa2bd7d49121aff0ec7eb0331e3d5eeb5
http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=95450f5a7e53d5752ce1a0d0b8282e10fe745ae0

* error-in-ext3_lookup-if-corruption-found.patch - http://github.com/caglar10ur/linux-2.6.27.y/commit/f1d52243f69747ee601d671ec1b98a7363ce0597
Filesystem errors should be logged and not silently ignored

* fix-accessing-freed-memory-in-ext3_abort.patch - http://github.com/caglar10ur/linux-2.6.27.y/commit/f863ab8749fca8d167b357357ed7492c1c6d530d
http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.32.y.git;a=commitdiff_plain;h=44d6f78756560e95903de239e10f8a40a6eae444

* make_fdatasync_not_sync_metadata.patch - http://github.com/caglar10ur/linux-2.6.27.y/commit/1d29962ae5187764989ede9ec0d0777d2f489345
http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=3d61f75eefedf75914ab4453c67aaa2ee64bcf93

* add_checks_for_errors_from_jbd.patch - http://github.com/caglar10ur/linux-2.6.27.y/commit/f38c319731f010b1c36c25ed591f79bcb557d2f2
http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=2d7c820e56ce83b23daee9eb5343730fb309418e

* add_missing_error_checks.patch - http://github.com/caglar10ur/linux-2.6.27.y/commit/8f0e6faf83721162a77cb5df5c483e4799bea22b
http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=cbe5f466f6995e10a10c7ae66d6dc8608f08a6b8

The original patch does the following two things:

  (1) stop aborting the journal on file data write errors, instead
      just call printk() and set AS_EIO to appropriate address_space
      objects
  (2) add missing error checks for file data writes

This patch does only (2).

* dont_dirty_original_metadata_buffer_on_abort.patch - http://github.com/caglar10ur/linux-2.6.27.y/commit/d003fa1a88c857795ca7e102eefbf26c9088aa66
http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=7ad7445f60fe4d46c4c9d2a9463db180d2a3b270

* fix_commit_code_to_properly_abort_journal.patch - http://github.com/caglar10ur/linux-2.6.27.y/commit/232632e87d9bc83b89d97f98f311d67d45e0e6dd
http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=7a266e75cf5a1efd20d084408a1b7f1a185496dd

* fix_journal_overflow_issues.patch - http://github.com/caglar10ur/linux-2.6.27.y/commit/74361d7e55f28847e3b7eda4a4563d02ab001537
http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=5bc833feaa8b2236265764e7e81f44937be46eda
http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=5b9a499d77e9dd39c9e6611ea10c56a31604f274

* fix_typo_in_recovery_code.patch - http://github.com/caglar10ur/linux-2.6.27.y/commit/da999401f4bedf317b2e7dcd3c9163b1a433ba3c
http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=439aeec639d7c57f3561054a6d315c40fd24bb74

* jbd-properly-dispose-of-unmapped-data-buffers.patch - http://github.com/caglar10ur/linux-2.6.27.y/commit/841d34d702702c85c7b8cc31a185e48ce3ca0a8e
http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=fc80c44277b3c92d808b73e9d40e120229aa4b6a

* jdb-abort-when-failed-to-log-metadata-buffers.patch - http://github.com/caglar10ur/linux-2.6.27.y/commit/7de4ddac8020dcb2078b7237650e972ecfd112cf
http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=44519faf22ad6ce924ad0352d3dc200d9e0b66e8

* fix-assertion-failure-in-fs-jbd-checkpoint.patch - http://github.com/caglar10ur/linux-2.6.27.y/commit/268ff9d67ed3269e5d84914aabd30d06ee89f563
http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.32.y.git;a=commitdiff_plain;h=d4beaf4ab5f89496f2bcf67db62ad95d99bfeff6

* fix-error-handling-for-checkpoint-io.patch - http://github.com/caglar10ur/linux-2.6.27.y/commit/e1ef6b77a95b8e02255dfa02fef06e2231e92645
http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=4afe978530702c934dfdb11f54073136818b2119

* jbd-test-BH_Write_EIO-to-detect-errors-on-metadata.patch - http://github.com/caglar10ur/linux-2.6.27.y/commit/1a8ede62acc03d3b2baa98d02189685a4e30044f
http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=9f818b4ac04f53458d0354950b4f229f54be4dbf

* handle-corrupted-orphan-list-at-mount.patch - http://github.com/caglar10ur/linux-2.6.27.y/commit/6003003452a5faaa0b2d1deb6356ebf8d4e2fe3f
http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=ae76dd9a6b5bbe5315fb7028e03f68f75b8538f3

 fs/ext3/fsync.c         |    3 +
 fs/ext3/ialloc.c        |   69 ++++++++++++++++++++++++--------------
 fs/ext3/inode.c         |   30 +++++++++++++---
 fs/ext3/ioctl.c         |   12 ++++--
 fs/ext3/namei.c         |   15 ++++++++
 fs/ext3/super.c         |   22 +++++++++---
 fs/jbd/checkpoint.c     |   61 +++++++++++++++++++++++----------
 fs/jbd/commit.c         |   87 ++++++++++++++++++++++++++----------------------
 fs/jbd/journal.c        |   28 ++++++++++++---
 fs/jbd/recovery.c       |    9 +++-
 fs/jbd/transaction.c    |   33 ++++++++++++++++--
 include/linux/ext3_fs.h |    1
 include/linux/jbd.h     |    4 +-
 13 files changed, 266 insertions(+), 108 deletions(-)
---
 kernel-2.6.spec                    |    4 +
 linux-2.6.950-ext3_backports.patch | 1057 ++++++++++++++++++++++++++++
 2 files changed, 1061 insertions(+)
 create mode 100644 linux-2.6.950-ext3_backports.patch

diff --git a/kernel-2.6.spec b/kernel-2.6.spec
index e93c0cc9e..fe922bd43 100644
--- a/kernel-2.6.spec
+++ b/kernel-2.6.spec
@@ -220,6 +220,8 @@ Patch810: linux-2.6-810-ich10.patch
 Patch900: linux-2.6-900-ext3_mount_default_to_barrier.patch
 Patch910: linux-2.6-910-support_barriers_on_single_device_dm_devices.patch
 
+Patch950: linux-2.6.950-ext3_backports.patch
+
 # See also the file named 'sources' here for the related checksums
 # NOTE. iwlwifi should be in-kernel starting from 2.6.24
 # see http://bughost.org/bugzilla/show_bug.cgi?id=1584
@@ -460,6 +462,8 @@ KERNEL_PREVIOUS=vanilla
 %ApplyPatch 900
 %ApplyPatch 910
 
+%ApplyPatch 950
+
 # NetNS conflict-resolving patch for VINI. Will work with patch vini_pl_patch-1 but may
 # break with later patches.
 
diff --git a/linux-2.6.950-ext3_backports.patch b/linux-2.6.950-ext3_backports.patch
new file mode 100644
index 000000000..51c21e700
--- /dev/null
+++ b/linux-2.6.950-ext3_backports.patch
@@ -0,0 +1,1057 @@
+* orphan_list_check_on_destroy_inode.patch
+	http://github.com/caglar10ur/linux-2.6.27.y/commit/56ccd891a32e6409700786737953906426512ff7
+	http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=a6c15c2b0fbfd5c0a84f5f0e1e3f20f85d2b8692
+
+* don-t-read-inode-block-if-buf-has-write-error.patch
+	http://github.com/caglar10ur/linux-2.6.27.y/commit/1e7d951fa2bd7d49121aff0ec7eb0331e3d5eeb5
+	http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=95450f5a7e53d5752ce1a0d0b8282e10fe745ae0
+
+* error-in-ext3_lookup-if-corruption-found.patch 
+	http://github.com/caglar10ur/linux-2.6.27.y/commit/f1d52243f69747ee601d671ec1b98a7363ce0597
+	Filesystem errors should be logged and not silently ignored
+
+* fix-accessing-freed-memory-in-ext3_abort.patch
+	http://github.com/caglar10ur/linux-2.6.27.y/commit/f863ab8749fca8d167b357357ed7492c1c6d530d
+	http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.32.y.git;a=commitdiff_plain;h=44d6f78756560e95903de239e10f8a40a6eae444
+
+* make_fdatasync_not_sync_metadata.patch 
+	http://github.com/caglar10ur/linux-2.6.27.y/commit/1d29962ae5187764989ede9ec0d0777d2f489345
+	http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=3d61f75eefedf75914ab4453c67aaa2ee64bcf93
+
+* add_checks_for_errors_from_jbd.patch 
+	http://github.com/caglar10ur/linux-2.6.27.y/commit/f38c319731f010b1c36c25ed591f79bcb557d2f2
+	http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=2d7c820e56ce83b23daee9eb5343730fb309418e
+
+* add_missing_error_checks.patch 
+	http://github.com/caglar10ur/linux-2.6.27.y/commit/8f0e6faf83721162a77cb5df5c483e4799bea22b
+	http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=cbe5f466f6995e10a10c7ae66d6dc8608f08a6b8
+
+The original patch does the following two things:
+
+  (1) stop aborting the journal on file data write errors, instead
+      just call printk() and set AS_EIO to appropriate address_space
+      objects
+  (2) add missing error checks for file data writes
+
+This patch does only (2).
+
+* dont_dirty_original_metadata_buffer_on_abort.patch 
+	http://github.com/caglar10ur/linux-2.6.27.y/commit/d003fa1a88c857795ca7e102eefbf26c9088aa66
+	http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=7ad7445f60fe4d46c4c9d2a9463db180d2a3b270
+
+* fix_commit_code_to_properly_abort_journal.patch 
+	http://github.com/caglar10ur/linux-2.6.27.y/commit/232632e87d9bc83b89d97f98f311d67d45e0e6dd
+	http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=7a266e75cf5a1efd20d084408a1b7f1a185496dd
+
+* fix_journal_overflow_issues.patch 
+	http://github.com/caglar10ur/linux-2.6.27.y/commit/74361d7e55f28847e3b7eda4a4563d02ab001537
+	http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=5bc833feaa8b2236265764e7e81f44937be46eda
+	http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=5b9a499d77e9dd39c9e6611ea10c56a31604f274
+
+* fix_typo_in_recovery_code.patch 
+	http://github.com/caglar10ur/linux-2.6.27.y/commit/da999401f4bedf317b2e7dcd3c9163b1a433ba3c
+	http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=439aeec639d7c57f3561054a6d315c40fd24bb74
+
+* jbd-properly-dispose-of-unmapped-data-buffers.patch 
+	http://github.com/caglar10ur/linux-2.6.27.y/commit/841d34d702702c85c7b8cc31a185e48ce3ca0a8e
+	http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=fc80c44277b3c92d808b73e9d40e120229aa4b6a
+
+* jdb-abort-when-failed-to-log-metadata-buffers.patch 
+	http://github.com/caglar10ur/linux-2.6.27.y/commit/7de4ddac8020dcb2078b7237650e972ecfd112cf
+	http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=44519faf22ad6ce924ad0352d3dc200d9e0b66e8
+
+* fix-assertion-failure-in-fs-jbd-checkpoint.patch 
+	http://github.com/caglar10ur/linux-2.6.27.y/commit/268ff9d67ed3269e5d84914aabd30d06ee89f563
+	http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.32.y.git;a=commitdiff_plain;h=d4beaf4ab5f89496f2bcf67db62ad95d99bfeff6
+
+* fix-error-handling-for-checkpoint-io.patch 
+	http://github.com/caglar10ur/linux-2.6.27.y/commit/e1ef6b77a95b8e02255dfa02fef06e2231e92645
+	http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=4afe978530702c934dfdb11f54073136818b2119
+
+* jbd-test-BH_Write_EIO-to-detect-errors-on-metadata.patch 
+	http://github.com/caglar10ur/linux-2.6.27.y/commit/1a8ede62acc03d3b2baa98d02189685a4e30044f
+	http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=9f818b4ac04f53458d0354950b4f229f54be4dbf
+
+* handle-corrupted-orphan-list-at-mount.patch 
+	http://github.com/caglar10ur/linux-2.6.27.y/commit/6003003452a5faaa0b2d1deb6356ebf8d4e2fe3f
+	http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=ae76dd9a6b5bbe5315fb7028e03f68f75b8538f3
+
+
+diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
+index b0615c8..841f0f7 100644
+--- a/fs/ext3/fsync.c
++++ b/fs/ext3/fsync.c
+@@ -73,6 +73,9 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
+ 		goto out;
+ 	}
+ 
++	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
++		goto out;
++
+ 	/*
+ 	 * The VFS has written the file data.  If the inode is unaltered
+ 	 * then we need not start a commit.
+diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
+index 9885ff8..d586377 100644
+--- a/fs/ext3/ialloc.c
++++ b/fs/ext3/ialloc.c
+@@ -658,14 +658,15 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
+ 	unsigned long max_ino = le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count);
+ 	unsigned long block_group;
+ 	int bit;
+-	struct buffer_head *bitmap_bh = NULL;
++	struct buffer_head *bitmap_bh;
+ 	struct inode *inode = NULL;
++	long err = -EIO;
+ 
+ 	/* Error cases - e2fsck has already cleaned up for us */
+ 	if (ino > max_ino) {
+ 		ext3_warning(sb, __FUNCTION__,
+ 			     "bad orphan ino %lu!  e2fsck was run?", ino);
+-		goto out;
++		goto error;
+ 	}
+ 
+ 	block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
+@@ -674,38 +675,58 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
+ 	if (!bitmap_bh) {
+ 		ext3_warning(sb, __FUNCTION__,
+ 			     "inode bitmap error for orphan %lu", ino);
+-		goto out;
++		goto error;
+ 	}
+ 
+ 	/* Having the inode bit set should be a 100% indicator that this
+ 	 * is a valid orphan (no e2fsck run on fs).  Orphans also include
+ 	 * inodes that were being truncated, so we can't check i_nlink==0.
+ 	 */
+-	if (!ext3_test_bit(bit, bitmap_bh->b_data) ||
+-			!(inode = iget(sb, ino)) || is_bad_inode(inode) ||
+-			NEXT_ORPHAN(inode) > max_ino) {
+-		ext3_warning(sb, __FUNCTION__,
+-			     "bad orphan inode %lu!  e2fsck was run?", ino);
+-		printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n",
+-		       bit, (unsigned long long)bitmap_bh->b_blocknr,
+-		       ext3_test_bit(bit, bitmap_bh->b_data));
+-		printk(KERN_NOTICE "inode=%p\n", inode);
+-		if (inode) {
+-			printk(KERN_NOTICE "is_bad_inode(inode)=%d\n",
+-			       is_bad_inode(inode));
+-			printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
+-			       NEXT_ORPHAN(inode));
+-			printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
+-		}
++	if (!ext3_test_bit(bit, bitmap_bh->b_data))
++		goto bad_orphan;
++
++	/* iget() in this tree reports failure with NULL or a bad inode,
++	 * never an ERR_PTR(); the bad_orphan path copes with both.
++	 */
++	inode = iget(sb, ino);
++	if (!inode || is_bad_inode(inode))
++		goto bad_orphan;
++
++	/*
++	 * If the orphan has i_nlink > 0 then it should be able to be
++	 * truncated, otherwise it won't be removed from the orphan list
++	 * during processing and an infinite loop will result.
++	 */
++	if (inode->i_nlink && !ext3_can_truncate(inode))
++		goto bad_orphan;
++
++	if (NEXT_ORPHAN(inode) > max_ino)
++		goto bad_orphan;
++	brelse(bitmap_bh);
++	return inode;
++
++bad_orphan:
++	ext3_warning(sb, __FUNCTION__,
++		     "bad orphan inode %lu!  e2fsck was run?", ino);
++	printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n",
++	       bit, (unsigned long long)bitmap_bh->b_blocknr,
++	       ext3_test_bit(bit, bitmap_bh->b_data));
++	printk(KERN_NOTICE "inode=%p\n", inode);
++	if (inode) {
++		printk(KERN_NOTICE "is_bad_inode(inode)=%d\n",
++		       is_bad_inode(inode));
++		printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
++		       NEXT_ORPHAN(inode));
++		printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
++                printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink);
+ 		/* Avoid freeing blocks if we got a bad deleted inode */
+-		if (inode && inode->i_nlink == 0)
++		if (inode->i_nlink == 0)
+ 			inode->i_blocks = 0;
+ 		iput(inode);
+-		inode = NULL;
+ 	}
+-out:
+-	brelse(bitmap_bh);
+-	return inode;
++        brelse(bitmap_bh);
++error:
++	return ERR_PTR(err);
+ }
+ 
+ unsigned long ext3_count_free_inodes (struct super_block * sb)
+diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
+index 0698ce5..cede457 100644
+--- a/fs/ext3/inode.c
++++ b/fs/ext3/inode.c
+@@ -2189,6 +2189,19 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
+ 	}
+ }
+ 
++int ext3_can_truncate(struct inode *inode)
++{
++	if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
++		return 0;
++	if (S_ISREG(inode->i_mode))
++		return 1;
++	if (S_ISDIR(inode->i_mode))
++		return 1;
++	if (S_ISLNK(inode->i_mode))
++		return !ext3_inode_is_fast_symlink(inode);
++	return 0;
++}
++
+ /*
+  * ext3_truncate()
+  *
+@@ -2233,12 +2246,7 @@ void ext3_truncate(struct inode *inode)
+ 	unsigned blocksize = inode->i_sb->s_blocksize;
+ 	struct page *page;
+ 
+-	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+-	    S_ISLNK(inode->i_mode)))
+-		return;
+-	if (ext3_inode_is_fast_symlink(inode))
+-		return;
+-	if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
++	if (!ext3_can_truncate(inode))
+ 		return;
+ 
+ 	/*
+@@ -2462,6 +2470,16 @@ static int __ext3_get_inode_loc(struct inode *inode,
+ 	}
+ 	if (!buffer_uptodate(bh)) {
+ 		lock_buffer(bh);
++
++		/*
++		* If the buffer has the write error flag, we have failed
++		* to write out another inode in the same block.  In this
++		* case, we don't have to read the block because we may
++		* read the old inode data successfully.
++		*/
++		if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
++			set_buffer_uptodate(bh);
++
+ 		if (buffer_uptodate(bh)) {
+ 			/* someone brought it uptodate while we waited */
+ 			unlock_buffer(bh);
+diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
+index 2e910db..127733b 100644
+--- a/fs/ext3/ioctl.c
++++ b/fs/ext3/ioctl.c
+@@ -215,7 +215,7 @@ flags_err:
+ 	case EXT3_IOC_GROUP_EXTEND: {
+ 		ext3_fsblk_t n_blocks_count;
+ 		struct super_block *sb = inode->i_sb;
+-		int err;
++		int err, err2;
+ 
+ 		if (!capable(CAP_SYS_RESOURCE))
+ 			return -EPERM;
+@@ -229,15 +229,17 @@ flags_err:
+ 
+ 		err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count);
+ 		journal_lock_updates(EXT3_SB(sb)->s_journal);
+-		journal_flush(EXT3_SB(sb)->s_journal);
++		err2 = journal_flush(EXT3_SB(sb)->s_journal);
+ 		journal_unlock_updates(EXT3_SB(sb)->s_journal);
++		if (err == 0)
++			err = err2;
+ 
+ 		return err;
+ 	}
+ 	case EXT3_IOC_GROUP_ADD: {
+ 		struct ext3_new_group_data input;
+ 		struct super_block *sb = inode->i_sb;
+-		int err;
++		int err, err2;
+ 
+ 		if (!capable(CAP_SYS_RESOURCE))
+ 			return -EPERM;
+@@ -252,8 +254,10 @@ flags_err:
+ 
+ 		err = ext3_group_add(sb, &input);
+ 		journal_lock_updates(EXT3_SB(sb)->s_journal);
+-		journal_flush(EXT3_SB(sb)->s_journal);
++		err2 = journal_flush(EXT3_SB(sb)->s_journal);
+ 		journal_unlock_updates(EXT3_SB(sb)->s_journal);
++		if (err == 0)
++			err = err2;
+ 
+ 		return err;
+ 	}
+diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
+index acbfa15..a2a3d92 100644
+--- a/fs/ext3/namei.c
++++ b/fs/ext3/namei.c
+@@ -1053,6 +1053,16 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
+ 
+ 		if (!inode)
+ 			return ERR_PTR(-EACCES);
++
++		if (is_bad_inode(inode)) {
++			/* if bad because unlinked, something has gone wrong */
++			if (!inode->i_nlink && printk_ratelimit())
++				ext3_error(inode->i_sb, __FUNCTION__, "unlinked inode %lu in dir #%lu", inode->i_ino, dir->i_ino);
++
++			iput(inode);
++			return ERR_PTR(-ENOENT);
++		}
++
+ 		dx_propagate_tag(nd, inode);
+ 	}
+ 	return d_splice_alias(inode, dentry);
+@@ -1089,6 +1099,11 @@ struct dentry *ext3_get_parent(struct dentry *child)
+ 	if (!inode)
+ 		return ERR_PTR(-EACCES);
+ 
++	if (is_bad_inode(inode)) {
++		iput(inode);
++		return ERR_PTR(-ENOENT);
++	}
++
+ 	parent = d_alloc_anon(inode);
+ 	if (!parent) {
+ 		iput(inode);
+diff --git a/fs/ext3/super.c b/fs/ext3/super.c
+index 22244a2..ce186bc 100644
+--- a/fs/ext3/super.c
++++ b/fs/ext3/super.c
+@@ -279,7 +279,8 @@ void ext3_abort (struct super_block * sb, const char * function,
+ 	EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
+ 	sb->s_flags |= MS_RDONLY;
+ 	EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
+-	journal_abort(EXT3_SB(sb)->s_journal, -EIO);
++	if (EXT3_SB(sb)->s_journal)
++		journal_abort(EXT3_SB(sb)->s_journal, -EIO);
+ }
+ 
+ void ext3_warning (struct super_block * sb, const char * function,
+@@ -388,10 +389,14 @@ static void ext3_put_super (struct super_block * sb)
+ {
+ 	struct ext3_sb_info *sbi = EXT3_SB(sb);
+ 	struct ext3_super_block *es = sbi->s_es;
+-	int i;
++	int i, err;
+ 
+ 	ext3_xattr_put_super(sb);
+-	journal_destroy(sbi->s_journal);
++	err = journal_destroy(sbi->s_journal);
++	sbi->s_journal = NULL; 
++	if (err < 0)
++		ext3_abort(sb, __func__, "Couldn't clean up the journal");
++
+ 	if (!(sb->s_flags & MS_RDONLY)) {
+ 		EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
+ 		es->s_state = cpu_to_le16(sbi->s_mount_state);
+@@ -2161,13 +2166,15 @@ static void ext3_mark_recovery_complete(struct super_block * sb,
+ 	journal_t *journal = EXT3_SB(sb)->s_journal;
+ 
+ 	journal_lock_updates(journal);
+-	journal_flush(journal);
++	if (journal_flush(journal) < 0)
++		goto out;
+ 	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
+ 	    sb->s_flags & MS_RDONLY) {
+ 		EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
+ 		sb->s_dirt = 0;
+ 		ext3_commit_super(sb, es, 1);
+ 	}
++out:
+ 	journal_unlock_updates(journal);
+ }
+ 
+@@ -2269,6 +2276,12 @@ static void ext3_write_super_lockfs(struct super_block *sb)
+ 		journal_lock_updates(journal);
+-		journal_flush(journal);
+ 
++		/*
++		 * We don't want to clear needs_recovery flag when we failed
++		 * to flush the journal.
++		 */
++		if (journal_flush(journal) < 0)
++			return;
++
+ 		/* Journal blocked and flushed, clear needs_recovery flag. */
+ 		EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
+ 		ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
+diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
+index 47552d4..803392f 100644
+--- a/fs/jbd/checkpoint.c
++++ b/fs/jbd/checkpoint.c
+@@ -93,7 +93,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
+ 	int ret = 0;
+ 	struct buffer_head *bh = jh2bh(jh);
+ 
+-	if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
++	if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
++	    !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
+ 		JBUFFER_TRACE(jh, "remove from checkpoint list");
+ 		ret = __journal_remove_checkpoint(jh) + 1;
+ 		jbd_unlock_bh_state(bh);
+@@ -160,21 +161,25 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
+  * buffers. Note that we take the buffers in the opposite ordering
+  * from the one in which they were submitted for IO.
+  *
++ * Return 0 on success, and return <0 if some buffers have failed
++ * to be written out.
++ *
+  * Called with j_list_lock held.
+  */
+-static void __wait_cp_io(journal_t *journal, transaction_t *transaction)
++static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
+ {
+ 	struct journal_head *jh;
+ 	struct buffer_head *bh;
+ 	tid_t this_tid;
+ 	int released = 0;
++	int ret = 0;
+ 
+ 	this_tid = transaction->t_tid;
+ restart:
+ 	/* Did somebody clean up the transaction in the meanwhile? */
+ 	if (journal->j_checkpoint_transactions != transaction ||
+ 			transaction->t_tid != this_tid)
+-		return;
++		return ret;
+ 	while (!released && transaction->t_checkpoint_io_list) {
+ 		jh = transaction->t_checkpoint_io_list;
+ 		bh = jh2bh(jh);
+@@ -194,6 +199,9 @@ restart:
+ 			spin_lock(&journal->j_list_lock);
+ 			goto restart;
+ 		}
++		if (unlikely(buffer_write_io_error(bh)))
++			ret = -EIO;
++
+ 		/*
+ 		 * Now in whatever state the buffer currently is, we know that
+ 		 * it has been written out and so we can drop it from the list
+@@ -203,6 +211,8 @@ restart:
+ 		journal_remove_journal_head(bh);
+ 		__brelse(bh);
+ 	}
++
++	return ret;
+ }
+ 
+ #define NR_BATCH	64
+@@ -226,7 +236,8 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
+  * Try to flush one buffer from the checkpoint list to disk.
+  *
+  * Return 1 if something happened which requires us to abort the current
+- * scan of the checkpoint list.
++ * scan of the checkpoint list.  Return <0 if the buffer has failed to
++ * be written out.
+  *
+  * Called with j_list_lock held and drops it if 1 is returned
+  * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
+@@ -256,6 +267,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
+ 		log_wait_commit(journal, tid);
+ 		ret = 1;
+ 	} else if (!buffer_dirty(bh)) {
++		ret = 1;
++		if (unlikely(buffer_write_io_error(bh)))
++			ret = -EIO;
+ 		J_ASSERT_JH(jh, !buffer_jbddirty(bh));
+ 		BUFFER_TRACE(bh, "remove from checkpoint");
+ 		__journal_remove_checkpoint(jh);
+@@ -263,7 +277,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
+ 		jbd_unlock_bh_state(bh);
+ 		journal_remove_journal_head(bh);
+ 		__brelse(bh);
+-		ret = 1;
+ 	} else {
+ 		/*
+ 		 * Important: we are about to write the buffer, and
+@@ -295,6 +308,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
+  * to disk. We submit larger chunks of data at once.
+  *
+  * The journal should be locked before calling this function.
++ * Called with j_checkpoint_mutex held.
+  */
+ int log_do_checkpoint(journal_t *journal)
+ {
+@@ -318,6 +332,7 @@ int log_do_checkpoint(journal_t *journal)
+ 	 * OK, we need to start writing disk blocks.  Take one transaction
+ 	 * and write it.
+ 	 */
++	result = 0;
+ 	spin_lock(&journal->j_list_lock);
+ 	if (!journal->j_checkpoint_transactions)
+ 		goto out;
+@@ -334,7 +349,7 @@ restart:
+ 		int batch_count = 0;
+ 		struct buffer_head *bhs[NR_BATCH];
+ 		struct journal_head *jh;
+-		int retry = 0;
++		int retry = 0, err;
+ 
+ 		while (!retry && transaction->t_checkpoint_list) {
+ 			struct buffer_head *bh;
+@@ -347,6 +362,8 @@ restart:
+ 				break;
+ 			}
+ 			retry = __process_buffer(journal, jh, bhs,&batch_count);
++			if (retry < 0 && !result)
++				result = retry;
+ 			if (!retry && lock_need_resched(&journal->j_list_lock)){
+ 				spin_unlock(&journal->j_list_lock);
+ 				retry = 1;
+@@ -370,14 +387,18 @@ restart:
+ 		 * Now we have cleaned up the first transaction's checkpoint
+ 		 * list. Let's clean up the second one
+ 		 */
+-		__wait_cp_io(journal, transaction);
++		err = __wait_cp_io(journal, transaction);
++		if (!result)
++			result = err;
+ 	}
+ out:
+ 	spin_unlock(&journal->j_list_lock);
+-	result = cleanup_journal_tail(journal);
+ 	if (result < 0)
+-		return result;
+-	return 0;
++		journal_abort(journal, result);
++	else
++		result = cleanup_journal_tail(journal);
++
++	return (result < 0) ? result : 0;
+ }
+ 
+ /*
+@@ -393,8 +414,9 @@ out:
+  * This is the only part of the journaling code which really needs to be
+  * aware of transaction aborts.  Checkpointing involves writing to the
+  * main filesystem area rather than to the journal, so it can proceed
+- * even in abort state, but we must not update the journal superblock if
+- * we have an abort error outstanding.
++ * even in abort state, but we must not update the super block if
++ * checkpointing may have failed.  Otherwise, we would lose some metadata
++ * buffers which should be written-back to the filesystem.
+  */
+ 
+ int cleanup_journal_tail(journal_t *journal)
+@@ -403,6 +425,9 @@ int cleanup_journal_tail(journal_t *journal)
+ 	tid_t		first_tid;
+ 	unsigned long	blocknr, freed;
+ 
++	if (is_journal_aborted(journal))
++		return 1;
++
+ 	/* OK, work out the oldest transaction remaining in the log, and
+ 	 * the log block it starts at.
+ 	 *
+@@ -602,15 +627,15 @@ int __journal_remove_checkpoint(struct journal_head *jh)
+ 
+ 	/*
+ 	 * There is one special case to worry about: if we have just pulled the
+-	 * buffer off a committing transaction's forget list, then even if the
+-	 * checkpoint list is empty, the transaction obviously cannot be
+-	 * dropped!
++	 * buffer off a running or committing transaction's checkpoint list,
++	 * then even if the checkpoint list is empty, the transaction obviously
++	 * cannot be dropped!
+ 	 *
+-	 * The locking here around j_committing_transaction is a bit sleazy.
++	 * The locking here around t_state is a bit sleazy.
+ 	 * See the comment at the end of journal_commit_transaction().
+ 	 */
+-	if (transaction == journal->j_committing_transaction) {
+-		JBUFFER_TRACE(jh, "belongs to committing transaction");
++	if (transaction->t_state != T_FINISHED) {
++		JBUFFER_TRACE(jh, "belongs to running/committing transaction");
+ 		goto out;
+ 	}
+ 
+diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
+index a003d50..0d2785d 100644
+--- a/fs/jbd/commit.c
++++ b/fs/jbd/commit.c
+@@ -36,7 +36,7 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
+ 
+ /*
+  * When an ext3-ordered file is truncated, it is possible that many pages are
+- * not sucessfully freed, because they are attached to a committing transaction.
++ * not successfully freed, because they are attached to a committing transaction.
+  * After the transaction commits, these pages are left on the LRU, with no
+  * ->mapping, and with attached buffers.  These pages are trivially reclaimable
+  * by the VM, but their apparent absence upsets the VM accounting, and it makes
+@@ -45,8 +45,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
+  * So here, we have a buffer which has just come off the forget list.  Look to
+  * see if we can strip all buffers from the backing page.
+  *
+- * Called under lock_journal(), and possibly under journal_datalist_lock.  The
+- * caller provided us with a ref against the buffer, and we drop that here.
++ * Called under journal->j_list_lock.  The caller provided us with a ref
++ * against the buffer, and we drop that here.
+  */
+ static void release_buffer_page(struct buffer_head *bh)
+ {
+@@ -78,6 +78,19 @@ nope:
+ }
+ 
+ /*
++ * Decrement reference counter for data buffer. If it has been marked
++ * 'BH_Freed', release it and the page to which it belongs if possible.
++ */
++static void release_data_buffer(struct buffer_head *bh)
++{
++	if (buffer_freed(bh)) {
++		clear_buffer_freed(bh);
++		release_buffer_page(bh);
++	} else
++		put_bh(bh);
++}
++
++/*
+  * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
+  * held.  For ranking reasons we must trylock.  If we lose, schedule away and
+  * return 0.  j_list_lock is dropped in this case.
+@@ -173,7 +186,7 @@ static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
+ /*
+  *  Submit all the data buffers to disk
+  */
+-static void journal_submit_data_buffers(journal_t *journal,
++static int journal_submit_data_buffers(journal_t *journal,
+ 				transaction_t *commit_transaction)
+ {
+ 	struct journal_head *jh;
+@@ -181,6 +194,7 @@ static void journal_submit_data_buffers(journal_t *journal,
+ 	int locked;
+ 	int bufs = 0;
+ 	struct buffer_head **wbuf = journal->j_wbuf;
++	int err = 0;
+ 
+ 	/*
+ 	 * Whenever we unlock the journal and sleep, things can get added
+@@ -232,7 +246,7 @@ write_out_data:
+ 			if (locked)
+ 				unlock_buffer(bh);
+ 			BUFFER_TRACE(bh, "already cleaned up");
+-			put_bh(bh);
++			release_data_buffer(bh);
+ 			continue;
+ 		}
+ 		if (locked && test_clear_buffer_dirty(bh)) {
+@@ -254,15 +268,17 @@ write_out_data:
+ 			put_bh(bh);
+ 		} else {
+ 			BUFFER_TRACE(bh, "writeout complete: unfile");
++			if (unlikely(buffer_write_io_error(bh)))
++				err = -EIO;
+ 			__journal_unfile_buffer(jh);
+ 			jbd_unlock_bh_state(bh);
+ 			if (locked)
+ 				unlock_buffer(bh);
+ 			journal_remove_journal_head(bh);
+-			/* Once for our safety reference, once for
++			/* One for our safety reference, other for
+ 			 * journal_remove_journal_head() */
+ 			put_bh(bh);
+-			put_bh(bh);
++			release_data_buffer(bh);
+ 		}
+ 
+ 		if (lock_need_resched(&journal->j_list_lock)) {
+@@ -272,6 +288,8 @@ write_out_data:
+ 	}
+ 	spin_unlock(&journal->j_list_lock);
+ 	journal_do_submit_data(wbuf, bufs);
++
++	return err;
+ }
+ 
+ /*
+@@ -408,27 +426,10 @@ void journal_commit_transaction(journal_t *journal)
+ 	jbd_debug (3, "JBD: commit phase 2\n");
+ 
+ 	/*
+-	 * First, drop modified flag: all accesses to the buffers
+-	 * will be tracked for a new trasaction only -bzzz
+-	 */
+-	spin_lock(&journal->j_list_lock);
+-	if (commit_transaction->t_buffers) {
+-		new_jh = jh = commit_transaction->t_buffers->b_tnext;
+-		do {
+-			J_ASSERT_JH(new_jh, new_jh->b_modified == 1 ||
+-					new_jh->b_modified == 0);
+-			new_jh->b_modified = 0;
+-			new_jh = new_jh->b_tnext;
+-		} while (new_jh != jh);
+-	}
+-	spin_unlock(&journal->j_list_lock);
+-
+-	/*
+ 	 * Now start flushing things to disk, in the order they appear
+ 	 * on the transaction lists.  Data blocks go first.
+ 	 */
+-	err = 0;
+-	journal_submit_data_buffers(journal, commit_transaction);
++	err = journal_submit_data_buffers(journal, commit_transaction);
+ 
+ 	/*
+ 	 * Wait for all previously submitted IO to complete.
+@@ -443,10 +444,11 @@ void journal_commit_transaction(journal_t *journal)
+ 		if (buffer_locked(bh)) {
+ 			spin_unlock(&journal->j_list_lock);
+ 			wait_on_buffer(bh);
+-			if (unlikely(!buffer_uptodate(bh)))
+-				err = -EIO;
+ 			spin_lock(&journal->j_list_lock);
+ 		}
++		if (unlikely(!buffer_uptodate(bh)))
++			err = -EIO;
++
+ 		if (!inverted_lock(journal, bh)) {
+ 			put_bh(bh);
+ 			spin_lock(&journal->j_list_lock);
+@@ -460,18 +462,16 @@ void journal_commit_transaction(journal_t *journal)
+ 		} else {
+ 			jbd_unlock_bh_state(bh);
+ 		}
+-		put_bh(bh);
++		release_data_buffer(bh);
+ 		cond_resched_lock(&journal->j_list_lock);
+ 	}
+ 	spin_unlock(&journal->j_list_lock);
+ 
+ 	if (err)
+-		__journal_abort_hard(journal);
++		journal_abort(journal, err);
+ 
+ 	journal_write_revoke_records(journal, commit_transaction);
+ 
+-	jbd_debug(3, "JBD: commit phase 2\n");
+-
+ 	/*
+ 	 * If we found any dirty or locked buffers, then we should have
+ 	 * looped back up to the write_out_data label.  If there weren't
+@@ -489,6 +489,9 @@ void journal_commit_transaction(journal_t *journal)
+ 	 */
+ 	commit_transaction->t_state = T_COMMIT;
+ 
++	J_ASSERT(commit_transaction->t_nr_buffers <=
++		commit_transaction->t_outstanding_credits);
++
+ 	descriptor = NULL;
+ 	bufs = 0;
+ 	while (commit_transaction->t_buffers) {
+@@ -498,9 +501,10 @@ void journal_commit_transaction(journal_t *journal)
+ 		jh = commit_transaction->t_buffers;
+ 
+ 		/* If we're in abort mode, we just un-journal the buffer and
+-		   release it for background writing. */
++		   release it */
+ 
+ 		if (is_journal_aborted(journal)) {
++			clear_buffer_jbddirty(jh2bh(jh));
+ 			JBUFFER_TRACE(jh, "journal is aborting: refile");
+ 			journal_refile_buffer(journal, jh);
+ 			/* If that was the last one, we need to clean up
+@@ -524,7 +528,7 @@ void journal_commit_transaction(journal_t *journal)
+ 
+ 			descriptor = journal_get_descriptor_buffer(journal);
+ 			if (!descriptor) {
+-				__journal_abort_hard(journal);
++				journal_abort(journal, -EIO);
+ 				continue;
+ 			}
+ 
+@@ -557,7 +561,7 @@ void journal_commit_transaction(journal_t *journal)
+ 		   and repeat this loop: we'll fall into the
+ 		   refile-on-abort condition above. */
+ 		if (err) {
+-			__journal_abort_hard(journal);
++			journal_abort(journal, err);
+ 			continue;
+ 		}
+ 
+@@ -742,13 +746,16 @@ wait_for_iobuf:
+ 		/* AKPM: bforget here */
+ 	}
+ 
++	if (err)
++		journal_abort(journal, err);
++
+ 	jbd_debug(3, "JBD: commit phase 6\n");
+ 
+ 	if (journal_write_commit_record(journal, commit_transaction))
+ 		err = -EIO;
+ 
+ 	if (err)
+-		__journal_abort_hard(journal);
++		journal_abort(journal, err);
+ 
+ 	/* End of a transaction!  Finally, we can do checkpoint
+            processing: any buffers committed as a result of this
+@@ -832,6 +839,8 @@ restart_loop:
+ 		if (buffer_jbddirty(bh)) {
+ 			JBUFFER_TRACE(jh, "add to new checkpointing trans");
+ 			__journal_insert_checkpoint(jh, commit_transaction);
++			if (is_journal_aborted(journal))
++				clear_buffer_jbddirty(bh);
+ 			JBUFFER_TRACE(jh, "refile for checkpoint writeback");
+ 			__journal_refile_buffer(jh);
+ 			jbd_unlock_bh_state(bh);
+@@ -858,10 +867,10 @@ restart_loop:
+ 	}
+ 	spin_unlock(&journal->j_list_lock);
+ 	/*
+-	 * This is a bit sleazy.  We borrow j_list_lock to protect
+-	 * journal->j_committing_transaction in __journal_remove_checkpoint.
+-	 * Really, __journal_remove_checkpoint should be using j_state_lock but
+-	 * it's a bit hassle to hold that across __journal_remove_checkpoint
++	 * This is a bit sleazy.  We use j_list_lock to protect transition
++	 * of a transaction into T_FINISHED state and calling
++	 * __journal_drop_transaction(). Otherwise we could race with
++	 * other checkpointing code processing the transaction...
+ 	 */
+ 	spin_lock(&journal->j_state_lock);
+ 	spin_lock(&journal->j_list_lock);
+diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
+index 46fe743..8e937fc 100644
+--- a/fs/jbd/journal.c
++++ b/fs/jbd/journal.c
+@@ -1128,9 +1128,12 @@ recovery_error:
+  *
+  * Release a journal_t structure once it is no longer in use by the
+  * journaled object.
++ * Return <0 if we couldn't clean up the journal.
+  */
+-void journal_destroy(journal_t *journal)
++int journal_destroy(journal_t *journal)
+ {
++	int err = 0;
++
+ 	/* Wait for the commit thread to wake up and die. */
+ 	journal_kill_thread(journal);
+ 
+@@ -1153,11 +1156,16 @@ void journal_destroy(journal_t *journal)
+ 	J_ASSERT(journal->j_checkpoint_transactions == NULL);
+ 	spin_unlock(&journal->j_list_lock);
+ 
+-	/* We can now mark the journal as empty. */
+-	journal->j_tail = 0;
+-	journal->j_tail_sequence = ++journal->j_transaction_sequence;
+ 	if (journal->j_sb_buffer) {
+-		journal_update_superblock(journal, 1);
++		if (!is_journal_aborted(journal)) {
++			/* We can now mark the journal as empty. */
++			journal->j_tail = 0;
++			journal->j_tail_sequence =
++				++journal->j_transaction_sequence;
++			journal_update_superblock(journal, 1);
++		} else {
++			err = -EIO;
++		}
+ 		brelse(journal->j_sb_buffer);
+ 	}
+ 
+@@ -1167,6 +1175,8 @@ void journal_destroy(journal_t *journal)
+ 		journal_destroy_revoke(journal);
+ 	kfree(journal->j_wbuf);
+ 	kfree(journal);
++
++	return err;
+ }
+ 
+ 
+@@ -1366,10 +1376,16 @@ int journal_flush(journal_t *journal)
+ 	spin_lock(&journal->j_list_lock);
+ 	while (!err && journal->j_checkpoint_transactions != NULL) {
+ 		spin_unlock(&journal->j_list_lock);
++		mutex_lock(&journal->j_checkpoint_mutex);
+ 		err = log_do_checkpoint(journal);
++		mutex_unlock(&journal->j_checkpoint_mutex);
+ 		spin_lock(&journal->j_list_lock);
+ 	}
+ 	spin_unlock(&journal->j_list_lock);
++
++	if (is_journal_aborted(journal))
++		return -EIO;
++
+ 	cleanup_journal_tail(journal);
+ 
+ 	/* Finally, mark the journal as really needing no recovery.
+@@ -1391,7 +1407,7 @@ int journal_flush(journal_t *journal)
+ 	J_ASSERT(journal->j_head == journal->j_tail);
+ 	J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
+ 	spin_unlock(&journal->j_state_lock);
+-	return err;
++	return 0;
+ }
+ 
+ /**
+diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
+index 2a5f4b8..66ae0e5 100644
+--- a/fs/jbd/recovery.c
++++ b/fs/jbd/recovery.c
+@@ -223,7 +223,7 @@ do {									\
+  */
+ int journal_recover(journal_t *journal)
+ {
+-	int			err;
++	int			err, err2;
+ 	journal_superblock_t *	sb;
+ 
+ 	struct recovery_info	info;
+@@ -261,7 +261,10 @@ int journal_recover(journal_t *journal)
+ 	journal->j_transaction_sequence = ++info.end_transaction;
+ 
+ 	journal_clear_revoke(journal);
+-	sync_blockdev(journal->j_fs_dev);
++	err2 = sync_blockdev(journal->j_fs_dev);
++	if (!err)
++		err = err2;
++
+ 	return err;
+ }
+ 
+@@ -478,7 +481,7 @@ static int do_one_pass(journal_t *journal,
+ 					memcpy(nbh->b_data, obh->b_data,
+ 							journal->j_blocksize);
+ 					if (flags & JFS_FLAG_ESCAPE) {
+-						*((__be32 *)bh->b_data) =
++						*((__be32 *)nbh->b_data) =
+ 						cpu_to_be32(JFS_MAGIC_NUMBER);
+ 					}
+ 
+diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
+index 772b653..1b9a804 100644
+--- a/fs/jbd/transaction.c
++++ b/fs/jbd/transaction.c
+@@ -600,6 +600,13 @@ repeat:
+ 	    jh->b_next_transaction == transaction)
+ 		goto done;
+ 
++	/*
++	 * this is the first time this transaction is touching this buffer,
++	 * reset the modified flag
++	 */
++	jh->b_modified = 0;
++
++
+ 	/*
+ 	 * If there is already a copy-out version of this buffer, then we don't
+ 	 * need to make another one
+@@ -812,9 +819,15 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
+ 
+ 	if (jh->b_transaction == NULL) {
+ 		jh->b_transaction = transaction;
++
++		/* first access by this transaction */
++		jh->b_modified = 0;
++
+ 		JBUFFER_TRACE(jh, "file as BJ_Reserved");
+ 		__journal_file_buffer(jh, transaction, BJ_Reserved);
+ 	} else if (jh->b_transaction == journal->j_committing_transaction) {
++		/* first access by this transaction */
++		jh->b_modified = 0;
+ 		JBUFFER_TRACE(jh, "set next transaction");
+ 		jh->b_next_transaction = transaction;
+ 	}
+@@ -1213,6 +1226,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
+ 	struct journal_head *jh;
+ 	int drop_reserve = 0;
+ 	int err = 0;
++	int was_modified = 0;
+ 
+ 	BUFFER_TRACE(bh, "entry");
+ 
+@@ -1231,6 +1245,9 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
+ 		goto not_jbd;
+ 	}
+ 
++	/* keep track of whether or not this transaction modified us */
++	was_modified = jh->b_modified;
++
+ 	/*
+ 	 * The buffer's going from the transaction, we must drop
+ 	 * all references -bzzz
+@@ -1248,7 +1265,12 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
+ 
+ 		JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
+ 
+-		drop_reserve = 1;
++		/*
++		 * we only want to drop a reference if this transaction
++		 * modified the buffer
++		 */
++		if (was_modified)
++			drop_reserve = 1;
+ 
+ 		/*
+ 		 * We are no longer going to journal this buffer.
+@@ -1288,7 +1310,12 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
+ 		if (jh->b_next_transaction) {
+ 			J_ASSERT(jh->b_next_transaction == transaction);
+ 			jh->b_next_transaction = NULL;
+-			drop_reserve = 1;
++			/*
++			 * only drop a reference if this transaction modified
++			 * the buffer
++			 */
++			if (was_modified)
++				drop_reserve = 1;
+ 		}
+ 	}
+ 
+@@ -2058,7 +2085,7 @@ void __journal_refile_buffer(struct journal_head *jh)
+ 	jh->b_transaction = jh->b_next_transaction;
+ 	jh->b_next_transaction = NULL;
+ 	__journal_file_buffer(jh, jh->b_transaction,
+-				was_dirty ? BJ_Metadata : BJ_Reserved);
++				jh->b_modified ? BJ_Metadata : BJ_Reserved);
+ 	J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
+ 
+ 	if (was_dirty)
+diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
+index ff56e39..8c51469 100644
+--- a/include/linux/ext3_fs.h
++++ b/include/linux/ext3_fs.h
+@@ -827,6 +827,7 @@ extern void ext3_discard_reservation (struct inode *);
+ extern void ext3_dirty_inode(struct inode *);
+ extern int ext3_change_inode_journal_flag(struct inode *, int);
+ extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *);
++extern int ext3_can_truncate(struct inode *inode);
+ extern void ext3_truncate (struct inode *);
+ extern void ext3_set_inode_flags(struct inode *);
+ extern void ext3_get_inode_flags(struct ext3_inode_info *);
+diff --git a/include/linux/jbd.h b/include/linux/jbd.h
+index 4527375..6bc0e4f 100644
+--- a/include/linux/jbd.h
++++ b/include/linux/jbd.h
+@@ -446,6 +446,8 @@ struct transaction_s
+ 	/*
+ 	 * Transaction's current state
+ 	 * [no locking - only kjournald alters this]
++	 * [j_list_lock] guards transition of a transaction into T_FINISHED
++	 * state and subsequent call of __journal_drop_transaction()
+ 	 * FIXME: needs barriers
+ 	 * KLUDGE: [use j_state_lock]
+ 	 */
+@@ -924,7 +926,7 @@ extern int	   journal_set_features
+ 		   (journal_t *, unsigned long, unsigned long, unsigned long);
+ extern int	   journal_create     (journal_t *);
+ extern int	   journal_load       (journal_t *journal);
+-extern void	   journal_destroy    (journal_t *);
++extern int	   journal_destroy    (journal_t *);
+ extern int	   journal_recover    (journal_t *journal);
+ extern int	   journal_wipe       (journal_t *, int);
+ extern int	   journal_skip_recovery	(journal_t *);
-- 
2.43.0