linux-2.6.950-ext3_backports.patch

   1 * orphan_list_check_on_destroy_inode.patch
   2         http://github.com/caglar10ur/linux-2.6.27.y/commit/56ccd891a32e6409700786737953906426512ff7
   3         http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=a6c15c2b0fbfd5c0a84f5f0e1e3f20f85d2b8692
   4
   5 * don-t-read-inode-block-if-buf-has-write-error.patch
   6         http://github.com/caglar10ur/linux-2.6.27.y/commit/1e7d951fa2bd7d49121aff0ec7eb0331e3d5eeb5
   7         http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=95450f5a7e53d5752ce1a0d0b8282e10fe745ae0
   8
   9 * error-in-ext3_lookup-if-corruption-found.patch
  10         http://github.com/caglar10ur/linux-2.6.27.y/commit/f1d52243f69747ee601d671ec1b98a7363ce0597
  11         Filesystem errors should be logged and not silently ignored
  12
  13 * fix-accessing-freed-memory-in-ext3_abort.patch
  14         http://github.com/caglar10ur/linux-2.6.27.y/commit/f863ab8749fca8d167b357357ed7492c1c6d530d
  15         http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.32.y.git;a=commitdiff_plain;h=44d6f78756560e95903de239e10f8a40a6eae444
  16
  17 * make_fdatasync_not_sync_metadata.patch
  18         http://github.com/caglar10ur/linux-2.6.27.y/commit/1d29962ae5187764989ede9ec0d0777d2f489345
  19         http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=3d61f75eefedf75914ab4453c67aaa2ee64bcf93
  20
  21 * add_checks_for_errors_from_jbd.patch
  22         http://github.com/caglar10ur/linux-2.6.27.y/commit/f38c319731f010b1c36c25ed591f79bcb557d2f2
  23         http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=2d7c820e56ce83b23daee9eb5343730fb309418e
  24
  25 * add_missing_error_checks.patch
  26         http://github.com/caglar10ur/linux-2.6.27.y/commit/8f0e6faf83721162a77cb5df5c483e4799bea22b
  27         http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=cbe5f466f6995e10a10c7ae66d6dc8608f08a6b8
  28
  29 The original upstream patch for add_missing_error_checks.patch does the following two things:
  30
  31   (1) stop aborting the journal on file data write errors, instead
  32       just call printk() and set AS_EIO on the appropriate address_space
  33       objects
  34   (2) add missing error checks for file data writes
  35
  36 This backport does only (2).
  37
  38 * dont_dirty_original_metadata_buffer_on_abort.patch
  39         http://github.com/caglar10ur/linux-2.6.27.y/commit/d003fa1a88c857795ca7e102eefbf26c9088aa66
  40         http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=7ad7445f60fe4d46c4c9d2a9463db180d2a3b270
  41
  42 * fix_commit_code_to_properly_abort_journal.patch
  43         http://github.com/caglar10ur/linux-2.6.27.y/commit/232632e87d9bc83b89d97f98f311d67d45e0e6dd
  44         http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=7a266e75cf5a1efd20d084408a1b7f1a185496dd
  45
  46 * fix_journal_overflow_issues.patch
  47         http://github.com/caglar10ur/linux-2.6.27.y/commit/74361d7e55f28847e3b7eda4a4563d02ab001537
  48         http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=5bc833feaa8b2236265764e7e81f44937be46eda
  49         http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=5b9a499d77e9dd39c9e6611ea10c56a31604f274
  50
  51 * fix_typo_in_recovery_code.patch
  52         http://github.com/caglar10ur/linux-2.6.27.y/commit/da999401f4bedf317b2e7dcd3c9163b1a433ba3c
  53         http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=439aeec639d7c57f3561054a6d315c40fd24bb74
  54
  55 * jbd-properly-dispose-of-unmapped-data-buffers.patch
  56         http://github.com/caglar10ur/linux-2.6.27.y/commit/841d34d702702c85c7b8cc31a185e48ce3ca0a8e
  57         http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=fc80c44277b3c92d808b73e9d40e120229aa4b6a
  58
  59 * jdb-abort-when-failed-to-log-metadata-buffers.patch
  60         http://github.com/caglar10ur/linux-2.6.27.y/commit/7de4ddac8020dcb2078b7237650e972ecfd112cf
  61         http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=44519faf22ad6ce924ad0352d3dc200d9e0b66e8
  62
  63 * fix-assertion-failure-in-fs-jbd-checkpoint.patch
  64         http://github.com/caglar10ur/linux-2.6.27.y/commit/268ff9d67ed3269e5d84914aabd30d06ee89f563
  65         http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.32.y.git;a=commitdiff_plain;h=d4beaf4ab5f89496f2bcf67db62ad95d99bfeff6
  66
  67 * fix-error-handling-for-checkpoint-io.patch
  68         http://github.com/caglar10ur/linux-2.6.27.y/commit/e1ef6b77a95b8e02255dfa02fef06e2231e92645
  69         http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=4afe978530702c934dfdb11f54073136818b2119
  70
  71 * jbd-test-BH_Write_EIO-to-detect-errors-on-metadata.patch
  72         http://github.com/caglar10ur/linux-2.6.27.y/commit/1a8ede62acc03d3b2baa98d02189685a4e30044f
  73         http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=9f818b4ac04f53458d0354950b4f229f54be4dbf
  74
  75 * handle-corrupted-orphan-list-at-mount.patch
  76         http://github.com/caglar10ur/linux-2.6.27.y/commit/6003003452a5faaa0b2d1deb6356ebf8d4e2fe3f
  77         http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff_plain;h=ae76dd9a6b5bbe5315fb7028e03f68f75b8538f3
  78
  79
  80 diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
  81 index b0615c8..841f0f7 100644
  82 --- a/fs/ext3/fsync.c
  83 +++ b/fs/ext3/fsync.c
  84 @@ -73,6 +73,9 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
  85                 goto out;
  86         }
  87
  88 +       if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
  89 +               goto out;
  90 +
  91         /*
  92          * The VFS has written the file data.  If the inode is unaltered
  93          * then we need not start a commit.
  94 diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
  95 index 9885ff8..d586377 100644
  96 --- a/fs/ext3/ialloc.c
  97 +++ b/fs/ext3/ialloc.c
  98 @@ -658,14 +658,15 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
  99         unsigned long max_ino = le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count);
 100         unsigned long block_group;
 101         int bit;
 102 -       struct buffer_head *bitmap_bh = NULL;
 103 +       struct buffer_head *bitmap_bh;
 104         struct inode *inode = NULL;
 105 +       long err = -EIO;
 106
 107         /* Error cases - e2fsck has already cleaned up for us */
 108         if (ino > max_ino) {
 109                 ext3_warning(sb, __FUNCTION__,
 110                              "bad orphan ino %lu!  e2fsck was run?", ino);
 111 -               goto out;
 112 +               goto error;
 113         }
 114
 115         block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
 116 @@ -674,38 +675,58 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
 117         if (!bitmap_bh) {
 118                 ext3_warning(sb, __FUNCTION__,
 119                              "inode bitmap error for orphan %lu", ino);
 120 -               goto out;
 121 +               goto error;
 122         }
 123
 124         /* Having the inode bit set should be a 100% indicator that this
 125          * is a valid orphan (no e2fsck run on fs).  Orphans also include
 126          * inodes that were being truncated, so we can't check i_nlink==0.
 127          */
 128 -       if (!ext3_test_bit(bit, bitmap_bh->b_data) ||
 129 -                       !(inode = iget(sb, ino)) || is_bad_inode(inode) ||
 130 -                       NEXT_ORPHAN(inode) > max_ino) {
 131 -               ext3_warning(sb, __FUNCTION__,
 132 -                            "bad orphan inode %lu!  e2fsck was run?", ino);
 133 -               printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n",
 134 -                      bit, (unsigned long long)bitmap_bh->b_blocknr,
 135 -                      ext3_test_bit(bit, bitmap_bh->b_data));
 136 -               printk(KERN_NOTICE "inode=%p\n", inode);
 137 -               if (inode) {
 138 -                       printk(KERN_NOTICE "is_bad_inode(inode)=%d\n",
 139 -                              is_bad_inode(inode));
 140 -                       printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
 141 -                              NEXT_ORPHAN(inode));
 142 -                       printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
 143 -               }
 144 +       if (!ext3_test_bit(bit, bitmap_bh->b_data))
 145 +               goto bad_orphan;
 146 +
 147 +       inode = iget(sb, ino);
 148 +       if (IS_ERR(inode))
 149 +               goto iget_failed;
 150 +
 151 +        /*
 152 +         * If the orphan has i_nlink > 0 then it should be able to be
 153 +         * truncated, otherwise it won't be removed from the orphan list
 154 +         * during processing and an infinite loop will result.
 155 +         */
 156 +       if (inode->i_nlink && !ext3_can_truncate(inode))
 157 +               goto bad_orphan;
 158 +
 159 +       if (NEXT_ORPHAN(inode) > max_ino)
 160 +               goto bad_orphan;
 161 +       brelse(bitmap_bh);
 162 +       return inode;
 163 +
 164 +iget_failed:
 165 +        err = PTR_ERR(inode);
 166 +        inode = NULL;
 167 +bad_orphan:
 168 +       ext3_warning(sb, __FUNCTION__,
 169 +                    "bad orphan inode %lu!  e2fsck was run?", ino);
 170 +       printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n",
 171 +              bit, (unsigned long long)bitmap_bh->b_blocknr,
 172 +              ext3_test_bit(bit, bitmap_bh->b_data));
 173 +       printk(KERN_NOTICE "inode=%p\n", inode);
 174 +       if (inode) {
 175 +               printk(KERN_NOTICE "is_bad_inode(inode)=%d\n",
 176 +                      is_bad_inode(inode));
 177 +               printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
 178 +                      NEXT_ORPHAN(inode));
 179 +               printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
 180 +                printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink);
 181                 /* Avoid freeing blocks if we got a bad deleted inode */
 182 -               if (inode && inode->i_nlink == 0)
 183 +               if (inode->i_nlink == 0)
 184                         inode->i_blocks = 0;
 185                 iput(inode);
 186 -               inode = NULL;
 187         }
 188 -out:
 189 -       brelse(bitmap_bh);
 190 -       return inode;
 191 +        brelse(bitmap_bh);
 192 +error:
 193 +       return ERR_PTR(err);
 194  }
 195
 196  unsigned long ext3_count_free_inodes (struct super_block * sb)
 197 diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
 198 index 0698ce5..cede457 100644
 199 --- a/fs/ext3/inode.c
 200 +++ b/fs/ext3/inode.c
 201 @@ -2189,6 +2189,19 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
 202         }
 203  }
 204
 205 +int ext3_can_truncate(struct inode *inode)
 206 +{
 207 +       if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
 208 +               return 0;
 209 +       if (S_ISREG(inode->i_mode))
 210 +               return 1;
 211 +       if (S_ISDIR(inode->i_mode))
 212 +               return 1;
 213 +       if (S_ISLNK(inode->i_mode))
 214 +               return !ext3_inode_is_fast_symlink(inode);
 215 +       return 0;
 216 +}
 217 +
 218  /*
 219   * ext3_truncate()
 220   *
 221 @@ -2233,12 +2246,7 @@ void ext3_truncate(struct inode *inode)
 222         unsigned blocksize = inode->i_sb->s_blocksize;
 223         struct page *page;
 224
 225 -       if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
 226 -           S_ISLNK(inode->i_mode)))
 227 -               return;
 228 -       if (ext3_inode_is_fast_symlink(inode))
 229 -               return;
 230 -       if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
 231 +       if (!ext3_can_truncate(inode))
 232                 return;
 233
 234         /*
 235 @@ -2462,6 +2470,16 @@ static int __ext3_get_inode_loc(struct inode *inode,
 236         }
 237         if (!buffer_uptodate(bh)) {
 238                 lock_buffer(bh);
 239 +
 240 +               /*
 241 +               * If the buffer has the write error flag, we have failed
 242 +               * to write out another inode in the same block.  In this
 243 +               * case, we don't have to read the block because we may
 244 +               * read the old inode data successfully.
 245 +               */
 246 +               if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
 247 +                       set_buffer_uptodate(bh);
 248 +
 249                 if (buffer_uptodate(bh)) {
 250                         /* someone brought it uptodate while we waited */
 251                         unlock_buffer(bh);
 252 diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
 253 index 2e910db..127733b 100644
 254 --- a/fs/ext3/ioctl.c
 255 +++ b/fs/ext3/ioctl.c
 256 @@ -215,7 +215,7 @@ flags_err:
 257         case EXT3_IOC_GROUP_EXTEND: {
 258                 ext3_fsblk_t n_blocks_count;
 259                 struct super_block *sb = inode->i_sb;
 260 -               int err;
 261 +               int err, err2;
 262
 263                 if (!capable(CAP_SYS_RESOURCE))
 264                         return -EPERM;
 265 @@ -229,15 +229,17 @@ flags_err:
 266
 267                 err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count);
 268                 journal_lock_updates(EXT3_SB(sb)->s_journal);
 269 -               journal_flush(EXT3_SB(sb)->s_journal);
 270 +               err2 = journal_flush(EXT3_SB(sb)->s_journal);
 271                 journal_unlock_updates(EXT3_SB(sb)->s_journal);
 272 +               if (err == 0)
 273 +                       err = err2;
 274
 275                 return err;
 276         }
 277         case EXT3_IOC_GROUP_ADD: {
 278                 struct ext3_new_group_data input;
 279                 struct super_block *sb = inode->i_sb;
 280 -               int err;
 281 +               int err, err2;
 282
 283                 if (!capable(CAP_SYS_RESOURCE))
 284                         return -EPERM;
 285 @@ -252,8 +254,10 @@ flags_err:
 286
 287                 err = ext3_group_add(sb, &input);
 288                 journal_lock_updates(EXT3_SB(sb)->s_journal);
 289 -               journal_flush(EXT3_SB(sb)->s_journal);
 290 +               err2 = journal_flush(EXT3_SB(sb)->s_journal);
 291                 journal_unlock_updates(EXT3_SB(sb)->s_journal);
 292 +               if (err == 0)
 293 +                       err = err2;
 294
 295                 return err;
 296         }
 297 diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
 298 index acbfa15..a2a3d92 100644
 299 --- a/fs/ext3/namei.c
 300 +++ b/fs/ext3/namei.c
 301 @@ -1053,6 +1053,16 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
 302
 303                 if (!inode)
 304                         return ERR_PTR(-EACCES);
 305 +
 306 +               if (is_bad_inode(inode)) {
 307 +                       /* if bad because unlinked, something has gone wrong */
 308 +                       if (!inode->i_nlink && printk_ratelimit())
 309 +                               ext3_error(inode->i_sb, __FUNCTION__, "unlinked inode %lu in dir #%lu", inode->i_ino, dir->i_ino);
 310 +
 311 +                       iput(inode);
 312 +                       return ERR_PTR(-ENOENT);
 313 +               }
 314 +
 315                 dx_propagate_tag(nd, inode);
 316         }
 317         return d_splice_alias(inode, dentry);
 318 @@ -1089,6 +1099,11 @@ struct dentry *ext3_get_parent(struct dentry *child)
 319         if (!inode)
 320                 return ERR_PTR(-EACCES);
 321
 322 +       if (is_bad_inode(inode)) {
 323 +               iput(inode);
 324 +               return ERR_PTR(-ENOENT);
 325 +       }
 326 +
 327         parent = d_alloc_anon(inode);
 328         if (!parent) {
 329                 iput(inode);
 330 diff --git a/fs/ext3/super.c b/fs/ext3/super.c
 331 index 22244a2..ce186bc 100644
 332 --- a/fs/ext3/super.c
 333 +++ b/fs/ext3/super.c
 334 @@ -279,7 +279,8 @@ void ext3_abort (struct super_block * sb, const char * function,
 335         EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
 336         sb->s_flags |= MS_RDONLY;
 337         EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
 338 -       journal_abort(EXT3_SB(sb)->s_journal, -EIO);
 339 +       if (EXT3_SB(sb)->s_journal)
 340 +               journal_abort(EXT3_SB(sb)->s_journal, -EIO);
 341  }
 342
 343  void ext3_warning (struct super_block * sb, const char * function,
 344 @@ -388,10 +389,14 @@ static void ext3_put_super (struct super_block * sb)
 345  {
 346         struct ext3_sb_info *sbi = EXT3_SB(sb);
 347         struct ext3_super_block *es = sbi->s_es;
 348 -       int i;
 349 +       int i, err;
 350
 351         ext3_xattr_put_super(sb);
 352 -       journal_destroy(sbi->s_journal);
 353 +       err = journal_destroy(sbi->s_journal);
 354 +       sbi->s_journal = NULL;
 355 +       if (err < 0)
 356 +               ext3_abort(sb, __func__, "Couldn't clean up the journal");
 357 +
 358         if (!(sb->s_flags & MS_RDONLY)) {
 359                 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
 360                 es->s_state = cpu_to_le16(sbi->s_mount_state);
 361 @@ -2161,13 +2166,15 @@ static void ext3_mark_recovery_complete(struct super_block * sb,
 362         journal_t *journal = EXT3_SB(sb)->s_journal;
 363
 364         journal_lock_updates(journal);
 365 -       journal_flush(journal);
 366 +       if (journal_flush(journal) < 0)
 367 +               goto out;
 368         if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
 369             sb->s_flags & MS_RDONLY) {
 370                 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
 371                 sb->s_dirt = 0;
 372                 ext3_commit_super(sb, es, 1);
 373         }
 374 +out:
 375         journal_unlock_updates(journal);
 376  }
 377
 378 @@ -2269,6 +2276,13 @@ static void ext3_write_super_lockfs(struct super_block *sb)
 379                 journal_lock_updates(journal);
 380                 journal_flush(journal);
 381
 382 +               /*
 383 +                * We don't want to clear needs_recovery flag when we failed
 384 +                * to flush the journal.
 385 +                */
 386 +               if (journal_flush(journal) < 0)
 387 +                       return;
 388 +
 389                 /* Journal blocked and flushed, clear needs_recovery flag. */
 390                 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
 391                 ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
 392 diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
 393 index 47552d4..803392f 100644
 394 --- a/fs/jbd/checkpoint.c
 395 +++ b/fs/jbd/checkpoint.c
 396 @@ -93,7 +93,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
 397         int ret = 0;
 398         struct buffer_head *bh = jh2bh(jh);
 399
 400 -       if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
 401 +       if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
 402 +           !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
 403                 JBUFFER_TRACE(jh, "remove from checkpoint list");
 404                 ret = __journal_remove_checkpoint(jh) + 1;
 405                 jbd_unlock_bh_state(bh);
 406 @@ -160,21 +161,25 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
 407   * buffers. Note that we take the buffers in the opposite ordering
 408   * from the one in which they were submitted for IO.
 409   *
 410 + * Return 0 on success, and return <0 if some buffers have failed
 411 + * to be written out.
 412 + *
 413   * Called with j_list_lock held.
 414   */
 415 -static void __wait_cp_io(journal_t *journal, transaction_t *transaction)
 416 +static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
 417  {
 418         struct journal_head *jh;
 419         struct buffer_head *bh;
 420         tid_t this_tid;
 421         int released = 0;
 422 +       int ret = 0;
 423
 424         this_tid = transaction->t_tid;
 425  restart:
 426         /* Did somebody clean up the transaction in the meanwhile? */
 427         if (journal->j_checkpoint_transactions != transaction ||
 428                         transaction->t_tid != this_tid)
 429 -               return;
 430 +               return ret;
 431         while (!released && transaction->t_checkpoint_io_list) {
 432                 jh = transaction->t_checkpoint_io_list;
 433                 bh = jh2bh(jh);
 434 @@ -194,6 +199,9 @@ restart:
 435                         spin_lock(&journal->j_list_lock);
 436                         goto restart;
 437                 }
 438 +               if (unlikely(buffer_write_io_error(bh)))
 439 +                       ret = -EIO;
 440 +
 441                 /*
 442                  * Now in whatever state the buffer currently is, we know that
 443                  * it has been written out and so we can drop it from the list
 444 @@ -203,6 +211,8 @@ restart:
 445                 journal_remove_journal_head(bh);
 446                 __brelse(bh);
 447         }
 448 +
 449 +       return ret;
 450  }
 451
 452  #define NR_BATCH       64
 453 @@ -226,7 +236,8 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
 454   * Try to flush one buffer from the checkpoint list to disk.
 455   *
 456   * Return 1 if something happened which requires us to abort the current
 457 - * scan of the checkpoint list.
 458 + * scan of the checkpoint list.  Return <0 if the buffer has failed to
 459 + * be written out.
 460   *
 461   * Called with j_list_lock held and drops it if 1 is returned
 462   * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
 463 @@ -256,6 +267,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
 464                 log_wait_commit(journal, tid);
 465                 ret = 1;
 466         } else if (!buffer_dirty(bh)) {
 467 +               ret = 1;
 468 +               if (unlikely(buffer_write_io_error(bh)))
 469 +                       ret = -EIO;
 470                 J_ASSERT_JH(jh, !buffer_jbddirty(bh));
 471                 BUFFER_TRACE(bh, "remove from checkpoint");
 472                 __journal_remove_checkpoint(jh);
 473 @@ -263,7 +277,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
 474                 jbd_unlock_bh_state(bh);
 475                 journal_remove_journal_head(bh);
 476                 __brelse(bh);
 477 -               ret = 1;
 478         } else {
 479                 /*
 480                  * Important: we are about to write the buffer, and
 481 @@ -295,6 +308,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
 482   * to disk. We submit larger chunks of data at once.
 483   *
 484   * The journal should be locked before calling this function.
 485 + * Called with j_checkpoint_mutex held.
 486   */
 487  int log_do_checkpoint(journal_t *journal)
 488  {
 489 @@ -318,6 +332,7 @@ int log_do_checkpoint(journal_t *journal)
 490          * OK, we need to start writing disk blocks.  Take one transaction
 491          * and write it.
 492          */
 493 +       result = 0;
 494         spin_lock(&journal->j_list_lock);
 495         if (!journal->j_checkpoint_transactions)
 496                 goto out;
 497 @@ -334,7 +349,7 @@ restart:
 498                 int batch_count = 0;
 499                 struct buffer_head *bhs[NR_BATCH];
 500                 struct journal_head *jh;
 501 -               int retry = 0;
 502 +               int retry = 0, err;
 503
 504                 while (!retry && transaction->t_checkpoint_list) {
 505                         struct buffer_head *bh;
 506 @@ -347,6 +362,8 @@ restart:
 507                                 break;
 508                         }
 509                         retry = __process_buffer(journal, jh, bhs,&batch_count);
 510 +                       if (retry < 0 && !result)
 511 +                               result = retry;
 512                         if (!retry && lock_need_resched(&journal->j_list_lock)){
 513                                 spin_unlock(&journal->j_list_lock);
 514                                 retry = 1;
 515 @@ -370,14 +387,18 @@ restart:
 516                  * Now we have cleaned up the first transaction's checkpoint
 517                  * list. Let's clean up the second one
 518                  */
 519 -               __wait_cp_io(journal, transaction);
 520 +               err = __wait_cp_io(journal, transaction);
 521 +               if (!result)
 522 +                       result = err;
 523         }
 524  out:
 525         spin_unlock(&journal->j_list_lock);
 526 -       result = cleanup_journal_tail(journal);
 527         if (result < 0)
 528 -               return result;
 529 -       return 0;
 530 +               journal_abort(journal, result);
 531 +       else
 532 +               result = cleanup_journal_tail(journal);
 533 +
 534 +       return (result < 0) ? result : 0;
 535  }
 536
 537  /*
 538 @@ -393,8 +414,9 @@ out:
 539   * This is the only part of the journaling code which really needs to be
 540   * aware of transaction aborts.  Checkpointing involves writing to the
 541   * main filesystem area rather than to the journal, so it can proceed
 542 - * even in abort state, but we must not update the journal superblock if
 543 - * we have an abort error outstanding.
 544 + * even in abort state, but we must not update the super block if
 545 + * checkpointing may have failed.  Otherwise, we would lose some metadata
 546 + * buffers which should be written-back to the filesystem.
 547   */
 548
 549  int cleanup_journal_tail(journal_t *journal)
 550 @@ -403,6 +425,9 @@ int cleanup_journal_tail(journal_t *journal)
 551         tid_t           first_tid;
 552         unsigned long   blocknr, freed;
 553
 554 +       if (is_journal_aborted(journal))
 555 +               return 1;
 556 +
 557         /* OK, work out the oldest transaction remaining in the log, and
 558          * the log block it starts at.
 559          *
 560 @@ -602,15 +627,15 @@ int __journal_remove_checkpoint(struct journal_head *jh)
 561
 562         /*
 563          * There is one special case to worry about: if we have just pulled the
 564 -        * buffer off a committing transaction's forget list, then even if the
 565 -        * checkpoint list is empty, the transaction obviously cannot be
 566 -        * dropped!
 567 +        * buffer off a running or committing transaction's checkpoint list,
 568 +        * then even if the checkpoint list is empty, the transaction obviously
 569 +        * cannot be dropped!
 570          *
 571 -        * The locking here around j_committing_transaction is a bit sleazy.
 572 +        * The locking here around t_state is a bit sleazy.
 573          * See the comment at the end of journal_commit_transaction().
 574          */
 575 -       if (transaction == journal->j_committing_transaction) {
 576 -               JBUFFER_TRACE(jh, "belongs to committing transaction");
 577 +       if (transaction->t_state != T_FINISHED) {
 578 +               JBUFFER_TRACE(jh, "belongs to running/committing transaction");
 579                 goto out;
 580         }
 581
 582 diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
 583 index a003d50..0d2785d 100644
 584 --- a/fs/jbd/commit.c
 585 +++ b/fs/jbd/commit.c
 586 @@ -36,7 +36,7 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
 587
 588  /*
 589   * When an ext3-ordered file is truncated, it is possible that many pages are
 590 - * not sucessfully freed, because they are attached to a committing transaction.
 591 + * not successfully freed, because they are attached to a committing transaction.
 592   * After the transaction commits, these pages are left on the LRU, with no
 593   * ->mapping, and with attached buffers.  These pages are trivially reclaimable
 594   * by the VM, but their apparent absence upsets the VM accounting, and it makes
 595 @@ -45,8 +45,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
 596   * So here, we have a buffer which has just come off the forget list.  Look to
 597   * see if we can strip all buffers from the backing page.
 598   *
 599 - * Called under lock_journal(), and possibly under journal_datalist_lock.  The
 600 - * caller provided us with a ref against the buffer, and we drop that here.
 601 + * Called under journal->j_list_lock.  The caller provided us with a ref
 602 + * against the buffer, and we drop that here.
 603   */
 604  static void release_buffer_page(struct buffer_head *bh)
 605  {
 606 @@ -78,6 +78,19 @@ nope:
 607  }
 608
 609  /*
 610 + * Decrement reference counter for data buffer. If it has been marked
 611 + * 'BH_Freed', release it and the page to which it belongs if possible.
 612 + */
 613 +static void release_data_buffer(struct buffer_head *bh)
 614 +{
 615 +       if (buffer_freed(bh)) {
 616 +               clear_buffer_freed(bh);
 617 +               release_buffer_page(bh);
 618 +       } else
 619 +               put_bh(bh);
 620 +}
 621 +
 622 +/*
 623   * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
 624   * held.  For ranking reasons we must trylock.  If we lose, schedule away and
 625   * return 0.  j_list_lock is dropped in this case.
 626 @@ -173,7 +186,7 @@ static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
 627  /*
 628   *  Submit all the data buffers to disk
 629   */
 630 -static void journal_submit_data_buffers(journal_t *journal,
 631 +static int journal_submit_data_buffers(journal_t *journal,
 632                                 transaction_t *commit_transaction)
 633  {
 634         struct journal_head *jh;
 635 @@ -181,6 +194,7 @@ static void journal_submit_data_buffers(journal_t *journal,
 636         int locked;
 637         int bufs = 0;
 638         struct buffer_head **wbuf = journal->j_wbuf;
 639 +       int err = 0;
 640
 641         /*
 642          * Whenever we unlock the journal and sleep, things can get added
 643 @@ -232,7 +246,7 @@ write_out_data:
 644                         if (locked)
 645                                 unlock_buffer(bh);
 646                         BUFFER_TRACE(bh, "already cleaned up");
 647 -                       put_bh(bh);
 648 +                       release_data_buffer(bh);
 649                         continue;
 650                 }
 651                 if (locked && test_clear_buffer_dirty(bh)) {
 652 @@ -254,15 +268,17 @@ write_out_data:
 653                         put_bh(bh);
 654                 } else {
 655                         BUFFER_TRACE(bh, "writeout complete: unfile");
 656 +                       if (unlikely(buffer_write_io_error(bh)))
 657 +                               err = -EIO;
 658                         __journal_unfile_buffer(jh);
 659                         jbd_unlock_bh_state(bh);
 660                         if (locked)
 661                                 unlock_buffer(bh);
 662                         journal_remove_journal_head(bh);
 663 -                       /* Once for our safety reference, once for
 664 +                       /* One for our safety reference, other for
 665                          * journal_remove_journal_head() */
 666                         put_bh(bh);
 667 -                       put_bh(bh);
 668 +                       release_data_buffer(bh);
 669                 }
 670
 671                 if (lock_need_resched(&journal->j_list_lock)) {
 672 @@ -272,6 +288,8 @@ write_out_data:
 673         }
 674         spin_unlock(&journal->j_list_lock);
 675         journal_do_submit_data(wbuf, bufs);
 676 +
 677 +       return err;
 678  }
 679
 680  /*
 681 @@ -408,27 +426,10 @@ void journal_commit_transaction(journal_t *journal)
 682         jbd_debug (3, "JBD: commit phase 2\n");
 683
 684         /*
 685 -        * First, drop modified flag: all accesses to the buffers
 686 -        * will be tracked for a new trasaction only -bzzz
 687 -        */
 688 -       spin_lock(&journal->j_list_lock);
 689 -       if (commit_transaction->t_buffers) {
 690 -               new_jh = jh = commit_transaction->t_buffers->b_tnext;
 691 -               do {
 692 -                       J_ASSERT_JH(new_jh, new_jh->b_modified == 1 ||
 693 -                                       new_jh->b_modified == 0);
 694 -                       new_jh->b_modified = 0;
 695 -                       new_jh = new_jh->b_tnext;
 696 -               } while (new_jh != jh);
 697 -       }
 698 -       spin_unlock(&journal->j_list_lock);
 699 -
 700 -       /*
 701          * Now start flushing things to disk, in the order they appear
 702          * on the transaction lists.  Data blocks go first.
 703          */
 704 -       err = 0;
 705 -       journal_submit_data_buffers(journal, commit_transaction);
 706 +       err = journal_submit_data_buffers(journal, commit_transaction);
 707
 708         /*
 709          * Wait for all previously submitted IO to complete.
 710 @@ -443,10 +444,11 @@ void journal_commit_transaction(journal_t *journal)
 711                 if (buffer_locked(bh)) {
 712                         spin_unlock(&journal->j_list_lock);
 713                         wait_on_buffer(bh);
 714 -                       if (unlikely(!buffer_uptodate(bh)))
 715 -                               err = -EIO;
 716                         spin_lock(&journal->j_list_lock);
 717                 }
 718 +               if (unlikely(!buffer_uptodate(bh)))
 719 +                       err = -EIO;
 720 +
 721                 if (!inverted_lock(journal, bh)) {
 722                         put_bh(bh);
 723                         spin_lock(&journal->j_list_lock);
 724 @@ -460,18 +462,16 @@ void journal_commit_transaction(journal_t *journal)
 725                 } else {
 726                         jbd_unlock_bh_state(bh);
 727                 }
 728 -               put_bh(bh);
 729 +               release_data_buffer(bh);
 730                 cond_resched_lock(&journal->j_list_lock);
 731         }
 732         spin_unlock(&journal->j_list_lock);
 733
 734         if (err)
 735 -               __journal_abort_hard(journal);
 736 +               journal_abort(journal, err);
 737
 738         journal_write_revoke_records(journal, commit_transaction);
 739
 740 -       jbd_debug(3, "JBD: commit phase 2\n");
 741 -
 742         /*
 743          * If we found any dirty or locked buffers, then we should have
 744          * looped back up to the write_out_data label.  If there weren't
 745 @@ -489,6 +489,9 @@ void journal_commit_transaction(journal_t *journal)
 746          */
 747         commit_transaction->t_state = T_COMMIT;
 748
 749 +       J_ASSERT(commit_transaction->t_nr_buffers <=
 750 +               commit_transaction->t_outstanding_credits);
 751 +
 752         descriptor = NULL;
 753         bufs = 0;
 754         while (commit_transaction->t_buffers) {
 755 @@ -498,9 +501,10 @@ void journal_commit_transaction(journal_t *journal)
 756                 jh = commit_transaction->t_buffers;
 757
 758                 /* If we're in abort mode, we just un-journal the buffer and
 759 -                  release it for background writing. */
 760 +                  release it */
 761
 762                 if (is_journal_aborted(journal)) {
 763 +                       clear_buffer_jbddirty(jh2bh(jh));
 764                         JBUFFER_TRACE(jh, "journal is aborting: refile");
 765                         journal_refile_buffer(journal, jh);
 766                         /* If that was the last one, we need to clean up
 767 @@ -524,7 +528,7 @@ void journal_commit_transaction(journal_t *journal)
 768
 769                         descriptor = journal_get_descriptor_buffer(journal);
 770                         if (!descriptor) {
 771 -                               __journal_abort_hard(journal);
 772 +                               journal_abort(journal, -EIO);
 773                                 continue;
 774                         }
 775
 776 @@ -557,7 +561,7 @@ void journal_commit_transaction(journal_t *journal)
 777                    and repeat this loop: we'll fall into the
 778                    refile-on-abort condition above. */
 779                 if (err) {
 780 -                       __journal_abort_hard(journal);
 781 +                       journal_abort(journal, err);
 782                         continue;
 783                 }
 784
 785 @@ -742,13 +746,16 @@ wait_for_iobuf:
 786                 /* AKPM: bforget here */
 787         }
 788
 789 +       if (err)
 790 +               journal_abort(journal, err);
 791 +
 792         jbd_debug(3, "JBD: commit phase 6\n");
 793
 794         if (journal_write_commit_record(journal, commit_transaction))
 795                 err = -EIO;
 796
 797         if (err)
 798 -               __journal_abort_hard(journal);
 799 +               journal_abort(journal, err);
 800
 801         /* End of a transaction!  Finally, we can do checkpoint
 802             processing: any buffers committed as a result of this
 803 @@ -832,6 +839,8 @@ restart_loop:
 804                 if (buffer_jbddirty(bh)) {
 805                         JBUFFER_TRACE(jh, "add to new checkpointing trans");
 806                         __journal_insert_checkpoint(jh, commit_transaction);
 807 +                       if (is_journal_aborted(journal))
 808 +                               clear_buffer_jbddirty(bh);
 809                         JBUFFER_TRACE(jh, "refile for checkpoint writeback");
 810                         __journal_refile_buffer(jh);
 811                         jbd_unlock_bh_state(bh);
 812 @@ -858,10 +867,10 @@ restart_loop:
 813         }
 814         spin_unlock(&journal->j_list_lock);
 815         /*
 816 -        * This is a bit sleazy.  We borrow j_list_lock to protect
 817 -        * journal->j_committing_transaction in __journal_remove_checkpoint.
 818 -        * Really, __journal_remove_checkpoint should be using j_state_lock but
 819 -        * it's a bit hassle to hold that across __journal_remove_checkpoint
 820 +        * This is a bit sleazy.  We use j_list_lock to protect transition
 821 +        * of a transaction into T_FINISHED state and calling
 822 +        * __journal_drop_transaction(). Otherwise we could race with
 823 +        * other checkpointing code processing the transaction...
 824          */
 825         spin_lock(&journal->j_state_lock);
 826         spin_lock(&journal->j_list_lock);
 827 diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
 828 index 46fe743..8e937fc 100644
 829 --- a/fs/jbd/journal.c
 830 +++ b/fs/jbd/journal.c
 831 @@ -1128,9 +1128,12 @@ recovery_error:
 832   *
 833   * Release a journal_t structure once it is no longer in use by the
 834   * journaled object.
 835 + * Return <0 if we couldn't clean up the journal.
 836   */
 837 -void journal_destroy(journal_t *journal)
 838 +int journal_destroy(journal_t *journal)
 839  {
 840 +       int err = 0;
 841 +
 842         /* Wait for the commit thread to wake up and die. */
 843         journal_kill_thread(journal);
 844
 845 @@ -1153,11 +1156,16 @@ void journal_destroy(journal_t *journal)
 846         J_ASSERT(journal->j_checkpoint_transactions == NULL);
 847         spin_unlock(&journal->j_list_lock);
 848
 849 -       /* We can now mark the journal as empty. */
 850 -       journal->j_tail = 0;
 851 -       journal->j_tail_sequence = ++journal->j_transaction_sequence;
 852         if (journal->j_sb_buffer) {
 853 -               journal_update_superblock(journal, 1);
 854 +               if (!is_journal_aborted(journal)) {
 855 +                       /* We can now mark the journal as empty. */
 856 +                       journal->j_tail = 0;
 857 +                       journal->j_tail_sequence =
 858 +                               ++journal->j_transaction_sequence;
 859 +                       journal_update_superblock(journal, 1);
 860 +               } else {
 861 +                       err = -EIO;
 862 +               }
 863                 brelse(journal->j_sb_buffer);
 864         }
 865
 866 @@ -1167,6 +1175,8 @@ void journal_destroy(journal_t *journal)
 867                 journal_destroy_revoke(journal);
 868         kfree(journal->j_wbuf);
 869         kfree(journal);
 870 +
 871 +       return err;
 872  }
 873
 874
 875 @@ -1366,10 +1376,16 @@ int journal_flush(journal_t *journal)
 876         spin_lock(&journal->j_list_lock);
 877         while (!err && journal->j_checkpoint_transactions != NULL) {
 878                 spin_unlock(&journal->j_list_lock);
 879 +               mutex_lock(&journal->j_checkpoint_mutex);
 880                 err = log_do_checkpoint(journal);
 881 +               mutex_unlock(&journal->j_checkpoint_mutex);
 882                 spin_lock(&journal->j_list_lock);
 883         }
 884         spin_unlock(&journal->j_list_lock);
 885 +
 886 +       if (is_journal_aborted(journal))
 887 +               return -EIO;
 888 +
 889         cleanup_journal_tail(journal);
 890
 891         /* Finally, mark the journal as really needing no recovery.
 892 @@ -1391,7 +1407,7 @@ int journal_flush(journal_t *journal)
 893         J_ASSERT(journal->j_head == journal->j_tail);
 894         J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
 895         spin_unlock(&journal->j_state_lock);
 896 -       return err;
 897 +       return 0;
 898  }
 899
 900  /**
 901 diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
 902 index 2a5f4b8..66ae0e5 100644
 903 --- a/fs/jbd/recovery.c
 904 +++ b/fs/jbd/recovery.c
 905 @@ -223,7 +223,7 @@ do {                                                                        \
 906   */
 907  int journal_recover(journal_t *journal)
 908  {
 909 -       int                     err;
 910 +       int                     err, err2;
 911         journal_superblock_t *  sb;
 912
 913         struct recovery_info    info;
 914 @@ -261,7 +261,10 @@ int journal_recover(journal_t *journal)
 915         journal->j_transaction_sequence = ++info.end_transaction;
 916
 917         journal_clear_revoke(journal);
 918 -       sync_blockdev(journal->j_fs_dev);
 919 +       err2 = sync_blockdev(journal->j_fs_dev);
 920 +       if (!err)
 921 +               err = err2;
 922 +
 923         return err;
 924  }
 925
 926 @@ -478,7 +481,7 @@ static int do_one_pass(journal_t *journal,
 927                                         memcpy(nbh->b_data, obh->b_data,
 928                                                         journal->j_blocksize);
 929                                         if (flags & JFS_FLAG_ESCAPE) {
 930 -                                               *((__be32 *)bh->b_data) =
 931 +                                               *((__be32 *)nbh->b_data) =
 932                                                 cpu_to_be32(JFS_MAGIC_NUMBER);
 933                                         }
 934
 935 diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
 936 index 772b653..1b9a804 100644
 937 --- a/fs/jbd/transaction.c
 938 +++ b/fs/jbd/transaction.c
 939 @@ -600,6 +600,13 @@ repeat:
 940             jh->b_next_transaction == transaction)
 941                 goto done;
 942
 943 +        /*
 944 +        * this is the first time this transaction is touching this buffer,
 945 +        * reset the modified flag
 946 +        */
 947 +       jh->b_modified = 0;
 948 +
 949 +
 950         /*
 951          * If there is already a copy-out version of this buffer, then we don't
 952          * need to make another one
 953 @@ -812,9 +819,15 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
 954
 955         if (jh->b_transaction == NULL) {
 956                 jh->b_transaction = transaction;
 957 +
 958 +               /* first access by this transaction */
 959 +               jh->b_modified = 0;
 960 +
 961                 JBUFFER_TRACE(jh, "file as BJ_Reserved");
 962                 __journal_file_buffer(jh, transaction, BJ_Reserved);
 963         } else if (jh->b_transaction == journal->j_committing_transaction) {
 964 +               /* first access by this transaction */
 965 +               jh->b_modified = 0;
 966                 JBUFFER_TRACE(jh, "set next transaction");
 967                 jh->b_next_transaction = transaction;
 968         }
 969 @@ -1213,6 +1226,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
 970         struct journal_head *jh;
 971         int drop_reserve = 0;
 972         int err = 0;
 973 +       int was_modified = 0;
 974
 975         BUFFER_TRACE(bh, "entry");
 976
 977 @@ -1231,6 +1245,9 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
 978                 goto not_jbd;
 979         }
 980
 981 +       /* keep track of whether or not this transaction modified us */
 982 +       was_modified = jh->b_modified;
 983 +
 984         /*
 985          * The buffer's going from the transaction, we must drop
 986          * all references -bzzz
 987 @@ -1248,7 +1265,12 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
 988
 989                 JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
 990
 991 -               drop_reserve = 1;
 992 +               /*
 993 +                * we only want to drop a reference if this transaction
 994 +                * modified the buffer
 995 +                */
 996 +               if (was_modified)
 997 +                       drop_reserve = 1;
 998
 999                 /*
1000                  * We are no longer going to journal this buffer.
1001 @@ -1288,7 +1310,12 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
1002                 if (jh->b_next_transaction) {
1003                         J_ASSERT(jh->b_next_transaction == transaction);
1004                         jh->b_next_transaction = NULL;
1005 -                       drop_reserve = 1;
1006 +                       /*
1007 +                        * only drop a reference if this transaction modified
1008 +                        * the buffer
1009 +                        */
1010 +                       if (was_modified)
1011 +                               drop_reserve = 1;
1012                 }
1013         }
1014
1015 @@ -2058,7 +2085,7 @@ void __journal_refile_buffer(struct journal_head *jh)
1016         jh->b_transaction = jh->b_next_transaction;
1017         jh->b_next_transaction = NULL;
1018         __journal_file_buffer(jh, jh->b_transaction,
1019 -                               was_dirty ? BJ_Metadata : BJ_Reserved);
1020 +                               jh->b_modified ? BJ_Metadata : BJ_Reserved);
1021         J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
1022
1023         if (was_dirty)
1024 diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
1025 index ff56e39..8c51469 100644
1026 --- a/include/linux/ext3_fs.h
1027 +++ b/include/linux/ext3_fs.h
1028 @@ -827,6 +827,7 @@ extern void ext3_discard_reservation (struct inode *);
1029  extern void ext3_dirty_inode(struct inode *);
1030  extern int ext3_change_inode_journal_flag(struct inode *, int);
1031  extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *);
1032 +extern int ext3_can_truncate(struct inode *inode);
1033  extern void ext3_truncate (struct inode *);
1034  extern void ext3_set_inode_flags(struct inode *);
1035  extern void ext3_get_inode_flags(struct ext3_inode_info *);
1036 diff --git a/include/linux/jbd.h b/include/linux/jbd.h
1037 index 4527375..6bc0e4f 100644
1038 --- a/include/linux/jbd.h
1039 +++ b/include/linux/jbd.h
1040 @@ -446,6 +446,8 @@ struct transaction_s
1041         /*
1042          * Transaction's current state
1043          * [no locking - only kjournald alters this]
1044 +        * [j_list_lock] guards transition of a transaction into T_FINISHED
1045 +        * state and subsequent call of __journal_drop_transaction()
1046          * FIXME: needs barriers
1047          * KLUDGE: [use j_state_lock]
1048          */
1049 @@ -924,7 +926,7 @@ extern int     journal_set_features
1050                    (journal_t *, unsigned long, unsigned long, unsigned long);
1051  extern int        journal_create     (journal_t *);
1052  extern int        journal_load       (journal_t *journal);
1053 -extern void       journal_destroy    (journal_t *);
1054 +extern int        journal_destroy    (journal_t *);
1055  extern int        journal_recover    (journal_t *journal);
1056  extern int        journal_wipe       (journal_t *, int);
1057  extern int        journal_skip_recovery        (journal_t *);