Merge to Fedora kernel-2.6.18-1.2224_FC5 patched with stable patch-2.6.18.1-vs2.0...
[linux-2.6.git] / fs / ext3 / inode.c
index da40fc7..23d956f 100644 (file)
 #include "xattr.h"
 #include "acl.h"
 
+static int ext3_writepage_trans_blocks(struct inode *inode);
+
 /*
  * Test whether an inode is a fast symlink.
  */
-static inline int ext3_inode_is_fast_symlink(struct inode *inode)
+static int ext3_inode_is_fast_symlink(struct inode *inode)
 {
        int ea_blocks = EXT3_I(inode)->i_file_acl ?
                (inode->i_sb->s_blocksize >> 9) : 0;
 
-       return (S_ISLNK(inode->i_mode) &&
-               inode->i_blocks - ea_blocks == 0);
+       return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
 }
 
-/* The ext3 forget function must perform a revoke if we are freeing data
+/*
+ * The ext3 forget function must perform a revoke if we are freeing data
  * which has been journaled.  Metadata (eg. indirect blocks) must be
  * revoked in all cases. 
  *
@@ -60,13 +62,13 @@ static inline int ext3_inode_is_fast_symlink(struct inode *inode)
  * but there may still be a record of it in the journal, and that record
  * still needs to be revoked.
  */
-
-int ext3_forget(handle_t *handle, int is_metadata,
-                      struct inode *inode, struct buffer_head *bh,
-                      int blocknr)
+int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
+                       struct buffer_head *bh, ext3_fsblk_t blocknr)
 {
        int err;
 
+       might_sleep();
+
        BUFFER_TRACE(bh, "enter");
 
        jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
@@ -83,7 +85,7 @@ int ext3_forget(handle_t *handle, int is_metadata,
            (!is_metadata && !ext3_should_journal_data(inode))) {
                if (bh) {
                        BUFFER_TRACE(bh, "call journal_forget");
-                       ext3_journal_forget(handle, bh);
+                       return ext3_journal_forget(handle, bh);
                }
                return 0;
        }
@@ -101,10 +103,9 @@ int ext3_forget(handle_t *handle, int is_metadata,
 }
 
 /*
- * Work out how many blocks we need to progress with the next chunk of a
+ * Work out how many blocks we need to proceed with the next chunk of a
  * truncate transaction.
  */
-
 static unsigned long blocks_for_truncate(struct inode *inode) 
 {
        unsigned long needed;
@@ -125,7 +126,7 @@ static unsigned long blocks_for_truncate(struct inode *inode)
        if (needed > EXT3_MAX_TRANS_DATA) 
                needed = EXT3_MAX_TRANS_DATA;
 
-       return EXT3_DATA_TRANS_BLOCKS + needed;
+       return EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + needed;
 }
 
 /* 
@@ -138,7 +139,6 @@ static unsigned long blocks_for_truncate(struct inode *inode)
  * extend fails, we need to propagate the failure up and restart the
  * transaction in the top-level truncate loop. --sct 
  */
-
 static handle_t *start_transaction(struct inode *inode) 
 {
        handle_t *result;
@@ -177,21 +177,6 @@ static int ext3_journal_test_restart(handle_t *handle, struct inode *inode)
        return ext3_journal_restart(handle, blocks_for_truncate(inode));
 }
 
-/*
- * Called at each iput()
- *
- * The inode may be "bad" if ext3_read_inode() saw an error from
- * ext3_get_inode(), so we need to check that to avoid freeing random disk
- * blocks.
- */
-void ext3_put_inode(struct inode *inode)
-{
-       if (!is_bad_inode(inode))
-               ext3_discard_prealloc(inode);
-}
-
-static void ext3_truncate_nocheck (struct inode *inode);
-
 /*
  * Called at the last iput() if i_nlink is zero.
  */
@@ -199,14 +184,18 @@ void ext3_delete_inode (struct inode * inode)
 {
        handle_t *handle;
 
+       truncate_inode_pages(&inode->i_data, 0);
+
        if (is_bad_inode(inode))
                goto no_delete;
 
        handle = start_transaction(inode);
        if (IS_ERR(handle)) {
-               /* If we're going to skip the normal cleanup, we still
-                * need to make sure that the in-core orphan linked list
-                * is properly cleaned up. */
+               /*
+                * If we're going to skip the normal cleanup, we still need to
+                * make sure that the in-core orphan linked list is properly
+                * cleaned up.
+                */
                ext3_orphan_del(NULL, inode);
                goto no_delete;
        }
@@ -215,7 +204,7 @@ void ext3_delete_inode (struct inode * inode)
                handle->h_sync = 1;
        inode->i_size = 0;
        if (inode->i_blocks)
-               ext3_truncate_nocheck(inode);
+               ext3_truncate(inode);
        /*
         * Kill off the orphan record which ext3_truncate created.
         * AKPM: I think this can be inside the above `if'.
@@ -245,79 +234,19 @@ no_delete:
        clear_inode(inode);     /* We must guarantee clearing of inode... */
 }
 
-void ext3_discard_prealloc (struct inode * inode)
-{
-#ifdef EXT3_PREALLOCATE
-       struct ext3_inode_info *ei = EXT3_I(inode);
-       /* Writer: ->i_prealloc* */
-       if (ei->i_prealloc_count) {
-               unsigned short total = ei->i_prealloc_count;
-               unsigned long block = ei->i_prealloc_block;
-               ei->i_prealloc_count = 0;
-               ei->i_prealloc_block = 0;
-               /* Writer: end */
-               ext3_free_blocks (inode, block, total);
-       }
-#endif
-}
-
-static int ext3_alloc_block (handle_t *handle,
-                       struct inode * inode, unsigned long goal, int *err)
-{
-       unsigned long result;
-
-#ifdef EXT3_PREALLOCATE
-#ifdef EXT3FS_DEBUG
-       static unsigned long alloc_hits, alloc_attempts;
-#endif
-       struct ext3_inode_info *ei = EXT3_I(inode);
-       /* Writer: ->i_prealloc* */
-       if (ei->i_prealloc_count &&
-           (goal == ei->i_prealloc_block ||
-            goal + 1 == ei->i_prealloc_block))
-       {
-               result = ei->i_prealloc_block++;
-               ei->i_prealloc_count--;
-               /* Writer: end */
-               ext3_debug ("preallocation hit (%lu/%lu).\n",
-                           ++alloc_hits, ++alloc_attempts);
-       } else {
-               ext3_discard_prealloc (inode);
-               ext3_debug ("preallocation miss (%lu/%lu).\n",
-                           alloc_hits, ++alloc_attempts);
-               if (S_ISREG(inode->i_mode))
-                       result = ext3_new_block (inode, goal, 
-                                &ei->i_prealloc_count,
-                                &ei->i_prealloc_block, err);
-               else
-                       result = ext3_new_block(inode, goal, NULL, NULL, err);
-               /*
-                * AKPM: this is somewhat sticky.  I'm not surprised it was
-                * disabled in 2.2's ext3.  Need to integrate b_committed_data
-                * guarding with preallocation, if indeed preallocation is
-                * effective.
-                */
-       }
-#else
-       result = ext3_new_block(handle, inode, goal, NULL, NULL, err);
-#endif
-       return result;
-}
-
-
 typedef struct {
-       u32     *p;
-       u32     key;
+       __le32  *p;
+       __le32  key;
        struct buffer_head *bh;
 } Indirect;
 
-static inline void add_chain(Indirect *p, struct buffer_head *bh, u32 *v)
+static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
 {
        p->key = *(p->p = v);
        p->bh = bh;
 }
 
-static inline int verify_chain(Indirect *from, Indirect *to)
+static int verify_chain(Indirect *from, Indirect *to)
 {
        while (from <= to && from->key == *from->p)
                from++;
@@ -387,10 +316,10 @@ static int ext3_block_to_path(struct inode *inode,
                offsets[n++] = i_block & (ptrs - 1);
                final = ptrs;
        } else {
-               ext3_warning (inode->i_sb, "ext3_block_to_path", "block > big");
+               ext3_warning(inode->i_sb, "ext3_block_to_path", "block > big");
        }
        if (boundary)
-               *boundary = (i_block & (ptrs - 1)) == (final - 1);
+               *boundary = final - 1 - (i_block & (ptrs - 1));
        return n;
 }
 
@@ -442,7 +371,7 @@ static Indirect *ext3_get_branch(struct inode *inode, int depth, int *offsets,
                /* Reader: pointers */
                if (!verify_chain(chain, p))
                        goto changed;
-               add_chain(++p, bh, (u32*)bh->b_data + *++offsets);
+               add_chain(++p, bh, (__le32*)bh->b_data + *++offsets);
                /* Reader: end */
                if (!p->key)
                        goto no_block;
@@ -479,30 +408,29 @@ no_block:
  *
  *     Caller must make sure that @ind is valid and will stay that way.
  */
-
-static unsigned long ext3_find_near(struct inode *inode, Indirect *ind)
+static ext3_fsblk_t ext3_find_near(struct inode *inode, Indirect *ind)
 {
        struct ext3_inode_info *ei = EXT3_I(inode);
-       u32 *start = ind->bh ? (u32*) ind->bh->b_data : ei->i_data;
-       u32 *p;
-       unsigned long bg_start;
-       unsigned long colour;
+       __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data;
+       __le32 *p;
+       ext3_fsblk_t bg_start;
+       ext3_grpblk_t colour;
 
        /* Try to find previous block */
-       for (p = ind->p - 1; p >= start; p--)
+       for (p = ind->p - 1; p >= start; p--) {
                if (*p)
                        return le32_to_cpu(*p);
+       }
 
        /* No such thing, so let's try location of indirect block */
        if (ind->bh)
                return ind->bh->b_blocknr;
 
        /*
-        * It is going to be refered from inode itself? OK, just put it into
-        * the same cylinder group then.
+        * It is going to be referred to from the inode itself? OK, just put it
+        * into the same cylinder group then.
         */
-       bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) +
-               le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block);
+       bg_start = ext3_group_first_block_no(inode->i_sb, ei->i_block_group);
        colour = (current->pid % 16) *
                        (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16);
        return bg_start + colour;
@@ -517,51 +445,143 @@ static unsigned long ext3_find_near(struct inode *inode, Indirect *ind)
  *     @goal:  place to store the result.
  *
  *     Normally this function find the prefered place for block allocation,
- *     stores it in *@goal and returns zero. If the branch had been changed
- *     under us we return -EAGAIN.
+ *     and returns it.
  */
 
-static int ext3_find_goal(struct inode *inode, long block, Indirect chain[4],
-                         Indirect *partial, unsigned long *goal)
+static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block,
+               Indirect chain[4], Indirect *partial)
 {
-       struct ext3_inode_info *ei = EXT3_I(inode);
-       /* Writer: ->i_next_alloc* */
-       if (block == ei->i_next_alloc_block + 1) {
-               ei->i_next_alloc_block++;
-               ei->i_next_alloc_goal++;
+       struct ext3_block_alloc_info *block_i;
+
+       block_i =  EXT3_I(inode)->i_block_alloc_info;
+
+       /*
+        * try the heuristic for sequential allocation,
+        * failing that at least try to get decent locality.
+        */
+       if (block_i && (block == block_i->last_alloc_logical_block + 1)
+               && (block_i->last_alloc_physical_block != 0)) {
+               return block_i->last_alloc_physical_block + 1;
        }
-       /* Writer: end */
-       /* Reader: pointers, ->i_next_alloc* */
-       if (verify_chain(chain, partial)) {
-               /*
-                * try the heuristic for sequential allocation,
-                * failing that at least try to get decent locality.
-                */
-               if (block == ei->i_next_alloc_block)
-                       *goal = ei->i_next_alloc_goal;
-               if (!*goal)
-                       *goal = ext3_find_near(inode, partial);
-               return 0;
+
+       return ext3_find_near(inode, partial);
+}
+
+/**
+ *     ext3_blks_to_allocate: Look up the block map and count the number
+ *     of direct blocks that need to be allocated for the given branch.
+ *
+ *     @branch: chain of indirect blocks
+ *     @k: number of blocks needed for indirect blocks
+ *     @blks: number of data blocks to be mapped.
+ *     @blocks_to_boundary:  the offset in the indirect block
+ *
+ *     return the number of direct blocks to allocate; the @k indirect
+ *     blocks are not included in the count.
+ */
+static int ext3_blks_to_allocate(Indirect *branch, int k, unsigned long blks,
+               int blocks_to_boundary)
+{
+       unsigned long count = 0;
+
+       /*
+        * Simple case: the [t,d]Indirect block(s) have not been allocated
+        * yet, so clearly no blocks on that path have been allocated either
+        */
+       if (k > 0) {
+               /* right now we don't handle cross boundary allocation */
+               if (blks < blocks_to_boundary + 1)
+                       count += blks;
+               else
+                       count += blocks_to_boundary + 1;
+               return count;
+       }
+
+       count++;
+       while (count < blks && count <= blocks_to_boundary &&
+               le32_to_cpu(*(branch[0].p + count)) == 0) {
+               count++;
+       }
+       return count;
+}
+
+/**
+ *     ext3_alloc_blocks: allocate the multiple blocks needed for a branch
+ *     @indirect_blks: the number of blocks that need to be allocated for
+ *                     indirect blocks
+ *
+ *     @new_blocks: on return it will store the new block numbers for
+ *     the indirect blocks (if needed) and the first direct block.
+ *     @blks:  the number of direct blocks requested; on success the
+ *             return value is the number of direct blocks allocated
+ */
+static int ext3_alloc_blocks(handle_t *handle, struct inode *inode,
+                       ext3_fsblk_t goal, int indirect_blks, int blks,
+                       ext3_fsblk_t new_blocks[4], int *err)
+{
+       int target, i;
+       unsigned long count = 0;
+       int index = 0;
+       ext3_fsblk_t current_block = 0;
+       int ret = 0;
+
+       /*
+        * Here we try to allocate the requested multiple blocks at once,
+        * on a best-effort basis.
+        * To build a branch, we should allocate blocks for
+        * the indirect blocks (if not allocated yet), and at least
+        * the first direct block of this branch.  That's the
+        * minimum number of blocks that need to be allocated (required)
+        */
+       target = blks + indirect_blks;
+
+       while (1) {
+               count = target;
+               /* allocating blocks for indirect blocks and direct blocks */
+               current_block = ext3_new_blocks(handle,inode,goal,&count,err);
+               if (*err)
+                       goto failed_out;
+
+               target -= count;
+               /* allocate blocks for indirect blocks */
+               while (index < indirect_blks && count) {
+                       new_blocks[index++] = current_block++;
+                       count--;
+               }
+
+               if (count > 0)
+                       break;
        }
-       /* Reader: end */
-       return -EAGAIN;
+
+       /* save the new block number for the first direct block */
+       new_blocks[index] = current_block;
+
+       /* total number of blocks allocated for direct blocks */
+       ret = count;
+       *err = 0;
+       return ret;
+failed_out:
+       for (i = 0; i <index; i++)
+               ext3_free_blocks(handle, inode, new_blocks[i], 1);
+       return ret;
 }
 
 /**
  *     ext3_alloc_branch - allocate and set up a chain of blocks.
  *     @inode: owner
- *     @num: depth of the chain (number of blocks to allocate)
+ *     @indirect_blks: number of indirect blocks to allocate
+ *     @blks: in: number of direct blocks wanted; out: number allocated
  *     @offsets: offsets (in the blocks) to store the pointers to next.
  *     @branch: place to store the chain in.
  *
- *     This function allocates @num blocks, zeroes out all but the last one,
+ *     This function allocates blocks, zeroes out all but the last one,
  *     links them into chain and (if we are synchronous) writes them to disk.
  *     In other words, it prepares a branch that can be spliced onto the
  *     inode. It stores the information about that chain in the branch[], in
  *     the same format as ext3_get_branch() would do. We are calling it after
  *     we had read the existing part of chain and partial points to the last
  *     triple of that (one with zero ->key). Upon the exit we have the same
- *     picture as after the successful ext3_get_block(), excpet that in one
+ *     picture as after the successful ext3_get_block(), except that in one
  *     place chain is disconnected - *branch->p is still zero (we did not
  *     set the last link), but branch->key contains the number that should
  *     be placed into *branch->p to fill that gap.
@@ -571,98 +591,106 @@ static int ext3_find_goal(struct inode *inode, long block, Indirect chain[4],
  *     ext3_alloc_block() (normally -ENOSPC). Otherwise we set the chain
  *     as described above and return 0.
  */
-
 static int ext3_alloc_branch(handle_t *handle, struct inode *inode,
-                            int num,
-                            unsigned long goal,
-                            int *offsets,
-                            Indirect *branch)
+                       int indirect_blks, int *blks, ext3_fsblk_t goal,
+                       int *offsets, Indirect *branch)
 {
        int blocksize = inode->i_sb->s_blocksize;
-       int n = 0, keys = 0;
+       int i, n = 0;
        int err = 0;
-       int i;
-       int parent = ext3_alloc_block(handle, inode, goal, &err);
-
-       branch[0].key = cpu_to_le32(parent);
-       if (parent) {
-               for (n = 1; n < num; n++) {
-                       struct buffer_head *bh;
-                       /* Allocate the next block */
-                       int nr = ext3_alloc_block(handle, inode, parent, &err);
-                       if (!nr)
-                               break;
-                       branch[n].key = cpu_to_le32(nr);
-                       keys = n+1;
+       struct buffer_head *bh;
+       int num;
+       ext3_fsblk_t new_blocks[4];
+       ext3_fsblk_t current_block;
 
-                       /*
-                        * Get buffer_head for parent block, zero it out
-                        * and set the pointer to new one, then send
-                        * parent to disk.  
-                        */
-                       bh = sb_getblk(inode->i_sb, parent);
-                       branch[n].bh = bh;
-                       lock_buffer(bh);
-                       BUFFER_TRACE(bh, "call get_create_access");
-                       err = ext3_journal_get_create_access(handle, bh);
-                       if (err) {
-                               unlock_buffer(bh);
-                               brelse(bh);
-                               break;
-                       }
+       num = ext3_alloc_blocks(handle, inode, goal, indirect_blks,
+                               *blks, new_blocks, &err);
+       if (err)
+               return err;
 
-                       memset(bh->b_data, 0, blocksize);
-                       branch[n].p = (u32*) bh->b_data + offsets[n];
-                       *branch[n].p = branch[n].key;
-                       BUFFER_TRACE(bh, "marking uptodate");
-                       set_buffer_uptodate(bh);
+       branch[0].key = cpu_to_le32(new_blocks[0]);
+       /*
+        * metadata blocks and data blocks are allocated.
+        */
+       for (n = 1; n <= indirect_blks;  n++) {
+               /*
+                * Get buffer_head for parent block, zero it out
+                * and set the pointer to new one, then send
+                * parent to disk.
+                */
+               bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
+               branch[n].bh = bh;
+               lock_buffer(bh);
+               BUFFER_TRACE(bh, "call get_create_access");
+               err = ext3_journal_get_create_access(handle, bh);
+               if (err) {
                        unlock_buffer(bh);
+                       brelse(bh);
+                       goto failed;
+               }
 
-                       BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
-                       err = ext3_journal_dirty_metadata(handle, bh);
-                       if (err)
-                               break;
-
-                       parent = nr;
+               memset(bh->b_data, 0, blocksize);
+               branch[n].p = (__le32 *) bh->b_data + offsets[n];
+               branch[n].key = cpu_to_le32(new_blocks[n]);
+               *branch[n].p = branch[n].key;
+               if ( n == indirect_blks) {
+                       current_block = new_blocks[n];
+                       /*
+                        * End of chain, update the last new metablock of
+                        * the chain to point to the newly allocated
+                        * data block numbers
+                        */
+                       for (i=1; i < num; i++)
+                               *(branch[n].p + i) = cpu_to_le32(++current_block);
                }
-       }
-       if (n == num)
-               return 0;
+               BUFFER_TRACE(bh, "marking uptodate");
+               set_buffer_uptodate(bh);
+               unlock_buffer(bh);
 
+               BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
+               err = ext3_journal_dirty_metadata(handle, bh);
+               if (err)
+                       goto failed;
+       }
+       *blks = num;
+       return err;
+failed:
        /* Allocation failed, free what we already allocated */
-       for (i = 1; i < keys; i++) {
+       for (i = 1; i <= n ; i++) {
                BUFFER_TRACE(branch[i].bh, "call journal_forget");
                ext3_journal_forget(handle, branch[i].bh);
        }
-       for (i = 0; i < keys; i++)
-               ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);
+       for (i = 0; i <indirect_blks; i++)
+               ext3_free_blocks(handle, inode, new_blocks[i], 1);
+
+       ext3_free_blocks(handle, inode, new_blocks[i], num);
+
        return err;
 }
 
 /**
- *     ext3_splice_branch - splice the allocated branch onto inode.
- *     @inode: owner
- *     @block: (logical) number of block we are adding
- *     @chain: chain of indirect blocks (with a missing link - see
- *             ext3_alloc_branch)
- *     @where: location of missing link
- *     @num:   number of blocks we are adding
- *
- *     This function verifies that chain (up to the missing link) had not
- *     changed, fills the missing link and does all housekeeping needed in
- *     inode (->i_blocks, etc.). In case of success we end up with the full
- *     chain to new block and return 0. Otherwise (== chain had been changed)
- *     we free the new blocks (forgetting their buffer_heads, indeed) and
- *     return -EAGAIN.
+ * ext3_splice_branch - splice the allocated branch onto inode.
+ * @inode: owner
+ * @block: (logical) number of block we are adding
+ * @chain: chain of indirect blocks (with a missing link - see
+ *     ext3_alloc_branch)
+ * @where: location of missing link
+ * @num:   number of indirect blocks we are adding
+ * @blks:  number of direct blocks we are adding
+ *
+ * This function fills the missing link and does all housekeeping needed in
+ * inode (->i_blocks, etc.). In case of success we end up with the full
+ * chain to new block and return 0.
  */
-
-static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block,
-                             Indirect chain[4], Indirect *where, int num)
+static int ext3_splice_branch(handle_t *handle, struct inode *inode,
+                       long block, Indirect *where, int num, int blks)
 {
        int i;
        int err = 0;
-       struct ext3_inode_info *ei = EXT3_I(inode);
+       struct ext3_block_alloc_info *block_i;
+       ext3_fsblk_t current_block;
 
+       block_i = EXT3_I(inode)->i_block_alloc_info;
        /*
         * If we're splicing into a [td]indirect block (as opposed to the
         * inode) then we need to get write access to the [td]indirect block
@@ -674,29 +702,40 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block,
                if (err)
                        goto err_out;
        }
-       /* Verify that place we are splicing to is still there and vacant */
-
-       /* Writer: pointers, ->i_next_alloc* */
-       if (!verify_chain(chain, where-1) || *where->p)
-               /* Writer: end */
-               goto changed;
-
        /* That's it */
 
        *where->p = where->key;
-       ei->i_next_alloc_block = block;
-       ei->i_next_alloc_goal = le32_to_cpu(where[num-1].key);
-       /* Writer: end */
+
+       /*
+        * Update the host buffer_head or inode to point to the additional,
+        * just allocated direct blocks
+        */
+       if (num == 0 && blks > 1) {
+               current_block = le32_to_cpu(where->key) + 1;
+               for (i = 1; i < blks; i++)
+                       *(where->p + i ) = cpu_to_le32(current_block++);
+       }
+
+       /*
+        * update the most recently allocated logical & physical block
+        * in i_block_alloc_info, to help find the proper goal block for the
+        * next allocation
+        */
+       if (block_i) {
+               block_i->last_alloc_logical_block = block + blks - 1;
+               block_i->last_alloc_physical_block =
+                               le32_to_cpu(where[num].key) + blks - 1;
+       }
 
        /* We are done with atomic stuff, now do the rest of housekeeping */
 
-       inode->i_ctime = CURRENT_TIME;
+       inode->i_ctime = CURRENT_TIME_SEC;
        ext3_mark_inode_dirty(handle, inode);
 
        /* had we spliced it onto indirect block? */
        if (where->bh) {
                /*
-                * akpm: If we spliced it onto an indirect block, we haven't
+                * If we spliced it onto an indirect block, we haven't
                 * altered the inode.  Note however that if it is being spliced
                 * onto an indirect block at the very end of the file (the
                 * file is growing) then we *will* alter the inode to reflect
@@ -717,26 +756,14 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block,
        }
        return err;
 
-changed:
-       /*
-        * AKPM: if where[i].bh isn't part of the current updating
-        * transaction then we explode nastily.  Test this code path.
-        */
-       jbd_debug(1, "the chain changed: try again\n");
-       err = -EAGAIN;
-
 err_out:
-       for (i = 1; i < num; i++) {
+       for (i = 1; i <= num; i++) {
                BUFFER_TRACE(where[i].bh, "call journal_forget");
                ext3_journal_forget(handle, where[i].bh);
+               ext3_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1);
        }
-       /* For the normal collision cleanup case, we free up the blocks.
-        * On genuine filesystem errors we don't even think about doing
-        * that. */
-       if (err == -EAGAIN)
-               for (i = 0; i < num; i++)
-                       ext3_free_blocks(handle, inode, 
-                                        le32_to_cpu(where[i].key), 1);
+       ext3_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks);
+
        return err;
 }
 
@@ -752,141 +779,184 @@ err_out:
  * allocations is needed - we simply release blocks and do not touch anything
  * reachable from inode.
  *
- * akpm: `handle' can be NULL if create == 0.
+ * `handle' can be NULL if create == 0.
  *
  * The BKL may not be held on entry here.  Be sure to take it early.
+ * return > 0, # of blocks mapped or allocated.
+ * return = 0, if plain lookup failed.
+ * return < 0, error case.
  */
-
-static int
-ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock,
-               struct buffer_head *bh_result, int create, int extend_disksize)
+int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
+               sector_t iblock, unsigned long maxblocks,
+               struct buffer_head *bh_result,
+               int create, int extend_disksize)
 {
        int err = -EIO;
        int offsets[4];
        Indirect chain[4];
        Indirect *partial;
-       unsigned long goal;
-       int left;
-       int boundary = 0;
-       int depth = ext3_block_to_path(inode, iblock, offsets, &boundary);
+       ext3_fsblk_t goal;
+       int indirect_blks;
+       int blocks_to_boundary = 0;
+       int depth;
        struct ext3_inode_info *ei = EXT3_I(inode);
+       int count = 0;
+       ext3_fsblk_t first_block = 0;
+
 
        J_ASSERT(handle != NULL || create == 0);
+       depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
 
        if (depth == 0)
                goto out;
 
-reread:
        partial = ext3_get_branch(inode, depth, offsets, chain, &err);
 
        /* Simplest case - block found, no allocation needed */
        if (!partial) {
+               first_block = le32_to_cpu(chain[depth - 1].key);
                clear_buffer_new(bh_result);
-got_it:
-               map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
-               if (boundary)
-                       set_buffer_boundary(bh_result);
-               /* Clean up and exit */
-               partial = chain+depth-1; /* the whole chain */
-               goto cleanup;
+               count++;
+               /* map more blocks */
+               while (count < maxblocks && count <= blocks_to_boundary) {
+                       ext3_fsblk_t blk;
+
+                       if (!verify_chain(chain, partial)) {
+                               /*
+                                * Indirect block might be removed by
+                                * truncate while we were reading it.
+                                * Handling of that case: forget what we've
+                                * got now. Flag the err as EAGAIN, so it
+                                * will reread.
+                                */
+                               err = -EAGAIN;
+                               count = 0;
+                               break;
+                       }
+                       blk = le32_to_cpu(*(chain[depth-1].p + count));
+
+                       if (blk == first_block + count)
+                               count++;
+                       else
+                               break;
+               }
+               if (err != -EAGAIN)
+                       goto got_it;
        }
 
        /* Next simple case - plain lookup or failed read of indirect block */
-       if (!create || err == -EIO) {
-cleanup:
+       if (!create || err == -EIO)
+               goto cleanup;
+
+       mutex_lock(&ei->truncate_mutex);
+
+       /*
+        * If the indirect block is missing while we are reading
+        * the chain (ext3_get_branch() returns -EAGAIN err), or
+        * if the chain has been changed after we grab truncate_mutex
+        * (either because another process truncated this branch, or
+        * another get_block allocated this branch), re-grab the chain to see
+        * if the requested block has been allocated or not.
+        *
+        * Since we already block the truncate/other get_block
+        * at this point, we will have the current copy of the chain when we
+        * splice the branch into the tree.
+        */
+       if (err == -EAGAIN || !verify_chain(chain, partial)) {
                while (partial > chain) {
-                       BUFFER_TRACE(partial->bh, "call brelse");
                        brelse(partial->bh);
                        partial--;
                }
-               BUFFER_TRACE(bh_result, "returned");
-out:
-               return err;
+               partial = ext3_get_branch(inode, depth, offsets, chain, &err);
+               if (!partial) {
+                       count++;
+                       mutex_unlock(&ei->truncate_mutex);
+                       if (err)
+                               goto cleanup;
+                       clear_buffer_new(bh_result);
+                       goto got_it;
+               }
        }
 
        /*
-        * Indirect block might be removed by truncate while we were
-        * reading it. Handling of that case (forget what we've got and
-        * reread) is taken out of the main path.
-        */
-       if (err == -EAGAIN)
-               goto changed;
+        * Okay, we need to do block allocation.  Lazily initialize the block
+        * allocation info here if necessary
+       */
+       if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info))
+               ext3_init_block_alloc_info(inode);
 
-       goal = 0;
-       down(&ei->truncate_sem);
-       if (ext3_find_goal(inode, iblock, chain, partial, &goal) < 0) {
-               up(&ei->truncate_sem);
-               goto changed;
-       }
+       goal = ext3_find_goal(inode, iblock, chain, partial);
 
-       left = (chain + depth) - partial;
+       /* the number of blocks needed for the [d,t]indirect blocks */
+       indirect_blks = (chain + depth) - partial - 1;
 
+       /*
+        * Next look up the indirect map to count the total number of
+        * direct blocks to allocate for this branch.
+        */
+       count = ext3_blks_to_allocate(partial, indirect_blks,
+                                       maxblocks, blocks_to_boundary);
        /*
         * Block out ext3_truncate while we alter the tree
         */
-       err = ext3_alloc_branch(handle, inode, left, goal,
-                                       offsets+(partial-chain), partial);
+       err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal,
+                               offsets + (partial - chain), partial);
 
-       /* The ext3_splice_branch call will free and forget any buffers
+       /*
+        * The ext3_splice_branch call will free and forget any buffers
         * on the new chain if there is a failure, but that risks using
         * up transaction credits, especially for bitmaps where the
         * credits cannot be returned.  Can we handle this somehow?  We
-        * may need to return -EAGAIN upwards in the worst case.  --sct */
+        * may need to return -EAGAIN upwards in the worst case.  --sct
+        */
        if (!err)
-               err = ext3_splice_branch(handle, inode, iblock, chain,
-                                        partial, left);
-       /* i_disksize growing is protected by truncate_sem
-        * don't forget to protect it if you're about to implement
-        * concurrent ext3_get_block() -bzzz */
+               err = ext3_splice_branch(handle, inode, iblock,
+                                       partial, indirect_blks, count);
+       /*
+        * i_disksize growing is protected by truncate_mutex.  Don't forget to
+        * protect it if you're about to implement concurrent
+        * ext3_get_block() -bzzz
+       */
        if (!err && extend_disksize && inode->i_size > ei->i_disksize)
                ei->i_disksize = inode->i_size;
-       up(&ei->truncate_sem);
-       if (err == -EAGAIN)
-               goto changed;
+       mutex_unlock(&ei->truncate_mutex);
        if (err)
                goto cleanup;
 
        set_buffer_new(bh_result);
-       goto got_it;
-
-changed:
+got_it:
+       map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
+       if (count > blocks_to_boundary)
+               set_buffer_boundary(bh_result);
+       err = count;
+       /* Clean up and exit */
+       partial = chain + depth - 1;    /* the whole chain */
+cleanup:
        while (partial > chain) {
-               jbd_debug(1, "buffer chain changed, retrying\n");
-               BUFFER_TRACE(partial->bh, "brelsing");
+               BUFFER_TRACE(partial->bh, "call brelse");
                brelse(partial->bh);
                partial--;
        }
-       goto reread;
-}
-
-static int ext3_get_block(struct inode *inode, sector_t iblock,
-                       struct buffer_head *bh_result, int create)
-{
-       handle_t *handle = NULL;
-       int ret;
-
-       if (create) {
-               handle = ext3_journal_current_handle();
-               J_ASSERT(handle != 0);
-       }
-       ret = ext3_get_block_handle(handle, inode, iblock,
-                               bh_result, create, 1);
-       return ret;
+       BUFFER_TRACE(bh_result, "returned");
+out:
+       return err;
 }
 
 #define DIO_CREDITS (EXT3_RESERVE_TRANS_BLOCKS + 32)
 
-static int
-ext3_direct_io_get_blocks(struct inode *inode, sector_t iblock,
-               unsigned long max_blocks, struct buffer_head *bh_result,
-               int create)
+static int ext3_get_block(struct inode *inode, sector_t iblock,
+                       struct buffer_head *bh_result, int create)
 {
        handle_t *handle = journal_current_handle();
        int ret = 0;
+       unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
 
-       if (!handle)
+       if (!create)
                goto get_block;         /* A read */
 
+       if (max_blocks == 1)
+               goto get_block;         /* A single block get */
+
        if (handle->h_transaction->t_state == T_LOCKED) {
                /*
                 * Huge direct-io writes can hold off commits for long
@@ -913,18 +983,22 @@ ext3_direct_io_get_blocks(struct inode *inode, sector_t iblock,
        }
 
 get_block:
-       if (ret == 0)
-               ret = ext3_get_block_handle(handle, inode, iblock,
-                                       bh_result, create, 0);
-       bh_result->b_size = (1 << inode->i_blkbits);
+       if (ret == 0) {
+               ret = ext3_get_blocks_handle(handle, inode, iblock,
+                                       max_blocks, bh_result, create, 0);
+               if (ret > 0) {
+                       bh_result->b_size = (ret << inode->i_blkbits);
+                       ret = 0;
+               }
+       }
        return ret;
 }
 
 /*
  * `handle' can be NULL if create is zero
  */
-struct buffer_head *ext3_getblk(handle_t *handle, struct inode * inode,
-                               long block, int create, int * errp)
+struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode,
+                               long block, int create, int *errp)
 {
        struct buffer_head dummy;
        int fatal = 0, err;
@@ -934,25 +1008,41 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode * inode,
        dummy.b_state = 0;
        dummy.b_blocknr = -1000;
        buffer_trace_init(&dummy.b_history);
-       *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1);
-       if (!*errp && buffer_mapped(&dummy)) {
+       err = ext3_get_blocks_handle(handle, inode, block, 1,
+                                       &dummy, create, 1);
+       /*
+        * ext3_get_blocks_handle() returns number of blocks
+        * mapped. 0 in case of a HOLE.
+        */
+       if (err > 0) {
+               if (err > 1)
+                       WARN_ON(1);
+               err = 0;
+       }
+       *errp = err;
+       if (!err && buffer_mapped(&dummy)) {
                struct buffer_head *bh;
                bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
+               if (!bh) {
+                       *errp = -EIO;
+                       goto err;
+               }
                if (buffer_new(&dummy)) {
                        J_ASSERT(create != 0);
                        J_ASSERT(handle != 0);
 
-                       /* Now that we do not always journal data, we
-                          should keep in mind whether this should
-                          always journal the new buffer as metadata.
-                          For now, regular file writes use
-                          ext3_get_block instead, so it's not a
-                          problem. */
+                       /*
+                        * Now that we do not always journal data, we should
+                        * keep in mind whether this should always journal the
+                        * new buffer as metadata.  For now, regular file
+                        * writes use ext3_get_block instead, so it's not a
+                        * problem.
+                        */
                        lock_buffer(bh);
                        BUFFER_TRACE(bh, "call get_create_access");
                        fatal = ext3_journal_get_create_access(handle, bh);
                        if (!fatal && !buffer_uptodate(bh)) {
-                               memset(bh->b_data, 0, inode->i_sb->s_blocksize);
+                               memset(bh->b_data,0,inode->i_sb->s_blocksize);
                                set_buffer_uptodate(bh);
                        }
                        unlock_buffer(bh);
@@ -970,59 +1060,25 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode * inode,
                }
                return bh;
        }
+err:
        return NULL;
 }
 
-struct buffer_head *ext3_bread(handle_t *handle, struct inode * inode,
+struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode,
                               int block, int create, int *err)
 {
        struct buffer_head * bh;
-       int prev_blocks;
 
-       prev_blocks = inode->i_blocks;
-
-       bh = ext3_getblk (handle, inode, block, create, err);
+       bh = ext3_getblk(handle, inode, block, create, err);
        if (!bh)
                return bh;
-#ifdef EXT3_PREALLOCATE
-       /*
-        * If the inode has grown, and this is a directory, then use a few
-        * more of the preallocated blocks to keep directory fragmentation
-        * down.  The preallocated blocks are guaranteed to be contiguous.
-        */
-       if (create &&
-           S_ISDIR(inode->i_mode) &&
-           inode->i_blocks > prev_blocks &&
-           EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
-                                   EXT3_FEATURE_COMPAT_DIR_PREALLOC)) {
-               int i;
-               struct buffer_head *tmp_bh;
-
-               for (i = 1;
-                    EXT3_I(inode)->i_prealloc_count &&
-                    i < EXT3_SB(inode->i_sb)->s_es->s_prealloc_dir_blocks;
-                    i++) {
-                       /*
-                        * ext3_getblk will zero out the contents of the
-                        * directory for us
-                        */
-                       tmp_bh = ext3_getblk(handle, inode,
-                                               block+i, create, err);
-                       if (!tmp_bh) {
-                               brelse (bh);
-                               return 0;
-                       }
-                       brelse (tmp_bh);
-               }
-       }
-#endif
        if (buffer_uptodate(bh))
                return bh;
-       ll_rw_block (READ, 1, &bh);
-       wait_on_buffer (bh);
+       ll_rw_block(READ, 1, &bh);
+       wait_on_buffer(bh);
        if (buffer_uptodate(bh))
                return bh;
-       brelse (bh);
+       put_bh(bh);
        *err = -EIO;
        return NULL;
 }
@@ -1084,9 +1140,8 @@ static int walk_page_buffers(     handle_t *handle,
  * is elevated.  We'll still have enough credits for the tiny quotafile
  * write.  
  */
-
-static int do_journal_get_write_access(handle_t *handle, 
-                                      struct buffer_head *bh)
+static int do_journal_get_write_access(handle_t *handle,
+                                       struct buffer_head *bh)
 {
        if (!buffer_mapped(bh) || buffer_freed(bh))
                return 0;
@@ -1107,7 +1162,10 @@ retry:
                ret = PTR_ERR(handle);
                goto out;
        }
-       ret = block_prepare_write(page, from, to, ext3_get_block);
+       if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode))
+               ret = nobh_prepare_write(page, from, to, ext3_get_block);
+       else
+               ret = block_prepare_write(page, from, to, ext3_get_block);
        if (ret)
                goto prepare_write_failed;
 
@@ -1124,8 +1182,7 @@ out:
        return ret;
 }
 
-static int
-ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
+int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 {
        int err = journal_dirty_data(handle, bh);
        if (err)
@@ -1150,7 +1207,6 @@ static int commit_write_fn(handle_t *handle, struct buffer_head *bh)
  * ext3 never places buffers on inode->i_mapping->private_list.  metadata
  * buffers are managed internally.
  */
-
 static int ext3_ordered_commit_write(struct file *file, struct page *page,
                             unsigned from, unsigned to)
 {
@@ -1191,7 +1247,12 @@ static int ext3_writeback_commit_write(struct file *file, struct page *page,
        new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
        if (new_i_size > EXT3_I(inode)->i_disksize)
                EXT3_I(inode)->i_disksize = new_i_size;
-       ret = generic_commit_write(file, page, from, to);
+
+       if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode))
+               ret = nobh_commit_write(file, page, from, to);
+       else
+               ret = generic_commit_write(file, page, from, to);
+
        ret2 = ext3_journal_stop(handle);
        if (!ret)
                ret = ret2;
@@ -1321,7 +1382,7 @@ static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
  *     ext3_file_write() -> generic_file_write() -> __alloc_pages() -> ...
  *
  * Same applies to ext3_get_block().  We will deadlock on various things like
- * lock_journal and i_truncate_sem.
+ * lock_journal and i_truncate_mutex.
  *
  * Setting PF_MEMALLOC here doesn't work - too many internal memory
  * allocations fail.
@@ -1355,7 +1416,7 @@ static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
  * we don't need to open a transaction here.
  */
 static int ext3_ordered_writepage(struct page *page,
-                       struct writeback_control *wbc)
+                               struct writeback_control *wbc)
 {
        struct inode *inode = page->mapping->host;
        struct buffer_head *page_bufs;
@@ -1437,7 +1498,11 @@ static int ext3_writeback_writepage(struct page *page,
                goto out_fail;
        }
 
-       ret = block_write_full_page(page, ext3_get_block, wbc);
+       if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode))
+               ret = nobh_writepage(page, ext3_get_block, wbc);
+       else
+               ret = block_write_full_page(page, ext3_get_block, wbc);
+
        err = ext3_journal_stop(handle);
        if (!ret)
                ret = err;
@@ -1466,16 +1531,18 @@ static int ext3_journalled_writepage(struct page *page,
                goto no_write;
        }
 
-       if (!page_has_buffers(page) || PageChecked(page)) {
+       if (!page_has_buffers(page) || PageFsMisc(page)) {
                /*
                 * It's mmapped pagecache.  Add buffers and journal it.  There
                 * doesn't seem much point in redirtying the page here.
                 */
-               ClearPageChecked(page);
+               ClearPageFsMisc(page);
                ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
                                        ext3_get_block);
-               if (ret != 0)
+               if (ret != 0) {
+                       ext3_journal_stop(handle);
                        goto out_unlock;
+               }
                ret = walk_page_buffers(handle, page_buffers(page), 0,
                        PAGE_CACHE_SIZE, NULL, do_journal_get_write_access);
 
@@ -1518,7 +1585,7 @@ ext3_readpages(struct file *file, struct address_space *mapping,
        return mpage_readpages(mapping, pages, nr_pages, ext3_get_block);
 }
 
-static int ext3_invalidatepage(struct page *page, unsigned long offset)
+static void ext3_invalidatepage(struct page *page, unsigned long offset)
 {
        journal_t *journal = EXT3_JOURNAL(page->mapping->host);
 
@@ -1526,16 +1593,18 @@ static int ext3_invalidatepage(struct page *page, unsigned long offset)
         * If it's a full truncate we just forget about the pending dirtying
         */
        if (offset == 0)
-               ClearPageChecked(page);
+               ClearPageFsMisc(page);
 
-       return journal_invalidatepage(journal, page, offset);
+       journal_invalidatepage(journal, page, offset);
 }
 
-static int ext3_releasepage(struct page *page, int wait)
+static int ext3_releasepage(struct page *page, gfp_t wait)
 {
        journal_t *journal = EXT3_JOURNAL(page->mapping->host);
 
-       WARN_ON(PageChecked(page));
+       WARN_ON(PageFsMisc(page));
+       if (!page_has_buffers(page))
+               return 0;
        return journal_try_to_free_buffers(journal, page, wait);
 }
 
@@ -1578,22 +1647,32 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
 
        ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 
                                 offset, nr_segs,
-                                ext3_direct_io_get_blocks, NULL);
+                                ext3_get_block, NULL);
+
+       /*
+        * Reacquire the handle: ext3_get_block() can restart the transaction
+        */
+       handle = journal_current_handle();
 
 out_stop:
        if (handle) {
                int err;
 
-               if (orphan)
+               if (orphan && inode->i_nlink)
                        ext3_orphan_del(handle, inode);
                if (orphan && ret > 0) {
                        loff_t end = offset + ret;
                        if (end > inode->i_size) {
                                ei->i_disksize = end;
                                i_size_write(inode, end);
-                               err = ext3_mark_inode_dirty(handle, inode);
-                               if (!ret) 
-                                       ret = err;
+                               /*
+                                * We're going to return a positive `ret'
+                                * here due to non-zero-length I/O, so there's
+                                * no way of reporting error returns from
+                                * ext3_mark_inode_dirty() to userspace.  So
+                                * ignore it.
+                                */
+                               ext3_mark_inode_dirty(handle, inode);
                        }
                }
                err = ext3_journal_stop(handle);
@@ -1619,11 +1698,11 @@ out:
  */
 static int ext3_journalled_set_page_dirty(struct page *page)
 {
-       SetPageChecked(page);
+       SetPageFsMisc(page);
        return __set_page_dirty_nobuffers(page);
 }
 
-static struct address_space_operations ext3_ordered_aops = {
+static const struct address_space_operations ext3_ordered_aops = {
        .readpage       = ext3_readpage,
        .readpages      = ext3_readpages,
        .writepage      = ext3_ordered_writepage,
@@ -1634,9 +1713,10 @@ static struct address_space_operations ext3_ordered_aops = {
        .invalidatepage = ext3_invalidatepage,
        .releasepage    = ext3_releasepage,
        .direct_IO      = ext3_direct_IO,
+       .migratepage    = buffer_migrate_page,
 };
 
-static struct address_space_operations ext3_writeback_aops = {
+static const struct address_space_operations ext3_writeback_aops = {
        .readpage       = ext3_readpage,
        .readpages      = ext3_readpages,
        .writepage      = ext3_writeback_writepage,
@@ -1647,9 +1727,10 @@ static struct address_space_operations ext3_writeback_aops = {
        .invalidatepage = ext3_invalidatepage,
        .releasepage    = ext3_releasepage,
        .direct_IO      = ext3_direct_IO,
+       .migratepage    = buffer_migrate_page,
 };
 
-static struct address_space_operations ext3_journalled_aops = {
+static const struct address_space_operations ext3_journalled_aops = {
        .readpage       = ext3_readpage,
        .readpages      = ext3_readpages,
        .writepage      = ext3_journalled_writepage,
@@ -1681,18 +1762,32 @@ void ext3_set_aops(struct inode *inode)
 static int ext3_block_truncate_page(handle_t *handle, struct page *page,
                struct address_space *mapping, loff_t from)
 {
-       unsigned long index = from >> PAGE_CACHE_SHIFT;
+       ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT;
        unsigned offset = from & (PAGE_CACHE_SIZE-1);
        unsigned blocksize, iblock, length, pos;
        struct inode *inode = mapping->host;
        struct buffer_head *bh;
-       int err;
+       int err = 0;
        void *kaddr;
 
        blocksize = inode->i_sb->s_blocksize;
        length = blocksize - (offset & (blocksize - 1));
        iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
 
+       /*
+        * For "nobh" option,  we can only work if we don't need to
+        * read-in the page - otherwise we create buffers to do the IO.
+        */
+       if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) &&
+            ext3_should_writeback_data(inode) && PageUptodate(page)) {
+               kaddr = kmap_atomic(page, KM_USER0);
+               memset(kaddr + offset, 0, length);
+               flush_dcache_page(page);
+               kunmap_atomic(kaddr, KM_USER0);
+               set_page_dirty(page);
+               goto unlock;
+       }
+
        if (!page_has_buffers(page))
                create_empty_buffers(page, blocksize, 0);
 
@@ -1768,7 +1863,7 @@ unlock:
  * or memcmp with zero_page, whatever is better for particular architecture.
  * Linus?
  */
-static inline int all_zeroes(u32 *p, u32 *q)
+static inline int all_zeroes(__le32 *p, __le32 *q)
 {
        while (p < q)
                if (*p++)
@@ -1811,11 +1906,8 @@ static inline int all_zeroes(u32 *p, u32 *q)
  *             c) free the subtrees growing from the inode past the @chain[0].
  *                     (no partially truncated stuff there).  */
 
-static Indirect *ext3_find_shared(struct inode *inode,
-                               int depth,
-                               int offsets[4],
-                               Indirect chain[4],
-                               u32 *top)
+static Indirect *ext3_find_shared(struct inode *inode, int depth,
+                       int offsets[4], Indirect chain[4], __le32 *top)
 {
        Indirect *partial, *p;
        int k, err;
@@ -1835,7 +1927,7 @@ static Indirect *ext3_find_shared(struct inode *inode,
        if (!partial->key && *partial->p)
                /* Writer: end */
                goto no_top;
-       for (p=partial; p>chain && all_zeroes((u32*)p->bh->b_data,p->p); p--)
+       for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--)
                ;
        /*
         * OK, we've found the last block that must survive. The rest of our
@@ -1854,8 +1946,7 @@ static Indirect *ext3_find_shared(struct inode *inode,
        }
        /* Writer: end */
 
-       while(partial > p)
-       {
+       while(partial > p) {
                brelse(partial->bh);
                partial--;
        }
@@ -1871,12 +1962,11 @@ no_top:
  * We release `count' blocks on disk, but (last - first) may be greater
  * than `count' because there can be holes in there.
  */
-static void
-ext3_clear_blocks(handle_t *handle, struct inode *inode, struct buffer_head *bh,
-               unsigned long block_to_free, unsigned long count,
-               u32 *first, u32 *last)
+static void ext3_clear_blocks(handle_t *handle, struct inode *inode,
+               struct buffer_head *bh, ext3_fsblk_t block_to_free,
+               unsigned long count, __le32 *first, __le32 *last)
 {
-       u32 *p;
+       __le32 *p;
        if (try_to_extend_transaction(handle, inode)) {
                if (bh) {
                        BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
@@ -1932,15 +2022,16 @@ ext3_clear_blocks(handle_t *handle, struct inode *inode, struct buffer_head *bh,
  * block pointers.
  */
 static void ext3_free_data(handle_t *handle, struct inode *inode,
-                          struct buffer_head *this_bh, u32 *first, u32 *last)
+                          struct buffer_head *this_bh,
+                          __le32 *first, __le32 *last)
 {
-       unsigned long block_to_free = 0;    /* Starting block # of a run */
+       ext3_fsblk_t block_to_free = 0;    /* Starting block # of a run */
        unsigned long count = 0;            /* Number of blocks in the run */ 
-       u32 *block_to_free_p = NULL;        /* Pointer into inode/ind
+       __le32 *block_to_free_p = NULL;     /* Pointer into inode/ind
                                               corresponding to
                                               block_to_free */
-       unsigned long nr;                   /* Current block # */
-       u32 *p;                             /* Pointer into inode/ind
+       ext3_fsblk_t nr;                    /* Current block # */
+       __le32 *p;                          /* Pointer into inode/ind
                                               for current block */
        int err;
 
@@ -1999,10 +2090,10 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,
  */
 static void ext3_free_branches(handle_t *handle, struct inode *inode,
                               struct buffer_head *parent_bh,
-                              u32 *first, u32 *last, int depth)
+                              __le32 *first, __le32 *last, int depth)
 {
-       unsigned long nr;
-       u32 *p;
+       ext3_fsblk_t nr;
+       __le32 *p;
 
        if (is_handle_aborted(handle))
                return;
@@ -2025,15 +2116,16 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
                         */
                        if (!bh) {
                                ext3_error(inode->i_sb, "ext3_free_branches",
-                                          "Read failure, inode=%ld, block=%ld",
+                                          "Read failure, inode=%lu, block="E3FSBLK,
                                           inode->i_ino, nr);
                                continue;
                        }
 
                        /* This zaps the entire block.  Bottom up. */
                        BUFFER_TRACE(bh, "free child branches");
-                       ext3_free_branches(handle, inode, bh, (u32*)bh->b_data,
-                                          (u32*)bh->b_data + addr_per_block,
+                       ext3_free_branches(handle, inode, bh,
+                                          (__le32*)bh->b_data,
+                                          (__le32*)bh->b_data + addr_per_block,
                                           depth);
 
                        /*
@@ -2133,18 +2225,17 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
  * that's fine - as long as they are linked from the inode, the post-crash
  * ext3_truncate() run will find them and release them.
  */
-
-void ext3_truncate_nocheck(struct inode * inode)
+void ext3_truncate(struct inode *inode)
 {
        handle_t *handle;
        struct ext3_inode_info *ei = EXT3_I(inode);
-       u32 *i_data = ei->i_data;
+       __le32 *i_data = ei->i_data;
        int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
        struct address_space *mapping = inode->i_mapping;
        int offsets[4];
        Indirect chain[4];
        Indirect *partial;
-       int nr = 0;
+       __le32 nr = 0;
        int n;
        long last_block;
        unsigned blocksize = inode->i_sb->s_blocksize;
@@ -2155,8 +2246,8 @@ void ext3_truncate_nocheck(struct inode * inode)
                return;
        if (ext3_inode_is_fast_symlink(inode))
                return;
-
-       ext3_discard_prealloc(inode);
+       if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
+               return;
 
        /*
         * We have to lock the EOF page here, because lock_page() nests
@@ -2218,7 +2309,7 @@ void ext3_truncate_nocheck(struct inode * inode)
         * From here we block out all ext3_get_block() callers who want to
         * modify the block allocation tree.
         */
-       down(&ei->truncate_sem);
+       mutex_lock(&ei->truncate_mutex);
 
        if (n == 1) {           /* direct blocks */
                ext3_free_data(handle, inode, NULL, i_data+offsets[0],
@@ -2249,7 +2340,7 @@ void ext3_truncate_nocheck(struct inode * inode)
        /* Clear the ends of indirect blocks on the shared branch */
        while (partial > chain) {
                ext3_free_branches(handle, inode, partial->bh, partial->p + 1,
-                                  (u32*)partial->bh->b_data + addr_per_block,
+                                  (__le32*)partial->bh->b_data+addr_per_block,
                                   (chain+n-1) - partial);
                BUFFER_TRACE(partial->bh, "call brelse");
                brelse (partial->bh);
@@ -2258,36 +2349,38 @@ void ext3_truncate_nocheck(struct inode * inode)
 do_indirects:
        /* Kill the remaining (whole) subtrees */
        switch (offsets[0]) {
-               default:
-                       nr = i_data[EXT3_IND_BLOCK];
-                       if (nr) {
-                               ext3_free_branches(handle, inode, NULL,
-                                                  &nr, &nr+1, 1);
-                               i_data[EXT3_IND_BLOCK] = 0;
-                       }
-               case EXT3_IND_BLOCK:
-                       nr = i_data[EXT3_DIND_BLOCK];
-                       if (nr) {
-                               ext3_free_branches(handle, inode, NULL,
-                                                  &nr, &nr+1, 2);
-                               i_data[EXT3_DIND_BLOCK] = 0;
-                       }
-               case EXT3_DIND_BLOCK:
-                       nr = i_data[EXT3_TIND_BLOCK];
-                       if (nr) {
-                               ext3_free_branches(handle, inode, NULL,
-                                                  &nr, &nr+1, 3);
-                               i_data[EXT3_TIND_BLOCK] = 0;
-                       }
-               case EXT3_TIND_BLOCK:
-                       ;
+       default:
+               nr = i_data[EXT3_IND_BLOCK];
+               if (nr) {
+                       ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
+                       i_data[EXT3_IND_BLOCK] = 0;
+               }
+       case EXT3_IND_BLOCK:
+               nr = i_data[EXT3_DIND_BLOCK];
+               if (nr) {
+                       ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
+                       i_data[EXT3_DIND_BLOCK] = 0;
+               }
+       case EXT3_DIND_BLOCK:
+               nr = i_data[EXT3_TIND_BLOCK];
+               if (nr) {
+                       ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
+                       i_data[EXT3_TIND_BLOCK] = 0;
+               }
+       case EXT3_TIND_BLOCK:
+               ;
        }
-       up(&ei->truncate_sem);
-       inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+
+       ext3_discard_reservation(inode);
+
+       mutex_unlock(&ei->truncate_mutex);
+       inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
        ext3_mark_inode_dirty(handle, inode);
 
-       /* In a multi-transaction truncate, we only make the final
-        * transaction synchronous */
+       /*
+        * In a multi-transaction truncate, we only make the final transaction
+        * synchronous
+        */
        if (IS_SYNC(inode))
                handle->h_sync = 1;
 out_stop:
@@ -2304,29 +2397,30 @@ out_stop:
        ext3_journal_stop(handle);
 }
 
-static unsigned long ext3_get_inode_block(struct super_block *sb,
+static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
                unsigned long ino, struct ext3_iloc *iloc)
 {
        unsigned long desc, group_desc, block_group;
-       unsigned long offset, block;
+       unsigned long offset;
+       ext3_fsblk_t block;
        struct buffer_head *bh;
        struct ext3_group_desc * gdp;
 
-       if ((ino != EXT3_ROOT_INO &&
-               ino != EXT3_JOURNAL_INO &&
-               ino < EXT3_FIRST_INO(sb)) ||
-               ino > le32_to_cpu(
-                       EXT3_SB(sb)->s_es->s_inodes_count)) {
-               ext3_error (sb, "ext3_get_inode_block",
-                           "bad inode number: %lu", ino);
+       if (!ext3_valid_inum(sb, ino)) {
+               /*
+                * This error is already checked for in namei.c unless we are
+                * looking at an NFS filehandle, in which case no error
+                * report is needed
+                */
                return 0;
        }
+
        block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
        if (block_group >= EXT3_SB(sb)->s_groups_count) {
-               ext3_error (sb, "ext3_get_inode_block",
-                           "group >= groups count");
+               ext3_error(sb,"ext3_get_inode_block","group >= groups count");
                return 0;
        }
+       smp_rmb();
        group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb);
        desc = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1);
        bh = EXT3_SB(sb)->s_group_desc[group_desc];
@@ -2336,7 +2430,7 @@ static unsigned long ext3_get_inode_block(struct super_block *sb,
                return 0;
        }
 
-       gdp = (struct ext3_group_desc *) bh->b_data;
+       gdp = (struct ext3_group_desc *)bh->b_data;
        /*
         * Figure out the offset within the block group inode table
         */
@@ -2350,16 +2444,16 @@ static unsigned long ext3_get_inode_block(struct super_block *sb,
        return block;
 }
 
-/* 
+/*
  * ext3_get_inode_loc returns with an extra refcount against the inode's
- * underlying buffer_head on success.  If `in_mem' is false then we're purely
- * trying to determine the inode's location on-disk and no read need be
- * performed.
+ * underlying buffer_head on success. If 'in_mem' is true, we have all
+ * data in memory that is needed to recreate the on-disk version of this
+ * inode.
  */
-static int ext3_get_inode_loc(struct inode *inode,
+static int __ext3_get_inode_loc(struct inode *inode,
                                struct ext3_iloc *iloc, int in_mem)
 {
-       unsigned long block;
+       ext3_fsblk_t block;
        struct buffer_head *bh;
 
        block = ext3_get_inode_block(inode->i_sb, inode->i_ino, iloc);
@@ -2370,7 +2464,8 @@ static int ext3_get_inode_loc(struct inode *inode,
        if (!bh) {
                ext3_error (inode->i_sb, "ext3_get_inode_loc",
                                "unable to read inode block - "
-                               "inode=%lu, block=%lu", inode->i_ino, block);
+                               "inode=%lu, block="E3FSBLK,
+                                inode->i_ino, block);
                return -EIO;
        }
        if (!buffer_uptodate(bh)) {
@@ -2381,7 +2476,11 @@ static int ext3_get_inode_loc(struct inode *inode,
                        goto has_buffer;
                }
 
-               /* we can't skip I/O if inode is on a disk only */
+               /*
+                * If we have all information of the inode in memory and this
+                * is the only valid inode in the block, we need not read the
+                * block.
+                */
                if (in_mem) {
                        struct buffer_head *bitmap_bh;
                        struct ext3_group_desc *desc;
@@ -2390,10 +2489,6 @@ static int ext3_get_inode_loc(struct inode *inode,
                        int block_group;
                        int start;
 
-                       /*
-                        * If this is the only valid inode in the block we
-                        * need not read the block.
-                        */
                        block_group = (inode->i_ino - 1) /
                                        EXT3_INODES_PER_GROUP(inode->i_sb);
                        inodes_per_buffer = bh->b_size /
@@ -2440,8 +2535,9 @@ static int ext3_get_inode_loc(struct inode *inode,
 
 make_io:
                /*
-                * There are another valid inodes in the buffer so we must
-                * read the block from disk
+                * There are other valid inodes in the buffer, this inode
+                * has in-inode xattrs, or we don't have this inode in memory.
+                * Read the block from disk.
                 */
                get_bh(bh);
                bh->b_end_io = end_buffer_read_sync;
@@ -2450,7 +2546,7 @@ make_io:
                if (!buffer_uptodate(bh)) {
                        ext3_error(inode->i_sb, "ext3_get_inode_loc",
                                        "unable to read inode block - "
-                                       "inode=%lu, block=%lu",
+                                       "inode=%lu, block="E3FSBLK,
                                        inode->i_ino, block);
                        brelse(bh);
                        return -EIO;
@@ -2461,34 +2557,88 @@ has_buffer:
        return 0;
 }
 
-void ext3_truncate(struct inode * inode)
+int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc)
 {
-       if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
-               return;
-       ext3_truncate_nocheck(inode);
+       /* We have all inode data except xattrs in memory here. */
+       return __ext3_get_inode_loc(inode, iloc,
+               !(EXT3_I(inode)->i_state & EXT3_STATE_XATTR));
 }
 
 void ext3_set_inode_flags(struct inode *inode)
 {
        unsigned int flags = EXT3_I(inode)->i_flags;
 
-       inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
-       if (flags & EXT3_SYNC_FL)
-               inode->i_flags |= S_SYNC;
-       if (flags & EXT3_APPEND_FL)
-               inode->i_flags |= S_APPEND;
+       inode->i_flags &= ~(S_IMMUTABLE | S_IUNLINK | S_BARRIER |
+               S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
+
        if (flags & EXT3_IMMUTABLE_FL)
                inode->i_flags |= S_IMMUTABLE;
        if (flags & EXT3_IUNLINK_FL)
                inode->i_flags |= S_IUNLINK;
        if (flags & EXT3_BARRIER_FL)
                inode->i_flags |= S_BARRIER;
+
+       if (flags & EXT3_SYNC_FL)
+               inode->i_flags |= S_SYNC;
+       if (flags & EXT3_APPEND_FL)
+               inode->i_flags |= S_APPEND;
        if (flags & EXT3_NOATIME_FL)
                inode->i_flags |= S_NOATIME;
        if (flags & EXT3_DIRSYNC_FL)
                inode->i_flags |= S_DIRSYNC;
 }
 
+int ext3_sync_flags(struct inode *inode)
+{
+       unsigned int oldflags, newflags;
+       int err = 0;
+       /* Rebuild the ext3 flag word from the in-core inode flags. */
+       oldflags = EXT3_I(inode)->i_flags;
+       newflags = oldflags & ~(EXT3_APPEND_FL |
+               EXT3_IMMUTABLE_FL | EXT3_IUNLINK_FL |
+               EXT3_BARRIER_FL | EXT3_NOATIME_FL |
+               EXT3_SYNC_FL | EXT3_DIRSYNC_FL);
+
+       if (IS_APPEND(inode))
+               newflags |= EXT3_APPEND_FL;
+       if (IS_IMMUTABLE(inode))
+               newflags |= EXT3_IMMUTABLE_FL;
+       if (IS_IUNLINK(inode))
+               newflags |= EXT3_IUNLINK_FL;
+       if (IS_BARRIER(inode))
+               newflags |= EXT3_BARRIER_FL;
+
+       /* test i_flags directly so superblock-level flags are not copied */
+       if (inode->i_flags & S_NOATIME)
+               newflags |= EXT3_NOATIME_FL;
+       if (inode->i_flags & S_SYNC)
+               newflags |= EXT3_SYNC_FL;
+       if (inode->i_flags & S_DIRSYNC)
+               newflags |= EXT3_DIRSYNC_FL;
+
+       if (oldflags ^ newflags) {
+               handle_t *handle;
+               struct ext3_iloc iloc;
+
+               handle = ext3_journal_start(inode, 1);
+               if (IS_ERR(handle))
+                       return PTR_ERR(handle);
+               if (IS_SYNC(inode))
+                       handle->h_sync = 1;
+               err = ext3_reserve_inode_write(handle, inode, &iloc);
+               if (err)
+                       goto flags_err;
+               /* whole seconds, as in ext3_truncate() */
+               EXT3_I(inode)->i_flags = newflags;
+               inode->i_ctime = CURRENT_TIME_SEC;
+
+               err = ext3_mark_iloc_dirty(handle, inode, &iloc);
+       flags_err:
+               ext3_journal_stop(handle);
+       }
+       return err;
+}
+
 void ext3_read_inode(struct inode * inode)
 {
        struct ext3_iloc iloc;
@@ -2503,7 +2653,9 @@ void ext3_read_inode(struct inode * inode)
        ei->i_acl = EXT3_ACL_NOT_CACHED;
        ei->i_default_acl = EXT3_ACL_NOT_CACHED;
 #endif
-       if (ext3_get_inode_loc(inode, &iloc, 0))
+       ei->i_block_alloc_info = NULL;
+
+       if (__ext3_get_inode_loc(inode, &iloc, 0))
                goto bad_inode;
        bh = iloc.bh;
        raw_inode = ext3_raw_inode(&iloc);
@@ -2527,8 +2679,6 @@ void ext3_read_inode(struct inode * inode)
        inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0;
 
        ei->i_state = 0;
-       ei->i_next_alloc_block = 0;
-       ei->i_next_alloc_goal = 0;
        ei->i_dir_start_lookup = 0;
        ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
        /* We now have enough fields to check if the inode was active or not.
@@ -2548,9 +2698,6 @@ void ext3_read_inode(struct inode * inode)
                 * recovery code: that's fine, we're about to complete
                 * the process of deleting those. */
        }
-       inode->i_blksize = PAGE_SIZE;   /* This is the optimal IO size
-                                        * (for stat), not the fs block
-                                        * size */  
        inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
        ei->i_flags = le32_to_cpu(raw_inode->i_flags);
 #ifdef EXT3_FRAGMENTS
@@ -2567,11 +2714,7 @@ void ext3_read_inode(struct inode * inode)
        }
        ei->i_disksize = inode->i_size;
        inode->i_generation = le32_to_cpu(raw_inode->i_generation);
-#ifdef EXT3_PREALLOCATE
-       ei->i_prealloc_count = 0;
-#endif
        ei->i_block_group = iloc.block_group;
-
        /*
         * NOTE! The in-memory inode i_data array is in little-endian order
         * even on big-endian machines: we do NOT byteswap the block numbers!
@@ -2580,6 +2723,31 @@ void ext3_read_inode(struct inode * inode)
                ei->i_data[block] = raw_inode->i_block[block];
        INIT_LIST_HEAD(&ei->i_orphan);
 
+       if (inode->i_ino >= EXT3_FIRST_INO(inode->i_sb) + 1 &&
+           EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) {
+               /*
+                * When mke2fs creates big inodes it does not zero out
+                * the unused bytes above EXT3_GOOD_OLD_INODE_SIZE,
+                * so ignore those first few inodes.
+                */
+               ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
+               if (EXT3_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
+                   EXT3_INODE_SIZE(inode->i_sb))
+                       goto bad_inode;
+               if (ei->i_extra_isize == 0) {
+                       /* The extra space is currently unused. Use it. */
+                       ei->i_extra_isize = sizeof(struct ext3_inode) -
+                                           EXT3_GOOD_OLD_INODE_SIZE;
+               } else {
+                       __le32 *magic = (void *)raw_inode +
+                                       EXT3_GOOD_OLD_INODE_SIZE +
+                                       ei->i_extra_isize;
+                       if (*magic == cpu_to_le32(EXT3_XATTR_MAGIC))
+                                ei->i_state |= EXT3_STATE_XATTR;
+               }
+       } else
+               ei->i_extra_isize = 0;
+
        if (S_ISREG(inode->i_mode)) {
                inode->i_op = &ext3_file_inode_operations;
                inode->i_fop = &ext3_file_operations;
@@ -2660,7 +2828,7 @@ static int ext3_do_update_inode(handle_t *handle,
                raw_inode->i_uid_high = 0;
                raw_inode->i_gid_high = 0;
        }
-#ifdef CONFIG_INOXID_GID32
+#ifdef CONFIG_INOXID_INTERN
        raw_inode->i_raw_xid = cpu_to_le16(inode->i_xid);
 #endif
        raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
@@ -2720,6 +2888,9 @@ static int ext3_do_update_inode(handle_t *handle,
        } else for (block = 0; block < EXT3_N_BLOCKS; block++)
                raw_inode->i_block[block] = ei->i_data[block];
 
+       if (ei->i_extra_isize)
+               raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
+
        BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
        rc = ext3_journal_dirty_metadata(handle, bh);
        if (!err)
@@ -2767,59 +2938,21 @@ out_brelse:
  * `stuff()' is running, and the new i_size will be lost.  Plus the inode
  * will no longer be on the superblock's dirty inode list.
  */
-void ext3_write_inode(struct inode *inode, int wait)
+int ext3_write_inode(struct inode *inode, int wait)
 {
        if (current->flags & PF_MEMALLOC)
-               return;
+               return 0;
 
        if (ext3_journal_current_handle()) {
                jbd_debug(0, "called recursively, non-PF_MEMALLOC!\n");
                dump_stack();
-               return;
+               return -EIO;
        }
 
        if (!wait)
-               return;
-
-       ext3_force_commit(inode->i_sb);
-}
-
-int ext3_setattr_flags(struct inode *inode, unsigned int flags)
-{
-       unsigned int oldflags, newflags;
-       int err = 0;
-
-       oldflags = EXT3_I(inode)->i_flags;
-       newflags = oldflags &
-               ~(EXT3_IMMUTABLE_FL | EXT3_IUNLINK_FL | EXT3_BARRIER_FL);       
-       if (flags & ATTR_FLAG_IMMUTABLE)
-               newflags |= EXT3_IMMUTABLE_FL;
-       if (flags & ATTR_FLAG_IUNLINK)
-               newflags |= EXT3_IUNLINK_FL;
-       if (flags & ATTR_FLAG_BARRIER)
-               newflags |= EXT3_BARRIER_FL;
-
-       if (oldflags ^ newflags) {
-               handle_t *handle;
-               struct ext3_iloc iloc;
-
-               handle = ext3_journal_start(inode, 1);
-               if (IS_ERR(handle))
-                       return PTR_ERR(handle);
-               if (IS_SYNC(inode))
-                       handle->h_sync = 1;
-               err = ext3_reserve_inode_write(handle, inode, &iloc);
-               if (err)
-                       goto flags_err;
-               
-               EXT3_I(inode)->i_flags = newflags;
-               inode->i_ctime = CURRENT_TIME;
+               return 0;
 
-               err = ext3_mark_iloc_dirty(handle, inode, &iloc);
-       flags_err:
-               ext3_journal_stop(handle);
-       }
-       return err;
+       return ext3_force_commit(inode->i_sb);
 }
 
 /*
@@ -2856,7 +2989,8 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
 
                /* (user+group)*(old+new) structure, inode write (sb,
                 * inode block, ? - but truncate inode update has it) */
-               handle = ext3_journal_start(inode, 4*EXT3_QUOTA_INIT_BLOCKS+3);
+               handle = ext3_journal_start(inode, 2*(EXT3_QUOTA_INIT_BLOCKS(inode->i_sb)+
+                                       EXT3_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
                if (IS_ERR(handle)) {
                        error = PTR_ERR(handle);
                        goto err_out;
@@ -2872,9 +3006,7 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
                        inode->i_uid = attr->ia_uid;
                if (attr->ia_valid & ATTR_GID)
                        inode->i_gid = attr->ia_gid;
-               if ((attr->ia_valid & ATTR_XID)
-                       && inode->i_sb
-                       && (inode->i_sb->s_flags & MS_TAGXID))
+               if ((attr->ia_valid & ATTR_XID) && IS_TAGXID(inode))
                        inode->i_xid = attr->ia_xid;
                error = ext3_mark_inode_dirty(handle, inode);
                ext3_journal_stop(handle);
@@ -2898,12 +3030,6 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
                ext3_journal_stop(handle);
        }
 
-       if (ia_valid & ATTR_ATTR_FLAG) {
-               rc = ext3_setattr_flags(inode, attr->ia_attr_flags);
-               if (!error)
-                       error = rc;
-       }
-
        rc = inode_setattr(inode, attr);
 
        /* If inode_setattr's call to ext3_truncate failed to get a
@@ -2924,7 +3050,7 @@ err_out:
 
 
 /*
- * akpm: how many blocks doth make a writepage()?
+ * How many blocks doth make a writepage()?
  *
  * With N blocks per page, it may be:
  * N data blocks
@@ -2950,7 +3076,7 @@ err_out:
  * block and work out the exact number of indirects which are touched.  Pah.
  */
 
-int ext3_writepage_trans_blocks(struct inode *inode)
+static int ext3_writepage_trans_blocks(struct inode *inode)
 {
        int bpp = ext3_journal_blocks_per_page(inode);
        int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3;
@@ -2964,7 +3090,7 @@ int ext3_writepage_trans_blocks(struct inode *inode)
 #ifdef CONFIG_QUOTA
        /* We know that structure was already allocated during DQUOT_INIT so
         * we will be updating only the data blocks + inodes */
-       ret += 2*EXT3_QUOTA_TRANS_BLOCKS;
+       ret += 2*EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb);
 #endif
 
        return ret;
@@ -2999,7 +3125,7 @@ ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
 {
        int err = 0;
        if (handle) {
-               err = ext3_get_inode_loc(inode, iloc, 1);
+               err = ext3_get_inode_loc(inode, iloc);
                if (!err) {
                        BUFFER_TRACE(iloc->bh, "get_write_access");
                        err = ext3_journal_get_write_access(handle, iloc->bh);
@@ -3014,8 +3140,8 @@ ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
 }
 
 /*
- * akpm: What we do here is to mark the in-core inode as clean
- * with respect to inode dirtiness (it may still be data-dirty).
+ * What we do here is to mark the in-core inode as clean with respect to inode
+ * dirtiness (it may still be data-dirty).
  * This means that the in-core inode may be reaped by prune_icache
  * without having to perform any I/O.  This is a very good thing,
  * because *any* task may call prune_icache - even ones which
@@ -3039,6 +3165,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
        struct ext3_iloc iloc;
        int err;
 
+       might_sleep();
        err = ext3_reserve_inode_write(handle, inode, &iloc);
        if (!err)
                err = ext3_mark_iloc_dirty(handle, inode, &iloc);
@@ -3046,7 +3173,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
 }
 
 /*
- * akpm: ext3_dirty_inode() is called from __mark_inode_dirty()
+ * ext3_dirty_inode() is called from __mark_inode_dirty()
  *
  * We're really interested in the case where a file is being extended.
  * i_size has been changed by generic_commit_write() and we thus need
@@ -3082,7 +3209,7 @@ out:
        return;
 }
 
-#ifdef AKPM
+#if 0
 /* 
  * Bind an inode's backing buffer_head into this transaction, to prevent
  * it from being flushed to disk early.  Unlike
@@ -3090,14 +3217,13 @@ out:
  * returns no iloc structure, so the caller needs to repeat the iloc
  * lookup to mark the inode dirty later.
  */
-static inline int
-ext3_pin_inode(handle_t *handle, struct inode *inode)
+static int ext3_pin_inode(handle_t *handle, struct inode *inode)
 {
        struct ext3_iloc iloc;
 
        int err = 0;
        if (handle) {
-               err = ext3_get_inode_loc(inode, &iloc, 1);
+               err = ext3_get_inode_loc(inode, &iloc);
                if (!err) {
                        BUFFER_TRACE(iloc.bh, "get_write_access");
                        err = journal_get_write_access(handle, iloc.bh);