Fedora kernel-2.6.17-1.2142_FC4 patched with stable patch-2.6.17.4-vs2.0.2-rc26.diff
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index e75c6af..794c389 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -13,6 +13,7 @@
 
 #include <linux/config.h>
 #include <linux/time.h>
+#include <linux/capability.h>
 #include <linux/fs.h>
 #include <linux/jbd.h>
 #include <linux/ext3_fs.h>
@@ -44,34 +45,34 @@ struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
                                             struct buffer_head ** bh)
 {
        unsigned long group_desc;
-       unsigned long desc;
-       struct ext3_group_desc * gdp;
+       unsigned long offset;
+       struct ext3_group_desc * desc;
+       struct ext3_sb_info *sbi = EXT3_SB(sb);
 
-       if (block_group >= EXT3_SB(sb)->s_groups_count) {
+       if (block_group >= sbi->s_groups_count) {
                ext3_error (sb, "ext3_get_group_desc",
                            "block_group >= groups_count - "
                            "block_group = %d, groups_count = %lu",
-                           block_group, EXT3_SB(sb)->s_groups_count);
+                           block_group, sbi->s_groups_count);
 
                return NULL;
        }
        smp_rmb();
 
        group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb);
-       desc = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1);
-       if (!EXT3_SB(sb)->s_group_desc[group_desc]) {
+       offset = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1);
+       if (!sbi->s_group_desc[group_desc]) {
                ext3_error (sb, "ext3_get_group_desc",
                            "Group descriptor not loaded - "
                            "block_group = %d, group_desc = %lu, desc = %lu",
-                            block_group, group_desc, desc);
+                            block_group, group_desc, offset);
                return NULL;
        }
 
-       gdp = (struct ext3_group_desc *) 
-             EXT3_SB(sb)->s_group_desc[group_desc]->b_data;
+       desc = (struct ext3_group_desc *) sbi->s_group_desc[group_desc]->b_data;
        if (bh)
-               *bh = EXT3_SB(sb)->s_group_desc[group_desc];
-       return gdp + desc;
+               *bh = sbi->s_group_desc[group_desc];
+       return desc + offset;
 }
 
 /*
@@ -251,7 +252,7 @@ static void rsv_window_remove(struct super_block *sb,
 {
        rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
        rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
-       atomic_set(&rsv->rsv_alloc_hit, 0);
+       rsv->rsv_alloc_hit = 0;
        rb_erase(&rsv->rsv_node, &EXT3_SB(sb)->s_rsv_window_root);
 }
 
@@ -260,13 +261,46 @@ static inline int rsv_is_empty(struct ext3_reserve_window *rsv)
        /* a valid reservation end block cannot be 0 */
        return (rsv->_rsv_end == EXT3_RESERVE_WINDOW_NOT_ALLOCATED);
 }
+void ext3_init_block_alloc_info(struct inode *inode)
+{
+       struct ext3_inode_info *ei = EXT3_I(inode);
+       struct ext3_block_alloc_info *block_i = ei->i_block_alloc_info;
+       struct super_block *sb = inode->i_sb;
+
+       block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
+       if (block_i) {
+               struct ext3_reserve_window_node *rsv = &block_i->rsv_window_node;
+
+               rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
+               rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
+
+               /*
+                * if filesystem is mounted with NORESERVATION, the goal
+                * reservation window size is set to zero to indicate
+                * block reservation is off
+                */
+               if (!test_opt(sb, RESERVATION))
+                       rsv->rsv_goal_size = 0;
+               else
+                       rsv->rsv_goal_size = EXT3_DEFAULT_RESERVE_BLOCKS;
+               rsv->rsv_alloc_hit = 0;
+               block_i->last_alloc_logical_block = 0;
+               block_i->last_alloc_physical_block = 0;
+       }
+       ei->i_block_alloc_info = block_i;
+}
 
 void ext3_discard_reservation(struct inode *inode)
 {
        struct ext3_inode_info *ei = EXT3_I(inode);
-       struct ext3_reserve_window_node *rsv = &ei->i_rsv_window;
+       struct ext3_block_alloc_info *block_i = ei->i_block_alloc_info;
+       struct ext3_reserve_window_node *rsv;
        spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock;
 
+       if (!block_i)
+               return;
+
+       rsv = &block_i->rsv_window_node;
        if (!rsv_is_empty(&rsv->rsv_window)) {
                spin_lock(rsv_lock);
                if (!rsv_is_empty(&rsv->rsv_window))
@@ -286,14 +320,15 @@ void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb,
        unsigned long bit;
        unsigned long i;
        unsigned long overflow;
-       struct ext3_group_desc * gdp;
+       struct ext3_group_desc * desc;
        struct ext3_super_block * es;
        struct ext3_sb_info *sbi;
        int err = 0, ret;
+       unsigned group_freed;
 
        *pdquot_freed_blocks = 0;
        sbi = EXT3_SB(sb);
-       es = EXT3_SB(sb)->s_es;
+       es = sbi->s_es;
        if (block < le32_to_cpu(es->s_first_data_block) ||
            block + count < block ||
            block + count > le32_to_cpu(es->s_blocks_count)) {
@@ -303,7 +338,7 @@ void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb,
                goto error_return;
        }
 
-       ext3_debug ("freeing block %lu\n", block);
+       ext3_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1);
 
 do_more:
        overflow = 0;
@@ -323,16 +358,16 @@ do_more:
        bitmap_bh = read_block_bitmap(sb, block_group);
        if (!bitmap_bh)
                goto error_return;
-       gdp = ext3_get_group_desc (sb, block_group, &gd_bh);
-       if (!gdp)
+       desc = ext3_get_group_desc (sb, block_group, &gd_bh);
+       if (!desc)
                goto error_return;
 
-       if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) ||
-           in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) ||
-           in_range (block, le32_to_cpu(gdp->bg_inode_table),
-                     EXT3_SB(sb)->s_itb_per_group) ||
-           in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table),
-                     EXT3_SB(sb)->s_itb_per_group))
+       if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) ||
+           in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) ||
+           in_range (block, le32_to_cpu(desc->bg_inode_table),
+                     sbi->s_itb_per_group) ||
+           in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table),
+                     sbi->s_itb_per_group))
                ext3_error (sb, "ext3_free_blocks",
                            "Freeing blocks in system zones - "
                            "Block = %lu, count = %lu",
@@ -344,7 +379,7 @@ do_more:
         */
        /* @@@ check errors */
        BUFFER_TRACE(bitmap_bh, "getting undo access");
-       err = ext3_journal_get_undo_access(handle, bitmap_bh, NULL);
+       err = ext3_journal_get_undo_access(handle, bitmap_bh);
        if (err)
                goto error_return;
 
@@ -360,7 +395,7 @@ do_more:
 
        jbd_lock_bh_state(bitmap_bh);
 
-       for (i = 0; i < count; i++) {
+       for (i = 0, group_freed = 0; i < count; i++) {
                /*
                 * An HJ special.  This is expensive...
                 */
@@ -423,15 +458,15 @@ do_more:
                        jbd_lock_bh_state(bitmap_bh);
                        BUFFER_TRACE(bitmap_bh, "bit already cleared");
                } else {
-                       (*pdquot_freed_blocks)++;
+                       group_freed++;
                }
        }
        jbd_unlock_bh_state(bitmap_bh);
 
        spin_lock(sb_bgl_lock(sbi, block_group));
-       gdp->bg_free_blocks_count =
-               cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) +
-                       *pdquot_freed_blocks);
+       desc->bg_free_blocks_count =
+               cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) +
+                       group_freed);
        spin_unlock(sb_bgl_lock(sbi, block_group));
        percpu_counter_mod(&sbi->s_freeblocks_counter, count);
 
@@ -443,6 +478,7 @@ do_more:
        BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
        ret = ext3_journal_dirty_metadata(handle, gd_bh);
        if (!err) err = ret;
+       *pdquot_freed_blocks += group_freed;
 
        if (overflow && !err) {
                block += count;
@@ -470,7 +506,7 @@ void ext3_free_blocks(handle_t *handle, struct inode *inode,
        }
        ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
        if (dquot_freed_blocks) {
-               DLIMIT_FREE_BLOCK(sb, inode->i_xid, dquot_freed_blocks);
+               DLIMIT_FREE_BLOCK(inode, dquot_freed_blocks);
                DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
        }
        return;
@@ -620,9 +656,11 @@ claim_block(spinlock_t *lock, int block, struct buffer_head *bh)
  */
 static int
 ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group,
-       struct buffer_head *bitmap_bh, int goal, struct ext3_reserve_window *my_rsv)
+                       struct buffer_head *bitmap_bh, int goal,
+                       unsigned long *count, struct ext3_reserve_window *my_rsv)
 {
        int group_first_block, start, end;
+       unsigned long num = 0;
 
        /* we do allocation within the reservation window if we have a window */
        if (my_rsv) {
@@ -680,8 +718,18 @@ repeat:
                        goto fail_access;
                goto repeat;
        }
-       return goal;
+       num++;
+       goal++;
+       while (num < *count && goal < end
+               && ext3_test_allocatable(goal, bitmap_bh)
+               && claim_block(sb_bgl_lock(EXT3_SB(sb), group), goal, bitmap_bh)) {
+               num++;
+               goal++;
+       }
+       *count = num;
+       return goal - num;
 fail_access:
+       *count = num;
        return -1;
 }
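
The hunk above turns the single-block claim in ext3_try_to_allocate() into a greedy run: once the bit at goal is claimed, the loop keeps claiming the following bits until *count blocks are taken, the window end is reached, or a bit turns out to be in use, and the achieved length is written back through *count. A minimal user-space sketch of that extension loop over a byte-per-block bitmap; test_allocatable() and claim() below are simplified stand-ins for ext3_test_allocatable() and claim_block(), not kernel APIs.

#include <stdio.h>

#define NBLOCKS 16

static int bitmap[NBLOCKS] = { 1, 1, 0, 0, 0, 1, 0, 0,
                               0, 0, 1, 1, 0, 0, 0, 0 };

static int test_allocatable(int blk)
{
        return blk < NBLOCKS && !bitmap[blk];
}

static int claim(int blk)
{
        if (bitmap[blk])
                return 0;
        bitmap[blk] = 1;
        return 1;
}

/*
 * Allocate up to *count consecutive blocks starting at goal; return the
 * first allocated block or -1, and write back through *count how many
 * blocks were actually claimed -- the same in/out contract as the patch.
 */
static int try_to_allocate(int goal, int end, unsigned long *count)
{
        unsigned long num = 0;

        if (!test_allocatable(goal) || !claim(goal)) {
                *count = num;
                return -1;
        }
        num++;
        goal++;
        while (num < *count && goal < end &&
               test_allocatable(goal) && claim(goal)) {
                num++;
                goal++;
        }
        *count = num;
        return goal - num;
}

int main(void)
{
        unsigned long count = 5;
        int first = try_to_allocate(2, NBLOCKS, &count);

        printf("first = %d, got %lu of 5\n", first, count); /* first = 2, got 3 of 5 */
        return 0;
}
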
 
@@ -717,24 +765,24 @@ fail_access:
  *     to find a free region that is of my size and has not
  *     been reserved.
  *
- *     on succeed, it returns the reservation window to be appended to.
- *     failed, return NULL.
  */
-static struct ext3_reserve_window_node *find_next_reservable_window(
+static int find_next_reservable_window(
                                struct ext3_reserve_window_node *search_head,
-                               unsigned long size, int *start_block,
+                               struct ext3_reserve_window_node *my_rsv,
+                               struct super_block * sb, int start_block,
                                int last_block)
 {
        struct rb_node *next;
        struct ext3_reserve_window_node *rsv, *prev;
        int cur;
+       int size = my_rsv->rsv_goal_size;
 
        /* TODO: make the start of the reservation window byte-aligned */
        /* cur = *start_block & ~7;*/
-       cur = *start_block;
+       cur = start_block;
        rsv = search_head;
        if (!rsv)
-               return NULL;
+               return -1;
 
        while (1) {
                if (cur <= rsv->rsv_end)
@@ -750,11 +798,11 @@ static struct ext3_reserve_window_node *find_next_reservable_window(
                 * space with expected-size (or more)...
                 */
                if (cur > last_block)
-                       return NULL;            /* fail */
+                       return -1;              /* fail */
 
                prev = rsv;
                next = rb_next(&rsv->rsv_node);
-               rsv = list_entry(next, struct ext3_reserve_window_node, rsv_node);
+               rsv = list_entry(next,struct ext3_reserve_window_node,rsv_node);
 
                /*
                 * Reached the last reservation, we can just append to the
@@ -781,8 +829,25 @@ static struct ext3_reserve_window_node *find_next_reservable_window(
         * return the reservation window that we could append to.
         * succeed.
         */
-       *start_block = cur;
-       return prev;
+
+       if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window)))
+               rsv_window_remove(sb, my_rsv);
+
+       /*
+        * Let's book the whole available window for now.  We will check the
+        * disk bitmap later and then, if there are free blocks, we will adjust
+        * the window size if it's larger than requested.
+        * Otherwise, we will remove this node from the tree the next time
+        * find_next_reservable_window() is called.
+        */
+       my_rsv->rsv_start = cur;
+       my_rsv->rsv_end = cur + size - 1;
+       my_rsv->rsv_alloc_hit = 0;
+
+       if (prev != my_rsv)
+               ext3_rsv_window_add(sb, my_rsv);
+
+       return 0;
 }
 
 /**
@@ -820,6 +885,7 @@ static struct ext3_reserve_window_node *find_next_reservable_window(
  *     @sb: the super block
  *     @group: the group we are trying to allocate in
  *     @bitmap_bh: the block group block bitmap
+ *
  */
 static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
                int goal, struct super_block *sb,
@@ -828,10 +894,10 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
        struct ext3_reserve_window_node *search_head;
        int group_first_block, group_end_block, start_block;
        int first_free_block;
-       int reservable_space_start;
-       struct ext3_reserve_window_node *prev_rsv;
        struct rb_root *fs_rsv_root = &EXT3_SB(sb)->s_rsv_window_root;
        unsigned long size;
+       int ret;
+       spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
 
        group_first_block = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) +
                                group * EXT3_BLOCKS_PER_GROUP(sb);
@@ -842,7 +908,8 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
        else
                start_block = goal + group_first_block;
 
-       size = atomic_read(&my_rsv->rsv_goal_size);
+       size = my_rsv->rsv_goal_size;
+
        if (!rsv_is_empty(&my_rsv->rsv_window)) {
                /*
                 * if the old reservation is cross group boundary
@@ -863,7 +930,7 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
                                (start_block >= my_rsv->rsv_start))
                        return -1;
 
-               if ((atomic_read(&my_rsv->rsv_alloc_hit) >
+               if ((my_rsv->rsv_alloc_hit >
                     (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) {
                        /*
                         * if the previous allocation hit ratio is greater than half
@@ -873,9 +940,11 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
                        size = size * 2;
                        if (size > EXT3_MAX_RESERVE_BLOCKS)
                                size = EXT3_MAX_RESERVE_BLOCKS;
-                       atomic_set(&my_rsv->rsv_goal_size, size);
+                       my_rsv->rsv_goal_size = size;
                }
        }
+
+       spin_lock(rsv_lock);
        /*
         * shift the search start to the window near the goal block
         */
@@ -889,11 +958,16 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
         * need to check the bitmap after we found a reservable window.
         */
 retry:
-       prev_rsv = find_next_reservable_window(search_head, size,
-                                               &start_block, group_end_block);
-       if (prev_rsv == NULL)
-               goto failed;
-       reservable_space_start = start_block;
+       ret = find_next_reservable_window(search_head, my_rsv, sb,
+                                               start_block, group_end_block);
+
+       if (ret == -1) {
+               if (!rsv_is_empty(&my_rsv->rsv_window))
+                       rsv_window_remove(sb, my_rsv);
+               spin_unlock(rsv_lock);
+               return -1;
+       }
+
        /*
         * On success, find_next_reservable_window() returns the
         * reservation window where there is a reservable space after it.
@@ -905,8 +979,9 @@ retry:
         * block. Search start from the start block of the reservable space
         * we just found.
         */
+       spin_unlock(rsv_lock);
        first_free_block = bitmap_search_next_usable_block(
-                       reservable_space_start - group_first_block,
+                       my_rsv->rsv_start - group_first_block,
                        bitmap_bh, group_end_block - group_first_block + 1);
 
        if (first_free_block < 0) {
@@ -914,54 +989,54 @@ retry:
                 * no free block left on the bitmap, no point
                 * to reserve the space. return failed.
                 */
-               goto failed;
+               spin_lock(rsv_lock);
+               if (!rsv_is_empty(&my_rsv->rsv_window))
+                       rsv_window_remove(sb, my_rsv);
+               spin_unlock(rsv_lock);
+               return -1;              /* failed */
        }
+
        start_block = first_free_block + group_first_block;
        /*
         * check if the first free block is within the
-        * free space we just found
+        * free space we just reserved
         */
-       if ((start_block >= reservable_space_start) &&
-         (start_block < reservable_space_start + size))
-               goto found_rsv_window;
+       if (start_block >= my_rsv->rsv_start && start_block < my_rsv->rsv_end)
+               return 0;               /* success */
        /*
         * if the first free bit we found is out of the reservable space
-        * this means there is no free block on the reservable space
-        * we should continue search for next reservable space,
+        * continue search for next reservable space,
         * start from where the free block is,
         * we also shift the list head to where we stopped last time
         */
-       search_head = prev_rsv;
+       search_head = my_rsv;
+       spin_lock(rsv_lock);
        goto retry;
+}
 
-found_rsv_window:
-       /*
-        * great! the reservable space contains some free blocks.
-        * if the search returns that we should add the new
-        * window just next to where the old window, we don't
-        * need to remove the old window first then add it to the
-        * same place, just update the new start and new end.
-        */
-       if (my_rsv != prev_rsv)  {
-               if (!rsv_is_empty(&my_rsv->rsv_window))
-                       rsv_window_remove(sb, my_rsv);
-       }
-       my_rsv->rsv_start = reservable_space_start;
-       my_rsv->rsv_end = my_rsv->rsv_start + size - 1;
-       atomic_set(&my_rsv->rsv_alloc_hit, 0);
-       if (my_rsv != prev_rsv)  {
-               ext3_rsv_window_add(sb, my_rsv);
+static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv,
+                       struct super_block *sb, int size)
+{
+       struct ext3_reserve_window_node *next_rsv;
+       struct rb_node *next;
+       spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
+
+       if (!spin_trylock(rsv_lock))
+               return;
+
+       next = rb_next(&my_rsv->rsv_node);
+
+       if (!next)
+               my_rsv->rsv_end += size;
+       else {
+               next_rsv = list_entry(next, struct ext3_reserve_window_node, rsv_node);
+
+               if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size)
+                       my_rsv->rsv_end += size;
+               else
+                       my_rsv->rsv_end = next_rsv->rsv_start - 1;
        }
-       return 0;               /* succeed */
-failed:
-       /*
-        * failed to find a new reservation window in the current
-        * group, remove the current(stale) reservation window
-        * if there is any
-        */
-       if (!rsv_is_empty(&my_rsv->rsv_window))
-               rsv_window_remove(sb, my_rsv);
-       return -1;              /* failed */
+       spin_unlock(rsv_lock);
 }
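
try_to_extend_reservation() grows the current window by size blocks, but never into its right-hand neighbour: when the gap to the next reservation is smaller than size, rsv_end is clamped to next_rsv->rsv_start - 1. For example, with the window ending at block 107, the next window starting at 112 and size = 8, the 4-block gap makes the new end 111 rather than 115. A compact user-space sketch of that clamping rule (the locking and rb-tree lookup are omitted):

#include <stdio.h>

/*
 * Extend the end of the current window by size blocks, clamped so it never
 * overlaps a following window starting at next_start (next_start < 0 means
 * there is no following window in the group).
 */
static int extend_end(int cur_end, int next_start, int size)
{
        if (next_start < 0)
                return cur_end + size;
        if (next_start - cur_end - 1 >= size)
                return cur_end + size;
        return next_start - 1;
}

int main(void)
{
        printf("%d\n", extend_end(107, 112, 8));        /* gap of 4 < 8   -> 111 */
        printf("%d\n", extend_end(107, 140, 8));        /* gap of 32 >= 8 -> 115 */
        printf("%d\n", extend_end(107, -1, 8));         /* no neighbour   -> 115 */
        return 0;
}
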
 
 /*
@@ -976,7 +1051,7 @@ failed:
  * allocation within the reservation window.
  *
  * This will avoid keeping on searching the reservation list again and
- * again when someboday is looking for a free block (without
+ * again when somebody is looking for a free block (without
  * reservation), and there are lots of free blocks, but they are all
  * being reserved.
  *
@@ -989,13 +1064,12 @@ static int
 ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
                        unsigned int group, struct buffer_head *bitmap_bh,
                        int goal, struct ext3_reserve_window_node * my_rsv,
-                       int *errp)
+                       unsigned long *count, int *errp)
 {
-       spinlock_t *rsv_lock;
        unsigned long group_first_block;
        int ret = 0;
        int fatal;
-       int credits = 0;
+       unsigned long num = *count;
 
        *errp = 0;
 
@@ -1005,7 +1079,7 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
         * if the buffer is in BJ_Forget state in the committing transaction.
         */
        BUFFER_TRACE(bitmap_bh, "get undo access for new block");
-       fatal = ext3_journal_get_undo_access(handle, bitmap_bh, &credits);
+       fatal = ext3_journal_get_undo_access(handle, bitmap_bh);
        if (fatal) {
                *errp = fatal;
                return -1;
@@ -1018,10 +1092,10 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
         * or last attempt to allocate a block with reservation turned on failed
         */
        if (my_rsv == NULL ) {
-               ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal, NULL);
+               ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh,
+                                               goal, count, NULL);
                goto out;
        }
-       rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
        /*
         * goal is a group relative block number (if there is a goal)
         * 0 < goal < EXT3_BLOCKS_PER_GROUP(sb)
@@ -1047,41 +1121,32 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
         * then we could go to allocate from the reservation window directly.
         */
        while (1) {
-               struct ext3_reserve_window rsv_copy;
-               unsigned int seq;
-
-               do {
-                       seq = read_seqbegin(&my_rsv->rsv_seqlock);
-                       rsv_copy._rsv_start = my_rsv->rsv_start;
-                       rsv_copy._rsv_end = my_rsv->rsv_end;
-               } while (read_seqretry(&my_rsv->rsv_seqlock, seq));
-
-               if (rsv_is_empty(&rsv_copy) || (ret < 0) ||
-                       !goal_in_my_reservation(&rsv_copy, goal, group, sb)) {
-                       spin_lock(rsv_lock);
-                       write_seqlock(&my_rsv->rsv_seqlock);
+               if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) ||
+                       !goal_in_my_reservation(&my_rsv->rsv_window, goal, group, sb)) {
+                       if (my_rsv->rsv_goal_size < *count)
+                               my_rsv->rsv_goal_size = *count;
                        ret = alloc_new_reservation(my_rsv, goal, sb,
                                                        group, bitmap_bh);
-                       rsv_copy._rsv_start = my_rsv->rsv_start;
-                       rsv_copy._rsv_end = my_rsv->rsv_end;
-                       write_sequnlock(&my_rsv->rsv_seqlock);
-                       spin_unlock(rsv_lock);
                        if (ret < 0)
                                break;                  /* failed */
 
-                       if (!goal_in_my_reservation(&rsv_copy, goal, group, sb))
+                       if (!goal_in_my_reservation(&my_rsv->rsv_window, goal, group, sb))
                                goal = -1;
-               }
-               if ((rsv_copy._rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb))
-                   || (rsv_copy._rsv_end < group_first_block))
+               } else if (goal > 0 && (my_rsv->rsv_end-goal+1) < *count)
+                       try_to_extend_reservation(my_rsv, sb,
+                                       *count-my_rsv->rsv_end + goal - 1);
+
+               if ((my_rsv->rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb))
+                   || (my_rsv->rsv_end < group_first_block))
                        BUG();
                ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal,
-                                          &rsv_copy);
+                                          &num, &my_rsv->rsv_window);
                if (ret >= 0) {
-                       if (!read_seqretry(&my_rsv->rsv_seqlock, seq))
-                               atomic_inc(&my_rsv->rsv_alloc_hit);
+                       my_rsv->rsv_alloc_hit += num;
+                       *count = num;
                        break;                          /* succeed */
                }
+               num = *count;
        }
 out:
        if (ret >= 0) {
@@ -1096,7 +1161,7 @@ out:
        }
 
        BUFFER_TRACE(bitmap_bh, "journal_release_buffer");
-       ext3_journal_release_buffer(handle, bitmap_bh, credits);
+       ext3_journal_release_buffer(handle, bitmap_bh);
        return ret;
 }
 
@@ -1152,8 +1217,8 @@ int ext3_should_retry_alloc(struct super_block *sb, int *retries)
  * bitmap, and then for any free bit if that fails.
  * This function also updates quota and i_blocks field.
  */
-int ext3_new_block(handle_t *handle, struct inode *inode,
-                       unsigned long goal, int *errp)
+int ext3_new_blocks(handle_t *handle, struct inode *inode,
+                       unsigned long goal, unsigned long *count, int *errp)
 {
        struct buffer_head *bitmap_bh = NULL;
        struct buffer_head *gdp_bh;
@@ -1170,12 +1235,13 @@ int ext3_new_block(handle_t *handle, struct inode *inode,
        struct ext3_super_block *es;
        struct ext3_sb_info *sbi;
        struct ext3_reserve_window_node *my_rsv = NULL;
-       struct ext3_reserve_window_node *rsv = &EXT3_I(inode)->i_rsv_window;
+       struct ext3_block_alloc_info *block_i;
        unsigned short windowsz = 0;
 #ifdef EXT3FS_DEBUG
        static int goal_hits, goal_attempts;
 #endif
        unsigned long ngroups;
+       unsigned long num = *count;
 
        *errp = -ENOSPC;
        sb = inode->i_sb;
@@ -1187,12 +1253,12 @@ int ext3_new_block(handle_t *handle, struct inode *inode,
        /*
         * Check quota for allocation of this block.
         */
-       if (DQUOT_ALLOC_BLOCK(inode, 1)) {
+       if (DQUOT_ALLOC_BLOCK(inode, num)) {
                *errp = -EDQUOT;
                return 0;
        }
-       if (DLIMIT_ALLOC_BLOCK(sb, inode->i_xid, 1))
-               goto out_dlimit;
+       if (DLIMIT_ALLOC_BLOCK(inode, 1))
+               goto out_dlimit;
 
        sbi = EXT3_SB(sb);
        es = EXT3_SB(sb)->s_es;
@@ -1205,10 +1271,10 @@ int ext3_new_block(handle_t *handle, struct inode *inode,
         * command EXT3_IOC_SETRSVSZ to set the window size to 0 to turn off
         * reservation on that particular file)
         */
-       windowsz = atomic_read(&rsv->rsv_goal_size);
-       if (test_opt(sb, RESERVATION) &&
-               S_ISREG(inode->i_mode) && (windowsz > 0))
-               my_rsv = rsv;
+       block_i = EXT3_I(inode)->i_block_alloc_info;
+       if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0))
+               my_rsv = &block_i->rsv_window_node;
+
        if (!ext3_has_free_blocks(sb)) {
                *errp = -ENOSPC;
                goto out;
@@ -1229,6 +1295,14 @@ int ext3_new_block(handle_t *handle, struct inode *inode,
        goal_group = group_no;
 retry:
        free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
+       /*
+        * if there are not enough free blocks to make a new reservation,
+        * turn off reservation for this allocation
+        */
+       if (my_rsv && (free_blocks < windowsz)
+               && (rsv_is_empty(&my_rsv->rsv_window)))
+               my_rsv = NULL;
+
        if (free_blocks > 0) {
                ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) %
                                EXT3_BLOCKS_PER_GROUP(sb));
@@ -1236,7 +1310,7 @@ retry:
                if (!bitmap_bh)
                        goto io_error;
                ret_block = ext3_try_to_allocate_with_rsv(sb, handle, group_no,
-                                       bitmap_bh, ret_block, my_rsv, &fatal);
+                                       bitmap_bh, ret_block, my_rsv, &num, &fatal);
                if (fatal)
                        goto out;
                if (ret_block >= 0)
@@ -1273,7 +1347,7 @@ retry:
                if (!bitmap_bh)
                        goto io_error;
                ret_block = ext3_try_to_allocate_with_rsv(sb, handle, group_no,
-                                       bitmap_bh, -1, my_rsv, &fatal);
+                                       bitmap_bh, -1, my_rsv, &num, &fatal);
                if (fatal)
                        goto out;
                if (ret_block >= 0) 
@@ -1308,13 +1382,15 @@ allocated:
        target_block = ret_block + group_no * EXT3_BLOCKS_PER_GROUP(sb)
                                + le32_to_cpu(es->s_first_data_block);
 
-       if (target_block == le32_to_cpu(gdp->bg_block_bitmap) ||
-           target_block == le32_to_cpu(gdp->bg_inode_bitmap) ||
+       if (in_range(le32_to_cpu(gdp->bg_block_bitmap), target_block, num) ||
+           in_range(le32_to_cpu(gdp->bg_inode_bitmap), target_block, num) ||
            in_range(target_block, le32_to_cpu(gdp->bg_inode_table),
+                     EXT3_SB(sb)->s_itb_per_group) ||
+           in_range(target_block + num - 1, le32_to_cpu(gdp->bg_inode_table),
                      EXT3_SB(sb)->s_itb_per_group))
                ext3_error(sb, "ext3_new_block",
                            "Allocating block in system zone - "
-                           "block = %u", target_block);
+                           "blocks from %u, length %lu", target_block, num);
 
        performed_allocation = 1;
 
@@ -1333,10 +1409,14 @@ allocated:
        jbd_lock_bh_state(bitmap_bh);
        spin_lock(sb_bgl_lock(sbi, group_no));
        if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) {
-               if (ext3_test_bit(ret_block,
-                               bh2jh(bitmap_bh)->b_committed_data)) {
-                       printk("%s: block was unexpectedly set in "
-                               "b_committed_data\n", __FUNCTION__);
+               int i;
+
+               for (i = 0; i < num; i++) {
+                       if (ext3_test_bit(ret_block,
+                                       bh2jh(bitmap_bh)->b_committed_data)) {
+                               printk("%s: block was unexpectedly set in "
+                                       "b_committed_data\n", __FUNCTION__);
+                       }
                }
        }
        ext3_debug("found bit %d\n", ret_block);
@@ -1347,7 +1427,7 @@ allocated:
        /* ret_block was blockgroup-relative.  Now it becomes fs-relative */
        ret_block = target_block;
 
-       if (ret_block >= le32_to_cpu(es->s_blocks_count)) {
+       if (ret_block + num - 1 >= le32_to_cpu(es->s_blocks_count)) {
                ext3_error(sb, "ext3_new_block",
                            "block(%d) >= blocks count(%d) - "
                            "block_group = %d, es == %p ", ret_block,
@@ -1365,9 +1445,9 @@ allocated:
 
        spin_lock(sb_bgl_lock(sbi, group_no));
        gdp->bg_free_blocks_count =
-                       cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1);
+                       cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - num);
        spin_unlock(sb_bgl_lock(sbi, group_no));
-       percpu_counter_mod(&sbi->s_freeblocks_counter, -1);
+       percpu_counter_mod(&sbi->s_freeblocks_counter, -num);
 
        BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
        err = ext3_journal_dirty_metadata(handle, gdp_bh);
@@ -1380,13 +1460,15 @@ allocated:
 
        *errp = 0;
        brelse(bitmap_bh);
+       DQUOT_FREE_BLOCK(inode, *count-num);
+       *count = num;
        return ret_block;
 
 io_error:
        *errp = -EIO;
 out:
        if (!performed_allocation)
-               DLIMIT_FREE_BLOCK(sb, inode->i_xid, 1);
+               DLIMIT_FREE_BLOCK(inode, 1);
 out_dlimit:
        if (fatal) {
                *errp = fatal;
@@ -1396,28 +1478,37 @@ out_dlimit:
         * Undo the block allocation
         */
        if (!performed_allocation)
-               DQUOT_FREE_BLOCK(inode, 1);
+               DQUOT_FREE_BLOCK(inode, *count);
        brelse(bitmap_bh);
        return 0;
 }
 
+int ext3_new_block(handle_t *handle, struct inode *inode,
+                       unsigned long goal, int *errp)
+{
+       unsigned long count = 1;
+
+       return ext3_new_blocks(handle, inode, goal, &count, errp);
+}
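
ext3_new_blocks() treats *count as an in/out parameter: the caller asks for count blocks near goal and gets back the first allocated block number plus, through *count, how many contiguous blocks were actually granted (possibly fewer), while ext3_new_block() above stays the single-block wrapper. The toy user-space model below only illustrates that calling contract; the cut-off at a "group end" is an invented stand-in for running out of free or in-group blocks, not code from the patch.

#include <stdio.h>

#define GROUP_END 97    /* pretend the current group has no free blocks past 97 */

/*
 * Toy model of the ext3_new_blocks() contract: request *count blocks at
 * goal, return the first block actually allocated (0 on error), and rewrite
 * *count to the number of contiguous blocks granted.
 */
static unsigned long new_blocks(unsigned long goal, unsigned long *count, int *errp)
{
        if (goal > GROUP_END) {
                *errp = -28;                    /* -ENOSPC */
                return 0;
        }
        if (goal + *count - 1 > GROUP_END)
                *count = GROUP_END - goal + 1;
        *errp = 0;
        return goal;
}

int main(void)
{
        unsigned long count = 8;
        int err;
        unsigned long first = new_blocks(94, &count, &err);

        if (!err)
                printf("got %lu block(s) starting at %lu\n", count, first);
        /* prints: got 4 block(s) starting at 94 */
        return 0;
}
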
+
 unsigned long ext3_count_free_blocks(struct super_block *sb)
 {
        unsigned long desc_count;
        struct ext3_group_desc *gdp;
        int i;
-       unsigned long ngroups;
+       unsigned long ngroups = EXT3_SB(sb)->s_groups_count;
 #ifdef EXT3FS_DEBUG
        struct ext3_super_block *es;
        unsigned long bitmap_count, x;
        struct buffer_head *bitmap_bh = NULL;
 
-       lock_super(sb);
        es = EXT3_SB(sb)->s_es;
        desc_count = 0;
        bitmap_count = 0;
        gdp = NULL;
-       for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
+
+       smp_rmb();
+       for (i = 0; i < ngroups; i++) {
                gdp = ext3_get_group_desc(sb, i, NULL);
                if (!gdp)
                        continue;
@@ -1435,11 +1526,9 @@ unsigned long ext3_count_free_blocks(struct super_block *sb)
        brelse(bitmap_bh);
        printk("ext3_count_free_blocks: stored = %u, computed = %lu, %lu\n",
               le32_to_cpu(es->s_free_blocks_count), desc_count, bitmap_count);
-       unlock_super(sb);
        return bitmap_count;
 #else
        desc_count = 0;
-       ngroups = EXT3_SB(sb)->s_groups_count;
        smp_rmb();
        for (i = 0; i < ngroups; i++) {
                gdp = ext3_get_group_desc(sb, i, NULL);
@@ -1452,9 +1541,8 @@ unsigned long ext3_count_free_blocks(struct super_block *sb)
 #endif
 }
 
-static inline int block_in_use(unsigned long block,
-                               struct super_block * sb,
-                               unsigned char * map)
+static inline int
+block_in_use(unsigned long block, struct super_block *sb, unsigned char *map)
 {
        return ext3_test_bit ((block -
                le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) %
@@ -1474,8 +1562,10 @@ static int ext3_group_sparse(int group)
 {
        if (group <= 1)
                return 1;
-       return (test_root(group, 3) || test_root(group, 5) ||
-               test_root(group, 7));
+       if (!(group & 1))
+               return 0;
+       return (test_root(group, 7) || test_root(group, 5) ||
+               test_root(group, 3));
 }
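
With sparse_super, only group 0, group 1 and groups that are powers of 3, 5 or 7 carry superblock/descriptor backups; the reordered check above also bails out early on even groups, since no such power is even. test_root() is outside this hunk, so the stand-in below is an assumption about its behaviour, but it lets the rule be verified: for the first 256 groups the backups land in groups 0, 1, 3, 5, 7, 9, 25, 27, 49, 81, 125 and 243.

#include <stdio.h>

/* stand-in for the kernel's test_root(): is a a power of b? */
static int test_root(int a, int b)
{
        int num = b;

        while (a > num)
                num *= b;
        return num == a;
}

static int group_sparse(int group)
{
        if (group <= 1)
                return 1;
        if (!(group & 1))
                return 0;
        return test_root(group, 7) || test_root(group, 5) ||
               test_root(group, 3);
}

int main(void)
{
        int g;

        for (g = 0; g < 256; g++)
                if (group_sparse(g))
                        printf("%d ", g);       /* 0 1 3 5 7 9 25 27 49 81 125 243 */
        printf("\n");
        return 0;
}
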
 
 /**
@@ -1488,12 +1578,33 @@ static int ext3_group_sparse(int group)
  */
 int ext3_bg_has_super(struct super_block *sb, int group)
 {
-       if (EXT3_HAS_RO_COMPAT_FEATURE(sb,EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
-           !ext3_group_sparse(group))
+       if (EXT3_HAS_RO_COMPAT_FEATURE(sb,
+                               EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER) &&
+                       !ext3_group_sparse(group))
                return 0;
        return 1;
 }
 
+static unsigned long ext3_bg_num_gdb_meta(struct super_block *sb, int group)
+{
+       unsigned long metagroup = group / EXT3_DESC_PER_BLOCK(sb);
+       unsigned long first = metagroup * EXT3_DESC_PER_BLOCK(sb);
+       unsigned long last = first + EXT3_DESC_PER_BLOCK(sb) - 1;
+
+       if (group == first || group == first + 1 || group == last)
+               return 1;
+       return 0;
+}
+
+static unsigned long ext3_bg_num_gdb_nometa(struct super_block *sb, int group)
+{
+       if (EXT3_HAS_RO_COMPAT_FEATURE(sb,
+                               EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER) &&
+                       !ext3_group_sparse(group))
+               return 0;
+       return EXT3_SB(sb)->s_gdb_count;
+}
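
Under META_BG, each metagroup of EXT3_DESC_PER_BLOCK(sb) block groups keeps its descriptor-block backups in the first, second and last group of the metagroup, which is what ext3_bg_num_gdb_meta() encodes above. A quick arithmetic check, assuming 4 KB blocks and 32-byte descriptors (128 descriptors per block -- an assumption, not a value taken from this patch):

#include <stdio.h>

#define DESC_PER_BLOCK 128      /* 4096-byte block / 32-byte group descriptor */

/* does this group hold a group-descriptor backup under META_BG? */
static int bg_num_gdb_meta(int group)
{
        int metagroup = group / DESC_PER_BLOCK;
        int first = metagroup * DESC_PER_BLOCK;
        int last = first + DESC_PER_BLOCK - 1;

        return group == first || group == first + 1 || group == last;
}

int main(void)
{
        int groups[] = { 128, 129, 200, 255, 256 };
        int i;

        for (i = 0; i < 5; i++)
                printf("group %3d: %d\n", groups[i], bg_num_gdb_meta(groups[i]));
        /* 128, 129 and 255 print 1 (metagroup 1); 200 prints 0;
         * 256 prints 1, being the first group of metagroup 2 */
        return 0;
}
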
+
 /**
  *     ext3_bg_num_gdb - number of blocks used by the group table in group
  *     @sb: superblock for filesystem
@@ -1505,82 +1616,14 @@ int ext3_bg_has_super(struct super_block *sb, int group)
  */
 unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
 {
-       if (EXT3_HAS_RO_COMPAT_FEATURE(sb,EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
-           !ext3_group_sparse(group))
-               return 0;
-       return EXT3_SB(sb)->s_gdb_count;
-}
-
-#ifdef CONFIG_EXT3_CHECK
-/* Called at mount-time, super-block is locked */
-void ext3_check_blocks_bitmap (struct super_block * sb)
-{
-       struct ext3_super_block *es;
-       unsigned long desc_count, bitmap_count, x, j;
-       unsigned long desc_blocks;
-       struct buffer_head *bitmap_bh = NULL;
-       struct ext3_group_desc *gdp;
-       int i;
+       unsigned long first_meta_bg =
+                       le32_to_cpu(EXT3_SB(sb)->s_es->s_first_meta_bg);
+       unsigned long metagroup = group / EXT3_DESC_PER_BLOCK(sb);
 
-       es = EXT3_SB(sb)->s_es;
-       desc_count = 0;
-       bitmap_count = 0;
-       gdp = NULL;
-       for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
-               gdp = ext3_get_group_desc (sb, i, NULL);
-               if (!gdp)
-                       continue;
-               desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
-               brelse(bitmap_bh);
-               bitmap_bh = read_block_bitmap(sb, i);
-               if (bitmap_bh == NULL)
-                       continue;
+       if (!EXT3_HAS_INCOMPAT_FEATURE(sb,EXT3_FEATURE_INCOMPAT_META_BG) ||
+                       metagroup < first_meta_bg)
+               return ext3_bg_num_gdb_nometa(sb,group);
 
-               if (ext3_bg_has_super(sb, i) &&
-                               !ext3_test_bit(0, bitmap_bh->b_data))
-                       ext3_error(sb, __FUNCTION__,
-                                  "Superblock in group %d is marked free", i);
-
-               desc_blocks = ext3_bg_num_gdb(sb, i);
-               for (j = 0; j < desc_blocks; j++)
-                       if (!ext3_test_bit(j + 1, bitmap_bh->b_data))
-                               ext3_error(sb, __FUNCTION__,
-                                          "Descriptor block #%ld in group "
-                                          "%d is marked free", j, i);
-
-               if (!block_in_use (le32_to_cpu(gdp->bg_block_bitmap),
-                                               sb, bitmap_bh->b_data))
-                       ext3_error (sb, "ext3_check_blocks_bitmap",
-                                   "Block bitmap for group %d is marked free",
-                                   i);
-
-               if (!block_in_use (le32_to_cpu(gdp->bg_inode_bitmap),
-                                               sb, bitmap_bh->b_data))
-                       ext3_error (sb, "ext3_check_blocks_bitmap",
-                                   "Inode bitmap for group %d is marked free",
-                                   i);
-
-               for (j = 0; j < EXT3_SB(sb)->s_itb_per_group; j++)
-                       if (!block_in_use (le32_to_cpu(gdp->bg_inode_table) + j,
-                                                       sb, bitmap_bh->b_data))
-                               ext3_error (sb, "ext3_check_blocks_bitmap",
-                                           "Block #%d of the inode table in "
-                                           "group %d is marked free", j, i);
+       return ext3_bg_num_gdb_meta(sb,group);
 
-               x = ext3_count_free(bitmap_bh, sb->s_blocksize);
-               if (le16_to_cpu(gdp->bg_free_blocks_count) != x)
-                       ext3_error (sb, "ext3_check_blocks_bitmap",
-                                   "Wrong free blocks count for group %d, "
-                                   "stored = %d, counted = %lu", i,
-                                   le16_to_cpu(gdp->bg_free_blocks_count), x);
-               bitmap_count += x;
-       }
-       brelse(bitmap_bh);
-       if (le32_to_cpu(es->s_free_blocks_count) != bitmap_count)
-               ext3_error (sb, "ext3_check_blocks_bitmap",
-                       "Wrong free blocks count in super block, "
-                       "stored = %lu, counted = %lu",
-                       (unsigned long)le32_to_cpu(es->s_free_blocks_count),
-                       bitmap_count);
 }
-#endif