fedora core 6 1.2949 + vserver 2.2.0
[linux-2.6.git] / fs / ocfs2 / suballoc.c
index c46c164..6dbb117 100644 (file)
@@ -49,7 +49,7 @@
 static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg);
 static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe);
 static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl);
-static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle,
+static int ocfs2_block_group_fill(handle_t *handle,
                                  struct inode *alloc_inode,
                                  struct buffer_head *bg_bh,
                                  u64 group_blkno,
@@ -59,9 +59,6 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
                                   struct inode *alloc_inode,
                                   struct buffer_head *bh);
 
-static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
-                                      struct ocfs2_alloc_context *ac);
-
 static int ocfs2_cluster_group_search(struct inode *inode,
                                      struct buffer_head *group_bh,
                                      u32 bits_wanted, u32 min_bits,
@@ -70,14 +67,9 @@ static int ocfs2_block_group_search(struct inode *inode,
                                    struct buffer_head *group_bh,
                                    u32 bits_wanted, u32 min_bits,
                                    u16 *bit_off, u16 *bits_found);
-static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
-                             u32 bits_wanted,
-                             u32 min_bits,
-                             u16 *bit_off,
-                             unsigned int *num_bits,
-                             u64 *bg_blkno);
 static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
                                     struct ocfs2_alloc_context *ac,
+                                    handle_t *handle,
                                     u32 bits_wanted,
                                     u32 min_bits,
                                     u16 *bit_off,
@@ -85,25 +77,20 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
                                     u64 *bg_blkno);
 static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
                                         int nr);
-static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
-                                            struct buffer_head *bg_bh,
-                                            unsigned int bits_wanted,
-                                            u16 *bit_off,
-                                            u16 *bits_found);
-static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle,
+static inline int ocfs2_block_group_set_bits(handle_t *handle,
                                             struct inode *alloc_inode,
                                             struct ocfs2_group_desc *bg,
                                             struct buffer_head *group_bh,
                                             unsigned int bit_off,
                                             unsigned int num_bits);
-static inline int ocfs2_block_group_clear_bits(struct ocfs2_journal_handle *handle,
+static inline int ocfs2_block_group_clear_bits(handle_t *handle,
                                               struct inode *alloc_inode,
                                               struct ocfs2_group_desc *bg,
                                               struct buffer_head *group_bh,
                                               unsigned int bit_off,
                                               unsigned int num_bits);
 
-static int ocfs2_relink_block_group(struct ocfs2_journal_handle *handle,
+static int ocfs2_relink_block_group(handle_t *handle,
                                    struct inode *alloc_inode,
                                    struct buffer_head *fe_bh,
                                    struct buffer_head *bg_bh,
@@ -111,7 +98,7 @@ static int ocfs2_relink_block_group(struct ocfs2_journal_handle *handle,
                                    u16 chain);
 static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
                                                     u32 wanted);
-static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle,
+static int ocfs2_free_suballoc_bits(handle_t *handle,
                                    struct inode *alloc_inode,
                                    struct buffer_head *alloc_bh,
                                    unsigned int start_bit,
@@ -131,8 +118,16 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode,
 
 void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
 {
-       if (ac->ac_inode)
-               iput(ac->ac_inode);
+       struct inode *inode = ac->ac_inode;
+
+       if (inode) {
+               if (ac->ac_which != OCFS2_AC_USE_LOCAL)
+                       ocfs2_meta_unlock(inode, 1);
+
+               mutex_unlock(&inode->i_mutex);
+
+               iput(inode);
+       }
        if (ac->ac_bh)
                brelse(ac->ac_bh);
        kfree(ac);
@@ -143,7 +138,65 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
        return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc);
 }
 
-static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle,
+/*
+ * Sanity-check group descriptor @gd against the chain allocator dinode
+ * @di. Somewhat more expensive than our other checks, so use sparingly.
+ *
+ * Checks, in order:
+ *  - @gd passes OCFS2_IS_VALID_GROUP_DESC();
+ *  - bg_parent_dinode equals di->i_blkno (the raw little-endian values
+ *    are compared directly, without byte-swapping);
+ *  - bg_bits does not exceed cl_cpg * cl_bpc of the dinode's chain list;
+ *  - bg_chain is below cl_next_free_rec;
+ *  - bg_free_bits_count does not exceed bg_bits;
+ *  - bg_bits does not exceed 8 * bg_size.
+ *
+ * Returns 0 if every check passes. On the first failing check the
+ * problem is reported through OCFS2_RO_ON_INVALID_GROUP_DESC() or
+ * ocfs2_error() on @sb and -EIO is returned.
+ *
+ * NOTE(review): max_bits is computed without the (u32) casts that
+ * ocfs2_bits_per_group() applies to the same product -- confirm the
+ * difference is intentional.
+ */
+static int ocfs2_check_group_descriptor(struct super_block *sb,
+                                       struct ocfs2_dinode *di,
+                                       struct ocfs2_group_desc *gd)
+{
+       unsigned int max_bits;
+
+       if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
+               OCFS2_RO_ON_INVALID_GROUP_DESC(sb, gd);
+               return -EIO;
+       }
+
+       if (di->i_blkno != gd->bg_parent_dinode) {
+               ocfs2_error(sb, "Group descriptor # %llu has bad parent "
+                           "pointer (%llu, expected %llu)",
+                           (unsigned long long)le64_to_cpu(gd->bg_blkno),
+                           (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
+                           (unsigned long long)le64_to_cpu(di->i_blkno));
+               return -EIO;
+       }
+
+       max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc);
+       if (le16_to_cpu(gd->bg_bits) > max_bits) {
+               ocfs2_error(sb, "Group descriptor # %llu has bit count of %u",
+                           (unsigned long long)le64_to_cpu(gd->bg_blkno),
+                           le16_to_cpu(gd->bg_bits));
+               return -EIO;
+       }
+
+       if (le16_to_cpu(gd->bg_chain) >=
+           le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) {
+               ocfs2_error(sb, "Group descriptor # %llu has bad chain %u",
+                           (unsigned long long)le64_to_cpu(gd->bg_blkno),
+                           le16_to_cpu(gd->bg_chain));
+               return -EIO;
+       }
+
+       if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
+               ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
+                           "claims that %u are free",
+                           (unsigned long long)le64_to_cpu(gd->bg_blkno),
+                           le16_to_cpu(gd->bg_bits),
+                           le16_to_cpu(gd->bg_free_bits_count));
+               return -EIO;
+       }
+
+       if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
+               ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
+                           "max bitmap bits of %u",
+                           (unsigned long long)le64_to_cpu(gd->bg_blkno),
+                           le16_to_cpu(gd->bg_bits),
+                           8 * le16_to_cpu(gd->bg_size));
+               return -EIO;
+       }
+
+       return 0;
+}
+
+static int ocfs2_block_group_fill(handle_t *handle,
                                  struct inode *alloc_inode,
                                  struct buffer_head *bg_bh,
                                  u64 group_blkno,
@@ -157,8 +210,9 @@ static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle,
        mlog_entry_void();
 
        if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) {
-               ocfs2_error(alloc_inode->i_sb, "group block (%"MLFu64") "
-                           "!= b_blocknr (%llu)", group_blkno,
+               ocfs2_error(alloc_inode->i_sb, "group block (%llu) != "
+                           "b_blocknr (%llu)",
+                           (unsigned long long)group_blkno,
                            (unsigned long long) bg_bh->b_blocknr);
                status = -EIO;
                goto bail;
@@ -225,7 +279,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
        struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
        struct ocfs2_chain_list *cl;
        struct ocfs2_alloc_context *ac = NULL;
-       struct ocfs2_journal_handle *handle = NULL;
+       handle_t *handle = NULL;
        u32 bit_off, num_bits;
        u16 alloc_rec;
        u64 bg_blkno;
@@ -236,16 +290,8 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 
        mlog_entry_void();
 
-       handle = ocfs2_alloc_handle(osb);
-       if (!handle) {
-               status = -ENOMEM;
-               mlog_errno(status);
-               goto bail;
-       }
-
        cl = &fe->id2.i_chain;
        status = ocfs2_reserve_clusters(osb,
-                                       handle,
                                        le16_to_cpu(cl->cl_cpg),
                                        &ac);
        if (status < 0) {
@@ -256,7 +302,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 
        credits = ocfs2_calc_group_alloc_credits(osb->sb,
                                                 le16_to_cpu(cl->cl_cpg));
-       handle = ocfs2_start_trans(osb, handle, credits);
+       handle = ocfs2_start_trans(osb, credits);
        if (IS_ERR(handle)) {
                status = PTR_ERR(handle);
                handle = NULL;
@@ -280,8 +326,8 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 
        /* setup the group */
        bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
-       mlog(0, "new descriptor, record %u, at block %"MLFu64"\n",
-            alloc_rec, bg_blkno);
+       mlog(0, "new descriptor, record %u, at block %llu\n",
+            alloc_rec, (unsigned long long)bg_blkno);
 
        bg_bh = sb_getblk(osb->sb, bg_blkno);
        if (!bg_bh) {
@@ -341,7 +387,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
        status = 0;
 bail:
        if (handle)
-               ocfs2_commit_trans(handle);
+               ocfs2_commit_trans(osb, handle);
 
        if (ac)
                ocfs2_free_alloc_context(ac);
@@ -354,27 +400,38 @@ bail:
 }
 
 static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
-                                      struct ocfs2_alloc_context *ac)
+                                      struct ocfs2_alloc_context *ac,
+                                      int type,
+                                      u32 slot)
 {
        int status;
        u32 bits_wanted = ac->ac_bits_wanted;
-       struct inode *alloc_inode = ac->ac_inode;
+       struct inode *alloc_inode;
        struct buffer_head *bh = NULL;
-       struct ocfs2_journal_handle *handle = ac->ac_handle;
        struct ocfs2_dinode *fe;
        u32 free_bits;
 
        mlog_entry_void();
 
-       BUG_ON(handle->flags & OCFS2_HANDLE_STARTED);
+       alloc_inode = ocfs2_get_system_file_inode(osb, type, slot);
+       if (!alloc_inode) {
+               mlog_errno(-EINVAL);
+               return -EINVAL;
+       }
 
-       ocfs2_handle_add_inode(handle, alloc_inode);
-       status = ocfs2_meta_lock(alloc_inode, handle, &bh, 1);
+       mutex_lock(&alloc_inode->i_mutex);
+
+       status = ocfs2_meta_lock(alloc_inode, &bh, 1);
        if (status < 0) {
+               mutex_unlock(&alloc_inode->i_mutex);
+               iput(alloc_inode);
+
                mlog_errno(status);
-               goto bail;
+               return status;
        }
 
+       ac->ac_inode = alloc_inode;
+
        fe = (struct ocfs2_dinode *) bh->b_data;
        if (!OCFS2_IS_VALID_DINODE(fe)) {
                OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe);
@@ -382,8 +439,8 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
                goto bail;
        }
        if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) {
-               ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator "
-                           "# %"MLFu64, le64_to_cpu(fe->i_blkno));
+               ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu",
+                           (unsigned long long)le64_to_cpu(fe->i_blkno));
                status = -EIO;
                goto bail;
        }
@@ -425,14 +482,13 @@ bail:
 }
 
 int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
-                              struct ocfs2_journal_handle *handle,
                               struct ocfs2_dinode *fe,
                               struct ocfs2_alloc_context **ac)
 {
        int status;
-       struct inode *alloc_inode = NULL;
+       u32 slot;
 
-       *ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
+       *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
        if (!(*ac)) {
                status = -ENOMEM;
                mlog_errno(status);
@@ -440,28 +496,18 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
        }
 
        (*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe);
-       (*ac)->ac_handle = handle;
        (*ac)->ac_which = OCFS2_AC_USE_META;
 
 #ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS
-       alloc_inode = ocfs2_get_system_file_inode(osb,
-                                                 EXTENT_ALLOC_SYSTEM_INODE,
-                                                 0);
+       slot = 0;
 #else
-       alloc_inode = ocfs2_get_system_file_inode(osb,
-                                                 EXTENT_ALLOC_SYSTEM_INODE,
-                                                 osb->slot_num);
+       slot = osb->slot_num;
 #endif
-       if (!alloc_inode) {
-               status = -ENOMEM;
-               mlog_errno(status);
-               goto bail;
-       }
 
-       (*ac)->ac_inode = igrab(alloc_inode);
        (*ac)->ac_group_search = ocfs2_block_group_search;
 
-       status = ocfs2_reserve_suballoc_bits(osb, (*ac));
+       status = ocfs2_reserve_suballoc_bits(osb, (*ac),
+                                            EXTENT_ALLOC_SYSTEM_INODE, slot);
        if (status < 0) {
                if (status != -ENOSPC)
                        mlog_errno(status);
@@ -475,21 +521,16 @@ bail:
                *ac = NULL;
        }
 
-       if (alloc_inode)
-               iput(alloc_inode);
-
        mlog_exit(status);
        return status;
 }
 
 int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
-                           struct ocfs2_journal_handle *handle,
                            struct ocfs2_alloc_context **ac)
 {
        int status;
-       struct inode *alloc_inode = NULL;
 
-       *ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
+       *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
        if (!(*ac)) {
                status = -ENOMEM;
                mlog_errno(status);
@@ -497,22 +538,13 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
        }
 
        (*ac)->ac_bits_wanted = 1;
-       (*ac)->ac_handle = handle;
        (*ac)->ac_which = OCFS2_AC_USE_INODE;
 
-       alloc_inode = ocfs2_get_system_file_inode(osb,
-                                                 INODE_ALLOC_SYSTEM_INODE,
-                                                 osb->slot_num);
-       if (!alloc_inode) {
-               status = -ENOMEM;
-               mlog_errno(status);
-               goto bail;
-       }
-
-       (*ac)->ac_inode = igrab(alloc_inode);
        (*ac)->ac_group_search = ocfs2_block_group_search;
 
-       status = ocfs2_reserve_suballoc_bits(osb, *ac);
+       status = ocfs2_reserve_suballoc_bits(osb, *ac,
+                                            INODE_ALLOC_SYSTEM_INODE,
+                                            osb->slot_num);
        if (status < 0) {
                if (status != -ENOSPC)
                        mlog_errno(status);
@@ -526,9 +558,6 @@ bail:
                *ac = NULL;
        }
 
-       if (alloc_inode)
-               iput(alloc_inode);
-
        mlog_exit(status);
        return status;
 }
@@ -540,20 +569,17 @@ int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
 {
        int status;
 
-       ac->ac_inode = ocfs2_get_system_file_inode(osb,
-                                                  GLOBAL_BITMAP_SYSTEM_INODE,
-                                                  OCFS2_INVALID_SLOT);
-       if (!ac->ac_inode) {
-               status = -EINVAL;
-               mlog(ML_ERROR, "Could not get bitmap inode!\n");
-               goto bail;
-       }
        ac->ac_which = OCFS2_AC_USE_MAIN;
        ac->ac_group_search = ocfs2_cluster_group_search;
 
-       status = ocfs2_reserve_suballoc_bits(osb, ac);
-       if (status < 0 && status != -ENOSPC)
+       status = ocfs2_reserve_suballoc_bits(osb, ac,
+                                            GLOBAL_BITMAP_SYSTEM_INODE,
+                                            OCFS2_INVALID_SLOT);
+       if (status < 0 && status != -ENOSPC) {
                mlog_errno(status);
+               goto bail;
+       }
+
 bail:
        return status;
 }
@@ -562,7 +588,6 @@ bail:
  * use so we figure it out for them, but unfortunately this clutters
  * things a bit. */
 int ocfs2_reserve_clusters(struct ocfs2_super *osb,
-                          struct ocfs2_journal_handle *handle,
                           u32 bits_wanted,
                           struct ocfs2_alloc_context **ac)
 {
@@ -570,9 +595,7 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb,
 
        mlog_entry_void();
 
-       BUG_ON(!handle);
-
-       *ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
+       *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
        if (!(*ac)) {
                status = -ENOMEM;
                mlog_errno(status);
@@ -580,12 +603,10 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb,
        }
 
        (*ac)->ac_bits_wanted = bits_wanted;
-       (*ac)->ac_handle = handle;
 
        status = -ENOSPC;
        if (ocfs2_alloc_should_use_local(osb, bits_wanted)) {
                status = ocfs2_reserve_local_alloc_bits(osb,
-                                                       handle,
                                                        bits_wanted,
                                                        *ac);
                if ((status < 0) && (status != -ENOSPC)) {
@@ -662,6 +683,7 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
 static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
                                             struct buffer_head *bg_bh,
                                             unsigned int bits_wanted,
+                                            unsigned int total_bits,
                                             u16 *bit_off,
                                             u16 *bits_found)
 {
@@ -678,10 +700,8 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
        found = start = best_offset = best_size = 0;
        bitmap = bg->bg_bitmap;
 
-       while((offset = ocfs2_find_next_zero_bit(bitmap,
-                                                le16_to_cpu(bg->bg_bits),
-                                                start)) != -1) {
-               if (offset == le16_to_cpu(bg->bg_bits))
+       while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) {
+               if (offset == total_bits)
                        break;
 
                if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) {
@@ -727,7 +747,7 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
        return status;
 }
 
-static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle,
+static inline int ocfs2_block_group_set_bits(handle_t *handle,
                                             struct inode *alloc_inode,
                                             struct ocfs2_group_desc *bg,
                                             struct buffer_head *group_bh,
@@ -798,7 +818,7 @@ static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl)
        return best;
 }
 
-static int ocfs2_relink_block_group(struct ocfs2_journal_handle *handle,
+static int ocfs2_relink_block_group(handle_t *handle,
                                    struct inode *alloc_inode,
                                    struct buffer_head *fe_bh,
                                    struct buffer_head *bg_bh,
@@ -829,9 +849,10 @@ static int ocfs2_relink_block_group(struct ocfs2_journal_handle *handle,
                goto out;
        }
 
-       mlog(0, "In suballoc %"MLFu64", chain %u, move group %"MLFu64" to "
-            "top, prev = %"MLFu64"\n",
-            fe->i_blkno, chain, bg->bg_blkno, prev_bg->bg_blkno);
+       mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n",
+            (unsigned long long)fe->i_blkno, chain,
+            (unsigned long long)bg->bg_blkno,
+            (unsigned long long)prev_bg->bg_blkno);
 
        fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno);
        bg_ptr = le64_to_cpu(bg->bg_next_group);
@@ -909,14 +930,35 @@ static int ocfs2_cluster_group_search(struct inode *inode,
 {
        int search = -ENOSPC;
        int ret;
-       struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data;
+       struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
        u16 tmp_off, tmp_found;
+       unsigned int max_bits, gd_cluster_off;
 
        BUG_ON(!ocfs2_is_cluster_bitmap(inode));
 
-       if (bg->bg_free_bits_count) {
+       if (gd->bg_free_bits_count) {
+               max_bits = le16_to_cpu(gd->bg_bits);
+
+               /* Tail groups in cluster bitmaps which aren't cpg
+                * aligned are prone to partial extension by a failed
+                * fs resize. If the file system resize never got to
+                * update the dinode cluster count, then we don't want
+                * to trust any clusters past it, regardless of what
+                * the group descriptor says. */
+               gd_cluster_off = ocfs2_blocks_to_clusters(inode->i_sb,
+                                                         le64_to_cpu(gd->bg_blkno));
+               if ((gd_cluster_off + max_bits) >
+                   OCFS2_I(inode)->ip_clusters) {
+                       max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off;
+                       mlog(0, "Desc %llu, bg_bits %u, clusters %u, use %u\n",
+                            (unsigned long long)le64_to_cpu(gd->bg_blkno),
+                            le16_to_cpu(gd->bg_bits),
+                            OCFS2_I(inode)->ip_clusters, max_bits);
+               }
+
                ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
                                                        group_bh, bits_wanted,
+                                                       max_bits,
                                                        &tmp_off, &tmp_found);
                if (ret)
                        return ret;
@@ -949,23 +991,115 @@ static int ocfs2_block_group_search(struct inode *inode,
        if (bg->bg_free_bits_count)
                ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
                                                        group_bh, bits_wanted,
+                                                       le16_to_cpu(bg->bg_bits),
                                                        bit_off, bits_found);
 
        return ret;
 }
 
+static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
+                                      handle_t *handle,
+                                      struct buffer_head *di_bh,
+                                      u32 num_bits,
+                                      u16 chain)
+{
+       int ret;
+       u32 tmp_used;
+       struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
+       struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain;
+
+       ret = ocfs2_journal_access(handle, inode, di_bh,
+                                  OCFS2_JOURNAL_ACCESS_WRITE);
+       if (ret < 0) {
+               mlog_errno(ret);
+               goto out;
+       }
+
+       tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
+       di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used);
+       le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits);
+
+       ret = ocfs2_journal_dirty(handle, di_bh);
+       if (ret < 0)
+               mlog_errno(ret);
+
+out:
+       return ret;
+}
+/*
+ * Try to satisfy an allocation from the single group descriptor stored
+ * at block @gd_blkno.
+ *
+ * The descriptor block is read with ocfs2_read_block(), checked with
+ * OCFS2_IS_VALID_GROUP_DESC() and searched with ac->ac_group_search().
+ * If the search does not fail, the allocator dinode in ac->ac_bh is
+ * updated through ocfs2_alloc_dinode_update_counts() using the
+ * descriptor's bg_chain, and ocfs2_block_group_set_bits() is called
+ * with *bit_off and *num_bits.
+ *
+ * Outputs:
+ *  *bit_off   - filled in by the ac->ac_group_search() callback.
+ *  *num_bits  - number of bits the search callback reported as found.
+ *  *bits_left - bg_free_bits_count of the descriptor, read after the
+ *               ocfs2_block_group_set_bits() call (also when that call
+ *               failed). Left untouched on the earlier error paths.
+ * The group's block number is not returned through any parameter.
+ *
+ * Returns a negative error from the read, -EIO for an invalid
+ * descriptor, the negative result of the search (-ENOSPC is passed on
+ * without logging) or of the dinode update; otherwise the return value
+ * of ocfs2_block_group_set_bits(). The descriptor buffer is released
+ * with brelse() on every path that got past the read.
+ *
+ * NOTE(review): the dinode counts are updated before the group bitmap;
+ * if ocfs2_block_group_set_bits() fails, nothing in this function
+ * undoes the count update -- confirm the callers/journal handle that.
+ */
+static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
+                                 handle_t *handle,
+                                 u32 bits_wanted,
+                                 u32 min_bits,
+                                 u16 *bit_off,
+                                 unsigned int *num_bits,
+                                 u64 gd_blkno,
+                                 u16 *bits_left)
+{
+       int ret;
+       u16 found;
+       struct buffer_head *group_bh = NULL;
+       struct ocfs2_group_desc *gd;
+       struct inode *alloc_inode = ac->ac_inode;
+
+       ret = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), gd_blkno,
+                              &group_bh, OCFS2_BH_CACHED, alloc_inode);
+       if (ret < 0) {
+               mlog_errno(ret);
+               return ret;
+       }
+
+       gd = (struct ocfs2_group_desc *) group_bh->b_data;
+       if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
+               OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, gd);
+               ret = -EIO;
+               goto out;
+       }
+
+       ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
+                                 bit_off, &found);
+       if (ret < 0) {
+               if (ret != -ENOSPC)
+                       mlog_errno(ret);
+               goto out;
+       }
+
+       *num_bits = found;
+
+       ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh,
+                                              *num_bits,
+                                              le16_to_cpu(gd->bg_chain));
+       if (ret < 0) {
+               mlog_errno(ret);
+               goto out;
+       }
+
+       ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh,
+                                        *bit_off, *num_bits);
+       if (ret < 0)
+               mlog_errno(ret);
+
+       *bits_left = le16_to_cpu(gd->bg_free_bits_count);
+
+out:
+       brelse(group_bh);
+
+       return ret;
+}
+
 static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
+                             handle_t *handle,
                              u32 bits_wanted,
                              u32 min_bits,
                              u16 *bit_off,
                              unsigned int *num_bits,
-                             u64 *bg_blkno)
+                             u64 *bg_blkno,
+                             u16 *bits_left)
 {
        int status;
        u16 chain, tmp_bits;
        u32 tmp_used;
        u64 next_group;
-       struct ocfs2_journal_handle *handle = ac->ac_handle;
        struct inode *alloc_inode = ac->ac_inode;
        struct buffer_head *group_bh = NULL;
        struct buffer_head *prev_group_bh = NULL;
@@ -974,8 +1108,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
        struct ocfs2_group_desc *bg;
 
        chain = ac->ac_chain;
-       mlog(0, "trying to alloc %u bits from chain %u, inode %"MLFu64"\n",
-            bits_wanted, chain, OCFS2_I(alloc_inode)->ip_blkno);
+       mlog(0, "trying to alloc %u bits from chain %u, inode %llu\n",
+            bits_wanted, chain,
+            (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno);
 
        status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb),
                                  le64_to_cpu(cl->cl_recs[chain].c_blkno),
@@ -985,9 +1120,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
                goto bail;
        }
        bg = (struct ocfs2_group_desc *) group_bh->b_data;
-       if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
-               OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
-               status = -EIO;
+       status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
+       if (status) {
+               mlog_errno(status);
                goto bail;
        }
 
@@ -1015,9 +1150,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
                        goto bail;
                }
                bg = (struct ocfs2_group_desc *) group_bh->b_data;
-               if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
-                       OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
-                       status = -EIO;
+               status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
+               if (status) {
+                       mlog_errno(status);
                        goto bail;
                }
        }
@@ -1027,8 +1162,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
                goto bail;
        }
 
-       mlog(0, "alloc succeeds: we give %u bits from block group %"MLFu64"\n",
-            tmp_bits, bg->bg_blkno);
+       mlog(0, "alloc succeeds: we give %u bits from block group %llu\n",
+            tmp_bits, (unsigned long long)bg->bg_blkno);
 
        *num_bits = tmp_bits;
 
@@ -1092,10 +1227,11 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
                goto bail;
        }
 
-       mlog(0, "Allocated %u bits from suballocator %"MLFu64"\n",
-            *num_bits, fe->i_blkno);
+       mlog(0, "Allocated %u bits from suballocator %llu\n", *num_bits,
+            (unsigned long long)fe->i_blkno);
 
        *bg_blkno = le64_to_cpu(bg->bg_blkno);
+       *bits_left = le16_to_cpu(bg->bg_free_bits_count);
 bail:
        if (group_bh)
                brelse(group_bh);
@@ -1109,6 +1245,7 @@ bail:
 /* will give out up to bits_wanted contiguous bits. */
 static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
                                     struct ocfs2_alloc_context *ac,
+                                    handle_t *handle,
                                     u32 bits_wanted,
                                     u32 min_bits,
                                     u16 *bit_off,
@@ -1117,6 +1254,8 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
 {
        int status;
        u16 victim, i;
+       u16 bits_left = 0;
+       u64 hint_blkno = ac->ac_last_group;
        struct ocfs2_chain_list *cl;
        struct ocfs2_dinode *fe;
 
@@ -1134,25 +1273,47 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
        }
        if (le32_to_cpu(fe->id1.bitmap1.i_used) >=
            le32_to_cpu(fe->id1.bitmap1.i_total)) {
-               ocfs2_error(osb->sb, "Chain allocator dinode %"MLFu64" has %u"
-                           "used bits but only %u total.",
-                           le64_to_cpu(fe->i_blkno),
+               ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used "
+                           "bits but only %u total.",
+                           (unsigned long long)le64_to_cpu(fe->i_blkno),
                            le32_to_cpu(fe->id1.bitmap1.i_used),
                            le32_to_cpu(fe->id1.bitmap1.i_total));
                status = -EIO;
                goto bail;
        }
 
+       if (hint_blkno) {
+               /* Attempt to short-circuit the usual search mechanism
+                * by jumping straight to the most recently used
+                * allocation group. This helps us maintain some
+                * contiguousness across allocations. */
+               status = ocfs2_search_one_group(ac, handle, bits_wanted,
+                                               min_bits, bit_off, num_bits,
+                                               hint_blkno, &bits_left);
+               if (!status) {
+                       /* Be careful to update *bg_blkno here as the
+                        * caller is expecting it to be filled in, and
+                        * ocfs2_search_one_group() won't do that for
+                        * us. */
+                       *bg_blkno = hint_blkno;
+                       goto set_hint;
+               }
+               if (status < 0 && status != -ENOSPC) {
+                       mlog_errno(status);
+                       goto bail;
+               }
+       }
+
        cl = (struct ocfs2_chain_list *) &fe->id2.i_chain;
 
        victim = ocfs2_find_victim_chain(cl);
        ac->ac_chain = victim;
        ac->ac_allow_chain_relink = 1;
 
-       status = ocfs2_search_chain(ac, bits_wanted, min_bits, bit_off,
-                                   num_bits, bg_blkno);
+       status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, bit_off,
+                                   num_bits, bg_blkno, &bits_left);
        if (!status)
-               goto bail;
+               goto set_hint;
        if (status < 0 && status != -ENOSPC) {
                mlog_errno(status);
                goto bail;
@@ -1173,9 +1334,9 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
                        continue;
 
                ac->ac_chain = i;
-               status = ocfs2_search_chain(ac, bits_wanted, min_bits,
-                                           bit_off, num_bits,
-                                           bg_blkno);
+               status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
+                                           bit_off, num_bits, bg_blkno,
+                                           &bits_left);
                if (!status)
                        break;
                if (status < 0 && status != -ENOSPC) {
@@ -1183,14 +1344,25 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
                        goto bail;
                }
        }
-bail:
 
+set_hint:
+       if (status != -ENOSPC) {
+               /* If the next search of this group is not likely to
+                * yield a suitable extent, then we reset the last
+                * group hint so as to not waste a disk read */
+               if (bits_left < min_bits)
+                       ac->ac_last_group = 0;
+               else
+                       ac->ac_last_group = *bg_blkno;
+       }
+
+bail:
        mlog_exit(status);
        return status;
 }
 
 int ocfs2_claim_metadata(struct ocfs2_super *osb,
-                        struct ocfs2_journal_handle *handle,
+                        handle_t *handle,
                         struct ocfs2_alloc_context *ac,
                         u32 bits_wanted,
                         u16 *suballoc_bit_start,
@@ -1203,10 +1375,10 @@ int ocfs2_claim_metadata(struct ocfs2_super *osb,
        BUG_ON(!ac);
        BUG_ON(ac->ac_bits_wanted < (ac->ac_bits_given + bits_wanted));
        BUG_ON(ac->ac_which != OCFS2_AC_USE_META);
-       BUG_ON(ac->ac_handle != handle);
 
        status = ocfs2_claim_suballoc_bits(osb,
                                           ac,
+                                          handle,
                                           bits_wanted,
                                           1,
                                           suballoc_bit_start,
@@ -1227,7 +1399,7 @@ bail:
 }
 
 int ocfs2_claim_new_inode(struct ocfs2_super *osb,
-                         struct ocfs2_journal_handle *handle,
+                         handle_t *handle,
                          struct ocfs2_alloc_context *ac,
                          u16 *suballoc_bit,
                          u64 *fe_blkno)
@@ -1242,10 +1414,10 @@ int ocfs2_claim_new_inode(struct ocfs2_super *osb,
        BUG_ON(ac->ac_bits_given != 0);
        BUG_ON(ac->ac_bits_wanted != 1);
        BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE);
-       BUG_ON(ac->ac_handle != handle);
 
        status = ocfs2_claim_suballoc_bits(osb,
                                           ac,
+                                          handle,
                                           1,
                                           1,
                                           suballoc_bit,
@@ -1330,7 +1502,7 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode,
  * of any size.
  */
 int ocfs2_claim_clusters(struct ocfs2_super *osb,
-                        struct ocfs2_journal_handle *handle,
+                        handle_t *handle,
                         struct ocfs2_alloc_context *ac,
                         u32 min_clusters,
                         u32 *cluster_start,
@@ -1338,7 +1510,7 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
 {
        int status;
        unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
-       u64 bg_blkno;
+       u64 bg_blkno = 0;
        u16 bg_bit_off;
 
        mlog_entry_void();
@@ -1348,7 +1520,6 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
 
        BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
               && ac->ac_which != OCFS2_AC_USE_MAIN);
-       BUG_ON(ac->ac_handle != handle);
 
        if (ac->ac_which == OCFS2_AC_USE_LOCAL) {
                status = ocfs2_claim_local_alloc_bits(osb,
@@ -1374,6 +1545,7 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
 
                status = ocfs2_claim_suballoc_bits(osb,
                                                   ac,
+                                                  handle,
                                                   bits_wanted,
                                                   min_clusters,
                                                   &bg_bit_off,
@@ -1400,7 +1572,7 @@ bail:
        return status;
 }
 
-static inline int ocfs2_block_group_clear_bits(struct ocfs2_journal_handle *handle,
+static inline int ocfs2_block_group_clear_bits(handle_t *handle,
                                               struct inode *alloc_inode,
                                               struct ocfs2_group_desc *bg,
                                               struct buffer_head *group_bh,
@@ -1455,7 +1627,7 @@ bail:
 /*
  * expects the suballoc inode to already be locked.
  */
-static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle,
+static int ocfs2_free_suballoc_bits(handle_t *handle,
                                    struct inode *alloc_inode,
                                    struct buffer_head *alloc_bh,
                                    unsigned int start_bit,
@@ -1479,10 +1651,9 @@ static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle,
        }
        BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl));
 
-       mlog(0, "suballocator %"MLFu64": freeing %u bits from group %"MLFu64
-            ", starting at %u\n",
-            OCFS2_I(alloc_inode)->ip_blkno, count, bg_blkno,
-            start_bit);
+       mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n",
+            (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count,
+            (unsigned long long)bg_blkno, start_bit);
 
        status = ocfs2_read_block(osb, bg_blkno, &group_bh, OCFS2_BH_CACHED,
                                  alloc_inode);
@@ -1492,9 +1663,9 @@ static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle,
        }
 
        group = (struct ocfs2_group_desc *) group_bh->b_data;
-       if (!OCFS2_IS_VALID_GROUP_DESC(group)) {
-               OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, group);
-               status = -EIO;
+       status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group);
+       if (status) {
+               mlog_errno(status);
                goto bail;
        }
        BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits));
@@ -1540,7 +1711,7 @@ static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit)
        return group;
 }
 
-int ocfs2_free_dinode(struct ocfs2_journal_handle *handle,
+int ocfs2_free_dinode(handle_t *handle,
                      struct inode *inode_alloc_inode,
                      struct buffer_head *inode_alloc_bh,
                      struct ocfs2_dinode *di)
@@ -1553,7 +1724,7 @@ int ocfs2_free_dinode(struct ocfs2_journal_handle *handle,
                                        inode_alloc_bh, bit, bg_blkno, 1);
 }
 
-int ocfs2_free_extent_block(struct ocfs2_journal_handle *handle,
+int ocfs2_free_extent_block(handle_t *handle,
                            struct inode *eb_alloc_inode,
                            struct buffer_head *eb_alloc_bh,
                            struct ocfs2_extent_block *eb)
@@ -1566,7 +1737,7 @@ int ocfs2_free_extent_block(struct ocfs2_journal_handle *handle,
                                        bit, bg_blkno, 1);
 }
 
-int ocfs2_free_clusters(struct ocfs2_journal_handle *handle,
+int ocfs2_free_clusters(handle_t *handle,
                       struct inode *bitmap_inode,
                       struct buffer_head *bitmap_bh,
                       u64 start_blk,
@@ -1592,10 +1763,10 @@ int ocfs2_free_clusters(struct ocfs2_journal_handle *handle,
        ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno,
                                     &bg_start_bit);
 
-       mlog(0, "want to free %u clusters starting at block %"MLFu64"\n",
-            num_clusters, start_blk);
-       mlog(0, "bg_blkno = %"MLFu64", bg_start_bit = %u\n",
-            bg_blkno, bg_start_bit);
+       mlog(0, "want to free %u clusters starting at block %llu\n",
+            num_clusters, (unsigned long long)start_blk);
+       mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n",
+            (unsigned long long)bg_blkno, bg_start_bit);
 
        status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
                                          bg_start_bit, bg_blkno,
@@ -1616,18 +1787,22 @@ static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg)
        printk("bg_free_bits_count: %u\n", bg->bg_free_bits_count);
        printk("bg_chain:           %u\n", bg->bg_chain);
        printk("bg_generation:      %u\n", le32_to_cpu(bg->bg_generation));
-       printk("bg_next_group:      %"MLFu64"\n", bg->bg_next_group);
-       printk("bg_parent_dinode:   %"MLFu64"\n", bg->bg_parent_dinode);
-       printk("bg_blkno:           %"MLFu64"\n", bg->bg_blkno);
+       printk("bg_next_group:      %llu\n",
+              (unsigned long long)bg->bg_next_group);
+       printk("bg_parent_dinode:   %llu\n",
+              (unsigned long long)bg->bg_parent_dinode);
+       printk("bg_blkno:           %llu\n",
+              (unsigned long long)bg->bg_blkno);
 }
 
 static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe)
 {
        int i;
 
-       printk("Suballoc Inode %"MLFu64":\n", fe->i_blkno);
+       printk("Suballoc Inode %llu:\n", (unsigned long long)fe->i_blkno);
        printk("i_signature:                  %s\n", fe->i_signature);
-       printk("i_size:                       %"MLFu64"\n", fe->i_size);
+       printk("i_size:                       %llu\n",
+              (unsigned long long)fe->i_size);
        printk("i_clusters:                   %u\n", fe->i_clusters);
        printk("i_generation:                 %u\n",
               le32_to_cpu(fe->i_generation));
@@ -1645,7 +1820,7 @@ static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe)
                       fe->id2.i_chain.cl_recs[i].c_free);
                printk("fe->id2.i_chain.cl_recs[%d].c_total: %u\n", i,
                       fe->id2.i_chain.cl_recs[i].c_total);
-               printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %"MLFu64"\n", i,
-                      fe->id2.i_chain.cl_recs[i].c_blkno);
+               printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %llu\n", i,
+                      (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno);
        }
 }