X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Focfs2%2Fsuballoc.c;h=6dbb1176275973d7088bdc5d696e93227b1668bf;hb=refs%2Fheads%2Fvserver;hp=c46c164aefbb23d6f12626df7c3ff4ff6c0268ba;hpb=76828883507a47dae78837ab5dec5a5b4513c667;p=linux-2.6.git diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index c46c164ae..6dbb11762 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -49,7 +49,7 @@ static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); -static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle, +static int ocfs2_block_group_fill(handle_t *handle, struct inode *alloc_inode, struct buffer_head *bg_bh, u64 group_blkno, @@ -59,9 +59,6 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, struct inode *alloc_inode, struct buffer_head *bh); -static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, - struct ocfs2_alloc_context *ac); - static int ocfs2_cluster_group_search(struct inode *inode, struct buffer_head *group_bh, u32 bits_wanted, u32 min_bits, @@ -70,14 +67,9 @@ static int ocfs2_block_group_search(struct inode *inode, struct buffer_head *group_bh, u32 bits_wanted, u32 min_bits, u16 *bit_off, u16 *bits_found); -static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, - u32 bits_wanted, - u32 min_bits, - u16 *bit_off, - unsigned int *num_bits, - u64 *bg_blkno); static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, struct ocfs2_alloc_context *ac, + handle_t *handle, u32 bits_wanted, u32 min_bits, u16 *bit_off, @@ -85,25 +77,20 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, u64 *bg_blkno); static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, int nr); -static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, - struct buffer_head *bg_bh, - unsigned int bits_wanted, - u16 *bit_off, - u16 *bits_found); -static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle, +static inline int ocfs2_block_group_set_bits(handle_t *handle, struct inode *alloc_inode, struct ocfs2_group_desc *bg, struct buffer_head *group_bh, unsigned int bit_off, unsigned int num_bits); -static inline int ocfs2_block_group_clear_bits(struct ocfs2_journal_handle *handle, +static inline int ocfs2_block_group_clear_bits(handle_t *handle, struct inode *alloc_inode, struct ocfs2_group_desc *bg, struct buffer_head *group_bh, unsigned int bit_off, unsigned int num_bits); -static int ocfs2_relink_block_group(struct ocfs2_journal_handle *handle, +static int ocfs2_relink_block_group(handle_t *handle, struct inode *alloc_inode, struct buffer_head *fe_bh, struct buffer_head *bg_bh, @@ -111,7 +98,7 @@ static int ocfs2_relink_block_group(struct ocfs2_journal_handle *handle, u16 chain); static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, u32 wanted); -static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle, +static int ocfs2_free_suballoc_bits(handle_t *handle, struct inode *alloc_inode, struct buffer_head *alloc_bh, unsigned int start_bit, @@ -131,8 +118,16 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode, void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) { - if (ac->ac_inode) - iput(ac->ac_inode); + struct inode *inode = ac->ac_inode; + + if (inode) { + if (ac->ac_which != OCFS2_AC_USE_LOCAL) + ocfs2_meta_unlock(inode, 1); + + mutex_unlock(&inode->i_mutex); + + iput(inode); + } if (ac->ac_bh) brelse(ac->ac_bh); kfree(ac); @@ -143,7 +138,65 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc); } -static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle, +/* somewhat more expensive than our other checks, so use sparingly. */ +static int ocfs2_check_group_descriptor(struct super_block *sb, + struct ocfs2_dinode *di, + struct ocfs2_group_desc *gd) +{ + unsigned int max_bits; + + if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { + OCFS2_RO_ON_INVALID_GROUP_DESC(sb, gd); + return -EIO; + } + + if (di->i_blkno != gd->bg_parent_dinode) { + ocfs2_error(sb, "Group descriptor # %llu has bad parent " + "pointer (%llu, expected %llu)", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + (unsigned long long)le64_to_cpu(gd->bg_parent_dinode), + (unsigned long long)le64_to_cpu(di->i_blkno)); + return -EIO; + } + + max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc); + if (le16_to_cpu(gd->bg_bits) > max_bits) { + ocfs2_error(sb, "Group descriptor # %llu has bit count of %u", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_bits)); + return -EIO; + } + + if (le16_to_cpu(gd->bg_chain) >= + le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) { + ocfs2_error(sb, "Group descriptor # %llu has bad chain %u", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_chain)); + return -EIO; + } + + if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) { + ocfs2_error(sb, "Group descriptor # %llu has bit count %u but " + "claims that %u are free", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_bits), + le16_to_cpu(gd->bg_free_bits_count)); + return -EIO; + } + + if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) { + ocfs2_error(sb, "Group descriptor # %llu has bit count %u but " + "max bitmap bits of %u", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_bits), + 8 * le16_to_cpu(gd->bg_size)); + return -EIO; + } + + return 0; +} + +static int ocfs2_block_group_fill(handle_t *handle, struct inode *alloc_inode, struct buffer_head *bg_bh, u64 group_blkno, @@ -157,8 +210,9 @@ static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle, mlog_entry_void(); if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) { - ocfs2_error(alloc_inode->i_sb, "group block (%"MLFu64") " - "!= b_blocknr (%llu)", group_blkno, + ocfs2_error(alloc_inode->i_sb, "group block (%llu) != " + "b_blocknr (%llu)", + (unsigned long long)group_blkno, (unsigned long long) bg_bh->b_blocknr); status = -EIO; goto bail; @@ -225,7 +279,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data; struct ocfs2_chain_list *cl; struct ocfs2_alloc_context *ac = NULL; - struct ocfs2_journal_handle *handle = NULL; + handle_t *handle = NULL; u32 bit_off, num_bits; u16 alloc_rec; u64 bg_blkno; @@ -236,16 +290,8 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, mlog_entry_void(); - handle = ocfs2_alloc_handle(osb); - if (!handle) { - status = -ENOMEM; - mlog_errno(status); - goto bail; - } - cl = &fe->id2.i_chain; status = ocfs2_reserve_clusters(osb, - handle, le16_to_cpu(cl->cl_cpg), &ac); if (status < 0) { @@ -256,7 +302,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, credits = ocfs2_calc_group_alloc_credits(osb->sb, le16_to_cpu(cl->cl_cpg)); - handle = ocfs2_start_trans(osb, handle, credits); + handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) { status = PTR_ERR(handle); handle = NULL; @@ -280,8 +326,8 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, /* setup the group */ bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off); - mlog(0, "new descriptor, record %u, at block %"MLFu64"\n", - alloc_rec, bg_blkno); + mlog(0, "new descriptor, record %u, at block %llu\n", + alloc_rec, (unsigned long long)bg_blkno); bg_bh = sb_getblk(osb->sb, bg_blkno); if (!bg_bh) { @@ -341,7 +387,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, status = 0; bail: if (handle) - ocfs2_commit_trans(handle); + ocfs2_commit_trans(osb, handle); if (ac) ocfs2_free_alloc_context(ac); @@ -354,27 +400,38 @@ bail: } static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, - struct ocfs2_alloc_context *ac) + struct ocfs2_alloc_context *ac, + int type, + u32 slot) { int status; u32 bits_wanted = ac->ac_bits_wanted; - struct inode *alloc_inode = ac->ac_inode; + struct inode *alloc_inode; struct buffer_head *bh = NULL; - struct ocfs2_journal_handle *handle = ac->ac_handle; struct ocfs2_dinode *fe; u32 free_bits; mlog_entry_void(); - BUG_ON(handle->flags & OCFS2_HANDLE_STARTED); + alloc_inode = ocfs2_get_system_file_inode(osb, type, slot); + if (!alloc_inode) { + mlog_errno(-EINVAL); + return -EINVAL; + } - ocfs2_handle_add_inode(handle, alloc_inode); - status = ocfs2_meta_lock(alloc_inode, handle, &bh, 1); + mutex_lock(&alloc_inode->i_mutex); + + status = ocfs2_meta_lock(alloc_inode, &bh, 1); if (status < 0) { + mutex_unlock(&alloc_inode->i_mutex); + iput(alloc_inode); + mlog_errno(status); - goto bail; + return status; } + ac->ac_inode = alloc_inode; + fe = (struct ocfs2_dinode *) bh->b_data; if (!OCFS2_IS_VALID_DINODE(fe)) { OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); @@ -382,8 +439,8 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, goto bail; } if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) { - ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator " - "# %"MLFu64, le64_to_cpu(fe->i_blkno)); + ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu", + (unsigned long long)le64_to_cpu(fe->i_blkno)); status = -EIO; goto bail; } @@ -425,14 +482,13 @@ bail: } int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, - struct ocfs2_journal_handle *handle, struct ocfs2_dinode *fe, struct ocfs2_alloc_context **ac) { int status; - struct inode *alloc_inode = NULL; + u32 slot; - *ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL); + *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); if (!(*ac)) { status = -ENOMEM; mlog_errno(status); @@ -440,28 +496,18 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, } (*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe); - (*ac)->ac_handle = handle; (*ac)->ac_which = OCFS2_AC_USE_META; #ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS - alloc_inode = ocfs2_get_system_file_inode(osb, - EXTENT_ALLOC_SYSTEM_INODE, - 0); + slot = 0; #else - alloc_inode = ocfs2_get_system_file_inode(osb, - EXTENT_ALLOC_SYSTEM_INODE, - osb->slot_num); + slot = osb->slot_num; #endif - if (!alloc_inode) { - status = -ENOMEM; - mlog_errno(status); - goto bail; - } - (*ac)->ac_inode = igrab(alloc_inode); (*ac)->ac_group_search = ocfs2_block_group_search; - status = ocfs2_reserve_suballoc_bits(osb, (*ac)); + status = ocfs2_reserve_suballoc_bits(osb, (*ac), + EXTENT_ALLOC_SYSTEM_INODE, slot); if (status < 0) { if (status != -ENOSPC) mlog_errno(status); @@ -475,21 +521,16 @@ bail: *ac = NULL; } - if (alloc_inode) - iput(alloc_inode); - mlog_exit(status); return status; } int ocfs2_reserve_new_inode(struct ocfs2_super *osb, - struct ocfs2_journal_handle *handle, struct ocfs2_alloc_context **ac) { int status; - struct inode *alloc_inode = NULL; - *ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL); + *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); if (!(*ac)) { status = -ENOMEM; mlog_errno(status); @@ -497,22 +538,13 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb, } (*ac)->ac_bits_wanted = 1; - (*ac)->ac_handle = handle; (*ac)->ac_which = OCFS2_AC_USE_INODE; - alloc_inode = ocfs2_get_system_file_inode(osb, - INODE_ALLOC_SYSTEM_INODE, - osb->slot_num); - if (!alloc_inode) { - status = -ENOMEM; - mlog_errno(status); - goto bail; - } - - (*ac)->ac_inode = igrab(alloc_inode); (*ac)->ac_group_search = ocfs2_block_group_search; - status = ocfs2_reserve_suballoc_bits(osb, *ac); + status = ocfs2_reserve_suballoc_bits(osb, *ac, + INODE_ALLOC_SYSTEM_INODE, + osb->slot_num); if (status < 0) { if (status != -ENOSPC) mlog_errno(status); @@ -526,9 +558,6 @@ bail: *ac = NULL; } - if (alloc_inode) - iput(alloc_inode); - mlog_exit(status); return status; } @@ -540,20 +569,17 @@ int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb, { int status; - ac->ac_inode = ocfs2_get_system_file_inode(osb, - GLOBAL_BITMAP_SYSTEM_INODE, - OCFS2_INVALID_SLOT); - if (!ac->ac_inode) { - status = -EINVAL; - mlog(ML_ERROR, "Could not get bitmap inode!\n"); - goto bail; - } ac->ac_which = OCFS2_AC_USE_MAIN; ac->ac_group_search = ocfs2_cluster_group_search; - status = ocfs2_reserve_suballoc_bits(osb, ac); - if (status < 0 && status != -ENOSPC) + status = ocfs2_reserve_suballoc_bits(osb, ac, + GLOBAL_BITMAP_SYSTEM_INODE, + OCFS2_INVALID_SLOT); + if (status < 0 && status != -ENOSPC) { mlog_errno(status); + goto bail; + } + bail: return status; } @@ -562,7 +588,6 @@ bail: * use so we figure it out for them, but unfortunately this clutters * things a bit. */ int ocfs2_reserve_clusters(struct ocfs2_super *osb, - struct ocfs2_journal_handle *handle, u32 bits_wanted, struct ocfs2_alloc_context **ac) { @@ -570,9 +595,7 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb, mlog_entry_void(); - BUG_ON(!handle); - - *ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL); + *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); if (!(*ac)) { status = -ENOMEM; mlog_errno(status); @@ -580,12 +603,10 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb, } (*ac)->ac_bits_wanted = bits_wanted; - (*ac)->ac_handle = handle; status = -ENOSPC; if (ocfs2_alloc_should_use_local(osb, bits_wanted)) { status = ocfs2_reserve_local_alloc_bits(osb, - handle, bits_wanted, *ac); if ((status < 0) && (status != -ENOSPC)) { @@ -662,6 +683,7 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, struct buffer_head *bg_bh, unsigned int bits_wanted, + unsigned int total_bits, u16 *bit_off, u16 *bits_found) { @@ -678,10 +700,8 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, found = start = best_offset = best_size = 0; bitmap = bg->bg_bitmap; - while((offset = ocfs2_find_next_zero_bit(bitmap, - le16_to_cpu(bg->bg_bits), - start)) != -1) { - if (offset == le16_to_cpu(bg->bg_bits)) + while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) { + if (offset == total_bits) break; if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) { @@ -727,7 +747,7 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, return status; } -static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle, +static inline int ocfs2_block_group_set_bits(handle_t *handle, struct inode *alloc_inode, struct ocfs2_group_desc *bg, struct buffer_head *group_bh, @@ -798,7 +818,7 @@ static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl) return best; } -static int ocfs2_relink_block_group(struct ocfs2_journal_handle *handle, +static int ocfs2_relink_block_group(handle_t *handle, struct inode *alloc_inode, struct buffer_head *fe_bh, struct buffer_head *bg_bh, @@ -829,9 +849,10 @@ static int ocfs2_relink_block_group(struct ocfs2_journal_handle *handle, goto out; } - mlog(0, "In suballoc %"MLFu64", chain %u, move group %"MLFu64" to " - "top, prev = %"MLFu64"\n", - fe->i_blkno, chain, bg->bg_blkno, prev_bg->bg_blkno); + mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n", + (unsigned long long)fe->i_blkno, chain, + (unsigned long long)bg->bg_blkno, + (unsigned long long)prev_bg->bg_blkno); fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno); bg_ptr = le64_to_cpu(bg->bg_next_group); @@ -909,14 +930,35 @@ static int ocfs2_cluster_group_search(struct inode *inode, { int search = -ENOSPC; int ret; - struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data; + struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data; u16 tmp_off, tmp_found; + unsigned int max_bits, gd_cluster_off; BUG_ON(!ocfs2_is_cluster_bitmap(inode)); - if (bg->bg_free_bits_count) { + if (gd->bg_free_bits_count) { + max_bits = le16_to_cpu(gd->bg_bits); + + /* Tail groups in cluster bitmaps which aren't cpg + * aligned are prone to partial extention by a failed + * fs resize. If the file system resize never got to + * update the dinode cluster count, then we don't want + * to trust any clusters past it, regardless of what + * the group descriptor says. */ + gd_cluster_off = ocfs2_blocks_to_clusters(inode->i_sb, + le64_to_cpu(gd->bg_blkno)); + if ((gd_cluster_off + max_bits) > + OCFS2_I(inode)->ip_clusters) { + max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off; + mlog(0, "Desc %llu, bg_bits %u, clusters %u, use %u\n", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_bits), + OCFS2_I(inode)->ip_clusters, max_bits); + } + ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), group_bh, bits_wanted, + max_bits, &tmp_off, &tmp_found); if (ret) return ret; @@ -949,23 +991,115 @@ static int ocfs2_block_group_search(struct inode *inode, if (bg->bg_free_bits_count) ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), group_bh, bits_wanted, + le16_to_cpu(bg->bg_bits), bit_off, bits_found); return ret; } +static int ocfs2_alloc_dinode_update_counts(struct inode *inode, + handle_t *handle, + struct buffer_head *di_bh, + u32 num_bits, + u16 chain) +{ + int ret; + u32 tmp_used; + struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; + struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain; + + ret = ocfs2_journal_access(handle, inode, di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + + tmp_used = le32_to_cpu(di->id1.bitmap1.i_used); + di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used); + le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits); + + ret = ocfs2_journal_dirty(handle, di_bh); + if (ret < 0) + mlog_errno(ret); + +out: + return ret; +} + +static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, + handle_t *handle, + u32 bits_wanted, + u32 min_bits, + u16 *bit_off, + unsigned int *num_bits, + u64 gd_blkno, + u16 *bits_left) +{ + int ret; + u16 found; + struct buffer_head *group_bh = NULL; + struct ocfs2_group_desc *gd; + struct inode *alloc_inode = ac->ac_inode; + + ret = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), gd_blkno, + &group_bh, OCFS2_BH_CACHED, alloc_inode); + if (ret < 0) { + mlog_errno(ret); + return ret; + } + + gd = (struct ocfs2_group_desc *) group_bh->b_data; + if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { + OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, gd); + ret = -EIO; + goto out; + } + + ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, + bit_off, &found); + if (ret < 0) { + if (ret != -ENOSPC) + mlog_errno(ret); + goto out; + } + + *num_bits = found; + + ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh, + *num_bits, + le16_to_cpu(gd->bg_chain)); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh, + *bit_off, *num_bits); + if (ret < 0) + mlog_errno(ret); + + *bits_left = le16_to_cpu(gd->bg_free_bits_count); + +out: + brelse(group_bh); + + return ret; +} + static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, + handle_t *handle, u32 bits_wanted, u32 min_bits, u16 *bit_off, unsigned int *num_bits, - u64 *bg_blkno) + u64 *bg_blkno, + u16 *bits_left) { int status; u16 chain, tmp_bits; u32 tmp_used; u64 next_group; - struct ocfs2_journal_handle *handle = ac->ac_handle; struct inode *alloc_inode = ac->ac_inode; struct buffer_head *group_bh = NULL; struct buffer_head *prev_group_bh = NULL; @@ -974,8 +1108,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, struct ocfs2_group_desc *bg; chain = ac->ac_chain; - mlog(0, "trying to alloc %u bits from chain %u, inode %"MLFu64"\n", - bits_wanted, chain, OCFS2_I(alloc_inode)->ip_blkno); + mlog(0, "trying to alloc %u bits from chain %u, inode %llu\n", + bits_wanted, chain, + (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno); status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), le64_to_cpu(cl->cl_recs[chain].c_blkno), @@ -985,9 +1120,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, goto bail; } bg = (struct ocfs2_group_desc *) group_bh->b_data; - if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); - status = -EIO; + status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg); + if (status) { + mlog_errno(status); goto bail; } @@ -1015,9 +1150,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, goto bail; } bg = (struct ocfs2_group_desc *) group_bh->b_data; - if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); - status = -EIO; + status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg); + if (status) { + mlog_errno(status); goto bail; } } @@ -1027,8 +1162,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, goto bail; } - mlog(0, "alloc succeeds: we give %u bits from block group %"MLFu64"\n", - tmp_bits, bg->bg_blkno); + mlog(0, "alloc succeeds: we give %u bits from block group %llu\n", + tmp_bits, (unsigned long long)bg->bg_blkno); *num_bits = tmp_bits; @@ -1092,10 +1227,11 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, goto bail; } - mlog(0, "Allocated %u bits from suballocator %"MLFu64"\n", - *num_bits, fe->i_blkno); + mlog(0, "Allocated %u bits from suballocator %llu\n", *num_bits, + (unsigned long long)fe->i_blkno); *bg_blkno = le64_to_cpu(bg->bg_blkno); + *bits_left = le16_to_cpu(bg->bg_free_bits_count); bail: if (group_bh) brelse(group_bh); @@ -1109,6 +1245,7 @@ bail: /* will give out up to bits_wanted contiguous bits. */ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, struct ocfs2_alloc_context *ac, + handle_t *handle, u32 bits_wanted, u32 min_bits, u16 *bit_off, @@ -1117,6 +1254,8 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, { int status; u16 victim, i; + u16 bits_left = 0; + u64 hint_blkno = ac->ac_last_group; struct ocfs2_chain_list *cl; struct ocfs2_dinode *fe; @@ -1134,25 +1273,47 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, } if (le32_to_cpu(fe->id1.bitmap1.i_used) >= le32_to_cpu(fe->id1.bitmap1.i_total)) { - ocfs2_error(osb->sb, "Chain allocator dinode %"MLFu64" has %u" - "used bits but only %u total.", - le64_to_cpu(fe->i_blkno), + ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used " + "bits but only %u total.", + (unsigned long long)le64_to_cpu(fe->i_blkno), le32_to_cpu(fe->id1.bitmap1.i_used), le32_to_cpu(fe->id1.bitmap1.i_total)); status = -EIO; goto bail; } + if (hint_blkno) { + /* Attempt to short-circuit the usual search mechanism + * by jumping straight to the most recently used + * allocation group. This helps us mantain some + * contiguousness across allocations. */ + status = ocfs2_search_one_group(ac, handle, bits_wanted, + min_bits, bit_off, num_bits, + hint_blkno, &bits_left); + if (!status) { + /* Be careful to update *bg_blkno here as the + * caller is expecting it to be filled in, and + * ocfs2_search_one_group() won't do that for + * us. */ + *bg_blkno = hint_blkno; + goto set_hint; + } + if (status < 0 && status != -ENOSPC) { + mlog_errno(status); + goto bail; + } + } + cl = (struct ocfs2_chain_list *) &fe->id2.i_chain; victim = ocfs2_find_victim_chain(cl); ac->ac_chain = victim; ac->ac_allow_chain_relink = 1; - status = ocfs2_search_chain(ac, bits_wanted, min_bits, bit_off, - num_bits, bg_blkno); + status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, bit_off, + num_bits, bg_blkno, &bits_left); if (!status) - goto bail; + goto set_hint; if (status < 0 && status != -ENOSPC) { mlog_errno(status); goto bail; @@ -1173,9 +1334,9 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, continue; ac->ac_chain = i; - status = ocfs2_search_chain(ac, bits_wanted, min_bits, - bit_off, num_bits, - bg_blkno); + status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, + bit_off, num_bits, bg_blkno, + &bits_left); if (!status) break; if (status < 0 && status != -ENOSPC) { @@ -1183,14 +1344,25 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, goto bail; } } -bail: +set_hint: + if (status != -ENOSPC) { + /* If the next search of this group is not likely to + * yield a suitable extent, then we reset the last + * group hint so as to not waste a disk read */ + if (bits_left < min_bits) + ac->ac_last_group = 0; + else + ac->ac_last_group = *bg_blkno; + } + +bail: mlog_exit(status); return status; } int ocfs2_claim_metadata(struct ocfs2_super *osb, - struct ocfs2_journal_handle *handle, + handle_t *handle, struct ocfs2_alloc_context *ac, u32 bits_wanted, u16 *suballoc_bit_start, @@ -1203,10 +1375,10 @@ int ocfs2_claim_metadata(struct ocfs2_super *osb, BUG_ON(!ac); BUG_ON(ac->ac_bits_wanted < (ac->ac_bits_given + bits_wanted)); BUG_ON(ac->ac_which != OCFS2_AC_USE_META); - BUG_ON(ac->ac_handle != handle); status = ocfs2_claim_suballoc_bits(osb, ac, + handle, bits_wanted, 1, suballoc_bit_start, @@ -1227,7 +1399,7 @@ bail: } int ocfs2_claim_new_inode(struct ocfs2_super *osb, - struct ocfs2_journal_handle *handle, + handle_t *handle, struct ocfs2_alloc_context *ac, u16 *suballoc_bit, u64 *fe_blkno) @@ -1242,10 +1414,10 @@ int ocfs2_claim_new_inode(struct ocfs2_super *osb, BUG_ON(ac->ac_bits_given != 0); BUG_ON(ac->ac_bits_wanted != 1); BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE); - BUG_ON(ac->ac_handle != handle); status = ocfs2_claim_suballoc_bits(osb, ac, + handle, 1, 1, suballoc_bit, @@ -1330,7 +1502,7 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode, * of any size. */ int ocfs2_claim_clusters(struct ocfs2_super *osb, - struct ocfs2_journal_handle *handle, + handle_t *handle, struct ocfs2_alloc_context *ac, u32 min_clusters, u32 *cluster_start, @@ -1338,7 +1510,7 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb, { int status; unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given; - u64 bg_blkno; + u64 bg_blkno = 0; u16 bg_bit_off; mlog_entry_void(); @@ -1348,7 +1520,6 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb, BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL && ac->ac_which != OCFS2_AC_USE_MAIN); - BUG_ON(ac->ac_handle != handle); if (ac->ac_which == OCFS2_AC_USE_LOCAL) { status = ocfs2_claim_local_alloc_bits(osb, @@ -1374,6 +1545,7 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb, status = ocfs2_claim_suballoc_bits(osb, ac, + handle, bits_wanted, min_clusters, &bg_bit_off, @@ -1400,7 +1572,7 @@ bail: return status; } -static inline int ocfs2_block_group_clear_bits(struct ocfs2_journal_handle *handle, +static inline int ocfs2_block_group_clear_bits(handle_t *handle, struct inode *alloc_inode, struct ocfs2_group_desc *bg, struct buffer_head *group_bh, @@ -1455,7 +1627,7 @@ bail: /* * expects the suballoc inode to already be locked. */ -static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle, +static int ocfs2_free_suballoc_bits(handle_t *handle, struct inode *alloc_inode, struct buffer_head *alloc_bh, unsigned int start_bit, @@ -1479,10 +1651,9 @@ static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle, } BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl)); - mlog(0, "suballocator %"MLFu64": freeing %u bits from group %"MLFu64 - ", starting at %u\n", - OCFS2_I(alloc_inode)->ip_blkno, count, bg_blkno, - start_bit); + mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n", + (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count, + (unsigned long long)bg_blkno, start_bit); status = ocfs2_read_block(osb, bg_blkno, &group_bh, OCFS2_BH_CACHED, alloc_inode); @@ -1492,9 +1663,9 @@ static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle, } group = (struct ocfs2_group_desc *) group_bh->b_data; - if (!OCFS2_IS_VALID_GROUP_DESC(group)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, group); - status = -EIO; + status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group); + if (status) { + mlog_errno(status); goto bail; } BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits)); @@ -1540,7 +1711,7 @@ static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit) return group; } -int ocfs2_free_dinode(struct ocfs2_journal_handle *handle, +int ocfs2_free_dinode(handle_t *handle, struct inode *inode_alloc_inode, struct buffer_head *inode_alloc_bh, struct ocfs2_dinode *di) @@ -1553,7 +1724,7 @@ int ocfs2_free_dinode(struct ocfs2_journal_handle *handle, inode_alloc_bh, bit, bg_blkno, 1); } -int ocfs2_free_extent_block(struct ocfs2_journal_handle *handle, +int ocfs2_free_extent_block(handle_t *handle, struct inode *eb_alloc_inode, struct buffer_head *eb_alloc_bh, struct ocfs2_extent_block *eb) @@ -1566,7 +1737,7 @@ int ocfs2_free_extent_block(struct ocfs2_journal_handle *handle, bit, bg_blkno, 1); } -int ocfs2_free_clusters(struct ocfs2_journal_handle *handle, +int ocfs2_free_clusters(handle_t *handle, struct inode *bitmap_inode, struct buffer_head *bitmap_bh, u64 start_blk, @@ -1592,10 +1763,10 @@ int ocfs2_free_clusters(struct ocfs2_journal_handle *handle, ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno, &bg_start_bit); - mlog(0, "want to free %u clusters starting at block %"MLFu64"\n", - num_clusters, start_blk); - mlog(0, "bg_blkno = %"MLFu64", bg_start_bit = %u\n", - bg_blkno, bg_start_bit); + mlog(0, "want to free %u clusters starting at block %llu\n", + num_clusters, (unsigned long long)start_blk); + mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n", + (unsigned long long)bg_blkno, bg_start_bit); status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, bg_start_bit, bg_blkno, @@ -1616,18 +1787,22 @@ static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg) printk("bg_free_bits_count: %u\n", bg->bg_free_bits_count); printk("bg_chain: %u\n", bg->bg_chain); printk("bg_generation: %u\n", le32_to_cpu(bg->bg_generation)); - printk("bg_next_group: %"MLFu64"\n", bg->bg_next_group); - printk("bg_parent_dinode: %"MLFu64"\n", bg->bg_parent_dinode); - printk("bg_blkno: %"MLFu64"\n", bg->bg_blkno); + printk("bg_next_group: %llu\n", + (unsigned long long)bg->bg_next_group); + printk("bg_parent_dinode: %llu\n", + (unsigned long long)bg->bg_parent_dinode); + printk("bg_blkno: %llu\n", + (unsigned long long)bg->bg_blkno); } static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe) { int i; - printk("Suballoc Inode %"MLFu64":\n", fe->i_blkno); + printk("Suballoc Inode %llu:\n", (unsigned long long)fe->i_blkno); printk("i_signature: %s\n", fe->i_signature); - printk("i_size: %"MLFu64"\n", fe->i_size); + printk("i_size: %llu\n", + (unsigned long long)fe->i_size); printk("i_clusters: %u\n", fe->i_clusters); printk("i_generation: %u\n", le32_to_cpu(fe->i_generation)); @@ -1645,7 +1820,7 @@ static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe) fe->id2.i_chain.cl_recs[i].c_free); printk("fe->id2.i_chain.cl_recs[%d].c_total: %u\n", i, fe->id2.i_chain.cl_recs[i].c_total); - printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %"MLFu64"\n", i, - fe->id2.i_chain.cl_recs[i].c_blkno); + printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %llu\n", i, + (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno); } }