2 * linux/fs/ext2/balloc.c
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
9 * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
10 * Big-endian to little-endian byte-swapping/bitmaps by
11 * David S. Miller (davem@caip.rutgers.edu), 1995
15 #include <linux/quotaops.h>
16 #include <linux/sched.h>
17 #include <linux/buffer_head.h>
18 #include <linux/capability.h>
19 #include <linux/vs_base.h>
20 #include <linux/vs_dlimit.h>
21 #include <linux/vs_tag.h>
24 * balloc.c contains the block allocation and deallocation routines
28 * The free blocks are managed by bitmaps. A file system contains several
29 * block groups. Each group contains 1 bitmap block for blocks, 1 bitmap
30 * block for inodes, N blocks for the inode table and data blocks.
32 * The file system contains group descriptors which are located after the
33 * super block. Each descriptor contains the number of the bitmap block and
34 * the free blocks count in the group. The descriptors are loaded in memory
35 * when a file system is mounted (see ext2_read_super).
/*
 * in_range(b, first, len): true when b lies in the inclusive interval
 * [first, first + len - 1]. The arguments b and first each appear twice in
 * the expansion, so callers should not pass expressions with side effects.
 */
39 #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
/*
 * ext2_get_group_desc - look up the in-memory descriptor of a block group.
 * @sb:          superblock of the filesystem
 * @block_group: index of the group whose descriptor is wanted
 * @bh:          receives the buffer_head holding the descriptor block
 *
 * An out-of-range group number, or a descriptor block that is not loaded,
 * is reported through ext2_error().
 * NOTE(review): the return statements are not visible in this excerpt.
 * Callers in this file pass NULL for @bh, so a NULL guard around the
 * store to *bh presumably exists - confirm.
 */
41 struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
42 unsigned int block_group,
43 struct buffer_head ** bh)
45 unsigned long group_desc;
47 struct ext2_group_desc * desc;
48 struct ext2_sb_info *sbi = EXT2_SB(sb);
/* Reject group numbers at or beyond the group count. */
50 if (block_group >= sbi->s_groups_count) {
51 ext2_error (sb, "ext2_get_group_desc",
52 "block_group >= groups_count - "
53 "block_group = %d, groups_count = %lu",
54 block_group, sbi->s_groups_count);
/*
 * Split the group number into the index of the descriptor block
 * (group_desc) and the remainder within that block (offset).
 */
59 group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(sb);
60 offset = block_group & (EXT2_DESC_PER_BLOCK(sb) - 1);
/* The descriptor block must already be present in sbi->s_group_desc[]. */
61 if (!sbi->s_group_desc[group_desc]) {
62 ext2_error (sb, "ext2_get_group_desc",
63 "Group descriptor not loaded - "
64 "block_group = %d, group_desc = %lu, desc = %lu",
65 block_group, group_desc, offset);
/* The buffer's data is viewed as struct ext2_group_desc entries. */
69 desc = (struct ext2_group_desc *) sbi->s_group_desc[group_desc]->b_data;
71 *bh = sbi->s_group_desc[group_desc];
76 * Read the bitmap for a given block_group, reading into the specified
77 * slot in the superblock's bitmap cache.
79 * Return buffer_head on success or NULL in case of failure.
/*
 * @sb:          superblock of the filesystem
 * @block_group: group whose block bitmap is wanted
 *
 * Fetches the group's descriptor (without asking for its buffer_head) and
 * reads the block numbered bg_block_bitmap with sb_bread().
 * NOTE(review): the NULL checks and return statements are not visible in
 * this excerpt; the ext2_error() call below is presumably reached only when
 * sb_bread() fails - confirm.
 */
81 static struct buffer_head *
82 read_block_bitmap(struct super_block *sb, unsigned int block_group)
84 struct ext2_group_desc * desc;
85 struct buffer_head * bh = NULL;
87 desc = ext2_get_group_desc (sb, block_group, NULL);
/* bg_block_bitmap is converted with le32_to_cpu() before use. */
90 bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap));
92 ext2_error (sb, "read_block_bitmap",
93 "Cannot read block bitmap - "
94 "block_group = %d, block_bitmap = %u",
95 block_group, le32_to_cpu(desc->bg_block_bitmap));
101 * Set sb->s_dirt here because the superblock was "logically" altered. We
102 * need to recalculate its free blocks count and flush it out.
/*
 * reserve_blocks - take up to @count blocks out of the per-superblock
 * free-blocks counter.
 * @sb:    superblock of the filesystem
 * @count: number of blocks requested
 *
 * The free count comes from the per-cpu counter and the root reserve from
 * es->s_r_blocks_count. DLIMIT_ADJUST_BLOCK() is handed pointers to both,
 * together with dx_current_tag(), before they are compared.
 * NOTE(review): the statements that clamp @count in the shortage cases and
 * the return statements are not visible in this excerpt. The caller in this
 * file stores the result as the number of blocks reserved - confirm.
 */
104 static int reserve_blocks(struct super_block *sb, int count)
106 struct ext2_sb_info *sbi = EXT2_SB(sb);
107 struct ext2_super_block *es = sbi->s_es;
108 unsigned long long free_blocks, root_blocks;
110 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
111 root_blocks = le32_to_cpu(es->s_r_blocks_count);
113 DLIMIT_ADJUST_BLOCK(sb, dx_current_tag(), &free_blocks, &root_blocks);
/* Fewer free blocks than requested. */
115 if (free_blocks < count)
/*
 * The request would dip into the root reserve and the caller has no
 * CAP_SYS_RESOURCE, its fsuid is not s_resuid, and either s_resgid is 0
 * or the caller is not a member of s_resgid.
 */
118 if (free_blocks < root_blocks + count && !capable(CAP_SYS_RESOURCE) &&
119 sbi->s_resuid != current->fsuid &&
120 (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
122 * We are too close to reserve and we are not privileged.
123 * Can we allocate anything at all?
/* Only the blocks above the root reserve may be taken. */
125 if (free_blocks > root_blocks)
126 count = free_blocks - root_blocks;
/* Subtract the (possibly reduced) count from the shared counter. */
131 percpu_counter_mod(&sbi->s_freeblocks_counter, -count);
/*
 * release_blocks - add @count blocks back to the per-superblock
 * free-blocks counter (s_freeblocks_counter).
 * NOTE(review): lines between the visible statements are missing from this
 * excerpt, so any guard around the update cannot be confirmed here.
 */
136 static void release_blocks(struct super_block *sb, int count)
139 struct ext2_sb_info *sbi = EXT2_SB(sb);
141 percpu_counter_mod(&sbi->s_freeblocks_counter, count);
/*
 * group_reserve_blocks - take up to @count blocks out of one group's
 * bg_free_blocks_count.
 * @sbi:      ext2 superblock info; supplies the per-group lock
 * @group_no: group index used to pick the lock
 * @desc:     descriptor of that group
 * @bh:       buffer holding @desc; marked dirty after the update
 * @count:    number of blocks requested
 *
 * NOTE(review): the clamp applied when free_blocks < count and the return
 * statements are not visible in this excerpt.
 */
146 static int group_reserve_blocks(struct ext2_sb_info *sbi, int group_no,
147 struct ext2_group_desc *desc, struct buffer_head *bh, int count)
149 unsigned free_blocks;
/* First test is made without taking the group lock. */
151 if (!desc->bg_free_blocks_count)
/* Re-read and update the counter under the group's lock. */
154 spin_lock(sb_bgl_lock(sbi, group_no));
155 free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
156 if (free_blocks < count)
158 desc->bg_free_blocks_count = cpu_to_le16(free_blocks - count);
159 spin_unlock(sb_bgl_lock(sbi, group_no));
160 mark_buffer_dirty(bh);
/*
 * group_release_blocks - add @count blocks back to one group's
 * bg_free_blocks_count.
 * @sb:       superblock of the filesystem
 * @group_no: group index used to pick the lock
 * @desc:     descriptor of that group
 * @bh:       buffer holding @desc; marked dirty after the update
 * @count:    number of blocks to give back
 *
 * The counter is read, incremented and written back while holding
 * sb_bgl_lock() for the group.
 */
164 static void group_release_blocks(struct super_block *sb, int group_no,
165 struct ext2_group_desc *desc, struct buffer_head *bh, int count)
168 struct ext2_sb_info *sbi = EXT2_SB(sb);
169 unsigned free_blocks;
171 spin_lock(sb_bgl_lock(sbi, group_no));
172 free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
173 desc->bg_free_blocks_count = cpu_to_le16(free_blocks + count);
174 spin_unlock(sb_bgl_lock(sbi, group_no));
176 mark_buffer_dirty(bh);
180 /* Free given blocks, update quota and i_blocks field */
/*
 * @inode: inode the blocks belong to; supplies the superblock and is the
 *         target of the dlimit/quota release calls
 * @block: first block number to free
 * NOTE(review): the remaining parameter(s) (a block count named 'count' is
 * used below) are declared on a line missing from this excerpt.
 *
 * The range must start at or after s_first_data_block, must not wrap, and
 * must end at or before s_blocks_count; otherwise ext2_error() is called.
 * NOTE(review): the statement that increments group_freed, the loop that
 * handles 'overflow', and all gotos/returns are not visible here.
 */
181 void ext2_free_blocks (struct inode * inode, unsigned long block,
184 struct buffer_head *bitmap_bh = NULL;
185 struct buffer_head * bh2;
186 unsigned long block_group;
189 unsigned long overflow;
190 struct super_block * sb = inode->i_sb;
191 struct ext2_sb_info * sbi = EXT2_SB(sb);
192 struct ext2_group_desc * desc;
193 struct ext2_super_block * es = sbi->s_es;
194 unsigned freed = 0, group_freed;
196 if (block < le32_to_cpu(es->s_first_data_block) ||
197 block + count < block ||
198 block + count > le32_to_cpu(es->s_blocks_count)) {
199 ext2_error (sb, "ext2_free_blocks",
200 "Freeing blocks not in datazone - "
201 "block = %lu, count = %lu", block, count);
205 ext2_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1);
/*
 * Locate the group containing 'block' and the bit index of 'block'
 * within that group.
 */
209 block_group = (block - le32_to_cpu(es->s_first_data_block)) /
210 EXT2_BLOCKS_PER_GROUP(sb);
211 bit = (block - le32_to_cpu(es->s_first_data_block)) %
212 EXT2_BLOCKS_PER_GROUP(sb);
214 * Check to see if we are freeing blocks across a group
/* overflow = number of blocks that extend past the end of this group. */
217 if (bit + count > EXT2_BLOCKS_PER_GROUP(sb)) {
218 overflow = bit + count - EXT2_BLOCKS_PER_GROUP(sb);
222 bitmap_bh = read_block_bitmap(sb, block_group);
226 desc = ext2_get_group_desc (sb, block_group, &bh2);
/*
 * Report an error if the range covers the group's block bitmap or inode
 * bitmap, or if its first or last block lies inside the inode table.
 */
230 if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) ||
231 in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) ||
232 in_range (block, le32_to_cpu(desc->bg_inode_table),
233 sbi->s_itb_per_group) ||
234 in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table),
235 sbi->s_itb_per_group))
236 ext2_error (sb, "ext2_free_blocks",
237 "Freeing blocks in system zones - "
238 "Block = %lu, count = %lu",
/*
 * Clear one bitmap bit per block; a zero result from
 * ext2_clear_bit_atomic() is reported as "bit already cleared".
 */
241 for (i = 0, group_freed = 0; i < count; i++) {
242 if (!ext2_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
243 bit + i, bitmap_bh->b_data)) {
244 ext2_error(sb, __FUNCTION__,
245 "bit already cleared for block %lu", block + i);
251 mark_buffer_dirty(bitmap_bh);
/* On MS_SYNCHRONOUS mounts the bitmap buffer is synced right away. */
252 if (sb->s_flags & MS_SYNCHRONOUS)
253 sync_dirty_buffer(bitmap_bh);
/* Credit this group's descriptor, then accumulate the running total. */
255 group_release_blocks(sb, block_group, desc, bh2, group_freed);
256 freed += group_freed;
/* Release dlimit, superblock-counter and quota charges for the total. */
265 DLIMIT_FREE_BLOCK(inode, freed);
266 release_blocks(sb, freed);
267 DQUOT_FREE_BLOCK(inode, freed);
/*
 * grab_block - search a block bitmap for a free bit and try to claim it.
 * @lock: lock passed through to ext2_set_bit_atomic()
 * @map:  bitmap data
 * @size: number of bits in the bitmap
 * @goal: preferred bit index
 *
 * NOTE(review): local declarations, labels, gotos and return statements
 * are missing from this excerpt; the comments below describe only the
 * statements that are visible.
 */
270 static int grab_block(spinlock_t *lock, char *map, unsigned size, int goal)
/* Goal bit is clear, i.e. the goal itself is free. */
275 if (!ext2_test_bit(goal, map))
281 * The goal was occupied; search forward for a free
282 * block within the next XX blocks.
284 * end_goal is more or less random, but it has to be
285 * less than EXT2_BLOCKS_PER_GROUP. Aligning up to the
286 * next 64-bit boundary is simple..
/* k = goal rounded up to a multiple of 64; look for a zero bit below k. */
288 k = (goal + 63) & ~63;
289 goal = ext2_find_next_zero_bit(map, k, goal);
293 * Search in the remainder of the current group.
/*
 * Scan for a zero byte, starting at the byte that contains 'goal' and
 * covering the bytes up to the end of the bitmap.
 */
297 p = map + (goal >> 3);
298 r = memscan(p, 0, (size - goal + 7) >> 3);
302 * We have succeeded in finding a free byte in the block
303 * bitmap. Now search backwards to find the start of this
304 * group of free blocks - won't take more than 7 iterations.
306 for (goal = k; goal && !ext2_test_bit (goal - 1, map); goal--)
/* Bit-granular search from 'goal' up to 'size'. */
311 k = ext2_find_next_zero_bit ((u32 *)map, size, goal);
/*
 * NOTE(review): a non-zero result from ext2_set_bit_atomic() presumably
 * means the bit was already set by someone else - confirm.
 */
318 if (ext2_set_bit_atomic(lock, goal, (void *) map))
324 * ext2_new_block uses a goal block to assist allocation. If the goal is
325 * free, or there is a free block between the goal and the next 64-block
325 * boundary, that block
326 * is allocated. Otherwise a forward search is made for a free block; within
327 * each block group the search first looks for an entire free byte in the block
328 * bitmap, and then for any free bit if that fails.
329 * This function also updates quota and i_blocks field.
/*
 * @inode:          inode the block is allocated for; supplies the superblock
 * @goal:           preferred block number; replaced by s_first_data_block
 *                  when it lies outside [s_first_data_block, s_blocks_count)
 * @prealloc_count: preallocation counter of the caller; may be NULL
 * @prealloc_block: set to block + 1 in the preallocation branch
 * @err:            NOTE(review): assignments to *err are not visible in this
 *                  excerpt
 *
 * NOTE(review): labels, gotos, several declarations (e.g. 'block', 'n') and
 * the return statements are missing from this excerpt; the comments below
 * describe only the visible statements.
 */
331 int ext2_new_block(struct inode *inode, unsigned long goal,
332 u32 *prealloc_count, u32 *prealloc_block, int *err)
334 struct buffer_head *bitmap_bh = NULL;
335 struct buffer_head *gdp_bh; /* bh2 */
336 struct ext2_group_desc *desc;
337 int group_no; /* i */
338 int ret_block; /* j */
339 int group_idx; /* k */
340 int target_block; /* tmp */
342 struct super_block *sb = inode->i_sb;
343 struct ext2_sb_info *sbi = EXT2_SB(sb);
344 struct ext2_super_block *es = sbi->s_es;
345 unsigned group_size = EXT2_BLOCKS_PER_GROUP(sb);
346 unsigned prealloc_goal = es->s_prealloc_blocks;
347 unsigned group_alloc = 0, es_alloc, dq_alloc;
348 int nr_scanned_groups;
/*
 * prealloc_goal becomes s_prealloc_blocks - 1, or
 * EXT2_DEFAULT_PREALLOC_BLOCKS - 1 when the superblock field is zero.
 */
350 if (!prealloc_goal--)
351 prealloc_goal = EXT2_DEFAULT_PREALLOC_BLOCKS - 1;
/*
 * No preallocation slot was supplied, or it is already non-zero.
 * NOTE(review): the consequent statement is not visible.
 */
352 if (!prealloc_count || *prealloc_count)
/* Charge quota for the one block that is always needed. */
355 if (DQUOT_ALLOC_BLOCK(inode, 1)) {
/*
 * Loops while quota preallocation of prealloc_goal blocks is refused.
 * NOTE(review): the loop body is not visible; presumably it shrinks
 * prealloc_goal - confirm.
 */
360 while (prealloc_goal && DQUOT_PREALLOC_BLOCK(inode, prealloc_goal))
/*
 * dq_alloc: blocks charged to quota; es_alloc: blocks obtained from the
 * per-superblock counter (both are released at the end of the function).
 */
363 dq_alloc = prealloc_goal + 1;
364 es_alloc = reserve_blocks(sb, dq_alloc);
369 if (DLIMIT_ALLOC_BLOCK(inode, es_alloc)) {
374 ext2_debug ("goal=%lu.\n", goal);
/* An out-of-range goal falls back to the first data block. */
376 if (goal < le32_to_cpu(es->s_first_data_block) ||
377 goal >= le32_to_cpu(es->s_blocks_count))
378 goal = le32_to_cpu(es->s_first_data_block);
379 group_no = (goal - le32_to_cpu(es->s_first_data_block)) / group_size;
380 desc = ext2_get_group_desc (sb, group_no, &gdp_bh);
383 * gdp_bh may still be uninitialised. But group_release_blocks
384 * will not touch it because group_alloc is zero.
389 group_alloc = group_reserve_blocks(sbi, group_no, desc,
/* Bit index of the goal inside its group. */
392 ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) %
395 bitmap_bh = read_block_bitmap(sb, group_no);
399 ext2_debug("goal is at %d:%d.\n", group_no, ret_block);
401 ret_block = grab_block(sb_bgl_lock(sbi, group_no),
402 bitmap_bh->b_data, group_size, ret_block);
/* Give the group reservation back before moving to other groups. */
405 group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
409 ext2_debug ("Bit not found in block group %d.\n", group_no);
412 * Now search the rest of the groups. We assume that
413 * i and desc correctly point to the last group visited.
415 nr_scanned_groups = 0;
/* Visit groups until one yields a reservation or all have been tried. */
417 for (group_idx = 0; !group_alloc &&
418 group_idx < sbi->s_groups_count; group_idx++) {
420 if (group_no >= sbi->s_groups_count)
422 desc = ext2_get_group_desc(sb, group_no, &gdp_bh);
425 group_alloc = group_reserve_blocks(sbi, group_no, desc,
433 bitmap_bh = read_block_bitmap(sb, group_no);
437 ret_block = grab_block(sb_bgl_lock(sbi, group_no), bitmap_bh->b_data,
441 * If a free block counter is corrupted we can loop inifintely.
/* Bail-out guard: more than twice the group count has been scanned. */
445 if (nr_scanned_groups > 2 * sbi->s_groups_count) {
446 ext2_error(sb, "ext2_new_block",
447 "corrupted free blocks counters");
451 * Someone else grabbed the last free block in this blockgroup
452 * before us. Retry the scan.
454 group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
460 ext2_debug("using block group %d(%d)\n",
461 group_no, desc->bg_free_blocks_count);
/* Convert the bit index within the group into a block number. */
463 target_block = ret_block + group_no * group_size +
464 le32_to_cpu(es->s_first_data_block);
/*
 * Report an error if the chosen block is the group's block bitmap, its
 * inode bitmap, or lies inside its inode table.
 */
466 if (target_block == le32_to_cpu(desc->bg_block_bitmap) ||
467 target_block == le32_to_cpu(desc->bg_inode_bitmap) ||
468 in_range(target_block, le32_to_cpu(desc->bg_inode_table),
469 sbi->s_itb_per_group))
470 ext2_error (sb, "ext2_new_block",
471 "Allocating block in system zone - "
472 "block = %u", target_block);
/* The chosen block must lie below the filesystem's block count. */
474 if (target_block >= le32_to_cpu(es->s_blocks_count)) {
475 ext2_error (sb, "ext2_new_block",
476 "block(%d) >= blocks count(%d) - "
477 "block_group = %d, es == %p ", ret_block,
478 le32_to_cpu(es->s_blocks_count), group_no, es);
481 block = target_block;
483 /* OK, we _had_ allocated something */
484 ext2_debug("found bit %d\n", ret_block);
491 * Do block preallocation now if required.
/* Preallocation state is updated under the inode's i_meta_lock. */
493 write_lock(&EXT2_I(inode)->i_meta_lock);
494 if (group_alloc && !*prealloc_count) {
/*
 * Try to set the bits that follow ret_block, at most group_alloc of
 * them and never past the end of the group.
 */
497 for (n = 0; n < group_alloc && ++ret_block < group_size; n++) {
498 if (ext2_set_bit_atomic(sb_bgl_lock(sbi, group_no),
500 (void*) bitmap_bh->b_data))
503 *prealloc_block = block + 1;
509 write_unlock(&EXT2_I(inode)->i_meta_lock);
511 mark_buffer_dirty(bitmap_bh);
/* On MS_SYNCHRONOUS mounts the bitmap buffer is synced right away. */
512 if (sb->s_flags & MS_SYNCHRONOUS)
513 sync_dirty_buffer(bitmap_bh);
515 ext2_debug ("allocating block %d. ", block);
/*
 * Release the amounts held in group_alloc, es_alloc and dq_alloc from
 * the group counter, dlimit, superblock counter and quota respectively.
 */
519 group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
520 DLIMIT_FREE_BLOCK(inode, es_alloc);
522 release_blocks(sb, es_alloc);
524 DQUOT_FREE_BLOCK(inode, dq_alloc);
/*
 * nibblemap[v] is the number of zero bits in the 4-bit value v
 * (nibblemap[0] == 4, nibblemap[15] == 0).
 */
536 static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
/*
 * ext2_count_free - count the zero bits in the first @numchars bytes of
 * @map->b_data by looking up the low and high nibble of each byte in
 * nibblemap[].
 * NOTE(review): the return statement and any guard on @map are not visible
 * in this excerpt; the '#endif' that follows suggests this code is compiled
 * only under EXT2FS_DEBUG - confirm.
 */
538 unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars)
541 unsigned long sum = 0;
545 for (i = 0; i < numchars; i++)
546 sum += nibblemap[map->b_data[i] & 0xf] +
547 nibblemap[(map->b_data[i] >> 4) & 0xf];
551 #endif /* EXT2FS_DEBUG */
/*
 * ext2_count_free_blocks - sum bg_free_blocks_count over all block groups.
 * @sb: superblock of the filesystem
 *
 * Two loops are visible. The first also reads each group's bitmap, counts
 * its zero bits with ext2_count_free() and prints stored versus counted
 * values; the second only sums the descriptor counters.
 * NOTE(review): the preprocessor conditionals and return statements are
 * missing from this excerpt; the two loops are presumably alternative
 * EXT2FS_DEBUG / non-debug branches - confirm.
 */
553 unsigned long ext2_count_free_blocks (struct super_block * sb)
555 struct ext2_group_desc * desc;
556 unsigned long desc_count = 0;
559 unsigned long bitmap_count, x;
560 struct ext2_super_block *es;
562 es = EXT2_SB(sb)->s_es;
/* Variant that cross-checks descriptors against the bitmaps. */
566 for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
567 struct buffer_head *bitmap_bh;
568 desc = ext2_get_group_desc (sb, i, NULL);
571 desc_count += le16_to_cpu(desc->bg_free_blocks_count);
572 bitmap_bh = read_block_bitmap(sb, i);
/* Every byte of the bitmap block (s_blocksize bytes) is counted. */
576 x = ext2_count_free(bitmap_bh, sb->s_blocksize);
577 printk ("group %d: stored = %d, counted = %lu\n",
578 i, le16_to_cpu(desc->bg_free_blocks_count), x);
582 printk("ext2_count_free_blocks: stored = %lu, computed = %lu, %lu\n",
583 (long)le32_to_cpu(es->s_free_blocks_count),
584 desc_count, bitmap_count);
/* Variant that trusts the descriptor counters. */
587 for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
588 desc = ext2_get_group_desc (sb, i, NULL);
591 desc_count += le16_to_cpu(desc->bg_free_blocks_count);
/*
 * block_in_use - test the bitmap bit that corresponds to @block.
 * The bit index is (@block - s_first_data_block) modulo
 * EXT2_BLOCKS_PER_GROUP(sb), so @map is expected to be the bitmap of the
 * group containing @block.
 * NOTE(review): the line carrying the storage class and return type is
 * missing from this excerpt.
 */
598 block_in_use(unsigned long block, struct super_block *sb, unsigned char *map)
600 return ext2_test_bit ((block -
601 le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block)) %
602 EXT2_BLOCKS_PER_GROUP(sb), map);
/*
 * test_root - helper called by ext2_group_sparse() with b = 3, 5 and 7.
 * NOTE(review): the body is missing from this excerpt; presumably it tests
 * whether a is a power of b - confirm against the full source.
 */
605 static inline int test_root(int a, int b)
/*
 * ext2_group_sparse - non-zero when test_root() accepts @group for base 3,
 * 5 or 7.
 * NOTE(review): lines before the return are missing from this excerpt, so
 * additional cases may be handled there - confirm.
 */
614 static int ext2_group_sparse(int group)
618 return (test_root(group, 3) || test_root(group, 5) ||
619 test_root(group, 7));
623 * ext2_bg_has_super - number of blocks used by the superblock in group
624 * @sb: superblock for filesystem
625 * @group: group number to check
627 * Return the number of blocks used by the superblock (primary or backup)
628 * in this group. Currently this will be only 0 or 1.
/*
 * The visible condition holds when the SPARSE_SUPER read-only-compatible
 * feature is set and ext2_group_sparse(group) is false.
 * NOTE(review): both return statements are missing from this excerpt.
 */
630 int ext2_bg_has_super(struct super_block *sb, int group)
632 if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
633 !ext2_group_sparse(group))
639 * ext2_bg_num_gdb - number of blocks used by the group table in group
640 * @sb: superblock for filesystem
641 * @group: group number to check
643 * Return the number of blocks used by the group descriptor table
644 * (primary or backup) in this group. In the future there may be a
645 * different number of descriptor blocks in each group.
/*
 * Falls through to returning EXT2_SB(sb)->s_gdb_count unless the
 * SPARSE_SUPER feature is set and ext2_group_sparse(group) is false.
 * NOTE(review): the statement executed in that case is missing from this
 * excerpt.
 */
647 unsigned long ext2_bg_num_gdb(struct super_block *sb, int group)
649 if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
650 !ext2_group_sparse(group))
652 return EXT2_SB(sb)->s_gdb_count;