This commit was manufactured by cvs2svn to create tag
[linux-2.6.git] / fs / ext3 / balloc.c
1 /*
2  *  linux/fs/ext3/balloc.c
3  *
4  * Copyright (C) 1992, 1993, 1994, 1995
5  * Remy Card (card@masi.ibp.fr)
6  * Laboratoire MASI - Institut Blaise Pascal
7  * Universite Pierre et Marie Curie (Paris VI)
8  *
9  *  Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
10  *  Big-endian to little-endian byte-swapping/bitmaps by
11  *        David S. Miller (davem@caip.rutgers.edu), 1995
12  */
13
14 #include <linux/config.h>
15 #include <linux/time.h>
16 #include <linux/fs.h>
17 #include <linux/jbd.h>
18 #include <linux/ext3_fs.h>
19 #include <linux/ext3_jbd.h>
20 #include <linux/quotaops.h>
21 #include <linux/buffer_head.h>
22
23 /*
24  * balloc.c contains the blocks allocation and deallocation routines
25  */
26
27 /*
28  * The free blocks are managed by bitmaps.  A file system contains several
29  * blocks groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
30  * block for inodes, N blocks for the inode table and data blocks.
31  *
32  * The file system contains group descriptors which are located after the
33  * super block.  Each descriptor contains the number of the bitmap block and
34  * the free blocks count in the group.  The descriptors are loaded in memory
35  * when a file system is mounted (see ext3_read_super).
36  */
37
38
/*
 * True when b lies in the closed interval [first, first + len - 1].
 * Note that every argument is expanded more than once, so callers must
 * not pass expressions with side effects.
 */
#define in_range(b, first, len)				\
	((b) >= (first) && (b) <= (first) + (len) - 1)
40
41 struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
42                                              unsigned int block_group,
43                                              struct buffer_head ** bh)
44 {
45         unsigned long group_desc;
46         unsigned long desc;
47         struct ext3_group_desc * gdp;
48
49         if (block_group >= EXT3_SB(sb)->s_groups_count) {
50                 ext3_error (sb, "ext3_get_group_desc",
51                             "block_group >= groups_count - "
52                             "block_group = %d, groups_count = %lu",
53                             block_group, EXT3_SB(sb)->s_groups_count);
54
55                 return NULL;
56         }
57
58         group_desc = block_group / EXT3_DESC_PER_BLOCK(sb);
59         desc = block_group % EXT3_DESC_PER_BLOCK(sb);
60         if (!EXT3_SB(sb)->s_group_desc[group_desc]) {
61                 ext3_error (sb, "ext3_get_group_desc",
62                             "Group descriptor not loaded - "
63                             "block_group = %d, group_desc = %lu, desc = %lu",
64                              block_group, group_desc, desc);
65                 return NULL;
66         }
67
68         gdp = (struct ext3_group_desc *) 
69               EXT3_SB(sb)->s_group_desc[group_desc]->b_data;
70         if (bh)
71                 *bh = EXT3_SB(sb)->s_group_desc[group_desc];
72         return gdp + desc;
73 }
74
75 /*
76  * Read the bitmap for a given block_group, reading into the specified 
77  * slot in the superblock's bitmap cache.
78  *
79  * Return buffer_head on success or NULL in case of failure.
80  */
81 static struct buffer_head *
82 read_block_bitmap(struct super_block *sb, unsigned int block_group)
83 {
84         struct ext3_group_desc * desc;
85         struct buffer_head * bh = NULL;
86
87         desc = ext3_get_group_desc (sb, block_group, NULL);
88         if (!desc)
89                 goto error_out;
90         bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap));
91         if (!bh)
92                 ext3_error (sb, "read_block_bitmap",
93                             "Cannot read block bitmap - "
94                             "block_group = %d, block_bitmap = %lu",
95                             block_group, (unsigned long) desc->bg_block_bitmap);
96 error_out:
97         return bh;
98 }
99
100 /* Free given blocks, update quota and i_blocks field */
101 void ext3_free_blocks (handle_t *handle, struct inode * inode,
102                         unsigned long block, unsigned long count)
103 {
104         struct buffer_head *bitmap_bh = NULL;
105         struct buffer_head *gd_bh;
106         unsigned long block_group;
107         unsigned long bit;
108         unsigned long i;
109         unsigned long overflow;
110         struct super_block * sb;
111         struct ext3_group_desc * gdp;
112         struct ext3_super_block * es;
113         struct ext3_sb_info *sbi;
114         int err = 0, ret;
115         int dquot_freed_blocks = 0;
116
117         sb = inode->i_sb;
118         if (!sb) {
119                 printk ("ext3_free_blocks: nonexistent device");
120                 return;
121         }
122         sbi = EXT3_SB(sb);
123         es = EXT3_SB(sb)->s_es;
124         if (block < le32_to_cpu(es->s_first_data_block) ||
125             block + count < block ||
126             block + count > le32_to_cpu(es->s_blocks_count)) {
127                 ext3_error (sb, "ext3_free_blocks",
128                             "Freeing blocks not in datazone - "
129                             "block = %lu, count = %lu", block, count);
130                 goto error_return;
131         }
132
133         ext3_debug ("freeing block %lu\n", block);
134
135 do_more:
136         overflow = 0;
137         block_group = (block - le32_to_cpu(es->s_first_data_block)) /
138                       EXT3_BLOCKS_PER_GROUP(sb);
139         bit = (block - le32_to_cpu(es->s_first_data_block)) %
140                       EXT3_BLOCKS_PER_GROUP(sb);
141         /*
142          * Check to see if we are freeing blocks across a group
143          * boundary.
144          */
145         if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) {
146                 overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
147                 count -= overflow;
148         }
149         brelse(bitmap_bh);
150         bitmap_bh = read_block_bitmap(sb, block_group);
151         if (!bitmap_bh)
152                 goto error_return;
153         gdp = ext3_get_group_desc (sb, block_group, &gd_bh);
154         if (!gdp)
155                 goto error_return;
156
157         if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) ||
158             in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) ||
159             in_range (block, le32_to_cpu(gdp->bg_inode_table),
160                       EXT3_SB(sb)->s_itb_per_group) ||
161             in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table),
162                       EXT3_SB(sb)->s_itb_per_group))
163                 ext3_error (sb, "ext3_free_blocks",
164                             "Freeing blocks in system zones - "
165                             "Block = %lu, count = %lu",
166                             block, count);
167
168         /*
169          * We are about to start releasing blocks in the bitmap,
170          * so we need undo access.
171          */
172         /* @@@ check errors */
173         BUFFER_TRACE(bitmap_bh, "getting undo access");
174         err = ext3_journal_get_undo_access(handle, bitmap_bh, NULL);
175         if (err)
176                 goto error_return;
177
178         /*
179          * We are about to modify some metadata.  Call the journal APIs
180          * to unshare ->b_data if a currently-committing transaction is
181          * using it
182          */
183         BUFFER_TRACE(gd_bh, "get_write_access");
184         err = ext3_journal_get_write_access(handle, gd_bh);
185         if (err)
186                 goto error_return;
187
188         jbd_lock_bh_state(bitmap_bh);
189
190         for (i = 0; i < count; i++) {
191                 /*
192                  * An HJ special.  This is expensive...
193                  */
194 #ifdef CONFIG_JBD_DEBUG
195                 jbd_unlock_bh_state(bitmap_bh);
196                 {
197                         struct buffer_head *debug_bh;
198                         debug_bh = sb_find_get_block(sb, block + i);
199                         if (debug_bh) {
200                                 BUFFER_TRACE(debug_bh, "Deleted!");
201                                 if (!bh2jh(bitmap_bh)->b_committed_data)
202                                         BUFFER_TRACE(debug_bh,
203                                                 "No commited data in bitmap");
204                                 BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap");
205                                 __brelse(debug_bh);
206                         }
207                 }
208                 jbd_lock_bh_state(bitmap_bh);
209 #endif
210                 /* @@@ This prevents newly-allocated data from being
211                  * freed and then reallocated within the same
212                  * transaction. 
213                  * 
214                  * Ideally we would want to allow that to happen, but to
215                  * do so requires making journal_forget() capable of
216                  * revoking the queued write of a data block, which
217                  * implies blocking on the journal lock.  *forget()
218                  * cannot block due to truncate races.
219                  *
220                  * Eventually we can fix this by making journal_forget()
221                  * return a status indicating whether or not it was able
222                  * to revoke the buffer.  On successful revoke, it is
223                  * safe not to set the allocation bit in the committed
224                  * bitmap, because we know that there is no outstanding
225                  * activity on the buffer any more and so it is safe to
226                  * reallocate it.  
227                  */
228                 BUFFER_TRACE(bitmap_bh, "set in b_committed_data");
229                 J_ASSERT_BH(bitmap_bh,
230                                 bh2jh(bitmap_bh)->b_committed_data != NULL);
231                 ext3_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i,
232                                 bh2jh(bitmap_bh)->b_committed_data);
233
234                 /*
235                  * We clear the bit in the bitmap after setting the committed
236                  * data bit, because this is the reverse order to that which
237                  * the allocator uses.
238                  */
239                 BUFFER_TRACE(bitmap_bh, "clear bit");
240                 if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
241                                                 bit + i, bitmap_bh->b_data)) {
242                         jbd_unlock_bh_state(bitmap_bh);
243                         ext3_error(sb, __FUNCTION__,
244                                 "bit already cleared for block %lu", block + i);
245                         jbd_lock_bh_state(bitmap_bh);
246                         BUFFER_TRACE(bitmap_bh, "bit already cleared");
247                 } else {
248                         dquot_freed_blocks++;
249                 }
250         }
251         jbd_unlock_bh_state(bitmap_bh);
252
253         spin_lock(sb_bgl_lock(sbi, block_group));
254         gdp->bg_free_blocks_count =
255                 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) +
256                         dquot_freed_blocks);
257         spin_unlock(sb_bgl_lock(sbi, block_group));
258         percpu_counter_mod(&sbi->s_freeblocks_counter, count);
259
260         /* We dirtied the bitmap block */
261         BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
262         err = ext3_journal_dirty_metadata(handle, bitmap_bh);
263
264         /* And the group descriptor block */
265         BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
266         ret = ext3_journal_dirty_metadata(handle, gd_bh);
267         if (!err) err = ret;
268
269         if (overflow && !err) {
270                 block += count;
271                 count = overflow;
272                 goto do_more;
273         }
274         sb->s_dirt = 1;
275 error_return:
276         brelse(bitmap_bh);
277         ext3_std_error(sb, err);
278         if (dquot_freed_blocks)
279                 DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
280         return;
281 }
282
283 /*
284  * For ext3 allocations, we must not reuse any blocks which are
285  * allocated in the bitmap buffer's "last committed data" copy.  This
286  * prevents deletes from freeing up the page for reuse until we have
287  * committed the delete transaction.
288  *
289  * If we didn't do this, then deleting something and reallocating it as
290  * data would allow the old block to be overwritten before the
291  * transaction committed (because we force data to disk before commit).
292  * This would lead to corruption if we crashed between overwriting the
293  * data and committing the delete. 
294  *
295  * @@@ We may want to make this allocation behaviour conditional on
296  * data-writes at some point, and disable it for metadata allocations or
297  * sync-data inodes.
298  */
299 static inline int ext3_test_allocatable(int nr, struct buffer_head *bh)
300 {
301         int ret;
302         struct journal_head *jh = bh2jh(bh);
303
304         if (ext3_test_bit(nr, bh->b_data))
305                 return 0;
306
307         jbd_lock_bh_state(bh);
308         if (!jh->b_committed_data)
309                 ret = 1;
310         else
311                 ret = !ext3_test_bit(nr, jh->b_committed_data);
312         jbd_unlock_bh_state(bh);
313         return ret;
314 }
315
316 /*
317  * Find an allocatable block in a bitmap.  We honour both the bitmap and
318  * its last-committed copy (if that exists), and perform the "most
319  * appropriate allocation" algorithm of looking for a free block near
320  * the initial goal; then for a free byte somewhere in the bitmap; then
321  * for any free bit in the bitmap.
322  */
323 static int
324 find_next_usable_block(int start, struct buffer_head *bh, int maxblocks)
325 {
326         int here, next;
327         char *p, *r;
328         struct journal_head *jh = bh2jh(bh);
329
330         if (start > 0) {
331                 /*
332                  * The goal was occupied; search forward for a free 
333                  * block within the next XX blocks.
334                  *
335                  * end_goal is more or less random, but it has to be
336                  * less than EXT3_BLOCKS_PER_GROUP. Aligning up to the
337                  * next 64-bit boundary is simple..
338                  */
339                 int end_goal = (start + 63) & ~63;
340                 here = ext3_find_next_zero_bit(bh->b_data, end_goal, start);
341                 if (here < end_goal && ext3_test_allocatable(here, bh))
342                         return here;
343                 ext3_debug("Bit not found near goal\n");
344         }
345
346         here = start;
347         if (here < 0)
348                 here = 0;
349
350         p = ((char *)bh->b_data) + (here >> 3);
351         r = memscan(p, 0, (maxblocks - here + 7) >> 3);
352         next = (r - ((char *)bh->b_data)) << 3;
353
354         if (next < maxblocks && ext3_test_allocatable(next, bh))
355                 return next;
356
357         /*
358          * The bitmap search --- search forward alternately through the actual
359          * bitmap and the last-committed copy until we find a bit free in
360          * both
361          */
362         while (here < maxblocks) {
363                 next = ext3_find_next_zero_bit(bh->b_data, maxblocks, here);
364                 if (next >= maxblocks)
365                         return -1;
366                 if (ext3_test_allocatable(next, bh))
367                         return next;
368                 jbd_lock_bh_state(bh);
369                 if (jh->b_committed_data)
370                         here = ext3_find_next_zero_bit(jh->b_committed_data,
371                                                         maxblocks, next);
372                 jbd_unlock_bh_state(bh);
373         }
374         return -1;
375 }
376
377 /*
378  * We think we can allocate this block in this bitmap.  Try to set the bit.
379  * If that succeeds then check that nobody has allocated and then freed the
380  * block since we saw that is was not marked in b_committed_data.  If it _was_
381  * allocated and freed then clear the bit in the bitmap again and return
382  * zero (failure).
383  */
384 static inline int
385 claim_block(spinlock_t *lock, int block, struct buffer_head *bh)
386 {
387         struct journal_head *jh = bh2jh(bh);
388         int ret;
389
390         if (ext3_set_bit_atomic(lock, block, bh->b_data))
391                 return 0;
392         jbd_lock_bh_state(bh);
393         if (jh->b_committed_data && ext3_test_bit(block,jh->b_committed_data)) {
394                 ext3_clear_bit_atomic(lock, block, bh->b_data);
395                 ret = 0;
396         } else {
397                 ret = 1;
398         }
399         jbd_unlock_bh_state(bh);
400         return ret;
401 }
402
403 /*
404  * If we failed to allocate the desired block then we may end up crossing to a
405  * new bitmap.  In that case we must release write access to the old one via
406  * ext3_journal_release_buffer(), else we'll run out of credits.
407  */
408 static int
409 ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group,
410                 struct buffer_head *bitmap_bh, int goal, int *errp)
411 {
412         int i;
413         int fatal;
414         int credits = 0;
415
416         *errp = 0;
417
418         /*
419          * Make sure we use undo access for the bitmap, because it is critical
420          * that we do the frozen_data COW on bitmap buffers in all cases even
421          * if the buffer is in BJ_Forget state in the committing transaction.
422          */
423         BUFFER_TRACE(bitmap_bh, "get undo access for new block");
424         fatal = ext3_journal_get_undo_access(handle, bitmap_bh, &credits);
425         if (fatal) {
426                 *errp = fatal;
427                 goto fail;
428         }
429
430 repeat:
431         if (goal < 0 || !ext3_test_allocatable(goal, bitmap_bh)) {
432                 goal = find_next_usable_block(goal, bitmap_bh,
433                                         EXT3_BLOCKS_PER_GROUP(sb));
434                 if (goal < 0)
435                         goto fail_access;
436
437                 for (i = 0; i < 7 && goal > 0 &&
438                                 ext3_test_allocatable(goal - 1, bitmap_bh);
439                         i++, goal--);
440         }
441
442         if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group), goal, bitmap_bh)) {
443                 /*
444                  * The block was allocated by another thread, or it was
445                  * allocated and then freed by another thread
446                  */
447                 goal++;
448                 if (goal >= EXT3_BLOCKS_PER_GROUP(sb))
449                         goto fail_access;
450                 goto repeat;
451         }
452
453         BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for bitmap block");
454         fatal = ext3_journal_dirty_metadata(handle, bitmap_bh);
455         if (fatal) {
456                 *errp = fatal;
457                 goto fail;
458         }
459         return goal;
460
461 fail_access:
462         BUFFER_TRACE(bitmap_bh, "journal_release_buffer");
463         ext3_journal_release_buffer(handle, bitmap_bh, credits);
464 fail:
465         return -1;
466 }
467
468 static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
469 {
470         int free_blocks, root_blocks;
471
472         free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
473         root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
474         if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
475                 sbi->s_resuid != current->fsuid &&
476                 (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
477                 return 0;
478         }
479         return 1;
480 }
481
482 /*
483  * ext3_should_retry_alloc() is called when ENOSPC is returned, and if
484  * it is profitable to retry the operation, this function will wait
485  * for the current or commiting transaction to complete, and then
486  * return TRUE.
487  */
488 int ext3_should_retry_alloc(struct super_block *sb, int *retries)
489 {
490         if (!ext3_has_free_blocks(EXT3_SB(sb)) || (*retries)++ > 3)
491                 return 0;
492
493         jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
494
495         return journal_force_commit_nested(EXT3_SB(sb)->s_journal);
496 }
497
498 /*
499  * ext3_new_block uses a goal block to assist allocation.  If the goal is
500  * free, or there is a free block within 32 blocks of the goal, that block
501  * is allocated.  Otherwise a forward search is made for a free block; within 
502  * each block group the search first looks for an entire free byte in the block
503  * bitmap, and then for any free bit if that fails.
504  * This function also updates quota and i_blocks field.
505  */
506 int
507 ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
508                 u32 *prealloc_count, u32 *prealloc_block, int *errp)
509 {
510         struct buffer_head *bitmap_bh = NULL;   /* bh */
511         struct buffer_head *gdp_bh;             /* bh2 */
512         int group_no;                           /* i */
513         int ret_block;                          /* j */
514         int bgi;                                /* blockgroup iteration index */
515         int target_block;                       /* tmp */
516         int fatal = 0, err;
517         int performed_allocation = 0;
518         int free_blocks;
519         struct super_block *sb;
520         struct ext3_group_desc *gdp;
521         struct ext3_super_block *es;
522         struct ext3_sb_info *sbi;
523 #ifdef EXT3FS_DEBUG
524         static int goal_hits, goal_attempts;
525 #endif
526         *errp = -ENOSPC;
527         sb = inode->i_sb;
528         if (!sb) {
529                 printk("ext3_new_block: nonexistent device");
530                 return 0;
531         }
532
533         /*
534          * Check quota for allocation of this block.
535          */
536         if (DQUOT_ALLOC_BLOCK(inode, 1)) {
537                 *errp = -EDQUOT;
538                 return 0;
539         }
540
541         sbi = EXT3_SB(sb);
542         es = EXT3_SB(sb)->s_es;
543         ext3_debug("goal=%lu.\n", goal);
544
545         if (!ext3_has_free_blocks(sbi)) {
546                 *errp = -ENOSPC;
547                 goto out;
548         }
549
550         /*
551          * First, test whether the goal block is free.
552          */
553         if (goal < le32_to_cpu(es->s_first_data_block) ||
554             goal >= le32_to_cpu(es->s_blocks_count))
555                 goal = le32_to_cpu(es->s_first_data_block);
556         group_no = (goal - le32_to_cpu(es->s_first_data_block)) /
557                         EXT3_BLOCKS_PER_GROUP(sb);
558         gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
559         if (!gdp)
560                 goto io_error;
561
562         free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
563         if (free_blocks > 0) {
564                 ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) %
565                                 EXT3_BLOCKS_PER_GROUP(sb));
566                 bitmap_bh = read_block_bitmap(sb, group_no);
567                 if (!bitmap_bh)
568                         goto io_error;
569                 ret_block = ext3_try_to_allocate(sb, handle, group_no,
570                                         bitmap_bh, ret_block, &fatal);
571                 if (fatal)
572                         goto out;
573                 if (ret_block >= 0)
574                         goto allocated;
575         }
576
577         /*
578          * Now search the rest of the groups.  We assume that 
579          * i and gdp correctly point to the last group visited.
580          */
581         for (bgi = 0; bgi < EXT3_SB(sb)->s_groups_count; bgi++) {
582                 group_no++;
583                 if (group_no >= EXT3_SB(sb)->s_groups_count)
584                         group_no = 0;
585                 gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
586                 if (!gdp) {
587                         *errp = -EIO;
588                         goto out;
589                 }
590                 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
591                 if (free_blocks <= 0)
592                         continue;
593
594                 brelse(bitmap_bh);
595                 bitmap_bh = read_block_bitmap(sb, group_no);
596                 if (!bitmap_bh)
597                         goto io_error;
598                 ret_block = ext3_try_to_allocate(sb, handle, group_no,
599                                                 bitmap_bh, -1, &fatal);
600                 if (fatal)
601                         goto out;
602                 if (ret_block >= 0) 
603                         goto allocated;
604         }
605
606         /* No space left on the device */
607         *errp = -ENOSPC;
608         goto out;
609
610 allocated:
611
612         ext3_debug("using block group %d(%d)\n",
613                         group_no, gdp->bg_free_blocks_count);
614
615         BUFFER_TRACE(gdp_bh, "get_write_access");
616         fatal = ext3_journal_get_write_access(handle, gdp_bh);
617         if (fatal)
618                 goto out;
619
620         target_block = ret_block + group_no * EXT3_BLOCKS_PER_GROUP(sb)
621                                 + le32_to_cpu(es->s_first_data_block);
622
623         if (target_block == le32_to_cpu(gdp->bg_block_bitmap) ||
624             target_block == le32_to_cpu(gdp->bg_inode_bitmap) ||
625             in_range(target_block, le32_to_cpu(gdp->bg_inode_table),
626                       EXT3_SB(sb)->s_itb_per_group))
627                 ext3_error(sb, "ext3_new_block",
628                             "Allocating block in system zone - "
629                             "block = %u", target_block);
630
631         performed_allocation = 1;
632
633 #ifdef CONFIG_JBD_DEBUG
634         {
635                 struct buffer_head *debug_bh;
636
637                 /* Record bitmap buffer state in the newly allocated block */
638                 debug_bh = sb_find_get_block(sb, target_block);
639                 if (debug_bh) {
640                         BUFFER_TRACE(debug_bh, "state when allocated");
641                         BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state");
642                         brelse(debug_bh);
643                 }
644         }
645         jbd_lock_bh_state(bitmap_bh);
646         spin_lock(sb_bgl_lock(sbi, group_no));
647         if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) {
648                 if (ext3_test_bit(ret_block,
649                                 bh2jh(bitmap_bh)->b_committed_data)) {
650                         printk("%s: block was unexpectedly set in "
651                                 "b_committed_data\n", __FUNCTION__);
652                 }
653         }
654         ext3_debug("found bit %d\n", ret_block);
655         spin_unlock(sb_bgl_lock(sbi, group_no));
656         jbd_unlock_bh_state(bitmap_bh);
657 #endif
658
659         /* ret_block was blockgroup-relative.  Now it becomes fs-relative */
660         ret_block = target_block;
661
662         if (ret_block >= le32_to_cpu(es->s_blocks_count)) {
663                 ext3_error(sb, "ext3_new_block",
664                             "block(%d) >= blocks count(%d) - "
665                             "block_group = %d, es == %p ", ret_block,
666                         le32_to_cpu(es->s_blocks_count), group_no, es);
667                 goto out;
668         }
669
670         /*
671          * It is up to the caller to add the new buffer to a journal
672          * list of some description.  We don't know in advance whether
673          * the caller wants to use it as metadata or data.
674          */
675         ext3_debug("allocating block %d. Goal hits %d of %d.\n",
676                         ret_block, goal_hits, goal_attempts);
677
678         spin_lock(sb_bgl_lock(sbi, group_no));
679         gdp->bg_free_blocks_count =
680                         cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1);
681         spin_unlock(sb_bgl_lock(sbi, group_no));
682         percpu_counter_mod(&sbi->s_freeblocks_counter, -1);
683
684         BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
685         err = ext3_journal_dirty_metadata(handle, gdp_bh);
686         if (!fatal)
687                 fatal = err;
688
689         sb->s_dirt = 1;
690         if (fatal)
691                 goto out;
692
693         *errp = 0;
694         brelse(bitmap_bh);
695         return ret_block;
696
697 io_error:
698         *errp = -EIO;
699 out:
700         if (fatal) {
701                 *errp = fatal;
702                 ext3_std_error(sb, fatal);
703         }
704         /*
705          * Undo the block allocation
706          */
707         if (!performed_allocation)
708                 DQUOT_FREE_BLOCK(inode, 1);
709         brelse(bitmap_bh);
710         return 0;
711 }
712
713 unsigned long ext3_count_free_blocks(struct super_block *sb)
714 {
715         unsigned long desc_count;
716         struct ext3_group_desc *gdp;
717         int i;
718 #ifdef EXT3FS_DEBUG
719         struct ext3_super_block *es;
720         unsigned long bitmap_count, x;
721         struct buffer_head *bitmap_bh = NULL;
722
723         lock_super(sb);
724         es = EXT3_SB(sb)->s_es;
725         desc_count = 0;
726         bitmap_count = 0;
727         gdp = NULL;
728         for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
729                 gdp = ext3_get_group_desc(sb, i, NULL);
730                 if (!gdp)
731                         continue;
732                 desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
733                 brelse(bitmap_bh);
734                 bitmap_bh = read_block_bitmap(sb, i);
735                 if (bitmap_bh == NULL)
736                         continue;
737
738                 x = ext3_count_free(bitmap_bh, sb->s_blocksize);
739                 printk("group %d: stored = %d, counted = %lu\n",
740                         i, le16_to_cpu(gdp->bg_free_blocks_count), x);
741                 bitmap_count += x;
742         }
743         brelse(bitmap_bh);
744         printk("ext3_count_free_blocks: stored = %u, computed = %lu, %lu\n",
745                le32_to_cpu(es->s_free_blocks_count), desc_count, bitmap_count);
746         unlock_super(sb);
747         return bitmap_count;
748 #else
749         desc_count = 0;
750         for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
751                 gdp = ext3_get_group_desc(sb, i, NULL);
752                 if (!gdp)
753                         continue;
754                 desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
755         }
756
757         return desc_count;
758 #endif
759 }
760
761 static inline int block_in_use(unsigned long block,
762                                 struct super_block * sb,
763                                 unsigned char * map)
764 {
765         return ext3_test_bit ((block -
766                 le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) %
767                          EXT3_BLOCKS_PER_GROUP(sb), map);
768 }
769
/*
 * Return 1 if @a is 0 or can be reduced to 1 by repeated exact
 * division by @b (i.e. it is a power of @b), otherwise 0.
 */
static inline int test_root(int a, int b)
{
	int rest = a;

	if (rest == 0)
		return 1;

	while (rest != 1) {
		if (rest % b != 0)
			return 0;
		rest /= b;
	}
	return 1;
}
782
/*
 * Return 1 if @group passes test_root() for base 3, 5 or 7 (which
 * includes groups 0 and 1), otherwise 0.
 */
int ext3_group_sparse(int group)
{
	if (test_root(group, 3))
		return 1;
	if (test_root(group, 5))
		return 1;
	return test_root(group, 7) != 0;
}
788
789 /**
790  *      ext3_bg_has_super - number of blocks used by the superblock in group
791  *      @sb: superblock for filesystem
792  *      @group: group number to check
793  *
794  *      Return the number of blocks used by the superblock (primary or backup)
795  *      in this group.  Currently this will be only 0 or 1.
796  */
797 int ext3_bg_has_super(struct super_block *sb, int group)
798 {
799         if (EXT3_HAS_RO_COMPAT_FEATURE(sb,EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
800             !ext3_group_sparse(group))
801                 return 0;
802         return 1;
803 }
804
805 /**
806  *      ext3_bg_num_gdb - number of blocks used by the group table in group
807  *      @sb: superblock for filesystem
808  *      @group: group number to check
809  *
810  *      Return the number of blocks used by the group descriptor table
811  *      (primary or backup) in this group.  In the future there may be a
812  *      different number of descriptor blocks in each group.
813  */
814 unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
815 {
816         if (EXT3_HAS_RO_COMPAT_FEATURE(sb,EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
817             !ext3_group_sparse(group))
818                 return 0;
819         return EXT3_SB(sb)->s_gdb_count;
820 }
821
822 #ifdef CONFIG_EXT3_CHECK
823 /* Called at mount-time, super-block is locked */
824 void ext3_check_blocks_bitmap (struct super_block * sb)
825 {
826         struct ext3_super_block *es;
827         unsigned long desc_count, bitmap_count, x, j;
828         unsigned long desc_blocks;
829         struct buffer_head *bitmap_bh = NULL;
830         struct ext3_group_desc *gdp;
831         int i;
832
833         es = EXT3_SB(sb)->s_es;
834         desc_count = 0;
835         bitmap_count = 0;
836         gdp = NULL;
837         for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
838                 gdp = ext3_get_group_desc (sb, i, NULL);
839                 if (!gdp)
840                         continue;
841                 desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
842                 brelse(bitmap_bh);
843                 bitmap_bh = read_block_bitmap(sb, i);
844                 if (bitmap_bh == NULL)
845                         continue;
846
847                 if (ext3_bg_has_super(sb, i) &&
848                                 !ext3_test_bit(0, bitmap_bh->b_data))
849                         ext3_error(sb, __FUNCTION__,
850                                    "Superblock in group %d is marked free", i);
851
852                 desc_blocks = ext3_bg_num_gdb(sb, i);
853                 for (j = 0; j < desc_blocks; j++)
854                         if (!ext3_test_bit(j + 1, bitmap_bh->b_data))
855                                 ext3_error(sb, __FUNCTION__,
856                                            "Descriptor block #%ld in group "
857                                            "%d is marked free", j, i);
858
859                 if (!block_in_use (le32_to_cpu(gdp->bg_block_bitmap),
860                                                 sb, bitmap_bh->b_data))
861                         ext3_error (sb, "ext3_check_blocks_bitmap",
862                                     "Block bitmap for group %d is marked free",
863                                     i);
864
865                 if (!block_in_use (le32_to_cpu(gdp->bg_inode_bitmap),
866                                                 sb, bitmap_bh->b_data))
867                         ext3_error (sb, "ext3_check_blocks_bitmap",
868                                     "Inode bitmap for group %d is marked free",
869                                     i);
870
871                 for (j = 0; j < EXT3_SB(sb)->s_itb_per_group; j++)
872                         if (!block_in_use (le32_to_cpu(gdp->bg_inode_table) + j,
873                                                         sb, bitmap_bh->b_data))
874                                 ext3_error (sb, "ext3_check_blocks_bitmap",
875                                             "Block #%d of the inode table in "
876                                             "group %d is marked free", j, i);
877
878                 x = ext3_count_free(bitmap_bh, sb->s_blocksize);
879                 if (le16_to_cpu(gdp->bg_free_blocks_count) != x)
880                         ext3_error (sb, "ext3_check_blocks_bitmap",
881                                     "Wrong free blocks count for group %d, "
882                                     "stored = %d, counted = %lu", i,
883                                     le16_to_cpu(gdp->bg_free_blocks_count), x);
884                 bitmap_count += x;
885         }
886         brelse(bitmap_bh);
887         if (le32_to_cpu(es->s_free_blocks_count) != bitmap_count)
888                 ext3_error (sb, "ext3_check_blocks_bitmap",
889                         "Wrong free blocks count in super block, "
890                         "stored = %lu, counted = %lu",
891                         (unsigned long)le32_to_cpu(es->s_free_blocks_count),
892                         bitmap_count);
893 }
894 #endif