patch-2_6_7-vs1_9_1_12
[linux-2.6.git] / fs / ext3 / balloc.c
1 /*
2  *  linux/fs/ext3/balloc.c
3  *
4  * Copyright (C) 1992, 1993, 1994, 1995
5  * Remy Card (card@masi.ibp.fr)
6  * Laboratoire MASI - Institut Blaise Pascal
7  * Universite Pierre et Marie Curie (Paris VI)
8  *
9  *  Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
10  *  Big-endian to little-endian byte-swapping/bitmaps by
11  *        David S. Miller (davem@caip.rutgers.edu), 1995
12  */
13
14 #include <linux/config.h>
15 #include <linux/time.h>
16 #include <linux/fs.h>
17 #include <linux/jbd.h>
18 #include <linux/ext3_fs.h>
19 #include <linux/ext3_jbd.h>
20 #include <linux/quotaops.h>
21 #include <linux/buffer_head.h>
22 #include <linux/vs_dlimit.h>
23
24 /*
25  * balloc.c contains the blocks allocation and deallocation routines
26  */
27
28 /*
29  * The free blocks are managed by bitmaps.  A file system contains several
30  * blocks groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
31  * block for inodes, N blocks for the inode table and data blocks.
32  *
33  * The file system contains group descriptors which are located after the
34  * super block.  Each descriptor contains the number of the bitmap block and
35  * the free blocks count in the block.  The descriptors are loaded in memory
36  * when a file system is mounted (see ext3_read_super).
37  */
38
39
/*
 * in_range - test whether @b lies inside the @len-element range that
 * starts at @first, i.e. first <= b < first + len.
 *
 * The test is written as an offset comparison so that an empty range
 * (@len == 0) never matches and so that (first) + (len) cannot wrap
 * around.  @b and @first are each evaluated twice: do not pass
 * expressions with side effects.
 */
#define in_range(b, first, len) ((b) >= (first) && (b) - (first) < (len))
41
42 struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
43                                              unsigned int block_group,
44                                              struct buffer_head ** bh)
45 {
46         unsigned long group_desc;
47         unsigned long desc;
48         struct ext3_group_desc * gdp;
49
50         if (block_group >= EXT3_SB(sb)->s_groups_count) {
51                 ext3_error (sb, "ext3_get_group_desc",
52                             "block_group >= groups_count - "
53                             "block_group = %d, groups_count = %lu",
54                             block_group, EXT3_SB(sb)->s_groups_count);
55
56                 return NULL;
57         }
58
59         group_desc = block_group / EXT3_DESC_PER_BLOCK(sb);
60         desc = block_group % EXT3_DESC_PER_BLOCK(sb);
61         if (!EXT3_SB(sb)->s_group_desc[group_desc]) {
62                 ext3_error (sb, "ext3_get_group_desc",
63                             "Group descriptor not loaded - "
64                             "block_group = %d, group_desc = %lu, desc = %lu",
65                              block_group, group_desc, desc);
66                 return NULL;
67         }
68
69         gdp = (struct ext3_group_desc *) 
70               EXT3_SB(sb)->s_group_desc[group_desc]->b_data;
71         if (bh)
72                 *bh = EXT3_SB(sb)->s_group_desc[group_desc];
73         return gdp + desc;
74 }
75
76 /*
77  * Read the bitmap for a given block_group, reading into the specified 
78  * slot in the superblock's bitmap cache.
79  *
80  * Return buffer_head on success or NULL in case of failure.
81  */
82 static struct buffer_head *
83 read_block_bitmap(struct super_block *sb, unsigned int block_group)
84 {
85         struct ext3_group_desc * desc;
86         struct buffer_head * bh = NULL;
87
88         desc = ext3_get_group_desc (sb, block_group, NULL);
89         if (!desc)
90                 goto error_out;
91         bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap));
92         if (!bh)
93                 ext3_error (sb, "read_block_bitmap",
94                             "Cannot read block bitmap - "
95                             "block_group = %d, block_bitmap = %lu",
96                             block_group, (unsigned long) desc->bg_block_bitmap);
97 error_out:
98         return bh;
99 }
100
101 /* Free given blocks, update quota and i_blocks field */
102 void ext3_free_blocks (handle_t *handle, struct inode * inode,
103                         unsigned long block, unsigned long count)
104 {
105         struct buffer_head *bitmap_bh = NULL;
106         struct buffer_head *gd_bh;
107         unsigned long block_group;
108         unsigned long bit;
109         unsigned long i;
110         unsigned long overflow;
111         struct super_block * sb;
112         struct ext3_group_desc * gdp;
113         struct ext3_super_block * es;
114         struct ext3_sb_info *sbi;
115         int err = 0, ret;
116         int dquot_freed_blocks = 0;
117
118         sb = inode->i_sb;
119         if (!sb) {
120                 printk ("ext3_free_blocks: nonexistent device");
121                 return;
122         }
123         sbi = EXT3_SB(sb);
124         es = EXT3_SB(sb)->s_es;
125         if (block < le32_to_cpu(es->s_first_data_block) ||
126             block + count < block ||
127             block + count > le32_to_cpu(es->s_blocks_count)) {
128                 ext3_error (sb, "ext3_free_blocks",
129                             "Freeing blocks not in datazone - "
130                             "block = %lu, count = %lu", block, count);
131                 goto error_return;
132         }
133
134         ext3_debug ("freeing block %lu\n", block);
135
136 do_more:
137         overflow = 0;
138         block_group = (block - le32_to_cpu(es->s_first_data_block)) /
139                       EXT3_BLOCKS_PER_GROUP(sb);
140         bit = (block - le32_to_cpu(es->s_first_data_block)) %
141                       EXT3_BLOCKS_PER_GROUP(sb);
142         /*
143          * Check to see if we are freeing blocks across a group
144          * boundary.
145          */
146         if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) {
147                 overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
148                 count -= overflow;
149         }
150         brelse(bitmap_bh);
151         bitmap_bh = read_block_bitmap(sb, block_group);
152         if (!bitmap_bh)
153                 goto error_return;
154         gdp = ext3_get_group_desc (sb, block_group, &gd_bh);
155         if (!gdp)
156                 goto error_return;
157
158         if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) ||
159             in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) ||
160             in_range (block, le32_to_cpu(gdp->bg_inode_table),
161                       EXT3_SB(sb)->s_itb_per_group) ||
162             in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table),
163                       EXT3_SB(sb)->s_itb_per_group))
164                 ext3_error (sb, "ext3_free_blocks",
165                             "Freeing blocks in system zones - "
166                             "Block = %lu, count = %lu",
167                             block, count);
168
169         /*
170          * We are about to start releasing blocks in the bitmap,
171          * so we need undo access.
172          */
173         /* @@@ check errors */
174         BUFFER_TRACE(bitmap_bh, "getting undo access");
175         err = ext3_journal_get_undo_access(handle, bitmap_bh, NULL);
176         if (err)
177                 goto error_return;
178
179         /*
180          * We are about to modify some metadata.  Call the journal APIs
181          * to unshare ->b_data if a currently-committing transaction is
182          * using it
183          */
184         BUFFER_TRACE(gd_bh, "get_write_access");
185         err = ext3_journal_get_write_access(handle, gd_bh);
186         if (err)
187                 goto error_return;
188
189         jbd_lock_bh_state(bitmap_bh);
190
191         for (i = 0; i < count; i++) {
192                 /*
193                  * An HJ special.  This is expensive...
194                  */
195 #ifdef CONFIG_JBD_DEBUG
196                 jbd_unlock_bh_state(bitmap_bh);
197                 {
198                         struct buffer_head *debug_bh;
199                         debug_bh = sb_find_get_block(sb, block + i);
200                         if (debug_bh) {
201                                 BUFFER_TRACE(debug_bh, "Deleted!");
202                                 if (!bh2jh(bitmap_bh)->b_committed_data)
203                                         BUFFER_TRACE(debug_bh,
204                                                 "No commited data in bitmap");
205                                 BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap");
206                                 __brelse(debug_bh);
207                         }
208                 }
209                 jbd_lock_bh_state(bitmap_bh);
210 #endif
211                 /* @@@ This prevents newly-allocated data from being
212                  * freed and then reallocated within the same
213                  * transaction. 
214                  * 
215                  * Ideally we would want to allow that to happen, but to
216                  * do so requires making journal_forget() capable of
217                  * revoking the queued write of a data block, which
218                  * implies blocking on the journal lock.  *forget()
219                  * cannot block due to truncate races.
220                  *
221                  * Eventually we can fix this by making journal_forget()
222                  * return a status indicating whether or not it was able
223                  * to revoke the buffer.  On successful revoke, it is
224                  * safe not to set the allocation bit in the committed
225                  * bitmap, because we know that there is no outstanding
226                  * activity on the buffer any more and so it is safe to
227                  * reallocate it.  
228                  */
229                 BUFFER_TRACE(bitmap_bh, "set in b_committed_data");
230                 J_ASSERT_BH(bitmap_bh,
231                                 bh2jh(bitmap_bh)->b_committed_data != NULL);
232                 ext3_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i,
233                                 bh2jh(bitmap_bh)->b_committed_data);
234
235                 /*
236                  * We clear the bit in the bitmap after setting the committed
237                  * data bit, because this is the reverse order to that which
238                  * the allocator uses.
239                  */
240                 BUFFER_TRACE(bitmap_bh, "clear bit");
241                 if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
242                                                 bit + i, bitmap_bh->b_data)) {
243                         jbd_unlock_bh_state(bitmap_bh);
244                         ext3_error(sb, __FUNCTION__,
245                                 "bit already cleared for block %lu", block + i);
246                         jbd_lock_bh_state(bitmap_bh);
247                         BUFFER_TRACE(bitmap_bh, "bit already cleared");
248                 } else {
249                         dquot_freed_blocks++;
250                 }
251         }
252         jbd_unlock_bh_state(bitmap_bh);
253
254         spin_lock(sb_bgl_lock(sbi, block_group));
255         gdp->bg_free_blocks_count =
256                 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) +
257                         dquot_freed_blocks);
258         spin_unlock(sb_bgl_lock(sbi, block_group));
259         percpu_counter_mod(&sbi->s_freeblocks_counter, count);
260
261         /* We dirtied the bitmap block */
262         BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
263         err = ext3_journal_dirty_metadata(handle, bitmap_bh);
264
265         /* And the group descriptor block */
266         BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
267         ret = ext3_journal_dirty_metadata(handle, gd_bh);
268         if (!err) err = ret;
269
270         if (overflow && !err) {
271                 block += count;
272                 count = overflow;
273                 goto do_more;
274         }
275         sb->s_dirt = 1;
276 error_return:
277         brelse(bitmap_bh);
278         ext3_std_error(sb, err);
279         if (dquot_freed_blocks) {
280                 DLIMIT_FREE_BLOCK(sb, inode->i_xid, dquot_freed_blocks);
281                 DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
282         }
283         return;
284 }
285
286 /*
287  * For ext3 allocations, we must not reuse any blocks which are
288  * allocated in the bitmap buffer's "last committed data" copy.  This
289  * prevents deletes from freeing up the page for reuse until we have
290  * committed the delete transaction.
291  *
292  * If we didn't do this, then deleting something and reallocating it as
293  * data would allow the old block to be overwritten before the
294  * transaction committed (because we force data to disk before commit).
295  * This would lead to corruption if we crashed between overwriting the
296  * data and committing the delete. 
297  *
298  * @@@ We may want to make this allocation behaviour conditional on
299  * data-writes at some point, and disable it for metadata allocations or
300  * sync-data inodes.
301  */
302 static inline int ext3_test_allocatable(int nr, struct buffer_head *bh)
303 {
304         int ret;
305         struct journal_head *jh = bh2jh(bh);
306
307         if (ext3_test_bit(nr, bh->b_data))
308                 return 0;
309
310         jbd_lock_bh_state(bh);
311         if (!jh->b_committed_data)
312                 ret = 1;
313         else
314                 ret = !ext3_test_bit(nr, jh->b_committed_data);
315         jbd_unlock_bh_state(bh);
316         return ret;
317 }
318
319 /*
320  * Find an allocatable block in a bitmap.  We honour both the bitmap and
321  * its last-committed copy (if that exists), and perform the "most
322  * appropriate allocation" algorithm of looking for a free block near
323  * the initial goal; then for a free byte somewhere in the bitmap; then
324  * for any free bit in the bitmap.
325  */
326 static int
327 find_next_usable_block(int start, struct buffer_head *bh, int maxblocks)
328 {
329         int here, next;
330         char *p, *r;
331         struct journal_head *jh = bh2jh(bh);
332
333         if (start > 0) {
334                 /*
335                  * The goal was occupied; search forward for a free 
336                  * block within the next XX blocks.
337                  *
338                  * end_goal is more or less random, but it has to be
339                  * less than EXT3_BLOCKS_PER_GROUP. Aligning up to the
340                  * next 64-bit boundary is simple..
341                  */
342                 int end_goal = (start + 63) & ~63;
343                 here = ext3_find_next_zero_bit(bh->b_data, end_goal, start);
344                 if (here < end_goal && ext3_test_allocatable(here, bh))
345                         return here;
346                 ext3_debug("Bit not found near goal\n");
347         }
348
349         here = start;
350         if (here < 0)
351                 here = 0;
352
353         p = ((char *)bh->b_data) + (here >> 3);
354         r = memscan(p, 0, (maxblocks - here + 7) >> 3);
355         next = (r - ((char *)bh->b_data)) << 3;
356
357         if (next < maxblocks && ext3_test_allocatable(next, bh))
358                 return next;
359
360         /*
361          * The bitmap search --- search forward alternately through the actual
362          * bitmap and the last-committed copy until we find a bit free in
363          * both
364          */
365         while (here < maxblocks) {
366                 next = ext3_find_next_zero_bit(bh->b_data, maxblocks, here);
367                 if (next >= maxblocks)
368                         return -1;
369                 if (ext3_test_allocatable(next, bh))
370                         return next;
371                 jbd_lock_bh_state(bh);
372                 if (jh->b_committed_data)
373                         here = ext3_find_next_zero_bit(jh->b_committed_data,
374                                                         maxblocks, next);
375                 jbd_unlock_bh_state(bh);
376         }
377         return -1;
378 }
379
380 /*
381  * We think we can allocate this block in this bitmap.  Try to set the bit.
382  * If that succeeds then check that nobody has allocated and then freed the
383  * block since we saw that is was not marked in b_committed_data.  If it _was_
384  * allocated and freed then clear the bit in the bitmap again and return
385  * zero (failure).
386  */
387 static inline int
388 claim_block(spinlock_t *lock, int block, struct buffer_head *bh)
389 {
390         struct journal_head *jh = bh2jh(bh);
391         int ret;
392
393         if (ext3_set_bit_atomic(lock, block, bh->b_data))
394                 return 0;
395         jbd_lock_bh_state(bh);
396         if (jh->b_committed_data && ext3_test_bit(block,jh->b_committed_data)) {
397                 ext3_clear_bit_atomic(lock, block, bh->b_data);
398                 ret = 0;
399         } else {
400                 ret = 1;
401         }
402         jbd_unlock_bh_state(bh);
403         return ret;
404 }
405
406 /*
407  * If we failed to allocate the desired block then we may end up crossing to a
408  * new bitmap.  In that case we must release write access to the old one via
409  * ext3_journal_release_buffer(), else we'll run out of credits.
410  */
411 static int
412 ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group,
413                 struct buffer_head *bitmap_bh, int goal, int *errp)
414 {
415         int i;
416         int fatal;
417         int credits = 0;
418
419         *errp = 0;
420
421         /*
422          * Make sure we use undo access for the bitmap, because it is critical
423          * that we do the frozen_data COW on bitmap buffers in all cases even
424          * if the buffer is in BJ_Forget state in the committing transaction.
425          */
426         BUFFER_TRACE(bitmap_bh, "get undo access for new block");
427         fatal = ext3_journal_get_undo_access(handle, bitmap_bh, &credits);
428         if (fatal) {
429                 *errp = fatal;
430                 goto fail;
431         }
432
433 repeat:
434         if (goal < 0 || !ext3_test_allocatable(goal, bitmap_bh)) {
435                 goal = find_next_usable_block(goal, bitmap_bh,
436                                         EXT3_BLOCKS_PER_GROUP(sb));
437                 if (goal < 0)
438                         goto fail_access;
439
440                 for (i = 0; i < 7 && goal > 0 &&
441                                 ext3_test_allocatable(goal - 1, bitmap_bh);
442                         i++, goal--);
443         }
444
445         if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group), goal, bitmap_bh)) {
446                 /*
447                  * The block was allocated by another thread, or it was
448                  * allocated and then freed by another thread
449                  */
450                 goal++;
451                 if (goal >= EXT3_BLOCKS_PER_GROUP(sb))
452                         goto fail_access;
453                 goto repeat;
454         }
455
456         BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for bitmap block");
457         fatal = ext3_journal_dirty_metadata(handle, bitmap_bh);
458         if (fatal) {
459                 *errp = fatal;
460                 goto fail;
461         }
462         return goal;
463
464 fail_access:
465         BUFFER_TRACE(bitmap_bh, "journal_release_buffer");
466         ext3_journal_release_buffer(handle, bitmap_bh, credits);
467 fail:
468         return -1;
469 }
470
471 /*
472  * ext3_new_block uses a goal block to assist allocation.  If the goal is
473  * free, or there is a free block within 32 blocks of the goal, that block
474  * is allocated.  Otherwise a forward search is made for a free block; within 
475  * each block group the search first looks for an entire free byte in the block
476  * bitmap, and then for any free bit if that fails.
477  * This function also updates quota and i_blocks field.
478  */
479 int
480 ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
481                 u32 *prealloc_count, u32 *prealloc_block, int *errp)
482 {
483         struct buffer_head *bitmap_bh = NULL;   /* bh */
484         struct buffer_head *gdp_bh;             /* bh2 */
485         int group_no;                           /* i */
486         int ret_block;                          /* j */
487         int bgi;                                /* blockgroup iteration index */
488         int target_block;                       /* tmp */
489         int fatal = 0, err;
490         int performed_allocation = 0;
491         int free_blocks, root_blocks;
492         struct super_block *sb;
493         struct ext3_group_desc *gdp;
494         struct ext3_super_block *es;
495         struct ext3_sb_info *sbi;
496 #ifdef EXT3FS_DEBUG
497         static int goal_hits, goal_attempts;
498 #endif
499         *errp = -ENOSPC;
500         sb = inode->i_sb;
501         if (!sb) {
502                 printk("ext3_new_block: nonexistent device");
503                 return 0;
504         }
505
506         /*
507          * Check quota for allocation of this block.
508          */
509         if (DQUOT_ALLOC_BLOCK(inode, 1)) {
510                 *errp = -EDQUOT;
511                 return 0;
512         }
513         if (DLIMIT_ALLOC_BLOCK(sb, inode->i_xid, 1))
514                 goto out_dlimit;
515
516         sbi = EXT3_SB(sb);
517         es = EXT3_SB(sb)->s_es;
518         ext3_debug("goal=%lu.\n", goal);
519
520         free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
521         root_blocks = le32_to_cpu(es->s_r_blocks_count);
522
523         DLIMIT_ADJUST_BLOCK(sb, vx_current_xid(), &free_blocks, &root_blocks);
524
525         if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
526                 sbi->s_resuid != current->fsuid &&
527                 (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
528                 *errp = -ENOSPC;
529                 goto out;
530         }
531
532         /*
533          * First, test whether the goal block is free.
534          */
535         if (goal < le32_to_cpu(es->s_first_data_block) ||
536             goal >= le32_to_cpu(es->s_blocks_count))
537                 goal = le32_to_cpu(es->s_first_data_block);
538         group_no = (goal - le32_to_cpu(es->s_first_data_block)) /
539                         EXT3_BLOCKS_PER_GROUP(sb);
540         gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
541         if (!gdp)
542                 goto io_error;
543
544         free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
545         if (free_blocks > 0) {
546                 ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) %
547                                 EXT3_BLOCKS_PER_GROUP(sb));
548                 bitmap_bh = read_block_bitmap(sb, group_no);
549                 if (!bitmap_bh)
550                         goto io_error;
551                 ret_block = ext3_try_to_allocate(sb, handle, group_no,
552                                         bitmap_bh, ret_block, &fatal);
553                 if (fatal)
554                         goto out;
555                 if (ret_block >= 0)
556                         goto allocated;
557         }
558
559         /*
560          * Now search the rest of the groups.  We assume that 
561          * i and gdp correctly point to the last group visited.
562          */
563         for (bgi = 0; bgi < EXT3_SB(sb)->s_groups_count; bgi++) {
564                 group_no++;
565                 if (group_no >= EXT3_SB(sb)->s_groups_count)
566                         group_no = 0;
567                 gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
568                 if (!gdp) {
569                         *errp = -EIO;
570                         goto out;
571                 }
572                 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
573                 if (free_blocks <= 0)
574                         continue;
575
576                 brelse(bitmap_bh);
577                 bitmap_bh = read_block_bitmap(sb, group_no);
578                 if (!bitmap_bh)
579                         goto io_error;
580                 ret_block = ext3_try_to_allocate(sb, handle, group_no,
581                                                 bitmap_bh, -1, &fatal);
582                 if (fatal)
583                         goto out;
584                 if (ret_block >= 0) 
585                         goto allocated;
586         }
587
588         /* No space left on the device */
589         *errp = -ENOSPC;
590         goto out;
591
592 allocated:
593
594         ext3_debug("using block group %d(%d)\n",
595                         group_no, gdp->bg_free_blocks_count);
596
597         BUFFER_TRACE(gdp_bh, "get_write_access");
598         fatal = ext3_journal_get_write_access(handle, gdp_bh);
599         if (fatal)
600                 goto out;
601
602         target_block = ret_block + group_no * EXT3_BLOCKS_PER_GROUP(sb)
603                                 + le32_to_cpu(es->s_first_data_block);
604
605         if (target_block == le32_to_cpu(gdp->bg_block_bitmap) ||
606             target_block == le32_to_cpu(gdp->bg_inode_bitmap) ||
607             in_range(target_block, le32_to_cpu(gdp->bg_inode_table),
608                       EXT3_SB(sb)->s_itb_per_group))
609                 ext3_error(sb, "ext3_new_block",
610                             "Allocating block in system zone - "
611                             "block = %u", target_block);
612
613         performed_allocation = 1;
614
615 #ifdef CONFIG_JBD_DEBUG
616         {
617                 struct buffer_head *debug_bh;
618
619                 /* Record bitmap buffer state in the newly allocated block */
620                 debug_bh = sb_find_get_block(sb, target_block);
621                 if (debug_bh) {
622                         BUFFER_TRACE(debug_bh, "state when allocated");
623                         BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state");
624                         brelse(debug_bh);
625                 }
626         }
627         jbd_lock_bh_state(bitmap_bh);
628         spin_lock(sb_bgl_lock(sbi, group_no));
629         if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) {
630                 if (ext3_test_bit(ret_block,
631                                 bh2jh(bitmap_bh)->b_committed_data)) {
632                         printk("%s: block was unexpectedly set in "
633                                 "b_committed_data\n", __FUNCTION__);
634                 }
635         }
636         ext3_debug("found bit %d\n", ret_block);
637         spin_unlock(sb_bgl_lock(sbi, group_no));
638         jbd_unlock_bh_state(bitmap_bh);
639 #endif
640
641         /* ret_block was blockgroup-relative.  Now it becomes fs-relative */
642         ret_block = target_block;
643
644         if (ret_block >= le32_to_cpu(es->s_blocks_count)) {
645                 ext3_error(sb, "ext3_new_block",
646                             "block(%d) >= blocks count(%d) - "
647                             "block_group = %d, es == %p ", ret_block,
648                         le32_to_cpu(es->s_blocks_count), group_no, es);
649                 goto out;
650         }
651
652         /*
653          * It is up to the caller to add the new buffer to a journal
654          * list of some description.  We don't know in advance whether
655          * the caller wants to use it as metadata or data.
656          */
657         ext3_debug("allocating block %d. Goal hits %d of %d.\n",
658                         ret_block, goal_hits, goal_attempts);
659
660         spin_lock(sb_bgl_lock(sbi, group_no));
661         gdp->bg_free_blocks_count =
662                         cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1);
663         spin_unlock(sb_bgl_lock(sbi, group_no));
664         percpu_counter_mod(&sbi->s_freeblocks_counter, -1);
665
666         BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
667         err = ext3_journal_dirty_metadata(handle, gdp_bh);
668         if (!fatal)
669                 fatal = err;
670
671         sb->s_dirt = 1;
672         if (fatal)
673                 goto out;
674
675         *errp = 0;
676         brelse(bitmap_bh);
677         return ret_block;
678
679 io_error:
680         *errp = -EIO;
681 out:
682         DLIMIT_FREE_BLOCK(sb, inode->i_xid, 1);
683 out_dlimit:
684         if (fatal) {
685                 *errp = fatal;
686                 ext3_std_error(sb, fatal);
687         }
688         /*
689          * Undo the block allocation
690          */
691         if (!performed_allocation) {
692                 DLIMIT_FREE_BLOCK(sb, inode->i_xid, 1);
693                 DQUOT_FREE_BLOCK(inode, 1);
694         }
695         brelse(bitmap_bh);
696         return 0;
697 }
698
699 unsigned long ext3_count_free_blocks(struct super_block *sb)
700 {
701         unsigned long desc_count;
702         struct ext3_group_desc *gdp;
703         int i;
704 #ifdef EXT3FS_DEBUG
705         struct ext3_super_block *es;
706         unsigned long bitmap_count, x;
707         struct buffer_head *bitmap_bh = NULL;
708
709         lock_super(sb);
710         es = EXT3_SB(sb)->s_es;
711         desc_count = 0;
712         bitmap_count = 0;
713         gdp = NULL;
714         for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
715                 gdp = ext3_get_group_desc(sb, i, NULL);
716                 if (!gdp)
717                         continue;
718                 desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
719                 brelse(bitmap_bh);
720                 bitmap_bh = read_block_bitmap(sb, i);
721                 if (bitmap_bh == NULL)
722                         continue;
723
724                 x = ext3_count_free(bitmap_bh, sb->s_blocksize);
725                 printk("group %d: stored = %d, counted = %lu\n",
726                         i, le16_to_cpu(gdp->bg_free_blocks_count), x);
727                 bitmap_count += x;
728         }
729         brelse(bitmap_bh);
730         printk("ext3_count_free_blocks: stored = %u, computed = %lu, %lu\n",
731                le32_to_cpu(es->s_free_blocks_count), desc_count, bitmap_count);
732         unlock_super(sb);
733         return bitmap_count;
734 #else
735         desc_count = 0;
736         for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
737                 gdp = ext3_get_group_desc(sb, i, NULL);
738                 if (!gdp)
739                         continue;
740                 desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
741         }
742
743         return desc_count;
744 #endif
745 }
746
747 static inline int block_in_use(unsigned long block,
748                                 struct super_block * sb,
749                                 unsigned char * map)
750 {
751         return ext3_test_bit ((block -
752                 le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) %
753                          EXT3_BLOCKS_PER_GROUP(sb), map);
754 }
755
/*
 * test_root - is @a zero or a power of @b (b^0 == 1 included)?
 *
 * @a is divided by @b for as long as the division is exact.
 * Returns 1 if that ends at 1 (or if @a was 0), 0 otherwise.
 */
static inline int test_root(int a, int b)
{
        if (a == 0)
                return 1;

        for (; a != 1; a /= b) {
                if (a % b)
                        return 0;
        }
        return 1;
}
768
/*
 * ext3_group_sparse - does test_root() accept @group for base 3, 5 or 7?
 *
 * Returns 1 if any of the three tests succeeds, 0 otherwise.
 */
int ext3_group_sparse(int group)
{
        if (test_root(group, 3))
                return 1;
        if (test_root(group, 5))
                return 1;
        return test_root(group, 7);
}
774
775 /**
776  *      ext3_bg_has_super - number of blocks used by the superblock in group
777  *      @sb: superblock for filesystem
778  *      @group: group number to check
779  *
780  *      Return the number of blocks used by the superblock (primary or backup)
781  *      in this group.  Currently this will be only 0 or 1.
782  */
783 int ext3_bg_has_super(struct super_block *sb, int group)
784 {
785         if (EXT3_HAS_RO_COMPAT_FEATURE(sb,EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
786             !ext3_group_sparse(group))
787                 return 0;
788         return 1;
789 }
790
791 /**
792  *      ext3_bg_num_gdb - number of blocks used by the group table in group
793  *      @sb: superblock for filesystem
794  *      @group: group number to check
795  *
796  *      Return the number of blocks used by the group descriptor table
797  *      (primary or backup) in this group.  In the future there may be a
798  *      different number of descriptor blocks in each group.
799  */
800 unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
801 {
802         if (EXT3_HAS_RO_COMPAT_FEATURE(sb,EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
803             !ext3_group_sparse(group))
804                 return 0;
805         return EXT3_SB(sb)->s_gdb_count;
806 }
807
#ifdef CONFIG_EXT3_CHECK
/*
 * ext3_check_blocks_bitmap - sanity-check every group's block bitmap
 * @sb: superblock of the filesystem to check
 *
 * For each block group whose descriptor and block bitmap can be read,
 * report through ext3_error() when any of the following is marked free
 * in the group's block bitmap:
 *   - bit 0, if ext3_bg_has_super() says the group holds a superblock;
 *   - bits 1..n, for the n descriptor blocks from ext3_bg_num_gdb();
 *   - the block bitmap block, the inode bitmap block and each of the
 *     s_itb_per_group inode table blocks named in the group descriptor.
 * The free bits counted in the bitmap are then compared with the
 * descriptor's bg_free_blocks_count, and the sum over all groups is
 * compared with the superblock's s_free_blocks_count.
 *
 * Groups whose descriptor or bitmap cannot be read are skipped and so
 * contribute nothing to the total.
 *
 * Called at mount-time, super-block is locked
 */
void ext3_check_blocks_bitmap (struct super_block * sb)
{
        struct ext3_super_block *es;
        unsigned long bitmap_count, x, j;
        unsigned long desc_blocks;
        struct buffer_head *bitmap_bh = NULL;
        struct ext3_group_desc *gdp;
        int i;

        es = EXT3_SB(sb)->s_es;
        bitmap_count = 0;
        for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
                gdp = ext3_get_group_desc (sb, i, NULL);
                if (!gdp)
                        continue;
                /* Drop the previous group's bitmap before reading the next. */
                brelse(bitmap_bh);
                bitmap_bh = read_block_bitmap(sb, i);
                if (bitmap_bh == NULL)
                        continue;

                if (ext3_bg_has_super(sb, i) &&
                                !ext3_test_bit(0, bitmap_bh->b_data))
                        ext3_error(sb, __FUNCTION__,
                                   "Superblock in group %d is marked free", i);

                /* Descriptor blocks are checked at bits 1..desc_blocks. */
                desc_blocks = ext3_bg_num_gdb(sb, i);
                for (j = 0; j < desc_blocks; j++)
                        if (!ext3_test_bit(j + 1, bitmap_bh->b_data))
                                ext3_error(sb, __FUNCTION__,
                                           "Descriptor block #%lu in group "
                                           "%d is marked free", j, i);

                if (!block_in_use (le32_to_cpu(gdp->bg_block_bitmap),
                                                sb, bitmap_bh->b_data))
                        ext3_error (sb, "ext3_check_blocks_bitmap",
                                    "Block bitmap for group %d is marked free",
                                    i);

                if (!block_in_use (le32_to_cpu(gdp->bg_inode_bitmap),
                                                sb, bitmap_bh->b_data))
                        ext3_error (sb, "ext3_check_blocks_bitmap",
                                    "Inode bitmap for group %d is marked free",
                                    i);

                /* j is unsigned long, so it must be printed with %lu. */
                for (j = 0; j < EXT3_SB(sb)->s_itb_per_group; j++)
                        if (!block_in_use (le32_to_cpu(gdp->bg_inode_table) + j,
                                                        sb, bitmap_bh->b_data))
                                ext3_error (sb, "ext3_check_blocks_bitmap",
                                            "Block #%lu of the inode table in "
                                            "group %d is marked free", j, i);

                /* Compare the bitmap's free count with the descriptor's. */
                x = ext3_count_free(bitmap_bh, sb->s_blocksize);
                if (le16_to_cpu(gdp->bg_free_blocks_count) != x)
                        ext3_error (sb, "ext3_check_blocks_bitmap",
                                    "Wrong free blocks count for group %d, "
                                    "stored = %d, counted = %lu", i,
                                    le16_to_cpu(gdp->bg_free_blocks_count), x);
                bitmap_count += x;
        }
        brelse(bitmap_bh);
        if (le32_to_cpu(es->s_free_blocks_count) != bitmap_count)
                ext3_error (sb, "ext3_check_blocks_bitmap",
                        "Wrong free blocks count in super block, "
                        "stored = %lu, counted = %lu",
                        (unsigned long)le32_to_cpu(es->s_free_blocks_count),
                        bitmap_count);
}
#endif