vserver 1.9.3
[linux-2.6.git] / fs / ext3 / balloc.c
1 /*
2  *  linux/fs/ext3/balloc.c
3  *
4  * Copyright (C) 1992, 1993, 1994, 1995
5  * Remy Card (card@masi.ibp.fr)
6  * Laboratoire MASI - Institut Blaise Pascal
7  * Universite Pierre et Marie Curie (Paris VI)
8  *
9  *  Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
10  *  Big-endian to little-endian byte-swapping/bitmaps by
11  *        David S. Miller (davem@caip.rutgers.edu), 1995
12  */
13
14 #include <linux/config.h>
15 #include <linux/time.h>
16 #include <linux/fs.h>
17 #include <linux/jbd.h>
18 #include <linux/ext3_fs.h>
19 #include <linux/ext3_jbd.h>
20 #include <linux/quotaops.h>
21 #include <linux/buffer_head.h>
22 #include <linux/vs_base.h>
23 #include <linux/vs_dlimit.h>
24
25 /*
26  * balloc.c contains the blocks allocation and deallocation routines
27  */
28
29 /*
30  * The free blocks are managed by bitmaps.  A file system contains several
31  * blocks groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
32  * block for inodes, N blocks for the inode table and data blocks.
33  *
34  * The file system contains group descriptors which are located after the
35  * super block.  Each descriptor contains the number of the bitmap block and
36  * the free blocks count in the group.  The descriptors are loaded in memory
37  * when a file system is mounted (see ext3_read_super).
38  */
39
40
/*
 * in_range(b, first, len): non-zero when b lies inside the closed interval
 * [first, first + len - 1].  Every argument is expanded more than once, so
 * callers must pass expressions without side effects.
 */
#define in_range(b, first, len) \
	(!((b) < (first)) && !((b) > (first) + (len) - 1))
42
43 struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
44                                              unsigned int block_group,
45                                              struct buffer_head ** bh)
46 {
47         unsigned long group_desc;
48         unsigned long desc;
49         struct ext3_group_desc * gdp;
50
51         if (block_group >= EXT3_SB(sb)->s_groups_count) {
52                 ext3_error (sb, "ext3_get_group_desc",
53                             "block_group >= groups_count - "
54                             "block_group = %d, groups_count = %lu",
55                             block_group, EXT3_SB(sb)->s_groups_count);
56
57                 return NULL;
58         }
59
60         group_desc = block_group / EXT3_DESC_PER_BLOCK(sb);
61         desc = block_group % EXT3_DESC_PER_BLOCK(sb);
62         if (!EXT3_SB(sb)->s_group_desc[group_desc]) {
63                 ext3_error (sb, "ext3_get_group_desc",
64                             "Group descriptor not loaded - "
65                             "block_group = %d, group_desc = %lu, desc = %lu",
66                              block_group, group_desc, desc);
67                 return NULL;
68         }
69
70         gdp = (struct ext3_group_desc *) 
71               EXT3_SB(sb)->s_group_desc[group_desc]->b_data;
72         if (bh)
73                 *bh = EXT3_SB(sb)->s_group_desc[group_desc];
74         return gdp + desc;
75 }
76
77 /*
78  * Read the bitmap for a given block_group, reading into the specified 
79  * slot in the superblock's bitmap cache.
80  *
81  * Return buffer_head on success or NULL in case of failure.
82  */
83 static struct buffer_head *
84 read_block_bitmap(struct super_block *sb, unsigned int block_group)
85 {
86         struct ext3_group_desc * desc;
87         struct buffer_head * bh = NULL;
88
89         desc = ext3_get_group_desc (sb, block_group, NULL);
90         if (!desc)
91                 goto error_out;
92         bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap));
93         if (!bh)
94                 ext3_error (sb, "read_block_bitmap",
95                             "Cannot read block bitmap - "
96                             "block_group = %d, block_bitmap = %u",
97                             block_group, le32_to_cpu(desc->bg_block_bitmap));
98 error_out:
99         return bh;
100 }
101
102 /* Free given blocks, update quota and i_blocks field */
103 void ext3_free_blocks (handle_t *handle, struct inode * inode,
104                         unsigned long block, unsigned long count)
105 {
106         struct buffer_head *bitmap_bh = NULL;
107         struct buffer_head *gd_bh;
108         unsigned long block_group;
109         unsigned long bit;
110         unsigned long i;
111         unsigned long overflow;
112         struct super_block * sb;
113         struct ext3_group_desc * gdp;
114         struct ext3_super_block * es;
115         struct ext3_sb_info *sbi;
116         int err = 0, ret;
117         int dquot_freed_blocks = 0;
118
119         sb = inode->i_sb;
120         if (!sb) {
121                 printk ("ext3_free_blocks: nonexistent device");
122                 return;
123         }
124         sbi = EXT3_SB(sb);
125         es = EXT3_SB(sb)->s_es;
126         if (block < le32_to_cpu(es->s_first_data_block) ||
127             block + count < block ||
128             block + count > le32_to_cpu(es->s_blocks_count)) {
129                 ext3_error (sb, "ext3_free_blocks",
130                             "Freeing blocks not in datazone - "
131                             "block = %lu, count = %lu", block, count);
132                 goto error_return;
133         }
134
135         ext3_debug ("freeing block %lu\n", block);
136
137 do_more:
138         overflow = 0;
139         block_group = (block - le32_to_cpu(es->s_first_data_block)) /
140                       EXT3_BLOCKS_PER_GROUP(sb);
141         bit = (block - le32_to_cpu(es->s_first_data_block)) %
142                       EXT3_BLOCKS_PER_GROUP(sb);
143         /*
144          * Check to see if we are freeing blocks across a group
145          * boundary.
146          */
147         if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) {
148                 overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
149                 count -= overflow;
150         }
151         brelse(bitmap_bh);
152         bitmap_bh = read_block_bitmap(sb, block_group);
153         if (!bitmap_bh)
154                 goto error_return;
155         gdp = ext3_get_group_desc (sb, block_group, &gd_bh);
156         if (!gdp)
157                 goto error_return;
158
159         if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) ||
160             in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) ||
161             in_range (block, le32_to_cpu(gdp->bg_inode_table),
162                       EXT3_SB(sb)->s_itb_per_group) ||
163             in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table),
164                       EXT3_SB(sb)->s_itb_per_group))
165                 ext3_error (sb, "ext3_free_blocks",
166                             "Freeing blocks in system zones - "
167                             "Block = %lu, count = %lu",
168                             block, count);
169
170         /*
171          * We are about to start releasing blocks in the bitmap,
172          * so we need undo access.
173          */
174         /* @@@ check errors */
175         BUFFER_TRACE(bitmap_bh, "getting undo access");
176         err = ext3_journal_get_undo_access(handle, bitmap_bh, NULL);
177         if (err)
178                 goto error_return;
179
180         /*
181          * We are about to modify some metadata.  Call the journal APIs
182          * to unshare ->b_data if a currently-committing transaction is
183          * using it
184          */
185         BUFFER_TRACE(gd_bh, "get_write_access");
186         err = ext3_journal_get_write_access(handle, gd_bh);
187         if (err)
188                 goto error_return;
189
190         jbd_lock_bh_state(bitmap_bh);
191
192         for (i = 0; i < count; i++) {
193                 /*
194                  * An HJ special.  This is expensive...
195                  */
196 #ifdef CONFIG_JBD_DEBUG
197                 jbd_unlock_bh_state(bitmap_bh);
198                 {
199                         struct buffer_head *debug_bh;
200                         debug_bh = sb_find_get_block(sb, block + i);
201                         if (debug_bh) {
202                                 BUFFER_TRACE(debug_bh, "Deleted!");
203                                 if (!bh2jh(bitmap_bh)->b_committed_data)
204                                         BUFFER_TRACE(debug_bh,
205                                                 "No commited data in bitmap");
206                                 BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap");
207                                 __brelse(debug_bh);
208                         }
209                 }
210                 jbd_lock_bh_state(bitmap_bh);
211 #endif
212                 /* @@@ This prevents newly-allocated data from being
213                  * freed and then reallocated within the same
214                  * transaction. 
215                  * 
216                  * Ideally we would want to allow that to happen, but to
217                  * do so requires making journal_forget() capable of
218                  * revoking the queued write of a data block, which
219                  * implies blocking on the journal lock.  *forget()
220                  * cannot block due to truncate races.
221                  *
222                  * Eventually we can fix this by making journal_forget()
223                  * return a status indicating whether or not it was able
224                  * to revoke the buffer.  On successful revoke, it is
225                  * safe not to set the allocation bit in the committed
226                  * bitmap, because we know that there is no outstanding
227                  * activity on the buffer any more and so it is safe to
228                  * reallocate it.  
229                  */
230                 BUFFER_TRACE(bitmap_bh, "set in b_committed_data");
231                 J_ASSERT_BH(bitmap_bh,
232                                 bh2jh(bitmap_bh)->b_committed_data != NULL);
233                 ext3_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i,
234                                 bh2jh(bitmap_bh)->b_committed_data);
235
236                 /*
237                  * We clear the bit in the bitmap after setting the committed
238                  * data bit, because this is the reverse order to that which
239                  * the allocator uses.
240                  */
241                 BUFFER_TRACE(bitmap_bh, "clear bit");
242                 if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
243                                                 bit + i, bitmap_bh->b_data)) {
244                         jbd_unlock_bh_state(bitmap_bh);
245                         ext3_error(sb, __FUNCTION__,
246                                 "bit already cleared for block %lu", block + i);
247                         jbd_lock_bh_state(bitmap_bh);
248                         BUFFER_TRACE(bitmap_bh, "bit already cleared");
249                 } else {
250                         dquot_freed_blocks++;
251                 }
252         }
253         jbd_unlock_bh_state(bitmap_bh);
254
255         spin_lock(sb_bgl_lock(sbi, block_group));
256         gdp->bg_free_blocks_count =
257                 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) +
258                         dquot_freed_blocks);
259         spin_unlock(sb_bgl_lock(sbi, block_group));
260         percpu_counter_mod(&sbi->s_freeblocks_counter, count);
261
262         /* We dirtied the bitmap block */
263         BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
264         err = ext3_journal_dirty_metadata(handle, bitmap_bh);
265
266         /* And the group descriptor block */
267         BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
268         ret = ext3_journal_dirty_metadata(handle, gd_bh);
269         if (!err) err = ret;
270
271         if (overflow && !err) {
272                 block += count;
273                 count = overflow;
274                 goto do_more;
275         }
276         sb->s_dirt = 1;
277 error_return:
278         brelse(bitmap_bh);
279         ext3_std_error(sb, err);
280         if (dquot_freed_blocks) {
281                 DLIMIT_FREE_BLOCK(sb, inode->i_xid, dquot_freed_blocks);
282                 DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
283         }
284         return;
285 }
286
287 /*
288  * For ext3 allocations, we must not reuse any blocks which are
289  * allocated in the bitmap buffer's "last committed data" copy.  This
290  * prevents deletes from freeing up the page for reuse until we have
291  * committed the delete transaction.
292  *
293  * If we didn't do this, then deleting something and reallocating it as
294  * data would allow the old block to be overwritten before the
295  * transaction committed (because we force data to disk before commit).
296  * This would lead to corruption if we crashed between overwriting the
297  * data and committing the delete. 
298  *
299  * @@@ We may want to make this allocation behaviour conditional on
300  * data-writes at some point, and disable it for metadata allocations or
301  * sync-data inodes.
302  */
303 static inline int ext3_test_allocatable(int nr, struct buffer_head *bh)
304 {
305         int ret;
306         struct journal_head *jh = bh2jh(bh);
307
308         if (ext3_test_bit(nr, bh->b_data))
309                 return 0;
310
311         jbd_lock_bh_state(bh);
312         if (!jh->b_committed_data)
313                 ret = 1;
314         else
315                 ret = !ext3_test_bit(nr, jh->b_committed_data);
316         jbd_unlock_bh_state(bh);
317         return ret;
318 }
319
320 /*
321  * Find an allocatable block in a bitmap.  We honour both the bitmap and
322  * its last-committed copy (if that exists), and perform the "most
323  * appropriate allocation" algorithm of looking for a free block near
324  * the initial goal; then for a free byte somewhere in the bitmap; then
325  * for any free bit in the bitmap.
326  */
327 static int
328 find_next_usable_block(int start, struct buffer_head *bh, int maxblocks)
329 {
330         int here, next;
331         char *p, *r;
332         struct journal_head *jh = bh2jh(bh);
333
334         if (start > 0) {
335                 /*
336                  * The goal was occupied; search forward for a free 
337                  * block within the next XX blocks.
338                  *
339                  * end_goal is more or less random, but it has to be
340                  * less than EXT3_BLOCKS_PER_GROUP. Aligning up to the
341                  * next 64-bit boundary is simple..
342                  */
343                 int end_goal = (start + 63) & ~63;
344                 here = ext3_find_next_zero_bit(bh->b_data, end_goal, start);
345                 if (here < end_goal && ext3_test_allocatable(here, bh))
346                         return here;
347                 ext3_debug("Bit not found near goal\n");
348         }
349
350         here = start;
351         if (here < 0)
352                 here = 0;
353
354         p = ((char *)bh->b_data) + (here >> 3);
355         r = memscan(p, 0, (maxblocks - here + 7) >> 3);
356         next = (r - ((char *)bh->b_data)) << 3;
357
358         if (next < maxblocks && ext3_test_allocatable(next, bh))
359                 return next;
360
361         /*
362          * The bitmap search --- search forward alternately through the actual
363          * bitmap and the last-committed copy until we find a bit free in
364          * both
365          */
366         while (here < maxblocks) {
367                 next = ext3_find_next_zero_bit(bh->b_data, maxblocks, here);
368                 if (next >= maxblocks)
369                         return -1;
370                 if (ext3_test_allocatable(next, bh))
371                         return next;
372                 jbd_lock_bh_state(bh);
373                 if (jh->b_committed_data)
374                         here = ext3_find_next_zero_bit(jh->b_committed_data,
375                                                         maxblocks, next);
376                 jbd_unlock_bh_state(bh);
377         }
378         return -1;
379 }
380
381 /*
382  * We think we can allocate this block in this bitmap.  Try to set the bit.
383  * If that succeeds then check that nobody has allocated and then freed the
384  * block since we saw that is was not marked in b_committed_data.  If it _was_
385  * allocated and freed then clear the bit in the bitmap again and return
386  * zero (failure).
387  */
388 static inline int
389 claim_block(spinlock_t *lock, int block, struct buffer_head *bh)
390 {
391         struct journal_head *jh = bh2jh(bh);
392         int ret;
393
394         if (ext3_set_bit_atomic(lock, block, bh->b_data))
395                 return 0;
396         jbd_lock_bh_state(bh);
397         if (jh->b_committed_data && ext3_test_bit(block,jh->b_committed_data)) {
398                 ext3_clear_bit_atomic(lock, block, bh->b_data);
399                 ret = 0;
400         } else {
401                 ret = 1;
402         }
403         jbd_unlock_bh_state(bh);
404         return ret;
405 }
406
407 /*
408  * If we failed to allocate the desired block then we may end up crossing to a
409  * new bitmap.  In that case we must release write access to the old one via
410  * ext3_journal_release_buffer(), else we'll run out of credits.
411  */
412 static int
413 ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group,
414                 struct buffer_head *bitmap_bh, int goal, int *errp)
415 {
416         int i;
417         int fatal;
418         int credits = 0;
419
420         *errp = 0;
421
422         /*
423          * Make sure we use undo access for the bitmap, because it is critical
424          * that we do the frozen_data COW on bitmap buffers in all cases even
425          * if the buffer is in BJ_Forget state in the committing transaction.
426          */
427         BUFFER_TRACE(bitmap_bh, "get undo access for new block");
428         fatal = ext3_journal_get_undo_access(handle, bitmap_bh, &credits);
429         if (fatal) {
430                 *errp = fatal;
431                 goto fail;
432         }
433
434 repeat:
435         if (goal < 0 || !ext3_test_allocatable(goal, bitmap_bh)) {
436                 goal = find_next_usable_block(goal, bitmap_bh,
437                                         EXT3_BLOCKS_PER_GROUP(sb));
438                 if (goal < 0)
439                         goto fail_access;
440
441                 for (i = 0; i < 7 && goal > 0 &&
442                                 ext3_test_allocatable(goal - 1, bitmap_bh);
443                         i++, goal--);
444         }
445
446         if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group), goal, bitmap_bh)) {
447                 /*
448                  * The block was allocated by another thread, or it was
449                  * allocated and then freed by another thread
450                  */
451                 goal++;
452                 if (goal >= EXT3_BLOCKS_PER_GROUP(sb))
453                         goto fail_access;
454                 goto repeat;
455         }
456
457         BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for bitmap block");
458         fatal = ext3_journal_dirty_metadata(handle, bitmap_bh);
459         if (fatal) {
460                 *errp = fatal;
461                 goto fail;
462         }
463         return goal;
464
465 fail_access:
466         BUFFER_TRACE(bitmap_bh, "journal_release_buffer");
467         ext3_journal_release_buffer(handle, bitmap_bh, credits);
468 fail:
469         return -1;
470 }
471
472 static int ext3_has_free_blocks(struct super_block *sb)
473 {
474         struct ext3_sb_info *sbi = EXT3_SB(sb);
475         int free_blocks, root_blocks, cond;
476
477         free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
478         root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
479
480         vxdprintk(VXD_CBIT(dlim, 3),
481                 "ext3_has_free_blocks(%p): free=%u, root=%u",
482                 sb, free_blocks, root_blocks);
483
484         DLIMIT_ADJUST_BLOCK(sb, vx_current_xid(), &free_blocks, &root_blocks);
485
486         cond = (free_blocks < root_blocks + 1 &&
487                 !capable(CAP_SYS_RESOURCE) &&
488                 sbi->s_resuid != current->fsuid &&
489                 (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid)));
490
491         vxdprintk(VXD_CBIT(dlim, 3),
492                 "ext3_has_free_blocks(%p): %u<%u+1, %c, %u!=%u r=%d",
493                 sb, free_blocks, root_blocks,
494                 !capable(CAP_SYS_RESOURCE)?'1':'0',
495                 sbi->s_resuid, current->fsuid, cond?0:1);
496
497         return (cond ? 0 : 1);
498 }
499
500 /*
501  * ext3_should_retry_alloc() is called when ENOSPC is returned, and if
502  * it is profitable to retry the operation, this function will wait
503  * for the current or commiting transaction to complete, and then
504  * return TRUE.
505  */
506 int ext3_should_retry_alloc(struct super_block *sb, int *retries)
507 {
508         if (!ext3_has_free_blocks(sb) || (*retries)++ > 3)
509                 return 0;
510
511         jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
512
513         return journal_force_commit_nested(EXT3_SB(sb)->s_journal);
514 }
515
516 /*
517  * ext3_new_block uses a goal block to assist allocation.  If the goal is
518  * free, or there is a free block within 32 blocks of the goal, that block
519  * is allocated.  Otherwise a forward search is made for a free block; within 
520  * each block group the search first looks for an entire free byte in the block
521  * bitmap, and then for any free bit if that fails.
522  * This function also updates quota and i_blocks field.
523  */
524 int
525 ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
526                 u32 *prealloc_count, u32 *prealloc_block, int *errp)
527 {
528         struct buffer_head *bitmap_bh = NULL;   /* bh */
529         struct buffer_head *gdp_bh;             /* bh2 */
530         int group_no;                           /* i */
531         int ret_block;                          /* j */
532         int bgi;                                /* blockgroup iteration index */
533         int target_block;                       /* tmp */
534         int fatal = 0, err;
535         int performed_allocation = 0;
536         int free_blocks;
537         struct super_block *sb;
538         struct ext3_group_desc *gdp;
539         struct ext3_super_block *es;
540         struct ext3_sb_info *sbi;
541 #ifdef EXT3FS_DEBUG
542         static int goal_hits, goal_attempts;
543 #endif
544         *errp = -ENOSPC;
545         sb = inode->i_sb;
546         if (!sb) {
547                 printk("ext3_new_block: nonexistent device");
548                 return 0;
549         }
550
551         /*
552          * Check quota for allocation of this block.
553          */
554         if (DQUOT_ALLOC_BLOCK(inode, 1)) {
555                 *errp = -EDQUOT;
556                 return 0;
557         }
558         if (DLIMIT_ALLOC_BLOCK(sb, inode->i_xid, 1))
559                 goto out_dlimit;
560
561         sbi = EXT3_SB(sb);
562         es = EXT3_SB(sb)->s_es;
563         ext3_debug("goal=%lu.\n", goal);
564
565         if (!ext3_has_free_blocks(sb)) {
566                 *errp = -ENOSPC;
567                 goto out;
568         }
569
570         /*
571          * First, test whether the goal block is free.
572          */
573         if (goal < le32_to_cpu(es->s_first_data_block) ||
574             goal >= le32_to_cpu(es->s_blocks_count))
575                 goal = le32_to_cpu(es->s_first_data_block);
576         group_no = (goal - le32_to_cpu(es->s_first_data_block)) /
577                         EXT3_BLOCKS_PER_GROUP(sb);
578         gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
579         if (!gdp)
580                 goto io_error;
581
582         free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
583         if (free_blocks > 0) {
584                 ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) %
585                                 EXT3_BLOCKS_PER_GROUP(sb));
586                 bitmap_bh = read_block_bitmap(sb, group_no);
587                 if (!bitmap_bh)
588                         goto io_error;
589                 ret_block = ext3_try_to_allocate(sb, handle, group_no,
590                                         bitmap_bh, ret_block, &fatal);
591                 if (fatal)
592                         goto out;
593                 if (ret_block >= 0)
594                         goto allocated;
595         }
596
597         /*
598          * Now search the rest of the groups.  We assume that 
599          * i and gdp correctly point to the last group visited.
600          */
601         for (bgi = 0; bgi < EXT3_SB(sb)->s_groups_count; bgi++) {
602                 group_no++;
603                 if (group_no >= EXT3_SB(sb)->s_groups_count)
604                         group_no = 0;
605                 gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
606                 if (!gdp) {
607                         *errp = -EIO;
608                         goto out;
609                 }
610                 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
611                 if (free_blocks <= 0)
612                         continue;
613
614                 brelse(bitmap_bh);
615                 bitmap_bh = read_block_bitmap(sb, group_no);
616                 if (!bitmap_bh)
617                         goto io_error;
618                 ret_block = ext3_try_to_allocate(sb, handle, group_no,
619                                                 bitmap_bh, -1, &fatal);
620                 if (fatal)
621                         goto out;
622                 if (ret_block >= 0) 
623                         goto allocated;
624         }
625
626         /* No space left on the device */
627         *errp = -ENOSPC;
628         goto out;
629
630 allocated:
631
632         ext3_debug("using block group %d(%d)\n",
633                         group_no, gdp->bg_free_blocks_count);
634
635         BUFFER_TRACE(gdp_bh, "get_write_access");
636         fatal = ext3_journal_get_write_access(handle, gdp_bh);
637         if (fatal)
638                 goto out;
639
640         target_block = ret_block + group_no * EXT3_BLOCKS_PER_GROUP(sb)
641                                 + le32_to_cpu(es->s_first_data_block);
642
643         if (target_block == le32_to_cpu(gdp->bg_block_bitmap) ||
644             target_block == le32_to_cpu(gdp->bg_inode_bitmap) ||
645             in_range(target_block, le32_to_cpu(gdp->bg_inode_table),
646                       EXT3_SB(sb)->s_itb_per_group))
647                 ext3_error(sb, "ext3_new_block",
648                             "Allocating block in system zone - "
649                             "block = %u", target_block);
650
651         performed_allocation = 1;
652
653 #ifdef CONFIG_JBD_DEBUG
654         {
655                 struct buffer_head *debug_bh;
656
657                 /* Record bitmap buffer state in the newly allocated block */
658                 debug_bh = sb_find_get_block(sb, target_block);
659                 if (debug_bh) {
660                         BUFFER_TRACE(debug_bh, "state when allocated");
661                         BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state");
662                         brelse(debug_bh);
663                 }
664         }
665         jbd_lock_bh_state(bitmap_bh);
666         spin_lock(sb_bgl_lock(sbi, group_no));
667         if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) {
668                 if (ext3_test_bit(ret_block,
669                                 bh2jh(bitmap_bh)->b_committed_data)) {
670                         printk("%s: block was unexpectedly set in "
671                                 "b_committed_data\n", __FUNCTION__);
672                 }
673         }
674         ext3_debug("found bit %d\n", ret_block);
675         spin_unlock(sb_bgl_lock(sbi, group_no));
676         jbd_unlock_bh_state(bitmap_bh);
677 #endif
678
679         /* ret_block was blockgroup-relative.  Now it becomes fs-relative */
680         ret_block = target_block;
681
682         if (ret_block >= le32_to_cpu(es->s_blocks_count)) {
683                 ext3_error(sb, "ext3_new_block",
684                             "block(%d) >= blocks count(%d) - "
685                             "block_group = %d, es == %p ", ret_block,
686                         le32_to_cpu(es->s_blocks_count), group_no, es);
687                 goto out;
688         }
689
690         /*
691          * It is up to the caller to add the new buffer to a journal
692          * list of some description.  We don't know in advance whether
693          * the caller wants to use it as metadata or data.
694          */
695         ext3_debug("allocating block %d. Goal hits %d of %d.\n",
696                         ret_block, goal_hits, goal_attempts);
697
698         spin_lock(sb_bgl_lock(sbi, group_no));
699         gdp->bg_free_blocks_count =
700                         cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1);
701         spin_unlock(sb_bgl_lock(sbi, group_no));
702         percpu_counter_mod(&sbi->s_freeblocks_counter, -1);
703
704         BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
705         err = ext3_journal_dirty_metadata(handle, gdp_bh);
706         if (!fatal)
707                 fatal = err;
708
709         sb->s_dirt = 1;
710         if (fatal)
711                 goto out;
712
713         *errp = 0;
714         brelse(bitmap_bh);
715         return ret_block;
716
717 io_error:
718         *errp = -EIO;
719 out:
720         if (!performed_allocation)
721                 DLIMIT_FREE_BLOCK(sb, inode->i_xid, 1);
722 out_dlimit:
723         if (fatal) {
724                 *errp = fatal;
725                 ext3_std_error(sb, fatal);
726         }
727         /*
728          * Undo the block allocation
729          */
730         if (!performed_allocation)
731                 DQUOT_FREE_BLOCK(inode, 1);
732         brelse(bitmap_bh);
733         return 0;
734 }
735
736 unsigned long ext3_count_free_blocks(struct super_block *sb)
737 {
738         unsigned long desc_count;
739         struct ext3_group_desc *gdp;
740         int i;
741 #ifdef EXT3FS_DEBUG
742         struct ext3_super_block *es;
743         unsigned long bitmap_count, x;
744         struct buffer_head *bitmap_bh = NULL;
745
746         lock_super(sb);
747         es = EXT3_SB(sb)->s_es;
748         desc_count = 0;
749         bitmap_count = 0;
750         gdp = NULL;
751         for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
752                 gdp = ext3_get_group_desc(sb, i, NULL);
753                 if (!gdp)
754                         continue;
755                 desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
756                 brelse(bitmap_bh);
757                 bitmap_bh = read_block_bitmap(sb, i);
758                 if (bitmap_bh == NULL)
759                         continue;
760
761                 x = ext3_count_free(bitmap_bh, sb->s_blocksize);
762                 printk("group %d: stored = %d, counted = %lu\n",
763                         i, le16_to_cpu(gdp->bg_free_blocks_count), x);
764                 bitmap_count += x;
765         }
766         brelse(bitmap_bh);
767         printk("ext3_count_free_blocks: stored = %u, computed = %lu, %lu\n",
768                le32_to_cpu(es->s_free_blocks_count), desc_count, bitmap_count);
769         unlock_super(sb);
770         return bitmap_count;
771 #else
772         desc_count = 0;
773         for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
774                 gdp = ext3_get_group_desc(sb, i, NULL);
775                 if (!gdp)
776                         continue;
777                 desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
778         }
779
780         return desc_count;
781 #endif
782 }
783
784 static inline int block_in_use(unsigned long block,
785                                 struct super_block * sb,
786                                 unsigned char * map)
787 {
788         return ext3_test_bit ((block -
789                 le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) %
790                          EXT3_BLOCKS_PER_GROUP(sb), map);
791 }
792
/*
 * Return 1 if @a is 0, 1, or can be reduced to 1 by repeatedly dividing it
 * by @b with no remainder (i.e. @a is a power of @b); return 0 otherwise.
 */
static inline int test_root(int a, int b)
{
        if (a == 0)
                return 1;

        /* strip one factor of b per pass until only 1 is left */
        for (; a != 1; a /= b) {
                if (a % b != 0)
                        return 0;
        }
        return 1;
}
805
/*
 * Return 1 when test_root() accepts @group for base 3, 5 or 7 (which
 * includes groups 0 and 1), and 0 otherwise.
 */
int ext3_group_sparse(int group)
{
        if (test_root(group, 3))
                return 1;
        if (test_root(group, 5))
                return 1;
        return test_root(group, 7) != 0;
}
811
812 /**
813  *      ext3_bg_has_super - number of blocks used by the superblock in group
814  *      @sb: superblock for filesystem
815  *      @group: group number to check
816  *
817  *      Return the number of blocks used by the superblock (primary or backup)
818  *      in this group.  Currently this will be only 0 or 1.
819  */
820 int ext3_bg_has_super(struct super_block *sb, int group)
821 {
822         if (EXT3_HAS_RO_COMPAT_FEATURE(sb,EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
823             !ext3_group_sparse(group))
824                 return 0;
825         return 1;
826 }
827
828 /**
829  *      ext3_bg_num_gdb - number of blocks used by the group table in group
830  *      @sb: superblock for filesystem
831  *      @group: group number to check
832  *
833  *      Return the number of blocks used by the group descriptor table
834  *      (primary or backup) in this group.  In the future there may be a
835  *      different number of descriptor blocks in each group.
836  */
837 unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
838 {
839         if (EXT3_HAS_RO_COMPAT_FEATURE(sb,EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
840             !ext3_group_sparse(group))
841                 return 0;
842         return EXT3_SB(sb)->s_gdb_count;
843 }
844
#ifdef CONFIG_EXT3_CHECK
/*
 * ext3_check_blocks_bitmap - cross-check the block bitmaps against metadata
 * @sb: superblock of the filesystem to check
 *
 * Called at mount-time, super-block is locked.
 *
 * For every group whose descriptor and block bitmap can be read, reports
 * through ext3_error() when any of the following is marked free in the
 * group's block bitmap:
 *   - bit 0, if ext3_bg_has_super() says the group holds a superblock;
 *   - bits 1..n, for the n descriptor blocks given by ext3_bg_num_gdb();
 *   - the group's block bitmap block and inode bitmap block;
 *   - any of the s_itb_per_group inode table blocks.
 * It also reports a group whose stored free-block count differs from the
 * number of free bits counted in its bitmap, and finally a superblock
 * free-block count that differs from the sum of the counted values.
 *
 * Groups whose descriptor or bitmap is unavailable are skipped and do not
 * contribute to the total.
 */
void ext3_check_blocks_bitmap (struct super_block * sb)
{
        struct ext3_super_block *es;
        unsigned long bitmap_count, x, j;
        unsigned long desc_blocks;
        struct buffer_head *bitmap_bh = NULL;
        struct ext3_group_desc *gdp;
        int i;

        es = EXT3_SB(sb)->s_es;
        bitmap_count = 0;
        for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
                gdp = ext3_get_group_desc (sb, i, NULL);
                if (!gdp)
                        continue;
                /* release the previous group's bitmap (NULL on first pass) */
                brelse(bitmap_bh);
                bitmap_bh = read_block_bitmap(sb, i);
                if (bitmap_bh == NULL)
                        continue;

                if (ext3_bg_has_super(sb, i) &&
                                !ext3_test_bit(0, bitmap_bh->b_data))
                        ext3_error(sb, __FUNCTION__,
                                   "Superblock in group %d is marked free", i);

                /* descriptor blocks are checked at bits 1..desc_blocks */
                desc_blocks = ext3_bg_num_gdb(sb, i);
                for (j = 0; j < desc_blocks; j++)
                        if (!ext3_test_bit(j + 1, bitmap_bh->b_data))
                                ext3_error(sb, __FUNCTION__,
                                           "Descriptor block #%lu in group "
                                           "%d is marked free", j, i);

                if (!block_in_use (le32_to_cpu(gdp->bg_block_bitmap),
                                                sb, bitmap_bh->b_data))
                        ext3_error (sb, __FUNCTION__,
                                    "Block bitmap for group %d is marked free",
                                    i);

                if (!block_in_use (le32_to_cpu(gdp->bg_inode_bitmap),
                                                sb, bitmap_bh->b_data))
                        ext3_error (sb, __FUNCTION__,
                                    "Inode bitmap for group %d is marked free",
                                    i);

                /* j is unsigned long, so it must be printed with %lu */
                for (j = 0; j < EXT3_SB(sb)->s_itb_per_group; j++)
                        if (!block_in_use (le32_to_cpu(gdp->bg_inode_table) + j,
                                                        sb, bitmap_bh->b_data))
                                ext3_error (sb, __FUNCTION__,
                                            "Block #%lu of the inode table in "
                                            "group %d is marked free", j, i);

                x = ext3_count_free(bitmap_bh, sb->s_blocksize);
                if (le16_to_cpu(gdp->bg_free_blocks_count) != x)
                        ext3_error (sb, __FUNCTION__,
                                    "Wrong free blocks count for group %d, "
                                    "stored = %d, counted = %lu", i,
                                    le16_to_cpu(gdp->bg_free_blocks_count), x);
                bitmap_count += x;
        }
        brelse(bitmap_bh);
        if (le32_to_cpu(es->s_free_blocks_count) != bitmap_count)
                ext3_error (sb, __FUNCTION__,
                        "Wrong free blocks count in super block, "
                        "stored = %lu, counted = %lu",
                        (unsigned long)le32_to_cpu(es->s_free_blocks_count),
                        bitmap_count);
}
#endif