1 /*
2  *  linux/fs/ext2/balloc.c
3  *
4  * Copyright (C) 1992, 1993, 1994, 1995
5  * Remy Card (card@masi.ibp.fr)
6  * Laboratoire MASI - Institut Blaise Pascal
7  * Universite Pierre et Marie Curie (Paris VI)
8  *
9  *  Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
10  *  Big-endian to little-endian byte-swapping/bitmaps by
11  *        David S. Miller (davem@caip.rutgers.edu), 1995
12  */
13
14 #include <linux/config.h>
15 #include "ext2.h"
16 #include <linux/quotaops.h>
17 #include <linux/sched.h>
18 #include <linux/buffer_head.h>
19 #include <linux/capability.h>
20 #include <linux/vs_base.h>
21 #include <linux/vs_dlimit.h>
22
23 /*
24  * balloc.c contains the blocks allocation and deallocation routines
25  */
26
27 /*
28  * The free blocks are managed by bitmaps.  A file system contains several
29  * block groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
30  * block for inodes, N blocks for the inode table and data blocks.
31  *
32  * The file system contains group descriptors which are located after the
33  * super block.  Each descriptor contains the number of the bitmap block and
34  * the free blocks count of the group.  The descriptors are loaded in memory
35  * when a file system is mounted (see ext2_fill_super).
36  */
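
/*
 * Worked example (illustrative geometry, not taken from this file): with a
 * 1 KiB block size a bitmap block holds 8192 bits, so
 * EXT2_BLOCKS_PER_GROUP(sb) == 8192 and s_first_data_block == 1.  Block
 * 20000 then belongs to block group (20000 - 1) / 8192 == 2 and is tracked
 * by bit (20000 - 1) % 8192 == 3615 of that group's block bitmap.
 */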
37
38
39 #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
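/* For instance, in_range(5, 3, 4) is true: 5 lies in [3, 3 + 4 - 1] == [3, 6]. */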
40
41 struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
42                                              unsigned int block_group,
43                                              struct buffer_head ** bh)
44 {
45         unsigned long group_desc;
46         unsigned long offset;
47         struct ext2_group_desc * desc;
48         struct ext2_sb_info *sbi = EXT2_SB(sb);
49
50         if (block_group >= sbi->s_groups_count) {
51                 ext2_error (sb, "ext2_get_group_desc",
52                             "block_group >= groups_count - "
53                             "block_group = %d, groups_count = %lu",
54                             block_group, sbi->s_groups_count);
55
56                 return NULL;
57         }
58
59         group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(sb);
60         offset = block_group & (EXT2_DESC_PER_BLOCK(sb) - 1);
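        /*
         * Example (assuming a 1 KiB block size): group descriptors are 32
         * bytes each, so one descriptor block holds 32 of them, and
         * block_group 70 is entry 6 (70 & 31) of descriptor block 2 (70 >> 5).
         */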
61         if (!sbi->s_group_desc[group_desc]) {
62                 ext2_error (sb, "ext2_get_group_desc",
63                             "Group descriptor not loaded - "
64                             "block_group = %d, group_desc = %lu, desc = %lu",
65                              block_group, group_desc, offset);
66                 return NULL;
67         }
68
69         desc = (struct ext2_group_desc *) sbi->s_group_desc[group_desc]->b_data;
70         if (bh)
71                 *bh = sbi->s_group_desc[group_desc];
72         return desc + offset;
73 }
74
75 /*
76  * Read the block bitmap for a given block_group into a buffer_head
77  * obtained via sb_bread().
78  *
79  * Return buffer_head on success or NULL in case of failure.
80  */
81 static struct buffer_head *
82 read_block_bitmap(struct super_block *sb, unsigned int block_group)
83 {
84         struct ext2_group_desc * desc;
85         struct buffer_head * bh = NULL;
86         
87         desc = ext2_get_group_desc (sb, block_group, NULL);
88         if (!desc)
89                 goto error_out;
90         bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap));
91         if (!bh)
92                 ext2_error (sb, "read_block_bitmap",
93                             "Cannot read block bitmap - "
94                             "block_group = %d, block_bitmap = %u",
95                             block_group, le32_to_cpu(desc->bg_block_bitmap));
96 error_out:
97         return bh;
98 }
99
100 /*
101  * Set sb->s_dirt here because the superblock was "logically" altered.  We
102  * need to recalculate its free blocks count and flush it out.
103  */
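
/*
 * Worked example of the clamping below (illustrative numbers, as seen after
 * DLIMIT_ADJUST_BLOCK): with free_blocks == 120, root_blocks == 100 and a
 * request of count == 50 from an unprivileged, non-reserved user,
 * free_blocks >= count so count is not trimmed there, but
 * free_blocks < root_blocks + count, so the grant shrinks to
 * free_blocks - root_blocks == 20; had free_blocks been 100 or less,
 * nothing would be granted and 0 returned.
 */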
104 static int reserve_blocks(struct super_block *sb, int count)
105 {
106         struct ext2_sb_info *sbi = EXT2_SB(sb);
107         struct ext2_super_block *es = sbi->s_es;
108         unsigned free_blocks;
109         unsigned root_blocks;
110
111         free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
112         root_blocks = le32_to_cpu(es->s_r_blocks_count);
113
114         DLIMIT_ADJUST_BLOCK(sb, vx_current_xid(), &free_blocks, &root_blocks);
115
116         if (free_blocks < count)
117                 count = free_blocks;
118
119         if (free_blocks < root_blocks + count && !capable(CAP_SYS_RESOURCE) &&
120             sbi->s_resuid != current->fsuid &&
121             (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
122                 /*
123                  * We are too close to the reserved blocks and we are not privileged.
124                  * Can we allocate anything at all?
125                  */
126                 if (free_blocks > root_blocks)
127                         count = free_blocks - root_blocks;
128                 else
129                         return 0;
130         }
131
132         percpu_counter_mod(&sbi->s_freeblocks_counter, -count);
133         sb->s_dirt = 1;
134         return count;
135 }
136
137 static void release_blocks(struct super_block *sb, int count)
138 {
139         if (count) {
140                 struct ext2_sb_info *sbi = EXT2_SB(sb);
141
142                 percpu_counter_mod(&sbi->s_freeblocks_counter, count);
143                 sb->s_dirt = 1;
144         }
145 }
146
147 static int group_reserve_blocks(struct ext2_sb_info *sbi, int group_no,
148         struct ext2_group_desc *desc, struct buffer_head *bh, int count)
149 {
150         unsigned free_blocks;
151
152         if (!desc->bg_free_blocks_count)
153                 return 0;
154
155         spin_lock(sb_bgl_lock(sbi, group_no));
156         free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
157         if (free_blocks < count)
158                 count = free_blocks;
159         desc->bg_free_blocks_count = cpu_to_le16(free_blocks - count);
160         spin_unlock(sb_bgl_lock(sbi, group_no));
161         mark_buffer_dirty(bh);
162         return count;
163 }
164
165 static void group_release_blocks(struct super_block *sb, int group_no,
166         struct ext2_group_desc *desc, struct buffer_head *bh, int count)
167 {
168         if (count) {
169                 struct ext2_sb_info *sbi = EXT2_SB(sb);
170                 unsigned free_blocks;
171
172                 spin_lock(sb_bgl_lock(sbi, group_no));
173                 free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
174                 desc->bg_free_blocks_count = cpu_to_le16(free_blocks + count);
175                 spin_unlock(sb_bgl_lock(sbi, group_no));
176                 sb->s_dirt = 1;
177                 mark_buffer_dirty(bh);
178         }
179 }
180
181 /* Free given blocks, update quota and i_blocks field */
182 void ext2_free_blocks (struct inode * inode, unsigned long block,
183                        unsigned long count)
184 {
185         struct buffer_head *bitmap_bh = NULL;
186         struct buffer_head * bh2;
187         unsigned long block_group;
188         unsigned long bit;
189         unsigned long i;
190         unsigned long overflow;
191         struct super_block * sb = inode->i_sb;
192         struct ext2_sb_info * sbi = EXT2_SB(sb);
193         struct ext2_group_desc * desc;
194         struct ext2_super_block * es = sbi->s_es;
195         unsigned freed = 0, group_freed;
196
197         if (block < le32_to_cpu(es->s_first_data_block) ||
198             block + count < block ||
199             block + count > le32_to_cpu(es->s_blocks_count)) {
200                 ext2_error (sb, "ext2_free_blocks",
201                             "Freeing blocks not in datazone - "
202                             "block = %lu, count = %lu", block, count);
203                 goto error_return;
204         }
205
206         ext2_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1);
207
208 do_more:
209         overflow = 0;
210         block_group = (block - le32_to_cpu(es->s_first_data_block)) /
211                       EXT2_BLOCKS_PER_GROUP(sb);
212         bit = (block - le32_to_cpu(es->s_first_data_block)) %
213                       EXT2_BLOCKS_PER_GROUP(sb);
214         /*
215          * Check to see if we are freeing blocks across a group
216          * boundary.
217          */
218         if (bit + count > EXT2_BLOCKS_PER_GROUP(sb)) {
219                 overflow = bit + count - EXT2_BLOCKS_PER_GROUP(sb);
220                 count -= overflow;
221         }
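        /*
         * Illustrative numbers: with 8192 blocks per group, bit == 8000 and
         * count == 400, the range crosses into the next group, so
         * overflow == 208 and only 192 blocks are freed on this pass; the
         * goto at the end of the function loops back to do_more for the
         * remaining 208.
         */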
222         brelse(bitmap_bh);
223         bitmap_bh = read_block_bitmap(sb, block_group);
224         if (!bitmap_bh)
225                 goto error_return;
226
227         desc = ext2_get_group_desc (sb, block_group, &bh2);
228         if (!desc)
229                 goto error_return;
230
231         if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) ||
232             in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) ||
233             in_range (block, le32_to_cpu(desc->bg_inode_table),
234                       sbi->s_itb_per_group) ||
235             in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table),
236                       sbi->s_itb_per_group))
237                 ext2_error (sb, "ext2_free_blocks",
238                             "Freeing blocks in system zones - "
239                             "Block = %lu, count = %lu",
240                             block, count);
241
242         for (i = 0, group_freed = 0; i < count; i++) {
243                 if (!ext2_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
244                                                 bit + i, bitmap_bh->b_data)) {
245                         ext2_error(sb, __FUNCTION__,
246                                 "bit already cleared for block %lu", block + i);
247                 } else {
248                         group_freed++;
249                 }
250         }
251
252         mark_buffer_dirty(bitmap_bh);
253         if (sb->s_flags & MS_SYNCHRONOUS)
254                 sync_dirty_buffer(bitmap_bh);
255
256         group_release_blocks(sb, block_group, desc, bh2, group_freed);
257         freed += group_freed;
258
259         if (overflow) {
260                 block += count;
261                 count = overflow;
262                 goto do_more;
263         }
264 error_return:
265         brelse(bitmap_bh);
266         DLIMIT_FREE_BLOCK(inode, freed);
267         release_blocks(sb, freed);
268         DQUOT_FREE_BLOCK(inode, freed);
269 }
270
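/*
 * grab_block() searches the block bitmap @map of @size bits for a free
 * block, trying @goal first.  The winning bit is set atomically under
 * @lock (retrying if another CPU grabs it first) and its index is
 * returned, or -1 if the group has no free block left.
 */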
271 static int grab_block(spinlock_t *lock, char *map, unsigned size, int goal)
272 {
273         int k;
274         char *p, *r;
275
276         if (!ext2_test_bit(goal, map))
277                 goto got_it;
278
279 repeat:
280         if (goal) {
281                 /*
282                  * The goal was occupied; search forward for a free
283                  * block up to the next 64-bit boundary.
284                  *
285                  * That upper bound (k) is more or less arbitrary, but it
286                  * has to be less than EXT2_BLOCKS_PER_GROUP, and aligning
287                  * up to the next 64-bit boundary is simple.
288                  */
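
                /*
                 * For example, goal == 100 rounds up to k == 128, so the
                 * scan below covers bits 100..127 only.
                 */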
289                 k = (goal + 63) & ~63;
290                 goal = ext2_find_next_zero_bit(map, k, goal);
291                 if (goal < k)
292                         goto got_it;
293                 /*
294                  * Search in the remainder of the current group.
295                  */
296         }
297
298         p = map + (goal >> 3);
299         r = memscan(p, 0, (size - goal + 7) >> 3);
300         k = (r - map) << 3;
301         if (k < size) {
302                 /* 
303                  * We have succeeded in finding a free byte in the block
304                  * bitmap.  Now search backwards to find the start of this
305                  * group of free blocks - won't take more than 7 iterations.
306                  */
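
                /*
                 * Say the free byte sits at offset 37, i.e. k == 296: if
                 * bits 290..295 are also clear but bit 289 is set, the
                 * backward scan below stops with goal == 290.
                 */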
307                 for (goal = k; goal && !ext2_test_bit (goal - 1, map); goal--)
308                         ;
309                 goto got_it;
310         }
311
312         k = ext2_find_next_zero_bit ((u32 *)map, size, goal);
313         if (k < size) {
314                 goal = k;
315                 goto got_it;
316         }
317         return -1;
318 got_it:
319         if (ext2_set_bit_atomic(lock, goal, (void *) map)) 
320                 goto repeat;    
321         return goal;
322 }
323
324 /*
325  * ext2_new_block uses a goal block to assist allocation.  If the goal is
326  * free, or there is a free block within 64 blocks of the goal, that block
327  * is allocated.  Otherwise a forward search is made for a free block; within 
328  * each block group the search first looks for an entire free byte in the block
329  * bitmap, and then for any free bit if that fails.
330  * This function also updates quota and i_blocks field.
331  */
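
/*
 * Minimal caller sketch (hypothetical, not the in-tree caller; inode and
 * goal are assumed to be supplied by the caller):
 *
 *	u32 prealloc_count = 0, prealloc_block = 0;
 *	int err;
 *	int block = ext2_new_block(inode, goal,
 *				   &prealloc_count, &prealloc_block, &err);
 *	if (!block)
 *		return err;
 *
 * On success the allocated block number is returned and *err is 0; on
 * failure 0 is returned and *err is -EDQUOT, -ENOSPC or -EIO.
 */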
332 int ext2_new_block(struct inode *inode, unsigned long goal,
333                         u32 *prealloc_count, u32 *prealloc_block, int *err)
334 {
335         struct buffer_head *bitmap_bh = NULL;
336         struct buffer_head *gdp_bh;     /* bh2 */
337         struct ext2_group_desc *desc;
338         int group_no;                   /* i */
339         int ret_block;                  /* j */
340         int group_idx;                  /* k */
341         int target_block;               /* tmp */
342         int block = 0;
343         struct super_block *sb = inode->i_sb;
344         struct ext2_sb_info *sbi = EXT2_SB(sb);
345         struct ext2_super_block *es = sbi->s_es;
346         unsigned group_size = EXT2_BLOCKS_PER_GROUP(sb);
347         unsigned prealloc_goal = es->s_prealloc_blocks;
348         unsigned group_alloc = 0, es_alloc, dq_alloc;
349         int nr_scanned_groups;
350
351         if (!prealloc_goal--)
352                 prealloc_goal = EXT2_DEFAULT_PREALLOC_BLOCKS - 1;
353         if (!prealloc_count || *prealloc_count)
354                 prealloc_goal = 0;
355
356         if (DQUOT_ALLOC_BLOCK(inode, 1)) {
357                 *err = -EDQUOT;
358                 goto out;
359         }
360
361         while (prealloc_goal && DQUOT_PREALLOC_BLOCK(inode, prealloc_goal))
362                 prealloc_goal--;
363
364         dq_alloc = prealloc_goal + 1;
365         es_alloc = reserve_blocks(sb, dq_alloc);
366         if (!es_alloc) {
367                 *err = -ENOSPC;
368                 goto out_dquot;
369         }
370         if (DLIMIT_ALLOC_BLOCK(inode, es_alloc)) {
371                 *err = -ENOSPC;
372                 goto out_dlimit;
373         }
374
375         ext2_debug ("goal=%lu.\n", goal);
376
377         if (goal < le32_to_cpu(es->s_first_data_block) ||
378             goal >= le32_to_cpu(es->s_blocks_count))
379                 goal = le32_to_cpu(es->s_first_data_block);
380         group_no = (goal - le32_to_cpu(es->s_first_data_block)) / group_size;
381         desc = ext2_get_group_desc (sb, group_no, &gdp_bh);
382         if (!desc) {
383                 /*
384                  * gdp_bh may still be uninitialised.  But group_release_blocks
385                  * will not touch it because group_alloc is zero.
386                  */
387                 goto io_error;
388         }
389
390         group_alloc = group_reserve_blocks(sbi, group_no, desc,
391                                         gdp_bh, es_alloc);
392         if (group_alloc) {
393                 ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) %
394                                         group_size);
395                 brelse(bitmap_bh);
396                 bitmap_bh = read_block_bitmap(sb, group_no);
397                 if (!bitmap_bh)
398                         goto io_error;
399                 
400                 ext2_debug("goal is at %d:%d.\n", group_no, ret_block);
401
402                 ret_block = grab_block(sb_bgl_lock(sbi, group_no),
403                                 bitmap_bh->b_data, group_size, ret_block);
404                 if (ret_block >= 0)
405                         goto got_block;
406                 group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
407                 group_alloc = 0;
408         }
409
410         ext2_debug ("Bit not found in block group %d.\n", group_no);
411
412         /*
413          * Now search the rest of the groups.  We assume that 
414          * group_no and desc correctly point to the last group visited.
415          */
416         nr_scanned_groups = 0;
417 retry:
418         for (group_idx = 0; !group_alloc &&
419                         group_idx < sbi->s_groups_count; group_idx++) {
420                 group_no++;
421                 if (group_no >= sbi->s_groups_count)
422                         group_no = 0;
423                 desc = ext2_get_group_desc(sb, group_no, &gdp_bh);
424                 if (!desc)
425                         goto io_error;
426                 group_alloc = group_reserve_blocks(sbi, group_no, desc,
427                                                 gdp_bh, es_alloc);
428         }
429         if (!group_alloc) {
430                 *err = -ENOSPC;
431                 goto out_release;
432         }
433         brelse(bitmap_bh);
434         bitmap_bh = read_block_bitmap(sb, group_no);
435         if (!bitmap_bh)
436                 goto io_error;
437
438         ret_block = grab_block(sb_bgl_lock(sbi, group_no), bitmap_bh->b_data,
439                                 group_size, 0);
440         if (ret_block < 0) {
441                 /*
442                  * If a free block counter is corrupted we can loop infinitely.
443                  * Detect that here.
444                  */
445                 nr_scanned_groups++;
446                 if (nr_scanned_groups > 2 * sbi->s_groups_count) {
447                         ext2_error(sb, "ext2_new_block",
448                                 "corrupted free blocks counters");
449                         goto io_error;
450                 }
451                 /*
452                  * Someone else grabbed the last free block in this blockgroup
453                  * before us.  Retry the scan.
454                  */
455                 group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
456                 group_alloc = 0;
457                 goto retry;
458         }
459
460 got_block:
461         ext2_debug("using block group %d(%d)\n",
462                 group_no, desc->bg_free_blocks_count);
463
464         target_block = ret_block + group_no * group_size +
465                         le32_to_cpu(es->s_first_data_block);
466
467         if (target_block == le32_to_cpu(desc->bg_block_bitmap) ||
468             target_block == le32_to_cpu(desc->bg_inode_bitmap) ||
469             in_range(target_block, le32_to_cpu(desc->bg_inode_table),
470                       sbi->s_itb_per_group))
471                 ext2_error (sb, "ext2_new_block",
472                             "Allocating block in system zone - "
473                             "block = %u", target_block);
474
475         if (target_block >= le32_to_cpu(es->s_blocks_count)) {
476                 ext2_error (sb, "ext2_new_block",
477                             "block(%d) >= blocks count(%d) - "
478                             "block_group = %d, es == %p ", ret_block,
479                         le32_to_cpu(es->s_blocks_count), group_no, es);
480                 goto io_error;
481         }
482         block = target_block;
483
484         /* OK, we _had_ allocated something */
485         ext2_debug("found bit %d\n", ret_block);
486
487         dq_alloc--;
488         es_alloc--;
489         group_alloc--;
490
491         /*
492          * Do block preallocation now if required.
493          */
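
        /*
         * Example (illustrative): if group_alloc is still 5 here,
         * *prealloc_count is zero and the five bits after ret_block are
         * free, the loop below claims them, sets *prealloc_block = block + 1
         * and *prealloc_count = 5, and es_alloc, dq_alloc and group_alloc
         * each drop by 5.
         */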
494         write_lock(&EXT2_I(inode)->i_meta_lock);
495         if (group_alloc && !*prealloc_count) {
496                 unsigned n;
497
498                 for (n = 0; n < group_alloc && ++ret_block < group_size; n++) {
499                         if (ext2_set_bit_atomic(sb_bgl_lock(sbi, group_no),
500                                                 ret_block,
501                                                 (void*) bitmap_bh->b_data))
502                                 break;
503                 }
504                 *prealloc_block = block + 1;
505                 *prealloc_count = n;
506                 es_alloc -= n;
507                 dq_alloc -= n;
508                 group_alloc -= n;
509         }
510         write_unlock(&EXT2_I(inode)->i_meta_lock);
511
512         mark_buffer_dirty(bitmap_bh);
513         if (sb->s_flags & MS_SYNCHRONOUS)
514                 sync_dirty_buffer(bitmap_bh);
515
516         ext2_debug ("allocating block %d. ", block);
517
518         *err = 0;
519 out_release:
520         group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
521         DLIMIT_FREE_BLOCK(inode, es_alloc);
522 out_dlimit:
523         release_blocks(sb, es_alloc);
524 out_dquot:
525         DQUOT_FREE_BLOCK(inode, dq_alloc);
526 out:
527         brelse(bitmap_bh);
528         return block;
529
530 io_error:
531         *err = -EIO;
532         goto out_release;
533 }
534
535 unsigned long ext2_count_free_blocks (struct super_block * sb)
536 {
537         struct ext2_group_desc * desc;
538         unsigned long desc_count = 0;
539         int i;
540 #ifdef EXT2FS_DEBUG
541         unsigned long bitmap_count, x;
542         struct ext2_super_block *es;
543
544         lock_super (sb);
545         es = EXT2_SB(sb)->s_es;
546         desc_count = 0;
547         bitmap_count = 0;
548         desc = NULL;
549         for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
550                 struct buffer_head *bitmap_bh;
551                 desc = ext2_get_group_desc (sb, i, NULL);
552                 if (!desc)
553                         continue;
554                 desc_count += le16_to_cpu(desc->bg_free_blocks_count);
555                 bitmap_bh = read_block_bitmap(sb, i);
556                 if (!bitmap_bh)
557                         continue;
558                 
559                 x = ext2_count_free(bitmap_bh, sb->s_blocksize);
560                 printk ("group %d: stored = %d, counted = %lu\n",
561                         i, le16_to_cpu(desc->bg_free_blocks_count), x);
562                 bitmap_count += x;
563                 brelse(bitmap_bh);
564         }
565         printk("ext2_count_free_blocks: stored = %lu, computed = %lu, %lu\n",
566                 (long)le32_to_cpu(es->s_free_blocks_count),
567                 desc_count, bitmap_count);
568         unlock_super (sb);
569         return bitmap_count;
570 #else
571         for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
572                 desc = ext2_get_group_desc (sb, i, NULL);
573                 if (!desc)
574                         continue;
575                 desc_count += le16_to_cpu(desc->bg_free_blocks_count);
576         }
577         return desc_count;
578 #endif
579 }
580
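/*
 * block_in_use() tests whether @block is marked allocated in @map, the
 * block bitmap of the group that @block belongs to.
 */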
581 static inline int
582 block_in_use(unsigned long block, struct super_block *sb, unsigned char *map)
583 {
584         return ext2_test_bit ((block -
585                 le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block)) %
586                          EXT2_BLOCKS_PER_GROUP(sb), map);
587 }
588
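/*
 * test_root(a, b) is true when a is a positive integer power of b
 * (b, b*b, b*b*b, ...); e.g. test_root(49, 7) is true, test_root(48, 7)
 * is not.
 */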
589 static inline int test_root(int a, int b)
590 {
591         int num = b;
592
593         while (a > num)
594                 num *= b;
595         return num == a;
596 }
597
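/*
 * With the sparse_super feature, superblock and group descriptor backups
 * live only in group 0, group 1 and groups that are powers of 3, 5 or 7
 * (0, 1, 3, 5, 7, 9, 25, 27, 49, ...).
 */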
598 static int ext2_group_sparse(int group)
599 {
600         if (group <= 1)
601                 return 1;
602         return (test_root(group, 3) || test_root(group, 5) ||
603                 test_root(group, 7));
604 }
605
606 /**
607  *      ext2_bg_has_super - number of blocks used by the superblock in group
608  *      @sb: superblock for filesystem
609  *      @group: group number to check
610  *
611  *      Return the number of blocks used by the superblock (primary or backup)
612  *      in this group.  Currently this will be only 0 or 1.
613  */
614 int ext2_bg_has_super(struct super_block *sb, int group)
615 {
616         if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
617             !ext2_group_sparse(group))
618                 return 0;
619         return 1;
620 }
621
622 /**
623  *      ext2_bg_num_gdb - number of blocks used by the group table in group
624  *      @sb: superblock for filesystem
625  *      @group: group number to check
626  *
627  *      Return the number of blocks used by the group descriptor table
628  *      (primary or backup) in this group.  In the future there may be a
629  *      different number of descriptor blocks in each group.
630  */
631 unsigned long ext2_bg_num_gdb(struct super_block *sb, int group)
632 {
633         if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
634             !ext2_group_sparse(group))
635                 return 0;
636         return EXT2_SB(sb)->s_gdb_count;
637 }
638