/*
 *  linux/fs/ext2/balloc.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 */

#include "ext2.h"
#include <linux/quotaops.h>
#include <linux/sched.h>
#include <linux/buffer_head.h>
#include <linux/capability.h>
#include <linux/vs_dlimit.h>

/*
 * balloc.c contains the block allocation and deallocation routines
 */

/*
 * The free blocks are managed by bitmaps.  A file system contains several
 * block groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
 * block for inodes, N blocks for the inode table and data blocks.
 *
 * The file system contains group descriptors which are located after the
 * super block.  Each descriptor contains the number of the bitmap block and
 * the free blocks count of the group.  The descriptors are loaded in memory
 * when a file system is mounted (see ext2_read_super).
 */
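
/*
 * (Illustration: one bitmap block can describe at most 8 * block_size
 * blocks, so a group spans at most 8192 blocks with 1 KiB blocks and
 * 32768 blocks with 4 KiB blocks.)
 */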


#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)

struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
                                             unsigned int block_group,
                                             struct buffer_head ** bh)
{
        unsigned long group_desc;
        unsigned long offset;
        struct ext2_group_desc * desc;
        struct ext2_sb_info *sbi = EXT2_SB(sb);

        if (block_group >= sbi->s_groups_count) {
                ext2_error (sb, "ext2_get_group_desc",
                            "block_group >= groups_count - "
                            "block_group = %d, groups_count = %lu",
                            block_group, sbi->s_groups_count);

                return NULL;
        }

        group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(sb);
        offset = block_group & (EXT2_DESC_PER_BLOCK(sb) - 1);
        if (!sbi->s_group_desc[group_desc]) {
                ext2_error (sb, "ext2_get_group_desc",
                            "Group descriptor not loaded - "
                            "block_group = %d, group_desc = %lu, desc = %lu",
                             block_group, group_desc, offset);
                return NULL;
        }

        desc = (struct ext2_group_desc *) sbi->s_group_desc[group_desc]->b_data;
        if (bh)
                *bh = sbi->s_group_desc[group_desc];
        return desc + offset;
}

/*
 * Read the block bitmap for a given block_group.
 *
 * Return buffer_head on success or NULL in case of failure.
 */
static struct buffer_head *
read_block_bitmap(struct super_block *sb, unsigned int block_group)
{
        struct ext2_group_desc * desc;
        struct buffer_head * bh = NULL;

        desc = ext2_get_group_desc (sb, block_group, NULL);
        if (!desc)
                goto error_out;
        bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap));
        if (!bh)
                ext2_error (sb, "read_block_bitmap",
                            "Cannot read block bitmap - "
                            "block_group = %d, block_bitmap = %u",
                            block_group, le32_to_cpu(desc->bg_block_bitmap));
error_out:
        return bh;
}

/*
 * Set sb->s_dirt here because the superblock was "logically" altered.  We
 * need to recalculate its free blocks count and flush it out.
 */
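/*
 * Try to reserve "count" blocks from the filesystem-wide free blocks
 * counter.  Returns the number of blocks actually reserved, which may be
 * fewer than requested, or 0 if nothing can be handed out without dipping
 * into the reserved (root) pool as an unprivileged caller.
 */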
static int reserve_blocks(struct super_block *sb, int count)
{
        struct ext2_sb_info *sbi = EXT2_SB(sb);
        struct ext2_super_block *es = sbi->s_es;
        unsigned long free_blocks;
        unsigned long root_blocks;

        free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
        root_blocks = le32_to_cpu(es->s_r_blocks_count);

        DLIMIT_ADJUST_BLOCK(sb, vx_current_xid(), &free_blocks, &root_blocks);

        if (free_blocks < count)
                count = free_blocks;

        if (free_blocks < root_blocks + count && !capable(CAP_SYS_RESOURCE) &&
            sbi->s_resuid != current->fsuid &&
            (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
                /*
                 * We are too close to reserve and we are not privileged.
                 * Can we allocate anything at all?
                 */
                if (free_blocks > root_blocks)
                        count = free_blocks - root_blocks;
                else
                        return 0;
        }

        percpu_counter_mod(&sbi->s_freeblocks_counter, -count);
        sb->s_dirt = 1;
        return count;
}

static void release_blocks(struct super_block *sb, int count)
{
        if (count) {
                struct ext2_sb_info *sbi = EXT2_SB(sb);

                percpu_counter_mod(&sbi->s_freeblocks_counter, count);
                sb->s_dirt = 1;
        }
}

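/*
 * Reserve up to "count" blocks from one block group's free blocks count,
 * under the group's bitmap lock.  Returns how many blocks were actually
 * reserved from that group (possibly 0).
 */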
static int group_reserve_blocks(struct ext2_sb_info *sbi, int group_no,
        struct ext2_group_desc *desc, struct buffer_head *bh, int count)
{
        unsigned free_blocks;

        if (!desc->bg_free_blocks_count)
                return 0;

        spin_lock(sb_bgl_lock(sbi, group_no));
        free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
        if (free_blocks < count)
                count = free_blocks;
        desc->bg_free_blocks_count = cpu_to_le16(free_blocks - count);
        spin_unlock(sb_bgl_lock(sbi, group_no));
        mark_buffer_dirty(bh);
        return count;
}

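/* Give unused reserved blocks back to a block group's free blocks count. */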
static void group_release_blocks(struct super_block *sb, int group_no,
        struct ext2_group_desc *desc, struct buffer_head *bh, int count)
{
        if (count) {
                struct ext2_sb_info *sbi = EXT2_SB(sb);
                unsigned free_blocks;

                spin_lock(sb_bgl_lock(sbi, group_no));
                free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
                desc->bg_free_blocks_count = cpu_to_le16(free_blocks + count);
                spin_unlock(sb_bgl_lock(sbi, group_no));
                sb->s_dirt = 1;
                mark_buffer_dirty(bh);
        }
}

/* Free given blocks, update quota and i_blocks field */
void ext2_free_blocks (struct inode * inode, unsigned long block,
                       unsigned long count)
{
        struct buffer_head *bitmap_bh = NULL;
        struct buffer_head * bh2;
        unsigned long block_group;
        unsigned long bit;
        unsigned long i;
        unsigned long overflow;
        struct super_block * sb = inode->i_sb;
        struct ext2_sb_info * sbi = EXT2_SB(sb);
        struct ext2_group_desc * desc;
        struct ext2_super_block * es = sbi->s_es;
        unsigned freed = 0, group_freed;

        if (block < le32_to_cpu(es->s_first_data_block) ||
            block + count < block ||
            block + count > le32_to_cpu(es->s_blocks_count)) {
                ext2_error (sb, "ext2_free_blocks",
                            "Freeing blocks not in datazone - "
                            "block = %lu, count = %lu", block, count);
                goto error_return;
        }

        ext2_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1);

do_more:
        overflow = 0;
        block_group = (block - le32_to_cpu(es->s_first_data_block)) /
                      EXT2_BLOCKS_PER_GROUP(sb);
        bit = (block - le32_to_cpu(es->s_first_data_block)) %
                      EXT2_BLOCKS_PER_GROUP(sb);
        /*
         * Check to see if we are freeing blocks across a group
         * boundary.
         */
        if (bit + count > EXT2_BLOCKS_PER_GROUP(sb)) {
                overflow = bit + count - EXT2_BLOCKS_PER_GROUP(sb);
                count -= overflow;
        }
        brelse(bitmap_bh);
        bitmap_bh = read_block_bitmap(sb, block_group);
        if (!bitmap_bh)
                goto error_return;

        desc = ext2_get_group_desc (sb, block_group, &bh2);
        if (!desc)
                goto error_return;

        if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) ||
            in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) ||
            in_range (block, le32_to_cpu(desc->bg_inode_table),
                      sbi->s_itb_per_group) ||
            in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table),
                      sbi->s_itb_per_group))
                ext2_error (sb, "ext2_free_blocks",
                            "Freeing blocks in system zones - "
                            "Block = %lu, count = %lu",
                            block, count);

        for (i = 0, group_freed = 0; i < count; i++) {
                if (!ext2_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
                                                bit + i, bitmap_bh->b_data)) {
                        ext2_error(sb, __FUNCTION__,
                                "bit already cleared for block %lu", block + i);
                } else {
                        group_freed++;
                }
        }

        mark_buffer_dirty(bitmap_bh);
        if (sb->s_flags & MS_SYNCHRONOUS)
                sync_dirty_buffer(bitmap_bh);

        group_release_blocks(sb, block_group, desc, bh2, group_freed);
        freed += group_freed;

        if (overflow) {
                block += count;
                count = overflow;
                goto do_more;
        }
error_return:
        brelse(bitmap_bh);
        DLIMIT_FREE_BLOCK(inode, freed);
        release_blocks(sb, freed);
        DQUOT_FREE_BLOCK(inode, freed);
}

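/*
 * Find and atomically claim a free bit in the block bitmap "map" of
 * "size" bits, preferring the bit at "goal".  Returns the bit number
 * grabbed, or -1 if no free bit is left.  If another CPU claims the
 * chosen bit first, the search is retried.
 */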
static int grab_block(spinlock_t *lock, char *map, unsigned size, int goal)
{
        int k;
        char *p, *r;

        if (!ext2_test_bit(goal, map))
                goto got_it;

repeat:
        if (goal) {
                /*
                 * The goal was occupied; search forward for a free
                 * block within the next few dozen blocks, up to the
                 * next 64-bit boundary.
                 *
                 * The search limit is more or less arbitrary, but it
                 * has to be less than EXT2_BLOCKS_PER_GROUP, and
                 * aligning up to the next 64-bit boundary is simple.
                 */
                k = (goal + 63) & ~63;
                goal = ext2_find_next_zero_bit(map, k, goal);
                if (goal < k)
                        goto got_it;
                /*
                 * Search in the remainder of the current group.
                 */
        }

        p = map + (goal >> 3);
        r = memscan(p, 0, (size - goal + 7) >> 3);
        k = (r - map) << 3;
        if (k < size) {
                /*
                 * We have succeeded in finding a free byte in the block
                 * bitmap.  Now search backwards to find the start of this
                 * group of free blocks - won't take more than 7 iterations.
                 */
                for (goal = k; goal && !ext2_test_bit (goal - 1, map); goal--)
                        ;
                goto got_it;
        }

        k = ext2_find_next_zero_bit ((u32 *)map, size, goal);
        if (k < size) {
                goal = k;
                goto got_it;
        }
        return -1;
got_it:
        if (ext2_set_bit_atomic(lock, goal, (void *) map))
                goto repeat;
        return goal;
}

/*
 * ext2_new_block uses a goal block to assist allocation.  If the goal is
 * free, or there is a free block within a few dozen blocks of the goal
 * (up to the next 64-bit boundary), that block is allocated.  Otherwise a
 * forward search is made for a free block; within each block group the
 * search first looks for an entire free byte in the block bitmap, and
 * then for any free bit if that fails.
 * This function also updates the quota and the inode's i_blocks field.
 */
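/*
 * Bookkeeping inside ext2_new_block: es_alloc is what was reserved from
 * the filesystem-wide free blocks counter, dq_alloc what was charged to
 * the quota, and group_alloc what was reserved from the chosen block
 * group.  Whatever remains unused at "out_release" is returned through
 * group_release_blocks(), DLIMIT_FREE_BLOCK(), release_blocks() and
 * DQUOT_FREE_BLOCK().
 */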
int ext2_new_block(struct inode *inode, unsigned long goal,
                        u32 *prealloc_count, u32 *prealloc_block, int *err)
{
        struct buffer_head *bitmap_bh = NULL;
        struct buffer_head *gdp_bh;     /* bh2 */
        struct ext2_group_desc *desc;
        int group_no;                   /* i */
        int ret_block;                  /* j */
        int group_idx;                  /* k */
        int target_block;               /* tmp */
        int block = 0;
        struct super_block *sb = inode->i_sb;
        struct ext2_sb_info *sbi = EXT2_SB(sb);
        struct ext2_super_block *es = sbi->s_es;
        unsigned group_size = EXT2_BLOCKS_PER_GROUP(sb);
        unsigned prealloc_goal = es->s_prealloc_blocks;
        unsigned group_alloc = 0, es_alloc, dq_alloc;
        int nr_scanned_groups;

        if (!prealloc_goal--)
                prealloc_goal = EXT2_DEFAULT_PREALLOC_BLOCKS - 1;
        if (!prealloc_count || *prealloc_count)
                prealloc_goal = 0;

        if (DQUOT_ALLOC_BLOCK(inode, 1)) {
                *err = -EDQUOT;
                goto out;
        }

        while (prealloc_goal && DQUOT_PREALLOC_BLOCK(inode, prealloc_goal))
                prealloc_goal--;

        dq_alloc = prealloc_goal + 1;
        es_alloc = reserve_blocks(sb, dq_alloc);
        if (!es_alloc) {
                *err = -ENOSPC;
                goto out_dquot;
        }
        if (DLIMIT_ALLOC_BLOCK(inode, es_alloc)) {
                *err = -ENOSPC;
                goto out_dlimit;
        }

        ext2_debug ("goal=%lu.\n", goal);

        if (goal < le32_to_cpu(es->s_first_data_block) ||
            goal >= le32_to_cpu(es->s_blocks_count))
                goal = le32_to_cpu(es->s_first_data_block);
        group_no = (goal - le32_to_cpu(es->s_first_data_block)) / group_size;
        desc = ext2_get_group_desc (sb, group_no, &gdp_bh);
        if (!desc) {
                /*
                 * gdp_bh may still be uninitialised.  But group_release_blocks
                 * will not touch it because group_alloc is zero.
                 */
                goto io_error;
        }

        group_alloc = group_reserve_blocks(sbi, group_no, desc,
                                        gdp_bh, es_alloc);
        if (group_alloc) {
                ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) %
                                        group_size);
                brelse(bitmap_bh);
                bitmap_bh = read_block_bitmap(sb, group_no);
                if (!bitmap_bh)
                        goto io_error;

                ext2_debug("goal is at %d:%d.\n", group_no, ret_block);

                ret_block = grab_block(sb_bgl_lock(sbi, group_no),
                                bitmap_bh->b_data, group_size, ret_block);
                if (ret_block >= 0)
                        goto got_block;
                group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
                group_alloc = 0;
        }

        ext2_debug ("Bit not found in block group %d.\n", group_no);

        /*
         * Now search the rest of the groups.  We assume that
         * group_no and desc correctly point to the last group visited.
         */
        nr_scanned_groups = 0;
retry:
        for (group_idx = 0; !group_alloc &&
                        group_idx < sbi->s_groups_count; group_idx++) {
                group_no++;
                if (group_no >= sbi->s_groups_count)
                        group_no = 0;
                desc = ext2_get_group_desc(sb, group_no, &gdp_bh);
                if (!desc)
                        goto io_error;
                group_alloc = group_reserve_blocks(sbi, group_no, desc,
                                                gdp_bh, es_alloc);
        }
        if (!group_alloc) {
                *err = -ENOSPC;
                goto out_release;
        }
        brelse(bitmap_bh);
        bitmap_bh = read_block_bitmap(sb, group_no);
        if (!bitmap_bh)
                goto io_error;

        ret_block = grab_block(sb_bgl_lock(sbi, group_no), bitmap_bh->b_data,
                                group_size, 0);
        if (ret_block < 0) {
                /*
                 * If a free blocks counter is corrupted we can loop
                 * infinitely.  Detect that here.
                 */
                nr_scanned_groups++;
                if (nr_scanned_groups > 2 * sbi->s_groups_count) {
                        ext2_error(sb, "ext2_new_block",
                                "corrupted free blocks counters");
                        goto io_error;
                }
                /*
                 * Someone else grabbed the last free block in this blockgroup
                 * before us.  Retry the scan.
                 */
                group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
                group_alloc = 0;
                goto retry;
        }

got_block:
        ext2_debug("using block group %d(%d)\n",
                group_no, desc->bg_free_blocks_count);

        target_block = ret_block + group_no * group_size +
                        le32_to_cpu(es->s_first_data_block);

        if (target_block == le32_to_cpu(desc->bg_block_bitmap) ||
            target_block == le32_to_cpu(desc->bg_inode_bitmap) ||
            in_range(target_block, le32_to_cpu(desc->bg_inode_table),
                      sbi->s_itb_per_group))
                ext2_error (sb, "ext2_new_block",
                            "Allocating block in system zone - "
                            "block = %u", target_block);

        if (target_block >= le32_to_cpu(es->s_blocks_count)) {
                ext2_error (sb, "ext2_new_block",
                            "block(%d) >= blocks count(%d) - "
                            "block_group = %d, es == %p ", ret_block,
                        le32_to_cpu(es->s_blocks_count), group_no, es);
                goto io_error;
        }
        block = target_block;

        /* OK, we _had_ allocated something */
        ext2_debug("found bit %d\n", ret_block);

        dq_alloc--;
        es_alloc--;
        group_alloc--;

        /*
         * Do block preallocation now if required.
         */
        write_lock(&EXT2_I(inode)->i_meta_lock);
        if (group_alloc && !*prealloc_count) {
                unsigned n;

                for (n = 0; n < group_alloc && ++ret_block < group_size; n++) {
                        if (ext2_set_bit_atomic(sb_bgl_lock(sbi, group_no),
                                                ret_block,
                                                (void*) bitmap_bh->b_data))
                                break;
                }
                *prealloc_block = block + 1;
                *prealloc_count = n;
                es_alloc -= n;
                dq_alloc -= n;
                group_alloc -= n;
        }
        write_unlock(&EXT2_I(inode)->i_meta_lock);

        mark_buffer_dirty(bitmap_bh);
        if (sb->s_flags & MS_SYNCHRONOUS)
                sync_dirty_buffer(bitmap_bh);

        ext2_debug ("allocating block %d. ", block);

        *err = 0;
out_release:
        group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
        DLIMIT_FREE_BLOCK(inode, es_alloc);
out_dlimit:
        release_blocks(sb, es_alloc);
out_dquot:
        DQUOT_FREE_BLOCK(inode, dq_alloc);
out:
        brelse(bitmap_bh);
        return block;

io_error:
        *err = -EIO;
        goto out_release;
}

#ifdef EXT2FS_DEBUG

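/* nibblemap[n] is the number of zero bits in the 4-bit value n. */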
static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};

unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars)
{
        unsigned int i;
        unsigned long sum = 0;

        if (!map)
                return (0);
        for (i = 0; i < numchars; i++)
                sum += nibblemap[map->b_data[i] & 0xf] +
                        nibblemap[(map->b_data[i] >> 4) & 0xf];
        return (sum);
}

#endif  /*  EXT2FS_DEBUG  */

unsigned long ext2_count_free_blocks (struct super_block * sb)
{
        struct ext2_group_desc * desc;
        unsigned long desc_count = 0;
        int i;
#ifdef EXT2FS_DEBUG
        unsigned long bitmap_count, x;
        struct ext2_super_block *es;

        es = EXT2_SB(sb)->s_es;
        desc_count = 0;
        bitmap_count = 0;
        desc = NULL;
        for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
                struct buffer_head *bitmap_bh;
                desc = ext2_get_group_desc (sb, i, NULL);
                if (!desc)
                        continue;
                desc_count += le16_to_cpu(desc->bg_free_blocks_count);
                bitmap_bh = read_block_bitmap(sb, i);
                if (!bitmap_bh)
                        continue;

                x = ext2_count_free(bitmap_bh, sb->s_blocksize);
                printk ("group %d: stored = %d, counted = %lu\n",
                        i, le16_to_cpu(desc->bg_free_blocks_count), x);
                bitmap_count += x;
                brelse(bitmap_bh);
        }
        printk("ext2_count_free_blocks: stored = %lu, computed = %lu, %lu\n",
                (long)le32_to_cpu(es->s_free_blocks_count),
                desc_count, bitmap_count);
        return bitmap_count;
#else
        for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
                desc = ext2_get_group_desc (sb, i, NULL);
                if (!desc)
                        continue;
                desc_count += le16_to_cpu(desc->bg_free_blocks_count);
        }
        return desc_count;
#endif
}

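/*
 * Test whether "block" is marked in use in "map", the block bitmap of
 * the group that contains it.
 */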
static inline int
block_in_use(unsigned long block, struct super_block *sb, unsigned char *map)
{
        return ext2_test_bit ((block -
                le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block)) %
                         EXT2_BLOCKS_PER_GROUP(sb), map);
}

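/*
 * test_root() returns true when a is a power of b (b, b*b, b*b*b, ...).
 * ext2_group_sparse() uses it to implement the sparse_super layout:
 * superblock and descriptor backups live only in groups 0, 1 and powers
 * of 3, 5 and 7.
 */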
static inline int test_root(int a, int b)
{
        int num = b;

        while (a > num)
                num *= b;
        return num == a;
}

static int ext2_group_sparse(int group)
{
        if (group <= 1)
                return 1;
        return (test_root(group, 3) || test_root(group, 5) ||
                test_root(group, 7));
}

/**
 *      ext2_bg_has_super - number of blocks used by the superblock in group
 *      @sb: superblock for filesystem
 *      @group: group number to check
 *
 *      Return the number of blocks used by the superblock (primary or backup)
 *      in this group.  Currently this will be only 0 or 1.
 */
int ext2_bg_has_super(struct super_block *sb, int group)
{
        if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
            !ext2_group_sparse(group))
                return 0;
        return 1;
}

/**
 *      ext2_bg_num_gdb - number of blocks used by the group table in group
 *      @sb: superblock for filesystem
 *      @group: group number to check
 *
 *      Return the number of blocks used by the group descriptor table
 *      (primary or backup) in this group.  In the future there may be a
 *      different number of descriptor blocks in each group.
 */
unsigned long ext2_bg_num_gdb(struct super_block *sb, int group)
{
        if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
            !ext2_group_sparse(group))
                return 0;
        return EXT2_SB(sb)->s_gdb_count;
}