VServer 1.9.2 (patch-2.6.8.1-vs1.9.2.diff)
[linux-2.6.git] / fs / jffs2 / build.c
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright (C) 2001-2003 Red Hat, Inc.
5  *
6  * Created by David Woodhouse <dwmw2@redhat.com>
7  *
8  * For licensing information, see the file 'LICENCE' in this directory.
9  *
10  * $Id: build.c,v 1.55 2003/10/28 17:02:44 dwmw2 Exp $
11  *
12  */
13
14 #include <linux/kernel.h>
15 #include <linux/sched.h>
16 #include <linux/slab.h>
17 #include "nodelist.h"
18
19 static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *, struct jffs2_inode_cache *, struct jffs2_full_dirent **);
20
21 static inline struct jffs2_inode_cache *
22 first_inode_chain(int *i, struct jffs2_sb_info *c)
23 {
24         for (; *i < INOCACHE_HASHSIZE; (*i)++) {
25                 if (c->inocache_list[*i])
26                         return c->inocache_list[*i];
27         }
28         return NULL;
29 }
30
31 static inline struct jffs2_inode_cache *
32 next_inode(int *i, struct jffs2_inode_cache *ic, struct jffs2_sb_info *c)
33 {
34         /* More in this chain? */
35         if (ic->next)
36                 return ic->next;
37         (*i)++;
38         return first_inode_chain(i, c);
39 }
40
41 #define for_each_inode(i, c, ic)                        \
42         for (i = 0, ic = first_inode_chain(&i, (c));    \
43              ic;                                        \
44              ic = next_inode(&i, ic, (c)))
45
46
47 static inline void jffs2_build_inode_pass1(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic)
48 {
49         struct jffs2_full_dirent *fd;
50
51         D1(printk(KERN_DEBUG "jffs2_build_inode building directory inode #%u\n", ic->ino));
52
53         /* For each child, increase nlink */
54         for(fd = ic->scan_dents; fd; fd = fd->next) {
55                 struct jffs2_inode_cache *child_ic;
56                 if (!fd->ino)
57                         continue;
58
59                 /* XXX: Can get high latency here with huge directories */
60
61                 child_ic = jffs2_get_ino_cache(c, fd->ino);
62                 if (!child_ic) {
63                         printk(KERN_NOTICE "Eep. Child \"%s\" (ino #%u) of dir ino #%u doesn't exist!\n",
64                                   fd->name, fd->ino, ic->ino);
65                         continue;
66                 }
67
68                 if (child_ic->nlink++ && fd->type == DT_DIR) {
69                         printk(KERN_NOTICE "Child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n", fd->name, fd->ino, ic->ino);
70                         if (fd->ino == 1 && ic->ino == 1) {
71                                 printk(KERN_NOTICE "This is mostly harmless, and probably caused by creating a JFFS2 image\n");
72                                 printk(KERN_NOTICE "using a buggy version of mkfs.jffs2. Use at least v1.17.\n");
73                         }
74                         /* What do we do about it? */
75                 }
76                 D1(printk(KERN_DEBUG "Increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino));
77                 /* Can't free them. We might need them in pass 2 */
78         }
79 }
80
81 /* Scan plan:
82  - Scan physical nodes. Build map of inodes/dirents. Allocate inocaches as we go
83  - Scan directory tree from top down, setting nlink in inocaches
84  - Scan inocaches for inodes with nlink==0
85 */
86 static int jffs2_build_filesystem(struct jffs2_sb_info *c)
87 {
88         int ret;
89         int i;
90         struct jffs2_inode_cache *ic;
91         struct jffs2_full_dirent *dead_fds = NULL;
92
93         /* First, scan the medium and build all the inode caches with
94            lists of physical nodes */
95
96         c->flags |= JFFS2_SB_FLAG_MOUNTING;
97         ret = jffs2_scan_medium(c);
98         c->flags &= ~JFFS2_SB_FLAG_MOUNTING;
99
100         if (ret)
101                 return ret;
102
103         D1(printk(KERN_DEBUG "Scanned flash completely\n"));
104         D1(jffs2_dump_block_lists(c));
105
106         /* Now scan the directory tree, increasing nlink according to every dirent found. */
107         for_each_inode(i, c, ic) {
108                 D1(printk(KERN_DEBUG "Pass 1: ino #%u\n", ic->ino));
109
110                 D1(BUG_ON(ic->ino > c->highest_ino));
111
112                 if (ic->scan_dents) {
113                         jffs2_build_inode_pass1(c, ic);
114                         cond_resched();
115                 }
116         }
117         D1(printk(KERN_DEBUG "Pass 1 complete\n"));
118
119         /* Next, scan for inodes with nlink == 0 and remove them. If
120            they were directories, then decrement the nlink of their
121            children too, and repeat the scan. As that's going to be
122            a fairly uncommon occurrence, it's not so evil to do it this
123            way. Recursion bad. */
124         D1(printk(KERN_DEBUG "Pass 2 starting\n"));
125
126         for_each_inode(i, c, ic) {
127                 D1(printk(KERN_DEBUG "Pass 2: ino #%u, nlink %d, ic %p, nodes %p\n", ic->ino, ic->nlink, ic, ic->nodes));
128                 if (ic->nlink)
129                         continue;
130                         
131                 jffs2_build_remove_unlinked_inode(c, ic, &dead_fds);
132                 cond_resched();
133         } 
134
135         D1(printk(KERN_DEBUG "Pass 2a starting\n"));
136
137         while (dead_fds) {
138                 struct jffs2_inode_cache *ic;
139                 struct jffs2_full_dirent *fd = dead_fds;
140
141                 dead_fds = fd->next;
142
143                 ic = jffs2_get_ino_cache(c, fd->ino);
144                 D1(printk(KERN_DEBUG "Removing dead_fd ino #%u (\"%s\"), ic at %p\n", fd->ino, fd->name, ic));
145
146                 if (ic)
147                         jffs2_build_remove_unlinked_inode(c, ic, &dead_fds);
148                 jffs2_free_full_dirent(fd);
149         }
150
151         D1(printk(KERN_DEBUG "Pass 2 complete\n"));
152         
153         /* Finally, we can scan again and free the dirent structs */
154         for_each_inode(i, c, ic) {
155                 struct jffs2_full_dirent *fd;
156                 D1(printk(KERN_DEBUG "Pass 3: ino #%u, ic %p, nodes %p\n", ic->ino, ic, ic->nodes));
157
158                 while(ic->scan_dents) {
159                         fd = ic->scan_dents;
160                         ic->scan_dents = fd->next;
161                         jffs2_free_full_dirent(fd);
162                 }
163                 ic->scan_dents = NULL;
164                 cond_resched();
165         }
166         D1(printk(KERN_DEBUG "Pass 3 complete\n"));
167         D1(jffs2_dump_block_lists(c));
168
169         /* Rotate the lists by some number to ensure wear levelling */
170         jffs2_rotate_lists(c);
171
172         return ret;
173 }
174
175 static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic, struct jffs2_full_dirent **dead_fds)
176 {
177         struct jffs2_raw_node_ref *raw;
178         struct jffs2_full_dirent *fd;
179
180         D1(printk(KERN_DEBUG "JFFS2: Removing ino #%u with nlink == zero.\n", ic->ino));
181         
182         for (raw = ic->nodes; raw != (void *)ic; raw = raw->next_in_ino) {
183                 D1(printk(KERN_DEBUG "obsoleting node at 0x%08x\n", ref_offset(raw)));
184                 jffs2_mark_node_obsolete(c, raw);
185         }
186
187         if (ic->scan_dents) {
188                 int whinged = 0;
189                 D1(printk(KERN_DEBUG "Inode #%u was a directory which may have children...\n", ic->ino));
190
191                 while(ic->scan_dents) {
192                         struct jffs2_inode_cache *child_ic;
193
194                         fd = ic->scan_dents;
195                         ic->scan_dents = fd->next;
196
197                         if (!fd->ino) {
198                                 /* It's a deletion dirent. Ignore it */
199                                 D1(printk(KERN_DEBUG "Child \"%s\" is a deletion dirent, skipping...\n", fd->name));
200                                 jffs2_free_full_dirent(fd);
201                                 continue;
202                         }
203                         if (!whinged) {
204                                 whinged = 1;
205                                 printk(KERN_NOTICE "Inode #%u was a directory with children - removing those too...\n", ic->ino);
206                         }
207
208                         D1(printk(KERN_DEBUG "Removing child \"%s\", ino #%u\n",
209                                   fd->name, fd->ino));
210                         
211                         child_ic = jffs2_get_ino_cache(c, fd->ino);
212                         if (!child_ic) {
213                                 printk(KERN_NOTICE "Cannot remove child \"%s\", ino #%u, because it doesn't exist\n", fd->name, fd->ino);
214                                 jffs2_free_full_dirent(fd);
215                                 continue;
216                         }
217
218                         /* Reduce nlink of the child. If it's now zero, stick it on the 
219                            dead_fds list to be cleaned up later. Else just free the fd */
220
221                         child_ic->nlink--;
222                         
223                         if (!child_ic->nlink) {
224                                 D1(printk(KERN_DEBUG "Inode #%u (\"%s\") has now got zero nlink. Adding to dead_fds list.\n",
225                                           fd->ino, fd->name));
226                                 fd->next = *dead_fds;
227                                 *dead_fds = fd;
228                         } else {
229                                 D1(printk(KERN_DEBUG "Inode #%u (\"%s\") has now got nlink %d. Ignoring.\n",
230                                           fd->ino, fd->name, child_ic->nlink));
231                                 jffs2_free_full_dirent(fd);
232                         }
233                 }
234         }
235
236         /*
237            We don't delete the inocache from the hash list and free it yet. 
238            The erase code will do that, when all the nodes are completely gone.
239         */
240 }
241
242 static void jffs2_calc_trigger_levels(struct jffs2_sb_info *c)
243 {
244         uint32_t size;
245
246         /* Deletion should almost _always_ be allowed. We're fairly
247            buggered once we stop allowing people to delete stuff
248            because there's not enough free space... */
249         c->resv_blocks_deletion = 2;
250
251         /* Be conservative about how much space we need before we allow writes. 
252            On top of that which is required for deletia, require an extra 2%
253            of the medium to be available, for overhead caused by nodes being
254            split across blocks, etc. */
255
256         size = c->flash_size / 50; /* 2% of flash size */
257         size += c->nr_blocks * 100; /* And 100 bytes per eraseblock */
258         size += c->sector_size - 1; /* ... and round up */
259
260         c->resv_blocks_write = c->resv_blocks_deletion + (size / c->sector_size);
261
262         /* When do we let the GC thread run in the background */
263
264         c->resv_blocks_gctrigger = c->resv_blocks_write + 1;
265
266         /* When do we allow garbage collection to merge nodes to make 
267            long-term progress at the expense of short-term space exhaustion? */
268         c->resv_blocks_gcmerge = c->resv_blocks_deletion + 1;
269
270         /* When do we allow garbage collection to eat from bad blocks rather
271            than actually making progress? */
272         c->resv_blocks_gcbad = 0;//c->resv_blocks_deletion + 2;
273
274         /* If there's less than this amount of dirty space, don't bother
275            trying to GC to make more space. It'll be a fruitless task */
276         c->nospc_dirty_size = c->sector_size + (c->flash_size / 100);
277
278         D1(printk(KERN_DEBUG "JFFS2 trigger levels (size %d KiB, block size %d KiB, %d blocks)\n",
279                   c->flash_size / 1024, c->sector_size / 1024, c->nr_blocks));
280         D1(printk(KERN_DEBUG "Blocks required to allow deletion:    %d (%d KiB)\n",
281                   c->resv_blocks_deletion, c->resv_blocks_deletion*c->sector_size/1024));
282         D1(printk(KERN_DEBUG "Blocks required to allow writes:      %d (%d KiB)\n",
283                   c->resv_blocks_write, c->resv_blocks_write*c->sector_size/1024));
284         D1(printk(KERN_DEBUG "Blocks required to quiesce GC thread: %d (%d KiB)\n",
285                   c->resv_blocks_gctrigger, c->resv_blocks_gctrigger*c->sector_size/1024));
286         D1(printk(KERN_DEBUG "Blocks required to allow GC merges:   %d (%d KiB)\n",
287                   c->resv_blocks_gcmerge, c->resv_blocks_gcmerge*c->sector_size/1024));
288         D1(printk(KERN_DEBUG "Blocks required to GC bad blocks:     %d (%d KiB)\n",
289                   c->resv_blocks_gcbad, c->resv_blocks_gcbad*c->sector_size/1024));
290         D1(printk(KERN_DEBUG "Amount of dirty space required to GC: %d bytes\n",
291                   c->nospc_dirty_size));
292
293
294 int jffs2_do_mount_fs(struct jffs2_sb_info *c)
295 {
296         int i;
297
298         c->free_size = c->flash_size;
299         c->nr_blocks = c->flash_size / c->sector_size;
300         c->blocks = kmalloc(sizeof(struct jffs2_eraseblock) * c->nr_blocks, GFP_KERNEL);
301         if (!c->blocks)
302                 return -ENOMEM;
303         for (i=0; i<c->nr_blocks; i++) {
304                 INIT_LIST_HEAD(&c->blocks[i].list);
305                 c->blocks[i].offset = i * c->sector_size;
306                 c->blocks[i].free_size = c->sector_size;
307                 c->blocks[i].dirty_size = 0;
308                 c->blocks[i].wasted_size = 0;
309                 c->blocks[i].unchecked_size = 0;
310                 c->blocks[i].used_size = 0;
311                 c->blocks[i].first_node = NULL;
312                 c->blocks[i].last_node = NULL;
313         }
314
315         init_MUTEX(&c->alloc_sem);
316         init_MUTEX(&c->erase_free_sem);
317         init_waitqueue_head(&c->erase_wait);
318         init_waitqueue_head(&c->inocache_wq);
319         spin_lock_init(&c->erase_completion_lock);
320         spin_lock_init(&c->inocache_lock);
321
322         INIT_LIST_HEAD(&c->clean_list);
323         INIT_LIST_HEAD(&c->very_dirty_list);
324         INIT_LIST_HEAD(&c->dirty_list);
325         INIT_LIST_HEAD(&c->erasable_list);
326         INIT_LIST_HEAD(&c->erasing_list);
327         INIT_LIST_HEAD(&c->erase_pending_list);
328         INIT_LIST_HEAD(&c->erasable_pending_wbuf_list);
329         INIT_LIST_HEAD(&c->erase_complete_list);
330         INIT_LIST_HEAD(&c->free_list);
331         INIT_LIST_HEAD(&c->bad_list);
332         INIT_LIST_HEAD(&c->bad_used_list);
333         c->highest_ino = 1;
334
335         if (jffs2_build_filesystem(c)) {
336                 D1(printk(KERN_DEBUG "build_fs failed\n"));
337                 jffs2_free_ino_caches(c);
338                 jffs2_free_raw_node_refs(c);
339                 kfree(c->blocks);
340                 return -EIO;
341         }
342
343         jffs2_calc_trigger_levels(c);
344
345         return 0;
346 }