 * JFFS2 -- Journalling Flash File System, Version 2.
 *
 * Copyright (C) 2001-2003 Red Hat, Inc.
 *
 * Created by David Woodhouse <dwmw2@redhat.com>
 *
 * For licensing information, see the file 'LICENCE' in this directory.
 *
 * $Id: gc.c,v 1.136 2004/05/27 19:06:09 gleixner Exp $
#include <linux/kernel.h>
#include <linux/mtd/mtd.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/crc32.h>
#include <linux/compiler.h>
#include <linux/stat.h>
#include "nodelist.h"
#include "compr.h"
static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
					  struct jffs2_inode_cache *ic,
					  struct jffs2_raw_node_ref *raw);
static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
					struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
				      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
				      uint32_t start, uint32_t end);
static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
				       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
				       uint32_t start, uint32_t end);
static int jffs2_garbage_collect_live(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
				      struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);

/* Called with erase_completion_lock held */
static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
	struct jffs2_eraseblock *ret;
	struct list_head *nextlist = NULL;
	int n = jiffies % 128;
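	/* 'n' is a cheap pseudo-random weight for the choice below: out of
	   every 128 passes, roughly 50 favour the erasable_list, 60 the
	   very_dirty_list, 16 the dirty_list and 2 the clean_list (the last
	   mostly for wear-levelling), falling through when a list is empty. */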
	/* Pick an eraseblock to garbage collect next. This is where we'll
	   put the clever wear-levelling algorithms. Eventually. */
	/* We possibly want to favour the dirtier blocks more when the
	   number of free blocks is low. */
	if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
		D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
		nextlist = &c->bad_used_list;
	} else if (n < 50 && !list_empty(&c->erasable_list)) {
		/* Note that most of them will have gone directly to be erased.
		   So don't favour the erasable_list _too_ much. */
		D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n"));
		nextlist = &c->erasable_list;
	} else if (n < 110 && !list_empty(&c->very_dirty_list)) {
		/* Most of the time, pick one off the very_dirty list */
		D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n"));
		nextlist = &c->very_dirty_list;
	} else if (n < 126 && !list_empty(&c->dirty_list)) {
		D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n"));
		nextlist = &c->dirty_list;
	} else if (!list_empty(&c->clean_list)) {
		D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n"));
		nextlist = &c->clean_list;
	} else if (!list_empty(&c->dirty_list)) {
		D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n"));
		nextlist = &c->dirty_list;
	} else if (!list_empty(&c->very_dirty_list)) {
		D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"));
		nextlist = &c->very_dirty_list;
	} else if (!list_empty(&c->erasable_list)) {
		D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
		nextlist = &c->erasable_list;
		/* Eep. All were empty */
		D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n"));

	ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);

	ret->gc_node = ret->first_node;
		printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset);

	/* Have we accidentally picked a clean block with wasted space ? */
	if (ret->wasted_size) {
		D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size));
		ret->dirty_size += ret->wasted_size;
		c->wasted_size -= ret->wasted_size;
		c->dirty_size += ret->wasted_size;
		ret->wasted_size = 0;

	D1(jffs2_dump_block_lists(c));
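	/* The wasted->dirty conversion above keeps the accounting straight:
	   GC only ever reclaims space that is counted as dirty. */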
/* jffs2_garbage_collect_pass
 * Make a single attempt to progress GC. Move one node, and possibly
 * start erasing one eraseblock.
int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
	struct jffs2_inode_info *f;
	struct jffs2_inode_cache *ic;
	struct jffs2_eraseblock *jeb;
	struct jffs2_raw_node_ref *raw;
	int ret = 0, inum, nlink;

	if (down_interruptible(&c->alloc_sem))

	spin_lock(&c->erase_completion_lock);
	if (!c->unchecked_size)

	/* We can't start doing GC yet. We haven't finished checking
	   the node CRCs etc. Do it now. */

	/* checked_ino is protected by the alloc_sem */
	if (c->checked_ino > c->highest_ino) {
		printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
		D1(jffs2_dump_block_lists(c));
		spin_unlock(&c->erase_completion_lock);

	spin_unlock(&c->erase_completion_lock);

	spin_lock(&c->inocache_lock);

	ic = jffs2_get_ino_cache(c, c->checked_ino++);

		spin_unlock(&c->inocache_lock);

		D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n",
		spin_unlock(&c->inocache_lock);

	case INO_STATE_CHECKEDABSENT:
	case INO_STATE_PRESENT:
		D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino));
		spin_unlock(&c->inocache_lock);

	case INO_STATE_CHECKING:
		printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state);
		spin_unlock(&c->inocache_lock);

	case INO_STATE_READING:
		/* We need to wait for it to finish, lest we move on
		   and trigger the BUG() above while we haven't yet
		   finished checking all its nodes */
		D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
		sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);

	case INO_STATE_UNCHECKED:

	ic->state = INO_STATE_CHECKING;
	spin_unlock(&c->inocache_lock);

	D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%u\n", ic->ino));

	ret = jffs2_do_crccheck_inode(c, ic);
		printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);

	jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);

	/* First, work out which block we're garbage-collecting */

	jeb = jffs2_find_gc_block(c);

		D1 (printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n"));
		spin_unlock(&c->erase_completion_lock);

	D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size));
	   printk(KERN_DEBUG "Nextblock at %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));

	if (!jeb->used_size) {

	while(ref_obsolete(raw)) {
		D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
		raw = raw->next_phys;
		if (unlikely(!raw)) {
			printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
			printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
			       jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
			spin_unlock(&c->erase_completion_lock);

	D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));

	if (!raw->next_in_ino) {
		/* Inode-less node. Clean marker, snapshot or something like that */
		/* FIXME: If it's something that needs to be copied, including something
		   we don't grok that has JFFS2_NODETYPE_RWCOMPAT_COPY, we should do so */
		spin_unlock(&c->erase_completion_lock);
		jffs2_mark_node_obsolete(c, raw);

	ic = jffs2_raw_ref_to_ic(raw);
	/* We need to hold the inocache. Either the erase_completion_lock or
	   the inocache_lock is sufficient; we trade down since the inocache_lock
	   causes less contention. */
	spin_lock(&c->inocache_lock);

	spin_unlock(&c->erase_completion_lock);
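	/* From here on we hold only the alloc_sem and the inocache_lock;
	   obsoletion and erasure can proceed while we examine the inode state. */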
	D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino));

	/* Three possibilities:
	   1. Inode is already in-core. We must iget it and do proper
	      updating to its fragtree, etc.
	   2. Inode is not in-core, node is REF_PRISTINE. We lock the
	      inocache to prevent a read_inode(), copy the node intact.
	   3. Inode is not in-core, node is not pristine. We must iget()
	      and take the slow path.

	case INO_STATE_CHECKEDABSENT:
		/* It's been checked, but it's not currently in-core.
		   We can just copy any pristine nodes, but have
		   to prevent anyone else from doing read_inode() while
		   we're at it, so we set the state accordingly */
		if (ref_flags(raw) == REF_PRISTINE)
			ic->state = INO_STATE_GC;
			D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",

	case INO_STATE_PRESENT:
		/* It's in-core. GC must iget() it. */

	case INO_STATE_UNCHECKED:
	case INO_STATE_CHECKING:
		/* Should never happen. We should have finished checking
		   by the time we actually start doing any GC, and since
		   we're holding the alloc_sem, no other garbage collection
		printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
		spin_unlock(&c->inocache_lock);

	case INO_STATE_READING:
		/* Someone's currently trying to read it. We must wait for
		   them to finish and then go through the full iget() route
		   to do the GC. However, sometimes read_inode() needs to get
		   the alloc_sem() (for marking nodes invalid) so we must
		   drop the alloc_sem before sleeping. */

		D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
			  ic->ino, ic->state));
		sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
		/* And because we dropped the alloc_sem we must start again from the
		   beginning. Ponder chance of livelock here -- we're returning success
		   without actually making any progress.

		   Q: What are the chances that the inode is back in INO_STATE_READING
		   again by the time we next enter this function? And that this happens
		   enough times to cause a real delay?

		   A: Small enough that I don't care :)

	/* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
	   node intact, and we don't have to muck about with the fragtree etc.
	   because we know it's not in-core. If it _was_ in-core, we go through
	   all the iget() crap anyway */

	if (ic->state == INO_STATE_GC) {
		spin_unlock(&c->inocache_lock);

		ret = jffs2_garbage_collect_pristine(c, ic, raw);

		spin_lock(&c->inocache_lock);
		ic->state = INO_STATE_CHECKEDABSENT;
		wake_up(&c->inocache_wq);
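		/* Anyone who was waiting on inocache_wq (e.g. a read_inode()) for the
		   GC hold on this inode to be released can now proceed. */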
		if (ret != -EBADFD) {
			spin_unlock(&c->inocache_lock);

		/* Fall through if it wanted us to, with inocache_lock held */

	/* Prevent the fairly unlikely race where the gcblock is
	   entirely obsoleted by the final close of a file which had
	   the only valid nodes in the block, followed by erasure,
	   followed by freeing of the ic because the erased block(s)
	   held _all_ the nodes of that inode.... never been seen but
	   it's vaguely possible. */

	spin_unlock(&c->inocache_lock);

	f = jffs2_gc_fetch_inode(c, inum, nlink);

	ret = jffs2_garbage_collect_live(c, jeb, raw, f);

	jffs2_gc_release_inode(c, f);

	/* If we've finished this block, start it erasing */
	spin_lock(&c->erase_completion_lock);

	if (c->gcblock && !c->gcblock->used_size) {
		D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset));
		/* We're GC'ing an empty block? */
		list_add_tail(&c->gcblock->list, &c->erase_pending_list);
		c->nr_erasing_blocks++;
		jffs2_erase_pending_trigger(c);

	spin_unlock(&c->erase_completion_lock);

static int jffs2_garbage_collect_live(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
				      struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
	struct jffs2_node_frag *frag;
	struct jffs2_full_dnode *fn = NULL;
	struct jffs2_full_dirent *fd;
	uint32_t start = 0, end = 0, nrfrags = 0;

	/* Now we have the lock for this inode. Check that it's still the one at the head

	spin_lock(&c->erase_completion_lock);

	if (c->gcblock != jeb) {
		spin_unlock(&c->erase_completion_lock);
		D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n"));

	if (ref_obsolete(raw)) {
		spin_unlock(&c->erase_completion_lock);
		D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
		/* They'll call again */

	spin_unlock(&c->erase_completion_lock);

	/* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
	if (f->metadata && f->metadata->raw == raw) {
		ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);

	/* FIXME. Read node and do lookup? */
	for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
		if (frag->node && frag->node->raw == raw) {
			end = frag->ofs + frag->size;
			if (nrfrags == frag->node->frags)
				break; /* We've found them all */
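	/* On exit from the loop above, start/end span every frag still backed
	   by the node being garbage-collected. */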
	if (ref_flags(raw) == REF_PRISTINE) {
		ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
			/* Urgh. Return it sensibly. */
			frag->node->raw = f->inocache->nodes;
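			/* (The replacement ref was linked at the head of the inocache's node
			   list by jffs2_garbage_collect_pristine(), which is why we can pick
			   it up from ->nodes here.) */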
	/* We found a datanode. Do the GC */
	if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
		/* It crosses a page boundary. Therefore, it must be a hole. */
		ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
		/* It could still be a hole. But we GC the page this way anyway */
		ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);

	/* Wasn't a dnode. Try dirent */
	for (fd = f->dents; fd; fd=fd->next) {

		ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
		ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);

	printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n",
	       ref_offset(raw), f->inocache->ino);
	if (ref_obsolete(raw)) {
		printk(KERN_WARNING "But it's obsolete so we don't mind too much\n");

static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
					  struct jffs2_inode_cache *ic,
					  struct jffs2_raw_node_ref *raw)
	union jffs2_node_union *node;
	struct jffs2_raw_node_ref *nraw;
	uint32_t phys_ofs, alloclen;
	uint32_t crc, rawlen;

	D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));

	rawlen = ref_totlen(c, c->gcblock, raw);

	/* Ask for a small amount of space (or the totlen if smaller) because we
	   don't want to force wastage of the end of a block if splitting would
	ret = jffs2_reserve_space_gc(c, min_t(uint32_t, sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN,
				rawlen), &phys_ofs, &alloclen);

	if (alloclen < rawlen) {
		/* Doesn't fit untouched. We'll go the old route and split it */

	node = kmalloc(rawlen, GFP_KERNEL);

	ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
	if (!ret && retlen != rawlen)

	crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
	if (je32_to_cpu(node->u.hdr_crc) != crc) {
		printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
		       ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);

	switch(je16_to_cpu(node->u.nodetype)) {
	case JFFS2_NODETYPE_INODE:
		crc = crc32(0, node, sizeof(node->i)-8);
		if (je32_to_cpu(node->i.node_crc) != crc) {
			printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
			       ref_offset(raw), je32_to_cpu(node->i.node_crc), crc);

		if (je32_to_cpu(node->i.dsize)) {
			crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
			if (je32_to_cpu(node->i.data_crc) != crc) {
				printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
				       ref_offset(raw), je32_to_cpu(node->i.data_crc), crc);

	case JFFS2_NODETYPE_DIRENT:
		crc = crc32(0, node, sizeof(node->d)-8);
		if (je32_to_cpu(node->d.node_crc) != crc) {
			printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
			       ref_offset(raw), je32_to_cpu(node->d.node_crc), crc);

		crc = crc32(0, node->d.name, node->d.nsize);
		if (je32_to_cpu(node->d.name_crc) != crc) {
			printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
			       ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);

		printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
		       ref_offset(raw), je16_to_cpu(node->u.nodetype));

	nraw = jffs2_alloc_raw_node_ref();

	/* OK, all the CRCs are good; this node can just be copied as-is. */

	nraw->flash_offset = phys_ofs;
	nraw->__totlen = rawlen;
	nraw->next_phys = NULL;
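	/* The new ref keeps the original length; it is flagged REF_PRISTINE or
	   REF_OBSOLETE below, depending on whether the copy succeeds. */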
	ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);

	if (ret || (retlen != rawlen)) {
		printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
		       rawlen, phys_ofs, ret, retlen);
			/* Doesn't belong to any inode */
			nraw->next_in_ino = NULL;

			nraw->flash_offset |= REF_OBSOLETE;
			jffs2_add_physical_node_ref(c, nraw);
			jffs2_mark_node_obsolete(c, nraw);

			printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", nraw->flash_offset);
			jffs2_free_raw_node_ref(nraw);

		if (!retried && (nraw = jffs2_alloc_raw_node_ref())) {
			/* Try to reallocate space and retry */

			struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];

			D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n"));

			ACCT_SANITY_CHECK(c,jeb);
			D1(ACCT_PARANOIA_CHECK(jeb));

			ret = jffs2_reserve_space_gc(c, rawlen, &phys_ofs, &dummy);

				D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs));

				ACCT_SANITY_CHECK(c,jeb);
				D1(ACCT_PARANOIA_CHECK(jeb));

				D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
				jffs2_free_raw_node_ref(nraw);

	nraw->flash_offset |= REF_PRISTINE;
	jffs2_add_physical_node_ref(c, nraw);

	/* Link into per-inode list. This is safe because of the ic
	   state being INO_STATE_GC. Note that if we're doing this
	   for an inode which is in-core, the 'nraw' pointer is then
	   going to be fetched from ic->nodes by our caller. */
	nraw->next_in_ino = ic->nodes;

	jffs2_mark_node_obsolete(c, raw);
	D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));

static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
					struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
	struct jffs2_full_dnode *new_fn;
	struct jffs2_raw_inode ri;
	char *mdata = NULL, mdatalen = 0;
	uint32_t alloclen, phys_ofs;

	if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
	    S_ISCHR(JFFS2_F_I_MODE(f)) ) {
		/* For these, we don't actually need to read the old node */
		/* FIXME: for minor or major > 255. */
		dev = cpu_to_je16(((JFFS2_F_I_RDEV_MAJ(f) << 8) |
				   JFFS2_F_I_RDEV_MIN(f)));
		mdata = (char *)&dev;
		mdatalen = sizeof(dev);
		D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
	} else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
		mdata = kmalloc(fn->size, GFP_KERNEL);
			printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");

		ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
			printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret);

		D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of symlink target\n", mdatalen));

	ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &phys_ofs, &alloclen);
		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
		       sizeof(ri)+ mdatalen, ret);

	memset(&ri, 0, sizeof(ri));
	ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
	ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
	ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
	ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));

	ri.ino = cpu_to_je32(f->inocache->ino);
	ri.version = cpu_to_je32(++f->highest_version);
	ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
	ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
	ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
	ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
	ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
	ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
	ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
	ri.offset = cpu_to_je32(0);
	ri.csize = cpu_to_je32(mdatalen);
	ri.dsize = cpu_to_je32(mdatalen);
	ri.compr = JFFS2_COMPR_NONE;
	ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
	ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
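	/* node_crc covers the raw inode up to, but not including, its two
	   trailing CRC fields; data_crc covers only the metadata payload. */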
	new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, phys_ofs, ALLOC_GC);

	if (IS_ERR(new_fn)) {
		printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
		ret = PTR_ERR(new_fn);

	jffs2_mark_node_obsolete(c, fn->raw);
	jffs2_free_full_dnode(fn);
	f->metadata = new_fn;

	if (S_ISLNK(JFFS2_F_I_MODE(f)))

static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
	struct jffs2_full_dirent *new_fd;
	struct jffs2_raw_dirent rd;
	uint32_t alloclen, phys_ofs;

	rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
	rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
	rd.nsize = strlen(fd->name);
	rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
	rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));

	rd.pino = cpu_to_je32(f->inocache->ino);
	rd.version = cpu_to_je32(++f->highest_version);
	rd.ino = cpu_to_je32(fd->ino);
	rd.mctime = cpu_to_je32(max(JFFS2_F_I_MTIME(f), JFFS2_F_I_CTIME(f)));
	rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
	rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));

	ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &phys_ofs, &alloclen);
		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
		       sizeof(rd)+rd.nsize, ret);

	new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, phys_ofs, ALLOC_GC);

	if (IS_ERR(new_fd)) {
		printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
		return PTR_ERR(new_fd);

	jffs2_add_fd_to_list(c, new_fd, &f->dents);

static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
	struct jffs2_full_dirent **fdp = &f->dents;

	/* On a medium where we can't actually mark nodes obsolete
	   permanently, such as NAND flash, we need to work out
	   whether this deletion dirent is still needed to actively
	   delete a 'real' dirent with the same name that's still
	   somewhere else on the flash. */
	if (!jffs2_can_mark_obsolete(c)) {
		struct jffs2_raw_dirent *rd;
		struct jffs2_raw_node_ref *raw;

		int name_len = strlen(fd->name);
		uint32_t name_crc = crc32(0, fd->name, name_len);
		uint32_t rawlen = ref_totlen(c, jeb, fd->raw);

		rd = kmalloc(rawlen, GFP_KERNEL);

		/* Prevent the erase code from nicking the obsolete node refs while
		   we're looking at them. I really don't like this extra lock but
		   can't see any alternative. Suggestions on a postcard to... */
		down(&c->erase_free_sem);

		for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {

			/* We only care about obsolete ones */
			if (!(ref_obsolete(raw)))

			/* Any dirent with the same name is going to have the same length... */
			if (ref_totlen(c, NULL, raw) != rawlen)

			/* Doesn't matter if there's one in the same erase block. We're going to
			   delete it too at the same time. */
			if ((raw->flash_offset & ~(c->sector_size-1)) ==
			    (fd->raw->flash_offset & ~(c->sector_size-1)))

			D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw)));

			/* This is an obsolete node belonging to the same directory, and it's of the right
			   length. We need to take a closer look...*/
			ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
				printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading obsolete node at %08x\n", ret, ref_offset(raw));
				/* If we can't read it, we don't need to continue to obsolete it. Continue */

			if (retlen != rawlen) {
				printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %zd) reading header from obsolete node at %08x\n",
				       retlen, rawlen, ref_offset(raw));

			if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)

			/* If the name CRC doesn't match, skip */
			if (je32_to_cpu(rd->name_crc) != name_crc)

			/* If the name length doesn't match, or it's another deletion dirent, skip */
			if (rd->nsize != name_len || !je32_to_cpu(rd->ino))

			/* OK, check the actual name now */
			if (memcmp(rd->name, fd->name, name_len))

			/* OK. The name really does match. There really is still an older node on
			   the flash which our deletion dirent obsoletes. So we have to write out
			   a new deletion dirent to replace it */
			up(&c->erase_free_sem);

			D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
				  ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino)));

			return jffs2_garbage_collect_dirent(c, jeb, f, fd);

		up(&c->erase_free_sem);

	/* No need for it any more. Just mark it obsolete and remove it from the list */

		printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino);

	jffs2_mark_node_obsolete(c, fd->raw);
	jffs2_free_full_dirent(fd);

static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
				      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
				      uint32_t start, uint32_t end)
	struct jffs2_raw_inode ri;
	struct jffs2_node_frag *frag;
	struct jffs2_full_dnode *new_fn;
	uint32_t alloclen, phys_ofs;

	D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
		  f->inocache->ino, start, end));

	memset(&ri, 0, sizeof(ri));

		/* It's partially obsoleted by a later write. So we have to
		   write it out again with the _same_ version as before */
		ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
		if (readlen != sizeof(ri) || ret) {
			printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen);

		if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
			       je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);

		if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
			       je32_to_cpu(ri.totlen), sizeof(ri));

		crc = crc32(0, &ri, sizeof(ri)-8);
		if (crc != je32_to_cpu(ri.node_crc)) {
			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
			       je32_to_cpu(ri.node_crc), crc);
			/* FIXME: We could possibly deal with this by writing new holes for each frag */
			printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
			       start, end, f->inocache->ino);

		if (ri.compr != JFFS2_COMPR_ZERO) {
			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw));
			printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
			       start, end, f->inocache->ino);

		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
		ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
		ri.totlen = cpu_to_je32(sizeof(ri));
		ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));

		ri.ino = cpu_to_je32(f->inocache->ino);
		ri.version = cpu_to_je32(++f->highest_version);
		ri.offset = cpu_to_je32(start);
		ri.dsize = cpu_to_je32(end - start);
		ri.csize = cpu_to_je32(0);
		ri.compr = JFFS2_COMPR_ZERO;

	ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
	ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
	ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
	ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
	ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
	ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
	ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
	ri.data_crc = cpu_to_je32(0);
	ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));

	ret = jffs2_reserve_space_gc(c, sizeof(ri), &phys_ofs, &alloclen);
		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",

	new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, phys_ofs, ALLOC_GC);

	if (IS_ERR(new_fn)) {
		printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
		return PTR_ERR(new_fn);

	if (je32_to_cpu(ri.version) == f->highest_version) {
		jffs2_add_full_dnode_to_inode(c, f, new_fn);
			jffs2_mark_node_obsolete(c, f->metadata->raw);
			jffs2_free_full_dnode(f->metadata);

		 * We should only get here in the case where the node we are
		 * replacing had more than one frag, so we kept the same version
		 * number as before. (Except in case of error -- see 'goto fill;'

		D1(if(unlikely(fn->frags <= 1)) {
			printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
			       fn->frags, je32_to_cpu(ri.version), f->highest_version,
			       je32_to_cpu(ri.ino));

		/* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
		mark_ref_normal(new_fn->raw);

		for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
		     frag; frag = frag_next(frag)) {
			if (frag->ofs > fn->size + fn->ofs)
			if (frag->node == fn) {

			printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n");

		if (!new_fn->frags) {
			printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n");

	jffs2_mark_node_obsolete(c, fn->raw);
	jffs2_free_full_dnode(fn);

static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
				       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
				       uint32_t start, uint32_t end)
	struct jffs2_full_dnode *new_fn;
	struct jffs2_raw_inode ri;
	uint32_t alloclen, phys_ofs, offset, orig_end, orig_start;
	unsigned char *comprbuf = NULL, *writebuf;
	unsigned char *pg_ptr;

	memset(&ri, 0, sizeof(ri));

	D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
		  f->inocache->ino, start, end));

	if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
		/* Attempt to do some merging. But only expand to cover logically
		   adjacent frags if the block containing them is already considered
		   to be dirty. Otherwise we end up with GC just going round in
		   circles dirtying the nodes it already wrote out, especially
		   on NAND where we have small eraseblocks and hence a much higher
		   chance of nodes having to be split to cross boundaries. */

		struct jffs2_node_frag *frag;

		min = start & ~(PAGE_CACHE_SIZE-1);
		max = min + PAGE_CACHE_SIZE;
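		/* min/max bound the page containing 'start'; the merging below is
		   confined to this page, so the rewritten node never crosses a
		   page boundary. */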
		frag = jffs2_lookup_node_frag(&f->fragtree, start);

		/* BUG_ON(!frag) but that'll happen anyway... */

		BUG_ON(frag->ofs != start);

		/* First grow down... */
		while((frag = frag_prev(frag)) && frag->ofs >= min) {

			/* If the previous frag doesn't even reach the beginning, there's
			   excessive fragmentation. Just merge. */
			if (frag->ofs > min) {
				D1(printk(KERN_DEBUG "Expanding down to cover partial frag (0x%x-0x%x)\n",
					  frag->ofs, frag->ofs+frag->size));

			/* OK. This frag holds the first byte of the page. */
			if (!frag->node || !frag->node->raw) {
				D1(printk(KERN_DEBUG "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
					  frag->ofs, frag->ofs+frag->size));

				/* OK, it's a frag which extends to the beginning of the page. Does it live
				   in a block which is still considered clean? If so, don't obsolete it.
				   If not, cover it anyway. */

				struct jffs2_raw_node_ref *raw = frag->node->raw;
				struct jffs2_eraseblock *jeb;

				jeb = &c->blocks[raw->flash_offset / c->sector_size];

				if (jeb == c->gcblock) {
					D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
						  frag->ofs, frag->ofs+frag->size, ref_offset(raw)));

				if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
					D1(printk(KERN_DEBUG "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
						  frag->ofs, frag->ofs+frag->size, jeb->offset));

				D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
					  frag->ofs, frag->ofs+frag->size, jeb->offset));

		/* Find last frag which is actually part of the node we're to GC. */
		frag = jffs2_lookup_node_frag(&f->fragtree, end-1);

		while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {

			/* If the next frag doesn't even reach the end of the page, there's
			   excessive fragmentation. Just merge. */
			if (frag->ofs+frag->size < max) {
				D1(printk(KERN_DEBUG "Expanding up to cover partial frag (0x%x-0x%x)\n",
					  frag->ofs, frag->ofs+frag->size));
				end = frag->ofs + frag->size;

			if (!frag->node || !frag->node->raw) {
				D1(printk(KERN_DEBUG "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
					  frag->ofs, frag->ofs+frag->size));

				/* OK, it's a frag which extends to the end of the page. Does it live
				   in a block which is still considered clean? If so, don't obsolete it.
				   If not, cover it anyway. */

				struct jffs2_raw_node_ref *raw = frag->node->raw;
				struct jffs2_eraseblock *jeb;

				jeb = &c->blocks[raw->flash_offset / c->sector_size];

				if (jeb == c->gcblock) {
					D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
						  frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
					end = frag->ofs + frag->size;

				if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
					D1(printk(KERN_DEBUG "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
						  frag->ofs, frag->ofs+frag->size, jeb->offset));

				D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
					  frag->ofs, frag->ofs+frag->size, jeb->offset));
				end = frag->ofs + frag->size;

		D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
			  orig_start, orig_end, start, end));
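		/* Merging may only ever have grown the range, and never past the
		   current file size. */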
		BUG_ON(end > JFFS2_F_I_SIZE(f));
		BUG_ON(end < orig_end);
		BUG_ON(start > orig_start);

	/* First, use readpage() to read the appropriate page into the page cache */
	/* Q: What happens if we actually try to GC the _same_ page for which commit_write()
	 * triggered garbage collection in the first place?
	 * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
	 * page OK. We'll actually write it out again in commit_write, which is a little
	 * suboptimal, but at least we're correct.
	pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);

	if (IS_ERR(pg_ptr)) {
		printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg_ptr));
		return PTR_ERR(pg_ptr);

	while(offset < orig_end) {

		uint16_t comprtype = JFFS2_COMPR_NONE;

		ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN, &phys_ofs, &alloclen);

			printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
			       sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret);

		cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
		datalen = end - offset;

		writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));

		comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
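		/* jffs2_compress() picks the best available compressor; on return
		   datalen holds how much of the input it consumed and cdatalen the
		   size of the compressed output in comprbuf. */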
		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
		ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
		ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
		ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));

		ri.ino = cpu_to_je32(f->inocache->ino);
		ri.version = cpu_to_je32(++f->highest_version);
		ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
		ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
		ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
		ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
		ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
		ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
		ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
		ri.offset = cpu_to_je32(offset);
		ri.csize = cpu_to_je32(cdatalen);
		ri.dsize = cpu_to_je32(datalen);
		ri.compr = comprtype & 0xff;
		ri.usercompr = (comprtype >> 8) & 0xff;
		ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
		ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));

		new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, phys_ofs, ALLOC_GC);

		jffs2_free_comprbuf(comprbuf, writebuf);

		if (IS_ERR(new_fn)) {
			printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
			ret = PTR_ERR(new_fn);

		ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
			jffs2_mark_node_obsolete(c, f->metadata->raw);
			jffs2_free_full_dnode(f->metadata);

	jffs2_gc_release_page(c, pg_ptr, &pg);